# Special I/O


In [1]:
import h5rdmtoolbox as h5tbx
# Select no convention for this notebook.
# NOTE(review): presumably passing None switches off convention-specific
# requirements for the datasets created below — confirm in the h5rdmtoolbox docs.
h5tbx.use(None)

<h5rdmtoolbox.conventions.core.use at 0x1ccd6b14fd0>

## Creating datasets from CSV file(s)
Datasets can be created directly from a single file or from multiple files. Let's first create two simple CSV files:

In [2]:
import pandas as pd
import numpy as np

# Fix the seed of NumPy's global random generator so the sample data is reproducible
np.random.seed(100)

# First CSV file: 4 rows with two columns of random values
df = pd.DataFrame({'x': np.random.random((4, )),
                   'y': np.random.random((4, ))})
csv_filename1 = h5tbx.utils.generate_temporary_filename(suffix='.csv')
# `index` is documented as a bool: False writes only the columns x and y,
# not the row index
df.to_csv(csv_filename1, index=False)
df

Unnamed: 0,x,y
0,0.543405,0.004719
1,0.278369,0.121569
2,0.424518,0.670749
3,0.844776,0.825853


In [3]:
# Second CSV file: 8 rows with two columns of random values
df = pd.DataFrame({'x': np.random.random((8, )),
                   'y': np.random.random((8, ))})
csv_filename2 = h5tbx.utils.generate_temporary_filename(suffix='.csv')
# `index` is documented as a bool: False writes only the columns x and y,
# not the row index
df.to_csv(csv_filename2, index=False)
df

Unnamed: 0,x,y
0,0.136707,0.811683
1,0.575093,0.171941
2,0.891322,0.816225
3,0.209202,0.274074
4,0.185328,0.431704
5,0.108377,0.94003
6,0.219697,0.817649
7,0.978624,0.336112


Create from a single file:

In [4]:
# Create dataset(s) from the first CSV file, then call dump() on the file.
# NOTE(review): File() is called without a filename — presumably a temporary
# HDF5 file is created, and dump() presumably renders its content in the
# notebook; confirm against the h5rdmtoolbox documentation.
with h5tbx.File() as h5:
    h5.create_dataset_from_csv(csv_filename=csv_filename1)
    h5.dump()

When creating datasets from multiple CSV files, you must decide whether to stack them (the datasets must then have the same size) or to concatenate them:

... concatenating:

In [5]:
# Both CSV files (4 and 8 rows) are handed over in a single call and combined
# with the 'concatenate' option.
files_to_concatenate = [csv_filename1, csv_filename2]
with h5tbx.File() as h5:
    h5.create_datasets_from_csv(csv_filenames=files_to_concatenate,
                                combine_opt='concatenate')
    h5.dump()

... stacking:

In [6]:
# The second CSV file is listed twice, so both inputs have the same number of
# rows, and the two are combined with the 'stack' option.
files_to_stack = [csv_filename2] * 2
with h5tbx.File() as h5:
    h5.create_datasets_from_csv(csv_filenames=files_to_stack,
                                combine_opt='stack')
    h5.dump()

## Creating datasets from image file(s)
A dataset can be created from image data. The data can be provided as a list of numpy arrays:

In [7]:
with h5tbx.File() as h5:
    # One random 20x10 array is drawn; the list holds five references to
    # that same array object (the images are identical).
    frame = np.random.random((20, 10))
    images = [frame for _ in range(5)]
    h5.create_dataset_from_image(images, 'testimg', axis=0)
    h5.dump()

... or as an iterable object which provides the image data one at a time:

In [8]:
class ImgReader:
    """Dummy image reader that serves a fixed number of random images.

    Stands in for a reader that would load images from a directory. The
    object is its own iterator; every call to ``iter()`` rewinds it, so the
    reader can be traversed repeatedly and yields ``len(self)`` images per
    pass.

    Parameters
    ----------
    imgdir
        Directory the images would be read from. It is stored but not
        accessed by this dummy implementation.
    """

    def __init__(self, imgdir):
        self._imgdir = imgdir
        self._index = 0  # number of images handed out in the current pass
        self._size = 5  # number of images per pass

    def read_img(self):
        """Return one image as a (20, 10) array of random floats.

        A real use case would read the image from file instead.
        """
        return np.random.random((20, 10))

    def __iter__(self):
        """Rewind to the first image and return the reader itself."""
        # Without this reset a second loop over the same reader would yield
        # nothing, although len() still reports the full number of images.
        self._index = 0
        return self

    def __len__(self):
        """Return the number of images yielded per pass."""
        return self._size

    def __next__(self):
        """Return the next image; raise StopIteration after the last one."""
        if self._index >= self._size:
            raise StopIteration
        self._index += 1
        return self.read_img()

In [9]:
# Hand over the iterable reader object instead of a list of arrays.
imgreader = ImgReader('testdir')  # the dummy reader only stores the directory name
with h5tbx.File() as h5:
    # Same dataset name and axis argument as in the list-based example
    h5.create_dataset_from_image(imgreader, 'testimg', axis=0)
    h5.dump()