# HDF5 storage with pandas `HDFStore`

In [1]:
import pandas as pd
import numpy as np

In [2]:
import time
from contextlib import contextmanager
@contextmanager
def timer(name):
    """Context manager that prints how long the enclosed block took.

    Parameters
    ----------
    name : str
        Label shown in square brackets in the printed line.

    The elapsed time is printed as whole seconds in the form
    ``[name] done in N s``.
    """
    # perf_counter is monotonic, so the measurement cannot go negative if
    # the system clock is adjusted while the block runs.
    t0 = time.perf_counter()
    try:
        yield
    finally:
        # Report the duration even when the timed block raises; the
        # exception itself still propagates to the caller.
        print(f'[{name}] done in {time.perf_counter() - t0:.0f} s')

In [10]:
def write_hdf(dh, key, path="", name="storage.h5"):
    """Write one DataFrame, or several, into a single HDF5 file.

    Parameters
    ----------
    dh : DataFrame, or list/tuple of DataFrame
        The frame(s) to store.
    key : str, or iterable of str
        Store key for ``dh``. When ``dh`` is a list/tuple, one key per
        frame, in the same order.
    path : str
        Prefix concatenated directly with ``name``. No separator is
        inserted, so a directory prefix must already end with one.
    name : str
        File name of the HDF5 store.

    Raises
    ------
    ValueError
        If ``dh`` is a list/tuple and ``key`` is a single string or does
        not contain exactly one key per frame.

    Each frame is stored with ``format='table'`` and ``data_columns=True``.
    When several frames are written, the store summary is printed.
    """
    path = path + name

    many = isinstance(dh, (list, tuple))
    if many:
        # Validate before touching the file: zip() would otherwise silently
        # drop frames on a length mismatch, or iterate over the characters
        # of a single string key.
        if isinstance(key, str):
            raise ValueError("key must be a sequence of keys when dh is a sequence of frames")
        keys = list(key)
        if len(keys) != len(dh):
            raise ValueError(
                f"got {len(dh)} frames but {len(keys)} keys; they must match one-to-one"
            )

    # The context manager closes the store even if a put() fails.
    with pd.HDFStore(path) as hdf:
        if not many:
            hdf.put(key, dh, format='table', data_columns=True)
        else:
            for dh1, key1 in zip(dh, keys):
                hdf.put(key1, dh1, format='table', data_columns=True)
            print(hdf)

In [4]:
def read_hdf(path="", name="storage.h5"):
    """Load every object stored in an HDF5 file.

    Parameters
    ----------
    path : str
        Prefix concatenated directly with ``name``. No separator is
        inserted, so a directory prefix must already end with one.
    name : str
        File name of the HDF5 store.

    Returns
    -------
    DataFrame or list
        The stored object itself when the file holds exactly one key.
        Otherwise a list with one object per key, in ``keys()`` order
        (empty when the store has no keys); the keys are printed first.

    Notes
    -----
    The store is opened with ``HDFStore``'s default mode, as before.
    NOTE(review): per the pandas docs that default is append, which
    creates the file when it does not exist -- confirm whether a
    read-only open is wanted here.
    """
    path = path + name

    # The context manager releases the file handle on every exit path;
    # previously the store was left open after returning.
    with pd.HDFStore(path) as hdf:
        keys = hdf.keys()

        if len(keys) == 1:
            return hdf[keys[0]]

        print(keys)
        return [hdf[key] for key in keys]

# Test 1: round-trip a single DataFrame

In [5]:
# Fixture for the single-frame round trip: 5 rows x 3 columns of random floats.
df = pd.DataFrame(
    data=np.random.rand(5, 3),
    columns=('A', 'B', 'C'),
)

In [6]:
write_hdf(df, "df", name="df.h5")

In [7]:
read_hdf(name="df.h5")

Unnamed: 0,A,B,C
0,0.086086,0.608637,0.571792
1,0.341429,0.10641,0.932707
2,0.085437,0.871262,0.032328
3,0.718284,0.746658,0.987274
4,0.174593,0.357461,0.546909


# Test 2: round-trip several DataFrames in one file

In [8]:
# Three random frames sharing columns A/B/C but with 5, 6 and 7 rows, so the
# frames can be told apart by shape after a multi-key round trip.
df1, df2, df3 = (
    pd.DataFrame(np.random.rand(n_rows, 3), columns=('A', 'B', 'C'))
    for n_rows in (5, 6, 7)
)

In [11]:
write_hdf([df1, df2, df3], ["df1", "df2", "df3"], name="df2.h5")

<class 'pandas.io.pytables.HDFStore'>
File path: df2.h5
/df1            frame_table  (typ->appendable,nrows->5,ncols->3,indexers->[index],dc->[A,B,C])
/df2            frame_table  (typ->appendable,nrows->6,ncols->3,indexers->[index],dc->[A,B,C])
/df3            frame_table  (typ->appendable,nrows->7,ncols->3,indexers->[index],dc->[A,B,C])


In [12]:
read_hdf(name="df2.h5")

['/df1', '/df2', '/df3']


[          A         B         C
 0  0.653228  0.345389  0.602371
 1  0.320084  0.090280  0.062368
 2  0.328028  0.047924  0.320960
 3  0.867612  0.784485  0.852395
 4  0.766220  0.962680  0.518686,           A         B         C
 0  0.544559  0.693761  0.501412
 1  0.999369  0.360658  0.305848
 2  0.957762  0.748511  0.919476
 3  0.899661  0.840745  0.820415
 4  0.189833  0.620796  0.001242
 5  0.634621  0.744584  0.313220,           A         B         C
 0  0.562698  0.345557  0.492980
 1  0.017897  0.788176  0.571837
 2  0.085626  0.035146  0.916957
 3  0.793460  0.155037  0.540743
 4  0.411373  0.118247  0.614968
 5  0.021767  0.823483  0.636586
 6  0.211377  0.614652  0.486537]

In [40]:
read_hdf(name="df.h5")

Unnamed: 0,A,B,C
0,0.901197,0.333985,0.760516
1,0.176274,0.398713,0.75865
2,0.83234,0.755448,0.472261
3,0.539078,0.206933,0.807127
4,0.558323,0.080625,0.227569


In [26]:
# Use the pd namespace imported at the top of the notebook: the bare
# DataFrame name is only imported in a later cell, so on a fresh kernel
# run top-to-bottom this cell raised NameError.
df = pd.DataFrame(np.random.rand(5,3), 
           columns=('A','B','C'))

In [27]:
write_hdf(df, "df", name="df.h5")

In [28]:
# Open the store through the pd namespace: the bare HDFStore name is only
# imported in a later cell, so this cell failed on a fresh top-to-bottom run.
# The handle stays open for the inspection cells that follow.
hdf = pd.HDFStore('df.h5')

In [29]:
hdf.keys()

['/df']

In [30]:
df

Unnamed: 0,A,B,C
0,0.901197,0.333985,0.760516
1,0.176274,0.398713,0.75865
2,0.83234,0.755448,0.472261
3,0.539078,0.206933,0.807127
4,0.558323,0.080625,0.227569


In [31]:
hdf["df"]

Unnamed: 0,A,B,C
0,0.901197,0.333985,0.760516
1,0.176274,0.398713,0.75865
2,0.83234,0.755448,0.472261
3,0.539078,0.206933,0.807127
4,0.558323,0.080625,0.227569


In [7]:
import numpy as np
from pandas import HDFStore,DataFrame
# Create the HDF5 file, or open it if it already exists (opened in append mode).
hdf = HDFStore('storage.h5')

In [8]:
# Build a 5x3 random frame and store it under key 'd1' in table format.
df = DataFrame(
    np.random.rand(5, 3),
    columns=('A', 'B', 'C'),
)
hdf.put('d1', df, format='table', data_columns=True)

In [9]:
hdf['d1'].shape

(5, 3)

In [10]:
# Append five more random rows to the table already stored under 'd1'.
hdf.append(
    'd1',
    DataFrame(np.random.rand(5, 3), columns=('A', 'B', 'C')),
    format='table',
    data_columns=True,
)

In [11]:
hdf['d1'].shape

(10, 3)

In [14]:
hdf.keys()

['/d1']

In [17]:
# Demonstration: list() cannot iterate over a bare int, so a scalar has to be
# special-cased instead of being coerced with list(). The error is caught and
# displayed so that the cell does not halt a Run All.
a = 1
try:
    for i in list(a):
        print(a)
except TypeError as err:
    print(f"{type(err).__name__}: {err}")

TypeError: 'int' object is not iterable

In [23]:
def write_hdf(dh):
    """Scratch check of the list-vs-single dispatch.

    Prints ``**`` when ``dh`` is exactly a ``list``; otherwise prints
    ``dh`` itself.

    NOTE(review): this reuses the name ``write_hdf``, so running this cell
    rebinds the HDF5 writer defined earlier in the notebook.
    """
    is_plain_list = type(dh) is list
    print("**" if is_plain_list else dh)

In [24]:
write_hdf(2)

2


In [18]:
a = [1, 2, 3]

In [19]:
type(a)

list