In [2]:
import numpy as np
import h5py

# Creating matrix data

In [3]:
# Two dense float matrices of uniform random values in [0, 1) used as sample payloads.
matrix1 = np.random.random((1000, 1000))
matrix2 = np.random.random((10000, 100))

In [4]:
# Store both matrices as top-level datasets in a freshly created (mode 'w') HDF5 file.
# The context manager closes the file when the block exits.
with h5py.File('hdf5_data.h5', mode='w') as hdf:
    hdf.create_dataset(name='dataset1', data=matrix1)
    hdf.create_dataset(name='dataset2', data=matrix2)


# Reading data from a file

In [6]:
# Re-open the file read-only, list its top-level keys and load 'dataset1' into memory.
with h5py.File('hdf5_data.h5', 'r') as hdf:
    ls = list(hdf.keys())
    print("List of dataset file in this file", ls)
    # Index the file instead of calling hdf.get(): a missing key raises KeyError
    # right here, whereas get() returns None and np.array(None) would silently
    # produce a 0-d object array.
    data = hdf['dataset1']
    # `[()]` reads the whole dataset into a NumPy array, so `dataset1` remains
    # usable after the file has been closed.
    dataset1 = data[()]
    print('shape of dataset1:\n', dataset1.shape)

List of dataset file in this file ['dataset1', 'dataset2']
shape of dataset1:
 (1000, 1000)


In [8]:
# List the top-level keys of the file. Using a context manager guarantees the
# file handle is released even if reading the keys or printing raises, which the
# manual open()/close() pair did not.
with h5py.File('hdf5_data.h5', 'r') as f:
    ls = list(f.keys())
    print(ls)

['dataset1', 'dataset2']


In [11]:
# Inspect the in-memory array that the earlier read cell loaded from 'dataset1';
# NumPy abbreviates the repr of an array this large.
dataset1

array([[0.20132611, 0.68578503, 0.84086683, ..., 0.35515977, 0.66585747,
        0.70699371],
       [0.31451874, 0.54648382, 0.10658811, ..., 0.43925063, 0.0941639 ,
        0.99428688],
       [0.10618247, 0.38049393, 0.91538485, ..., 0.37897668, 0.68086516,
        0.71731264],
       ...,
       [0.67027327, 0.82326684, 0.6783787 , ..., 0.21932463, 0.1150567 ,
        0.7880956 ],
       [0.33806714, 0.53288765, 0.39568457, ..., 0.48761209, 0.59682753,
        0.75211469],
       [0.98167493, 0.39812935, 0.48461909, ..., 0.59913423, 0.62056932,
        0.15857975]])

# Creating HDF5 groups

In [12]:
# Four uniform-random matrices, generated in order, to populate the grouped file.
# Note that matrix1 and matrix2 are rebound here (matrix1 now has 100 columns).
matrix1, matrix2, matrix3, matrix4 = (
    np.random.random(shape)
    for shape in [(1000, 100), (10000, 100), (1000, 1000), (1000, 1000)]
)

In [14]:
# Write an HDF5 file whose datasets are organised into groups:
#   /Group1            -> dataset1, dataset4
#   /Group2/Subgroup1  -> dataset3
#   /Group2/Subgroup2  -> dataset2
with h5py.File('hdf_data.h5', mode='w') as hdf:
    G1 = hdf.create_group('Group1')
    G1.create_dataset('dataset1', data=matrix4)
    G1.create_dataset('dataset4', data=matrix1)

    # Creating 'Group2' and then the subgroup beneath it yields the same
    # '/Group2/Subgroup1' path as passing the full path in a single call.
    G2 = hdf.create_group('Group2').create_group('Subgroup1')
    G2.create_dataset('dataset3', data=matrix3)

    G3 = hdf['Group2'].create_group('Subgroup2')
    G3.create_dataset('dataset2', data=matrix2)

# Reading group data

In [27]:
# Walk the grouped file: list the root, inspect Group1, load one dataset, then
# descend into Group2 and its first subgroup.
with h5py.File("hdf_data.h5", "r") as hdf:
    base_items = list(hdf.items())
    print('Items in the base directory:', base_items)

    # Indexing (rather than .get()) raises KeyError at the lookup if a group or
    # dataset is missing, instead of returning None and failing later with an
    # unrelated AttributeError.
    G1 = hdf['Group1']
    G1_items = list(G1.items())
    print("list of items in group1", G1_items)

    # `[()]` reads the full dataset into a NumPy array while the file is open.
    dataset4 = G1['dataset4'][()]
    print(dataset4.shape)

    G2 = hdf['Group2']
    G2_items = list(G2.items())
    # Look the subgroup up relative to G2; the previous absolute path
    # '/Group2/Subgroup1' resolved from the file root and ignored the G2 handle.
    G21 = G2['Subgroup1']
    # G2_items and G21_items are collected but not printed in this cell.
    G21_items = list(G21.items())

Items in the base directory: [('Group1', <HDF5 group "/Group1" (2 members)>), ('Group2', <HDF5 group "/Group2" (2 members)>)]
list of items in group1 [('dataset1', <HDF5 dataset "dataset1": shape (1000, 1000), type "<f8">), ('dataset4', <HDF5 dataset "dataset4": shape (1000, 100), type "<f8">)]
(1000, 100)


# Compressing datasets in a file

In [28]:
# Same group layout as the uncompressed file written to 'hdf_data.h5', but every
# dataset is stored with gzip compression.
with h5py.File('hdf_data_compressed.h5', mode='w') as hdf:
    # /Group1 holds two datasets directly.
    G1 = hdf.create_group('Group1')
    G1.create_dataset('dataset1', data=matrix4, compression="gzip")
    G1.create_dataset('dataset4', data=matrix1, compression="gzip")

    # /Group2 is created implicitly as the parent of each subgroup.
    G2 = hdf.create_group('Group2/Subgroup1')
    G2.create_dataset('dataset3', data=matrix3, compression="gzip")

    G3 = hdf.create_group('Group2/Subgroup2')
    G3.create_dataset('dataset2', data=matrix2, compression="gzip")

# pandas and HDF5

In [29]:
import pandas as pd

In [32]:
# Open (or create) the pandas HDF5 store in append mode, which is the default.
# The handle stays open across the following cells until hdf.close() is called.
hdf = pd.HDFStore('hdf5_pandas.h5', mode='a')

In [34]:
# Column-oriented sample records: two integer columns and one string column.
data = dict(
    a=[1, 2, 3, 4, 5],
    b=[0, 1, 2, 3, 4],
    c=list('abcde'),
)

In [35]:
# Build the frame from `data`, fixing the column order explicitly.
df = pd.DataFrame(data=data, columns=list('abc'))

In [36]:
# Render the frame built from `data` to check its a/b/c columns.
df

Unnamed: 0,a,b,c
0,1,0,a
1,2,1,b
2,3,2,c
3,4,3,d
4,5,4,e


In [37]:
# Store `df` under key 'DF1' in table format, with every column declared as a
# data column.
hdf.put(key='DF1', value=df, format='table', data_columns=True)

In [38]:
# Second set of sample records; this rebinds `data` with new integer values.
data = dict(
    a=[1, 23, 43, 4, 5],
    b=[0, 1, 42, 43, 54],
    c=list('abcde'),
)

In [39]:
# Build the second frame from the rebound `data`, with the same column order.
df1 = pd.DataFrame(data=data, columns=list('abc'))

In [40]:
# Store `df1` under key 'DF2' with the same table layout used for 'DF1'.
hdf.put(key='DF2', value=df1, format='table', data_columns=True)

In [41]:
# Close the store opened for writing above so the file can be reopened
# read-only in the next section.
hdf.close()

# Reading data from an HDF5 file using pandas

In [43]:
# Reopen the pandas store in read-only mode; this rebinds `hdf`.
# NOTE(review): no matching hdf.close() is visible in this part of the notebook;
# confirm the store is closed further down.
hdf = pd.HDFStore('hdf5_pandas.h5', mode='r')

In [45]:
# List the groups present in the opened store.
hdf.groups()

[/DF1 (Group) ''
   children := ['table' (Table)],
 /DF2 (Group) ''
   children := ['table' (Table)]]

In [47]:
# Load the frame stored under '/DF1'. This rebinds `df1`, which earlier held
# the frame that was written as 'DF2'.
df1 = hdf['/DF1']

In [48]:
# Preview the first rows of the frame read back from '/DF1'.
df1.head()

Unnamed: 0,a,b,c
0,1,0,a
1,2,1,b
2,3,2,c
3,4,3,d
4,5,4,e
