### How To Create a Pandas DataFrame   
Sometimes, you will want to start from scratch, but you can also convert other data structures, such as lists or NumPy arrays, to Pandas DataFrames. To make a data frame from a NumPy array, you can just pass it to the DataFrame() function in the data argument.

In [1]:
import pandas as pd
import numpy as np
# Build the whole table as one 2-D array: the first row carries the column
# labels and the first column carries the row labels.
# NumPy arrays are homogeneous, so mixing text and numbers coerces every
# element (including 1, 2, 3, 4) to a string.
data = np.array([['', 'Col1', 'Col2'],
                 ['Row1', 1, 2],
                 ['Row2', 3, 4]])

# Slice the array into body / row labels / column labels. The body is cast
# back to int so the DataFrame holds numbers rather than the strings
# produced by the coercion above.
df = pd.DataFrame(data=data[1:, 1:].astype(int),
                  index=data[1:, 0],
                  columns=data[0, 1:])
df

Unnamed: 0,Col1,Col2
Row1,1,2
Row2,3,4


In [2]:
# Example 2: a plain 2-D NumPy array becomes a DataFrame with default
# integer labels for both rows and columns.
my_2darray = np.array([
    [1, 2, 3],
    [4, 5, 6],
])
df = pd.DataFrame(data=my_2darray)
df

Unnamed: 0,0,1,2
0,1,2,3
1,4,5,6


In [3]:
# From a dictionary: each key becomes a column label and each list
# supplies that column's values.
my_dict = {
    1: ['1', '3'],
    2: ['1', '2'],
    3: ['2', '4'],
}
df = pd.DataFrame(data=my_dict)
df

Unnamed: 0,1,2,3
0,1,1,2
1,3,2,4


In [4]:
# From a flat list: a single column named 'A' with an explicit 0..3 row index.
my_df = pd.DataFrame(
    data=[4, 5, 6, 7],
    index=range(4),
    columns=['A'],
)
my_df

Unnamed: 0,A
0,4
1,5
2,6
3,7


In [5]:
# A Series can be passed straight to DataFrame(): its index labels the rows
# and its values fill a single column.
my_series = pd.Series({
    "Belgium": "Brussels",
    "India": "New Delhi",
    "United Kingdom": "London",
    "United States": "Washington",
})
pd.DataFrame(my_series)

Unnamed: 0,0
Belgium,Brussels
India,New Delhi
United Kingdom,London
United States,Washington


## Pandas Dataframe to Numpy Array and store in HDF5

In [7]:
import h5py
# Seed NumPy's global random generator so the random frame below is
# reproducible from run to run.
np.random.seed(1234)

# 6 rows x 4 columns of standard-normal draws, with columns labelled A-D.
df = pd.DataFrame(
    data=np.random.randn(6, 4),
    columns=['A', 'B', 'C', 'D'],
)
df

Unnamed: 0,A,B,C,D
0,0.471435,-1.190976,1.432707,-0.312652
1,-0.720589,0.887163,0.859588,-0.636524
2,0.015696,-2.242685,1.150036,0.991946
3,0.953324,-2.021255,-0.334077,0.002118
4,0.405453,0.289092,1.321158,-1.546906
5,-0.202646,-0.655969,0.193421,0.553439


In [8]:
# DataFrame.as_matrix() was deprecated and then removed in pandas 1.0, so it
# raises AttributeError on current pandas. to_numpy() (pandas >= 0.24) is the
# supported way to get the frame's data as a NumPy array.
df.to_numpy()

array([[  4.71435164e-01,  -1.19097569e+00,   1.43270697e+00,
         -3.12651896e-01],
       [ -7.20588733e-01,   8.87162940e-01,   8.59588414e-01,
         -6.36523504e-01],
       [  1.56963721e-02,  -2.24268495e+00,   1.15003572e+00,
          9.91946022e-01],
       [  9.53324128e-01,  -2.02125482e+00,  -3.34077366e-01,
          2.11836468e-03],
       [  4.05453412e-01,   2.89091941e-01,   1.32115819e+00,
         -1.54690555e+00],
       [ -2.02646325e-01,  -6.55969344e-01,   1.93421376e-01,
          5.53438911e-01]])

In [9]:
# The .values attribute exposes the frame's data as a NumPy array.
df.values

array([[  4.71435164e-01,  -1.19097569e+00,   1.43270697e+00,
         -3.12651896e-01],
       [ -7.20588733e-01,   8.87162940e-01,   8.59588414e-01,
         -6.36523504e-01],
       [  1.56963721e-02,  -2.24268495e+00,   1.15003572e+00,
          9.91946022e-01],
       [  9.53324128e-01,  -2.02125482e+00,  -3.34077366e-01,
          2.11836468e-03],
       [  4.05453412e-01,   2.89091941e-01,   1.32115819e+00,
         -1.54690555e+00],
       [ -2.02646325e-01,  -6.55969344e-01,   1.93421376e-01,
          5.53438911e-01]])

In [10]:
# Convert the frame to a NumPy record array with one named field per column;
# index=False leaves the row index out of the records.
df_to_nparray = df.to_records(index=False)
df_to_nparray

rec.array([( 0.47143516, -1.19097569,  1.43270697, -0.3126519 ),
           (-0.72058873,  0.88716294,  0.85958841, -0.6365235 ),
           ( 0.01569637, -2.24268495,  1.15003572,  0.99194602),
           ( 0.95332413, -2.02125482, -0.33407737,  0.00211836),
           ( 0.40545341,  0.28909194,  1.32115819, -1.54690555),
           (-0.20264632, -0.65596934,  0.19342138,  0.55343891)], 
          dtype=[('A', '<f8'), ('B', '<f8'), ('C', '<f8'), ('D', '<f8')])

In [11]:
# Open (or create) the HDF5 file.
# 'a' ->  Read/write if exists, create otherwise (default)
# The `with` block closes the file even if the write below raises.
with h5py.File('tuto_myfile.hdf5', 'a') as f:
    # Mode 'a' keeps whatever is already in the file, and assigning to a
    # dataset name that already exists raises an error. Drop any 'dset'
    # left over from a previous run so this cell can be re-executed.
    if 'dset' in f:
        del f['dset']

    # create dataset from the record array
    f['dset'] = df_to_nparray

To inspect the contents of the HDF5 file from the command line, install the `hdf5-tools` package.

On Ubuntu:

$ sudo apt-get install hdf5-tools

Then dump the file's contents:

$ h5dump tuto_myfile.hdf5 

In [12]:
# read from hdf5
# NOTE: the file handle `f` is deliberately left open at the end of this
# cell; it is closed in a later cell, after the dataset has been read.

# open file
# 'r' -> Readonly, file must exist
f = h5py.File('tuto_myfile.hdf5', 'r')

# load dataset: dset
# Indexing the file by name returns an h5py dataset handle (see the repr
# below), not the data itself.
dset = f['dset']
dset

<HDF5 dataset "dset": shape (6,), type "|V32">

In [13]:
# Read the whole dataset into an in-memory NumPy array (`...` selects
# everything) while the file is still open, then close the file.
a = dset[...]
f.close()

In [14]:
# Display the array that was read from the HDF5 dataset.
a

array([( 0.47143516, -1.19097569,  1.43270697, -0.3126519 ),
       (-0.72058873,  0.88716294,  0.85958841, -0.6365235 ),
       ( 0.01569637, -2.24268495,  1.15003572,  0.99194602),
       ( 0.95332413, -2.02125482, -0.33407737,  0.00211836),
       ( 0.40545341,  0.28909194,  1.32115819, -1.54690555),
       (-0.20264632, -0.65596934,  0.19342138,  0.55343891)],
      dtype=[('A', '<f8'), ('B', '<f8'), ('C', '<f8'), ('D', '<f8')])

In [15]:
# Reading HDF5 directly with pandas; see the "HDF5 (PyTables)" section of
# the pandas IO documentation:
# http://pandas.pydata.org/pandas-docs/dev/io.html#hdf5-pytables

# Load the 'dset' dataset from the file straight into a DataFrame.
df2 = pd.read_hdf('tuto_myfile.hdf5', key='dset')

In [16]:
# Display the DataFrame that pandas loaded from the HDF5 file.
df2

Unnamed: 0,A,B,C,D
0,0.471435,-1.190976,1.432707,-0.312652
1,-0.720589,0.887163,0.859588,-0.636524
2,0.015696,-2.242685,1.150036,0.991946
3,0.953324,-2.021255,-0.334077,0.002118
4,0.405453,0.289092,1.321158,-1.546906
5,-0.202646,-0.655969,0.193421,0.553439
