# DataFrame
- A 2D array of values contained within its attached row and column labels
- Heterogeneous types and missing data are supported, much as in spreadsheet programs and database frameworks
- Can be created from a NumPy array (including a 2D NumPy array), dicts, Series, or lists.

In [1]:
# Import NumPy package and load pandas
import numpy as np
import pandas as pd

## Create from dict of series

In [2]:
# Each dict key becomes a column label and each Series supplies that
# column's values. The name avoids shadowing the builtin `dict`.
series_by_column = {
    'x': pd.Series(['A', 'B', 'C']),
    'y': pd.Series([1, 2, 3]),
}
pd.DataFrame(series_by_column)

Unnamed: 0,x,y
0,A,1
1,B,2
2,C,3


## Create from dict of lists

In [3]:
# Each dict key becomes a column label and each list holds that column's
# values. The name avoids shadowing the builtin `dict`.
lists_by_column = {'x': ['A', 'B', 'C'], 'y': [1, 2, 3]}
pd.DataFrame(lists_by_column)

Unnamed: 0,x,y
0,A,1
1,B,2
2,C,3


## Create from dict of tuples

In [4]:
# Tuples work the same way as lists here: one tuple per column.
# The name avoids shadowing the builtin `dict`.
tuples_by_column = {'x': ('A', 'B', 'C'), 'y': (1, 2, 3)}
pd.DataFrame(tuples_by_column)

Unnamed: 0,x,y
0,A,1
1,B,2
2,C,3


## Create from dict of dicts

In [5]:
# Outer keys are column labels, inner keys are row labels. 'col' has no
# 'R3' entry, so that cell is left as missing (NaN) in the result.
# The name avoids shadowing the builtin `dict`.
nested_columns = {
    'col': {'R1': 1.2, 'R2': 2.2},
    'col2': {'R1': 3.2, 'R2': 4.2, 'R3': 5.5},
}
pd.DataFrame(nested_columns)

Unnamed: 0,col,col2
R1,1.2,3.2
R2,2.2,4.2
R3,,5.5


#### Note: 
- Here pandas will interpret the outer dict keys as the columns and the inner keys as the row indices.

## Create from list of lists

In [6]:
# Each inner list becomes one row; with no labels given, rows and columns
# get default integer labels. The name avoids shadowing the builtin `list`.
rows = [['A', 'B', 'C'], [1, 2, 3]]
pd.DataFrame(rows)

Unnamed: 0,0,1,2
0,A,B,C
1,1,2,3


## Create from list of dicts

In [7]:
# Each dict becomes one row and its keys become column labels; the first
# record has no 'R3' key, so that cell is left as missing (NaN).
# The name avoids shadowing the builtin `list`.
records = [
    {'R1': 1.2, 'R2': 2.2},
    {'R1': 3.2, 'R2': 4.2, 'R3': 5.5},
]
pd.DataFrame(records, index=['col1', 'col2'])

Unnamed: 0,R1,R2,R3
col1,1.2,2.2,
col2,3.2,4.2,5.5


## Create with custom index

In [8]:
# Columns appear in the order the keys are written (col1, col3, col2, col4);
# `index` supplies one row label per value in each column.
# The name avoids shadowing the builtin `dict`.
data = {
    'col1': [1, 2, 3, 4],
    'col3': [5, 6, 7, 8],
    'col2': ['A', 'B', 'C', 'D'],
    'col4': ['X', 'Y', 'Z', 'D'],
}
pd.DataFrame(data, index=['R1', 'R2', 'R3', 'R4'])

Unnamed: 0,col1,col3,col2,col4
R1,1,5,A,X
R2,2,6,B,Y
R3,3,7,C,Z
R4,4,8,D,D


#### Note : 
- When assigning lists or arrays to a column, the value’s length must match the length of the DataFrame.
- Index can contain duplicate labels

## Create from a NumPy array (random values, then with custom index and columns)

In [9]:
# 5 rows x 6 columns of random draws; no labels are passed, so the frame
# gets default integer row and column labels. Values differ on every run.
pd.DataFrame(
    data=np.random.randn(5, 6),
)

Unnamed: 0,0,1,2,3,4,5
0,1.388277,-0.785526,-0.1013,1.247383,0.1698,-0.573173
1,-1.508136,1.188635,0.982837,-0.909569,-0.840741,-0.513644
2,0.390256,-2.105114,2.105287,0.039404,0.412655,1.422979
3,-1.261689,2.574962,-1.753315,1.910589,0.200785,0.674465
4,-1.231442,0.485914,1.221517,-0.841505,0.240622,-0.070613


In [10]:
# Lay the integers 0..8 out as a 3x3 grid and label rows and columns
# explicitly instead of relying on the default integer labels.
pd.DataFrame(
    data=np.arange(9).reshape(3, 3),
    index=['R1', 'R2', 'R3'],
    columns=['col1', 'col2', 'col3'],
)

Unnamed: 0,col1,col2,col3
R1,0,1,2
R2,3,4,5
R3,6,7,8


## Create from a string

In [11]:
import sys

# Show which interpreter is running; it decides the import branch below.
print(sys.version_info)

# StringIO is imported from `io` on Python 3 and from the standalone
# `StringIO` module on Python 2.
if sys.version_info[0] >= 3:
    from io import StringIO
else:
    from StringIO import StringIO

sys.version_info(major=3, minor=11, micro=4, releaselevel='final', serial=0)


In [12]:
# Whitespace-aligned table held in an in-memory text buffer, so it can be
# parsed like a file without touching disk.
data1 = StringIO("""
Date       Fruit  Num  Color 
2013-11-24 Banana 22.1 Yellow
2013-11-24 Orange  8.6 Orange
2013-11-24 Apple   7.6 Green
2013-11-24 Celery 10.2 Green
""")

# The separator is a regex meaning "one or more whitespace characters".
# It is written as a raw string because '\s' is not a valid Python string
# escape and triggers an invalid-escape warning on recent Python versions.
df = pd.read_table(data1, sep=r'\s+')
df

Unnamed: 0,Date,Fruit,Num,Color
0,2013-11-24,Banana,22.1,Yellow
1,2013-11-24,Orange,8.6,Orange
2,2013-11-24,Apple,7.6,Green
3,2013-11-24,Celery,10.2,Green
