Pandas DataFrames extend NumPy 2D arrays by adding labels to the columns and, optionally, an explicit index that labels the rows.
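This notebook contains code samples for:

- building a DataFrame from a pair of Series and from a list of dictionaries
- inspecting the underlying values and the row index
- selecting a column, and slicing rows by position (`iloc`) and by label (`loc`)
- adding a derived column
- promoting a column to the row index with `set_index`, then looking up rows and single values by label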


In [1]:
import numpy as np
import pandas as pd

In [39]:
# 2010 prices as a Series whose index labels are the product names.
prices2010 = pd.Series(
    data=[10, 20, 30, 40, 50],
    index=['Pencil', 'Pen', 'Notebook', 'PrintCartridge', 'IpadShuffle'],
)

In [40]:
# 2011 prices as a Series whose index labels are the product names.
prices2011 = pd.Series(
    data=[11, 22, 33, 44, 55],
    index=['Pencil', 'Pen', 'Notebook', 'PrintCartridge', 'IpadShuffle'],
)

In [41]:
# Assemble the two yearly price Series into one table: each dict key becomes a
# column label and the rows are aligned on the Series' product-name index.
frame = pd.DataFrame(
    data={
        '2010': prices2010,
        '2011': prices2011,
    }
)
frame

Unnamed: 0,2010,2011
Pencil,10,11
Pen,20,22
Notebook,30,33
PrintCartridge,40,44
IpadShuffle,50,55


In [42]:
# The table's data as a plain 2D NumPy array (one row per product, one column
# per year); row and column labels are not part of the array.
# NOTE(review): newer pandas releases document frame.to_numpy() as the
# preferred spelling of this accessor.
frame.values

array([[10, 11],
       [20, 22],
       [30, 33],
       [40, 44],
       [50, 55]], dtype=int64)

In [43]:
# The row labels (here the product names inherited from the Series).
frame.index

Index(['Pencil', 'Pen', 'Notebook', 'PrintCartridge', 'IpadShuffle'], dtype='object')

In [44]:
# Indexing with a column label returns that column as a Series,
# still labelled by the row index.
frame['2010']

Pencil            10
Pen               20
Notebook          30
PrintCartridge    40
IpadShuffle       50
Name: 2010, dtype: int64

In [45]:
# Positional row slice: rows at integer positions 0 and 1 (the end of the
# slice is excluded, as with NumPy/Python slicing).
frame.iloc[0:2]

Unnamed: 0,2010,2011
Pencil,10,11
Pen,20,22


In [46]:
# Label-based row slice: unlike positional slicing, the end label
# ('PrintCartridge') is included in the result.
frame.loc['Pencil':'PrintCartridge']

Unnamed: 0,2010,2011
Pencil,10,11
Pen,20,22
Notebook,30,33
PrintCartridge,40,44


In [47]:
# Add a derived column: the year-over-year price change per product,
# computed element-wise with rows matched by their index label.
frame['difference'] = frame['2011'].sub(frame['2010'])
frame

Unnamed: 0,2010,2011,difference
Pencil,10,11,1
Pen,20,22,2
Notebook,30,33,3
PrintCartridge,40,44,4
IpadShuffle,50,55,5


In [49]:
# Build a frame from a list of records: each dict is one row and its keys
# become the column labels. No index is given, so rows are numbered 0..2.
personnel = pd.DataFrame([
    dict(name='John', DOB='1976', Dept='Telecom'),
    dict(name='Matt', DOB='1978', Dept='Networks'),
    dict(name='Justin', DOB='1980', Dept='Human Resources'),
])
personnel

Unnamed: 0,DOB,Dept,name
0,1976,Telecom,John
1,1978,Networks,Matt
2,1980,Human Resources,Justin


In [51]:
# Promote the 'name' column to the row index so rows can be looked up by
# person; the result is bound to a new name rather than overwriting `personnel`.
personnel_indexed = personnel.set_index(keys='name')
personnel_indexed

Unnamed: 0_level_0,DOB,Dept
name,Unnamed: 1_level_1,Unnamed: 2_level_1
John,1976,Telecom
Matt,1978,Networks
Justin,1980,Human Resources


In [53]:
# Selecting a single row label with .loc returns that row as a Series
# whose index is the column labels.
personnel_indexed.loc['John']

DOB        1976
Dept    Telecom
Name: John, dtype: object

In [54]:
# Look up a single cell by row label and column label in one .loc call,
# rather than chaining a second [] lookup onto the row returned by .loc['John'].
personnel_indexed.loc['John', 'Dept']

'Telecom'

In [55]:
# Selecting a column returns a Series labelled by the 'name' index.
personnel_indexed['Dept']

name
John              Telecom
Matt             Networks
Justin    Human Resources
Name: Dept, dtype: object

In [56]:
# .at is the label-based accessor for exactly one cell (row label, column label);
# it replaces the chained column-then-row lookup personnel_indexed['Dept']['John'].
personnel_indexed.at['John', 'Dept']

'Telecom'