# Data Frames in Pandas

In [1]:
import numpy as np
import pandas as pd

In [6]:
# Bring randn (NumPy's standard-normal sampler) into the namespace for the frame built below.
from numpy.random import randn
# Fix the seed of NumPy's global generator so the random values are reproducible on re-run.
np.random.seed(101)

In [7]:
# Build a 5x4 frame of standard-normal draws: rows labelled A-E, columns labelled W-Z.
df = pd.DataFrame(
    data=np.random.randn(5, 4),
    index='A B C D E'.split(),
    columns='W X Y Z'.split(),
)

In [8]:
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


### Selection and Indexing

In [9]:
# Bracket selection with a single label returns the 'W' column as a Series.
df['W']

A    2.706850
B    0.651118
C   -2.018168
D    0.188695
E    0.190794
Name: W, dtype: float64

In [11]:
# Passing a list of labels returns a DataFrame containing just those columns.
df[['W','X']]

Unnamed: 0,W,X
A,2.70685,0.628133
B,0.651118,-0.319318
C,-2.018168,0.740122
D,0.188695,-0.758872
E,0.190794,1.978757


In [12]:
# Confirm that selecting one column yields a Series.
type(df['W'])

pandas.core.series.Series

In [13]:
# Confirm that selecting with a list of columns yields a DataFrame.
type(df[['W','X']])

pandas.core.frame.DataFrame

#### Creating a new column

In [15]:
# Add a column 'new' holding the element-wise sum of columns 'W' and 'Z'.
df['new'] = df['W'] + df['Z']

In [16]:
df

Unnamed: 0,W,X,Y,Z,new
A,2.70685,0.628133,0.907969,0.503826,3.210676
B,0.651118,-0.319318,-0.848077,0.605965,1.257083
C,-2.018168,0.740122,0.528813,-0.589001,-2.607169
D,0.188695,-0.758872,-0.933237,0.955057,1.143752
E,0.190794,1.978757,2.605967,0.683509,0.874303


##### Note: To delete a column, pass `axis=1` as a parameter; to delete a row, pass `axis=0`.
By default, `axis` is 0 (rows).

In [18]:
# Returns a copy without the 'new' column; the result is only displayed, so df itself is unchanged.
df.drop('new',axis=1)

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [19]:
df

Unnamed: 0,W,X,Y,Z,new
A,2.70685,0.628133,0.907969,0.503826,3.210676
B,0.651118,-0.319318,-0.848077,0.605965,1.257083
C,-2.018168,0.740122,0.528813,-0.589001,-2.607169
D,0.188695,-0.758872,-0.933237,0.955057,1.143752
E,0.190794,1.978757,2.605967,0.683509,0.874303


Note: To permanently delete a column, set the `inplace` parameter to `True`; otherwise `drop` returns a new DataFrame and leaves `df` unchanged.

In [20]:
# inplace=True removes column 'W' from df itself, so the cell displays nothing.
df.drop('W',axis=1,inplace=True)

In [21]:
df

Unnamed: 0,X,Y,Z,new
A,0.628133,0.907969,0.503826,3.210676
B,-0.319318,-0.848077,0.605965,1.257083
C,0.740122,0.528813,-0.589001,-2.607169
D,-0.758872,-0.933237,0.955057,1.143752
E,1.978757,2.605967,0.683509,0.874303


In [22]:
# No axis is given, so the label is looked up in the row index: row 'A' is removed from df in place.
df.drop('A',inplace=True)

In [23]:
df

Unnamed: 0,X,Y,Z,new
B,-0.319318,-0.848077,0.605965,1.257083
C,0.740122,0.528813,-0.589001,-2.607169
D,-0.758872,-0.933237,0.955057,1.143752
E,1.978757,2.605967,0.683509,0.874303


In [26]:
# Label-based lookup: the row whose index label is 'B', returned as a Series.
df.loc['B']

X     -0.319318
Y     -0.848077
Z      0.605965
new    1.257083
Name: B, dtype: float64

In [27]:
# Position-based lookup: the first row (here 'B', because row 'A' was dropped earlier).
df.iloc[0]

X     -0.319318
Y     -0.848077
Z      0.605965
new    1.257083
Name: B, dtype: float64

In [28]:
# A row label and a column label together select a single scalar value.
df.loc['B','Z']

0.6059653494949336

In [31]:
# A list of row labels with one column label gives a Series over rows 'B' and 'D'.
df.loc[['B','D'],'X']

B   -0.319318
D   -0.758872
Name: X, dtype: float64

In [32]:
# Lists for both rows and columns give a DataFrame sub-block.
df.loc[['B','D'],['X','Y']]

Unnamed: 0,X,Y
B,-0.319318,-0.848077
D,-0.758872,-0.933237


In [33]:
df

Unnamed: 0,X,Y,Z,new
B,-0.319318,-0.848077,0.605965,1.257083
C,0.740122,0.528813,-0.589001,-2.607169
D,-0.758872,-0.933237,0.955057,1.143752
E,1.978757,2.605967,0.683509,0.874303


In [34]:
# Element-wise comparison: a DataFrame of booleans with the same rows and columns as df.
df>0

Unnamed: 0,X,Y,Z,new
B,False,False,True,True
C,True,True,False,False
D,False,False,True,True
E,True,True,True,True


In [35]:
bool = df>0

In [36]:
bool

Unnamed: 0,X,Y,Z,new
B,False,False,True,True
C,True,True,False,False
D,False,False,True,True
E,True,True,True,True


In [37]:
df[bool]

Unnamed: 0,X,Y,Z,new
B,,,0.605965,1.257083
C,0.740122,0.528813,,
D,,,0.955057,1.143752
E,1.978757,2.605967,0.683509,0.874303


In [40]:
# Comparing a single column yields a boolean Series with one entry per row of df.
df['X']>0

B    False
C     True
D    False
E     True
Name: X, dtype: bool

In [39]:
# A boolean Series inside brackets filters rows, keeping those where 'X' is positive.
df[df['X']>0]

Unnamed: 0,X,Y,Z,new
C,0.740122,0.528813,-0.589001,-2.607169
E,1.978757,2.605967,0.683509,0.874303


In [42]:
# Keep the filtered rows (those with 'X' > 0) in a separate variable.
result = df[df['X']>0]

In [43]:
result

Unnamed: 0,X,Y,Z,new
C,0.740122,0.528813,-0.589001,-2.607169
E,1.978757,2.605967,0.683509,0.874303


In [44]:
# Select column 'X' from the already-filtered frame.
result['X']

C    0.740122
E    1.978757
Name: X, dtype: float64

In [45]:
# Rows where 'X' is positive, column 'X' only, selected in a single .loc call.
df.loc[df['X']>0, 'X']

C    0.740122
E    1.978757
Name: X, dtype: float64

In [46]:
df

Unnamed: 0,X,Y,Z,new
B,-0.319318,-0.848077,0.605965,1.257083
C,0.740122,0.528813,-0.589001,-2.607169
D,-0.758872,-0.933237,0.955057,1.143752
E,1.978757,2.605967,0.683509,0.874303


In [48]:
# Rows where 'X' is positive, restricted to columns 'X' and 'Z', in a single .loc call.
df.loc[df['X']>0, ['X','Z']]

Unnamed: 0,X,Z
C,0.740122,-0.589001
E,1.978757,0.683509


In [52]:
# Combine row conditions with & (element-wise AND). Each comparison is parenthesised
# because & binds more tightly than > in Python.
df[(df['X']>0) & (df['Y']>0)]

Unnamed: 0,X,Y,Z,new
C,0.740122,0.528813,-0.589001,-2.607169
E,1.978757,2.605967,0.683509,0.874303


In [56]:
# | is the element-wise OR. On this data it selects the same rows as the AND filter,
# because 'X' and 'Y' have the same sign in every row.
df[(df['X']>0) | (df['Y']>0)]

Unnamed: 0,X,Y,Z,new
C,0.740122,0.528813,-0.589001,-2.607169
E,1.978757,2.605967,0.683509,0.874303


#### More About Index

In [57]:
df

Unnamed: 0,X,Y,Z,new
B,-0.319318,-0.848077,0.605965,1.257083
C,0.740122,0.528813,-0.589001,-2.607169
D,-0.758872,-0.933237,0.955057,1.143752
E,1.978757,2.605967,0.683509,0.874303


In [58]:
# Returns a new frame with a default integer index; the old row labels move into an
# 'index' column. The result is only displayed, so df is not modified.
df.reset_index()

Unnamed: 0,index,X,Y,Z,new
0,B,-0.319318,-0.848077,0.605965,1.257083
1,C,0.740122,0.528813,-0.589001,-2.607169
2,D,-0.758872,-0.933237,0.955057,1.143752
3,E,1.978757,2.605967,0.683509,0.874303


In [59]:
df

Unnamed: 0,X,Y,Z,new
B,-0.319318,-0.848077,0.605965,1.257083
C,0.740122,0.528813,-0.589001,-2.607169
D,-0.758872,-0.933237,0.955057,1.143752
E,1.978757,2.605967,0.683509,0.874303


In [60]:
# With inplace=True the index reset is applied to df itself.
df.reset_index(inplace=True)
df

Unnamed: 0,index,X,Y,Z,new
0,B,-0.319318,-0.848077,0.605965,1.257083
1,C,0.740122,0.528813,-0.589001,-2.607169
2,D,-0.758872,-0.933237,0.955057,1.143752
3,E,1.978757,2.605967,0.683509,0.874303


In [61]:
# The former row labels are now an ordinary column named 'index'.
df['index']

0    B
1    C
2    D
3    E
Name: index, dtype: object

In [63]:
# After the reset the row labels are integers, so .loc[0] selects the row labelled 0.
df.loc[0]

index           B
X       -0.319318
Y       -0.848077
Z        0.605965
new       1.25708
Name: 0, dtype: object

In [67]:
# Four labels, one for each of the four rows currently in df.
newind = "SA AUS NZ IND".split()

In [68]:
# Assigning a list adds a 'Countries' column, one value per row in order.
df['Countries'] = newind
df

Unnamed: 0,index,X,Y,Z,new,Countries
0,B,-0.319318,-0.848077,0.605965,1.257083,SA
1,C,0.740122,0.528813,-0.589001,-2.607169,AUS
2,D,-0.758872,-0.933237,0.955057,1.143752,NZ
3,E,1.978757,2.605967,0.683509,0.874303,IND


In [69]:
# Returns a new frame indexed by the 'Countries' column; the result is only displayed,
# so df itself is unchanged.
df.set_index('Countries')

Unnamed: 0_level_0,index,X,Y,Z,new
Countries,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
SA,B,-0.319318,-0.848077,0.605965,1.257083
AUS,C,0.740122,0.528813,-0.589001,-2.607169
NZ,D,-0.758872,-0.933237,0.955057,1.143752
IND,E,1.978757,2.605967,0.683509,0.874303


In [70]:
df

Unnamed: 0,index,X,Y,Z,new,Countries
0,B,-0.319318,-0.848077,0.605965,1.257083,SA
1,C,0.740122,0.528813,-0.589001,-2.607169,AUS
2,D,-0.758872,-0.933237,0.955057,1.143752,NZ
3,E,1.978757,2.605967,0.683509,0.874303,IND


In [71]:
# inplace=True makes 'Countries' the index of df itself, replacing the integer index.
df.set_index('Countries',inplace=True)

In [72]:
df

Unnamed: 0_level_0,index,X,Y,Z,new
Countries,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
SA,B,-0.319318,-0.848077,0.605965,1.257083
AUS,C,0.740122,0.528813,-0.589001,-2.607169
NZ,D,-0.758872,-0.933237,0.955057,1.143752
IND,E,1.978757,2.605967,0.683509,0.874303


In [76]:
# Shows df without the leftover 'index' column; the result is not assigned back and
# inplace is not set, so df is not modified.
df.drop('index',axis = 1)

Unnamed: 0_level_0,X,Y,Z,new
Countries,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
SA,-0.319318,-0.848077,0.605965,1.257083
AUS,0.740122,0.528813,-0.589001,-2.607169
NZ,-0.758872,-0.933237,0.955057,1.143752
IND,1.978757,2.605967,0.683509,0.874303


### Multi Index and Index Hierarchy

In [82]:
# Outer-level group labels and inner-level numbers, aligned position by position.
outside = ['G1','G1','G1','G2','G2','G2']
inside = [1,2,3,1,2,3]
# Pair the two lists into (group, number) tuples and build a two-level MultiIndex from them.
hier_index = pd.MultiIndex.from_tuples(list(zip(outside, inside)))

In [83]:
hier_index

MultiIndex(levels=[['G1', 'G2'], [1, 2, 3]],
           labels=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]])

In [84]:
# Rebind df to a fresh 6x2 frame of standard-normal draws in columns 'A' and 'B',
# using hier_index as the row index.
df = pd.DataFrame(
    data=np.random.randn(6, 2),
    index=hier_index,
    columns=['A', 'B'],
)


In [85]:
df

Unnamed: 0,Unnamed: 1,A,B
G1,1,0.302665,1.693723
G1,2,-1.706086,-1.159119
G1,3,-0.134841,0.390528
G2,1,0.166905,0.184502
G2,2,0.807706,0.07296
G2,3,0.638787,0.329646


In [86]:
# Selecting an outer-level label returns the sub-frame for group 'G1', indexed by the inner level.
df.loc['G1']

Unnamed: 0,A,B
1,0.302665,1.693723
2,-1.706086,-1.159119
3,-0.134841,0.390528


In [88]:
# Chain a second .loc to pick inner label 1 within group 'G1'.
df.loc['G1'].loc[1]

A    0.302665
B    1.693723
Name: 1, dtype: float64

In [89]:
# Inspect the names of the index levels; neither level has been named yet.
df.index.names

FrozenList([None, None])

In [90]:
# Name the outer level 'Groups' and the inner level 'Num'.
df.index.names = ['Groups','Num']

In [91]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
Groups,Num,Unnamed: 2_level_1,Unnamed: 3_level_1
G1,1,0.302665,1.693723
G1,2,-1.706086,-1.159119
G1,3,-0.134841,0.390528
G2,1,0.166905,0.184502
G2,2,0.807706,0.07296
G2,3,0.638787,0.329646


In [92]:
# Cross-section: all rows whose outer-level label is 'G1'.
df.xs('G1')

Unnamed: 0_level_0,A,B
Num,Unnamed: 1_level_1,Unnamed: 2_level_1
1,0.302665,1.693723
2,-1.706086,-1.159119
3,-0.134841,0.390528


In [93]:
# Cross-section for a single row: the key is a tuple of (Groups, Num) labels.
# A tuple is used instead of a list because list keys for xs are deprecated and
# rejected by newer pandas releases.
df.xs(('G1',1))

A    0.302665
B    1.693723
Name: (G1, 1), dtype: float64

In [94]:
# Cross-section on the inner level: the rows with Num == 1 from every group.
df.xs(1,level='Num')

Unnamed: 0_level_0,A,B
Groups,Unnamed: 1_level_1,Unnamed: 2_level_1
G1,0.302665,1.693723
G2,0.166905,0.184502


### Data Frames Finished