In [45]:
import numpy as np
import pandas as pd

In [46]:
from numpy.random import randn

In [47]:
# Seed NumPy's global random number generator so that the randn() calls in the
# following cells produce the same values on every fresh run.

np.random.seed(101)

In [48]:
# Build a 5x4 frame of standard-normal draws with labelled rows (A-E) and
# labelled columns (W-Z). index/columns are passed by keyword so the role of
# each list is explicit at the call site.
# (In Jupyter, press Shift+Tab inside the call to see the full signature.)
df = pd.DataFrame(
    randn(5, 4),
    index=['A', 'B', 'C', 'D', 'E'],
    columns=['W', 'X', 'Y', 'Z'],
)
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [49]:
# Select a single column by label; the result is a Series.
df['W']    # Attribute access (df.W) also works when the label is a valid identifier that does not clash with a DataFrame attribute.

A    2.706850
B    0.651118
C   -2.018168
D    0.188695
E    0.190794
Name: W, dtype: float64

In [50]:
# Selecting several columns: pass a list of labels to get a DataFrame back.

df[['W','Z']]

Unnamed: 0,W,Z
A,2.70685,0.503826
B,0.651118,0.605965
C,-2.018168,-0.589001
D,0.188695,0.955057
E,0.190794,0.683509


In [51]:
df['new'] = df['W'] + df['X']   # Assigning to a new label creates the column; 'new' does not need to be declared beforehand.
df

Unnamed: 0,W,X,Y,Z,new
A,2.70685,0.628133,0.907969,0.503826,3.334983
B,0.651118,-0.319318,-0.848077,0.605965,0.3318
C,-2.018168,0.740122,0.528813,-0.589001,-1.278046
D,0.188695,-0.758872,-0.933237,0.955057,-0.570177
E,0.190794,1.978757,2.605967,0.683509,2.169552


In [52]:
# Drop a column (axis=1)
df.drop('new',axis=1,inplace=True)  # axis=1 targets column labels; the default, axis=0, targets row labels.
df                                  # IMPORTANT: inplace=True modifies df itself; without it, drop() returns a new frame and leaves df unchanged.

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [53]:
# Drop a row by its index label (axis=0, which is also the default)

df.drop('E',axis =0,inplace=True)
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057


In [54]:
# Selecting a row by its index label with .loc; the row comes back as a Series.

df.loc['A']

W    2.706850
X    0.628133
Y    0.907969
Z    0.503826
Name: A, dtype: float64

In [55]:
df.iloc[2] # Selects by integer position (0-based) instead of by label: position 2 is row 'C'.

W   -2.018168
X    0.740122
Y    0.528813
Z   -0.589001
Name: C, dtype: float64

In [56]:
# Select a single value: row label 'B', column label 'Y'.
df.loc['B','Y']

-0.8480769834036315

In [57]:
df.loc[['A','C'],['W','Z']]  # Selects an arbitrary subset of the table: rows 'A' and 'C', columns 'W' and 'Z'.

Unnamed: 0,W,Z
A,2.70685,0.503826
C,-2.018168,-0.589001


In [58]:
df>0 # Element-wise comparison: returns a boolean DataFrame that is True where the value is greater than 0.

Unnamed: 0,W,X,Y,Z
A,True,True,True,True
B,True,False,False,True
C,False,True,True,False
D,True,False,False,True


In [59]:
df[df>0]  # Keeps the values where the mask is True; every other cell (values that are not > 0) becomes NaN.

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,,,0.605965
C,,0.740122,0.528813,
D,0.188695,,,0.955057


In [60]:
df[df['W']>0]   # Boolean row filter: df['W']>0 is False for row 'C', so row 'C' is left out of the result.

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
D,0.188695,-0.758872,-0.933237,0.955057


In [61]:
df[df['Z']<0] # Rows where column Z is negative; only row 'C' qualifies.

Unnamed: 0,W,X,Y,Z
C,-2.018168,0.740122,0.528813,-0.589001


In [62]:
df[(df['W']>1) & (df['Y']>0)]    # Use '&' instead of 'and': 'and' needs a single True/False, but each condition is a boolean Series, so 'and' raises "The truth value of a Series is ambiguous". '&' combines the conditions element-wise.

# Similarly, use '|' for OR, not 'or'. Keep each condition in parentheses, because '&' and '|' bind more tightly than the comparison operators.

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826


In [63]:
# Quick way to build a list of strings: split a space-separated string on whitespace.

newlist= 'CA NY WY OR'.split()
newlist

['CA', 'NY', 'WY', 'OR']

In [64]:
# Adds a new column to the table. NOTE: the number of elements in the list must equal the number of rows.

df['states'] = newlist
df

Unnamed: 0,W,X,Y,Z,states
A,2.70685,0.628133,0.907969,0.503826,CA
B,0.651118,-0.319318,-0.848077,0.605965,NY
C,-2.018168,0.740122,0.528813,-0.589001,WY
D,0.188695,-0.758872,-0.933237,0.955057,OR


In [65]:
 df.set_index('states')  # Returns a new frame that uses 'states' as the index; df itself is not modified.

# To keep the result, reassign it (df = df.set_index('states')) or pass inplace=True.

Unnamed: 0_level_0,W,X,Y,Z
states,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
CA,2.70685,0.628133,0.907969,0.503826
NY,0.651118,-0.319318,-0.848077,0.605965
WY,-2.018168,0.740122,0.528813,-0.589001
OR,0.188695,-0.758872,-0.933237,0.955057


# Multi-Index Data Frame

In [77]:
outside = ['G1','G1','G1','G2','G2','G2']
inside = [1,2,3,1,2,3]
hier_index=list(zip(outside,inside))  # Pairs the outside and inside values into a list of (outside, inside) tuples
hier_index

[('G1', 1), ('G1', 2), ('G1', 3), ('G2', 1), ('G2', 2), ('G2', 3)]

In [78]:
# Note: this rebinds hier_index from the list of tuples to the MultiIndex built from it.
hier_index=pd.MultiIndex.from_tuples(hier_index) # Builds a multi-level index from the list of tuples
hier_index

MultiIndex(levels=[['G1', 'G2'], [1, 2, 3]],
           labels=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]])

In [81]:
# Note: this rebinds df; the single-index frame used in the earlier cells is replaced.
df=pd.DataFrame(randn(6,2),hier_index,['A','B'])  # 6x2 random frame: hier_index is the row index, 'A' and 'B' are the columns
df

Unnamed: 0,Unnamed: 1,A,B
G1,1,0.302665,1.693723
G1,2,-1.706086,-1.159119
G1,3,-0.134841,0.390528
G2,1,0.166905,0.184502
G2,2,0.807706,0.07296
G2,3,0.638787,0.329646


In [83]:
# Two-step selection: first take the outer-level group 'G1', then label 1 of the remaining inner level.
df.loc['G1'].loc[1]

A    0.302665
B    1.693723
Name: 1, dtype: float64

In [84]:
# Name the two index levels so they can be referred to by name (e.g. xs(..., level="Num")).
df.index.names=['Groups','Num']
df

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
Groups,Num,Unnamed: 2_level_1,Unnamed: 3_level_1
G1,1,0.302665,1.693723
G1,2,-1.706086,-1.159119
G1,3,-0.134841,0.390528
G2,1,0.166905,0.184502
G2,2,0.807706,0.07296
G2,3,0.638787,0.329646


In [85]:
# Cross-section
df.xs('G1')  # Same result as df.loc['G1']: all rows under the outer label 'G1'

Unnamed: 0_level_0,A,B
Num,Unnamed: 1_level_1,Unnamed: 2_level_1
1,0.302665,1.693723
2,-1.706086,-1.159119
3,-0.134841,0.390528


In [86]:
df.xs(1,level="Num")  # Grabs the rows with Num == 1 from every group (G1 and G2), which is awkward to express with df.loc

Unnamed: 0_level_0,A,B
Groups,Unnamed: 1_level_1,Unnamed: 2_level_1
G1,0.302665,1.693723
G2,0.166905,0.184502
