In [6]:
import numpy as np
from numpy.random import randn
import pandas as pd 

In [7]:
# Outer level: the group label of each row; inner level: the row's number
# within its group.
outside = ['G1'] * 3 + ['G2'] * 3
inside = [1, 2, 3] * 2
# Pair the labels positionally and build the two-level index from the tuples.
hier_index = pd.MultiIndex.from_tuples(list(zip(outside, inside)))

In [8]:
# Inspect the two-level index: one (group, number) tuple per row.
hier_index

MultiIndex([('G1', 1),
            ('G1', 2),
            ('G1', 3),
            ('G2', 1),
            ('G2', 2),
            ('G2', 3)],
           )

In [9]:
# 6x2 frame of random normal draws: rows keyed by the two-level index,
# columns labelled 'A' and 'B'.
df = pd.DataFrame(data=randn(6, 2), index=hier_index, columns=['A', 'B'])

In [10]:
# Display the frame; each row is identified by a (group, number) pair.
df

Unnamed: 0,Unnamed: 1,A,B
G1,1,-0.524591,-0.680953
G1,2,-0.450818,0.141591
G1,3,2.171578,0.604356
G2,1,-2.345561,0.381401
G2,2,0.875028,2.427094
G2,3,0.125301,1.379602


In [11]:
# Select every row under the outer label 'G1'; the result is a sub-frame
# indexed by the inner level only.
df.loc['G1']

Unnamed: 0,A,B
1,-0.524591,-0.680953
2,-0.450818,0.141591
3,2.171578,0.604356


In [12]:
# Drill down one level at a time: first the 'G1' sub-frame, then inner
# label 1 within it. The result is that single row as a Series.
df.loc['G1'].loc[1]

A   -0.524591
B   -0.680953
Name: 1, dtype: float64

In [13]:
# The index levels carry no names yet, so both entries are None.
df.index.names 

FrozenList([None, None])

In [15]:
# Name the outer and inner index levels so they can be referred to by name
# (for example via the `level=` argument of `xs`).
df.index.names = ['Groups','Num']

In [16]:
# Display the frame again; the index levels now show as 'Groups' and 'Num'.
df

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
Groups,Num,Unnamed: 2_level_1,Unnamed: 3_level_1
G1,1,-0.524591,-0.680953
G1,2,-0.450818,0.141591
G1,3,2.171578,0.604356
G2,1,-2.345561,0.381401
G2,2,0.875028,2.427094
G2,3,0.125301,1.379602


In [17]:
# Select every row under the outer label 'G2'; the remaining index is the
# 'Num' level.
df.loc['G2']

Unnamed: 0_level_0,A,B
Num,Unnamed: 1_level_1,Unnamed: 2_level_1
1,-2.345561,0.381401
2,0.875028,2.427094
3,0.125301,1.379602


In [18]:
# Look up one scalar in a single .loc call: the (Groups, Num) tuple picks the
# row and 'B' picks the column. This replaces the chained
# df.loc[...].loc[...][...] form, which creates an intermediate frame and
# Series just to reach one value.
df.loc[('G2', 2), 'B']

2.427093705281223

In [20]:
# Access a single element of the table in one .loc call:
# row ('G1', 2), column 'A'.
df.loc[('G1', 2), 'A']

-0.4508176103783666

In [21]:
# Label-based selection of the 'G1' group; the remaining index is the
# 'Num' level.
df.loc['G1']

Unnamed: 0_level_0,A,B
Num,Unnamed: 1_level_1,Unnamed: 2_level_1
1,-0.524591,-0.680953
2,-0.450818,0.141591
3,2.171578,0.604356


In [22]:
df.xs('G1') # cross-section on the outer level; same result as df.loc['G1'] above

Unnamed: 0_level_0,A,B
Num,Unnamed: 1_level_1,Unnamed: 2_level_1
1,-0.524591,-0.680953
2,-0.450818,0.141591
3,2.171578,0.604356


In [23]:
df.xs(1,level='Num') # cross-section on the inner level: the Num == 1 row of every group (G1 and G2)

Unnamed: 0_level_0,A,B
Groups,Unnamed: 1_level_1,Unnamed: 2_level_1
G1,-0.524591,-0.680953
G2,-2.345561,0.381401


## Missing Values

In [24]:
# pandas automatically represents missing entries as NaN

In [25]:
import numpy as np
import pandas as pd 


In [26]:
# Column-oriented sample data; np.nan marks the entries that are missing.
d = {
    'A': [1, 2, np.nan],
    'B': [5, np.nan, np.nan],
    'C': [1, 2, 3],
}

In [27]:
# Build the frame from the dict: keys become column labels.
# NOTE: this rebinds `df`, replacing the multi-index frame used earlier.
df = pd.DataFrame(data=d)

In [28]:
# Display the frame; the np.nan entries are the missing values.
df

Unnamed: 0,A,B,C
0,1.0,5.0,1
1,2.0,,2
2,,,3


In [29]:
df.dropna() # drops every row that contains at least one NaN; returns a new frame

Unnamed: 0,A,B,C
0,1.0,5.0,1


In [31]:
df.dropna(axis=1) # axis=1: drops every column that contains at least one NaN

Unnamed: 0,C
0,1
1,2
2,3


In [37]:
# thresh=2: keep only the rows that have at least two non-NaN values
# (row 2 has a single non-NaN value, so it is dropped).
df.dropna(thresh=2)

Unnamed: 0,A,B,C
0,1.0,5.0,1
1,2.0,,2


In [38]:
# Replace every NaN in the frame with the given placeholder string.
df.fillna(value='FILL VALUE')

Unnamed: 0,A,B,C
0,1,5,1
1,2,FILL VALUE,2
2,FILL VALUE,FILL VALUE,3


In [41]:
# Fill the missing entries of column 'A' with that column's mean. mean()
# ignores NaN, so the fill value is (1 + 2) / 2 = 1.5. The result is not
# assigned back, so `df` itself is left unchanged.
df['A'].fillna(value=df['A'].mean())

0    1.0
1    2.0
2    1.5
Name: A, dtype: float64

## Groupby