In [61]:
import numpy as np
import pandas as pd

## Set random seed

In [18]:
# Seed NumPy's global RNG so the random frames built below are reproducible.
np.random.seed(seed=101)

## Create DataFrame

In [19]:
# 5x4 frame of standard-normal draws with labelled rows (a-e) and columns (w-z).
row_labels = list('abcde')
col_labels = list('wxyz')
df = pd.DataFrame(
    np.random.randn(len(row_labels), len(col_labels)),
    index=row_labels,
    columns=col_labels,
)

In [20]:
df

Unnamed: 0,w,x,y,z
a,2.70685,0.628133,0.907969,0.503826
b,0.651118,-0.319318,-0.848077,0.605965
c,-2.018168,0.740122,0.528813,-0.589001
d,0.188695,-0.758872,-0.933237,0.955057
e,0.190794,1.978757,2.605967,0.683509


## Reset Index

In [21]:
# reset_index() returns a new DataFrame; the result is discarded here, so df
# keeps its original index (see the output below). To persist the change,
# assign the result back to a name or pass inplace=True.
df.reset_index()
df

Unnamed: 0,w,x,y,z
a,2.70685,0.628133,0.907969,0.503826
b,0.651118,-0.319318,-0.848077,0.605965
c,-2.018168,0.740122,0.528813,-0.589001
d,0.188695,-0.758872,-0.933237,0.955057
e,0.190794,1.978757,2.605967,0.683509


## Set New Index

In [23]:
# State abbreviations that will become the new row labels (one per row of df).
NewIndex = ['CA', 'NY', 'WY', 'OR', 'CO']
NewIndex

['CA', 'NY', 'WY', 'OR', 'CO']

In [24]:
# Attach the labels as an ordinary column first; it is promoted to the index
# by set_index in a later cell.
df['States']=NewIndex

In [25]:
df

Unnamed: 0,w,x,y,z,States
a,2.70685,0.628133,0.907969,0.503826,CA
b,0.651118,-0.319318,-0.848077,0.605965,NY
c,-2.018168,0.740122,0.528813,-0.589001,WY
d,0.188695,-0.758872,-0.933237,0.955057,OR
e,0.190794,1.978757,2.605967,0.683509,CO


In [28]:
# Promote the 'States' column to the row index. set_index returns a new frame,
# so rebind df rather than mutating it with inplace=True (inplace is
# discouraged in pandas: it prevents chaining and gives no performance benefit).
df = df.set_index('States')

In [33]:
df

Unnamed: 0_level_0,w,x,y,z
States,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
CA,2.70685,0.628133,0.907969,0.503826
NY,0.651118,-0.319318,-0.848077,0.605965
WY,-2.018168,0.740122,0.528813,-0.589001
OR,0.188695,-0.758872,-0.933237,0.955057
CO,0.190794,1.978757,2.605967,0.683509


## Subset Based on Index

In [42]:
# Boolean mask over the row labels: keep only the rows whose label is 'CA'.
is_ca = df.index == 'CA'
df.loc[is_ca]

Unnamed: 0_level_0,w,x,y,z
States,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
CA,2.70685,0.628133,0.907969,0.503826


## Creating Multi Index

In [70]:
# Outer level: group label repeated once per inner number.
outside = ['G1'] * 3 + ['G2'] * 3
# Inner level: the numbers 1-3, repeated for each group.
inside = [1, 2, 3] * 2
# Pair the two levels positionally into (group, number) tuples.
hier_index = [(group, number) for group, number in zip(outside, inside)]
print(hier_index)

[('G1', 1), ('G1', 2), ('G1', 3), ('G2', 1), ('G2', 2), ('G2', 3)]


In [79]:
# Turn the list of (group, number) tuples into a two-level MultiIndex.
# Note: this rebinds hier_index, so the name no longer refers to the plain list.
hier_index=pd.MultiIndex.from_tuples(hier_index)

In [80]:
hier_index

MultiIndex(levels=[['G1', 'G2'], [1, 2, 3]],
           labels=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]])

#### Create DataFrame

In [81]:
# 6x2 block of standard-normal draws: one row per (group, number) pair.
x = np.random.standard_normal(size=(6, 2))

In [82]:
# Rebind df to a new frame: the random block indexed by the two-level
# MultiIndex, with columns 'a' and 'b'.
df = pd.DataFrame(
    data=x,
    index=hier_index,
    columns=['a', 'b'],
)

In [83]:
df

Unnamed: 0,Unnamed: 1,a,b
G1,1,-0.925874,1.862864
G1,2,-1.133817,0.610478
G1,3,0.38603,2.084019
G2,1,-0.376519,0.230336
G2,2,0.681209,1.035125
G2,3,-0.03116,1.939932


## Name an Index

In [100]:
df.index.names

FrozenList([None, None])

In [101]:
# Name the two index levels so they can be referred to by name
# (e.g. level='Num' in the xs call further down).
df.index.names=['Groups','Num']

In [102]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,a,b
Groups,Num,Unnamed: 2_level_1,Unnamed: 3_level_1
G1,1,-0.925874,1.862864
G1,2,-1.133817,0.610478
G1,3,0.38603,2.084019
G2,1,-0.376519,0.230336
G2,2,0.681209,1.035125
G2,3,-0.03116,1.939932


## Subset a Multi Level Index

#### Subset using the outside index

In [87]:
# Selecting by the outer label returns every G1 row, indexed by the inner level only.
df.loc['G1']

Unnamed: 0,a,b
1,-0.925874,1.862864
2,-1.133817,0.610478
3,0.38603,2.084019


#### Subset using the secondary index

In [89]:
# Two-step lookup: take the G1 block first, then row 1 inside that block.
# The result is a Series holding that row's 'a' and 'b' values.
df.loc['G1'].loc[1]

a   -0.925874
b    1.862864
Name: 1, dtype: float64

#### Subset for G2, 2, column b

In [108]:
# Within the G2 block, pick row 2 and column 'b' in a single .loc call; yields a scalar.
df.loc['G2'].loc[2,'b']

1.0351250747739213

In [109]:
# Same scalar via one .loc call: the (outer, inner) tuple selects the row and
# 'b' selects the column, avoiding chained [...][...] indexing.
df.loc[('G2', 2), 'b']

1.0351250747739213

## Using XS (cross section) to Subset Multi Level Index

#### Select all rows with the index 2

In [113]:
# Cross-section on the inner level: every group's row where Num == 2.
df.xs(key=2, level='Num')

Unnamed: 0_level_0,a,b
Groups,Unnamed: 1_level_1,Unnamed: 2_level_1
G1,-1.133817,0.610478
G2,0.681209,1.035125


#### Select all rows inside the G1 index

In [114]:
# Cross-section on the outer level: all rows under G1, indexed by Num.
df.xs(key='G1')

Unnamed: 0_level_0,a,b
Num,Unnamed: 1_level_1,Unnamed: 2_level_1
1,-0.925874,1.862864
2,-1.133817,0.610478
3,0.38603,2.084019
