# Pandas - DataFrames 3

In [21]:
import pandas as pd
import numpy as np
# Fix NumPy's global random seed so the random values drawn below are
# the same on every run.
np.random.seed(101)

In [22]:
# Index Levels
# Outer level: one group label per row.
outside = 'G1 G1 G1 G2 G2 G2'.split()
# Inner level: the row number within its group.
inside = [1,2,3,1,2,3]
# Pair the labels up as (outer, inner) tuples and build the two-level
# index from them in a single step.
hier_index = pd.MultiIndex.from_tuples(list(zip(outside, inside)))

In [23]:
# Show the outer-level labels (one per row).
outside

['G1', 'G1', 'G1', 'G2', 'G2', 'G2']

In [24]:
# Show the inner-level labels (one per row).
inside

[1, 2, 3, 1, 2, 3]

In [25]:
# Show the resulting MultiIndex: one (outer, inner) pair per row.
hier_index

MultiIndex([('G1', 1),
            ('G1', 2),
            ('G1', 3),
            ('G2', 1),
            ('G2', 2),
            ('G2', 3)],
           )

In [26]:
# For comparison: the plain list of (outer, inner) tuples that zip() produces
# from the two label lists.
list(zip(outside, inside))

[('G1', 1), ('G1', 2), ('G1', 3), ('G2', 1), ('G2', 2), ('G2', 3)]

In [27]:
# Build a 6x2 DataFrame of random values whose rows are labelled by the
# multi-level index and whose columns are 'A' and 'B'.
df = pd.DataFrame(
    data=np.random.randn(6, 2),
    index=hier_index,
    columns=['A', 'B'],
)

In [28]:
# Display the frame; the two leftmost columns are the index levels.
df

Unnamed: 0,Unnamed: 1,A,B
G1,1,2.70685,0.628133
G1,2,0.907969,0.503826
G1,3,0.651118,-0.319318
G2,1,-0.848077,0.605965
G2,2,-2.018168,0.740122
G2,3,0.528813,-0.589001


In [29]:
# Select a group: passing only the outer label returns every row under 'G1',
# indexed by the inner level alone.
df.loc['G1']

Unnamed: 0,A,B
1,2.70685,0.628133
2,0.907969,0.503826
3,0.651118,-0.319318


In [30]:
# Chain two lookups: first pick the 'G1' group by its outer label, then pick
# row 1 by its inner label. The result is a Series with that row's A and B.
df.loc['G1'].loc[1]

A    2.706850
B    0.628133
Name: 1, dtype: float64

In [31]:
# Check the names of the index levels (at this point neither level has one).
df.index.names

FrozenList([None, None])

In [32]:
# Name the index levels: the outer level becomes 'Groups', the inner 'Num'.
# This assigns the names on df's existing index.
df.index.names = ['Groups', 'Num']

In [33]:
# Display the frame again; the index levels now carry their names.
df

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
Groups,Num,Unnamed: 2_level_1,Unnamed: 3_level_1
G1,1,2.70685,0.628133
G1,2,0.907969,0.503826
G1,3,0.651118,-0.319318
G2,1,-0.848077,0.605965
G2,2,-2.018168,0.740122
G2,3,0.528813,-0.589001


In [34]:
# Let's grab a single value: column B for group G2, Num 2

In [37]:
# Grab a single value: row ('G2', 2), column 'B'.
# One .loc call with a (row-tuple, column) key does the whole lookup, instead
# of chaining .loc[...].loc[...][...] through two intermediate objects.
df.loc[('G2', 2), 'B']

0.7401220570561068

In [38]:
# Grab all B items in G2: select the 'G2' group, then its 'B' column.
# The result is a Series indexed by the inner level ('Num').
df.loc['G2']['B']

Num
1    0.605965
2    0.740122
3   -0.589001
Name: B, dtype: float64

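### Cross section
`xs` returns a cross-section of the data. It works with any index, but it is most useful with a multi-level index, where its `level` argument can select on an inner level directly.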

In [40]:
# Cross-section on the outer level: every row under 'G1'.
df.xs('G1')

Unnamed: 0_level_0,A,B
Num,Unnamed: 1_level_1,Unnamed: 2_level_1
1,2.70685,0.628133
2,0.907969,0.503826
3,0.651118,-0.319318


In [41]:
# Let's get all rows where the inner level 'Num' is 1. Passing level= lets xs
# select on an inner level directly, without first choosing an outer label.
df.xs(1, level='Num')

Unnamed: 0_level_0,A,B
Groups,Unnamed: 1_level_1,Unnamed: 2_level_1
G1,2.70685,0.628133
G2,-0.848077,0.605965


In [42]:
# Show the full frame once more for reference.
df

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
Groups,Num,Unnamed: 2_level_1,Unnamed: 3_level_1
G1,1,2.70685,0.628133
G1,2,0.907969,0.503826
G1,3,0.651118,-0.319318
G2,1,-0.848077,0.605965
G2,2,-2.018168,0.740122
G2,3,0.528813,-0.589001
