# Pandas - DataFrames

In [7]:
import pandas as pd
from numpy.random import randn

In [2]:
# Labels for the two index levels, one entry per row of the future frame.
outside = ['G1', 'G1', 'G1', 'G2', 'G2', 'G2']
inside = [1, 2, 3, 1, 2, 3]

# Pair the labels position by position -- ('G1', 1), ('G1', 2), ... -- and
# build the two-level index directly from those pairs.
hier_index = pd.MultiIndex.from_tuples(list(zip(outside, inside)))

In [3]:
# Inspect the outer labels (the first element of each index tuple).
outside

['G1', 'G1', 'G1', 'G2', 'G2', 'G2']

In [4]:
# Inspect the inner labels (the second element of each index tuple).
inside

[1, 2, 3, 1, 2, 3]

### Creating a MultiIndex

In [5]:
# Display the MultiIndex built from the (outside, inside) label pairs.
hier_index

MultiIndex([('G1', 1),
            ('G1', 2),
            ('G1', 3),
            ('G2', 1),
            ('G2', 2),
            ('G2', 3)],
           )

In [8]:
# 6x2 frame of random draws from randn: rows carry the two-level index,
# columns are labelled 'A' and 'B'.
# NOTE: no seed is set in the cells shown, so the values differ on every run.
df = pd.DataFrame(data=randn(6, 2), index=hier_index, columns=['A', 'B'])

In [9]:
# Display the frame: two-level row index, columns 'A' and 'B'.
df

Unnamed: 0,Unnamed: 1,A,B
G1,1,-1.764653,0.080045
G1,2,0.770603,1.371349
G1,3,1.651103,0.094695
G2,1,0.135077,0.020619
G2,2,-0.623573,0.803564
G2,3,-0.95324,1.709841


### Indexing in a MultiIndex DataFrame

In [10]:
# Select every row whose outer index label is 'G1'; the result is a
# sub-frame indexed by the remaining inner level (1, 2, 3).
df.loc['G1']

Unnamed: 0,A,B
1,-1.764653,0.080045
2,0.770603,1.371349
3,1.651103,0.094695


In [11]:
# Two-step lookup: take the 'G1' sub-frame, then its row labelled 1.
# The result is a Series holding that row's 'A' and 'B' values.
df.loc['G1'].loc[1]

A   -1.764653
B    0.080045
Name: 1, dtype: float64

In [13]:
# Name the two index levels (outer -> 'Groups', inner -> 'Num') so they can
# be referred to by name, e.g. xs(..., level='Num').
df.index.names = ['Groups', 'Num']

In [14]:
# Display the frame again; the index header now shows 'Groups' and 'Num'.
df

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
Groups,Num,Unnamed: 2_level_1,Unnamed: 3_level_1
G1,1,-1.764653,0.080045
G1,2,0.770603,1.371349
G1,3,1.651103,0.094695
G2,1,0.135077,0.020619
G2,2,-0.623573,0.803564
G2,3,-0.95324,1.709841


In [17]:
# Fetch a single scalar: column 'B' of the row at outer label 'G2',
# inner label 2. One .loc call with an (outer, inner) tuple plus the column
# label replaces the chained .loc[...].loc[...][...] lookups, which build an
# intermediate DataFrame and Series before reaching the value.
df.loc[('G2', 2), 'B']

0.8035640362351875

### Fetching a cross-section of a DataFrame

In [19]:
# Cross-section for group 'G1': the matched 'Groups' level is dropped,
# leaving the rows indexed by 'Num'.
df.xs(key='G1', level='Groups')

Unnamed: 0_level_0,A,B
Num,Unnamed: 1_level_1,Unnamed: 2_level_1
1,-1.764653,0.080045
2,0.770603,1.371349
3,1.651103,0.094695


In [20]:
# Cross-section on the inner level: the rows where 'Num' == 1, taken from
# every group; the result is indexed by 'Groups'.
df.xs(1, level='Num')

Unnamed: 0_level_0,A,B
Groups,Unnamed: 1_level_1,Unnamed: 2_level_1
G1,-1.764653,0.080045
G2,0.135077,0.020619
