## Pandas - MultiLevel Indexing

In [None]:
import pandas as pd
import numpy as np

In [None]:
# Two parallel label arrays: position i pairs a student with a course.
data = [
    [name
     for name in ('Alice', 'Bob', 'Charlie', 'Dave')
     for _ in range(2)],      # each student listed twice, once per course
    ['cs1', 'cs2'] * 4,       # the two courses repeated for the four students
]

In [None]:
# Transpose the two parallel lists into a list of (student, course) pairs.
tuples = list(zip(*data))
tuples

In [None]:
# Enumerate every (student, course) pair with a nested comprehension.
[
    (name, course_id)
    for name in ['Alice', 'Bob', 'Charlie', 'Dave']
    for course_id in ['cs1', 'cs2']
]

In [None]:
# Build a two-level index from the (student, course) pairs and name the levels.
indices = pd.MultiIndex.from_tuples(tuples, 
                                    names = ['Student', 'Class'])
indices

In [None]:
# Fixed seed so the eight random integer scores in [60, 80) are reproducible.
np.random.seed(123)
s = pd.Series(
    data=np.random.randint(low=60, high=80, size=8),
    index=indices,
)
s

In [None]:
# Using tuples as atomic labels: the raw list of tuples is passed as the index.

np.random.seed(123)
pd.Series(
    data=np.random.randint(low=60, high=80, size=8),
    index=tuples,
)

In [None]:
# Another approach: give one list of labels per level and let pandas
# form every (student, course) combination.

data = [
    ['Alice', 'Bob', 'Charlie', 'Dave'],
    ['cs1', 'cs2'],
]

indices = pd.MultiIndex.from_product(
    iterables=data,
    names=['Student', 'Class'],
)
indices

In [None]:
# Fixed seed so the eight random integer scores in [60, 80) are reproducible.
np.random.seed(123)
s = pd.Series(
    data=np.random.randint(low=60, high=80, size=8),
    index=indices,
)
s

In [None]:
# Convenient shortcut: pass the list of label arrays directly as the index
# and pandas builds the MultiIndex from it.

np.random.seed(123)
data = [
    [name
     for name in ('Alice', 'Bob', 'Charlie', 'Dave')
     for _ in range(2)],
    ['cs1', 'cs2'] * 4,
]

s = pd.Series(
    data=np.random.randint(low=60, high=80, size=8),
    index=data,
)
s


In [None]:
# Reproducible quiz scores: 8 (student, course) rows by 4 quiz columns.
np.random.seed(123)
data = [
    [name
     for name in ('Alice', 'Bob', 'Charlie', 'Dave')
     for _ in range(2)],
    ['cs1', 'cs2'] * 4,
]

df = pd.DataFrame(
    data=np.random.randint(low=60, high=80, size=(8, 4)),
    index=data,  # list of label arrays -> two-level row index
    columns=['Quiz1', 'Quiz2', 'Quiz3', 'Quiz4'],
)
df

In [None]:
# Inspect the row index that was built from the two label arrays.
df.index

In [None]:
# Name the two index levels so they can be referred to by name.
df.index.names = ['Student', 'Class']
df

In [None]:
# Per-row labels of the first (outer) level, selected by position.
df.index.get_level_values(0)

In [None]:
# Per-row labels of the second (inner) level, selected by position.
df.index.get_level_values(1)

In [None]:
# The same level selected by its name instead of its position.
df.index.get_level_values('Class')

### Indexing with MultiIndex

In [None]:
# Show the frame again as a reference for the lookups below.
df

In [None]:
# A single label is matched against the outer level: all of Bob's rows.
df.loc['Bob']

In [None]:
# The comma builds the tuple ('Bob', 'cs1'), used here as a full
# (Student, Class) row key.
df.loc['Bob','cs1']

In [None]:
# Chained lookup: first Bob's rows, then the 'cs1' row of that result.
df.loc['Bob'].loc['cs1']

In [None]:
# Same row lookup with the (Student, Class) key written as an explicit tuple.
df.loc[('Bob', 'cs1')]

In [None]:
# Row key tuple plus a column label: a single cell.
df.loc[('Bob', 'cs1'), 'Quiz1']

In [None]:
# Label slice on the outer level; label slices include both endpoints.
df.loc['Bob':'Dave']

In [None]:
# Slice between two full (Student, Class) keys, both endpoints included.
df.loc[('Bob', 'cs2'):('Dave', 'cs1')]

In [None]:
# With a list of labels or tuples: each tuple is a full (Student, Class) key
# and only the listed rows are returned.

df.loc[ [('Bob', 'cs2'), ('Dave', 'cs1')] ]

In [None]:
# Show the frame again before reshaping it.
df

In [None]:
# Column labels before unstacking (the four quiz names).
df.columns

In [None]:
# Move the innermost index level (Class) into the columns, giving
# (quiz, class) column pairs and one row per student.
df.unstack()

In [None]:
# Select the top-level column 'Quiz1': one column per class remains.
df.unstack()['Quiz1']

In [None]:
# Select the single ('Quiz1', 'cs1') column of the unstacked frame.
df.unstack()['Quiz1', 'cs1']

In [None]:
# Indexing a Series that has a MultiIndex

In [None]:
# Show the MultiIndexed Series as a reference for the lookups below.
s

In [None]:
# Every student on the outer level, only 'cs1' on the inner level.
s[:, 'cs1']

In [None]:
# A single label is matched against the outer level: Bob's entries.
s['Bob']

In [None]:
# Chained lookup: Bob's entries first, then the 'cs1' entry of that result.
s['Bob']['cs1']

In [None]:
# Full (Student, Class) key as an explicit tuple: a single value.
s[('Bob','cs1')]

In [None]:
# Same lookup without parentheses; the comma still builds the tuple key.
s['Bob','cs1']

In [None]:
# Show the Series again before reshaping it.
s

In [None]:
# Move the inner level (Class) into columns: a students-by-classes DataFrame.
s.unstack()

In [None]:
# Pick the 'cs1' column of the unstacked result.
s.unstack()['cs1']

### Cross-section
 - xs()

In [None]:
# Show the frame as a reference for the cross-sections below.
df

In [None]:
# Cross-section on the first index level: the rows whose Student is 'Bob'.
df.xs('Bob')

In [None]:
# Cross-section on the second level, chosen by position: every 'cs1' row.
df.xs('cs1', level=1)

In [None]:
# The same cross-section with the level chosen by name.
df.xs('cs1', level='Class')

In [None]:
# Using slice: slice(None) means "every label" on that level, so this selects
# 'cs1' for all students; the trailing ':' keeps all columns.

df.loc[(slice(None), 'cs1'), :]

In [None]:
# Students 'Alice' through 'Charlie' (inclusive), every class, all columns.
df.loc[(slice('Alice','Charlie'), slice(None)), :]

### MultiIndex for Columns

In [None]:
# Hierarchical (MultiIndex) labels on both the rows and the columns.

np.random.seed(123)

# Mock some data: a 4 x 6 block of normal draws rounded to one decimal.
data = np.random.randn(4, 6).round(1)
data[:, 0::2] = data[:, 0::2] * 10  # scale every other column (the 'HR' ones)
data = data + 37                    # shift every reading by 37

# Row labels: every (year, visit) combination.
index = pd.MultiIndex.from_product(
    iterables=[[2017, 2018], [1, 2]],
    names=['year', 'visit'],
)
# Column labels: every (subject, type) combination.
columns = pd.MultiIndex.from_product(
    iterables=[['Alice', 'Bob', 'Charlie'], ['HR', 'Temp']],
    names=['subject', 'type'],
)

# Create the DataFrame.
health_data = pd.DataFrame(data=data, index=index, columns=columns)
health_data

In [None]:
# A top-level column label selects that subject's sub-columns.
health_data['Bob']

In [None]:
# A full (subject, type) column key selects a single column.
health_data['Bob', 'HR']

In [None]:
# The same column through .loc: all rows, column key ('Bob', 'HR').
health_data.loc[:, ('Bob', 'HR')]

In [None]:
# Purely positional selection: first two rows and first two columns.
health_data.iloc[:2, :2]

In [None]:
# Show the full frame again for reference.
health_data

In [None]:
# Cross-section along the columns (axis=1): the 'HR' column of every subject.
health_data.xs('HR', level='type', axis=1)

In [None]:
# Show the full frame again before the IndexSlice examples.
health_data

In [None]:
# IndexSlice: helper that lets per-level slices be written with ':' syntax
# inside .loc instead of spelling out slice(None).

idx = pd.IndexSlice
idx

In [None]:
# Rows: visit 2 of every year.  Columns: 'HR' of every subject.
health_data.loc[idx[:,2], idx[:, 'HR']]

In [None]:
# All rows; 'HR' column of every subject.
health_data.loc[:, idx[:, 'HR']]

In [None]:
# All rows; 'HR' column for subjects 'Alice' through 'Bob' (inclusive).
health_data.loc[:, idx['Alice':'Bob', 'HR']]

In [None]:
# Inspect the (year, visit) row MultiIndex.
health_data.index

In [None]:
# Inspect the (subject, type) column MultiIndex.
health_data.columns

### Swapping levels

In [None]:
# Show the (Student, Class) frame before swapping its index levels.
df

In [None]:
# Swap the two index levels so Class becomes the outer one; returns a new
# frame and does not reorder the rows.
df.swaplevel()

In [None]:
# With Class as the outer level, a single label selects every 'cs1' row.
df.swaplevel().loc['cs1']

In [None]:
# The swaplevel() result above was not assigned, so df keeps its original index.
df.index

In [None]:
# Swap the levels by name and keep the result as a separate frame.
df2 = df.swaplevel('Student', 'Class')
df2

In [None]:
# Mean of each quiz column over all rows.
df2.mean()

In [None]:
# Mean of each quiz per class.  The `level=` keyword of DataFrame.mean was
# removed in pandas 2.0, so group on the index level instead; sort=False keeps
# the groups in order of first appearance.
df2.groupby(level='Class', sort=False).mean()

In [None]:
# Mean of each quiz per student.  The `level=` keyword of DataFrame.mean was
# removed in pandas 2.0, so group on the index level instead; sort=False keeps
# the groups in order of first appearance.
df2.groupby(level='Student', sort=False).mean()

In [None]:
# Show df2 again before sorting its index.
df2

In [None]:
# Sort the rows by the Class level; returns a new frame, df2 is unchanged.
df2.sort_index(level='Class')

In [None]:
# print() shows the same sorted frame as plain text.
print(df2.sort_index(level='Class'))

In [None]:
# Sort the rows by the Student level instead.
df2.sort_index(level='Student')

In [None]:
# Turn the index levels back into ordinary columns and fall back to a default
# integer index.  inplace=True modifies df2 itself, so re-running this cell
# operates on the already-reset frame.
df2.reset_index(inplace=True)
df2