## Pandas - MultiLevel Indexing

## Functions Covered


### Create the tuples
data = [['Alice', 'Alice', 'Bob', 'Bob', 'Charlie', 'Charlie', 'Dave', 'Dave'],
  ['cs1', 'cs2', 'cs1', 'cs2', 'cs1', 'cs2', 'cs1', 'cs2']]<br/> 
tuples = list(zip(*data))<br/> 

### Create a MultiIndex from tuples
indices = pd.MultiIndex.from_tuples(tuples, names = ['Student', 'Class'])<br/> 

### Create a series from the tuples
s = pd.Series(np.random.randint(60,80,8), index = indices)<br/> 

### Use tuples as atomic labels such as (Alice, cs1), ...
pd.Series(np.random.randint(60,80,8), index = tuples)<br/> 


### Create a MultiIndex from a dot-product given a list of two lists
indices = pd.MultiIndex.from_product(<br/> 
&nbsp;&nbsp;&nbsp;&nbsp;[['Alice','Bob','Charlie','Dave'], ['cs1', 'cs2']],<br/> 
&nbsp;&nbsp;&nbsp;&nbsp;names=['Student', 'Class'])<br/> 

### Create a dataframe with multilevel index
data = [<br/> 
&nbsp;&nbsp;&nbsp;&nbsp;['Alice', 'Alice', 'Bob', 'Bob', 'Charlie', 'Charlie', 'Dave', 'Dave'],<br/> 
&nbsp;&nbsp;&nbsp;&nbsp;['cs1', 'cs2', 'cs1', 'cs2', 'cs1', 'cs2', 'cs1', 'cs2']]<br/> 
<br/> 
df = pd.DataFrame(<br/> 
&nbsp;&nbsp;&nbsp;&nbsp;np.random.randint(60,80,(8, 4)),<br/> 
&nbsp;&nbsp;&nbsp;&nbsp;index = data,<br/> 
&nbsp;&nbsp;&nbsp;&nbsp;columns = ['Quiz1', 'Quiz2', 'Quiz3', 'Quiz4'])<br/> 
<br/> 
df.index.names = ['Student', 'Class']
<br/> 
df.index.get_level_values('Class')&nbsp;&nbsp;&nbsp;&nbsp;_Returns ['cs1', 'cs2', 'cs1', 'cs2', 'cs1', 'cs2', 'cs1', 'cs2']_
### Indexing with MultiIndex
df.loc['Bob']<br/> 
df.loc['Bob','cs1']<br/> 
df.loc[('Bob', 'cs1'), 'Quiz1']<br/> 
df.loc['Bob':'Dave']<br/> 
df.loc[('Bob', 'cs2'):('Dave', 'cs1')]&nbsp;&nbsp;&nbsp;&nbsp;_From Bob-cs2 to Dave-cs1_ <br/>
df.loc[ [('Bob', 'cs2'), ('Dave', 'cs1')] ]&nbsp;&nbsp;&nbsp;&nbsp;_Only Bob-cs2 & Dave-cs1_

In [1]:
import pandas as pd
import numpy as np

### Create the tuples

In [2]:
data = [['Alice', 'Alice', 'Bob', 'Bob', 'Charlie', 'Charlie', 'Dave', 'Dave'],
  ['cs1', 'cs2', 'cs1', 'cs2', 'cs1', 'cs2', 'cs1', 'cs2']]

In [3]:
tuples = list(zip(*data))
tuples

[('Alice', 'cs1'),
 ('Alice', 'cs2'),
 ('Bob', 'cs1'),
 ('Bob', 'cs2'),
 ('Charlie', 'cs1'),
 ('Charlie', 'cs2'),
 ('Dave', 'cs1'),
 ('Dave', 'cs2')]

In [4]:
[(student, course) for student in ['Alice', 'Bob', 'Charlie', 'Dave'] \
                     for course in ['cs1', 'cs2']]

[('Alice', 'cs1'),
 ('Alice', 'cs2'),
 ('Bob', 'cs1'),
 ('Bob', 'cs2'),
 ('Charlie', 'cs1'),
 ('Charlie', 'cs2'),
 ('Dave', 'cs1'),
 ('Dave', 'cs2')]

### Create a MultiIndex from tuples

In [5]:
indices = pd.MultiIndex.from_tuples(tuples, 
                                    names = ['Student', 'Class'])
indices

MultiIndex([(  'Alice', 'cs1'),
            (  'Alice', 'cs2'),
            (    'Bob', 'cs1'),
            (    'Bob', 'cs2'),
            ('Charlie', 'cs1'),
            ('Charlie', 'cs2'),
            (   'Dave', 'cs1'),
            (   'Dave', 'cs2')],
           names=['Student', 'Class'])

### Create a series from the tuples

In [6]:
np.random.seed(123)
s = pd.Series(np.random.randint(60,80,8), index = indices)
s

Student  Class
Alice    cs1      73
         cs2      62
Bob      cs1      62
         cs2      66
Charlie  cs1      77
         cs2      79
Dave     cs1      70
         cs2      61
dtype: int64

### Use tuples as atomic labels such as (Alice, cs1), ...

In [7]:
# Using tuples as atomic labels 

np.random.seed(123)
pd.Series(np.random.randint(60,80,8), index = tuples)

(Alice, cs1)      73
(Alice, cs2)      62
(Bob, cs1)        62
(Bob, cs2)        66
(Charlie, cs1)    77
(Charlie, cs2)    79
(Dave, cs1)       70
(Dave, cs2)       61
dtype: int64

### Create a MultiIndex from a dot-product given a list of two lists

In [None]:
data = [['Alice','Bob','Charlie','Dave'],
        ['cs1', 'cs2']]

indices = pd.MultiIndex.from_product(data,    # Uses the dot-product to create indices
                                     names=['Student', 'Class'])
indices

In [None]:
np.random.seed(123)
s = pd.Series(np.random.randint(60,80,8), index = indices)
s

In [10]:
# Convenient

np.random.seed(123)
data = [['Alice', 'Alice', 'Bob', 'Bob', 'Charlie', 'Charlie', 'Dave', 'Dave'],
        ['cs1', 'cs2', 'cs1', 'cs2', 'cs1', 'cs2', 'cs1', 'cs2']]

s = pd.Series(np.random.randint(60,80,8), index = data)
s


Alice    cs1    73
         cs2    62
Bob      cs1    62
         cs2    66
Charlie  cs1    77
         cs2    79
Dave     cs1    70
         cs2    61
dtype: int64

### Create a dataframe with multilevel index

In [11]:
np.random.seed(123)
data = [['Alice', 'Alice', 'Bob', 'Bob', 'Charlie', 'Charlie', 'Dave', 'Dave'],
        ['cs1', 'cs2', 'cs1', 'cs2', 'cs1', 'cs2', 'cs1', 'cs2']]

df = pd.DataFrame(np.random.randint(60,80,(8, 4)), index = data,
                 columns = ['Quiz1', 'Quiz2', 'Quiz3', 'Quiz4'])
df

Unnamed: 0,Unnamed: 1,Quiz1,Quiz2,Quiz3,Quiz4
Alice,cs1,73,62,62,66
Alice,cs2,77,79,70,61
Bob,cs1,60,77,75,69
Bob,cs2,60,74,60,75
Charlie,cs1,79,74,64,60
Charlie,cs2,76,64,77,63
Dave,cs1,62,67,62,75
Dave,cs2,76,67,69,63


In [12]:
df.index

MultiIndex([(  'Alice', 'cs1'),
            (  'Alice', 'cs2'),
            (    'Bob', 'cs1'),
            (    'Bob', 'cs2'),
            ('Charlie', 'cs1'),
            ('Charlie', 'cs2'),
            (   'Dave', 'cs1'),
            (   'Dave', 'cs2')],
           )

In [13]:
df.index.names = ['Student', 'Class']
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Quiz1,Quiz2,Quiz3,Quiz4
Student,Class,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Alice,cs1,73,62,62,66
Alice,cs2,77,79,70,61
Bob,cs1,60,77,75,69
Bob,cs2,60,74,60,75
Charlie,cs1,79,74,64,60
Charlie,cs2,76,64,77,63
Dave,cs1,62,67,62,75
Dave,cs2,76,67,69,63


In [14]:
df.index.get_level_values(0)

Index(['Alice', 'Alice', 'Bob', 'Bob', 'Charlie', 'Charlie', 'Dave', 'Dave'], dtype='object', name='Student')

In [15]:
df.index.get_level_values(1)

Index(['cs1', 'cs2', 'cs1', 'cs2', 'cs1', 'cs2', 'cs1', 'cs2'], dtype='object', name='Class')

In [16]:
df.index.get_level_values('Class')

Index(['cs1', 'cs2', 'cs1', 'cs2', 'cs1', 'cs2', 'cs1', 'cs2'], dtype='object', name='Class')

### Indexing with MultiIndex

In [17]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Quiz1,Quiz2,Quiz3,Quiz4
Student,Class,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Alice,cs1,73,62,62,66
Alice,cs2,77,79,70,61
Bob,cs1,60,77,75,69
Bob,cs2,60,74,60,75
Charlie,cs1,79,74,64,60
Charlie,cs2,76,64,77,63
Dave,cs1,62,67,62,75
Dave,cs2,76,67,69,63


In [18]:
df.loc['Bob']

Unnamed: 0_level_0,Quiz1,Quiz2,Quiz3,Quiz4
Class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
cs1,60,77,75,69
cs2,60,74,60,75


In [19]:
df.loc['Bob','cs1']

Quiz1    60
Quiz2    77
Quiz3    75
Quiz4    69
Name: (Bob, cs1), dtype: int64

In [20]:
df.loc['Bob'].loc['cs1']

Quiz1    60
Quiz2    77
Quiz3    75
Quiz4    69
Name: cs1, dtype: int64

In [21]:
df.loc[('Bob', 'cs1')]

Quiz1    60
Quiz2    77
Quiz3    75
Quiz4    69
Name: (Bob, cs1), dtype: int64

In [22]:
df.loc[('Bob', 'cs1'), 'Quiz1']

60

In [23]:
df.loc['Bob':'Dave']  # Note inclusive start and end

Unnamed: 0_level_0,Unnamed: 1_level_0,Quiz1,Quiz2,Quiz3,Quiz4
Student,Class,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Bob,cs1,60,77,75,69
Bob,cs2,60,74,60,75
Charlie,cs1,79,74,64,60
Charlie,cs2,76,64,77,63
Dave,cs1,62,67,62,75
Dave,cs2,76,67,69,63


In [24]:
df.loc[('Bob', 'cs2'):('Dave', 'cs1')]  # From Bob-cs2 to Dave-cs1

Unnamed: 0_level_0,Unnamed: 1_level_0,Quiz1,Quiz2,Quiz3,Quiz4
Student,Class,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Bob,cs2,60,74,60,75
Charlie,cs1,79,74,64,60
Charlie,cs2,76,64,77,63
Dave,cs1,62,67,62,75


In [25]:
# with list of labels or tuples

df.loc[ [('Bob', 'cs2'), ('Dave', 'cs1')] ]  # Only Bob-cs2 & Dave-cs1

Unnamed: 0_level_0,Unnamed: 1_level_0,Quiz1,Quiz2,Quiz3,Quiz4
Student,Class,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Bob,cs2,60,74,60,75
Dave,cs1,62,67,62,75


In [26]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Quiz1,Quiz2,Quiz3,Quiz4
Student,Class,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Alice,cs1,73,62,62,66
Alice,cs2,77,79,70,61
Bob,cs1,60,77,75,69
Bob,cs2,60,74,60,75
Charlie,cs1,79,74,64,60
Charlie,cs2,76,64,77,63
Dave,cs1,62,67,62,75
Dave,cs2,76,67,69,63


In [27]:
df.columns

Index(['Quiz1', 'Quiz2', 'Quiz3', 'Quiz4'], dtype='object')

### Unstacking

In [28]:
df.unstack()

Unnamed: 0_level_0,Quiz1,Quiz1,Quiz2,Quiz2,Quiz3,Quiz3,Quiz4,Quiz4
Class,cs1,cs2,cs1,cs2,cs1,cs2,cs1,cs2
Student,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
Alice,73,77,62,79,62,70,66,61
Bob,60,60,77,74,75,60,69,75
Charlie,79,76,74,64,64,77,60,63
Dave,62,76,67,67,62,69,75,63


In [29]:
df.unstack()['Quiz1']

Class,cs1,cs2
Student,Unnamed: 1_level_1,Unnamed: 2_level_1
Alice,73,77
Bob,60,60
Charlie,79,76
Dave,62,76


In [30]:
df.unstack()['Quiz1', 'cs1']

Student
Alice      73
Bob        60
Charlie    79
Dave       62
Name: (Quiz1, cs1), dtype: int64

In [31]:
# With Series

In [32]:
s

Alice    cs1    73
         cs2    62
Bob      cs1    62
         cs2    66
Charlie  cs1    77
         cs2    79
Dave     cs1    70
         cs2    61
dtype: int64

In [33]:
s[:, 'cs1']

Alice      73
Bob        62
Charlie    77
Dave       70
dtype: int64

In [34]:
s['Bob']

cs1    62
cs2    66
dtype: int64

In [35]:
s['Bob']['cs1']

62

In [36]:
s[('Bob','cs1')]

62

In [37]:
s['Bob','cs1']

62

In [38]:
s['Bob']['cs1'] == s[('Bob','cs1')] == s['Bob','cs1']

True

In [39]:
s

Alice    cs1    73
         cs2    62
Bob      cs1    62
         cs2    66
Charlie  cs1    77
         cs2    79
Dave     cs1    70
         cs2    61
dtype: int64

In [40]:
s.unstack()

Unnamed: 0,cs1,cs2
Alice,73,62
Bob,62,66
Charlie,77,79
Dave,70,61


In [41]:
s.unstack()['cs1']

Alice      73
Bob        62
Charlie    77
Dave       70
Name: cs1, dtype: int64

### Cross-section
 - xs()

In [42]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Quiz1,Quiz2,Quiz3,Quiz4
Student,Class,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Alice,cs1,73,62,62,66
Alice,cs2,77,79,70,61
Bob,cs1,60,77,75,69
Bob,cs2,60,74,60,75
Charlie,cs1,79,74,64,60
Charlie,cs2,76,64,77,63
Dave,cs1,62,67,62,75
Dave,cs2,76,67,69,63


In [43]:
df.xs('Bob')

Unnamed: 0_level_0,Quiz1,Quiz2,Quiz3,Quiz4
Class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
cs1,60,77,75,69
cs2,60,74,60,75


In [44]:
df.xs('cs1', level=1)

Unnamed: 0_level_0,Quiz1,Quiz2,Quiz3,Quiz4
Student,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Alice,73,62,62,66
Bob,60,77,75,69
Charlie,79,74,64,60
Dave,62,67,62,75


In [45]:
df.xs('cs1', level='Class')

Unnamed: 0_level_0,Quiz1,Quiz2,Quiz3,Quiz4
Student,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Alice,73,62,62,66
Bob,60,77,75,69
Charlie,79,74,64,60
Dave,62,67,62,75


In [46]:
# Using slice

df.loc[(slice(None), 'cs1'), :]

Unnamed: 0_level_0,Unnamed: 1_level_0,Quiz1,Quiz2,Quiz3,Quiz4
Student,Class,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Alice,cs1,73,62,62,66
Bob,cs1,60,77,75,69
Charlie,cs1,79,74,64,60
Dave,cs1,62,67,62,75


In [75]:
df.loc[(slice('Alice','Charlie'), slice(None)), :] # Slice(None) is a wildcard???

Unnamed: 0_level_0,Unnamed: 1_level_0,Quiz1,Quiz2,Quiz3,Quiz4
Student,Class,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Alice,cs1,73,62,62,66
Alice,cs2,77,79,70,61
Bob,cs1,60,77,75,69
Bob,cs2,60,74,60,75
Charlie,cs1,79,74,64,60
Charlie,cs2,76,64,77,63


### MultiIndex for Columns

In [81]:
# hierarchical indices and columns

np.random.seed(123)
index = pd.MultiIndex.from_product([[2017, 2018], [1, 2]],
                                   names=['year', 'visit'])
columns = pd.MultiIndex.from_product([['Alice', 'Bob', 'Charlie'], ['HeartRate', 'Temperature']],
                                     names=['subject', 'type'])
print("index:\n", index)
print()
print("columns\n", columns)
print()

# mock some data
data = np.round(np.random.randn(4, 6), 1)
data[:, ::2] *= 10
data += 37

# create the DataFrame
health_data = pd.DataFrame(data, index=index, columns=columns)
health_data

index:
 MultiIndex([(2017, 1),
            (2017, 2),
            (2018, 1),
            (2018, 2)],
           names=['year', 'visit'])

columns
 MultiIndex([(  'Alice',   'HeartRate'),
            (  'Alice', 'Temperature'),
            (    'Bob',   'HeartRate'),
            (    'Bob', 'Temperature'),
            ('Charlie',   'HeartRate'),
            ('Charlie', 'Temperature')],
           names=['subject', 'type'])



Unnamed: 0_level_0,subject,Alice,Alice,Bob,Bob,Charlie,Charlie
Unnamed: 0_level_1,type,HeartRate,Temperature,HeartRate,Temperature,HeartRate,Temperature
year,visit,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
2017,1,26.0,38.0,40.0,35.5,31.0,38.7
2017,2,13.0,36.6,50.0,36.1,30.0,36.9
2018,1,52.0,36.4,33.0,36.6,59.0,39.2
2018,2,47.0,37.4,44.0,38.5,28.0,38.2


In [49]:
health_data['Bob']

Unnamed: 0_level_0,type,HR,Temp
year,visit,Unnamed: 2_level_1,Unnamed: 3_level_1
2017,1,40.0,35.5
2017,2,50.0,36.1
2018,1,33.0,36.6
2018,2,44.0,38.5


In [50]:
health_data['Bob', 'HR']

year  visit
2017  1        40.0
      2        50.0
2018  1        33.0
      2        44.0
Name: (Bob, HR), dtype: float64

In [51]:
health_data.loc[:, ('Bob', 'HR')]  # Choose all rows where Bob-HR is specified for the column

year  visit
2017  1        40.0
      2        50.0
2018  1        33.0
      2        44.0
Name: (Bob, HR), dtype: float64

In [52]:
health_data.iloc[:2, :2]

Unnamed: 0_level_0,subject,Alice,Alice
Unnamed: 0_level_1,type,HR,Temp
year,visit,Unnamed: 2_level_2,Unnamed: 3_level_2
2017,1,26.0,38.0
2017,2,13.0,36.6


In [53]:
health_data

Unnamed: 0_level_0,subject,Alice,Alice,Bob,Bob,Charlie,Charlie
Unnamed: 0_level_1,type,HR,Temp,HR,Temp,HR,Temp
year,visit,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
2017,1,26.0,38.0,40.0,35.5,31.0,38.7
2017,2,13.0,36.6,50.0,36.1,30.0,36.9
2018,1,52.0,36.4,33.0,36.6,59.0,39.2
2018,2,47.0,37.4,44.0,38.5,28.0,38.2


In [54]:
health_data.xs('HR', level='type', axis=1)

Unnamed: 0_level_0,subject,Alice,Bob,Charlie
year,visit,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2017,1,26.0,40.0,31.0
2017,2,13.0,50.0,30.0
2018,1,52.0,33.0,59.0
2018,2,47.0,44.0,28.0


In [55]:
health_data

Unnamed: 0_level_0,subject,Alice,Alice,Bob,Bob,Charlie,Charlie
Unnamed: 0_level_1,type,HR,Temp,HR,Temp,HR,Temp
year,visit,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
2017,1,26.0,38.0,40.0,35.5,31.0,38.7
2017,2,13.0,36.6,50.0,36.1,30.0,36.9
2018,1,52.0,36.4,33.0,36.6,59.0,39.2
2018,2,47.0,37.4,44.0,38.5,28.0,38.2


In [56]:
# IndexSlice

idx = pd.IndexSlice
idx

<pandas.core.indexing._IndexSlice at 0x11d6e5d30>

In [57]:
health_data.loc[idx[:,2], idx[:, 'HR']]

Unnamed: 0_level_0,subject,Alice,Bob,Charlie
Unnamed: 0_level_1,type,HR,HR,HR
year,visit,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
2017,2,13.0,50.0,30.0
2018,2,47.0,44.0,28.0


In [58]:
health_data.loc[:, idx[:, 'HR']] # Using index slicing

Unnamed: 0_level_0,subject,Alice,Bob,Charlie
Unnamed: 0_level_1,type,HR,HR,HR
year,visit,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
2017,1,26.0,40.0,31.0
2017,2,13.0,50.0,30.0
2018,1,52.0,33.0,59.0
2018,2,47.0,44.0,28.0


In [59]:
health_data.loc[:, idx['Alice':'Bob', 'HR']]

Unnamed: 0_level_0,subject,Alice,Bob
Unnamed: 0_level_1,type,HR,HR
year,visit,Unnamed: 2_level_2,Unnamed: 3_level_2
2017,1,26.0,40.0
2017,2,13.0,50.0
2018,1,52.0,33.0
2018,2,47.0,44.0


In [60]:
health_data.index

MultiIndex([(2017, 1),
            (2017, 2),
            (2018, 1),
            (2018, 2)],
           names=['year', 'visit'])

In [61]:
health_data.columns

MultiIndex([(  'Alice',   'HR'),
            (  'Alice', 'Temp'),
            (    'Bob',   'HR'),
            (    'Bob', 'Temp'),
            ('Charlie',   'HR'),
            ('Charlie', 'Temp')],
           names=['subject', 'type'])

### Swapping levels

In [62]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Quiz1,Quiz2,Quiz3,Quiz4
Student,Class,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Alice,cs1,73,62,62,66
Alice,cs2,77,79,70,61
Bob,cs1,60,77,75,69
Bob,cs2,60,74,60,75
Charlie,cs1,79,74,64,60
Charlie,cs2,76,64,77,63
Dave,cs1,62,67,62,75
Dave,cs2,76,67,69,63


In [63]:
df.swaplevel()

Unnamed: 0_level_0,Unnamed: 1_level_0,Quiz1,Quiz2,Quiz3,Quiz4
Class,Student,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
cs1,Alice,73,62,62,66
cs2,Alice,77,79,70,61
cs1,Bob,60,77,75,69
cs2,Bob,60,74,60,75
cs1,Charlie,79,74,64,60
cs2,Charlie,76,64,77,63
cs1,Dave,62,67,62,75
cs2,Dave,76,67,69,63


In [64]:
df.swaplevel().loc['cs1']

Unnamed: 0_level_0,Quiz1,Quiz2,Quiz3,Quiz4
Student,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Alice,73,62,62,66
Bob,60,77,75,69
Charlie,79,74,64,60
Dave,62,67,62,75


In [65]:
df.index

MultiIndex([(  'Alice', 'cs1'),
            (  'Alice', 'cs2'),
            (    'Bob', 'cs1'),
            (    'Bob', 'cs2'),
            ('Charlie', 'cs1'),
            ('Charlie', 'cs2'),
            (   'Dave', 'cs1'),
            (   'Dave', 'cs2')],
           names=['Student', 'Class'])

In [66]:
df2 = df.swaplevel('Student', 'Class')
df2

Unnamed: 0_level_0,Unnamed: 1_level_0,Quiz1,Quiz2,Quiz3,Quiz4
Class,Student,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
cs1,Alice,73,62,62,66
cs2,Alice,77,79,70,61
cs1,Bob,60,77,75,69
cs2,Bob,60,74,60,75
cs1,Charlie,79,74,64,60
cs2,Charlie,76,64,77,63
cs1,Dave,62,67,62,75
cs2,Dave,76,67,69,63


In [67]:
df2.mean()

Quiz1    70.375
Quiz2    70.500
Quiz3    67.375
Quiz4    66.500
dtype: float64

In [68]:
df2.mean(level='Class')

Unnamed: 0_level_0,Quiz1,Quiz2,Quiz3,Quiz4
Class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
cs1,68.5,70.0,65.75,67.5
cs2,72.25,71.0,69.0,65.5


In [69]:
df2.mean(level='Student')

Unnamed: 0_level_0,Quiz1,Quiz2,Quiz3,Quiz4
Student,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Alice,75.0,70.5,66.0,63.5
Bob,60.0,75.5,67.5,72.0
Charlie,77.5,69.0,70.5,61.5
Dave,69.0,67.0,65.5,69.0


In [70]:
df2

Unnamed: 0_level_0,Unnamed: 1_level_0,Quiz1,Quiz2,Quiz3,Quiz4
Class,Student,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
cs1,Alice,73,62,62,66
cs2,Alice,77,79,70,61
cs1,Bob,60,77,75,69
cs2,Bob,60,74,60,75
cs1,Charlie,79,74,64,60
cs2,Charlie,76,64,77,63
cs1,Dave,62,67,62,75
cs2,Dave,76,67,69,63


In [71]:
df2.sort_index(level='Class')

Unnamed: 0_level_0,Unnamed: 1_level_0,Quiz1,Quiz2,Quiz3,Quiz4
Class,Student,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
cs1,Alice,73,62,62,66
cs1,Bob,60,77,75,69
cs1,Charlie,79,74,64,60
cs1,Dave,62,67,62,75
cs2,Alice,77,79,70,61
cs2,Bob,60,74,60,75
cs2,Charlie,76,64,77,63
cs2,Dave,76,67,69,63


In [72]:
print(df2.sort_index(level='Class'))

               Quiz1  Quiz2  Quiz3  Quiz4
Class Student                            
cs1   Alice       73     62     62     66
      Bob         60     77     75     69
      Charlie     79     74     64     60
      Dave        62     67     62     75
cs2   Alice       77     79     70     61
      Bob         60     74     60     75
      Charlie     76     64     77     63
      Dave        76     67     69     63


In [73]:
df2.sort_index(level='Student')

Unnamed: 0_level_0,Unnamed: 1_level_0,Quiz1,Quiz2,Quiz3,Quiz4
Class,Student,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
cs1,Alice,73,62,62,66
cs2,Alice,77,79,70,61
cs1,Bob,60,77,75,69
cs2,Bob,60,74,60,75
cs1,Charlie,79,74,64,60
cs2,Charlie,76,64,77,63
cs1,Dave,62,67,62,75
cs2,Dave,76,67,69,63


In [74]:
df2.reset_index(inplace=True)
df2

Unnamed: 0,Class,Student,Quiz1,Quiz2,Quiz3,Quiz4
0,cs1,Alice,73,62,62,66
1,cs2,Alice,77,79,70,61
2,cs1,Bob,60,77,75,69
3,cs2,Bob,60,74,60,75
4,cs1,Charlie,79,74,64,60
5,cs2,Charlie,76,64,77,63
6,cs1,Dave,62,67,62,75
7,cs2,Dave,76,67,69,63
