In [1]:
import numpy as np
import pandas as pd

# Constructing

In [2]:
# A MultiIndex is usually built with one of its alternative constructors.
# from_product takes the cartesian product of the iterables, so every team
# is paired with every position (2 teams x 5 positions = 10 row labels).
teams = ['LAL', 'HOU']
positions = ['PG', 'SG', 'SF', 'PF', 'C']
idx = pd.MultiIndex.from_product([teams, positions], names=['team', 'position'])

columns = ['pts', 'reb', 'ast']

# Seed the generator so the sample stats are identical on every
# Restart Kernel -> Run All; the values are arbitrary but must be reproducible.
SEED = 42
rng = np.random.default_rng(SEED)
data = rng.integers(0, 20, size=(len(idx), len(columns)))  # integers in [0, 20)

df = pd.DataFrame(data=data, columns=columns, index=idx)
df

Unnamed: 0_level_0,Unnamed: 1_level_0,pts,reb,ast
team,position,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
LAL,PG,19,1,8
LAL,SG,4,12,0
LAL,SF,12,4,17
LAL,PF,5,2,12
LAL,C,4,11,0
HOU,PG,14,3,7
HOU,SG,8,12,9
HOU,SF,9,17,18
HOU,PF,10,7,6
HOU,C,14,11,13


In [3]:
# from_frame: each column of the frame becomes one level of the MultiIndex,
# and the column names are used as the level names.
level_columns = ['team', 'position']
index_df = df.reset_index().loc[:, level_columns]
rebuilt_idx = pd.MultiIndex.from_frame(index_df)
print(rebuilt_idx)
index_df

MultiIndex([('LAL', 'PG'),
            ('LAL', 'SG'),
            ('LAL', 'SF'),
            ('LAL', 'PF'),
            ('LAL',  'C'),
            ('HOU', 'PG'),
            ('HOU', 'SG'),
            ('HOU', 'SF'),
            ('HOU', 'PF'),
            ('HOU',  'C')],
           names=['team', 'position'])


Unnamed: 0,team,position
0,LAL,PG
1,LAL,SG
2,LAL,SF
3,LAL,PF
4,LAL,C
5,HOU,PG
6,HOU,SG
7,HOU,SF
8,HOU,PF
9,HOU,C


In [4]:
# from_arrays: one array per level, all of the same length;
# element i of every array together forms the i-th row label.
team_labels = ['LAL'] * 5 + ['HOU'] * 5
position_labels = ['PG', 'SG', 'SF', 'PF', 'C'] * 2
arrays = [team_labels, position_labels]
pd.MultiIndex.from_arrays(arrays, names=('team', 'position'))

MultiIndex([('LAL', 'PG'),
            ('LAL', 'SG'),
            ('LAL', 'SF'),
            ('LAL', 'PF'),
            ('LAL',  'C'),
            ('HOU', 'PG'),
            ('HOU', 'SG'),
            ('HOU', 'SF'),
            ('HOU', 'PF'),
            ('HOU',  'C')],
           names=['team', 'position'])

In [5]:
# from_tuples: one (team, position) tuple per row label.
# The outer loop runs over teams, so all LAL labels come first, then all HOU labels.
tuples = [
    (team, position)
    for team in ('LAL', 'HOU')
    for position in ('PG', 'SG', 'SF', 'PF', 'C')
]
pd.MultiIndex.from_tuples(tuples, names=['team', 'position'])

MultiIndex([('LAL', 'PG'),
            ('LAL', 'SG'),
            ('LAL', 'SF'),
            ('LAL', 'PF'),
            ('LAL',  'C'),
            ('HOU', 'PG'),
            ('HOU', 'SG'),
            ('HOU', 'SF'),
            ('HOU', 'PF'),
            ('HOU',  'C')],
           names=['team', 'position'])

In [6]:
# reset_index works the same as on a regular index: both MultiIndex levels
# ('team' and 'position') become ordinary columns and the rows get a default integer index.
# NOTE: `df` is rebound here; the next cell relies on 'team' and 'position' being columns.
df = df.reset_index()
df

Unnamed: 0,team,position,pts,reb,ast
0,LAL,PG,19,1,8
1,LAL,SG,4,12,0
2,LAL,SF,12,4,17
3,LAL,PF,5,2,12
4,LAL,C,4,11,0
5,HOU,PG,14,3,7
6,HOU,SG,8,12,9
7,HOU,SF,9,17,18
8,HOU,PF,10,7,6
9,HOU,C,14,11,13


In [7]:
# Passing a list of column names to set_index builds a MultiIndex from those columns,
# in the order given: 'team' becomes the outer level and 'position' the inner level.
df = df.set_index(['team','position'])
df

Unnamed: 0_level_0,Unnamed: 1_level_0,pts,reb,ast
team,position,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
LAL,PG,19,1,8
LAL,SG,4,12,0
LAL,SF,12,4,17
LAL,PF,5,2,12
LAL,C,4,11,0
HOU,PG,14,3,7
HOU,SG,8,12,9
HOU,SF,9,17,18
HOU,PF,10,7,6
HOU,C,14,11,13


Note: When modifying a MultiIndex it is usually easiest to reset_index, modify index columns within the dataframe, and then use set_index to reconstruct the MultiIndex.

# Sorting

For MultiIndex-ed objects to be indexed and sliced effectively, they need to be sorted. As with any index, you can use sort_index().

In [8]:
# sort_index() orders the rows by the index levels, outermost first:
# here 'team' alphabetically, then 'position' alphabetically within each team.
# The result is only displayed, not assigned, so `df` itself is left unchanged.
df.sort_index()

Unnamed: 0_level_0,Unnamed: 1_level_0,pts,reb,ast
team,position,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
HOU,C,14,11,13
HOU,PF,10,7,6
HOU,PG,14,3,7
HOU,SF,9,17,18
HOU,SG,8,12,9
LAL,C,4,11,0
LAL,PF,5,2,12
LAL,PG,19,1,8
LAL,SF,12,4,17
LAL,SG,4,12,0


In [9]:
# Give 'position' a custom sort order (instead of alphabetical) by making it an
# ordered categorical before it goes back into the index, then sort the index.
position_order = ['PG', 'SG', 'SF', 'PF', 'C']
df = (
    df.reset_index()
    .assign(
        position=lambda flat: pd.Categorical(
            flat['position'], categories=position_order, ordered=True
        )
    )
    .set_index(['team', 'position'])
    .sort_index()
)
df

Unnamed: 0_level_0,Unnamed: 1_level_0,pts,reb,ast
team,position,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
HOU,PG,14,3,7
HOU,SG,8,12,9
HOU,SF,9,17,18
HOU,PF,10,7,6
HOU,C,14,11,13
LAL,PG,19,1,8
LAL,SG,4,12,0
LAL,SF,12,4,17
LAL,PF,5,2,12
LAL,C,4,11,0


In [10]:
# Check that the index is lexicographically sorted, which label slicing with .loc needs.
# MultiIndex.is_lexsorted() was deprecated in pandas 1.3 and removed in 2.0;
# is_monotonic_increasing is the replacement named by the deprecation notice.
df.index.is_monotonic_increasing

True

In [11]:
# Slice rows between two full (team, position) keys; both endpoints are included.
# 'PG' through 'PF' follows the custom categorical position order, not alphabetical order.
df.loc[('LAL', 'PG'):('LAL', 'PF')]

Unnamed: 0_level_0,Unnamed: 1_level_0,pts,reb,ast
team,position,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
LAL,PG,19,1,8
LAL,SG,4,12,0
LAL,SF,12,4,17
LAL,PF,5,2,12


In [12]:
# sort_index can also sort on a chosen level and in descending order.
# The result is displayed but not assigned, so `df` keeps its default (lexsorted) order.
# Label slicing on a frame left in this order would raise an error (pandas' UnsortedIndexError),
# because the MultiIndex is no longer lexsorted.
df.sort_index(level='position', ascending=False)

Unnamed: 0_level_0,Unnamed: 1_level_0,pts,reb,ast
team,position,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
LAL,C,4,11,0
HOU,C,14,11,13
LAL,PF,5,2,12
HOU,PF,10,7,6
LAL,SF,12,4,17
HOU,SF,9,17,18
LAL,SG,4,12,0
HOU,SG,8,12,9
LAL,PG,19,1,8
HOU,PG,14,3,7


# Selecting

## iloc

In [13]:
# iloc is purely positional: it ignores the index labels at every level
# and selects by integer row position, exactly as it would on a regular index.
# Here [:2] returns the first two rows of the frame in its current order.
df.iloc[:2]

Unnamed: 0_level_0,Unnamed: 1_level_0,pts,reb,ast
team,position,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
HOU,PG,14,3,7
HOU,SG,8,12,9


## loc

In [14]:
# A single label passed to loc is matched against the outermost index level ('team').
# The matched level is dropped from the result, leaving 'position' as the index.
df.loc['LAL']

Unnamed: 0_level_0,pts,reb,ast
position,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
PG,19,1,8
SG,4,12,0
SF,12,4,17
PF,5,2,12
C,4,11,0


In [15]:
# The first element selects on the outer index level, the second selects a column;
# the result is a Series of 'ast' values indexed by position.
df.loc['LAL', 'ast']

position
PG     8
SG     0
SF    17
PF    12
C      0
Name: ast, dtype: int32

In [16]:
# A tuple is one multi-level key: (outer, inner) = (team, position).
# A full key identifies a single row, returned as a Series of that row's column values.
df.loc[('LAL', 'SF')]

pts    12
reb     4
ast    17
Name: (LAL, SF), dtype: int32

In [17]:
# Row key: a tuple is ONE multi-level label, here (team, position).
# Column selection: a list selects SEVERAL labels. pandas does not treat tuples and
# lists identically when indexing, so the column labels are passed as a list, not a tuple.
df.loc[('LAL', 'SF'), ['ast', 'pts']]

ast    17
pts    12
Name: (LAL, SF), dtype: int32

In [18]:
# A list of full (team, position) tuples selects several rows at once;
# the rows come back in the order the keys are listed, not in index order.
row_labels = [('LAL', 'SG'), ('HOU', 'C')]
df.loc[row_labels]

Unnamed: 0_level_0,Unnamed: 1_level_0,pts,reb,ast
team,position,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
LAL,SG,4,12,0
HOU,C,14,11,13


### loc slicing

Remember: a MultiIndexed DataFrame must be lexsorted (for example with `df.sort_index()`) before it can be sliced with `loc`; slicing an unsorted MultiIndex raises an error.

In [19]:
# Slice on the outer level only: every row from team 'HOU' through team 'LAL', both inclusive.
df.loc['HOU':'LAL']

Unnamed: 0_level_0,Unnamed: 1_level_0,pts,reb,ast
team,position,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
HOU,PG,14,3,7
HOU,SG,8,12,9
HOU,SF,9,17,18
HOU,PF,10,7,6
HOU,C,14,11,13
LAL,PG,19,1,8
LAL,SG,4,12,0
LAL,SF,12,4,17
LAL,PF,5,2,12
LAL,C,4,11,0


In [20]:
# Slice between two full keys that span teams: from (HOU, SG) through (LAL, SG), both inclusive.
df.loc[('HOU', 'SG'):('LAL', 'SG')]

Unnamed: 0_level_0,Unnamed: 1_level_0,pts,reb,ast
team,position,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
HOU,SG,8,12,9
HOU,SF,9,17,18
HOU,PF,10,7,6
HOU,C,14,11,13
LAL,PG,19,1,8
LAL,SG,4,12,0


## xs

In [21]:
# xs (cross-section) matches a label on one chosen level, here the inner 'position' level,
# without having to specify the outer level. drop_level=False keeps that level in the result's index.
df.xs('C', level='position', drop_level=False)

Unnamed: 0_level_0,Unnamed: 1_level_0,pts,reb,ast
team,position,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
HOU,C,14,11,13
LAL,C,4,11,0
