# MultiIndex / advanced indexing
> https://pandas.pydata.org/docs/user_guide/advanced.html#advanced-indexing-with-hierarchical-index

## Hierarchical indexing (MultiIndex)

### Creating a MultiIndex (hierarchical index) object

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Two parallel label lists: position i of each list holds the (first, second)
# labels of row i. The pasted IPython continuation prompts were removed so the
# cell is valid Python on its own.
arrays = [
    ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"],
    ["one", "two", "one", "two", "one", "two", "one", "two"],
]

In [3]:
tuples = list(zip(*arrays))

In [4]:
tuples

[('bar', 'one'),
 ('bar', 'two'),
 ('baz', 'one'),
 ('baz', 'two'),
 ('foo', 'one'),
 ('foo', 'two'),
 ('qux', 'one'),
 ('qux', 'two')]

In [5]:
index = pd.MultiIndex.from_tuples(tuples, names=["first", "second"])

In [6]:
index

MultiIndex([('bar', 'one'),
            ('bar', 'two'),
            ('baz', 'one'),
            ('baz', 'two'),
            ('foo', 'one'),
            ('foo', 'two'),
            ('qux', 'one'),
            ('qux', 'two')],
           names=['first', 'second'])

In [7]:
s = pd.Series(np.random.randn(8), index=index)

In [8]:
s

first  second
bar    one      -0.801668
       two       1.514124
baz    one      -0.751286
       two      -1.056213
foo    one      -0.128536
       two       1.737071
qux    one      -0.173347
       two      -2.059393
dtype: float64

In [9]:
iterables = [["bar", "baz", "foo", "qux"], ["one", "two"]]

In [10]:
pd.MultiIndex.from_product(iterables, names=["first", "second"])

MultiIndex([('bar', 'one'),
            ('bar', 'two'),
            ('baz', 'one'),
            ('baz', 'two'),
            ('foo', 'one'),
            ('foo', 'two'),
            ('qux', 'one'),
            ('qux', 'two')],
           names=['first', 'second'])

In [11]:
# Small frame with one column per index level; the column names double as the
# level names. The pasted IPython continuation prompts were removed so the
# cell is valid Python on its own.
df = pd.DataFrame(
    [["bar", "one"], ["bar", "two"], ["foo", "one"], ["foo", "two"]],
    columns=["first", "second"],
)

In [12]:
df

Unnamed: 0,first,second
0,bar,one
1,bar,two
2,foo,one
3,foo,two


In [13]:
pd.MultiIndex.from_frame(df)

MultiIndex([('bar', 'one'),
            ('bar', 'two'),
            ('foo', 'one'),
            ('foo', 'two')],
           names=['first', 'second'])

In [14]:
# Same labels as before, this time as NumPy arrays so they can be passed
# directly as ``index=``. The pasted IPython continuation prompts were removed
# so the cell is valid Python on its own.
arrays = [
    np.array(["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"]),
    np.array(["one", "two", "one", "two", "one", "two", "one", "two"]),
]

In [15]:
s = pd.Series(np.random.randn(8), index=arrays)

In [16]:
s

bar  one    0.069852
     two    0.671807
baz  one   -0.273809
     two    1.683280
foo  one    0.626582
     two   -1.061801
qux  one   -1.611194
     two    0.956504
dtype: float64

In [17]:
df = pd.DataFrame(np.random.randn(8, 4), index=arrays)

In [18]:
df

Unnamed: 0,Unnamed: 1,0,1,2,3
bar,one,-0.555024,0.554149,0.682733,-1.143168
bar,two,-0.070477,1.288789,-1.42672,-0.93414
baz,one,0.346587,-0.384439,-0.785089,-0.286762
baz,two,-0.54888,-0.385793,-0.521916,-0.406981
foo,one,1.375207,-0.236522,0.129006,-0.884036
foo,two,1.106109,0.641679,-1.515976,-0.081729
qux,one,0.061255,0.077323,-1.08791,-1.022526
qux,two,0.215159,-1.058299,0.093894,0.939469


In [19]:
df.index.names

FrozenList([None, None])

In [20]:
df = pd.DataFrame(np.random.randn(3,8), index=["A", "B", "C"], columns=index)

In [21]:
df

first,bar,bar,baz,baz,foo,foo,qux,qux
second,one,two,one,two,one,two,one,two
A,0.283479,0.683097,-1.203705,0.657149,-0.002972,-0.417262,-1.314024,-1.040896
B,-0.000798,0.102291,0.968358,0.322263,0.553892,-1.343042,1.356863,-0.510343
C,0.034508,-0.204105,2.180522,-1.388232,0.808507,0.748004,-0.174149,1.29005


In [22]:
pd.DataFrame(np.random.randn(6,6), index=index[:6], columns=index[:6])

Unnamed: 0_level_0,first,bar,bar,baz,baz,foo,foo
Unnamed: 0_level_1,second,one,two,one,two,one,two
first,second,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
bar,one,-0.490213,1.135897,-0.447196,-0.837858,1.494167,0.342667
bar,two,-0.115981,-0.097256,0.639093,2.754499,1.368085,1.504351
baz,one,-0.758393,0.191082,-0.684191,1.556573,0.073689,-0.737576
baz,two,-1.231825,0.740353,2.706766,-1.272905,-0.680528,0.42074
foo,one,-0.151007,-1.157798,-0.35236,-0.065828,1.637352,2.13338
foo,two,0.147175,1.328228,-0.051784,0.847049,-0.114,-1.503696


In [23]:
with pd.option_context("display.multi_sparse", False):
    print(df)

first        bar       bar       baz       baz       foo       foo       qux  \
second       one       two       one       two       one       two       one   
A       0.283479  0.683097 -1.203705  0.657149 -0.002972 -0.417262 -1.314024   
B      -0.000798  0.102291  0.968358  0.322263  0.553892 -1.343042  1.356863   
C       0.034508 -0.204105  2.180522 -1.388232  0.808507  0.748004 -0.174149   

first        qux  
second       two  
A      -1.040896  
B      -0.510343  
C       1.290050  


In [24]:
pd.Series(np.random.randn(8), index=tuples)

(bar, one)    0.280667
(bar, two)    0.295540
(baz, one)   -1.535064
(baz, two)    1.011590
(foo, one)   -0.087845
(foo, two)   -1.711638
(qux, one)   -0.433166
(qux, two)   -1.003320
dtype: float64

### Reconstructing the level labels

In [25]:
index.get_level_values(0)

Index(['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'], dtype='object', name='first')

In [26]:
index.get_level_values(1)

Index(['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two'], dtype='object', name='second')

In [27]:
index.get_level_values("second")

Index(['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two'], dtype='object', name='second')

### Basic indexing on axis with MultiIndex

In [28]:
df

first,bar,bar,baz,baz,foo,foo,qux,qux
second,one,two,one,two,one,two,one,two
A,0.283479,0.683097,-1.203705,0.657149,-0.002972,-0.417262,-1.314024,-1.040896
B,-0.000798,0.102291,0.968358,0.322263,0.553892,-1.343042,1.356863,-0.510343
C,0.034508,-0.204105,2.180522,-1.388232,0.808507,0.748004,-0.174149,1.29005


In [29]:
df["bar"]

second,one,two
A,0.283479,0.683097
B,-0.000798,0.102291
C,0.034508,-0.204105


In [30]:
type(df["bar"])

pandas.core.frame.DataFrame

In [31]:
df["bar", "one"]

A    0.283479
B   -0.000798
C    0.034508
Name: (bar, one), dtype: float64

In [32]:
s

bar  one    0.069852
     two    0.671807
baz  one   -0.273809
     two    1.683280
foo  one    0.626582
     two   -1.061801
qux  one   -1.611194
     two    0.956504
dtype: float64

In [33]:
s.index

MultiIndex([('bar', 'one'),
            ('bar', 'two'),
            ('baz', 'one'),
            ('baz', 'two'),
            ('foo', 'one'),
            ('foo', 'two'),
            ('qux', 'one'),
            ('qux', 'two')],
           )

In [34]:
type(s)

pandas.core.series.Series

In [35]:
s["qux"]

one   -1.611194
two    0.956504
dtype: float64

### Defined levels

In [36]:
df.columns.levels

FrozenList([['bar', 'baz', 'foo', 'qux'], ['one', 'two']])

In [37]:
df

first,bar,bar,baz,baz,foo,foo,qux,qux
second,one,two,one,two,one,two,one,two
A,0.283479,0.683097,-1.203705,0.657149,-0.002972,-0.417262,-1.314024,-1.040896
B,-0.000798,0.102291,0.968358,0.322263,0.553892,-1.343042,1.356863,-0.510343
C,0.034508,-0.204105,2.180522,-1.388232,0.808507,0.748004,-0.174149,1.29005


In [38]:
df[["foo", "qux"]].columns.levels

FrozenList([['bar', 'baz', 'foo', 'qux'], ['one', 'two']])

In [39]:
df[["foo", "qux"]]

first,foo,foo,qux,qux
second,one,two,one,two
A,-0.002972,-0.417262,-1.314024,-1.040896
B,0.553892,-1.343042,1.356863,-0.510343
C,0.808507,0.748004,-0.174149,1.29005


In [40]:
df[["foo", "qux"]].columns.to_numpy()

array([('foo', 'one'), ('foo', 'two'), ('qux', 'one'), ('qux', 'two')],
      dtype=object)

In [41]:
df[["foo", "qux"]].columns.get_level_values(0)

Index(['foo', 'foo', 'qux', 'qux'], dtype='object', name='first')

In [42]:
df[["foo", "qux"]].columns.get_level_values(1)

Index(['one', 'two', 'one', 'two'], dtype='object', name='second')

In [43]:
new_mi = df[["foo", "qux"]].columns.remove_unused_levels()

In [44]:
new_mi.levels

FrozenList([['foo', 'qux'], ['one', 'two']])

In [45]:
new_mi

MultiIndex([('foo', 'one'),
            ('foo', 'two'),
            ('qux', 'one'),
            ('qux', 'two')],
           names=['first', 'second'])

In [46]:
type(new_mi)

pandas.core.indexes.multi.MultiIndex

### Data alignment and using `reindex`

In [47]:
s

bar  one    0.069852
     two    0.671807
baz  one   -0.273809
     two    1.683280
foo  one    0.626582
     two   -1.061801
qux  one   -1.611194
     two    0.956504
dtype: float64

In [48]:
type(s)

pandas.core.series.Series

In [49]:
s[:2]

bar  one    0.069852
     two    0.671807
dtype: float64

In [50]:
s + s[:2]

bar  one    0.139704
     two    1.343613
baz  one         NaN
     two         NaN
foo  one         NaN
     two         NaN
qux  one         NaN
     two         NaN
dtype: float64

In [51]:
s + s[::2]

bar  one    0.139704
     two         NaN
baz  one   -0.547617
     two         NaN
foo  one    1.253164
     two         NaN
qux  one   -3.222388
     two         NaN
dtype: float64

In [52]:
index

MultiIndex([('bar', 'one'),
            ('bar', 'two'),
            ('baz', 'one'),
            ('baz', 'two'),
            ('foo', 'one'),
            ('foo', 'two'),
            ('qux', 'one'),
            ('qux', 'two')],
           names=['first', 'second'])

In [53]:
index[:3]

MultiIndex([('bar', 'one'),
            ('bar', 'two'),
            ('baz', 'one')],
           names=['first', 'second'])

In [54]:
s.reindex(index[:3])

first  second
bar    one       0.069852
       two       0.671807
baz    one      -0.273809
dtype: float64

In [55]:
s.reindex([("foo", "two"), ("bar", "one"), ("qux", "one"), ("baz", "one")])

foo  two   -1.061801
bar  one    0.069852
qux  one   -1.611194
baz  one   -0.273809
dtype: float64

## Advanced indexing with hierarchical index

In [56]:
df

first,bar,bar,baz,baz,foo,foo,qux,qux
second,one,two,one,two,one,two,one,two
A,0.283479,0.683097,-1.203705,0.657149,-0.002972,-0.417262,-1.314024,-1.040896
B,-0.000798,0.102291,0.968358,0.322263,0.553892,-1.343042,1.356863,-0.510343
C,0.034508,-0.204105,2.180522,-1.388232,0.808507,0.748004,-0.174149,1.29005


In [57]:
df = df.T

In [58]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B,C
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
bar,one,0.283479,-0.000798,0.034508
bar,two,0.683097,0.102291,-0.204105
baz,one,-1.203705,0.968358,2.180522
baz,two,0.657149,0.322263,-1.388232
foo,one,-0.002972,0.553892,0.808507
foo,two,-0.417262,-1.343042,0.748004
qux,one,-1.314024,1.356863,-0.174149
qux,two,-1.040896,-0.510343,1.29005


In [59]:
df.loc[("bar", "two")]

A    0.683097
B    0.102291
C   -0.204105
Name: (bar, two), dtype: float64

In [60]:
type(df.loc[("bar", "two")])

pandas.core.series.Series

In [61]:
# Shorthand for df.loc[("bar", "two")]: it returns the same row here, but
# without the explicit tuple the second item can also be read as a column key,
# so this form can lead to ambiguity in general.
df.loc["bar", "two"]

A    0.683097
B    0.102291
C   -0.204105
Name: (bar, two), dtype: float64

In [62]:
df.loc[("bar", "two"), "A"]

0.6830974744689495

In [63]:
df.loc["bar"]

Unnamed: 0_level_0,A,B,C
second,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
one,0.283479,-0.000798,0.034508
two,0.683097,0.102291,-0.204105


In [64]:
df["A"]

first  second
bar    one       0.283479
       two       0.683097
baz    one      -1.203705
       two       0.657149
foo    one      -0.002972
       two      -0.417262
qux    one      -1.314024
       two      -1.040896
Name: A, dtype: float64

In [65]:
df.loc[("bar",),]

Unnamed: 0_level_0,A,B,C
second,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
one,0.283479,-0.000798,0.034508
two,0.683097,0.102291,-0.204105


In [66]:
df.loc["baz":"foo"]

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B,C
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
baz,one,-1.203705,0.968358,2.180522
baz,two,0.657149,0.322263,-1.388232
foo,one,-0.002972,0.553892,0.808507
foo,two,-0.417262,-1.343042,0.748004


In [67]:
df.loc[("baz", "two"):("qux", "one")]

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B,C
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
baz,two,0.657149,0.322263,-1.388232
foo,one,-0.002972,0.553892,0.808507
foo,two,-0.417262,-1.343042,0.748004
qux,one,-1.314024,1.356863,-0.174149


In [68]:
df.loc[("baz", "two"):"foo"]

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B,C
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
baz,two,0.657149,0.322263,-1.388232
foo,one,-0.002972,0.553892,0.808507
foo,two,-0.417262,-1.343042,0.748004


In [69]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B,C
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
bar,one,0.283479,-0.000798,0.034508
bar,two,0.683097,0.102291,-0.204105
baz,one,-1.203705,0.968358,2.180522
baz,two,0.657149,0.322263,-1.388232
foo,one,-0.002972,0.553892,0.808507
foo,two,-0.417262,-1.343042,0.748004
qux,one,-1.314024,1.356863,-0.174149
qux,two,-1.040896,-0.510343,1.29005


In [70]:
df.loc[[("bar", "two"), ("qux", "one")]]

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B,C
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
bar,two,0.683097,0.102291,-0.204105
qux,one,-1.314024,1.356863,-0.174149


> It is important to note that tuples and lists are not treated identically in pandas when it comes to indexing. Whereas a tuple is interpreted as one multi-level key, a list is used to specify several keys. Or in other words, tuples go horizontally (traversing levels), lists go vertically (scanning levels).

In [71]:
s = pd.Series([1, 2, 3, 4, 5, 6],
    index=pd.MultiIndex.from_product([["A", "B"], ["c", "d", "e"]]),)

In [72]:
s

A  c    1
   d    2
   e    3
B  c    4
   d    5
   e    6
dtype: int64

In [73]:
s.loc[[("A", "c"), ("B", "d")]] # list of tuples

A  c    1
B  d    5
dtype: int64

In [74]:
s.loc[(["A", "B"], ["c", "d"])] # tuple of lists

A  c    1
   d    2
B  c    4
   d    5
dtype: int64

### Using slicers

In [75]:
def mklbl(prefix, n):
    """Return the list of ``n`` labels ``<prefix>0`` ... ``<prefix><n-1>``."""
    stem = str(prefix)
    return [f"{stem}{k}" for k in range(n)]

In [76]:
miindex = pd.MultiIndex.from_product([mklbl("A", 4), mklbl("B", 2), mklbl("C", 4), mklbl("D", 2)])

In [77]:
pd.MultiIndex.from_product([mklbl("A", 4), mklbl("B", 2)])

MultiIndex([('A0', 'B0'),
            ('A0', 'B1'),
            ('A1', 'B0'),
            ('A1', 'B1'),
            ('A2', 'B0'),
            ('A2', 'B1'),
            ('A3', 'B0'),
            ('A3', 'B1')],
           )

In [78]:
pd.MultiIndex.from_product([mklbl("A", 4), mklbl("B", 2), mklbl("C", 4)])

MultiIndex([('A0', 'B0', 'C0'),
            ('A0', 'B0', 'C1'),
            ('A0', 'B0', 'C2'),
            ('A0', 'B0', 'C3'),
            ('A0', 'B1', 'C0'),
            ('A0', 'B1', 'C1'),
            ('A0', 'B1', 'C2'),
            ('A0', 'B1', 'C3'),
            ('A1', 'B0', 'C0'),
            ('A1', 'B0', 'C1'),
            ('A1', 'B0', 'C2'),
            ('A1', 'B0', 'C3'),
            ('A1', 'B1', 'C0'),
            ('A1', 'B1', 'C1'),
            ('A1', 'B1', 'C2'),
            ('A1', 'B1', 'C3'),
            ('A2', 'B0', 'C0'),
            ('A2', 'B0', 'C1'),
            ('A2', 'B0', 'C2'),
            ('A2', 'B0', 'C3'),
            ('A2', 'B1', 'C0'),
            ('A2', 'B1', 'C1'),
            ('A2', 'B1', 'C2'),
            ('A2', 'B1', 'C3'),
            ('A3', 'B0', 'C0'),
            ('A3', 'B0', 'C1'),
            ('A3', 'B0', 'C2'),
            ('A3', 'B0', 'C3'),
            ('A3', 'B1', 'C0'),
            ('A3', 'B1', 'C1'),
            ('A3', 'B1', 'C2'),
            ('A3', 'B1', 'C3')],
           )

In [79]:
miindex

MultiIndex([('A0', 'B0', 'C0', 'D0'),
            ('A0', 'B0', 'C0', 'D1'),
            ('A0', 'B0', 'C1', 'D0'),
            ('A0', 'B0', 'C1', 'D1'),
            ('A0', 'B0', 'C2', 'D0'),
            ('A0', 'B0', 'C2', 'D1'),
            ('A0', 'B0', 'C3', 'D0'),
            ('A0', 'B0', 'C3', 'D1'),
            ('A0', 'B1', 'C0', 'D0'),
            ('A0', 'B1', 'C0', 'D1'),
            ('A0', 'B1', 'C1', 'D0'),
            ('A0', 'B1', 'C1', 'D1'),
            ('A0', 'B1', 'C2', 'D0'),
            ('A0', 'B1', 'C2', 'D1'),
            ('A0', 'B1', 'C3', 'D0'),
            ('A0', 'B1', 'C3', 'D1'),
            ('A1', 'B0', 'C0', 'D0'),
            ('A1', 'B0', 'C0', 'D1'),
            ('A1', 'B0', 'C1', 'D0'),
            ('A1', 'B0', 'C1', 'D1'),
            ('A1', 'B0', 'C2', 'D0'),
            ('A1', 'B0', 'C2', 'D1'),
            ('A1', 'B0', 'C3', 'D0'),
            ('A1', 'B0', 'C3', 'D1'),
            ('A1', 'B1', 'C0', 'D0'),
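            ('A1', 'B1', 'C0', 'D1'),
            ('A1', 'B1', 'C1', 'D0'),
            ('A1', 'B1', 'C1', 'D1'),
            ('A1', 'B1', 'C2', 'D0'),
            ('A1', 'B1', 'C2', 'D1'),
            ('A1', 'B1', 'C3', 'D0'),
            ('A1', 'B1', 'C3', 'D1'),
            ('A2', 'B0', 'C0', 'D0'),
            ('A2', 'B0', 'C0', 'D1'),
            ('A2', 'B0', 'C1', 'D0'),
            ('A2', 'B0', 'C1', 'D1'),
            ('A2', 'B0', 'C2', 'D0'),
            ('A2', 'B0', 'C2', 'D1'),
            ('A2', 'B0', 'C3', 'D0'),
            ('A2', 'B0', 'C3', 'D1'),
            ('A2', 'B1', 'C0', 'D0'),
            ('A2', 'B1', 'C0', 'D1'),
            ('A2', 'B1', 'C1', 'D0'),
            ('A2', 'B1', 'C1', 'D1'),
            ('A2', 'B1', 'C2', 'D0'),
            ('A2', 'B1', 'C2', 'D1'),
            ('A2', 'B1', 'C3', 'D0'),
            ('A2', 'B1', 'C3', 'D1'),
            ('A3', 'B0', 'C0', 'D0'),
            ('A3', 'B0', 'C0', 'D1'),
            ('A3', 'B0', 'C1', 'D0'),
            ('A3', 'B0', 'C1', 'D1'),
            ('A3', 'B0', 'C2', 'D0'),
            ('A3', 'B0', 'C2', 'D1'),
            ('A3', 'B0', 'C3', 'D0'),
            ('A3', 'B0', 'C3', 'D1'),
            ('A3', 'B1', 'C0', 'D0'),
            ('A3', 'B1', 'C0', 'D1'),
            ('A3', 'B1', 'C1', 'D0'),
            ('A3', 'B1', 'C1', 'D1'),
            ('A3', 'B1', 'C2', 'D0'),
            ('A3', 'B1', 'C2', 'D1'),
            ('A3', 'B1', 'C3', 'D0'),
            ('A3', 'B1', 'C3', 'D1')],
           )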

In [80]:
len(miindex)

64

In [81]:
4 * 2 * 4 * 2

64

In [82]:
micolumns = pd.MultiIndex.from_tuples([("a", "foo"), ("a", "bar"), ("b", "foo"), ("b", "bah")], names=["lv10", "lv11"])

In [83]:
dfmi = (
    pd.DataFrame(
        np.arange(len(miindex) * len(micolumns)).reshape(
            (len(miindex), len(micolumns))
        ),
        index=miindex,
        columns=micolumns,
    )
    .sort_index()
    .sort_index(axis=1)
)

In [84]:
dfmi

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lv10,a,a,b,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lv11,bar,foo,bah,foo
A0,B0,C0,D0,1,0,3,2
A0,B0,C0,D1,5,4,7,6
A0,B0,C1,D0,9,8,11,10
A0,B0,C1,D1,13,12,15,14
A0,B0,C2,D0,17,16,19,18
...,...,...,...,...,...,...,...
A3,B1,C1,D1,237,236,239,238
A3,B1,C2,D0,241,240,243,242
A3,B1,C2,D1,245,244,247,246
A3,B1,C3,D0,249,248,251,250


In [85]:
dfmi.loc[(slice("A1", "A3"), slice(None), ["C1", "C3"]), :]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lv10,a,a,b,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lv11,bar,foo,bah,foo
A1,B0,C1,D0,73,72,75,74
A1,B0,C1,D1,77,76,79,78
A1,B0,C3,D0,89,88,91,90
A1,B0,C3,D1,93,92,95,94
A1,B1,C1,D0,105,104,107,106
A1,B1,C1,D1,109,108,111,110
A1,B1,C3,D0,121,120,123,122
A1,B1,C3,D1,125,124,127,126
A2,B0,C1,D0,137,136,139,138
A2,B0,C1,D1,141,140,143,142


In [86]:
idx = pd.IndexSlice

In [87]:
dfmi.loc[idx[:, :, ["C1", "C3"]], idx[:, "foo"]]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lv10,a,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lv11,foo,foo
A0,B0,C1,D0,8,10
A0,B0,C1,D1,12,14
A0,B0,C3,D0,24,26
A0,B0,C3,D1,28,30
A0,B1,C1,D0,40,42
A0,B1,C1,D1,44,46
A0,B1,C3,D0,56,58
A0,B1,C3,D1,60,62
A1,B0,C1,D0,72,74
A1,B0,C1,D1,76,78


In [88]:
dfmi.loc["A1", (slice(None), "foo")]

Unnamed: 0_level_0,Unnamed: 1_level_0,lv10,a,b
Unnamed: 0_level_1,Unnamed: 1_level_1,lv11,foo,foo
B0,C0,D0,64,66
B0,C0,D1,68,70
B0,C1,D0,72,74
B0,C1,D1,76,78
B0,C2,D0,80,82
B0,C2,D1,84,86
B0,C3,D0,88,90
B0,C3,D1,92,94
B1,C0,D0,96,98
B1,C0,D1,100,102


In [89]:
dfmi.loc[idx[:, :,["C1", "C3"]], idx[:, "foo"]]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lv10,a,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lv11,foo,foo
A0,B0,C1,D0,8,10
A0,B0,C1,D1,12,14
A0,B0,C3,D0,24,26
A0,B0,C3,D1,28,30
A0,B1,C1,D0,40,42
A0,B1,C1,D1,44,46
A0,B1,C3,D0,56,58
A0,B1,C3,D1,60,62
A1,B0,C1,D0,72,74
A1,B0,C1,D1,76,78


In [90]:
dfmi.loc[idx[:, :, ["C1", "C3"]], idx[:, "foo"]]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lv10,a,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lv11,foo,foo
A0,B0,C1,D0,8,10
A0,B0,C1,D1,12,14
A0,B0,C3,D0,24,26
A0,B0,C3,D1,28,30
A0,B1,C1,D0,40,42
A0,B1,C1,D1,44,46
A0,B1,C3,D0,56,58
A0,B1,C3,D1,60,62
A1,B0,C1,D0,72,74
A1,B0,C1,D1,76,78


In [91]:
mask = dfmi[("a", "foo")] > 200

In [92]:
dfmi[("a", "foo")]

A0  B0  C0  D0      0
            D1      4
        C1  D0      8
            D1     12
        C2  D0     16
                 ... 
A3  B1  C1  D1    236
        C2  D0    240
            D1    244
        C3  D0    248
            D1    252
Name: (a, foo), Length: 64, dtype: int64

In [93]:
dfmi.loc[idx[mask, :, ["C1", "C3"]], idx[:, "foo"]]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lv10,a,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lv11,foo,foo
A3,B0,C1,D1,204,206
A3,B0,C3,D0,216,218
A3,B0,C3,D1,220,222
A3,B1,C1,D0,232,234
A3,B1,C1,D1,236,238
A3,B1,C3,D0,248,250
A3,B1,C3,D1,252,254


In [94]:
dfmi.loc(axis=0)[:, :, ["C1", "C3"]]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lv10,a,a,b,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lv11,bar,foo,bah,foo
A0,B0,C1,D0,9,8,11,10
A0,B0,C1,D1,13,12,15,14
A0,B0,C3,D0,25,24,27,26
A0,B0,C3,D1,29,28,31,30
A0,B1,C1,D0,41,40,43,42
A0,B1,C1,D1,45,44,47,46
A0,B1,C3,D0,57,56,59,58
A0,B1,C3,D1,61,60,63,62
A1,B0,C1,D0,73,72,75,74
A1,B0,C1,D1,77,76,79,78


In [95]:
df2 = dfmi.copy()

In [96]:
df2.loc(axis=0)[:, :, ["C1", "C3"]] = -10

In [97]:
df2

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lv10,a,a,b,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lv11,bar,foo,bah,foo
A0,B0,C0,D0,1,0,3,2
A0,B0,C0,D1,5,4,7,6
A0,B0,C1,D0,-10,-10,-10,-10
A0,B0,C1,D1,-10,-10,-10,-10
A0,B0,C2,D0,17,16,19,18
...,...,...,...,...,...,...,...
A3,B1,C1,D1,-10,-10,-10,-10
A3,B1,C2,D0,241,240,243,242
A3,B1,C2,D1,245,244,247,246
A3,B1,C3,D0,-10,-10,-10,-10


In [98]:
df2 = dfmi.copy()

In [99]:
df2.loc[idx[:, :, ["C1", "C3"]], :] = df2 * 1000

In [100]:
df2

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lv10,a,a,b,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lv11,bar,foo,bah,foo
A0,B0,C0,D0,1,0,3,2
A0,B0,C0,D1,5,4,7,6
A0,B0,C1,D0,9000,8000,11000,10000
A0,B0,C1,D1,13000,12000,15000,14000
A0,B0,C2,D0,17,16,19,18
...,...,...,...,...,...,...,...
A3,B1,C1,D1,237000,236000,239000,238000
A3,B1,C2,D0,241,240,243,242
A3,B1,C2,D1,245,244,247,246
A3,B1,C3,D0,249000,248000,251000,250000


### Cross-section

In [101]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B,C
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
bar,one,0.283479,-0.000798,0.034508
bar,two,0.683097,0.102291,-0.204105
baz,one,-1.203705,0.968358,2.180522
baz,two,0.657149,0.322263,-1.388232
foo,one,-0.002972,0.553892,0.808507
foo,two,-0.417262,-1.343042,0.748004
qux,one,-1.314024,1.356863,-0.174149
qux,two,-1.040896,-0.510343,1.29005


In [102]:
df.xs("one", level="second")

Unnamed: 0_level_0,A,B,C
first,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,0.283479,-0.000798,0.034508
baz,-1.203705,0.968358,2.180522
foo,-0.002972,0.553892,0.808507
qux,-1.314024,1.356863,-0.174149


In [103]:
df.loc[(slice(None), "one"), :]

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B,C
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
bar,one,0.283479,-0.000798,0.034508
baz,one,-1.203705,0.968358,2.180522
foo,one,-0.002972,0.553892,0.808507
qux,one,-1.314024,1.356863,-0.174149


In [104]:
df = df.T

In [105]:
df

first,bar,bar,baz,baz,foo,foo,qux,qux
second,one,two,one,two,one,two,one,two
A,0.283479,0.683097,-1.203705,0.657149,-0.002972,-0.417262,-1.314024,-1.040896
B,-0.000798,0.102291,0.968358,0.322263,0.553892,-1.343042,1.356863,-0.510343
C,0.034508,-0.204105,2.180522,-1.388232,0.808507,0.748004,-0.174149,1.29005


In [106]:
df.xs("one", level="second", axis=1)

first,bar,baz,foo,qux
A,0.283479,-1.203705,-0.002972,-1.314024
B,-0.000798,0.968358,0.553892,1.356863
C,0.034508,2.180522,0.808507,-0.174149


In [107]:
df.loc[:, (slice(None), "one")]

first,bar,baz,foo,qux
second,one,one,one,one
A,0.283479,-1.203705,-0.002972,-1.314024
B,-0.000798,0.968358,0.553892,1.356863
C,0.034508,2.180522,0.808507,-0.174149


In [109]:
df.xs(("one", "bar"), level=("second", "first"), axis=1)

first,bar
second,one
A,0.283479
B,-0.000798
C,0.034508


In [110]:
df.loc[:, ("bar", "one")]

A    0.283479
B   -0.000798
C    0.034508
Name: (bar, one), dtype: float64

In [111]:
df.xs("one", level="second", axis=1, drop_level=False)

first,bar,baz,foo,qux
second,one,one,one,one
A,0.283479,-1.203705,-0.002972,-1.314024
B,-0.000798,0.968358,0.553892,1.356863
C,0.034508,2.180522,0.808507,-0.174149


In [112]:
df.xs("one", level="second", axis=1, drop_level=True)

first,bar,baz,foo,qux
A,0.283479,-1.203705,-0.002972,-1.314024
B,-0.000798,0.968358,0.553892,1.356863
C,0.034508,2.180522,0.808507,-0.174149


### Advanced reindexing and alignment

In [113]:
midx = pd.MultiIndex(
    levels=[["zero", "one"], ["x", "y"]], codes=[[1,1,0,0], [1,0,1,0]]
)

In [114]:
midx

MultiIndex([( 'one', 'y'),
            ( 'one', 'x'),
            ('zero', 'y'),
            ('zero', 'x')],
           )

In [116]:
pd.MultiIndex(
    levels=[["zero", "one"], ["x", "y"]], codes=[[1,0,0, 1], [1,0,1,0]]
)

MultiIndex([( 'one', 'y'),
            ('zero', 'x'),
            ('zero', 'y'),
            ( 'one', 'x')],
           )

In [117]:
df = pd.DataFrame(np.random.randn(4,2), index=midx)

In [118]:
df

Unnamed: 0,Unnamed: 1,0,1
one,y,-0.055193,-2.457665
one,x,-0.642253,2.248494
zero,y,-1.072427,1.415076
zero,x,-1.318818,2.632045


In [119]:
# Average the rows that share the same label on the outer index level (level 0).
# ``df.mean(level=0)`` was deprecated in pandas 1.3 and removed in 2.0; grouping
# on the level is the supported spelling. ``sort=False`` keeps the groups in
# order of first appearance, which is what the ``level=`` shortcut did.
df2 = df.groupby(level=0, sort=False).mean()

In [120]:
df2

Unnamed: 0,0,1
one,-0.348723,-0.104585
zero,-1.195623,2.02356


In [122]:
(-0.055193 -0.642253) / 2

-0.348723

In [123]:
df2.reindex(df.index, level=0)

Unnamed: 0,Unnamed: 1,0,1
one,y,-0.348723,-0.104585
one,x,-0.348723,-0.104585
zero,y,-1.195623,2.02356
zero,x,-1.195623,2.02356


In [125]:
# Align the two frames against each other on index level 0; ``align`` returns
# the pair of aligned frames (left, right).
df_aligned, df2_aligned = df.align(df2, level=0)

### Swapping levels with swaplevel

In [126]:
df[:5]

Unnamed: 0,Unnamed: 1,0,1
one,y,-0.055193,-2.457665
one,x,-0.642253,2.248494
zero,y,-1.072427,1.415076
zero,x,-1.318818,2.632045


In [127]:
df[:5].swaplevel(0,1,axis=0)

Unnamed: 0,Unnamed: 1,0,1
y,one,-0.055193,-2.457665
x,one,-0.642253,2.248494
y,zero,-1.072427,1.415076
x,zero,-1.318818,2.632045


In [128]:
df[:5].reorder_levels([1,0], axis=0)

Unnamed: 0,Unnamed: 1,0,1
y,one,-0.055193,-2.457665
x,one,-0.642253,2.248494
y,zero,-1.072427,1.415076
x,zero,-1.318818,2.632045


### Renaming names of an Index or MultiIndex

In [129]:
df.rename(columns={0: "col0", 1:"col1"})

Unnamed: 0,Unnamed: 1,col0,col1
one,y,-0.055193,-2.457665
one,x,-0.642253,2.248494
zero,y,-1.072427,1.415076
zero,x,-1.318818,2.632045


In [130]:
df.rename(index={"one": "two", "y": "z"})

Unnamed: 0,Unnamed: 1,0,1
two,z,-0.055193,-2.457665
two,x,-0.642253,2.248494
zero,z,-1.072427,1.415076
zero,x,-1.318818,2.632045


In [131]:
df.rename_axis(index=["abc", "def"])

Unnamed: 0_level_0,Unnamed: 1_level_0,0,1
abc,def,Unnamed: 2_level_1,Unnamed: 3_level_1
one,y,-0.055193,-2.457665
one,x,-0.642253,2.248494
zero,y,-1.072427,1.415076
zero,x,-1.318818,2.632045


In [132]:
df.rename_axis(columns="Cols").columns

RangeIndex(start=0, stop=2, step=1, name='Cols')

In [133]:
df

Unnamed: 0,Unnamed: 1,0,1
one,y,-0.055193,-2.457665
one,x,-0.642253,2.248494
zero,y,-1.072427,1.415076
zero,x,-1.318818,2.632045


In [134]:
mi = pd.MultiIndex.from_product([[1,2], ["a", "b"]], names=["x", "y"])

In [135]:
mi

MultiIndex([(1, 'a'),
            (1, 'b'),
            (2, 'a'),
            (2, 'b')],
           names=['x', 'y'])

In [136]:
mi.names

FrozenList(['x', 'y'])

In [137]:
mi2 = mi.rename("new name", level=0)

In [138]:
mi2

MultiIndex([(1, 'a'),
            (1, 'b'),
            (2, 'a'),
            (2, 'b')],
           names=['new name', 'y'])

In [141]:
# You cannot set the names of the MultiIndex via a level.
# mi.levels[0].name = "name via level"

## Sorting a MultiIndex

In [142]:
import random

In [143]:
random.shuffle(tuples)

In [144]:
tuples

[('baz', 'two'),
 ('baz', 'one'),
 ('qux', 'two'),
 ('qux', 'one'),
 ('bar', 'two'),
 ('bar', 'one'),
 ('foo', 'two'),
 ('foo', 'one')]

In [145]:
s  = pd.Series(np.random.randn(8), index=pd.MultiIndex.from_tuples(tuples))

In [146]:
s.sort_index()

bar  one    1.461472
     two   -1.537093
baz  one    0.512834
     two    0.285505
foo  one   -1.747695
     two    0.958897
qux  one   -1.258341
     two   -2.316496
dtype: float64

In [147]:
s.sort_index(level=0)

bar  one    1.461472
     two   -1.537093
baz  one    0.512834
     two    0.285505
foo  one   -1.747695
     two    0.958897
qux  one   -1.258341
     two   -2.316496
dtype: float64

In [148]:
s.sort_index(level=1)

bar  one    1.461472
baz  one    0.512834
foo  one   -1.747695
qux  one   -1.258341
bar  two   -1.537093
baz  two    0.285505
foo  two    0.958897
qux  two   -2.316496
dtype: float64

In [149]:
s.index.set_names(["L1", "L2"], inplace=True)

In [150]:
s.sort_index(level="L1")

L1   L2 
bar  one    1.461472
     two   -1.537093
baz  one    0.512834
     two    0.285505
foo  one   -1.747695
     two    0.958897
qux  one   -1.258341
     two   -2.316496
dtype: float64

In [151]:
s.sort_index(level="L2")

L1   L2 
bar  one    1.461472
baz  one    0.512834
foo  one   -1.747695
qux  one   -1.258341
bar  two   -1.537093
baz  two    0.285505
foo  two    0.958897
qux  two   -2.316496
dtype: float64

In [152]:
df.T.sort_index(level=1, axis=1)

Unnamed: 0_level_0,one,zero,one,zero
Unnamed: 0_level_1,x,x,y,y
0,-0.642253,-1.318818,-0.055193,-1.072427
1,2.248494,2.632045,-2.457665,1.415076


In [153]:
dfm = pd.DataFrame(
    {"jim": [0,0,1,1], "joe": ["x", "x", "z", "y"], "jolie": np.random.rand(4)}
)

In [154]:
dfm = dfm.set_index(["jim", "joe"])

In [155]:
dfm

Unnamed: 0_level_0,Unnamed: 1_level_0,jolie
jim,joe,Unnamed: 2_level_1
0,x,0.830837
0,x,0.538484
1,z,0.772325
1,y,0.504985


In [156]:
dfm.loc[((1,'z'))]

PerformanceWarning: indexing past lexsort depth may impact performance.
  dfm.loc[((1,'z'))]


Unnamed: 0_level_0,Unnamed: 1_level_0,jolie
jim,joe,Unnamed: 2_level_1
1,z,0.772325


In [158]:
# Left commented out on purpose: before dfm is sorted, this slice is expected
# to fail (pandas raises UnsortedIndexError when the key is longer than the
# index's lexsort depth). The same slice is run after sort_index() below.
# dfm.loc[(0, 'y'):(1,'z')]

In [159]:
dfm.index.is_lexsorted()

False

In [160]:
dfm.index.lexsort_depth

1

In [161]:
dfm = dfm.sort_index()

In [162]:
dfm

Unnamed: 0_level_0,Unnamed: 1_level_0,jolie
jim,joe,Unnamed: 2_level_1
0,x,0.830837
0,x,0.538484
1,y,0.504985
1,z,0.772325


In [163]:
dfm.index.is_lexsorted()  # check dfm (the frame that was just sorted), not df

True

In [164]:
dfm.index.lexsort_depth

2

In [165]:
dfm.loc[(0, 'y'):(1,'z')]

Unnamed: 0_level_0,Unnamed: 1_level_0,jolie
jim,joe,Unnamed: 2_level_1
1,y,0.504985
1,z,0.772325


## Take methods