In [1]:
import pandas as pd
import numpy as np
# Two parallel label lists: position i of the first list and position i of
# the second list together describe one two-level index entry.
arrays = [
    ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"],
    ["one", "two", "one", "two", "one", "two", "one", "two"],
]

# zip(*arrays) pairs the two lists element-wise, giving one
# (first-level, second-level) tuple per entry.
tuples = list(zip(*arrays))
tuples

[('bar', 'one'),
 ('bar', 'two'),
 ('baz', 'one'),
 ('baz', 'two'),
 ('foo', 'one'),
 ('foo', 'two'),
 ('qux', 'one'),
 ('qux', 'two')]

In [2]:
# Build a two-level MultiIndex from the tuples; `names` labels the levels.
index = pd.MultiIndex.from_tuples(tuples, names=["first", "second"])
index

MultiIndex([('bar', 'one'),
            ('bar', 'two'),
            ('baz', 'one'),
            ('baz', 'two'),
            ('foo', 'one'),
            ('foo', 'two'),
            ('qux', 'one'),
            ('qux', 'two')],
           names=['first', 'second'])

In [3]:
# Series of 8 random values indexed by the MultiIndex built above.
# NOTE(review): no random seed is set in the visible cells, so these values
# presumably differ on every run -- confirm whether a seed is wanted.
s = pd.Series(np.random.randn(8), index=index)
s

first  second
bar    one      -0.800564
       two       0.436376
baz    one      -0.535096
       two       0.972463
foo    one      -1.738758
       two      -1.304350
qux    one       0.036913
       two      -0.284856
dtype: float64

In [4]:
# When you want every pairing of the elements in two iterables, it can be
# easier to use MultiIndex.from_product().
iterables = [["bar", "baz", "foo", "qux"], ["one", "two"]]
pd.MultiIndex.from_product(iterables, names=["first", "second"])

MultiIndex([('bar', 'one'),
            ('bar', 'two'),
            ('baz', 'one'),
            ('baz', 'two'),
            ('foo', 'one'),
            ('foo', 'two'),
            ('qux', 'one'),
            ('qux', 'two')],
           names=['first', 'second'])

In [5]:
# You can also construct a MultiIndex directly from a DataFrame using
# MultiIndex.from_frame(). As the output shows, the frame's column names
# ("first", "second") become the level names.
df = pd.DataFrame(
    [["bar", "one"], ["bar", "two"], ["foo", "one"], ["foo", "two"]],
    columns=["first", "second"],
)
pd.MultiIndex.from_frame(df)

MultiIndex([('bar', 'one'),
            ('bar', 'two'),
            ('foo', 'one'),
            ('foo', 'two')],
           names=['first', 'second'])

In [6]:
# As a convenience, you can pass a list of arrays directly into Series or
# DataFrame to construct a MultiIndex automatically.
# NOTE(review): this rebinds both `arrays` and `s`; later cells that use `s`
# see this Series, whose index levels are unnamed (see the output).
arrays = [
    np.array(["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"]),
    np.array(["one", "two", "one", "two", "one", "two", "one", "two"]),]
s = pd.Series(np.random.randn(8), index=arrays)
s

bar  one    1.277948
     two   -0.154167
baz  one   -0.046272
     two   -2.605940
foo  one    0.056234
     two    0.117534
qux  one   -0.502330
     two   -0.323395
dtype: float64

In [7]:
# Same idea for a DataFrame: 8 rows by 4 columns of random values, with the
# list of arrays used as the row index. This rebinds `df`.
df = pd.DataFrame(np.random.randn(8, 4), index=arrays)
df

Unnamed: 0,Unnamed: 1,0,1,2,3
bar,one,1.33903,0.551265,-0.415299,0.228168
bar,two,1.253974,-0.60605,-0.69957,0.098473
baz,one,-0.032364,-0.259329,1.716701,-0.794255
baz,two,0.724544,0.723268,0.901501,-0.653991
foo,one,-0.672522,1.348451,0.335708,0.15535
foo,two,-0.405497,-0.242051,1.303653,-0.72779
qux,one,1.966501,1.321773,0.103366,-0.027627
qux,two,1.010182,-0.494353,-0.197903,0.915907


In [8]:
# A MultiIndex can label the columns too: rows A, B, C against the 8 entries
# of `index`. This rebinds `df` again; the basic-indexing cells below use it.
df = pd.DataFrame(np.random.randn(3, 8), index=["A", "B", "C"], columns=index)
df

first,bar,bar,baz,baz,foo,foo,qux,qux
second,one,two,one,two,one,two,one,two
A,0.037967,-0.115342,1.669155,0.169153,0.133697,0.833669,0.351041,-0.227265
B,-1.902917,-0.663993,-0.068756,0.437375,-0.29289,0.066706,-0.457078,-0.659014
C,-0.744951,0.145049,-0.5899,0.099283,-1.49857,0.195966,0.348871,-0.563337


In [9]:
# The first six entries of `index` label both axes. The frame is only
# displayed, not assigned to a name.
pd.DataFrame(np.random.randn(6, 6), index=index[:6], columns=index[:6])

Unnamed: 0_level_0,first,bar,bar,baz,baz,foo,foo
Unnamed: 0_level_1,second,one,two,one,two,one,two
first,second,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
bar,one,0.362933,-0.078513,0.2653,-0.285661,-0.84869,1.783287
bar,two,-0.10195,-0.645161,0.246717,1.067566,1.060677,-0.63852
baz,one,-0.828273,-1.726242,-0.799764,1.074463,-0.045441,0.324045
baz,two,1.307749,1.021553,1.79381,1.108095,-0.097735,0.593675
foo,one,-0.003233,-0.906042,0.503103,-0.432736,0.564246,0.404732
foo,two,-1.637795,0.16881,2.44087,0.454802,1.415454,-0.441849


In [10]:
# Basic indexing on an axis with a MultiIndex
# A first-level column label selects every column under "bar"; the result is
# keyed by the remaining "second" level.
df['bar']

second,one,two
A,0.037967,-0.115342
B,-1.902917,-0.663993
C,-0.744951,0.145049


In [11]:
# A full (first, second) key selects a single column, returned as a Series.
df['bar','one']

A    0.037967
B   -1.902917
C   -0.744951
Name: (bar, one), dtype: float64

In [12]:
# `s` is the Series rebuilt in In [6]. Selecting a first-level label returns
# the entries under "qux", indexed by the second level.
s['qux']

one   -0.502330
two   -0.323395
dtype: float64

In [13]:
# Data alignment and using reindex
# Addition aligns on the index: s[:-2] lacks the last two entries, so those
# labels come out as NaN in the sum.
s + s[:-2]

bar  one    2.555895
     two   -0.308333
baz  one   -0.092545
     two   -5.211880
foo  one    0.112468
     two    0.235068
qux  one         NaN
     two         NaN
dtype: float64

In [14]:
# s[::2] keeps every other entry, so the labels it skips become NaN.
s + s[::2]

bar  one    2.555895
     two         NaN
baz  one   -0.092545
     two         NaN
foo  one    0.112468
     two         NaN
qux  one   -1.004659
     two         NaN
dtype: float64

In [15]:
# Reindex to the first three entries of `index`. In the output the values
# come from the current `s` and the level names come from `index`.
s.reindex(index[:3])

first  second
bar    one       1.277948
       two      -0.154167
baz    one      -0.046272
dtype: float64

In [16]:
# Reindexing with a list of tuples returns the entries in the requested order.
s.reindex([("foo", "two"), ("bar", "one"), ("qux", "one"), ("baz", "one")])

foo  two    0.117534
bar  one    1.277948
qux  one   -0.502330
baz  one   -0.046272
dtype: float64

In [17]:
# Advanced indexing with hierarchical index
# Transpose so the MultiIndex labels the rows instead of the columns.
# NOTE(review): `df` is rebound to its own transpose, so re-running this
# cell on its own flips the frame back.
df = df.T
df

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B,C
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
bar,one,0.037967,-1.902917,-0.744951
bar,two,-0.115342,-0.663993,0.145049
baz,one,1.669155,-0.068756,-0.5899
baz,two,0.169153,0.437375,0.099283
foo,one,0.133697,-0.29289,-1.49857
foo,two,0.833669,0.066706,0.195966
qux,one,0.351041,-0.457078,0.348871
qux,two,-0.227265,-0.659014,-0.563337


In [18]:
# A full (first, second) row key plus a column label selects one scalar.
df.loc[("bar", "two"), "A"]

np.float64(-0.11534165014972833)

In [19]:
# A first-level label alone selects every row under "bar".
df.loc['bar']

Unnamed: 0_level_0,A,B,C
second,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
one,0.037967,-1.902917,-0.744951
two,-0.115342,-0.663993,0.145049


In [20]:
# Slice between two full keys; as the output shows, both endpoints are
# included in the result.
df.loc[("baz", "two"):("qux", "one")]

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B,C
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
baz,two,0.169153,0.437375,0.099283
foo,one,0.133697,-0.29289,-1.49857
foo,two,0.833669,0.066706,0.195966
qux,one,0.351041,-0.457078,0.348871


In [21]:
# A list of tuples selects exactly those rows.
df.loc[[('bar','two'), ('qux', 'one')]]

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B,C
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
bar,two,-0.115342,-0.663993,0.145049
qux,one,0.351041,-0.457078,0.348871


In [22]:
#Using slicers
def mklbl(prefix, n):
    """Return the n labels ``<prefix>0`` .. ``<prefix><n-1>`` as a list of strings.

    ``prefix`` and each counter are converted with ``str`` and concatenated;
    ``n == 0`` yields an empty list.
    """
    labels = []
    for i in range(n):
        labels.append(str(prefix) + str(i))
    return labels


# Row index: every combination of 4 "A", 2 "B", 4 "C" and 2 "D" labels.
miindex = pd.MultiIndex.from_product(
    [mklbl("A", 4), mklbl("B", 2), mklbl("C", 4), mklbl("D", 2)]
)


# Column index: four explicit (lvl0, lvl1) pairs.
micolumns = pd.MultiIndex.from_tuples(
    [("a", "foo"), ("a", "bar"), ("b", "foo"), ("b", "bah")], names=["lvl0", "lvl1"]
)


# Fill the frame with consecutive integers, one per cell, then sort both
# axes by label.
dfmi = (
    pd.DataFrame(
        np.arange(len(miindex) * len(micolumns)).reshape(
            (len(miindex), len(micolumns))
        ),
        index=miindex,
        columns=micolumns,
    )
    .sort_index()
    # Sorting the columns reorders them to (a, bar), (a, foo), (b, bah),
    # (b, foo), which is why the first displayed row reads 1, 0, 3, 2.
    .sort_index(axis=1)
)


dfmi

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lvl0,a,a,b,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl1,bar,foo,bah,foo
A0,B0,C0,D0,1,0,3,2
A0,B0,C0,D1,5,4,7,6
A0,B0,C1,D0,9,8,11,10
A0,B0,C1,D1,13,12,15,14
A0,B0,C2,D0,17,16,19,18
...,...,...,...,...,...,...,...
A3,B1,C1,D1,237,236,239,238
A3,B1,C2,D0,241,240,243,242
A3,B1,C2,D1,245,244,247,246
A3,B1,C3,D0,249,248,251,250


In [23]:
# One selector per index level: A1 through A3 on the first level, everything
# on the second, only C1 and C3 on the third. The trailing `:` keeps all
# columns.
dfmi.loc[(slice("A1", "A3"), slice(None), ["C1", "C3"]), :]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lvl0,a,a,b,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl1,bar,foo,bah,foo
A1,B0,C1,D0,73,72,75,74
A1,B0,C1,D1,77,76,79,78
A1,B0,C3,D0,89,88,91,90
A1,B0,C3,D1,93,92,95,94
A1,B1,C1,D0,105,104,107,106
A1,B1,C1,D1,109,108,111,110
A1,B1,C3,D0,121,120,123,122
A1,B1,C3,D1,125,124,127,126
A2,B0,C1,D0,137,136,139,138
A2,B0,C1,D1,141,140,143,142


In [24]:
# pd.IndexSlice lets the `:` slice syntax be used inside a multi-level key.
idx = pd.IndexSlice
# Work on a copy so `dfmi` itself is left unchanged.
df2 = dfmi.copy()
# Only rows whose third level is C1 or C3 are assigned: in the output those
# rows are scaled by 1000 while all other rows keep their original values.
df2.loc[idx[:, :, ["C1", "C3"]], :] = df2 * 1000
df2

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lvl0,a,a,b,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl1,bar,foo,bah,foo
A0,B0,C0,D0,1,0,3,2
A0,B0,C0,D1,5,4,7,6
A0,B0,C1,D0,9000,8000,11000,10000
A0,B0,C1,D1,13000,12000,15000,14000
A0,B0,C2,D0,17,16,19,18
...,...,...,...,...,...,...,...
A3,B1,C1,D1,237000,236000,239000,238000
A3,B1,C2,D0,241,240,243,242
A3,B1,C2,D1,245,244,247,246
A3,B1,C3,D0,249000,248000,251000,250000


In [25]:
# Cross section
# Redisplay the transposed frame from In [17] as the starting point.
df

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B,C
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
bar,one,0.037967,-1.902917,-0.744951
bar,two,-0.115342,-0.663993,0.145049
baz,one,1.669155,-0.068756,-0.5899
baz,two,0.169153,0.437375,0.099283
foo,one,0.133697,-0.29289,-1.49857
foo,two,0.833669,0.066706,0.195966
qux,one,0.351041,-0.457078,0.348871
qux,two,-0.227265,-0.659014,-0.563337


In [26]:
# Select the rows whose "second" level equals "one"; the output shows that
# level dropped from the result.
df.xs("one", level="second")

Unnamed: 0_level_0,A,B,C
first,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,0.037967,-1.902917,-0.744951
baz,1.669155,-0.068756,-0.5899
foo,0.133697,-0.29289,-1.49857
qux,0.351041,-0.457078,0.348871


In [27]:
# The same rows selected with .loc and slice(None); here both index levels
# are kept in the result.
df.loc[(slice(None), "one"), :]

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B,C
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
bar,one,0.037967,-1.902917,-0.744951
baz,one,1.669155,-0.068756,-0.5899
foo,one,0.133697,-0.29289,-1.49857
qux,one,0.351041,-0.457078,0.348871


In [28]:
# Transpose back so the MultiIndex labels the columns, then take the cross
# section along axis=1.
# NOTE(review): rebinding `df` to its own transpose makes this cell
# non-idempotent -- re-running it alone flips the frame again.
df = df.T
df.xs("one", level="second", axis=1)

first,bar,baz,foo,qux
A,0.037967,1.669155,0.133697,0.351041
B,-1.902917,-0.068756,-0.29289,-0.457078
C,-0.744951,-0.5899,-1.49857,0.348871


In [29]:
# Using the slicers: the same columns selected with .loc; both column levels
# are kept in the result.
df.loc[:, (slice(None), "one")]

first,bar,baz,foo,qux
second,one,one,one,one
A,0.037967,1.669155,0.133697,0.351041
B,-1.902917,-0.068756,-0.29289,-0.457078
C,-0.744951,-0.5899,-1.49857,0.348871


In [30]:
# Keys for several levels at once: "one" is matched against "second" and
# "bar" against "first", following the order given in `level`.
df.xs(("one", "bar"), level=("second", "first"), axis=1)

first,bar
second,one
A,0.037967
B,-1.902917
C,-0.744951


In [31]:
# drop_level=False keeps the selected "second" level in the result.
df.xs("one", level="second", axis=1, drop_level=False)

first,bar,baz,foo,qux
second,one,one,one,one
A,0.037967,1.669155,0.133697,0.351041
B,-1.902917,-0.068756,-0.29289,-0.457078
C,-0.744951,-0.5899,-1.49857,0.348871


In [32]:
# drop_level=True drops the selected level; the output matches In [28],
# where drop_level was not passed.
df.xs("one", level="second", axis=1, drop_level=True)

first,bar,baz,foo,qux
A,0.037967,1.669155,0.133697,0.351041
B,-1.902917,-0.068756,-0.29289,-0.457078
C,-0.744951,-0.5899,-1.49857,0.348871


In [33]:
# Advanced reindexing and alignment
# `codes` holds positions into `levels`, so the four entries are
# (one, y), (one, x), (zero, y), (zero, x) -- as the output shows.
midx = pd.MultiIndex(levels=[["zero", "one"], ["x", "y"]], codes=[[1, 1, 0, 0], [1, 0, 1, 0]])
# NOTE(review): rebinds `df` to a new 4x2 frame of unseeded random values.
df = pd.DataFrame(np.random.randn(4, 2), index=midx)
df

Unnamed: 0,Unnamed: 1,0,1
one,y,-1.122097,-0.491296
one,x,-0.662271,-2.630191
zero,y,0.14372,1.106632
zero,x,0.033766,-1.392533


In [34]:
# Mean of each column within each first-level group ("one", "zero").
# NOTE(review): rebinds `df2`, which In [24] used for a different frame.
df2 = df.groupby(level=0).mean()
df2

Unnamed: 0,0,1
one,-0.892184,-1.560743
zero,0.088743,-0.14295


In [35]:
# NOTE(review): this cell repeats the previous cell verbatim and produces the
# same output; it looks like an accidental duplicate -- confirm and remove.
df2 = df.groupby(level=0).mean()
df2

Unnamed: 0,0,1
one,-0.892184,-1.560743
zero,0.088743,-0.14295


In [36]:
# Reindex the group means against df's index, matching on level 0: in the
# output each group's row is repeated for every entry of that group.
df2.reindex(df.index, level=0)

Unnamed: 0,Unnamed: 1,0,1
one,y,-0.892184,-1.560743
one,x,-0.892184,-1.560743
zero,y,0.088743,-0.14295
zero,x,0.088743,-0.14295


In [37]:
# align returns both frames conformed to each other, matching on level 0.
# In the output df_aligned shows the same rows and values as df.
df_aligned, df2_aligned = df.align(df2, level=0)
df_aligned

Unnamed: 0,Unnamed: 1,0,1
one,y,-1.122097,-0.491296
one,x,-0.662271,-2.630191
zero,y,0.14372,1.106632
zero,x,0.033766,-1.392533


In [38]:
# The aligned group means: one row per entry of df's index.
df2_aligned

Unnamed: 0,Unnamed: 1,0,1
one,y,-0.892184,-1.560743
one,x,-0.892184,-1.560743
zero,y,0.088743,-0.14295
zero,x,0.088743,-0.14295


In [39]:
# Swapping levels with swaplevel
# df has only 4 rows, so [:5] returns the whole frame.
df[:5]

Unnamed: 0,Unnamed: 1,0,1
one,y,-1.122097,-0.491296
one,x,-0.662271,-2.630191
zero,y,0.14372,1.106632
zero,x,0.033766,-1.392533


In [40]:
# Swap index levels 0 and 1 on the row axis; row order and values are
# unchanged, only the label order within each entry flips.
df[:5].swaplevel(0, 1, axis=0)

Unnamed: 0,Unnamed: 1,0,1
y,one,-1.122097,-0.491296
x,one,-0.662271,-2.630191
y,zero,0.14372,1.106632
x,zero,0.033766,-1.392533


In [41]:
# Reordering levels with reorder_levels
# [1, 0] puts the former inner level first; the output matches the
# swaplevel result in In [40].
df[:5].reorder_levels([1, 0], axis=0)

Unnamed: 0,Unnamed: 1,0,1
y,one,-1.122097,-0.491296
x,one,-0.662271,-2.630191
y,zero,0.14372,1.106632
x,zero,0.033766,-1.392533


In [42]:
#Renaming names of an Index or MultiIndex
