In [1]:
import pandas as pd
import numpy as np
# Two parallel label lists: position i of the first list and position i of the
# second list together form the key of row i.
arrays = [
    ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"],
    ["one", "two", "one", "two", "one", "two", "one", "two"],
]

# Pair the labels position by position into (first, second) tuples.
tuples = [(first, second) for first, second in zip(*arrays)]
tuples

[('bar', 'one'),
 ('bar', 'two'),
 ('baz', 'one'),
 ('baz', 'two'),
 ('foo', 'one'),
 ('foo', 'two'),
 ('qux', 'one'),
 ('qux', 'two')]

In [2]:
# Build a two-level index from the label tuples and name its levels.
index = pd.MultiIndex.from_tuples(
    tuples,
    names=["first", "second"],
)
index

MultiIndex([('bar', 'one'),
            ('bar', 'two'),
            ('baz', 'one'),
            ('baz', 'two'),
            ('foo', 'one'),
            ('foo', 'two'),
            ('qux', 'one'),
            ('qux', 'two')],
           names=['first', 'second'])

In [3]:
# Eight random values, one per entry of the two-level index.
s = pd.Series(data=np.random.randn(8), index=index)
s

first  second
bar    one      -2.719910
       two      -1.859769
baz    one      -1.118029
       two       1.379101
foo    one       0.807918
       two       1.044386
qux    one      -1.711049
       two      -1.057320
dtype: float64

In [4]:
# When every pairing of the elements of two iterables is wanted,
# MultiIndex.from_product() can be easier than listing each tuple by hand.
iterables = [
    ["bar", "baz", "foo", "qux"],
    ["one", "two"],
]
pd.MultiIndex.from_product(iterables, names=["first", "second"])

MultiIndex([('bar', 'one'),
            ('bar', 'two'),
            ('baz', 'one'),
            ('baz', 'two'),
            ('foo', 'one'),
            ('foo', 'two'),
            ('qux', 'one'),
            ('qux', 'two')],
           names=['first', 'second'])

In [5]:
# A MultiIndex can also be built directly from a DataFrame with
# MultiIndex.from_frame(); the column names become the level names.
df = pd.DataFrame(
    {
        "first": ["bar", "bar", "foo", "foo"],
        "second": ["one", "two", "one", "two"],
    }
)
pd.MultiIndex.from_frame(df)

MultiIndex([('bar', 'one'),
            ('bar', 'two'),
            ('foo', 'one'),
            ('foo', 'two')],
           names=['first', 'second'])

In [6]:
# As a convenience, a list of arrays can be passed directly as the index of a
# Series or DataFrame; the MultiIndex is then constructed automatically.
arrays = [
    np.repeat(["bar", "baz", "foo", "qux"], 2),  # each outer label twice in a row
    np.tile(["one", "two"], 4),                  # inner labels cycling one/two
]
s = pd.Series(data=np.random.randn(8), index=arrays)
s

bar  one   -0.315212
     two    0.047971
baz  one   -0.086912
     two   -2.148841
foo  one    0.156513
     two   -0.435359
qux  one   -0.287731
     two   -0.281379
dtype: float64

In [7]:
# The same list of arrays works as a DataFrame row index: 8 rows x 4 columns.
df = pd.DataFrame(data=np.random.randn(8, 4), index=arrays)
df

Unnamed: 0,Unnamed: 1,0,1,2,3
bar,one,0.412943,0.536926,-0.97583,-0.13933
bar,two,-0.277639,0.205495,2.323194,0.317628
baz,one,1.136131,-0.668041,-0.502565,-0.363479
baz,two,0.849945,0.676178,-0.572154,-0.576849
foo,one,0.06532,0.808466,0.507917,1.339993
foo,two,-0.065661,-1.334241,-0.071549,-0.906887
qux,one,0.447648,1.922669,0.565448,-0.318618
qux,two,1.121978,0.935693,0.485518,-1.06375


In [8]:
# Here the MultiIndex labels the columns, while the rows get plain labels.
df = pd.DataFrame(
    data=np.random.randn(3, 8),
    index=["A", "B", "C"],
    columns=index,
)
df

first,bar,bar,baz,baz,foo,foo,qux,qux
second,one,two,one,two,one,two,one,two
A,0.692152,0.290527,-1.984037,0.240805,0.210255,1.110364,0.383845,-0.044952
B,0.732662,1.949945,1.869382,-3.058598,0.511609,-1.212379,-0.813514,-0.282848
C,0.107861,1.272182,-0.199721,1.391029,-0.702901,1.573196,2.162615,1.845507


In [9]:
# Both axes use the first six entries of the MultiIndex.
pd.DataFrame(
    data=np.random.randn(6, 6),
    index=index[:6],
    columns=index[:6],
)

Unnamed: 0_level_0,first,bar,bar,baz,baz,foo,foo
Unnamed: 0_level_1,second,one,two,one,two,one,two
first,second,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
bar,one,-0.810739,-0.517742,-0.997263,1.08966,-1.266715,0.131574
bar,two,2.420003,-1.134069,-0.332066,-2.016341,0.720045,-0.615752
baz,one,-0.946585,0.005978,-1.123434,0.122828,-0.522362,0.332719
baz,two,0.009221,1.518227,-1.448371,-0.917917,-0.90425,0.112943
foo,one,1.718198,-1.223559,-1.406164,-0.626821,0.436566,0.066895
foo,two,0.408151,1.334246,0.677122,0.106622,0.104554,-1.025869


In [10]:
# Basic indexing on an axis with a MultiIndex:
# a first-level label selects the whole group of columns under it.
df["bar"]

second,one,two
A,0.692152,0.290527
B,0.732662,1.949945
C,0.107861,1.272182


In [11]:
# A complete (first, second) key selects a single column.
df[("bar", "one")]

A    0.692152
B    0.732662
C    0.107861
Name: (bar, one), dtype: float64

In [12]:
# A first-level label on the Series returns the entries stored under it.
s["qux"]

one   -0.287731
two   -0.281379
dtype: float64

In [13]:
# Data alignment and using reindex
# Adding s to a copy of itself without its last two entries: the sum is aligned
# on the index labels, and labels missing from one operand yield NaN.
s + s[:-2]

bar  one   -0.630424
     two    0.095942
baz  one   -0.173825
     two   -4.297682
foo  one    0.313027
     two   -0.870719
qux  one         NaN
     two         NaN
dtype: float64

In [14]:
# Same idea with every other entry of s: labels absent from the second operand
# come out as NaN in the sum.
s + s[::2]

bar  one   -0.630424
     two         NaN
baz  one   -0.173825
     two         NaN
foo  one    0.313027
     two         NaN
qux  one   -0.575461
     two         NaN
dtype: float64

In [15]:
# Conform s to the first three entries of the named MultiIndex.
s.reindex(index=index[:3])

first  second
bar    one      -0.315212
       two       0.047971
baz    one      -0.086912
dtype: float64

In [16]:
# reindex also accepts a plain list of tuples; the result follows the order given.
s.reindex(
    index=[
        ("foo", "two"),
        ("bar", "one"),
        ("qux", "one"),
        ("baz", "one"),
    ]
)

foo  two   -0.435359
bar  one   -0.315212
qux  one   -0.287731
baz  one   -0.086912
dtype: float64

In [18]:
# Advanced indexing with hierarchical index
# Move the (first, second) MultiIndex from the columns onto the rows so that
# .loc can select rows by tuple key.
# The guard keeps this cell idempotent: a bare `df = df.T` flips the frame back
# every time the cell is re-run, after which row-wise tuple lookups on df fail.
if isinstance(df.columns, pd.MultiIndex):
    df = df.T
df

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B,C
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
bar,one,0.692152,0.732662,0.107861
bar,two,0.290527,1.949945,1.272182
baz,one,-1.984037,1.869382,-0.199721
baz,two,0.240805,-3.058598,1.391029
foo,one,0.210255,0.511609,-0.702901
foo,two,1.110364,-1.212379,1.573196
qux,one,0.383845,-0.813514,2.162615
qux,two,-0.044952,-0.282848,1.845507


In [19]:
# Row key is a complete (first, second) tuple and the column is "A",
# so the lookup returns a single scalar.
df.loc[("bar", "two"), "A"]

np.float64(0.29052726368226245)

In [20]:
# Only the first-level label is given: returns the rows stored under "bar".
df.loc["bar"]

Unnamed: 0_level_0,A,B,C
second,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
one,0.692152,0.732662,0.107861
two,0.290527,1.949945,1.272182


In [21]:
# Slice between two complete row keys; both endpoints are part of the result.
df.loc[("baz", "two"):("qux", "one")]

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B,C
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
baz,two,0.240805,-3.058598,1.391029
foo,one,0.210255,0.511609,-0.702901
foo,two,1.110364,-1.212379,1.573196
qux,one,0.383845,-0.813514,2.162615


In [22]:
# A list of complete row keys selects exactly those rows.
df.loc[
    [
        ("bar", "two"),
        ("qux", "one"),
    ]
]

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B,C
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
bar,two,0.290527,1.949945,1.272182
qux,one,0.383845,-0.813514,2.162615


In [None]:
# Using slicers
# TODO: section not written yet; this cell holds only the heading comment.
