In [1]:
import pandas as pd
import numpy as np
# Two parallel label lists: position i of each list forms one (first, second) key.
arrays = [
    ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"],
    ["one", "two", "one", "two", "one", "two", "one", "two"],
]

# Pair the lists up position by position: one tuple per future index entry.
tuples = [*zip(*arrays)]
tuples

[('bar', 'one'),
 ('bar', 'two'),
 ('baz', 'one'),
 ('baz', 'two'),
 ('foo', 'one'),
 ('foo', 'two'),
 ('qux', 'one'),
 ('qux', 'two')]

In [2]:
# Build the hierarchical index from the (first, second) tuples and name both levels.
index = pd.MultiIndex.from_tuples(
    tuples,
    names=["first", "second"],
)
index

MultiIndex([('bar', 'one'),
            ('bar', 'two'),
            ('baz', 'one'),
            ('baz', 'two'),
            ('foo', 'one'),
            ('foo', 'two'),
            ('qux', 'one'),
            ('qux', 'two')],
           names=['first', 'second'])

In [3]:
# Eight unseeded random draws labelled by `index`; the values differ on every run.
s = pd.Series(data=np.random.randn(8), index=index)
s

first  second
bar    one      -0.464629
       two       0.225041
baz    one       1.774406
       two      -0.911697
foo    one       0.361171
       two      -0.738575
qux    one       1.457981
       two       1.183486
dtype: float64

In [4]:
# When every pairing of the elements of several iterables is wanted,
# MultiIndex.from_product() is usually the easier constructor.
iterables = [
    ["bar", "baz", "foo", "qux"],
    ["one", "two"],
]
pd.MultiIndex.from_product(iterables, names=["first", "second"])

MultiIndex([('bar', 'one'),
            ('bar', 'two'),
            ('baz', 'one'),
            ('baz', 'two'),
            ('foo', 'one'),
            ('foo', 'two'),
            ('qux', 'one'),
            ('qux', 'two')],
           names=['first', 'second'])

In [5]:
# A MultiIndex can also be built directly from a DataFrame with MultiIndex.from_frame():
# each column supplies one level, and the column names become the level names.
df = pd.DataFrame(
    {
        "first": ["bar", "bar", "foo", "foo"],
        "second": ["one", "two", "one", "two"],
    }
)
pd.MultiIndex.from_frame(df)

MultiIndex([('bar', 'one'),
            ('bar', 'two'),
            ('foo', 'one'),
            ('foo', 'two')],
           names=['first', 'second'])

In [6]:
# Convenience: pass a list of arrays as `index` to Series or DataFrame
# and the MultiIndex is constructed automatically (levels stay unnamed).
arrays = [
    np.array(["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"]),
    np.array(["one", "two", "one", "two", "one", "two", "one", "two"]),
]
s = pd.Series(data=np.random.randn(8), index=arrays)
s

bar  one   -0.481231
     two   -0.391174
baz  one    0.646157
     two    0.427235
foo  one   -0.112834
     two    0.382294
qux  one   -0.249141
     two   -2.542712
dtype: float64

In [7]:
# Same list-of-arrays shortcut for a DataFrame: 8 rows keyed by `arrays`, 4 random columns.
df = pd.DataFrame(data=np.random.randn(8, 4), index=arrays)
df

Unnamed: 0,Unnamed: 1,0,1,2,3
bar,one,-0.268127,0.520643,0.337227,1.464887
bar,two,1.047451,-0.548585,-0.234381,0.474933
baz,one,-2.111346,0.654599,0.625712,-0.909263
baz,two,0.044887,1.01847,0.970775,-0.815903
foo,one,-0.967895,0.729454,1.165444,-0.956839
foo,two,0.211889,-0.705352,-0.038118,1.343046
qux,one,-1.286255,0.48349,1.275818,0.493674
qux,two,0.917297,0.706416,1.58217,-0.248719


In [8]:
# Put the MultiIndex on the columns instead: 3 plainly labelled rows, 8 hierarchical columns.
df = pd.DataFrame(
    data=np.random.randn(3, 8),
    index=["A", "B", "C"],
    columns=index,
)
df

first,bar,bar,baz,baz,foo,foo,qux,qux
second,one,two,one,two,one,two,one,two
A,-1.473886,0.428134,-0.217969,-0.2649,1.030838,-1.013653,0.126977,0.404414
B,1.359424,0.785555,0.19916,-1.397745,0.658907,1.478449,1.973797,-1.53961
C,-0.763226,-0.561011,0.61332,0.819984,-1.177286,1.104239,1.43207,-0.472967


In [9]:
# Both axes can carry a MultiIndex at once; here each one uses the first six keys of `index`.
pd.DataFrame(
    np.random.randn(6, 6),
    index=index[:6],
    columns=index[:6],
)

Unnamed: 0_level_0,first,bar,bar,baz,baz,foo,foo
Unnamed: 0_level_1,second,one,two,one,two,one,two
first,second,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
bar,one,-0.754138,1.320978,-1.250648,0.081055,0.312538,0.343522
bar,two,1.123441,-0.060324,1.587467,-1.571612,-1.137814,-1.369742
baz,one,1.54366,1.187029,-1.970501,-0.819826,0.86045,0.234405
baz,two,0.449324,0.078732,-0.366578,1.091079,0.830555,0.064955
foo,one,-0.541029,0.553867,-0.111782,0.736642,0.588468,0.03213
foo,two,-0.126003,-0.571126,2.533194,0.561121,0.895951,-0.40337


In [10]:
# Basic indexing on an axis with a MultiIndex:
# a first-level label selects every column under it and drops that level from the result.
df["bar"]

second,one,two
A,-1.473886,0.428134
B,1.359424,0.785555
C,-0.763226,-0.561011


In [11]:
# A complete (first, second) key selects a single column, returned as a Series.
df[("bar", "one")]

A   -1.473886
B    1.359424
C   -0.763226
Name: (bar, one), dtype: float64

In [12]:
# Partial indexing on a Series: the "qux" entries, indexed by the remaining level.
s["qux"]

one   -0.249141
two   -2.542712
dtype: float64

In [13]:
# Data alignment and using reindex:
# arithmetic aligns both operands on the full MultiIndex, so the keys missing
# from the shortened operand come back as NaN.
# .iloc states explicitly that the slice is positional (drop the last two rows).
s + s.iloc[:-2]

bar  one   -0.962462
     two   -0.782349
baz  one    1.292313
     two    0.854471
foo  one   -0.225669
     two    0.764588
qux  one         NaN
     two         NaN
dtype: float64

In [14]:
# Add back every other row, chosen by position via .iloc; the skipped keys become NaN.
s + s.iloc[::2]

bar  one   -0.962462
     two         NaN
baz  one    1.292313
     two         NaN
foo  one   -0.225669
     two         NaN
qux  one   -0.498281
     two         NaN
dtype: float64

In [15]:
# Reindex with a slice of another MultiIndex: keeps the first three keys of `index`.
s.reindex(index=index[:3])

first  second
bar    one      -0.481231
       two      -0.391174
baz    one       0.646157
dtype: float64

In [16]:
# Reindex with an explicit list of tuples; the result follows the order given here.
s.reindex(
    [
        ("foo", "two"),
        ("bar", "one"),
        ("qux", "one"),
        ("baz", "one"),
    ]
)

foo  two    0.382294
bar  one   -0.481231
qux  one   -0.249141
baz  one    0.646157
dtype: float64

In [17]:
# Advanced indexing with hierarchical index
# Move the MultiIndex from the columns onto the rows so it can be addressed through .loc.
# NOTE: this rebinds `df` to its own transpose, so running the cell a second time flips it back.
df = df.transpose()
df

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B,C
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
bar,one,-1.473886,1.359424,-0.763226
bar,two,0.428134,0.785555,-0.561011
baz,one,-0.217969,0.19916,0.61332
baz,two,-0.2649,-1.397745,0.819984
foo,one,1.030838,0.658907,-1.177286
foo,two,-1.013653,1.478449,1.104239
qux,one,0.126977,1.973797,1.43207
qux,two,0.404414,-1.53961,-0.472967


In [18]:
# Full row key plus a column label: returns a single scalar value.
df.loc[("bar", "two"), "A"]

np.float64(0.42813366546861126)

In [19]:
# Partial row key: every row under "bar", with the first level dropped from the result.
df.loc["bar"]

Unnamed: 0_level_0,A,B,C
second,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
one,-1.473886,1.359424,-0.763226
two,0.428134,0.785555,-0.561011


In [20]:
# Label slice between two complete row keys; as the output shows, both endpoints are included.
df.loc[("baz", "two"):("qux", "one")]

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B,C
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
baz,two,-0.2649,-1.397745,0.819984
foo,one,1.030838,0.658907,-1.177286
foo,two,-1.013653,1.478449,1.104239
qux,one,0.126977,1.973797,1.43207


In [21]:
# A list of complete keys selects exactly those rows, in the order listed.
df.loc[[("bar", "two"), ("qux", "one")]]

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B,C
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
bar,two,0.428134,0.785555,-0.561011
qux,one,0.126977,1.973797,1.43207


In [25]:
# Using slicers
def mklbl(prefix: str, n: int) -> list[str]:
    """Return the ``n`` labels ``prefix0``, ``prefix1``, ..., ``prefix<n-1>``.

    Parameters
    ----------
    prefix : str
        Text placed in front of every counter value.
    n : int
        Number of labels to generate; counters run over ``range(n)``.

    Returns
    -------
    list of str
        The labels in counter order (empty when ``n`` is 0 or negative).
    """
    # f-string replaces the legacy "%s%s" % (...) formatting; the output is identical.
    return [f"{prefix}{i}" for i in range(n)]


# Row index: every combination of the generated A/B/C/D labels (4 x 2 x 4 x 2 keys, 4 levels).
miindex = pd.MultiIndex.from_product(
    [mklbl(letter, count) for letter, count in (("A", 4), ("B", 2), ("C", 4), ("D", 2))]
)

# Column index: two named levels, listed here in unsorted order (sorted further down).
micolumns = pd.MultiIndex.from_tuples(
    [("a", "foo"), ("a", "bar"), ("b", "foo"), ("b", "bah")],
    names=["lvl0", "lvl1"],
)

# Fill the frame row by row with 0, 1, 2, ... so each cell's value reveals its original position.
dfmi = pd.DataFrame(
    np.arange(len(miindex) * len(micolumns)).reshape(len(miindex), len(micolumns)),
    index=miindex,
    columns=micolumns,
)
# Sort rows, then columns (MultiIndex slicing expects lexsorted axes).
dfmi = dfmi.sort_index().sort_index(axis=1)

dfmi

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lvl0,a,a,b,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl1,bar,foo,bah,foo
A0,B0,C0,D0,1,0,3,2
A0,B0,C0,D1,5,4,7,6
A0,B0,C1,D0,9,8,11,10
A0,B0,C1,D1,13,12,15,14
A0,B0,C2,D0,17,16,19,18
...,...,...,...,...,...,...,...
A3,B1,C1,D1,237,236,239,238
A3,B1,C2,D0,241,240,243,242
A3,B1,C2,D1,245,244,247,246
A3,B1,C3,D0,249,248,251,250


In [26]:
# Slicer over the row levels: A1 through A3 on level 0, everything on level 1,
# only C1 and C3 on level 2 (level 3 is left unrestricted); `:` keeps all columns.
dfmi.loc[pd.IndexSlice["A1":"A3", :, ["C1", "C3"]], :]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lvl0,a,a,b,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl1,bar,foo,bah,foo
A1,B0,C1,D0,73,72,75,74
A1,B0,C1,D1,77,76,79,78
A1,B0,C3,D0,89,88,91,90
A1,B0,C3,D1,93,92,95,94
A1,B1,C1,D0,105,104,107,106
A1,B1,C1,D1,109,108,111,110
A1,B1,C3,D0,121,120,123,122
A1,B1,C3,D1,125,124,127,126
A2,B0,C1,D0,137,136,139,138
A2,B0,C1,D1,141,140,143,142


In [28]:
# pd.IndexSlice allows the natural `:` / a:b slice syntax inside .loc.
idx = pd.IndexSlice
# Work on a copy so `dfmi` itself is left untouched.
df2 = dfmi.copy()
# Rows whose third level is C1 or C3, across all values of the other levels.
c1_c3_rows = idx[:, :, ["C1", "C3"]]
# Assign the frame scaled by 1000 into that selection; the other rows keep their values.
df2.loc[c1_c3_rows, :] = df2 * 1000
df2

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lvl0,a,a,b,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl1,bar,foo,bah,foo
A0,B0,C0,D0,1,0,3,2
A0,B0,C0,D1,5,4,7,6
A0,B0,C1,D0,9000,8000,11000,10000
A0,B0,C1,D1,13000,12000,15000,14000
A0,B0,C2,D0,17,16,19,18
...,...,...,...,...,...,...,...
A3,B1,C1,D1,237000,236000,239000,238000
A3,B1,C2,D0,241,240,243,242
A3,B1,C2,D1,245,244,247,246
A3,B1,C3,D0,249000,248000,251000,250000
