Source: pandas user guide — <https://pandas.pydata.org/docs/user_guide/advanced.html#advanced-indexing-with-hierarchical-index>

# MultiIndex / advanced indexing

## Hierarchical indexing (MultiIndex)

In [5]:
import numpy as np
import pandas as pd

In [1]:
# Two parallel label lists: position i of the first list and position i of
# the second list together label the i-th entry.
arrays = [
    ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"],
    ["one", "two", "one", "two", "one", "two", "one", "two"],
]

In [2]:
# Transpose the parallel label lists into one tuple of labels per entry.
tuples = [*zip(*arrays)]

In [3]:
# Show the label pairs produced by zipping the lists in `arrays`.
tuples

[('bar', 'one'),
 ('bar', 'two'),
 ('baz', 'one'),
 ('baz', 'two'),
 ('foo', 'one'),
 ('foo', 'two'),
 ('qux', 'one'),
 ('qux', 'two')]

In [6]:
# Build a two-level index from the label pairs and give each level a name.
index = pd.MultiIndex.from_tuples(
    tuples,
    names=["first", "second"],
)

In [7]:
# Show the MultiIndex built from `tuples`, including its level names.
index

MultiIndex([('bar', 'one'),
            ('bar', 'two'),
            ('baz', 'one'),
            ('baz', 'two'),
            ('foo', 'one'),
            ('foo', 'two'),
            ('qux', 'one'),
            ('qux', 'two')],
           names=['first', 'second'])

In [8]:
# Eight random values labelled by the two-level index.
# NOTE(review): no seed is set in the visible cells, so the numbers shown
# below will differ on every run.
s = pd.Series(
    data=np.random.randn(8),
    index=index,
)

In [9]:
# Show the Series together with its two index levels.
s

first  second
bar    one      -0.866358
       two      -0.644850
baz    one       0.807632
       two      -1.401468
foo    one       1.303821
       two       2.346305
qux    one       0.802909
       two      -0.320047
dtype: float64

In [10]:
# One list of labels per level: the first list feeds the first level, the
# second list feeds the second level.
iterables = [
    ["bar", "baz", "foo", "qux"],
    ["one", "two"],
]

In [11]:
# Pair every label of the first list with every label of the second list.
pd.MultiIndex.from_product(
    iterables,
    names=["first", "second"],
)

MultiIndex([('bar', 'one'),
            ('bar', 'two'),
            ('baz', 'one'),
            ('baz', 'two'),
            ('foo', 'one'),
            ('foo', 'two'),
            ('qux', 'one'),
            ('qux', 'two')],
           names=['first', 'second'])

In [12]:
# Plain frame whose rows are label pairs. Its column names are "first" and
# "second", matching the level names used for the indexes above.
df = pd.DataFrame(
    [["bar", "one"], ["bar", "two"], ["foo", "one"], ["foo", "two"]],
    columns=["first", "second"],
)

In [13]:
# Show the four-row frame of label pairs.
df

Unnamed: 0,first,second
0,bar,one
1,bar,two
2,foo,one
3,foo,two


In [14]:
# Turn the frame into a MultiIndex: each row becomes one entry and the
# column names become the level names (see the output below).
pd.MultiIndex.from_frame(df)

MultiIndex([('bar', 'one'),
            ('bar', 'two'),
            ('foo', 'one'),
            ('foo', 'two')],
           names=['first', 'second'])

In [15]:
# Same parallel labels as before, this time held in NumPy arrays so they
# can be passed directly as an index.
arrays = [
    np.array(["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"]),
    np.array(["one", "two", "one", "two", "one", "two", "one", "two"]),
]

In [16]:
# Pass the list of arrays straight in as the index; the output below shows
# the resulting two-level index.
# NOTE(review): unseeded random data, so values differ between runs.
s = pd.Series(
    data=np.random.randn(8),
    index=arrays,
)

In [17]:
# Show the Series indexed by the two label arrays.
s

bar  one   -0.405917
     two   -1.351725
baz  one    1.412760
     two    0.914321
foo  one    0.404151
     two    1.519593
qux  one   -0.429649
     two    1.725291
dtype: float64

In [18]:
# 8x4 frame of random values whose rows are labelled by the two arrays.
# NOTE(review): unseeded random data, so values differ between runs.
df = pd.DataFrame(
    data=np.random.randn(8, 4),
    index=arrays,
)

In [19]:
# Show the frame; the first two columns of the output are the index levels.
df

Unnamed: 0,Unnamed: 1,0,1,2,3
bar,one,0.524857,1.094011,-1.139849,0.399082
bar,two,0.92377,-0.944788,-0.691989,0.222959
baz,one,0.074482,-1.01113,0.066383,-0.751721
baz,two,-0.25633,0.98243,-0.354478,-0.233604
foo,one,1.244099,1.855536,-0.492733,-1.477756
foo,two,-0.163801,-0.518328,-1.194796,-1.016841
qux,one,-0.000201,-0.334621,-0.327387,-0.433838
qux,two,0.661724,-0.580446,0.817945,0.74585


In [20]:
# The index was built from bare arrays with no `names=` given, so both
# levels are unnamed (see the output below).
df.index.names

FrozenList([None, None])

In [21]:
# 3x8 frame of random values: plain row labels, with the named MultiIndex
# used on the column axis instead.
# NOTE(review): unseeded random data, so values differ between runs.
df = pd.DataFrame(
    data=np.random.randn(3, 8),
    index=["A", "B", "C"],
    columns=index,
)

In [22]:
# Show the frame with the two-level ("first", "second") column header.
df

first,bar,bar,baz,baz,foo,foo,qux,qux
second,one,two,one,two,one,two,one,two
A,0.372985,1.030924,0.761534,-0.187443,-1.629497,-0.540796,-0.969058,0.562194
B,-1.648801,-0.045208,-1.035452,-1.471731,-1.094572,0.785029,-0.398579,1.270306
C,0.341034,-0.36219,-1.613882,1.189915,-1.315741,0.278712,-1.138498,0.785666
