# Multi Index in Pandas

Exploring multi-index in Pandas.

In [1]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

#### Canned Example

In [3]:
# Nine random draws labelled by a two-level index: a letter on the outer
# level and a small integer on the inner level. The inner labels are
# deliberately uneven across groups (e.g. 'b' has no 2, 'c' has no 3).
data = pd.Series(
    data=np.random.randn(9),
    index=pd.MultiIndex.from_arrays([
        ['a', 'a', 'a', 'b', 'b', 'c', 'c', 'd', 'd'],  # outer level
        [1, 2, 3, 1, 3, 1, 2, 2, 3],                    # inner level
    ]),
)

In [4]:
# Display the Series; in the repr each outer label is printed once per group.
data

a  1    0.358429
   2    0.615294
   3   -2.312104
b  1   -1.275133
   3    0.236521
c  1    1.003658
   2   -0.280555
d  2    0.745407
   3    0.946250
dtype: float64

In [5]:
# Inspect the index: the two label lists were combined into a MultiIndex of tuples.
data.index

MultiIndex([('a', 1),
            ('a', 2),
            ('a', 3),
            ('b', 1),
            ('b', 3),
            ('c', 1),
            ('c', 2),
            ('d', 2),
            ('d', 3)],
           )

In [6]:
# Partial indexing: an outer-level label alone returns the sub-Series keyed by the inner level.
data['a']

1    0.358429
2    0.615294
3   -2.312104
dtype: float64

In [63]:
# Everything on the outer level, label 2 on the inner level.
# Groups without an inner label 2 ('b' here) are absent from the result.
data[:, 2]

a    0.615294
c   -0.280555
d    0.745407
dtype: float64

#### My Example

In [50]:
# Three aligned integer label arrays of length 10, one per index level.
# Each array is constant within the same three row groups:
# positions 0-3, 4-6 and 7-9.
#
# NOTE: the `np.int` alias was deprecated in NumPy 1.20 and removed in 1.24,
# so the labels are built with np.repeat, which produces the default integer
# dtype directly instead of scaling slices of an np.ones array.
group_sizes = [4, 3, 3]  # rows per group
#
x = np.repeat([1, 2, 3], group_sizes)
#
y = np.repeat([6, 8, 10], group_sizes)
#
z = np.repeat([3, 5, 7], group_sizes)

In [51]:
# Start from an empty frame whose rows are indexed by the (x, y, z) label
# arrays, then attach two random columns: 'one' holds scaled normal draws
# truncated to integers, 'two' holds uniform draws.
df = pd.DataFrame(
    index=pd.MultiIndex.from_arrays([x, y, z], names=['x', 'y', 'z'])
).assign(
    one=(10 * np.random.randn(10)).astype(int),
    two=np.random.random(10),
)

In [52]:
# Display the frame; every row in a group carries the same (x, y, z) index tuple.
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,one,two
x,y,z,Unnamed: 3_level_1,Unnamed: 4_level_1
1,6,3,-11,0.903167
1,6,3,-16,0.916457
1,6,3,18,0.120235
1,6,3,5,0.262101
2,8,5,-7,0.765819
2,8,5,7,0.483798
2,8,5,6,0.6323
3,10,7,1,0.587004
3,10,7,-5,0.024788
3,10,7,11,0.498297


#### Construct index first

Four helper constructors exist: `from_arrays`, `from_tuples`, `from_frame` and `from_product`.

In [53]:
# Build the three-level index explicitly, naming each level up front.
my_idx = pd.MultiIndex.from_arrays(
    [x, y, z],
    names=['x', 'y', 'z'],
)

In [54]:
# Empty frame on the prebuilt index, plus two random columns:
# 'ds1' from a standard normal, 'ds2' from a uniform distribution.
df1 = pd.DataFrame(index=my_idx).assign(
    ds1=np.random.randn(10),
    ds2=np.random.random(10),
)

In [55]:
# Display the frame built on the named (x, y, z) MultiIndex.
df1

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,ds1,ds2
x,y,z,Unnamed: 3_level_1,Unnamed: 4_level_1
1,6,3,-0.534889,0.309959
1,6,3,0.776948,0.361283
1,6,3,0.352057,0.380967
1,6,3,2.668451,0.408532
2,8,5,0.312661,0.958474
2,8,5,0.620247,0.259245
2,8,5,-0.715763,0.488394
3,10,7,0.224315,0.481209
3,10,7,-0.006851,0.093452
3,10,7,0.76974,0.646152


In [62]:
# Label-based slice on the outermost level (x); both endpoints, 2 and 3, are included.
df1.loc[2:3]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,ds1,ds2
x,y,z,Unnamed: 3_level_1,Unnamed: 4_level_1
2,8,5,0.312661,0.958474
2,8,5,0.620247,0.259245
2,8,5,-0.715763,0.488394
3,10,7,0.224315,0.481209
3,10,7,-0.006851,0.093452
3,10,7,0.76974,0.646152


In [71]:
# Show the underlying MultiIndex, including its level names.
df1.index

MultiIndex([(1,  6, 3),
            (1,  6, 3),
            (1,  6, 3),
            (1,  6, 3),
            (2,  8, 5),
            (2,  8, 5),
            (2,  8, 5),
            (3, 10, 7),
            (3, 10, 7),
            (3, 10, 7)],
           names=['x', 'y', 'z'])

Using integer values for the x, y, z levels means the standard slices don't seem to work. Enter the `.xs` method...

In [79]:
# Cross-section: keep the rows where level 'y' equals 6; the 'y' level is dropped from the result.
df1.xs(6, level='y')

Unnamed: 0_level_0,Unnamed: 1_level_0,ds1,ds2
x,z,Unnamed: 2_level_1,Unnamed: 3_level_1
1,3,-0.534889,0.309959
1,3,0.776948,0.361283
1,3,0.352057,0.380967
1,3,2.668451,0.408532


In [81]:
# Cross-section: keep the rows where level 'z' equals 3; the 'z' level is dropped from the result.
df1.xs(3, level='z')

Unnamed: 0_level_0,Unnamed: 1_level_0,ds1,ds2
x,y,Unnamed: 2_level_1,Unnamed: 3_level_1
1,6,-0.534889,0.309959
1,6,0.776948,0.361283
1,6,0.352057,0.380967
1,6,2.668451,0.408532
