# Multi-index / Index Hierarchy

How do you build a multi-level (hierarchical) index in pandas? Let's work through some examples.

In [1]:
import numpy as np
import pandas as pd

from pandas import Series,DataFrame
from numpy.random import rand

### Series

In [2]:
# Two parallel label lists become a two-level MultiIndex (outer level first).
# NOTE: rand() is not seeded in this notebook, so the six values differ on every run.
outer_labels = [1,1,1,2,2,2]
inner_labels = ['s1','s2','s3','s1','s2','s3']
ser = Series(rand(6), index=[outer_labels, inner_labels])

ser         # the first list is the outer (level-0) index

1  s1    0.857109
   s2    0.017733
   s3    0.601528
2  s1    0.769805
   s2    0.999838
   s3    0.671783
dtype: float64

In [3]:
# The index is a MultiIndex: one (outer, inner) tuple per row.
ser.index

MultiIndex([(1, 's1'),
            (1, 's2'),
            (1, 's3'),
            (2, 's1'),
            (2, 's2'),
            (2, 's3')],
           )

In [4]:
# Select outer-level label 1; the result is indexed by the inner level only.
ser[1]

s1    0.857109
s2    0.017733
s3    0.601528
dtype: float64

In [5]:
# Chained lookup: outer label 1 first, then inner label 's2', giving a single scalar.
# ser.loc[(1, 's2')] reaches the same element in one step.
ser[1]['s2']

0.017733094335112387

In [6]:
# Same partial selection, this time for outer label 2.
ser[2]

s1    0.769805
s2    0.999838
s3    0.671783
dtype: float64

In [7]:
# Slice every outer label and pick inner label 's1'; the result is indexed by the outer level.
ser[:,'s1']

1    0.857109
2    0.769805
dtype: float64

In [8]:
# Show the full Series again; the selections above did not modify it.
ser

1  s1    0.857109
   s2    0.017733
   s3    0.601528
2  s1    0.769805
   s2    0.999838
   s3    0.671783
dtype: float64

A hierarchically indexed Series can be reshaped into a DataFrame with the `unstack()` method.

### DataFrames

In [9]:
# unstack() moves the inner index level into the columns:
# the outer labels (1, 2) stay as rows and 's1'..'s3' become the column headers.
df = ser.unstack()  
df                    

Unnamed: 0,s1,s2,s3
1,0.857109,0.017733,0.601528
2,0.769805,0.999838,0.671783


In [10]:
# Build a 4x4 frame whose rows AND columns each carry two index levels.
# In both label lists the first inner list is the outer level.
row_labels = [['a','a','b','b'],[5,6,5,6]]
col_labels = [['SYD','NY','NY','IST'],['far','close','far','close']]
df1 = DataFrame(np.arange(16).reshape(4,4), index=row_labels, columns=col_labels)
df1

Unnamed: 0_level_0,Unnamed: 1_level_0,SYD,NY,NY,IST
Unnamed: 0_level_1,Unnamed: 1_level_1,far,close,far,close
a,5,0,1,2,3
a,6,4,5,6,7
b,5,8,9,10,11
b,6,12,13,14,15


In [11]:
# Partial row selection: outer label 'a' keeps its rows, now indexed by the inner level (5, 6).
df1.loc['a']

Unnamed: 0_level_0,SYD,NY,NY,IST
Unnamed: 0_level_1,far,close,far,close
5,0,1,2,3
6,4,5,6,7


In [12]:
# Chain a second .loc to pick inner label 5; the result is a Series over the two-level columns.
# Its name is just 5 because the first .loc already dropped the outer label.
df1.loc['a'].loc[5]

SYD  far      0
NY   close    1
     far      2
IST  close    3
Name: 5, dtype: int32

In [13]:
# xs('a') takes the same cross-section as df1.loc['a'] above.
df1.xs('a')

Unnamed: 0_level_0,SYD,NY,NY,IST
Unnamed: 0_level_1,far,close,far,close
5,0,1,2,3
6,4,5,6,7


In [14]:
# Select the row at outer label 'a' / inner label 5 in a single call.
# The key must be a tuple: list keys to xs() were deprecated and are rejected by pandas 2.x.
df1.xs(('a',5))

SYD  far      0
NY   close    1
     far      2
IST  close    3
Name: (a, 5), dtype: int32

In [15]:
# Row index: a MultiIndex of (outer, inner) tuples; its levels have no names yet.
df1.index

MultiIndex([('a', 5),
            ('a', 6),
            ('b', 5),
            ('b', 6)],
           )

In [16]:
# The columns are a MultiIndex too: one (top, bottom) header pair per column.
df1.columns

MultiIndex([('SYD',   'far'),
            ( 'NY', 'close'),
            ( 'NY',   'far'),
            ('IST', 'close')],
           )

In [17]:
# Give every level a name so later cells can refer to levels by name (e.g. level='distance').
# NOTE: assigning to .names updates df1's existing index objects in place.
df1.index.names = ['letter','values']        # names for the two row-index levels
df1.columns.names = ['cities','distance'] # names for the two column-index levels

In [18]:
# Display df1 to see the new level names in the row and column headers.
df1

Unnamed: 0_level_0,cities,SYD,NY,NY,IST
Unnamed: 0_level_1,distance,far,close,far,close
letter,values,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
a,5,0,1,2,3
a,6,4,5,6,7
b,5,8,9,10,11
b,6,12,13,14,15


In [19]:
# Swap the two column levels so 'distance' sits on top; the column order is not re-sorted.
# The result is a new frame (df1 is not reassigned here).
df1.swaplevel('cities','distance',axis=1) 

Unnamed: 0_level_0,distance,far,close,far,close
Unnamed: 0_level_1,cities,SYD,NY,NY,IST
letter,values,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
a,5,0,1,2,3
a,6,4,5,6,7
b,5,8,9,10,11
b,6,12,13,14,15


In [20]:
# Reorder the columns so the 'close' columns come before the 'far' ones; df1 is not reassigned.
df1.sort_index(axis=1,level='distance')  # sort the columns by the 'distance' level

Unnamed: 0_level_0,cities,IST,NY,NY,SYD
Unnamed: 0_level_1,distance,close,close,far,far
letter,values,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
a,5,3,1,2,0
a,6,7,5,6,4
b,5,11,9,10,8
b,6,15,13,14,12


In [21]:
# Sort the rows by the inner 'values' level, so both 5-rows come before the 6-rows.
df1.sort_index(axis=0,level='values')

Unnamed: 0_level_0,cities,SYD,NY,NY,IST
Unnamed: 0_level_1,distance,far,close,far,close
letter,values,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
a,5,0,1,2,3
b,5,8,9,10,11
a,6,4,5,6,7
b,6,12,13,14,15


In [22]:
# Display df1 again: the swaplevel/sort_index calls above returned new frames and left df1 as it was.
df1

Unnamed: 0_level_0,cities,SYD,NY,NY,IST
Unnamed: 0_level_1,distance,far,close,far,close
letter,values,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
a,5,0,1,2,3
a,6,4,5,6,7
b,5,8,9,10,11
b,6,12,13,14,15


In [23]:
# Cross-section on the inner level: keep the rows whose 'values' label is 6.
# The selected level is dropped, so the result is indexed by 'letter' only.
df1.xs(6,level='values')     

cities,SYD,NY,NY,IST
distance,far,close,far,close
letter,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,4,5,6,7
b,12,13,14,15


In [24]:
# Add up the columns that share the same 'distance' label (far = SYD+NY, close = NY+IST).
# sum(level=...) was deprecated in pandas 1.3 and removed in 2.0; grouping the transposed
# frame by the level is the replacement. sort=False keeps the labels in first-seen order
# (far, close), and the final .T restores the original row/column orientation.
df1.T.groupby(level='distance', sort=False).sum().T

Unnamed: 0_level_0,distance,far,close
letter,values,Unnamed: 2_level_1,Unnamed: 3_level_1
a,5,2,4
a,6,10,12
b,5,18,20
b,6,26,28
