In [3]:
import pandas as pd
import numpy as np

In [4]:
## A multyindexed series

In [5]:
index = [('Nairobi', 2000), ('Nairobi', 2010),
         ('Kisumu', 2000), ('NKisumu', 2010),
         ('Mombasa', 2000), ('Mombasa', 2010)]

populations = [3456789, 355678,
              18663456, 193456789,
             23456789, 25456789]

pop = pd.Series(populations, index=index)
pop

(Nairobi, 2000)      3456789
(Nairobi, 2010)       355678
(Kisumu, 2000)      18663456
(NKisumu, 2010)    193456789
(Mombasa, 2000)     23456789
(Mombasa, 2010)     25456789
dtype: int64

In [6]:
index = pd.MultiIndex.from_tuples(index)
index

MultiIndex([('Nairobi', 2000),
            ('Nairobi', 2010),
            ( 'Kisumu', 2000),
            ('NKisumu', 2010),
            ('Mombasa', 2000),
            ('Mombasa', 2010)],
           )

In [7]:
pop = pop.reindex(index)
pop

Nairobi  2000      3456789
         2010       355678
Kisumu   2000     18663456
NKisumu  2010    193456789
Mombasa  2000     23456789
         2010     25456789
dtype: int64

In [8]:
pop[:, 2010]

Nairobi       355678
NKisumu    193456789
Mombasa     25456789
dtype: int64

In [9]:
## MultiIndex as extra dimension

In [10]:
pop_df = pop.unstack()
pop_df

Unnamed: 0,2000,2010
Kisumu,18663456.0,
Mombasa,23456789.0,25456789.0
NKisumu,,193456789.0
Nairobi,3456789.0,355678.0


In [11]:
pop_df.stack()

Kisumu   2000     18663456.0
Mombasa  2000     23456789.0
         2010     25456789.0
NKisumu  2010    193456789.0
Nairobi  2000      3456789.0
         2010       355678.0
dtype: float64

In [12]:
pop_df = pd.DataFrame({'total' : pop,
                    'under18' : [926089, 9284094,
                                 454325, 5454443,
                                 4235434, 5453243]})
pop_df

Unnamed: 0,Unnamed: 1,total,under18
Nairobi,2000,3456789,926089
Nairobi,2010,355678,9284094
Kisumu,2000,18663456,454325
NKisumu,2010,193456789,5454443
Mombasa,2000,23456789,4235434
Mombasa,2010,25456789,5453243


In [13]:
f_u18 = pop_df['under18'] / pop_df['total']
f_u18.unstack()

Unnamed: 0,2000,2010
Kisumu,0.024343,
Mombasa,0.180563,0.214216
NKisumu,,0.028195
Nairobi,0.267904,26.102525


## Methods of MultiIndex Creation

In [15]:
df = pd.DataFrame(np.random.rand(4, 2),
                  index=[['a', 'a', 'b', 'b'], [1, 2, 1, 2]],
                  columns=['data1', 'data2'])
df

Unnamed: 0,Unnamed: 1,data1,data2
a,1,0.141252,0.107126
a,2,0.417304,0.510671
b,1,0.008054,0.159608
b,2,0.742136,0.29199


###### Explicit MultiIndex constructors

In [17]:
pd.MultiIndex.from_arrays([['a', 'a', 'b', 'b'], [1, 2, 1, 2]])

MultiIndex([('a', 1),
            ('a', 2),
            ('b', 1),
            ('b', 2)],
           )

#### MultiIndex for columns

In [18]:
# hierarchical indices and columns

In [21]:
index = pd.MultiIndex.from_product([[2013, 2014], [1, 2]],
                                   names=['year', 'visit'])
columns = pd.MultiIndex.from_product([['Bob', 'Guido', 'Sue'], ['HR', 'Temp']],
                                     names=['subject', 'type'])

# Mock some data
data = np.round(np.random.randn(4, 6), 1)
data[:, ::2] *= 10
data += 3

# create the DataFrame
health_data = pd.DataFrame(data, index=index, columns=columns)
health_data

Unnamed: 0_level_0,subject,Bob,Bob,Guido,Guido,Sue,Sue
Unnamed: 0_level_1,type,HR,Temp,HR,Temp,HR,Temp
year,visit,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
2013,1,-6.0,3.6,2.0,1.7,11.0,3.6
2013,2,1.0,2.5,-3.0,4.5,8.0,2.3
2014,1,-4.0,2.3,-14.0,2.7,19.0,1.7
2014,2,22.0,2.7,-10.0,3.8,5.0,1.3


In [22]:
health_data['Guido']

Unnamed: 0_level_0,type,HR,Temp
year,visit,Unnamed: 2_level_1,Unnamed: 3_level_1
2013,1,2.0,1.7
2013,2,-3.0,4.5
2014,1,-14.0,2.7
2014,2,-10.0,3.8
