In [143]:
# Pandas MultiIndex - for working with more than 2 dimensions

import numpy as np
import pandas as pd

In [145]:
# Create a list indexes - City and Year

index = [('California', 2000), ('California', 2010),
         ('Texas', 2000), ('Texas', 2010),
                    ('New York', 2000), ('New York', 2010),
                    ]


In [155]:
# Create a multi-index from the tuples, creating multiple levels of indexing City/Year

index = pd.MultiIndex.from_tuples(index)
index

MultiIndex([('California', 2000),
            ('California', 2010),
            (     'Texas', 2000),
            (     'Texas', 2010),
            (  'New York', 2000),
            (  'New York', 2010)],
           )

In [158]:
# Create a dataframe and assigm the multipleindexes

populations = [33871648, 37253956,
                          18976457, 19378102,
                          20851820, 25145561]
areas = [33871648, 37253956,
                          18976457, 19378102,
                          20851820, 25145561]

pop = pd.DataFrame({'population': populations, "area": areas}, index=index)

# Rename Indexs
pop.index.names = ['state', 'year']

pop

Unnamed: 0_level_0,Unnamed: 1_level_0,population,area
state,year,Unnamed: 2_level_1,Unnamed: 3_level_1
California,2000,33871648,33871648
California,2010,37253956,37253956
Texas,2000,18976457,18976457
Texas,2010,19378102,19378102
New York,2000,20851820,20851820
New York,2010,25145561,25145561


In [148]:
# With xs select data at a particular level of a MultiIndex

pop.xs(2010, level=1, drop_level=False)

Unnamed: 0_level_0,Unnamed: 1_level_0,population,area
state,year,Unnamed: 2_level_1,Unnamed: 3_level_1
California,2010,37253956,37253956
Texas,2010,19378102,19378102
New York,2010,25145561,25145561


In [82]:
pop.xs('California')

Unnamed: 0,population,area
2000,33871648,33871648
2010,37253956,37253956


In [72]:
# pop.loc[('California', 2010)]
pop.xs(('California', 2010))

population    37253956
area          37253956
Name: (California, 2010), dtype: int64

In [150]:
# Indexes need to be sorted first to call in alphabetical order

pop = pop.sort_index()
pop['population']['California':'New York']

state       year
California  2000    33871648
            2010    37253956
New York    2000    20851820
            2010    25145561
Name: population, dtype: int64

In [87]:
pop2 = pd.DataFrame({'population': populations}, index=index)

In [98]:
# Stack
# Pivot the column labels ('population', 'area') into a new innermost index
# level, turning the DataFrame into a Series with a three-level MultiIndex

pop.stack()

California  2000  population    33871648
                  area          33871648
            2010  population    37253956
                  area          37253956
New York    2000  population    18976457
                  area          18976457
            2010  population    19378102
                  area          19378102
Texas       2000  population    20851820
                  area          20851820
            2010  population    25145561
                  area          25145561
dtype: int64

In [94]:
# Round trip: unstack() pivots the innermost index level back into columns
pop.stack().unstack()

Unnamed: 0,Unnamed: 1,population,area
California,2000,33871648,33871648
California,2010,37253956,37253956
New York,2000,18976457,18976457
New York,2010,19378102,19378102
Texas,2000,20851820,20851820
Texas,2010,25145561,25145561


In [159]:
# Reset indexes and add previous index as columns

pop_flat = pop.reset_index()
pop_flat

Unnamed: 0,state,year,population,area
0,California,2000,33871648,33871648
1,California,2010,37253956,37253956
2,Texas,2000,18976457,18976457
3,Texas,2010,19378102,19378102
4,New York,2000,20851820,20851820
5,New York,2010,25145561,25145561


In [161]:
# Create MultiIndex from columns (and drop previous indexes)

pop_flat.set_index(['state', 'year'])

Unnamed: 0_level_0,Unnamed: 1_level_0,population,area
state,year,Unnamed: 2_level_1,Unnamed: 3_level_1
California,2000,33871648,33871648
California,2010,37253956,37253956
Texas,2000,18976457,18976457
Texas,2010,19378102,19378102
New York,2000,20851820,20851820
New York,2010,25145561,25145561
