# Chapter 8: Data Wrangling: Join, Combine, and Reshape

## 8.1: Hierarchical Indexing

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Draw the nine sample values from a seeded generator so that
# Restart Kernel -> Run All yields the same Series (and therefore the same
# downstream outputs) every time; the unseeded global RNG gave new values
# on each run.
rng = np.random.default_rng(seed=12345)

# Passing a list of two parallel label lists as `index` builds a two-level
# MultiIndex: outer labels "a".."d", inner labels 1..3 (not every pair exists).
data = pd.Series(
    rng.uniform(size=9),
    index=[
        ["a", "a", "a", "b", "b", "c", "c", "d", "d"],
        [1, 2, 3, 1, 3, 1, 2, 2, 3],
    ],
)
data

a  1    0.556170
   2    0.918108
   3    0.138370
b  1    0.627141
   3    0.181274
c  1    0.036821
   2    0.278927
d  2    0.232790
   3    0.974609
dtype: float64

In [3]:
# Inspect the index of `data`: the two parallel label lists passed at
# construction are stored as a MultiIndex of (outer, inner) label pairs.
data.index

MultiIndex([('a', 1),
            ('a', 2),
            ('a', 3),
            ('b', 1),
            ('b', 3),
            ('c', 1),
            ('c', 2),
            ('d', 2),
            ('d', 3)],
           )

In [4]:
# Partial indexing: select every entry whose outer-level label is "b";
# the result is indexed by the remaining inner level only.
data["b"]

1    0.627141
3    0.181274
dtype: float64

In [5]:
# Label slice over the outer level; both endpoints ("b" and "c") are included.
data["b":"c"]

b  1    0.627141
   3    0.181274
c  1    0.036821
   2    0.278927
dtype: float64

In [6]:
# Pick several outer-level groups at once by passing a list of labels to .loc.
data.loc[["b", "d"]]

b  1    0.627141
   3    0.181274
d  2    0.232790
   3    0.974609
dtype: float64

In [7]:
# Select on the inner level: keep every outer label (":") that has an
# entry with inner label 2.
data.loc[:, 2]

a    0.918108
c    0.278927
d    0.232790
dtype: float64

In [8]:
# Pivot the inner index level into columns; (outer, inner) pairs that do
# not exist in `data` show up as missing values in the resulting table.
data.unstack()

Unnamed: 0,1,2,3
a,0.55617,0.918108,0.13837
b,0.627141,,0.181274
c,0.036821,0.278927,
d,,0.23279,0.974609


In [9]:
# Round trip: unstack() pivots the inner level into columns and stack()
# pivots those columns back into an inner index level.
# NOTE(review): whether the missing cells created by unstack() are dropped
# again depends on the pandas stack implementation in use (see the
# `future_stack` option) -- confirm against the installed pandas version.
data.unstack().stack()

a  1    0.556170
   2    0.918108
   3    0.138370
b  1    0.627141
   3    0.181274
c  1    0.036821
   2    0.278927
d  2    0.232790
   3    0.974609
dtype: float64

In [10]:
# A 4x3 table of the integers 0..11 where both axes carry two-level labels:
# rows are (letter, number) pairs, columns are (state, color) pairs.
frame = pd.DataFrame(
    np.arange(12).reshape(4, 3),
    index=[
        ["a", "a", "b", "b"],
        [1, 2, 1, 2],
    ],
    columns=[
        ["Ohio", "Ohio", "Colorado"],
        ["Green", "Red", "Green"],
    ],
)
frame

Unnamed: 0_level_0,Unnamed: 1_level_0,Ohio,Ohio,Colorado
Unnamed: 0_level_1,Unnamed: 1_level_1,Green,Red,Green
a,1,0,1,2
a,2,3,4,5
b,1,6,7,8
b,2,9,10,11


In [11]:
# Give each hierarchical level a name, on the row index and on the columns;
# the names are set on the existing index objects of `frame`.
frame.index.set_names(["key1", "key2"], inplace=True)
frame.columns.set_names(["state", "color"], inplace=True)
frame

Unnamed: 0_level_0,state,Ohio,Ohio,Colorado
Unnamed: 0_level_1,color,Green,Red,Green
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,1,0,1,2
a,2,3,4,5
b,1,6,7,8
b,2,9,10,11


In [12]:
# Row index of `frame`: a MultiIndex whose levels are named key1 and key2.
frame.index

MultiIndex([('a', 1),
            ('a', 2),
            ('b', 1),
            ('b', 2)],
           names=['key1', 'key2'])

In [13]:
# Column index of `frame`: a MultiIndex whose levels are named state and color.
frame.columns

MultiIndex([(    'Ohio', 'Green'),
            (    'Ohio',   'Red'),
            ('Colorado', 'Green')],
           names=['state', 'color'])

In [14]:
# Number of levels in the row index.
frame.index.nlevels

2

In [15]:
# Partial column indexing: selecting the outer column label "Ohio" returns
# the sub-frame of its inner-level (color) columns.
frame["Ohio"]

Unnamed: 0_level_0,color,Green,Red
key1,key2,Unnamed: 2_level_1,Unnamed: 3_level_1
a,1,0,1
a,2,3,4
b,1,6,7
b,2,9,10


In [16]:
# A MultiIndex can also be built on its own from parallel label arrays
# (one array per level) and reused later.
pd.MultiIndex.from_arrays(
    arrays=[
        ["Ohio", "Ohio", "Colorado"],
        ["Green", "Red", "Green"],
    ],
    names=["state", "color"],
)

MultiIndex([(    'Ohio', 'Green'),
            (    'Ohio',   'Red'),
            ('Colorado', 'Green')],
           names=['state', 'color'])

### Reordering and Sorting Levels

In [17]:
# Exchange the two row-index levels by name; this returns a new frame with
# the level order swapped and the row data unchanged.
frame.swaplevel(i="key1", j="key2")

Unnamed: 0_level_0,state,Ohio,Ohio,Colorado
Unnamed: 0_level_1,color,Green,Red,Green
key2,key1,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
1,a,0,1,2
2,a,3,4,5
1,b,6,7,8
2,b,9,10,11
