## Data Wrangling: Join, Combine, and Reshape

In [2]:
import pandas as pd
import numpy as np

### Hierarchical indexing

In [4]:
#Hierarchical indexing is an important feature of pandas that enables you to have multiple (two or more) index levels on an axis. 
data = pd.Series(np.random.randn(9),index=[['a', 'a', 'a', 'b', 'b', 'c', 'c', 'd', 'd'],[1, 2, 3, 1, 3, 1, 2, 2, 3]])
data

a  1   -1.024109
   2    0.317811
   3   -0.646857
b  1    0.165373
   3   -0.822577
c  1    0.632566
   2    0.462337
d  2   -0.735023
   3   -0.216161
dtype: float64

In [5]:
data.index

MultiIndex([('a', 1),
            ('a', 2),
            ('a', 3),
            ('b', 1),
            ('b', 3),
            ('c', 1),
            ('c', 2),
            ('d', 2),
            ('d', 3)],
           )

In [6]:
data['b']

1    0.165373
3   -0.822577
dtype: float64

In [7]:
data['b':'c']

b  1    0.165373
   3   -0.822577
c  1    0.632566
   2    0.462337
dtype: float64

In [8]:
data.loc[['b', 'd']]

b  1    0.165373
   3   -0.822577
d  2   -0.735023
   3   -0.216161
dtype: float64

In [9]:
#rearrange the data into a DataFrame using its unstack method
data.unstack()

Unnamed: 0,1,2,3
a,-1.024109,0.317811,-0.646857
b,0.165373,,-0.822577
c,0.632566,0.462337,
d,,-0.735023,-0.216161


In [10]:
#The inverse operation of unstack is stack
data.unstack().stack()

a  1   -1.024109
   2    0.317811
   3   -0.646857
b  1    0.165373
   3   -0.822577
c  1    0.632566
   2    0.462337
d  2   -0.735023
   3   -0.216161
dtype: float64

In [12]:
frame = pd.DataFrame(np.arange(12).reshape((4, 3)), index=[['a', 'a', 'b', 'b'], [1, 2, 1, 2]],columns=[['Ohio', 'Ohio', 'Colorado'],['Green', 'Red', 'Green']])
frame

Unnamed: 0_level_0,Unnamed: 1_level_0,Ohio,Ohio,Colorado
Unnamed: 0_level_1,Unnamed: 1_level_1,Green,Red,Green
a,1,0,1,2
a,2,3,4,5
b,1,6,7,8
b,2,9,10,11


In [14]:
frame.index.names = ['key1', 'key2']
frame.columns.names = ['state', 'color']
frame

Unnamed: 0_level_0,state,Ohio,Ohio,Colorado
Unnamed: 0_level_1,color,Green,Red,Green
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,1,0,1,2
a,2,3,4,5
b,1,6,7,8
b,2,9,10,11


In [15]:
frame.swaplevel('key1','key2')

Unnamed: 0_level_0,state,Ohio,Ohio,Colorado
Unnamed: 0_level_1,color,Green,Red,Green
key2,key1,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
1,a,0,1,2
2,a,3,4,5
1,b,6,7,8
2,b,9,10,11


In [16]:
frame.sort_index(level=1)

Unnamed: 0_level_0,state,Ohio,Ohio,Colorado
Unnamed: 0_level_1,color,Green,Red,Green
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,1,0,1,2
b,1,6,7,8
a,2,3,4,5
b,2,9,10,11


In [17]:
frame.swaplevel(0, 1).sort_index(level=0)

Unnamed: 0_level_0,state,Ohio,Ohio,Colorado
Unnamed: 0_level_1,color,Green,Red,Green
key2,key1,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
1,a,0,1,2
1,b,6,7,8
2,a,3,4,5
2,b,9,10,11


In [18]:
frame.sum(level='key2')

  frame.sum(level='key2')


state,Ohio,Ohio,Colorado
color,Green,Red,Green
key2,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
1,6,8,10
2,12,14,16
