8.1 Hierarchical Indexing

In [8]:
import pandas as pd
import numpy as np


data = pd.Series(np.random.randn(9),
                 index=[["a", "a", "a", "b", "b", "c", "c", "d", "d"],
                        [1, 2, 3, 1, 3, 1, 2, 2, 3]])

print(data)
print('\n')
print(data.index)
print('\n')
print(data['b'])
print('\n')
print(data['b':'c'])
print('\n')
print(data.loc[['b', 'd']])
print('\n')
print(data.loc[:, 2])
print('\n')
print(data.unstack())
print('\n')
print(data.unstack().stack())

a  1   -0.753332
   2    0.859014
   3   -0.996806
b  1    0.462221
   3   -1.020178
c  1    0.439455
   2   -0.377132
d  2   -1.674719
   3    0.178594
dtype: float64


MultiIndex([('a', 1),
            ('a', 2),
            ('a', 3),
            ('b', 1),
            ('b', 3),
            ('c', 1),
            ('c', 2),
            ('d', 2),
            ('d', 3)],
           )


1    0.462221
3   -1.020178
dtype: float64


b  1    0.462221
   3   -1.020178
c  1    0.439455
   2   -0.377132
dtype: float64


b  1    0.462221
   3   -1.020178
d  2   -1.674719
   3    0.178594
dtype: float64


a    0.859014
c   -0.377132
d   -1.674719
dtype: float64


          1         2         3
a -0.753332  0.859014 -0.996806
b  0.462221       NaN -1.020178
c  0.439455 -0.377132       NaN
d       NaN -1.674719  0.178594


a  1   -0.753332
   2    0.859014
   3   -0.996806
b  1    0.462221
   3   -1.020178
c  1    0.439455
   2   -0.377132
d  2   -1.674719
   3    0.178594
dtype: float64


In [11]:
frame = pd.DataFrame(np.arange(12).reshape((4, 3)),
                     index=[["a", "a", "b", "b"], [1, 2, 1, 2]],
                     columns=[["Ohio", "Ohio", "Colorado"],
                              ["Green", "Red", "Green"]])

print(frame)

frame.index.names = ['key1', 'key2']
frame.columns.names = ['state', 'color']

print('\n')
print(frame)

print('\n')
print(frame['Ohio'])

     Ohio     Colorado
    Green Red    Green
a 1     0   1        2
  2     3   4        5
b 1     6   7        8
  2     9  10       11


state      Ohio     Colorado
color     Green Red    Green
key1 key2                   
a    1        0   1        2
     2        3   4        5
b    1        6   7        8
     2        9  10       11


color      Green  Red
key1 key2            
a    1         0    1
     2         3    4
b    1         6    7
     2         9   10


In [13]:
# Reordering and Sorting Levels

print(frame.sort_index(level=1))

print('\n')
print(frame.swaplevel(0, 1).sort_index(level=0))


state      Ohio     Colorado
color     Green Red    Green
key1 key2                   
a    1        0   1        2
b    1        6   7        8
a    2        3   4        5
b    2        9  10       11


state      Ohio     Colorado
color     Green Red    Green
key2 key1                   
1    a        0   1        2
     b        6   7        8
2    a        3   4        5
     b        9  10       11


In [19]:
# Summary Statistics by Level

print(frame.groupby('key2').sum())
print('\n')
print(frame.T.groupby('color').sum().T)

state  Ohio     Colorado
color Green Red    Green
key2                    
1         6   8       10
2        12  14       16


color      Green  Red
key1 key2            
a    1         2    1
     2         8    4
b    1        14    7
     2        20   10


In [23]:
# Indexing with a DataFrame’s columns

frame = pd.DataFrame({"a": range(7), "b": range(7, 0, -1),
                      "c": ["one", "one", "one", "two", "two",
                            "two", "two"],
                      "d": [0, 1, 2, 0, 1, 2, 3]})

print(frame)

frame2 = frame.set_index(['c', 'd'])
print('\n')
print(frame2)

print('\n')
print(frame.set_index(['c', 'd'], drop=False))

print('\n')
print(frame2.reset_index)

   a  b    c  d
0  0  7  one  0
1  1  6  one  1
2  2  5  one  2
3  3  4  two  0
4  4  3  two  1
5  5  2  two  2
6  6  1  two  3


       a  b
c   d      
one 0  0  7
    1  1  6
    2  2  5
two 0  3  4
    1  4  3
    2  5  2
    3  6  1


       a  b    c  d
c   d              
one 0  0  7  one  0
    1  1  6  one  1
    2  2  5  one  2
two 0  3  4  two  0
    1  4  3  two  1
    2  5  2  two  2
    3  6  1  two  3


<bound method DataFrame.reset_index of        a  b
c   d      
one 0  0  7
    1  1  6
    2  2  5
two 0  3  4
    1  4  3
    2  5  2
    3  6  1>
