In [1]:
import numpy as np
import pandas as pd

This chapter is about combining data from disparate sources. Combining data from separate sources can be challenging because their formats and layouts can be vastly different.

In [2]:
# Hierarchical indexing: a single axis can carry several index levels.
# This lets us work with higher-dimensional data in a lower-dimensional form
# (here, two label levels on a one-dimensional Series).
outer_labels = ['a', 'a', 'a', 'b', 'b', 'c', 'c', 'd', 'd']
inner_labels = [1, 2, 3, 1, 3, 1, 2, 2, 3]
values = np.random.uniform(size=9)  # nine unseeded random draws, one per label pair
data: pd.Series = pd.Series(values, index=[outer_labels, inner_labels])

In [3]:
data  # in the display, a blank outer-level cell means "use the label directly above"

a  1    0.341699
   2    0.000855
   3    0.847967
b  1    0.023589
   3    0.260882
c  1    0.606250
   2    0.654656
d  2    0.995543
   3    0.979886
dtype: float64

In [6]:
# The index is a MultiIndex: each entry is an (outer label, inner label) tuple.
data.index

MultiIndex([('a', 1),
            ('a', 2),
            ('a', 3),
            ('b', 1),
            ('b', 3),
            ('c', 1),
            ('c', 2),
            ('d', 2),
            ('d', 3)],
           )

In [None]:
# Partial indexing: select subsets of the data using only part of the index.
# 1) Select every element whose outer-level label is "b".
data['b']

In [11]:
# .loc with a single outer-level label returns that group's values,
# indexed by the remaining inner level only.
data.loc['b']

1    0.023589
3    0.260882
dtype: float64

In [8]:
# Label-based slice on the outer level; both endpoints ('b' and 'c') are included.
data['b':'c']

b  1    0.023589
   3    0.260882
c  1    0.606250
   2    0.654656
dtype: float64

In [10]:
# A list of outer-level labels selects several groups at once; both index levels are kept.
data.loc[['b', 'd']]

b  1    0.023589
   3    0.260882
d  2    0.995543
   3    0.979886
dtype: float64

In [17]:
# Selecting from the inner index level: take every outer label (:) and keep the
# entries whose inner label is 2. Outer labels without such an entry (here 'b')
# do not appear in the result.
data.loc[:, 2]

a    0.000855
c    0.654656
d    0.995543
dtype: float64

In [19]:
# Inner label 1 across all outer labels; 'd' has no such entry, so it is absent.
data.loc[:, 1]

a    0.341699
b    0.023589
c    0.606250
dtype: float64

In [20]:
# Hierarchical indexing is important in reshaping data and creating pivot tables.
# Pivot-table creation is a "group-based operation" applied to each group within a DataFrame.
# unstack() moves the inner index level into the columns; (outer, inner) pairs that
# are not present in the Series show up as missing values in the resulting table.
data.unstack()

Unnamed: 0,1,2,3
a,0.341699,0.000855,0.847967
b,0.023589,,0.260882
c,0.60625,0.654656,
d,,0.995543,0.979886


In [22]:
# Stack the data back again: stack() moves the columns back into an inner index level.
# In the recorded output the missing cells created by unstack() are gone, so the
# original Series is recovered.
# NOTE(review): this round trip relies on stack() dropping missing values; pandas'
# newer stack implementation (future_stack=True) appears to keep them — confirm
# against the installed pandas version.
data.unstack().stack()

a  1    0.341699
   2    0.000855
   3    0.847967
b  1    0.023589
   3    0.260882
c  1    0.606250
   2    0.654656
d  2    0.995543
   3    0.979886
dtype: float64