# Intro to Pandas

## Index Hierarchy

In [1]:
# Standard Imports
import numpy as np
from pandas import Series,DataFrame
import pandas as pd

from numpy.random import randn

In [2]:
# A pandas index can carry several levels at once. Passing a list of two
# parallel label lists builds a two-level index: the first list supplies the
# outer label of each entry, the second list the inner label.
ser = Series(
    data=np.random.randn(6),
    index=[
        [1, 1, 1, 2, 2, 2],              # outer level
        ['a', 'b', 'c', 'a', 'b', 'c'],  # inner level
    ],
)

In [3]:
# Show the Series: the outer and inner index levels are rendered as the two
# label columns to the left of the values
ser

1  a    0.593168
   b   -0.824193
   c   -0.287389
2  a   -1.805854
   b   -1.032485
   c    0.980388
dtype: float64

In [4]:
# Inspect the index object itself: a MultiIndex that stores the distinct
# labels of each level plus integer codes pointing into those labels
ser.index

MultiIndex(levels=[[1, 2], ['a', 'b', 'c']],
           codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]])

Select specific subsets

In [5]:
# Select a specific subset: every entry stored under outer-level label 1
ser[1]

a    0.593168
b   -0.824193
c   -0.287389
dtype: float64

In [6]:
# Same selection for outer-level label 2 (2 is a label in the outer level,
# not a level number)
ser[2]

a   -1.805854
b   -1.032485
c    0.980388
dtype: float64

In [7]:
# Deeper selection: take the sub-Series under outer label 2, then pick its
# inner label 'a' to reach a single value
ser[2]['a']

-1.8058544249419386

In [8]:
# Same value as above, fetched by position instead of by label.
# ser[2] is indexed by the strings 'a', 'b', 'c', so a bare integer key
# (ser[2][0]) only works through positional fallback, which recent pandas
# versions deprecate. .iloc makes the positional lookup explicit.
ser[2].iloc[0]

-1.8058544249419386

In [9]:
# We can also select on the inner index level: ':' keeps every outer label,
# 'a' picks the inner label, leaving one value per outer label
ser[:,'a']

1    0.593168
2   -1.805854
dtype: float64

### Unstack 

Create Data Frames from Series with multiple levels

In [25]:
# Unstack a Series into a DataFrame: level=-1 moves the innermost index level
# ('a', 'b', 'c') into the columns, leaving the outer level as the row index
dframe = ser.unstack(level=-1)

# Show
dframe

Unnamed: 0,a,b,c
1,0.593168,-0.824193,-0.287389
2,-1.805854,-1.032485,0.980388


Reverse a DF into a series

In [26]:
# Can also go back to a Series: unstacking the only row level of a DataFrame
# returns a Series with a two-level index. Note the level order is swapped
# relative to `ser`: the former column labels ('a', 'b', 'c') become the
# outer level and the row labels (1, 2) the inner level.
dframe.unstack(level=0)

a  1    0.593168
   2   -1.805854
b  1   -0.824193
   2   -1.032485
c  1   -0.287389
   2    0.980388
dtype: float64

### Multi Index DF

In [27]:
# Multi-level indexing works for DataFrames too, on both axes: `index` and
# `columns` each take a list of parallel label lists (outer level first).
dframe2 = DataFrame(
    data=np.arange(24).reshape(4, 6),
    index=[
        ['a', 'a', 'b', 'b'],  # outer row level
        [1, 2, 1, 2],          # inner row level
    ],
    columns=[
        ['London', 'London', 'Rome', 'Rome', 'Paris', 'Paris'],  # outer column level
        ['cold', 'hot', 'cold', 'hot', 'hot', 'cold'],           # inner column level
    ],
)

dframe2

Unnamed: 0_level_0,Unnamed: 1_level_0,London,London,Rome,Rome,Paris,Paris
Unnamed: 0_level_1,Unnamed: 1_level_1,cold,hot,cold,hot,hot,cold
a,1,0,1,2,3,4,5
a,2,6,7,8,9,10,11
b,1,12,13,14,15,16,17
b,2,18,19,20,21,22,23


Give these index levels names

In [28]:
# Name the two row-index levels (outer level first)
dframe2.index.names = ['Index_1','Index_2']

# Name the two column levels (outer level first)
dframe2.columns.names = ['Cities','Temp']

# Show the frame; the assignments above modify dframe2 itself
dframe2

Unnamed: 0_level_0,Cities,London,London,Rome,Rome,Paris,Paris
Unnamed: 0_level_1,Temp,cold,hot,cold,hot,hot,cold
Index_1,Index_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
a,1,0,1,2,3,4,5
a,2,6,7,8,9,10,11
b,1,12,13,14,15,16,17
b,2,18,19,20,21,22,23


In [29]:
# We can also interchange level orders: swap the 'Cities' and 'Temp' column
# levels (axis=1 targets the columns). The data itself is not reordered.
dframe2.swaplevel('Cities','Temp', axis=1)

Unnamed: 0_level_0,Temp,cold,hot,cold,hot,hot,cold
Unnamed: 0_level_1,Cities,London,London,Rome,Rome,Paris,Paris
Index_1,Index_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
a,1,0,1,2,3,4,5
a,2,6,7,8,9,10,11
b,1,12,13,14,15,16,17
b,2,18,19,20,21,22,23


In [31]:
# Swap the row-index levels instead (axis=0 targets the rows); with no level
# arguments the two innermost levels, here Index_1 and Index_2, are exchanged
dframe2.swaplevel(axis=0)

Unnamed: 0_level_0,Cities,London,London,Rome,Rome,Paris,Paris
Unnamed: 0_level_1,Temp,cold,hot,cold,hot,hot,cold
Index_2,Index_1,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
1,a,0,1,2,3,4,5
2,a,6,7,8,9,10,11
1,b,12,13,14,15,16,17
2,b,18,19,20,21,22,23


#### Sort Levels

Ascending

In [32]:
# We can also sort rows by an index level: sort_values accepts the name of an
# index level, so this orders the rows by 'Index_2' in ascending order
dframe2.sort_values(['Index_2'])

Unnamed: 0_level_0,Cities,London,London,Rome,Rome,Paris,Paris
Unnamed: 0_level_1,Temp,cold,hot,cold,hot,hot,cold
Index_1,Index_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
a,1,0,1,2,3,4,5
b,1,12,13,14,15,16,17
a,2,6,7,8,9,10,11
b,2,18,19,20,21,22,23


Descending

In [33]:
# Same sort on the 'Index_2' level, largest labels first
dframe2.sort_values(['Index_2'], ascending=False)

Unnamed: 0_level_0,Cities,London,London,Rome,Rome,Paris,Paris
Unnamed: 0_level_1,Temp,cold,hot,cold,hot,hot,cold
Index_1,Index_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
a,2,6,7,8,9,10,11
b,2,18,19,20,21,22,23
a,1,0,1,2,3,4,5
b,1,12,13,14,15,16,17


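**Note:** In the two outputs above, the rows are ordered by the `Index_2` level. `sort_values` returns a new, sorted DataFrame; `dframe2` itself keeps its original order, as the next cell shows.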

In [35]:
# Show the original frame again: its row order is unchanged
dframe2

Unnamed: 0_level_0,Cities,London,London,Rome,Rome,Paris,Paris
Unnamed: 0_level_1,Temp,cold,hot,cold,hot,hot,cold
Index_1,Index_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
a,1,0,1,2,3,4,5
a,2,6,7,8,9,10,11
b,1,12,13,14,15,16,17
b,2,18,19,20,21,22,23


In [36]:
# We can also perform operations on particular levels.
# The `level=` argument of DataFrame.sum was deprecated in pandas 1.3 and
# removed in 2.0, so group on the level explicitly instead: transpose so the
# column levels become row levels, sum each 'Temp' group, then transpose back.
# The result has one column per 'Temp' label (cold, hot), as before.
dframe2.T.groupby(level='Temp').sum().T

Unnamed: 0_level_0,Temp,cold,hot
Index_1,Index_2,Unnamed: 2_level_1,Unnamed: 3_level_1
a,1,7,8
a,2,25,26
b,1,43,44
b,2,61,62


### End of this section! 