## Task
Explore indexing in pandas

## Notebook Summary
* Using row & column labels
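* Using `.ix[]` (deprecated since pandas 0.20 and removed in 1.0; `.loc[]` and `.iloc[]` are the label-based and position-based replacements)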
* Hierarchical indexes, swap levels, sort index

## References
* *Python for Data Analysis*, Wes McKinney, O'Reilly, 2012
* *Numerical Python*, Robert Johansson, APress, 2015
* *Python Data Science Handbook*, Jake VanderPlas, O'Reilly, 2016


In [2]:
# Notebook setup: imports, display configuration, and a version report.
# All imports live here so that later cells can rely on np, pd, Series and
# DataFrame after a fresh "Restart & Run All".
import platform

import IPython
import numpy as np
import pandas as pd
from IPython.core.interactiveshell import InteractiveShell
from pandas import Series, DataFrame

# display output from all cmds just like Python shell
# (later cells rely on this: every bare expression in a cell is rendered,
# not only the last one)
InteractiveShell.ast_node_interactivity = "all"

# Single-argument print() with explicit formatting produces the same text on
# Python 2 and Python 3; the multi-argument print statement is Python 2 only.
print('python.version =  {}'.format(platform.python_version()))
print('ipython.version = {}'.format(IPython.version_info))
print('numpy.version = {}'.format(np.__version__))
print('pandas.version = {}'.format(pd.__version__))


python.version =  2.7.10
ipython.version = (5, 1, 0, '')
numpy.version = 1.11.3
pandas.version = 0.19.2


In [7]:
# Index using row & column labels - Series
# Builds a small labelled Series, inspects its Index object, and shows that
# drop() hands back a new Series instead of modifying the original.
# Bare expressions are displayed because the setup cell sets
# ast_node_interactivity = "all".

s = Series(range(3), index=['a', 'b', 'c'])
s
print('-----')  # single-argument print() is valid on both Python 2 and 3

i = s.index
i
type(i)
i[1:]  # slicing an Index gives back an Index
# i[0] = 'd' - does not work since indexes are immutable
'a' in i  # membership is tested against the labels
'd' in i

s.drop('a')  # result omits label 'a'
s # original Series is not affected by drop


a    0
b    1
c    2
dtype: int64

-----


Index([u'a', u'b', u'c'], dtype='object')

pandas.indexes.base.Index

Index([u'b', u'c'], dtype='object')

True

False

b    1
c    2
dtype: int64

a    0
b    1
c    2
dtype: int64

In [29]:
# Index using row & column labels - DataFrame
# Walks through dropping rows/columns by label, selecting columns and row
# slices, reading/writing a single cell, and boolean masking.

df = DataFrame(np.arange(12).reshape(4,3), index=['a', 'b', 'c', 'd'], columns=['Col1', 'Col2', 'Col3'])
print('Original DataFrame')
df
print('-----')

print('Drop rows & columns by index')
# Each drop() below is displayed only; df itself is left untouched.
df.drop('a')
df.drop(['a','b'])
df.drop('Col1', axis=1)
df.drop(['Col1', 'Col2', 'Col3'], axis=1)  # only the row labels remain

print('\n----- Access by Index')
df[['Col2', 'Col1']]  # list of column labels, in the requested order
df[:2]                # a plain slice selects rows by position

print('-----')
# .at is the label-based scalar accessor and replaces the deprecated
# set_value()/get_value() pair. The assignment mutates df in place, so the
# comparisons below see 99 in row 'a', column 'Col1'.
df.at['a', 'Col1'] = 99
df  # show the frame after the write (set_value used to return it)
df.at['a', 'Col1']

print('-----')
df<5      # element-wise boolean frame
df[df<5]  # entries where the mask is False become NaN

Original DataFrame


Unnamed: 0,Col1,Col2,Col3
a,0,1,2
b,3,4,5
c,6,7,8
d,9,10,11


-----
Drop rows & columns by index


Unnamed: 0,Col1,Col2,Col3
b,3,4,5
c,6,7,8
d,9,10,11


Unnamed: 0,Col1,Col2,Col3
c,6,7,8
d,9,10,11


Unnamed: 0,Col2,Col3
a,1,2
b,4,5
c,7,8
d,10,11


a
b
c
d



----- Access by Index


Unnamed: 0,Col2,Col1
a,1,0
b,4,3
c,7,6
d,10,9


Unnamed: 0,Col1,Col2,Col3
a,0,1,2
b,3,4,5


-----


Unnamed: 0,Col1,Col2,Col3
a,99,1,2
b,3,4,5
c,6,7,8
d,9,10,11


99

-----


Unnamed: 0,Col1,Col2,Col3
a,False,True,True
b,True,True,False
c,False,False,False
d,False,False,False


Unnamed: 0,Col1,Col2,Col3
a,,1.0,2.0
b,3.0,4.0,
c,,,
d,,,


In [32]:
# .loc[] / .iloc[] - Series (explicit replacements for the deprecated .ix[])
# .ix[] guessed between label and position lookups; .loc[] is always
# label-based and .iloc[] is always position-based, so each lookup below
# states which one it means.

s = Series(range(5), index=['A','B','C','D','E'])
s

s.loc['A']   # by label
s.iloc[0]    # by position

s.loc[['A','C']]  # list of labels
s.iloc[[0,2]]     # list of positions


A    0
B    1
C    2
D    3
E    4
dtype: int64

0

0

A    0
C    2
dtype: int64

A    0
C    2
dtype: int64

In [33]:
# .loc[] / .iloc[] - DataFrame (get by rows)
# Explicit label-based (.loc) and position-based (.iloc) lookups replace the
# deprecated .ix[] accessor; the selected data is the same.

df = DataFrame(np.arange(12).reshape(4,3), index=['a', 'b', 'c', 'd'], columns=['Col1', 'Col2', 'Col3'])
df

print('Get Row a, Cols 2 & 3')
d2 = df.loc['a', ['Col2', 'Col3']] # single label will return Series
d2
type(d2)
d2.name   # the row label becomes the Series name
d2.dtype
d2.index  # the selected column labels become the Series index

print('-----')

df.loc[['a','b'], ['Col1','Col2']] # Multiple labels will return DataFrame
type(df.loc[['a','b'], ['Col1','Col2']])
df.loc[:'b', :'Col2'] # stop is inclusive, so b and Col2 are returned in result
df.iloc[2] # returns 3rd row


Unnamed: 0,Col1,Col2,Col3
a,0,1,2
b,3,4,5
c,6,7,8
d,9,10,11


Get Row a, Cols 2 & 3


Col2    1
Col3    2
Name: a, dtype: int64

pandas.core.series.Series

'a'

dtype('int64')

Index([u'Col2', u'Col3'], dtype='object')

-----


Unnamed: 0,Col1,Col2
a,0,1
b,3,4


pandas.core.frame.DataFrame

Unnamed: 0,Col1,Col2
a,0,1
b,3,4


Col1    6
Col2    7
Col3    8
Name: c, dtype: int64

In [45]:
# Hierarchical indexing - Series
# A Series with a two-level index (outer 'a'/'b'/'c', inner 'x'/'y'/'z'):
# partial selection on the outer level, selection on both levels, and the
# round trip between the stacked Series and the unstacked DataFrame.

s = Series(np.arange(9), index=[['a','a','a','b','b','b','c','c','c'],['x','y','z','x','y','z','x','y','z']])
s
s['a']        # one outer label -> the inner level remains as the index
s['b':'c']    # label slice on the outer level, stop inclusive
s[['a','c']]  # list of outer labels

print('---')
s.loc[['a','c']]  # explicit label-based form (replaces the deprecated .ix[])

s['a','y']  # outer + inner label -> scalar
s[:,'x']    # every outer label, inner label 'x'

print('\n----- Stack & Unstack')

s.unstack() # same as unstack(level=-1): the innermost level becomes the columns
type(s.unstack())
s.unstack(level=0)  # the outer level becomes the columns instead

s.unstack().stack()  # stacking the frame again gives back a Series
type(s.unstack().stack())


a  x    0
   y    1
   z    2
b  x    3
   y    4
   z    5
c  x    6
   y    7
   z    8
dtype: int64

x    0
y    1
z    2
dtype: int64

b  x    3
   y    4
   z    5
c  x    6
   y    7
   z    8
dtype: int64

a  x    0
   y    1
   z    2
c  x    6
   y    7
   z    8
dtype: int64

---


a  x    0
   y    1
   z    2
c  x    6
   y    7
   z    8
dtype: int64

1

a    0
b    3
c    6
dtype: int64


----- Stack & Unstack


Unnamed: 0,x,y,z
a,0,1,2
b,3,4,5
c,6,7,8


pandas.core.frame.DataFrame

Unnamed: 0,a,b,c
x,0,3,6
y,1,4,7
z,2,5,8


a  x    0
   y    1
   z    2
b  x    3
   y    4
   z    5
c  x    6
   y    7
   z    8
dtype: int64

pandas.core.series.Series

In [52]:
# Hierarchical indexing - DataFrame
# A frame with two-level row labels (Item, SubItem) and two-level column
# labels (Flower, Metric): column selection, unstacking, swapping levels,
# and sorting by a level.

df = DataFrame(np.arange(36).reshape(9,4), 
               index=[['a','a','a','b','b','b','c','c','c'],['x','y','z','x','y','z','x','y','z']],
               columns=[['Iris','Iris','Campanula','Campanula'],['Petal','Sepal','Petal','Sepal']]
              )
df.index.names = ['Item','SubItem']
df.columns.names = ['Flower', 'Metric']
# NOTE(review): plain attribute assignment; nothing below reads df.name -
# confirm whether it is still needed.
df.name = 'Flower Metrics'
df

print('-----')

df['Iris']           # outer column label -> sub-frame with the Metric level
df['Iris']['Petal']
type(df['Iris']['Petal']) # this is a Series
df['Iris']['Petal'].unstack() # same as unstacking Series

print('\n----- Unstack original DataFrame')
df.unstack()         # innermost row level (SubItem) moves into the columns
df.unstack(level=0)  # outer row level (Item) moves into the columns

# NOTE: despite the printed headings, the swaplevel('Item', 'SubItem') calls
# act on the row index levels; only the final statement (axis=1) swaps the
# column levels. The heading strings are kept as-is so the output is unchanged.
print('\n----- Swap column levels')
df.swaplevel('Item', 'SubItem')

print('\n----- Swap column levels and sort outer level')
# sort_index(level=...) replaces the deprecated sortlevel().
df.swaplevel('Item', 'SubItem').sort_index(level=0) # same as...
df.swaplevel('Item', 'SubItem').sort_index()

df.swaplevel(0,1, axis=1).sort_index(level=0, axis=1)


Unnamed: 0_level_0,Flower,Iris,Iris,Campanula,Campanula
Unnamed: 0_level_1,Metric,Petal,Sepal,Petal,Sepal
Item,SubItem,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
a,x,0,1,2,3
a,y,4,5,6,7
a,z,8,9,10,11
b,x,12,13,14,15
b,y,16,17,18,19
b,z,20,21,22,23
c,x,24,25,26,27
c,y,28,29,30,31
c,z,32,33,34,35


-----


Unnamed: 0_level_0,Metric,Petal,Sepal
Item,SubItem,Unnamed: 2_level_1,Unnamed: 3_level_1
a,x,0,1
a,y,4,5
a,z,8,9
b,x,12,13
b,y,16,17
b,z,20,21
c,x,24,25
c,y,28,29
c,z,32,33


Item  SubItem
a     x           0
      y           4
      z           8
b     x          12
      y          16
      z          20
c     x          24
      y          28
      z          32
Name: Petal, dtype: int64

pandas.core.series.Series

SubItem,x,y,z
Item,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
a,0,4,8
b,12,16,20
c,24,28,32



----- Unstack original DataFrame


Flower,Iris,Iris,Iris,Iris,Iris,Iris,Campanula,Campanula,Campanula,Campanula,Campanula,Campanula
Metric,Petal,Petal,Petal,Sepal,Sepal,Sepal,Petal,Petal,Petal,Sepal,Sepal,Sepal
SubItem,x,y,z,x,y,z,x,y,z,x,y,z
Item,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3
a,0,4,8,1,5,9,2,6,10,3,7,11
b,12,16,20,13,17,21,14,18,22,15,19,23
c,24,28,32,25,29,33,26,30,34,27,31,35


Flower,Iris,Iris,Iris,Iris,Iris,Iris,Campanula,Campanula,Campanula,Campanula,Campanula,Campanula
Metric,Petal,Petal,Petal,Sepal,Sepal,Sepal,Petal,Petal,Petal,Sepal,Sepal,Sepal
Item,a,b,c,a,b,c,a,b,c,a,b,c
SubItem,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3
x,0,12,24,1,13,25,2,14,26,3,15,27
y,4,16,28,5,17,29,6,18,30,7,19,31
z,8,20,32,9,21,33,10,22,34,11,23,35



----- Swap column levels


Unnamed: 0_level_0,Flower,Iris,Iris,Campanula,Campanula
Unnamed: 0_level_1,Metric,Petal,Sepal,Petal,Sepal
SubItem,Item,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
x,a,0,1,2,3
y,a,4,5,6,7
z,a,8,9,10,11
x,b,12,13,14,15
y,b,16,17,18,19
z,b,20,21,22,23
x,c,24,25,26,27
y,c,28,29,30,31
z,c,32,33,34,35



----- Swap column levels and sort outer level


Unnamed: 0_level_0,Flower,Iris,Iris,Campanula,Campanula
Unnamed: 0_level_1,Metric,Petal,Sepal,Petal,Sepal
SubItem,Item,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
x,a,0,1,2,3
x,b,12,13,14,15
x,c,24,25,26,27
y,a,4,5,6,7
y,b,16,17,18,19
y,c,28,29,30,31
z,a,8,9,10,11
z,b,20,21,22,23
z,c,32,33,34,35


Unnamed: 0_level_0,Flower,Iris,Iris,Campanula,Campanula
Unnamed: 0_level_1,Metric,Petal,Sepal,Petal,Sepal
SubItem,Item,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
x,a,0,1,2,3
x,b,12,13,14,15
x,c,24,25,26,27
y,a,4,5,6,7
y,b,16,17,18,19
y,c,28,29,30,31
z,a,8,9,10,11
z,b,20,21,22,23
z,c,32,33,34,35


Unnamed: 0_level_0,Metric,Petal,Petal,Sepal,Sepal
Unnamed: 0_level_1,Flower,Campanula,Iris,Campanula,Iris
Item,SubItem,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
a,x,2,0,3,1
a,y,6,4,7,5
a,z,10,8,11,9
b,x,14,12,15,13
b,y,18,16,19,17
b,z,22,20,23,21
c,x,26,24,27,25
c,y,30,28,31,29
c,z,34,32,35,33
