## Task
Explore Pandas basics

## Notebook Summary
* Header
* Series
* DataFrame
* Indexes
* Hierarchical Indexing
* Missing Data
* Descriptive statistics
* Misc

## References
* *Python for Data Analysis*, Wes McKinney


In [3]:
# display output from all cmds just like Python shell
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

import numpy as np
print 'numpy.version = ', np.__version__
import pandas as pd
print 'pandas.version = ', pd.__version__
from pandas import Series, DataFrame

%matplotlib inline

import matplotlib
print 'matplotlib.version = ', matplotlib.__version__
import matplotlib.pyplot as plt


numpy.version =  1.11.2
pandas.version =  0.19.1
matplotlib.version =  1.5.3


In [41]:
# Series
# Walk-through of Series basics: construction with an explicit index,
# label-based access, boolean filtering, element-wise arithmetic,
# membership tests, null checks, and naming the Series and its index.

# A Series with explicit string labels.
s = Series(['First', 'Second', 'Third', 'Fourth'], index=['a','b','c','d'])
s.values
s.index
s['d'] = 'Not Fifth'            # assign by label
s[['c', 'a', 'b', 'd']]         # select (and reorder) by a list of labels

print('---')

# Without an explicit index the labels default to 0..n-1.
s = Series([1,2,3,4])
s
s[s%2 == 0]                     # boolean mask keeps the even values
s**2                            # arithmetic is applied element-wise

# `in` tests the index labels, not the values: 0 is a label here even
# though it is not one of the stored values.
0 in s
1 in s
s.values
s.isnull()
s.notnull()

s.name = 'SeriesName'
s.index.name = 'IndexName'
s

# Replacing the index wholesale relabels the data in place; the new index
# is built from the plain list, so the previous index name is not kept.
s.index = ['aa', 'bb', 'cc', 'dd']
s


array(['First', 'Second', 'Third', 'Fourth'], dtype=object)

Index([u'a', u'b', u'c', u'd'], dtype='object')

c        Third
a        First
b       Second
d    Not Fifth
dtype: object

---


0    1
1    2
2    3
3    4
dtype: int64

1    2
3    4
dtype: int64

0     1
1     4
2     9
3    16
dtype: int64

True

True

array([1, 2, 3, 4])

0    False
1    False
2    False
3    False
dtype: bool

0    True
1    True
2    True
3    True
dtype: bool

IndexName
0    1
1    2
2    3
3    4
Name: SeriesName, dtype: int64

aa    1
bb    2
cc    3
dd    4
Name: SeriesName, dtype: int64

In [65]:
# DataFrame
# Walk-through of DataFrame basics: construction from dicts, column
# selection/ordering, column assignment and deletion, and axis names.

# A dict of scalars describes a single row, so the row label has to be
# supplied explicitly through `index`.
mydict = {
    'key1' : 'val1', 
    'key2' : 'val2',
    'key3' : 'val3'
}

DataFrame(mydict, index=['a'])

# A dict of lists carries its own length, so a default 0..n-1 index is used.
mydict = {
    'key1' : ['val1'], 
    'key2' : ['val2'],
    'key3' : ['val3']
}

DataFrame(mydict)
# `columns` selects and orders the columns; 'key4' is not a key of mydict,
# so that column is created and filled with NaN.
df = DataFrame(mydict, columns=['key4', 'key2', 'key3', 'key1'])
df['key4']
# Columns are assigned with [] rather than attribute syntax: attribute
# assignment only updates a column that already exists and would otherwise
# silently create a plain Python attribute on the object.
df['key3'] = 5                  # a scalar is broadcast to every row
df

# Assigning a Series aligns on the index: df only has label 0, so only the
# value stored under label 0 is used.
df['key3'] = Series([1,2,3], index=[0, 1, 2])
df

df['key5'] = 5                  # assigning to a new name adds a column
df

del df['key5']                  # del removes the column in place
df


df.index.name = 'MyIndexName'
df.columns.name = 'MyColumnName'
df

df.index
df.columns
df.values


Unnamed: 0,key1,key2,key3
a,val1,val2,val3


Unnamed: 0,key1,key2,key3
0,val1,val2,val3


0    NaN
Name: key4, dtype: object

Unnamed: 0,key4,key2,key3,key1
0,,val2,5,val1


Unnamed: 0,key4,key2,key3,key1
0,,val2,1,val1


Unnamed: 0,key4,key2,key3,key1,key5
0,,val2,1,val1,5


Unnamed: 0,key4,key2,key3,key1
0,,val2,1,val1


MyColumnName,key4,key2,key3,key1
MyIndexName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,,val2,1,val1


RangeIndex(start=0, stop=1, step=1, name=u'MyIndexName')

Index([u'key4', u'key2', u'key3', u'key1'], dtype='object', name=u'MyColumnName')

array([[nan, 'val2', 1, 'val1']], dtype=object)

In [115]:
# Index
# Walk-through of Index objects, dropping entries, and selecting data from
# a DataFrame by column, by boolean mask, by label and by position.
#
# Label-based selection uses .loc, position-based selection uses .iloc and
# scalar access uses .at; these replace .ix / set_value / get_value, which
# were deprecated and later removed from pandas.

s = Series(np.arange(3), index=['a', 'b', 'c'])
i = s.index
i
type(i)
i[1:]
# i[0] = 'd' - does not work since indexes are immutable
'a' in i
'd' in i

s.drop('a')                     # returns a new Series without label 'a'
s                               # the original Series is unchanged

df = DataFrame(np.arange(12).reshape(4,3), index=['a', 'b', 'c', 'd'], columns=['Col1', 'Col2', 'Col3'])
df
df.drop('a')                    # drop rows by label (default axis)
df.drop(['a','b'])
df.drop('Col1', axis=1)         # axis=1 drops columns instead
df.drop(['Col1', 'Col2', 'Col3'], axis=1)

print('---')

df                              # none of the drop() calls above modified df
df[['Col2', 'Col1']]            # a list of names selects/reorders columns
df[:2]                          # a slice selects rows
df<5
df[df<5]                        # cells where the mask is False become NaN

print('---')

# One row label plus a list of column labels yields a Series named after
# the row.
d2 = df.loc['a', ['Col2', 'Col3']]
d2
d2.name
d2.dtype
d2.index

df.loc[['a','b'], ['Col1','Col2']]
df.loc[:'b', :'Col2']           # label slices include the end label
df.iloc[2]                      # third row, selected by position

# Scalar write and read by (row label, column label). The assignment
# modifies df in place, so df is shown explicitly afterwards.
df.at['a', 'Col1'] = 99
df
df.at['a', 'Col1']

# Both a single column and a single row come back as a Series.
type(df['Col1'])
type(df.iloc[0])



Index([u'a', u'b', u'c'], dtype='object')

pandas.indexes.base.Index

Index([u'b', u'c'], dtype='object')

True

False

b    1
c    2
dtype: int64

a    0
b    1
c    2
dtype: int64

Unnamed: 0,Col1,Col2,Col3
a,0,1,2
b,3,4,5
c,6,7,8
d,9,10,11


Unnamed: 0,Col1,Col2,Col3
b,3,4,5
c,6,7,8
d,9,10,11


Unnamed: 0,Col1,Col2,Col3
c,6,7,8
d,9,10,11


Unnamed: 0,Col2,Col3
a,1,2
b,4,5
c,7,8
d,10,11


a
b
c
d


---


Unnamed: 0,Col1,Col2,Col3
a,0,1,2
b,3,4,5
c,6,7,8
d,9,10,11


Unnamed: 0,Col2,Col1
a,1,0
b,4,3
c,7,6
d,10,9


Unnamed: 0,Col1,Col2,Col3
a,0,1,2
b,3,4,5


Unnamed: 0,Col1,Col2,Col3
a,True,True,True
b,True,True,False
c,False,False,False
d,False,False,False


Unnamed: 0,Col1,Col2,Col3
a,0.0,1.0,2.0
b,3.0,4.0,
c,,,
d,,,


---


Col2    1
Col3    2
Name: a, dtype: int64

'a'

dtype('int64')

Index([u'Col2', u'Col3'], dtype='object')

Unnamed: 0,Col1,Col2
a,0,1
b,3,4


Unnamed: 0,Col1,Col2
a,0,1
b,3,4


Col1    6
Col2    7
Col3    8
Name: c, dtype: int64

Unnamed: 0,Col1,Col2,Col3
a,99,1,2
b,3,4,5
c,6,7,8
d,9,10,11


99

pandas.core.series.Series

pandas.core.series.Series

In [None]:
# Hierarchical indexing

