## Task
Explore Pandas basics

## Notebook Summary
* Header
* Series
* DataFrame
* Indexes
* Hierarchical Indexing
* Missing Data
* Summary & descriptive statistics
* Reindexing, indexing
* Data alignment
* Sorting, ranking and function application

## References
* *Python for Data Analysis*, Wes McKinney


In [1]:
# Notebook setup: imports, display configuration and a version report.
import platform

import IPython
from IPython.core.interactiveshell import InteractiveShell
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

# display output from all cmds just like Python shell
InteractiveShell.ast_node_interactivity = "all"

# Single-argument print(...) calls behave the same on Python 2 and Python 3.
# The text (including the double space after some '=') matches what the
# original `print a, b` statements produced.  A `from __future__ import
# print_function` is deliberately avoided: IPython would carry it over to the
# following cells.
print('python.version =  {}'.format(platform.python_version()))
print('ipython.version = {}'.format(IPython.version_info))
print('numpy.version =  {}'.format(np.__version__))
print('pandas.version =  {}'.format(pd.__version__))


python.version =  2.7.10
ipython.version = (5, 1, 0, '')
numpy.version =  1.11.2
pandas.version =  0.19.1


In [3]:
# Series: a one-dimensional array of values paired with an index of labels

s = Series(['First', 'Second', 'Third', 'Fourth'], index=['a','b','c','d'])
s.values
s.index
s['d'] = 'Not Fifth'          # assign through a label
s[['c', 'a', 'b', 'd']]       # a list of labels selects in the order given

print('---')

# No index supplied: labels default to 0..n-1
s = Series([1,2,3,4])
s
s[s%2 == 0]                   # boolean mask keeps the even values
s**2                          # element-wise arithmetic

# `in` tests the index labels, not the values: 0 is a label but not a value
0 in s
1 in s
s.values
s.isnull()
s.notnull()

# Both the Series and its index can carry a name
s.name = 'SeriesName'
s.index.name = 'IndexName'
s

# Assigning a new list replaces the whole index (and drops the index name)
s.index = ['aa', 'bb', 'cc', 'dd']
s


array(['First', 'Second', 'Third', 'Fourth'], dtype=object)

Index([u'a', u'b', u'c', u'd'], dtype='object')

c        Third
a        First
b       Second
d    Not Fifth
dtype: object

---


0    1
1    2
2    3
3    4
dtype: int64

1    2
3    4
dtype: int64

0     1
1     4
2     9
3    16
dtype: int64

True

True

array([1, 2, 3, 4])

0    False
1    False
2    False
3    False
dtype: bool

0    True
1    True
2    True
3    True
dtype: bool

IndexName
0    1
1    2
2    3
3    4
Name: SeriesName, dtype: int64

aa    1
bb    2
cc    3
dd    4
Name: SeriesName, dtype: int64

In [4]:
# DataFrame: a table of columns that share one row index

# A dict of scalar values has no row labels of its own, so one is supplied
mydict = {
    'key1' : 'val1', 
    'key2' : 'val2',
    'key3' : 'val3'
}

DataFrame(mydict, index=['a'])

# A dict of lists: each list becomes a column, rows get the default 0..n-1 index
mydict = {
    'key1' : ['val1'], 
    'key2' : ['val2'],
    'key3' : ['val3']
}

DataFrame(mydict)
# `columns=` fixes the column order; 'key4' is not in the dict, so it is all NaN
df = DataFrame(mydict, columns=['key4', 'key2', 'key3', 'key1'])
df['key4']

# Bracket assignment is used instead of `df.key3 = ...`: attribute assignment
# only works for columns that already exist and would otherwise silently set a
# plain Python attribute.
df['key3'] = 5                # a scalar is broadcast to every row
df

# A Series is aligned on the index: only label 0 exists in df, so only 1 is kept
df['key3'] = Series([1,2,3], index=[0, 1, 2])
df

df['key5'] = 5                # assigning to a new key adds a column
df

del df['key5']                # and `del` removes it again
df


df.index.name = 'MyIndexName'
df.columns.name = 'MyColumnName'
df

df.index
df.columns
df.values


Unnamed: 0,key1,key2,key3
a,val1,val2,val3


Unnamed: 0,key1,key2,key3
0,val1,val2,val3


0    NaN
Name: key4, dtype: object

Unnamed: 0,key4,key2,key3,key1
0,,val2,5,val1


Unnamed: 0,key4,key2,key3,key1
0,,val2,1,val1


Unnamed: 0,key4,key2,key3,key1,key5
0,,val2,1,val1,5


Unnamed: 0,key4,key2,key3,key1
0,,val2,1,val1


MyColumnName,key4,key2,key3,key1
MyIndexName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,,val2,1,val1


RangeIndex(start=0, stop=1, step=1, name=u'MyIndexName')

Index([u'key4', u'key2', u'key3', u'key1'], dtype='object', name=u'MyColumnName')

array([[nan, 'val2', 1, 'val1']], dtype=object)

In [5]:
# Index objects, dropping entries, and selecting from a DataFrame

s = Series(np.arange(3), index=['a', 'b', 'c'])
i = s.index
i
type(i)
i[1:]
# i[0] = 'd' - does not work since indexes are immutable
'a' in i
'd' in i

# drop returns a new object; `s` itself still has all three entries
s.drop('a')
s

df = DataFrame(np.arange(12).reshape(4,3), index=['a', 'b', 'c', 'd'], columns=['Col1', 'Col2', 'Col3'])
df
df.drop('a')
df.drop(['a','b'])
df.drop('Col1', axis=1)                      # axis=1 drops columns instead of rows
df.drop(['Col1', 'Col2', 'Col3'], axis=1)

print('---')

df
df[['Col2', 'Col1']]    # list of names -> columns, in the order given
df[:2]                  # slice -> rows
df<5
df[df<5]                # cells failing the mask become NaN

print('---')

# Label-based selection uses .loc and position-based selection uses .iloc.
# They replace the ambiguous, deprecated .ix indexer.
d2 = df.loc['a', ['Col2', 'Col3']]
d2
d2.name
d2.dtype
d2.index

df.loc[['a','b'], ['Col1','Col2']]
df.loc[:'b', :'Col2']   # label slices include the end label
df.iloc[2]              # third row, by position

# Scalar access by label: .at replaces the deprecated set_value / get_value
df.at['a', 'Col1'] = 99
df
df.at['a', 'Col1']

# A single column and a single row are both Series
type(df['Col1'])
type(df.iloc[0])


Index([u'a', u'b', u'c'], dtype='object')

pandas.indexes.base.Index

Index([u'b', u'c'], dtype='object')

True

False

b    1
c    2
dtype: int64

a    0
b    1
c    2
dtype: int64

Unnamed: 0,Col1,Col2,Col3
a,0,1,2
b,3,4,5
c,6,7,8
d,9,10,11


Unnamed: 0,Col1,Col2,Col3
b,3,4,5
c,6,7,8
d,9,10,11


Unnamed: 0,Col1,Col2,Col3
c,6,7,8
d,9,10,11


Unnamed: 0,Col2,Col3
a,1,2
b,4,5
c,7,8
d,10,11


a
b
c
d


---


Unnamed: 0,Col1,Col2,Col3
a,0,1,2
b,3,4,5
c,6,7,8
d,9,10,11


Unnamed: 0,Col2,Col1
a,1,0
b,4,3
c,7,6
d,10,9


Unnamed: 0,Col1,Col2,Col3
a,0,1,2
b,3,4,5


Unnamed: 0,Col1,Col2,Col3
a,True,True,True
b,True,True,False
c,False,False,False
d,False,False,False


Unnamed: 0,Col1,Col2,Col3
a,0.0,1.0,2.0
b,3.0,4.0,
c,,,
d,,,


---


Col2    1
Col3    2
Name: a, dtype: int64

'a'

dtype('int64')

Index([u'Col2', u'Col3'], dtype='object')

Unnamed: 0,Col1,Col2
a,0,1
b,3,4


Unnamed: 0,Col1,Col2
a,0,1
b,3,4


Col1    6
Col2    7
Col3    8
Name: c, dtype: int64

Unnamed: 0,Col1,Col2,Col3
a,99,1,2
b,3,4,5
c,6,7,8
d,9,10,11


99

pandas.core.series.Series

pandas.core.series.Series

In [6]:
# Hierarchical indexing - Series

# Two parallel lists of labels build a two-level index: outer a/b/c, inner x/y/z
s = Series(np.arange(9), index=[['a','a','a','b','b','b','c','c','c'],['x','y','z','x','y','z','x','y','z']])
s
s['a']                  # one outer label -> the inner-level Series
s['b':'c']              # slice of outer labels (end label included)
s[['a','c']]            # list of outer labels
s.loc[['a','c']]        # same selection with the explicit label indexer

# Select on the inner level across every outer label
s[:,'x']
s[:,'y']

print('---')

# unstack pivots the inner level into columns; stack reverses it
s.unstack()
s.unstack().stack()


a  x    0
   y    1
   z    2
b  x    3
   y    4
   z    5
c  x    6
   y    7
   z    8
dtype: int64

x    0
y    1
z    2
dtype: int64

b  x    3
   y    4
   z    5
c  x    6
   y    7
   z    8
dtype: int64

a  x    0
   y    1
   z    2
c  x    6
   y    7
   z    8
dtype: int64

a  x    0
   y    1
   z    2
c  x    6
   y    7
   z    8
dtype: int64

a    0
b    3
c    6
dtype: int64

a    1
b    4
c    7
dtype: int64

---


Unnamed: 0,x,y,z
a,0,1,2
b,3,4,5
c,6,7,8


a  x    0
   y    1
   z    2
b  x    3
   y    4
   z    5
c  x    6
   y    7
   z    8
dtype: int64

In [7]:
# Hierarchical Indexing - DataFrame

# Both axes get a two-level index: rows (Item, SubItem), columns (Flower, Metric)
df = DataFrame(np.arange(36).reshape(9,4), 
               index=[['a','a','a','b','b','b','c','c','c'],['x','y','z','x','y','z','x','y','z']],
               columns=[['Iris','Iris','Campanula','Campanula'],['Petal','Sepal','Petal','Sepal']]
              )
df.index.names = ['Item','SubItem']
df.columns.names = ['Flower', 'Metric']
# NOTE: unlike Series, DataFrame has no `name` field; this only attaches an
# ad-hoc attribute to this object and does not show up in the display.
df.name = 'Flower Metrics'
df

df['Iris']                       # outer column label -> sub-frame
df['Iris']['Petal']              # then inner label -> Series
df['Iris']['Petal'].unstack()    # SubItem level pivoted into columns

df.unstack()


# Swap the two row levels, then sort so the new outer level is grouped.
# sort_index(level=...) replaces the deprecated sortlevel().
df.swaplevel('Item', 'SubItem')
df.swaplevel('Item', 'SubItem').sort_index(level=0) # same as...
df.swaplevel('Item', 'SubItem').sort_index()

# The same on the column axis
df.swaplevel(0,1, axis=1).sort_index(level=0, axis=1)


Unnamed: 0_level_0,Flower,Iris,Iris,Campanula,Campanula
Unnamed: 0_level_1,Metric,Petal,Sepal,Petal,Sepal
Item,SubItem,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
a,x,0,1,2,3
a,y,4,5,6,7
a,z,8,9,10,11
b,x,12,13,14,15
b,y,16,17,18,19
b,z,20,21,22,23
c,x,24,25,26,27
c,y,28,29,30,31
c,z,32,33,34,35


Unnamed: 0_level_0,Metric,Petal,Sepal
Item,SubItem,Unnamed: 2_level_1,Unnamed: 3_level_1
a,x,0,1
a,y,4,5
a,z,8,9
b,x,12,13
b,y,16,17
b,z,20,21
c,x,24,25
c,y,28,29
c,z,32,33


Item  SubItem
a     x           0
      y           4
      z           8
b     x          12
      y          16
      z          20
c     x          24
      y          28
      z          32
Name: Petal, dtype: int64

SubItem,x,y,z
Item,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
a,0,4,8
b,12,16,20
c,24,28,32


Flower,Iris,Iris,Iris,Iris,Iris,Iris,Campanula,Campanula,Campanula,Campanula,Campanula,Campanula
Metric,Petal,Petal,Petal,Sepal,Sepal,Sepal,Petal,Petal,Petal,Sepal,Sepal,Sepal
SubItem,x,y,z,x,y,z,x,y,z,x,y,z
Item,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3
a,0,4,8,1,5,9,2,6,10,3,7,11
b,12,16,20,13,17,21,14,18,22,15,19,23
c,24,28,32,25,29,33,26,30,34,27,31,35


Unnamed: 0_level_0,Flower,Iris,Iris,Campanula,Campanula
Unnamed: 0_level_1,Metric,Petal,Sepal,Petal,Sepal
SubItem,Item,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
x,a,0,1,2,3
y,a,4,5,6,7
z,a,8,9,10,11
x,b,12,13,14,15
y,b,16,17,18,19
z,b,20,21,22,23
x,c,24,25,26,27
y,c,28,29,30,31
z,c,32,33,34,35


Unnamed: 0_level_0,Flower,Iris,Iris,Campanula,Campanula
Unnamed: 0_level_1,Metric,Petal,Sepal,Petal,Sepal
SubItem,Item,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
x,a,0,1,2,3
x,b,12,13,14,15
x,c,24,25,26,27
y,a,4,5,6,7
y,b,16,17,18,19
y,c,28,29,30,31
z,a,8,9,10,11
z,b,20,21,22,23
z,c,32,33,34,35


Unnamed: 0_level_0,Flower,Iris,Iris,Campanula,Campanula
Unnamed: 0_level_1,Metric,Petal,Sepal,Petal,Sepal
SubItem,Item,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
x,a,0,1,2,3
x,b,12,13,14,15
x,c,24,25,26,27
y,a,4,5,6,7
y,b,16,17,18,19
y,c,28,29,30,31
z,a,8,9,10,11
z,b,20,21,22,23
z,c,32,33,34,35


Unnamed: 0_level_0,Metric,Petal,Petal,Sepal,Sepal
Unnamed: 0_level_1,Flower,Campanula,Iris,Campanula,Iris
Item,SubItem,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
a,x,2,0,3,1
a,y,6,4,7,5
a,z,10,8,11,9
b,x,14,12,15,13
b,y,18,16,19,17
b,z,22,20,23,21
c,x,26,24,27,25
c,y,30,28,31,29
c,z,34,32,35,33


In [8]:
# Summary statistics by level
# `df` is the two-level (Item, SubItem) x (Flower, Metric) frame built in the
# previous cell.
#
# Aggregating on one index level is done with groupby(level=...); the
# `level=` argument of sum() is deprecated and removed in later pandas.

df
df.groupby(level='Item').sum()
df.groupby(level='SubItem').sum()

# For a column level: transpose, group the (now row) level, transpose back
df.T.groupby(level='Flower').sum().T
df.T.groupby(level='Metric').sum().T

# Move the row index levels back into ordinary columns
df.reset_index()


Unnamed: 0_level_0,Flower,Iris,Iris,Campanula,Campanula
Unnamed: 0_level_1,Metric,Petal,Sepal,Petal,Sepal
Item,SubItem,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
a,x,0,1,2,3
a,y,4,5,6,7
a,z,8,9,10,11
b,x,12,13,14,15
b,y,16,17,18,19
b,z,20,21,22,23
c,x,24,25,26,27
c,y,28,29,30,31
c,z,32,33,34,35


Flower,Iris,Iris,Campanula,Campanula
Metric,Petal,Sepal,Petal,Sepal
Item,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,12,15,18,21
b,48,51,54,57
c,84,87,90,93


Flower,Iris,Iris,Campanula,Campanula
Metric,Petal,Sepal,Petal,Sepal
SubItem,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
x,36,39,42,45
y,48,51,54,57
z,60,63,66,69


Unnamed: 0_level_0,Flower,Campanula,Iris
Item,SubItem,Unnamed: 2_level_1,Unnamed: 3_level_1
a,x,5,1
a,y,13,9
a,z,21,17
b,x,29,25
b,y,37,33
b,z,45,41
c,x,53,49
c,y,61,57
c,z,69,65


Unnamed: 0_level_0,Metric,Petal,Sepal
Item,SubItem,Unnamed: 2_level_1,Unnamed: 3_level_1
a,x,2,4
a,y,10,12
a,z,18,20
b,x,26,28
b,y,34,36
b,z,42,44
c,x,50,52
c,y,58,60
c,z,66,68


Flower,Item,SubItem,Iris,Iris,Campanula,Campanula
Metric,Unnamed: 1_level_1,Unnamed: 2_level_1,Petal,Sepal,Petal,Sepal
0,a,x,0,1,2,3
1,a,y,4,5,6,7
2,a,z,8,9,10,11
3,b,x,12,13,14,15
4,b,y,16,17,18,19
5,b,z,20,21,22,23
6,c,x,24,25,26,27
7,c,y,28,29,30,31
8,c,z,32,33,34,35


In [9]:
# Missing data - Series

s = Series([0, 1, np.nan, np.nan, np.nan, np.nan, 3, np.nan, np.nan, 4])
s

s.isnull()
s.ffill()             # carry the last valid value forward
s.ffill(limit=2)      # ...over at most 2 consecutive gaps
s.bfill(limit=1)      # pull the next valid value back over at most 1 gap

s.dropna()

print('---')

# Missing data - DataFrame

# Columns 1 and 2 are about to receive NaN, which an integer column cannot
# hold, so they are cast to float explicitly instead of relying on an implicit
# upcast during assignment.  Column 0 stays integer.
df = DataFrame(np.arange(12).reshape(4,3)).astype({1: float, 2: float})
# Row and column labels are both the default integers, so these are label lookups
df.loc[0,1] = df.loc[1,2] = np.nan
df[3] = np.nan        # a new column that is entirely missing
df

df.dropna()                    # drop rows containing any NaN (here: every row)
df.dropna(how='all')           # drop rows only if every value is NaN
df.dropna(how='all', axis=1)   # same test applied to columns -> removes column 3
df.dropna(thresh=3)            # keep rows with at least 3 non-NaN values

print('---')

df.fillna(-1)
df.mean()
df.fillna(df.mean())           # per-column fill value; column 3 has no mean and stays NaN
# A dict gives one fill value per column; inplace=True modifies df itself
df.fillna({1:-1,2:-2,3:-3}, inplace=True)
df


0    0.0
1    1.0
2    NaN
3    NaN
4    NaN
5    NaN
6    3.0
7    NaN
8    NaN
9    4.0
dtype: float64

0    False
1    False
2     True
3     True
4     True
5     True
6    False
7     True
8     True
9    False
dtype: bool

0    0.0
1    1.0
2    1.0
3    1.0
4    1.0
5    1.0
6    3.0
7    3.0
8    3.0
9    4.0
dtype: float64

0    0.0
1    1.0
2    1.0
3    1.0
4    NaN
5    NaN
6    3.0
7    3.0
8    3.0
9    4.0
dtype: float64

0    0.0
1    1.0
2    NaN
3    NaN
4    NaN
5    3.0
6    3.0
7    NaN
8    4.0
9    4.0
dtype: float64

0    0.0
1    1.0
6    3.0
9    4.0
dtype: float64

---


Unnamed: 0,0,1,2,3
0,0,,2.0,
1,3,4.0,,
2,6,7.0,8.0,
3,9,10.0,11.0,


Unnamed: 0,0,1,2,3


Unnamed: 0,0,1,2,3
0,0,,2.0,
1,3,4.0,,
2,6,7.0,8.0,
3,9,10.0,11.0,


Unnamed: 0,0,1,2
0,0,,2.0
1,3,4.0,
2,6,7.0,8.0
3,9,10.0,11.0


Unnamed: 0,0,1,2,3
2,6,7.0,8.0,
3,9,10.0,11.0,


---


Unnamed: 0,0,1,2,3
0,0,-1.0,2.0,-1.0
1,3,4.0,-1.0,-1.0
2,6,7.0,8.0,-1.0
3,9,10.0,11.0,-1.0


0    4.5
1    7.0
2    7.0
3    NaN
dtype: float64

Unnamed: 0,0,1,2,3
0,0,7.0,2.0,
1,3,4.0,7.0,
2,6,7.0,8.0,
3,9,10.0,11.0,


Unnamed: 0,0,1,2,3
0,0,-1.0,2.0,-3.0
1,3,4.0,-2.0,-3.0
2,6,7.0,8.0,-3.0
3,9,10.0,11.0,-3.0


Unnamed: 0,0,1,2,3
0,0,-1.0,2.0,-3.0
1,3,4.0,-2.0,-3.0
2,6,7.0,8.0,-3.0
3,9,10.0,11.0,-3.0


In [10]:
# Summary & descriptive statistics

s = Series(np.arange(4), index=['Col1','Col2','Col3','Col4'])
s
s.mean()
s.idxmax()            # label of the largest value
s.cumsum()

s.describe()

print('---')

df = DataFrame(np.arange(12).reshape(3,4), index=['Row1', 'Row2', 'Row3'], columns=['Col1','Col2','Col3','Col4'])
df

df.sum()              # reduces down the rows: one result per column
df.mean(axis=1)       # axis=1 reduces across the columns: one result per row
df.idxmax()
df.idxmin()
df.cumsum()
df.cumsum(axis=1)
df.describe()


# Row 'Row1' and `s` hold the same values under the same labels,
# so their correlation is 1.0
df.loc['Row1']
s
df.loc['Row1'].corr(s)

df.corr()
df.cov()
df.corrwith(s, axis=1)    # correlate each row with s


Col1    0
Col2    1
Col3    2
Col4    3
dtype: int64

1.5

'Col4'

Col1    0
Col2    1
Col3    3
Col4    6
dtype: int64

count    4.000000
mean     1.500000
std      1.290994
min      0.000000
25%      0.750000
50%      1.500000
75%      2.250000
max      3.000000
dtype: float64

---


Unnamed: 0,Col1,Col2,Col3,Col4
Row1,0,1,2,3
Row2,4,5,6,7
Row3,8,9,10,11


Col1    12
Col2    15
Col3    18
Col4    21
dtype: int64

Row1    1.5
Row2    5.5
Row3    9.5
dtype: float64

Col1    Row3
Col2    Row3
Col3    Row3
Col4    Row3
dtype: object

Col1    Row1
Col2    Row1
Col3    Row1
Col4    Row1
dtype: object

Unnamed: 0,Col1,Col2,Col3,Col4
Row1,0,1,2,3
Row2,4,6,8,10
Row3,12,15,18,21


Unnamed: 0,Col1,Col2,Col3,Col4
Row1,0,1,3,6
Row2,4,9,15,22
Row3,8,17,27,38


Unnamed: 0,Col1,Col2,Col3,Col4
count,3.0,3.0,3.0,3.0
mean,4.0,5.0,6.0,7.0
std,4.0,4.0,4.0,4.0
min,0.0,1.0,2.0,3.0
25%,2.0,3.0,4.0,5.0
50%,4.0,5.0,6.0,7.0
75%,6.0,7.0,8.0,9.0
max,8.0,9.0,10.0,11.0


Col1    0
Col2    1
Col3    2
Col4    3
Name: Row1, dtype: int64

Col1    0
Col2    1
Col3    2
Col4    3
dtype: int64

1.0

Unnamed: 0,Col1,Col2,Col3,Col4
Col1,1.0,1.0,1.0,1.0
Col2,1.0,1.0,1.0,1.0
Col3,1.0,1.0,1.0,1.0
Col4,1.0,1.0,1.0,1.0


Unnamed: 0,Col1,Col2,Col3,Col4
Col1,16.0,16.0,16.0,16.0
Col2,16.0,16.0,16.0,16.0
Col3,16.0,16.0,16.0,16.0
Col4,16.0,16.0,16.0,16.0


Row1    1.0
Row2    1.0
Row3    1.0
dtype: float64

In [11]:
# Reindexing

s = Series(np.arange(5), index=['b','a','d','e','c'])
s

s.reindex(['a','b','c','x'])                           # unknown label 'x' -> NaN
s.reindex(['a','b','c','x','y','z'], fill_value=99)    # ...or an explicit fill value
# s.reindex(['a','b','c','x','y','z'], method='ffill') - will not work: the
# index b,a,d,e,c is not monotonic, which method-based filling requires
# (string labels as such are fine - see the DataFrame reindex below)
s = Series(['a','b','c'], index=range(3))
s.reindex(range(10))
s.reindex(range(10), method='ffill')    # new labels 3..9 take the last earlier value
s.reindex(range(10), method='bfill')    # nothing follows 3..9, so they stay NaN

# drop removes entries by index label and returns a new Series
s.drop(2)
s.drop([0,1,2])

print('---')

df = DataFrame(np.arange(9).reshape(3,3), index=['Row1', 'Row2', 'Row3'], columns=['Col1', 'Col2', 'Col3'])
df
df.reindex(['Row1', 'Row2', 'Row3', 'Row4'], columns=['Col1', 'Col2', 'Col3', 'Col4'], method='ffill')

df.drop('Row1')
df.drop('Col1', axis=1)

print('---')

# Label-based selection with .loc (replaces the deprecated .ix)
df
df['Col1']
df.loc[['Row1','Row2'], ['Col1','Col2']]
df.loc[:'Row2', :'Col2']


b    0
a    1
d    2
e    3
c    4
dtype: int64

a    1.0
b    0.0
c    4.0
x    NaN
dtype: float64

a     1
b     0
c     4
x    99
y    99
z    99
dtype: int64

0      a
1      b
2      c
3    NaN
4    NaN
5    NaN
6    NaN
7    NaN
8    NaN
9    NaN
dtype: object

0    a
1    b
2    c
3    c
4    c
5    c
6    c
7    c
8    c
9    c
dtype: object

0      a
1      b
2      c
3    NaN
4    NaN
5    NaN
6    NaN
7    NaN
8    NaN
9    NaN
dtype: object

0    a
1    b
dtype: object

Series([], dtype: object)

---


Unnamed: 0,Col1,Col2,Col3
Row1,0,1,2
Row2,3,4,5
Row3,6,7,8


Unnamed: 0,Col1,Col2,Col3,Col4
Row1,0,1,2,
Row2,3,4,5,
Row3,6,7,8,
Row4,6,7,8,


Unnamed: 0,Col1,Col2,Col3
Row2,3,4,5
Row3,6,7,8


Unnamed: 0,Col2,Col3
Row1,1,2
Row2,4,5
Row3,7,8


---


Unnamed: 0,Col1,Col2,Col3
Row1,0,1,2
Row2,3,4,5
Row3,6,7,8


Row1    0
Row2    3
Row3    6
Name: Col1, dtype: int64

Unnamed: 0,Col1,Col2
Row1,0,1
Row2,3,4


Unnamed: 0,Col1,Col2
Row1,0,1
Row2,3,4


In [12]:
# Data alignment - Series

s1 = Series(range(3), index=['Val1','Val2','Val3'])
s1
s2 = Series([1,11,12], index=['Val1','Val11','Val12'])
s2
# Arithmetic aligns on labels; a label present on only one side gives NaN
s1 + s2

# fill_value stands in for the missing side, so every label gets a result
s1.add(s2, fill_value=0)


print('---')

# Data Alignment - DataFrame

df = DataFrame(np.arange(9).reshape(3,3), index=['Row1','Row2','Row3'], columns=['Col1','Col2','Col3'])
df

df2 = DataFrame([1,2,3], index=['Row1', 'Row2', 'Row4'], columns=['Col1'])
df2

# Alignment happens on both rows and columns
df + df2
df.add(df2,fill_value=0)

print('---')

# DataFrame - Series: the Series labels are matched against the columns and
# the operation is repeated for every row
df.loc['Row1']
df - df.loc['Row1']

# This Series only has 'Col1', so the other columns come out as NaN
df2.loc['Row4']
df - df2.loc['Row4']

# axis=0 matches the Series against the row index instead
df.sub(df['Col1'], axis=0)


Val1    0
Val2    1
Val3    2
dtype: int64

Val1      1
Val11    11
Val12    12
dtype: int64

Val1     1.0
Val11    NaN
Val12    NaN
Val2     NaN
Val3     NaN
dtype: float64

Val1      1.0
Val11    11.0
Val12    12.0
Val2      1.0
Val3      2.0
dtype: float64

---


Unnamed: 0,Col1,Col2,Col3
Row1,0,1,2
Row2,3,4,5
Row3,6,7,8


Unnamed: 0,Col1
Row1,1
Row2,2
Row4,3


Unnamed: 0,Col1,Col2,Col3
Row1,1.0,,
Row2,5.0,,
Row3,,,
Row4,,,


Unnamed: 0,Col1,Col2,Col3
Row1,1.0,1.0,2.0
Row2,5.0,4.0,5.0
Row3,6.0,7.0,8.0
Row4,3.0,,


---


Col1    0
Col2    1
Col3    2
Name: Row1, dtype: int64

Unnamed: 0,Col1,Col2,Col3
Row1,0,0,0
Row2,3,3,3
Row3,6,6,6


Col1    3
Name: Row4, dtype: int64

Unnamed: 0,Col1,Col2,Col3
Row1,-3.0,,
Row2,0.0,,
Row3,3.0,,


Unnamed: 0,Col1,Col2,Col3
Row1,0,1,2
Row2,0,1,2
Row3,0,1,2


In [13]:
# Sorting, ranking and function application

# NOTE: `s` is still the Series(['a','b','c']) left over from the Reindexing
# cell, and `df` (used below) is the 3x3 frame from the Data alignment cell -
# run those cells first.
s.sort_index()
s.sort_index(ascending=False)
s.sort_values()

# Ranking: `method` decides how tied values share their ranks
s = Series([1,1,2,3,4,4,5])
s
s.rank()                  # default: ties get the average of their ranks
s.rank(method='first')    # ties ranked in order of appearance
s.rank(method='max')      # ties all get the highest rank of the group
s.rank(method='min')      # ties all get the lowest rank of the group

print('---')

df
df.sort_index(ascending=False)
df.sort_index(axis=1, ascending=False)        # sort the column labels
df.sort_values(by='Col1', ascending=False)    # sort rows by the values in Col1

df.rank()
df.rank(axis=1)


# apply calls the function once per column, or once per row with axis=1
df.apply(sum)
df.apply(sum, axis=1)

df.apply(lambda x: [x.min(), x.max()])
df.apply(lambda x: [x.min(), x.max()], axis=1)


0    a
1    b
2    c
dtype: object

2    c
1    b
0    a
dtype: object

0    a
1    b
2    c
dtype: object

0    1
1    1
2    2
3    3
4    4
5    4
6    5
dtype: int64

0    1.5
1    1.5
2    3.0
3    4.0
4    5.5
5    5.5
6    7.0
dtype: float64

0    1.0
1    2.0
2    3.0
3    4.0
4    5.0
5    6.0
6    7.0
dtype: float64

0    2.0
1    2.0
2    3.0
3    4.0
4    6.0
5    6.0
6    7.0
dtype: float64

0    1.0
1    1.0
2    3.0
3    4.0
4    5.0
5    5.0
6    7.0
dtype: float64

---


Unnamed: 0,Col1,Col2,Col3
Row1,0,1,2
Row2,3,4,5
Row3,6,7,8


Unnamed: 0,Col1,Col2,Col3
Row3,6,7,8
Row2,3,4,5
Row1,0,1,2


Unnamed: 0,Col3,Col2,Col1
Row1,2,1,0
Row2,5,4,3
Row3,8,7,6


Unnamed: 0,Col1,Col2,Col3
Row3,6,7,8
Row2,3,4,5
Row1,0,1,2


Unnamed: 0,Col1,Col2,Col3
Row1,1.0,1.0,1.0
Row2,2.0,2.0,2.0
Row3,3.0,3.0,3.0


Unnamed: 0,Col1,Col2,Col3
Row1,1.0,2.0,3.0
Row2,1.0,2.0,3.0
Row3,1.0,2.0,3.0


Col1     9
Col2    12
Col3    15
dtype: int64

Row1     3
Row2    12
Row3    21
dtype: int64

Col1    [0, 6]
Col2    [1, 7]
Col3    [2, 8]
dtype: object

Row1    [0, 2]
Row2    [3, 5]
Row3    [6, 8]
dtype: object