# More Functionality for Series, DataFrame, and Index

In [9]:
import pandas as pd
from pandas import Series, DataFrame

import numpy as np

In [4]:
# Three-element Series holding 0, 1, 2 under the labels 'a', 'b', 'c'.
obj = Series(range(3), index=list('abc'))

In [5]:
# Grab the Index object that holds obj's labels.
index = obj.index

In [6]:
# Slicing an Index yields another Index (here the labels from position 1 onward).
index[1:]

Index([u'b', u'c'], dtype='object')

In [7]:
# A single integer position returns the label itself rather than an Index.
index[1]

'b'

In [10]:
# Build a standalone Index from the integers 0, 1, 2.
index = pd.Index(np.arange(3))

In [11]:
# Use the prebuilt Index as the labels of a new Series.
obj2 = Series([1.5, -2.5, 0], index=index)

In [14]:
# Identity check: per the recorded output, the Series holds the very Index
# object it was given rather than a copy.
obj2.index is index

True

# Reindexing

In [15]:
# Conform obj to a longer label list; 'd' and 'e' do not exist in obj.
obj2 = obj.reindex(list('abcde'))

In [16]:
# The labels added by reindex ('d', 'e') come back as NaN, which is why the
# result is float64 instead of integer.
obj2

a     0
b     1
c     2
d   NaN
e   NaN
dtype: float64

In [17]:
# fill_value supplies 10 for the labels that reindex introduces, so no NaN
# appears and the result stays int64.
obj.reindex(['a', 'b', 'c', 'd', 'e'], fill_value=10)

a     0
b     1
c     2
d    10
e    10
dtype: int64

In [18]:
# String values on a sparse integer index (0, 2, 4), leaving gaps to fill later.
obj3 = Series(
    data=['blue', 'purple', 'yellow'],
    index=[0, 2, 4],
)

In [20]:
# method='ffill' forward-fills: each label introduced by reindex (1, 3, 5)
# takes the value of the closest preceding label in obj3.
obj3.reindex(range(6), method='ffill')

0      blue
1      blue
2    purple
3    purple
4    yellow
5    yellow
dtype: object

In [21]:
# Rebind obj: the floats 0.0 through 4.0 labelled 'a' through 'e'.
obj = Series(np.arange(5.), index=list('abcde'))

In [22]:
# drop returns a new Series without the entry labelled 'c'.
new_obj = obj.drop('c')

In [23]:
# Show the Series that remains after dropping 'c'.
new_obj

a    0
b    1
d    3
e    4
dtype: float64

In [24]:
# A list of labels drops several entries in one call.
obj.drop(['d', 'c'])

a    0
b    1
e    4
dtype: float64

In [28]:
# 4x4 frame of the integers 0..15: rows labelled by state, columns by ordinal name.
data = DataFrame(
    np.arange(16).reshape(4, 4),
    index=['Ohio', 'Texas', 'New York', 'Utah'],
    columns=['one', 'two', 'three', 'four'],
)

In [29]:
# With the default axis, drop removes row labels.
data.drop(['Utah'])

Unnamed: 0,one,two,three,four
Ohio,0,1,2,3
Texas,4,5,6,7
New York,8,9,10,11


In [31]:
# axis=1 makes drop remove column labels instead of row labels.
data.drop(['two'], axis=1)

Unnamed: 0,one,three,four
Ohio,0,2,3
Texas,4,6,7
New York,8,10,11
Utah,12,14,15


In [33]:
# Several columns can be dropped at once by listing their labels.
data.drop(['two', 'four'], axis=1)

Unnamed: 0,one,three
Ohio,0,2
Texas,4,6
New York,8,10
Utah,12,14


# Indexing, Selection and filtering

In [35]:
# Indexing with a column label returns that column as a Series.
data['two']

Ohio         1
Texas        5
New York     9
Utah        13
Name: two, dtype: int64

In [36]:
# The full frame is still intact: the drop calls above returned new objects.
data

Unnamed: 0,one,two,three,four
Ohio,0,1,2,3
Texas,4,5,6,7
New York,8,9,10,11
Utah,12,13,14,15


In [39]:
# A slice inside [] selects rows by position: here the first two rows.
data[:2]

Unnamed: 0,one,two,three,four
Ohio,0,1,2,3
Texas,4,5,6,7


In [40]:
# Every row from position 1 onward (everything except the first row).
data[1:]

Unnamed: 0,one,two,three,four
Texas,4,5,6,7
New York,8,9,10,11
Utah,12,13,14,15


In [42]:
# Element-wise comparison: yields a boolean frame with the same labels as data.
data<5

Unnamed: 0,one,two,three,four
Ohio,True,True,True,True
Texas,True,False,False,False
New York,False,False,False,False
Utah,False,False,False,False


In [43]:
# Label-based selection: row 'Utah', columns 'two' and 'three'.
# .loc replaces the deprecated .ix indexer (removed in pandas 1.0).
data.loc['Utah', ['two', 'three']]

two      13
three    14
Name: Utah, dtype: int64

In [46]:
# Rows are chosen by label, columns by position (3, 0, 1). .loc is purely
# label-based, so the positions are translated to labels via data.columns.
# This replaces the deprecated .ix indexer (removed in pandas 1.0), which
# allowed mixing labels and positions.
data.loc[['Texas', 'Utah'], data.columns[[3, 0, 1]]]

Unnamed: 0,four,one,two
Texas,7,4,5
Utah,15,12,13


In [47]:
# Third row by position. The row index holds strings, so the old .ix[2]
# fell back to positional lookup; .iloc states that intent explicitly and
# replaces the deprecated .ix indexer (removed in pandas 1.0).
data.iloc[2]

one       8
two       9
three    10
four     11
Name: New York, dtype: int64

In [48]:
# Label slice up to and including 'Utah' (label slices include the endpoint),
# restricted to column 'two'. .loc replaces the deprecated .ix indexer
# (removed in pandas 1.0).
data.loc[:'Utah', 'two']

Ohio         1
Texas        5
New York     9
Utah        13
Name: two, dtype: int64

# Arithmetic and data alignment

In [50]:
# Two Series whose indexes overlap only partly ('a', 'c', 'e' are shared).
s1 = Series([7.3, -2.5, 3.4, 1.5], index=list('acde'))
s2 = Series([-2.1, 3.6, -1.5, 3.1, 4], index=list('acefg'))

In [52]:
# Addition aligns on index labels; a label present in only one operand
# ('d', 'f', 'g') produces NaN in the result.
s1 + s2

a    5.2
c    1.1
d    NaN
e    0.0
f    NaN
g    NaN
dtype: float64

In [55]:
# 3x3 float frame with columns b, c, d.
df1 = DataFrame(
    np.arange(9.).reshape(3, 3),
    index=['Ohio', 'Texas', 'Colorado'],
    columns=['b', 'c', 'd'],
)

In [56]:
# Show the first operand of the upcoming frame addition.
df1

Unnamed: 0,b,c,d
Ohio,0,1,2
Texas,3,4,5
Colorado,6,7,8


In [57]:
# 4x3 float frame with columns b, d, e. It shares only some rows and columns
# with df1.
# NOTE(review): 'Oregan' looks like a misspelling of 'Oregon'; it is kept
# as-is because the recorded outputs below show this exact label.
df2 = DataFrame(
    np.arange(12.).reshape(4, 3),
    index=['Utah', 'Ohio', 'Texas', 'Oregan'],
    columns=['b', 'd', 'e'],
)

In [58]:
# Show the second operand of the upcoming frame addition.
df2

Unnamed: 0,b,d,e
Utah,0,1,2
Ohio,3,4,5
Texas,6,7,8
Oregan,9,10,11


In [59]:
# Frames align on both rows and columns. Only cells whose row AND column
# labels exist in both operands get a value; everything else is NaN.
df1 + df2

Unnamed: 0,b,c,d,e
Colorado,,,,
Ohio,3.0,,6.0,
Oregan,,,,
Texas,9.0,,12.0,
Utah,,,,


In [60]:
# Two frames of different shape on the default integer row index:
# df3 is 3x4 (columns a-d) and df4 is 4x5 (columns a-e).
df3 = DataFrame(np.arange(12.).reshape((3, 4)), columns=['a', 'b', 'c', 'd'])
df4 = DataFrame(np.arange(20.).reshape((4, 5)), columns=['a', 'b', 'c', 'd', 'e'])

In [61]:
# Plain + leaves NaN wherever only one frame has the cell (column 'e', row 3).
df3 + df4

Unnamed: 0,a,b,c,d,e
0,0.0,2.0,4.0,6.0,
1,9.0,11.0,13.0,15.0,
2,18.0,20.0,22.0,24.0,
3,,,,,


In [63]:
# fill_value=0 stands in for a cell that is missing from one of the frames,
# so column 'e' and row 3 carry df4's values instead of NaN.
df3.add(df4, fill_value=0)

Unnamed: 0,a,b,c,d,e
0,0,2,4,6,4
1,9,11,13,15,9
2,18,20,22,24,14
3,15,16,17,18,19


In [65]:
# Conform df3's columns to df4's; the newly introduced column 'e' is filled with 0.
df3.reindex(columns=df4.columns, fill_value=0)

Unnamed: 0,a,b,c,d,e
0,0,1,2,3,0
1,4,5,6,7,0
2,8,9,10,11,0


In [66]:
# 3x4 float array holding 0.0 through 11.0, used to illustrate broadcasting.
arr = np.arange(12, dtype=float).reshape(3, 4)
arr

array([[  0.,   1.,   2.,   3.],
       [  4.,   5.,   6.,   7.],
       [  8.,   9.,  10.,  11.]])

In [67]:
# Broadcasting: the first row is subtracted from every row of arr.
arr - arr[0]

array([[ 0.,  0.,  0.,  0.],
       [ 4.,  4.,  4.,  4.],
       [ 8.,  8.,  8.,  8.]])

In [70]:
# 4x3 float frame with columns b, d, e and one row per state.
frame = DataFrame(np.arange(12.).reshape((4, 3)), columns=list('bde'), index=['Utah', 'Ohio', 'Texas', 'Oregon'])

# First row by position ('Utah'), returned as a Series indexed by column label.
# .iloc replaces the deprecated .ix indexer (removed in pandas 1.0).
series = frame.iloc[0]

In [71]:
# Show the frame used in the DataFrame/Series arithmetic below.
frame

Unnamed: 0,b,d,e
Utah,0,1,2
Ohio,3,4,5
Texas,6,7,8
Oregon,9,10,11


In [72]:
# The first row of frame as a Series; its index is frame's column labels.
series

b    0
d    1
e    2
Name: Utah, dtype: float64

In [73]:
# The Series index is matched against frame's columns and the subtraction
# is broadcast down the rows, so every row has the 'Utah' row removed.
frame-series

Unnamed: 0,b,d,e
Utah,0,0,0
Ohio,3,3,3
Texas,6,6,6
Oregon,9,9,9


In [74]:
# series2 shares only 'b' and 'e' with frame's columns. The result is
# reindexed to the union of labels, so 'd' and 'f' come out entirely NaN.
series2 = Series(range(3), index=list('bef'))

frame + series2

Unnamed: 0,b,d,e,f
Utah,0,,3,
Ohio,3,,6,
Texas,6,,9,
Oregon,9,,12,


In [75]:
# Column 'd' as a Series indexed by frame's row labels.
series3 = frame['d']

In [76]:
# Show column 'd'; its index is the state labels, not the column labels.
series3

Utah       1
Ohio       4
Texas      7
Oregon    10
Name: d, dtype: float64

In [77]:
# Redisplay frame for comparison with the row-wise subtraction that follows.
frame

Unnamed: 0,b,d,e
Utah,0,1,2
Ohio,3,4,5
Texas,6,7,8
Oregon,9,10,11


In [78]:
# axis=0 matches series3 against the row index instead of the columns, so
# column 'd' is subtracted from every column of frame.
frame.sub(series3, axis=0)

Unnamed: 0,b,d,e
Utah,-1,0,1
Ohio,-1,0,1
Texas,-1,0,1
Oregon,-1,0,1


In [79]:
# frame itself is unchanged: sub returned a new object.
frame

Unnamed: 0,b,d,e
Utah,0,1,2
Ohio,3,4,5
Texas,6,7,8
Oregon,9,10,11


In [80]:
# Element-wise absolute value. Every entry of frame is already non-negative,
# so the displayed result matches frame.
abs(frame)

Unnamed: 0,b,d,e
Utah,0,1,2
Ohio,3,4,5
Texas,6,7,8
Oregon,9,10,11
