In [19]:
import pandas as pd
from pandas import Series, DataFrame

import numpy as np

# Series

In [4]:
obj = Series([4, 7, -5, 3])

In [38]:
obj

Bob      4
Steve    7
Jeff    -5
Ryan     3
dtype: int64

In [6]:
obj.index

Int64Index([0, 1, 2, 3], dtype='int64')

In [37]:
obj.index = ['Bob', 'Steve', 'Jeff', 'Ryan']

In [8]:
obj.values

array([ 4,  7, -5,  3])

In [10]:
obj2 = Series([3, 5, 6, 7,8], index=['a', 'b', 'c', 'd', 'e'])

In [11]:
obj2.index

Index([u'a', u'b', u'c', u'd', u'e'], dtype='object')

In [13]:
obj[1], obj2['c']

(7, 6)

In [14]:
obj2['d']=100

In [15]:
obj2

a      3
b      5
c      6
d    100
e      8
dtype: int64

In [16]:
obj2[obj2>5]

c      6
d    100
e      8
dtype: int64

In [17]:
obj2*2

a      6
b     10
c     12
d    200
e     16
dtype: int64

In [20]:
np.exp(obj2)

a    2.008554e+01
b    1.484132e+02
c    4.034288e+02
d    2.688117e+43
e    2.980958e+03
dtype: float64

In [21]:
'b' in obj2

True

In [22]:
'f' in obj2

False

## Series can be created using python dict

In [23]:
sdata = {'Ohio':35000, 'Texas':71000, 'Oregon':16000, 'Utah':5000}

obj3 = Series(sdata)

In [24]:
obj3

Ohio      35000
Oregon    16000
Texas     71000
Utah       5000
dtype: int64

In [26]:
states = ['California', 'Ohio', 'Oregon', 'Texas']

onj4 = Series(sdata, index=states)

onj4

California      NaN
Ohio          35000
Oregon        16000
Texas         71000
dtype: float64

In [27]:
obj_n = obj3 +onj4

In [28]:
obj_n

California       NaN
Ohio           70000
Oregon         32000
Texas         142000
Utah             NaN
dtype: float64

In [31]:
obj_n.isnull()

California     True
Ohio          False
Oregon        False
Texas         False
Utah           True
dtype: bool

In [34]:
obj_n.notnull()

California    False
Ohio           True
Oregon         True
Texas          True
Utah          False
dtype: bool

In [35]:
obj_n.name = 'population'
obj_n.index.name = 'state'

In [36]:
obj_n

state
California       NaN
Ohio           70000
Oregon         32000
Texas         142000
Utah             NaN
Name: population, dtype: float64

# DataFrame

In [39]:
# can use a dict of equal-length lists or Numpy arrays

In [40]:
data = { 'state' : ['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada'], 'year': [200, 2001, 2002, 2001, 2002], 'pop': [1.5, 1.7, 3.6, 2.4, 2.9]}

In [41]:
frame = DataFrame(data)

In [42]:
frame

Unnamed: 0,pop,state,year
0,1.5,Ohio,200
1,1.7,Ohio,2001
2,3.6,Ohio,2002
3,2.4,Nevada,2001
4,2.9,Nevada,2002


In [44]:
DataFrame(data, columns= ['year', 'state', 'pop'])

Unnamed: 0,year,state,pop
0,200,Ohio,1.5
1,2001,Ohio,1.7
2,2002,Ohio,3.6
3,2001,Nevada,2.4
4,2002,Nevada,2.9


In [45]:
frame2 = DataFrame(data, columns=['year','state', 'pop', 'debt'],
                  index=['a', 'b', 'c', 'd', 'e'])

In [46]:
frame2

Unnamed: 0,year,state,pop,debt
a,200,Ohio,1.5,
b,2001,Ohio,1.7,
c,2002,Ohio,3.6,
d,2001,Nevada,2.4,
e,2002,Nevada,2.9,


In [47]:
frame2.columns

Index([u'year', u'state', u'pop', u'debt'], dtype='object')

In [48]:
frame2.year

a     200
b    2001
c    2002
d    2001
e    2002
Name: year, dtype: int64

In [49]:
frame2['year']

a     200
b    2001
c    2002
d    2001
e    2002
Name: year, dtype: int64

In [51]:
frame2['debt'] = np.arange(5.)

In [52]:
frame2

Unnamed: 0,year,state,pop,debt
a,200,Ohio,1.5,0
b,2001,Ohio,1.7,1
c,2002,Ohio,3.6,2
d,2001,Nevada,2.4,3
e,2002,Nevada,2.9,4


#### indexing a field

In [53]:
frame2.ix['c']

year     2002
state    Ohio
pop       3.6
debt        2
Name: c, dtype: object

In [57]:
val=Series([-1.2, -1.5, -1.7], index=['c', 'a', 'e'])

In [58]:
frame2['debt'] = val

In [59]:
frame2

Unnamed: 0,year,state,pop,debt
a,200,Ohio,1.5,-1.5
b,2001,Ohio,1.7,
c,2002,Ohio,3.6,-1.2
d,2001,Nevada,2.4,
e,2002,Nevada,2.9,-1.7


In [60]:
frame2['eastern'] = frame2.state =='Ohio'

In [61]:
frame2

Unnamed: 0,year,state,pop,debt,eastern
a,200,Ohio,1.5,-1.5,True
b,2001,Ohio,1.7,,True
c,2002,Ohio,3.6,-1.2,True
d,2001,Nevada,2.4,,False
e,2002,Nevada,2.9,-1.7,False


### Adding new column

In [62]:
frame2['new'] = np.arange(5)

In [64]:
frame2

Unnamed: 0,year,state,pop,debt,eastern,new
a,200,Ohio,1.5,-1.5,True,0
b,2001,Ohio,1.7,,True,1
c,2002,Ohio,3.6,-1.2,True,2
d,2001,Nevada,2.4,,False,3
e,2002,Nevada,2.9,-1.7,False,4


### delete a column

In [65]:
del frame2['eastern']

In [66]:
frame2

Unnamed: 0,year,state,pop,debt,new
a,200,Ohio,1.5,-1.5,0
b,2001,Ohio,1.7,,1
c,2002,Ohio,3.6,-1.2,2
d,2001,Nevada,2.4,,3
e,2002,Nevada,2.9,-1.7,4


In [68]:
pop = { 'Nevada' : {2001: 2.4, 2002:2.9}, 'Ohio' : {2000: 1.5, 2001: 1.7, 2002:3.6}}

In [69]:
frame3= DataFrame(pop)

In [70]:
frame3

Unnamed: 0,Nevada,Ohio
2000,,1.5
2001,2.4,1.7
2002,2.9,3.6


In [71]:
frame3.T   # can transpose the results

Unnamed: 0,2000,2001,2002
Nevada,,2.4,2.9
Ohio,1.5,1.7,3.6


In [74]:
DataFrame(pop, index=[2002, 2003, 2004])

Unnamed: 0,Nevada,Ohio
2002,2.9,3.6
2003,,
2004,,


In [77]:
pdata = {'Ohio': frame3['Ohio'][:-1], 'Nevada': frame3['Nevada'][:2]}

In [79]:
DataFrame(pdata)

Unnamed: 0,Nevada,Ohio
2000,,1.5
2001,2.4,1.7


In [80]:
frame3.index.name = 'year'; frame3.columns.name ='state'

In [82]:
frame3

state,Nevada,Ohio
year,Unnamed: 1_level_1,Unnamed: 2_level_1
2000,,1.5
2001,2.4,1.7
2002,2.9,3.6


In [83]:
frame3.values

array([[ nan,  1.5],
       [ 2.4,  1.7],
       [ 2.9,  3.6]])

In [85]:
frame2.values

array([[200, 'Ohio', 1.5, -1.5, 0],
       [2001, 'Ohio', 1.7, nan, 1],
       [2002, 'Ohio', 3.6, -1.2, 2],
       [2001, 'Nevada', 2.4, nan, 3],
       [2002, 'Nevada', 2.9, -1.7, 4]], dtype=object)