# Chapter 5: Getting started with Pandas

In [1]:
import pandas as pd
import numpy as np

In [2]:
obj=pd.Series([11,12,13])

In [3]:
obj.index.values

array([0, 1, 2], dtype=int64)

In [4]:
obj=pd.Series([11,12,13,14,15],index=['a','b','c','d','e'])

In [5]:
obj

a    11
b    12
c    13
d    14
e    15
dtype: int64

In [6]:
obj[['a','b']]

a    11
b    12
dtype: int64

In [7]:
# Convert Disctionary to Series
sdata = {'Niraj':'IIT','Manish':'MNR','Ponds':'IIM'}

In [8]:
obj1=pd.Series(sdata)

In [9]:
obj1.name='Education'

In [10]:
obj1.index.name='Name'

In [11]:
obj1

Name
Manish    MNR
Niraj     IIT
Ponds     IIM
Name: Education, dtype: object

In [12]:
data={'state':['Ohio','Ohio','Ohio','Nevada','Nevada'],'year':[2000,2001,2002,2001,2002],'pop':[1.5,1.7,3.6,2.4,2.9]}

In [13]:
frame=pd.DataFrame(data)

In [14]:
frame=pd.DataFrame(data,columns=['year','state','pop'])

In [15]:
frame

Unnamed: 0,year,state,pop
0,2000,Ohio,1.5
1,2001,Ohio,1.7
2,2002,Ohio,3.6
3,2001,Nevada,2.4
4,2002,Nevada,2.9


In [16]:
# Nesting dictionary to Dataframe

data={'Nevada':{'2000':1.5,'2001':1.7,'2009':4.5},'newYork':{'2000':1.9,'2001':1.95,'2009':11.5}}

In [17]:
frame=pd.DataFrame(data)

In [18]:
frame

Unnamed: 0,Nevada,newYork
2000,1.5,1.9
2001,1.7,1.95
2009,4.5,11.5


In [19]:
frame.T

Unnamed: 0,2000,2001,2009
Nevada,1.5,1.7,4.5
newYork,1.9,1.95,11.5


In [20]:
frame.columns.name='state'

In [21]:
frame.index.name='year'

In [22]:
frame

state,Nevada,newYork
year,Unnamed: 1_level_1,Unnamed: 2_level_1
2000,1.5,1.9
2001,1.7,1.95
2009,4.5,11.5


In [23]:
frame.values

array([[  1.5 ,   1.9 ],
       [  1.7 ,   1.95],
       [  4.5 ,  11.5 ]])

In [24]:
obj3=pd.Series([1,2,3,4,5],index=['a','b','c','d','e'])
frame=pd.DataFrame(obj3)
frame.reindex(['a','b','Cc','Dd','Ee'],fill_value=0)

Unnamed: 0,0
a,1
b,2
Cc,0
Dd,0
Ee,0


In [25]:
frame1=pd.DataFrame(np.arange(9).reshape(3,3),index=['a','b','c'],columns=['A','B','C'])
frame1

Unnamed: 0,A,B,C
a,0,1,2
b,3,4,5
c,6,7,8


In [26]:
frame1.reindex(['a','b','c','d'],fill_value=0)

Unnamed: 0,A,B,C
a,0,1,2
b,3,4,5
c,6,7,8
d,0,0,0


In [27]:
frame1.columns.name='category'
frame1.index.name='Alpha'

In [28]:
frame1

category,A,B,C
Alpha,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
a,0,1,2
b,3,4,5
c,6,7,8


In [29]:
frame1.reindex(['a','b','c','d','e'])

category,A,B,C
Alpha,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
a,0.0,1.0,2.0
b,3.0,4.0,5.0
c,6.0,7.0,8.0
d,,,
e,,,


In [30]:
frame1

category,A,B,C
Alpha,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
a,0,1,2
b,3,4,5
c,6,7,8


In [31]:
frame1.loc[['a','b'],['B','C']]

category,B,C
Alpha,Unnamed: 1_level_1,Unnamed: 2_level_1
a,1,2
b,4,5


In [34]:
frame1

category,A,B,C
Alpha,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
a,0,1,2
b,3,4,5
c,6,7,8


In [47]:
series= frame1.iloc[0]
series

category
A    0
B    1
C    2
Name: a, dtype: int32

In [43]:
frame1

category,A,B,C
Alpha,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
a,0,1,2
b,3,4,5
c,6,7,8


In [48]:
frame1-series

category,A,B,C
Alpha,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
a,0,0,0
b,3,3,3
c,6,6,6


In [51]:
series1=frame1['A']
series1

Alpha
a    0
b    3
c    6
Name: A, dtype: int32

In [57]:
f=lambda x:max(x)-min(x)

In [58]:
frame1.apply(f)

category
A    6
B    6
C    6
dtype: int64

In [59]:
frame1

category,A,B,C
Alpha,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
a,0,1,2
b,3,4,5
c,6,7,8


In [60]:
frame1.apply(f,axis=1)

Alpha
a    2
b    2
c    2
dtype: int64

In [61]:
def f(x):
    return pd.Series([x.min(),x.max()],index=['min','max'])

In [69]:
frame1.apply(f,axis=0)

category,A,B,C
min,0,1,2
max,6,7,8


In [72]:
frame2=pd.DataFrame(np.random.randn(4,6),index=['A','B','C','D'])

In [89]:
format=lambda x:'%0.3f'%x

In [95]:
x= 10.09090
'%1.3f' %x

'10.091'

In [97]:
frame2.applymap(format)

Unnamed: 0,0,1,2,3,4,5
A,-0.558,1.282,1.118,0.819,0.27,-0.439
B,0.251,1.26,0.568,0.561,-0.97,-0.323
C,0.082,-1.289,0.868,-0.889,-0.327,-0.132
D,-0.032,2.179,0.544,-0.677,-1.148,1.046


In [108]:
frame2.loc['A'].map(format)

0    -0.558
1     1.282
2     1.118
3     0.819
4     0.270
5    -0.439
Name: A, dtype: object

In [117]:
frame2.sort_index(axis=0)

Unnamed: 0,0,1,2,3,4,5
A,-0.557948,1.282021,1.117955,0.819468,0.270188,-0.439387
B,0.251475,1.259754,0.567996,0.560596,-0.970353,-0.322727
C,0.081509,-1.289309,0.867637,-0.888864,-0.326554,-0.132066
D,-0.032059,2.178919,0.544046,-0.676592,-1.14847,1.045764


In [119]:
frame3=pd.DataFrame(np.arange(10).reshape(2,5),index=['three','one'],columns=['a','b','c','d','e'])
frame3

Unnamed: 0,a,b,c,d,e
three,0,1,2,3,4
one,5,6,7,8,9


In [124]:
frame3.sort_index(axis=1)

Unnamed: 0,a,b,c,d,e
three,0,1,2,3,4
one,5,6,7,8,9


In [127]:
frame3.sort_values(by='e',ascending=False)

Unnamed: 0,a,b,c,d,e
one,5,6,7,8,9
three,0,1,2,3,4


In [129]:
frame3['a'].order

<bound method Series.order of three    0
one      5
Name: a, dtype: int32>

In [135]:
frame3.loc['one'].rank()

a    1.0
b    2.0
c    3.0
d    4.0
e    5.0
Name: one, dtype: float64

In [139]:
frame4=pd.DataFrame({'b':[4.3,7,-4,2],'a':[0,1,0,1],'c':[-2,5,8,-3]})

In [140]:
frame4

Unnamed: 0,a,b,c
0,0,4.3,-2
1,1,7.0,5
2,0,-4.0,8
3,1,2.0,-3


In [144]:
frame4.rank(axis=0)

Unnamed: 0,a,b,c
0,1.5,3.0,2.0
1,3.5,4.0,3.0
2,1.5,1.0,4.0
3,3.5,2.0,1.0


In [147]:
import pandas.io.data as web

In [150]:
data=web.get_data_google('IBM','1/1/2017','2/1/2017')

OSError: after 3 tries, Google did not return a 200 for url 'http://www.google.com/finance/historical?startdate=Jan+01%2C+2017&output=csv&q=IBM&enddate=Feb+01%2C+2017'

In [149]:
data

{'Nevada': {'2000': 1.5, '2001': 1.7, '2009': 4.5},
 'newYork': {'2000': 1.9, '2001': 1.95, '2009': 11.5}}