In [1]:
import numpy as np
import pandas as pd

# creation of series

In [2]:
s=pd.Series(np.random.rand(5),index=['a','b','c','d','e'])

In [3]:
s

a    0.928404
b    0.858463
c    0.610847
d    0.272727
e    0.790020
dtype: float64

In [4]:
# from dict

In [5]:
d={'a':0.,'b':1.,'c':2.}

In [6]:
pd.Series(d)

a    0.0
b    1.0
c    2.0
dtype: float64

In [7]:
pd.Series(d,index=['b','c','d','e'])

b    1.0
c    2.0
d    NaN
e    NaN
dtype: float64

In [8]:
# from scalar value

In [9]:
pd.Series(5.,index=['a','b','c','d','e'])

a    5.0
b    5.0
c    5.0
d    5.0
e    5.0
dtype: float64

# series is like ndarray

In [10]:
s[0]

0.9284035332332444

In [11]:
s[:3]

a    0.928404
b    0.858463
c    0.610847
dtype: float64

In [12]:
s[s>s.median()]

a    0.928404
b    0.858463
dtype: float64

In [13]:
s[[4,3,1]]

e    0.790020
d    0.272727
b    0.858463
dtype: float64

In [14]:
np.exp(s)

a    2.530466
b    2.359530
c    1.841990
d    1.313542
e    2.203440
dtype: float64

# series is like dict

In [15]:
s['a']

0.9284035332332444

In [16]:
s

a    0.928404
b    0.858463
c    0.610847
d    0.272727
e    0.790020
dtype: float64

# series is dict like

In [17]:
s.get('f')

In [18]:
s.get('f',np.nan)

nan

# Vectorized operations and label alignment with Series

In [19]:
s+s

a    1.856807
b    1.716925
c    1.221693
d    0.545454
e    1.580039
dtype: float64

In [20]:
s*2

a    1.856807
b    1.716925
c    1.221693
d    0.545454
e    1.580039
dtype: float64

In [21]:
np.exp(s)

a    2.530466
b    2.359530
c    1.841990
d    1.313542
e    2.203440
dtype: float64

# DataFrame

DataFrame is a 2-dimensional labeled data structure with columns of 
potentially 
different types.

# From dict of Series or dicts

In [22]:
d={'one':pd.Series([1.,2.,3.],index=['a','b','c']),
  'two':pd.Series([1.,2.,3.,4.],index=['a','b','c','d'])}

In [23]:
d

{'one': a    1.0
 b    2.0
 c    3.0
 dtype: float64,
 'two': a    1.0
 b    2.0
 c    3.0
 d    4.0
 dtype: float64}

In [24]:
df=pd.DataFrame(d)

In [25]:
df

Unnamed: 0,one,two
a,1.0,1.0
b,2.0,2.0
c,3.0,3.0
d,,4.0


# From dict of Series or dicts

In [26]:
pd.DataFrame(d,index=['d','b','a'])

Unnamed: 0,one,two
d,,4.0
b,2.0,2.0
a,1.0,1.0


In [27]:
pd.DataFrame(d,index=['d','b','a'],columns=['two','three'])

Unnamed: 0,two,three
d,4.0,
b,2.0,
a,1.0,


# From dict of Series or dicts

In [28]:
df.index

Index(['a', 'b', 'c', 'd'], dtype='object')

In [29]:
df.columns

Index(['one', 'two'], dtype='object')

# From dict of ndarrays/lists

In [30]:
d={'one':[1.,2.,3.,4.],
  'two':[4.,3.,2.,1,]}

In [31]:
pd.DataFrame(d)

Unnamed: 0,one,two
0,1.0,4.0
1,2.0,3.0
2,3.0,2.0
3,4.0,1.0


In [32]:
pd.DataFrame(d,index=['a','b','c','d'])

Unnamed: 0,one,two
a,1.0,4.0
b,2.0,3.0
c,3.0,2.0
d,4.0,1.0


# From a list of dicts

In [33]:
data2=[{'a':1,'b':2},{'a':5,'b':10,'c':20}]

In [34]:
pd.DataFrame(data2)

Unnamed: 0,a,b,c
0,1,2,
1,5,10,20.0


In [35]:
pd.DataFrame(data2,index=['first','second'])

Unnamed: 0,a,b,c
first,1,2,
second,5,10,20.0


In [36]:
pd.DataFrame(data2,columns=['a','b'])

Unnamed: 0,a,b
0,1,2
1,5,10


# Column selection, addition, deletion

In [37]:
df['one']

a    1.0
b    2.0
c    3.0
d    NaN
Name: one, dtype: float64

In [38]:
df['three']=df['one']*df['two']

In [39]:
df['flag']=df['one']>2

In [40]:
df

Unnamed: 0,one,two,three,flag
a,1.0,1.0,1.0,False
b,2.0,2.0,4.0,False
c,3.0,3.0,9.0,True
d,,4.0,,False


# Columns can be deleted or popped like with a dict:

In [41]:
del df['two']

In [42]:
three=df.pop('three')

In [43]:
df

Unnamed: 0,one,flag
a,1.0,False
b,2.0,False
c,3.0,True
d,,False


# insertion

In [44]:
df['foo']='bar'

In [45]:
df

Unnamed: 0,one,flag,foo
a,1.0,False,bar
b,2.0,False,bar
c,3.0,True,bar
d,,False,bar


In [46]:
df['one_trunc']=df['one'][:2]

In [47]:
df

Unnamed: 0,one,flag,foo,one_trunc
a,1.0,False,bar,1.0
b,2.0,False,bar,2.0
c,3.0,True,bar,
d,,False,bar,


# Assigning New Columns in Method Chains


In [48]:
iris=pd.read_csv('D:\\pandas\\iris1.csv')

In [49]:
iris.head()

Unnamed: 0.1,Unnamed: 0,SepalLength,SepalWidth,PetalLength,PetalWidth,Target
0,0,5.1,3.5,1.4,0.2,Iris-setosa
1,1,4.9,3.0,1.4,0.2,Iris-setosa
2,2,4.7,3.2,1.3,0.2,Iris-setosa
3,3,4.6,3.1,1.5,0.2,Iris-setosa
4,4,5.0,3.6,1.4,0.2,Iris-setosa


In [50]:
(iris.assign(sepal_ratio=iris['SepalWidth']/iris['SepalLength']).head())

Unnamed: 0.1,Unnamed: 0,SepalLength,SepalWidth,PetalLength,PetalWidth,Target,sepal_ratio
0,0,5.1,3.5,1.4,0.2,Iris-setosa,0.686275
1,1,4.9,3.0,1.4,0.2,Iris-setosa,0.612245
2,2,4.7,3.2,1.3,0.2,Iris-setosa,0.680851
3,3,4.6,3.1,1.5,0.2,Iris-setosa,0.673913
4,4,5.0,3.6,1.4,0.2,Iris-setosa,0.72


# Index Object

In [52]:
obj=pd.Series(range(3),index=['a','b','c'])

In [54]:
obj.index

Index(['a', 'b', 'c'], dtype='object')

In [55]:
obj.index[1:]

Index(['b', 'c'], dtype='object')

# Reindexing


In [61]:
obj=pd.Series([4.5,7.2,-5.3,3.6],index=['d','b','a','c'])

In [62]:
obj

d    4.5
b    7.2
a   -5.3
c    3.6
dtype: float64

In [63]:
obj2=obj.reindex(['a','b','c','d','e'])

In [64]:
obj2

a   -5.3
b    7.2
c    3.6
d    4.5
e    NaN
dtype: float64

In [65]:
obj2=obj.reindex(['a','b','c','d','e'],fill_value=0)

In [66]:
obj2

a   -5.3
b    7.2
c    3.6
d    4.5
e    0.0
dtype: float64

In [67]:
obj3=pd.Series(['blue','purple','yellow'],index=[0,2,4])

In [68]:
obj3

0      blue
2    purple
4    yellow
dtype: object

In [69]:
obj3.reindex(range(6),method='ffill')

0      blue
1      blue
2    purple
3    purple
4    yellow
5    yellow
dtype: object

In [70]:
frame = pd.DataFrame(np.arange(9).reshape((3, 3)), index=['a', 'c', 'd'],columns=['Ohio', 'Texas', 'California'])


In [71]:
frame

Unnamed: 0,Ohio,Texas,California
a,0,1,2
c,3,4,5
d,6,7,8


In [74]:
frame2=frame.reindex(['a','b','c','d'])

In [75]:
frame2

Unnamed: 0,Ohio,Texas,California
a,0.0,1.0,2.0
b,,,
c,3.0,4.0,5.0
d,6.0,7.0,8.0


In [81]:
states=['Texas','Utah','California']

In [87]:
frame.reindex(columns=states)

Unnamed: 0,Texas,Utah,California
a,1,,2
c,4,,5
d,7,,8


In [90]:
frame.reindex(index=['a','b','c','d'],method='ffill')

Unnamed: 0,Ohio,Texas,California
a,0,1,2
b,0,1,2
c,3,4,5
d,6,7,8


In [91]:
obj=pd.Series(np.arange(5.),index=['a','b','c','d','e'])

In [92]:
obj

a    0.0
b    1.0
c    2.0
d    3.0
e    4.0
dtype: float64

In [93]:
new_obj=obj.drop('c')
new_obj

a    0.0
b    1.0
d    3.0
e    4.0
dtype: float64

In [94]:
obj.drop(['d','c'])

a    0.0
b    1.0
e    4.0
dtype: float64

In [100]:
data=pd.DataFrame(np.arange(16).reshape((4,4)),index=['ohio','california','utah','new york'],columns=['one','two','three','four'])

In [101]:
data

Unnamed: 0,one,two,three,four
ohio,0,1,2,3
california,4,5,6,7
utah,8,9,10,11
new york,12,13,14,15


In [102]:
data.drop(['california','ohio'])

Unnamed: 0,one,two,three,four
utah,8,9,10,11
new york,12,13,14,15


In [103]:
data.drop('two',axis=1)

Unnamed: 0,one,three,four
ohio,0,2,3
california,4,6,7
utah,8,10,11
new york,12,14,15
