Pandas is a high-performance module that provides a comprehensive set of structures for working with data.

# 17.1 Data Structures


In [2]:
import numpy as np
import pandas as pd
from pandas import DataFrame, Series

# Build a 1-D float array; the bare name on the last line makes the cell echo it.
a = np.array([0.1, 1.2, 2.3, 3.4, 4.5])
a

array([ 0.1,  1.2,  2.3,  3.4,  4.5])

In [3]:
a

array([ 0.1,  1.2,  2.3,  3.4,  4.5])

In [4]:
# Without an explicit index, the Series is labelled 0..n-1.
s = Series([0.1, 1.2, 2.3, 3.4, 4.5])
s

0    0.1
1    1.2
2    2.3
3    3.4
4    4.5
dtype: float64

In [5]:
# Wrap the NumPy array in a Series that carries string labels instead of the default integers.
s = Series(a, index=list('abcde'))

In [6]:
s

a    0.1
b    1.2
c    2.3
d    3.4
e    4.5
dtype: float64

In [7]:
s['a']

0.10000000000000001

In [8]:
# Positional selection. Use .iloc: plain s[0] on a string-labelled Series relies on
# an integer-position fallback that newer pandas versions deprecate.
s.iloc[0]

0.10000000000000001

In [9]:
s['a':'c'] # Label-based slice: the end label 'c' is included in the result

a    0.1
b    1.2
c    2.3
dtype: float64

In [10]:
s[['a','c']]

a    0.1
c    2.3
dtype: float64

In [11]:
# Two Series that share the single repeated label 'a'.
s1 = Series([1.0, 2, 3], index=['a'] * 3)
s2 = Series([4.0, 5], index=['a'] * 2)
# Only the last expression of a cell is echoed, so a bare `s1` before this line
# would be a no-op; s1 is displayed in the next cell instead.
s2

a    4.0
a    5.0
dtype: float64

In [12]:
s1

a    1.0
a    2.0
a    3.0
dtype: float64

In [13]:
s1+s2

a    5.0
a    6.0
a    6.0
a    7.0
a    7.0
a    8.0
dtype: float64

In [14]:
# fillna demo setup: the two Series overlap only on label 'c',
# so their sum is NaN for every other label.
s1 = Series(np.arange(1.0, 4.0), index=list('abc'))
s2 = Series(np.arange(1.0, 4.0), index=list('cde'))
s3 = s1.add(s2)

In [15]:
s3.fillna(-1.0)

a   -1.0
b   -1.0
c    4.0
d   -1.0
e   -1.0
dtype: float64

In [16]:
# Series.append was removed in pandas 2.0; pd.concat is the supported way to add rows.
# The result is a new Series -- s3 itself is left unchanged.
pd.concat([s3, Series([4], index=['f'])])

a    NaN
b    NaN
c    4.0
d    NaN
e    NaN
f    4.0
dtype: float64

In [17]:
# replace(to_replace, value): swap every 1 for 4.0. s3 holds no 1, so the result equals s3.
# NOTE(review): possibly meant to target the NaNs or the 4.0 -- confirm intent.
s3.replace(1,4.0)

a    NaN
b    NaN
c    4.0
d    NaN
e    NaN
dtype: float64

In [18]:
s3

a    NaN
b    NaN
c    4.0
d    NaN
e    NaN
dtype: float64

In [19]:
s3.dropna()

c    4.0
dtype: float64

In [20]:
# Reset s1 to 1.0, 2.0, 3.0 labelled a-c.
s1 = Series(np.arange(1.0, 4.0), index=list('abc'))
s1

a    1.0
b    2.0
c    3.0
dtype: float64

In [22]:
# Negated 1.0..3.0, labelled c-e.
s2 = Series(-np.arange(1.0, 4.0), index=list('cde'))

In [23]:
s2

c   -1.0
d   -2.0
e   -3.0
dtype: float64

In [24]:
# update() modifies s1 in place: values are overwritten only for labels s1 already
# has ('c'); the extra labels in s2 ('d', 'e') are not added.
s1.update(s2)

In [25]:
s1

a    1.0
b    2.0
c   -1.0
dtype: float64

In [26]:
#17.1.2 DataFrame
# A 2-D array becomes a DataFrame with default integer row and column labels.
# (DataFrame is imported in the notebook's top import cell.)
a = np.array([[1.0, 2], [3, 4]])
df = DataFrame(a)
df

Unnamed: 0,0,1
0,1.0,2.0
1,3.0,4.0


In [27]:
# Same construction, but naming the columns up front.
df = DataFrame(np.array([[1, 2], [3, 4]]), columns=list('ab'))
df

Unnamed: 0,a,b
0,1,2
1,3,4


In [28]:
# Start again from an unlabelled integer frame.
df = DataFrame(np.array([[1, 2], [3, 4]]))

In [29]:
# Column labels can also be assigned after construction.
df.columns = ['dogs','cats']

In [30]:
df

Unnamed: 0,dogs,cats
0,1,2
1,3,4


In [31]:
# Row labels are replaced through the index attribute.
df.index = ['A','B']

In [32]:
df

Unnamed: 0,dogs,cats
A,1,2
B,3,4


In [38]:
# A DataFrame can also be assembled from several Series:
# each dict key becomes a column name.
s1 = Series(np.arange(0.0, 5))
s2 = Series(np.arange(1.0, 6))
s3 = DataFrame(dict(one=s1, two=s2))


In [39]:
s3

Unnamed: 0,one,two
0,0.0,1.0
1,1.0,2.0
2,2.0,3.0
3,3.0,4.0
4,4.0,5.0


In [40]:
s3.one

0    0.0
1    1.0
2    2.0
3    3.0
4    4.0
Name: one, dtype: float64

In [41]:
s3[1:3]

Unnamed: 0,one,two
1,1.0,2.0
2,2.0,3.0


In [42]:
s3.iloc[1:3]

Unnamed: 0,one,two
1,1.0,2.0
2,2.0,3.0


In [43]:
# Boolean mask: True on the rows whose 'one' value exceeds 3.
s3b = s3['one'].gt(3)

In [44]:
s3b

0    False
1    False
2    False
3    False
4     True
Name: one, dtype: bool

In [45]:
s3[s3b]

Unnamed: 0,one,two
4,4.0,5.0


In [46]:
# .ix was removed in pandas 1.0; .loc takes the boolean row mask plus a column label.
s3.loc[s3b, 'two']

4    5.0
Name: two, dtype: float64

In [47]:
# .ix was removed in pandas 1.0; .loc selects the single cell at row label 1, column 'two'.
s3.loc[1, 'two']

2.0

In [50]:
# .ix was removed in pandas 1.0; .loc slices by row label, and the end label 2 is included.
s3.loc[1:2, :]

Unnamed: 0,one,two
1,1.0,2.0
2,2.0,3.0


In [52]:
# Pull column 'one' out of the frame as a Series.
s4 = s3['one']

In [53]:
# Assigning to a new key adds the column at the right-hand end of the frame.
s3['three'] = s4

In [55]:
# insert() places the new column at a chosen position (here 1, right after 'one')
# and modifies s3 in place.
s3.insert(1,'four',s4)

In [56]:
s3

Unnamed: 0,one,four,two,three
0,0.0,0.0,1.0,0.0
1,1.0,1.0,2.0,1.0
2,2.0,2.0,3.0,2.0
3,3.0,3.0,4.0,3.0
4,4.0,4.0,5.0,4.0


In [57]:
# Deleting columns: del, pop and drop are the available tools.
# The del keyword removes the named column from s3 in place.
# (pop and drop are the alternatives.)

del s3['four']


In [58]:
s3

Unnamed: 0,one,two,three
0,0.0,1.0,0.0
1,1.0,2.0,1.0
2,2.0,3.0,2.0
3,3.0,4.0,3.0
4,4.0,5.0,4.0


In [61]:
# drop() returns a new frame without 'three'; s3 itself keeps the column.
s5 = s3.drop('three', axis='columns')

In [62]:
s5

Unnamed: 0,one,two
0,0.0,1.0
1,1.0,2.0
2,2.0,3.0
3,3.0,4.0
4,4.0,5.0


In [63]:
s3

Unnamed: 0,one,two,three
0,0.0,1.0,0.0
1,1.0,2.0,1.0
2,2.0,3.0,2.0
3,3.0,4.0,3.0
4,4.0,5.0,4.0


In [64]:
# Set a single cell by row label and column name. Plain s3[0, 'one'] = 10 would
# instead create a new column keyed by the tuple (0, 'one').
s3.loc[0, 'one'] = 10

s3.columns