## Pandas Basics

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Five standard-normal draws labelled 'a'..'e'.
# No random seed is set, so the values differ on every fresh run.
s = pd.Series(np.random.randn(5), index=list('abcde'))

In [3]:
# Display the Series: one row per label, followed by the dtype.
s

a   -0.674392
b    1.497194
c    1.423559
d   -0.854313
e    0.420101
dtype: float64

In [4]:
# Without an explicit index, pandas assigns the default integer labels 0..4.
s1 = pd.Series(data=np.random.randn(5))
s1

0    1.122031
1   -0.398403
2   -0.013872
3   -1.055996
4   -1.061840
dtype: float64

In [5]:
# Show the labelled Series again, for comparison with the default-indexed s1.
s

a   -0.674392
b    1.497194
c    1.423559
d   -0.854313
e    0.420101
dtype: float64

In [6]:
# The index object holds the row labels of the Series.
s.index

Index(['a', 'b', 'c', 'd', 'e'], dtype='object')

In [7]:
# Anonymous Series of five random draws; it is displayed but not kept in a variable.
pd.Series(np.random.randn(5))

0    0.591537
1   -0.769505
2    0.211394
3    1.324136
4    1.451616
dtype: float64

In [8]:
# Plain mapping of label -> float value, used below to build a Series.
d = dict(a=0.0, b=1.0, c=2.0)

In [9]:
# Build a Series from a dict: the keys become the index labels, the values the data.
pd.Series(d)

a    0.0
b    1.0
c    2.0
dtype: float64

In [10]:
# An explicit index selects and orders the dict entries by label;
# a label with no matching key (here 'd') yields NaN.
pd.Series(d, index=['b', 'c', 'd', 'a'])

b    1.0
c    2.0
d    NaN
a    0.0
dtype: float64

In [11]:
# A scalar value is repeated for every label in the supplied index.
pd.Series(5., index=['a', 'b', 'c', 'd', 'e'])

a    5.0
b    5.0
c    5.0
d    5.0
e    5.0
dtype: float64

In [12]:
# First element by position. Use .iloc explicitly: s has string labels, and
# passing an integer key to s[...] relies on a positional fallback that
# pandas has deprecated (integer keys will be treated as labels instead).
s.iloc[0]

-0.6743917002436214

In [13]:
# Positional slice: the first three elements, keeping their labels.
s.iloc[:3]

a   -0.674392
b    1.497194
c    1.423559
dtype: float64

In [14]:
# Label-based lookup of the value stored under 'a'.
s.loc['a']

-0.6743917002436214

In [15]:
# Overwrite the value at label 'e' in place; later displays of s show 12.0 there.
s.loc['e'] = 12.0

In [16]:
# Display s again to confirm that the value at label 'e' was replaced.
s

a    -0.674392
b     1.497194
c     1.423559
d    -0.854313
e    12.000000
dtype: float64

In [17]:
# Fetch the value stored under label 'a' through the Series.get accessor.
s.get('a')

-0.6743917002436214

In [18]:
# Two independent five-element random Series (ts1 is drawn first, then ts2),
# each with the default 0..4 index. They serve as DataFrame columns below.
ts1, ts2 = (pd.Series(np.random.randn(5)) for _ in range(2))

In [19]:
# Display the first of the two random Series.
ts1

0   -1.349851
1    0.766841
2    0.103116
3    0.210660
4   -0.394909
dtype: float64

In [20]:
# Rebind d to a mapping of column name -> Series (it previously held label -> float).
d = dict(col1=ts1, col2=ts2)
d

{'col1': 0   -1.349851
 1    0.766841
 2    0.103116
 3    0.210660
 4   -0.394909
 dtype: float64,
 'col2': 0    0.334838
 1    1.506712
 2   -0.402276
 3    0.510360
 4   -1.506622
 dtype: float64}

In [21]:
# Assemble a DataFrame from the dict of Series: each key becomes a column,
# and the rows follow the Series' shared index.
df1 = pd.DataFrame(d)
df1

Unnamed: 0,col1,col2
0,-1.349851,0.334838
1,0.766841,1.506712
2,0.103116,-0.402276
3,0.21066,0.51036
4,-0.394909,-1.506622


In [22]:
# 10x5 frame of random draws; with no labels given, both axes get default integer labels.
df2 = pd.DataFrame(data=np.random.randn(10, 5))
df2

Unnamed: 0,0,1,2,3,4
0,0.094523,-0.206156,0.507045,-0.079828,-1.660469
1,1.31483,-1.271432,0.823118,-1.599488,-0.296044
2,2.006892,0.853438,-0.563616,-2.531948,0.135765
3,-0.622905,-0.995045,-0.117854,-0.210452,-0.962907
4,0.173718,2.288802,-0.777351,-0.942901,0.763486
5,0.046613,2.213526,0.695668,0.047518,-0.733645
6,-0.135402,0.832444,1.28863,0.76506,1.135805
7,-0.624412,-1.431361,0.081382,0.190692,0.731325
8,0.999774,0.352476,-0.211199,0.715979,-0.089187
9,0.959122,-0.493201,0.111786,-2.545743,0.538978


In [23]:
# Same 10x5 random shape as before, this time with named columns 'a'..'e'.
df3 = pd.DataFrame(np.random.randn(10, 5), columns=list('abcde'))
df3

Unnamed: 0,a,b,c,d,e
0,-0.842538,0.178754,-1.439285,-0.783321,0.262552
1,-1.289168,0.269655,-0.26241,0.834895,-1.324148
2,-0.366714,-2.175928,0.196176,-1.295627,-0.078307
3,0.551485,0.958922,1.189267,-0.82918,-0.928113
4,-0.457782,-1.750701,0.939293,0.484411,-0.371825
5,0.165907,0.350404,0.410526,2.140748,2.155019
6,0.63837,-0.417376,-1.130943,-0.21962,0.016128
7,0.273148,-0.57918,0.650477,-1.780963,0.444284
8,0.220549,-0.270805,-2.005218,0.726319,-0.582163
9,-2.606195,0.658106,-0.939152,-2.5762,0.164511


In [24]:
# Two Series with different label sets: 'one' has no entry for 'd'.
# Building a DataFrame aligns them on the union of labels and fills the gap with NaN.
d = {
    'one': pd.Series([1.0, 2.0, 3.0], index=list('abc')),
    'two': pd.Series([1.0, 2.0, 3.0, 4.0], index=list('abcd')),
}
df = pd.DataFrame(d)
df

Unnamed: 0,one,two
a,1.0,1.0
b,2.0,2.0
c,3.0,3.0
d,,4.0


In [25]:
# Passing an index keeps only the listed row labels, in the order given.
pd.DataFrame(d, index=['d', 'b', 'a'])

Unnamed: 0,one,two
d,,4.0
b,2.0,2.0
a,1.0,1.0


In [26]:
# Row labels of df (the union of the labels of both input Series).
df.index

Index(['a', 'b', 'c', 'd'], dtype='object')

In [27]:
# Column labels of df (the keys of the dict it was built from).
df.columns

Index(['one', 'two'], dtype='object')

In [28]:
# Check whether the index itself contains missing labels.
# This concerns the labels only, not the NaN stored in column 'one'.
df.index.hasnans

False

In [29]:
# Load the long-format sample data. The path is relative, so data1.csv must
# sit in the notebook's working directory.
dfc = pd.read_csv('data1.csv')
dfc

Unnamed: 0,date,variable,value
0,2000-01-03,A,0.469112
1,2000-01-04,A,-0.282863
2,2000-01-05,A,-1.509059
3,2000-01-03,B,-1.135632
4,2000-01-04,B,1.212112
5,2000-01-05,B,-0.173215
6,2000-01-03,C,0.119209
7,2000-01-04,C,-1.044236
8,2000-01-05,C,-0.861849
9,2000-01-03,D,-2.104569


In [30]:
# Element-wise comparison yields a boolean Series: True where variable is 'A'.
dfc['variable'] == 'A'

0      True
1      True
2      True
3     False
4     False
5     False
6     False
7     False
8     False
9     False
10    False
11    False
Name: variable, dtype: bool

In [31]:
# Boolean-mask row selection: keep only the rows whose variable is 'A'.
dfc.loc[dfc['variable'] == 'A']

Unnamed: 0,date,variable,value
0,2000-01-03,A,0.469112
1,2000-01-04,A,-0.282863
2,2000-01-05,A,-1.509059


In [32]:
# Reshape long -> wide: one row per date, one column per variable,
# with the cells taken from the 'value' column.
dfc.pivot(index='date', columns='variable', values='value')

variable,A,B,C,D
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2000-01-03,0.469112,-1.135632,0.119209,-2.104569
2000-01-04,-0.282863,1.212112,-1.044236,-0.494929
2000-01-05,-1.509059,-0.173215,-0.861849,1.071804


In [33]:
# Summary statistics (count, mean, std, min, quartiles, max); here only the
# 'value' column is summarised.
dfc.describe()

Unnamed: 0,value
count,12.0
mean,-0.39451
std,1.007649
min,-2.104569
25%,-1.067085
50%,-0.388896
75%,0.206685
max,1.212112
