## Pandas Basics

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Seed NumPy's legacy global RNG before the first random draw so that
# "Restart Kernel -> Run All" reproduces the same numbers in every cell below.
# (Saved outputs must be refreshed by one re-run after this change.)
np.random.seed(42)

# Five standard-normal samples labelled 'a'..'e'.
s = pd.Series(np.random.randn(5), index=['a', 'b', 'c', 'd', 'e'])

In [3]:
# No index is supplied, so pandas labels the five samples 0..4 by default.
s1 = pd.Series(data=np.random.randn(5))
s1

0    0.743771
1    1.156209
2   -0.458626
3   -2.208660
4    0.871080
dtype: float64

In [4]:
# Display the labelled Series `s` (index 'a'..'e') created earlier.
s

a   -2.482180
b    0.828244
c    0.171191
d    3.439166
e    0.120896
dtype: float64

In [5]:
# The index holds the row labels; these string labels are reported with dtype 'object'.
s.index

Index(['a', 'b', 'c', 'd', 'e'], dtype='object')

In [6]:
# A Series built without an explicit index gets the default integer labels 0..4.
pd.Series(np.random.randn(5))

0   -1.648016
1   -0.176764
2   -0.188250
3   -0.376087
4    0.706117
dtype: float64

In [7]:
# Plain mapping of label -> float, used below to build Series from a dict.
d = dict(a=0.0, b=1.0, c=2.0)

In [8]:
# Series from a dict: the keys become the index labels, the values the data.
pd.Series(d)

a    0.0
b    1.0
c    2.0
dtype: float64

In [9]:
# An explicit `index` selects and orders the dict entries by label.
# 'd' is not a key of the dict, so its value shows up as NaN.
pd.Series(d, index=['b', 'c', 'd', 'a'])

b    1.0
c    2.0
d    NaN
a    0.0
dtype: float64

In [10]:
# A scalar value is repeated for every label in the supplied index.
pd.Series(5., index=['a', 'b', 'c', 'd', 'e'])

a    5.0
b    5.0
c    5.0
d    5.0
e    5.0
dtype: float64

In [11]:
# First element by position. `s` has string labels, so a bare `s[0]` relies on
# the integer-key positional fallback that pandas has deprecated; `.iloc` is
# the explicit positional accessor and returns the same value.
s.iloc[0]

-2.4821796943434666

In [12]:
# First three elements by position (an integer slice on `s` is positional).
s.iloc[:3]

a   -2.482180
b    0.828244
c    0.171191
dtype: float64

In [13]:
# Look up a single value by its index label.
s.loc['a']

-2.4821796943434666

In [14]:
# Overwrite the value stored under label 'e' (mutates `s` in place).
s.loc['e'] = 12.0

In [15]:
# Re-display `s` to confirm the assignment to label 'e' took effect.
s

a    -2.482180
b     0.828244
c     0.171191
d     3.439166
e    12.000000
dtype: float64

In [16]:
# Dict-style lookup by label; here it returns the value stored under 'a'.
# NOTE(review): per the pandas docs `.get` returns a default instead of raising
# when the label is absent - confirm if that behaviour is relied upon.
s.get('a')

-2.4821796943434666

In [17]:
# Two independent five-sample Series, drawn in order (ts1 first, then ts2).
ts1, ts2 = (pd.Series(np.random.randn(5)) for _ in range(2))

In [18]:
# Map column name -> Series; this dict feeds the DataFrame constructor next.
d = dict(col1=ts1, col2=ts2)
d

{'col1': 0   -0.429514
 1   -0.996424
 2    0.705889
 3   -0.120557
 4    0.607724
 dtype: float64, 'col2': 0    0.004748
 1    0.385571
 2    0.780920
 3   -0.157166
 4   -2.382349
 dtype: float64}

In [19]:
# DataFrame from a dict of Series: one column per key.
df1 = pd.DataFrame(d)
df1

Unnamed: 0,col1,col2
0,-0.429514,0.004748
1,-0.996424,0.385571
2,0.705889,0.78092
3,-0.120557,-0.157166
4,0.607724,-2.382349


In [20]:
# DataFrame from a 10x5 array: rows and columns both get default integer labels.
df2 = pd.DataFrame(data=np.random.randn(10, 5))
df2

Unnamed: 0,0,1,2,3,4
0,0.34341,0.457696,-0.89791,0.459412,0.310223
1,-1.843798,0.227068,0.566616,-0.383058,-0.510329
2,0.455385,0.350169,-1.155758,-0.472407,0.459832
3,-0.176023,0.261241,-0.918962,1.005805,1.821841
4,-0.183226,0.685465,-0.78393,0.575304,1.698093
5,0.537634,-0.456955,-0.717094,-0.140822,-0.464075
6,-0.028829,-1.398598,-2.458489,0.575361,-0.004448
7,0.50261,1.734241,1.272098,-0.36728,-0.435266
8,3.15966,1.300011,0.116335,1.118826,0.63501
9,-0.554674,-0.600743,-0.656925,-2.618613,0.025467


In [21]:
# Same 10x5 shape, but with explicit column labels 'a'..'e'.
df3 = pd.DataFrame(
    data=np.random.randn(10, 5),
    columns=list('abcde'),
)
df3

Unnamed: 0,a,b,c,d,e
0,-0.036612,0.912985,1.770308,-0.167828,-0.344511
1,-0.21529,0.840798,0.928539,0.453119,1.648046
2,-0.136316,-1.207606,-1.654559,-0.979253,0.052128
3,-0.931426,-0.04667,-0.015089,-0.265272,-1.475054
4,-0.034268,1.235839,-0.752505,0.307318,-1.419611
5,0.00382,-0.395598,-1.850319,0.19607,-0.207548
6,0.660602,-0.522672,0.254985,0.573095,-1.738207
7,-0.358716,0.20453,0.193537,0.417359,0.1501
8,1.18968,0.100903,-0.389515,0.952755,1.169476
9,-1.099982,-0.189002,-0.03914,0.34673,0.587474


In [22]:
# Two Series with different indexes: 'one' has no entry for label 'd', so the
# combined frame holds NaN in that cell.
d = {
    'one': pd.Series([1.0, 2.0, 3.0], index=['a', 'b', 'c']),
    'two': pd.Series([1.0, 2.0, 3.0, 4.0], index=['a', 'b', 'c', 'd']),
}
df = pd.DataFrame(d)
df

Unnamed: 0,one,two
a,1.0,1.0
b,2.0,2.0
c,3.0,3.0
d,,4.0


In [23]:
# Passing `index` keeps only the requested row labels, in the given order;
# 'one' has no value for 'd', so that cell is NaN.
pd.DataFrame(d, index=['d', 'b', 'a'])

Unnamed: 0,one,two
d,,4.0
b,2.0,2.0
a,1.0,1.0


In [24]:
# Row labels of `df`: the union of the labels of the two input Series.
df.index

Index(['a', 'b', 'c', 'd'], dtype='object')

In [25]:
# Column labels of `df`: the keys of the dict it was built from.
df.columns

Index(['one', 'two'], dtype='object')

In [26]:
# Check whether any row label is missing; for this frame the answer is False.
df.index.hasnans

False

In [27]:
# Load the sample data (columns: date, variable, value) from 'data1.csv'.
# The path is relative, so the file must sit in the current working directory.
dfc = pd.read_csv('data1.csv')
dfc

Unnamed: 0,date,variable,value
0,2000-01-03,A,0.469112
1,2000-01-04,A,-0.282863
2,2000-01-05,A,-1.509059
3,2000-01-03,B,-1.135632
4,2000-01-04,B,1.212112
5,2000-01-05,B,-0.173215
6,2000-01-03,C,0.119209
7,2000-01-04,C,-1.044236
8,2000-01-05,C,-0.861849
9,2000-01-03,D,-2.104569


In [28]:
# Boolean-mask row selection: keep only the rows whose 'variable' is 'A'.
dfc.loc[dfc['variable'] == 'A']

Unnamed: 0,date,variable,value
0,2000-01-03,A,0.469112
1,2000-01-04,A,-0.282863
2,2000-01-05,A,-1.509059


In [29]:
# Reshape long -> wide: one row per date, one column per variable,
# with the cells filled from the 'value' column.
dfc.pivot(index='date', columns='variable', values='value')

variable,A,B,C,D
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2000-01-03,0.469112,-1.135632,0.119209,-2.104569
2000-01-04,-0.282863,1.212112,-1.044236,-0.494929
2000-01-05,-1.509059,-0.173215,-0.861849,1.071804


In [30]:
# Summary statistics (count, mean, std, quartiles, min/max); only the numeric
# 'value' column appears in the result here.
dfc.describe()

Unnamed: 0,value
count,12.0
mean,-0.39451
std,1.007649
min,-2.104569
25%,-1.067085
50%,-0.388896
75%,0.206685
max,1.212112
