In [1]:
import pandas as pd
from pandas.io.data import DataReader
import numpy as np
from dateutil.parser import parse

The pandas.io.data module is moved to a separate package (pandas-datareader) and will be removed from pandas in a future version.
After installing the pandas-datareader package (https://github.com/pydata/pandas-datareader), you can change the import ``from pandas.io import data, wb`` to ``from pandas_datareader import data, wb``.


## Window Functions

In [2]:
idx = pd.date_range('1/1/2000', periods=500, freq='d')

In [3]:
idx

DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-03', '2000-01-04',
               '2000-01-05', '2000-01-06', '2000-01-07', '2000-01-08',
               '2000-01-09', '2000-01-10',
               ...
               '2001-05-05', '2001-05-06', '2001-05-07', '2001-05-08',
               '2001-05-09', '2001-05-10', '2001-05-11', '2001-05-12',
               '2001-05-13', '2001-05-14'],
              dtype='datetime64[ns]', length=500, freq='D')

In [4]:
ser = pd.Series(np.random.randn(len(idx)), index=idx)

In [5]:
ser

2000-01-01    2.442100
2000-01-02   -0.675982
2000-01-03   -0.642238
2000-01-04    0.998327
2000-01-05   -0.157217
2000-01-06   -0.060568
2000-01-07    0.018842
2000-01-08    0.515121
2000-01-09    0.288402
2000-01-10    0.235223
2000-01-11    0.419987
2000-01-12   -0.184649
2000-01-13   -0.213494
2000-01-14    2.280596
2000-01-15    0.186151
2000-01-16   -0.407548
2000-01-17   -2.456670
2000-01-18   -0.334379
2000-01-19   -0.835238
2000-01-20    0.090643
2000-01-21   -1.591925
2000-01-22   -0.890288
2000-01-23   -0.097234
2000-01-24   -2.075792
2000-01-25    0.001939
2000-01-26   -0.126447
2000-01-27    0.040942
2000-01-28    0.109121
2000-01-29   -0.843421
2000-01-30    0.548469
                ...   
2001-04-15    2.199949
2001-04-16   -0.717898
2001-04-17    0.349674
2001-04-18    2.115316
2001-04-19    0.244658
2001-04-20   -1.307282
2001-04-21   -0.380056
2001-04-22   -0.446086
2001-04-23   -0.195893
2001-04-24   -0.422923
2001-04-25    0.942648
2001-04-26   -0.462705
2001-04-27 

In [6]:
ser2 = ser + ser.shift(2)

In [7]:
ser2

2000-01-01         NaN
2000-01-02         NaN
2000-01-03    1.799862
2000-01-04    0.322345
2000-01-05   -0.799455
2000-01-06    0.937759
2000-01-07   -0.138375
2000-01-08    0.454553
2000-01-09    0.307244
2000-01-10    0.750344
2000-01-11    0.708389
2000-01-12    0.050574
2000-01-13    0.206492
2000-01-14    2.095948
2000-01-15   -0.027343
2000-01-16    1.873048
2000-01-17   -2.270519
2000-01-18   -0.741927
2000-01-19   -3.291907
2000-01-20   -0.243736
2000-01-21   -2.427163
2000-01-22   -0.799645
2000-01-23   -1.689159
2000-01-24   -2.966080
2000-01-25   -0.095294
2000-01-26   -2.202239
2000-01-27    0.042881
2000-01-28   -0.017326
2000-01-29   -0.802480
2000-01-30    0.657590
                ...   
2001-04-15    3.958599
2001-04-16   -0.563048
2001-04-17    2.549623
2001-04-18    1.397418
2001-04-19    0.594332
2001-04-20    0.808034
2001-04-21   -0.135398
2001-04-22   -1.753368
2001-04-23   -0.575949
2001-04-24   -0.869009
2001-04-25    0.746755
2001-04-26   -0.885629
2001-04-27 

In [8]:
ser.resample('Q')

use .resample(...).mean() instead of .resample(...)
  return getattr(obj, attr, default)


DatetimeIndexResampler [freq=<QuarterEnd: startingMonth=12>, axis=0, closed=right, label=right, convention=start, base=0]

In [9]:
ser.resample('m')

use .resample(...).mean() instead of .resample(...)
  return getattr(obj, attr, default)


DatetimeIndexResampler [freq=<MonthEnd>, axis=0, closed=right, label=right, convention=start, base=0]

In [10]:
ser.resample('Q')

use .resample(...).mean() instead of .resample(...)
  return getattr(obj, attr, default)


DatetimeIndexResampler [freq=<QuarterEnd: startingMonth=12>, axis=0, closed=right, label=right, convention=start, base=0]

In [None]:
# http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases

In [11]:
spy = DataReader('SPY', data_source='yahoo')

In [12]:
spy.tail().T

Date,2016-10-03 00:00:00,2016-10-04 00:00:00,2016-10-05 00:00:00,2016-10-06 00:00:00,2016-10-07 00:00:00
Open,215.82,215.91,215.41,215.37,216.1
High,216.04,216.17,216.13,216.04,216.3
Low,215.04,213.99,215.33,214.74,214.19
Close,215.78,214.68,215.63,215.78,215.04
Volume,83512100.0,119948100.0,72816000.0,62927400.0,88510700.0
Adj Close,215.78,214.68,215.63,215.78,215.04


In [13]:
spy['mean50'] = pd.rolling_mean(spy.Close, 50)
spy['mean200'] = pd.rolling_mean(spy.Close, 200)

	Series.rolling(window=50,center=False).mean()
  if __name__ == '__main__':
	Series.rolling(window=200,center=False).mean()
  from ipykernel import kernelapp as app


In [14]:
spy.tail().T

Date,2016-10-03 00:00:00,2016-10-04 00:00:00,2016-10-05 00:00:00,2016-10-06 00:00:00,2016-10-07 00:00:00
Open,215.82,215.91,215.41,215.37,216.1
High,216.04,216.17,216.13,216.04,216.3
Low,215.04,213.99,215.33,214.74,214.19
Close,215.78,214.68,215.63,215.78,215.04
Volume,83512100.0,119948100.0,72816000.0,62927400.0,88510700.0
Adj Close,215.78,214.68,215.63,215.78,215.04
mean50,216.8188,216.7794,216.757,216.7422,216.7076
mean200,206.3811,206.4302,206.5082,206.5788,206.6365
