In [74]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

Read in the data

In [125]:
# Read the CSV export: the first 5 rows are skipped so that the next row is
# used as the header, and every 'ND' cell is turned into NaN.
data = pd.read_csv("FRB_H15.csv", skiprows=5, header=0)
data = data.replace('ND', np.nan)

# Discard the series that the cells shown below never touch.
data = data.drop(
    [
        'RIFLGFCM01_N.B',
        'RIFLGFCM03_N.B',
        'RIFLGFCM06_N.B',
        'RIFLGFCY02_N.B',
        'RIFLGFCY07_N.B',
        'RIFLGFCY20_N.B',
        'RIFLGFCY30_N.B',
    ],
    axis=1,
)

# Parse the date column into real timestamps so it can be joined on later.
data['Time Period'] = pd.to_datetime(data['Time Period'])

Generate the full range of calendar dates, since some dates are missing from the data

In [126]:
# One row per calendar day (both endpoints included), already labelled with the
# column name used as the join key.
dates = pd.DataFrame(
    {'Time Period': pd.date_range(start='1962-01-02', end='2017-10-5')}
)

Left-join the data onto the generated dates. Dates that are missing from the data will then appear as rows with NA values.

In [127]:
# Keep every generated calendar day; days absent from the data get NaN in all
# value columns.
data = pd.merge(dates, data, on='Time Period', how='left')

Create weekend and day of week features

In [128]:
# pandas numbers weekdays Monday=0 ... Sunday=6, so 5 and 6 are the weekend.
day_of_week = data['Time Period'].dt.dayofweek

# 1 on Saturday/Sunday, 0 otherwise. The flag column is added before the
# day-of-week column so the column order stays the same.
weekend = day_of_week.isin([5, 6]).astype('int64').rename('IS_WEEKEND')
data['IS_WEEKEND'] = weekend
data['Day of week'] = day_of_week

Impute missing values in the table. Use most recent existing values for imputation.

In [129]:
# Impute every gap with the most recent earlier observation (forward fill).
#
# Weekend rows are blanked first so they always inherit the preceding day's
# value, which is what the previous element-by-element loop did. All remaining
# gaps are then filled the same way.
#
# Unlike the loop version, a weekend or missing value on the very first row no
# longer raises KeyError from looking up index -1; with no earlier observation
# to copy, such leading rows simply stay NaN.
rate_columns = ['RIFLGFCY01_N.B', 'RIFLGFCY03_N.B', 'RIFLGFCY05_N.B', 'RIFLGFCY10_N.B']
is_weekend = data['IS_WEEKEND'] == 1

# f1..f4 are kept as names in case later cells still refer to them.
f1, f2, f3, f4 = [data[column].mask(is_weekend).ffill() for column in rate_columns]

data['RIFLGFCY01_N.B'] = f1
data['RIFLGFCY03_N.B'] = f2
data['RIFLGFCY05_N.B'] = f3
data['RIFLGFCY10_N.B'] = f4

In [101]:
# Save the imputed table without writing the row index as a column.
data.to_csv(path_or_buf="Weekend Imputed.csv", index=False)

In [135]:
from statsmodels.tsa.arima_model import ARIMA
from pandas.tools.plotting import autocorrelation_plot

In [138]:
# Convert the four retained series to float so they can be used numerically.
for rate_column in ('RIFLGFCY01_N.B', 'RIFLGFCY03_N.B', 'RIFLGFCY05_N.B', 'RIFLGFCY10_N.B'):
    data[rate_column] = data[rate_column].astype(float)

Generate autocorrelation plot for 2010-2017 data

In [144]:
# Rows from position 17531 onward; with the daily calendar starting on
# 1962-01-02 that position is 2010-01-01 (assuming the merge added no
# duplicate dates).
recent_y01 = data['RIFLGFCY01_N.B'].iloc[17531:]
autocorrelation_plot(recent_y01)
plt.show()

In [146]:
# Autocorrelation of the series from row 17531 onward at lags 0..99.
autocorrlist = [
    data['RIFLGFCY01_N.B'].iloc[17531:].autocorr(lag=lag)
    for lag in range(100)
]

In [164]:
# Line plot of the series from row 17531 onward.
y01_tail = data['RIFLGFCY01_N.B'].iloc[17531:]
plt.plot(y01_tail)
plt.show()

In [154]:
# Save the imputed, float-typed table without writing the row index.
data.to_csv(path_or_buf="Imputed Data.csv", index=False)

Compute rolling mean for a window of 100 days

In [184]:
# Plot the series from row 17531 onward (red) with its 100-row rolling mean
# (blue). `pd.rolling_mean` is deprecated and absent from later pandas
# releases; `Series.rolling(...).mean()` is the replacement named by the
# deprecation warning and gives the same values.
y01_since_2010 = data['RIFLGFCY01_N.B'][17531:]
plt.plot(y01_since_2010, color='red')
plt.plot(y01_since_2010.rolling(window=100, center=False).mean(), color='blue')
plt.show()

	Series.rolling(window=300,center=False).mean()
  from ipykernel import kernelapp as app


Subtract the rolling mean from the data to detrend it

In [190]:
# Detrend: subtract the 100-row rolling mean from the series (rows 17531 onward).
# The first 99 entries of `diff` are NaN because the rolling mean needs a full
# window of 100 observations. `Series.rolling(...).mean()` replaces the
# deprecated `pd.rolling_mean` and gives the same values.
y01_recent = data['RIFLGFCY01_N.B'][17531:]
diff = y01_recent - y01_recent.rolling(window=100, center=False).mean()

	Series.rolling(window=100,center=False).mean()
  if __name__ == '__main__':


Check for stationarity using the augmented Dickey-Fuller test

In [189]:
from statsmodels.tsa.stattools import adfuller
def test_stationarity(timeseries):
    
    #Determing rolling statistics
    rolmean = pd.rolling_mean(timeseries, window=12)
    rolstd = pd.rolling_std(timeseries, window=12)

    #Plot rolling statistics:
    orig = plt.plot(timeseries, color='blue',label='Original')
    mean = plt.plot(rolmean, color='red', label='Rolling Mean')
    std = plt.plot(rolstd, color='black', label = 'Rolling Std')
    plt.legend(loc='best')
    plt.title('Rolling Mean & Standard Deviation')
    plt.show(block=False)
    
    #Perform Dickey-Fuller test:
    print 'Results of Dickey-Fuller Test:'
    dftest = adfuller(timeseries, autolag='AIC')
    dfoutput = pd.Series(dftest[0:4], index=['Test Statistic','p-value','#Lags Used','Number of Observations Used'])
    for key,value in dftest[4].items():
        dfoutput['Critical Value (%s)'%key] = value
    print dfoutput

In [196]:
# Drop the first 99 rows by position before running the stationarity check.
test_stationarity(diff.iloc[99:])

	Series.rolling(window=12,center=False).mean()
	Series.rolling(window=12,center=False).std()


Results of Dickey-Fuller Test:
Test Statistic                   -5.593215
p-value                           0.000001
#Lags Used                       28.000000
Number of Observations Used    2707.000000
Critical Value (5%)              -2.862608
Critical Value (1%)              -3.432768
Critical Value (10%)             -2.567339
dtype: float64
