In [None]:
import numpy as np
import pandas as pd

import arima_gru

from fbprophet import Prophet

import keras
from keras import models
from keras import layers

from keras.layers import Dense, Activation, GRU, Dropout, Input
from keras.models import Sequential

from sklearn.preprocessing import StandardScaler

In [None]:
# Inclusive (start, end) bounds of the consecutive windows, listed
# chronologically from 2014-03-17 through 2019-03-15.
window_bounds = [
    ('2014-03-17', '2014-06-17'),
    ('2014-06-18', '2014-09-17'),
    ('2014-09-18', '2014-12-17'),
    ('2014-12-18', '2015-03-17'),

    ('2015-03-18', '2015-06-17'),
    ('2015-06-18', '2015-09-17'),
    ('2015-09-18', '2015-12-17'),
    ('2015-12-18', '2016-03-17'),

    ('2016-03-18', '2016-06-17'),
    ('2016-06-18', '2016-09-17'),
    ('2016-09-18', '2016-12-17'),
    ('2016-12-18', '2017-03-17'),

    ('2017-03-18', '2017-06-17'),
    ('2017-06-18', '2017-09-17'),
    ('2017-09-18', '2017-12-17'),
    ('2017-12-18', '2018-03-17'),

    ('2018-03-18', '2018-06-17'),
    ('2018-06-18', '2018-09-17'),
    ('2018-09-18', '2018-12-17'),
    ('2018-12-18', '2019-03-15'),
]

# One list of daily Timestamps per window, in the same order as the bounds.
date_ranges = [
    pd.date_range(start=first_day, end=last_day).tolist()
    for first_day, last_day in window_bounds
]

In [None]:
# Ticker column to model, and the window of daily dates used to fit it.
# The window must be identical to one entry of `date_ranges`, because the
# cells below locate it with `date_ranges.index(date_range)`.
etf = 'SPY'
date_range = pd.date_range('2014-03-17', '2014-06-17').tolist()

In [None]:
# Position of the selected window in `date_ranges`; the window right after it
# is the three-month horizon that gets forecast and scored here.
dr_index = date_ranges.index(date_range)
next_range = date_ranges[dr_index + 1]

# Read in etfs CSV and index it by date.
# `parse_dates=True` only applies to an index chosen at read time and
# `infer_datetime_format` is deprecated in recent pandas, so both are dropped;
# the explicit `pd.to_datetime` below does the actual date parsing.
etfs = pd.read_csv('/home/ubuntu/csvs/etfs_pivot.csv')
etfs = etfs.set_index('date')
etfs.index = pd.to_datetime(etfs.index)

# Make dataframe for just input ETF
etf_df = pd.DataFrame(etfs[etf])

# Get prices for single date range, renamed to the `ds`/`y` columns that are
# handed to Prophet.fit
true_price = etf_df[etf_df.index.isin(date_range)]
true_price = true_price.reset_index()
true_price.columns = ['ds', 'y']

# Fit the baseline forecaster. NOTE: this is a Prophet model, not an ARIMA
# model; the `arima_model` name is kept because later cells refer to it.
arima_model = Prophet()
arima_model.fit(true_price)

# Make predictions three months out
future = arima_model.make_future_dataframe(periods=len(next_range))
forecast = arima_model.predict(future)

# Extract predicted prices for three months out
three_months_out = forecast[forecast['ds'].isin(next_range)]
predicted_prices = pd.DataFrame(three_months_out[['ds', 'yhat']])
predicted_prices.columns = ['date', 'etf']

# Make dataframes of predicted prices and actual prices (both indexed by date)
predicted_prices = predicted_prices.set_index('date')
actual_prices = pd.DataFrame(etf_df[etf_df.index.isin(next_range)][etf])

# Convert predicted and actual prices to arrays
predicted_array = predicted_prices.values
actual_array = actual_prices.values

# Every date of the horizon must be present in both frames; otherwise the
# residual for that date is undefined. Fail loudly instead of misaligning.
covered_dates = predicted_prices.index.intersection(actual_prices.index)
missing_dates = pd.DatetimeIndex(next_range).difference(covered_dates)
if len(missing_dates) > 0:
    raise ValueError(
        'Cannot compute residuals: {} date(s) of the forecast window lack a '
        'predicted or actual price (first missing: {})'.format(
            len(missing_dates), missing_dates[0]
        )
    )

# Calculate forecast residuals (predicted - actual) and create dataframe.
# The subtraction is done on date-indexed Series so values are matched by
# date rather than by row position.
residuals = pd.DataFrame(index=next_range)
residuals['residual'] = predicted_prices['etf'] - actual_prices[etf]

In [None]:
# Position of the selected window in `date_ranges`; the GRU is trained on the
# window that follows it (the same window the residuals were computed for).
dr_index = date_ranges.index(date_range)

# Read in macroeconomic data CSV and index it by date.
# `parse_dates=True` only applies to an index chosen at read time and
# `infer_datetime_format` is deprecated in recent pandas, so both are dropped;
# the explicit `pd.to_datetime` below does the actual date parsing.
macro = pd.read_csv('/home/ubuntu/csvs/macro_pivot.csv')
macro = macro.set_index('date')
macro.index = pd.to_datetime(macro.index)

# Extract values for date range three months out and reshape to
# (n_dates, n_macro_columns, 1): one sample per date, with each macro column
# fed to the GRU as one step of a length-n_macro_columns sequence.
X_frame = macro[macro.index.isin(date_ranges[dr_index + 1])]
X_array = X_frame.values
X = np.reshape(X_array, (X_array.shape[0], X_array.shape[1], 1))

# Create target array for GRU model (one residual per date)
y = np.array(residuals['residual'])

gru_model = Sequential()

# Build GRU model. The sequence length is taken from X instead of being
# hard-coded, so the model matches whatever number of columns the macro CSV has.
gru_model.add(GRU(units=100, input_shape=(X.shape[1], 1), return_sequences=True))
gru_model.add(Dropout(0.2))
gru_model.add(GRU(units=50, return_sequences=False))
gru_model.add(Dropout(0.2))
gru_model.add(Dense(1))
gru_model.add(Activation('linear'))

# Compile and fit GRU model
gru_model.compile(loss='mse', optimizer='rmsprop')
gru_model.fit(x=X, y=y, batch_size=256, epochs=20, validation_split=0.2)
gru_model.summary()

In [None]:
# Forecast far enough ahead to cover the next window and the one after it,
# reusing the fitted baseline forecaster (a Prophet model, despite its name).
six_month_range = date_ranges[dr_index + 2]
horizon = len(date_ranges[dr_index + 1]) + len(six_month_range)
new_future = arima_model.make_future_dataframe(periods=horizon)
new_forecast = arima_model.predict(new_future)

# Extract baseline predictions 6 months out and actual prices six months out for validation
new_forecast_predictions = new_forecast[new_forecast['ds'].isin(six_month_range)]['yhat'].values
new_actuals = etf_df[etf_df.index.isin(six_month_range)][etf].values

# Get macroeconomic data 6 months out for validation, shaped
# (n_dates, n_macro_columns, 1) like the GRU training input
macro_predict = macro[macro.index.isin(six_month_range)].values
predictors = np.reshape(macro_predict, (macro_predict.shape[0], macro_predict.shape[1], 1))

# Calculate residual adjustments to the baseline forecast 6 months out.
# reshape(1, -1) gives a single row of length n_dates without hard-coding the
# window length (the result is still (1, 91) for a 91-day window).
gru_residuals = gru_model.predict(predictors)
gru_residuals = gru_residuals.reshape(1, -1)

# Calculate adjusted predictions 6 months out. Residuals were defined as
# predicted - actual, so they are subtracted from the baseline forecast.
# Broadcasting (n_dates,) against (1, n_dates) yields a (1, n_dates) array.
final_predictions = new_forecast_predictions - gru_residuals

In [None]:
# Show the `residuals` DataFrame as this cell's output.
residuals