In [None]:
import numpy as np
import pandas as pd

from pmdarima.arima import auto_arima

from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout

In [None]:
# Load the price history; the path is relative to the notebook's working directory.
SPY_df = pd.read_csv('SPY.csv')

In [None]:
#Item 1
# Preview the first rows, restricted to the date and closing-price columns.
SPY_df.head()[['Date', 'Close']]

In [None]:
#Item 2
# Preview the first rows of the date, price-range, volume and close columns.
item_2_columns = ['Date', 'High', 'Low', 'Volume', 'Close']
SPY_df.head()[item_2_columns]

In [None]:
# Chronological split by row position. The test split starts exactly at row
# 3876 because the rolling-forecast cells fit on rows [i, 3876 + i) and
# predict the next row; slicing with iloc keeps the two splits contiguous
# and non-overlapping whatever the length of the file.
train = SPY_df.iloc[:3876]
test = SPY_df.iloc[3876:]

# The last 776 training rows are held out for validation and the rows before
# them are used for tuning. (head(3101) + tail(776) covered 3877 rows, so one
# row used to appear in both subsets.)
train_tune = train.iloc[:-776]
train_valid = train.iloc[-776:]

In [None]:
# Automatic order search on the training closes; out_of_sample_size matches
# the 776-row validation tail of the training split.
arma_model = auto_arima(
    train['Close'],
    out_of_sample_size=776,
)

In [None]:
# Display the model selected by the search above.
arma_model

In [None]:
# Fit on the full training closes.
# NOTE(review): auto_arima's documented order-search arguments are
# start_p/max_p, d, start_q/max_q; `p` and `q` are not among them, so confirm
# these keywords actually pin the order as intended.
arma_full_model = auto_arima(
    train['Close'],
    p=1,
    d=1,
    q=1,
)

In [None]:
# Rolling one-step-ahead forecasts: slide a 3876-row window forward one row at
# a time, refit, and forecast the row immediately after the window.
# NOTE(review): `p=` / `q=` are not documented auto_arima search arguments;
# confirm they have the intended effect.
arma_pred = []
for i in range(20):
    window_close = SPY_df['Close'].iloc[i:3876 + i]
    arma_full_model = auto_arima(window_close, p=1, d=1, q=1)
    one_step_forecast = arma_full_model.predict(1)
    arma_pred.append(one_step_forecast)

In [None]:
# Mean squared error of the 20 rolling ARMA forecasts against the first 20
# test closes. Each entry of arma_pred is a length-1 forecast, so the list is
# flattened to one value per step; indexing with [0] would keep only the first
# forecast and broadcast it against all 20 actual values.
np.mean(
    (np.asarray(arma_pred, dtype=float).ravel()
     - test['Close'].iloc[:20].to_numpy()) ** 2
)

In [None]:
# Candidate architectures as [number of LSTM layers, [units per layer]].
LSTM_tries = [
    [1, [128]],
    [2, [64, 64]],
]

In [None]:
#Item 3
def auto_LSTM(train_tune_df, train_valid_df, mode='univariate', window_size = [256,512], \
             LSTM_setup = LSTM_tries, epochs_size = 2, h = 1):
    """Grid-search LSTM architectures and window lengths on the 'Close' column.

    Every combination of ``window_size`` and ``LSTM_setup`` is trained on
    ``train_tune_df`` and scored on ``train_valid_df``. Both series are
    standardized with the mean and (population) standard deviation of the
    tuning data, and scores are mean squared errors computed after mapping
    predictions back to the original scale.

    Parameters
    ----------
    train_tune_df, train_valid_df : pandas.DataFrame
        Frames with a 'Close' column, in chronological order.
    mode : str
        Only 'univariate' is implemented.
    window_size : iterable of int
        Look-back lengths to try.
    LSTM_setup : iterable of [num_layers, [units, ...]]
        Architectures to try.
    epochs_size : int
        Number of training epochs for every candidate.
    h : int
        Second entry of the LSTM ``input_shape``. The windows built here carry
        exactly one feature per time step, so this has to stay 1.

    Returns
    -------
    str
        ``"<set_up> window_size=<window>"`` for the candidate with the lowest
        validation MSE.

    Raises
    ------
    ValueError
        If ``mode`` is unsupported, a series is not longer than a requested
        window, or no candidate was evaluated.
    """

    def windowing(series, window):
        """Return (inputs, targets): each length-`window` slice and the value right after it."""
        values = np.asarray(series, dtype=float)
        n_samples = len(values) - window
        if n_samples <= 0:
            raise ValueError(
                "series of length " + str(len(values))
                + " is too short for window_size=" + str(window)
            )
        X = np.stack([values[start:start + window] for start in range(n_samples)])
        return X, values[window:]

    def standardize(series, mean=None, sd=None):
        """Scale `series` to zero mean / unit variance, estimating the statistics when none are given."""
        if mean is None and sd is None:
            mean = np.mean(series)
            sd = np.var(series) ** 0.5
        return (series - mean) / sd, mean, sd

    def build_model(num_layers, num_neurons, window):
        """Stack `num_layers` LSTM layers followed by a single-unit Dense output."""
        model = Sequential()

        if num_layers == 1:
            model.add(LSTM(num_neurons[0], input_shape=(window, h), return_sequences=False))
        else:
            # Every layer except the last has to emit the full sequence so the
            # next LSTM layer receives 3-D input.
            for layer in range(num_layers - 1):
                if layer == 0:
                    model.add(LSTM(num_neurons[layer], input_shape=(window, h), return_sequences=True))
                else:
                    model.add(LSTM(num_neurons[layer], return_sequences=True))

            model.add(LSTM(num_neurons[-1], return_sequences=False))

        model.add(Dense(1))
        model.compile(optimizer='adam', loss='mse', metrics=['mean_squared_error'])
        return model

    if mode != 'univariate':
        raise ValueError("unsupported mode: " + repr(mode) + " (only 'univariate' is implemented)")

    # Standardization does not depend on the window, so do it once up front.
    tune_close = train_tune_df['Close'].to_numpy(dtype=float)
    valid_close = train_valid_df['Close'].to_numpy(dtype=float)
    tune_scaled, train_mu, train_sigma = standardize(tune_close)
    valid_scaled, _, _ = standardize(valid_close, mean=train_mu, sd=train_sigma)

    res_dict = dict()

    for window in window_size:
        X, Y = windowing(tune_scaled, window)
        X_val, _ = windowing(valid_scaled, window)

        # LSTM layers consume (samples, time steps, features); add the
        # single-feature axis explicitly instead of relying on the framework.
        X = X[..., np.newaxis]
        X_val = X_val[..., np.newaxis]

        # Unscaled targets, aligned one-to-one with the windows above.
        y_raw_train = tune_close[window:]
        y_raw_val = valid_close[window:]

        for set_up in LSTM_setup:
            label = str(set_up) + ' window_size=' + str(window)

            curr_model = build_model(set_up[0], set_up[1], window)
            print("training:" + label)
            curr_model.fit(X, Y, epochs=epochs_size)

            # predict() returns shape (n, 1); flatten it so the subtraction
            # below is element-wise rather than broadcasting to (n, n).
            train_pred = (curr_model.predict(X).ravel() * train_sigma) + train_mu
            train_score = np.mean((train_pred - y_raw_train)**2)

            val_pred = (curr_model.predict(X_val).ravel() * train_sigma) + train_mu
            val_score = np.mean((val_pred - y_raw_val)**2)

            res_dict[label] = val_score
            print(label + " train_score: " + str(train_score))
            print(label + " val_score: " + str(val_score))

    if not res_dict:
        raise ValueError("no configurations were evaluated; window_size or LSTM_setup is empty")

    return min(res_dict, key=res_dict.get)


In [None]:
#Item 4
# Run the grid search on the tuning/validation splits and report the winning key.
res = auto_LSTM(train_tune_df=train_tune, train_valid_df=train_valid)
print(res)

def fit_LSTM(train_df,num_layers, num_neurons, window, mode = 'univariate',h = 1, \
             epochs_size = 2):
    """Train one LSTM on the standardized 'Close' column of ``train_df``.

    Parameters
    ----------
    train_df : pandas.DataFrame
        Frame with a 'Close' column, in chronological order.
    num_layers : int
        Number of stacked LSTM layers.
    num_neurons : sequence of int
        Units per LSTM layer.
    window : int
        Number of past observations fed to the network per sample.
    mode : str
        Only 'univariate' is implemented.
    h : int
        Second entry of the LSTM ``input_shape``. The windows built here carry
        exactly one feature per time step, so this has to stay 1.
    epochs_size : int
        Number of training epochs.

    Returns
    -------
    list
        ``[model, mean, sd]`` where ``mean`` and ``sd`` are the training
        statistics needed to scale new inputs and to un-scale predictions.

    Raises
    ------
    ValueError
        If ``mode`` is unsupported or the series is not longer than ``window``.
    """
    if mode != 'univariate':
        raise ValueError("unsupported mode: " + repr(mode) + " (only 'univariate' is implemented)")

    close = train_df['Close'].to_numpy(dtype=float)
    n_samples = len(close) - window
    if n_samples <= 0:
        raise ValueError(
            "series of length " + str(len(close)) + " is too short for window=" + str(window)
        )

    # Standardize with the population statistics of the training series.
    train_mu = np.mean(close)
    train_sigma = np.var(close) ** 0.5
    scaled = (close - train_mu) / train_sigma

    # Each sample is `window` consecutive values; its target is the value that
    # follows. The trailing axis is the single feature the LSTM expects.
    X = np.stack([scaled[start:start + window] for start in range(n_samples)])[..., np.newaxis]
    Y = scaled[window:]

    curr_model = Sequential()

    if num_layers == 1:
        curr_model.add(LSTM(num_neurons[0], input_shape=(window, h), return_sequences=False))
    else:
        # Every layer except the last has to emit the full sequence so the
        # next LSTM layer receives 3-D input.
        for layer in range(num_layers - 1):
            if layer == 0:
                curr_model.add(LSTM(num_neurons[layer], input_shape=(window, h), return_sequences=True))
            else:
                curr_model.add(LSTM(num_neurons[layer], return_sequences=True))

        curr_model.add(LSTM(num_neurons[-1], return_sequences=False))

    curr_model.add(Dense(1))
    curr_model.compile(optimizer='adam', loss='mse', metrics=['mean_squared_error'])
    curr_model.fit(X, Y, epochs=epochs_size)

    return [curr_model,train_mu,train_sigma]

# Fit a single-layer, 128-unit LSTM with a 512-step window on the training split.
LSTM_WINDOW = 512
LSTM_1 = fit_LSTM(train,1,[128],LSTM_WINDOW, epochs_size = 2)
lstm_model, lstm_mu, lstm_sigma = LSTM_1

# Rolling one-step-ahead forecasts: feed the most recent LSTM_WINDOW closes
# before each target row, scaled with the training statistics.
LSTM_pred = []
for i in range(20):
    recent_close = SPY_df['Close'].iloc[(0+i):(3876+i)].tail(LSTM_WINDOW).to_numpy(dtype=float)
    input_data = (recent_close - lstm_mu) / lstm_sigma
    # The network expects (samples, time steps, features).
    scaled_forecast = lstm_model.predict(input_data.reshape(1, LSTM_WINDOW, 1))
    LSTM_pred.append(float(np.ravel(scaled_forecast)[0] * lstm_sigma + lstm_mu))

# This expression is not the last one in its cell, so its value would be
# dropped silently; keep it in a variable and print it.
LSTM_mse = np.mean((np.array(LSTM_pred) - test['Close'].iloc[:20].to_numpy())**2)
print('LSTM MSE:', LSTM_mse)

# Compare the tail of the training closes, the first 20 test closes and both
# sets of rolling forecasts.
# NOTE(review): `plt` is not imported anywhere in the visible cells; confirm
# matplotlib.pyplot is imported as plt before this runs on a fresh kernel.
n_train_rows = train.shape[0]
forecast_positions = list(range(3876, 3876 + 20))

plt.plot(train['Close'].loc[n_train_rows - 100:n_train_rows], color='blue')
plt.plot(test['Close'][:20], color='blue', alpha=0.2)
for forecasts, line_color, line_label in ((arma_pred, 'red', 'ARMA'), (LSTM_pred, 'green', 'LSTM')):
    plt.plot(forecast_positions, forecasts, color=line_color, linestyle="--", label=line_label)
plt.xlabel('t')
plt.ylabel('SPY Index Close')
plt.legend(loc="upper left")