In [None]:
from scripts.utils import DataWindow
from scripts.utils import compile_and_fit
from scripts.utils import add_seasonality, encode_ticker, split_date
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, Dense, Dropout
from sklearn.preprocessing import MinMaxScaler
import pandas as pd

In [None]:
# Load the raw table and order it by date so the positional split below
# is also a split in time (earliest rows -> train, latest rows -> test).
df = pd.read_csv("data/sp500/SP500.csv")
df["Date"] = pd.to_datetime(df["Date"])
df = df.sort_values(by="Date")
n = len(df)


def _prepare_split(split_df):
    """Apply the shared feature pipeline to one split and drop the raw ``Date`` column.

    The pipeline is ``split_date`` -> ``add_seasonality`` -> ``encode_ticker``
    (project helpers from ``scripts.utils``). The input is copied first so the
    slice of ``df`` handed in is never modified, and ``Date`` is removed with a
    non-inplace ``drop`` so a fresh frame is returned.

    NOTE(review): ``encode_ticker`` runs per split; presumably it yields the
    same columns for every split -- confirm, since the scaler below is fitted
    on the train columns only.
    """
    prepared = split_date(split_df.copy())
    prepared = add_seasonality(prepared)
    prepared = encode_ticker(prepared)
    return prepared.drop(columns=["Date"])


# Positional 70% / 20% / 10% split into train / validation / test.
train_end = int(n*0.7)
val_end = int(n*0.9)

train_df = _prepare_split(df[0:train_end])
val_df = _prepare_split(df[train_end:val_end])
test_df = _prepare_split(df[val_end:])

# Fit the scaler on the training split only, then apply the same
# transformation to all three splits.
scaler = MinMaxScaler()
scaler.fit(train_df)

train_df[train_df.columns] = scaler.transform(train_df[train_df.columns])
val_df[val_df.columns] = scaler.transform(val_df[val_df.columns])
test_df[test_df.columns] = scaler.transform(test_df[test_df.columns])

In [None]:
# Windowed dataset over the three prepared splits: 21 input steps,
# 21 label steps, shift of 21, with 'Close' as the only label column.
window = DataWindow(
    input_width=21,
    label_width=21,
    shift=21,
    train_df=train_df,
    val_df=val_df,
    test_df=test_df,
    label_columns=['Close'],
)

In [None]:
# Two stacked SimpleRNN layers (50 units each) with dropout after each,
# followed by a 21-unit Dense output layer.
model = Sequential([
    # The first recurrent layer returns the full sequence so the second
    # recurrent layer receives one vector per time step.
    SimpleRNN(units=50, return_sequences=True, input_shape=(21, train_df.shape[1])),
    Dropout(0.2),
    SimpleRNN(units=50),
    Dropout(0.2),
    Dense(units=21),
])
model.compile(optimizer='adam', loss='mean_squared_error')

In [None]:
# Train the model on the windowed data through the project helper and keep
# whatever it returns as `history`.
# NOTE(review): the model is already compiled in the cell above; presumably
# compile_and_fit compiles it again with its own settings -- confirm which
# optimizer/loss is actually used.
history = compile_and_fit(model, window)

In [None]:
# Evaluation results, keyed by model name: validation split in
# `val_performance`, test split in `performance`.
val_performance = {}
performance = {}

# The model evaluated here is the SimpleRNN stack built above, so store its
# scores under that name rather than under 'Dense'.
MODEL_NAME = 'SimpleRNN'

val_performance[MODEL_NAME] = model.evaluate(window.val)
performance[MODEL_NAME] = model.evaluate(window.test, verbose=0)