In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from models import RNN, get_XY

# **1. Setup**

In [3]:
df = pd.read_csv('./data/final.csv')
df['Month'] = pd.to_datetime(df['Month'], format="%Y%m")
df.set_index('Month', inplace=True)

In [4]:
df.head()

Unnamed: 0_level_0,DP,EP,Mkt-RF,SMB,HML,STR,TB,TS,INF,IP,RV
Month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1964-07-01,0.035045,0.061789,1.74,0.5,0.75,1.82,3.46,0.73,1.075269,0.657251,-3.880622
1964-08-01,0.035045,0.061789,-1.44,0.41,0.08,-1.11,3.5,0.71,0.97561,0.65296,-4.338397
1964-09-01,0.035045,0.061789,2.69,-0.34,1.7,1.25,3.53,0.65,1.171875,0.370552,-4.292086
1964-10-01,0.035045,0.061789,0.59,0.87,1.17,1.11,3.57,0.59,1.203252,-1.38504,-4.22742
1964-11-01,0.035045,0.061789,0.0,-0.15,-1.96,-0.68,3.64,0.56,1.397011,3.090023,-3.977059


## **1.1 Input Variables**

In [5]:
# input variables
TEST_SIZE = 0.2
MAX_UNITS = 9
TIME_STEP = 2
MODEL_TYPES = ['simple', 'lstm', 'gru']
FEATURES = df.columns[:-1].to_list()
TARGET = [df.columns[-1]]
NUM_FEATS = len(FEATURES)
# split training and testing data
split = int(df.shape[0] * TEST_SIZE)
train = df.iloc[:(df.shape[0] - split), :].copy()
test = df.iloc[(df.shape[0] - split):, :].copy()

# **2. Data Preparation**

In [6]:
scaler = MinMaxScaler(feature_range=(0,1))
# get scaled train arrays for RNN
# x - (batch_size, time_steps, features)
# y - (batch_size, 1)
train[FEATURES] = scaler.fit_transform(train[FEATURES])
train[TARGET] = scaler.fit_transform(train[TARGET])
x_train, y_train = get_XY(
    data=train,
    xlabs=FEATURES,
    ylab=TARGET,
    scaler=scaler,
    time_steps=TIME_STEP
)
# get scaled test arrays for RNN
# x - (batch_size, time_steps, features)
# y - (batch_size, 1)
test[FEATURES] = scaler.fit_transform(test[FEATURES])
test[TARGET] = scaler.fit_transform(test[TARGET])
x_test, y_test = get_XY(
    data=test,
    xlabs=FEATURES,
    ylab=TARGET,
    scaler=scaler,
    time_steps=TIME_STEP
)

In [7]:
print(f'Scaled x_train shape: {x_train.shape}')
print(f'Scaled y_train shape: {y_train.shape}')
print(f'Scaled x_test shape: {x_test.shape}')
print(f'Scaled y_test shape: {y_test.shape}')

Scaled x_train shape: (559, 2, 10)
Scaled y_train shape: (559,)
Scaled x_test shape: (138, 2, 10)
Scaled y_test shape: (138,)


# **3. LSTM Neural Network**

## **3.1 Hidden Units Cross-Validation**

In [8]:
results_lstm = {
    'model': [],
    'hidden units': [],
    'MSE': []
}
for i in range(1, MAX_UNITS + 1):
    lstm = RNN(
        x_train=x_train,
        y_train=y_train,
        units=i,
        type_=MODEL_TYPES[1]
    ).regressor
    mse = lstm.evaluate(x_test, y_test)
    results_lstm['model'].append(MODEL_TYPES[1])
    results_lstm['hidden units'].append(i)
    results_lstm['MSE'].append(mse)



In [9]:
results_lstm = pd.DataFrame.from_dict(results_lstm)
lstm_units = results_lstm.loc[results_lstm.MSE == results_lstm.MSE.min()]['hidden units'].values[0]

## **3.2 LSTM Training & Prediction**

In [10]:
rnn_lstm = RNN(
    x_train=x_train,
    y_train=y_train,
    units=lstm_units,
    type_=MODEL_TYPES[1]
).regressor
lstm_mse = rnn_lstm.evaluate(x_test, y_test)



In [11]:
print(f'LSTM Neural Network: \n' +
        f'Optimal # of Hidden Units: {lstm_units}\n' +
            f'LSTM MSE: {lstm_mse}')

LSTM Neural Network: 
Optimal # of Hidden Units: 5LSTM MSE: 0.045741524547338486


## Ignore

In [12]:
# y_pred = rnn.predict(x_test)
# # inv_yhat = scaler.inverse_transform(y_pred).flatten()
# # inv_y = scaler.inverse_transform(y_test).flatten()
# x_t = x_test.reshape((x_test.shape[0], 2 * num_feats))
# # invert RV predictions
# inv_yhat = np.concatenate((y_pred, x_t), axis=1)
# inv_yhat = scaler.inverse_transform(inv_yhat)[:,0]
# # invert RV true
# y_test = y_test.reshape((len(y_test), 1))
# inv_y = np.concatenate((y_test, x_t), axis=1)
# inv_y = scaler.inverse_transform(inv_y)[:,0]

In [13]:
# np.sqrt(mean_squared_error(inv_y, inv_yhat))

In [14]:
# results = pd.DataFrame(
#     data={
#     'Prediction': y_pred.flatten(),
#     'Actual': y_test.flatten()
#     },
#     index=test.index.to_list()[-y_pred.shape[0]:]
# )

In [15]:
# fig = px.line(results, results.index, ['Prediction', 'Actual'])
# fig.show()

# **4. Simple RNN**

## **4.1 Hidden Unit Cross-Validation**

In [16]:
results_simple = {
    'model': [],
    'hidden units': [],
    'MSE': []
}
for i in range(1, MAX_UNITS + 1):
    simple = RNN(
        x_train=x_train,
        y_train=y_train,
        units=i,
        type_=MODEL_TYPES[0]
    ).regressor
    mse = simple.evaluate(x_test, y_test)
    results_simple['model'].append(MODEL_TYPES[0])
    results_simple['hidden units'].append(i)
    results_simple['MSE'].append(mse)



In [17]:
results_simple = pd.DataFrame.from_dict(results_simple)
simple_units = results_simple.loc[results_simple.MSE == results_simple.MSE.min()]['hidden units'].values[0]

## **4.2 Simple RNN Prediction**

In [18]:
rnn_simple = RNN(
    x_train=x_train,
    y_train=y_train,
    units=simple_units,
    type_=MODEL_TYPES[0]
).regressor
simple_mse = rnn_simple.evaluate(x_test, y_test)



In [19]:
print(f'Simple Recurrent Neural Network: \n' +
        f'Optimal # of Hidden Units: {simple_units}\n' +
            f'Simple RNN MSE: {simple_mse}')

Simple Recurrent Neural Network: 
Optimal # of Hidden Units: 4
Simple RNN MSE: 0.0382847897708416


# **5. Gated Recurrent Unit**

## **5.1 Hidden Unit Cross-Validation**

In [20]:
results_gru = {
    'model': [],
    'hidden units': [],
    'MSE': []
}
for i in range(1, MAX_UNITS + 1):
    gru = RNN(
        x_train=x_train,
        y_train=y_train,
        units=i,
        type_=MODEL_TYPES[2]
    ).regressor
    mse = gru.evaluate(x_test, y_test)
    results_gru['model'].append(MODEL_TYPES[2])
    results_gru['hidden units'].append(i)
    results_gru['MSE'].append(mse)



In [21]:
results_gru = pd.DataFrame.from_dict(results_gru)
gru_units = results_gru.loc[results_gru.MSE == results_gru.MSE.min()]['hidden units'].values[0]

## **5.2 GRU Prediction**

In [22]:
rnn_gru = RNN(
    x_train=x_train,
    y_train=y_train,
    units=lstm_units,
    type_=MODEL_TYPES[2]
).regressor
gru_mse = rnn_gru.evaluate(x_test, y_test)



In [23]:
print(f'GRU Neural Network: \n' +
        f'Optimal # of Hidden Units: {lstm_units}\n' +
            f'GRU MSE: {lstm_mse}')

GRU Neural Network: 
Optimal # of Hidden Units: 5
GRU MSE: 0.045741524547338486


In [None]:
from sklearn.linear_model import Lasso
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV