In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from models import get_XY, RNN

2023-04-12 01:04:15.860922: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Load the dataset and index it by month.
# `Month` is parsed with the "%Y%m" pattern (year followed by month) and then
# promoted to the index, all in one chain so the cell is safe to re-run.
df = (
    pd.read_csv('./data/final.csv')
    .assign(Month=lambda frame: pd.to_datetime(frame['Month'], format="%Y%m"))
    .set_index('Month')
)

In [3]:
# Preview the first rows to check the parsed index and the column layout.
df.head()

Unnamed: 0_level_0,DP,EP,Mkt-RF,SMB,HML,STR,TB,TS,INF,IP,RV
Month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1964-07-01,0.035045,0.061789,1.74,0.5,0.75,1.82,3.46,0.73,1.075269,0.657251,-3.880622
1964-08-01,0.035045,0.061789,-1.44,0.41,0.08,-1.11,3.5,0.71,0.97561,0.65296,-4.338397
1964-09-01,0.035045,0.061789,2.69,-0.34,1.7,1.25,3.53,0.65,1.171875,0.370552,-4.292086
1964-10-01,0.035045,0.061789,0.59,0.87,1.17,1.11,3.57,0.59,1.203252,-1.38504,-4.22742
1964-11-01,0.035045,0.061789,0.0,-0.15,-1.96,-0.68,3.64,0.56,1.397011,3.090023,-3.977059


In [4]:
# --- model / windowing settings -------------------------------------------
time_step = 12   # forwarded to get_XY as `time_steps`
units = 5        # forwarded to RNN as `units`
scaler = MinMaxScaler(feature_range=(0, 1))

# --- hold-out split --------------------------------------------------------
# The final 20% of the rows (in file order) are reserved for testing; the
# remaining leading rows are used for training.
split = int(len(df) * 0.2)
train = df.iloc[:len(df) - split].copy()
test = df.iloc[len(df) - split:].copy()

# --- column roles ----------------------------------------------------------
# Every column except the last is an input; the last column is the target.
feature_labels = df.columns[:-1].tolist()
target = df.columns[-1:].tolist()
num_feats = len(feature_labels)

In [5]:
# Build the scaled arrays consumed by the RNN for each split.
#   x -> (batch_size, time_steps, features)
#   y -> (batch_size, 1)
# NOTE(review): the same `scaler` instance is handed to both calls. If get_XY
# (re)fits it, the test split is scaled with its own statistics rather than
# the training ones — confirm in models.get_XY.
x_train, y_train = get_XY(
    data=train, xlabs=feature_labels, ylab=target,
    scaler=scaler, time_steps=time_step,
)
x_test, y_test = get_XY(
    data=test, xlabs=feature_labels, ylab=target,
    scaler=scaler, time_steps=time_step,
)

In [6]:
# Report the shape of every array produced by get_XY, one per line.
print(
    f'Scaled x_train shape: {x_train.shape}',
    f'Scaled y_train shape: {y_train.shape}',
    f'Scaled x_test shape: {x_test.shape}',
    f'Scaled y_test shape: {y_test.shape}',
    sep='\n',
)

Scaled x_train shape: (549, 12, 10)
Scaled y_train shape: (549, 1)
Scaled x_test shape: (128, 12, 10)
Scaled y_test shape: (128, 1)


In [7]:
# Construct the project's RNN wrapper from the training arrays and keep only
# its `regressor` attribute, which is what the later cells evaluate/predict with.
# NOTE(review): no random seed is set anywhere in this notebook, so results
# may differ between runs — confirm whether models.RNN seeds internally.
rnn = RNN(x_train=x_train, y_train=y_train, units=units).regressor

In [8]:
# Score the regressor on the held-out windows. The saved output is a single
# scalar (presumably the loss configured in models.RNN — confirm).
rnn.evaluate(x_test, y_test)



0.14796727895736694

In [9]:
# Predict on the test windows, then map both the predictions and the true
# targets back to the original scale as flat 1-D arrays.
# NOTE(review): inverse_transform is called on single-column arrays, so this
# presumes `scaler` was last fit on the target column alone — confirm in
# models.get_XY. An earlier, disabled variant padded the target with the
# flattened feature window before inverting; that would only be needed if the
# scaler had been fit on a wider matrix.
y_pred = rnn.predict(x_test)
inv_yhat, inv_y = (
    scaler.inverse_transform(scaled).flatten() for scaled in (y_pred, y_test)
)



In [10]:
# Display the raw network outputs for the test windows (before inversion).
# NOTE(review): every row shown in the saved output is 0., i.e. the model is
# emitting a constant. Investigate models.RNN (presumably its output
# activation or training setup) before trusting metrics derived from y_pred.
y_pred

array([[0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],

In [None]:
# Root-mean-squared error between the inverse-transformed actuals and predictions.
np.sqrt(mean_squared_error(y_true=inv_y, y_pred=inv_yhat))

In [None]:
# Pair each prediction with its actual value, labelled by the last
# len(y_pred) entries of the test index.
results = pd.DataFrame(
    {'Prediction': inv_yhat, 'Actual': inv_y},
    index=test.index.to_list()[-len(y_pred):],
)

In [None]:
# Show the `results` table of predictions and actuals.
results

In [None]:
# Plot predictions and actuals as two lines over the result index.
fig = px.line(
    data_frame=results,
    x=results.index,
    y=['Prediction', 'Actual'],
)
fig.show()