# Neural Networks

# Libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import requests
from bs4 import BeautifulSoup
import yfinance as yf
import datetime as dt
import os
import seaborn as sns
import random 
from scipy.optimize import newton
import itertools
import tensorflow as tf
import keras

In [2]:
import tensorflow as tf
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, LSTM, Conv1D, Flatten, GRU, SimpleRNN
from sklearn.model_selection import GridSearchCV, TimeSeriesSplit
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

In [3]:
import warnings
warnings.filterwarnings('ignore')

In [4]:
sns.set_theme()
sns.set_palette("tab20")
sns.set_style("darkgrid", rc = {"grid.color": "white"})
c10 = sns.color_palette("tab10").as_hex()[:]
c20 = sns.color_palette("tab20").as_hex()[:]

In [5]:
pd.set_option('display.max_columns', None)
os.chdir('/Users/maris/Documents/FE800')

In [28]:
np.random.seed(10)

## Import Data

In [38]:
df = pd.read_pickle("factor_df.pkl")
df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Return,Price,volume,Trades,TTM,Stock_Return,Stock_Volume,ETF_Return,PV,YTM,Duration,DV01,Convexity
cusip,trd_dt,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
00206RBH4,2018-07-03,0.00885,86.513796,-0.785507,-0.94015,1.728678,0.015523,-0.171355,-0.005025,0.004849,0.902753,1.131115,0.498639,1.39185
00206RBH4,2018-07-05,0.003406,86.808383,-0.573559,1.089431,1.724879,-0.003363,-0.086349,0.004959,0.001749,0.878724,1.132837,0.478094,1.392504
00206RBH4,2018-07-06,0.004973,87.240021,-0.637707,-0.462601,1.72298,0.002454,0.232636,0.007858,0.001668,0.841574,1.137649,0.447232,1.395884
00206RBH4,2018-07-09,-0.001021,87.150785,-0.115591,-0.94015,1.717283,0.004123,0.357721,0.011967,-0.002931,0.852737,1.113003,0.46136,1.375882
00206RBH4,2018-07-10,0.002972,87.409758,-0.519514,-0.462601,1.715383,0.010887,0.103056,0.00215,-0.001502,0.830979,1.102112,0.447388,1.367151


In [39]:
macro_factors = pd.read_pickle("macro_factors.pkl").pct_change().drop(["Lower_Target", "Upper_Target"], axis = 1)
macro_factors = macro_factors.drop(pd.Timestamp("2018-07-02"))
macro_factors.head()

Unnamed: 0,EFFR,1YRT,2YRT,3YRT,5YRT,7YRT,10YRT,30YRT,SPY,LQD
2018-07-03,0.0,-0.004274,-0.015564,-0.007547,-0.010909,-0.014134,-0.013937,-0.010033,-0.003531,0.003677
2018-07-05,0.0,-0.004292,0.007905,0.007605,0.007353,0.003584,0.003534,-0.003378,0.008158,0.00314
2018-07-06,0.0,0.008621,-0.007843,-0.003774,-0.010949,-0.007143,-0.007042,-0.00339,0.008458,0.001391
2018-07-09,0.0,0.0,0.01581,0.007576,0.01476,0.014388,0.014184,0.006803,0.009004,0.000695
2018-07-10,0.0,0.008547,0.007782,0.011278,0.007273,0.003546,0.003497,0.003378,0.003598,-0.001128


# LSTM Neural Network

In [31]:
scaler = MinMaxScaler()

In [40]:
def neural_network_with_lstm(df, feature):

    X = df.drop([feature, "Price"], axis=1)[1:].fillna(0)
    X_scaled = pd.DataFrame(scaler.fit_transform(X))
    X_scaled.columns = X.columns
    X_scaled.index = X.index
    Y = df[feature][1:].fillna(0)

    X_train = X_scaled.loc[:pd.Timestamp("2022-05-31")]
    X_test = X_scaled.loc[pd.Timestamp("2022-06-01"):]
    Y_train = Y.loc[:pd.Timestamp("2022-05-31")]
    Y_test = Y.loc[pd.Timestamp("2022-06-01"):]

    X_train_lstm = np.reshape(X_train, (X_train.shape[0], 1, X_train.shape[1]))
    X_test_lstm = np.reshape(X_test, (X_test.shape[0], 1, X_test.shape[1]))

    model = Sequential()
    model.add(LSTM(4, input_shape=(1, X.shape[1])))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam')

    model.fit(X_train_lstm, Y_train, epochs=100, batch_size=32, verbose=0)
    Y_pred = model.predict(X_test_lstm)
    Y_pred_train = model.predict(X_train_lstm)
    r2_train = r2_score(Y_train, Y_pred_train)
    r2_test = r2_score(Y_test, Y_pred)

    prices = pd.DataFrame({"Test": df["Price"].loc[pd.Timestamp("2022-06-01"):]}).ffill()
    prices["Return Pred"] = Y_pred + 1

    pred = []
    for i in range(len(prices)):
        if i == 0:
            pred.append(prices["Return Pred"].iloc[0] * df["Price"].loc[pd.Timestamp("2022-05-31")])
        else:
            pred.append(prices["Return Pred"].iloc[i] * prices["Test"].iloc[i - 1])
    prices["Pred"] = pred

    try: 
        
        mse = mean_squared_error(prices["Test"], prices["Pred"])
        rmse = mean_squared_error(prices["Test"], prices["Pred"], squared=False)
        mae = mean_absolute_error(prices["Test"], prices["Pred"])
    
        return r2_train, r2_test, mse, rmse, mae

    except:
        return None, None, None, None, None

In [41]:
%%time

neural_network_with_lstm(pd.concat([df.loc[random.sample(list(df.index.droplevel(1).unique()), 1)[0]]
                             ,  macro_factors], axis = 1), "Return")

[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 311us/step
CPU times: user 3.02 s, sys: 607 ms, total: 3.63 s
Wall time: 3.71 s


(0.16863853482607816,
 0.1896246032697645,
 0.48164175308649493,
 0.6940041448626189,
 0.546908968574912)

In [42]:
%%time

ltsm_results = pd.DataFrame(columns = ["Cusip", "R2 Train", "R2 Test", "MSE", "RMSE", "MAE"])
for col in list(df.index.droplevel(1).unique()):
    outputs = neural_network_with_lstm(pd.concat([df.loc[col],  macro_factors], axis = 1), "Return")
    ltsm_results.loc[len(ltsm_results)] = [col, outputs[0], outputs[1], outputs[2], outputs[3], outputs[4]]

[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 369us/step
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 358us/step
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 321us/step
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 317us/step
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step 
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 298us/step
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step 
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 

In [45]:
ltsm_results.sort_values("R2 Test").dropna()

Unnamed: 0,Cusip,R2 Train,R2 Test,MSE,RMSE,MAE
324,38141GFD1,2.101516e-02,-18.548769,26.425615,5.140585,4.972128
719,92976GAJ0,1.639158e-02,-17.486120,19.479940,4.413609,3.781852
666,902613AY4,-3.071299e+06,-13.863293,0.091226,0.302037,0.246439
479,594918BY9,-1.734098e-01,-5.305967,0.738950,0.859622,0.656337
231,25156PAC7,7.194161e-02,-5.205872,2.559312,1.599785,1.426239
...,...,...,...,...,...,...
392,46647PAJ5,4.394072e-01,0.575222,0.272764,0.522268,0.387143
153,126650CN8,4.433685e-01,0.597317,0.329880,0.574352,0.441180
51,035242AN6,4.121081e-01,0.600531,0.405469,0.636765,0.503507
2,00206RCP5,3.823059e-01,0.617506,0.182542,0.427250,0.338942


In [44]:
ltsm_results.drop(["Cusip"], axis = 1).drop(ltsm_results.sort_values("R2 Test").index[:10]).mean()

R2 Train    0.246766
R2 Test    -0.010629
MSE         0.690796
RMSE        0.772380
MAE         0.579408
dtype: float64

# Recurrent Neural Network

In [23]:
def neural_network_with_rnn(df, feature):
    X = df.drop([feature, "Price"], axis=1)[1:].fillna(0)
    X_scaled = pd.DataFrame(scaler.fit_transform(X))
    X_scaled.columns = X.columns
    X_scaled.index = X.index
    Y = df[feature][1:].fillna(0)

    X_train = X_scaled.loc[:pd.Timestamp("2022-05-31")]
    X_test = X_scaled.loc[pd.Timestamp("2022-06-01"):]
    Y_train = Y.loc[:pd.Timestamp("2022-05-31")]
    Y_test = Y.loc[pd.Timestamp("2022-06-01"):]

    X_train_rnn = np.reshape(X_train, (X_train.shape[0], 1, X_train.shape[1]))
    X_test_rnn = np.reshape(X_test, (X_test.shape[0], 1, X_test.shape[1]))

    model = Sequential()
    model.add(SimpleRNN(4, input_shape=(1, X.shape[1])))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam')

    model.fit(X_train_rnn, Y_train, epochs=100, batch_size=32, verbose=0)
    Y_pred = model.predict(X_test_rnn)
    Y_pred_train = model.predict(X_train_rnn)
    r2_train = r2_score(Y_train, Y_pred_train)
    r2_test = r2_score(Y_test, Y_pred)

    prices = pd.DataFrame({"Test": df["Price"].loc[pd.Timestamp("2022-06-01"):]}).ffill()
    prices["Return Pred"] = Y_pred + 1

    pred = []
    for i in range(len(prices)):
        if i == 0:
            pred.append(prices["Return Pred"].iloc[0] * df["Price"].loc[pd.Timestamp("2022-05-31")])
        else:
            pred.append(prices["Return Pred"].iloc[i] * prices["Test"].iloc[i - 1])
    prices["Pred"] = pred

    try:
        mse = mean_squared_error(prices["Test"], prices["Pred"])
        rmse = mean_squared_error(prices["Test"], prices["Pred"], squared=False)
        mae = mean_absolute_error(prices["Test"], prices["Pred"])

        return r2_train, r2_test, mse, rmse, mae

    except:
        return None, None, None, None, None


In [18]:
%%time

neural_network_with_rnn(pd.concat([df.loc[random.sample(list(df.index.droplevel(1).unique()), 1)[0]]
                             ,  macro_factors], axis = 1), "Return")

[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 263us/step
CPU times: user 4.25 s, sys: 565 ms, total: 4.81 s
Wall time: 4.13 s


(None, None, None, None)

In [24]:
%%time

rnn_results = pd.DataFrame(columns = ["Cusip", "R2 Train", "R2 Test", "MSE", "RMSE", "MAE"])
for col in list(df.index.droplevel(1).unique()):
    outputs = neural_network_with_rnn(pd.concat([df.loc[col],  macro_factors], axis = 1), "Return")
    rnn_results.loc[len(rnn_results)] = [col, outputs[0], outputs[1], outputs[2], outputs[3], outputs[4]]

[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step 
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 786us/step
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 325us/step
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step 
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 585us/step
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 295us/step
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 315us/step
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 761us/step
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37

In [20]:
rnn_results.sort_values("R2 Test")

Unnamed: 0,Cusip,R2 Train,R2 Test,MSE,RMSE,MAE
7,00206RDQ2,-11.762139,-42.582065,4.979728,2.23153,1.839962
10,002824BG4,-25.358771,-29.871258,20.827906,4.56376,3.624472
9,002824BF6,-4.483596,-23.032401,5.923792,2.433884,2.032392
12,00287YAM1,-5.542589,-9.557441,7.326847,2.706815,2.133837
8,00206RDR0,-1.560903,-4.406576,4.395833,2.096624,1.69519
3,00206RCQ3,-1.361518,-3.389167,3.561609,1.887222,1.437396
6,00206RDK5,0.034027,-3.014222,4.026057,2.006504,1.629604
0,00206RBH4,-0.526076,-2.545048,4.247371,2.060915,1.602825
1,00206RBK7,-1.658717,-2.302561,2.748244,1.657783,1.383217
2,00206RCP5,0.088116,-2.121745,1.481205,1.217048,0.955081


In [21]:
rnn_results.drop(["Cusip"], axis = 1).drop(rnn_results.sort_values("R2 Test").index[0:10]).mean()

R2 Train   -0.497801
R2 Test    -0.634228
MSE         1.579024
RMSE        1.243486
MAE         0.955265
dtype: float64

# Compare Methods

In [22]:
compare_error = pd.DataFrame(columns = ["R2 Train", "R2 Test", "MSE", "RMSE", "MAE"])
compare_error.loc["Recurrent Neural Networks"] = rnn_results.drop(["Cusip"], axis = 1).drop(rnn_results.sort_values("R2 Test").index[0:10]).mean()
compare_error.loc["LTSM Neural Networks"] = ltsm_results.drop(["Cusip"], axis = 1).drop(ltsm_results.sort_values("R2 Test").index[0]).mean()
compare_error

Unnamed: 0,R2 Train,R2 Test,MSE,RMSE,MAE
Recurrent Neural Networks,-0.497801,-0.634228,1.579024,1.243486,0.955265
LTSM Neural Networks,0.358784,0.262741,0.604185,0.737262,0.535882
