# Neural Network Model

In [79]:
# Import appropriate modules
import pandas as pd
from pathlib import Path
import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,OneHotEncoder
import datetime
import numpy as np
import yfinance as yf
from datetime import datetime
import hvplot
import hvplot.pandas
import matplotlib.pyplot as plt
from vix_functions import garch_fit_and_predict, correlation_filter, retrieve_yahoo_close, retrieve_yahoo_volume 

# Generation of the Features Matrix X

In [80]:
# Symbols to download. "^VIX" has to stay at index 0: the volume cell slices
# off the first entry to leave the VIX out of the volume download.
ticker_list = [
    "^VIX",
    "spy", "DX-Y.NYB", "tlt", "ief", "gld", "slv",
    "CL=F", "USDJPY=X",
    "FXI", "EZU", "EEM", "EFA",
]

In [81]:
# Inclusion of security levels X1
def retrieve_close(close_prices_dict, ticker_list):
    """Store the close series of every ticker in ``close_prices_dict``.

    Each symbol in ``ticker_list`` is passed to ``retrieve_yahoo_close`` and
    the result is saved under that symbol. The dictionary is updated in place
    and the same object is returned.
    """
    for symbol in ticker_list:
        close_prices_dict[symbol] = retrieve_yahoo_close(symbol)
    return close_prices_dict

# Download every close series; retrieve_close fills and returns the dict it is given
close_prices_dict = retrieve_close({}, ticker_list)
print("Completed retrieve of close prices")

close_prices_df = pd.DataFrame(close_prices_dict)

# X1: the close-price columns that survive the correlation filter against '^VIX'
# (presumably those with correlation of at least min_corr -- confirm in vix_functions)
close_prices_component_df = correlation_filter(
    close_prices_df,
    min_corr=0.20,
    key_column='^VIX',
)
X1 = close_prices_component_df.copy()

# The '^VIX' close column on its own; later cells reuse it as the key series
vix = close_prices_df['^VIX']

Processing Close ^VIX
Processing Close spy
Processing Close DX-Y.NYB
Processing Close tlt
Processing Close ief
Processing Close gld
Processing Close slv
Processing Close CL=F
Processing Close USDJPY=X
Processing Close FXI
Processing Close EZU
Processing Close EEM
Processing Close EFA
Completed retrieve of close prices


In [83]:
# Inclusion of security returns X2

# pct_change() used to forward-fill gaps implicitly (fill_method='pad'), but
# that default is deprecated since pandas 2.1. Forward-filling explicitly
# yields the same returns on every pandas version and avoids the FutureWarning.
security_returns_df = close_prices_df.ffill().pct_change(fill_method=None)

# Keep only the return columns that pass the correlation filter against '^VIX'
security_returns_component_df = correlation_filter(
    security_returns_df,
    min_corr=0.20,
    key_column='^VIX',
)

X2 = security_returns_component_df.copy()

In [84]:
# Inclusion of security volume X3
# Every ticker except the first one ("^VIX")
volume_list = ticker_list[1:]

def retrieve_volume(volume_dict, volume_list):
    """Store the volume series of every ticker in ``volume_dict``.

    Each symbol in ``volume_list`` is passed to ``retrieve_yahoo_volume`` and
    the result is saved under that symbol. The dictionary is updated in place
    and the same object is returned.
    """
    for symbol in volume_list:
        volume_dict[symbol] = retrieve_yahoo_volume(symbol)
    return volume_dict

# Download every volume series; retrieve_volume fills and returns the dict it is given
volume_dict = retrieve_volume({}, volume_list)
print("Completed retrieve of volume")

volume_df = pd.DataFrame(volume_dict)

# Put the VIX close in front of the volume columns so it can act as key column
volume_df_with_vix = pd.concat([vix, volume_df], axis=1)

# X3: volume columns kept by the correlation filter
# (eliminate_first_column=True presumably drops the VIX column again -- confirm in vix_functions)
volume_component_df = correlation_filter(
    volume_df_with_vix,
    min_corr=0.20,
    key_column='^VIX',
    eliminate_first_column=True,
)
X3 = volume_component_df.copy()

Processing Volume spy
Processing Volume DX-Y.NYB
Processing Volume tlt
Processing Volume ief
Processing Volume gld
Processing Volume slv
Processing Volume CL=F
Processing Volume USDJPY=X
Processing Volume FXI
Processing Volume EZU
Processing Volume EEM
Processing Volume EFA
Completed retrieve of volume


In [85]:
# Inclusion of GARCH series X4
# One column per ticker (the VIX included), holding whatever garch_fit_and_predict
# returns for that ticker's return series when called with horizon=1, p=1, q=1, o=1.
# Unlike the other feature groups, no correlation filter is applied here.
# NOTE(review): security_returns_df comes from pct_change(), so its first row is
# NaN -- confirm garch_fit_and_predict copes with that.
garch_series=pd.DataFrame()

# Columns are added one at a time to the initially empty frame.
for ticker in ticker_list:
        garch_series[ticker]=garch_fit_and_predict(security_returns_df[ticker], horizon=1, p=1, q=1, o=1)

# X4 is an independent copy of the assembled frame
X4=garch_series.copy()
#X4


In [86]:
# Inclusion of return squares in X5

# Squared returns of every security except the VIX itself
returns_squared_df_no_vix = (security_returns_df.drop(columns='^VIX')) ** 2

# Prepend the VIX close so it can serve as the key column of the filter
returns_squared_and_vix_level_df = pd.concat(
    [vix, returns_squared_df_no_vix],
    axis=1,
)
returns_squared_component_df = correlation_filter(
    returns_squared_and_vix_level_df,
    min_corr=0.20,
    key_column='^VIX',
    eliminate_first_column=True,
)

X5 = returns_squared_component_df.copy()
X5

Unnamed: 0_level_0,spy,DX-Y.NYB,tlt,ief,gld,slv,FXI,EZU,EEM,EFA
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2007-07-02,,,,,,,,,,
2007-07-03,0.000013,0.000000e+00,3.717994e-05,0.000009,0.000019,0.000020,0.000439,0.000042,6.347385e-05,2.265340e-05
2007-07-04,0.000000,0.000000e+00,0.000000e+00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000e+00,0.000000e+00
2007-07-05,0.000001,3.396638e-06,1.255806e-04,0.000031,0.000024,0.000029,0.000012,0.000027,1.964648e-05,2.480096e-05
2007-07-06,0.000028,9.626279e-07,1.645320e-05,0.000006,0.000070,0.000298,0.000630,0.000118,2.936444e-04,5.187246e-05
...,...,...,...,...,...,...,...,...,...,...
2021-09-27,0.000008,2.869577e-07,1.351201e-05,0.000007,0.000004,0.000123,0.000207,0.000001,4.223255e-05,3.877590e-07
2021-09-28,0.000406,1.744296e-05,2.426398e-04,0.000015,0.000094,0.000058,0.000002,0.000569,1.667539e-04,4.755389e-04
2021-09-29,0.000003,3.695060e-05,3.010235e-06,0.000001,0.000020,0.001595,0.000052,0.000024,8.679118e-05,5.257838e-06
2021-09-30,0.000149,9.100347e-07,1.916833e-08,0.000001,0.000323,0.000816,0.000092,0.000052,6.405172e-05,1.566647e-05


### Inclusion of Google Trends

In [160]:
# Upload of csv files
# One Google Trends export per keyword is read from ./Resources/<keyword>.csv,
# indexed by its 'Date' column.
keywords=['liquidity', 'Jobless_claims']
trend_frames = []

for keyword in keywords:
    file_path=f"./Resources/{keyword}.csv"
    print(file_path)
    # infer_datetime_format is deprecated since pandas 2.0 and no longer needed.
    trend=pd.read_csv(Path(file_path),
                      index_col= 'Date',
                      parse_dates= True)
    trend_frames.append(trend)

# Concatenate once, side by side. Growing a frame with concat inside the loop,
# starting from an empty DataFrame, re-copies the data on every pass and mixes
# the empty frame's index into the date index.
google_trends_df=pd.concat(trend_frames, axis=1)

google_trends_df

./Resources/liquidity.csv
./Resources/Jobless_claims.csv


Unnamed: 0_level_0,liquidity: (United States),Jobless claims: (United States)
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2016-10-09,31,0
2016-10-16,54,0
2016-10-23,42,0
2016-10-30,38,0
2016-11-06,32,0
...,...,...
2021-09-05,31,0
2021-09-12,43,0
2021-09-19,37,9
2021-09-26,39,0


In [173]:
# We will interpolate with degree 2 in case of missing values
# NOTE(review): the next line overwrites the first row of google_trends_df in
# place with zeros, replacing the values loaded for that date -- confirm this
# is intended.
google_trends_df.iloc[0,:]=0

# Put the VIX close and the trend columns side by side on a shared index.
vix_google_trends_df=pd.concat([vix, google_trends_df], axis=1)
# Zero every non-VIX column in the first row of the combined frame
# (presumably to give the interpolation a starting anchor -- confirm).
vix_google_trends_df.iloc[0,1:]=0
# NOTE(review): not the last expression of the cell, so this displays nothing.
vix_google_trends_df.head(20)

# Fill the NaNs of every column (the VIX column included) with an order-2
# polynomial interpolation.
# NOTE(review): the output recorded under the X6 cell shows negative trend values
# (e.g. -9.35), which looks like an artifact of interpolating away from the zero
# anchor -- verify this is acceptable.
vix_google_trends_df=vix_google_trends_df.interpolate(method="polynomial", order=2)

In [179]:
# Filtering by correlation X6
# Note the lower threshold (0.05) than the 0.20 used for the market-data groups.
google_trends_component_df = correlation_filter(vix_google_trends_df, min_corr=0.05, key_column='^VIX', eliminate_first_column=True)

X6 = google_trends_component_df.copy()



Unnamed: 0_level_0,liquidity: (United States),Jobless claims: (United States)
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2007-07-02,0.000000,0.000000
2007-07-03,-9.351484,-0.000046
2007-07-04,-18.697444,-0.000093
2007-07-05,-28.037881,-0.000139
2007-07-06,-37.372794,-0.000185
...,...,...
2021-09-28,40.817461,-1.479368
2021-09-29,41.866667,-1.775242
2021-09-30,43.009524,-1.775242
2021-10-01,44.246032,-1.479368


# GENERATION OF THE FEATURE MATRIX **X**

In [186]:
# Build the feature matrix in one chain:
#   1. place all six feature groups side by side,
#   2. fill missing values with an order-2 polynomial interpolation,
#   3. drop the rows that still contain NaN (e.g. leading or trailing rows).
# NOTE(review): X1-X5 appear to reuse the ticker symbols as column names, so X
# may end up with duplicate column labels -- verify before selecting by name.
X = (
    pd.concat([X1, X2, X3, X4, X5, X6], axis=1)
    .interpolate(method="polynomial", order=2)
    .dropna()
)

In [10]:
# Inclusion of different lags of data from the X -- up to n_lag



In [11]:
# Shift data

In [12]:
# Set the Signal column
# NOTE(review): gtd_tdf is not defined in any visible cell (the recorded run of
# this cell ends in a NameError); it has to be created before this cell can run.
#
# Signal is 1.0 where ^VIX is above 0.036 (3.6%) and 0.0 everywhere else,
# including NaN and exact ties. There is no -1 state.
# NOTE(review): a 3.6% threshold suggests "^VIX" holds returns rather than index
# levels here -- confirm.
gtd_tdf["Signal"] = 0.0
# Vectorized boolean mask instead of a row-by-row iterrows() loop
gtd_tdf.loc[gtd_tdf["^VIX"] > 0.036, "Signal"] = 1.0

# Review the DataFrame
gtd_tdf.head()


NameError: name 'gtd_tdf' is not defined

In [None]:
# Target vector: the Signal column as a plain array
y = gtd_tdf.loc[:, "Signal"].values

# Display a sample of y
y

In [None]:
# Feature matrix: every column of gtd_tdf except the Signal target
# NOTE(review): the "^VIX" column that Signal is derived from stays in X --
# confirm this is not target leakage.
X = gtd_tdf.drop(labels=["Signal"], axis="columns")

# Review the features DataFrame
X.head()

In [None]:
# Split features and target into training and testing sets (random_state=1)
# NOTE(review): train_test_split shuffles rows by default; if the rows are
# time-ordered, consider a chronological split -- confirm.
(X_train, X_test,
 y_train, y_test) = train_test_split(X, y, random_state=1)


In [None]:
# Create a StandardScaler instance
scaler = StandardScaler()

# Fit the scaler on the training features only
X_scaler = scaler.fit(X_train)

# Apply the fitted scaler to the training and to the testing features
X_train_scaled, X_test_scaled = (
    X_scaler.transform(features) for features in (X_train, X_test)
)


In [None]:
# Number of inputs (features) to the model = number of columns in X
number_input_features = len(X.columns)

# Review the number of features
number_input_features

In [None]:
# Define the number of neurons in the output layer
# (a single unit; the output layer cell gives it a sigmoid activation)
number_output_neurons = 1

In [None]:
# First hidden layer size: sqrt(inputs * outputs), rounded up to an integer
hidden_nodes_layer1 = int(
    np.ceil(
        np.sqrt(number_input_features * number_output_neurons)
    )
)

# Review the number hidden nodes in the first layer
hidden_nodes_layer1


In [None]:
# Second hidden layer size: half of the first layer, rounded down
# (this is 0 when hidden_nodes_layer1 is 1)
hidden_nodes_layer2 = hidden_nodes_layer1 // 2

# Review the number hidden nodes in the second layer
hidden_nodes_layer2

In [None]:
# Create the Sequential model instance
# (starts empty; the following cells append its layers with nn.add)
nn = Sequential()

In [None]:
# First hidden layer: number_input_features inputs, ReLU activation
nn.add(
    Dense(
        units=hidden_nodes_layer1,
        activation="relu",
        input_dim=number_input_features,
    )
)

In [None]:
# Second hidden layer, ReLU activation
nn.add(
    Dense(
        activation="relu",
        units=hidden_nodes_layer2,
    )
)

In [None]:
# Output layer: number_output_neurons units with a sigmoid activation
nn.add(
    Dense(
        activation="sigmoid",
        units=number_output_neurons,
    )
)

In [None]:
# Display the Sequential model summary
# (run after all three layers have been added)
nn.summary()

In [None]:
# Compile for binary classification: cross-entropy loss, Adam optimizer,
# accuracy reported as the metric
nn.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

In [None]:
# Train for 50 epochs on the scaled training data.
# Despite its name, model_1 receives the return value of nn.fit (the training
# history), not a separate model.
model_1 = nn.fit(
    x=X_train_scaled,
    y=y_train,
    epochs=50,
)

In [None]:
# Score the trained model on the held-out, scaled test data
model_loss, model_accuracy = nn.evaluate(
    x=X_test_scaled,
    y=y_test,
    verbose=2,
)

# Display the model loss and accuracy results
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

# Generation of the Features Matrix X (draft template — the cells below repeat the pipeline above and are mostly unfilled)

In [None]:
# Symbols for this section; "^VIX" comes first and "VXX" is included here.
ticker_list = [
    "^VIX",
    "spy", "DX-Y.NYB", "tlt", "ief", "gld", "slv",
    "CL=F", "USDJPY=X", "VXX",
    "FXI", "EZU", "EEM", "EFA",
]

In [None]:
# Inclusion of security levels

# TODO: build X from the security levels here. The original line was a bare
# `X=` with no right-hand side, which is a SyntaxError and stops this cell
# (and any script export of the notebook) from running.
# X = ...

# Select the series that have a correlation of 0.20 or more

In [None]:
# Inclusion of security returns

In [None]:
# inclusion of security volume

In [None]:
# Inclusion of GARCH series

In [None]:
# Inclusion of return squares in X

In [None]:
# Inclusion of leading indicator series

In [None]:
# Inclusion of Google Trends

In [None]:
# Inclusion of different lags of data from the X -- up to n_lag

In [None]:
# Shift data

In [None]:
# Set the Signal column
# NOTE(review): gtd_tdf is not defined in any visible cell; it has to be
# created before this cell can run.
#
# Signal is 1.0 where ^VIX is above 0.036 (3.6%) and 0.0 everywhere else,
# including NaN and exact ties. There is no -1 state.
# NOTE(review): a 3.6% threshold suggests "^VIX" holds returns rather than index
# levels here -- confirm.
gtd_tdf["Signal"] = 0.0
# Vectorized boolean mask instead of a row-by-row iterrows() loop
gtd_tdf.loc[gtd_tdf["^VIX"] > 0.036, "Signal"] = 1.0

# Review the DataFrame
gtd_tdf.head()


In [None]:
# Target vector: the Signal column as a plain array
y = gtd_tdf.loc[:, "Signal"].values

# Display a sample of y
y

In [None]:
# Feature matrix: every column of gtd_tdf except the Signal target
# NOTE(review): the "^VIX" column that Signal is derived from stays in X --
# confirm this is not target leakage.
X = gtd_tdf.drop(labels=["Signal"], axis="columns")

# Review the features DataFrame
X.head()

In [None]:
# Split features and target into training and testing sets (random_state=1)
# NOTE(review): train_test_split shuffles rows by default; if the rows are
# time-ordered, consider a chronological split -- confirm.
(X_train, X_test,
 y_train, y_test) = train_test_split(X, y, random_state=1)


In [None]:
# Create a StandardScaler instance
scaler = StandardScaler()

# Fit the scaler on the training features only
X_scaler = scaler.fit(X_train)

# Apply the fitted scaler to the training and to the testing features
X_train_scaled, X_test_scaled = (
    X_scaler.transform(features) for features in (X_train, X_test)
)


In [None]:
# Number of inputs (features) to the model = number of columns in X
number_input_features = len(X.columns)

# Review the number of features
number_input_features

In [None]:
# Define the number of neurons in the output layer
# (a single unit; the output layer cell gives it a sigmoid activation)
number_output_neurons = 1

In [None]:
# First hidden layer size: sqrt(inputs * outputs), rounded up to an integer
hidden_nodes_layer1 = int(
    np.ceil(
        np.sqrt(number_input_features * number_output_neurons)
    )
)

# Review the number hidden nodes in the first layer
hidden_nodes_layer1


In [None]:
# Second hidden layer size: half of the first layer, rounded down
# (this is 0 when hidden_nodes_layer1 is 1)
hidden_nodes_layer2 = hidden_nodes_layer1 // 2

# Review the number hidden nodes in the second layer
hidden_nodes_layer2

In [None]:
# Create the Sequential model instance
# (starts empty; the following cells append its layers with nn.add)
nn = Sequential()

In [None]:
# First hidden layer: number_input_features inputs, ReLU activation
nn.add(
    Dense(
        units=hidden_nodes_layer1,
        activation="relu",
        input_dim=number_input_features,
    )
)

In [None]:
# Second hidden layer, ReLU activation
nn.add(
    Dense(
        activation="relu",
        units=hidden_nodes_layer2,
    )
)

In [None]:
# Output layer: number_output_neurons units with a sigmoid activation
nn.add(
    Dense(
        activation="sigmoid",
        units=number_output_neurons,
    )
)

In [None]:
# Display the Sequential model summary
# (run after all three layers have been added)
nn.summary()

In [None]:
# Compile for binary classification: cross-entropy loss, Adam optimizer,
# accuracy reported as the metric
nn.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

In [None]:
# Train for 50 epochs on the scaled training data.
# Despite its name, model_1 receives the return value of nn.fit (the training
# history), not a separate model.
model_1 = nn.fit(
    x=X_train_scaled,
    y=y_train,
    epochs=50,
)

In [None]:
# Score the trained model on the held-out, scaled test data
model_loss, model_accuracy = nn.evaluate(
    x=X_test_scaled,
    y=y_test,
    verbose=2,
)

# Display the model loss and accuracy results
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")