In [1]:
# Import appropriate modules
import pandas as pd
from pathlib import Path
import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,OneHotEncoder
import datetime
import numpy as np
import yfinance as yf
from datetime import datetime
import hvplot
import hvplot.pandas
import matplotlib.pyplot as plt


In [2]:
def retrieve_yahoo_data(ticker = 'spy', start_date = '2007-07-01', end_date = '2021-10-03'):
    """Fetch the ``Close`` series of one Yahoo Finance ticker.

    Parameters
    ----------
    ticker : str
        Symbol handed to ``yf.Ticker``.
    start_date, end_date : str
        Passed through as ``start`` / ``end`` to ``history``.

    Returns
    -------
    The ``Close`` column of the downloaded history, renamed to ``ticker``.
    No returns are computed here. When the download raises, or yields no
    rows, a message is printed and ``None`` is returned instead.
    """
    try:
        handle = yf.Ticker(ticker)
        print(f"Processing Ticker {ticker}")
        closes = handle.history(start=start_date, end=end_date).Close
        closes.name = ticker
        # An empty result is reported through the same path as any other failure
        if len(closes) == 0:
            raise Exception("No Prices.")
    except Exception as ex:
        print(f"Sorry, Data not available for '{ticker}': Exception is {ex}")
        return None
    return closes

In [3]:
# Set up the variables related to tickers and ticker data:
# ticker_data_dict is filled by retrieve_tickers below, keyed by ticker symbol;
# ticker_list holds the Yahoo Finance symbols to download.
ticker_data_dict = {}
ticker_list = ["^VIX", "spy", "DX-Y.NYB", "tlt", "ief", "gld", "slv", "CL=F", "USDJPY=X", "VXX"]

# Retrieve Tickers
def retrieve_tickers(ticker_data_dict, ticker_list):
    """Download every ticker in ``ticker_list`` and store it in ``ticker_data_dict``.

    Parameters
    ----------
    ticker_data_dict : dict
        Updated in place, keyed by ticker symbol.
    ticker_list : iterable of str
        Symbols passed one by one to ``retrieve_yahoo_data``.

    Returns
    -------
    dict
        The same ``ticker_data_dict`` object that was passed in.

    ``retrieve_yahoo_data`` reports a failed download by returning ``None``.
    Such tickers are skipped so that no ``None`` placeholder ends up as a
    column when the dict is later turned into a DataFrame.
    """
    for ticker in ticker_list:
        ticker_data = retrieve_yahoo_data(ticker)
        if ticker_data is None:
            # The failure was already printed by retrieve_yahoo_data
            continue
        ticker_data_dict[ticker] = ticker_data

    return ticker_data_dict
        
# Download all tickers, then place the per-ticker series side by side in one DataFrame
# (one column per ticker).
ticker_data_dict = retrieve_tickers(ticker_data_dict, ticker_list)
ticker_data_df = pd.DataFrame(ticker_data_dict)
# Rows with missing values are kept on purpose here (dropna is left disabled):
# ticker_data_df = ticker_data_df.dropna()
# Display the ticker data related data frame
ticker_data_df.head()

Processing Ticker ^VIX
Processing Ticker spy
Processing Ticker DX-Y.NYB
Processing Ticker tlt
Processing Ticker ief
Processing Ticker gld
Processing Ticker slv
Processing Ticker CL=F
Processing Ticker USDJPY=X
Processing Ticker VXX


Unnamed: 0_level_0,^VIX,spy,DX-Y.NYB,tlt,ief,gld,slv,CL=F,USDJPY=X,VXX
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2007-07-02,15.4,114.157875,81.389999,56.094963,58.102318,65.019997,12.588,71.089996,122.230003,
2007-07-03,14.92,114.571541,,55.752949,57.929977,64.739998,12.532,,122.470001,
2007-07-04,,,,,,,,,122.660004,
2007-07-05,15.48,114.45121,81.540001,55.128139,57.606781,64.419998,12.465,71.809998,122.949997,
2007-07-06,14.72,115.052872,81.459999,54.904507,57.470341,64.959999,12.68,72.809998,123.360001,


In [4]:
# Daily simple returns for every ticker.
# Missing prices are forward-filled explicitly before the percentage change is taken. This is
# what the implicit fill_method='pad' default of pct_change did; that default is deprecated in
# recent pandas, so the fill is spelled out and the resulting values stay the same.
returns_df = ticker_data_df.ffill().pct_change(fill_method=None)
# The same returns without the ^VIX column
returns_all_but_VIX = returns_df.drop(columns=['^VIX'])

In [5]:
# Cumulative growth factor: running product of (1 + daily return), per column
cum_returns = returns_df.add(1).cumprod()
cum_returns_all_but_vix = returns_all_but_VIX.add(1).cumprod()
cum_returns_all_but_vix

Unnamed: 0_level_0,spy,DX-Y.NYB,tlt,ief,gld,slv,CL=F,USDJPY=X,VXX
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2007-07-02,,,,,,,,,
2007-07-03,1.003624,1.000000,0.993903,0.997034,0.995694,0.995551,1.000000,1.001963,
2007-07-04,1.003624,1.000000,0.993903,0.997034,0.995694,0.995551,1.000000,1.003518,
2007-07-05,1.002570,1.001843,0.982765,0.991471,0.990772,0.990229,1.010128,1.005890,
2007-07-06,1.007840,1.000860,0.978778,0.989123,0.999077,1.007309,1.024195,1.009245,
...,...,...,...,...,...,...,...,...,...
2021-09-27,3.877437,1.147315,2.606016,1.984772,2.516764,1.663489,1.061331,0.906193,0.224331
2021-09-28,3.799300,1.152107,2.565423,1.977205,2.492310,1.650779,1.059080,0.907829,0.247740
2021-09-29,3.805695,1.159110,2.569874,1.979269,2.481083,1.584843,1.052609,0.912444,0.251356
2021-09-30,3.759180,1.158005,2.569518,1.981677,2.525685,1.630124,1.055423,0.915782,0.251537


In [6]:
# Are levels correlated? -- YES
# Pairwise correlation matrix of the raw close levels of all tickers
ticker_data_df.corr()

Unnamed: 0,^VIX,spy,DX-Y.NYB,tlt,ief,gld,slv,CL=F,USDJPY=X,VXX
^VIX,1.0,-0.261907,-0.177535,-0.153038,-0.238696,-0.140994,-0.116533,-0.154325,-0.236938,0.375032
spy,-0.261907,1.0,0.694249,0.885115,0.866796,0.482224,-0.112768,-0.481942,0.529305,-0.868761
DX-Y.NYB,-0.177535,0.694249,1.0,0.770689,0.754411,0.123592,-0.455048,-0.830481,0.676866,0.274011
tlt,-0.153038,0.885115,0.770689,1.0,0.982691,0.623439,-0.040164,-0.623989,0.398247,-0.356807
ief,-0.238696,0.866796,0.754411,0.982691,1.0,0.680952,0.045964,-0.569718,0.344841,-0.469316
gld,-0.140994,0.482224,0.123592,0.623439,0.680952,1.0,0.714587,-0.023258,-0.360724,-0.539503
slv,-0.116533,-0.112768,-0.455048,-0.040164,0.045964,0.714587,1.0,0.47739,-0.711848,-0.68193
CL=F,-0.154325,-0.481942,-0.830481,-0.623989,-0.569718,-0.023258,0.47739,1.0,-0.514512,-0.340802
USDJPY=X,-0.236938,0.529305,0.676866,0.398247,0.344841,-0.360724,-0.711848,-0.514512,1.0,0.04985
VXX,0.375032,-0.868761,0.274011,-0.356807,-0.469316,-0.539503,-0.68193,-0.340802,0.04985,1.0


In [7]:
# Are returns correlated? -- YES; for ^VIX this is where the strongest relationships show up
# Pairwise correlation matrix of the daily returns of all tickers
returns_df.corr()

Unnamed: 0,^VIX,spy,DX-Y.NYB,tlt,ief,gld,slv,CL=F,USDJPY=X,VXX
^VIX,1.0,-0.719043,0.093661,0.334191,0.344311,-0.010204,-0.164473,-0.130616,-0.11036,0.886326
spy,-0.719043,1.0,-0.182515,-0.431964,-0.430171,0.0312,0.220519,0.17398,0.212615,-0.774008
DX-Y.NYB,0.093661,-0.182515,1.0,0.011807,-0.053914,-0.40666,-0.41817,-0.101696,0.051795,0.072226
tlt,0.334191,-0.431964,0.011807,1.0,0.911065,0.153197,-0.007242,-0.120435,-0.128969,0.280662
ief,0.344311,-0.430171,-0.053914,0.911065,1.0,0.192801,0.026677,-0.105577,-0.190355,0.311779
gld,-0.010204,0.0312,-0.40666,0.153197,0.192801,1.0,0.794172,0.07169,-0.037653,-0.066436
slv,-0.164473,0.220519,-0.41817,-0.007242,0.026677,0.794172,1.0,0.124697,0.002264,-0.227686
CL=F,-0.130616,0.17398,-0.101696,-0.120435,-0.105577,0.07169,0.124697,1.0,0.035922,-0.163233
USDJPY=X,-0.11036,0.212615,0.051795,-0.128969,-0.190355,-0.037653,0.002264,0.035922,1.0,-0.013628
VXX,0.886326,-0.774008,0.072226,0.280662,0.311779,-0.066436,-0.227686,-0.163233,-0.013628,1.0


In [8]:
# Is the VIX level correlated with returns? -- Not so much. The VIX level is more correlated
# with price levels, and VIX returns are more correlated with price returns.

# ^VIX level placed next to the returns of every other ticker, then correlated pairwise
vix_level_and_price_returns_df=pd.concat([ticker_data_df['^VIX'],returns_all_but_VIX], axis=1)
vix_level_and_price_returns_df.corr()

Unnamed: 0,^VIX,spy,DX-Y.NYB,tlt,ief,gld,slv,CL=F,USDJPY=X,VXX
^VIX,1.0,-0.136983,0.03784,0.071525,0.080746,0.01746,-0.024428,-0.075762,-0.054239,0.223272
spy,-0.136983,1.0,-0.182515,-0.431964,-0.430171,0.0312,0.220519,0.17398,0.212615,-0.774008
DX-Y.NYB,0.03784,-0.182515,1.0,0.011807,-0.053914,-0.40666,-0.41817,-0.101696,0.051795,0.072226
tlt,0.071525,-0.431964,0.011807,1.0,0.911065,0.153197,-0.007242,-0.120435,-0.128969,0.280662
ief,0.080746,-0.430171,-0.053914,0.911065,1.0,0.192801,0.026677,-0.105577,-0.190355,0.311779
gld,0.01746,0.0312,-0.40666,0.153197,0.192801,1.0,0.794172,0.07169,-0.037653,-0.066436
slv,-0.024428,0.220519,-0.41817,-0.007242,0.026677,0.794172,1.0,0.124697,0.002264,-0.227686
CL=F,-0.075762,0.17398,-0.101696,-0.120435,-0.105577,0.07169,0.124697,1.0,0.035922,-0.163233
USDJPY=X,-0.054239,0.212615,0.051795,-0.128969,-0.190355,-0.037653,0.002264,0.035922,1.0,-0.013628
VXX,0.223272,-0.774008,0.072226,0.280662,0.311779,-0.066436,-0.227686,-0.163233,-0.013628,1.0


In [9]:
# Plot the ^VIX level together with the cumulative returns of all other tickers
vix_level_and_cum_price_returns_df=pd.concat([ticker_data_df['^VIX'],cum_returns_all_but_vix], axis=1)
vix_level_and_cum_price_returns_df.hvplot()

In [10]:
# ^VIX level divided by 10 -- presumably to bring it to a scale comparable with the
# cumulative returns it is plotted against in the next cell (confirm the choice of 10)
vix_level_adjusted=ticker_data_df['^VIX']/10
vix_level_adjusted.hvplot()

In [11]:
# Plot the rescaled ^VIX level together with the cumulative returns of all other tickers
vix_level_adj_and_cum_price_returns_df=pd.concat([vix_level_adjusted,cum_returns_all_but_vix], axis=1)
vix_level_adj_and_cum_price_returns_df.hvplot()

In [12]:
# Plot ^VIX and VXX levels on the dates for which both have a value
ticker_data_df[["^VIX", "VXX"]].dropna().hvplot()

In [13]:
# ^VIX level next to VXX divided by 5 (presumably so both series fit one axis -- confirm),
# restricted to the dates on which both have a value
vxx_scaled = ticker_data_df["VXX"] / 5
VIX_and_VXX_for_plot = pd.concat([ticker_data_df["^VIX"], vxx_scaled], axis=1).dropna()
VIX_and_VXX_for_plot.hvplot()

In [14]:
# Unscaled ^VIX and VXX levels on their common dates (this rebinds VIX_and_VXX_for_plot),
# followed by their day-over-day returns on those dates
VIX_and_VXX_for_plot = ticker_data_df[["^VIX", "VXX"]].dropna()
returns_vix_and_vxx = VIX_and_VXX_for_plot.pct_change().dropna()
returns_vix_and_vxx.head()

Unnamed: 0_level_0,^VIX,VXX
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2018-01-29,0.195164,0.069414
2018-01-30,0.068642,0.032792
2018-01-31,-0.084517,0.003273
2018-02-01,-0.00517,-0.050343
2018-02-02,0.285078,0.128594


In [15]:
# Cases of interest: days on which ^VIX and VXX both go up together.
# "Same sign" is detected through a positive product of the two returns; combined with a
# positive ^VIX return this keeps only the days where both rose.
vix_ret = returns_vix_and_vxx["^VIX"]
vxx_ret = returns_vix_and_vxx["VXX"]
moved_same_way = (vix_ret * vxx_ret) > 0
vix_went_up = vix_ret > 0
vix_and_vxx_positive_df = returns_vix_and_vxx.loc[moved_same_way & vix_went_up]

In [16]:
# Is 3% a good threshold? It is if, most of the time, VXX is also positive when the ^VIX return
# is above the threshold.
# For every candidate threshold (150 to 995 basis points in steps of 5) count
#   * the days on which the ^VIX return exceeded the threshold, and
#   * how many of those days VXX nevertheless had a negative return ("fail cases").
vix_returns = returns_vix_and_vxx["^VIX"]
vxx_fell = returns_vix_and_vxx["VXX"] < 0  # does not depend on the threshold, so built once

results = []
for threshold_bps in range(150, 1000, 5):
    threshold = threshold_bps / 10000
    vix_above_threshold = vix_returns > threshold

    # Row counts taken directly from the boolean masks (kept as floats, as before)
    cases_vix_above_threshold = float(vix_above_threshold.sum())
    cases_vxx_negative_and_vix_above_threshold = float((vix_above_threshold & vxx_fell).sum())

    # No day above the threshold means the proportion is undefined
    if cases_vix_above_threshold:
        fail_proportion = cases_vxx_negative_and_vix_above_threshold/cases_vix_above_threshold*100
    else:
        fail_proportion = float("nan")

    results.append({'Threshold return (%)':threshold*100,
                     "cases_vxx_negative_and_vix_above_threshold":cases_vxx_negative_and_vix_above_threshold,
                     "cases_vix_above_threshold":cases_vix_above_threshold,
                     "Proportion of fail cases (%)": fail_proportion
                    })

results_df = pd.DataFrame(results)

results_df.head(50)

Unnamed: 0,Threshold return (%),cases_vxx_negative_and_vix_above_threshold,cases_vix_above_threshold,Proportion of fail cases (%)
0,1.5,38.0,334.0,11.377246
1,1.55,38.0,333.0,11.411411
2,1.6,37.0,330.0,11.212121
3,1.65,36.0,325.0,11.076923
4,1.7,36.0,323.0,11.145511
5,1.75,36.0,320.0,11.25
6,1.8,32.0,316.0,10.126582
7,1.85,30.0,312.0,9.615385
8,1.9,29.0,311.0,9.324759
9,1.95,27.0,309.0,8.737864


In [17]:
plot_title = 'Determination of Threshold for VIX return prediction'
threshold_col = 'Threshold return (%)'

# Share of fail cases as a function of the threshold, over the whole scanned range
curve = results_df.hvplot(x=threshold_col, y='Proportion of fail cases (%)', title=plot_title)

# Case counts as bars, only for thresholds strictly between 2% and 4%
in_zoom_window = (results_df[threshold_col] > 2) & (results_df[threshold_col] < 4)
amount_of_cases = results_df[in_zoom_window].hvplot.bar(
    x=threshold_col,
    y=['cases_vxx_negative_and_vix_above_threshold', 'cases_vix_above_threshold'],
    title=plot_title,
)

# Show both plots together
curve + amount_of_cases

In [18]:
# Read the google_trends_df.csv file from the Resources folder into a Pandas DataFrame.
# The 'Date' column becomes the index and is parsed as dates. infer_datetime_format is no
# longer passed: it is deprecated since pandas 2.0 and only ever affected parsing speed.
df_path = Path("./Resources/google_trends_df.csv")
google_trends_df = pd.read_csv(df_path, index_col='Date', parse_dates=True)

# Review the DataFrame
google_trends_df


Unnamed: 0_level_0,Leading_Indicators,PMI,CCI,Jobless_Claims,GDP,war,Impeachment,Catastrophe,Natural_disaster,Inflation,...,Debt_ceiling,retail_spending,Consumer_spending,Consumer,Earnings,Economic_contraction,Depression,Shock,Monetary_policy,VIX
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2004-01-01,0,0,0,0,31,50,0,34,0,84,...,0,0,0,100,45,0,37,34,0,0
2004-02-01,0,0,0,0,61,41,0,51,0,55,...,0,0,0,78,51,0,0,0,74,0
2004-03-01,0,0,0,0,20,63,0,43,0,41,...,0,0,0,95,36,0,0,85,0,17
2004-04-01,0,0,0,0,60,40,0,50,0,77,...,0,0,0,74,74,0,36,33,0,6
2004-05-01,0,0,0,0,28,73,0,30,27,58,...,0,0,0,88,37,0,49,30,100,6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-06-01,7,0,0,0,15,37,1,16,7,80,...,0,0,5,43,33,0,17,21,5,18
2021-07-01,0,0,0,5,14,52,1,18,6,60,...,1,0,5,46,47,0,27,31,10,17
2021-08-01,3,0,0,10,15,38,13,15,5,53,...,1,0,0,45,48,0,17,24,14,17
2021-09-01,0,0,0,5,23,41,86,31,7,48,...,12,0,5,45,65,0,28,20,14,19


In [19]:
# Review the data types associated with the columns (one entry per Google Trends column)
google_trends_df.dtypes

Leading_Indicators      int64
PMI                     int64
CCI                     int64
Jobless_Claims          int64
GDP                     int64
war                     int64
Impeachment             int64
Catastrophe             int64
Natural_disaster        int64
Inflation               int64
unemployment            int64
Market_crash            int64
Covid                   int64
virus                   int64
Pandemic                int64
vaccination             int64
Delta                   int64
Covid_mutation          int64
FED                     int64
Tapering                int64
Liquidity               int64
Banking                 int64
Correction_of_market    int64
coup                    int64
Debt_ceiling            int64
retail_spending         int64
Consumer_spending       int64
Consumer                int64
Earnings                int64
Economic_contraction    int64
Depression              int64
Shock                   int64
Monetary_policy         int64
VIX       

In [20]:
# Put the Google Trends columns next to the daily ticker returns held in returns_df (derived
# from ticker_data_df earlier in the notebook), aligned on the date index, and keep only the
# dates on which every column has a value.
# NOTE(review): the Google Trends index shown above is month-start dates, while the returns are
# daily, so only month-start dates with complete ticker data survive the dropna -- the target
# array displayed further down has 32 entries. Confirm this sample size is intended.
trends_and_returns = [google_trends_df, returns_df]
gtd_tdf = pd.concat(trends_and_returns, axis=1).dropna()
gtd_tdf.head()

Unnamed: 0_level_0,Leading_Indicators,PMI,CCI,Jobless_Claims,GDP,war,Impeachment,Catastrophe,Natural_disaster,Inflation,...,^VIX,spy,DX-Y.NYB,tlt,ief,gld,slv,CL=F,USDJPY=X,VXX
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2018-02-01,4.0,0.0,6.0,0.0,35.0,50.0,1.0,25.0,15.0,59.0,...,-0.00517,-0.001135,-0.005161,-0.014493,-0.005159,0.00329,-0.006116,0.01653,0.004313,-0.050343
2018-03-01,0.0,0.0,0.0,0.0,27.0,63.0,1.0,19.0,9.0,44.0,...,0.13199,-0.014541,-0.003201,0.006826,0.004303,-0.00224,0.005175,-0.010545,-0.007317,0.068841
2018-05-01,0.0,0.0,0.0,5.0,36.0,80.0,2.0,19.0,8.0,45.0,...,-0.027621,0.001777,0.006642,-0.003602,-0.001566,-0.007063,-0.009103,-0.01925,0.002062,0.032667
2018-06-01,4.0,0.0,6.0,0.0,27.0,68.0,2.0,25.0,3.0,38.0,...,-0.127673,0.009818,0.002128,-0.005283,-0.003695,-0.004955,-0.001294,-0.018347,0.00034,-0.048998
2018-08-01,4.0,0.0,6.0,0.0,21.0,51.0,6.0,21.0,3.0,41.0,...,0.024942,-0.00167,0.001376,-0.008147,-0.002363,-0.007328,-0.008898,-0.015998,0.007334,-0.01211


In [21]:
# Daily ^VIX return above which a day is labelled as a signal (3.6%)
VIX_RETURN_THRESHOLD = 0.036

# Set the Signal column: a binary label, not an entry/exit pair.
#   1.0 when the ^VIX return is strictly greater than the threshold,
#   0.0 otherwise (including a return exactly equal to the threshold).
# The comparison is done on the whole column at once instead of row by row with iterrows().
gtd_tdf["Signal"] = (gtd_tdf["^VIX"] > VIX_RETURN_THRESHOLD).astype(float)

# Review the DataFrame
gtd_tdf.head()


Unnamed: 0_level_0,Leading_Indicators,PMI,CCI,Jobless_Claims,GDP,war,Impeachment,Catastrophe,Natural_disaster,Inflation,...,spy,DX-Y.NYB,tlt,ief,gld,slv,CL=F,USDJPY=X,VXX,Signal
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2018-02-01,4.0,0.0,6.0,0.0,35.0,50.0,1.0,25.0,15.0,59.0,...,-0.001135,-0.005161,-0.014493,-0.005159,0.00329,-0.006116,0.01653,0.004313,-0.050343,0.0
2018-03-01,0.0,0.0,0.0,0.0,27.0,63.0,1.0,19.0,9.0,44.0,...,-0.014541,-0.003201,0.006826,0.004303,-0.00224,0.005175,-0.010545,-0.007317,0.068841,1.0
2018-05-01,0.0,0.0,0.0,5.0,36.0,80.0,2.0,19.0,8.0,45.0,...,0.001777,0.006642,-0.003602,-0.001566,-0.007063,-0.009103,-0.01925,0.002062,0.032667,0.0
2018-06-01,4.0,0.0,6.0,0.0,27.0,68.0,2.0,25.0,3.0,38.0,...,0.009818,0.002128,-0.005283,-0.003695,-0.004955,-0.001294,-0.018347,0.00034,-0.048998,0.0
2018-08-01,4.0,0.0,6.0,0.0,21.0,51.0,6.0,21.0,3.0,41.0,...,-0.00167,0.001376,-0.008147,-0.002363,-0.007328,-0.008898,-0.015998,0.007334,-0.01211,0.0


In [22]:
# Target vector y: the Signal column as a plain NumPy array
y = gtd_tdf["Signal"].to_numpy()
# Display y
y

array([0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 1., 1., 0., 0.,
       1., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.])

In [41]:
# Define the feature set X: every column except the target (Signal) and the ^VIX return
# that the target is computed from.
# NOTE(review): the same-day VXX return stays in X; its return correlation with ^VIX is about
# 0.89 in the returns correlation table above, so it may leak the target -- confirm intended.
X = gtd_tdf.drop(["Signal", "^VIX"], axis=1)

# Review the features DataFrame
X.head()

Unnamed: 0_level_0,Leading_Indicators,PMI,CCI,Jobless_Claims,GDP,war,Impeachment,Catastrophe,Natural_disaster,Inflation,...,VIX,spy,DX-Y.NYB,tlt,ief,gld,slv,CL=F,USDJPY=X,VXX
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2018-02-01,4.0,0.0,6.0,0.0,35.0,50.0,1.0,25.0,15.0,59.0,...,62.0,-0.001135,-0.005161,-0.014493,-0.005159,0.00329,-0.006116,0.01653,0.004313,-0.050343
2018-03-01,0.0,0.0,0.0,0.0,27.0,63.0,1.0,19.0,9.0,44.0,...,18.0,-0.014541,-0.003201,0.006826,0.004303,-0.00224,0.005175,-0.010545,-0.007317,0.068841
2018-05-01,0.0,0.0,0.0,5.0,36.0,80.0,2.0,19.0,8.0,45.0,...,13.0,0.001777,0.006642,-0.003602,-0.001566,-0.007063,-0.009103,-0.01925,0.002062,0.032667
2018-06-01,4.0,0.0,6.0,0.0,27.0,68.0,2.0,25.0,3.0,38.0,...,13.0,0.009818,0.002128,-0.005283,-0.003695,-0.004955,-0.001294,-0.018347,0.00034,-0.048998
2018-08-01,4.0,0.0,6.0,0.0,21.0,51.0,6.0,21.0,3.0,41.0,...,14.0,-0.00167,0.001376,-0.008147,-0.002363,-0.007328,-0.008898,-0.015998,0.007334,-0.01211


In [42]:
# Split the features and target into a training and a testing dataset
# random_state=1 makes the split repeatable
# NOTE(review): no test_size/shuffle/stratify is passed, so the library defaults apply; with
# date-indexed rows a shuffled split mixes past and future dates -- confirm this is acceptable.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)


In [43]:
# Create a StandardScaler instance
scaler = StandardScaler()

# Learn the scaling parameters from the training features only
X_scaler = scaler.fit(X_train)

# Apply that same scaling to the training and to the testing features
X_train_scaled, X_test_scaled = (
    X_scaler.transform(features) for features in (X_train, X_test)
)


In [44]:
# Define the number of inputs (features) to the model: one per column of X
number_input_features = len(X.columns)

# Review the number of features
number_input_features

43

In [45]:
# Define the number of neurons in the output layer (a single output for the 0/1 Signal target)
number_output_neurons = 1

In [46]:
# Size of the first hidden layer: square root of (input features * output neurons),
# rounded up to the next whole number
layer1_size_estimate = np.sqrt(number_input_features * number_output_neurons)
hidden_nodes_layer1 = int(np.ceil(layer1_size_estimate))

# Review the number of hidden nodes in the first layer
hidden_nodes_layer1

7

In [47]:
# Size of the second hidden layer: half of the first hidden layer, rounded down
hidden_nodes_layer2 = hidden_nodes_layer1 // 2

# Review the number of hidden nodes in the second layer
hidden_nodes_layer2

3

In [48]:
# Create the Sequential model instance; the layers are added to it in the following cells
nn = Sequential()

In [49]:
# Add the first hidden layer: takes number_input_features inputs, has hidden_nodes_layer1
# units and uses ReLU activation
nn.add(Dense(input_dim=number_input_features, units=hidden_nodes_layer1, activation="relu"))

In [50]:
# Add the second hidden layer: hidden_nodes_layer2 units with ReLU activation
nn.add(Dense(units=hidden_nodes_layer2, activation="relu"))

In [51]:
# Add the output layer to the model: number_output_neurons units with sigmoid activation,
# matching the 0/1 Signal target
nn.add(Dense(units=number_output_neurons, activation="sigmoid"))

In [52]:
# Display the Sequential model summary (layers, output shapes and parameter counts)
nn.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_3 (Dense)              (None, 7)                 308       
_________________________________________________________________
dense_4 (Dense)              (None, 3)                 24        
_________________________________________________________________
dense_5 (Dense)              (None, 1)                 4         
Total params: 336
Trainable params: 336
Non-trainable params: 0
_________________________________________________________________


In [53]:
# Compile the Sequential model: binary cross-entropy loss for the 0/1 target, the Adam
# optimizer, and accuracy reported as the metric
nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [54]:
# Fit the model using 50 epochs and the scaled training data
# NOTE(review): despite its name, model_1 receives whatever nn.fit returns rather than a new
# model; the trained network itself is nn.
# NOTE(review): no random seed is set in the cells shown, so results may differ between
# runs -- TODO confirm.
model_1 = nn.fit(X_train_scaled, y_train, epochs=50)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [55]:
# Evaluate the model loss and accuracy metrics using the evaluate method and the scaled test data
# NOTE(review): the target array shown earlier has 32 entries in total, so the held-out set is
# small and this accuracy figure is coarse -- confirm before drawing conclusions.
model_loss, model_accuracy = nn.evaluate(X_test_scaled, y_test, verbose=2)

# Display the model loss and accuracy results
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

1/1 - 0s - loss: 0.7911 - accuracy: 0.6250
Loss: 0.7911221981048584, Accuracy: 0.625
