# Neural Network Model

#### to do:

Add VIX and VIX return in the correlation_filter 

manage names in the features: spy_volume, spy_return, spy_close, etc

In [18]:
# Import appropriate modules

import pandas as pd
from pathlib import Path
import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,OneHotEncoder, MinMaxScaler
from sklearn.metrics import classification_report

from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier

import datetime
import numpy as np
import yfinance as yf
from datetime import datetime
from pandas.tseries.offsets import DateOffset
import hvplot
import hvplot.pandas
import matplotlib.pyplot as plt
from vix_functions import garch_fit_and_predict, correlation_filter, retrieve_yahoo_close, retrieve_yahoo_volume 

# Generation of the Features Matrix X

In [2]:
# Ticker List: VIX must be in first position
ticker_list= ["^VIX", "spy", "DX-Y.NYB", "tlt", "ief", "gld", "slv", "CL=F", "USDJPY=X", "FXI", "EZU", "EEM", "EFA"]

In [3]:
# Inclusion of security levels X1
def retrieve_close(close_prices_dict, ticker_list):
    for ticker in ticker_list:
        close_price = retrieve_yahoo_close(ticker)
        close_prices_dict[ticker] = close_price
    return close_prices_dict

close_prices_dict = {}
close_prices_dict = retrieve_close(close_prices_dict, ticker_list)
print("Completed retrieve of close prices")

close_prices_df= pd.DataFrame(close_prices_dict)

close_prices_component_df = correlation_filter(close_prices_df, min_corr=0.20, key_column='^VIX', eliminate_first_column=True)

X1=close_prices_component_df
vix=close_prices_df['^VIX']
vix_ret=close_prices_df['^VIX'].pct_change()
VIX=pd.DataFrame([vix, vix_ret]).T
VIX.columns=['VIX','VIX_ret']

X1=pd.concat([VIX,close_prices_component_df], axis=1)

Processing Close ^VIX
Processing Close spy
Processing Close DX-Y.NYB
Processing Close tlt
Processing Close ief
Processing Close gld
Processing Close slv
Processing Close CL=F
Processing Close USDJPY=X
Processing Close FXI
Processing Close EZU
Processing Close EEM
Processing Close EFA
Completed retrieve of close prices


In [4]:
# Inclusion of security returns X2
# Include returns that are correlated more than 0.20 with the Vix return

security_returns_df= close_prices_df.pct_change()
security_returns_component_df = correlation_filter(
                                        security_returns_df, 
                                        min_corr=0.20, 
                                        key_column='^VIX', 
                                        eliminate_first_column=True 
)

X2=security_returns_component_df


#### OBSERVATION: It would be good to include VIX level and Return and check on both in case correlations changes in the future. As of now with some variables I check with VIX level, and others I check VIX return

In [5]:
# inclusion of security volume X3
volume_list = ticker_list[1:len(ticker_list)]

def retrieve_volume(volume_dict, volume_list):
    for ticker in volume_list:        
        volume = retrieve_yahoo_volume(ticker)
        volume_dict[ticker] = volume
    return volume_dict

volume_dict = {}
volume_dict = retrieve_volume(volume_dict, volume_list)
print("Completed retrieve of volume")

volume_df= pd.DataFrame(volume_dict)
volume_df_with_vix=pd.concat([vix, volume_df], axis=1)
#print(volume_df_with_vix.corr())

volume_component_df = correlation_filter(volume_df_with_vix, min_corr=0.20, key_column='^VIX', eliminate_first_column=True )
X3=volume_component_df

Processing Volume spy
Processing Volume DX-Y.NYB
Processing Volume tlt
Processing Volume ief
Processing Volume gld
Processing Volume slv
Processing Volume CL=F
Processing Volume USDJPY=X
Processing Volume FXI
Processing Volume EZU
Processing Volume EEM
Processing Volume EFA
Completed retrieve of volume


In [6]:
# Inclusion of GARCH series X4
garch_series=pd.DataFrame()

for ticker in ticker_list:
        garch_series[ticker]=garch_fit_and_predict(security_returns_df[ticker], horizon=1, p=1, q=1, o=1)

X4=garch_series
X4


Unnamed: 0_level_0,^VIX,spy,DX-Y.NYB,tlt,ief,gld,slv,CL=F,USDJPY=X,FXI,EZU,EEM,EFA
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2007-07-03,0.000745,0.000098,0.000028,0.000068,0.000037,0.000072,0.000118,0.000135,0.000062,0.000217,0.000130,0.000184,0.000103
2007-07-04,0.000699,0.000092,0.000028,0.000066,0.000036,0.000070,0.000116,0.000130,0.000059,0.000209,0.000125,0.000176,0.000099
2007-07-05,0.000684,0.000086,0.000027,0.000069,0.000037,0.000070,0.000115,0.000129,0.000057,0.000202,0.000121,0.000169,0.000097
2007-07-06,0.000650,0.000081,0.000027,0.000068,0.000036,0.000071,0.000120,0.000130,0.000055,0.000200,0.000117,0.000164,0.000093
2007-07-09,0.000638,0.000076,0.000027,0.000067,0.000036,0.000072,0.000118,0.000133,0.000053,0.000195,0.000112,0.000158,0.000089
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-09-27,0.000902,0.000087,0.000030,0.000087,0.000035,0.000085,0.000154,0.000166,0.000042,0.000193,0.000114,0.000126,0.000097
2021-09-28,0.001494,0.000137,0.000030,0.000091,0.000035,0.000086,0.000152,0.000157,0.000041,0.000187,0.000148,0.000130,0.000130
2021-09-29,0.001324,0.000127,0.000032,0.000088,0.000034,0.000085,0.000166,0.000152,0.000042,0.000182,0.000142,0.000130,0.000124
2021-09-30,0.001185,0.000136,0.000032,0.000086,0.000033,0.000093,0.000176,0.000145,0.000042,0.000177,0.000139,0.000126,0.000120


In [7]:
# Inclusion of return squares in X5

returns_squared_df_no_vix= security_returns_df.drop(columns='^VIX')**2
returns_squared_and_vix_level_df=pd.concat([vix,returns_squared_df_no_vix], axis=1)
returns_squared_component_df = correlation_filter(returns_squared_and_vix_level_df, min_corr=0.20, key_column='^VIX', eliminate_first_column=True)

X5=returns_squared_component_df
X5

Unnamed: 0_level_0,spy,DX-Y.NYB,tlt,ief,gld,slv,FXI,EZU,EEM,EFA
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2007-07-02,,,,,,,,,,
2007-07-03,0.000013,0.000000e+00,3.717165e-05,0.000009,0.000019,0.000020,0.000439,0.000042,0.000063,2.265070e-05
2007-07-04,0.000000,0.000000e+00,0.000000e+00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000e+00
2007-07-05,0.000001,3.396638e-06,1.255958e-04,0.000031,0.000024,0.000029,0.000012,0.000027,0.000020,2.479816e-05
2007-07-06,0.000028,9.626279e-07,1.644983e-05,0.000006,0.000070,0.000298,0.000630,0.000118,0.000294,5.187350e-05
...,...,...,...,...,...,...,...,...,...,...
2021-09-27,0.000008,2.869577e-07,1.351201e-05,0.000007,0.000004,0.000123,0.000207,0.000001,0.000042,3.877590e-07
2021-09-28,0.000406,1.744296e-05,2.426398e-04,0.000015,0.000094,0.000058,0.000002,0.000569,0.000167,4.755389e-04
2021-09-29,0.000003,3.695060e-05,3.010235e-06,0.000001,0.000020,0.001595,0.000052,0.000024,0.000087,5.257838e-06
2021-09-30,0.000149,9.100347e-07,1.916833e-08,0.000001,0.000323,0.000816,0.000092,0.000052,0.000064,1.566647e-05


### Inclusion of Google Trends

In [8]:
# Upload of csv files
keywords=['liquidity', 'Jobless_claims']
google_trends_df=pd.DataFrame()

for keyword in keywords:
    file_path=f"./Resources/{keyword}.csv"
    print(file_path)
    trend=pd.read_csv(Path(file_path),
                      index_col= 'Date', 
                      parse_dates= True,
                      infer_datetime_format=True)
    google_trends_df=pd.concat([google_trends_df, trend], axis=1)

google_trends_df

./Resources/liquidity.csv
./Resources/Jobless_claims.csv


Unnamed: 0_level_0,liquidity: (United States),Jobless claims: (United States)
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2016-10-09,31,0
2016-10-16,54,0
2016-10-23,42,0
2016-10-30,38,0
2016-11-06,32,0
...,...,...
2021-09-05,31,0
2021-09-12,43,0
2021-09-19,37,9
2021-09-26,39,0


In [9]:
# We will interpolate with degree 2 in case of missing values
google_trends_df.iloc[0,:]=0

vix_google_trends_df=pd.concat([vix, google_trends_df], axis=1)

#First row set to zero in case there are a lot of missing values
vix_google_trends_df.iloc[0,1:]=0
print(vix_google_trends_df.head(20))

vix_google_trends_df=vix_google_trends_df.interpolate(method="polynomial", order=2)
vix_google_trends_df

                 ^VIX  liquidity: (United States)  \
Date                                                
2007-07-02  15.400000                         0.0   
2007-07-03  14.920000                         NaN   
2007-07-04        NaN                         NaN   
2007-07-05  15.480000                         NaN   
2007-07-06  14.720000                         NaN   
2007-07-09  15.160000                         NaN   
2007-07-10  17.570000                         NaN   
2007-07-11  16.639999                         NaN   
2007-07-12  15.540000                         NaN   
2007-07-13  15.150000                         NaN   
2007-07-16  15.590000                         NaN   
2007-07-17  15.630000                         NaN   
2007-07-18  16.000000                         NaN   
2007-07-19  15.230000                         NaN   
2007-07-20  16.950001                         NaN   
2007-07-23  16.809999                         NaN   
2007-07-24  18.549999                         

Unnamed: 0_level_0,^VIX,liquidity: (United States),Jobless claims: (United States)
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2007-07-02,15.400000,0.000000,0.000000
2007-07-03,14.920000,-9.351484,-0.000046
2007-07-04,15.275253,-18.697444,-0.000093
2007-07-05,15.480000,-28.037881,-0.000139
2007-07-06,14.720000,-37.372794,-0.000185
...,...,...,...
2021-09-28,23.250000,40.817461,-1.479368
2021-09-29,22.559999,41.866667,-1.775242
2021-09-30,23.139999,43.009524,-1.775242
2021-10-01,,44.246032,-1.479368


In [10]:
# Filtering by correlation X6

google_trends_component_df = correlation_filter(
                                vix_google_trends_df, 
                                min_corr=0.05, 
                                key_column='^VIX', 
                                eliminate_first_column=True)

X6=google_trends_component_df

# We will interpolate so we can fill the missing data only on Google Trends
pro_interpolation_of_X6=pd.concat([vix, X6], axis=1)
pro_interpolation_of_X6=pro_interpolation_of_X6.interpolate(method="polynomial", order=2, axis=0)
pro_interpolation_of_X6
X6 = pro_interpolation_of_X6.iloc[:,1:]
X6

Unnamed: 0_level_0,liquidity: (United States),Jobless claims: (United States)
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2007-07-02,0.000000,0.000000
2007-07-03,-9.351484,-0.000046
2007-07-04,-18.697444,-0.000093
2007-07-05,-28.037881,-0.000139
2007-07-06,-37.372794,-0.000185
...,...,...
2021-09-28,40.817461,-1.479368
2021-09-29,41.866667,-1.775242
2021-09-30,43.009524,-1.775242
2021-10-01,44.246032,-1.479368


# INCLUSION OF LAGGED SERIES

In [11]:
# Inclusion of different lags of data from the X -- up to n_lag



# GENERATION OF THE FEATURE MATRIX **X**

In [12]:
# Concatenation of all sources of data
XY=pd.concat([X1, X2, X3, X4, X5, X6], axis=1)

# Interpolation is not applied to numerical variables. We are just going to drop those.
XY=XY.dropna()
XY

Unnamed: 0_level_0,VIX,VIX_ret,spy,ief,USDJPY=X,FXI,EZU,EEM,EFA,spy,...,tlt,ief,gld,slv,FXI,EZU,EEM,EFA,liquidity: (United States),Jobless claims: (United States)
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2007-07-03,14.920000,-0.031169,114.571526,57.929958,122.470001,32.352463,39.293324,33.974312,53.867031,0.003623,...,3.717165e-05,0.000009,0.000019,0.000020,0.000439,0.000042,0.000063,2.265070e-05,-9.351484,-0.000046
2007-07-05,15.480000,0.037533,114.451233,57.606716,122.949997,32.238968,39.090736,34.124905,53.598785,-0.001050,...,1.255958e-04,0.000031,0.000024,0.000029,0.000012,0.000027,0.000020,2.479816e-05,-28.037881,-0.000139
2007-07-06,14.720000,-0.049096,115.052864,57.470310,123.360001,33.047962,39.515488,34.709679,53.984821,0.005257,...,1.644983e-05,0.000006,0.000070,0.000298,0.000630,0.000118,0.000294,5.187350e-05,-37.372794,-0.000185
2007-07-09,15.160000,0.029891,115.143127,57.678585,123.370003,33.504387,39.688675,35.045979,54.115673,0.000785,...,1.291630e-05,0.000013,0.000042,0.000000,0.000191,0.000019,0.000094,5.875106e-06,-65.344391,-0.000324
2007-07-10,17.570000,0.158971,113.503586,58.073639,121.290001,33.110748,39.009052,34.355797,53.454849,-0.014239,...,2.872594e-04,0.000047,0.000012,0.000225,0.000138,0.000293,0.000388,1.491163e-04,-74.657209,-0.000370
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-09-24,17.750000,-0.047236,443.910004,115.629654,110.325996,38.240002,49.959999,50.779999,80.300003,0.001647,...,9.552035e-05,0.000009,0.000002,0.000091,0.000441,0.000110,0.000141,8.790382e-05,37.557142,2.662863
2021-09-27,18.760000,0.056901,442.640015,115.319878,110.764000,38.790001,49.900002,51.110001,80.250000,-0.002861,...,1.351201e-05,0.000007,0.000004,0.000123,0.000207,0.000001,0.000042,3.877590e-07,39.861905,-0.887621
2021-09-28,23.250000,0.239339,433.720001,114.880180,110.963997,38.840000,48.709999,50.450001,78.500000,-0.020152,...,2.426398e-04,0.000015,0.000094,0.000058,0.000002,0.000569,0.000167,4.755389e-04,40.817461,-1.479368
2021-09-29,22.559999,-0.029677,434.450012,115.000099,111.528000,38.560001,48.470001,49.980000,78.320000,0.001683,...,3.010235e-06,0.000001,0.000020,0.001595,0.000052,0.000024,0.000087,5.257838e-06,41.866667,-1.775242


In [13]:
# Set the Signal column
threshold=0.030
vix_ret=vix.pct_change()
XY["Signal"] = 0.0

XY.loc[(XY['VIX_ret'] >= threshold), 'Signal'] = 1

# # Generate the trading signals 1 (entry) or -1 (exit)
# # where 1 is when the ^VIX is greater than 3.6%.
# # where 0 is when the ^VIX  is less than 3.6%.
#for index, row in XY.iterrows():
#    if row["VIX_ret"] >= 0.036:
#        XY.loc[index, "Signal"] = 1.0

# Review the DataFrame
print(XY["Signal"].head())    
XY["Signal"].value_counts()
XY.loc[XY["Signal"]==1, 'VIX_ret']
#XY.shape  ## 3981

Date
2007-07-03    0.0
2007-07-05    1.0
2007-07-06    0.0
2007-07-09    0.0
2007-07-10    1.0
Name: Signal, dtype: float64


Date
2007-07-05    0.037533
2007-07-10    0.158971
2007-07-20    0.112935
2007-07-24    0.103510
2007-07-26    0.145856
                ...   
2021-09-10    0.114362
2021-09-17    0.113430
2021-09-20    0.235464
2021-09-27    0.056901
2021-09-28    0.239339
Name: VIX_ret, Length: 981, dtype: float64

In [14]:
vix_ret=vix.pct_change()
vix_ret[vix_ret>=threshold].index
vix_ret.shape


# How many values of the vix we missed due to missing data on other series
compare=pd.concat([XY.loc[XY["Signal"]==1, 'VIX_ret'],vix_ret[vix_ret>=threshold] ], axis=1)
missing_dates=compare.loc[compare["VIX_ret"]!=compare["^VIX"]]
missing_dates=missing_dates.index
missing_dates
vix[missing_dates]

Date
2007-09-10    27.379999
2007-11-26    28.910000
2008-08-04    23.490000
2008-08-07    21.150000
2008-08-12    21.170000
2008-08-18    20.980000
2008-08-25    20.969999
2013-10-08    20.340000
Name: ^VIX, dtype: float64

In [15]:
# Define the target set y using the Signal column
y = XY["Signal"]
# Display a sample of y
y

Date
2007-07-03    0.0
2007-07-05    1.0
2007-07-06    0.0
2007-07-09    0.0
2007-07-10    1.0
             ... 
2021-09-24    0.0
2021-09-27    1.0
2021-09-28    1.0
2021-09-29    0.0
2021-09-30    0.0
Name: Signal, Length: 3561, dtype: float64

In [16]:
# Define features set X by selecting all columns but Signal
X = XY.drop(columns=["Signal"]).shift().dropna()


# Review the features DataFrame
X.head()

Unnamed: 0_level_0,VIX,VIX_ret,spy,ief,USDJPY=X,FXI,EZU,EEM,EFA,spy,...,tlt,ief,gld,slv,FXI,EZU,EEM,EFA,liquidity: (United States),Jobless claims: (United States)
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2007-07-05,14.92,-0.031169,114.571526,57.929958,122.470001,32.352463,39.293324,33.974312,53.867031,0.003623,...,3.7e-05,9e-06,1.9e-05,2e-05,0.000439,4.2e-05,6.3e-05,2.3e-05,-9.351484,-4.6e-05
2007-07-06,15.48,0.037533,114.451233,57.606716,122.949997,32.238968,39.090736,34.124905,53.598785,-0.00105,...,0.000126,3.1e-05,2.4e-05,2.9e-05,1.2e-05,2.7e-05,2e-05,2.5e-05,-28.037881,-0.000139
2007-07-09,14.72,-0.049096,115.052864,57.47031,123.360001,33.047962,39.515488,34.709679,53.984821,0.005257,...,1.6e-05,6e-06,7e-05,0.000298,0.00063,0.000118,0.000294,5.2e-05,-37.372794,-0.000185
2007-07-10,15.16,0.029891,115.143127,57.678585,123.370003,33.504387,39.688675,35.045979,54.115673,0.000785,...,1.3e-05,1.3e-05,4.2e-05,0.0,0.000191,1.9e-05,9.4e-05,6e-06,-65.344391,-0.000324
2007-07-11,17.57,0.158971,113.503586,58.073639,121.290001,33.110748,39.009052,34.355797,53.454849,-0.014239,...,0.000287,4.7e-05,1.2e-05,0.000225,0.000138,0.000293,0.000388,0.000149,-74.657209,-0.00037


In [22]:
X.corr()

Unnamed: 0,VIX,VIX_ret,spy,ief,USDJPY=X,FXI,EZU,EEM,EFA,spy.1,...,tlt,ief.1,gld,slv,FXI.1,EZU.1,EEM.1,EFA.1,liquidity: (United States),Jobless claims: (United States)
VIX,1.0,0.11119,-0.262625,-0.239764,-0.236936,-0.373423,-0.414042,-0.460807,-0.458605,-0.136907,...,0.356999,0.326861,0.266638,0.269724,0.433842,0.477337,0.430942,0.471962,-0.0867,0.288857
VIX_ret,0.11119,1.0,0.003066,0.0028,0.018867,-0.014502,-0.002461,-0.016331,-0.002043,-0.718217,...,0.095693,0.11843,0.109275,0.09105,0.047903,0.155967,0.057804,0.133489,0.012511,-0.012656
spy,-0.262625,0.003066,1.0,0.867243,0.52989,0.745377,0.817212,0.791347,0.924803,0.02624,...,-0.07749,-0.163297,-0.12284,-0.083812,-0.155503,-0.156827,-0.134932,-0.138846,0.742976,0.321352
ief,-0.239764,0.0028,0.867243,1.0,0.345793,0.49541,0.497635,0.581481,0.707926,0.022755,...,-0.007903,-0.14931,-0.114511,-0.066008,-0.201532,-0.122116,-0.153157,-0.122959,0.523912,0.374987
USDJPY=X,-0.236936,0.018867,0.52989,0.345793,1.0,0.483967,0.627711,0.238695,0.60979,-0.005713,...,-0.095615,-0.090858,-0.05748,-0.099265,-0.01417,-0.120425,-0.038085,-0.077627,0.74723,0.132983
FXI,-0.373423,-0.014502,0.745377,0.49541,0.483967,1.0,0.857845,0.880911,0.848456,0.054279,...,-0.124209,-0.165731,-0.162935,-0.114389,-0.193389,-0.233628,-0.201333,-0.216382,0.644637,0.202014
EZU,-0.414042,-0.002461,0.817212,0.497635,0.627711,0.857845,1.0,0.847793,0.95757,0.030928,...,-0.165351,-0.17951,-0.130627,-0.111078,-0.144335,-0.233258,-0.162074,-0.200438,0.718061,0.1489
EEM,-0.460807,-0.016331,0.791347,0.581481,0.238695,0.880911,0.847793,1.0,0.879962,0.056847,...,-0.142175,-0.196966,-0.168402,-0.10758,-0.245885,-0.25201,-0.235551,-0.243599,0.448734,0.16182
EFA,-0.458605,-0.002043,0.924803,0.707926,0.60979,0.848456,0.95757,0.879962,1.0,0.035579,...,-0.148668,-0.203113,-0.152532,-0.124413,-0.194028,-0.240501,-0.191028,-0.215042,0.71209,0.204363
spy,-0.136907,-0.718217,0.02624,0.022755,-0.005713,0.054279,0.030928,0.056847,0.035579,1.0,...,-0.141369,-0.114725,-0.078201,-0.086602,0.082365,-0.044547,0.076637,-0.008121,-0.003063,0.03402


In [21]:
# Split the preprocessed data into a training and testing dataset
# Assign the function a random_state equal to 1
training_begin = X.index.min()
training_end = X.index.min() + DateOffset(months=112)

X_train = X.loc[training_begin:training_end]
y_train = y.loc[training_begin:training_end]

X_test = X.loc[training_end+DateOffset(days=1):]
y_test = y.loc[training_end+DateOffset(days=1):]


In [22]:
# Create a StandardScaler instance
scaler =  StandardScaler() # MinMaxScaler() #

# Fit the scaler to the features training dataset
X_scaler = scaler.fit(X_train)

# Fit the scaler to the features training dataset
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)


In [23]:
# Instance AdaBoost
# Initiate the model instance
adaboost_model=AdaBoostClassifier(n_estimators=50)
adaboost_model


AdaBoostClassifier()

In [25]:
# Fit the model 
adaboost_model =adaboost_model.fit(X_train_scaled, y_train)

pred_adaboost=adaboost_model.predict(X_test_scaled)

In [26]:
# Use a classification report to evaluate the model using the predictions and testing data
adaboost_report=classification_report(y_test, pred_adaboost)

# Print the classification report
print("         AdaBoost Classification Report")
print(adaboost_report)


         AdaBoost Classification Report
              precision    recall  f1-score   support

         0.0       0.72      0.19      0.30       892
         1.0       0.27      0.80      0.41       338

    accuracy                           0.36      1230
   macro avg       0.49      0.50      0.35      1230
weighted avg       0.59      0.36      0.33      1230

