In [397]:
# Import appropriate modules
import pandas as pd
import datetime
import numpy as np
import yfinance as yf
from pathlib import Path
from datetime import datetime
import hvplot
import hvplot.pandas
# Import the finta Python library and the TA module
from finta import TA
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from imblearn.over_sampling import RandomOverSampler
from sklearn.metrics import f1_score
from sklearn.metrics import recall_score
from pandas.tseries.offsets import DateOffset
# Import a new classifier from SKLearn
from sklearn.ensemble import AdaBoostClassifier
from sklearn.preprocessing import StandardScaler
from matplotlib import pyplot

In [398]:
# Initialize global variables that control the flow of the program.
# Start date for the data; default `start_date` of the Yahoo retrieval helpers below.
start_data_date = '2005-01-01'
# End date for the data; default `end_date` of the Yahoo retrieval helpers below.
end_data_date = '2021-10-02'
# Define the training period for the model.
# NOTE(review): presumably a number of months; not referenced in the cells visible here.
training_months = 120
# Do we do tuning of adaboost? (flag is not read in the cells visible here)
run_multiple_tuning_iterations = False
# The number of estimators to be used for the adaboost model
adaboost_estimators = 43

# Define some experimental technical indicator windows
# NOTE(review): the visible SMA cells hard-code 50 and 200 rather than reading
# short_window / long_window; only `window` is used (spy 'sma 120' column).
# Define the short window
short_window = 50
# Define the long window
long_window = 200
# Define the general window
window = 120

In [399]:
# Retrieve a ticker's daily percentage returns from yahoo for a date range
def retrieve_yahoo_data_return(ticker = 'spy', start_date = start_data_date, end_date = end_data_date):
    """Fetch the daily percentage change of a ticker's Close price from Yahoo.

    Args:
        ticker: Yahoo Finance symbol to download.
        start_date: Value passed as ``start`` to ``history``.
        end_date: Value passed as ``end`` to ``history``.

    Returns:
        A Series of ``Close.pct_change()`` values named after ``ticker``, or
        ``None`` when anything fails or no rows come back (a message is
        printed in that case).
    """
    try:
        yahoo_data = yf.Ticker(ticker)
        print(f"Processing Ticker {ticker}")
        close_prices = yahoo_data.history(start=start_date, end=end_date).Close
        daily_returns = close_prices.pct_change()
        daily_returns.name = ticker
        # An empty download is reported through the same path as any other failure.
        if len(daily_returns) == 0:
            raise Exception("No Prices.")
    except Exception as ex:
        print(f"Sorry, Data not available for '{ticker}': Exception is {ex}")
        return None
    return daily_returns

In [400]:
# Retrieve a ticker's daily Close prices from yahoo for a date range
def retrieve_yahoo_data_close(ticker = 'spy', start_date = start_data_date, end_date = end_data_date):
    """Fetch the daily Close price series of a ticker from Yahoo.

    Args:
        ticker: Yahoo Finance symbol to download.
        start_date: Value passed as ``start`` to ``history``.
        end_date: Value passed as ``end`` to ``history``.

    Returns:
        The ``Close`` Series named after ``ticker``, or ``None`` when anything
        fails or no rows come back (a message is printed in that case).
    """
    try:
        yahoo_data = yf.Ticker(ticker)
        print(f"Processing Ticker {ticker}")
        history_df = yahoo_data.history(start=start_date, end=end_date)
        close_prices = history_df.Close
        close_prices.name = ticker
        # An empty download is reported through the same path as any other failure.
        if len(close_prices) == 0:
            raise Exception("No Prices.")
    except Exception as ex:
        print(f"Sorry, Data not available for '{ticker}': Exception is {ex}")
        return None
    return close_prices

In [401]:
# Retrieve a ticker's daily traded Volume from yahoo for a date range
def retrieve_yahoo_data_volume(ticker = 'spy', start_date = start_data_date, end_date = end_data_date):
    """Fetch the daily Volume series of a ticker from Yahoo.

    Args:
        ticker: Yahoo Finance symbol to download.
        start_date: Value passed as ``start`` to ``history``.
        end_date: Value passed as ``end`` to ``history``.

    Returns:
        The ``Volume`` Series named after ``ticker``, or ``None`` when anything
        fails or no rows come back (a message is printed in that case).
    """
    try:
        yahoo_data = yf.Ticker(ticker)
        print(f"Processing Ticker {ticker}")
        history_df = yahoo_data.history(start=start_date, end=end_date)
        volume_series = history_df.Volume
        volume_series.name = ticker
        # An empty download is reported through the same path as any other failure.
        if len(volume_series) == 0:
            raise Exception("No Prices.")
    except Exception as ex:
        print(f"Sorry, Data not available for '{ticker}': Exception is {ex}")
        return None
    return volume_series

In [402]:
# Retrieve the volume of the put options currently listed on yahoo for a ticker
def retrieve_yahoo_put_options_volume(ticker = 'spy', date = '2007-07-01'):
    """Fetch the put-option volumes from the ticker's Yahoo option chain.

    Args:
        ticker: Yahoo Finance symbol whose option chain is requested.
        date: Accepted for backward compatibility but not used; the option
            chain is requested without arguments.
            NOTE(review): decide whether this should select an expiry.

    Returns:
        The ``volume`` column of the puts table as a Series named after
        ``ticker``, or ``None`` when anything fails or the chain has no rows
        (a message is printed in that case).
    """
    try:
        # get data based on ticker
        yahoo_data = yf.Ticker(ticker)
        print(f"Processing Ticker {ticker}")
        opts = yahoo_data.option_chain()
        # Name the Series itself: a name set on the puts DataFrame is lost as
        # soon as the volume column is selected.
        put_volume = opts.puts.volume.rename(ticker)
        # if no data retrieved raise exception
        if put_volume.shape[0] == 0:
            raise Exception("No Prices.")
        return put_volume
    # handle exception
    except Exception as ex:
        print(f"Sorry, Data not available for '{ticker}': Exception is {ex}")
        return None

In [403]:
# Retrieve a ticker's full daily history table from yahoo for a date range
def retrieve_yahoo_data(ticker = 'spy', start_date = start_data_date, end_date = end_data_date):
    """Fetch the complete ``history`` DataFrame of a ticker from Yahoo.

    Args:
        ticker: Yahoo Finance symbol to download.
        start_date: Value passed as ``start`` to ``history``.
        end_date: Value passed as ``end`` to ``history``.

    Returns:
        The DataFrame returned by ``history`` with a ``name`` attribute set to
        ``ticker``, or ``None`` when anything fails or no rows come back
        (a message is printed in that case).
    """
    try:
        yahoo_data = yf.Ticker(ticker)
        print(f"Processing Ticker {ticker}")
        history_df = yahoo_data.history(start=start_date, end=end_date)
        # On a DataFrame this only attaches a plain attribute; it is not a column.
        history_df.name = ticker
        # An empty download is reported through the same path as any other failure.
        if len(history_df) == 0:
            raise Exception("No Prices.")
    except Exception as ex:
        print(f"Sorry, Data not available for '{ticker}': Exception is {ex}")
        return None
    return history_df

In [404]:
# Set up the variable related to tickers and tickers data
ticker_data_dict = {}
ticker_list = ["^VIX", "spy"]

# Retrieve Tickers
def retrieve_tickers(ticker_data_dict, ticker_list):
    """Download daily returns for each ticker and store them by symbol.

    Args:
        ticker_data_dict: Dict that is updated in place with one entry per
            successfully retrieved ticker.
        ticker_list: Iterable of Yahoo Finance symbols.

    Returns:
        The same ``ticker_data_dict`` object, for convenience.
    """
    for ticker in ticker_list:
        ticker_data = retrieve_yahoo_data_return(ticker)
        # The retrieval helper returns None on failure; storing that would put
        # an all-None object column into the DataFrame built from this dict.
        if ticker_data is None:
            print(f"Skipping '{ticker}': no return data retrieved")
            continue
        ticker_data_dict[ticker] = ticker_data

    return ticker_data_dict

ticker_data_dict = retrieve_tickers(ticker_data_dict, ticker_list)
ticker_data_df = pd.DataFrame(ticker_data_dict)
# NaN rows (such as the first pct_change value of each ticker) are not dropped here.
# Display the shape of the ticker data frame
display(ticker_data_df.shape)

Processing Ticker ^VIX
Processing Ticker spy


(4217, 2)

In [405]:
# Pairwise correlation between the daily returns of the downloaded tickers
ticker_return_corr = ticker_data_df.corr()
ticker_return_corr

Unnamed: 0,^VIX,spy
^VIX,1.0,-0.723776
spy,-0.723776,1.0


In [406]:
# Plot the cumulative product of (1 + daily return) for VIX and spy
ticker_growth_factors = ticker_data_df[["^VIX", "spy"]] + 1
ticker_growth_factors.cumprod().hvplot()

In [407]:
# Download the spy close prices into a one-column frame ("close") and plot it
spy_close_data = retrieve_yahoo_data_close("spy")
spy_close_dict = {"close": spy_close_data}
spy_close_df = pd.DataFrame(spy_close_dict)

# Line plot of the close prices with hvplot
spy_close_df.hvplot()

Processing Ticker spy


In [408]:
# Add the simple moving average of the spy close over `window` days and plot.
# The column label 'sma 120' is fixed, while the length comes from `window`.
spy_close_rolling = spy_close_df["close"].rolling(window=window)
spy_close_df['sma 120'] = spy_close_rolling.mean()
spy_close_df.hvplot()

In [409]:
# Add the 50 day and 200 day simple moving averages of the spy close and plot them with the close
for spy_sma_label, spy_sma_days in (('sma 50', 50), ('sma 200', 200)):
    spy_close_df[spy_sma_label] = spy_close_df["close"].rolling(window=spy_sma_days).mean()
spy_sma_view = spy_close_df[['sma 50', 'sma 200', "close"]]
spy_sma_view.hvplot()

In [410]:
# Download the vix close prices into a one-column frame ("close") and plot it
vix_close_data = retrieve_yahoo_data_close("^VIX")
vix_close_dict = {"close": vix_close_data}
vix_close_df = pd.DataFrame(vix_close_dict)

# Line plot of the close prices with hvplot
vix_close_df.hvplot()

Processing Ticker ^VIX


In [411]:
# Add the 120 day simple moving average of the vix close and plot every column
vix_close_rolling_120 = vix_close_df["close"].rolling(window=120)
vix_close_df['sma 120'] = vix_close_rolling_120.mean()
vix_close_df.hvplot()

In [412]:
# Add the 20, 50 and 200 day simple moving averages of the vix close,
# then plot the 50 day average against the close.
for vix_sma_label, vix_sma_days in (('sma 20', 20), ('sma 50', 50), ('sma 200', 200)):
    vix_close_df[vix_sma_label] = vix_close_df["close"].rolling(window=vix_sma_days).mean()
vix_sma_50_view = vix_close_df[['sma 50', "close"]]
# Trading idea: long vix when the vix is above its 50 day moving average and
# short vix when it is below its 50 day moving average.
vix_sma_50_view.hvplot(width=1000, height=500)

In [413]:
# Plot the 200 day simple moving average of the vix against the vix close.
# Trading idea carried over from the 50 day plot: long vix when it is above
# its moving average, short vix when it is below (the original note cites 50 days).
vix_sma_200_view = vix_close_df[['sma 200', "close"]]
vix_sma_200_view.hvplot(width=1000, height=500)

In [414]:
# Add the 100 and 300 day simple moving averages of the vix close and plot the 100 day average with the close
for vix_long_sma_label, vix_long_sma_days in (('sma 100', 100), ('sma 300', 300)):
    vix_close_df[vix_long_sma_label] = vix_close_df["close"].rolling(window=vix_long_sma_days).mean()
vix_sma_100_view = vix_close_df[['sma 100', "close"]]
vix_sma_100_view.hvplot(width=1000, height=500)

In [415]:
# Generate the trading signal 1 or 0,
# where 1 is when the SMA120 is greater than the SMA200
# where 0 is when the SMA120 is not greater than the SMA200
# The whole column is assigned in one step. The previous chained form
# (df["Signal"][120:] = ...) writes through an intermediate object, which
# raises SettingWithCopyWarning and is a silent no-op under Copy-on-Write.
# Rows where either average is still NaN compare as False and therefore get
# 0.0, so the leading warm-up rows keep the value they had before.
vix_close_df["Signal"] = np.where(
    vix_close_df["sma 120"] > vix_close_df["sma 200"], 1.0, 0.0
)

# Calculate the points in time at which a position should be taken, 1 or -1
vix_close_df["Entry/Exit"] = vix_close_df["Signal"].diff()

# Review the DataFrame
vix_close_df

Unnamed: 0_level_0,close,sma 120,sma 20,sma 50,sma 200,sma 100,sma 300,Signal,Entry/Exit
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2005-01-03,14.080000,,,,,,,0.0,
2005-01-04,13.980000,,,,,,,0.0,0.0
2005-01-05,14.090000,,,,,,,0.0,0.0
2005-01-06,13.580000,,,,,,,0.0,0.0
2005-01-07,13.490000,,,,,,,0.0,0.0
...,...,...,...,...,...,...,...,...,...
2021-09-27,18.760000,18.041917,19.0020,18.3620,19.97435,18.1287,21.921833,0.0,0.0
2021-09-28,23.250000,18.094417,19.3550,18.3770,19.97800,18.1773,21.916533,0.0,0.0
2021-09-29,22.559999,18.143333,19.6590,18.4336,19.97425,18.2360,21.910667,0.0,0.0
2021-09-30,23.139999,18.195250,20.0105,18.5382,19.96635,18.2708,21.900867,0.0,0.0


In [416]:
# Options shared by the scatter markers and by the line plots of this figure
entry_exit_marker_opts = dict(size=200, legend=False, ylabel='Price in $', width=1000, height=400)
entry_exit_line_opts = dict(ylabel='Price in $', width=1000, height=400)

# Purple up-markers on the close price where Entry/Exit equals 1
entry_rows = vix_close_df["Entry/Exit"] == 1.0
entry = vix_close_df.loc[entry_rows, "close"].hvplot.scatter(
    color='purple', marker='^', **entry_exit_marker_opts
)

# Orange down-markers on the close price where Entry/Exit equals -1
# NOTE: the name `exit` shadows the builtin of the same name; kept because it
# is this cell's established variable name.
exit_rows = vix_close_df["Entry/Exit"] == -1.0
exit = vix_close_df.loc[exit_rows, "close"].hvplot.scatter(
    color='orange', marker='v', **entry_exit_marker_opts
)

# Light gray line of the close price
security_close = vix_close_df[["close"]].hvplot(line_color='lightgray', **entry_exit_line_opts)

# Lines for the 120 and 200 day moving averages
moving_avgs = vix_close_df[["sma 120", "sma 200"]].hvplot(**entry_exit_line_opts)

# Overlay all layers into one figure and show it
entry_exit_plot = security_close * moving_avgs * entry * exit
entry_exit_plot

In [417]:
# Build the technical variable dataframe from the vix close:
# daily return, overall mean, and 10/15/20/50/100/150/200/250 day simple
# moving averages; then plot all of them.
technical_variables_dict = {"vix close": vix_close_data}
technical_variables_df = pd.DataFrame(technical_variables_dict)

vix_close_series = technical_variables_df["vix close"]
technical_variables_df["vix return"] = vix_close_series.pct_change()
# NOTE(review): this is the mean over the entire series, so the column is one
# constant repeated on every row (and is computed from data later than each row).
technical_variables_df["mean"] = vix_close_series.mean()
for tech_sma_label, tech_sma_days in (
    ('sma 10', 10),
    ('sma 15', 15),
    ('sma 20', 20),
    ('sma 50', 50),
    ('sma 100', 100),
    ('sma 150', 150),
    ('sma 200', 200),
    ('sma 250', 250),
):
    technical_variables_df[tech_sma_label] = vix_close_series.rolling(window=tech_sma_days).mean()

technical_variables_df.hvplot(width=1000, height=500)

In [418]:
# Report whether every value in the technical variables is finite.
# Any NaN or infinite entry anywhere in the frame makes the result False.
display(np.isfinite(technical_variables_df).all(axis=None))

False

In [419]:
# Flag columns (1.0 / 0.0) relating the vix close to its simple moving averages.
# aNN is 1.0 when the close is above the NN day average, bNN is 1.0 when the
# NN day average is above the close. While an average is still NaN both
# comparisons are False, so both flags are 0.0.
vix_close_col = technical_variables_df["vix close"]

for above_flag, above_sma in (
    ("a15", "sma 15"),
    ("a50", "sma 50"),
    ("a100", "sma 100"),
    ("a150", "sma 150"),
    ("a200", "sma 200"),
):
    technical_variables_df[above_flag] = np.where(
        vix_close_col > technical_variables_df[above_sma], 1.0, 0.0
    )

for below_flag, below_sma in (
    ("b15", "sma 15"),
    ("b50", "sma 50"),
    ("b100", "sma 100"),
    ("b150", "sma 150"),
    ("b200", "sma 200"),
):
    technical_variables_df[below_flag] = np.where(
        technical_variables_df[below_sma] > vix_close_col, 1.0, 0.0
    )

# Flags for strictly positive and strictly negative daily vix returns
vix_return_col = technical_variables_df["vix return"]
technical_variables_df["vix_pos_return"] = np.where(vix_return_col > 0, 1.0, 0.0)
technical_variables_df["vix_neg_return"] = np.where(vix_return_col < 0, 1.0, 0.0)

# Check the data frame.
technical_variables_df

Unnamed: 0_level_0,vix close,vix return,mean,sma 10,sma 15,sma 20,sma 50,sma 100,sma 150,sma 200,...,a100,a150,a200,b15,b50,b100,b150,b200,vix_pos_return,vix_neg_return
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2005-01-03,14.080000,,19.114446,,,,,,,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2005-01-04,13.980000,-0.007102,19.114446,,,,,,,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
2005-01-05,14.090000,0.007868,19.114446,,,,,,,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
2005-01-06,13.580000,-0.036196,19.114446,,,,,,,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
2005-01-07,13.490000,-0.006627,19.114446,,,,,,,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-09-27,18.760000,0.056901,19.114446,20.322,19.896000,19.0020,18.3620,18.1287,18.774133,19.97435,...,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0
2021-09-28,23.250000,0.239339,19.114446,20.701,20.236667,19.3550,18.3770,18.1773,18.786867,19.97800,...,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
2021-09-29,22.559999,-0.029677,19.114446,21.139,20.543333,19.6590,18.4336,18.2360,18.744667,19.97425,...,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
2021-09-30,23.139999,0.025709,19.114446,21.584,20.832667,20.0105,18.5382,18.2708,18.712600,19.96635,...,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0


In [420]:
# Report whether every value in the technical variables is finite.
# Any NaN or infinite entry anywhere in the frame makes the result False.
display(np.isfinite(technical_variables_df).all(axis=None))

False

In [421]:
# Running length of the current streak of 1s for each flag column.
# A new run starts whenever the flag differs from the previous row; the
# position inside the run (starting at 1) is kept where the flag is 1 and
# replaced by 0 elsewhere.
for streak_col, flag_col in (
    ("c_1_15", "a15"),
    ("c_1_50", "a50"),
    ("c_1_100", "a100"),
    ("c_1_150", "a150"),
    ("c_1_200", "a200"),
    ("c_pos_returns", "vix_pos_return"),
    ("c_neg_returns", "vix_neg_return"),
):
    flag_values = technical_variables_df[flag_col]
    run_ids = flag_values.ne(flag_values.shift()).cumsum()
    technical_variables_df[streak_col] = np.where(
        flag_values.eq(1),
        technical_variables_df.groupby(run_ids).cumcount() + 1,
        0,
    )

# display the new technical variables dataframe.
technical_variables_df

Unnamed: 0_level_0,vix close,vix return,mean,sma 10,sma 15,sma 20,sma 50,sma 100,sma 150,sma 200,...,b200,vix_pos_return,vix_neg_return,c_1_15,c_1_50,c_1_100,c_1_150,c_1_200,c_pos_returns,c_neg_returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2005-01-03,14.080000,,19.114446,,,,,,,,...,0.0,0.0,0.0,0,0,0,0,0,0,0
2005-01-04,13.980000,-0.007102,19.114446,,,,,,,,...,0.0,0.0,1.0,0,0,0,0,0,0,1
2005-01-05,14.090000,0.007868,19.114446,,,,,,,,...,0.0,1.0,0.0,0,0,0,0,0,1,0
2005-01-06,13.580000,-0.036196,19.114446,,,,,,,,...,0.0,0.0,1.0,0,0,0,0,0,0,1
2005-01-07,13.490000,-0.006627,19.114446,,,,,,,,...,0.0,0.0,1.0,0,0,0,0,0,0,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-09-27,18.760000,0.056901,19.114446,20.322,19.896000,19.0020,18.3620,18.1287,18.774133,19.97435,...,1.0,1.0,0.0,0,1,1,0,0,1,0
2021-09-28,23.250000,0.239339,19.114446,20.701,20.236667,19.3550,18.3770,18.1773,18.786867,19.97800,...,0.0,1.0,0.0,1,2,2,1,1,2,0
2021-09-29,22.559999,-0.029677,19.114446,21.139,20.543333,19.6590,18.4336,18.2360,18.744667,19.97425,...,0.0,0.0,1.0,2,3,3,2,2,0,1
2021-09-30,23.139999,0.025709,19.114446,21.584,20.832667,20.0105,18.5382,18.2708,18.712600,19.96635,...,0.0,1.0,0.0,3,4,4,3,3,1,0


In [422]:
# Report whether every value in the technical variables is finite.
# Any NaN or infinite entry anywhere in the frame makes the result False.
display(np.isfinite(technical_variables_df).all(axis=None))

False

In [423]:
# Flag (1 / 0) every row whose current streak of 1s is at least 3 long.
for streak_flag_col, streak_len_col in (
    ("c_3_15", "c_1_15"),
    ("c_3_50", "c_1_50"),
    ("c_3_100", "c_1_100"),
    ("c_3_150", "c_1_150"),
    ("c_3_200", "c_1_200"),
    ("c_3_pos_returns", "c_pos_returns"),
    ("c_3_neg_returns", "c_neg_returns"),
):
    technical_variables_df[streak_flag_col] = np.where(
        technical_variables_df[streak_len_col] >= 3, 1, 0
    )
technical_variables_df

Unnamed: 0_level_0,vix close,vix return,mean,sma 10,sma 15,sma 20,sma 50,sma 100,sma 150,sma 200,...,c_1_200,c_pos_returns,c_neg_returns,c_3_15,c_3_50,c_3_100,c_3_150,c_3_200,c_3_pos_returns,c_3_neg_returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2005-01-03,14.080000,,19.114446,,,,,,,,...,0,0,0,0,0,0,0,0,0,0
2005-01-04,13.980000,-0.007102,19.114446,,,,,,,,...,0,0,1,0,0,0,0,0,0,0
2005-01-05,14.090000,0.007868,19.114446,,,,,,,,...,0,1,0,0,0,0,0,0,0,0
2005-01-06,13.580000,-0.036196,19.114446,,,,,,,,...,0,0,1,0,0,0,0,0,0,0
2005-01-07,13.490000,-0.006627,19.114446,,,,,,,,...,0,0,2,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-09-27,18.760000,0.056901,19.114446,20.322,19.896000,19.0020,18.3620,18.1287,18.774133,19.97435,...,0,1,0,0,0,0,0,0,0,0
2021-09-28,23.250000,0.239339,19.114446,20.701,20.236667,19.3550,18.3770,18.1773,18.786867,19.97800,...,1,2,0,0,0,0,0,0,0,0
2021-09-29,22.559999,-0.029677,19.114446,21.139,20.543333,19.6590,18.4336,18.2360,18.744667,19.97425,...,2,0,1,0,1,1,0,0,0,0
2021-09-30,23.139999,0.025709,19.114446,21.584,20.832667,20.0105,18.5382,18.2708,18.712600,19.96635,...,3,1,0,1,1,1,1,1,0,0


In [424]:
# Report whether every value in the technical variables is finite.
# Any NaN or infinite entry anywhere in the frame makes the result False.
display(np.isfinite(technical_variables_df).all(axis=None))

False

In [425]:
# Show how often the ">= 3 in a row" flag is set for positive and for negative returns.
for return_streak_flag in ("c_3_pos_returns", "c_3_neg_returns"):
    display(technical_variables_df[return_streak_flag].value_counts())

0    3882
1     335
Name: c_3_pos_returns, dtype: int64

0    3606
1     611
Name: c_3_neg_returns, dtype: int64

In [426]:
# Options shared by the layers of this figure
plot_3_100_line_opts = dict(ylabel='Price in $', width=1000, height=400)

# Purple up-markers on the vix close where the 15, 50 and 100 day
# "three or more in a row above the average" flags are all set
entry_3_100_rows = (
    (technical_variables_df["c_3_100"] == 1.0)
    & (technical_variables_df["c_3_15"] == 1.0)
    & (technical_variables_df["c_3_50"] == 1.0)
)
entry_3_100_day = technical_variables_df.loc[entry_3_100_rows, "vix close"].hvplot.scatter(
    color='purple',
    marker='^',
    size=20,
    legend=False,
    **plot_3_100_line_opts
)

# The exit-marker layer that used to sit here as a triple-quoted string was
# dead code (a bare string expression with no effect) and has been removed.

# Light gray line of the vix close
security_3_100_close = technical_variables_df[["vix close"]].hvplot(
    line_color='lightgray',
    **plot_3_100_line_opts
)

# Moving average line; despite the variable name only the 50 day average is drawn
moving_avgs_14_50_100 = technical_variables_df[["sma 50"]].hvplot(**plot_3_100_line_opts)

# Overlay plots
entry_exit_3_100_plot = security_3_100_close * moving_avgs_14_50_100 * entry_3_100_day
entry_exit_3_100_plot

In [427]:
# Options shared by the two marker layers and by the close-price line
returns_marker_opts = dict(size=20, legend=False, ylabel='Price in $', width=1000, height=400)
returns_line_opts = dict(ylabel='Price in $', width=1000, height=400)

# Purple up-markers on the vix close after three or more consecutive positive returns
pos_streak_rows = technical_variables_df["c_3_pos_returns"] == 1.0
entry_3_pos_returns_day = technical_variables_df.loc[pos_streak_rows, "vix close"].hvplot.scatter(
    color='purple', marker='^', **returns_marker_opts
)

# Orange down-markers on the vix close after three or more consecutive negative returns
neg_streak_rows = technical_variables_df["c_3_neg_returns"] == 1.0
exit_3_neg_returns_day = technical_variables_df.loc[neg_streak_rows, "vix close"].hvplot.scatter(
    color='orange', marker='v', **returns_marker_opts
)

# Light gray line of the vix close
security_3_close = technical_variables_df[["vix close"]].hvplot(
    line_color='lightgray', **returns_line_opts
)

# Overlay the close line with both marker layers (no moving average in this figure)
entry_exit_3_returns_plot = security_3_close * entry_3_pos_returns_day * exit_3_neg_returns_day
entry_exit_3_returns_plot

In [428]:
# Options shared by the two marker layers and by the close-price line
strategy_marker_opts = dict(size=20, legend=False, ylabel='Price in $', width=1000, height=400)
strategy_line_opts = dict(ylabel='Price in $', width=1000, height=400)

# Purple up-markers on the vix close where the 15, 50 and 100 day
# "three or more in a row above the average" flags are all set
strategy_entry_rows = (
    (technical_variables_df["c_3_100"] == 1.0)
    & (technical_variables_df["c_3_15"] == 1.0)
    & (technical_variables_df["c_3_50"] == 1.0)
)
entry_strategy_day = technical_variables_df.loc[strategy_entry_rows, "vix close"].hvplot.scatter(
    color='purple', marker='^', **strategy_marker_opts
)

# Orange down-markers on the vix close after three or more consecutive negative returns
strategy_exit_rows = technical_variables_df["c_3_neg_returns"] == 1.0
exit_strategy_day = technical_variables_df.loc[strategy_exit_rows, "vix close"].hvplot.scatter(
    color='orange', marker='v', **strategy_marker_opts
)

# Light gray line of the vix close
security_3_close = technical_variables_df[["vix close"]].hvplot(
    line_color='lightgray', **strategy_line_opts
)

# Overlay the close line with both marker layers (no moving average in this figure)
entry_exit_3_returns_plot = security_3_close * entry_strategy_day * exit_strategy_day
entry_exit_3_returns_plot

In [429]:
# Create a new data frame to track the bollinger bands and plot the columns
vix_df = retrieve_yahoo_data("^VIX")
# The retrieval helper returns None on failure; stop with a clear message
# instead of an opaque error on the next line.
if vix_df is None:
    raise RuntimeError("No ^VIX history was retrieved; cannot compute Bollinger Bands.")

# Pick the price/volume columns by name and lowercase them. Relabelling by
# position (vix_df.columns = [...]) would mislabel or fail if the download
# ever returned a different column set or order.
vix_df = vix_df[["Open", "High", "Low", "Close", "Volume"]].rename(columns=str.lower)

# Determine the Bollinger Bands for the dataset and append them as columns
bbands_vix_df = TA.BBANDS(vix_df)
vix_df = pd.concat([vix_df, bbands_vix_df], axis=1)
vix_df[["close", "BB_UPPER", "BB_LOWER"]].hvplot()

Processing Ticker ^VIX


In [430]:
# Flag (1 / 0) the days on which the vix close is at or above the upper
# Bollinger band, and the days on which it is at or below the lower band.
vix_band_close = vix_df["close"]
close_at_or_above_upper = vix_band_close >= vix_df["BB_UPPER"]
close_at_or_below_lower = vix_df["BB_LOWER"] >= vix_band_close
vix_df[">b_upper"] = np.where(close_at_or_above_upper, 1, 0)
vix_df["<b_lower"] = np.where(close_at_or_below_lower, 1, 0)
for band_flag in (">b_upper", "<b_lower"):
    print(vix_df[band_flag].value_counts())

0    3932
1     285
Name: >b_upper, dtype: int64
0    4167
1      50
Name: <b_lower, dtype: int64


In [431]:
# Report whether every value in the vix frame (prices, bands and flags) is finite.
# Any NaN or infinite entry anywhere in the frame makes the result False.
display(np.isfinite(vix_df).all(axis=None))

False

In [432]:
# Options shared by the scatter markers and by the line plots of this figure
band_marker_opts = dict(size=200, legend=False, ylabel='Price in $', width=1000, height=400)
band_line_opts = dict(ylabel='Price in $', width=1000, height=400)

# Purple up-markers on the close where the upper-band flag is set
upper_band_rows = vix_df[">b_upper"] == 1.0
entry_vix = vix_df.loc[upper_band_rows, "close"].hvplot.scatter(
    color='purple', marker='^', **band_marker_opts
)

# Orange down-markers on the close where the lower-band flag is set
lower_band_rows = vix_df["<b_lower"] == 1.0
exit_vix = vix_df.loc[lower_band_rows, "close"].hvplot.scatter(
    color='orange', marker='v', **band_marker_opts
)

# Light gray line of the close price
security_close_vix = vix_df[["close"]].hvplot(line_color='lightgray', **band_line_opts)

# Lines for the upper and lower Bollinger bands
bollinger_band_vix = vix_df[["BB_UPPER", "BB_LOWER"]].hvplot(**band_line_opts)

# Overlay all layers into one figure and show it
entry_exit_vix_plot = security_close_vix * bollinger_band_vix * entry_vix * exit_vix
entry_exit_vix_plot

In [433]:
# Bring the two Bollinger band flag columns from the vix frame into the
# technical variables frame (assignment aligns on the index), then show the result.
for band_flag_col in (">b_upper", "<b_lower"):
    technical_variables_df[band_flag_col] = vix_df[band_flag_col]
technical_variables_df

Unnamed: 0_level_0,vix close,vix return,mean,sma 10,sma 15,sma 20,sma 50,sma 100,sma 150,sma 200,...,c_neg_returns,c_3_15,c_3_50,c_3_100,c_3_150,c_3_200,c_3_pos_returns,c_3_neg_returns,>b_upper,<b_lower
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2005-01-03,14.080000,,19.114446,,,,,,,,...,0,0,0,0,0,0,0,0,0,0
2005-01-04,13.980000,-0.007102,19.114446,,,,,,,,...,1,0,0,0,0,0,0,0,0,0
2005-01-05,14.090000,0.007868,19.114446,,,,,,,,...,0,0,0,0,0,0,0,0,0,0
2005-01-06,13.580000,-0.036196,19.114446,,,,,,,,...,1,0,0,0,0,0,0,0,0,0
2005-01-07,13.490000,-0.006627,19.114446,,,,,,,,...,2,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-09-27,18.760000,0.056901,19.114446,20.322,19.896000,19.0020,18.3620,18.1287,18.774133,19.97435,...,0,0,0,0,0,0,0,0,0,0
2021-09-28,23.250000,0.239339,19.114446,20.701,20.236667,19.3550,18.3770,18.1773,18.786867,19.97800,...,0,0,0,0,0,0,0,0,0,0
2021-09-29,22.559999,-0.029677,19.114446,21.139,20.543333,19.6590,18.4336,18.2360,18.744667,19.97425,...,1,0,1,1,0,0,0,0,0,0
2021-09-30,23.139999,0.025709,19.114446,21.584,20.832667,20.0105,18.5382,18.2708,18.712600,19.96635,...,0,1,1,1,1,1,0,0,0,0


In [434]:
# Report whether every value in the technical variables is finite
# (False as soon as any NaN or infinite entry is present).
display(np.isfinite(technical_variables_df).all(axis=None))

False

In [435]:
# Save the technical indicators to the csv file, with the date as a regular column.
technical_variables_date_df = technical_variables_df.reset_index()
technical_variables_date_df.to_csv('technical_indicators.csv', index=False)
# Only the last expression of a cell is rendered automatically, so the head
# has to be displayed explicitly; a bare .head(5) in the middle shows nothing.
display(technical_variables_date_df.head(5))
technical_variables_date_df.tail(5)

Unnamed: 0,Date,vix close,vix return,mean,sma 10,sma 15,sma 20,sma 50,sma 100,sma 150,...,c_neg_returns,c_3_15,c_3_50,c_3_100,c_3_150,c_3_200,c_3_pos_returns,c_3_neg_returns,>b_upper,<b_lower
4212,2021-09-27,18.76,0.056901,19.114446,20.322,19.896,19.002,18.362,18.1287,18.774133,...,0,0,0,0,0,0,0,0,0,0
4213,2021-09-28,23.25,0.239339,19.114446,20.701,20.236667,19.355,18.377,18.1773,18.786867,...,0,0,0,0,0,0,0,0,0,0
4214,2021-09-29,22.559999,-0.029677,19.114446,21.139,20.543333,19.659,18.4336,18.236,18.744667,...,1,0,1,1,0,0,0,0,0,0
4215,2021-09-30,23.139999,0.025709,19.114446,21.584,20.832667,20.0105,18.5382,18.2708,18.7126,...,0,1,1,1,1,1,0,0,0,0
4216,2021-10-01,21.1,-0.088159,19.114446,21.613,20.842667,20.245,18.6064,18.2634,18.6976,...,1,1,1,1,1,1,0,0,0,0


In [436]:
# Pairwise correlation between all technical variable columns
technical_variables_corr = technical_variables_df.corr()
technical_variables_corr

Unnamed: 0,vix close,vix return,mean,sma 10,sma 15,sma 20,sma 50,sma 100,sma 150,sma 200,...,c_neg_returns,c_3_15,c_3_50,c_3_100,c_3_150,c_3_200,c_3_pos_returns,c_3_neg_returns,>b_upper,<b_lower
vix close,1.0,0.109501,,0.960041,0.94176,0.923363,0.82887,0.733867,0.661892,0.611658,...,-0.059794,0.171537,0.275832,0.343661,0.427092,0.488473,0.021984,-0.04683,0.11907,-0.043656
vix return,0.109501,1.0,,-0.032715,-0.035617,-0.037074,-0.036398,-0.030691,-0.027406,-0.024473,...,-0.441692,0.083184,0.031851,0.015266,0.00811,0.006454,0.248765,-0.2173,0.439155,-0.079807
mean,,,,,,,,,,,...,,,,,,,,,,
sma 10,0.960041,-0.032715,,1.0,0.993066,0.980315,0.88843,0.783,0.704457,0.649612,...,0.048621,0.035588,0.195257,0.295119,0.392415,0.462277,-0.054931,0.041277,-0.020846,-0.019454
sma 15,0.94176,-0.035617,,0.993066,1.0,0.994917,0.913693,0.805507,0.724392,0.66744,...,0.050581,-0.016887,0.149263,0.261586,0.366879,0.440793,-0.056928,0.042226,-0.041704,-0.016612
sma 20,0.923363,-0.037074,,0.980315,0.994917,1.0,0.935854,0.826764,0.743631,0.684744,...,0.051615,-0.044758,0.109163,0.230457,0.342986,0.421306,-0.056607,0.043278,-0.053427,-0.015386
sma 50,0.82887,-0.036398,,0.88843,0.913693,0.935854,1.0,0.928684,0.845375,0.778287,...,0.050678,-0.079379,-0.065373,0.063101,0.221049,0.318906,-0.058475,0.0422,-0.063943,-0.004381
sma 100,0.733867,-0.030691,,0.783,0.805507,0.826764,0.928684,1.0,0.957797,0.895753,...,0.035398,-0.072279,-0.11712,-0.096149,0.04824,0.158528,-0.053183,0.029182,-0.0602,0.006961
sma 150,0.661892,-0.027406,,0.704457,0.724392,0.743631,0.845375,0.957797,1.0,0.971172,...,0.034667,-0.070386,-0.121597,-0.144469,-0.067804,0.022138,-0.037408,0.026919,-0.050298,0.016908
sma 200,0.611658,-0.024473,,0.649612,0.66744,0.684744,0.778287,0.895753,0.971172,1.0,...,0.037866,-0.053334,-0.113858,-0.150247,-0.109669,-0.052354,-0.030007,0.02811,-0.042104,0.021324


In [437]:
# Load the day-of-week prophet output from its csv file and show it
prophet_csv_path = "prophet_output_day_of_week.csv"
prophet_df = pd.read_csv(prophet_csv_path)
prophet_df

Unnamed: 0,Date,y,Friday,Monday,Thursday,Tuesday,Wednesday
0,2006-07-03,,0,1,0,0,0
1,2006-07-05,0.084291,0,0,0,0,1
2,2006-07-06,-0.035336,0,0,1,0,0
3,2006-07-07,0.023443,1,0,0,0,0
4,2006-07-10,0.003579,0,1,0,0,0
...,...,...,...,...,...,...,...
3835,2021-09-27,0.056901,0,1,0,0,0
3836,2021-09-28,0.239339,0,0,0,1,0
3837,2021-09-29,-0.029677,0,0,0,0,1
3838,2021-09-30,0.025709,0,0,1,0,0


In [438]:
# Set up the temp technical variables: rows from 2006-07-03 onwards
temp_technical_variables_df = technical_variables_df.loc["2006-07-03":, :].copy()
display(temp_technical_variables_df.head(1))
display(temp_technical_variables_df.tail(1))

# Set up the temp prophet data frame: only the day-of-week indicator columns
temp_prophet_df = prophet_df.drop(columns=["Date", "y"])
display(temp_prophet_df.head(1))
display(temp_prophet_df.tail(1))

# Attach the day-of-week flags by calendar date rather than by row position.
# A positional concat only lines up when both frames contain exactly the same
# dates in the same order; any gap would silently shift flags to other dates.
prophet_day_flags_df = temp_prophet_df.copy()
prophet_day_flags_df.index = pd.DatetimeIndex(pd.to_datetime(prophet_df["Date"])).normalize()

technical_dates = pd.DatetimeIndex(temp_technical_variables_df.index)
if technical_dates.tz is not None:
    # Compare on naive calendar dates; the csv dates carry no timezone.
    technical_dates = technical_dates.tz_localize(None)
technical_dates = technical_dates.normalize()

# Look up each technical date in the prophet frame (dates without a match
# become NaN), then restore the original Date index so concat aligns row for row.
aligned_day_flags_df = prophet_day_flags_df.reindex(technical_dates)
aligned_day_flags_df.index = temp_technical_variables_df.index

# Rows with any NaN (indicator warm-up, or dates missing from the prophet data) are dropped.
technical_variables_enhanced_df = pd.concat(
    [temp_technical_variables_df, aligned_day_flags_df], axis=1
).dropna()
technical_variables_enhanced_df

Unnamed: 0_level_0,vix close,vix return,mean,sma 10,sma 15,sma 20,sma 50,sma 100,sma 150,sma 200,...,c_neg_returns,c_3_15,c_3_50,c_3_100,c_3_150,c_3_200,c_3_pos_returns,c_3_neg_returns,>b_upper,<b_lower
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2006-07-03,13.05,-0.002294,19.114446,15.095,16.48,16.9885,15.239,13.5626,12.9816,13.09455,...,1,0,0,0,1,0,0,0,0,0


Unnamed: 0_level_0,vix close,vix return,mean,sma 10,sma 15,sma 20,sma 50,sma 100,sma 150,sma 200,...,c_neg_returns,c_3_15,c_3_50,c_3_100,c_3_150,c_3_200,c_3_pos_returns,c_3_neg_returns,>b_upper,<b_lower
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2021-10-01,21.1,-0.088159,19.114446,21.613,20.842667,20.245,18.6064,18.2634,18.6976,19.9574,...,1,1,1,1,1,1,0,0,0,0


Unnamed: 0,Friday,Monday,Thursday,Tuesday,Wednesday
0,0,1,0,0,0


Unnamed: 0,Friday,Monday,Thursday,Tuesday,Wednesday
3839,1,0,0,0,0


Unnamed: 0_level_0,vix close,vix return,mean,sma 10,sma 15,sma 20,sma 50,sma 100,sma 150,sma 200,...,c_3_200,c_3_pos_returns,c_3_neg_returns,>b_upper,<b_lower,Friday,Monday,Thursday,Tuesday,Wednesday
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2006-07-03,13.050000,-0.002294,19.114446,15.095,16.480000,16.9885,15.2390,13.5626,12.981600,13.09455,...,0,0,0,0,0,0,1,0,0,0
2006-07-05,14.150000,0.084291,19.114446,14.841,15.836000,16.8290,15.2870,13.5729,12.997000,13.10920,...,0,0,0,0,0,0,0,0,0,1
2006-07-06,13.650000,-0.035336,19.114446,14.654,15.315333,16.6215,15.3250,13.5807,13.008733,13.11675,...,0,0,0,0,0,0,0,1,0,0
2006-07-07,13.970000,0.023443,19.114446,14.463,15.186667,16.4025,15.3692,13.5869,13.021467,13.12340,...,1,0,0,0,0,1,0,0,0,0
2006-07-10,14.020000,0.003579,19.114446,14.276,14.971333,16.1975,15.4128,13.6046,13.040000,13.12455,...,1,0,0,0,0,0,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-09-27,18.760000,0.056901,19.114446,20.322,19.896000,19.0020,18.3620,18.1287,18.774133,19.97435,...,0,0,0,0,0,0,1,0,0,0
2021-09-28,23.250000,0.239339,19.114446,20.701,20.236667,19.3550,18.3770,18.1773,18.786867,19.97800,...,0,0,0,0,0,0,0,0,1,0
2021-09-29,22.559999,-0.029677,19.114446,21.139,20.543333,19.6590,18.4336,18.2360,18.744667,19.97425,...,0,0,0,0,0,0,0,0,0,1
2021-09-30,23.139999,0.025709,19.114446,21.584,20.832667,20.0105,18.5382,18.2708,18.712600,19.96635,...,1,0,0,0,0,0,0,1,0,0


In [439]:
# Sanity check on the combined frame: are all values finite, and is any value NaN?
display(np.isfinite(technical_variables_enhanced_df).all(axis=None))
display(np.isnan(technical_variables_enhanced_df).any(axis=None))
# display(technical_variables_enhanced_df.isna().sum())

True

False

In [440]:
# Build the binary target column in one step:
#   Signal = 1.0 when the day's 'vix return' is greater than or equal to 0
#   Signal = 0.0 when the day's 'vix return' is less than 0
technical_variables_enhanced_df['Signal'] = np.where(
    technical_variables_enhanced_df['vix return'] >= 0, 1.0, 0.0
)

#technical_variables_enhanced_df = technical_variables_enhanced_df.iloc[200:,:]
#technical_variables_enhanced_df = technical_variables_enhanced_df.loc["2007-01-01":,:]

# Show the first and last rows of the frame with the new column
display(technical_variables_enhanced_df.head(), technical_variables_enhanced_df.tail())

Unnamed: 0_level_0,vix close,vix return,mean,sma 10,sma 15,sma 20,sma 50,sma 100,sma 150,sma 200,...,c_3_pos_returns,c_3_neg_returns,>b_upper,<b_lower,Friday,Monday,Thursday,Tuesday,Wednesday,Signal
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2006-07-03,13.05,-0.002294,19.114446,15.095,16.48,16.9885,15.239,13.5626,12.9816,13.09455,...,0,0,0,0,0,1,0,0,0,0.0
2006-07-05,14.15,0.084291,19.114446,14.841,15.836,16.829,15.287,13.5729,12.997,13.1092,...,0,0,0,0,0,0,0,0,1,1.0
2006-07-06,13.65,-0.035336,19.114446,14.654,15.315333,16.6215,15.325,13.5807,13.008733,13.11675,...,0,0,0,0,0,0,1,0,0,0.0
2006-07-07,13.97,0.023443,19.114446,14.463,15.186667,16.4025,15.3692,13.5869,13.021467,13.1234,...,0,0,0,0,1,0,0,0,0,1.0
2006-07-10,14.02,0.003579,19.114446,14.276,14.971333,16.1975,15.4128,13.6046,13.04,13.12455,...,0,0,0,0,0,1,0,0,0,1.0


Unnamed: 0_level_0,vix close,vix return,mean,sma 10,sma 15,sma 20,sma 50,sma 100,sma 150,sma 200,...,c_3_pos_returns,c_3_neg_returns,>b_upper,<b_lower,Friday,Monday,Thursday,Tuesday,Wednesday,Signal
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2021-09-27,18.76,0.056901,19.114446,20.322,19.896,19.002,18.362,18.1287,18.774133,19.97435,...,0,0,0,0,0,1,0,0,0,1.0
2021-09-28,23.25,0.239339,19.114446,20.701,20.236667,19.355,18.377,18.1773,18.786867,19.978,...,0,0,0,0,0,0,0,1,0,1.0
2021-09-29,22.559999,-0.029677,19.114446,21.139,20.543333,19.659,18.4336,18.236,18.744667,19.97425,...,0,0,0,0,0,0,0,0,1,0.0
2021-09-30,23.139999,0.025709,19.114446,21.584,20.832667,20.0105,18.5382,18.2708,18.7126,19.96635,...,0,0,0,0,0,0,1,0,0,1.0
2021-10-01,21.1,-0.088159,19.114446,21.613,20.842667,20.245,18.6064,18.2634,18.6976,19.9574,...,0,0,0,0,1,0,0,0,0,0.0


In [441]:
# Count how many rows carry each Signal value
technical_variables_enhanced_df.loc[:, 'Signal'].value_counts()

0.0    2068
1.0    1772
Name: Signal, dtype: int64

In [442]:
# Re-check that every value is still finite after adding the Signal column
display(np.isfinite(technical_variables_enhanced_df).all(axis=None))

True

In [443]:
# Options shared by both marker layers so they stay visually consistent
marker_plot_options = dict(
    size=10,
    legend=False,
    ylabel='Price in $',
    width=1000,
    height=400,
)

# Visualize closing price when the signal is 1 (non-negative return day)
signal_1 = technical_variables_enhanced_df[technical_variables_enhanced_df["Signal"] == 1.0]["vix close"].hvplot.scatter(
    color='green',
    marker='^',
    **marker_plot_options
)

# Visualize closing price when the signal is 0 (negative return day).
# Signal is encoded as 0.0/1.0 in this notebook, so filtering on -1.0 (as this
# cell previously did) always produced an empty layer. The variable keeps its
# historical name so any other cell that refers to it still works.
signal_neg_1 = technical_variables_enhanced_df[technical_variables_enhanced_df["Signal"] == 0.0]["vix close"].hvplot.scatter(
    color='red',
    marker='v',
    **marker_plot_options
)

# Visualize close price for the investment
security_close = technical_variables_enhanced_df[["vix close"]].hvplot(
    line_color='lightgray',
    ylabel='Price in $',
    width=1000,
    height=400
)
# Visualize moving averages
#moving_avgs = technical_variables_enhanced_df[["SMA_Fast", "SMA_Slow"]].hvplot(
#    ylabel='Price in $',
#    width=1000,
#    height=400
#)

# Overlay plots: close line first, then the two marker layers on top
entry_exit_plot = security_close *  signal_1 * signal_neg_1 # moving_avgs
entry_exit_plot

In [444]:
# Strategy return for each day = that day's VIX return multiplied by the
# PREVIOUS row's Signal (hence the one-row shift); the first row is therefore NaN.
technical_variables_enhanced_df['Strategy Returns'] = technical_variables_enhanced_df['vix return'].mul(
    technical_variables_enhanced_df['Signal'].shift(1)
)

# Show the first and last rows of the updated frame
display(technical_variables_enhanced_df.head(), technical_variables_enhanced_df.tail())

Unnamed: 0_level_0,vix close,vix return,mean,sma 10,sma 15,sma 20,sma 50,sma 100,sma 150,sma 200,...,c_3_neg_returns,>b_upper,<b_lower,Friday,Monday,Thursday,Tuesday,Wednesday,Signal,Strategy Returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2006-07-03,13.05,-0.002294,19.114446,15.095,16.48,16.9885,15.239,13.5626,12.9816,13.09455,...,0,0,0,0,1,0,0,0,0.0,
2006-07-05,14.15,0.084291,19.114446,14.841,15.836,16.829,15.287,13.5729,12.997,13.1092,...,0,0,0,0,0,0,0,1,1.0,0.0
2006-07-06,13.65,-0.035336,19.114446,14.654,15.315333,16.6215,15.325,13.5807,13.008733,13.11675,...,0,0,0,0,0,1,0,0,0.0,-0.035336
2006-07-07,13.97,0.023443,19.114446,14.463,15.186667,16.4025,15.3692,13.5869,13.021467,13.1234,...,0,0,0,1,0,0,0,0,1.0,0.0
2006-07-10,14.02,0.003579,19.114446,14.276,14.971333,16.1975,15.4128,13.6046,13.04,13.12455,...,0,0,0,0,1,0,0,0,1.0,0.003579


Unnamed: 0_level_0,vix close,vix return,mean,sma 10,sma 15,sma 20,sma 50,sma 100,sma 150,sma 200,...,c_3_neg_returns,>b_upper,<b_lower,Friday,Monday,Thursday,Tuesday,Wednesday,Signal,Strategy Returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2021-09-27,18.76,0.056901,19.114446,20.322,19.896,19.002,18.362,18.1287,18.774133,19.97435,...,0,0,0,0,1,0,0,0,1.0,0.0
2021-09-28,23.25,0.239339,19.114446,20.701,20.236667,19.355,18.377,18.1773,18.786867,19.978,...,0,0,0,0,0,0,1,0,1.0,0.239339
2021-09-29,22.559999,-0.029677,19.114446,21.139,20.543333,19.659,18.4336,18.236,18.744667,19.97425,...,0,0,0,0,0,0,0,1,0.0,-0.029677
2021-09-30,23.139999,0.025709,19.114446,21.584,20.832667,20.0105,18.5382,18.2708,18.7126,19.96635,...,0,0,0,0,0,1,0,0,1.0,0.0
2021-10-01,21.1,-0.088159,19.114446,21.613,20.842667,20.245,18.6064,18.2634,18.6976,19.9574,...,0,0,0,1,0,0,0,0,0.0,-0.088159


In [445]:
# Finite check again; the shifted 'Strategy Returns' column leaves a NaN in its first row
display(np.isfinite(technical_variables_enhanced_df).all(axis=None))

False

In [446]:
# Build the feature matrix X: take every column except the raw close/return,
# the constant mean, and the target/strategy columns, then lag all features by
# one row so that each date only sees the previous row's indicator values.
X = technical_variables_enhanced_df.drop(
    columns=["vix close", "vix return", "mean", "Signal", "Strategy Returns"]
).shift(periods=1)
display(np.all(np.isfinite(X)))

# The lag leaves the first row empty, so discard it and check again
X = X.iloc[1:]
display(np.all(np.isfinite(X)))

# Show the first rows of the feature matrix
X.head()

False

True

Unnamed: 0_level_0,sma 10,sma 15,sma 20,sma 50,sma 100,sma 150,sma 200,sma 250,a15,a50,...,c_3_200,c_3_pos_returns,c_3_neg_returns,>b_upper,<b_lower,Friday,Monday,Thursday,Tuesday,Wednesday
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2006-07-05,15.095,16.48,16.9885,15.239,13.5626,12.9816,13.09455,12.91576,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
2006-07-06,14.841,15.836,16.829,15.287,13.5729,12.997,13.1092,12.9224,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
2006-07-07,14.654,15.315333,16.6215,15.325,13.5807,13.008733,13.11675,12.9312,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
2006-07-10,14.463,15.186667,16.4025,15.3692,13.5869,13.021467,13.1234,12.94196,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
2006-07-11,14.276,14.971333,16.1975,15.4128,13.6046,13.04,13.12455,12.95424,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0


In [447]:
# Confirm the feature matrix is entirely finite and holds no NaN values
display(np.isfinite(X).all(axis=None))
display(np.isnan(X).any(axis=None))

True

False

In [448]:
# Target vector y: the Signal column without its first row, so that it lines
# up with X (whose first row was removed after the one-row lag)
y = technical_variables_enhanced_df['Signal'].iloc[1:]

# Show the class balance and the first few target values
display(y.value_counts(), y.head())

0.0    2067
1.0    1772
Name: Signal, dtype: int64

Date
2006-07-05    1.0
2006-07-06    0.0
2006-07-07    1.0
2006-07-10    1.0
2006-07-11    0.0
Name: Signal, dtype: float64

In [449]:
# The training period starts at the earliest date available in X
training_begin = X.index.min()

# Show the training start date
print(f"{training_begin}")

2006-07-05 00:00:00


In [450]:
# The training period ends `training_months` calendar months after it begins
training_end = training_begin + pd.DateOffset(months=training_months)

# Show the training end date
print(f"{training_end}")

2016-07-05 00:00:00


In [451]:
# Slice the training window (both endpoints inclusive) out of the features and target
X_train = X.loc[training_begin:training_end, :]
y_train = y.loc[training_begin:training_end]

# Inspect the training features: first/last rows, shapes of X_train and y_train,
# then the NaN and finiteness checks
display(X_train.head(), X_train.tail())
display(X_train.shape, y_train.shape)
display(np.isnan(X_train).any(axis=None))
display(np.isfinite(X_train).all(axis=None))

Unnamed: 0_level_0,sma 10,sma 15,sma 20,sma 50,sma 100,sma 150,sma 200,sma 250,a15,a50,...,c_3_200,c_3_pos_returns,c_3_neg_returns,>b_upper,<b_lower,Friday,Monday,Thursday,Tuesday,Wednesday
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2006-07-05,15.095,16.48,16.9885,15.239,13.5626,12.9816,13.09455,12.91576,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
2006-07-06,14.841,15.836,16.829,15.287,13.5729,12.997,13.1092,12.9224,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
2006-07-07,14.654,15.315333,16.6215,15.325,13.5807,13.008733,13.11675,12.9312,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
2006-07-10,14.463,15.186667,16.4025,15.3692,13.5869,13.021467,13.1234,12.94196,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
2006-07-11,14.276,14.971333,16.1975,15.4128,13.6046,13.04,13.12455,12.95424,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0


Unnamed: 0_level_0,sma 10,sma 15,sma 20,sma 50,sma 100,sma 150,sma 200,sma 250,a15,a50,...,c_3_200,c_3_pos_returns,c_3_neg_returns,>b_upper,<b_lower,Friday,Monday,Thursday,Tuesday,Wednesday
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2016-06-28,20.43,19.004667,17.7105,15.834,16.8765,17.946133,18.08775,18.1108,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
2016-06-29,20.255,19.318,17.9385,15.942,16.8456,17.968,18.0655,18.12144,0.0,1.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
2016-06-30,19.905,19.488667,18.0605,16.01,16.7782,17.9748,18.02745,18.12084,0.0,1.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
2016-07-01,19.531,19.554667,18.1605,16.057,16.6745,17.9728,17.9929,18.11532,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
2016-07-05,19.067,19.404,18.2255,16.0734,16.5568,17.97,17.96,18.11004,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0


(2518, 41)

(2518,)

False

True

In [452]:
# Balance the two Signal classes in the training data by random oversampling
# (seeded with random_state=1 so the resampled set is reproducible)
ros = RandomOverSampler(random_state=1)
X_train_resampled, y_train_resampled = ros.fit_resample(X=X_train, y=y_train)

# Show the class counts after resampling
display(y_train_resampled.value_counts())

0.0    1354
1.0    1354
Name: Signal, dtype: int64

In [453]:
# Everything from one day after the training end onward forms the test set
X_test = X.loc[training_end + DateOffset(days=1):, :]
y_test = y.loc[training_end + DateOffset(days=1):]

# Show the first and last rows of the test features
display(X_test.head(), X_test.tail())

Unnamed: 0_level_0,sma 10,sma 15,sma 20,sma 50,sma 100,sma 150,sma 200,sma 250,a15,a50,...,c_3_200,c_3_pos_returns,c_3_neg_returns,>b_upper,<b_lower,Friday,Monday,Thursday,Tuesday,Wednesday
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2016-07-06,18.788,19.044667,18.322,16.1206,16.4497,17.973067,17.9322,18.09372,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
2016-07-07,18.436,18.675333,18.3675,16.1382,16.3179,17.965267,17.8956,18.07368,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
2016-07-08,17.795,18.316667,18.4015,16.1542,16.2115,17.965867,17.8687,18.0654,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
2016-07-11,17.39,17.905333,18.3295,16.1428,16.1024,17.9478,17.8225,18.0626,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
2016-07-12,16.168,17.514,18.155,16.1092,16.0147,17.917333,17.77955,18.06328,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0


Unnamed: 0_level_0,sma 10,sma 15,sma 20,sma 50,sma 100,sma 150,sma 200,sma 250,a15,a50,...,c_3_200,c_3_pos_returns,c_3_neg_returns,>b_upper,<b_lower,Friday,Monday,Thursday,Tuesday,Wednesday
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2021-09-27,20.383,19.739333,18.8835,18.3558,18.1326,18.803133,19.9919,21.29828,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
2021-09-28,20.322,19.896,19.002,18.362,18.1287,18.774133,19.97435,21.26824,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
2021-09-29,20.701,20.236667,19.355,18.377,18.1773,18.786867,19.978,21.25576,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
2021-09-30,21.139,20.543333,19.659,18.4336,18.236,18.744667,19.97425,21.2392,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
2021-10-01,21.584,20.832667,20.0105,18.5382,18.2708,18.7126,19.96635,21.22124,1.0,1.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0


In [454]:
# Standardize the features.
# The scaler is fitted on the resampled training data only and then applied,
# unchanged, to both the training and the test features.
scaler = StandardScaler()
X_scaler = scaler.fit(X_train_resampled)

X_train_scaled, X_test_scaled = (
    X_scaler.transform(features) for features in (X_train_resampled, X_test)
)

# Confirm the scaled training matrix has no NaN values and is entirely finite
display(np.isnan(X_train_scaled).any())
display(np.isfinite(X_train_scaled).all())

False

True

In [455]:
# Initiate the AdaBoost model with the configured number of estimators.
# random_state is fixed (same seed as the RandomOverSampler above) so that the
# fitted model, its predictions and every metric derived from them are
# reproducible from one run of the notebook to the next.
adaboost_model = AdaBoostClassifier(n_estimators=adaboost_estimators, random_state=1)

# Fit the model on the scaled, resampled training data
adaboost_model = adaboost_model.fit(X_train_scaled, y_train_resampled)

# Predict the Signal for every row of the scaled test features
pred_adaboost = adaboost_model.predict(X_test_scaled)

# Use a classification report to evaluate the model using the predictions and testing data
adaboost_report = classification_report(y_test, pred_adaboost)

# Print the classification report
print("         AdaBoost Classification Report")
print(adaboost_report)

         AdaBoost Classification Report
              precision    recall  f1-score   support

         0.0       0.59      0.50      0.54       713
         1.0       0.51      0.60      0.55       608

    accuracy                           0.55      1321
   macro avg       0.55      0.55      0.55      1321
weighted avg       0.55      0.55      0.55      1321



In [456]:
# Inspect how much weight the fitted AdaBoost model gives to each feature
importance = adaboost_model.feature_importances_
feature_importance_list = []
x_test_columns = X_test.columns

# One line per feature: its position, importance score and column name
for i, (v, feature_name) in enumerate(zip(importance, x_test_columns)):
    print(f'Feature: {i} Score: {v} Name {feature_name}')

Feature: 0 Score: 0.13953488372093023 Name sma 10
Feature: 1 Score: 0.11627906976744186 Name sma 15
Feature: 2 Score: 0.046511627906976744 Name sma 20
Feature: 3 Score: 0.046511627906976744 Name sma 50
Feature: 4 Score: 0.046511627906976744 Name sma 100
Feature: 5 Score: 0.13953488372093023 Name sma 150
Feature: 6 Score: 0.06976744186046512 Name sma 200
Feature: 7 Score: 0.13953488372093023 Name sma 250
Feature: 8 Score: 0.0 Name a15
Feature: 9 Score: 0.0 Name a50
Feature: 10 Score: 0.0 Name a100
Feature: 11 Score: 0.0 Name a150
Feature: 12 Score: 0.0 Name a200
Feature: 13 Score: 0.0 Name b15
Feature: 14 Score: 0.023255813953488372 Name b50
Feature: 15 Score: 0.0 Name b100
Feature: 16 Score: 0.0 Name b150
Feature: 17 Score: 0.0 Name b200
Feature: 18 Score: 0.0 Name vix_pos_return
Feature: 19 Score: 0.0 Name vix_neg_return
Feature: 20 Score: 0.0 Name c_1_15
Feature: 21 Score: 0.0 Name c_1_50
Feature: 22 Score: 0.023255813953488372 Name c_1_100
Feature: 23 Score: 0.046511627906976744 Nam

In [457]:
# Collect the AdaBoost predictions in a frame indexed by the test dates
adaboost_predictions_df = pd.DataFrame({'Predicted': pred_adaboost}, index=X_test.index)

# Attach the realised VIX returns for the same test window (aligned on the date index)
adaboost_predictions_df['Actual Returns'] = technical_variables_enhanced_df.loc[
    training_end + DateOffset(days=1):, 'vix return'
]

# Strategy return = the day's actual return times the PREVIOUS row's prediction.
# NOTE(review): the features in X were already lagged by one row when X was
# built, so this extra shift delays the position by a second day - confirm
# that this is intended.
adaboost_predictions_df['Strategy Returns'] = adaboost_predictions_df['Actual Returns'].mul(
    adaboost_predictions_df['Predicted'].shift(1)
)

# Show the first and last rows of the predictions frame
display(adaboost_predictions_df.head(), adaboost_predictions_df.tail())

Unnamed: 0_level_0,Predicted,Actual Returns,Strategy Returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2016-07-06,1.0,-0.039795,
2016-07-07,1.0,-0.013369,-0.013369
2016-07-08,1.0,-0.105691,-0.105691
2016-07-11,1.0,0.025758,0.025758
2016-07-12,1.0,0.000739,0.000739


Unnamed: 0_level_0,Predicted,Actual Returns,Strategy Returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2021-09-27,1.0,0.056901,0.0
2021-09-28,0.0,0.239339,0.239339
2021-09-29,0.0,-0.029677,-0.0
2021-09-30,0.0,0.025709,0.0
2021-10-01,0.0,-0.088159,-0.0


In [458]:
# Cumulative growth of $1 under the strategy and under simply holding the actual returns
adaboost_strategy_cumulative = (1 + adaboost_predictions_df['Strategy Returns']).cumprod()
adaboost_actual_cumulative = (1 + adaboost_predictions_df['Actual Returns']).cumprod()

# Derive the headline numbers for the title from the data instead of hard-coding
# them, so the title can never disagree with the curve after the model, the
# date range or the number of estimators changes.
adaboost_final_multiple = adaboost_strategy_cumulative.iloc[-1]
adaboost_roi_percent = (adaboost_final_multiple - 1) * 100

adaboost_strategy_returns_hvplot = adaboost_strategy_cumulative.hvplot(
    title = f"Adaboost Strategy Returns - ROI {adaboost_roi_percent:.0f}% ({adaboost_final_multiple:.0f}X)"
)
adaboost_actual_returns_hvplot = adaboost_actual_cumulative.hvplot()

# Display the cumulative strategy returns (the actual-returns plot is built above but not shown here)
adaboost_strategy_returns_hvplot

In [459]:
# Figure out the ROI of the strategy returns
cum_prod_df = (1 + adaboost_predictions_df['Strategy Returns']).cumprod()

# Take the last value by position with .iloc: the Series is indexed by date, and
# an integer key passed through [] on a non-integer index is deprecated in pandas.
final_value = cum_prod_df.iloc[-1]
print(f"ROI value {final_value}")
roi = (final_value - 1) * 100
print(f"ROI is {roi:.02f}%")
print(f"final revenue is ${final_value:.02f} for an initial investment is $1")

ROI value 41.55570752037431
ROI is 4055.57%
final revenue is $41.56 for an initial investment is $1


In [460]:
# Code to fine tune the model: refit AdaBoost for several estimator counts and
# record accuracy, ROI, F1 and recall for each one.
# NOTE: when enabled, this loop overwrites adaboost_model, pred_adaboost,
# adaboost_report and adaboost_predictions_df with the values from the LAST
# candidate, so re-run the single-model cells afterwards if they are needed.
if run_multiple_tuning_iterations:
    # Create a predictions DataFrame
    adaboost_predictions_df = pd.DataFrame(index = X_test.index)
    adaboost_model_results_df = pd.DataFrame()
    plot_list = []

    # The actual returns do not depend on the candidate model, so look them up once
    test_actual_returns = technical_variables_enhanced_df.loc[training_end+DateOffset(days=1):,'vix return']

    # To sweep every value instead, iterate over range(1, 100, 1)
    for num_estimators in [43, 23, 24, 46, 45, 29, 30, 25, 41,47, 14, 64, 66, 34, 37, 36]:
        print(num_estimators)
        # Fixed random_state keeps each candidate's results reproducible between runs
        adaboost_model = AdaBoostClassifier(n_estimators=num_estimators, random_state=1)

        # Fit the model
        adaboost_model = adaboost_model.fit(X_train_scaled, y_train_resampled)
        pred_adaboost = adaboost_model.predict(X_test_scaled)
        # Use a classification report to evaluate the model using the predictions and testing data
        adaboost_report = classification_report(y_test, pred_adaboost)

        # Per-class F1 and recall, plus overall accuracy, on the test set
        f1_score_1 = f1_score(y_test, pred_adaboost, pos_label=1)
        f1_score_0 = f1_score(y_test, pred_adaboost, pos_label=0)
        recall_score_1 = recall_score(y_test, pred_adaboost, pos_label=1)
        recall_score_0 = recall_score(y_test, pred_adaboost, pos_label=0)
        accuracy_score_model = accuracy_score(y_test, pred_adaboost)

        # Add the model predictions to the DataFrame
        adaboost_predictions_df['Predicted'] = pred_adaboost

        # Add the actual returns to the DataFrame
        adaboost_predictions_df['Actual Returns'] = test_actual_returns

        # Add the strategy returns to the DataFrame (previous row's prediction times the day's return)
        adaboost_predictions_df['Strategy Returns'] = adaboost_predictions_df['Actual Returns'] * adaboost_predictions_df['Predicted'].shift()

        adaboost_predictions_df["Cum Returns"] = (1 + adaboost_predictions_df['Strategy Returns']).cumprod()

        # Record this candidate's metrics in the row labelled by its estimator count.
        # .iloc[-1] takes the final cumulative value by position; an integer key
        # passed through [] on a date-indexed Series is deprecated in pandas.
        adaboost_model_results_df.loc[num_estimators, "Accuracy"] = accuracy_score_model
        adaboost_model_results_df.loc[num_estimators, "ROI"] = adaboost_predictions_df["Cum Returns"].iloc[-1] - 1
        adaboost_model_results_df.loc[num_estimators, "F1 score 1"] = f1_score_1
        adaboost_model_results_df.loc[num_estimators, "F1 score 0"] = f1_score_0
        adaboost_model_results_df.loc[num_estimators, "Recall score 1"] = recall_score_1
        adaboost_model_results_df.loc[num_estimators, "Recall score 0"] = recall_score_0

        display(adaboost_predictions_df["Cum Returns"].hvplot.line(subplots=True))
        #if  accuracy_score_model >= .50 and f1_score_1 >= .50 and recall_score_1 >= .50:
        #    print(f"estimators {num_estimators}")
        #    print("         AdaBoost Classification Report")
        #    print(adaboost_report)

In [461]:
# Check the model tuning output results - sort by accuracy
# Jupyter only auto-renders a bare expression when it is the last top-level
# statement of the cell; inside an `if` body the sorted frame would be
# computed and then thrown away, so display() is called explicitly.
if run_multiple_tuning_iterations:
    display(adaboost_model_results_df.sort_values(by="Accuracy", ascending=False))

In [462]:
# Check the model tuning output results - sort by ROI (top 20 rows)
# Jupyter only auto-renders a bare expression when it is the last top-level
# statement of the cell; inside an `if` body the sorted frame would be
# computed and then thrown away, so display() is called explicitly.
if run_multiple_tuning_iterations:
    display(adaboost_model_results_df.sort_values(by="ROI", ascending=False).head(20))