In [1]:
# import modules
import panel as pn
pn.extension('tabulator')
import pandas as pd
import numpy as np
from panel.template import FastListTemplate
from pathlib import Path
from yahoo_fin.stock_info import get_data
import datetime
from matplotlib.figure import Figure
from matplotlib import cm
%matplotlib inline
import hvplot.pandas
import holoviews as hv
from holoviews import opts


import os
import sys
# Add the parent directory to sys.path — presumably so the project-local
# `modules` package imported just below resolves when the notebook is run
# from its own folder (TODO confirm repository layout).
module_path = os.path.abspath(os.path.join('..'))
# The membership check keeps repeated runs of this cell from appending duplicates.
if module_path not in sys.path:
    sys.path.append(module_path)

# import modules that help build tabs
import modules.helpers as helpers
import modules.HistoricalData as hst
import modules.MCTab as MCTab
import modules.intro as intro
import modules.profile as prf
import modules.algorithmic_functions as af


import pandas_ta as ta


from sklearn.ensemble import AdaBoostClassifier, GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier

from pandas.tseries.offsets import DateOffset
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report, roc_auc_score, f1_score 

import seaborn as sns

from joblib import dump, load

# set random seed for reproducible results
# This seeds NumPy's global RNG only. The estimators created later in this
# notebook are given no explicit random_state, so (per scikit-learn's documented
# convention) they draw from this global RNG; results are therefore only
# reproducible when the cells are run top-to-bottom on a fresh kernel.
np.random.seed(8171)

## Load Datasets and define portfolios being used

In [2]:
# All train/test splits are stored as "<name>.csv" inside this folder.
DATA_DIR = Path("./data")


def _load_features(name):
    """Read ``DATA_DIR/<name>.csv`` into a DataFrame.

    Parameters
    ----------
    name : str
        File stem without extension, e.g. ``"X_train_full_conservative"``.

    Returns
    -------
    pandas.DataFrame
        Frame indexed by the CSV's ``"Unnamed: 0"`` column, with pandas asked
        to parse that index as dates.
    """
    # NOTE(review): `infer_datetime_format` is deprecated as of pandas 2.0; it is
    # kept here so behaviour on older pandas is unchanged — TODO confirm the
    # pandas version this notebook targets and drop the argument if >= 2.0.
    return pd.read_csv(
        DATA_DIR / f"{name}.csv",
        index_col="Unnamed: 0",
        parse_dates=True,
        infer_datetime_format=True,
    )


def _load_target(name):
    """Read ``DATA_DIR/<name>.csv`` like `_load_features` and flatten its values to a 1-D array."""
    return _load_features(name).values.ravel()


# load X_train_full and X_test_full
X_train_full_conservative = _load_features("X_train_full_conservative")
X_test_full_conservative = _load_features("X_test_full_conservative")

X_train_full_balanced = _load_features("X_train_full_balanced")
X_test_full_balanced = _load_features("X_test_full_balanced")

X_train_full_growth = _load_features("X_train_full_growth")
X_test_full_growth = _load_features("X_test_full_growth")

X_train_full_aggressive = _load_features("X_train_full_aggressive")
X_test_full_aggressive = _load_features("X_test_full_aggressive")

X_train_full_alternative = _load_features("X_train_full_alternative")
X_test_full_alternative = _load_features("X_test_full_alternative")


# load y_train and y_test (flattened to 1-D arrays)
y_train_conservative = _load_target("y_train_conservative")
y_test_conservative = _load_target("y_test_conservative")

y_train_balanced = _load_target("y_train_balanced")
y_test_balanced = _load_target("y_test_balanced")

y_train_growth = _load_target("y_train_growth")
y_test_growth = _load_target("y_test_growth")

y_train_aggressive = _load_target("y_train_aggressive")
y_test_aggressive = _load_target("y_test_aggressive")

y_train_alternative = _load_target("y_train_alternative")
y_test_alternative = _load_target("y_test_alternative")

# Each entry is [X_train, X_test, y_train, y_test] for one portfolio (full feature set).
datafiles_full = {
    'conservative': [X_train_full_conservative, X_test_full_conservative,
                     y_train_conservative, y_test_conservative],
    'balanced': [X_train_full_balanced, X_test_full_balanced,
                 y_train_balanced, y_test_balanced],
    'growth': [X_train_full_growth, X_test_full_growth,
               y_train_growth, y_test_growth],
    'aggressive': [X_train_full_aggressive, X_test_full_aggressive,
                   y_train_aggressive, y_test_aggressive],
    'alternative': [X_train_full_alternative, X_test_full_alternative,
                    y_train_alternative, y_test_alternative],
}


# load X_train_reduced and X_test_reduced
X_train_reduced_conservative = _load_features("X_train_reduced_conservative")
X_test_reduced_conservative = _load_features("X_test_reduced_conservative")

X_train_reduced_balanced = _load_features("X_train_reduced_balanced")
X_test_reduced_balanced = _load_features("X_test_reduced_balanced")

X_train_reduced_growth = _load_features("X_train_reduced_growth")
X_test_reduced_growth = _load_features("X_test_reduced_growth")

X_train_reduced_aggressive = _load_features("X_train_reduced_aggressive")
X_test_reduced_aggressive = _load_features("X_test_reduced_aggressive")

X_train_reduced_alternative = _load_features("X_train_reduced_alternative")
X_test_reduced_alternative = _load_features("X_test_reduced_alternative")

# Same layout as datafiles_full; the reduced feature frames are paired with the
# same y_train / y_test arrays loaded above.
datafiles_reduced = {
    'conservative': [X_train_reduced_conservative, X_test_reduced_conservative,
                     y_train_conservative, y_test_conservative],
    'balanced': [X_train_reduced_balanced, X_test_reduced_balanced,
                 y_train_balanced, y_test_balanced],
    'growth': [X_train_reduced_growth, X_test_reduced_growth,
               y_train_growth, y_test_growth],
    'aggressive': [X_train_reduced_aggressive, X_test_reduced_aggressive,
                   y_train_aggressive, y_test_aggressive],
    'alternative': [X_train_reduced_alternative, X_test_reduced_alternative,
                    y_train_alternative, y_test_alternative],
}

# Keys of the two dictionaries above, in the order the training cells iterate over them.
portfolios = ['conservative', 'balanced', 'growth', 'aggressive', 'alternative']



## Define model parameters for each model to be trained

In [3]:
# initialize standard scaler
# This single instance is handed to every Pipeline built in the training cells
# below (rather than a fresh scaler per model), so it is refit on each
# pipeline.fit() call and afterwards holds the statistics of the last fit only.
scaler = StandardScaler()

In [4]:
# Initialize AdaBoost (boosting) classifier variants for training and subsequent evaluation/comparison.
# All variants use the SAMME algorithm; they differ in the weak learner, the number of
# boosting rounds (n_estimators) and the learning rate.
# The weak learner is passed as `estimator=` for every model: that is the keyword
# scikit-learn uses from 1.2 onward, while the older `base_estimator=` spelling is
# deprecated there and removed in 1.4. Using one keyword throughout keeps the cell
# runnable on a single, consistent range of scikit-learn versions.
model1 = AdaBoostClassifier(estimator=DecisionTreeClassifier(max_depth=1), n_estimators=100, learning_rate=1.0, algorithm='SAMME')
model2 = AdaBoostClassifier(estimator=LogisticRegression(solver='lbfgs'), n_estimators=50, learning_rate=0.5, algorithm='SAMME')
model3 = AdaBoostClassifier(estimator=SVC(kernel='linear'), n_estimators=200, learning_rate=0.1, algorithm='SAMME')
model4 = AdaBoostClassifier(estimator=RandomForestClassifier(n_estimators=50), n_estimators=100, learning_rate=1.0, algorithm='SAMME')
model5 = AdaBoostClassifier(estimator=GradientBoostingClassifier(max_depth=3), n_estimators=150, learning_rate=0.2, algorithm='SAMME')
model6 = AdaBoostClassifier(estimator=DecisionTreeClassifier(max_depth=5), n_estimators=100, learning_rate=1.0, algorithm='SAMME')
model7 = AdaBoostClassifier(estimator=LogisticRegression(solver='lbfgs'), n_estimators=50, learning_rate=0.25, algorithm='SAMME')
model8 = AdaBoostClassifier(estimator=DecisionTreeClassifier(max_depth=3), n_estimators=200, learning_rate=0.01, algorithm='SAMME')
model9 = AdaBoostClassifier(estimator=RandomForestClassifier(n_estimators=50), n_estimators=100, learning_rate=0.5, algorithm='SAMME')
model10 = AdaBoostClassifier(estimator=GradientBoostingClassifier(max_depth=10), n_estimators=150, learning_rate=0.1, algorithm='SAMME')
model11 = AdaBoostClassifier(estimator=SVC(max_iter=10000), n_estimators=150, learning_rate=0.05, algorithm='SAMME')

## Set up the model pipeline, consisting of data scaling and model training/fitting

### Loop through defined models, fitting and evaluating each model for both the full and reduced features datasets. Save evaluation results to a table for later comparisons

In [5]:
# Gather the AdaBoost variants defined above into one ordered list so the
# training cells can loop over them. Each model is fitted and evaluated on both
# the full and the reduced feature sets; the model at 1-based position k is
# reported as column "model{k}" in the results tables.
models = [
    model1,
    model2,
    model3,
    model4,
    model5,
    model6,
    model7,
    model8,
    model9,
    model10,
    model11,
]

In [6]:
# Fit and score every model on the FULL feature set of each portfolio.
# Result: df_full_results has one row per (portfolio, metric) pair and one
# column per model ("model1" .. "model<N>").
portfolio_tables = []
for portfolio in portfolios:
    # Each datafiles_full entry is [X_train, X_test, y_train, y_test].
    X_train, X_test, y_train, y_test = datafiles_full[portfolio]

    # "model<i>" -> {"f1_score": ..., "roc_auc_score": ...}
    scores = {}
    for i, model in enumerate(models, start=1):
        print(f"Beginning model {i}")
        pipeline = Pipeline([('scaler', scaler), ('model', model)])
        pipeline.fit(X_train, y_train)
        preds = pipeline.predict(X_test)
        proba = pipeline.predict_proba(X_test)
        scores[f"model{i}"] = {
            'f1_score': f1_score(y_test, preds),
            # column 1 of predict_proba is used as the score for ROC AUC
            'roc_auc_score': roc_auc_score(y_test, proba[:, 1]),
        }

    # A dict of dicts becomes a frame with metrics as rows and models as columns,
    # built once instead of being grown by repeated pd.concat calls.
    df_results = pd.DataFrame(scores)
    df_results['type'] = portfolio
    portfolio_tables.append(df_results)

# Stack the per-portfolio tables, then index rows by (portfolio 'type', metric name).
df_full_results = (
    pd.concat(portfolio_tables)
    .set_index('type', append=True)
    .reorder_levels(['type', 0])
)

Beginning model 1
Beginning model 2
Beginning model 3
Beginning model 4
Beginning model 5
Beginning model 6
Beginning model 7
Beginning model 8
Beginning model 9
Beginning model 10
Beginning model 11
Beginning model 1
Beginning model 2
Beginning model 3
Beginning model 4
Beginning model 5
Beginning model 6
Beginning model 7
Beginning model 8
Beginning model 9
Beginning model 10
Beginning model 11
Beginning model 1
Beginning model 2
Beginning model 3
Beginning model 4
Beginning model 5
Beginning model 6
Beginning model 7
Beginning model 8
Beginning model 9
Beginning model 10
Beginning model 11
Beginning model 1
Beginning model 2
Beginning model 3
Beginning model 4
Beginning model 5
Beginning model 6
Beginning model 7
Beginning model 8
Beginning model 9
Beginning model 10
Beginning model 11
Beginning model 1
Beginning model 2
Beginning model 3
Beginning model 4
Beginning model 5
Beginning model 6
Beginning model 7
Beginning model 8
Beginning model 9
Beginning model 10
Beginning model 11


In [7]:

# Fit and score every model on the REDUCED feature set of each portfolio.
# Result: df_reduced_results has one row per (portfolio, metric) pair and one
# column per model ("model1" .. "model<N>").
reduced_tables = []
for portfolio in portfolios:
    # Each datafiles_reduced entry is [X_train, X_test, y_train, y_test].
    X_train, X_test, y_train, y_test = datafiles_reduced[portfolio]

    # "model<i>" -> {"f1_score": ..., "roc_auc_score": ...}
    scores = {}
    for i, model in enumerate(models, start=1):
        pipeline = Pipeline([('scaler', scaler), ('model', model)])
        pipeline.fit(X_train, y_train)
        preds = pipeline.predict(X_test)
        proba = pipeline.predict_proba(X_test)
        scores[f"model{i}"] = {
            'f1_score': f1_score(y_test, preds),
            # column 1 of predict_proba is used as the score for ROC AUC
            'roc_auc_score': roc_auc_score(y_test, proba[:, 1]),
        }

    # A dict of dicts becomes a frame with metrics as rows and models as columns,
    # built once instead of being grown by repeated pd.concat calls.
    df_results = pd.DataFrame(scores)
    df_results['type'] = portfolio
    reduced_tables.append(df_results)

# Stack the per-portfolio tables, then index rows by (portfolio 'type', metric name).
df_reduced_results = (
    pd.concat(reduced_tables)
    .set_index('type', append=True)
    .reorder_levels(['type', 0])
)

### Combine evaluation metrics into one table for review and selection of best model for comparison to other model types

In [8]:
def _label_and_widen(results, label):
    """Return a copy of `results` with `label` as a top column level above the model names.

    Parameters
    ----------
    results : pandas.DataFrame
        Metrics table whose rows are indexed by two levels (portfolio 'type',
        metric name) and whose columns are the model names.
    label : str
        Text identifying the feature set; it becomes the outer column level.

    Returns
    -------
    pandas.DataFrame
        New frame with the same rows and two column levels (label, model).
        `results` itself is left untouched, so the cell can be re-run safely.
    """
    return (
        results
        .assign(type=label)               # constant column holding the label
        .set_index('type', append=True)   # label becomes the third index level
        .unstack(level=2)                 # move that third level (by position) into the columns
        .reorder_levels([1, 0], axis=1)   # put the label above the model names
    )


reduced_wide = _label_and_widen(df_reduced_results, 'Adaboost - Reduced')
full_wide = _label_and_widen(df_full_results, 'Adaboost- Full')

# Side-by-side table: reduced-feature columns first, then full-feature columns.
df_adaboost_results = pd.concat([reduced_wide, full_wide], axis=1)

# Show every column of the wide comparison table.
pd.set_option('display.max_columns', None)

# First rendering highlights the row-wise maximum; the second (the cell's last
# expression) shades each row with a gradient.
display(df_adaboost_results.style.highlight_max(color='lightblue', axis=1).set_caption("Metrics Comparison for All Models"))
df_adaboost_results.style.background_gradient(axis=1)

Unnamed: 0_level_0,type,Adaboost - Reduced,Adaboost - Reduced,Adaboost - Reduced,Adaboost - Reduced,Adaboost - Reduced,Adaboost - Reduced,Adaboost - Reduced,Adaboost - Reduced,Adaboost - Reduced,Adaboost - Reduced,Adaboost - Reduced,Adaboost- Full,Adaboost- Full,Adaboost- Full,Adaboost- Full,Adaboost- Full,Adaboost- Full,Adaboost- Full,Adaboost- Full,Adaboost- Full,Adaboost- Full,Adaboost- Full
Unnamed: 0_level_1,Unnamed: 1_level_1,model1,model2,model3,model4,model5,model6,model7,model8,model9,model10,model11,model1,model2,model3,model4,model5,model6,model7,model8,model9,model10,model11
type,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2
aggressive,f1_score,0.0,0.696104,0.696104,0.0,0.198758,0.521401,0.696104,0.696104,0.0,0.0,0.696104,0.014815,0.696104,0.696104,0.057554,0.108108,0.445378,0.696104,0.692708,0.071429,0.235955,0.696104
aggressive,roc_auc_score,0.533486,0.5,0.5,0.5,0.518784,0.52427,0.5,0.530712,0.5,0.5,0.5,0.535527,0.526375,0.48938,0.510652,0.505836,0.50287,0.523568,0.516392,0.514383,0.480068,0.5
alternative,f1_score,0.0,0.696104,0.696104,0.120805,0.209877,0.477876,0.696104,0.0,0.0,0.336842,0.696104,0.042857,0.689295,0.696104,0.029412,0.085106,0.486957,0.68984,0.107383,0.043165,0.521368,0.696104
alternative,roc_auc_score,0.514351,0.511736,0.5,0.507941,0.546466,0.527969,0.5,0.518178,0.5,0.516839,0.5,0.545095,0.550612,0.5,0.507463,0.506697,0.567356,0.550325,0.555077,0.502647,0.560945,0.5
balanced,f1_score,0.275862,0.675824,0.699229,0.242775,0.35122,0.446352,0.641176,0.697917,0.120805,0.525547,0.699229,0.145695,0.562914,0.699229,0.107383,0.121622,0.359223,0.603077,0.619883,0.331606,0.293478,0.699229
balanced,roc_auc_score,0.496418,0.467163,0.49073,0.50883,0.445073,0.483786,0.476213,0.534471,0.515994,0.482655,0.5,0.510464,0.472285,0.542044,0.508044,0.498869,0.475176,0.470934,0.505342,0.510809,0.509521,0.5
conservative,f1_score,0.217949,0.528455,0.393939,0.38191,0.324324,0.462222,0.269663,0.425926,0.409756,0.472727,0.661376,0.424528,0.471074,0.457944,0.354839,0.295858,0.562044,0.365482,0.34555,0.32967,0.491667,0.661376
conservative,roc_auc_score,0.480094,0.524312,0.528844,0.511375,0.507094,0.496406,0.492031,0.501906,0.519563,0.540031,0.5,0.490656,0.499281,0.532469,0.522625,0.534875,0.534344,0.491063,0.508281,0.514531,0.51725,0.5
growth,f1_score,0.06993,0.699482,0.699482,0.0,0.278075,0.517647,0.699482,0.696335,0.0,0.082759,0.699482,0.014706,0.699482,0.699482,0.014706,0.129032,0.5625,0.699482,0.705882,0.0,0.356436,0.699482
growth,roc_auc_score,0.506162,0.5,0.5,0.5,0.488123,0.506258,0.5,0.503672,0.5,0.504981,0.5,0.576277,0.503799,0.5,0.503704,0.462101,0.51092,0.521328,0.539527,0.501756,0.499713,0.5


Unnamed: 0_level_0,type,Adaboost - Reduced,Adaboost - Reduced,Adaboost - Reduced,Adaboost - Reduced,Adaboost - Reduced,Adaboost - Reduced,Adaboost - Reduced,Adaboost - Reduced,Adaboost - Reduced,Adaboost - Reduced,Adaboost - Reduced,Adaboost- Full,Adaboost- Full,Adaboost- Full,Adaboost- Full,Adaboost- Full,Adaboost- Full,Adaboost- Full,Adaboost- Full,Adaboost- Full,Adaboost- Full,Adaboost- Full
Unnamed: 0_level_1,Unnamed: 1_level_1,model1,model2,model3,model4,model5,model6,model7,model8,model9,model10,model11,model1,model2,model3,model4,model5,model6,model7,model8,model9,model10,model11
type,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2
aggressive,f1_score,0.0,0.696104,0.696104,0.0,0.198758,0.521401,0.696104,0.696104,0.0,0.0,0.696104,0.014815,0.696104,0.696104,0.057554,0.108108,0.445378,0.696104,0.692708,0.071429,0.235955,0.696104
aggressive,roc_auc_score,0.533486,0.5,0.5,0.5,0.518784,0.52427,0.5,0.530712,0.5,0.5,0.5,0.535527,0.526375,0.48938,0.510652,0.505836,0.50287,0.523568,0.516392,0.514383,0.480068,0.5
alternative,f1_score,0.0,0.696104,0.696104,0.120805,0.209877,0.477876,0.696104,0.0,0.0,0.336842,0.696104,0.042857,0.689295,0.696104,0.029412,0.085106,0.486957,0.68984,0.107383,0.043165,0.521368,0.696104
alternative,roc_auc_score,0.514351,0.511736,0.5,0.507941,0.546466,0.527969,0.5,0.518178,0.5,0.516839,0.5,0.545095,0.550612,0.5,0.507463,0.506697,0.567356,0.550325,0.555077,0.502647,0.560945,0.5
balanced,f1_score,0.275862,0.675824,0.699229,0.242775,0.35122,0.446352,0.641176,0.697917,0.120805,0.525547,0.699229,0.145695,0.562914,0.699229,0.107383,0.121622,0.359223,0.603077,0.619883,0.331606,0.293478,0.699229
balanced,roc_auc_score,0.496418,0.467163,0.49073,0.50883,0.445073,0.483786,0.476213,0.534471,0.515994,0.482655,0.5,0.510464,0.472285,0.542044,0.508044,0.498869,0.475176,0.470934,0.505342,0.510809,0.509521,0.5
conservative,f1_score,0.217949,0.528455,0.393939,0.38191,0.324324,0.462222,0.269663,0.425926,0.409756,0.472727,0.661376,0.424528,0.471074,0.457944,0.354839,0.295858,0.562044,0.365482,0.34555,0.32967,0.491667,0.661376
conservative,roc_auc_score,0.480094,0.524312,0.528844,0.511375,0.507094,0.496406,0.492031,0.501906,0.519563,0.540031,0.5,0.490656,0.499281,0.532469,0.522625,0.534875,0.534344,0.491063,0.508281,0.514531,0.51725,0.5
growth,f1_score,0.06993,0.699482,0.699482,0.0,0.278075,0.517647,0.699482,0.696335,0.0,0.082759,0.699482,0.014706,0.699482,0.699482,0.014706,0.129032,0.5625,0.699482,0.705882,0.0,0.356436,0.699482
growth,roc_auc_score,0.506162,0.5,0.5,0.5,0.488123,0.506258,0.5,0.503672,0.5,0.504981,0.5,0.576277,0.503799,0.5,0.503704,0.462101,0.51092,0.521328,0.539527,0.501756,0.499713,0.5


### Save evaluation metrics of the selected model for comparison with metrics from other model types

In [9]:
# Keep only model11's column from the reduced-feature results as the AdaBoost
# entry for the cross-model comparison, with the row index turned into columns.
# NOTE(review): in the comparison table displayed above, model11's roc_auc_score
# is exactly 0.5 for every portfolio, which suggests it may be predicting a
# single class — confirm this is the intended "best" model before relying on it.
best = df_adaboost_results['Adaboost - Reduced'][['model11']].reset_index()

# Create the output folder if needed so to_csv does not fail on a fresh checkout.
metrics_path = Path("./model_metrics/adaboost.csv")
metrics_path.parent.mkdir(parents=True, exist_ok=True)
best.to_csv(metrics_path, index=False)