In [1]:
# import modules
import panel as pn
pn.extension('tabulator')
import pandas as pd
import numpy as np
from panel.template import FastListTemplate
from pathlib import Path
from yahoo_fin.stock_info import get_data
import datetime
from matplotlib.figure import Figure
from matplotlib import cm
%matplotlib inline
import hvplot.pandas
import holoviews as hv
from holoviews import opts


import os
import sys
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

# import modules that help build tabs
import modules.helpers as helpers
import modules.HistoricalData as hst
import modules.MCTab as MCTab
import modules.intro as intro
import modules.profile as prf
import modules.algorithmic_functions as af


import pandas_ta as ta
import yfinance as yf

from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
from pandas.tseries.offsets import DateOffset
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report

import seaborn as sns

## Compile Data

## * If necessary/desired, use 'build_portfolio_signal_ml_df' to pull machine learning data to create/refresh the test/train datasets

## * Load the test/train datasets

In [2]:
# Optional, disabled by default: uncomment both lines to rebuild the train/test
# datasets instead of loading the saved CSVs in the next cell.
# NOTE(review): presumably create_train_test writes the ./data/*.csv files read
# below, and only the 'conservative' portfolio is shown here — confirm in
# modules/algorithmic_functions.py.
# signals_df, ml_df = af.build_portfolio_signal_ml_df('conservative',2017,12,31)

# af.create_train_test(ml_df)

In [3]:
# Load the saved train/test feature (X) and label (y) CSVs for every portfolio type.
# One helper replaces twenty copy-pasted read_csv calls so the read options live
# in a single place.
DATA_DIR = Path("./data")

portfolios = ['conservative', 'balanced', 'growth', 'aggressive','alternative']


def _read_split(stem):
    """Read ``DATA_DIR/<stem>.csv`` into a DataFrame.

    The column labelled "Unnamed: 0" becomes the index and is parsed as dates
    (``parse_dates=True`` asks pandas to parse the index).

    Parameters
    ----------
    stem : str
        File name without the ``.csv`` suffix, e.g. ``"X_train_full_growth"``.

    Returns
    -------
    pandas.DataFrame
    """
    return pd.read_csv(
        DATA_DIR / f"{stem}.csv",
        index_col="Unnamed: 0",
        parse_dates=True,
        # NOTE(review): deprecated since pandas 2.0 (it is a no-op there and emits
        # a warning). Kept so older pandas versions behave exactly as before;
        # drop this single line once the environment is on pandas >= 2.
        infer_datetime_format=True,
    )


# portfolio name -> [X_train, X_test, y_train, y_test]
datafiles = {
    portfolio: [
        _read_split(f"X_train_full_{portfolio}"),
        _read_split(f"X_test_full_{portfolio}"),
        _read_split(f"y_train_{portfolio}"),
        _read_split(f"y_test_{portfolio}"),
    ]
    for portfolio in portfolios
}

# Keep the original per-portfolio variable names available for any cell that
# refers to them directly; they are the same objects stored in `datafiles`.
(X_train_full_conservative, X_test_full_conservative,
 y_train_conservative, y_test_conservative) = datafiles['conservative']
(X_train_full_balanced, X_test_full_balanced,
 y_train_balanced, y_test_balanced) = datafiles['balanced']
(X_train_full_growth, X_test_full_growth,
 y_train_growth, y_test_growth) = datafiles['growth']
(X_train_full_aggressive, X_test_full_aggressive,
 y_train_aggressive, y_test_aggressive) = datafiles['aggressive']
(X_train_full_alternative, X_test_full_alternative,
 y_train_alternative, y_test_alternative) = datafiles['alternative']

## Create Models

In [4]:
# Initialize the standard scaler.
# This single instance is used as the 'scaler' step of every Pipeline built in
# the evaluation loop, so it is fitted again on each pipeline.fit call.

scaler = StandardScaler()

In [5]:
# Create the base classifier: a decision tree with default hyperparameters.
# It is handed to every BaggingClassifier variant defined in the next cell.
base_classifier = DecisionTreeClassifier()

In [6]:
# Initialize bagging classifier models for training and subsequent evaluation/comparison.
# Each variant changes one or two hyperparameters relative to the baseline
# (100 estimators, max_samples=0.8, max_features=0.5, bootstrap + OOB score, seed 42).
def _make_bagging_model(n_estimators=100, max_samples=0.8, max_features=0.5,
                        bootstrap=True, oob_score=True, random_state=42):
    """Build one BaggingClassifier around the shared ``base_classifier``.

    All parameters are forwarded unchanged to ``BaggingClassifier``; the defaults
    describe the baseline configuration (identical to ``model3``).

    Returns
    -------
    sklearn.ensemble.BaggingClassifier
        An unfitted ensemble.
    """
    # The estimator is passed positionally on purpose: its keyword was
    # `base_estimator` before scikit-learn 1.2, was renamed to `estimator` in 1.2,
    # and `base_estimator` was removed in 1.4. The first positional slot is the
    # estimator in every one of those versions.
    return BaggingClassifier(
        base_classifier,
        n_estimators=n_estimators,
        max_samples=max_samples,
        max_features=max_features,
        bootstrap=bootstrap,
        oob_score=oob_score,
        random_state=random_state,
    )


model1 = _make_bagging_model(n_estimators=200)                   # more estimators
model2 = _make_bagging_model(n_estimators=50)                    # fewer estimators
model3 = _make_bagging_model()                                   # baseline
model4 = _make_bagging_model(max_samples=0.9)                    # larger sample fraction
model5 = _make_bagging_model(max_samples=0.5)                    # smaller sample fraction
model6 = _make_bagging_model(max_features=0.7)                   # more features per estimator
model7 = _make_bagging_model(max_features=0.3)                   # fewer features per estimator
model8 = _make_bagging_model(bootstrap=False, oob_score=False)   # sampling without replacement
model9 = _make_bagging_model(random_state=123)                   # different seed
# model10 / model11 differ from model9 / model3 only in oob_score; the results
# table in this notebook shows identical metrics for each of those pairs.
model10 = _make_bagging_model(oob_score=False, random_state=123)
model11 = _make_bagging_model(oob_score=False)

## Fitting and evaluating models
We use a loop to train/test each model with each portfolio class, then display metrics for all runs. **Set up the model pipeline, consisting of data scaling followed by model training/fitting.**

In [7]:
# Will fit/evaluate multiple models using a series of for-loops. Models will be built using all indicators at once, just SMA indicators, just MACD indicators, and just Bollinger Band indicators.
# Create the list of defined models that can be looped through for fit/evaluation.
# List order matters: the evaluation loop labels result columns model1..model11 by position.

models = [model1, model2, model3, model4, model5, model6, model7, model8, model9, model10, model11]

In [8]:
# Fit every bagging variant on every portfolio's training split, score it on the
# matching test split, and collect the weighted-average precision / recall /
# f1-score plus overall accuracy.
#
# Result: `df_full_results`, indexed by (type, metric) with one column per model
# (model1..modelN, numbered by position in `models`).
metric_rows = ['precision', 'recall', 'f1-score', 'accuracy']

portfolio_frames = []
for portfolio in portfolios:
    X_train, X_test, y_train, y_test = datafiles[portfolio]

    # The labels are loaded as a DataFrame; flatten them to 1-D so the classifier
    # is not handed a column vector on every fit.
    # NOTE(review): assumes the y CSVs hold exactly one label column — confirm.
    y_train_1d = np.ravel(y_train)

    scores_by_model = {}
    for i, model in enumerate(models, start=1):
        pipeline = Pipeline([('scaler', scaler), ('model', model)])
        pipeline.fit(X_train, y_train_1d)
        preds = pipeline.predict(X_test)
        report = classification_report(y_test, preds, output_dict=True)

        # Keep the weighted averages (minus 'support') and add overall accuracy.
        scores = {metric: report['weighted avg'][metric]
                  for metric in ('precision', 'recall', 'f1-score')}
        scores['accuracy'] = report['accuracy']
        scores_by_model[f'model{i}'] = scores

    # One frame per portfolio, built in a single step; `index=` pins the row order.
    df_results = pd.DataFrame(scores_by_model, index=metric_rows)
    portfolio_frames.append(df_results)

# A single concat; `keys` adds the portfolio name as the outer index level 'type'.
df_full_results = pd.concat(portfolio_frames, keys=portfolios, names=['type'])

In [9]:
# Render the metrics table with a light-to-blue gradient. axis=1 computes the
# gradient across each row, so the shading compares models on one metric at a time.
# NOTE(review): `cm` rebinds the name brought in by `from matplotlib import cm`
# in the import cell; after this cell runs, matplotlib's colormap module is no
# longer reachable as `cm`. Consider renaming once later cells have been checked.
cm = sns.light_palette("blue", as_cmap=True)
df_full_results.style.background_gradient(cmap=cm, axis=1).set_caption("Metrics for Full Train/Test Datasets")

Unnamed: 0_level_0,Unnamed: 1_level_0,model1,model2,model3,model4,model5,model6,model7,model8,model9,model10,model11
type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
conservative,precision,0.509456,0.529585,0.513532,0.464557,0.498542,0.497437,0.521792,0.479933,0.501686,0.501686,0.513532
conservative,recall,0.503226,0.525806,0.509677,0.46129,0.493548,0.493548,0.509677,0.474194,0.5,0.5,0.509677
conservative,f1-score,0.500905,0.525713,0.509473,0.461184,0.492267,0.493216,0.499623,0.470928,0.500422,0.500422,0.509473
conservative,accuracy,0.503226,0.525806,0.509677,0.46129,0.493548,0.493548,0.509677,0.474194,0.5,0.5,0.509677
balanced,precision,0.515284,0.506276,0.520423,0.562273,0.541261,0.56521,0.559327,0.526269,0.503551,0.503551,0.520423
balanced,recall,0.5,0.493548,0.5,0.532258,0.509677,0.525806,0.532258,0.512903,0.490323,0.490323,0.5
balanced,f1-score,0.47038,0.465242,0.45703,0.499531,0.457525,0.478741,0.50293,0.495633,0.45561,0.45561,0.45703
balanced,accuracy,0.5,0.493548,0.5,0.532258,0.509677,0.525806,0.532258,0.512903,0.490323,0.490323,0.5
growth,precision,0.547062,0.517641,0.545322,0.536498,0.524306,0.522076,0.523565,0.537619,0.523922,0.523922,0.545322
growth,recall,0.539446,0.513859,0.539446,0.533049,0.520256,0.518124,0.520256,0.530917,0.520256,0.520256,0.539446
