In [9]:
# import modules
import panel as pn
pn.extension('tabulator')
import pandas as pd
import numpy as np
from panel.template import FastListTemplate
from pathlib import Path
from yahoo_fin.stock_info import get_data
import datetime
from matplotlib.figure import Figure
from matplotlib import cm
%matplotlib inline
import hvplot.pandas
import holoviews as hv
from holoviews import opts


import os
import sys
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

# import modules that help build tabs
import modules.helpers as helpers
import modules.HistoricalData as hst
import modules.MCTab as MCTab
import modules.intro as intro
import modules.profile as prf
import modules.algorithmic_functions as af


import pandas_ta as ta
import yfinance as yf

from sklearn.svm import SVC
from pandas.tseries.offsets import DateOffset
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report
import seaborn as sns


# Compile Data

## * If necessary/desired, use 'build_portfolio_signal_ml_df' to pull machine learning data to create/refresh the test/train datasets

## * Load the test/train datasets

Use 'build_portfolio_signal_ml_df' to pull machine learning data for training purposes. We will use indicators for SMA, MACD and Bollinger Bands to train the model.

Uncomment the code below to create or refresh the test/train datasets.

In [2]:
# Optional step, intentionally left commented out so a normal run does not rebuild anything.
# Uncomment to create or refresh the test/train datasets
# (presumably the CSV files under ./data that the next cell reads -- confirm in modules.algorithmic_functions).
# af.create_train_test()

In [3]:
# Load the saved train/test splits for every portfolio class.
#
# Files live in ./data and are named
#   X_train_full_<portfolio>.csv, X_test_full_<portfolio>.csv,
#   y_train_<portfolio>.csv,      y_test_<portfolio>.csv
# The "Unnamed: 0" column of each file is used as the index and parsed as dates.
DATA_DIR = Path("./data")

portfolios = ['conservative', 'balanced', 'growth', 'aggressive', 'alternative']


def _read_split(stem):
    """Read ``DATA_DIR/<stem>.csv`` into a DataFrame indexed by its "Unnamed: 0" column.

    ``infer_datetime_format`` is deliberately not passed: it is deprecated in
    pandas 2.x (it only emits a warning there) and ``parse_dates=True`` already
    requests date parsing of the index.
    """
    return pd.read_csv(DATA_DIR / f"{stem}.csv", index_col="Unnamed: 0", parse_dates=True)


# portfolio name -> [X_train, X_test, y_train, y_test]
datafiles = {
    name: [
        _read_split(f"X_train_full_{name}"),
        _read_split(f"X_test_full_{name}"),
        _read_split(f"y_train_{name}"),
        _read_split(f"y_test_{name}"),
    ]
    for name in portfolios
}

# Keep the individual module-level names available; they refer to the very
# same DataFrame objects stored in `datafiles`.
(X_train_full_conservative, X_test_full_conservative,
 y_train_conservative, y_test_conservative) = datafiles['conservative']

(X_train_full_balanced, X_test_full_balanced,
 y_train_balanced, y_test_balanced) = datafiles['balanced']

(X_train_full_growth, X_test_full_growth,
 y_train_growth, y_test_growth) = datafiles['growth']

(X_train_full_aggressive, X_test_full_aggressive,
 y_train_aggressive, y_test_aggressive) = datafiles['aggressive']

(X_train_full_alternative, X_test_full_alternative,
 y_train_alternative, y_test_alternative) = datafiles['alternative']

## Create model

### initialize standard scaler

In [4]:
# One StandardScaler instance, used as the 'scaler' step of every Pipeline
# built in the fit/evaluate loop further down.
scaler = StandardScaler()

### Initialize SVM models for training and subsequent evaluation/comparison

In [5]:
# Nine SVC candidates: each of three kernels paired with each of three C values.
# Ordering is kernel-major, so model1-3 are linear, model4-6 are rbf and
# model7-9 are sigmoid, each with C = 0.5, 1, 10 in that order.
(model1, model2, model3,
 model4, model5, model6,
 model7, model8, model9) = [
    SVC(random_state=42, max_iter=1000, kernel=svc_kernel, C=svc_c)
    for svc_kernel in ('linear', 'rbf', 'sigmoid')
    for svc_c in (0.5, 1, 10)
]



### Setup model pipeline, consisting of data scaling and the model training/fitting
We will fit/evaluate multiple models using a series of for-loops. <br>
Models will be built using all indicators at once, just SMA indicators, just MACD indicators, and just Bollinger Band indicators.

In [6]:
# Candidate models in the order they are fitted and evaluated.
models = [
    model1, model2, model3,  # linear kernel
    model4, model5, model6,  # rbf kernel
    model7, model8, model9,  # sigmoid kernel
]

## fitting and evaluating models
We use a loop to train/test each model with each portfolio class, then display metrics for all runs 

In [7]:
# Fit every candidate model on every portfolio's training split and collect
# the weighted-average precision / recall / f1-score plus overall accuracy
# measured on the matching test split.
#
# Result: `df_full_results`, indexed by (type, metric) with one column per
# model ("model1" .. "modelN", numbered by position in `models`).
portfolio_frames = []
for portfolio in portfolios:
    X_train, X_test, y_train, y_test = datafiles[portfolio]

    # column name -> {metric name: value}; assembled into a frame once per portfolio
    # instead of concatenating inside the model loop.
    model_scores = {}
    for i, model in enumerate(models, start=1):
        pipeline = Pipeline([('scaler', scaler), ('model', model)])
        # y_train is a DataFrame (presumably a single label column -- confirm);
        # flatten it so the estimator receives a 1-D target instead of a
        # column vector, which sklearn flags with a DataConversionWarning.
        pipeline.fit(X_train, np.ravel(y_train))
        preds = pipeline.predict(X_test)
        report = classification_report(y_test, preds, output_dict=True)

        scores = dict(report['weighted avg'])
        scores.pop('support', None)  # sample count, not a quality metric
        scores['accuracy'] = report['accuracy']
        model_scores[f'model{i}'] = scores

    df_results = pd.DataFrame(model_scores)
    df_results['type'] = portfolio
    portfolio_frames.append(df_results)

df_full_results = pd.concat(portfolio_frames)
# Move the portfolio label into the index and make it the outer level.
df_full_results.set_index('type', append=True, inplace=True)
df_full_results = df_full_results.reorder_levels(['type', 0])

In [10]:
# Shade each row from light to dark blue so the models can be compared per metric.
# NOTE(review): binding `cm` here shadows the `cm` module imported from matplotlib
# in the imports cell; the name is kept as-is in case later cells rely on it.
cm = sns.light_palette("blue", as_cmap=True)
(
    df_full_results.style
    .background_gradient(cmap=cm, axis=1)
    .set_caption("Metrics for Full Train/Test Datasets")
)

Unnamed: 0_level_0,Unnamed: 1_level_0,model1,model2,model3,model4,model5,model6,model7,model8,model9
type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
conservative,precision,0.221811,0.221811,0.48875,0.577109,0.493,0.499425,0.514976,0.498274,0.490704
conservative,recall,0.470968,0.470968,0.490323,0.483871,0.474194,0.503226,0.512903,0.496774,0.490323
conservative,f1-score,0.301585,0.301585,0.489212,0.349343,0.384326,0.499237,0.513294,0.497194,0.490493
conservative,accuracy,0.470968,0.470968,0.490323,0.483871,0.474194,0.503226,0.512903,0.496774,0.490323
balanced,precision,0.227929,0.227929,0.504094,0.227929,0.227929,0.560873,0.494078,0.476709,0.490075
balanced,recall,0.477419,0.477419,0.509677,0.477419,0.477419,0.516129,0.483871,0.470968,0.480645
balanced,f1-score,0.308551,0.308551,0.498312,0.308551,0.308551,0.454728,0.45241,0.451528,0.446225
balanced,accuracy,0.477419,0.477419,0.509677,0.477419,0.477419,0.516129,0.483871,0.470968,0.480645
growth,precision,0.251067,0.251067,0.248935,0.500743,0.50957,0.522499,0.49868,0.498795,0.49892
growth,recall,0.501066,0.501066,0.498934,0.501066,0.509595,0.522388,0.498934,0.498934,0.498934
