In [21]:
# import modules
import panel as pn
pn.extension('tabulator')
import pandas as pd
import numpy as np
from panel.template import FastListTemplate
from pathlib import Path
from yahoo_fin.stock_info import get_data
import datetime
from matplotlib.figure import Figure
from matplotlib import cm
%matplotlib inline
import hvplot.pandas
import holoviews as hv
from holoviews import opts


import os
import sys
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

# import modules that help build tabs
import modules.helpers as helpers
import modules.HistoricalData as hst
import modules.MCTab as MCTab
import modules.intro as intro
import modules.profile as prf
import modules.algorithmic_functions as af


import pandas_ta as ta
import yfinance as yf

from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.svm import LinearSVC
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier

from pandas.tseries.offsets import DateOffset
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report

import seaborn as sns

**Compile Data**

* If necessary/desired, use 'build_portfolio_signal_ml_df' to pull machine learning data to create/refresh the test/train datasets
* Load the test/train datasets
* Uncomment the code below to create or refresh the test/train datasets

In [3]:
# signals_df, ml_df = af.build_portfolio_signal_ml_df('conservative',2017,12,31)

# af.create_train_test(ml_df)

In [5]:
# Directory that holds the pre-built train/test CSV files (relative to the working directory).
DATA_DIR = Path("./data")


def _read_indexed_csv(filename):
    """Read one CSV from DATA_DIR, using its 'Unnamed: 0' column as a parsed date index.

    `infer_datetime_format` is intentionally not passed: it is deprecated since
    pandas 2.0 and only ever selected a faster parsing path.
    """
    return pd.read_csv(DATA_DIR / filename, index_col="Unnamed: 0", parse_dates=True)


# Column names of each indicator family, shared by the train and test subsets
# so the two sides cannot drift apart.
SMA_COLS = ['SMA_30', 'SMA_100']
MACD_COLS = ['MACD_12_26_9', 'MACDh_12_26_9', 'MACDs_12_26_9']
BB_COLS = ['BBL_20_2.0', 'BBM_20_2.0', 'BBU_20_2.0', 'BBB_20_2.0', 'BBP_20_2.0']
RSI_COLS = ['RSI_14']
HLC3_COLS = ['HLC3']
OHLC4_COLS = ['OHLC4']
LR_COLS = ['LR_14']
STOCH_COLS = ['STOCHk_14_3_3', 'STOCHd_14_3_3']

# load X_train_full and X_test_full
X_train_full_conservative = _read_indexed_csv("X_train_full_conservative.csv")
X_test_full_conservative = _read_indexed_csv("X_test_full_conservative.csv")

# create additional X_train/test datasets with subsets of indicators
X_train_sma_conservative = X_train_full_conservative[SMA_COLS]
X_test_sma_conservative = X_test_full_conservative[SMA_COLS]
X_train_macd_conservative = X_train_full_conservative[MACD_COLS]
X_test_macd_conservative = X_test_full_conservative[MACD_COLS]
X_train_bb_conservative = X_train_full_conservative[BB_COLS]
X_test_bb_conservative = X_test_full_conservative[BB_COLS]
X_train_rsi_conservative = X_train_full_conservative[RSI_COLS]
X_test_rsi_conservative = X_test_full_conservative[RSI_COLS]
X_train_hlc3_conservative = X_train_full_conservative[HLC3_COLS]
X_test_hlc3_conservative = X_test_full_conservative[HLC3_COLS]
# The variable names say "ohl4" although the column is OHLC4; names kept as they were.
X_train_ohl4_conservative = X_train_full_conservative[OHLC4_COLS]
X_test_ohl4_conservative = X_test_full_conservative[OHLC4_COLS]
X_train_lr_conservative = X_train_full_conservative[LR_COLS]
# Previously misspelled as `X_test_l_conservativer`, which left the LR test set
# without a name matching its train counterpart.
X_test_lr_conservative = X_test_full_conservative[LR_COLS]
X_train_stoch_conservative = X_train_full_conservative[STOCH_COLS]
X_test_stoch_conservative = X_test_full_conservative[STOCH_COLS]

# load y_train and y_test
y_train_conservative = _read_indexed_csv("y_train_conservative.csv")
y_test_conservative = _read_indexed_csv("y_test_conservative.csv")


**Create Model**

In [6]:
# Initialize the StandardScaler used as the 'scaler' step of every Pipeline built below.
# It is a single shared instance, so each pipeline fit refits this same object.
scaler = StandardScaler()

In [12]:
# Seed passed to every ensemble so that repeated runs fit the same models.
# AdaBoostClassifier forwards it to base estimators that expose `random_state`.
SEED = 42

# Initialize AdaBoost (boosting, not bagging) classifiers, each wrapping a different
# base estimator, for training and subsequent evaluation/comparison.
# NOTE(review): `base_estimator` was renamed to `estimator` in scikit-learn 1.2 and the
# old keyword was removed in 1.4; it is kept here on the assumption that the notebook
# targets an older release — confirm the pinned version.
# NOTE(review): AdaBoost passes sample weights to the base estimator's `fit`, and its
# default algorithm in older releases needs `predict_proba`. KNeighborsClassifier has no
# `sample_weight` support, and SVC without `probability=True` / LinearSVC have no
# `predict_proba`, so model3, model7, model9, model10 and model11 may fail at fit time
# depending on the installed version — verify before trusting the saved results table.
model1 = AdaBoostClassifier(
    base_estimator=DecisionTreeClassifier(max_depth=1),
    n_estimators=100, learning_rate=1.0, random_state=SEED,
)
model2 = AdaBoostClassifier(
    base_estimator=LogisticRegression(solver='lbfgs'),
    n_estimators=50, learning_rate=0.5, random_state=SEED,
)
model3 = AdaBoostClassifier(
    base_estimator=SVC(kernel='linear'),
    n_estimators=200, learning_rate=0.1, random_state=SEED,
)
model4 = AdaBoostClassifier(
    base_estimator=RandomForestClassifier(n_estimators=50),
    n_estimators=100, learning_rate=1.0, random_state=SEED,
)
model5 = AdaBoostClassifier(
    base_estimator=GradientBoostingClassifier(max_depth=3),
    n_estimators=150, learning_rate=0.2, random_state=SEED,
)
model6 = AdaBoostClassifier(
    base_estimator=XGBClassifier(max_depth=4),
    n_estimators=100, learning_rate=0.5, random_state=SEED,
)
model7 = AdaBoostClassifier(
    base_estimator=KNeighborsClassifier(n_neighbors=5),
    n_estimators=50, learning_rate=1.0, random_state=SEED,
)
model8 = AdaBoostClassifier(
    base_estimator=DecisionTreeClassifier(max_depth=3),
    n_estimators=200, learning_rate=0.01, random_state=SEED,
)
model9 = AdaBoostClassifier(
    base_estimator=MLPClassifier(hidden_layer_sizes=(50, 50)),
    n_estimators=100, learning_rate=0.1, random_state=SEED,
)
model10 = AdaBoostClassifier(
    base_estimator=QuadraticDiscriminantAnalysis(),
    n_estimators=50, learning_rate=0.5, random_state=SEED,
)
model11 = AdaBoostClassifier(
    base_estimator=LinearSVC(max_iter=10000),
    n_estimators=150, learning_rate=0.05, random_state=SEED,
)

**Set up the model pipeline, consisting of data scaling followed by model training/fitting**

In [13]:
# Candidate models, in the order used to label the result columns (model1 ... model11).
# The whole list is fitted and evaluated once per feature set: all indicators at once,
# just SMA indicators, just MACD indicators, and just Bollinger Band indicators.
models = [
    model1,
    model2,
    model3,
    model4,
    model5,
    model6,
    model7,
    model8,
    model9,
    model10,
    model11,
]

In [18]:
def evaluate_models(models, X_train, X_test, y_train, y_test):
    """Fit every model on one feature set and collect its weighted test metrics.

    Each model is wrapped in a two-step Pipeline (the notebook-level `scaler`,
    then the model), fitted on the training data and scored on the test data
    with `classification_report`.

    Parameters
    ----------
    models : list
        Estimators to evaluate; the n-th one is reported as column `model{n}`.
    X_train, X_test : DataFrame
        Feature matrices restricted to the indicators under test.
    y_train, y_test : DataFrame, Series or array
        Target labels; flattened to 1-D before use.

    Returns
    -------
    DataFrame
        Rows `precision`, `recall`, `f1-score` (weighted averages) and
        `accuracy`; one column per model.
    """
    # The targets are loaded as single-column frames; flattening them avoids
    # scikit-learn's column-vector conversion warning on every fit.
    y_train_1d = np.ravel(y_train)
    y_test_1d = np.ravel(y_test)

    scores = {}
    for position, model in enumerate(models, start=1):
        pipeline = Pipeline([('scaler', scaler), ('AB', model)])
        pipeline.fit(X_train, y_train_1d)
        report = classification_report(y_test_1d, pipeline.predict(X_test), output_dict=True)
        weighted = report['weighted avg']
        # 'support' is deliberately left out; it is a sample count, not a score.
        scores[f'model{position}'] = {
            'precision': weighted['precision'],
            'recall': weighted['recall'],
            'f1-score': weighted['f1-score'],
            'accuracy': report['accuracy'],
        }
    # Built once from the collected records instead of concatenating inside the loop.
    return pd.DataFrame(scores)


### Fit/Evaluate model with all indicators
df_full_results = evaluate_models(
    models, X_train_full_conservative, X_test_full_conservative,
    y_train_conservative, y_test_conservative,
)

### Fit/Evaluate model with only SMA indicators
df_sma_results = evaluate_models(
    models, X_train_sma_conservative, X_test_sma_conservative,
    y_train_conservative, y_test_conservative,
)

### Fit/Evaluate model with only MACD indicators
df_macd_results = evaluate_models(
    models, X_train_macd_conservative, X_test_macd_conservative,
    y_train_conservative, y_test_conservative,
)

### Fit/Evaluate model with only Bollinger Bands indicators
df_bb_results = evaluate_models(
    models, X_train_bb_conservative, X_test_bb_conservative,
    y_train_conservative, y_test_conservative,
)

### combine all results into one table for comparison
# The dict keys become the outer 'type' index level; the metric names stay as the
# inner, unnamed level, giving a (type, metric) MultiIndex.
results_conservative = pd.concat(
    {
        'bb': df_bb_results,
        'macd': df_macd_results,
        'sma': df_sma_results,
        'full': df_full_results,
    },
    names=['type'],
)

In [23]:
# Shade the comparison table with a light-to-blue gradient applied across each row
# (axis=1), so the strongest model for every metric stands out.
# NOTE(review): this rebinds `cm`, hiding the `matplotlib.cm` module imported at the
# top of the notebook; the name is kept because later cells may rely on it.
cm = sns.light_palette("blue", as_cmap=True)
conservative_styler = results_conservative.style.background_gradient(cmap=cm, axis=1)
conservative_styler.set_caption("conservative")


Unnamed: 0_level_0,Unnamed: 1_level_0,model1,model2,model3,model4,model5,model6,model7,model8,model9,model10,model11
type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
bb,precision,0.524448,0.509667,0.501954,0.494523,0.543396,0.501954,0.501954,0.519418,0.494523,0.494523,0.501954
bb,recall,0.506452,0.493548,0.487097,0.483871,0.525806,0.487097,0.487097,0.503226,0.483871,0.483871,0.487097
bb,f1-score,0.485161,0.468707,0.460341,0.468918,0.511516,0.460341,0.460341,0.483865,0.468918,0.468918,0.460341
bb,accuracy,0.506452,0.493548,0.487097,0.483871,0.525806,0.487097,0.487097,0.503226,0.483871,0.483871,0.487097
macd,precision,0.482841,0.47671,0.47323,0.478791,0.49048,0.464557,0.47323,0.484727,0.478791,0.478791,0.47323
macd,recall,0.477419,0.470968,0.467742,0.474194,0.483871,0.46129,0.467742,0.480645,0.474194,0.474194,0.467742
macd,f1-score,0.474978,0.467388,0.464437,0.472863,0.479772,0.461184,0.464437,0.480024,0.472863,0.472863,0.464437
macd,accuracy,0.477419,0.470968,0.467742,0.474194,0.483871,0.46129,0.467742,0.480645,0.474194,0.474194,0.467742
sma,precision,0.500299,0.499899,0.499505,0.512128,0.493821,0.499505,0.499505,0.503997,0.496903,0.496903,0.499505
sma,recall,0.490323,0.490323,0.490323,0.503226,0.483871,0.490323,0.490323,0.496774,0.487097,0.487097,0.490323
