In [10]:
# import modules
import panel as pn
pn.extension('tabulator')
import pandas as pd
import numpy as np
from panel.template import FastListTemplate
from pathlib import Path
from yahoo_fin.stock_info import get_data
import datetime
from matplotlib.figure import Figure
from matplotlib import cm
%matplotlib inline
import hvplot.pandas
import holoviews as hv
from holoviews import opts


import os
import sys
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

# import modules that help build tabs
import modules.helpers as helpers
import modules.HistoricalData as hst
import modules.MCTab as MCTab
import modules.intro as intro
import modules.profile as prf
import modules.algorithmic_functions as af


import pandas_ta as ta
import yfinance as yf

from sklearn.ensemble import RandomForestClassifier
from pandas.tseries.offsets import DateOffset
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report

import seaborn as sns

**Compile Data**

* If necessary/desired, use 'build_portfolio_signal_ml_df' to pull machine learning data to create/refresh the test/train datasets
* Load the test/train datasets
* Uncomment the code below to create or refresh the test/train datasets

In [2]:
# Optional refresh step, disabled by default: uncomment both calls to rebuild
# the signal/ML frames and regenerate the train/test datasets.
# signals_df, ml_df = af.build_portfolio_signal_ml_df('conservative',2017,12,31)

# af.create_train_test(ml_df)

In [2]:
def _read_conservative_csv(filename):
    """Load ./data/<filename>, using the "Unnamed: 0" column as a date-parsed index."""
    # infer_datetime_format is intentionally not passed: pandas 2.0 deprecated it
    # (the strict inference it enabled is now the default) and it only emits a warning.
    return pd.read_csv(Path("./data") / filename, index_col="Unnamed: 0", parse_dates=True)


# load X_train_full and X_test_full
X_train_full_conservative = _read_conservative_csv("X_train_full_conservative.csv")
X_test_full_conservative = _read_conservative_csv("X_test_full_conservative.csv")


# Indicator column groups, defined once so the train and test subsets cannot drift apart.
SMA_COLUMNS = ['SMA_30', 'SMA_100']
MACD_COLUMNS = ['MACD_12_26_9', 'MACDh_12_26_9', 'MACDs_12_26_9']
BB_COLUMNS = ['BBL_20_2.0', 'BBM_20_2.0', 'BBU_20_2.0', 'BBB_20_2.0', 'BBP_20_2.0']
RSI_COLUMNS = ['RSI_14']
HLC3_COLUMNS = ['HLC3']
OHLC4_COLUMNS = ['OHLC4']
LR_COLUMNS = ['LR_14']
STOCH_COLUMNS = ['STOCHk_14_3_3', 'STOCHd_14_3_3']

# create additional X_train/test datasets with subsets of indicators
X_train_sma_conservative = X_train_full_conservative[SMA_COLUMNS]
X_test_sma_conservative = X_test_full_conservative[SMA_COLUMNS]
X_train_macd_conservative = X_train_full_conservative[MACD_COLUMNS]
X_test_macd_conservative = X_test_full_conservative[MACD_COLUMNS]
X_train_bb_conservative = X_train_full_conservative[BB_COLUMNS]
X_test_bb_conservative = X_test_full_conservative[BB_COLUMNS]
X_train_rsi_conservative = X_train_full_conservative[RSI_COLUMNS]
X_test_rsi_conservative = X_test_full_conservative[RSI_COLUMNS]
X_train_hlc3_conservative = X_train_full_conservative[HLC3_COLUMNS]
X_test_hlc3_conservative = X_test_full_conservative[HLC3_COLUMNS]
# NOTE(review): these two names say "ohl4" but hold the OHLC4 column; the names are
# kept as-is because other cells may reference them.
X_train_ohl4_conservative = X_train_full_conservative[OHLC4_COLUMNS]
X_test_ohl4_conservative = X_test_full_conservative[OHLC4_COLUMNS]
X_train_lr_conservative = X_train_full_conservative[LR_COLUMNS]
X_test_lr_conservative = X_test_full_conservative[LR_COLUMNS]
# Backward-compatible alias for the misspelled name this cell used to define.
X_test_l_conservativer = X_test_lr_conservative
X_train_stoch_conservative = X_train_full_conservative[STOCH_COLUMNS]
X_test_stoch_conservative = X_test_full_conservative[STOCH_COLUMNS]


#load y_train and y_test
y_train_conservative = _read_conservative_csv("y_train_conservative.csv")
y_test_conservative = _read_conservative_csv("y_test_conservative.csv")


**Create Model**

In [3]:
# initialize the standard scaler used as the 'scaler' step of every model pipeline below

scaler = StandardScaler()

In [6]:
# Initialize bagging classifier models for training and subsequent evaluation/comparison.
# Every forest shares one explicit seed so that re-running the notebook from a fresh
# kernel reproduces the same fitted trees and therefore the same metrics table.
RANDOM_STATE = 42

model1 = RandomForestClassifier(
    n_estimators=100, max_depth=10, min_samples_split=5, min_samples_leaf=1,
    max_features='sqrt', bootstrap=True, criterion='gini', min_impurity_decrease=0.0,
    class_weight=None, oob_score=False, random_state=RANDOM_STATE)
model2 = RandomForestClassifier(
    n_estimators=200, max_depth=20, min_samples_split=10, min_samples_leaf=5,
    max_features='log2', bootstrap=True, criterion='entropy', min_impurity_decrease=0.001,
    class_weight='balanced_subsample', oob_score=True, random_state=RANDOM_STATE)
model3 = RandomForestClassifier(
    n_estimators=500, max_depth=30, min_samples_split=20, min_samples_leaf=10,
    max_features=0.5, bootstrap=True, criterion='gini', min_impurity_decrease=0.005,
    class_weight={0: 1, 1: 3}, oob_score=True, random_state=RANDOM_STATE)
model4 = RandomForestClassifier(
    n_estimators=1000, max_depth=40, min_samples_split=50, min_samples_leaf=20,
    max_features=None, bootstrap=True, criterion='entropy', min_impurity_decrease=0.01,
    class_weight={0: 1, 1: 5}, oob_score=True, random_state=RANDOM_STATE)
model5 = RandomForestClassifier(
    n_estimators=200, max_depth=20, min_samples_split=10, min_samples_leaf=5,
    max_features=0.7, bootstrap=True, criterion='gini', min_impurity_decrease=0.0,
    class_weight=None, oob_score=False, random_state=RANDOM_STATE)
model6 = RandomForestClassifier(
    n_estimators=500, max_depth=30, min_samples_split=20, min_samples_leaf=10,
    max_features=0.3, bootstrap=True, criterion='entropy', min_impurity_decrease=0.0,
    class_weight='balanced', oob_score=True, random_state=RANDOM_STATE)
model7 = RandomForestClassifier(
    n_estimators=1000, max_depth=40, min_samples_split=50, min_samples_leaf=20,
    max_features='sqrt', bootstrap=True, criterion='gini', min_impurity_decrease=0.0,
    class_weight={0: 1, 1: 10}, oob_score=True, random_state=RANDOM_STATE)
model8 = RandomForestClassifier(
    n_estimators=2000, max_depth=50, min_samples_split=100, min_samples_leaf=50,
    max_features='log2', bootstrap=True, criterion='entropy', min_impurity_decrease=0.0,
    class_weight=None, oob_score=False, random_state=RANDOM_STATE)
model9 = RandomForestClassifier(
    n_estimators=1000, max_depth=30, min_samples_split=20, min_samples_leaf=10,
    max_features=None, bootstrap=True, criterion='gini', min_impurity_decrease=0.005,
    class_weight='balanced', oob_score=True, random_state=RANDOM_STATE)
model10 = RandomForestClassifier(
    n_estimators=500, max_depth=20, min_samples_split=10, min_samples_leaf=5,
    max_features=0.7, bootstrap=True, criterion='entropy', min_impurity_decrease=0.001,
    class_weight={0: 1, 1: 5}, oob_score=True, random_state=RANDOM_STATE)
model11 = RandomForestClassifier(
    n_estimators=1000, max_depth=30, min_samples_split=10, min_samples_leaf=5,
    max_features=0.5, bootstrap=True, criterion='entropy', min_impurity_decrease=0.001,
    class_weight='balanced', oob_score=True, random_state=RANDOM_STATE)

**Setup model pipeline, consisting of data scaling and the model training/fitting**

In [7]:
# Multiple models are fit/evaluated by looping over this list. Models will be built using all indicators at once, just SMA indicators, just MACD indicators, and just Bollinger Band indicators.
# The position in the list (starting at 1) determines the 'model<N>' column label in the results tables.

models = [model1, model2, model3, model4, model5, model6, model7, model8, model9, model10, model11]

In [12]:
def fit_and_score_models(models, X_train, X_test, y_train, y_test, scaler):
    """Fit each model inside a scaler -> model pipeline and tabulate its test scores.

    Parameters
    ----------
    models : iterable of estimators
        Fitted in order; the n-th model (1-based) is reported in column 'model<n>'.
    X_train, X_test : feature tables passed to ``pipeline.fit`` / ``pipeline.predict``.
    y_train, y_test : targets passed to ``pipeline.fit`` / ``classification_report``.
    scaler : transformer used as the 'scaler' step of every pipeline.

    Returns
    -------
    pandas.DataFrame
        Rows 'precision', 'recall', 'f1-score' (the report's weighted averages) and
        'accuracy'; one column per model.

    Notes
    -----
    The scaler and model objects passed in are the ones placed in each pipeline, so
    every call re-fits them in place.
    """
    metric_rows = ['precision', 'recall', 'f1-score', 'accuracy']
    scores_by_model = {}
    for number, model in enumerate(models, start=1):
        pipeline = Pipeline([('scaler', scaler), ('rf', model)])
        pipeline.fit(X_train, y_train)
        preds = pipeline.predict(X_test)
        report = classification_report(y_test, preds, output_dict=True)
        model_scores = dict(report['weighted avg'])
        model_scores['accuracy'] = report['accuracy']
        scores_by_model[f'model{number}'] = model_scores
    # Build the table once instead of concatenating inside the loop; reindex both
    # fixes the row order and leaves out the 'support' entry of the weighted average.
    return pd.DataFrame(scores_by_model).reindex(metric_rows)


### Fit/Evaluate model with all indicators
df_full_results = fit_and_score_models(
    models, X_train_full_conservative, X_test_full_conservative,
    y_train_conservative, y_test_conservative, scaler)

### Fit/Evaluate model with only SMA indicators
df_sma_results = fit_and_score_models(
    models, X_train_sma_conservative, X_test_sma_conservative,
    y_train_conservative, y_test_conservative, scaler)

### Fit/Evaluate model with only MACD indicators
df_macd_results = fit_and_score_models(
    models, X_train_macd_conservative, X_test_macd_conservative,
    y_train_conservative, y_test_conservative, scaler)

### Fit/Evaluate model with only Bollinger Bands indicators
df_bb_results = fit_and_score_models(
    models, X_train_bb_conservative, X_test_bb_conservative,
    y_train_conservative, y_test_conservative, scaler)

### combine all results into one table for comparison

# Tag each table with the feature set it was trained on before stacking them.
df_bb_results['type'] = 'bb'
df_macd_results['type'] = 'macd'
df_sma_results['type'] = 'sma'
df_full_results['type'] = 'full'

results = pd.concat([df_bb_results, df_macd_results, df_sma_results, df_full_results], axis=0)
results = results.set_index('type', append=True)
# Put 'type' first so the final index reads (feature set, metric).
results_conservative = results.reorder_levels(['type', 0])


In [13]:
# Render the comparison table with a light-to-blue colour gradient computed per row (axis=1).
# NOTE(review): this rebinds `cm`, which the import cell bound to matplotlib's `cm`
# module; any later use of `cm` sees this seaborn colormap instead — confirm that is intended.
cm = sns.light_palette("blue", as_cmap=True)
results_conservative.style.background_gradient(cmap=cm, axis=1).set_caption("conservative")

Unnamed: 0_level_0,Unnamed: 1_level_0,model1,model2,model3,model4,model5,model6,model7,model8,model9,model10,model11
type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
bb,precision,0.492797,0.487289,0.221811,0.221811,0.495788,0.50292,0.221811,0.462262,0.485647,0.535532,0.506266
bb,recall,0.483871,0.487097,0.470968,0.470968,0.490323,0.506452,0.470968,0.46129,0.5,0.493548,0.506452
bb,f1-score,0.47426,0.487188,0.301585,0.301585,0.48841,0.502789,0.301585,0.37951,0.473186,0.416665,0.506354
bb,accuracy,0.483871,0.487097,0.470968,0.470968,0.490323,0.506452,0.470968,0.46129,0.5,0.493548,0.506452
macd,precision,0.503973,0.517458,0.221811,0.221811,0.498542,0.524774,0.221811,0.456099,0.538121,0.221811,0.529015
macd,recall,0.503226,0.525806,0.470968,0.470968,0.493548,0.532258,0.470968,0.451613,0.535484,0.470968,0.535484
macd,f1-score,0.503516,0.501916,0.301585,0.301585,0.492267,0.451292,0.301585,0.42213,0.431772,0.301585,0.509798
macd,accuracy,0.503226,0.525806,0.470968,0.470968,0.493548,0.532258,0.470968,0.451613,0.535484,0.470968,0.535484
sma,precision,0.51793,0.53848,0.221811,0.221811,0.519175,0.518618,0.221811,0.517839,0.517839,0.540344,0.529154
sma,recall,0.506452,0.519355,0.470968,0.470968,0.483871,0.496774,0.470968,0.487097,0.487097,0.477419,0.512903
