In [1]:
# import modules
import panel as pn
pn.extension('tabulator')
import pandas as pd
import numpy as np
from panel.template import FastListTemplate
from pathlib import Path
from yahoo_fin.stock_info import get_data
import datetime
from matplotlib.figure import Figure
from matplotlib import cm
%matplotlib inline
import hvplot.pandas
import holoviews as hv
from holoviews import opts


import os
import sys
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

# import modules that help build tabs
import modules.helpers as helpers
import modules.HistoricalData as hst
import modules.MCTab as MCTab
import modules.intro as intro
import modules.profile as prf
import modules.algorithmic_functions as af


import pandas_ta as ta
import yfinance as yf

from sklearn.ensemble import AdaBoostClassifier, GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB

from pandas.tseries.offsets import DateOffset
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report, roc_auc_score, f1_score 

import seaborn as sns

from joblib import dump, load

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense


2023-04-13 11:22:49.359831: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


# Fit and save the best model for each portfolio class

### Random Forest gave the best results for the aggressive, alternative, and growth classes; the balanced class is fit with a neural network and the conservative class with a linear-kernel support vector classifier.
### Random Forest performed best with the full feature set, while the neural network worked best with the reduced feature set

#### Loading train/test data (full and reduced feature sets) and defining the models

In [2]:
def _read_split(csv_path):
    """Read one saved train/test split from a CSV file.

    The "Unnamed: 0" column is used as the index and parsed as dates.
    """
    return pd.read_csv(
        Path(csv_path),
        index_col="Unnamed: 0",
        parse_dates=True,
        infer_datetime_format=True,
    )


# Feature matrices: the balanced class reads the reduced-feature files,
# every other class reads the full-feature files.
X_train_full_conservative = _read_split("./data/X_train_full_conservative.csv")
X_test_full_conservative = _read_split("./data/X_test_full_conservative.csv")

X_train_reduced_balanced = _read_split("./data/X_train_reduced_balanced.csv")
X_test_reduced_balanced = _read_split("./data/X_test_reduced_balanced.csv")

X_train_full_growth = _read_split("./data/X_train_full_growth.csv")
X_test_full_growth = _read_split("./data/X_test_full_growth.csv")

X_train_full_aggressive = _read_split("./data/X_train_full_aggressive.csv")
X_test_full_aggressive = _read_split("./data/X_test_full_aggressive.csv")

X_train_full_alternative = _read_split("./data/X_train_full_alternative.csv")
X_test_full_alternative = _read_split("./data/X_test_full_alternative.csv")

# Targets: one train/test pair per portfolio class.
y_train_conservative = _read_split("./data/y_train_conservative.csv")
y_test_conservative = _read_split("./data/y_test_conservative.csv")

y_train_balanced = _read_split("./data/y_train_balanced.csv")
y_test_balanced = _read_split("./data/y_test_balanced.csv")

y_train_growth = _read_split("./data/y_train_growth.csv")
y_test_growth = _read_split("./data/y_test_growth.csv")

y_train_aggressive = _read_split("./data/y_train_aggressive.csv")
y_test_aggressive = _read_split("./data/y_test_aggressive.csv")

y_train_alternative = _read_split("./data/y_train_alternative.csv")
y_test_alternative = _read_split("./data/y_test_alternative.csv")

In [3]:
# Classifier instances referenced by the per-class training cells.

# Linear-kernel support vector classifier with probability estimates enabled.
svc_model = SVC(
    kernel='linear',
    C=0.5,
    probability=True,
    max_iter=1000,
    random_state=42,
)

# Gaussian naive Bayes with fixed class priors.
gaussian_model = GaussianNB(
    priors=[.3, 0.7],
    var_smoothing=1.0,
)

# Random forest; class 1 is weighted five times class 0.
# No random_state is passed here; the training cells call np.random.seed first.
rf_model = RandomForestClassifier(
    n_estimators=1000,
    criterion='entropy',
    max_depth=40,
    min_samples_split=50,
    min_samples_leaf=20,
    min_impurity_decrease=0.01,
    max_features=None,
    bootstrap=True,
    oob_score=True,
    class_weight={0: 1, 1: 5},
)


#### Define standard scaler to use in model pipelines

In [4]:
# Single StandardScaler instance; the pipeline cells below pass this same
# object as their 'scaler' step, so each pipeline.fit call refits it.
scaler = StandardScaler()

## Training the best model for each portfolio class and saving it for future use

### Aggressive


In [5]:
# Seed NumPy's global RNG before fitting
# (presumably for reproducibility of the forest, which has no random_state - confirm).
np.random.seed(8171)

# Scale, then classify with the random forest; fit on the aggressive training split.
pipeline = Pipeline(steps=[('scaler', scaler), ('model', rf_model)]).fit(
    X_train_full_aggressive,
    y_train_aggressive,
)

# Persist the fitted pipeline; dump's return value is the cell output.
dump(pipeline, Path("./saved_models/aggressive.joblib"))


['saved_models/aggressive.joblib']

### Alternative

In [6]:
# Seed NumPy's global RNG before fitting
# (presumably for reproducibility of the forest, which has no random_state - confirm).
np.random.seed(8171)

# Scale, then classify with the random forest; fit on the alternative training split.
pipeline = Pipeline(steps=[('scaler', scaler), ('model', rf_model)]).fit(
    X_train_full_alternative,
    y_train_alternative,
)

# Persist the fitted pipeline; dump's return value is the cell output.
dump(pipeline, Path("./saved_models/alternative.joblib"))




# # Create the scaler instance
# X_scaler = StandardScaler()

# # Fit the scaler
# X_scaler.fit(X_train_reduced_alternative)

# # Scale the data
# X_train_reduced_alternative_scaled = X_scaler.transform(X_train_reduced_alternative)
# X_test_reduced_alternative_scaled = X_scaler.transform(X_test_reduced_alternative)
# number_input_features = 7
# hidden_nodes_layer1 = 8
# hidden_nodes_layer2 = 3
# activation_1 = 'relu'
# activation_2 = 'relu'
# lr = 0.001

# # Create a sequential neural network model
# nn_alternative = Sequential()

# # Add the first hidden layer
# nn_alternative.add(Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation=activation_1))

# # Add the second hidden layer
# nn_alternative.add(Dense(units=hidden_nodes_layer2, activation=activation_2))

# # Add the output layer
# nn_alternative.add(Dense(units=1, activation="sigmoid"))

# # Compile the model 
# # Set the parameters as mean_squared_error, adam, and accuracy.
# nn_alternative.compile(loss="binary_crossentropy", optimizer=tf.keras.optimizers.Adam(learning_rate=lr), metrics=["accuracy"])

# # Fit the model
# deep_net_alternative_model = nn_alternative.fit(X_train_reduced_alternative_scaled, y_train_alternative, epochs=100, verbose=0)


# nn_alternative.save(Path("./saved_models/alternative.h5"))


['saved_models/alternative.joblib']

### Balanced

In [7]:
# Earlier random-forest variant, kept for reference (X_train_full_balanced is
# not loaded by the data-loading cell above, so it cannot run as-is):
# pipeline = Pipeline([('scaler', scaler), ('model', rf_model)])
# pipeline.fit(X_train_full_balanced, y_train_balanced)
# dump(pipeline, Path("./saved_models/balanced.joblib"))

# Fix the random seed so the network's training run is repeatable.
tf.keras.utils.set_random_seed(42)

# Create a scaler dedicated to the balanced class and fit it on the training features only.
X_scaler = StandardScaler()
X_scaler.fit(X_train_reduced_balanced)
# NOTE(review): X_scaler is not saved alongside the .h5 model below; whatever loads
# the model must apply equivalent scaling to its inputs - confirm how callers handle this.

# Scale the train and test features with the training-set statistics.
X_train_reduced_balanced_scaled = X_scaler.transform(X_train_reduced_balanced)
X_test_reduced_balanced_scaled = X_scaler.transform(X_test_reduced_balanced)

# Network hyperparameters.
# The input width is taken from the training data rather than hard-coded, so the
# first layer always matches the number of feature columns in the reduced set.
number_input_features = X_train_reduced_balanced.shape[1]
hidden_nodes_layer1 = 32
hidden_nodes_layer2 = 3
activation_1 = 'tanh'
activation_2 = 'tanh'
lr = 0.01

# Create a sequential neural network model
nn_balanced = Sequential()

# Add the first hidden layer
nn_balanced.add(Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation=activation_1))

# Add the second hidden layer
nn_balanced.add(Dense(units=hidden_nodes_layer2, activation=activation_2))

# Add the output layer (single sigmoid unit for the binary target)
nn_balanced.add(Dense(units=1, activation="sigmoid"))

# Compile the model with binary cross-entropy loss, the Adam optimizer
# at learning rate `lr`, and accuracy as the reported metric.
nn_balanced.compile(loss="binary_crossentropy", optimizer=tf.keras.optimizers.Adam(learning_rate=lr), metrics=["accuracy"])

# Fit the model; the returned object is kept in deep_net_balanced_model.
deep_net_balanced_model = nn_balanced.fit(X_train_reduced_balanced_scaled, y_train_balanced, epochs=100, verbose=0)

# Save the trained network for later use.
nn_balanced.save(Path("./saved_models/balanced.h5"))

2023-04-13 11:23:00.252449: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


### Conservative

In [8]:
# Seed NumPy's global RNG before fitting.
np.random.seed(8171)

# Scale, then classify with the linear SVC; fit on the conservative training split.
pipeline = Pipeline(steps=[('scaler', scaler), ('model', svc_model)]).fit(
    X_train_full_conservative,
    y_train_conservative,
)

# Persist the fitted pipeline; dump's return value is the cell output.
dump(pipeline, Path("./saved_models/conservative.joblib"))



# # Create the scaler instance
# X_scaler = StandardScaler()

# # Fit the scaler
# X_scaler.fit(X_train_reduced_conservative)

# # Scale the data
# X_train_reduced_conservative_scaled = X_scaler.transform(X_train_reduced_conservative)
# X_test_reduced_conservative_scaled = X_scaler.transform(X_test_reduced_conservative)
# number_input_features = 7
# hidden_nodes_layer1 = 36
# hidden_nodes_layer2 = 3
# activation_1 = 'tanh'
# activation_2 = 'tanh'
# lr = 0.01

# # Create a sequential neural network model
# nn_conservative = Sequential()

# # Add the first hidden layer
# nn_conservative.add(Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation=activation_1))

# # Add the second hidden layer
# nn_conservative.add(Dense(units=hidden_nodes_layer2, activation=activation_2))

# # Add the output layer
# nn_conservative.add(Dense(units=1, activation="sigmoid"))

# # Compile the model 
# # Set the parameters as mean_squared_error, adam, and accuracy.
# nn_conservative.compile(loss="binary_crossentropy", optimizer=tf.keras.optimizers.Adam(learning_rate=lr), metrics=["accuracy"])

# # Fit the model
# deep_net_conservative_model = nn_conservative.fit(X_train_reduced_conservative_scaled, y_train_conservative, epochs=100, verbose=0)


# nn_conservative.save(Path("./saved_models/conservative.h5"))

['saved_models/conservative.joblib']

### Growth

In [9]:
# Seed NumPy's global RNG before fitting
# (presumably for reproducibility of the forest, which has no random_state - confirm).
np.random.seed(8171)

# Scale, then classify with the random forest; fit on the growth training split.
pipeline = Pipeline(steps=[('scaler', scaler), ('model', rf_model)]).fit(
    X_train_full_growth,
    y_train_growth,
)

# Persist the fitted pipeline; dump's return value is the cell output.
dump(pipeline, Path("./saved_models/growth.joblib"))

['saved_models/growth.joblib']