In [1]:
# Install the latest version of the colorama package to enable colorful terminal text, enhancing readability
!pip install -U colorama

# Install the empyrical package to compute various financial risk and performance metrics, useful for quantitative finance
!pip install empyrical

# Install a specific version (1.4.3) of the bayesian-optimization package, which is used for optimizing hyperparameters in machine learning models
!pip install bayesian-optimization==1.4.3

# Install the pmdarima package, a convenient tool for auto-fitting ARIMA models, which are widely used in time series forecasting
!pip install pmdarima

# Install a specific version (2.14.0) of tensorflow, a powerful library for machine learning and neural networks, to ensure compatibility with certain features or other libraries
!pip install tensorflow==2.16.1

Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable


In [3]:
# The following line of code is used to install the 'ipython-extensions' package using pip, Python's package installer. 
# The 'ipython-extensions' package contains a collection of extensions for IPython, which enhance its functionality and usability. 
# These extensions can include new magic commands, additional configuration options, and improved integration with other tools and libraries. 
# Installing this package can significantly enhance the interactive Python programming experience, especially within Jupyter notebooks where IPython is commonly used.

!pip install ipython-extensions

Defaulting to user installation because normal site-packages is not writeable
Collecting ipython-extensions
  Downloading ipython-extensions-0.2.tar.gz (3.9 kB)
  Preparing metadata (setup.py) ... done
Building wheels for collected packages: ipython-extensions
  Building wheel for ipython-extensions (setup.py) ... done
  Created wheel for ipython-extensions: filename=ipython_extensions-0.2-py3-none-any.whl size=4806 sha256=3ba97854e13eeaf9209b1b2e17afd3ff99b8218954bbd85c423ebf5c5082582c
  Stored in directory: /home/repl/.cache/pip/wheels/e6/b8/8c/937067faedc50d949c55c159058f7ed0fbbc4b76fb689a7b99
Successfully built ipython-extensions
Installing collected packages: ipython-extensions
Successfully installed ipython-extensions-0.2


In [2]:
# Import the bayes_opt library
# This library is used for Bayesian Optimization which is a method of finding the maximum or minimum of an objective function
# that is expensive to evaluate. It's often used for hyperparameter tuning in machine learning models.
import bayes_opt

In [4]:
# Import the MarketDataEngineer class from the MarketDataEngineer module located in the champion.source.data package.
# This class is likely responsible for engineering or processing market data for further analysis or modeling.
# It could be used to clean, normalize, or aggregate market data before it's used in financial models or analyses.
from champion.source.data.MarketDataEngineer import MarketDataEngineer

# Import all functions and classes from the _util module located in the champion.source.util package.
# This could include various utility functions or classes that are used across the project for different purposes such as data manipulation, logging, etc.
# Utility modules often contain helper functions that simplify common tasks, making the main code cleaner and more readable.
from champion.source.util._util import *

# Import all functions and classes from the _trainModels module located in the champion.pipeline package.
# This module is likely responsible for training machine learning models. It could contain functions or classes to train, evaluate, and save models.
# Training models involves feeding data into algorithms to allow them to learn and make predictions. This module likely encapsulates that process.
from champion.pipeline._trainModels import *

# Import all functions and classes from the _evaluateModels module located in the champion.pipeline package.
# This module is probably dedicated to evaluating the performance of trained models. It might include functions for calculating accuracy, precision, recall, etc.
# Evaluating models is crucial to understand their effectiveness and to decide which model performs best for a given dataset or problem.
from champion.pipeline._evaluateModels import *

# Import all functions and classes from the _causalInference module located in the champion.pipeline package.
# This module is likely focused on causal inference methods, which are used to determine causality rather than just correlations between variables.
# Causal inference is important in many fields, including economics, epidemiology, and social sciences, to understand the impact of interventions.
from champion.pipeline._causalInference import *

# Import all functions and classes from the _stressBacktest module located in the champion.pipeline package.
# This module is probably used for stress testing and backtesting models. Stress testing involves testing models under extreme conditions,
# while backtesting is the process of testing a predictive model on historical data.
# These techniques are essential for assessing the robustness and reliability of financial models before they are deployed.
from champion.pipeline._stressBacktest import *

2024-05-19 14:01:02.025272: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-05-19 14:01:02.028468: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-05-19 14:01:02.064057: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [4]:
# Run the project's directory-management helper before anything else in this cell.
# NOTE(review): presumably creates the folders the pipeline writes to — confirm against its definition.
dir_management()

# Wall-clock timestamp taken immediately before the ETL work starts
start = time.time()

# Build the market-data engineer that drives the ETL step
loader = MarketDataEngineer()

# Run the engineer's ETL process; no arguments are passed, so sources and destinations
# are whatever MarketDataEngineer itself is configured with
loader.etl_process()

# Elapsed seconds for construction + ETL; printed as "ETL" by the timing-summary cell
etl_load_time = time.time() - start

In [8]:
# Wall-clock timestamp taken immediately before Logistic Regression training.
# `start_time` is reassigned by the ANN and LSTM training cells, so it is only meaningful inside this cell.
start_time = time.time()

# Instantiate the project's Logistic_Regression wrapper
logistic_regression_model = Logistic_Regression()

# Fit the model; no data is passed in here — presumably the wrapper loads its own training set (TODO confirm)
logistic_regression_model.model_training()

# Persist the fitted model through the wrapper's own save routine
logistic_regression_model.save_model()

# Elapsed seconds for instantiate + train + save; printed as "Logistic" by the timing-summary cell
LR_training_duration = time.time() - start_time

In [6]:
# Wall-clock timestamp taken immediately before ARIMA training
training_start_time = time.time()

# Instantiate the ARIMA model wrapper.
# NOTE(review): `ARIMA` resolves to whatever the star imports at the top of the notebook provide —
# presumably the project's own wrapper, since it exposes model_training()/save_model(); confirm.
time_series_model = ARIMA()

# Fit the model; no data is passed in here — presumably the wrapper loads its own series (TODO confirm)
time_series_model.model_training()

# Persist the fitted model through the wrapper's own save routine
time_series_model.save_model()

# Elapsed seconds for instantiate + train + save;
# printed as "ARIMA" by the timing-summary cell
arima_training_duration = time.time() - training_start_time

In [21]:
# Wall-clock timestamp taken immediately before Random Forest training
start_time_random_forest = time.time()

# Instantiate the project's RandomForest wrapper
random_forest_model = RandomForest()

# Fit the model; no data is passed in here — presumably the wrapper loads its own training set (TODO confirm)
random_forest_model.model_training()

# Persist the fitted model through the wrapper's own save routine
random_forest_model.save_model()

# Elapsed seconds for instantiate + train + save;
# printed as "Random Forest" by the timing-summary cell
random_forest_training_duration = time.time() - start_time_random_forest

In [24]:
# 4. Boosting Models Training and Saving
# LightGBM and XGBoost are trained back to back, each with its own timer.

# --- LightGBM ---
# Wall-clock timestamp taken immediately before LightGBM training
lightgbm_start_time = time.time()

# Instantiate the project's LightGBM wrapper
lightgbm_boosting_model = LightGBM()

# Fit the model; no data is passed in here — presumably the wrapper loads its own training set (TODO confirm)
lightgbm_boosting_model.model_training()

# Persist the fitted model through the wrapper's own save routine
lightgbm_boosting_model.save_model()

# Elapsed seconds for instantiate + train + save; printed as "LightGBM" by the timing-summary cell
lightgbm_training_duration = time.time() - lightgbm_start_time

# --- XGBoost ---
# Wall-clock timestamp taken immediately before XGBoost training
xgboost_start_time = time.time()

# Instantiate the project's XGBoost wrapper
xgboost_boosting_model = XGBoost()

# Fit the model; as above, no data is passed in at the call site
xgboost_boosting_model.model_training()

# Persist the fitted model through the wrapper's own save routine
xgboost_boosting_model.save_model()

# Elapsed seconds for instantiate + train + save; printed as "XGBoost" by the timing-summary cell
xgboost_training_duration = time.time() - xgboost_start_time

In [7]:
# 5. Neural Network - Artificial Neural Network (ANN)

# Wall-clock timestamp taken immediately before ANN training.
# `start_time` is shared with the Logistic Regression and LSTM cells, so it is only meaningful inside this cell.
start_time = time.time()

# Instantiate the project's ANN wrapper
ann_network = ANN()

# Fit the network; no data is passed in here — presumably the wrapper loads its own training set (TODO confirm)
ann_network.model_training()

# Save the wrapped model object directly to <p.model_path>/<model_name>.h5.
# NOTE(review): unlike the non-neural training cells, this does not go through the wrapper's save_model() —
# confirm that is intentional. The .h5 suffix suggests a Keras HDF5 save; TODO confirm the type of `.model`.
ann_network.model.save(os.path.join(p.model_path, f'{ann_network.model_name}.h5'))

# Elapsed seconds for instantiate + train + save; printed as "ANN" by the timing-summary cell
ann_training_duration = time.time() - start_time

In [8]:
# 6. Neural Network - Long Short-Term Memory (LSTM)

# Wall-clock timestamp taken immediately before LSTM training.
# `start_time` is shared with the Logistic Regression and ANN cells, so it is only meaningful inside this cell.
start_time = time.time()

# Instantiate the project's LSTM wrapper (class name: LongShortTM)
lstm_network = LongShortTM()

# Fit the network; no data is passed in here — presumably the wrapper loads its own training set (TODO confirm)
lstm_network.model_training()

# Save the trained LSTM model object directly to <p.model_path>/<model_name>.h5.
# NOTE(review): unlike the non-neural training cells, this does not go through the wrapper's save_model() —
# confirm that is intentional. The .h5 suffix suggests a Keras HDF5 save; TODO confirm the type of `.model`.
lstm_network.model.save(os.path.join(p.model_path, f'{lstm_network.model_name}.h5'))

# Elapsed seconds for instantiate + train + save; printed as "LSTM" by the timing-summary cell
lstm_training_duration = time.time() - start_time

In [27]:
# 7. Ensemble of Models
# The voting and stacking ensembles are trained back to back, each with its own timer.

# --- Ensemble Voting ---

# Wall-clock timestamp taken immediately before Ensemble Voting training
start_time_voting = time.time()

# Instantiate the project's Ensemble_Voting wrapper
ensemble_voting = Ensemble_Voting()

# Fit the ensemble; no data or base models are passed in at the call site
ensemble_voting.model_training()

# Persist the fitted ensemble through the wrapper's own save routine
ensemble_voting.save_model()

# Elapsed seconds for instantiate + train + save; printed as "Voting" by the timing-summary cell
voting_training_duration = time.time() - start_time_voting

# --- Ensemble Stacking ---

# Wall-clock timestamp taken immediately before Ensemble Stacking training
start_time_stacking = time.time()

# Instantiate the project's Ensemble_Stacking wrapper
ensemble_stacking = Ensemble_Stacking()

# Fit the ensemble; no data or base models are passed in at the call site
ensemble_stacking.model_training()

# Persist the fitted ensemble through the wrapper's own save routine
ensemble_stacking.save_model()

# Elapsed seconds for instantiate + train + save; printed as "Stacking" by the timing-summary cell
stacking_training_duration = time.time() - start_time_stacking

In [28]:
# Print the time taken, in seconds, by the ETL step and by each model's training cell.
# Every row uses the same ":.2f" spec so the values line up in one column; a space flag (": .2f")
# would prepend a blank to non-negative values and push that row out of alignment.
# NOTE: every duration below is defined by an earlier cell, so all of those cells must have run
# in the current kernel session before this one.
print(f"""
The time taken (s) for the following processes are noted below:
ETL             : {etl_load_time:.2f}
Logistic        : {LR_training_duration:.2f}
ARIMA           : {arima_training_duration:.2f}
Random Forest   : {random_forest_training_duration:.2f}
LightGBM        : {lightgbm_training_duration:.2f}
XGBoost         : {xgboost_training_duration:.2f}
ANN             : {ann_training_duration:.2f}
LSTM            : {lstm_training_duration:.2f}
Voting          : {voting_training_duration:.2f}
Stacking        : {stacking_training_duration:.2f}
""")

In [5]:
def _evaluate_and_backtest(model_cls):
    """Instantiate ``model_cls``, evaluate it, backtest its strategy, and return the instance.

    Args:
        model_cls: A model wrapper class that can be constructed with no arguments and that
            exposes ``model_evaluation()`` and ``backtest_strategy()``.

    Returns:
        The model instance, after both calls have completed.
    """
    model = model_cls()
    # No `backtest` argument is passed, so both calls run the wrapper's default scenario.
    model.model_evaluation()
    model.backtest_strategy()
    return model


# Evaluate and backtest each model in turn. One model is fully processed before the next is
# constructed, and each instance stays bound to its own name so it can be inspected afterwards.
logistic_regression_model = _evaluate_and_backtest(Logistic_Regression)
arima_model_instance = _evaluate_and_backtest(ARIMA)
random_forest_model = _evaluate_and_backtest(RandomForest)
lightgbm_model_instance = _evaluate_and_backtest(LightGBM)
xgboost_model_instance = _evaluate_and_backtest(XGBoost)
ann_model_instance = _evaluate_and_backtest(ANN)
lstm_model_instance = _evaluate_and_backtest(LongShortTM)
voting_ensemble_model = _evaluate_and_backtest(Ensemble_Voting)
stacking_ensemble_model = _evaluate_and_backtest(Ensemble_Stacking)

# Compile the model evaluation reports.
# NOTE(review): presumably gathers the per-model reports written by the calls above — confirm.
compile_model_eval_reports()

In [4]:
def _stress_evaluate_and_backtest(model_cls):
    """Instantiate ``model_cls`` and run its evaluation and strategy backtest with ``backtest="stress"``.

    Args:
        model_cls: A model wrapper class that can be constructed with no arguments and whose
            ``model_evaluation`` and ``backtest_strategy`` methods accept a ``backtest`` keyword.

    Returns:
        The model instance, after both calls have completed.
    """
    model = model_cls()
    model.model_evaluation(backtest="stress")
    model.backtest_strategy(backtest="stress")
    return model


# Evaluate and backtest each model under the stress scenario. One model is fully processed before
# the next is constructed, and each instance stays bound to its own name for later inspection.
# NOTE(review): ARIMA is evaluated in the baseline evaluation cell but is absent here — confirm
# that the omission is intentional.
logistic_regression_model = _stress_evaluate_and_backtest(Logistic_Regression)
random_forest_model = _stress_evaluate_and_backtest(RandomForest)
lightgbm_classifier_model = _stress_evaluate_and_backtest(LightGBM)
xgboost_classifier_model = _stress_evaluate_and_backtest(XGBoost)
artificial_neural_network_model = _stress_evaluate_and_backtest(ANN)
lstm_network_model = _stress_evaluate_and_backtest(LongShortTM)
ensemble_voting_classifier = _stress_evaluate_and_backtest(Ensemble_Voting)
ensemble_stacking_classifier = _stress_evaluate_and_backtest(Ensemble_Stacking)

# Compile the model evaluation reports.
# NOTE(review): presumably gathers the per-model reports written by the calls above — confirm.
compile_model_eval_reports()

In [4]:
# Time SHAP value generation for Logistic Regression and Random Forest, one after the other.
# Each model gets a fresh wrapper instance; the return value of generate_shap_value(), if any, is discarded.

# Logistic Regression
start_time_logistic = time.time()  # wall-clock timestamp before the Logistic Regression run
logistic_regression_model = Logistic_Regression()  # fresh wrapper; rebinds the name used by earlier cells
logistic_regression_model.generate_shap_value()  # generate SHAP values through the wrapper
logistic_regression_shap_time = time.time() - start_time_logistic  # elapsed seconds: instantiate + SHAP generation

# Random Forest
start_time_rf = time.time()  # wall-clock timestamp before the Random Forest run
random_forest_model = RandomForest()  # fresh wrapper; rebinds the name used by earlier cells
random_forest_model.generate_shap_value()  # generate SHAP values through the wrapper
random_forest_shap_time = time.time() - start_time_rf  # elapsed seconds: instantiate + SHAP generation

In [5]:
# LightGBM: time SHAP value generation on a fresh wrapper instance
start_time_lgbm = time.time()  # wall-clock timestamp before the LightGBM run
lightgbm_model = LightGBM()  # fresh wrapper instance
lightgbm_model.generate_shap_value()  # generate SHAP values through the wrapper; any return value is discarded
lightgbm_shap_time = time.time() - start_time_lgbm  # elapsed seconds: instantiate + SHAP generation

In [6]:
# XGBoost: time SHAP value generation on a fresh wrapper instance
start_time_xgb = time.time()  # wall-clock timestamp before the XGBoost run
xgboost_model = XGBoost()  # fresh wrapper instance
xgboost_model.generate_shap_value()  # generate SHAP values through the wrapper; any return value is discarded
xgboost_shap_time = time.time() - start_time_xgb  # elapsed seconds: instantiate + SHAP generation

In [7]:
# Ensemble Voting: time SHAP value generation on a fresh wrapper instance
start_time_voting = time.time()  # wall-clock timestamp; rebinds the name also used by the ensemble training cell
ensemble_voting_model = Ensemble_Voting()  # fresh wrapper instance
ensemble_voting_model.generate_shap_value()  # generate SHAP values through the wrapper; any return value is discarded
voting_shap_time = time.time() - start_time_voting  # elapsed seconds: instantiate + SHAP generation

In [8]:
# Ensemble Stacking: time SHAP value generation on a fresh wrapper instance
start_time_stacking = time.time()  # wall-clock timestamp; rebinds the name also used by the ensemble training cell
ensemble_stacking_model = Ensemble_Stacking()  # fresh wrapper instance
ensemble_stacking_model.generate_shap_value()  # generate SHAP values through the wrapper; any return value is discarded
stacking_shap_time = time.time() - start_time_stacking  # elapsed seconds: instantiate + SHAP generation

In [5]:
%%capture

# ANN (Artificial Neural Network): time SHAP value generation on a fresh wrapper instance.
# The %%capture magic on the first line suppresses the output this cell would otherwise show.
start_time_ann = time.time()  # wall-clock timestamp before the ANN run
ann_model = ANN()  # fresh wrapper instance
ann_model.generate_shap_value()  # generate SHAP values through the wrapper; any return value is discarded
ann_shap_time = time.time() - start_time_ann  # elapsed seconds: instantiate + SHAP generation

In [6]:
%%capture

# LSTM (Long Short-Term Memory): time SHAP value generation on a fresh wrapper instance.
# The %%capture magic on the first line suppresses the output this cell would otherwise show.
start_time_lstm = time.time()  # wall-clock timestamp before the LSTM run
lstm_model = LongShortTM()  # fresh wrapper instance (class name: LongShortTM)
lstm_model.generate_shap_value()  # generate SHAP values through the wrapper; any return value is discarded
lstm_shap_time = time.time() - start_time_lstm  # elapsed seconds: instantiate + SHAP generation