# Setup: dependencies and imports

In [1]:
#!pip install lightgbm
#!pip install xgboost
#!pip install catboost

In [2]:
import BayesianHyperOptClasses as bho
import numpy as np

# Load Data (randomly generated placeholder arrays)

In [3]:
# Placeholder data: small arrays of uniform random numbers in [0, 1) that let the
# notebook run end to end without external files (presumably stand-ins for the
# real train/test sets -- confirm before drawing conclusions from the scores).
# A seeded generator makes every "Restart & Run All" produce the same arrays,
# so the scores reported further down are reproducible.
SEED = 42
N_SAMPLES, N_FEATURES = 10, 10
rng = np.random.default_rng(SEED)

train_X = rng.random((N_SAMPLES, N_FEATURES))  # training features
train_y = rng.random(N_SAMPLES)                # training targets
test_X = rng.random((N_SAMPLES, N_FEATURES))   # test features
test_y = rng.random(N_SAMPLES)                 # test targets

# XGBoost - Hyperparameter Optimization

In [4]:
# NOTE(review): the two file names below are commented out and not used by any code
# visible in this notebook -- presumably the real feature/return files; confirm.
#train_features_file = "train_101features_X_20140205_20190204"
#train_returns_file = "train_101features_returns_20140205_20190204"

# Search space for the XGBoost hyperparameters: one tuple per parameter
# (presumably (lower, upper) bounds for the Bayesian optimizer -- verify in bho).
# NOTE(review): the recorded output of the final-training cell warns
# `Parameters: { "lambda_1" } are not used.`, i.e. XGBoost does not recognise the
# key "lambda_1" (its L2 term is called `lambda` / `reg_lambda`). The key is kept
# as-is here because bho may depend on it; the mapping presumably needs fixing there.
search_space_xgb = {
    "lambda_1": (1e-9, 1.0),
    "alpha": (1e-9, 1.0),
    "max_depth": (1, 9),
    "eta": (1e-9, 1.0),
    "gamma": (1e-8, 1.0),
}

# Optimizer object wrapping the training data and the search space above.
# NOTE(review): is_reg_task is the *string* "True", not the boolean True --
# confirm which one bho.XGBoost_HyperOpt actually expects.
XGBOpt = bho.XGBoost_HyperOpt(
    X=train_X,
    y=train_y,
    train_test_split_perc=0.8,
    search_space=search_space_xgb,
    is_reg_task="True",
    perf_metric="rmse",
    max_or_min="min",
)

In [5]:
# Run the hyperparameter search over search_space_xgb.
# The recorded output shows three training runs for init_points = 1, n_iter = 2
# (presumably 1 initial point + 2 optimizer-guided iterations -- confirm in bho),
# followed by the printed dictionary of the selected parameters.
# NOTE(review): the commented-out scipy pin below is presumably a compatibility
# workaround for the optimizer -- confirm whether it is still needed.
# !pip install scipy==1.7
XGBOpt.optimize_hyperparameters(init_points = 1, n_iter = 2)

Now right before setting the optimal_params field
Start Training of XGBoost
Training of XGBoost took: 0.09033656120300293 sec.
Performance for the 0 iteration: -0.17985118
The average performance is -0.17985118
Start Training of XGBoost
Training of XGBoost took: 0.046909332275390625 sec.
Performance for the 0 iteration: -0.17142601
The average performance is -0.17142601
Start Training of XGBoost
Training of XGBoost took: 0.030627012252807617 sec.
Performance for the 0 iteration: -0.19337648
The average performance is -0.19337648
{'alpha': 0.6233601161684426, 'eta': 0.01582124383073504, 'gamma': 0.929437234449389, 'lambda_1': 0.690896917826027, 'max_depth': 8.978582803611843}


In [6]:
# Fit the final XGBoost model with the parameters found by
# optimize_hyperparameters(), predict on the test features and score the result.
xgb_opt_model = XGBOpt.train_optimal_model()
pred_xgb = xgb_opt_model.predict(test_X)
# Scoring goes through the optimizer object's own helper; the object was built
# with perf_metric = "rmse", so this is presumably an RMSE -- confirm in bho.
perf_xgb = XGBOpt.prediction_performance_score(test_y, pred_xgb)
# NOTE(review): "Peformance" is misspelled in the printed label.
print("Peformance XGBoost: ", perf_xgb)

Parameters: { "lambda_1" } are not used.

Peformance XGBoost:  0.28261376


# LightGBM - Hyperparameter Optimization

In [7]:
# Build the optimizer object for the LightGBM model.

# NOTE(review): the two file names below are commented out and not used by any code
# visible in this notebook -- presumably the real feature/return files; confirm.
#train_features_file = "train_101features_X_20140205_20190204"
#train_returns_file = "train_101features_returns_20140205_20190204"

# Search space for the LightGBM hyperparameters: one tuple per parameter
# (presumably (lower, upper) bounds -- verify in bho). The trailing comments give
# the type LightGBM declares for each parameter; the recorded optimum contains
# real values even for the integer ones (e.g. max_depth 3.83...), so bho
# presumably rounds/casts them before training -- verify.
# NOTE(review): min_data_in_leaf starts at 200 while the placeholder training set
# has only 10 rows, so these bounds look tuned for the real dataset.
# The two commented-out entries use a `trial.suggest_categorical(...)` call and
# are not part of this search space.
search_space_lgbm = {
    #"n_estimators": trial.suggest_categorical("n_estimators", [10000]),
    "learning_rate": (0.01, 0.3),       # float
    "num_leaves": (20, 3000),           # int
    "max_depth": (3, 12),               # int
    "min_data_in_leaf": (200, 10000),   # int
    "lambda_l1": (0, 100),              # float
    "lambda_l2": (0, 100),              # float
    "min_gain_to_split": (0, 15),       # float
    "bagging_fraction": (0.2, 0.95),    # float
    #"bagging_freq": trial.suggest_categorical("bagging_freq", [1]),
    "feature_fraction": (0.2, 0.95),    # float
}

# Optimizer object wrapping the training data and the search space above.
# NOTE(review): is_reg_task is the *string* "True", not the boolean True --
# confirm which one bho.LightGBM_HyperOpt actually expects.
LGBMOpt = bho.LightGBM_HyperOpt(
    X=train_X,
    y=train_y,
    train_test_split_perc=0.8,
    search_space=search_space_lgbm,
    is_reg_task="True",
    perf_metric="rmse",
    max_or_min="min",
)

In [8]:
# Run the hyperparameter search over search_space_lgbm.
# The recorded output shows three training runs for init_points = 1, n_iter = 2
# (presumably 1 initial point + 2 optimizer-guided iterations -- confirm in bho).
# NOTE(review): the recorded log lines say "Start Training of XGBoost" even though
# this is the LightGBM optimizer -- presumably a copy-pasted message inside
# bho.LightGBM_HyperOpt; verify that LightGBM is really what gets trained.
# NOTE(review): the commented-out scipy pin below is presumably a compatibility
# workaround for the optimizer -- confirm whether it is still needed.
# !pip install scipy==1.7
LGBMOpt.optimize_hyperparameters(init_points = 1, n_iter = 2)

Now right before setting the optimal_params field
Start Training of XGBoost
Training of XGBoost took: 0.0075304508209228516 sec.
Performance for the 0 iteration: -0.18354627
The average performance is -0.18354627
Start Training of XGBoost
Training of XGBoost took: 0.007042884826660156 sec.
Performance for the 0 iteration: -0.18692348
The average performance is -0.18692348
Start Training of XGBoost
Training of XGBoost took: 0.005797386169433594 sec.
Performance for the 0 iteration: -0.31238918
The average performance is -0.31238918
{'bagging_fraction': 0.5127665035269304, 'feature_fraction': 0.7402433700816187, 'lambda_l1': 0.011437481734488664, 'lambda_l2': 30.233257263183976, 'learning_rate': 0.05255920833696278, 'max_depth': 3.83104735291918, 'min_data_in_leaf': 2025.3500715011749, 'min_gain_to_split': 5.183410905645716, 'num_leaves': 1202.3670732073965}


In [9]:
# Fit the final LightGBM model with the parameters found by
# optimize_hyperparameters(), predict on the test features and score the result.
lgbm_opt_model = LGBMOpt.train_optimal_model()
pred_lgbm = lgbm_opt_model.predict(test_X)
# Scoring goes through the optimizer object's own helper; the object was built
# with perf_metric = "rmse", so this is presumably an RMSE -- confirm in bho.
perf_lgbm = LGBMOpt.prediction_performance_score(test_y, pred_lgbm)
# NOTE(review): "Peformance" is misspelled in the printed label.
print("Peformance LightGBM: ", perf_lgbm)

Peformance LightGBM:  0.28250146


# CatBoost - Hyperparameter Optimization

In [10]:
# Build the optimizer object for the CatBoost model.

# NOTE(review): the two file names below are commented out and not used by any code
# visible in this notebook -- presumably the real feature/return files; confirm.
#train_features_file = "train_101features_X_20140205_20190204"
#train_returns_file = "train_101features_returns_20140205_20190204"

# Search space for the CatBoost hyperparameters: one tuple per parameter
# (presumably (lower, upper) bounds -- verify in bho).
# The commented-out scale_pos_weight entry uses a `Real(...)` dimension object
# instead of a tuple and is not part of this search space.
search_space_cat = {
    'iterations': (100, 1000),
    'depth': (1, 8),
    'learning_rate': (0.01, 1.0),
    'random_strength': (1e-9, 10),
    'bagging_temperature': (0.0, 1.0),
    'border_count': (1, 255),
    'l2_leaf_reg': (2, 30),
    #'scale_pos_weight':Real(0.01, 1.0, 'uniform')
}

# Optimizer object wrapping the training data and the search space above.
# NOTE(review): is_reg_task is the *string* "True", not the boolean True --
# confirm which one bho.CatBoost_HyperOpt actually expects.
CatOpt = bho.CatBoost_HyperOpt(
    X=train_X,
    y=train_y,
    train_test_split_perc=0.8,
    search_space=search_space_cat,
    is_reg_task="True",
    perf_metric="rmse",
    max_or_min="min",
)

In [11]:
# Run the hyperparameter search over search_space_cat.
# init_points = 1 and n_iter = 2 are the same settings used for the other two
# models (presumably 1 initial point + 2 optimizer-guided iterations -- confirm
# in bho). The recorded output is CatBoost's per-iteration training log.
# NOTE(review): the commented-out scipy pin below is presumably a compatibility
# workaround for the optimizer -- confirm whether it is still needed.
# !pip install scipy==1.7
CatOpt.optimize_hyperparameters(init_points = 1, n_iter = 2)

Now right before setting the optimal_params field
Start Training of CatBoost
0:	learn: 0.1277785	total: 139ms	remaining: 51.7s
1:	learn: 0.1259273	total: 140ms	remaining: 25.9s
2:	learn: 0.1241028	total: 140ms	remaining: 17.2s
3:	learn: 0.1223049	total: 140ms	remaining: 12.9s
4:	learn: 0.1205329	total: 141ms	remaining: 10.3s
5:	learn: 0.1187866	total: 141ms	remaining: 8.59s
6:	learn: 0.1170657	total: 141ms	remaining: 7.35s
7:	learn: 0.1153696	total: 141ms	remaining: 6.42s
8:	learn: 0.1136982	total: 141ms	remaining: 5.7s
9:	learn: 0.1120509	total: 141ms	remaining: 5.12s
10:	learn: 0.1104276	total: 142ms	remaining: 4.64s
11:	learn: 0.1088277	total: 142ms	remaining: 4.25s
12:	learn: 0.1072510	total: 142ms	remaining: 3.92s
13:	learn: 0.1056972	total: 142ms	remaining: 3.63s
14:	learn: 0.1041658	total: 142ms	remaining: 3.38s
15:	learn: 0.1026567	total: 142ms	remaining: 3.17s
16:	learn: 0.1011694	total: 143ms	remaining: 2.98s
17:	learn: 0.0997037	total: 143ms	remaining: 2.81s
18:	learn: 0.098

In [12]:
# Fit the final CatBoost model with the parameters found by
# optimize_hyperparameters(), predict on the test features and score the result.
# The model is trained exactly once: the cell previously called
# train_optimal_model() twice in a row and discarded the first result.
cat_opt_model = CatOpt.train_optimal_model()
pred_cat = cat_opt_model.predict(test_X)
# Scoring goes through the optimizer object's own helper; the object was built
# with perf_metric = "rmse", so this is presumably an RMSE -- confirm in bho.
perf_cat = CatOpt.prediction_performance_score(test_y, pred_cat)
# NOTE(review): "Peformance" is misspelled in the printed label; kept unchanged so
# the label matches the XGBoost and LightGBM cells.
print("Peformance CatBoost: ", perf_cat)

0:	learn: 0.1713369	total: 336us	remaining: 125ms
1:	learn: 0.1710044	total: 1.05ms	remaining: 194ms
2:	learn: 0.1700019	total: 1.22ms	remaining: 150ms
3:	learn: 0.1698257	total: 1.35ms	remaining: 124ms
4:	learn: 0.1686921	total: 1.47ms	remaining: 108ms
5:	learn: 0.1685247	total: 1.82ms	remaining: 111ms
6:	learn: 0.1679145	total: 1.96ms	remaining: 102ms
7:	learn: 0.1669702	total: 2.09ms	remaining: 95ms
8:	learn: 0.1666827	total: 2.2ms	remaining: 88.9ms
9:	learn: 0.1666823	total: 2.32ms	remaining: 83.9ms
10:	learn: 0.1660954	total: 2.43ms	remaining: 79.7ms
11:	learn: 0.1654526	total: 2.56ms	remaining: 76.8ms
12:	learn: 0.1653685	total: 2.67ms	remaining: 73.9ms
13:	learn: 0.1628930	total: 2.79ms	remaining: 71.3ms
14:	learn: 0.1620144	total: 2.9ms	remaining: 69ms
15:	learn: 0.1614350	total: 3.01ms	remaining: 66.9ms
16:	learn: 0.1598106	total: 3.12ms	remaining: 65.1ms
17:	learn: 0.1596343	total: 3.23ms	remaining: 63.5ms
18:	learn: 0.1573455	total: 3.34ms	remaining: 62ms
19:	learn: 0.157008

# Prediction Average: LightGBM + XGBoost + CatBoost

In [13]:
# Equal-weight ensemble: element-wise mean of the three models' test predictions.
pred_sum = pred_xgb + pred_lgbm + pred_cat
avg_pred = pred_sum / 3  # the division already yields a new array

# Score the averaged prediction with the CatBoost optimizer's scoring helper
# (all three optimizers were built with perf_metric = "rmse", so presumably any
# of them would give the same number -- confirm in bho).
comb_perf = CatOpt.prediction_performance_score(test_y, avg_pred)

# Individual scores next to the combined one for comparison.
# `!s` forces str() so the scores print exactly as with string concatenation.
print(f"LightGBM performance: {perf_lgbm!s}")
print(f"XGBoost performance: {perf_xgb!s}")
print(f"CatBoost performance: {perf_cat!s}")
print(f"Combined performance: {comb_perf!s}")

LightGBM performance: 0.28250146
XGBoost performance: 0.28261376
CatBoost performance: 0.36631755
Combined performance: 0.30871936
