In [None]:
import pandas as pd
import numpy as np
from copy import deepcopy
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.graphics.tsaplots import plot_acf,plot_pacf
from sklearn.model_selection import GridSearchCV,RandomizedSearchCV,train_test_split
import skopt
from skopt import BayesSearchCV
from sklearn.metrics import accuracy_score,f1_score,roc_auc_score,confusion_matrix,roc_curve
import xgboost as xgb
import time
import warnings

## LightGBM

In [None]:
# Candidate values for the LightGBM hyper-parameter search. Each keyword is an
# estimator parameter name and each value is the list of discrete options
# considered for it.
param_grid = dict(
    class_weight=[None, 'balanced'],
    objective=['binary', 'cross_entropy'],
    boosting_type=['gbdt', 'goss', 'dart'],
    data_sample_strategy=['bagging', 'goss'],
    tree_learner=['serial', 'feature', 'data', 'voting'],
    # every integer from 30 up to and including 199
    num_leaves=[*range(30, 200)],
    learning_rate=[0.01, 0.03, 0.06, 0.1, 0.15, 0.2, 0.25, 0.300000012, 0.4, 0.5, 0.6, 0.7],
    # 20000, 22000, ..., 298000
    subsample_for_bin=[*range(20000, 300000, 2000)],
    # 20, 25, ..., 495
    min_child_samples=[*range(20, 500, 5)],
    reg_alpha=[0, 0.1, 0.2, 0.4, 0.8, 1.6, 3.2, 6.4, 12.8, 25.6, 51.2, 102.4, 200],
    reg_lambda=[0, 0.1, 0.2, 0.4, 0.8, 1.6, 3.2, 6.4, 12.8, 25.6, 51.2, 102.4, 200],
    # ten evenly spaced fractions between 0.6 and 1 (both ends included)
    colsample_bytree=list(np.linspace(0.6, 1, 10)),
)

In [None]:
from lightgbm import LGBMClassifier

# Benchmark model: LightGBM with library defaults.
lgbm = LGBMClassifier()

# Fit on the training split. The labels must be y_train so they line up with
# X_train_scaled, the same pairing used for the search further down.
lgbm.fit(X_train_scaled, y_train)

# Parameter overrides for the base estimator, stored as {name: [value]}.
# Left empty, so the base estimator below keeps the library defaults.
default_params = {}
# Full parameter set of the benchmark model, kept for reference.
gparams = lgbm.get_params()

# Independent copy of the overrides.
params = deepcopy(default_params)

# Search space: every entry is a list of discrete candidate values.
param_grid = {'class_weight': [None, 'balanced'],
              'objective': ['binary', 'cross_entropy'],
              'boosting_type': ['gbdt', 'goss', 'dart'],
              'tree_learner': ['serial', 'feature', 'data', 'voting'],
              'num_leaves': list(range(30, 200)),
              'learning_rate': [0.01, 0.03, 0.06, 0.1, 0.15, 0.2, 0.25, 0.300000012, 0.4, 0.5, 0.6, 0.7],
              'subsample_for_bin': list(range(20000, 300000, 2000)),
              'min_child_samples': list(range(20, 500, 5)),
              'reg_alpha': [0,0.1,0.2,0.4,0.8,1.6,3.2,6.4,12.8,25.6,51.2,102.4,200],
              'reg_lambda': [0,0.1,0.2,0.4,0.8,1.6,3.2,6.4,12.8,25.6,51.2,102.4,200],
              'colsample_bytree': list(np.linspace(0.6, 1, 10))
}

# Start of the timed section; t0 is read again where the elapsed time is computed.
t0 = time.time()

# Search budget: the total count of candidate values listed across all
# dimensions (a sum of list lengths, not the size of the Cartesian grid).
gcvj = sum(len(candidates) for candidates in param_grid.values())
bcvj = int(gcvj)

# Unwrap the single-element lists of the overrides into plain keyword values.
default_params_lgbm = {name: values[0] for name, values in default_params.items()}

# Base LightGBM estimator handed to the Bayesian search.
lgbm_0 = LGBMClassifier(**default_params_lgbm)

# Bayesian hyper-parameter search: bcvj sampled configurations, each scored by
# 10-fold cross-validated accuracy.
clf = BayesSearchCV(estimator = lgbm_0,
                    search_spaces = param_grid,
                    n_iter = bcvj,
                    scoring = 'accuracy',
                    cv = 10,
                    return_train_score = True,
                    verbose = 3)

clf.fit(X_train_scaled, y_train)

# Per-iteration cross-validation results as a dataframe.
df = pd.DataFrame(clf.cv_results_)

In [None]:
# --- training split: predictions, confusion matrix, accuracy ---
train_predictions_lgbm = clf.predict(X_train_scaled)
cfm_train_lgbm = confusion_matrix(y_train, train_predictions_lgbm)
accs_train_lgbm = accuracy_score(y_train, train_predictions_lgbm)

# --- test split: predictions, confusion matrix, accuracy ---
test_predictions_lgbm = clf.predict(X_test_scaled)
cfm_test_lgbm = confusion_matrix(y_test, test_predictions_lgbm)
accs_test_lgbm = accuracy_score(y_test, test_predictions_lgbm)

# --- unseen data: evaluation currently switched off ---
#unseen_predictions_lgbm = clf.predict(unseen_X)
#cfm_unseen_lgbm = confusion_matrix(unseen_y, unseen_predictions_lgbm)
#accs_unseen_lgbm = accuracy_score(unseen_y, unseen_predictions_lgbm)

# hyper-parameters of the best configuration found by the search
bp_lgbm = clf.best_params_

# new results container; the 'lgbm_bcv' entry holds copies of the fitted
# search object and of the cv results table, plus the metrics computed above
results_dict = dict()
results_dict['lgbm_bcv'] = dict(
    classifier=deepcopy(clf),
    cv_results=df.copy(),
    cfm_train=cfm_train_lgbm,
    cfm_test=cfm_test_lgbm,
    #cfm_unseen=cfm_unseen_lgbm,
    train_accuracy=accs_train_lgbm,
    test_accuracy=accs_test_lgbm,
    #unseen_accuracy=accs_unseen_lgbm,
    best_params=bp_lgbm,
)

# stop the clock and record the seconds elapsed since t0
t1 = time.time()
bcvt = t1 - t0

In [None]:
# Display the best hyper-parameters stored under the 'lgbm_bcv' entry.
results_dict['lgbm_bcv']['best_params']

## Random Forest

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Benchmark model: random forest with library defaults.
rf = RandomForestClassifier()

# Fit on the training split. The labels must be y_train so they line up with
# X_train_scaled, the same pairing used for the search further down.
rf.fit(X_train_scaled, y_train)

# Parameter overrides for the base estimator, stored as {name: [value]}.
# Left empty, so the base estimator below keeps the library defaults.
default_params = {}
# Full parameter set of the benchmark model, kept for reference.
gparams = rf.get_params()

# Independent copy of the overrides.
params = deepcopy(default_params)

# Search space: every entry is a list of discrete candidate values.
param_grid = {'bootstrap': [True, False],
              # 'auto' was only an alias of 'sqrt' for classifiers and is
              # rejected by scikit-learn >= 1.3, so it is replaced by 'log2'
              # to keep two distinct, valid options.
              'max_features': ['sqrt', 'log2'],
              'max_depth': list(range(5, 100, 1)),
              'n_estimators': list(range(5, 100, 1)),
              'min_samples_leaf': list(range(1, 20, 1)),
              'min_samples_split': list(range(2, 50, 2)),
              }

# Start of the timed section; t0 is read again where the elapsed time is computed.
t0 = time.time()

# Search budget: the total count of candidate values listed across all
# dimensions (a sum of list lengths, not the size of the Cartesian grid).
gcvj = sum(len(candidates) for candidates in param_grid.values())
bcvj = int(gcvj)

# Unwrap the single-element lists of the overrides into plain keyword values.
# NOTE: the variable keeps its original "_lgbm" name even though it feeds the
# random forest here, so any other cell that reads this name is unaffected.
default_params_lgbm = {name: values[0] for name, values in default_params.items()}

# Base random-forest estimator handed to the Bayesian search.
rf_0 = RandomForestClassifier(**default_params_lgbm)

# Bayesian hyper-parameter search: bcvj sampled configurations, each scored by
# 10-fold cross-validated accuracy.
clf = BayesSearchCV(estimator = rf_0,
                    search_spaces = param_grid,
                    n_iter = bcvj,
                    scoring = 'accuracy',
                    cv = 10,
                    return_train_score = True,
                    verbose = 3)

clf.fit(X_train_scaled, y_train)

# Per-iteration cross-validation results as a dataframe.
df = pd.DataFrame(clf.cv_results_)

In [None]:
#predictions
train_predictions_rf = clf.predict(X_train_scaled)
test_predictions_rf = clf.predict(X_test_scaled)
#unseen_predictions_rf = clf.predict(unseen_X)

#confusion matrices
cfm_train_rf = confusion_matrix(y_train, train_predictions_rf)
cfm_test_rf = confusion_matrix(y_test, test_predictions_rf)
#cfm_unseen_rf = confusion_matrix(unseen_y, unseen_predictions_rf)

#accuracy scores
accs_train_rf = accuracy_score(y_train, train_predictions_rf)
accs_test_rf = accuracy_score(y_test, test_predictions_rf)
#accs_unseen_rf = accuracy_score(unseen_y, unseen_predictions_rf)

#best parameters
bp_rf = clf.best_params_

#storing computed values in results dictionary
# Re-creating the dictionary here would discard entries stored by earlier
# sections (such as 'lgbm_bcv'), so it is only created when it does not exist.
if 'results_dict' not in globals():
    results_dict = {}
results_dict['rf_bcv'] = {'classifier': deepcopy(clf),
                          'cv_results': df.copy(),
                          'cfm_train': cfm_train_rf,
                          'cfm_test': cfm_test_rf,
                          #'cfm_unseen': cfm_unseen_rf,
                          'train_accuracy': accs_train_rf,
                          'test_accuracy': accs_test_rf,
                          #'unseen_accuracy': accs_unseen_rf,
                          'best_params': bp_rf}

#stop time
t1 = time.time()

#elapsed time in seconds since t0
bcvt = t1 - t0

In [None]:
# Display the best hyper-parameters stored under the 'rf_bcv' entry.
results_dict['rf_bcv']['best_params']