# In [None]:
import pandas as pd 
import xgboost as xgb
import numpy as np
import pickle

from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import f1_score, accuracy_score
from sklearn.linear_model import LogisticRegression
from bayes_opt.bayes_logistic import bayes_logistic
from sklearn.tree import DecisionTreeClassifier
from bayes_opt.bayes_tree import bayes_tree
from bayes_opt.bayes_xgboost import bayes_xgboost
from bayes_opt.utils import test_learning


def _load_pickle(path):
    """Return the object unpickled from *path*, closing the file afterwards.

    NOTE(security): unpickling can execute arbitrary code, so only point this
    at files produced by a trusted pipeline.
    """
    with open(path, "rb") as handle:
        return pickle.load(handle)


# Index collections for the train / test partitions.
train_sets = _load_pickle("datasets/train_sets_bank.dat")
test_sets = _load_pickle("datasets/test_sets_bank.dat")
# Two validation objects kept together; they are handed to the optimisers
# unchanged through config["val_sets"].
val_sets = [
    _load_pickle("datasets/val_sets_bank.dat"),
    _load_pickle("datasets/val_test_sets_bank.dat"),
]
features = _load_pickle("datasets/bank_features")
returns = _load_pickle("datasets/bank_returns")
outcomes = _load_pickle("datasets/bank_outcomes")

# Largest return available in each row of `returns`, stored as floats.
# Assumes `returns` is a 2-D NumPy array (one row per sample) -- TODO confirm.
rmax = np.max(returns, axis=1).astype(float)
    
# Loop sizes: outer repetitions, and splits evaluated inside each repetition.
replication, n_splits = 1, 1
# n_steps_xgb is forwarded as config["n_steps"] and used as the third argument
# of xgb.train; n_trials is forwarded to the optimisers as config["n_trials"].
n_steps_xgb, n_trials = 4, 2
# Collects one score array per repetition.
results = list()

def _record_scores(scores, split, model_idx, probs, test_returns, test_outcomes, best_return):
    """Evaluate one model's test-set probabilities and store its three metrics.

    Column layout of ``scores`` for a given ``model_idx``
    (0 = logistic regression, 1 = decision tree, 2 = XGBoost):

    * ``model_idx``          -- return from ``test_learning`` divided by ``best_return``
    * ``3 + 2 * model_idx``  -- accuracy of the outcomes from ``test_learning``
    * ``4 + 2 * model_idx``  -- F1 score of the same outcomes
    """
    test_return, test_outcome = test_learning(probs, test_returns)
    scores[split, model_idx] = test_return / best_return
    scores[split, 3 + 2 * model_idx] = accuracy_score(test_outcomes, test_outcome)
    scores[split, 4 + 2 * model_idx] = f1_score(test_outcomes, test_outcome)


for rep in range(replication):

    # NOTE(review): 10 columns are allocated but only columns 0-8 are written.
    bank_ml_results = np.zeros((n_splits, 10))

    for i in range(n_splits):

        config = {
            "cval": True,
            "rep": rep,
            "set": i,
            "n_trials": n_trials,
            "val_sets": val_sets,
            "n_steps": n_steps_xgb,
        }

        # Slice the data once per split instead of re-indexing for every call.
        # Assumes the optimisers and estimators do not modify their inputs in
        # place -- TODO confirm for the bayes_opt helpers.
        train_idx = train_sets[rep][i]
        test_idx = test_sets[rep][i]
        x_train, y_train, r_train = features[train_idx], outcomes[train_idx], returns[train_idx]
        x_test, y_test, r_test = features[test_idx], outcomes[test_idx], returns[test_idx]
        # Denominator shared by all three return ratios for this split.
        best_return = rmax[test_idx].sum()

        # Scaling is fitted on the training rows only and reused for the test
        # rows; only the logistic regression uses the scaled features.
        scaler = MinMaxScaler()
        x_train_scaled = scaler.fit_transform(x_train)
        x_test_scaled = scaler.transform(x_test)

        # --- Logistic regression -------------------------------------------
        bayes = bayes_logistic(config, x_train_scaled, y_train, r_train)
        best_params_logistic = bayes.bayes()
        c, penalty = best_params_logistic.get("c", ""), best_params_logistic.get("penalty", "")
        logistic_kwargs = {"C": c, "penalty": penalty, "solver": "saga"}
        if penalty == "elasticnet":
            # l1_ratio is only supplied for the elastic-net penalty.
            logistic_kwargs["l1_ratio"] = best_params_logistic.get("l1", "")
        clf = LogisticRegression(**logistic_kwargs)
        clf_fit = clf.fit(x_train_scaled, y_train)
        probs = clf_fit.predict_proba(x_test_scaled)
        _record_scores(bank_ml_results, i, 0, probs, r_test, y_test, best_return)

        # --- Decision tree (unscaled features) -----------------------------
        bayes = bayes_tree(config, x_train, y_train, r_train)
        best_params_tree = bayes.bayes()
        depth = best_params_tree.get("max_depth", "")
        min_samples = best_params_tree.get("min_samples_leaf", "")
        cp = best_params_tree.get("ccp_alpha", "")
        dt = DecisionTreeClassifier(
            min_samples_leaf=min_samples, ccp_alpha=cp, max_depth=depth
        ).fit(x_train, y_train)
        probs = dt.predict_proba(x_test)
        _record_scores(bank_ml_results, i, 1, probs, r_test, y_test, best_return)

        # --- XGBoost (unscaled features) -----------------------------------
        bayes = bayes_xgboost(config, x_train, y_train, r_train)
        best_params_xgb = bayes.bayes()
        param = {
            "eta": best_params_xgb.get("eta", ""),
            "max_depth": best_params_xgb.get("max_depth", ""),
            "min_child_weight": best_params_xgb.get("min_child_weight", ""),
            "gamma": best_params_xgb.get("gamma", ""),
            "colsample_bytree": best_params_xgb.get("colsample_bytree", ""),
            "objective": "multi:softprob",
            "num_class": 2,
        }
        model = xgb.train(
            param,
            xgb.DMatrix(x_train, label=y_train),
            num_boost_round=config["n_steps"],
        )
        probs = model.predict(xgb.DMatrix(x_test, label=y_test))
        _record_scores(bank_ml_results, i, 2, probs, r_test, y_test, best_return)

    results.append(bank_ml_results)
            