In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

import timeit

from sklearn.metrics import confusion_matrix, classification_report, precision_recall_curve
from sklearn.metrics import roc_auc_score, roc_curve, f1_score, precision_score, recall_score, accuracy_score, auc
from hmeasure import h_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, precision_score, recall_score, f1_score, precision_recall_fscore_support,roc_auc_score, accuracy_score
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold
from sklearn.pipeline import Pipeline

import optuna

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras import regularizers
from keras.layers import Dense, Dropout
from keras.regularizers import l2
from keras.layers import LeakyReLU
from keras.wrappers.scikit_learn import KerasClassifier
from keras import initializers
from keras.models import Sequential
from keras.optimizers import Adam

# Read the data using csv
path_train = 'credit_risk-dataset-cleanedtraining.csv'
df_train = pd.read_csv(path_train, encoding = "ISO-8859-1")

path_test = 'credit_risk-dataset-cleanedtesting.csv'
df_test = pd.read_csv(path_test, encoding = "ISO-8859-1")

# Prin 5 forst lines of df
df_train.head()

Unnamed: 0,person_age,person_income,person_emp_length,loan_amnt,loan_int_rate,loan_status,loan_percent_income,cb_person_cred_hist_length,person_home_ownership,loan_intent,loan_grade,cb_person_default_on_file
0,23,62500,7.0,26000,11.71,1,0.42,2,3,3,1,0
1,23,120000,1.0,25600,12.69,0,0.21,3,3,1,1,0
2,22,60000,0.0,25475,10.99,1,0.42,3,3,5,1,0
3,23,83000,7.0,25300,10.99,1,0.3,3,3,1,1,0
4,21,42500,3.0,25000,12.73,1,0.59,3,3,1,2,1


In [2]:
X_test, y_test = df_test.iloc[:,:-1], df_test.iloc[:,-1]

X_train, y_train = df_train.iloc[:,:-1], df_train.iloc[:,-1]

X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, stratify=y_train, test_size=0.2)

In [3]:
class tuning_methods:
    
    def __init__(self, X_train, X_val, y_train,  y_val, X_test, y_test):
        self.X_train = X_train
        self.X_val = X_val
        self.X_test = X_test
        self.y_test = y_test
        self.y_train = y_train
        self.y_val = y_val
        
    # Define the Keras model
    def create_model(self, units_1, alpha_1, dropout_1, l2_kern_1, l2_bias_1,units_2, alpha_2, dropout_2, l2_kern_2, l2_bias_2, lr, batch_size):        # 
        model = Sequential()
        model.add(Dense(units_1, 
                input_shape=(X_train.shape[1],), 
                activation = LeakyReLU(alpha=alpha_1),
                kernel_regularizer = l2(l2_kern_1),
                bias_regularizer = l2(l2_bias_1),
                kernel_initializer = initializers.RandomNormal(mean=0.0, stddev=0.05)
        ))
        model.add(Dropout(dropout_1))
    
        model.add(Dense(units_2,
                    activation =LeakyReLU(alpha=alpha_2),
                    kernel_regularizer = l2(l2_kern_2),
                    bias_regularizer = l2(l2_bias_2),
                    kernel_initializer = initializers.RandomNormal(mean=0.0, stddev=0.05)
        ))
        model.add(Dropout(dropout_2))
    
        model.add(Dense(1, activation='sigmoid'))
    
        optimizer = Adam(learning_rate=lr)
        model.compile(optimizer=optimizer,
                  loss='binary_crossentropy', 
                  metrics=['AUC'])
        return model

    
    # Define the Optuna study
    def objective(self, trial):
        
        # Define the pipeline
        pipeline = Pipeline([
                ('scaler', StandardScaler()),
                ('classifier', KerasClassifier(build_fn=self.create_model, verbose=0))
                ])
        # Define hyperparameters range values for tuning
        units_1 = trial.suggest_int('units_1', 2, 40)
        units_2 = trial.suggest_int('units_2', 2, 40)
        alpha_1 = trial.suggest_float('alpha_1', 0.01, 0.1)
        alpha_2 = trial.suggest_float('alpha_2', 0.01, 0.1)
        dropout_1 = trial.suggest_float("dropout_1", 0.2, 0.5)    
        dropout_2 = trial.suggest_float("dropout_2", 0.2, 0.5)    
        l2_kern_1 = trial.suggest_float("l2_kern_1",  1e-5, 1e-2, log=True)
        l2_kern_2 = trial.suggest_float("l2_kern_2",  1e-5, 1e-2, log=True)
        l2_bias_1 = trial.suggest_float("l2_bias_1",  1e-5, 1e-2, log=True)
        l2_bias_2 = trial.suggest_float("l2_bias_2",  1e-5, 1e-2, log=True)
        lr = trial.suggest_float("lr", 5e-5, 1e-2, log=True)
        batch_size = trial.suggest_categorical('batch_size', [16, 32, 64, 128, 256, 512, 1024])
        epochs = 200
        validation_data = (self.X_val, self.y_val)
        early_stopping = EarlyStopping(monitor='loss', patience=5, restore_best_weights=True)
        
        # Set parameters into pipeline
        pipeline.set_params(classifier__units_1=units_1,
                        classifier__alpha_1=alpha_1,
                        classifier__l2_kern_1=l2_kern_1,
                        classifier__l2_bias_1=l2_bias_1,
                        classifier__dropout_1=dropout_1,
                        classifier__units_2=units_2,
                        classifier__alpha_2=alpha_2,
                        classifier__l2_kern_2=l2_kern_2,
                        classifier__l2_bias_2=l2_bias_2,
                        classifier__dropout_2=dropout_2,
                        classifier__lr=lr,
                        classifier__epochs = epochs,
                        classifier__batch_size = batch_size,
                        classifier__validation_data = validation_data,
                        classifier__callbacks=[early_stopping])
        
        # Apply cross validation
        kfold = StratifiedKFold(n_splits=3, shuffle=True, random_state=2023)
        scores = cross_val_score(pipeline, self.X_train, self.y_train, cv=kfold, scoring="roc_auc", error_score='raise')
    
        if trial.should_prune():
                raise optuna.TrialPruned()

        return np.mean(scores)

    def Tuning(self, n_trials=100, sampler = optuna.samplers.TPESampler()):
        
        start = timeit.default_timer()

        study = optuna.create_study(direction="maximize",             
                            sampler=sampler, 
                            pruner= optuna.pruners.MedianPruner()
                           )
        study.optimize(self.objective, n_trials=n_trials)  # to be converged it needs atleast 200 trials 

        print("Number of finished trials: {}".format(len(study.trials)))

        print("Best trial:")
        trial = study.best_trial

        print("  Value: {}".format(trial.value))

        stop = timeit.default_timer()

        time_opt = stop - start
        params_TPE = []

        for key, value in trial.params.items():
            params_TPE.append([key,value])
            print("    {}: {}".format(key, value))
            
        params_TPE.append(["time",round(time_opt/60,2)])

        return params_TPE
    
    def Best_MLP_model(self, params):
        
        # Define hyperpameters
        units_1 = params[0][1]
        alpha_1 = np.round(params[2][1],3)
        dropout_1 = np.round(params[4][1],2)
        lr = np.round(params[10][1],5)
        batchs = params[11][1]
        kernel_regularizer_1=np.round(params[6][1],5)
        bias_regularizer_1=np.round(params[8][1],5)
        units_2 = params[1][1]
        alpha_2 = np.round(params[3][1],3)
        dropout_2 = np.round(params[5][1],2)
        kernel_regularizer_2=np.round(params[7][1],5)
        bias_regularizer_2=np.round(params[9][1],5)

        # Define the pipeline
        pipeline = Pipeline([
                    ('scaler', StandardScaler()),
                    ('classifier', KerasClassifier(build_fn=tun.create_model, verbose=0))
                ])

        # Set hyperparameters
        epochs = 200
        validation_data = (self.X_val, self.y_val)
        early_stopping = EarlyStopping(monitor='loss', patience=5, restore_best_weights=True)
        pipeline.set_params(classifier__units_1=units_1,
                            classifier__alpha_1 =alpha_1,
                            classifier__l2_kern_1=kernel_regularizer_1,
                            classifier__l2_bias_1=bias_regularizer_1,
                            classifier__dropout_1=dropout_1,
                            classifier__units_2=units_2,
                            classifier__alpha_2=alpha_2,
                            classifier__l2_kern_2=kernel_regularizer_2,
                            classifier__l2_bias_2=bias_regularizer_2,
                            classifier__dropout_2=dropout_2,
                            classifier__lr=lr,
                            classifier__epochs = epochs,
                            classifier__batch_size = batchs,
                            classifier__validation_data = validation_data,
                            classifier__callbacks=[early_stopping])
        
        # Use cross validation to monitor the deferrence between training and testing data as an evidence of overfitting
        kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=2023)
        scores_train = cross_val_score(pipeline, self.X_train, self.y_train, cv=kfold, scoring="roc_auc", error_score='raise')
        mean_score_train = np.mean(scores_train)

        kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=2023)
        scores_test = cross_val_score(pipeline, self.X_test, self.y_test, cv=kfold, scoring="roc_auc", error_score='raise')
        mean_score_test = np.mean(scores_test)

        print(f"Train mean AUC score is {mean_score_train} while test mean AUC score is {mean_score_test}.")
      
        # Fit the pipeline to the training data
        history = pipeline.fit(self.X_train, self.y_train)

        #score = pipeline.evaluate(X_test, y_test, verbose=0)  

        y_pred_prob_tpe = pipeline.predict(self.X_test)

        y_pred_tpe = y_pred_prob_tpe.round()

        print(classification_report(self.y_test, y_pred_tpe, target_names = ['No','Yes']))

        acc = accuracy_score(self.y_test, y_pred_tpe)
        pre = precision_score(self.y_test, y_pred_tpe)
        rec = recall_score(self.y_test, y_pred_tpe)
        f1 = f1_score(self.y_test, y_pred_tpe)
        auc = roc_auc_score(self.y_test, y_pred_tpe)
        
        y_test_array = np.array(self.y_test)
        y_pred_tpe_array = np.array(y_pred_tpe)
        H_measure = h_score(y_test_array, y_pred_tpe_array)

        opt = [acc, pre, rec, f1, auc, H_measure]

        scores = pd.DataFrame(opt, index =['Accuracy', 'Precision', 'Recall', 'F1-score', 'AUC', "H-measure"], 
                              columns =['Tree-structured Parzen Estimator'])
        
        return scores 

In [4]:
tun = tuning_methods(X_train, X_val, y_train,  y_val, X_test, y_test)
trials = 10

# Tree-structured Parzen Estimator
params_tpe = tun.Tuning(n_trials=trials, sampler = optuna.samplers.TPESampler())
tpe_scores = tun.Best_MLP_model(params_tpe)

[I 2024-12-01 00:13:45,286] A new study created in memory with name: no-name-d07ae8d8-3c67-4905-b7cb-0d84bb0662b3




[I 2024-12-01 00:17:16,732] Trial 0 finished with value: 0.9015909647622594 and parameters: {'units_1': 5, 'units_2': 32, 'alpha_1': 0.06553687325901972, 'alpha_2': 0.020576215510027335, 'dropout_1': 0.3668909767982732, 'dropout_2': 0.205201757591992, 'l2_kern_1': 1.676048572109567e-05, 'l2_kern_2': 0.00906762139839308, 'l2_bias_1': 0.0006255326430483562, 'l2_bias_2': 0.0032143426143430315, 'lr': 8.44510401748531e-05, 'batch_size': 64}. Best is trial 0 with value: 0.9015909647622594.




[I 2024-12-01 00:18:02,260] Trial 1 finished with value: 0.9004098742180263 and parameters: {'units_1': 2, 'units_2': 26, 'alpha_1': 0.09501457586910023, 'alpha_2': 0.03410412420360929, 'dropout_1': 0.3128959805458805, 'dropout_2': 0.33772812932443214, 'l2_kern_1': 0.0010734604755378028, 'l2_kern_2': 3.219127301058884e-05, 'l2_bias_1': 7.74090993075638e-05, 'l2_bias_2': 0.002081263625740702, 'lr': 0.00019774953647210724, 'batch_size': 128}. Best is trial 0 with value: 0.9015909647622594.




[I 2024-12-01 00:24:37,488] Trial 2 finished with value: 0.9019076770017785 and parameters: {'units_1': 40, 'units_2': 24, 'alpha_1': 0.08620037943770793, 'alpha_2': 0.060109509624928094, 'dropout_1': 0.4905506449461053, 'dropout_2': 0.20190210868377134, 'l2_kern_1': 0.001896604188719022, 'l2_kern_2': 0.003508368490164961, 'l2_bias_1': 0.006461540810495219, 'l2_bias_2': 0.000416686261837043, 'lr': 0.00020083785344434394, 'batch_size': 16}. Best is trial 2 with value: 0.9019076770017785.




[I 2024-12-01 00:26:50,668] Trial 3 finished with value: 0.9032230139867328 and parameters: {'units_1': 39, 'units_2': 8, 'alpha_1': 0.045642795390563984, 'alpha_2': 0.07851143574681622, 'dropout_1': 0.35506962382322715, 'dropout_2': 0.41047197382808076, 'l2_kern_1': 1.6279472589824243e-05, 'l2_kern_2': 0.0005619221796386721, 'l2_bias_1': 0.00021536523097419674, 'l2_bias_2': 0.00019362525509310135, 'lr': 0.0006240957274556814, 'batch_size': 16}. Best is trial 3 with value: 0.9032230139867328.




[I 2024-12-01 00:27:24,574] Trial 4 finished with value: 0.9016891715408951 and parameters: {'units_1': 12, 'units_2': 36, 'alpha_1': 0.05697459808441794, 'alpha_2': 0.0878018605076696, 'dropout_1': 0.2838123793399635, 'dropout_2': 0.4474600891381164, 'l2_kern_1': 0.001118334618509445, 'l2_kern_2': 0.000489185478500184, 'l2_bias_1': 3.3922774554253165e-05, 'l2_bias_2': 0.0002324077458210086, 'lr': 0.0007508849509017906, 'batch_size': 1024}. Best is trial 3 with value: 0.9032230139867328.




[I 2024-12-01 00:29:57,197] Trial 5 finished with value: 0.9026689687759255 and parameters: {'units_1': 23, 'units_2': 15, 'alpha_1': 0.024241656864587457, 'alpha_2': 0.057949063108354286, 'dropout_1': 0.20111902940473841, 'dropout_2': 0.41500255581339074, 'l2_kern_1': 2.032796582333357e-05, 'l2_kern_2': 0.0013766183811490284, 'l2_bias_1': 5.467478444726923e-05, 'l2_bias_2': 0.003922544303248508, 'lr': 0.00114636938110198, 'batch_size': 16}. Best is trial 3 with value: 0.9032230139867328.




[I 2024-12-01 00:32:38,019] Trial 6 finished with value: 0.9002438789116014 and parameters: {'units_1': 13, 'units_2': 28, 'alpha_1': 0.0441328047085962, 'alpha_2': 0.07470690973218676, 'dropout_1': 0.26916805778313224, 'dropout_2': 0.37036629481057093, 'l2_kern_1': 0.0002270232064396782, 'l2_kern_2': 0.0003040722596395823, 'l2_bias_1': 1.586315505050386e-05, 'l2_bias_2': 0.007450400954189523, 'lr': 5.085894601018923e-05, 'batch_size': 256}. Best is trial 3 with value: 0.9032230139867328.




[I 2024-12-01 00:34:28,236] Trial 7 finished with value: 0.9012561597513987 and parameters: {'units_1': 31, 'units_2': 37, 'alpha_1': 0.026231280483026996, 'alpha_2': 0.07519550224153441, 'dropout_1': 0.29714694598498836, 'dropout_2': 0.21017470535401722, 'l2_kern_1': 0.007201754233136097, 'l2_kern_2': 1.507252880528258e-05, 'l2_bias_1': 0.000763930086235962, 'l2_bias_2': 0.00027029784402391696, 'lr': 0.00025428546364726986, 'batch_size': 512}. Best is trial 3 with value: 0.9032230139867328.




[I 2024-12-01 00:35:32,441] Trial 8 finished with value: 0.899233677202468 and parameters: {'units_1': 28, 'units_2': 2, 'alpha_1': 0.013911237501730955, 'alpha_2': 0.06515704276213732, 'dropout_1': 0.214203208499625, 'dropout_2': 0.2799923597914939, 'l2_kern_1': 1.1421550131962918e-05, 'l2_kern_2': 0.008601407181924632, 'l2_bias_1': 0.0013061659283658987, 'l2_bias_2': 7.184152527982936e-05, 'lr': 0.0007726332424909194, 'batch_size': 512}. Best is trial 3 with value: 0.9032230139867328.




[I 2024-12-01 00:37:02,244] Trial 9 finished with value: 0.9007772310649628 and parameters: {'units_1': 17, 'units_2': 23, 'alpha_1': 0.05445865755024316, 'alpha_2': 0.0780486051271178, 'dropout_1': 0.23724027860104244, 'dropout_2': 0.4666057416404802, 'l2_kern_1': 3.9967447062097633e-05, 'l2_kern_2': 0.0003797745490409892, 'l2_bias_1': 6.388693682490004e-05, 'l2_bias_2': 6.038597049266501e-05, 'lr': 0.00015680799794630656, 'batch_size': 256}. Best is trial 3 with value: 0.9032230139867328.


Number of finished trials: 10
Best trial:
  Value: 0.9032230139867328
    units_1: 39
    units_2: 8
    alpha_1: 0.045642795390563984
    alpha_2: 0.07851143574681622
    dropout_1: 0.35506962382322715
    dropout_2: 0.41047197382808076
    l2_kern_1: 1.6279472589824243e-05
    l2_kern_2: 0.0005619221796386721
    l2_bias_1: 0.00021536523097419674
    l2_bias_2: 0.00019362525509310135
    lr: 0.0006240957274556814
    batch_size: 16
Train mean AUC score is 0.9024410292047929 while test mean AUC score is 0.8912968726430961.
              precision    recall  f1-score   support

          No       0.92      0.86      0.89      4814
         Yes       0.50      0.67      0.57      1021

    accuracy                           0.83      5835
   macro avg       0.71      0.76      0.73      5835
weighted avg       0.85      0.83      0.84      5835



In [5]:
# Genetic Algorithm
params_ga = tun.Tuning(n_trials=trials, sampler = optuna.samplers.NSGAIISampler())
ga_scores = tun.Best_MLP_model(params_ga)
ga_scores = ga_scores.rename(columns={'Tree-structured Parzen Estimator': 'Genetic Algorithm'})

[I 2024-12-01 00:50:32,823] A new study created in memory with name: no-name-621183f4-8e0e-486d-b9b4-254cb802799b




[I 2024-12-01 00:51:14,515] Trial 0 finished with value: 0.9008820350229909 and parameters: {'units_1': 5, 'units_2': 28, 'alpha_1': 0.08288072382836201, 'alpha_2': 0.012721903267294082, 'dropout_1': 0.36736526786187773, 'dropout_2': 0.4517674488215062, 'l2_kern_1': 1.924309716050961e-05, 'l2_kern_2': 0.0007074040023484705, 'l2_bias_1': 2.895716089732384e-05, 'l2_bias_2': 5.823015394116879e-05, 'lr': 0.00020631958214136758, 'batch_size': 128}. Best is trial 0 with value: 0.9008820350229909.




[I 2024-12-01 00:52:22,794] Trial 1 finished with value: 0.9016762840576836 and parameters: {'units_1': 9, 'units_2': 8, 'alpha_1': 0.03303779294016903, 'alpha_2': 0.08825529423846405, 'dropout_1': 0.3970743669029417, 'dropout_2': 0.38404062134434336, 'l2_kern_1': 0.000770175110876312, 'l2_kern_2': 0.00026073059396302853, 'l2_bias_1': 0.0021092065228513765, 'l2_bias_2': 0.0066378116015168385, 'lr': 0.0002703798488878733, 'batch_size': 64}. Best is trial 1 with value: 0.9016762840576836.




[I 2024-12-01 00:54:01,376] Trial 2 finished with value: 0.9006969177720604 and parameters: {'units_1': 5, 'units_2': 35, 'alpha_1': 0.04159900013931, 'alpha_2': 0.0683456082930343, 'dropout_1': 0.25834054555840735, 'dropout_2': 0.2403767452639776, 'l2_kern_1': 7.213327264305548e-05, 'l2_kern_2': 0.00014035891956901914, 'l2_bias_1': 0.0020146564623762476, 'l2_bias_2': 0.000641323374684864, 'lr': 0.00019481417315314703, 'batch_size': 128}. Best is trial 1 with value: 0.9016762840576836.




[I 2024-12-01 00:54:30,385] Trial 3 finished with value: 0.901852879967303 and parameters: {'units_1': 25, 'units_2': 30, 'alpha_1': 0.09626921982129917, 'alpha_2': 0.07073903367697931, 'dropout_1': 0.24248975351941213, 'dropout_2': 0.4998717382961363, 'l2_kern_1': 9.41439661019295e-05, 'l2_kern_2': 7.562851178335467e-05, 'l2_bias_1': 3.417375259146618e-05, 'l2_bias_2': 0.00015013747032709628, 'lr': 0.007955130091911013, 'batch_size': 128}. Best is trial 3 with value: 0.901852879967303.




[I 2024-12-01 00:54:56,628] Trial 4 finished with value: 0.9001448135992955 and parameters: {'units_1': 9, 'units_2': 12, 'alpha_1': 0.08248010731144603, 'alpha_2': 0.05348329018025415, 'dropout_1': 0.31862213093925584, 'dropout_2': 0.34116828574435953, 'l2_kern_1': 0.00020926304459379413, 'l2_kern_2': 0.001836754613441191, 'l2_bias_1': 1.1906579477067236e-05, 'l2_bias_2': 2.672132091439371e-05, 'lr': 0.005759472290409036, 'batch_size': 512}. Best is trial 3 with value: 0.901852879967303.




[I 2024-12-01 00:57:22,147] Trial 5 finished with value: 0.9002583996582993 and parameters: {'units_1': 12, 'units_2': 19, 'alpha_1': 0.07719046478972477, 'alpha_2': 0.025700129341647462, 'dropout_1': 0.4636668940565389, 'dropout_2': 0.21102247418870063, 'l2_kern_1': 1.1311262090898889e-05, 'l2_kern_2': 0.0003050444541068952, 'l2_bias_1': 0.00525812627181464, 'l2_bias_2': 1.8453885250728087e-05, 'lr': 0.0009422678590650917, 'batch_size': 16}. Best is trial 3 with value: 0.901852879967303.




[I 2024-12-01 01:03:43,162] Trial 6 finished with value: 0.900924589909986 and parameters: {'units_1': 18, 'units_2': 17, 'alpha_1': 0.012643174468838626, 'alpha_2': 0.04256539744999144, 'dropout_1': 0.41775397974038386, 'dropout_2': 0.28181035725040954, 'l2_kern_1': 0.00018933066799333556, 'l2_kern_2': 0.0010497113025432143, 'l2_bias_1': 6.411967893958489e-05, 'l2_bias_2': 0.000198075352709132, 'lr': 0.00010179024910610483, 'batch_size': 16}. Best is trial 3 with value: 0.901852879967303.




[I 2024-12-01 01:04:29,434] Trial 7 finished with value: 0.90106123037198 and parameters: {'units_1': 23, 'units_2': 21, 'alpha_1': 0.04199326092289172, 'alpha_2': 0.05222740521250725, 'dropout_1': 0.2811548331967324, 'dropout_2': 0.3930604553605025, 'l2_kern_1': 0.004722450755088554, 'l2_kern_2': 0.005885202295477239, 'l2_bias_1': 1.2462269662614452e-05, 'l2_bias_2': 7.94581134343182e-05, 'lr': 0.0008275783492939509, 'batch_size': 512}. Best is trial 3 with value: 0.901852879967303.




[I 2024-12-01 01:05:40,196] Trial 8 finished with value: 0.8998233537847011 and parameters: {'units_1': 39, 'units_2': 16, 'alpha_1': 0.09683638627699157, 'alpha_2': 0.05918912122094277, 'dropout_1': 0.22919458098803508, 'dropout_2': 0.43580440678089827, 'l2_kern_1': 0.00040355508207244714, 'l2_kern_2': 0.0027578911881398875, 'l2_bias_1': 3.523852137473243e-05, 'l2_bias_2': 0.004711723118524078, 'lr': 0.0001535352706836133, 'batch_size': 1024}. Best is trial 3 with value: 0.901852879967303.




[I 2024-12-01 01:08:04,301] Trial 9 finished with value: 0.9037836143789847 and parameters: {'units_1': 29, 'units_2': 31, 'alpha_1': 0.09471848422964016, 'alpha_2': 0.06255844850552322, 'dropout_1': 0.33495752474112656, 'dropout_2': 0.3408913309439916, 'l2_kern_1': 1.0617667322086375e-05, 'l2_kern_2': 4.69057230841221e-05, 'l2_bias_1': 3.184231685053928e-05, 'l2_bias_2': 0.00023008301666822893, 'lr': 0.0001272329478702959, 'batch_size': 32}. Best is trial 9 with value: 0.9037836143789847.


Number of finished trials: 10
Best trial:
  Value: 0.9037836143789847
    units_1: 29
    units_2: 31
    alpha_1: 0.09471848422964016
    alpha_2: 0.06255844850552322
    dropout_1: 0.33495752474112656
    dropout_2: 0.3408913309439916
    l2_kern_1: 1.0617667322086375e-05
    l2_kern_2: 4.69057230841221e-05
    l2_bias_1: 3.184231685053928e-05
    l2_bias_2: 0.00023008301666822893
    lr: 0.0001272329478702959
    batch_size: 32




Train mean AUC score is 0.9024505186226245 while test mean AUC score is 0.8919526489885439.
              precision    recall  f1-score   support

          No       0.95      0.84      0.89      4814
         Yes       0.51      0.78      0.61      1021

    accuracy                           0.83      5835
   macro avg       0.73      0.81      0.75      5835
weighted avg       0.87      0.83      0.84      5835



In [6]:
# Quasi Monte Carlo
params_mc = tun.Tuning(n_trials=trials, sampler = optuna.samplers.QMCSampler())
mc_scores = tun.Best_MLP_model(params_mc)
mc_scores = mc_scores.rename(columns={'Tree-structured Parzen Estimator': 'Quasi Monte Carlo'})

  
[I 2024-12-01 01:17:33,825] A new study created in memory with name: no-name-dbdf1c7b-c647-41c1-93f8-5adce4455f7c




[I 2024-12-01 01:19:28,078] Trial 0 finished with value: 0.9028448777357078 and parameters: {'units_1': 34, 'units_2': 20, 'alpha_1': 0.029097104364290284, 'alpha_2': 0.09106654818062127, 'dropout_1': 0.31234623497971115, 'dropout_2': 0.2994373327401675, 'l2_kern_1': 4.6535935620337764e-05, 'l2_kern_2': 0.0006709338021310612, 'l2_bias_1': 9.558549156289927e-05, 'l2_bias_2': 0.003043756897592826, 'lr': 0.00040732188457528394, 'batch_size': 64}. Best is trial 0 with value: 0.9028448777357078.




[I 2024-12-01 01:21:26,032] Trial 1 finished with value: 0.8573140236413179 and parameters: {'units_1': 2, 'units_2': 2, 'alpha_1': 0.01, 'alpha_2': 0.01, 'dropout_1': 0.2, 'dropout_2': 0.2, 'l2_kern_1': 0.0006261538693505821, 'l2_kern_2': 0.0004024669032233186, 'l2_bias_1': 0.0025371242426594407, 'l2_bias_2': 0.0017788064807028746, 'lr': 5.000000000000004e-05, 'batch_size': 256}. Best is trial 0 with value: 0.9028448777357078.




[I 2024-12-01 01:27:14,939] Trial 2 finished with value: 0.9021407047061407 and parameters: {'units_1': 21, 'units_2': 21, 'alpha_1': 0.05500000000000001, 'alpha_2': 0.05500000000000001, 'dropout_1': 0.35, 'dropout_2': 0.35, 'l2_kern_1': 0.00031622776601683783, 'l2_kern_2': 0.00031622776601683783, 'l2_bias_1': 0.00031622776601683783, 'l2_bias_2': 0.00031622776601683783, 'lr': 0.0007071067811865483, 'batch_size': 16}. Best is trial 0 with value: 0.9028448777357078.




[I 2024-12-01 01:27:52,120] Trial 3 finished with value: 0.900631235239952 and parameters: {'units_1': 31, 'units_2': 11, 'alpha_1': 0.0325, 'alpha_2': 0.0325, 'dropout_1': 0.425, 'dropout_2': 0.425, 'l2_kern_1': 5.6234132519034893e-05, 'l2_kern_2': 0.0017782794100389236, 'l2_bias_1': 0.0017782794100389236, 'l2_bias_2': 0.0017782794100389236, 'lr': 0.002659147948472495, 'batch_size': 256}. Best is trial 0 with value: 0.9028448777357078.




[I 2024-12-01 01:30:03,850] Trial 4 finished with value: 0.9004012384625452 and parameters: {'units_1': 11, 'units_2': 31, 'alpha_1': 0.0775, 'alpha_2': 0.0775, 'dropout_1': 0.275, 'dropout_2': 0.275, 'l2_kern_1': 0.0017782794100389236, 'l2_kern_2': 5.6234132519034893e-05, 'l2_bias_1': 5.6234132519034893e-05, 'l2_bias_2': 5.6234132519034893e-05, 'lr': 0.00018803015465431968, 'batch_size': 32}. Best is trial 0 with value: 0.9028448777357078.




[I 2024-12-01 01:30:48,386] Trial 5 finished with value: 0.901381367198984 and parameters: {'units_1': 16, 'units_2': 16, 'alpha_1': 0.06625, 'alpha_2': 0.08875000000000001, 'dropout_1': 0.3125, 'dropout_2': 0.23750000000000002, 'l2_kern_1': 0.00013335214321633237, 'l2_kern_2': 0.004216965034285825, 'l2_bias_1': 0.004216965034285825, 'l2_bias_2': 0.0007498942093324562, 'lr': 0.005156692688606234, 'batch_size': 256}. Best is trial 0 with value: 0.9028448777357078.




[I 2024-12-01 01:32:30,312] Trial 6 finished with value: 0.9036199229371612 and parameters: {'units_1': 36, 'units_2': 36, 'alpha_1': 0.02125, 'alpha_2': 0.043750000000000004, 'dropout_1': 0.4625, 'dropout_2': 0.3875, 'l2_kern_1': 0.004216965034285825, 'l2_kern_2': 0.00013335214321633237, 'l2_bias_1': 0.00013335214321633237, 'l2_bias_2': 2.3713737056616547e-05, 'lr': 0.00036463323686085564, 'batch_size': 128}. Best is trial 6 with value: 0.9036199229371612.




[I 2024-12-01 01:34:10,402] Trial 7 finished with value: 0.9006042897083487 and parameters: {'units_1': 26, 'units_2': 6, 'alpha_1': 0.08875000000000001, 'alpha_2': 0.06625, 'dropout_1': 0.3875, 'dropout_2': 0.4625, 'l2_kern_1': 2.3713737056616547e-05, 'l2_kern_2': 2.3713737056616547e-05, 'l2_bias_1': 2.3713737056616547e-05, 'l2_bias_2': 0.00013335214321633237, 'lr': 9.696137237434293e-05, 'batch_size': 128}. Best is trial 6 with value: 0.9036199229371612.




[I 2024-12-01 01:35:13,844] Trial 8 finished with value: 0.9010013691352793 and parameters: {'units_1': 6, 'units_2': 26, 'alpha_1': 0.043750000000000004, 'alpha_2': 0.02125, 'dropout_1': 0.23750000000000002, 'dropout_2': 0.3125, 'l2_kern_1': 0.0007498942093324562, 'l2_kern_2': 0.0007498942093324562, 'l2_bias_1': 0.0007498942093324562, 'l2_bias_2': 0.004216965034285825, 'lr': 0.0013712408783810375, 'batch_size': 128}. Best is trial 6 with value: 0.9036199229371612.




[I 2024-12-01 01:39:00,158] Trial 9 finished with value: 0.898835532984648 and parameters: {'units_1': 9, 'units_2': 14, 'alpha_1': 0.094375, 'alpha_2': 0.04937500000000001, 'dropout_1': 0.36875, 'dropout_2': 0.29375, 'l2_kern_1': 0.00020535250264571456, 'l2_kern_2': 0.0064938163157621165, 'l2_bias_1': 0.0064938163157621165, 'l2_bias_2': 8.659643233600651e-05, 'lr': 0.0019095372132033872, 'batch_size': 16}. Best is trial 6 with value: 0.9036199229371612.


Number of finished trials: 10
Best trial:
  Value: 0.9036199229371612
    units_1: 36
    units_2: 36
    alpha_1: 0.02125
    alpha_2: 0.043750000000000004
    dropout_1: 0.4625
    dropout_2: 0.3875
    l2_kern_1: 0.004216965034285825
    l2_kern_2: 0.00013335214321633237
    l2_bias_1: 0.00013335214321633237
    l2_bias_2: 2.3713737056616547e-05
    lr: 0.00036463323686085564
    batch_size: 128




Train mean AUC score is 0.9033872319123919 while test mean AUC score is 0.8914537697134675.
              precision    recall  f1-score   support

          No       0.97      0.82      0.89      4814
         Yes       0.50      0.86      0.64      1021

    accuracy                           0.83      5835
   macro avg       0.74      0.84      0.76      5835
weighted avg       0.89      0.83      0.84      5835



In [7]:
# Random Sampler
params_rs = tun.Tuning(n_trials=trials, sampler = optuna.samplers.RandomSampler())
rs_scores = tun.Best_MLP_model(params_rs)
rs_scores = rs_scores.rename(columns={'Tree-structured Parzen Estimator': 'Random Sampler'})

[I 2024-12-01 01:43:09,998] A new study created in memory with name: no-name-957e86ed-cef2-40e0-b035-d451a2d3fb83




[I 2024-12-01 01:44:14,641] Trial 0 finished with value: 0.9015470823682813 and parameters: {'units_1': 23, 'units_2': 7, 'alpha_1': 0.08915244891602771, 'alpha_2': 0.04326421639257537, 'dropout_1': 0.3730528951709152, 'dropout_2': 0.37406201928462834, 'l2_kern_1': 0.0003226073236081091, 'l2_kern_2': 0.003921952326418541, 'l2_bias_1': 0.00014674354536796123, 'l2_bias_2': 0.00012801527059783042, 'lr': 0.0031097268085320843, 'batch_size': 128}. Best is trial 0 with value: 0.9015470823682813.




[I 2024-12-01 01:45:06,003] Trial 1 finished with value: 0.9025066263760347 and parameters: {'units_1': 25, 'units_2': 40, 'alpha_1': 0.05031867950334902, 'alpha_2': 0.07712956830042926, 'dropout_1': 0.225927736075979, 'dropout_2': 0.4103492723733395, 'l2_kern_1': 0.003947212354095434, 'l2_kern_2': 0.0013373655676857852, 'l2_bias_1': 1.8235413333385304e-05, 'l2_bias_2': 0.00818075070452638, 'lr': 0.0006763007811968942, 'batch_size': 512}. Best is trial 1 with value: 0.9025066263760347.




[I 2024-12-01 01:47:11,818] Trial 2 finished with value: 0.9017486673605072 and parameters: {'units_1': 36, 'units_2': 13, 'alpha_1': 0.037223158285590044, 'alpha_2': 0.04987463454095484, 'dropout_1': 0.2251635488955478, 'dropout_2': 0.43013874066382923, 'l2_kern_1': 0.00012099338854482007, 'l2_kern_2': 0.003626889406993568, 'l2_bias_1': 3.051820204224524e-05, 'l2_bias_2': 0.005745783556309709, 'lr': 0.00020499970647538978, 'batch_size': 128}. Best is trial 1 with value: 0.9025066263760347.




[I 2024-12-01 01:47:54,601] Trial 3 finished with value: 0.8996608063808456 and parameters: {'units_1': 21, 'units_2': 33, 'alpha_1': 0.09933094405235067, 'alpha_2': 0.04438114676904828, 'dropout_1': 0.26636946158812486, 'dropout_2': 0.4304333985191402, 'l2_kern_1': 0.0005553122930922061, 'l2_kern_2': 0.0012209340326288962, 'l2_bias_1': 7.104926227659414e-05, 'l2_bias_2': 0.0001092612285519099, 'lr': 0.00985364132996012, 'batch_size': 64}. Best is trial 1 with value: 0.9025066263760347.




[I 2024-12-01 01:50:46,542] Trial 4 finished with value: 0.9009214868031626 and parameters: {'units_1': 6, 'units_2': 3, 'alpha_1': 0.02366009916760472, 'alpha_2': 0.06945671450725754, 'dropout_1': 0.4019830506935689, 'dropout_2': 0.28697737320375893, 'l2_kern_1': 3.9058000949469376e-05, 'l2_kern_2': 6.04570213130712e-05, 'l2_bias_1': 7.632926232478824e-05, 'l2_bias_2': 0.005853316931514501, 'lr': 0.001164550050314094, 'batch_size': 16}. Best is trial 1 with value: 0.9025066263760347.




[I 2024-12-01 01:51:32,890] Trial 5 finished with value: 0.9015258315678499 and parameters: {'units_1': 6, 'units_2': 11, 'alpha_1': 0.04755008029731297, 'alpha_2': 0.08646370887575669, 'dropout_1': 0.46373682623819573, 'dropout_2': 0.4989699427475551, 'l2_kern_1': 2.5453073034203985e-05, 'l2_kern_2': 0.0020350309633112534, 'l2_bias_1': 1.4939415967477574e-05, 'l2_bias_2': 0.0010125765870340464, 'lr': 0.008756162366975385, 'batch_size': 64}. Best is trial 1 with value: 0.9025066263760347.




[I 2024-12-01 01:54:29,174] Trial 6 finished with value: 0.9012935199428064 and parameters: {'units_1': 30, 'units_2': 37, 'alpha_1': 0.03103356014614346, 'alpha_2': 0.01792808509770106, 'dropout_1': 0.4763825472227947, 'dropout_2': 0.3049963704789793, 'l2_kern_1': 0.006709965790947348, 'l2_kern_2': 0.00020699759575657244, 'l2_bias_1': 0.0023138183770515766, 'l2_bias_2': 0.0017258678445988744, 'lr': 7.050936536152531e-05, 'batch_size': 128}. Best is trial 1 with value: 0.9025066263760347.




[I 2024-12-01 02:14:12,195] Trial 7 finished with value: 0.8995925968081617 and parameters: {'units_1': 40, 'units_2': 5, 'alpha_1': 0.08934452810902219, 'alpha_2': 0.07039705132425203, 'dropout_1': 0.2788273500980887, 'dropout_2': 0.4010450540575675, 'l2_kern_1': 0.008973989455869223, 'l2_kern_2': 0.0011203223435687906, 'l2_bias_1': 0.00039618628989743183, 'l2_bias_2': 0.002642038154078671, 'lr': 0.00046187224011183243, 'batch_size': 256}. Best is trial 1 with value: 0.9025066263760347.




[I 2024-12-01 02:16:54,477] Trial 8 finished with value: 0.9043930316442058 and parameters: {'units_1': 37, 'units_2': 22, 'alpha_1': 0.03224051675481995, 'alpha_2': 0.0317179029708391, 'dropout_1': 0.46271585050565556, 'dropout_2': 0.36509184563962327, 'l2_kern_1': 3.622493638135818e-05, 'l2_kern_2': 0.0008523686229688028, 'l2_bias_1': 0.002170436288504063, 'l2_bias_2': 6.520843995428655e-05, 'lr': 0.0005257393966034638, 'batch_size': 32}. Best is trial 8 with value: 0.9043930316442058.




[I 2024-12-01 02:17:53,843] Trial 9 finished with value: 0.900285159383421 and parameters: {'units_1': 39, 'units_2': 15, 'alpha_1': 0.047293006169479374, 'alpha_2': 0.04403904692701241, 'dropout_1': 0.26688532839314627, 'dropout_2': 0.4924821892785271, 'l2_kern_1': 0.0008168345187721416, 'l2_kern_2': 1.5420389070655598e-05, 'l2_bias_1': 0.009589309265798608, 'l2_bias_2': 0.0005078508191399476, 'lr': 0.0062813019982575794, 'batch_size': 32}. Best is trial 8 with value: 0.9043930316442058.


Number of finished trials: 10
Best trial:
  Value: 0.9043930316442058
    units_1: 37
    units_2: 22
    alpha_1: 0.03224051675481995
    alpha_2: 0.0317179029708391
    dropout_1: 0.46271585050565556
    dropout_2: 0.36509184563962327
    l2_kern_1: 3.622493638135818e-05
    l2_kern_2: 0.0008523686229688028
    l2_bias_1: 0.002170436288504063
    l2_bias_2: 6.520843995428655e-05
    lr: 0.0005257393966034638
    batch_size: 32




Train mean AUC score is 0.9028646187195944 while test mean AUC score is 0.8861172524864717.
              precision    recall  f1-score   support

          No       0.97      0.81      0.89      4814
         Yes       0.50      0.89      0.64      1021

    accuracy                           0.83      5835
   macro avg       0.74      0.85      0.76      5835
weighted avg       0.89      0.83      0.84      5835



In [9]:
all_scores = pd.concat([tpe_scores, rs_scores, ga_scores, mc_scores], axis =1)
all_scores

Unnamed: 0,Tree-structured Parzen Estimator,Random Sampler,Genetic Algorithm,Quasi Monte Carlo
Accuracy,0.826735,0.82605,0.828792,0.827763
Precision,0.503698,0.501648,0.507033,0.504577
Recall,0.666993,0.894221,0.77669,0.863859
F1-score,0.573957,0.642731,0.61354,0.637053
AUC,0.763804,0.852906,0.808266,0.841983
H-measure,0.347174,0.513164,0.429021,0.492916


In [10]:

all_scores.to_csv('Credit_risk_dataset_2_Layer_scores.csv', index=False)