In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

import timeit

from sklearn.metrics import confusion_matrix, classification_report, precision_recall_curve
from sklearn.metrics import roc_auc_score, roc_curve, f1_score, precision_score, recall_score, accuracy_score, auc
from hmeasure import h_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, precision_score, recall_score, f1_score, precision_recall_fscore_support,roc_auc_score, accuracy_score
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold
from sklearn.pipeline import Pipeline

import optuna

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras import regularizers

from keras.layers import Dense, Dropout
from keras.regularizers import l2
from keras.layers import LeakyReLU
from keras.wrappers.scikit_learn import KerasClassifier
from keras import initializers
from keras.models import Sequential
from keras.optimizers import Adam

# Read the data using csv
path_train = 'credit_risk-dataset-cleanedtraining.csv'
df_train = pd.read_csv(path_train, encoding = "ISO-8859-1")

path_test = 'credit_risk-dataset-cleanedtesting.csv'
df_test = pd.read_csv(path_test, encoding = "ISO-8859-1")

# Prin 5 forst lines of df
df_train.head()

Unnamed: 0,person_age,person_income,person_emp_length,loan_amnt,loan_int_rate,loan_status,loan_percent_income,cb_person_cred_hist_length,person_home_ownership,loan_intent,loan_grade,cb_person_default_on_file
0,23,62500,7.0,26000,11.71,1,0.42,2,3,3,1,0
1,23,120000,1.0,25600,12.69,0,0.21,3,3,1,1,0
2,22,60000,0.0,25475,10.99,1,0.42,3,3,5,1,0
3,23,83000,7.0,25300,10.99,1,0.3,3,3,1,1,0
4,21,42500,3.0,25000,12.73,1,0.59,3,3,1,2,1


In [2]:
X_test, y_test = df_test.iloc[:,:-1], df_test.iloc[:,-1]

X_train, y_train = df_train.iloc[:,:-1], df_train.iloc[:,-1]

X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, stratify=y_train, test_size=0.2)

In [3]:
class tuning_methods:
    
    def __init__(self, X_train, X_val, y_train,  y_val, X_test, y_test):
        self.X_train = X_train
        self.X_val = X_val
        self.y_train = y_train
        self.y_val = y_val
        self.X_test = X_test
        self.y_test = y_test
        
    
    # Define the Keras model
    def create_model(self, units, alpha, dropout, l2_kern, l2_bias, lr, batch_size):        # 
        
        model = Sequential()
        model.add(Dense(units, input_shape=(self.X_train.shape[1],), 
                                 activation =model.add(LeakyReLU(alpha=alpha)),
                                 kernel_regularizer = l2(l2_kern),
                                 bias_regularizer = l2(l2_bias),
                                 kernel_initializer = initializers.RandomNormal(mean=0.0, stddev=0.05)
            ))
        model.add(Dropout(dropout))
        model.add(Dense(1, activation='sigmoid'))
        optimizer = Adam(learning_rate=lr)
        model.compile(optimizer=optimizer,
                  loss='binary_crossentropy', 
                  metrics=['AUC'])
        return model
    
    
    # Define the Optuna study
    def objective(self, trial):
        
        # Define the pipeline
        pipeline = Pipeline([
                ('scaler', StandardScaler()),
                ('classifier', KerasClassifier(build_fn=self.create_model, verbose=0))
                ])

        # Set hyperparameters
        units = trial.suggest_int('units', 2, 40)
        alpha = trial.suggest_float('alpha', 0.01, 0.1)
        dropout = trial.suggest_float("dropout", 0.2, 0.5)    
        lr = trial.suggest_float("lr", 5e-5, 1e-2, log=True)
        batch_size = trial.suggest_categorical('batch_size', [16, 32, 64, 128, 256, 512, 1024])
        l2_kern = trial.suggest_float("l2_kern",  1e-5, 1e-2, log=True)
        l2_bias = trial.suggest_float("l2_bias",  1e-5, 1e-2, log=True)
        epochs = 200
        validation_data = (self.X_val, self.y_val)
        early_stopping = EarlyStopping(monitor='loss', patience=5, restore_best_weights=True)
        pipeline.set_params(classifier__units=units,
                        classifier__alpha=alpha,
                        classifier__l2_kern=l2_kern,
                        classifier__l2_bias=l2_bias,
                        classifier__dropout=dropout,
                        classifier__lr=lr,
                        classifier__epochs = epochs,
                        classifier__batch_size = batch_size,
                        classifier__validation_data = validation_data,
                        classifier__callbacks=[early_stopping])
        kfold = StratifiedKFold(n_splits=3, shuffle=True, random_state=2023)
        scores = cross_val_score(pipeline, self.X_train, self.y_train, cv=kfold, scoring="roc_auc", error_score='raise')
    
        if trial.should_prune():
            raise optuna.TrialPruned()

        return np.mean(scores)

    def Tuning(self, n_trials=100, sampler = optuna.samplers.TPESampler()):
        
        start = timeit.default_timer()

        study = optuna.create_study(direction="maximize",             
                            sampler=sampler, 
                            pruner= optuna.pruners.MedianPruner()
                           )
        study.optimize(self.objective, n_trials=n_trials)  # to be converged it needs atleast 200 trials 

        print("Number of finished trials: {}".format(len(study.trials)))

        print("Best trial:")
        trial = study.best_trial

        print("  Value: {}".format(trial.value))

        stop = timeit.default_timer()

        time_opt = stop - start
        params_TPE = []

        for key, value in trial.params.items():
            params_TPE.append([key,value])
            print("    {}: {}".format(key, value))
            
        params_TPE.append(["time",round(time_opt/60,2)])

        return params_TPE
    
    def Best_MLP_model(self, params):
        units = params[0][1]
        alpha = np.round(params[1][1],3)
        dropout = np.round(params[2][1],2)
        lr = np.round(params[3][1],5)
        batchs = params[4][1]
        kernel_regularizer=np.round(params[5][1],5)
        bias_regularizer=np.round(params[6][1],5)

        # Define the pipeline
        pipeline = Pipeline([
                    ('scaler', StandardScaler()),
                    ('classifier', KerasClassifier(build_fn=tun.create_model, verbose=0))
                ])

        # Set hyperparameters
        epochs = 200
        validation_data = (self.X_val, self.y_val)
        early_stopping = EarlyStopping(monitor='loss', patience=5, restore_best_weights=True)
        pipeline.set_params(classifier__units=units,
                        classifier__alpha=alpha,
                        classifier__l2_kern=kernel_regularizer,
                        classifier__l2_bias=bias_regularizer,
                        classifier__dropout=dropout,
                        classifier__lr=lr,
                        classifier__epochs = epochs,
                        classifier__batch_size = batchs,
                        classifier__validation_data = validation_data,
                        classifier__callbacks=[early_stopping])
        
        # Use cross validation to monitor the deferrence between training and testing data as an evidence of overfitting
        kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=2023)
        scores_train = cross_val_score(pipeline, self.X_train, self.y_train, cv=kfold, scoring="roc_auc", error_score='raise')
        mean_score_train = np.mean(scores_train)

        kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=2023)
        scores_test = cross_val_score(pipeline, self.X_test, self.y_test, cv=kfold, scoring="roc_auc", error_score='raise')
        mean_score_test = np.mean(scores_test)

        print(f"Train mean AUC score is {mean_score_train} while test mean AUC score is {mean_score_test}.")
      
        # Fit the pipeline to the training data
        history = pipeline.fit(self.X_train, self.y_train)

        #score = pipeline.evaluate(X_test, y_test, verbose=0)  

        y_pred_prob_tpe = pipeline.predict(self.X_test)

        y_pred_tpe = y_pred_prob_tpe.round()

        print(classification_report(self.y_test, y_pred_tpe, target_names = ['No','Yes']))

        acc = accuracy_score(self.y_test, y_pred_tpe)
        pre = precision_score(self.y_test, y_pred_tpe)
        rec = recall_score(self.y_test, y_pred_tpe)
        f1 = f1_score(self.y_test, y_pred_tpe)
        auc = roc_auc_score(self.y_test, y_pred_tpe)
        y_test_array = np.array(self.y_test)
        y_pred_tpe_array = np.array(y_pred_tpe)
        H_measure = h_score(y_test_array, y_pred_tpe_array)

        opt = [acc, pre, rec, f1, auc, H_measure]

        scores = pd.DataFrame(opt, index =['Accuracy', 'Precision', 'Recall', 'F1-score', 'AUC', "H-measure"], 
                              columns =['Tree-structured Parzen Estimator'])
        
        return scores 

In [4]:
tun = tuning_methods(X_train, X_val, y_train,  y_val, X_test, y_test)
trials = 10

# Tree-structured Parzen Estimator
params_tpe = tun.Tuning(n_trials=trials, sampler = optuna.samplers.TPESampler())
tpe_scores = tun.Best_MLP_model(params_tpe)

[I 2024-12-02 15:01:53,923] A new study created in memory with name: no-name-a1cbcda2-d9d8-4313-a3f3-f77540ec9a8e




[I 2024-12-02 15:02:20,671] Trial 0 finished with value: 0.89449959046896 and parameters: {'units': 4, 'alpha': 0.0973823052514357, 'dropout': 0.44589501843197965, 'lr': 0.0014337085576995492, 'batch_size': 1024, 'l2_kern': 6.061999444746172e-05, 'l2_bias': 0.002231063650809536}. Best is trial 0 with value: 0.89449959046896.




[I 2024-12-02 15:02:46,162] Trial 1 finished with value: 0.8973814715350535 and parameters: {'units': 25, 'alpha': 0.057512213274957445, 'dropout': 0.32731777555945407, 'lr': 0.0013276346245265408, 'batch_size': 1024, 'l2_kern': 0.0001698811355251308, 'l2_bias': 2.8443871264100175e-05}. Best is trial 1 with value: 0.8973814715350535.




[I 2024-12-02 15:03:28,361] Trial 2 finished with value: 0.8953895477020759 and parameters: {'units': 29, 'alpha': 0.013964564834259625, 'dropout': 0.4163386836633757, 'lr': 0.0059467561187114635, 'batch_size': 64, 'l2_kern': 0.009795242418011598, 'l2_bias': 0.0001058901061835439}. Best is trial 1 with value: 0.8973814715350535.




[I 2024-12-02 15:05:24,110] Trial 3 finished with value: 0.8879163145312617 and parameters: {'units': 5, 'alpha': 0.09491305913140995, 'dropout': 0.24735834210438642, 'lr': 5.9280676730727535e-05, 'batch_size': 128, 'l2_kern': 0.0008860678576956, 'l2_bias': 0.00955151157184621}. Best is trial 1 with value: 0.8973814715350535.




[I 2024-12-02 15:06:35,093] Trial 4 finished with value: 0.8514575301036728 and parameters: {'units': 7, 'alpha': 0.08159488268120987, 'dropout': 0.30346765025407674, 'lr': 7.226343446582974e-05, 'batch_size': 512, 'l2_kern': 0.003815510479373692, 'l2_bias': 0.00051593741990606}. Best is trial 1 with value: 0.8973814715350535.




[I 2024-12-02 15:07:41,577] Trial 5 finished with value: 0.8772178947590913 and parameters: {'units': 3, 'alpha': 0.04826192044955556, 'dropout': 0.34105612914139205, 'lr': 0.00013483328479507652, 'batch_size': 256, 'l2_kern': 3.717178660378275e-05, 'l2_bias': 0.002782091392015131}. Best is trial 1 with value: 0.8973814715350535.




[I 2024-12-02 15:08:18,444] Trial 6 finished with value: 0.8972180276067973 and parameters: {'units': 31, 'alpha': 0.053797404470888315, 'dropout': 0.33998796477014126, 'lr': 0.0006658916475077251, 'batch_size': 256, 'l2_kern': 0.0031164487978176127, 'l2_bias': 0.0001403109158800558}. Best is trial 1 with value: 0.8973814715350535.




[I 2024-12-02 15:09:10,223] Trial 7 finished with value: 0.8931388683581668 and parameters: {'units': 25, 'alpha': 0.0770109504025927, 'dropout': 0.3272297086017917, 'lr': 0.00022523763884947905, 'batch_size': 1024, 'l2_kern': 0.0007988791757511775, 'l2_bias': 0.0031145073357220867}. Best is trial 1 with value: 0.8973814715350535.




[I 2024-12-02 15:09:23,806] Trial 8 finished with value: 0.897225498863156 and parameters: {'units': 34, 'alpha': 0.04568702595389418, 'dropout': 0.2436613622754172, 'lr': 0.009019931726499703, 'batch_size': 1024, 'l2_kern': 0.0006109574024986788, 'l2_bias': 3.3438753259106376e-05}. Best is trial 1 with value: 0.8973814715350535.




[I 2024-12-02 15:13:07,302] Trial 9 finished with value: 0.8954775167334857 and parameters: {'units': 14, 'alpha': 0.039135133033294335, 'dropout': 0.22272127975445125, 'lr': 6.882214418253859e-05, 'batch_size': 16, 'l2_kern': 0.0002753596399851084, 'l2_bias': 0.0013573106981303112}. Best is trial 1 with value: 0.8973814715350535.


Number of finished trials: 10
Best trial:
  Value: 0.8973814715350535
    units: 25
    alpha: 0.057512213274957445
    dropout: 0.32731777555945407
    lr: 0.0013276346245265408
    batch_size: 1024
    l2_kern: 0.0001698811355251308
    l2_bias: 2.8443871264100175e-05
Train mean AUC score is 0.8976027595551758 while test mean AUC score is 0.8897159092643747.
              precision    recall  f1-score   support

          No       0.87      0.92      0.90      4814
         Yes       0.50      0.36      0.42      1021

    accuracy                           0.83      5835
   macro avg       0.69      0.64      0.66      5835
weighted avg       0.81      0.83      0.81      5835



In [8]:
# Genetic Algorithm
params_ga = tun.Tuning(n_trials=trials, sampler = optuna.samplers.NSGAIISampler())
ga_scores = tun.Best_MLP_model(params_ga)
ga_scores = ga_scores.rename(columns={'Tree-structured Parzen Estimator': 'Genetic Algorithm'})

[I 2024-11-30 22:43:03,465] A new study created in memory with name: no-name-639a3470-2e19-40b3-8f1b-ac3cc09ce8e3




[I 2024-11-30 22:47:06,738] Trial 0 finished with value: 0.896767877209648 and parameters: {'units': 22, 'alpha': 0.041742375954221644, 'dropout': 0.4306870591679094, 'lr': 5.666977658187691e-05, 'batch_size': 16, 'l2_kern': 8.920660148109299e-05, 'l2_bias': 0.0001727887292383997}. Best is trial 0 with value: 0.896767877209648.




[I 2024-11-30 22:47:45,830] Trial 1 finished with value: 0.8981252949723814 and parameters: {'units': 39, 'alpha': 0.06584153055154304, 'dropout': 0.27392644967702595, 'lr': 0.0003169023066660518, 'batch_size': 512, 'l2_kern': 0.00773673163126706, 'l2_bias': 1.3205470435460643e-05}. Best is trial 1 with value: 0.8981252949723814.




[I 2024-11-30 22:48:06,856] Trial 2 finished with value: 0.8987292313511697 and parameters: {'units': 32, 'alpha': 0.07490449958563623, 'dropout': 0.2609107164869424, 'lr': 0.0015819966605101774, 'batch_size': 1024, 'l2_kern': 5.954541576064576e-05, 'l2_bias': 4.863522990115782e-05}. Best is trial 2 with value: 0.8987292313511697.




[I 2024-11-30 22:49:16,526] Trial 3 finished with value: 0.8981866844530987 and parameters: {'units': 10, 'alpha': 0.09410206067668621, 'dropout': 0.28035576756209496, 'lr': 0.0004084873136882892, 'batch_size': 32, 'l2_kern': 3.9397577701407676e-05, 'l2_bias': 0.0012192163107784622}. Best is trial 2 with value: 0.8987292313511697.




[I 2024-11-30 22:49:37,445] Trial 4 finished with value: 0.898675247819028 and parameters: {'units': 28, 'alpha': 0.042367981965607315, 'dropout': 0.33443362835243196, 'lr': 0.0008428414650990975, 'batch_size': 256, 'l2_kern': 0.00019630584692624634, 'l2_bias': 0.0005455929327739369}. Best is trial 2 with value: 0.8987292313511697.




[I 2024-11-30 22:49:56,259] Trial 5 finished with value: 0.8976623844081922 and parameters: {'units': 24, 'alpha': 0.04241974322326813, 'dropout': 0.3725354747845236, 'lr': 0.0007228635614149539, 'batch_size': 1024, 'l2_kern': 4.8682498271379715e-05, 'l2_bias': 0.0013800632201066863}. Best is trial 2 with value: 0.8987292313511697.




[I 2024-11-30 22:51:33,064] Trial 6 finished with value: 0.8981452532230104 and parameters: {'units': 22, 'alpha': 0.026411830659409857, 'dropout': 0.25795779712182154, 'lr': 0.0003350211849519735, 'batch_size': 32, 'l2_kern': 0.002262926760166187, 'l2_bias': 0.0016008463876632172}. Best is trial 2 with value: 0.8987292313511697.




[I 2024-11-30 22:52:43,524] Trial 7 finished with value: 0.8959912326590665 and parameters: {'units': 9, 'alpha': 0.03544868582252734, 'dropout': 0.40722698218723663, 'lr': 0.00013302230866888288, 'batch_size': 64, 'l2_kern': 4.863481465427699e-05, 'l2_bias': 0.0007078514905399595}. Best is trial 2 with value: 0.8987292313511697.




[I 2024-11-30 22:53:08,269] Trial 8 finished with value: 0.8980884067830283 and parameters: {'units': 4, 'alpha': 0.07703940339650368, 'dropout': 0.4774888037363271, 'lr': 0.0019812788253269057, 'batch_size': 256, 'l2_kern': 0.0039595753116978055, 'l2_bias': 0.00021360541224814365}. Best is trial 2 with value: 0.8987292313511697.




[I 2024-11-30 22:53:30,081] Trial 9 finished with value: 0.8986064892465304 and parameters: {'units': 22, 'alpha': 0.061155771871548976, 'dropout': 0.4668477513991536, 'lr': 0.0010505044218818702, 'batch_size': 512, 'l2_kern': 1.767038574956156e-05, 'l2_bias': 0.0007342978042217243}. Best is trial 2 with value: 0.8987292313511697.


Number of finished trials: 10
Best trial:
  Value: 0.8987292313511697
    units: 32
    alpha: 0.07490449958563623
    dropout: 0.2609107164869424
    lr: 0.0015819966605101774
    batch_size: 1024
    l2_kern: 5.954541576064576e-05
    l2_bias: 4.863522990115782e-05




Train mean AUC score is 0.8983778762479033 while test mean AUC score is 0.8890161108074999.
              precision    recall  f1-score   support

          No       0.88      0.92      0.90      4814
         Yes       0.50      0.38      0.43      1021

    accuracy                           0.83      5835
   macro avg       0.69      0.65      0.67      5835
weighted avg       0.81      0.83      0.82      5835



In [9]:
params_mc = tun.Tuning(n_trials=trials, sampler = optuna.samplers.QMCSampler())
mc_scores = tun.Best_MLP_model(params_mc)
mc_scores = mc_scores.rename(columns={'Tree-structured Parzen Estimator': 'Quasi Monte Carlo'})

  """Entry point for launching an IPython kernel.
[I 2024-11-30 22:54:24,865] A new study created in memory with name: no-name-cec1eb5d-ba4f-404b-a153-6cc23d52d950




[I 2024-11-30 22:55:33,458] Trial 0 finished with value: 0.8944395771198885 and parameters: {'units': 39, 'alpha': 0.06126248106746605, 'dropout': 0.41605226094560677, 'lr': 0.00012697432586899996, 'batch_size': 1024, 'l2_kern': 0.004555102760797048, 'l2_bias': 0.0019194490075575793}. Best is trial 0 with value: 0.8944395771198885.




[I 2024-11-30 22:59:04,419] Trial 1 finished with value: 0.8899611050500167 and parameters: {'units': 2, 'alpha': 0.01, 'dropout': 0.2, 'lr': 5.000000000000004e-05, 'batch_size': 32, 'l2_kern': 0.004130471934372518, 'l2_bias': 0.002710377116273406}. Best is trial 0 with value: 0.8944395771198885.




[I 2024-11-30 23:01:37,686] Trial 2 finished with value: 0.8970508882691365 and parameters: {'units': 21, 'alpha': 0.05500000000000001, 'dropout': 0.35, 'lr': 0.0007071067811865483, 'batch_size': 16, 'l2_kern': 0.00031622776601683783, 'l2_bias': 0.00031622776601683783}. Best is trial 2 with value: 0.8970508882691365.




[I 2024-11-30 23:02:14,567] Trial 3 finished with value: 0.8981258745099502 and parameters: {'units': 31, 'alpha': 0.0325, 'dropout': 0.275, 'lr': 0.00018803015465431968, 'batch_size': 256, 'l2_kern': 0.0017782794100389236, 'l2_bias': 0.0017782794100389236}. Best is trial 3 with value: 0.8981258745099502.




[I 2024-11-30 23:02:24,418] Trial 4 finished with value: 0.898760774080977 and parameters: {'units': 11, 'alpha': 0.0775, 'dropout': 0.425, 'lr': 0.002659147948472495, 'batch_size': 1024, 'l2_kern': 5.6234132519034893e-05, 'l2_bias': 5.6234132519034893e-05}. Best is trial 4 with value: 0.898760774080977.




[I 2024-11-30 23:02:37,264] Trial 5 finished with value: 0.8979814840665094 and parameters: {'units': 16, 'alpha': 0.043750000000000004, 'dropout': 0.3875, 'lr': 0.005156692688606234, 'batch_size': 256, 'l2_kern': 0.00013335214321633237, 'l2_bias': 2.3713737056616547e-05}. Best is trial 4 with value: 0.898760774080977.




[I 2024-11-30 23:04:36,128] Trial 6 finished with value: 0.8982932580226396 and parameters: {'units': 36, 'alpha': 0.08875000000000001, 'dropout': 0.23750000000000002, 'lr': 0.00036463323686085564, 'batch_size': 16, 'l2_kern': 0.004216965034285825, 'l2_bias': 0.0007498942093324562}. Best is trial 4 with value: 0.898760774080977.




[I 2024-11-30 23:04:53,510] Trial 7 finished with value: 0.8981650047307689 and parameters: {'units': 26, 'alpha': 0.02125, 'dropout': 0.4625, 'lr': 0.0013712408783810375, 'batch_size': 1024, 'l2_kern': 0.0007498942093324562, 'l2_bias': 0.004216965034285825}. Best is trial 4 with value: 0.898760774080977.




[I 2024-11-30 23:05:53,954] Trial 8 finished with value: 0.8400505531050616 and parameters: {'units': 6, 'alpha': 0.06625, 'dropout': 0.3125, 'lr': 9.696137237434293e-05, 'batch_size': 512, 'l2_kern': 2.3713737056616547e-05, 'l2_bias': 0.00013335214321633237}. Best is trial 4 with value: 0.898760774080977.




[I 2024-11-30 23:06:20,511] Trial 9 finished with value: 0.89766088389536 and parameters: {'units': 9, 'alpha': 0.038125000000000006, 'dropout': 0.48125, 'lr': 0.000507774196302167, 'batch_size': 128, 'l2_kern': 0.00048696752516586293, 'l2_bias': 8.659643233600651e-05}. Best is trial 4 with value: 0.898760774080977.


Number of finished trials: 10
Best trial:
  Value: 0.898760774080977
    units: 11
    alpha: 0.0775
    dropout: 0.425
    lr: 0.002659147948472495
    batch_size: 1024
    l2_kern: 5.6234132519034893e-05
    l2_bias: 5.6234132519034893e-05




Train mean AUC score is 0.8978082650013318 while test mean AUC score is 0.8875396786185347.
              precision    recall  f1-score   support

          No       0.87      0.93      0.90      4814
         Yes       0.51      0.35      0.42      1021

    accuracy                           0.83      5835
   macro avg       0.69      0.64      0.66      5835
weighted avg       0.81      0.83      0.81      5835



In [11]:
# Random Sampler
params_rs = tun.Tuning(n_trials=trials, sampler = optuna.samplers.RandomSampler())
rs_scores = tun.Best_MLP_model(params_rs)
rs_scores = rs_scores.rename(columns={'Tree-structured Parzen Estimator': 'Random Sampler'})

[I 2024-11-30 23:19:27,714] A new study created in memory with name: no-name-6c7d667a-61e5-4f10-952f-181ede088765




[I 2024-11-30 23:20:45,721] Trial 0 finished with value: 0.8973120267197405 and parameters: {'units': 16, 'alpha': 0.0490594005454528, 'dropout': 0.26761632983581507, 'lr': 9.00287498476998e-05, 'batch_size': 32, 'l2_kern': 0.0005197021996709356, 'l2_bias': 0.0008995266291855359}. Best is trial 0 with value: 0.8973120267197405.




[I 2024-11-30 23:21:12,722] Trial 1 finished with value: 0.8969949426166393 and parameters: {'units': 25, 'alpha': 0.08591665000402811, 'dropout': 0.47567698977505835, 'lr': 0.0045490063982874165, 'batch_size': 128, 'l2_kern': 7.660079015649083e-05, 'l2_bias': 0.005544761904417827}. Best is trial 0 with value: 0.8973120267197405.




[I 2024-11-30 23:21:43,447] Trial 2 finished with value: 0.8960922728720465 and parameters: {'units': 28, 'alpha': 0.010646151650994418, 'dropout': 0.22079878886406445, 'lr': 0.003871458263877747, 'batch_size': 64, 'l2_kern': 8.72609066086752e-05, 'l2_bias': 0.0008324680234867484}. Best is trial 0 with value: 0.8973120267197405.




[I 2024-11-30 23:22:00,703] Trial 3 finished with value: 0.8979855907769251 and parameters: {'units': 5, 'alpha': 0.03948181859265782, 'dropout': 0.2072724771994514, 'lr': 0.002875940545373594, 'batch_size': 64, 'l2_kern': 0.007209303541239616, 'l2_bias': 0.0005060260765474327}. Best is trial 3 with value: 0.8979855907769251.




[I 2024-11-30 23:22:30,214] Trial 4 finished with value: 0.8974189206626125 and parameters: {'units': 13, 'alpha': 0.014304155615237336, 'dropout': 0.30726955519104804, 'lr': 0.0002547292513079497, 'batch_size': 128, 'l2_kern': 6.629750184570837e-05, 'l2_bias': 0.007847711227019962}. Best is trial 3 with value: 0.8979855907769251.




[I 2024-11-30 23:25:44,007] Trial 5 finished with value: 0.8966175833816917 and parameters: {'units': 15, 'alpha': 0.01177194766961919, 'dropout': 0.3862797359720435, 'lr': 5.3382015887324406e-05, 'batch_size': 32, 'l2_kern': 0.0013803186915395818, 'l2_bias': 4.079083630888884e-05}. Best is trial 3 with value: 0.8979855907769251.




[I 2024-11-30 23:26:38,791] Trial 6 finished with value: 0.7788603901506161 and parameters: {'units': 4, 'alpha': 0.027355097199074856, 'dropout': 0.32480412082119303, 'lr': 6.969321318418787e-05, 'batch_size': 1024, 'l2_kern': 0.002060310104848317, 'l2_bias': 0.00014357414405137507}. Best is trial 3 with value: 0.8979855907769251.




[I 2024-11-30 23:28:27,370] Trial 7 finished with value: 0.8981432743502182 and parameters: {'units': 30, 'alpha': 0.07240223122280215, 'dropout': 0.47269628242210304, 'lr': 0.00028571636588126895, 'batch_size': 32, 'l2_kern': 0.0016884643113742864, 'l2_bias': 0.0015510247178867332}. Best is trial 7 with value: 0.8981432743502182.




[I 2024-11-30 23:29:34,395] Trial 8 finished with value: 0.8982967607110076 and parameters: {'units': 9, 'alpha': 0.05104577480799235, 'dropout': 0.2861620153664307, 'lr': 0.006860003565311547, 'batch_size': 128, 'l2_kern': 3.175228128647522e-05, 'l2_bias': 0.0007000271782982722}. Best is trial 8 with value: 0.8982967607110076.




[I 2024-11-30 23:30:20,546] Trial 9 finished with value: 0.8959809425752949 and parameters: {'units': 18, 'alpha': 0.092318538742708, 'dropout': 0.29510937209071253, 'lr': 0.0001688365683074893, 'batch_size': 256, 'l2_kern': 0.0012362047862405584, 'l2_bias': 0.006914483027083505}. Best is trial 8 with value: 0.8982967607110076.


Number of finished trials: 10
Best trial:
  Value: 0.8982967607110076
    units: 9
    alpha: 0.05104577480799235
    dropout: 0.2861620153664307
    lr: 0.006860003565311547
    batch_size: 128
    l2_kern: 3.175228128647522e-05
    l2_bias: 0.0007000271782982722




Train mean AUC score is 0.8980690469015077 while test mean AUC score is 0.88851664552693.
              precision    recall  f1-score   support

          No       0.87      0.92      0.90      4814
         Yes       0.50      0.37      0.42      1021

    accuracy                           0.82      5835
   macro avg       0.69      0.64      0.66      5835
weighted avg       0.81      0.82      0.81      5835



In [13]:
all_scores = pd.concat([tpe_scores, rs_scores, ga_scores, mc_scores], axis =1)
all_scores

Unnamed: 0,Tree-structured Parzen Estimator,Random Sampler,Genetic Algorithm,Quasi Monte Carlo
Accuracy,0.82605,0.824336,0.825536,0.826564
Precision,0.503958,0.497354,0.501926,0.506311
Recall,0.374143,0.368266,0.382958,0.353575
F1-score,0.429455,0.423185,0.434444,0.416378
AUC,0.648019,0.644665,0.65118,0.640227
H-measure,0.169636,0.163334,0.173336,0.159849


In [16]:
# Save the clean dataset

all_scores.to_csv('credit_risk_dataset_1_Layer_scores.csv', index=False)
