In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import optuna
import time
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix

from credit_g_dataset import get_preprocessed_credit_g_dataset

pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", 6)

# Load and pre-process version 1 of the dataset credit-g

In [2]:
%%capture
X_train, X_validation, X_test, y_train, y_validation, y_test = get_preprocessed_credit_g_dataset()

## Train the model using the training set and adjust hyperparameters with Optuna using the validation set

In [3]:
random_state=0

In [4]:
def objective(trial):
    params = {
    "C": trial.suggest_float("C", 1e-10, 1e10, log=True),
    "kernel": trial.suggest_categorical("kernel", ["linear", "rbf", "sigmoid"]),
    "gamma": trial.suggest_categorical("gamma", ["scale", "auto"]),
    "max_iter": 300,
    "random_state": random_state    
    }
    

    model = SVC(**params).fit(X_train, y_train)

    y_predicted = model.predict(X_validation)
    (tn, fp, fn, tp) = confusion_matrix(y_validation, y_predicted).ravel()
    f1_val = (2*tp)/(2*tp+fp+fn)
    
    return f1_val


In [5]:
start_time = time.time()

study = optuna.create_study(direction = "maximize")
study.optimize(objective, n_trials=100)

total_time = time.time()-start_time
print(f"total_time={total_time}")
print(study.best_params)
print(study.best_value)

[I 2023-12-20 18:17:27,163] A new study created in memory with name: no-name-d0b21ca6-abfb-4916-a154-d1b1e5aac166
[I 2023-12-20 18:17:27,180] Trial 0 finished with value: 0.7683923705722071 and parameters: {'C': 115.05130913445616, 'kernel': 'linear', 'gamma': 'scale'}. Best is trial 0 with value: 0.7683923705722071.
[I 2023-12-20 18:17:27,196] Trial 1 finished with value: 0.8183807439824945 and parameters: {'C': 2.839839176621525e-10, 'kernel': 'sigmoid', 'gamma': 'auto'}. Best is trial 1 with value: 0.8183807439824945.
[I 2023-12-20 18:17:27,209] Trial 2 finished with value: 0.7683923705722071 and parameters: {'C': 7474679.68877256, 'kernel': 'linear', 'gamma': 'scale'}. Best is trial 1 with value: 0.8183807439824945.
[I 2023-12-20 18:17:27,225] Trial 3 finished with value: 0.7858942065491183 and parameters: {'C': 2.0003160871892396, 'kernel': 'linear', 'gamma': 'scale'}. Best is trial 1 with value: 0.8183807439824945.
[I 2023-12-20 18:17:27,243] Trial 4 finished with value: 0.818380

total_time=2.4686660766601562
{'C': 0.09289671513024933, 'kernel': 'linear', 'gamma': 'auto'}
0.855036855036855


In [6]:
best_c = study.best_params["C"]
best_kernel = study.best_params["kernel"]
best_gamma = study.best_params["gamma"]

In [7]:
model = SVC(C=best_c, kernel=best_kernel, gamma=best_gamma, max_iter=300, random_state=random_state)

## Run on the test set and use training and validation sets for training

In [8]:
X_train_valid = np.concatenate((X_train, X_validation))
y_train_valid = np.concatenate((y_train, y_validation))

model.fit(X_train_valid, y_train_valid)
print(model.score(X_test, y_test))
y_predicted = model.predict(X_test)

(tn, fp, fn, tp) = confusion_matrix(y_test, y_predicted).ravel()
precision_val = float(tp)/float(tp+fp)
recall_val = float(tp)/float(tp+fn)
specificity_val = float(tn)/float(tn+fp)
accuracy_val = float(tp+tn)/float(tn+fp+fn+tp)
f1_val = (2*tp)/(2*tp+fp+fn)
mcc_val = float((tp*tn)-(fp*fn))/np.sqrt((tp+fp)*(tp+fn)*(tn+fp)*(tn+fn))


print("\n")
print("Test set precision_val =", precision_val)
print("Test set recall_val =", recall_val)
print("Test set specificity =", specificity_val)
print("Test set mcc_val =", mcc_val)
print("Test set accuracy_val =", accuracy_val)
print("Test set f1_val =", f1_val)



0.74


Test set precision_val = 0.7951807228915663
Test set recall_val = 0.88
Test set specificity = 0.32
Test set mcc_val = 0.23055106080245386
Test set accuracy_val = 0.74
Test set f1_val = 0.8354430379746836


In [9]:
confusion_matrix(y_test, y_predicted)

array([[ 8, 17],
       [ 9, 66]], dtype=int64)

In [10]:
y_test

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1,
       1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1,
       1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,
       1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1,
       1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1])