In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import optuna
import time
from sklearn.naive_bayes import GaussianNB, MultinomialNB
from sklearn.metrics import confusion_matrix
from credit_g_dataset import get_preprocessed_credit_g_dataset

# Notebook-wide pandas display settings: no limit on the number of columns
# shown, and a row limit of 6 so long frames render in truncated form.
pd.options.display.max_columns = None
pd.options.display.max_rows = 6

# Load and pre-process version 1 of the credit-g dataset

In [2]:
%%capture
X_train, X_validation, X_test, y_train, y_validation, y_test = get_preprocessed_credit_g_dataset()

## Train the model on the combined training and validation sets

In [3]:
# Stack the validation split underneath the training split (along the first
# axis) so the final model can be fitted on both.
X_train_valid = np.concatenate([X_train, X_validation], axis=0)
y_train_valid = np.concatenate([y_train, y_validation], axis=0)

In [5]:
# Gaussian naive Bayes classifier with default settings, fitted on the
# combined training + validation data.
model = GaussianNB()
model.fit(X_train_valid, y_train_valid)

## Run on the test set

In [6]:
y_predicted = model.predict(X_test)

# Pin the label order to [0, 1] (the values that appear in y_test) so the
# matrix is always 2x2 and unpacks into exactly four counts, even when one
# class is absent from both y_test and y_predicted.
(tn, fp, fn, tp) = confusion_matrix(y_test, y_predicted, labels=[0, 1]).ravel()

# Work with Python ints from here on: the MCC denominator multiplies four sums
# together, which could silently overflow fixed-width numpy integers on a
# large test set.
tn, fp, fn, tp = int(tn), int(fp), int(fn), int(tp)


def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or 0.0 when the denominator is zero.

    A zero denominator means the metric is undefined (for example precision
    when nothing was predicted positive); 0.0 is reported instead of raising
    ZeroDivisionError or producing nan.
    """
    return numerator / denominator if denominator else 0.0


precision_val = _safe_ratio(tp, tp + fp)        # TP / (TP + FP)
recall_val = _safe_ratio(tp, tp + fn)           # TP / (TP + FN)
specificity_val = _safe_ratio(tn, tn + fp)      # TN / (TN + FP)
accuracy_val = _safe_ratio(tp + tn, tn + fp + fn + tp)
f1_val = _safe_ratio(2 * tp, 2 * tp + fp + fn)  # harmonic mean of precision and recall

# Matthews correlation coefficient:
# (TP*TN - FP*FN) / sqrt((TP+FP)(TP+FN)(TN+FP)(TN+FN))
mcc_denominator = float(np.sqrt(float((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn))))
mcc_val = _safe_ratio(float(tp * tn - fp * fn), mcc_denominator)


print("\n")
print("Test set precision_val =", precision_val)
print("Test set recall_val =", recall_val)
print("Test set specificity =", specificity_val)
print("Test set mcc_val =", mcc_val)
print("Test set accuracy_val =", accuracy_val)
print("Test set f1_val =", f1_val)



Test set precision_val = 0.8548387096774194
Test set recall_val = 0.7066666666666667
Test set specificity = 0.64
Test set mcc_val = 0.30926094424537426
Test set accuracy_val = 0.69
Test set f1_val = 0.7737226277372263


In [7]:
# Raw confusion matrix for the test set. Flattened row by row it reads
# tn, fp, fn, tp, matching the unpacking used when the metrics were computed.
confusion_matrix(y_true=y_test, y_pred=y_predicted)

array([[16,  9],
       [22, 53]], dtype=int64)

In [8]:
# Show the test-set labels that were passed to confusion_matrix as the
# reference (first) argument in the previous cells.
y_test

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1,
       1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1,
       1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,
       1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1,
       1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1])