In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
from deep_learning_utils import *

In [None]:
# --- Configuration -----------------------------------------------------------
DATA_PATH = '../../data/creditcard.csv'
K_FOLDS = 10
# Seed for the row shuffle so that a fresh kernel reproduces the same folds.
# Only the shuffle is seeded here; any randomness inside the
# deep_learning_utils helpers is not controlled by this cell.
RANDOM_SEED = 42

# --- Load and shuffle --------------------------------------------------------
credit_card_df = pd.read_csv(DATA_PATH)
# Shuffle the rows once up front; KFold below is used without shuffling.
credit_card_df = (
    credit_card_df
    .sample(frac=1, random_state=RANDOM_SEED)
    .reset_index(drop=True)
)

# Split into dataset (every column except 'Class') and labels ('Class').
# `dataset` is kept unscaled: scaling is done per fold inside the loop.
dataset = credit_card_df.drop(columns=['Class']).to_numpy()
labels = credit_card_df['Class'].to_numpy()

# --- K-fold cross-validation -------------------------------------------------
# One training history and one test result are collected per fold.
histories = []
results = []
for train_index, test_index in KFold(n_splits=K_FOLDS).split(dataset):
    # Fit the scaler on this fold's training rows only, then apply it to every
    # row. Fitting on the full dataset would let statistics of the held-out
    # rows leak into the features the model is trained on.
    scaler = StandardScaler().fit(dataset[train_index])
    scaled_dataset = scaler.transform(dataset)

    x_train, y_train, x_test, y_test = get_splitted_data(
        scaled_dataset, labels, train_index, test_index
    )
    # A fresh model per fold so no fitted state carries over between folds.
    model = get_compiled_model()
    history = fit_model(model, x_train, y_train)
    test_result = get_test_results(model, x_test, y_test)
    histories.append(history)
    results.append(test_result)

In [None]:
# Average every per-epoch training / validation curve across the K-fold runs.
# The resulting arrays are consumed by the plotting cell below.

def _mean_over_folds(metric_key):
    """Return the element-wise mean of `metric_key` over all fold histories.

    Each entry of `histories` exposes a `.history` dict mapping a metric name
    to its recorded sequence of values. The sequences are stacked with one row
    per fold and averaged along the fold axis (axis 0).
    """
    stacked = np.array([np.array(fold.history[metric_key]) for fold in histories])
    return np.mean(stacked, axis=0)


# Training curves.
specificities = _mean_over_folds('specificity')
precisions = _mean_over_folds('precision')
recalls = _mean_over_folds('recall')
loss = _mean_over_folds('loss')

# Validation curves (stored in the history under the 'val_' prefix).
test_specificities = _mean_over_folds('val_specificity')
test_precisions = _mean_over_folds('val_precision')
test_recalls = _mean_over_folds('val_recall')
test_loss = _mean_over_folds('val_loss')

# Balanced accuracy is the mean of recall and specificity (not of precision
# and recall). Averaging over folds is linear, so deriving it from the
# fold-averaged curves equals averaging the per-fold balanced accuracies.
balanced_accuracies = (recalls + specificities) / 2
test_balanced_accuracies = (test_recalls + test_specificities) / 2

In [None]:
# X axis shared by every figure: 1-based positions along the averaged curves.
epochs = np.arange(1, len(recalls) + 1)


def _show_curves(labelled_series, title, y_label):
    """Draw each (values, label) pair against `epochs` and display the figure."""
    for series, series_label in labelled_series:
        plt.plot(epochs, series, label=series_label)
    plt.title(title)
    plt.xlabel('Epochs')
    plt.ylabel(y_label)
    plt.legend()
    plt.show()


# Plotting Average Training Metrics
_show_curves(
    [
        (balanced_accuracies, 'Balanced Accuracy'),
        (specificities, 'Specificity'),
        (precisions, 'Precision'),
        (recalls, 'Recall'),
    ],
    "Average Training Metrics Per Epoch",
    'Metric Evaluation',
)

# Plotting Average Validation Metrics
_show_curves(
    [
        (test_balanced_accuracies, 'Balanced Accuracy'),
        (test_specificities, 'Specificity'),
        (test_precisions, 'Precision'),
        (test_recalls, 'Recall'),
    ],
    "Average Validation Metrics Per Epoch",
    'Metric Evaluation',
)

# Plotting Loss
_show_curves(
    [
        (loss, 'Training Loss'),
        (test_loss, 'Validation Loss'),
    ],
    "Average Training and Validation Loss",
    'Loss Value',
)

In [None]:
# Build one row per fold from its test result: entries 1..4 of the result,
# then the mean of entries 4 and 5, then everything from entry 5 onwards.
# The rows are averaged across folds below.
# NOTE(review): the labels printed below imply result[2] is specificity and
# result[4] is recall, yet the "balanced accuracy" term averages result[4]
# with result[5]. What result[5] holds depends on get_test_results, which is
# not visible here -- confirm it is the intended second operand.
per_fold_rows = [
    np.array(
        fold_result[1:5]
        + [(fold_result[4] + fold_result[5]) / 2]
        + fold_result[5:]
    )
    for fold_result in results
]
avg_results = np.mean(np.array(per_fold_rows), axis=0)

# Report the fold-averaged test metrics.
print(f'Specificity: {avg_results[1]}')
print(f'Precision: {avg_results[2]}')
print(f'Recall: {avg_results[3]}')
print(f'Balanced Accuracy: {avg_results[4]}')