In [None]:
import pickle
import json
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os


In [None]:
# Directory that holds the pickled result files read by the loaders below.
# Defaults to the original local path; set the ML_VARS_DIR environment
# variable to point somewhere else without editing the notebook.
ml_vars_dir = os.environ.get("ML_VARS_DIR", "/Users/jorismachon/Documents/thesis/ML_data/vars")

In [None]:
def load_variables(file_name):
    """Unpickle and return the object stored in ``file_name``.

    Args:
        file_name: Name of the pickle file, joined onto the module-level
            ``ml_vars_dir`` directory.

    Returns:
        Whatever object the pickle file contains.
    """
    # NOTE: pickle.load can execute arbitrary code; only use on trusted files.
    with open(os.path.join(ml_vars_dir, file_name), 'rb') as pickle_file:
        return pickle.load(pickle_file)

In [None]:
import os
import pickle

def load_all_variables(directory, prefix):
    """Load every ``<prefix>*.pkl`` file found directly in ``directory``.

    Args:
        directory: Folder to scan (not recursive).
        prefix: Only file names starting with this string are loaded.

    Returns:
        dict mapping each matching file name, with ``prefix`` and the file
        extension removed, to the unpickled object. Entries are inserted in
        sorted file-name order.
    """
    loaded_variables = {}

    # os.listdir returns names in arbitrary order; sorting keeps the dict
    # order (and therefore the order of boxes in plots built from it) stable
    # between runs and machines.
    for file_name in sorted(os.listdir(directory)):
        # Only load .pkl files with the specified prefix.
        if not (file_name.startswith(prefix) and file_name.endswith('.pkl')):
            continue

        # Drop the prefix and '.pkl' to obtain the dictionary key.
        key = os.path.splitext(file_name[len(prefix):])[0]

        # NOTE: pickle.load can execute arbitrary code; only point this at
        # directories whose contents are trusted.
        with open(os.path.join(directory, file_name), 'rb') as handle:
            loaded_variables[key] = pickle.load(handle)

    return loaded_variables

In [None]:
def load_show_metrics(file_name):
    """Print mean ± standard deviation of the metrics stored in a results file.

    The file is loaded with ``load_variables`` and indexed by string key.
    Only the keys that are actually reported are read: 'aucs', 'tnList',
    'fnList', 'fpList', 'tpList', 'precisionList', 'f1List' and 'mccList'.

    Args:
        file_name: Name of the pickle file, passed to ``load_variables``.

    Raises:
        KeyError: If the loaded mapping lacks one of the reported keys.
    """
    print("Showing metrics for file", file_name)
    loaded_variables = load_variables(file_name)

    # Compute every statistic before printing so that a missing key fails up
    # front instead of after partial output.
    reported_keys = ('aucs', 'tnList', 'fnList', 'fpList', 'tpList',
                     'precisionList', 'f1List', 'mccList')
    stats = {}
    for key in reported_keys:
        values = loaded_variables[key]
        stats[key] = (np.mean(values), np.std(values))

    # Each stats entry is a (mean, std) tuple; concatenating tuples yields the
    # argument tuple expected by the %-format strings below.
    print("TN: %.02f %% ± %.02f %% - FN: %.02f %% ± %.02f %%" % (stats['tnList'] + stats['fnList']))
    print("FP: %.02f %% ± %.02f %% - TP: %.02f %% ± %.02f %%" % (stats['fpList'] + stats['tpList']))

    print(
        "Precision: %.02f %% ± %.02f %% - F1: %.02f %% ± %.02f %% - MCC: %.02f %% ± %.02f %%"
        % (stats['precisionList'] + stats['f1List'] + stats['mccList']))

    # AUC statistics are scaled by 100 for display; the list metrics above are
    # printed exactly as stored.
    mean_auc, std_auc = stats['aucs']
    print("AUC: %.02f %% ± %.02f %%" % (100 * mean_auc, 100 * std_auc))
  

In [None]:
import matplotlib.pyplot as plt

def load_plot_metrics(file_name):
    """Show box plots of the confusion-matrix lists and F1 list of one file.

    Draws a 2x2 grid of horizontal box plots for 'tnList', 'fpList',
    'fnList' and 'tpList', followed by a separate figure with a box plot of
    'f1List'.

    Args:
        file_name: Name of the pickle file, passed to ``load_variables``.
    """
    print("Showing metrics for file", file_name)
    metrics = load_variables(file_name)

    # Fetch everything up front so a missing key fails before any figure exists.
    f1_values = metrics['f1List']
    panels = [
        (metrics['tnList'], 'TN List'),
        (metrics['fpList'], 'FP List'),
        (metrics['fnList'], 'FN List'),
        (metrics['tpList'], 'TP List'),
    ]

    # 2x2 grid, filled row by row: TN, FP on top; FN, TP below.
    fig, axs = plt.subplots(2, 2, figsize=(10, 10))
    for panel_ax, (panel_values, panel_title) in zip(axs.flat, panels):
        panel_ax.boxplot(panel_values, vert=False)
        panel_ax.set_title(panel_title)
    plt.tight_layout()
    plt.show()

    # Separate figure for the F1 values.
    plt.figure()
    plt.boxplot(f1_values)
    plt.title('F1 Distribution')
    plt.show()

In [None]:
def plot_boxplots(data, feature_name, y_label):
    """Draw one box plot per entry of ``data`` on a shared axis and show it.

    Args:
        data: Mapping of tick label -> mapping; for every entry,
            ``entry[feature_name]`` is handed to ``Axes.boxplot``.
        feature_name: Key looked up in every entry of ``data``.
        y_label: Y-axis label; " (%)" is appended to it.
    """
    fig, ax = plt.subplots()
    tick_labels = list(data)

    # One box per entry, placed at consecutive integer x positions.
    for position, label in enumerate(tick_labels):
        # No ``labels=`` argument: it was renamed to ``tick_labels`` in
        # Matplotlib 3.9, and the tick labels are set explicitly below anyway.
        # ``vert=True`` is the default and is left out as well.
        ax.boxplot(data[label][feature_name], positions=[position], widths=0.6, patch_artist=True)

    # Label each box with its key, rotated so long names stay readable.
    ax.set_xticks(range(len(tick_labels)))
    ax.set_xticklabels(tick_labels, rotation=90)
    ax.set_ylabel(y_label + " (%)")
    plt.tight_layout()

    # Display the plot
    plt.show()

In [None]:
# Box plots of 'f1List' for every pickle in ml_vars_dir whose name starts
# with 'abc_LOGO_with_nulls_'.
plot_boxplots(
    data=load_all_variables(directory=ml_vars_dir, prefix='abc_LOGO_with_nulls_'),
    feature_name='f1List',
    y_label='F1 Score',
)

In [None]:
# Box plots of 'precisionList' for every pickle in ml_vars_dir whose name
# starts with 'abc_LOGO_with_nulls_'.
plot_boxplots(
    data=load_all_variables(directory=ml_vars_dir, prefix='abc_LOGO_with_nulls_'),
    feature_name='precisionList',
    y_label='Precision',
)


In [None]:
# Box plots of 'precisionList' for every pickle in ml_vars_dir whose name
# starts with 'abc_k5_r1_with_nulls_'.
plot_boxplots(
    data=load_all_variables(directory=ml_vars_dir, prefix='abc_k5_r1_with_nulls_'),
    feature_name='precisionList',
    y_label='Precision',
)
