In [7]:
import torch
import torch.nn as nn

import numpy as np
import matplotlib.pyplot as plt

from sklearn import datasets
from tqdm.notebook import tqdm

from sklearn.metrics import confusion_matrix
from sklearn.metrics import plot_confusion_matrix
import pandas as pd
import seaborn as sn

from IPython import embed

import os
import pickle

In [8]:
def class_matrix(cf, x):
    """Reduce a square confusion matrix to a 2x2 one-vs-rest matrix for class ``x``.

    Args:
        cf: 2-D NumPy confusion matrix (it is indexed as ``cf[x, x]``).
        x: Index of the class treated as the "positive" class.

    Returns:
        ``np.ndarray`` laid out as ``[[TP, FP], [FN, TN]]`` where
        TP is the diagonal cell ``cf[x, x]``, FP is the rest of row ``x``,
        FN is the rest of column ``x`` and TN is everything else.

    NOTE(review): counting the rest of row ``x`` as FP is only right when rows
    hold *predicted* labels. If ``cf`` has true labels on the rows, FP and FN
    are swapped here - confirm against whatever produces ``cf``.
    """
    per_column = np.sum(cf, axis=0)
    per_row = np.sum(cf, axis=1)
    grand_total = np.sum(per_row)

    hits = cf[x, x]
    rest_of_row = np.sum(cf[x]) - hits
    rest_of_column = per_column[x] - hits
    # Row x and column x overlap in the diagonal cell, so it is added back once.
    everything_else = grand_total - per_row[x] - per_column[x] + hits

    return np.asarray([[hits, rest_of_row], [rest_of_column, everything_else]])

def f1_score(matrix):
    """Return the F1 score (harmonic mean of precision and recall).

    Args:
        matrix: 2x2 indexable laid out as ``[[TP, FP], [FN, TN]]``.
            Only TP, FP and FN are read; TN does not enter the formula.

    Returns:
        ``2*TP / (2*TP + FP + FN)``, or ``0.0`` when that denominator is zero
        (no true positives, false positives or false negatives at all), so an
        absent class gives a defined score instead of an exception or NaN.
    """
    TP = matrix[0][0]
    FP = matrix[0][1]
    FN = matrix[1][0]

    denominator = 2 * TP + FP + FN
    if denominator == 0:
        # Nothing was predicted as, or actually is, the positive class.
        return 0.0
    return (2 * TP) / denominator

def accuracy_score(matrix):
    """Return the fraction of all samples that were classified correctly.

    Args:
        matrix: 2x2 indexable laid out as ``[[TP, FP], [FN, TN]]``.

    Returns:
        ``(TP + TN) / (TP + TN + FP + FN)``.
    """
    correct = matrix[0][0] + matrix[1][1]
    false_pos, false_neg = matrix[0][1], matrix[1][0]

    return correct / (correct + false_pos + false_neg)

def recall_score(matrix):
    """Return recall: the ability of the classifier to find all the positive samples.

    Args:
        matrix: 2x2 indexable laid out as ``[[TP, FP], [FN, TN]]``.
            Only TP and FN are read.

    Returns:
        ``TP / (TP + FN)``, or ``0.0`` when ``TP + FN`` is zero, so the result
        is defined instead of an exception or NaN.
    """
    TP = matrix[0][0]
    FN = matrix[1][0]

    denominator = TP + FN
    if denominator == 0:
        # Both counts are zero, so there is nothing to divide by.
        return 0.0
    return TP / denominator

def precision_score(matrix):
    """Return precision: the ability of the classifier to not label as positive a sample that is negative.

    Args:
        matrix: 2x2 indexable laid out as ``[[TP, FP], [FN, TN]]``.
            Only TP and FP are read.

    Returns:
        ``TP / (TP + FP)``, or ``0.0`` when ``TP + FP`` is zero, so the result
        is defined instead of an exception or NaN.
    """
    TP = matrix[0][0]
    FP = matrix[0][1]

    denominator = TP + FP
    if denominator == 0:
        # Both counts are zero, so there is nothing to divide by.
        return 0.0
    return TP / denominator

def plot_results(cf_matrix, num_classes, scale=10):
    """Draw a confusion matrix as an annotated heatmap and print the raw counts.

    Args:
        cf_matrix: Square confusion matrix of counts with ``num_classes`` rows
            and columns.
        num_classes: Number of classes; rows and columns are labelled
            ``0 .. num_classes - 1``.
        scale: Factor applied after dividing every cell by the matrix total.
            Defaults to 10, the value that used to be hard-coded.
            NOTE(review): the reason for 10 is not visible in this notebook;
            confirm the intended normalisation.

    Returns:
        None. The figure is shown with ``plt.show()`` and ``cf_matrix`` is printed.
    """
    class_labels = list(range(num_classes))

    # Each cell becomes its share of all samples, multiplied by ``scale``.
    scaled = cf_matrix / np.sum(cf_matrix) * scale
    df_cm = pd.DataFrame(scaled, index=class_labels, columns=class_labels)

    plt.figure(figsize=(12, 7))
    sn.heatmap(df_cm, annot=True, cmap="plasma")
    plt.show()
    print(cf_matrix)

In [9]:
# NOTE(review): absolute, machine-specific path - the notebook only runs where
# this directory exists. Consider a configurable/relative location.
root = "/Users/andyvarner/Documents/CI_Fall_2022/CI_Projects/NN Project/results/Experiment_test"

# Layout read below: <root>/<task directory>/<one pickle file per fold>.
# Hidden entries (names starting with ".", e.g. macOS ".DS_Store") and anything
# that is not a directory are skipped; they would otherwise crash the nested
# os.listdir() call.
# Sorting is lexicographic, so e.g. "task_10" orders before "task_2".
all_tasks = sorted(
    name
    for name in os.listdir(root)
    if not name.startswith(".") and os.path.isdir(os.path.join(root, name))
)

# task_results[t][f] is the object unpickled from fold f of task t.
task_results = []

for current_task in all_tasks:

    current_task = os.path.join(root, current_task)

    # Same filtering for folds: only regular, non-hidden files are unpickled.
    all_folds = sorted(
        name
        for name in os.listdir(current_task)
        if not name.startswith(".") and os.path.isfile(os.path.join(current_task, name))
    )

    fold_results = []
    for current_fold in all_folds:

        current_fold = os.path.join(current_task, current_fold)

        # NOTE(security): pickle.load can execute arbitrary code; only point
        # `root` at result files you produced yourself.
        # The context manager closes the file handle after each fold.
        with open(current_fold, "rb") as fold_file:
            data = pickle.load(fold_file)
        fold_results.append(data)

    task_results.append(fold_results)

# example = task_results[0][1] # Second fold from the first task 

In [10]:
# Sanity check: look at the first fold of the first task.
task_0_fold_0 = task_results[0][0]

# Bare expression: it is not the last statement of the cell, so its value is
# computed and then discarded.
len(task_0_fold_0)

# Each fold object is indexed by "test" and "train".
test_results, train_results = task_0_fold_0["test"], task_0_fold_0["train"]

print(test_results)

[[205   0]
 [  0 195]]


In [18]:
# Average every metric over all folds of each task.
# task_summaries[t] holds the averaged metrics of task t, in the same order as
# task_results, so the values survive past the loop.
task_summaries = []

for task_result in task_results:

    folds = task_result

    losses = []
    f1s = []
    accs = []
    recalls = []
    precisions = []

    # Every fold is visited; stopping after the first one would make the
    # "average" just that single fold's value.
    for fold in folds:

        test_results = fold["test"] # confusion matrix
        train_results = fold["train"] # loss (assumed numeric scalar or sequence - TODO confirm)

        # The metric helpers read the matrix as [[TP, FP], [FN, TN]].
        matrix = test_results
        f1 = f1_score(matrix)
        acc = accuracy_score(matrix)
        recall = recall_score(matrix)
        precision = precision_score(matrix)

        f1s.append(f1)
        accs.append(acc)
        recalls.append(recall)
        precisions.append(precision)

        # Flatten so folds that recorded different numbers of loss values can
        # still be pooled into one array.
        losses.append(np.ravel(train_results))

    if not folds:
        # A task without folds has nothing to average.
        continue

    losses = np.concatenate(losses)
    f1s = np.asarray(f1s)
    accs = np.asarray(accs)
    recalls = np.asarray(recalls)
    precisions = np.asarray(precisions)

    # Scalar means. Dividing the arrays by their length would only rescale each
    # element rather than average them.
    loss_avg = np.mean(losses)
    f1_avg = np.mean(f1s)
    acc_avg = np.mean(accs)
    recall_avg = np.mean(recalls)
    precision_avg = np.mean(precisions)

    task_summaries.append({
        "loss": loss_avg,
        "f1": f1_avg,
        "accuracy": acc_avg,
        "recall": recall_avg,
        "precision": precision_avg,
    })

        