In [1]:
import joblib
from runners import Result
import os
from collections import defaultdict
import pandas as pd
from sklearn.metrics import accuracy_score, roc_curve, f1_score, auc, roc_auc_score, matthews_corrcoef, precision_score, recall_score
from functools import reduce
import numpy as np

In [37]:
class Metrics:
    """
    Score summary for one algorithm/dataset pairing.

    Constructing an instance stores six floats (`average_accuracy`,
    `average_f1_score`, `average_auc`, `average_mcc`, `average_precision`,
    `average_recall`). Each is the mean, over the supplied results, of the
    `mean_test_<Metric>` entry of `cv_results_` taken at that result's
    `best_index_`.

    Note: on instances the `average_auc` attribute set in `__init__` shadows
    the `average_auc` static method; the static method stays reachable as
    `Metrics.average_auc(results)`.
    """
    @staticmethod
    def average(metric_name, results):
        """
        Mean of `cv_results_['mean_test_<metric_name>'][best_index_]` over `results`.

        Each element of `results` must expose `cv_results_` and `best_index_`
        (presumably a fitted scikit-learn search object or a wrapper around
        one; confirm against `runners.Result`).
        """
        score_key = f'mean_test_{metric_name}'
        best_scores = []
        for run in results:
            best_scores.append(run.cv_results_[score_key][run.best_index_])
        return np.average(best_scores)

    @staticmethod
    def average_auc(results):
        """Mean of the `test_AUC` attribute over `results`."""
        auc_values = [run.test_AUC for run in results]
        return np.average(auc_values)

    def __init__(self, results: list[Result]):
        """Compute and store the six averaged scores for `results`."""
        # Attribute creation order is significant: `vars(instance)` exposes the
        # attributes in exactly this order.
        score_names = (
            ('average_accuracy', 'Accuracy'),
            ('average_f1_score', 'F1'),
            ('average_auc', 'AUC'),
            ('average_mcc', 'MCC'),
            ('average_precision', 'Precision'),
            ('average_recall', 'Recall'),
        )
        for attribute, metric_name in score_names:
            setattr(self, attribute, Metrics.average(metric_name, results))

    def average_all_metrics(self):
        """Return the unweighted mean of the six stored averages."""
        stored_scores = [
            self.average_accuracy,
            self.average_f1_score,
            self.average_auc,
            self.average_mcc,
            self.average_precision,
            self.average_recall,
        ]
        return np.average(stored_scores)


In [38]:
# Re-import classifiers and their respective datasets
# Run directories are named with an integer. Anything else that ends up in the
# cache folder (e.g. `.ipynb_checkpoints`, `.DS_Store`) is ignored, because
# `int()` on such a name would raise ValueError while sorting.
runs = [
    entry for entry in os.listdir('classifier_cache')
    if entry.isdecimal() and os.path.isdir(f'classifier_cache/{entry}')
]
if not runs:
    raise FileNotFoundError(
        "No numerically named run directories found in 'classifier_cache'"
    )
# Get most recent run
runs.sort(reverse=True, key=int)
chosen_run = runs[0]
run_dir = f'classifier_cache/{chosen_run}'

# results_dict[algo_name][dataset_name] -> list of loaded result objects
results_dict = defaultdict(lambda: defaultdict(list))

# `os.listdir` returns entries in arbitrary order; sorting makes the order of
# algorithms/datasets (and hence of the generated table rows) reproducible.
for algo_name in sorted(os.listdir(run_dir)):
    algo_dir = f'{run_dir}/{algo_name}'
    # Skip hidden entries and stray files sitting next to the algorithm folders
    if algo_name.startswith('.') or not os.path.isdir(algo_dir):
        continue
    for dataset_name in sorted(os.listdir(algo_dir)):
        dataset_dir = f'{algo_dir}/{dataset_name}'
        if dataset_name.startswith('.') or not os.path.isdir(dataset_dir):
            continue
        for run_name in sorted(os.listdir(dataset_dir)):
            run_file_name = f'{dataset_dir}/{run_name}'
            if run_name.startswith('.') or not os.path.isfile(run_file_name):
                continue
            # Read in the file
            # NOTE(security): joblib.load unpickles the file and can execute
            # arbitrary code; only load caches produced by a trusted run.
            result = joblib.load(run_file_name)
            results_dict[algo_name][dataset_name].append(result)




In [39]:
# Table 1: one row per algorithm/dataset pair, one column per averaged metric.
metric_dict = {}
for algo, per_dataset in results_dict.items():
    for dataset, results in per_dataset.items():
        row_label = f'{algo} \\ {dataset}'
        # vars() exposes the attributes that Metrics.__init__ stored on the instance
        metric_dict[row_label] = vars(Metrics(results))

# Row labels become the index; the table is written out as LaTeX.
metric_df = pd.DataFrame.from_dict(metric_dict, orient='index')
metric_df.to_latex('table_1.txt')

In [40]:
# Table 3: rows are algorithms, columns are datasets, and each cell is the
# unweighted mean of all six averaged metrics for that pair.
table_3_dict = defaultdict(dict)
metrics = ["average_accuracy","average_f1_score","average_auc","average_mcc","average_precision","average_recall"]
for algo in results_dict.keys():
    for dataset in results_dict[algo].keys():
        table_3_dict[algo][dataset] = Metrics(results_dict[algo][dataset]).average_all_metrics()

table_3_df = pd.DataFrame.from_dict(table_3_dict, orient='index')
# The LaTeX column spec needs one entry for the index plus one per dataset
# column. Counting (algo, dataset) pairs over-declares columns whenever more
# than one algorithm is present, so the count is taken from the frame itself.
ds_count = len(table_3_df.columns)
# Raw strings: `\l` is not a valid escape sequence in a regular string literal.
table_3_df.to_latex('Table3.txt', column_format=r'p{0.2\linewidth}' + r'p{0.1\linewidth}' * ds_count)