In [4]:
import os
import pandas as pd
import numpy as np
from pathlib import Path
from utils.retrieve_stats import *
import pprint

def compute_results_table(path):
    """Summarise best-epoch metrics over every run folder found in ``path``.

    Each sub-directory of ``path`` is treated as one run. For every run the
    best epoch is chosen by ``find_bestepoch`` from the statistics returned
    by ``getstats`` (both come from ``utils.retrieve_stats``; their exact
    semantics are not visible here). The metrics are then read from
    ``<path>/<run>/Epoch <best_epoch>/``.

    Parameters
    ----------
    path : str or pathlib.Path
        Directory that contains one sub-directory per run.

    Returns
    -------
    pandas.DataFrame
        One row per metric with the columns ``Metric``, ``Min``, ``Max`` and
        ``Mean``, aggregated over the runs while ignoring NaN entries. A
        statistic is NaN when no run supplied a usable value for the metric.

    Notes
    -----
    Problems with individual runs (missing epoch folder, missing or empty
    metric file, AUC computation failure) are reported with ``print`` and do
    not abort the summary.
    """
    path = Path(path)
    # Only directories are runs; stray files sitting next to them (README,
    # OS metadata, ...) must not be handed to getstats.
    folders = sorted(p for p in path.glob("*") if p.is_dir())
    if not folders:
        print(f"Warning: no run folders found in {path}!")

    # Get statistics for each run
    results = [getstats(folder, rolling_window=20, endofepoch=False) for folder in folders]
    best_epochs = {folder.name: find_bestepoch(result, avoid_premature=10) for folder, result in zip(folders, results)}

    # Metrics to extract
    metrics = ["train-loss", "train-acc", "train-auc", "test-loss", "test-acc", "test-auc"]
    results_dict = {metric: [] for metric in metrics}

    # Process each run
    for run_name, best_epoch in best_epochs.items():
        epoch_dir = path / run_name / f"Epoch {best_epoch}"

        if not epoch_dir.exists():
            # The whole run is skipped, so it contributes to no metric at all.
            print(f"Warning: {epoch_dir} not found!")
            continue

        for metric in metrics:
            results_dict[metric].append(_read_metric(epoch_dir, metric))

    summary = {
        "Metric": metrics,
        "Min": [_nan_safe(np.nanmin, results_dict[m]) for m in metrics],
        "Max": [_nan_safe(np.nanmax, results_dict[m]) for m in metrics],
        "Mean": [_nan_safe(np.nanmean, results_dict[m]) for m in metrics],
    }

    return pd.DataFrame(summary)


def _read_metric(epoch_dir, metric):
    """Return one run's value for ``metric`` from ``epoch_dir``, or NaN if unavailable."""
    is_auc = "auc" in metric
    # AUC has no file of its own: it is computed from the matching "-preds" file.
    file_name = f"{metric.replace('-auc', '-preds')}.csv" if is_auc else f"{metric}.csv"
    file_path = epoch_dir / file_name

    if not file_path.exists():
        print(f"Warning: {file_path} not found!")
        return np.nan

    try:
        # Prediction files are read with a header row ("actual"/"preds" columns);
        # loss/accuracy files are read as headerless.
        df = pd.read_csv(file_path, header=0 if is_auc else None)
    except pd.errors.EmptyDataError:
        # Treat an empty file like a missing one instead of aborting the summary.
        print(f"Warning: {file_path} is empty!")
        return np.nan

    if not is_auc:
        # Mean of the first column of the file.
        return df.mean().values[0]

    try:
        # NOTE(review): roc_auc_score is not imported explicitly in this cell;
        # presumably it is re-exported by utils.retrieve_stats -- confirm.
        return roc_auc_score(df["actual"], df["preds"])
    except Exception as e:
        # Best effort: a run whose AUC cannot be computed is recorded as NaN.
        print(f"Error computing AUC for {file_path}: {e}")
        return np.nan


def _nan_safe(reducer, values):
    """Apply a NaN-ignoring reducer, returning NaN for empty or all-NaN input."""
    arr = np.asarray(values, dtype=float)
    # np.nanmin/np.nanmax raise ValueError on empty input and every nan-reducer
    # emits a RuntimeWarning on all-NaN input; short-circuit both cases.
    if arr.size == 0 or np.isnan(arr).all():
        return np.nan
    return reducer(arr)


In [5]:
# Imported once, up front, rather than on every loop iteration.
from IPython.display import display

# Which result tree to summarise: "alpha" reads from results-new-alpha,
# "beta" from results-new-beta.
variant = "alpha"
results_root = Path.cwd() / f"results-new-{variant}"

# Sub-directories of the result tree, in the order they are reported.
encodings = [
    Path("symbolic") / "random",
    Path("symbolic") / "atchley",
    Path("symbolic") / "kidera",
    Path("symbolic") / "aaprop",
    Path("tcr-bert"),
    Path("sceptr"),
]

paths = [results_root / encoding for encoding in encodings]

# Print a heading and render one summary table per result directory.
for path in paths:
    print(f"Results for {path}")
    results = compute_results_table(path)
    display(results)


Results for /Users/janp/Desktop/fyp/tcr-cancer-prediction/results-new-alpha/symbolic/random


Unnamed: 0,Metric,Min,Max,Mean
0,train-loss,0.65648,0.725651,0.691361
1,train-acc,0.336449,0.700935,0.536449
2,train-auc,0.56332,0.698016,0.614162
3,test-loss,0.6424,0.724441,0.688028
4,test-acc,0.296296,0.703704,0.533333
5,test-auc,0.664474,0.790123,0.724201


Results for /Users/janp/Desktop/fyp/tcr-cancer-prediction/results-new-alpha/symbolic/atchley


Unnamed: 0,Metric,Min,Max,Mean
0,train-loss,0.685265,0.696702,0.692288
1,train-acc,0.317757,0.672897,0.465421
2,train-auc,0.334328,0.70892,0.567982
3,test-loss,0.686482,0.696692,0.692275
4,test-acc,0.185185,0.740741,0.459259
5,test-auc,0.517045,0.794118,0.684084


Results for /Users/janp/Desktop/fyp/tcr-cancer-prediction/results-new-alpha/symbolic/kidera


Unnamed: 0,Metric,Min,Max,Mean
0,train-loss,0.691013,0.69241,0.691885
1,train-acc,0.504673,0.663551,0.614953
2,train-auc,0.60717,0.765637,0.673856
3,test-loss,0.691142,0.694361,0.692244
4,test-acc,0.333333,0.777778,0.614815
5,test-auc,0.631579,0.835227,0.750504


Results for /Users/janp/Desktop/fyp/tcr-cancer-prediction/results-new-alpha/symbolic/aaprop


Unnamed: 0,Metric,Min,Max,Mean
0,train-loss,0.686925,0.693052,0.691031
1,train-acc,0.514019,0.691589,0.605607
2,train-auc,0.572973,0.737452,0.650765
3,test-loss,0.686351,0.693645,0.690708
4,test-acc,0.37037,0.814815,0.644444
5,test-auc,0.684211,0.842105,0.765977


Results for /Users/janp/Desktop/fyp/tcr-cancer-prediction/results-new-alpha/tcr-bert


Unnamed: 0,Metric,Min,Max,Mean
0,train-loss,0.039882,0.240556,0.140219
1,train-acc,0.934579,1.0,0.96729
2,train-auc,0.971986,1.0,0.985993
3,test-loss,0.090458,0.387853,0.239156
4,test-acc,0.777778,0.962963,0.87037
5,test-auc,0.93956,1.0,0.96978


Results for /Users/janp/Desktop/fyp/tcr-cancer-prediction/results-new-alpha/sceptr


Unnamed: 0,Metric,Min,Max,Mean
0,train-loss,0.25563,0.657681,0.576953
1,train-acc,0.691589,0.981308,0.813084
2,train-auc,0.804259,0.999603,0.876269
3,test-loss,0.258272,0.668632,0.595197
4,test-acc,0.62963,1.0,0.792593
5,test-auc,0.703947,1.0,0.829699
