## Model metrics

In [14]:
import pandas as pd
import numpy as np
from tabulate import tabulate
from metrics_utils import *

In [15]:
LOG_PATH = "lightning_logs"
METRICS_PATH = "../Metrics"

## Generate cleaned up csv files for each run

In [22]:
model_ids = ['g_lenet_cut_dataset_repeat',
             'lenet_cut_dataset_repeat',
             'resnet18_cut_dataset_repeat',
             'resnet50_cut_dataset_repeat',
             'jiaresnet50_cut_dataset_repeat',
             'G_ResNet18_cut_dataset_repeat']

def save_run(model_ids,max_runs):
    for model in model_ids:
        for run in range(max_runs):
            print(f"{model}, run {run}")
            try:
                save_metrics_from_logger(model,LOG_PATH,METRICS_PATH,version=run)
            except:
                print(f"Error with {model}, run {run}")

#save_run(model_ids,5) #No longer needed as newer framework does this automatically

## Create graphs for each run

In [17]:
def generate_graphs_run(model_ids,max_runs):
    for model in model_ids:
        for run in range(max_runs):
            #print(f"{model}, run {run}")
            try:
                plot_train_metrics(model,METRICS_PATH,version=run,show=False)
            except:
                print(f"Error with {model}, run {run}")
#generate_graphs_run(model_ids,5) 

## Get best loss epoch for each run

In [39]:
def get_train_results_runs(model_ids,max_runs):
    repeat_metrics = pd.DataFrame(columns=["Loss","Accuracy","ECE","C Viol"],index=model_ids)
    repeat_metrics.columns.name="Model"
    for model in model_ids:
        best_losses = []
        best_accs = []
        best_eces = []
        best_chiralities = []
        for run in range(max_runs):
            try:
                metrics = get_metrics_from_csv(model,METRICS_PATH,version=run)
                #best_loss_epoch = 59 
                best_loss_epoch = metrics['val_loss'].argmin()
                best_losses.append(metrics['val_loss'][best_loss_epoch])
                best_accs.append(metrics['val_acc'][best_loss_epoch])
                best_eces.append(metrics['val_calibration_error'][best_loss_epoch])
                best_chiralities.append((metrics['val_chirality_violation'][best_loss_epoch]))
            except:
                print(f"Error with {model}, run {run}")

        # if model == 'LeNet_cut_dataset_repeat':
        #     print(best_chiralities)
        repeat_metrics.loc[model] = {"Loss": f"{np.average(best_losses):.4f} ± {np.std(best_losses):.4f}",
                                        "Accuracy": f"{np.average(best_accs):.2%} ± {np.std(best_accs):.2%}",
                                        "ECE": f"{np.average(best_eces):.4f} ± {np.std(best_eces):.4f}",
                                        "C Viol": f"{np.average(best_chiralities):.4f} ± {np.std(best_chiralities):.4f}"}
    #print(tabulate(repeat_metrics,headers='keys',tablefmt='github'))
    return repeat_metrics

repeat_metrics = get_train_results_runs(model_ids,5)
repeat_metrics.head(6)

Model,Loss,Accuracy,ECE,C Viol
g_lenet_cut_dataset_repeat,0.7821 ± 0.0351,77.54% ± 2.58%,0.1587 ± 0.0089,0.4174 ± 0.2577
lenet_cut_dataset_repeat,0.8562 ± 0.0341,70.61% ± 3.42%,0.1421 ± 0.0146,nan ± nan
resnet18_cut_dataset_repeat,0.5290 ± 0.0003,98.16% ± 0.12%,0.2273 ± 0.0023,0.4963 ± 0.1710
resnet50_cut_dataset_repeat,0.5309 ± 0.0002,97.40% ± 0.13%,0.2195 ± 0.0031,0.7647 ± 0.3068
jiaresnet50_cut_dataset_repeat,0.5310 ± 0.0034,97.65% ± 0.82%,0.2232 ± 0.0066,0.2887 ± 0.0618
G_ResNet18_cut_dataset_repeat,0.5346 ± 0.0024,97.79% ± 0.25%,0.2235 ± 0.0041,0.5458 ± 0.1820


In [40]:
def get_test_results_runs(model_ids,max_runs):
    repeat_metrics = pd.DataFrame(columns=["Loss","Accuracy","ECE","C Viol"],index=model_ids)
    repeat_metrics.columns.name="Model"
    for model in model_ids:
        best_losses = []
        best_accs = []
        best_eces = []
        best_chiralities = []
        for run in range(max_runs):
            try:
                metrics = get_metrics_from_csv(model,METRICS_PATH,version=run,mode='test')
                best_losses.append(metrics['test_loss'])
                best_accs.append(metrics['test_acc'])
                best_eces.append(metrics['test_calibration_error'])
                best_chiralities.append((metrics['test_chirality_violation']))
            except:
                print(f"Error with {model}, run {run}")

        # if model == 'LeNet_cut_dataset_repeat':
        #     print(best_chiralities)
        nans_removed = np.count_nonzero(np.isnan(np.concatenate((best_losses, best_accs, best_eces, best_chiralities))))
        if nans_removed > 0:
            print(f"{model}: Removed {nans_removed} NaNs")
        best_losses = np.array(best_losses)[~np.isnan(best_losses)]
        best_accs = np.array(best_accs)[~np.isnan(best_accs)]
        best_eces = np.array(best_eces)[~np.isnan(best_eces)]
        best_chiralities = np.array(best_chiralities)[~np.isnan(best_chiralities)]
        
        
        repeat_metrics.loc[model] = {"Loss": f"{np.average(best_losses):.4f} ± {np.std(best_losses):.4f}",
                                        "Accuracy": f"{np.average(best_accs):.2%} ± {np.std(best_accs):.2%}",
                                        "ECE": f"{np.average(best_eces):.4f} ± {np.std(best_eces):.4f}",
                                        "C Viol": f"{np.average(best_chiralities):.4f} ± {np.std(best_chiralities):.4f}"}
    #print(tabulate(repeat_metrics,headers='keys',tablefmt='github'))
    return repeat_metrics

# model_ids = ['resnet18_cut_dataset_repeat', #Replace with repeat once all repeats have been tested
#              'resnet50_cut_dataset',
#              'jiaresnet50_cut_dataset',
#              'G_ResNet18_cut_dataset']
repeat_metrics = get_test_results_runs(model_ids,5)
repeat_metrics.head(6)

g_lenet_cut_dataset_repeat: Removed 4 NaNs
lenet_cut_dataset_repeat: Removed 5 NaNs
resnet18_cut_dataset_repeat: Removed 1 NaNs
resnet50_cut_dataset_repeat: Removed 1 NaNs


  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean,
  ret = ret.dtype.type(ret / rcount)


Model,Loss,Accuracy,ECE,C Viol
g_lenet_cut_dataset_repeat,0.5098 ± 0.0087,89.53% ± 0.28%,0.1545 ± 0.0085,0.1405 ± 0.0000
lenet_cut_dataset_repeat,0.5192 ± 0.0095,89.60% ± 0.60%,0.1797 ± 0.0121,nan ± nan
resnet18_cut_dataset_repeat,0.4514 ± 0.0075,92.18% ± 0.29%,0.1109 ± 0.0040,0.9351 ± 0.3880
resnet50_cut_dataset_repeat,0.4579 ± 0.0077,92.32% ± 0.37%,0.1201 ± 0.0060,1.3171 ± 0.4113
jiaresnet50_cut_dataset_repeat,0.4075 ± 0.0023,94.27% ± 0.06%,0.1171 ± 0.0063,0.0117 ± 0.0550
G_ResNet18_cut_dataset_repeat,0.4723 ± 0.0548,91.09% ± 1.64%,0.1063 ± 0.0089,0.3742 ± 0.5673
