# Listing Evaluation Metrics for Each Trained Model

The aim of this file is to evaluate each of the trained files and to create a dictionary where the name of the file/folder refers to its evaluated metrics.

In [1]:
#Basics
import numpy as np
import matplotlib.pyplot as plt
import os
import pandas as pd

#Evaluation
from sklearn.metrics import roc_curve, roc_auc_score, confusion_matrix, recall_score, f1_score, precision_score

#PyTorch
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import SubsetRandomSampler

#Torchvision
import torchvision
from torchvision import transforms, datasets

#Data
from FRDEEP import FRDEEPF
from MiraBest import MiraBest_full

#My Functions / Models
from models_new import *

#Other
import pickle
from tqdm import tqdm
import PIL
from torchsummary import summary
from models.networks_other import init_weights

## To do:
(Evaluate data distributions (see bottom) to see if other uncert vs certain makes the distribution more like FRDEEP-F - This is now in datadistribution eval)

1. Create list of all models / folders. - for loops (os)
1. Extract model and formating option (ie. ft/deep_sup etc.) - IF loops
1. Prepare dataset according to requirements
1. Create dict for all models and their scores.
1. Create comprable ROC etc. for MiraBest.
1. Call function to output sorted list of best models.
1. Save scores somehow - csv? npy?

In [2]:
# Define transformations required for analysis (ie. func evaluate(f))
out_transform = transforms.Compose([transforms.ToTensor(),
                                     transforms.Normalize([0.5],[0.5])
                                    ])
test_transform = transforms.Compose([transforms.RandomRotation([0,360],resample=PIL.Image.BILINEAR),
                                      transforms.ToTensor(),
                                      transforms.Normalize([0.5],[0.5])
                                     ])

In [3]:
# We extract two lists containing all folders and files respectively in TrainedNetworks
dirs = os.listdir('TrainedNetworks/')
folders = []
files = []
for k in dirs:
    if k[-3:]=='.pt':
        files.append(k)
    elif k[-3:]!='.pt' and k[0]!='.':
        folders.append(k)
    else:
        pass
print(f'Folders: {len(folders)}\nFiles: {len(files)}')

Folders: 46
Files: 40


In [4]:
# Selecting dataset to be loaded and setting class_group(ings).
def determine_dataset(dataset,model):
    if dataset == 'automatic':
        if 'MiraBest' in model:
            dataset = 'MiraBest'
        else:
            dataset = 'FRDEEP-F'
    if dataset == 'MiraBest':
        if 'NOHYBRID' in model:
            class_groups = [[0,1,2,3,4],[5,6,7]]
            data_name=dataset+'NOHYBRID'
        elif 'NOUNC' in model:
            class_groups = [[0,1,2],[5,6]]
            data_name=dataset+'NOUNC'
        else:
            class_groups = [[0,1,2,3,4],[5,6,7,8,9]]
            data_name=dataset+'HYBRID'
    elif dataset == 'FRDEEP-F':
        class_groups = [[0],[1]]
        data_name=dataset
    elif 'NOHYBRID' in dataset:
        dataset = 'MiraBest'
        data_name=dataset+'NOHYBRID'
        class_groups = [[0,1,2,3,4],[5,6,7]]
    elif 'NOUNC' in dataset:
        dataset= 'MiraBest'
        data_name=dataset+'NOUNC'
        class_groups = [[0,1,2],[5,6]]
    elif 'HYBRID' in MiraBest:
        dataset= 'MiraBest'
        data_name=dataset+'HYBRID'
        class_groups = [[0,1,2,3,4],[5,6,7,8,9]]
    
    return dataset, data_name, class_groups

In [None]:
def evaluate(f,dataset='automatic'):
    """
    Goal is to evaluate the model in TrainedNetworks/f directory/file.
    Args:
        f             folder or file to be evaluated.
        dataset       dataset to be tested on. Valid selections include: ['Automatic','FRDEEP-F','MiraBest']
    Returns:
        
    """
    PATH = 'TrainedNetworks/'+f
    exemptions = ['playground-0128-0.1_500Epochs.pt',
                  '0303-MiraBest-playgroundv1']#,'0224-log-playgroundv1']
    
    

    assert os.path.isdir(PATH) or os.path.isfile(PATH), f"Entered file path does not lead to valid path: {PATH}"
    assert dataset in ['FRDEEP-F','MiraBest','MiraBestNOHYBRID','MiraBestNOUNC','automatic'], f"Dataset {dataset} is not applicable." 
    
    # Pass files which are outdated or tests:
    if ('CIFAR' in f) or (f in exemptions):
        print(f'NOT ABLE TO EVALUATE: {f}')
        return [np.nan,np.nan,np.nan,np.nan,np.nan,np.nan,np.nan,np.nan,np.nan,np.nan,np.nan,np.nan,np.nan]
    
    # Extract model path
    # Extract details from file path (model)
    if os.path.isfile(PATH):
        model = PATH
        variables = model.replace('/','-').split('-')
        date = variables[2][:4]
        
        lr = float(variables[-1].split('_')[0])
        if variables[-2][-1]=='e':
            lr = 1*10**-lr
        
        epoch = int(variables[-1].split('_')[-1][:-9])
    elif os.path.isdir(PATH):
        folder = PATH
        local_files = os.listdir(folder)
        # Select model with lowest loss
        b=0
        for i in local_files:
            a=i.split('.')[0]
            if a.isnumeric() and int(a)>b:
                b=int(a)
        model = folder+f'/{b}.pt'
        variables = model.replace('/','-').split('-')
        date = variables[1]
        epoch = int(variables[-1][:-3])
        lr = np.nan
        if variables[-2].isnumeric():
            lr = float(variables[-2])
            #print('Found learning rate!')
            if variables[-3][-1]=='e':
                lr = 1*10**-lr
    
    
    # Find network name
    available_networks = ['playground',
                          'playgroundv1',
                          'playgroundv2_concat',
                          'playgroundv2_mean',
                          'playgroundv2_deep_sup',
                          'playgroundv2_ft',
                          'transfer_original',
                          'AGSononet',
                          'AGTransfer']
    for idx,i in enumerate(available_networks):
        if i in model:
            net_name = i
    
    
    # Select network, put on device and load in model
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    if net_name=='playground': net=playground()
    if net_name=='playgroundv1': net=playgroundv1b()
    if net_name=='playgroundv2_concat': net=playgroundv2(aggregation_mode='concat')
    if net_name=='playgroundv2_mean': net = playgroundv2(aggregation_mode='mean')
    if net_name=='playgroundv2_deep_sup': net = playgroundv2(aggregation_mode='deep_sup')
    if net_name=='playgroundv2_ft': net = playgroundv2(aggregation_mode='ft')
    if net_name=='transfer_original': net = transfer_original()
    if net_name=='AGSononet': net = AGSononet()
    if net_name=='AGTransfer': net = AGTransfer()
    net.to(device)
    net.load_state_dict(torch.load(model,map_location=torch.device(device)))
    net.eval()
    
    # Select class groups and dataset based off of 'dataset':
    dataset, temp1, class_groups = determine_dataset(dataset,model)
    # Extract data_name of data which 'model' was trained on:
    temp2, data_name, temp3 = determine_dataset('automatic',model)
    
    
    # Import and Batch Data
    if dataset == 'MiraBest':
        testdata = MiraBest_full(root='./FIRST_data', train=False, download=True, transform=test_transform)
        outdata = MiraBest_full(root='./FIRST_data', train=False, download=True, transform=out_transform)
    else:
        testdata = FRDEEPF(root='./FIRST_data', train=False, download=True, transform=test_transform)
        outdata = FRDEEPF(root='./FIRST_data', train=False, download=True, transform=out_transform)
    batch_size = 16
    valid_size = 0.2
    allowed_labels = range(10)[:-2]
    outset = torch.utils.data.DataLoader(outdata, batch_size=batch_size)
    testset = torch.utils.data.DataLoader(testdata, batch_size=batch_size,shuffle=True)
    
    # Evaluate Testing Data
    labels=[]
    predicted=[]; predicted_prob=[]
    temp=[]
    for counter in range(360):
        with torch.no_grad():
            for data, label in testset:
                data = data.to(device)
                output = net.forward(data)
                for i in range(output.cpu().numpy().shape[0]):
                    if label.cpu().numpy()[i] in class_groups[0]:
                        predicted.append(np.argmax(output.cpu().numpy()[i]))
                        temp.append(output.cpu().numpy()[i][1]-output.cpu().numpy()[i][0])
                        labels.append(0)
                    elif label.cpu().numpy()[i] in class_groups[1]:
                        predicted.append(np.argmax(output.cpu().numpy()[i]))
                        temp.append(output.cpu().numpy()[i][1]-output.cpu().numpy()[i][0])
                        labels.append(1)
                    else: # Do not consider labels outside of class_groups
                        pass
    predicted_prob = np.asarray(temp)/max(1,np.max(np.positive(np.asarray(temp))))/2+0.5    
    #print(len(predicted_prob),predicted_prob[:5])
    #print(len(labels),labels[:5])

    
    # Evaluation Metrics
    
    ### Make sure sizes are the same of predicted and labels <--- F1 score not possible.
    auc = roc_auc_score(labels,predicted_prob) # Make into probabilities, not predicted class labels!!! ???
    confusion = confusion_matrix(labels,predicted) # Make into probabilities, not predicted class labels!!! ???
    fpr, tpr, thresholds = roc_curve(labels,predicted_prob)
    #accuracies = np.array([confusion[0,0]/np.sum(confusion[:,0]),confusion[1,1]/np.sum(confusion[:,1])])
    recall = recall_score(labels,predicted,average=None)
    precision = precision_score(labels,predicted,average=None)
    f1 = f1_score(labels,predicted,average=None)
    
    #print(f"auc:{auc}\nconfusion:{confusion}\naccuracies:{accuracies}\nrecall:{recall}\nprecision:{precision}\nf1:{f1}")
    # Return list to be added to dictionary of results
    out = [data_name,net_name,date,lr,epoch,auc]+[confusion[0,0],confusion[1,0],confusion[0,1],confusion[1,1]]+[recall[0],recall[1]]+[precision[0],precision[1]]+[f1[0],f1[1]]+[fpr,tpr,thresholds]
    return out

In [6]:
# Evaluate all of the missing models:
def eval_models(model_paths,dataset='automatic'):
    # Either load existing dictionary or initiate an empty dictionary:
    if os.path.isfile(f'evaluated_models_{dataset}.pickle'):
        pickle_in = open(f'evaluated_models_{dataset}.pickle','rb')
        evaluated_models = pickle.load(pickle_in)
        
        pickle_in.close()
        print(f'{dataset} Successfully loaded in dict of evaluated models.')
        #return evaluated_models
    else:
        evaluated_models = {}
        #evaluated_models['format'] = form
        print(f'{dataset} Initialised dict for model evaluation.')

    for idx, i in enumerate(model_paths):
        print(f'{idx}\t{i}')
        if evaluated_models.get(i,False) == False:
            print(f'\tEvaluating ...')
            evaluated_models[i]=evaluate(i,dataset=dataset)
        else:
            pass
    pickle_out = open(f'evaluated_models_{dataset}.pickle','wb')
    pickle.dump(evaluated_models,pickle_out)
    pickle_out.close()
    return evaluated_models

In [7]:
data_options = ['FRDEEP-F','MiraBest','MiraBestNOHYBRID','MiraBestNOUNC','automatic']
for dataset in data_options:
    print(f'--> EVALUATING ON: {dataset}')
    temp_dict = eval_models(files+folders,dataset)

"""
dataset_selection = 'automatic'
pickle_in = open(f'evaluated_models_{dataset_selection}.pickle','rb')
evaluated_models = pickle.load(pickle_in)
pickle_in.close()
#"""

--> EVALUATING ON: FRDEEP-F
FRDEEP-F Successfully loaded in dict of evaluated models.
0	playgroundv1-0214-0.01_360Epochs.pt
1	playgroundv1-0217-1e-07_124Epochs.pt
2	playground-0213-0.1_180Epochs.pt
3	playground-0211-0.1_360Epochs.pt
4	playground-0212-0.1_360Epochs.pt
5	playground-0207-0.1_360Epochs.pt
6	playgroundv1-0215-0.0001_124Epochs.pt
7	playground-0128-0.1_500Epochs.pt
8	playgroundv1-0217-0.001_124Epochs.pt
9	playground-0213-0.1_64Epochs.pt
10	playground-0129-0.1_500Epochs.pt
11	playgroundv1-0214-0.1_360Epochs.pt
12	playgroundv1-0214-0.001_361Epochs.pt
13	playground-0129-0.1_360Epochs.pt
14	playground-TESTING-0.001_124Epochs.pt
15	playgroundv1-0215-0.001_360Epochs.pt
16	AGSononet-0219-0.001_360Epochs.pt
17	playgroundv1-0213-0.1_5Epochs.pt
18	playgroundv1-0217Adam-0.0001_124Epochs.pt
19	playgroundv1-0217Adam-1e-05_360Epochs.pt
20	playgroundv1-0215-0.0001_360Epochs.pt
21	playgroundv1-0214-0.001_360Epochs.pt
22	playgroundv1-0213-0.1_360Epochs.pt
23	playgroundv1-0217Adam-1e-05_124Epo

  init.kaiming_normal(m.weight.data, a=0, mode='fan_in')
  nn.init.constant(self.psi.bias.data, 10.0) # Initialises the tensor self.psi.bias.data with values of 10.0 (Because bias=True in initialisation)
  init.normal(m.weight.data, 1.0, 0.02)
  init.constant(m.bias.data, 0.0)


Files already downloaded and verified
Files already downloaded and verified


  "See the documentation of nn.Upsample for details.".format(mode))


43	0402-MiraBestNOUNC-playgroundv2_deep_sup-0.001
44	1427-FRDEEPNOUNCAdam-transfer_original-0.0001
	Evaluating ...
Files already downloaded and verified
Files already downloaded and verified
45	0310-MiraBestSGD-playgroundv2_ft
46	0408-MiraBestNOHYBRIDSGD-playgroundv2_ft-0.0001
47	0228-playgroundv2_mean
48	0311-MiraBestSGDVariedLR-playgroundv2_deep_sup
49	0420-MiraBestNOHYBRID-playgroundv2_mean-0.0001
50	0422-MiraBestNOHYBRIDAdagrad-playgroundv2_mean-0.001
51	0331-MiraBestNOUNC-playgroundv2_deep_sup-0.0001
52	0228-playgroundv2_concat
53	0423-MiraBestNOHYBRIDSGD-playgroundv2_deep_sup-0.0001
54	0305-MiraBest-playgroundv2_mean
55	0427-MiraBestNOHYBRIDAdam-playgroundv2_mean-0.0001
	Evaluating ...
Files already downloaded and verified
Files already downloaded and verified
56	0311-CIFAR10-pg2_ft_CIFAR10
57	0423-MiraBestNOHYBRIDSGD-playgroundv2_mean-0.0001
58	0422-MiraBestNOHYBRIDAdagrad-playgroundv2_concat-0.001
59	0307-MiraBest-playgroundv2_ft
60	0331-MiraBestNOUNC-playgroundv2_deep_sup
61	0

"\ndataset_selection = 'automatic'\npickle_in = open(f'evaluated_models_{dataset_selection}.pickle','rb')\nevaluated_models = pickle.load(pickle_in)\npickle_in.close()\n#"

In [8]:
if 'df' in locals():
    del df
# The data in the dictionaries is in the format (form):
form = ['data_train','net_name','date','lr','epoch','auc','friP','friN','friiN','friiP','frirecall','friirecall','friprecision','friiprecision','frif1','friif1','fpr','tpr','thresholds']
### Add models trained on each dataset
for data in data_options[:-1]:
    # Load in dict:
    pickle_in = open(f'evaluated_models_{data}.pickle','rb')
    evaluated_models = pickle.load(pickle_in)
    pickle_in.close()

    # Load dict into dataframe
    df_temp = pd.DataFrame.from_dict(evaluated_models,orient='index',columns=form)
    # Make keys of dict into their own column
    df_temp['model'] = df_temp.index
    # Add data_evaluation column
    if data == 'MiraBest':
        df_temp = df_temp.assign(data_eval = f'{data}HYBRID')
    else:
        df_temp = df_temp.assign(data_eval = f'{data}')
    
    # Assign df or stack onto df
    if 'df' not in locals(): #Initialise
        df = df_temp
    elif 'df' in locals(): #Stack
        df = pd.concat([df,df_temp],axis=0)
    else:
        print('Something went wrong!')
    
# Reset index to numeric digits
df = df.reset_index(drop=True)
df[df.date!='TEST'].sort_values(by='date',ascending=False).head()

Unnamed: 0,data_train,net_name,date,lr,epoch,auc,friP,friN,friiN,friiP,...,friirecall,friprecision,friiprecision,frif1,friif1,fpr,tpr,thresholds,model,data_eval
254,FRDEEP-F,transfer_original,3427,,6.0,0.749383,18414.0,9718.0,8226.0,18722.0,...,0.658298,0.654557,0.694745,0.672387,0.676031,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.75375375...","[0.0, 3.516174402250352e-05, 0.033192686357243...","[2.0, 1.0, 0.72943354, 0.72936654, 0.71371394,...",3427-FRDEEPNOUNCSGD-transfer_original-0.0001,MiraBestNOHYBRID
340,FRDEEP-F,transfer_original,3427,,6.0,0.831932,13074.0,5032.0,4566.0,14768.0,...,0.745859,0.722081,0.763836,0.731494,0.75474,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 5.0505050505050505e-05, 0.00636363636363...","[2.0, 1.0, 0.9533529, 0.9531288, 0.9369577, 0....",3427-FRDEEPNOUNCSGD-transfer_original-0.0001,MiraBestNOUNC
82,FRDEEP-F,transfer_original,3427,,6.0,0.98679,7201.0,79.0,719.0,10001.0,...,0.992163,0.989148,0.932929,0.9475,0.961635,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 9.92063492063492e-05, 0.0081349206349206...","[2.0, 1.0, 0.9358625, 0.9354595, 0.9065662, 0....",3427-FRDEEPNOUNCSGD-transfer_original-0.0001,FRDEEP-F
168,FRDEEP-F,transfer_original,3427,,6.0,0.831819,13114.0,5063.0,4526.0,14737.0,...,0.744293,0.721461,0.765042,0.732278,0.754525,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 5.0505050505050505e-05, 0.00782828282828...","[2.0, 1.0, 0.9482585, 0.9478603, 0.9452208, 0....",3427-FRDEEPNOUNCSGD-transfer_original-0.0001,MiraBestHYBRID
343,FRDEEP-F,transfer_original,1427,,10.0,0.918092,16184.0,5258.0,1456.0,14542.0,...,0.734444,0.75478,0.908989,0.828207,0.812448,"[0.0, 5.668934240362812e-05, 0.003458049886621...","[0.0, 0.0, 0.0, 0.00020202020202020202, 0.0002...","[2.0, 1.0, 0.73541844, 0.7306974, 0.72464633, ...",1427-FRDEEPNOUNCSGD-transfer_original-0.0001,MiraBestNOUNC


In [9]:
# Add accuracy column
df['accuracy']=(df.friP+df.friiP)/(df.friP+df.friiP+df.friiN+df.friN)
#accuracy = (df.friP.to_numpy()+df.friiP.to_numpy())/(df.friP.to_numpy()+df.friiP.to_numpy()+df.friiN.to_numpy()+df.friN.to_numpy()) #NUMPY ACCURACY

In [32]:
df = df.sort_values(by=['accuracy','auc'],ascending=False)
df[df.date!='TEST'][['auc','accuracy','data_train','data_eval','net_name','model','date']].head(20)

Unnamed: 0,auc,accuracy,data_train,data_eval,net_name,model,date
82,0.98679,0.955667,FRDEEP-F,FRDEEP-F,transfer_original,3427-FRDEEPNOUNCSGD-transfer_original-0.0001,3427
85,0.978276,0.920333,FRDEEP-F,FRDEEP-F,transfer_original,1427-FRDEEPNOUNCSGD-transfer_original-0.0001,1427
301,0.956907,0.905235,MiraBestHYBRID,MiraBestNOUNC,playgroundv2_ft,0310-MiraBestSGD-playgroundv2_ft,310
4,0.948812,0.904278,FRDEEP-F,FRDEEP-F,playground,playground-0212-0.1_360Epochs.pt,212
339,0.958958,0.900588,MiraBestNOHYBRID,MiraBestNOUNC,playgroundv2_mean,0427-MiraBestNOHYBRIDAdam-playgroundv2_mean-0....,427
342,0.963946,0.897463,MiraBestNOHYBRID,MiraBestNOUNC,playgroundv2_concat,0427-MiraBestNOHYBRIDAdam-playgroundv2_concat-...,427
34,0.959157,0.896944,FRDEEP-F,FRDEEP-F,playground,playground-0211-0.1_180Epochs.pt,211
13,0.952663,0.890667,FRDEEP-F,FRDEEP-F,playground,playground-0129-0.1_360Epochs.pt,129
337,0.946634,0.887286,MiraBestNOHYBRID,MiraBestNOUNC,playgroundv2_ft,0427-MiraBestNOHYBRIDAdam-playgroundv2_ft-0.0001,427
80,0.976564,0.887111,FRDEEP-F,FRDEEP-F,transfer_original,1427-FRDEEPNOUNCAdam-transfer_original-0.0001,1427


In [11]:
# Sort df by AUC and then by acc
df = df.sort_values(by=['auc','accuracy'],ascending=False)

In [12]:
# Find all 'FRDEEP-F' models:
df[['auc','accuracy','data_train','data_eval','net_name','model','date']][df['data_train']=='FRDEEP-F'].head()

Unnamed: 0,auc,accuracy,data_train,data_eval,net_name,model,date
82,0.98679,0.955667,FRDEEP-F,FRDEEP-F,transfer_original,3427-FRDEEPNOUNCSGD-transfer_original-0.0001,3427
85,0.978276,0.920333,FRDEEP-F,FRDEEP-F,transfer_original,1427-FRDEEPNOUNCSGD-transfer_original-0.0001,1427
80,0.976564,0.887111,FRDEEP-F,FRDEEP-F,transfer_original,1427-FRDEEPNOUNCAdam-transfer_original-0.0001,1427
34,0.959157,0.896944,FRDEEP-F,FRDEEP-F,playground,playground-0211-0.1_180Epochs.pt,211
13,0.952663,0.890667,FRDEEP-F,FRDEEP-F,playground,playground-0129-0.1_360Epochs.pt,129


In [13]:
# Find all non 'FRDEEP-F' models:
df[['auc','accuracy','data_train','data_eval','net_name','model','date','epoch']][df['data_train']!='FRDEEP-F'].head(20)

Unnamed: 0,auc,accuracy,data_train,data_eval,net_name,model,date,epoch
342,0.963946,0.897463,MiraBestNOHYBRID,MiraBestNOUNC,playgroundv2_concat,0427-MiraBestNOHYBRIDAdam-playgroundv2_concat-...,427,27.0
339,0.958958,0.900588,MiraBestNOHYBRID,MiraBestNOUNC,playgroundv2_mean,0427-MiraBestNOHYBRIDAdam-playgroundv2_mean-0....,427,26.0
301,0.956907,0.905235,MiraBestHYBRID,MiraBestNOUNC,playgroundv2_ft,0310-MiraBestSGD-playgroundv2_ft,310,306.0
312,0.948409,0.878632,MiraBestHYBRID,MiraBestNOUNC,playgroundv2_ft,0309-MiraBest-playgroundv2_ft,309,1247.0
337,0.946634,0.887286,MiraBestNOHYBRID,MiraBestNOUNC,playgroundv2_ft,0427-MiraBestNOHYBRIDAdam-playgroundv2_ft-0.0001,427,29.0
324,0.938332,0.881677,MiraBestNOHYBRID,MiraBestNOUNC,playgroundv2_concat,0417-MiraBestNOHYBRID-playgroundv2_concat-0.0001,417,630.0
328,0.926317,0.865705,MiraBestNOHYBRID,MiraBestNOUNC,playgroundv2_mean,0423-MiraBestNOHYBRIDSGD-playgroundv2_mean-0.0001,423,358.0
322,0.918706,0.857933,MiraBestNOHYBRID,MiraBestNOUNC,playgroundv2_deep_sup,0331-MiraBestNOHYBRID-playgroundv2_deep_sup,331,977.0
215,0.912941,0.835784,MiraBestHYBRID,MiraBestNOHYBRID,playgroundv2_ft,0310-MiraBestSGD-playgroundv2_ft,310,306.0
323,0.911839,0.826736,MiraBestNOHYBRID,MiraBestNOUNC,playgroundv2_mean,0420-MiraBestNOHYBRID-playgroundv2_mean-0.0001,420,660.0


In [14]:
# Change dataframe to desired columns:
d = df[['auc','accuracy','data_train','data_eval','net_name','model','date']]
d.head(20)

Unnamed: 0,auc,accuracy,data_train,data_eval,net_name,model,date
82,0.98679,0.955667,FRDEEP-F,FRDEEP-F,transfer_original,3427-FRDEEPNOUNCSGD-transfer_original-0.0001,3427
85,0.978276,0.920333,FRDEEP-F,FRDEEP-F,transfer_original,1427-FRDEEPNOUNCSGD-transfer_original-0.0001,1427
80,0.976564,0.887111,FRDEEP-F,FRDEEP-F,transfer_original,1427-FRDEEPNOUNCAdam-transfer_original-0.0001,1427
342,0.963946,0.897463,MiraBestNOHYBRID,MiraBestNOUNC,playgroundv2_concat,0427-MiraBestNOHYBRIDAdam-playgroundv2_concat-...,427
34,0.959157,0.896944,FRDEEP-F,FRDEEP-F,playground,playground-0211-0.1_180Epochs.pt,211
339,0.958958,0.900588,MiraBestNOHYBRID,MiraBestNOUNC,playgroundv2_mean,0427-MiraBestNOHYBRIDAdam-playgroundv2_mean-0....,427
301,0.956907,0.905235,MiraBestHYBRID,MiraBestNOUNC,playgroundv2_ft,0310-MiraBestSGD-playgroundv2_ft,310
13,0.952663,0.890667,FRDEEP-F,FRDEEP-F,playground,playground-0129-0.1_360Epochs.pt,129
4,0.948812,0.904278,FRDEEP-F,FRDEEP-F,playground,playground-0212-0.1_360Epochs.pt,212
312,0.948409,0.878632,MiraBestHYBRID,MiraBestNOUNC,playgroundv2_ft,0309-MiraBest-playgroundv2_ft,309


In [15]:
# Five Best FRDEEP Trained Models:
best_frdeep = ['playground-0211-0.1_180Epochs.pt',
               'playground-0129-0.1_360Epochs.pt',
               'playground-0212-0.1_360Epochs.pt',
               'playground-0129-0.1_500Epochs.pt',
               '0224-playgroundv1']
d[d.model==best_frdeep[0]]

Unnamed: 0,auc,accuracy,data_train,data_eval,net_name,model,date
34,0.959157,0.896944,FRDEEP-F,FRDEEP-F,playground,playground-0211-0.1_180Epochs.pt,211
292,0.738033,0.6336,FRDEEP-F,MiraBestNOUNC,playground,playground-0211-0.1_180Epochs.pt,211
206,0.706924,0.613272,FRDEEP-F,MiraBestNOHYBRID,playground,playground-0211-0.1_180Epochs.pt,211
120,0.690327,0.6046,FRDEEP-F,MiraBestHYBRID,playground,playground-0211-0.1_180Epochs.pt,211


In [16]:
d[d.model==best_frdeep[-1]]

Unnamed: 0,auc,accuracy,data_train,data_eval,net_name,model,date
66,0.940708,0.8855,FRDEEP-F,FRDEEP-F,playgroundv1,0224-playgroundv1,224
321,0.782202,0.68344,FRDEEP-F,MiraBestNOUNC,playgroundv1,0224-playgroundv1,224
235,0.722892,0.647113,FRDEEP-F,MiraBestNOHYBRID,playgroundv1,0224-playgroundv1,224
149,0.704694,0.633422,FRDEEP-F,MiraBestHYBRID,playgroundv1,0224-playgroundv1,224


In [25]:
# Best MiraBest performer:
best_mirabest = ['0427-MiraBestNOHYBRIDAdam-playgroundv2_concat-0.0001',
                 '0310-MiraBestSGD-playgroundv2_ft',
                 '0309-MiraBest-playgroundv2_ft',
                 '0331-MiraBestNOHYBRID-playgroundv2_deep_sup',
                 '0310-MiraBestSGD-playgroundv2_ft',
                 '0305-MiraBest-playgroundv2_deep_sup']
d[d.model==best_mirabest[0]]

Unnamed: 0,auc,accuracy,data_train,data_eval,net_name,model,date
342,0.963946,0.897463,MiraBestNOHYBRID,MiraBestNOUNC,playgroundv2_concat,0427-MiraBestNOHYBRIDAdam-playgroundv2_concat-...,427
170,0.899092,0.822023,MiraBestNOHYBRID,MiraBestHYBRID,playgroundv2_concat,0427-MiraBestNOHYBRIDAdam-playgroundv2_concat-...,427
256,0.897362,0.820679,MiraBestNOHYBRID,MiraBestNOHYBRID,playgroundv2_concat,0427-MiraBestNOHYBRIDAdam-playgroundv2_concat-...,427
84,0.848919,0.735556,MiraBestNOHYBRID,FRDEEP-F,playgroundv2_concat,0427-MiraBestNOHYBRIDAdam-playgroundv2_concat-...,427


In [24]:
d[d.model==best_mirabest[1]]

Unnamed: 0,auc,accuracy,data_train,data_eval,net_name,model,date
312,0.948409,0.878632,MiraBestHYBRID,MiraBestNOUNC,playgroundv2_ft,0309-MiraBest-playgroundv2_ft,309
226,0.888097,0.812037,MiraBestHYBRID,MiraBestNOHYBRID,playgroundv2_ft,0309-MiraBest-playgroundv2_ft,309
140,0.881668,0.805467,MiraBestHYBRID,MiraBestHYBRID,playgroundv2_ft,0309-MiraBest-playgroundv2_ft,309
57,0.840517,0.775944,MiraBestHYBRID,FRDEEP-F,playgroundv2_ft,0309-MiraBest-playgroundv2_ft,309


In [19]:
best_models = best_frdeep+best_mirabest
avg_auc_max = 0 ; auc_model = ''
avg_acc_max = 0 ; acc_model = ''

for model in best_models:
    temp = d[d.model==model].mean(axis=0)
    if temp['auc']>avg_auc_max:
        avg_auc_max=temp['auc']
        auc_model=model
    if temp['accuracy']>avg_acc_max:
        avg_acc_max=temp['accuracy']
        acc_model=model

print(f"""
Best AUC avg: {avg_auc_max:.4f} from: {auc_model}
Best ACC avg: {avg_acc_max:.4f} from: {acc_model}
""")


Best AUC avg: 0.8994 from: 0310-MiraBestSGD-playgroundv2_ft
Best ACC avg: 0.8354 from: 0310-MiraBestSGD-playgroundv2_ft



In [20]:
# Best overall performer (avg)
if auc_model == acc_model:
    best_model = auc_model
    print(f'Our overall best model is: {best_model}')
else:
    print(f'No single best model. \n\tAUC Best: {auc_model}\n\tACC Best:{acc_model}')

Our overall best model is: 0310-MiraBestSGD-playgroundv2_ft


In [21]:
d[d.model=='0408-MiraBestNOHYBRIDSGD-playgroundv2_ft-0.0001']

Unnamed: 0,auc,accuracy,data_train,data_eval,net_name,model,date
325,0.908216,0.851309,MiraBestNOHYBRID,MiraBestNOUNC,playgroundv2_ft,0408-MiraBestNOHYBRIDSGD-playgroundv2_ft-0.0001,408
239,0.827982,0.757752,MiraBestNOHYBRID,MiraBestNOHYBRID,playgroundv2_ft,0408-MiraBestNOHYBRIDSGD-playgroundv2_ft-0.0001,408
153,0.826639,0.75846,MiraBestNOHYBRID,MiraBestHYBRID,playgroundv2_ft,0408-MiraBestNOHYBRIDSGD-playgroundv2_ft-0.0001,408
68,0.751685,0.624111,MiraBestNOHYBRID,FRDEEP-F,playgroundv2_ft,0408-MiraBestNOHYBRIDSGD-playgroundv2_ft-0.0001,408


In [22]:
df[['data_train','data_eval','auc','accuracy','friprecision','friiprecision','frif1','friif1']][df.model==best_mirabest[1]]

Unnamed: 0,data_train,data_eval,auc,accuracy,friprecision,friiprecision,frif1,friif1
312,MiraBestHYBRID,MiraBestNOUNC,0.948409,0.878632,0.883867,0.874252,0.869041,0.886915
226,MiraBestHYBRID,MiraBestNOHYBRID,0.888097,0.812037,0.822247,0.803382,0.800563,0.822263
140,MiraBestHYBRID,MiraBestHYBRID,0.881668,0.805467,0.806368,0.804725,0.789259,0.81936
57,MiraBestHYBRID,FRDEEP-F,0.840517,0.775944,0.730464,0.816034,0.753379,0.794727


In [23]:
print(best_frdeep[-1])
df[['data_train','data_eval','auc','accuracy','friprecision','friiprecision','frif1','friif1']][df.model==best_frdeep[-1]]

0224-playgroundv1


Unnamed: 0,data_train,data_eval,auc,accuracy,friprecision,friiprecision,frif1,friif1
66,FRDEEP-F,FRDEEP-F,0.940708,0.8855,0.888579,0.883281,0.86668,0.899664
321,FRDEEP-F,MiraBestNOUNC,0.782202,0.68344,0.614896,0.824355,0.723265,0.630226
235,FRDEEP-F,MiraBestNOHYBRID,0.722892,0.647113,0.592415,0.77944,0.703754,0.563694
149,FRDEEP-F,MiraBestHYBRID,0.704694,0.633422,0.573423,0.782764,0.690581,0.550359
