In [2]:
import sys
import os
import torch
import glob
import pandas as pd
import numpy as np
import pingouin as pg
from age_prediction.models.\
    efficientnet_pytorch_3d import EfficientNet3D as EfNetB0
from age_prediction.metrics import MSE, MAE
from age_prediction.trainer import ModuleTrainer
from age_prediction.dataloader import MyDataLoader

In [1]:
def check_state_dict(ck, md):
    """Find the text that precedes the model's key names in a checkpoint.

    Args:
        ck: Mapping of parameter names to values taken from the checkpoint.
        md: Mapping of parameter names to values taken from the model.

    Returns:
        ``''`` when both mappings expose the same set of keys. Otherwise,
        whatever comes before the first occurrence of the model's first key
        inside the checkpoint's first key (the whole checkpoint key when the
        model key does not occur in it).
    """
    if ck.keys() != md.keys():
        first_ck_key = list(ck)[0]
        first_md_key = list(md)[0]
        # Only the first key of each mapping is inspected.
        leading_text, _, _ = first_ck_key.partition(first_md_key)
        return leading_text
    return ''

def correct_state_dict(ck, split_key):
    """Return a copy of ``ck`` with the leading ``split_key`` removed from keys.

    Only a prefix at the very start of a key is removed, so a key in which the
    same text appears again later (``module.block.module.weight``) keeps
    everything after the first prefix intact.

    Args:
        ck: Mapping of parameter names to values (a checkpoint state dict).
        split_key: Prefix to strip from each key. An empty or ``None`` prefix
            means there is nothing to strip.

    Returns:
        A new ``dict`` with the same values. Keys that do not start with
        ``split_key`` are carried over unchanged.
    """
    if not split_key:
        # Nothing to strip; still hand back a fresh dict so the caller never
        # aliases the input mapping.
        return dict(ck)

    prefix_len = len(split_key)
    return {
        (key[prefix_len:] if key.startswith(split_key) else key): value
        for key, value in ck.items()
    }

def MAE_pred(y_pred, y_true):
    """Mean absolute error between two tensors, as a plain Python number."""
    criterion = torch.nn.L1Loss(reduction='mean')
    loss = criterion(y_pred, y_true)
    # Move to host memory and drop the graph before converting to a scalar.
    return loss.cpu().detach().numpy().item()

def MSE_pred(y_pred, y_true):
    """Mean squared error between two tensors, as a plain Python number."""
    criterion = torch.nn.MSELoss()
    loss = criterion(y_pred, y_true)
    # Move to host memory and drop the graph before converting to a scalar.
    return loss.cpu().detach().numpy().item()

def delta_pred(y_pred, y_true):
    """Mean signed difference ``y_pred - y_true`` (positive = over-prediction)."""
    return np.mean(y_pred - y_true)

def pearson(y_pred, y_true):
    """Correlation coefficient between predictions and targets.

    Args:
        y_pred: Predicted values, passed as the first argument to ``pg.corr``.
        y_true: Reference values, passed as the second argument to ``pg.corr``.

    Returns:
        The value in the ``'r'`` column of the first row of the table returned
        by ``pg.corr`` (called with its default settings).
    """
    stats = pg.corr(y_pred, y_true)
    # Pick the first row by position. A bare ``[0]`` on a Series whose index is
    # not integer-labelled relies on pandas' positional fallback, which is
    # deprecated; ``.iloc`` is positional regardless of the row label.
    return stats['r'].iloc[0]


In [3]:
def get_snapshot_results(side, snapshot, dropout_rate, testfile):
    """Restore an EfficientNet3D-B0 snapshot and predict on one labelled split.

    The network is rebuilt on the CPU with a single output and a single input
    channel, the checkpoint weights and optimizer state are loaded into it,
    and predictions are produced for the split described by ``testfile``.

    Args:
        side: Side tag forwarded to the dataloader; also used to trim the
            image file names (everything from the first occurrence of
            ``side`` onward is dropped).
        snapshot: Path of the checkpoint file handed to ``torch.load``.
        dropout_rate: Dropout rate the network is constructed with.
        testfile: File name forwarded to the dataloader as ``test_file``.

    Returns:
        ``(preds, epoch)``: ``preds`` is a DataFrame with columns ``Name``,
        ``True`` and ``Pred``; ``epoch`` is the value stored under the
        checkpoint's ``'epoch'`` key.
    """
    device = torch.device('cpu')

    # Load effNet3D B0
    model = EfNetB0.from_name("efficientnet-b0",
                              override_params={
                                  'num_classes': 1,
                                  'dropout_rate': dropout_rate
                              },
                              in_channels=1,
                              )
    model = model.to(device)

    # The optimizer is only needed so its saved state can be restored and the
    # trainer can be compiled; no training step is run in this function.
    optimizer = torch.optim.RMSprop(model.parameters(),
                                    lr=.256, alpha=0.9,
                                    eps=1e-08, momentum=0.9,
                                    weight_decay=0)

    # NOTE: torch.load unpickles the file; only use it on trusted checkpoints.
    checkpoint = torch.load(snapshot, map_location=device)
    split_key = check_state_dict(checkpoint['state_dict'], model.state_dict())
    # check_state_dict returns '' (never None) when the key sets already
    # match, so test for truthiness: stripping an empty prefix is meaningless
    # and makes the key-renaming step fail.
    if split_key:
        checkpoint['state_dict'] = correct_state_dict(checkpoint['state_dict'], split_key)

    model.load_state_dict(checkpoint['state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer'])
    epoch = checkpoint['epoch']

    loss = torch.nn.L1Loss(reduction='mean')
    metrics = [MSE()]

    # Predict
    trainer = ModuleTrainer(model)
    trainer.compile(loss=loss,
                    optimizer=optimizer,
                    metrics=metrics)

    dataloader = MyDataLoader(database='../datasets',
                              csv_data='database_split',
                              side=side,
                              batch=512,
                              data_aug=False,
                              test_file=testfile
                              )
    dataloader.prepare_data('test_label')
    dataloader.setup('test_label')

    pred = trainer.predict_loader(dataloader.testlabel_dataloader(),
                                  cuda_device=False)

    # Keep only the file name, cut at the side tag, as the sample identifier.
    imgs = [img.split("/")[-1].split(side)[0] for img in dataloader.testpath]

    preds = pd.concat([pd.DataFrame(imgs),
                       pd.DataFrame(dataloader.testlabel),
                       pd.DataFrame(pred.detach().cpu().numpy())], axis=1)
    preds.columns = ['Name', 'True', 'Pred']

    return preds, epoch

def metrics_pipeline(preds):
    """Compute the evaluation metrics for one prediction table.

    Args:
        preds: DataFrame with a ``'Pred'`` and a ``'True'`` column.

    Returns:
        ``(mae, mse, delta, r)``: mean absolute error, mean squared error,
        mean signed error (prediction minus truth) and the correlation
        returned by ``pearson``.
    """
    predicted, actual = preds['Pred'], preds['True']
    # The torch-based metrics take tensors; the remaining two work directly
    # on the pandas columns.
    predicted_t = torch.from_numpy(predicted.values)
    actual_t = torch.from_numpy(actual.values)

    mae = MAE_pred(predicted_t, actual_t)
    mse = MSE_pred(predicted_t, actual_t)
    delta = delta_pred(predicted, actual)
    r = pearson(predicted, actual)
    return mae, mse, delta, r


In [4]:
def train_results(logger, epoch):
    """Read the training MAE and MSE logged for a given epoch.

    Args:
        logger: Path (or file-like object) of the CSV training log.
        epoch: 1-based epoch number; the row labelled ``epoch - 1`` is read.

    Returns:
        ``(mae, mse)``. When a non-validation column name contains ``'mae'``,
        MAE comes from ``'mae_metric'`` and MSE from ``'loss'``; otherwise MAE
        comes from ``'loss'`` and MSE from ``'mse_metric'``.
    """
    log = pd.read_csv(logger)
    # Drop validation and epoch columns, keeping only the training ones.
    is_train_col = ~log.columns.str.contains('val|epoch')
    row = log.loc[epoch - 1, log.columns[is_train_col]]

    # order mae, mse
    if row.index.str.contains('mae').any():
        return row['mae_metric'], row['loss']
    return row['loss'], row['mse_metric']

In [5]:
def organize_results(res, type, dp, ep=None, age=None, side=None):
    """Pack one set of metrics into a single-row DataFrame indexed by ``dp``.

    Args:
        res: Metric values. The first two are MAE and MSE; for any ``type``
            other than ``'train'`` it must hold exactly four values
            ``(mae, mse, delta, r)``.
        type: Suffix appended to every metric column name.
        dp: Value used as the row's index label.
        ep: Optional epoch, stored in an ``'epoch'`` column. Only used when
            ``age`` is given.
        age: Optional age label; when given, ``'side'`` and ``'age'`` columns
            are placed before the metrics.
        side: Value for the ``'side'`` column (only used when ``age`` is given).

    Returns:
        A one-row ``pd.DataFrame``.
    """
    row = {}
    if age is not None:
        row['side'] = side
        row['age'] = age
        if ep is not None:
            row['epoch'] = ep

    row['mae_' + type] = res[0]
    row['mse_' + type] = res[1]

    if type != 'train':
        _, _, delta, r = res
        row['delta_' + type] = delta
        row['r_' + type] = r

    return pd.DataFrame(row, index=[dp])


In [6]:
def get_results(side, age, date):
    """Collect train/val/test metrics for every matching best checkpoint.

    Checkpoints are looked up as ``outputs/ckpt_<date>*`` and kept only when
    their path contains ``age``, ``side + "_"`` and ``'best'``. For each one
    the dropout rate is parsed from the text between the last ``"_dp"`` and
    the following ``"_"``, the validation and test splits are predicted, and
    the training metrics are read from the matching logger CSV (same path
    with ``"ckpt"`` replaced by ``"logger"`` and the
    ``'_model_best.pth.tar'`` suffix replaced by ``".csv"``).

    Args:
        side: Side tag such as ``'_R'``; the part after the last ``"_"`` is
            stored in the ``side`` column.
        age: Age-range label used in file names (``'val_<age>.csv'``,
            ``'test_<age>.csv'``) and stored in the ``age`` column.
        date: Date tag that the checkpoint file names start with.

    Returns:
        A DataFrame with one row per checkpoint, indexed by dropout rate and
        sorted by that index. Empty when no checkpoint matches.
    """
    best_suffix = '_model_best.pth.tar'
    frames = []

    # Sorted so checkpoints are processed in the same order on every run;
    # glob.glob itself returns paths in arbitrary order.
    for ckpt in sorted(glob.glob('outputs/ckpt_' + date + '*')):
        if not (age in ckpt and side + "_" in ckpt and 'best' in ckpt):
            continue

        dropout_rate = float(ckpt.split("_dp")[-1].split("_")[0])

        val_pred, _ = get_snapshot_results(side, ckpt, dropout_rate, 'val_' + age + '.csv')
        val = organize_results(metrics_pipeline(val_pred), 'val', dropout_rate)

        test_pred, epoch = get_snapshot_results(side, ckpt, dropout_rate, 'test_' + age + '.csv')
        test = organize_results(metrics_pipeline(test_pred), 'test', dropout_rate)

        logger_path = ckpt.replace("ckpt", "logger").replace(best_suffix, ".csv")
        train = organize_results(train_results(logger_path, epoch),
                                 'train', dropout_rate, epoch,
                                 age, side.split("_")[-1])

        frames.append(pd.concat([train, val, test], axis=1))

    if not frames:
        # pd.concat rejects an empty list; keep returning an empty frame.
        return pd.DataFrame()

    # Concatenate once instead of growing a frame inside the loop; the stable
    # sort keeps rows with equal dropout rates in processing order.
    return pd.concat(frames).sort_index(kind='mergesort')


In [9]:
# Evaluate the 0-70 checkpoints from the 07-05-2021 run for each side (right
# first, then left) and stack the two tables with the left side on top.
res_R, res_L = (get_results(side_tag, '0-70', '07-05-2021')
                for side_tag in ('_R', '_L'))
results = pd.concat([res_L, res_R])
results

Unnamed: 0,side,age,epoch,mae_train,mse_train,mae_val,mse_val,delta_val,r_val,mae_test,mse_test,delta_test,r_test
0.2,L,0-70,566,1.308844,3.497234,3.333917,38.850494,0.298486,0.921178,6.48219,82.824793,-0.801165,0.82868
0.3,L,0-70,10,14.184983,259.862375,14.349529,251.683874,0.666164,0.047892,15.331074,287.101268,-6.34488,0.085681
0.4,L,0-70,582,1.335328,3.611637,3.148836,29.648598,0.110822,0.939834,6.523004,77.003927,-0.961461,0.839602
0.5,L,0-70,7,37.834399,1717.621644,13.839173,254.13828,-1.702403,0.003073,16.013167,322.767948,-8.713448,-0.035793
0.2,R,0-70,600,1.112827,2.616436,2.096694,12.536907,0.697655,0.976245,4.811347,48.79422,-0.684023,0.899617
0.3,R,0-70,592,2.859706,19.332509,3.990293,36.463289,0.653599,0.928416,5.75174,58.468112,-0.792853,0.878401
0.4,R,0-70,564,1.454314,4.847975,2.971371,26.308556,1.02998,0.950073,5.763256,63.761752,-0.556254,0.869763
0.5,R,0-70,597,2.41276,13.57205,3.802365,39.093561,0.403675,0.922945,6.657606,86.679592,-0.685925,0.825391


In [9]:
# to_csv does not create missing parent folders, so make sure the output
# directory exists before writing the combined metrics table.
os.makedirs('predict_results', exist_ok=True)
results.to_csv('predict_results/metrics_0-70.csv')