# Model Accuracy

Up until now, I've been using mostly qualitative, visual assessment to determine how well a model performed. I want to create a better metric. I'm thinking that a good metric will be the percentage of pixels correctly labelled; let's see how well it works.

In [1]:
import torch
import pandas as pd

import monai
from monai.transforms import (
    Compose,
    LoadNiftid,
    NormalizeIntensityd,
    AddChanneld,
    ToTensord,
    CenterSpatialCropd,
)
from monai.metrics import DiceMetric
from monai.inferers import sliding_window_inference

import numpy as np

In [2]:
def get_model_metrics(model):
    data_df = pd.read_csv('/data/shared/prostate/yale_prostate/input_lists/MR_yale.csv')

    test_imgs = data_df['IMAGE'][295:].tolist()
    test_masks = data_df['SEGM'][295:].tolist()

    test_dicts = [{'image': image, 'mask': mask} for (image, mask) in zip(test_imgs, test_masks)]

    # Basic transforms
    data_keys = ["image", "mask"]
    data_transforms = Compose(
        [
            LoadNiftid(keys=data_keys),
            AddChanneld(keys=data_keys),
            NormalizeIntensityd(keys="image"),
#             CenterSpatialCropd(
#                 keys=data_keys,
#                 roi_size=[128, 128, 16]
#             ),
        ]
    )

    test_dataset = monai.data.CacheDataset(
        data=test_dicts,
        transform=Compose(
            [
                data_transforms,
                ToTensord(keys=data_keys)
            ]
        ),
    )
    
#     test_dataloader = monai.data.DataLoader(
#         test_dataset, batch_size=16, shuffle=False, num_workers=8
#     )
    
    accuracies = np.array([])
    
    metrics = np.array([])
    dice_metric = DiceMetric(
        include_background=True,
    )
    
    for sample in test_dataset:
        test_image = sample['image'].unsqueeze(0)
        
        test_mask = sliding_window_inference(
            test_image,
            roi_size=[128, 128, 16],
            sw_batch_size=1,
            predictor=model
        )

#         test_mask = model(test_image)
        
        test_mask = test_mask.argmax(1).detach()
        
        metric = dice_metric(
            y_pred=test_mask.unsqueeze(0),
            y=sample['mask'].unsqueeze(0)
        )
        
        metrics = np.append(metrics, metric)
        accuracy = 1 - torch.sum(torch.abs(sample['mask'] - test_mask)) / sample['mask'].numel()
        accuracies = np.append(accuracies, accuracy)
        
    avg_accuracy = np.average(accuracies)
    median_accuracy = np.median(accuracies)
    avg_metric = np.average(metrics)
    
    return {
        'average': avg_accuracy,
        'median': median_accuracy,
        'dice_metric': avg_metric
    }

In [3]:
import glob
import CustomModels

In [4]:
def print_acc_metrics(model_class, checkpoint_path):
    model = model_class.load_from_checkpoint(checkpoint_path)
    print(model.hparams.name)
    metrics = get_model_metrics(model)
    print('Average Accuracy: ', metrics['average'])
    print('Median Accuracy: ', metrics['median'])
    print('Average Dice: ', metrics['dice_metric'])

## 3D UNet

## Dice + Focal Loss

In [5]:
print_acc_metrics(CustomModels.UNet_DF, 'models/7-24-2020_dicefocal_prostate/_ckpt_epoch_19.ckpt')

7-24-2020_dicefocal_prostate
Average Accuracy:  0.9904237119656689
Median Accuracy:  0.9916963577270508
Average Dice:  0.7752539594218416


## MaskGAN

In [6]:
print_acc_metrics(CustomModels.MaskGAN, 'models/7-23-2020_MaskGAN_prostate/_ckpt_epoch_1964.ckpt')

7-23-2020_MaskGAN_prostate
Average Accuracy:  0.9956749036626996
Median Accuracy:  0.9967206716537476
Average Dice:  0.8872080805166712


## Pos Emphasized

In [7]:
print_acc_metrics(CustomModels.UNet_DF, 'models/7-28-2020_pos_emphasized_prostate/_ckpt_epoch_39.ckpt')

7-28-2020_pos_emphasized_prostate
Average Accuracy:  0.9934578897818079
Median Accuracy:  0.9942318797111511
Average Dice:  0.8384897090354055


## With Residuals