# Notebook to evaluate the activity model

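This notebook evaluates the automatic activity masks produced by each network variant (baseline, fine-tuning, fine-tuning (frozen) and trained) against the manual ground truth. It reports per-patient and mean voxel-wise and lesion-wise metrics, and plots how the automatic volumes and lesion counts correlate with the manual ones.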

In [1]:
%matplotlib notebook
import matplotlib as mpl
import matplotlib.pyplot as plt
import csv
import time
import os
import ants
import pandas as pd
import statsmodels.api as smapi
import numpy as np
from nibabel import load as load_nii
from nibabel import Nifti1Image
from skimage.filters import threshold_otsu
from skimage.measure import label as bwlabeln
from scipy.ndimage.morphology import binary_erosion
from scipy.ndimage.morphology import binary_dilation
from scipy.ndimage.morphology import binary_closing
from scipy.ndimage.morphology import binary_fill_holes
from scipy.stats import ttest_rel, normaltest
from scipy.stats import spearmanr, kendalltau
from shutil import copyfile
import seaborn as sns

## Utility functions
### Statistics

In [2]:
def plot_correlation(metrics, network, lowess=False):
    """
    Function to plot, for one network, the agreement between the automatic
    and the manual measurements of the positive cases. Both the volume and
    the number of lesions are standardised (z-scored) per measurement so that
    they can share the same axes.

    Each row of metrics['positive'] is expected to follow the layout used
    when the metrics are computed in this notebook:
    [Patient, Network, TPF (V), FPF (V), DSC, TP (V), V, GT (V),
     TPF (D), FPF (D), TP (D), D, GT (D), TP (C)].
    The automatic values are read from 'V' / 'D' and the manual ones from
    'GT (V)' / 'GT (D)'. (Reading positions 5/6 and 10/11 instead would
    compare the true positives against the automatic totals.)

    :param metrics: Dictionary with a 'positive' key that contains the list
     of per-patient metric rows.
    :param network: Name of the network (second element of each row) whose
     rows will be plotted.
    :param lowess: If True, a LOWESS curve is fitted and the title reports
     Spearman's rho. Otherwise, a robust linear fit is drawn and the title
     reports the R^2 of an ordinary least squares fit.
    :return: None. The figure is created through seaborn / matplotlib.
    """
    # Positions of the values of interest inside each metric row.
    network_col = 1
    model_vol_col, manual_vol_col = 6, 7      # 'V' and 'GT (V)'
    model_les_col, manual_les_col = 11, 12    # 'D' and 'GT (D)'

    rows = [m for m in metrics['positive'] if m[network_col] == network]
    if len(rows) == 0:
        raise ValueError(
            'No positive metrics available for network {:}'.format(network)
        )

    def standardise(column):
        # Mean and standard deviation are computed once per measurement.
        # A constant measurement has zero deviation and yields NaN values.
        values = np.array([m[column] for m in rows], dtype=float)
        return (values - np.mean(values)) / np.std(values)

    # Same row order as the legend: volumes first, lesion counts afterwards.
    array = [
        [manual, model, 'Volume']
        for manual, model in zip(
            standardise(manual_vol_col), standardise(model_vol_col)
        )
    ] + [
        [manual, model, 'Lesions']
        for manual, model in zip(
            standardise(manual_les_col), standardise(model_les_col)
        )
    ]

    columns = [
        'Manual',
        'Model',
        'Value per patient'
    ]
    dataframe = pd.DataFrame(array, columns=columns)

    # ci=68 draws a 68% confidence band around the fitted curve.
    snd_handle = sns.lmplot(
        x='Model', y='Manual', data=dataframe, hue='Value per patient',
        ci=68, truncate=False, lowess=lowess, robust=(not lowess)
    )

    def agreement(label):
        # Returns (R^2 of the OLS fit, Spearman's rho) for one measurement.
        subset = dataframe[dataframe['Value per patient'] == label]
        # Explicit copies: the NaN replacement below is done in place and
        # must never write into the buffers owned by the DataFrame (which
        # may be read-only when pandas copy-on-write is active).
        x = np.array(subset['Model'], dtype=float)
        y = np.array(subset['Manual'], dtype=float)

        # NaNs (from a zero standard deviation) are treated as "no
        # deviation from the mean" for the statistics only.
        x[np.isnan(x)] = 0
        y[np.isnan(y)] = 0

        results = smapi.OLS(y, smapi.add_constant(x)).fit()
        rho, _ = spearmanr(x, y)
        return results.rsquared, rho

    r2_lesions, spr_lesions = agreement('Lesions')
    r2_volume, spr_volume = agreement('Volume')

    if lowess:
        plt.title(
            u'{:} Lesions [\u03C1 = {:5.3f}] / Volume [\u03C1 = {:5.3f}]'.format(
                network, spr_lesions, spr_volume
            )
        )
    else:
        plt.title(
            u'{:} Lesions [R\u00b2 = {:5.3f}] / Volume [R\u00b2 = {:5.3f}]'.format(
                network,
                r2_lesions,
                r2_volume
            )
        )
    # Leave room for the title, which would otherwise be clipped.
    snd_handle.fig.subplots_adjust(top=.95, bottom=.1)

### Others

In [3]:
def get_int(string):
    """
    Function to build an int from every digit found in a string. Anything
    that is not a digit is dropped and the remaining digits are glued
    together in their original order, so 'a1b2' gives 12 and '3.14' gives
    314. A string without digits raises a ValueError.
    :param string: String that contains an int number
    :return: int number
    """
    digits = [char for char in string if str.isdigit(char)]
    return int(''.join(digits))


def get_dirs(path):
    """
    Function to list the folders located directly inside a given path.
    Regular files are ignored and the folder names are sorted.
    :param path: Folder where the patients should be located.
    :return: Sorted list of folder names (names only, not full paths).
    """
    folder_names = [
        entry for entry in os.listdir(path)
        if os.path.isdir(os.path.join(path, entry))
    ]
    folder_names.sort()
    return folder_names

## Data loading

In [4]:
# Root folder of the training set: every sub-folder is treated as one case.
path = '/media/transcend/MSReports/Longitudinal/MICCAI_Challenge2021/training/'
cases = sorted(
    entry for entry in os.listdir(path)
    if os.path.isdir(os.path.join(path, entry))
)
print(cases)

['013', '015', '016', '018', '019', '020', '021', '024', '026', '027', '029', '030', '032', '035', '037', '039', '043', '047', '048', '049', '051', '052', '057', '061', '068', '069', '070', '074', '077', '083', '084', '088', '089', '090', '091', '094', '095', '096', '099', '100']


In [5]:
# Split the cases according to whether their ground truth mask is empty.
positive_cases, negative_cases = [], []
for p in cases:
    p_path = os.path.join(path, p)
    print('\033[KChecking {:}'.format(p_path), end='\r')
    gt_bool = load_nii(
        os.path.join(p_path, 'ground_truth.nii.gz')
    ).get_fdata().astype(bool)
    # A single labelled voxel is enough to consider the case positive.
    if gt_bool.any():
        positive_cases.append(p)
    else:
        negative_cases.append(p)
print('Positive cases', positive_cases)
print('Negative cases', negative_cases)

Positive cases ['013', '016', '018', '020', '021', '024', '026', '027', '029', '030', '032', '035', '037', '039', '043', '047', '048', '057', '061', '069', '074', '077', '083', '088', '091', '094', '095', '099', '100']
Negative cases ['015', '019', '049', '051', '052', '068', '070', '084', '089', '090', '096']


## Metrics

### Loading and data preparation

In [6]:
# Per-case metrics for every network variant. Cases with a non-empty ground
# truth go to 'positive' (and to the CSV file); cases with an empty ground
# truth go to 'negative', where everything segmented is a false positive.
all_metrics = {
    'positive': [],
    'negative': []
}
# NOTE(review): these four lists are not filled in this cell; they are kept
# in case another cell reads them - confirm before removing.
manual_vols = []
model_vols = []
manual_lesions = []
model_lesions = []
# Column names for the positive rows. (V) = voxel-wise, (D) = detection /
# lesion-wise, (C) = case-wise.
positive_metrics = [
    'Patient', 'Network',
    'TPF (V)', 'FPF (V)', 'DSC', 'TP (V)', 'V','GT (V)',
    'TPF (D)', 'FPF (D)', 'TP (D)', 'D', 'GT (D)', 'TP (C)'
]
negative_metrics = [
    'Patient', 'Network',
    'FP (V)', 'FP (D)', 'FP (C)'
]
# File name suffix of each automatic mask -> display name of the network.
training_labels = {
    '': 'Baseline',
    '_ft': 'Fine-tuning',
    '_ft-freeze': 'Fine-tuning (frozen)',
    '_init': 'Trained',
}
# newline='' lets the csv module control the line endings itself, as its
# documentation recommends (otherwise some platforms add blank rows).
with open(
        os.path.join(path, 'activity_metrics.csv'), 'w', newline=''
) as csvfile:
    evalwriter = csv.writer(csvfile)
    # Only the positive rows are written to the file.
    evalwriter.writerow(positive_metrics)
    for p in cases:
        p_path = os.path.join(path, p)
        print('\033[KChecking {:}'.format(p_path), end='\r')
        gt_bool = load_nii(
            os.path.join(p_path, 'ground_truth.nii.gz')
        ).get_fdata().astype(bool)
        # Connected components of the manual mask (0 is the background).
        gt_lab = bwlabeln(gt_bool)
        gt_v = np.sum(gt_bool)
        gt_d = len(np.unique(gt_lab[gt_lab > 0]))
        gtc = gt_v > 0

        for tr_key, tr_value in training_labels.items():
            auto_name = os.path.join(
                p_path, 'positive_activity{:}.nii.gz'.format(tr_key)
            )
            auto_bool = load_nii(auto_name).get_fdata().astype(bool)
            auto_lab = bwlabeln(auto_bool)
            auto_labs = np.unique(auto_lab)

            v = np.sum(auto_bool)

            # Positive cases
            if gtc:
                # Some intermediate steps.
                overlap = np.logical_and(gt_bool, auto_bool)
                nonoverlap = np.logical_and(np.logical_not(gt_bool), auto_bool)

                # Pretty common and normal voxelwise / segmentation metrics.
                # These fractions are in the [0, 1] range.
                sum_v = v + gt_v

                tp_v = np.sum(overlap)
                tpf_v = tp_v / gt_v if gt_v > 0 else 0
                fp_v = np.sum(nonoverlap)
                fpf_v = fp_v / v if v > 0 else 0
                dsc = 2 * tp_v / sum_v if sum_v > 0 else 0

                # And finally a few regionwise / detection metrics.
                # Unlike the voxelwise fractions, these are percentages.
                # Manual lesions touched by at least one automatic voxel.
                tp_labs = np.unique(gt_lab[auto_bool])
                # Automatic labels found under the manual mask; every other
                # automatic lesion is a false positive.
                notfp_labs = np.unique(auto_lab[gt_bool])
                fp_mask = np.logical_not(np.isin(auto_lab, notfp_labs))
                fp_labs = np.unique(auto_lab[fp_mask])
                tp_d = len(tp_labs[tp_labs > 0])
                fp = len(fp_labs[fp_labs > 0])
                tpf_d = 100 * tp_d / gt_d if gt_d > 0 else 0
                # Number of automatic lesions (background label excluded).
                d = len(auto_labs[auto_labs > 0])
                fpf_d = 100 * fp / d if d > 0 else 0

                # The case is detected if anything at all was segmented.
                tpc = v > 0

                # Built once so that the table and the file always agree.
                row = [
                    p, tr_value, tpf_v, fpf_v, dsc, tp_v, v, gt_v,
                    tpf_d, fpf_d, tp_d, d, gt_d, tpc
                ]
                all_metrics['positive'].append(row)
                evalwriter.writerow(row)
            else:
                # Negative cases: every automatic voxel / lesion is a false
                # positive.
                fp_d = len(auto_labs[auto_labs > 0])
                fp_c = fp_d > 0
                fp_v = v

                all_metrics['negative'].append([
                    p, tr_value, fp_v, fp_d, fp_c
                ])

[KChecking /media/transcend/MSReports/Longitudinal/MICCAI_Challenge2021/training/100

### Patient-based metrics

In [7]:
# One row per positive patient and network.
pd.DataFrame(data=all_metrics['positive'], columns=positive_metrics)

Unnamed: 0,Patient,Network,TPF (V),FPF (V),DSC,TP (V),V,GT (V),TPF (D),FPF (D),TP (D),D,GT (D),TP (C)
0,013,Baseline,0.650000,0.587302,0.504854,26,63,40,100.000000,50.000000,1,2,1,True
1,013,Fine-tuning,0.650000,0.500000,0.565217,26,52,40,100.000000,50.000000,1,2,1,True
2,013,Fine-tuning (frozen),0.675000,0.578125,0.519231,27,64,40,100.000000,50.000000,1,2,1,True
3,013,Trained,0.750000,0.651163,0.476190,30,86,40,100.000000,50.000000,1,2,1,True
4,016,Baseline,0.631225,0.078585,0.749201,469,509,743,83.333333,0.000000,5,5,6,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
111,099,Trained,0.072289,0.000000,0.134831,6,6,83,100.000000,0.000000,1,1,1,True
112,100,Baseline,0.530837,0.020325,0.688571,482,492,908,100.000000,33.333333,2,3,2,True
113,100,Fine-tuning,0.512115,0.029228,0.670512,465,479,908,100.000000,50.000000,2,4,2,True
114,100,Fine-tuning (frozen),0.482379,0.049892,0.639883,438,461,908,100.000000,50.000000,2,4,2,True


In [8]:
# Patient / network pairs whose Dice score is below 0.6.
positive_df = pd.DataFrame(data=all_metrics['positive'], columns=positive_metrics)
positive_df.loc[positive_df['DSC'].lt(0.6)]

Unnamed: 0,Patient,Network,TPF (V),FPF (V),DSC,TP (V),V,GT (V),TPF (D),FPF (D),TP (D),D,GT (D),TP (C)
0,013,Baseline,0.650000,0.587302,0.504854,26,63,40,100.000,50.000000,1,2,1,True
1,013,Fine-tuning,0.650000,0.500000,0.565217,26,52,40,100.000,50.000000,1,2,1,True
2,013,Fine-tuning (frozen),0.675000,0.578125,0.519231,27,64,40,100.000,50.000000,1,2,1,True
3,013,Trained,0.750000,0.651163,0.476190,30,86,40,100.000,50.000000,1,2,1,True
8,018,Baseline,0.049451,0.000000,0.094241,18,18,364,50.000,0.000000,1,1,2,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
103,094,Trained,0.097953,0.816679,0.127682,244,1331,2491,60.000,87.500000,3,24,5,True
106,095,Fine-tuning (frozen),0.405045,0.027537,0.571889,7981,8207,19704,90.625,12.820513,29,39,32,True
108,099,Baseline,0.301205,0.000000,0.462963,25,25,83,100.000,0.000000,1,1,1,True
109,099,Fine-tuning,0.313253,0.000000,0.477064,26,26,83,100.000,0.000000,1,1,1,True


In [42]:
# Compare the 'Trained' and the 'Baseline' networks patient by patient.
trained_df = positive_df[positive_df['Network'] == 'Trained']
baseline_df = positive_df[positive_df['Network'] == 'Baseline']

# Both results are paired through the patient identifier instead of through
# their position, so a patient that is missing for one of the networks can
# not shift the comparison of all the following patients.
paired_dsc = trained_df[['Patient', 'DSC']].merge(
    baseline_df[['Patient', 'DSC']],
    on='Patient', suffixes=('_trained', '_baseline')
)
trained_is_better = paired_dsc['DSC_trained'] > paired_dsc['DSC_baseline']

# Ties are credited to the baseline (the comparison is strict).
trained_patients = paired_dsc.loc[trained_is_better, 'Patient'].tolist()
baseline_patients = paired_dsc.loc[~trained_is_better, 'Patient'].tolist()

In [43]:
# 'Trained' results of the patients listed in trained_patients.
trained_df.loc[trained_df['Patient'].isin(trained_patients)]

Unnamed: 0,Patient,Network,TPF (V),FPF (V),DSC,TP (V),V,GT (V),TPF (D),FPF (D),TP (D),D,GT (D),TP (C)
11,18,Trained,0.258242,0.196581,0.390852,94,117,364,100.0,50.0,2,4,2,True
23,24,Trained,0.731152,0.131757,0.793822,514,592,703,100.0,25.0,6,8,6,True
31,27,Trained,0.551464,0.045752,0.698983,584,612,1059,100.0,20.0,4,5,4,True
39,30,Trained,0.760284,0.208272,0.775687,536,677,705,100.0,57.142857,3,7,3,True
67,48,Trained,0.721831,0.088889,0.805501,205,225,284,100.0,0.0,1,1,1,True
71,57,Trained,0.407588,0.176342,0.545323,752,913,1845,81.25,18.75,13,16,16,True
75,61,Trained,0.536087,0.338993,0.592029,1129,1708,2106,91.666667,33.333333,11,15,12,True
91,83,Trained,0.591623,0.76442,0.336978,339,1439,573,100.0,90.0,2,20,2,True
115,100,Trained,0.577093,0.059246,0.715358,524,557,908,100.0,50.0,2,4,2,True


In [48]:
# 'Baseline' results of the patients that are not listed in baseline_patients.
baseline_df.loc[~baseline_df['Patient'].isin(baseline_patients)]

Unnamed: 0,Patient,Network,TPF (V),FPF (V),DSC,TP (V),V,GT (V),TPF (D),FPF (D),TP (D),D,GT (D),TP (C)
8,18,Baseline,0.049451,0.0,0.094241,18,18,364,50.0,0.0,1,1,2,True
20,24,Baseline,0.681366,0.064453,0.788477,479,512,703,83.333333,0.0,5,5,6,True
28,27,Baseline,0.521246,0.029877,0.678133,552,569,1059,100.0,0.0,4,4,4,True
36,30,Baseline,0.693617,0.199673,0.743161,489,611,705,100.0,50.0,3,6,3,True
64,48,Baseline,0.700704,0.074419,0.797595,199,215,284,100.0,0.0,1,1,1,True
68,57,Baseline,0.275339,0.086331,0.423157,508,556,1845,62.5,9.090909,10,11,16,True
72,61,Baseline,0.308167,0.347739,0.418575,649,995,2106,66.666667,46.153846,8,13,12,True
88,83,Baseline,0.390925,0.898643,0.160977,224,2210,573,100.0,94.594595,2,37,2,True
112,100,Baseline,0.530837,0.020325,0.688571,482,492,908,100.0,33.333333,2,3,2,True


In [49]:
# 'Trained' results of the patients that are not listed in trained_patients.
trained_df.loc[~trained_df['Patient'].isin(trained_patients)]

Unnamed: 0,Patient,Network,TPF (V),FPF (V),DSC,TP (V),V,GT (V),TPF (D),FPF (D),TP (D),D,GT (D),TP (C)
3,13,Trained,0.75,0.651163,0.47619,30,86,40,100.0,50.0,1,2,1,True
7,16,Trained,0.656797,0.24575,0.702158,488,647,743,83.333333,37.5,5,8,6,True
15,20,Trained,0.0,1.0,0.0,0,26,201,0.0,100.0,0,2,1,True
19,21,Trained,0.633139,0.309162,0.660731,1410,2041,2227,100.0,50.0,4,8,4,True
27,26,Trained,0.231383,0.022472,0.374194,87,89,376,44.444444,0.0,4,4,9,True
35,29,Trained,0.0,0.0,0.0,0,0,802,0.0,0.0,0,0,2,False
43,32,Trained,0.0,1.0,0.0,0,16,53,0.0,100.0,0,1,1,True
47,35,Trained,0.568807,0.325039,0.617354,434,643,763,100.0,73.333333,4,15,4,True
51,37,Trained,1.0,0.382353,0.763636,21,34,21,100.0,0.0,1,1,1,True
55,39,Trained,0.252745,0.032389,0.4008,3107,3211,12293,76.923077,21.428571,10,14,13,True


In [45]:
# 'Baseline' results of the patients listed in baseline_patients.
baseline_df.loc[baseline_df['Patient'].isin(baseline_patients)]

Unnamed: 0,Patient,Network,TPF (V),FPF (V),DSC,TP (V),V,GT (V),TPF (D),FPF (D),TP (D),D,GT (D),TP (C)
0,13,Baseline,0.65,0.587302,0.504854,26,63,40,100.0,50.0,1,2,1,True
4,16,Baseline,0.631225,0.078585,0.749201,469,509,743,83.333333,0.0,5,5,6,True
12,20,Baseline,0.522388,0.447368,0.537084,105,190,201,100.0,50.0,1,2,1,True
16,21,Baseline,0.612034,0.23168,0.68133,1363,1774,2227,100.0,33.333333,4,6,4,True
24,26,Baseline,0.385638,0.152047,0.530165,145,171,376,44.444444,20.0,4,5,9,True
32,29,Baseline,0.0,0.0,0.0,0,0,802,0.0,0.0,0,0,2,False
40,32,Baseline,0.0,1.0,0.0,0,2,53,0.0,100.0,0,1,1,True
44,35,Baseline,0.689384,0.329082,0.680026,526,784,763,75.0,62.5,3,8,4,True
48,37,Baseline,0.952381,0.0,0.97561,20,20,21,100.0,0.0,1,1,1,True
52,39,Baseline,0.285447,0.025548,0.44155,3509,3601,12293,69.230769,0.0,9,12,13,True


In [10]:
# One row per negative patient and network.
pd.DataFrame(data=all_metrics['negative'], columns=negative_metrics)

Unnamed: 0,Patient,Network,FP (V),FP (D),FP (C)
0,15,Baseline,0,0,False
1,15,Fine-tuning,0,0,False
2,15,Fine-tuning (frozen),0,0,False
3,15,Trained,0,0,False
4,19,Baseline,32,1,True
5,19,Fine-tuning,18,1,True
6,19,Fine-tuning (frozen),0,0,False
7,19,Trained,58,1,True
8,49,Baseline,12,1,True
9,49,Fine-tuning,0,0,False


In [11]:
# Negative patient / network pairs flagged in the 'FP (C)' column.
negative_df = pd.DataFrame(data=all_metrics['negative'], columns=negative_metrics)
negative_df.loc[negative_df['FP (C)']]

Unnamed: 0,Patient,Network,FP (V),FP (D),FP (C)
4,19,Baseline,32,1,True
5,19,Fine-tuning,18,1,True
7,19,Trained,58,1,True
8,49,Baseline,12,1,True
25,70,Fine-tuning,58,6,True
27,70,Trained,160,14,True
29,84,Fine-tuning,8,1,True
31,84,Trained,384,20,True
33,89,Fine-tuning,35,4,True
34,89,Fine-tuning (frozen),197,9,True


### Mean metrics

In [12]:
positive_df = pd.DataFrame(all_metrics['positive'], columns=positive_metrics)
# 'Patient' is an identifier, not a measurement. numeric_only=True keeps it
# out of the average explicitly; recent pandas versions no longer drop
# non-numeric columns silently and fail on them instead.
positive_df.groupby('Network', sort=False).mean(numeric_only=True)

Unnamed: 0_level_0,TPF (V),FPF (V),DSC,TP (V),V,GT (V),TPF (D),FPF (D),TP (D),D,GT (D),TP (C)
Network,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Baseline,0.447054,0.241587,0.505466,993.517241,1341.034483,2049.37931,75.842728,29.119513,3.931034,8.344828,5.206897,0.931034
Fine-tuning,0.385398,0.232584,0.448027,909.551724,1253.551724,2049.37931,75.56366,36.038769,3.931034,11.275862,5.206897,0.931034
Fine-tuning (frozen),0.391566,0.240381,0.437137,827.793103,1267.758621,2049.37931,67.922009,31.661212,3.586207,10.448276,5.206897,0.862069
Trained,0.427942,0.309784,0.466699,876.206897,1092.37931,2049.37931,72.393899,40.843596,4.103448,7.931034,5.206897,0.931034


In [13]:
# Mean metrics restricted to the rows with a Dice score above 0.6.
# numeric_only=True keeps the 'Patient' identifier out of the average;
# recent pandas versions fail on non-numeric columns instead of dropping them.
positive_df[positive_df['DSC'] > 0.6].groupby(
    'Network', sort=False
).mean(numeric_only=True)

Unnamed: 0_level_0,TPF (V),FPF (V),DSC,TP (V),V,GT (V),TPF (D),FPF (D),TP (D),D,GT (D),TP (C)
Network,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Baseline,0.649356,0.095513,0.74959,1605.642857,1772.857143,2678.0,91.542659,17.702914,5.0,6.571429,5.714286,True
Fine-tuning,0.634284,0.105195,0.738327,1935.909091,2084.818182,3280.727273,94.065657,33.869464,5.909091,9.545455,6.545455,True
Fine-tuning (frozen),0.651574,0.137007,0.72655,790.333333,902.583333,1283.0,94.212963,27.700818,3.166667,5.083333,3.5,True
Trained,0.658192,0.179928,0.713557,1188.384615,1336.769231,2197.923077,93.856838,32.06044,5.153846,8.0,5.846154,True


In [14]:
# Summed metrics restricted to the rows with a Dice score above 0.6.
# numeric_only=True keeps the 'Patient' identifier out of the result;
# recent pandas versions would otherwise concatenate the identifiers.
positive_df[positive_df['DSC'] > 0.6].groupby(
    'Network', sort=False
).sum(numeric_only=True)

Unnamed: 0_level_0,TPF (V),FPF (V),DSC,TP (V),V,GT (V),TPF (D),FPF (D),TP (D),D,GT (D),TP (C)
Network,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Baseline,9.090979,1.337179,10.494254,22479,24820,37492,1281.597222,247.840803,70,92,80,14
Fine-tuning,6.977122,1.157145,8.121596,21295,22933,36088,1034.722222,372.564103,65,105,72,11
Fine-tuning (frozen),7.818892,1.644084,8.718597,9484,10831,15396,1130.555556,332.409812,38,61,42,12
Trained,8.556499,2.339059,9.276238,15449,17378,28573,1220.138889,416.785714,67,104,76,13


In [15]:
negative_df = pd.DataFrame(all_metrics['negative'], columns=negative_metrics)
# numeric_only=True keeps the 'Patient' identifier out of the average;
# recent pandas versions fail on non-numeric columns instead of dropping them.
negative_df.groupby('Network', sort=False).mean(numeric_only=True)

Unnamed: 0_level_0,FP (V),FP (D),FP (C)
Network,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Baseline,5.545455,0.272727,0.272727
Fine-tuning,12.545455,1.181818,0.454545
Fine-tuning (frozen),22.454545,0.909091,0.181818
Trained,59.181818,3.727273,0.454545


### Correlations

In [16]:
# Two figures per network: the linear fit first, the LOWESS fit afterwards.
for network_name in (
        'Baseline', 'Fine-tuning', 'Fine-tuning (frozen)', 'Trained'
):
    for use_lowess in (False, True):
        plot_correlation(all_metrics, network_name, lowess=use_lowess)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>