In [25]:
import seg_metrics.seg_metrics as sg
import pandas as pd
import numpy as np
import nibabel as nib
import os
import pydicom
from skimage import morphology
import matplotlib.pyplot as plt
from skimage.measure import label, regionprops
import cv2
from skimage.morphology import skeletonize, skeletonize_3d
import pickle
from sklearn.metrics import brier_score_loss
from scipy.stats import gaussian_kde
from matplotlib.patches import Rectangle
from skimage import morphology
import glob
from sklearn.metrics import r2_score
import scipy
from scipy.stats import mannwhitneyu
from scipy.stats import wilcoxon
import seaborn as sns
from scipy import stats
from scipy import ndimage
import numpy as np
import SimpleITK as sitk
from sklearn.metrics import mean_squared_error

In [49]:
import lookup_tables

In [116]:
def icc(Y, icc_type='ICC(2,1)'):
    ''' Calculate intraclass correlation coefficient

    ICC Formulas are based on:
    Shrout, P. E., & Fleiss, J. L. (1979). Intraclass correlations: uses in
    assessing rater reliability. Psychological bulletin, 86(2), 420.
    icc1:  x_ij = mu + beta_j + w_ij
    icc2/3:  x_ij = mu + alpha_i + beta_j + (ab)_ij + epsilon_ij
    Code modifed from nipype algorithms.icc
    https://github.com/nipy/nipype/blob/master/nipype/algorithms/icc.py

    Args:
        Y: The data Y are entered as a 'table' ie. subjects are in rows and repeated
            measures in columns
        icc_type: type of ICC to calculate. (ICC(2,1), ICC(2,k), ICC(3,1), ICC(3,k)) 
    Returns:
        ICC: (np.array) intraclass correlation coefficient
    '''

    [n, k] = Y.shape

    # Degrees of Freedom
    dfc = k - 1
    dfe = (n - 1) * (k-1)
    dfr = n - 1

    # Sum Square Total
    mean_Y = np.mean(Y)
    SST = ((Y - mean_Y) ** 2).sum()

    # create the design matrix for the different levels
    x = np.kron(np.eye(k), np.ones((n, 1)))  # sessions
    x0 = np.tile(np.eye(n), (k, 1))  # subjects
    X = np.hstack([x, x0])

    # Sum Square Error
    predicted_Y = np.dot(np.dot(np.dot(X, np.linalg.pinv(np.dot(X.T, X))),
                                X.T), Y.flatten('F'))
    residuals = Y.flatten('F') - predicted_Y
    SSE = (residuals ** 2).sum()

    MSE = SSE / dfe

    # Sum square column effect - between colums
    SSC = ((np.mean(Y, 0) - mean_Y) ** 2).sum() * n
    MSC = SSC / dfc  # / n (without n in SPSS results)

    # Sum Square subject effect - between rows/subjects
    SSR = SST - SSC - SSE
    MSR = SSR / dfr

    if icc_type == 'icc1':
        # ICC(2,1) = (mean square subject - mean square error) /
        # (mean square subject + (k-1)*mean square error +
        # k*(mean square columns - mean square error)/n)
        # ICC = (MSR - MSRW) / (MSR + (k-1) * MSRW)
        NotImplementedError("This method isn't implemented yet.")

    elif icc_type == 'ICC(2,1)' or icc_type == 'ICC(2,k)':
        # ICC(2,1) = (mean square subject - mean square error) /
        # (mean square subject + (k-1)*mean square error +
        # k*(mean square columns - mean square error)/n)
        if icc_type == 'ICC(2,k)':
            k = 1
        ICC = (MSR - MSE) / (MSR + (k-1) * MSE + k * (MSC - MSE) / n)

    elif icc_type == 'ICC(3,1)' or icc_type == 'ICC(3,k)':
        # ICC(3,1) = (mean square subject - mean square error) /
        # (mean square subject + (k-1)*mean square error)
        if icc_type == 'ICC(3,k)':
            k = 1
        ICC = (MSR - MSE) / (MSR + (k-1) * MSE)

    return ICC

def cl_score(v, s):
    """[this function computes the skeleton volume overlap]
    Args:
        v ([bool]): [image]
        s ([bool]): [skeleton]
    Returns:
        [float]: [computed skeleton volume intersection]
    """
    return np.sum(v*s)/np.sum(s)


def clDice(v_l, v_p):
    """[this function computes the cldice metric]
    Args:
        v_p ([bool]): [predicted image]
        v_l ([bool]): [ground truth image]
    Returns:
        [float]: [cldice metric]
    """
    if len(v_p.shape)==2:
        tprec = cl_score(v_p,skeletonize(v_l))
        tsens = cl_score(v_l,skeletonize(v_p))
    elif len(v_p.shape)==3:
        tprec = cl_score(v_p,skeletonize_3d(v_l))
        tsens = cl_score(v_l,skeletonize_3d(v_p))
    return 2*tprec*tsens/(tprec+tsens)

In [66]:
def compute_dice_coef(y_true, y_pred):

    smooth=1
    y_true_f = y_true.flatten()
    y_pred_f = y_pred.flatten()
    intersection = np.sum(y_true_f * y_pred_f)
    return (2. * intersection + smooth) / (np.sum(y_true_f) + np.sum(y_pred_f) + smooth)

In [121]:
def compute_jaccard_coef(y_true, y_pred):

    smooth=1
    y_true_f = y_true.flatten()
    y_pred_f = y_pred.flatten()
    intersection = np.sum(y_true_f * y_pred_f)
    union = np.sum(y_true_f) + np.sum(y_pred_f) - intersection
    return intersection / union

In [100]:
def getEdgeOfMask(mask):
    
    edge = np.zeros_like(mask)
    mask_pixels = np.where(mask > 0)

    for idx in range(0,mask_pixels[0].size):

        x = mask_pixels[0][idx]
        y = mask_pixels[1][idx]
        z = mask_pixels[2][idx]

        if mask[x-1:x+2, y-1:y+2, z-1:z+2].sum() < 27:
            edge[x,y,z] = 1
            
    return edge

def compute_AddedPathLength(mask_true, mask_pred, spacing_mm):
    
    edge_true = getEdgeOfMask(mask_true)
    edge_pred = getEdgeOfMask(mask_pred)
   
    apl = (edge_true > edge_pred).astype(int).sum()
    
    return apl*spacing_mm[0]*spacing_mm[1]*spacing_mm[2]/10 

In [105]:
def compute_nsd(img_1, img_2, tau):
    
    img_1_b = getEdgeOfMask(img_1)
    img_2_b = getEdgeOfMask(img_2)
    
    strel_size = 1 + tau*2
    strel = np.ones((strel_size, strel_size, strel_size))
    
    img_1_bb = morphology.binary_dilation(img_1_b, strel)
    img_2_bb = morphology.binary_dilation(img_2_b, strel)
    
    int_1 = img_1_b*img_2_bb
    int_2 = img_1_bb*img_2_b
    
    return (np.sum(int_1)+np.sum(int_2))/(np.sum(img_1_b) + np.sum(img_2_b))

In [138]:
def compute_hausdorf_surf_distance(mask_gt, mask_pred, spacing_mm, percent=95):
    """Computes the robust Hausdorff distance. "Robust", because it uses the `percent` percentile 
    of the distances instead of the maximum distance. The percentage is computed by correctly taking 
    the area of each surface element into account.
    Args:
    mask_gt: 3-dim Numpy array of type bool. The ground truth mask.
    mask_pred: 3-dim Numpy array of type bool. The predicted mask.
    spacing_mm: 3-element list-like structure. Voxel spacing in x0, x1 and x2 direction.
    percent: a float value between 0 and 100.
    Returns:
    a float value. The robust Hausdorff distance in mm. If one of the masks
    is empty, the corresponding lists are empty and all distances in the other
    list are `inf`.
    """
    
    mask_gt = mask_gt>0
    mask_pred = mask_pred>0

    neighbour_code_to_surface_area = np.zeros([256])
    for code in range(256):
        normals = np.array(lookup_tables._NEIGHBOUR_CODE_TO_NORMALS[code])
        sum_area = 0
        for normal_idx in range(normals.shape[0]):
            # normal vector
            n = np.zeros([3])
            n[0] = normals[normal_idx, 0] * spacing_mm[1] * spacing_mm[2]
            n[1] = normals[normal_idx, 1] * spacing_mm[0] * spacing_mm[2]
            n[2] = normals[normal_idx, 2] * spacing_mm[0] * spacing_mm[1]
            area = np.linalg.norm(n)
            sum_area += area
        neighbour_code_to_surface_area[code] = sum_area

    # compute the bounding box of the masks to trim
    # the volume to the smallest possible processing subvolume
    mask_all = mask_gt | mask_pred
    bbox_min = np.zeros(3, np.int64)
    bbox_max = np.zeros(3, np.int64)

    # max projection to the x0-axis
    proj_0 = np.max(np.max(mask_all, axis=2), axis=1)
    idx_nonzero_0 = np.nonzero(proj_0)[0]
    if len(idx_nonzero_0) == 0:  # pylint: disable=g-explicit-length-test
        return {"distances_gt_to_pred": np.array([]),
                "distances_pred_to_gt": np.array([]),
                "surfel_areas_gt": np.array([]),
                "surfel_areas_pred": np.array([])}

    bbox_min[0] = np.min(idx_nonzero_0)
    bbox_max[0] = np.max(idx_nonzero_0)

    # max projection to the x1-axis
    proj_1 = np.max(np.max(mask_all, axis=2), axis=0)
    idx_nonzero_1 = np.nonzero(proj_1)[0]
    bbox_min[1] = np.min(idx_nonzero_1)
    bbox_max[1] = np.max(idx_nonzero_1)

    # max projection to the x2-axis
    proj_2 = np.max(np.max(mask_all, axis=1), axis=0)
    idx_nonzero_2 = np.nonzero(proj_2)[0]
    bbox_min[2] = np.min(idx_nonzero_2)
    bbox_max[2] = np.max(idx_nonzero_2)

    # crop the processing subvolume.
    # we need to zeropad the cropped region with 1 voxel at the lower,
    # the right and the back side. This is required to obtain the "full"
    # convolution result with the 2x2x2 kernel
    cropmask_gt = np.zeros((bbox_max - bbox_min)+2, np.uint8)
    cropmask_pred = np.zeros((bbox_max - bbox_min)+2, np.uint8)

    cropmask_gt[0:-1, 0:-1, 0:-1] = mask_gt[bbox_min[0]:bbox_max[0]+1,bbox_min[1]:bbox_max[1]+1,
                                          bbox_min[2]:bbox_max[2]+1]

    cropmask_pred[0:-1, 0:-1, 0:-1] = mask_pred[bbox_min[0]:bbox_max[0]+1,bbox_min[1]:bbox_max[1]+1,
                                              bbox_min[2]:bbox_max[2]+1]

    # compute the neighbour code (local binary pattern) for each voxel
    # the resultsing arrays are spacially shifted by minus half a voxel in each
    # axis.
    # i.e. the points are located at the corners of the original voxels
    kernel = np.array([[[128, 64],[32, 16]],[[8, 4],[2, 1]]])
    
    neighbour_code_map_gt = ndimage.filters.correlate(cropmask_gt.astype(np.uint8), kernel, mode="constant", cval=0)
    neighbour_code_map_pred = ndimage.filters.correlate(cropmask_pred.astype(np.uint8), kernel, mode="constant", cval=0)

    # create masks with the surface voxels
    borders_gt = ((neighbour_code_map_gt != 0) & (neighbour_code_map_gt != 255))
    borders_pred = ((neighbour_code_map_pred != 0) &(neighbour_code_map_pred != 255))

    # compute the distance transform (closest distance of each voxel to the surface voxels)
    if borders_gt.any():
        distmap_gt = ndimage.morphology.distance_transform_edt(~borders_gt, sampling=spacing_mm)
    else:
        distmap_gt = np.Inf * np.ones(borders_gt.shape)

    if borders_pred.any():
        distmap_pred = ndimage.morphology.distance_transform_edt(~borders_pred, sampling=spacing_mm)
    else:
        distmap_pred = np.Inf * np.ones(borders_pred.shape)

    # compute the area of each surface element
    surface_area_map_gt = neighbour_code_to_surface_area[neighbour_code_map_gt]
    surface_area_map_pred = neighbour_code_to_surface_area[neighbour_code_map_pred]

    # create a list of all surface elements with distance and area
    distances_gt_to_pred = distmap_pred[borders_gt]
    distances_pred_to_gt = distmap_gt[borders_pred]
    surfel_areas_gt = surface_area_map_gt[borders_gt]
    surfel_areas_pred = surface_area_map_pred[borders_pred]

    # sort them by distance
    if distances_gt_to_pred.shape != (0,):
        sorted_surfels_gt = np.array(sorted(zip(distances_gt_to_pred, surfel_areas_gt)))
        distances_gt_to_pred = sorted_surfels_gt[:, 0]
        surfel_areas_gt = sorted_surfels_gt[:, 1]

    if distances_pred_to_gt.shape != (0,):
        sorted_surfels_pred = np.array(sorted(zip(distances_pred_to_gt, surfel_areas_pred)))
        distances_pred_to_gt = sorted_surfels_pred[:, 0]
        surfel_areas_pred = sorted_surfels_pred[:, 1]

    if len(distances_gt_to_pred) > 0:  # pylint: disable=g-explicit-length-test
        surfel_areas_cum_gt = np.cumsum(surfel_areas_gt) / np.sum(surfel_areas_gt)
        idx = np.searchsorted(surfel_areas_cum_gt, percent/100.0)
        perc_distance_gt_to_pred = distances_gt_to_pred[min(idx, len(distances_gt_to_pred)-1)]
        max_distance_gt_to_pred = np.max(distances_gt_to_pred)
    else:
        perc_distance_gt_to_pred = np.Inf
        max_distance_gt_to_pred = np.Inf

    if len(distances_pred_to_gt) > 0:  # pylint: disable=g-explicit-length-test
        surfel_areas_cum_pred = (np.cumsum(surfel_areas_pred) /np.sum(surfel_areas_pred))
        idx = np.searchsorted(surfel_areas_cum_pred, percent/100.0)
        perc_distance_pred_to_gt = distances_pred_to_gt[min(idx, len(distances_pred_to_gt)-1)]
        max_distance_pred_to_gt = np.max(distances_pred_to_gt)
    else:
        perc_distance_pred_to_gt = np.Inf
        max_distance_pred_to_gt = np.Inf

    return max(max_distance_gt_to_pred, max_distance_pred_to_gt), max(perc_distance_gt_to_pred, perc_distance_pred_to_gt)

In [139]:
sub_names_test = ['AMC012', 'AMC006', 
                  'MUMC094', 'MUMC027', 'MUMC079', 'MUMC052', 'MUMC127', 'MUMC071', 'MUMC038', 'MUMC093', 'MUMC107', 
                  'MUMC022', 'MUMC114', 'MUMC115', 'MUMC069', 'MUMC130', 'MUMC036', 'MUMC007', 'MUMC059', 'MUMC080', 
                  'UMCU036', 'UMCU025', 'UMCU008', 'UMCU034']

sub_names_emc = ['EMC003', 'EMC004', 'EMC005', 'EMC007', 'EMC008', 'EMC009', 'EMC011', 
                 'EMC015', 'EMC018', 'EMC020', 'EMC024', 'EMC027', 'EMC029', 'EMC031', 
                 'EMC032', 'EMC034', 'EMC035', 'EMC036', 'EMC038', 'EMC041', 'EMC042', 
                 'EMC043', 'EMC045', 'EMC046', 'EMC047', 'EMC048', 'EMC049', 'EMC050', 
                 'EMC051', 'EMC052', 'EMC054', 'EMC055', 'EMC056', 'EMC057']

In [140]:
nifti_dirname_GT_test = r"C:\Users\E.Lavrova\Documents\GitHub\plaqueuqalp\res\nifti_compare\test\test_GT"
nifti_dirname_GT_t2w = r"C:\Users\E.Lavrova\Documents\GitHub\plaqueuqalp\res\nifti_compare\t2w\t2w_GT"
nifti_dirname_GT_t1wce = r"C:\Users\E.Lavrova\Documents\GitHub\plaqueuqalp\res\nifti_compare\t1wce\t1wce_GT"
nifti_dirname_GT_emc = r"C:\Users\E.Lavrova\Documents\GitHub\plaqueuqalp\res\nifti_compare\emc\emc_GT"

nifti_dirname_nnunet_test = r"C:\Users\E.Lavrova\Documents\GitHub\plaqueuqalp\res\nifti_compare\test\test_nnunet"
nifti_dirname_nnunet_t2w = r"C:\Users\E.Lavrova\Documents\GitHub\plaqueuqalp\res\nifti_compare\t2w\t2w_nnunet"
nifti_dirname_nnunet_t1wce = r"C:\Users\E.Lavrova\Documents\GitHub\plaqueuqalp\res\nifti_compare\t1wce\t1wce_nnunet"
nifti_dirname_nnunet_emc = r"C:\Users\E.Lavrova\Documents\GitHub\plaqueuqalp\res\nifti_compare\emc\emc_nnunet"

nifti_dirname_nnunet_test_sm = r"C:\Users\E.Lavrova\Documents\GitHub\plaqueuqalp\res\nifti_compare\test\test_nnunet_p"
nifti_dirname_nnunet_t2w_sm = r"C:\Users\E.Lavrova\Documents\GitHub\plaqueuqalp\res\nifti_compare\t2w\t2w_nnunet_p"
nifti_dirname_nnunet_t1wce_sm = r"C:\Users\E.Lavrova\Documents\GitHub\plaqueuqalp\res\nifti_compare\t1wce\t1wce_nnunet_p"
nifti_dirname_nnunet_emc_sm = r"C:\Users\E.Lavrova\Documents\GitHub\plaqueuqalp\res\nifti_compare\emc\emc_nnunet_p"

nifti_dirname_plaqunet_test = r"C:\Users\E.Lavrova\Documents\GitHub\plaqueuqalp\res\nifti_compare\test\test_plaqunet"
nifti_dirname_plaqunet_t2w = r"C:\Users\E.Lavrova\Documents\GitHub\plaqueuqalp\res\nifti_compare\t2w\t2w_plaqunet"
nifti_dirname_plaqunet_t1wce = r"C:\Users\E.Lavrova\Documents\GitHub\plaqueuqalp\res\nifti_compare\t1wce\t1wce_plaqunet"
nifti_dirname_plaqunet_emc = r"C:\Users\E.Lavrova\Documents\GitHub\plaqueuqalp\res\nifti_compare\emc\emc_plaqunet"

nifti_dirname_plaqunet_test_sm = r"C:\Users\E.Lavrova\Documents\GitHub\plaqueuqalp\res\nifti_compare\test\test_plaqunet_p"
nifti_dirname_plaqunet_t2w_sm = r"C:\Users\E.Lavrova\Documents\GitHub\plaqueuqalp\res\nifti_compare\t2w\t2w_plaqunet_p"
nifti_dirname_plaqunet_t1wce_sm=r"C:\Users\E.Lavrova\Documents\GitHub\plaqueuqalp\res\nifti_compare\t1wce\t1wce_plaqunet_p"
nifti_dirname_plaqunet_emc_sm = r"C:\Users\E.Lavrova\Documents\GitHub\plaqueuqalp\res\nifti_compare\emc\emc_plaqunet_p"

nifti_dirname_plaqumap_test = r"C:\Users\E.Lavrova\Documents\GitHub\plaqueuqalp\res\nifti_compare\test\test_plaqumap"
nifti_dirname_plaqumap_t2w = r"C:\Users\E.Lavrova\Documents\GitHub\plaqueuqalp\res\nifti_compare\t2w\t2w_plaqumap"
nifti_dirname_plaqumap_t1wce = r"C:\Users\E.Lavrova\Documents\GitHub\plaqueuqalp\res\nifti_compare\t1wce\t1wce_plaqumap"
nifti_dirname_plaqumap_emc = r"C:\Users\E.Lavrova\Documents\GitHub\plaqueuqalp\res\nifti_compare\emc\emc_plaqumap"

nifti_dirname_plaqumap_test_sm = r"C:\Users\E.Lavrova\Documents\GitHub\plaqueuqalp\res\nifti_compare\test\test_plaqumap_p"
nifti_dirname_plaqumap_t2w_sm = r"C:\Users\E.Lavrova\Documents\GitHub\plaqueuqalp\res\nifti_compare\t2w\t2w_plaqumap_p"
nifti_dirname_plaqumap_t1wce_sm=r"C:\Users\E.Lavrova\Documents\GitHub\plaqueuqalp\res\nifti_compare\t1wce\t1wce_plaqumap_p"
nifti_dirname_plaqumap_emc_sm = r"C:\Users\E.Lavrova\Documents\GitHub\plaqueuqalp\res\nifti_compare\emc\emc_plaqumap_p"

In [187]:
def get_scores_df(sub_names, dirname_gt, dirname_pred):

    df_scores = []

    for sub_name in sub_names:

        filename_gt = os.path.join(dirname_gt, sub_name + '.nii.gz')
        filename_pred = os.path.join(dirname_pred, sub_name + '.nii.gz')

        mask_gt = sitk.ReadImage(filename_gt)
        mask_pred = sitk.ReadImage(filename_pred)

        mask_gt = sitk.GetArrayFromImage(mask_gt)
        mask_pred = sitk.GetArrayFromImage(mask_pred)

        rec = {'sub': sub_name} 
        rec['dsc'] = compute_dice_coef(mask_gt, mask_pred)
        rec['jsc'] = compute_jaccard_coef(mask_gt, mask_pred)

        hd_max, hd_95 = compute_hausdorf_surf_distance(mask_gt, mask_pred, [2, 0.303030, 0.303030], percent=95)

        rec['hd'] = hd_max
        rec['hd95'] = hd_95

        rec['clDsc'] = clDice(mask_gt, mask_pred)
        rec['nsd'] = compute_nsd(mask_gt, mask_pred, tau=1)
        rec['apl, cm'] = compute_AddedPathLength(mask_gt, mask_pred, [2, 0.303030, 0.303030])
        
        vol_gt = np.sum(mask_gt)*0.303030*0.303030*2/1000
        vol_pred = np.sum(mask_pred)*0.303030*0.303030*2/1000

        rec['vol_gt, ml'] = vol_gt
        rec['vol_pred, ml'] = vol_pred

        rec['vol_diff, ml'] = vol_gt-vol_pred
        rec['abs_vol_diff, ml'] = abs(vol_gt-vol_pred)

        df_scores.append(rec)

    df_scores = pd.DataFrame(df_scores)
    
    median_scores, iqr_scores = get_score_median_iqr(df_scores)

    return df_scores, median_scores, iqr_scores

In [188]:
def get_score_median_iqr(df_scores):
    median_scores = df_scores.median()
    iqr_scores = df_scores.quantile(0.75) - df_scores.quantile(0.25)
    return median_scores, iqr_scores

In [192]:
df_scores_nnunet_test, median_scores_nnunet_test, iqr_scores_nnunet_test = get_scores_df(sub_names_test, 
                                                                                         nifti_dirname_GT_test, 
                                                                                         nifti_dirname_nnunet_test)
df_scores_nnunet_t1wce, median_scores_nnunet_t1wce, iqr_scores_nnunet_t1wce = get_scores_df(sub_names_test, 
                                                                                            nifti_dirname_GT_t1wce, 
                                                                                            nifti_dirname_nnunet_t1wce)
df_scores_nnunet_t2w, median_scores_nnunet_t2w, iqr_scores_nnunet_t2w = get_scores_df(sub_names_test, 
                                                                                      nifti_dirname_GT_t2w, 
                                                                                      nifti_dirname_nnunet_t2w)
df_scores_nnunet_emc, median_scores_nnunet_emc, iqr_scores_nnunet_emc = get_scores_df(sub_names_emc, 
                                                                                      nifti_dirname_GT_emc, 
                                                                                      nifti_dirname_nnunet_emc)

summary_scores_nnunet = pd.concat([median_scores_nnunet_test, iqr_scores_nnunet_test, 
                                   median_scores_nnunet_t1wce, iqr_scores_nnunet_t1wce, 
                                   median_scores_nnunet_t2w, iqr_scores_nnunet_t2w, 
                                   median_scores_nnunet_emc, iqr_scores_nnunet_emc
                                  ], axis=1)
summary_scores_nnunet.columns = ['test (median)', 'test (IQR)', 't1wce (median)', 't1wce (IQR)', 
                                 't2w (median)', 't2w (IQR)', 'emc (median)', 'emc (IQR)']


print ('dd: ', icc(np.column_stack((df_scores_nnunet_test['vol_gt, ml'], df_scores_nnunet_test['vol_pred, ml'])), 
                   icc_type='ICC(2,k)'))
print ('d1: ', icc(np.column_stack((df_scores_nnunet_t1wce['vol_gt, ml'], df_scores_nnunet_t1wce['vol_pred, ml'])), 
                   icc_type='ICC(2,k)'))
print ('d2: ', icc(np.column_stack((df_scores_nnunet_t2w['vol_gt, ml'], df_scores_nnunet_t2w['vol_pred, ml'])), 
                   icc_type='ICC(2,k)'))
print ('dd: ', icc(np.column_stack((df_scores_nnunet_emc['vol_gt, ml'], df_scores_nnunet_emc['vol_pred, ml'])), 
                   icc_type='ICC(2,k)'))

print ('dd: ', mean_squared_error(df_scores_nnunet_test['vol_gt, ml'], df_scores_nnunet_test['vol_pred, ml'], 
                                  squared=False))
print ('d1: ', mean_squared_error(df_scores_nnunet_t1wce['vol_gt, ml'],df_scores_nnunet_t1wce['vol_pred, ml'], 
                                  squared=False))
print ('d2: ', mean_squared_error(df_scores_nnunet_t2w['vol_gt, ml'], df_scores_nnunet_t2w['vol_pred, ml'], 
                                  squared=False))
print ('ed: ', mean_squared_error(df_scores_nnunet_emc['vol_gt, ml'], df_scores_nnunet_emc['vol_pred, ml'], 
                                  squared=False))

summary_scores_nnunet

  


dd:  0.9370858656856272
d1:  0.5452627065435705
d2:  0.8278785943245025
dd:  0.515556396620515
dd:  0.2203022711795197
d1:  0.7210682141106652
d2:  0.2979320560932429
ed:  1.3062246119852325


Unnamed: 0,test (median),test (IQR),t1wce (median),t1wce (IQR),t2w (median),t2w (IQR),emc (median),emc (IQR)
dsc,0.915519,0.04011,0.896231,0.121681,0.898951,0.048856,0.872583,0.116781
jsc,0.844221,0.067904,0.811965,0.18831,0.816445,0.08009,0.773968,0.175413
hd,2.53483,5.266332,3.600734,9.5753,2.636793,7.029601,7.061991,4.632097
hd95,1.00084,1.75567,1.302308,4.750351,1.025428,1.765156,4.36674,3.660876
clDsc,0.983624,0.056235,0.992466,0.275004,1.0,0.08077,0.902388,0.130542
nsd,0.972002,0.052348,0.944786,0.215395,0.970522,0.066613,0.874076,0.161613
"apl, cm",28.797004,17.001803,36.703324,20.417774,33.406728,10.358106,99.210086,63.337798
"vol_gt, ml",2.230574,0.399586,2.216616,0.437924,2.230574,0.394352,3.624602,1.150365
"vol_pred, ml",2.135257,0.428282,2.166754,0.796784,2.131768,0.431266,2.891271,0.946371
"vol_diff, ml",0.134619,0.271212,-0.077043,0.354728,0.030854,0.305922,0.692285,0.522038


In [194]:
df_scores_plaqunet_test, median_scores_plaqunet_test, iqr_scores_plaqunet_test = get_scores_df(sub_names_test, 
                                                                                         nifti_dirname_GT_test, 
                                                                                         nifti_dirname_plaqunet_test)
df_scores_plaqunet_t1wce, median_scores_plaqunet_t1wce, iqr_scores_plaqunet_t1wce = get_scores_df(sub_names_test, 
                                                                                            nifti_dirname_GT_t1wce, 
                                                                                            nifti_dirname_plaqunet_t1wce)
df_scores_plaqunet_t2w, median_scores_plaqunet_t2w, iqr_scores_plaqunet_t2w = get_scores_df(sub_names_test, 
                                                                                      nifti_dirname_GT_t2w, 
                                                                                      nifti_dirname_plaqunet_t2w)
df_scores_plaqunet_emc, median_scores_plaqunet_emc, iqr_scores_plaqunet_emc = get_scores_df(sub_names_emc, 
                                                                                      nifti_dirname_GT_emc, 
                                                                                      nifti_dirname_plaqunet_emc)

summary_scores_plaqunet = pd.concat([median_scores_plaqunet_test, iqr_scores_plaqunet_test, 
                                   median_scores_plaqunet_t1wce, iqr_scores_plaqunet_t1wce, 
                                   median_scores_plaqunet_t2w, iqr_scores_plaqunet_t2w, 
                                   median_scores_plaqunet_emc, iqr_scores_plaqunet_emc
                                  ], axis=1)
summary_scores_plaqunet.columns = ['test (median)', 'test (IQR)', 't1wce (median)', 't1wce (IQR)', 
                                 't2w (median)', 't2w (IQR)', 'emc (median)', 'emc (IQR)']


print ('dd: ', icc(np.column_stack((df_scores_plaqunet_test['vol_gt, ml'], df_scores_plaqunet_test['vol_pred, ml'])), 
                   icc_type='ICC(2,k)'))
print ('d1: ', icc(np.column_stack((df_scores_plaqunet_t1wce['vol_gt, ml'], df_scores_plaqunet_t1wce['vol_pred, ml'])), 
                   icc_type='ICC(2,k)'))
print ('d2: ', icc(np.column_stack((df_scores_plaqunet_t2w['vol_gt, ml'], df_scores_plaqunet_t2w['vol_pred, ml'])), 
                   icc_type='ICC(2,k)'))
print ('dd: ', icc(np.column_stack((df_scores_plaqunet_emc['vol_gt, ml'], df_scores_plaqunet_emc['vol_pred, ml'])), 
                   icc_type='ICC(2,k)'))

print ('dd: ', mean_squared_error(df_scores_plaqunet_test['vol_gt, ml'], df_scores_plaqunet_test['vol_pred, ml'], 
                                  squared=False))
print ('d1: ', mean_squared_error(df_scores_plaqunet_t1wce['vol_gt, ml'],df_scores_plaqunet_t1wce['vol_pred, ml'], 
                                  squared=False))
print ('d2: ', mean_squared_error(df_scores_plaqunet_t2w['vol_gt, ml'], df_scores_plaqunet_t2w['vol_pred, ml'], 
                                  squared=False))
print ('ed: ', mean_squared_error(df_scores_plaqunet_emc['vol_gt, ml'], df_scores_plaqunet_emc['vol_pred, ml'], 
                                  squared=False))

summary_scores_plaqunet

  


dd:  0.8855167230246309
d1:  0.7398457351078757
d2:  0.849928780053147
dd:  0.7519983465526479
dd:  0.2782159968007799
d1:  0.44278103341836883
d2:  0.2833773139553194
ed:  0.8688829169473344


Unnamed: 0,test (median),test (IQR),t1wce (median),t1wce (IQR),t2w (median),t2w (IQR),emc (median),emc (IQR)
dsc,0.918985,0.049563,0.886114,0.132073,0.897172,0.050207,0.867757,0.091948
jsc,0.850111,0.082718,0.795519,0.200828,0.813525,0.082103,0.766398,0.140128
hd,2.278981,1.341173,3.48791,6.134541,2.518235,4.761909,7.651124,4.830383
hd95,0.933677,1.171124,1.530153,4.751841,1.249425,3.082536,4.188599,3.605129
clDsc,1.0,0.01712,0.977841,0.190239,1.0,0.07795,0.889349,0.131869
nsd,0.970915,0.053144,0.932086,0.221638,0.962174,0.083859,0.884554,0.106791
"apl, cm",31.276338,17.910892,35.206541,23.273599,33.223074,12.382895,87.612313,55.656454
"vol_gt, ml",2.230574,0.399586,2.216616,0.437924,2.230574,0.394352,3.624602,1.150365
"vol_pred, ml",2.111107,0.438704,2.193017,0.80234,2.087232,0.389072,2.902749,0.978097
"vol_diff, ml",0.160422,0.262396,-0.040863,0.408769,0.024885,0.353856,0.645086,0.303076


In [195]:
df_scores_plaqumap_test, median_scores_plaqumap_test, iqr_scores_plaqumap_test = get_scores_df(sub_names_test, 
                                                                                         nifti_dirname_GT_test, 
                                                                                         nifti_dirname_plaqumap_test)
df_scores_plaqumap_t1wce, median_scores_plaqumap_t1wce, iqr_scores_plaqumap_t1wce = get_scores_df(sub_names_test, 
                                                                                            nifti_dirname_GT_t1wce, 
                                                                                            nifti_dirname_plaqumap_t1wce)
df_scores_plaqumap_t2w, median_scores_plaqumap_t2w, iqr_scores_plaqumap_t2w = get_scores_df(sub_names_test, 
                                                                                      nifti_dirname_GT_t2w, 
                                                                                      nifti_dirname_plaqumap_t2w)
df_scores_plaqumap_emc, median_scores_plaqumap_emc, iqr_scores_plaqumap_emc = get_scores_df(sub_names_emc, 
                                                                                      nifti_dirname_GT_emc, 
                                                                                      nifti_dirname_plaqumap_emc)

summary_scores_plaqumap = pd.concat([median_scores_plaqumap_test, iqr_scores_plaqumap_test, 
                                   median_scores_plaqumap_t1wce, iqr_scores_plaqumap_t1wce, 
                                   median_scores_plaqumap_t2w, iqr_scores_plaqumap_t2w, 
                                   median_scores_plaqumap_emc, iqr_scores_plaqumap_emc
                                  ], axis=1)
summary_scores_plaqumap.columns = ['test (median)', 'test (IQR)', 't1wce (median)', 't1wce (IQR)', 
                                 't2w (median)', 't2w (IQR)', 'emc (median)', 'emc (IQR)']


print ('dd: ', icc(np.column_stack((df_scores_plaqumap_test['vol_gt, ml'], df_scores_plaqumap_test['vol_pred, ml'])), 
                   icc_type='ICC(2,k)'))
print ('d1: ', icc(np.column_stack((df_scores_plaqumap_t1wce['vol_gt, ml'], df_scores_plaqumap_t1wce['vol_pred, ml'])), 
                   icc_type='ICC(2,k)'))
print ('d2: ', icc(np.column_stack((df_scores_plaqumap_t2w['vol_gt, ml'], df_scores_plaqumap_t2w['vol_pred, ml'])), 
                   icc_type='ICC(2,k)'))
print ('dd: ', icc(np.column_stack((df_scores_plaqumap_emc['vol_gt, ml'], df_scores_plaqumap_emc['vol_pred, ml'])), 
                   icc_type='ICC(2,k)'))

print ('dd: ', mean_squared_error(df_scores_plaqumap_test['vol_gt, ml'], df_scores_plaqumap_test['vol_pred, ml'], 
                                  squared=False))
print ('d1: ', mean_squared_error(df_scores_plaqumap_t1wce['vol_gt, ml'],df_scores_plaqumap_t1wce['vol_pred, ml'], 
                                  squared=False))
print ('d2: ', mean_squared_error(df_scores_plaqumap_t2w['vol_gt, ml'], df_scores_plaqumap_t2w['vol_pred, ml'], 
                                  squared=False))
print ('ed: ', mean_squared_error(df_scores_plaqumap_emc['vol_gt, ml'], df_scores_plaqumap_emc['vol_pred, ml'], 
                                  squared=False))

summary_scores_plaqumap

  


dd:  0.9095519373699396
d1:  0.7369483230271406
d2:  0.8585896937768533
dd:  0.8076071130081085
dd:  0.24130216125400636
d1:  0.45076071275785423
d2:  0.27376673802847973
ed:  0.7252523906087737


Unnamed: 0,test (median),test (IQR),t1wce (median),t1wce (IQR),t2w (median),t2w (IQR),emc (median),emc (IQR)
dsc,0.916972,0.033133,0.891757,0.113594,0.897665,0.050851,0.894006,0.067375
jsc,0.846674,0.055896,0.804656,0.17498,0.814423,0.083896,0.808322,0.106654
hd,2.18518,2.301245,3.594206,6.260535,2.540063,5.934103,7.494247,4.6456
hd95,0.933677,1.259802,1.681668,5.609319,1.152355,1.990329,4.468687,3.743264
clDsc,0.996124,0.045148,0.987482,0.267293,1.0,0.093711,0.907252,0.140077
nsd,0.972961,0.042005,0.936623,0.207302,0.958213,0.067322,0.894854,0.082482
"apl, cm",28.576619,10.693275,36.078899,19.25616,32.470091,11.772245,78.383682,60.10089
"vol_gt, ml",2.230574,0.399586,2.216616,0.437924,2.230574,0.394352,3.624602,1.150365
"vol_pred, ml",2.257112,0.379476,2.351602,0.764048,2.307158,0.344214,3.174741,0.765839
"vol_diff, ml",-0.026171,0.283884,-0.210468,0.416574,-0.110376,0.328512,0.471992,0.300413


In [197]:
def compare_scores(x_1, x_2):
    U, p = stats.wilcoxon(x_1, x_2, zero_method = 'wilcox')
    return p

In [199]:
print ('dsc')
p = compare_scores(df_scores_plaqumap_emc['dsc'], df_scores_nnunet_emc['dsc'])
print ('plaq-u-net vs plaq-uncertainty-net: ', p)
p = compare_scores(df_scores_plaqumap_emc['dsc'], df_scores_plaqunet_emc['dsc'])
print ('nnUNet vs plaq-uncertainty-net: ', p)

print ('jsc')
p = compare_scores(df_scores_plaqumap_emc['jsc'], df_scores_nnunet_emc['jsc'])
print ('plaq-u-net vs plaq-uncertainty-net: ', p)
p = compare_scores(df_scores_plaqumap_emc['jsc'], df_scores_plaqunet_emc['jsc'])
print ('nnUNet vs plaq-uncertainty-net: ', p)

print ('hd')
p = compare_scores(df_scores_plaqumap_emc['hd'], df_scores_nnunet_emc['hd'])
print ('plaq-u-net vs plaq-uncertainty-net: ', p)
p = compare_scores(df_scores_plaqumap_emc['hd'], df_scores_plaqunet_emc['hd'])
print ('nnUNet vs plaq-uncertainty-net: ', p)

print ('hd95')
p = compare_scores(df_scores_plaqumap_emc['hd95'], df_scores_nnunet_emc['hd95'])
print ('plaq-u-net vs plaq-uncertainty-net: ', p)
p = compare_scores(df_scores_plaqumap_emc['hd95'], df_scores_plaqunet_emc['hd95'])
print ('nnUNet vs plaq-uncertainty-net: ', p)

print ('clDsc')
p = compare_scores(df_scores_plaqumap_emc['clDsc'], df_scores_nnunet_emc['clDsc'])
print ('plaq-u-net vs plaq-uncertainty-net: ', p)
p = compare_scores(df_scores_plaqumap_emc['clDsc'], df_scores_plaqunet_emc['clDsc'])
print ('nnUNet vs plaq-uncertainty-net: ', p)

print ('nsd')
p = compare_scores(df_scores_plaqumap_emc['nsd'], df_scores_nnunet_emc['nsd'])
print ('plaq-u-net vs plaq-uncertainty-net: ', p)
p = compare_scores(df_scores_plaqumap_emc['nsd'], df_scores_plaqunet_emc['nsd'])
print ('nnUNet vs plaq-uncertainty-net: ', p)

print ('apl, cm')
p = compare_scores(df_scores_plaqumap_emc['apl, cm'], df_scores_nnunet_emc['apl, cm'])
print ('plaq-u-net vs plaq-uncertainty-net: ', p)
p = compare_scores(df_scores_plaqumap_emc['apl, cm'], df_scores_plaqunet_emc['apl, cm'])
print ('nnUNet vs plaq-uncertainty-net: ', p)

print ('vol_diff, ml')
p = compare_scores(df_scores_plaqumap_emc['vol_diff, ml'], df_scores_nnunet_emc['vol_diff, ml'])
print ('plaq-u-net vs plaq-uncertainty-net: ', p)
p = compare_scores(df_scores_plaqumap_emc['vol_diff, ml'], df_scores_plaqunet_emc['vol_diff, ml'])
print ('nnUNet vs plaq-uncertainty-net: ', p)

print ('abs_vol_diff, ml')
p = compare_scores(df_scores_plaqumap_emc['abs_vol_diff, ml'], df_scores_nnunet_emc['abs_vol_diff, ml'])
print ('plaq-u-net vs plaq-uncertainty-net: ', p)
p = compare_scores(df_scores_plaqumap_emc['abs_vol_diff, ml'], df_scores_plaqunet_emc['abs_vol_diff, ml'])
print ('nnUNet vs plaq-uncertainty-net: ', p)

dsc
plaq-u-net vs plaq-uncertainty-net:  6.639428259340563e-06
nnUNet vs plaq-uncertainty-net:  5.711344941487451e-07
jsc
plaq-u-net vs plaq-uncertainty-net:  6.124988255052021e-06
nnUNet vs plaq-uncertainty-net:  6.484785739349568e-07
hd
plaq-u-net vs plaq-uncertainty-net:  0.6891349940127396
nnUNet vs plaq-uncertainty-net:  0.5386680307064231
hd95
plaq-u-net vs plaq-uncertainty-net:  0.1631073882292975
nnUNet vs plaq-uncertainty-net:  0.27438136433460003
clDsc
plaq-u-net vs plaq-uncertainty-net:  0.03430851926047363
nnUNet vs plaq-uncertainty-net:  0.10807264392253305
nsd
plaq-u-net vs plaq-uncertainty-net:  0.0005030241677042235
nnUNet vs plaq-uncertainty-net:  1.8744405365035262e-05
apl, cm
plaq-u-net vs plaq-uncertainty-net:  6.240101232063704e-07
nnUNet vs plaq-uncertainty-net:  1.7560560082939785e-06
vol_diff, ml
plaq-u-net vs plaq-uncertainty-net:  4.371614103328839e-07
nnUNet vs plaq-uncertainty-net:  5.225915161937346e-07
abs_vol_diff, ml
plaq-u-net vs plaq-uncertainty-net:  

In [200]:
def calculate_area_diameter(mask):
    mask_labeled = label(mask)
    regions = regionprops(mask_labeled)
    area_max = 0
    diameter_max = 0
    for r in regions:
        diameter = r.axis_major_length
        area = r.area
        if area>area_max:
            area_max = area
            diameter_max = diameter
    return area_max, diameter_max

In [None]:
def get_2Dscores(metrics_df, dir_gt, dir_pred):
    areas = []
    diameters = []
    dscs = []
    for filename in metrics_df.filename:
        item = os.path.split(filename)[-1]
        filename_gt = os.path.join(dir_gt,item)
        filename_pred = os.path.join(dir_pred,item)
        nii_gt = nib.load(filename_gt)
        nii_pred = nib.load(filename_pred)
        array_gt = nii_gt.get_fdata()
        array_pred = nii_pred.get_fdata()
        for i in range(array_gt.shape[2]):
            dsc = calculate_dice(array_gt[..., i], array_pred[..., i])
            a, d = calculate_area_diameter(array_gt[..., i])
            areas.append(a*0.303030*0.303030)
            diameters.append(d)
            dscs.append(dsc)
    return areas, diameters, dscs

In [None]:
def get_median_percentile_area_dice(areas_list, dscs_list, 
                                    bin_area_min = 25, bin_area_max = 200, bin_area_step = 25, ci = 90):
    
    bin_areas = np.arange(bin_area_min, bin_area_max+1, bin_area_step)
    
    bin_dscs = []
    for j in range (0, len(bin_areas)-1):
        bin1_dscs = []
        for i in range(0, len(areas_list)):
            if (areas_list[i]>bin_areas[j]) and (areas_list[i]<=bin_areas[j+1]):
                bin1_dscs.append(dscs_list[i])
        bin_dscs.append(bin1_dscs)

    bin_dscs_median = []
    bin_dscs_low = []
    bin_dscs_high = []
    p_low = (100-ci)/2
    p_high = 100-p_low
    for i in range (0, len(bin_dscs)):
        bin_dscs_median.append(np.nanmedian(bin_dscs[i]))
        bin_dscs_low.append(np.percentile(bin_dscs[i], p_low))
        bin_dscs_high.append(np.percentile(bin_dscs[i], p_high))
        
    return bin_areas, bin_dscs_median, bin_dscs_low, bin_dscs_high

In [None]:
def scatter_dice_area(scores_df_1, scores_df_2, scores_df_3, scores_df_4, 
                      dirname_gt_1, dirname_gt_2, dirname_gt_3, dirname_gt_4,
                      dirname_1, dirname_2, dirname_3, dirname_4):
    
    areas_test, diameters_test, dscs_test = get_2Dscores(scores_df_1, dirname_gt_1, dirname_1)
    areas_t2w, diameters_t2w, dscs_t2w = get_2Dscores(scores_df_2, dirname_gt_2, dirname_2)
    areas_t1wce, diameters_t1wce, dscs_t1wce = get_2Dscores(scores_df_3, dirname_gt_3, dirname_3)
    areas_emc, diameters_emc, dscs_emc = get_2Dscores(scores_df_4, dirname_gt_4, dirname_4)

    plt.figure(figsize=(17, 4))
    plt.subplot(141)
    xy = np.vstack([areas_test, dscs_test])
    z = gaussian_kde(xy)(xy)
    plt.scatter(areas_test, dscs_test, c=z, s=50, edgecolors='k')
    plt.xlabel('CA area, mm$^{2}$')
    plt.ylabel('DSC')
    plt.title('T1w')
    plt.subplot(142)
    xy = np.vstack([areas_t2w, dscs_t2w])
    z = gaussian_kde(xy)(xy)
    plt.scatter(areas_t2w, dscs_t2w, c=z, s=50, edgecolors='k')
    plt.xlabel('CA area, mm$^{2}$')
    #plt.ylabel('DSC')
    plt.title('T2w')
    plt.subplot(143)
    xy = np.vstack([areas_t1wce, dscs_t1wce])
    z = gaussian_kde(xy)(xy)
    plt.scatter(areas_t1wce, dscs_t1wce, c=z, s=50, edgecolors='k')
    plt.xlabel('CA area, mm$^{2}$')
    #plt.ylabel('DSC')
    plt.title('T1w CE')
    plt.subplot(144)
    xy = np.vstack([areas_emc, dscs_emc])
    z = gaussian_kde(xy)(xy)
    plt.scatter(areas_emc, dscs_emc, c=z, s=50, edgecolors='k')
    plt.xlabel('CA area, mm$^{2}$')
    #plt.ylabel('DSC')
    plt.title('T1w external')
    plt.suptitle('nnUNet DSC with regards to CA area')
    plt.show()

In [None]:
def plot_dice_area(scores_df_1, scores_df_2, scores_df_3, scores_df_4, 
                   dirname_gt_1, dirname_gt_2, dirname_gt_3, dirname_gt_4,
                   dirname_1, dirname_2, dirname_3, dirname_4):
    
    areas_test, diameters_test, dscs_test = get_2Dscores(scores_df_1, dirname_gt_1, dirname_1)
    areas_t2w, diameters_t2w, dscs_t2w = get_2Dscores(scores_df_2, dirname_gt_2, dirname_2)
    areas_t1wce, diameters_t1wce, dscs_t1wce = get_2Dscores(scores_df_3, dirname_gt_3, dirname_3)
    areas_emc, diameters_emc, dscs_emc = get_2Dscores(scores_df_4, dirname_gt_4, dirname_4)

    bin_areas_test,bin_dscs_median_test,bin_dscs_low_test,bin_dscs_high_test = get_median_percentile_area_dice(areas_test, 
                                                                                                                  dscs_test)
    bin_areas_t2w,bin_dscs_median_t2w,bin_dscs_low_t2w,bin_dscs_high_t2w = get_median_percentile_area_dice(areas_t2w, 
                                                                                                           dscs_t2w)
    bin_areas_t1wce,bin_dscs_median_t1wce,bin_dscs_low_t1wce,bin_dscs_high_t1wce = get_median_percentile_area_dice(areas_t1wce,
                                                                                                                   dscs_t1wce)
    bin_areas_emc,bin_dscs_median_emc,bin_dscs_low_emc,bin_dscs_high_emc = get_median_percentile_area_dice(areas_emc, 
                                                                                                           dscs_emc, 
                                                                                                           bin_area_min = 0, 
                                                                                                           bin_area_max = 400, 
                                                                                                           bin_area_step = 50)

    plt.figure(figsize=(17, 4))
    plt.subplot(141)
    p1 = plt.plot(bin_areas_test[1:], bin_dscs_median_test)
    plt.fill_between(bin_areas_test[1:], bin_dscs_high_test, bin_dscs_low_test, alpha = 0.3)
    plt.xlabel('CA area, mm$^{2}$')
    plt.ylabel('DSC')
    plt.title('T1w')
    rect_ci = Rectangle((0,0), 0,0, alpha=0.3)
    objects = [p1[0], rect_ci]
    labels = ['Median', '90% CI']
    plt.legend(objects, labels, loc='lower right')
    plt.subplot(142)
    plt.plot(bin_areas_t2w[1:], bin_dscs_median_t2w)
    plt.fill_between(bin_areas_t2w[1:], bin_dscs_high_t2w, bin_dscs_low_t2w, alpha = 0.3)
    plt.xlabel('CA area, mm$^{2}$')
    #plt.ylabel('DSC')
    plt.title('T2w')
    plt.legend(objects, labels, loc='lower right')
    plt.subplot(143)
    plt.plot(bin_areas_t1wce[1:], bin_dscs_median_t1wce)
    plt.fill_between(bin_areas_t1wce[1:], bin_dscs_high_t1wce, bin_dscs_low_t1wce, alpha = 0.3)
    plt.xlabel('CA area, mm$^{2}$')
    #plt.ylabel('DSC')
    plt.title('T1w CE')
    plt.legend(objects, labels, loc='lower right')
    plt.subplot(144)
    plt.plot(bin_areas_emc[1:], bin_dscs_median_emc)
    plt.fill_between(bin_areas_emc[1:], bin_dscs_high_emc, bin_dscs_low_emc, alpha = 0.3)
    plt.xlabel('CA area, mm$^{2}$')
    #plt.ylabel('DSC')
    plt.title('T1w external')
    plt.legend(objects, labels, loc='lower right')
    plt.suptitle('nnUNet DSC with regards to CA area')
    plt.show()

In [None]:
def get_dropout_dice(dirname_gt, dirname_do):
    
    sub_names = []

    for item in os.listdir(dirname_gt):
        sub_names.append(item[:-7])
        
        dscs_df = []

    for sub_name in sub_names:

        filenames_sub = glob.glob(dirname_do + sub_name + '*.nii.gz')
        n = len(filenames_sub)

        filename_gt = dirname_gt + sub_name + '.nii.gz'
        gt_nii = nib.load(filename_gt)
        gt = gt_nii.get_fdata()
        dim = gt.shape

        preds_array = np.zeros((dim[0], dim[1], dim[2], n), dtype = np.float32)
        for i in range (0, n):
            preds_array[..., i] = nib.load(filenames_sub[i]).get_fdata()

        pred_mean = np.mean(preds_array, axis = 3)
        dsc_mean = get_dice(gt, pred_mean)

        dscs = []
        for i in range (0, n):
            dscs.append(get_dice(pred_mean, preds_array[..., i]))
        dsc_ws = np.mean(dscs)

        rec = {'sub': sub_name, 'dsc_mean': dsc_mean, 'dsc_ws': dsc_ws}
        dscs_df.append(rec)

    dscs_df = pd.DataFrame(dscs_df)

    print (np.mean(dscs_df['dsc_ws']))
    
    #plt.plot((0.5, 1.0), (0.5, 1.0), ':k')
    plt.ylim(0.0, 1.0)
    plt.xlim(0.0, 1.0)
    plt.plot((0, 1), (0, 1), color='w', alpha = 0)
    plt.scatter(dscs_df['dsc_ws'], dscs_df['dsc_mean'], c = 'yellow', edgecolors = 'navy', s = 50)
    plt.xlabel('DSC within samples')
    plt.ylabel('DSC mean')
    plt.axis('square')

In [None]:
def bland_altman_plot(x_1, x_2, unit):
    
    x_1 = np.asarray(x_1)
    x_2 = np.asarray(x_2)
    mean = np.mean([x_1, x_2], axis=0)
    diff = x_1 - x_2                   
    md = np.mean(diff)                  
    sd = np.std(diff, axis=0) 

    plt.scatter(mean, diff, edgecolor="navy")
    plt.axhline(md, color='gray', linestyle='-', label='MD: '+str(round(md, 2)))
    plt.axhline(md + 1.96*sd, color='gray', linestyle='--', label='MD + 1.96SD: '+str(round(md + 1.96*sd, 2)))
    plt.axhline(md - 1.96*sd, color='gray', linestyle='--', label='MD - 1.96SD: '+str(round(md - 1.96*sd, 2)))
    plt.legend(loc = 'lower right', framealpha=0)
    plt.xlabel('Average, ' + unit)
    plt.ylabel('Difference, ' + unit)
    

In [None]:
def plot_ba(df_1, df_2, df_3, df_4):
    
    plt.figure(figsize=(16, 4))

    plt.subplot(141)
    bland_altman_plot(df_1['volume_gt']/1000, df_1['volume_pred']/1000, 'cm3')
    plt.title('T1w')

    plt.subplot(142)
    bland_altman_plot(df_2['volume_gt']/1000, df_2['volume_pred']/1000, 'cm3')
    plt.title('T2w')
    plt.ylabel('')

    plt.subplot(143)
    bland_altman_plot(df_3['volume_gt']/1000, df_3['volume_pred']/1000, 'cm3')
    plt.title('T1w CE')
    plt.ylabel('')

    plt.subplot(144)
    bland_altman_plot(df_4['volume_gt']/1000, df_4['volume_pred']/1000, 'cm3')
    plt.title('T1w EMC')
    plt.ylabel('')

    plt.show()
    
    