## SOI Localization Estimate

In [1]:
from   ipywidgets import interactive, fixed
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from   pathlib import Path
from   PIL import Image
from   utils.evaluation import get_annotated_slices, get_detections, get_ground_truth_bboxes, get_ground_truth_slices, get_scores_and_bboxes
from   utils.metrics import IoU
from   utils.sets import difference, intersection, union

### Setup

In [2]:
# Inputs for the whole notebook:
#   annotation_file - CSV passed to get_annotated_slices() below
#   results_dir     - directory passed to get_detections() below
annotation_file = 'my_annotations.csv'
results_dir     = 'results'

### Ground truth (annotations)

CT slices which have been annotated with a bounding box and a class label

In [3]:
# Read the ground-truth annotations from the CSV configured in the setup cell.
df_annotated_slices = get_annotated_slices(annotation_file)
# Bare last expression so the notebook renders the table.
df_annotated_slices

Unnamed: 0,image_path,x1,y1,x2,y2,class_name,ct_id
177,data/ct_scan_0/ae_0_0_3_s_258.jpg,216.0,230.0,276.0,259.0,sternum,ct_scan_0
178,data/ct_scan_0/ae_0_0_3_s_259.jpg,214.0,231.0,277.0,259.0,sternum,ct_scan_0
483,data/ct_scan_1/ae_1_1_3_s_238.jpg,216.0,241.0,289.0,267.0,sternum,ct_scan_1
692,data/ct_scan_2/ae_10_0_9_s_70.jpg,212.0,194.0,306.0,233.0,sternum,ct_scan_2


### Predictions (RetinaNet detections)

CT slices for which the RetinaNet has predicted a bounding box and class label

In [4]:
# Collect the per-scan prediction files found under results_dir.
# The 'retina_net_detections' column of the result is iterated by the evaluation cells below.
df_detections = get_detections(results_dir, df_annotated_slices)
# Bare last expression so the notebook renders the table.
df_detections

Unnamed: 0,retina_net_detections,ct_id
0,results/ct_scan_0/ct_scan_0_predictions.csv,ct_scan_0
1,results/ct_scan_1/ct_scan_1_predictions.csv,ct_scan_1
2,results/ct_scan_2/ct_scan_2_predictions.csv,ct_scan_2


### Object Detection

#### ToDo

Check IoU calculation. What if BBox = 0,0,1,1?

Also, mean IoU is not the same as for evaluation script (0.74 vs. 0.88)

Check IoU calculation. What if BBox = 0,0,1,1?

Also, mean IoU is not the same as for evaluation script (0.74 vs. 0.88)

In [7]:
def _percentage(count, total):
    """Return ``count`` as a percentage of ``total`` (NaN when ``total`` is zero)."""
    if total == 0:
        return float('nan')
    return 100 * (float(count) / float(total))


def _median_iqr(values):
    """Format ``values`` as 'median [Q1,Q3]' with two decimals, or 'n/a' when empty."""
    values = np.asarray(values, dtype=np.float64)
    if values.size == 0:
        return 'n/a'
    q1, median, q3 = np.quantile(values, [0.25, 0.5, 0.75])
    return '{:.2f} [{:.2f},{:.2f}]'.format(median, q1, q3)


def evaluate_object_detection(df_detections, df_annotated_slices, score_threshold=0.05):
    """Print slice-level detection statistics per CT scan and for the whole test set.

    A slice counts as "detected" when its classification score exceeds
    ``score_threshold``. For every scan the function prints the annotated and
    detected slice indices, the IoU in each annotated slice (0 when the slice
    was not detected) and a slice-level confusion matrix. After the loop it
    prints the totals and the median [Q1,Q3] of the collected scores and IoUs.

    Parameters
    ----------
    df_detections
        Table with a 'retina_net_detections' column holding one prediction
        file path per scan (and a ``shape`` attribute).
    df_annotated_slices
        Annotation table; only forwarded to ``get_ground_truth_slices`` and
        ``get_ground_truth_bboxes``.
    score_threshold : float, optional
        Minimum score (exclusive) for a slice to count as a detection.
        Defaults to 0.05, the value that used to be hard-coded.

    Returns
    -------
    None
        All results are printed.
    """
    print('Found predictions for n={:d} CT scans'.format(df_detections.shape[0]))

    # Test-set totals
    n_slices = 0
    n_annots = 0
    n_tp     = 0
    n_tn     = 0
    n_fp     = 0
    n_fn     = 0

    # Values pooled over all scans for the final summary
    scores_in_annots   = []
    scores_in_fps      = []
    scores_in_tns      = []
    IoUs_in_annots     = []
    IoUs_in_detections = []

    for detection_ct in df_detections['retina_net_detections']:

        # Scan id is the second path component, e.g. 'results/ct_scan_0/...' -> 'ct_scan_0'
        scan                    = detection_ct.split('/')[1]
        true_slices_ct          = get_ground_truth_slices(scan, df_annotated_slices)
        true_bbox_ct            = get_ground_truth_bboxes(scan, df_annotated_slices)
        scores_ct, pred_bbox_ct = get_scores_and_bboxes(detection_ct)

        # Classify every slice once; collect in lists and convert afterwards
        slices_ct      = np.arange(scores_ct.shape[0])
        detected       = []
        ious_detection = []
        ious_true      = []
        for s, score, pred_bbox in zip(slices_ct, scores_ct, pred_bbox_ct):
            is_detected  = score > score_threshold
            is_annotated = s in true_slices_ct

            # IoU is only defined when a detection meets an annotation; 0 otherwise
            iou_slice = 0.0
            if is_detected and is_annotated:
                true_bbox = true_bbox_ct[np.where(true_slices_ct == s)][0]
                iou_slice = IoU(pred_bbox, true_bbox)

            if is_detected:
                detected.append(s)
                # Store the computed value (the IoU function itself used to be appended here)
                ious_detection.append(iou_slice)
            if is_annotated:
                ious_true.append(iou_slice)

        detected_slices_ct = np.asarray(detected, dtype=np.int64)
        IoUs_true_ct       = np.asarray(ious_true, dtype=np.float64)

        print('\n************************************')
        print('Scan: {:s}'.format(scan))
        print('\tTrue slices:     {}'.format(true_slices_ct))
        print('\tDetected slices: {}'.format(detected_slices_ct))
        print('\tIoUs:            {}'.format(IoUs_true_ct))

        # Confusion matrix: Calculate TP, TN, FP, FN
        n_slices_ct    = slices_ct.shape[0]
        n_annots_ct    = true_slices_ct.shape[0]
        n_negatives_ct = n_slices_ct - n_annots_ct
        tp_ct = intersection(true_slices_ct, detected_slices_ct)
        tn_ct = slices_ct[~np.isin(slices_ct, union(true_slices_ct, detected_slices_ct))]
        fp_ct = difference(detected_slices_ct, true_slices_ct)
        fn_ct = difference(true_slices_ct, detected_slices_ct)

        n_slices += n_slices_ct
        n_annots += n_annots_ct
        n_tp += tp_ct.shape[0]
        n_tn += tn_ct.shape[0]
        n_fp += fp_ct.shape[0]
        n_fn += fn_ct.shape[0]

        # _percentage() yields NaN instead of raising when a denominator is zero
        print('\nConfusion matrix:')
        print('\tn_slices     = {:d}'.format(n_slices_ct))
        print('\tn_annots     = {:d}'.format(n_annots_ct))
        print('\t# TP         = {:d}/{:d} ({:.2f} %)'.format(tp_ct.shape[0], n_annots_ct,    _percentage(tp_ct.shape[0], n_annots_ct)))
        print('\t# TN         = {:d}/{:d} ({:.2f} %)'.format(tn_ct.shape[0], n_negatives_ct, _percentage(tn_ct.shape[0], n_negatives_ct)))
        print('\t# FP         = {:d}/{:d} ({:.2f} %)'.format(fp_ct.shape[0], n_negatives_ct, _percentage(fp_ct.shape[0], n_negatives_ct)))
        print('\t# FN         = {:d}/{:d} ({:.2f} %)'.format(fn_ct.shape[0], n_annots_ct,    _percentage(fn_ct.shape[0], n_annots_ct)))

        # Pool classification scores and IoUs for the test-set summary
        scores_in_annots.extend(scores_ct[true_slices_ct])
        scores_in_fps.extend(scores_ct[fp_ct])
        scores_in_tns.extend(scores_ct[tn_ct])
        IoUs_in_annots.extend(ious_true)
        IoUs_in_detections.extend(ious_detection)

    n_negatives = n_slices - n_annots
    print('\nTest set:')
    print('\t# slices = {:d}'.format(n_slices))
    print('\t# annots = {:d}'.format(n_annots))
    print('\t# TP     = {:d}/{:d} ({:.1f} %)'.format(n_tp, n_annots,    _percentage(n_tp, n_annots)))
    print('\t# TN     = {:d}/{:d} ({:.1f} %)'.format(n_tn, n_negatives, _percentage(n_tn, n_negatives)))
    print('\t# FP     = {:d}/{:d} ({:.1f} %)'.format(n_fp, n_negatives, _percentage(n_fp, n_negatives)))
    print('\t# FN     = {:d}/{:d} ({:.1f} %)'.format(n_fn, n_annots,    _percentage(n_fn, n_annots)))

    # The reported statistic is the median with its interquartile range
    # (the first line used to be labelled "Mean"). Empty groups print 'n/a'.
    print('\n\tMedian score in annots = {}'.format(_median_iqr(scores_in_annots)))
    print('\tMedian score in FPs    = {}'.format(_median_iqr(scores_in_fps)))
    print('\tMedian score in TNs    = {}'.format(_median_iqr(scores_in_tns)))
    print('\tMedian IoU in annots   = {}'.format(_median_iqr(IoUs_in_annots)))
    print('\tMedian IoU in detect   = {}'.format(_median_iqr(IoUs_in_detections)))

In [8]:
# Evaluate all scans; the function prints its report and has no return statement.
evaluate_object_detection(df_detections, df_annotated_slices)

Found predictions for n=3 CT scans

************************************
Scan: ct_scan_0
	True slices:     [258 259]
	Detected slices: [258 259]
	IoUs:            [0.73191611 0.865768  ]

Confusion matrix:
	n_slices     = 328
	n_annots     = 2
	# TP         = 2/2 (100.00 %)
	# TN         = 326/326 (100.00 %)
	# FP         = 0/326 (0.00 %)
	# FN         = 0/2 (0.00 %)

************************************
Scan: ct_scan_1
	True slices:     [238]
	Detected slices: [238]
	IoUs:            [0.79716465]

Confusion matrix:
	n_slices     = 296
	n_annots     = 1
	# TP         = 1/1 (100.00 %)
	# TN         = 295/295 (100.00 %)
	# FP         = 0/295 (0.00 %)
	# FN         = 0/1 (0.00 %)

************************************
Scan: ct_scan_2
	True slices:     [70]
	Detected slices: [70]
	IoUs:            [0.97592318]

Confusion matrix:
	n_slices     = 93
	n_annots     = 1
	# TP         = 1/1 (100.00 %)
	# TN         = 92/92 (100.00 %)
	# FP         = 0/92 (0.00 %)
	# FN         = 0/1 (0.00 %)

Tes

### Structure-Of-Interest (SOI) Localization

In [None]:
def find_nearest(array, value):
    """Return the element of ``array`` that is closest to ``value``.

    ``array`` is converted with ``np.asarray``. When several elements are
    equally close, the one that comes first in ``array`` is returned.
    """
    candidates = np.asarray(array)
    distances = np.abs(candidates - value)
    nearest_idx = distances.argmin()
    return candidates[nearest_idx]

In [None]:
def _bbox_center_distance(bbox_a, bbox_b):
    """Euclidean distance between the centers of two bounding boxes.

    Both boxes are read as (x1, y1, x2, y2) corner coordinates, which is the
    layout of the annotation columns; predicted boxes are assumed to use the
    same layout - TODO confirm against get_scores_and_bboxes().
    """
    center_a_x = 0.5 * (bbox_a[0] + bbox_a[2])
    center_a_y = 0.5 * (bbox_a[1] + bbox_a[3])
    center_b_x = 0.5 * (bbox_b[0] + bbox_b[2])
    center_b_y = 0.5 * (bbox_b[1] + bbox_b[3])
    return np.sqrt((center_a_x - center_b_x)**2 + (center_a_y - center_b_y)**2)


# Localize the SOI in every scan: the slice with the highest classification
# score is the localized slice, and it is a hit when that slice is annotated.
print('Found predictions for n={:d} CT scans'.format(df_detections.shape[0]))

score_threshold = 0.05  # minimum (exclusive) score for a scan to count as localized

# Per-scan results, index-aligned with each other and with `hits`
scans            = []
scores           = []
bbox             = []
localized_slices = []
true_slices      = []
hits             = []
xy_dists_tp      = []
xy_dists_fp      = []

tp_det = 0
fp_det = 0
fn_det = 0

for detection_ct in df_detections['retina_net_detections']:

    scan               = detection_ct.split('/')[1]
    true_slices_ct     = get_ground_truth_slices(scan, df_annotated_slices)
    true_bbox_ct       = get_ground_truth_bboxes(scan, df_annotated_slices)
    # Scores and boxes come from the prediction file itself; get_detections()
    # only builds the table of prediction files and takes different arguments.
    scores_ct, bbox_ct = get_scores_and_bboxes(detection_ct)

    # Check if there is a detection with a classification score above the threshold
    # (an empty prediction file counts as "nothing localized")
    if scores_ct.shape[0] > 0 and np.amax(scores_ct) > score_threshold:
        # Pick the slice(s) with the highest classification score. Typically, this will be a single slice
        loc_slice = np.where(scores_ct == np.amax(scores_ct))[0]

        if loc_slice.shape[0] > 1:
            # NOTE(review): this aborts the whole loop, so the scans that follow
            # (and this one) are missing from the summary below.
            print("WARNING: Localized more than 1 slice!")
            break

        loc_bbox = bbox_ct[loc_slice][0]

        if loc_slice[0] in true_slices_ct:
            # Localized slice is annotated: true positive
            hits.append(1)
            tp_det += 1

            true_bbox = true_bbox_ct[np.where(true_slices_ct == loc_slice[0])][0]
            xy_dists_tp.append(_bbox_center_distance(loc_bbox, true_bbox))

        else:
            # Localized slice is not annotated: false positive
            hits.append(0)
            fp_det += 1

            # Compare against the nearest annotated slice, if the scan has any
            if true_slices_ct.shape[0] > 0:
                nearest_true_slice = find_nearest(true_slices_ct, loc_slice[0])
                true_bbox = true_bbox_ct[np.where(true_slices_ct == nearest_true_slice)][0]
                xy_dists_fp.append(_bbox_center_distance(loc_bbox, true_bbox))

    else:
        # No score above the threshold: nothing localized in this scan
        loc_slice = np.empty(shape=(0))
        hits.append(0)
        fn_det += 1

    print('\n************************************')
    print('Scan: {:s}'.format(scan))
    print('\tTrue slices:      {}'.format(true_slices_ct))
    print('\tLocalized slices: {}'.format(loc_slice))

    scans.append(scan)
    scores.append(scores_ct)
    bbox.append(np.asarray(bbox_ct))
    localized_slices.append(loc_slice)
    true_slices.append(true_slices_ct)

hits = np.asarray(hits)

print('\nConfusion matrix:')
print('\t#TP = {:d}'.format(tp_det))
print('\t#FP = {:d}'.format(fp_det))
print('\t#FN = {:d}'.format(fn_det))

In [None]:
# One entry per evaluated scan: 1 = localized slice is annotated, 0 = miss.
print(hits)

In [None]:
# List every scan whose localization was recorded as a miss (hits == 0),
# showing the localized slice next to the annotated slices of that scan.
print('Misses:')
missed_scan_indices = (hits == 0).nonzero()[0]
for scan_idx in missed_scan_indices:
    predicted = localized_slices[scan_idx]
    annotated = true_slices[scan_idx]
    print('\tPredicted slice:', predicted, 'Annotated slices:', annotated)

In [None]:
# Mean +/- standard deviation of the xy distances collected for true positives.
# With an empty list numpy yields nan (and emits a RuntimeWarning).
tp_summary = 'Average localization distance in xy-plane for TPs: {:.1f} +/- {:.1f} pxl (n={:d})'
print(tp_summary.format(np.mean(xy_dists_tp), np.std(xy_dists_tp), len(xy_dists_tp)))

In [None]:
# Mean +/- standard deviation of the xy distances collected for false positives.
# With an empty list numpy yields nan (and emits a RuntimeWarning).
fp_summary = 'Average localization distance in xy-plane for FPs: {:.1f} +/- {:.1f} pxl (n={:d})'
print(fp_summary.format(np.mean(xy_dists_fp), np.std(xy_dists_fp), len(xy_dists_fp)))

In [None]:
# Localization accuracy: share of evaluated scans that were not recorded as a miss.
n_tot = hits.shape[0]
n_0 = int((hits == 0).sum())   # misses
n_1 = int((hits == 1).sum())   # hits
accuracy = (n_tot - n_0) / n_tot * 100
print('Accuracy = {:d}/{:d} ({:.2f} %)'.format(n_1, n_tot, accuracy))