In [None]:
import numpy as np
import matplotlib.pyplot as plt
import PIL
from PIL import ImageFont
from PIL import Image
from PIL import ImageDraw
import cv2
from create_labels import *
from stats_helper import *

# Render matplotlib figures directly in the notebook output.
%matplotlib inline
plt.rcParams['figure.figsize'] = (10.0, 8.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'  # no smoothing between pixels when showing images
plt.rcParams['image.cmap'] = 'gray'  # default colour map for imshow

# for auto-reloading external modules
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
# (mode 2 reloads all imported modules before each cell is executed)
%load_ext autoreload
%autoreload 2

In [None]:
# Resolve the dataset directory for the machine this notebook is running on.
import os

# Dataset subset to analyse; alternatives are 'pruned/', 'Atrium/' and 'Ventricle/'.
wanted_folder = 'alldata/'

# Known project checkouts mapped to the location of their OCT data.
dataset_subdirs = {
    '/home/sim/notebooks/relaynet_pytorch': '/datasets/OCTData/',
    '/Users/sim/Desktop/Imperial/Project/PreTrained/relaynet_pytorch': '/datasets-24-aug/OCTData/',
}

check_directory = os.getcwd()

# Outside a known checkout the working directory is used unchanged.
cwd = check_directory
if check_directory in dataset_subdirs:
    cwd = check_directory + dataset_subdirs[check_directory] + wanted_folder

print(cwd)

In [None]:
# Select which raw image / manual label / label-ID PNG / result PNG set to inspect.
# All paths are relative to `cwd`, which is set in the directory cell above.
used_image = 1
if used_image == 1:
    image_file = 'whole_raw_image/con_H1_N01848_LV_1_194.tif'
    gnd_truth_image = 'manual_label/label_H1_N01848_LV_1_194.JPG'
    gnd_truth_label = 'png_labels_method/label_H1_N01848_LV_1_194_labels.png'
    result = 'labels_corrected/label_H1_N01848_LV_1_194_labelscorrected.png'
elif used_image == 2:
    image_file = 'whole_raw_image/con_H1_N01848_LA_1_272.tif'
    gnd_truth_image = 'manual_label/label_H1_N01848_LA_1_272.JPG'
    gnd_truth_label = 'png_labels_method/label_H1_N01848_LA_1_272_labels.png'
    result = 'labels_corrected/label_H1_N01848_LA_1_272_labelscorrected.png'
# NOTE(review): option 3 pairs the LA image and labels with the LV *result* file.
# Presumably a deliberate mismatch (the last panel is titled 'Bad Image Label') - confirm.
elif used_image == 3:
    image_file = 'whole_raw_image/con_H1_N01848_LA_1_272.tif'
    gnd_truth_image = 'manual_label/label_H1_N01848_LA_1_272.JPG'
    gnd_truth_label = 'png_labels_method/label_H1_N01848_LA_1_272_labels.png'
    result = 'labels_corrected/label_H1_N01848_LV_1_194_labelscorrected.png'
# NOTE(review): any other value of used_image leaves the four names above undefined,
# so the next line fails with a NameError.

# Load the raw OCT image
raw_image_path = cwd + image_file
image = plt.imread(raw_image_path)
test_data = image    

# Load the manually labelled (annotated) image
label_image_path = cwd + gnd_truth_image
gnd_truth = plt.imread(label_image_path)
   
# Load the ground-truth label-ID image and rescale it to integer class IDs
gnd_truth_path = cwd + gnd_truth_label
gnd = plt.imread(gnd_truth_path)
# NOTE(review): `values` is not defined anywhere in this notebook; it must come from one of
# the star imports or from a deleted cell. Confirm its origin (np.max(gnd) may have been meant).
gnd = ((gnd*7)/np.max(values)).astype(int)

# Load the predicted / corrected label image and rescale it the same way
res_image_path = cwd + result
# print(gnd_truth_path)
# print(res_image_path)
res = plt.imread(res_image_path)
# NOTE(review): same undefined `values` as for `gnd` above.
res = ((res*7)/np.max(values)).astype(int)

# Show the four images side by side in a 1x4 grid with the axis ticks hidden
plt.subplot(141)
plt.imshow(test_data,cmap = "gray")
plt.title('Raw OCT Image'), plt.xticks([]), plt.yticks([])
plt.subplot(142)
plt.imshow(gnd_truth, cmap = "gray")
plt.title('Manually Labelled Image'), plt.xticks([]), plt.yticks([])
plt.subplot(143),plt.imshow(gnd,cmap = "gray")
plt.title('Manually Label ID'), plt.xticks([]), plt.yticks([])
plt.subplot(144),plt.imshow(res,cmap = "gray")
plt.title('Bad Image Label'), plt.xticks([]), plt.yticks([])
plt.show()

In [None]:
# Convert the ground-truth label-ID image with label_img_to_rgb (defined outside this
# notebook, presumably via one of the star imports - it looks like it returns an RGB
# rendering of the labels; confirm against the helper module).
colour = label_img_to_rgb(gnd)
# Debug helpers, left disabled:
# print(gnd[300])
# plt.imshow(colour)

# Metrics

This notebook generates metrics for each of the returned (predicted) label images.

* FP & FN
* Average Thickness of Layers - similar to MAD-LT below
* MAD-LT - error in estimated thickness map (ReLayNet Metric)
* DS - Dice Overlap Score (ReLayNet Metric)
* CE - Estimated contour error for each layer: the Euclidean distance between the ground truth and where my predicted segments are (ReLayNet Metric)

### TP, TN, FP, FN, Class Accuracy, Precision, Recall, F1

In [None]:
# Split the label-ID images into per-class layers for 8 classes.
# list_of_labels is defined outside this notebook; the cells below index its result as
# [:, :, class], so it presumably returns a (rows, cols, 8) one-hot array - confirm.
true_labels = list_of_labels(gnd,8)
pred_labels = list_of_labels(res,8)

In [None]:
# One row of 8 panels per label stack: ground truth first, then the prediction.
# Each panel shows the mask of a single class.
for label_stack in (true_labels, pred_labels):
    fig, axes = plt.subplots(nrows=1, ncols=8, figsize=(20,20))
    for i, ax in enumerate(axes):
        class_mask = label_stack[:,:,i]
        ax.imshow(class_mask, alpha=0.2)
        ax.set_title("label " + str(i))

In [None]:
def _rounded_ratio(numerator, denominator, ndigits=3):
    """Return numerator / denominator rounded to ndigits, or NaN if the denominator is 0."""
    if denominator == 0:
        return np.nan
    return round(numerator / denominator, ndigits)


def find_stats(true_labels, pred_labels, num_classes=8):
    """Compute per-class confusion counts and derived scores for one image.

    Parameters
    ----------
    true_labels, pred_labels : array-like indexed as [row, col, class]
        One-hot encoded ground truth and prediction. Only cells equal to 1
        (positive) or 0 (negative) are counted.
    num_classes : int, optional
        Number of class layers to evaluate (default 8, the value that was
        previously hard-coded).

    Returns
    -------
    list of tuple
        One ``(TP, FP, TN, FN, Acc, Precision, Recall, Dice)`` tuple per class.
        The four scores are rounded to 3 decimals. A score whose denominator is
        zero is NaN, which means there is no information about that class in
        this image, so it should be left out of the analysis of that label.

    Notes
    -----
    The Dice score is mathematically identical to the F1 score, so F1 is not
    computed separately. The earlier F1-vs-Dice sanity check compared values
    derived from already-rounded precision/recall and also compared NaN with
    NaN, so it reported spurious mismatches (always for absent classes).
    """
    # NOTE: for a batch of images, add an extra leading dimension: pred_labels[x, :, :, i]
    class_vals = []

    for i in range(num_classes):
        pred_layer = pred_labels[:, :, i]
        true_layer = true_labels[:, :, i]

        pred_pos, pred_neg = pred_layer == 1, pred_layer == 0
        true_pos, true_neg = true_layer == 1, true_layer == 0

        # True Positive (TP): we predict a label of 1 (positive), and the true label is 1.
        TP = np.sum(np.logical_and(pred_pos, true_pos))
        # True Negative (TN): we predict a label of 0 (negative), and the true label is 0.
        TN = np.sum(np.logical_and(pred_neg, true_neg))
        # False Positive (FP): we predict a label of 1 (positive), but the true label is 0.
        FP = np.sum(np.logical_and(pred_pos, true_neg))
        # False Negative (FN): we predict a label of 0 (negative), but the true label is 1.
        FN = np.sum(np.logical_and(pred_neg, true_pos))

        # Accuracy - number of correct predictions / all predictions
        Acc = _rounded_ratio(TP + TN, TP + TN + FP + FN)
        # Precision - true positives / everything predicted positive
        Precision = _rounded_ratio(TP, TP + FP)
        # Recall - true positives / everything that is actually positive
        Recall = _rounded_ratio(TP, TP + FN)
        # Dice (same as F1) - balance between precision and recall.
        # It is 0 when something was predicted or present but nothing overlapped.
        Dice = _rounded_ratio(2 * TP, 2 * TP + FP + FN)

        class_vals.append((TP, FP, TN, FN, Acc, Precision, Recall, Dice))
    return class_vals
# Display the per-class (TP, FP, TN, FN, Acc, Precision, Recall, Dice) tuples for the loaded image pair.
find_stats(true_labels, pred_labels)

### Average Thickness of Layers and Error in Thickness
A limitation of this approach: if a layer has been classified as two separate parts within the same column, that column still gets a single thickness value, even though it actually covers two different parts of the layer.

The assumption is that each layer is mostly connected.

The function returns arrays, which we then sum down the axis to find the average thickness etc. across all images.

In [None]:
try:
    # scikit-image >= 0.16 exposes SSIM under skimage.metrics
    from skimage.metrics import structural_similarity as ssim
except ImportError:
    # older scikit-image releases only have the since-removed compare_ssim
    from skimage.measure import compare_ssim as ssim


def _mean_or_nan(values):
    """Return the average of values, or NaN (without a NumPy warning) if there are none."""
    return np.average(values) if len(values) else np.nan


def thickness_metrics(true_labels, pred_labels, num_classes=8):
    '''
    Takes True and Predicted Labels that are one hot encoded, indexed as [row, col, class].

    The thickness of a class in a column is the number of non-zero cells of that
    class in the column. Every column of the image is used (the width is taken
    from true_labels rather than being fixed at 512).

    num_classes is the number of class layers to evaluate (default 8).

    Returns five lists with one entry per class:
      * avg true thickness  - mean true thickness over the columns where the class is present in the truth
      * avg pred thickness  - mean predicted thickness over the columns where the class is predicted
      * mean absolute error - mean |true - pred| thickness over the columns where either is present
      * mean squared error  - mean (true - pred)^2 thickness over the same columns
      * ssim                - structural similarity between the true and predicted class layer

    NOTE: a NaN average means the class does not occur in this image.
    '''
    avg_true_thickness_list = []
    avg_pred_thickness_list = []
    mean_abs_error_list = []
    mean_squared_error_list = []
    ssim_list = []

    for i in range(num_classes):
        true_layer = true_labels[:, :, i]
        pred_layer = pred_labels[:, :, i]

        # Per-column thickness: count the non-zero cells going down each column
        true_widths = np.count_nonzero(true_layer, axis=0)
        pred_widths = np.count_nonzero(pred_layer, axis=0)

        true_present = true_widths != 0
        pred_present = pred_widths != 0

        # The true thickness must be averaged over the TRUE widths
        # (it used to collect the predicted width here).
        avg_true_thickness = _mean_or_nan(true_widths[true_present])
        avg_pred_thickness = _mean_or_nan(pred_widths[pred_present])

        # Columns where neither image contains the class carry no information, so skip them
        class_error = np.abs(true_widths - pred_widths)[true_present | pred_present]
        mean_abs_error = _mean_or_nan(class_error)
        mean_squared_error = _mean_or_nan(np.power(class_error, 2))

        # NOTE(review): SSIM derives its data range from the array dtype by default; for
        # 0/1 masks stored in a wide integer dtype this may flatten the score - confirm
        # whether an explicit data_range should be passed.
        s = ssim(true_layer, pred_layer)

        avg_true_thickness_list.append(avg_true_thickness)
        avg_pred_thickness_list.append(avg_pred_thickness)
        mean_abs_error_list.append(mean_abs_error)
        mean_squared_error_list.append(mean_squared_error)
        ssim_list.append(s)

    return avg_true_thickness_list,avg_pred_thickness_list,mean_abs_error_list, mean_squared_error_list, ssim_list

# Accumulate the per-class thickness metrics over several images: one row per image,
# one column per class.
avg_true_thickness_list = []
avg_pred_thickness_list = []
mean_abs_error_list = [] 
mean_squared_error_list = [] 
ssim_list = []

# NOTE(review): this evaluates the SAME (true_labels, pred_labels) pair twice; it looks like
# a placeholder for iterating over a set of images - confirm.
for i in range(2):
    avg_true_thickness,avg_pred_thickness,mean_abs_error, mean_squared_error, s = thickness_metrics(true_labels, pred_labels)
    avg_true_thickness_list.append(avg_true_thickness)
    avg_pred_thickness_list.append(avg_pred_thickness)
    mean_abs_error_list.append(mean_abs_error) 
    mean_squared_error_list.append(mean_squared_error) 
    ssim_list.append(s)
# print(avg_true_thickness_list,avg_pred_thickness_list,mean_abs_error_list, mean_squared_error_list, ssim_list)
# Number of evaluated images, then the per-class true thickness averaged across them (axis 0)
print(len(avg_true_thickness_list))
print(np.average(avg_true_thickness_list, axis=0))

### Dice Score 

Source: https://stats.stackexchange.com/questions/195006/is-the-dice-coefficient-the-same-as-accuracy

* Dice Score: relaynet_pytorch - solver.py in train function.
* Dice Score: networks -> net_api -> losses.py in class DiceCoeff as well as DiceLoss
* TF Dice Score: Line 42 in ReLayNey_model.ipynb

In [None]:
def compute_dice(true_list, pred_list, num_classes=8):
    """Compute the Dice overlap score of every class layer.

    Parameters
    ----------
    true_list, pred_list : array-like indexed as [row, col, class]
        One-hot encoded ground truth and prediction; a cell belongs to a class
        when its value is 1.
    num_classes : int, optional
        Number of class layers to score (default 8, the value that was
        previously hard-coded).

    Returns
    -------
    list
        One Dice score per class, rounded to 2 decimals. The score is NaN when
        the class occurs in neither the ground truth nor the prediction.
    """
    k = 1  # value that marks membership of a class
    scores = []
    for i in range(num_classes):
        gt_mask = true_list[:, :, i] == k
        seg_mask = pred_list[:, :, i] == k

        # Dice = 2 * |gt AND seg| / (|seg| + |gt|)
        overlap = np.sum(np.logical_and(gt_mask, seg_mask))
        total = np.sum(seg_mask) + np.sum(gt_mask)

        if total == 0:
            # Class absent from both images: the score is undefined. Report NaN
            # explicitly instead of triggering a 0/0 RuntimeWarning.
            scores.append(np.nan)
            continue

        scores.append(round(overlap * 2.0 / total, 2))
    return scores
# Per-class Dice scores for the loaded ground-truth / prediction pair
score = compute_dice(true_labels, pred_labels)
print(score)