# Week 2,3 report

Since we re-arrange the images multiple times, our image data needed some kind of structure to ensure that we use it consistently. We created an `Image` class, which consists of the image NumPy array, the mask NumPy array, the label and the filename, and a `Data` class, which handles loading, preprocessing, creating triplet images and evaluating.

In [1]:
class Image:
    """One image slice together with its mask, label and filename metadata.

    The patient and slice identifiers are parsed from the filename, which is
    split on underscores: field 1 is the patient id and field 3 (without its
    file extension) is the slice id. For example ``Pt_1103_Slice_12.tif``
    yields patient ``'1103'`` and slice ``'12'``. Both ids are kept as strings.
    """

    #extracts the patient id from the filename of the image
    def getPatientId(self, filename):
        """Return the second underscore-separated field of ``filename``."""
        return filename.split('_')[1]

    #extracts the slice id from the filename of the image
    def getSliceId(self, filename):
        """Return the fourth underscore-separated field without its extension.

        The extension is whatever follows the last dot, so ``.tif`` and
        ``.tiff`` are both handled; a field with no dot is returned unchanged.
        """
        last_field = filename.split('_')[3]
        stem, dot, _ = last_field.rpartition('.')
        # rpartition puts everything in the last slot when there is no dot
        return stem if dot else last_field

    #constructor
    def __init__(self, img, mask, flname):
        """Store the arrays and derive label, slice id and patient id.

        Args:
            img: image array.
            mask: mask array; must support ``.max()``.
            flname: filename with at least four underscore-separated fields.
        """
        self.img = img
        self.mask = mask
        self.flname = flname
        # positive as soon as any mask value is greater than zero
        self.label = mask.max() > 0
        self.slice_id = self.getSliceId(flname)
        self.patient_id = self.getPatientId(flname)

    #prints content to check if the label corresponds to the right masks, etc.
    def print(self):
        """Print filename, label, slice id and patient id on one line."""
        print("{} Label: {} Slice: {} Patient: {}".format(
            self.flname, self.label, self.slice_id, self.patient_id))

class TripletImage:
    """Three images joined along axis 2, labelled by the middle one.

    Masks, filenames, slice ids and patient ids of all three members are kept
    as 3-tuples in the order the members were given.
    """

    #constructor of the triplet images - 3 concatenated images, label corresponds to the middle mask
    def __init__(self, img1, img2, img3):
        """Concatenate the three members' ``img`` arrays and collect their metadata."""
        members = (img1, img2, img3)
        self.triplet = np.concatenate([member.img for member in members], axis=2)
        self.masks = tuple(member.mask for member in members)
        self.flnames = tuple(member.flname for member in members)
        self.slice_ids = tuple(member.slice_id for member in members)
        self.patient_ids = tuple(member.patient_id for member in members)
        # the triplet inherits the label of its middle member
        self.label = img2.label

    #prints content to check if the label corresponds to the right masks, etc.
    def print(self):
        """Print the triplet shape, the three filenames, the label and a separator."""
        print("triplet shape: {}".format(self.triplet.shape))
        print("filenames: {} {} {}".format(*self.flnames))
        print("Label: {}".format(self.label))
        print("=" * 20)

In [2]:
class Data:
    #loads train and test images, masks and filenames
    def load_data(self):
        """Load, preprocess and wrap the train and test sets.

        Both loaders are called with ``args.input_dir`` and are expected to
        return an ``(images, masks, filenames)`` triple. Each set is passed
        through ``self.preprocess`` on its own, then every sample is wrapped
        in an ``Image`` and stored in ``self.train_data`` / ``self.test_data``.
        """
        train_imgs, train_masks, train_names = load_train_data_with_flnames(args.input_dir)
        test_imgs, test_masks, test_names = load_test_data(args.input_dir)

        # each set is normalised with its own statistics
        train_imgs, train_masks = self.preprocess(train_imgs, train_masks)
        test_imgs, test_masks = self.preprocess(test_imgs, test_masks)

        # bundle image, mask and filename of every sample into one Image object
        self.train_data = [
            Image(img, train_masks[idx], train_names[idx])
            for idx, img in enumerate(train_imgs)
        ]
        self.test_data = [
            Image(img, test_masks[idx], test_names[idx])
            for idx, img in enumerate(test_imgs)
        ]

    def print_data(self):
        for img in self.train_data:
            img.print()

    #resizes the images to (img_rows, img_cols), adds an additional trailing axis and converts them to float32
    def add_axis(self, imgs):
        """Resize each image to ``(img_rows, img_cols)`` and append a trailing axis.

        Args:
            imgs: array whose first axis indexes the images.

        Returns:
            A float32 array of shape ``(imgs.shape[0], img_rows, img_cols, 1)``.
        """
        n_imgs = imgs.shape[0]
        # uint8 staging buffer: resized values lose their fractional part here,
        # before the final conversion to float32
        resized = np.ndarray((n_imgs, img_rows, img_cols), dtype=np.uint8)
        for i in range(n_imgs):
            # NOTE(review): `resize` is assumed to be skimage.transform.resize
            # (it is called with preserve_range=True); its output shape is
            # given as (rows, cols), which must match the buffer above.
            resized[i] = resize(imgs[i], (img_rows, img_cols), preserve_range=True)

        return resized[..., np.newaxis].astype('float32')

    #image preprocessing - adding additional axis, data centering, data normalization, mask scaling
    def preprocess(self, imgs, masks):
        imgs_new = self.add_axis(imgs)
        masks_new = self.add_axis(masks)

        mean = np.mean(imgs_new)  # mean for data centering
        std = np.std(imgs_new)  # std for data normalization

        imgs_new -= mean
        imgs_new /= std

        masks_new /= 255.  # scale masks to [0, 1]

        return imgs_new, masks_new

    #sorts the images, firstly by patient_id and secondly by slice_id
    def sort_images(self):
        self.train_data = sorted(sorted(self.train_data, key = lambda x : int(x.slice_id)), key = lambda x : x.patient_id)
        self.test_data = sorted(sorted(self.test_data, key = lambda x : int(x.slice_id)), key = lambda x : x.patient_id)
    
    def concate_images(self, imgs):
        """Build one ``TripletImage`` per input image, with that image in the middle.

        The previous and next elements of ``imgs`` are used as neighbours. A
        neighbour is replaced by an all-zero padding image when it does not
        exist (start or end of the list) or when it belongs to a different
        patient than the middle image, so slices of different patients are
        never stacked together. ``imgs`` is expected to be ordered by patient
        and slice already.

        Args:
            imgs: sequence of ``Image`` objects.

        Returns:
            A list of ``TripletImage`` objects, one per element of ``imgs``;
            empty when ``imgs`` is empty.
        """
        if len(imgs) == 0:
            return []

        # The padding image copies shape and dtype from the real data, so the
        # concatenation works for any input size and does not upcast the result.
        black_img = Image(np.zeros_like(imgs[0].img),
                          np.zeros_like(imgs[0].mask),
                          "black_0000_image_0000.tif")

        def neighbour(centre, idx):
            # fall back to padding outside the list or across patients
            if 0 <= idx < len(imgs) and imgs[idx].patient_id == centre.patient_id:
                return imgs[idx]
            return black_img

        return [
            TripletImage(neighbour(centre, i - 1), centre, neighbour(centre, i + 1))
            for i, centre in enumerate(imgs)
        ]

    #concates images
    def create_triplet_imgs(self):
        self.triplet_train = self.concate_images(self.train_data)
        self.triplet_test = self.concate_images(self.test_data)
    
    def print_triplet_data(self):
        for triplet in self.triplet_train:
            triplet.print()

    def get_train_triplets(self):
        imgs_train = []
        masks_train = []
        flnames_train = []
        labels_train = []
        for triplet in self.triplet_train:
            imgs_train.append(triplet.triplet)
            #middle mask
            masks_train.append(triplet.masks[1])
            #middle filename
            flnames_train.append(triplet.flnames[1])
            labels_train.append(triplet.label)
        return np.array(imgs_train), np.array(masks_train), np.array(labels_train), np.array(flnames_train)

    def get_test_triplets(self):
        imgs_test = []
        masks_test = []
        flnames_test = []
        labels_test = []
        for triplet in self.triplet_test:
            imgs_test.append(triplet.triplet)
            #middle mask
            masks_test.append(triplet.masks[1])
            #middle filename
            flnames_test.append(triplet.flnames[1])
            labels_test.append(triplet.label)
        return np.array(imgs_test), np.array(masks_test), np.array(labels_test), np.array(flnames_test)

    def evaluate(self):
        """Compare the saved predicted masks with the test masks and print metrics.

        Loads ``imgs_pred_mask_test.npy`` from ``args.input_dir`` and prints:
        per-image Dice, per-image pixel recall and precision (mean and
        standard deviation), image-level accuracy and recall, and the
        distribution of connected-component counts in the predictions.
        When ``args.omit_empty`` is set, the per-image precision and all
        image-level figures are restricted to test images whose label is
        positive.

        Raises:
            ValueError: if the number of predicted masks differs from the
                number of test masks.
        """
        _, imgs_mask_test, test_labels, _ = self.get_test_triplets()
        print(len(test_labels), "masks in test set")
        print(sum(test_labels), "images with prostates")

        # Boolean copy for filtering: NumPy only accepts integer or boolean
        # arrays as indices, so the float32 labels below cannot be used.
        has_prostate = test_labels.astype(bool)
        test_labels = test_labels.astype('float32')
        imgs_mask_test = np.around(imgs_mask_test.astype('float32'))
        print(test_labels)

        pred_masks = np.load(os.path.join(args.input_dir,
                                          'imgs_pred_mask_test.npy'))

        print(len(pred_masks), "pred masks in test set")
        if len(pred_masks) != len(imgs_mask_test):
            raise ValueError(
                "number of predicted masks ({}) does not match number of test "
                "masks ({})".format(len(pred_masks), len(imgs_mask_test)))

        pred_masks = pred_masks.astype('float32')
        # presumably the predictions are stored on a 0..255 scale -- TODO confirm
        scaled = np.around(pred_masks / 255.)
        print(set(scaled.flatten()), set(imgs_mask_test.flatten()))

        # NOTE(review): the predicted label comes from the unrounded masks, so
        # any non-zero pixel counts even if it rounds to 0 in `scaled`.
        pred_labels = np.array([mask_img.max() > 0 for mask_img in pred_masks])
        predicted = pred_labels.astype(bool)

        dice = np.array([np_dice_coef(truth, pred)
                         for truth, pred in zip(imgs_mask_test, scaled)])

        # recall only applies to masks with
        # prostates in them
        recalls = np.array([recall_score(truth.flatten(), pred.flatten())
                            for truth, pred, positive
                            in zip(imgs_mask_test, scaled, has_prostate)
                            if positive])

        # second element of the (labels, count) pair returned with return_num=True
        pred_components = np.array([labelcc(mask_img, return_num=True)[1]
                                    for mask_img in pred_masks])

        # precision only applies to images predicted to have
        # prostates in them (and, with omit_empty, labelled positive too)
        use_for_precision = predicted & has_prostate if args.omit_empty else predicted
        precisions = np.array([precision_score(truth.flatten(), pred.flatten())
                               for truth, pred, use
                               in zip(imgs_mask_test, scaled, use_for_precision)
                               if use])

        if args.omit_empty:
            # keep only the test images that actually contain a prostate
            imgs_mask_test = imgs_mask_test[has_prostate]
            pred_masks = pred_masks[has_prostate]
            pred_labels = pred_labels[has_prostate]
            dice = dice[has_prostate]
            pred_components = pred_components[has_prostate]
            test_labels = test_labels[has_prostate]

        print("Dice", np.sort(dice))

        acc = accuracy_score(test_labels,
                             pred_labels)

        recall = recall_score(test_labels,
                              pred_labels)

        print("Dice coefficient:", np.mean(dice), np.std(dice))
        print("Recall per image:", np.mean(recalls), np.std(recalls))
        print("Precision per image:", np.mean(precisions), np.std(precisions))
        print("Accuracy:", acc)
        print("Recall:", recall)

        bin_counts = np.bincount(pred_components)
        print("CC distribution:", bin_counts)
        # share of predictions with more than one connected component
        print(">1 CC:", 100. * sum(bin_counts[2:]) / len(pred_masks))


## Triplet model

In order to double-check that the triplet images are generated correctly, we created a test method to see which images each triplet consists of and whether the label corresponds to the middle mask. We then manually checked that the label corresponds to the manually created mask. Here is a snippet of the output of our testing.

```
------------------------------
triplet shape: (512, 512, 3)
filenames: black_0000_image_0000.tif Pt_1103_Slice_1.tif Pt_1103_Slice_2.tif
Label: False
====================
triplet shape: (512, 512, 3)
filenames: Pt_1103_Slice_1.tif Pt_1103_Slice_2.tif Pt_1103_Slice_3.tif
Label: False
====================
triplet shape: (512, 512, 3)
filenames: Pt_1103_Slice_2.tif Pt_1103_Slice_3.tif Pt_1103_Slice_4.tif
Label: False
====================
triplet shape: (512, 512, 3)
filenames: Pt_1103_Slice_3.tif Pt_1103_Slice_4.tif Pt_1103_Slice_5.tif
Label: False
====================
triplet shape: (512, 512, 3)
filenames: Pt_1103_Slice_4.tif Pt_1103_Slice_5.tif Pt_1103_Slice_6.tif
Label: False
====================
triplet shape: (512, 512, 3)
filenames: Pt_1103_Slice_5.tif Pt_1103_Slice_6.tif Pt_1103_Slice_7.tif
Label: False
====================
triplet shape: (512, 512, 3)
filenames: Pt_1103_Slice_6.tif Pt_1103_Slice_7.tif Pt_1103_Slice_8.tif
Label: False
====================
triplet shape: (512, 512, 3)
filenames: Pt_1103_Slice_7.tif Pt_1103_Slice_8.tif Pt_1103_Slice_9.tif
Label: False
====================
triplet shape: (512, 512, 3)
filenames: Pt_1103_Slice_8.tif Pt_1103_Slice_9.tif Pt_1103_Slice_10.tif
Label: False
====================
triplet shape: (512, 512, 3)
filenames: Pt_1103_Slice_9.tif Pt_1103_Slice_10.tif Pt_1103_Slice_11.tif
Label: True
====================
triplet shape: (512, 512, 3)
filenames: Pt_1103_Slice_10.tif Pt_1103_Slice_11.tif Pt_1103_Slice_12.tif
Label: True
====================
triplet shape: (512, 512, 3)
filenames: Pt_1103_Slice_11.tif Pt_1103_Slice_12.tif Pt_1103_Slice_13.tif
Label: True
====================
```

### Triplet model evaluation - 250 epochs

| Metric        | Mean           | Standard deviation  | Overall |
| ------------- |:-------------:| -----:| -----: | 
| Dice coefficient      | 0.734     |   0.380 | X |
| Precision -- per image | 0.629     |    0.426 | X |
| Recall -- per image | 0.708      |   0.293 | X |
| Accuracy | X  | X | 0.829 |
| Recall | X | X | 0.969 |

![a](true.png)