In [None]:
import os 
import sys
import random
import math
import numpy as np
import cv2
import matplotlib.pyplot as plt
import json
import pydicom
from imgaug import augmenters as iaa
from tqdm import tqdm
import pandas as pd 
import glob
import ast
from sklearn.model_selection import KFold

In [None]:
# Number of leading annotation rows used for training; rows from this position
# onward are held out as the test set (see the cell that slices `anns`).
# Original author note: "or 169 out of 198 unique scans".
TRAIN_SPLIT = 815

In [None]:
# Training-data directory names (not referenced elsewhere in the visible part of this notebook).
DIR_1 = 'GRANULOMA_TRAINING_DATA_ONE/'
DIR_2 = 'GRANULOMA_TRAINING_DATA_TWO/'

# Root directory that annotation file names are joined onto to build image paths.
DATA_DIR = '../../GRANULOMA_RADIOLOGIST_LABELS/'
# Annotation CSV, resolved relative to '../Granuloma/' when it is loaded.
CSV_PATH_1 = './Granuloma_Data/Granuloma_Annotation_Complete.csv'


# Side length (pixels) every image and mask is resized to before the
# granuloma/healthy pair is stacked side by side.
ORIG_SIZE = 384

In [None]:
# Load the annotation table; the columns used later are 'filename' and
# 'region_shape_attributes' (see `cols`).
anns = pd.read_csv(os.path.join('../Granuloma/', CSV_PATH_1))
anns.head()

In [None]:
anns['filename'][2]

In [None]:
# Number of distinct image files among the first TRAIN_SPLIT annotation rows.
# Uses the shared constant instead of a second hard-coded 815 so the count
# cannot drift from the actual train/test split.
len(set(anns['filename'][:TRAIN_SPLIT]))

In [None]:
# Positional split: rows from TRAIN_SPLIT onward become the held-out test set,
# the leading rows stay as training annotations.
# NOTE: `anns` is overwritten here, so re-running this cell without reloading
# the CSV slices the already-truncated frame again.
anns_test = anns.iloc[TRAIN_SPLIT:]
anns = anns.iloc[:TRAIN_SPLIT]

In [None]:
anns.shape

In [None]:
# Import Mask RCNN
# sys.path.append(os.path.join('./', 'Mask_RCNN'))  # To find local version of the library
from mrcnn.config import Config
from mrcnn import utils
import mrcnn.model as modellib
from mrcnn import visualize
from mrcnn.model import log

In [None]:
# Weights file loaded into the model before training (see `model.load_weights`).
COCO_WEIGHTS_PATH = "../Mask_RCNN/mask_rcnn_coco.h5"

In [None]:
# Annotation columns kept for each bounding-box record in parse_dataset.
cols = ['filename','region_shape_attributes']

In [None]:
# Array of healthy reference images, memory-mapped read-only instead of loaded
# fully into RAM. DetectorDataset.load_image indexes it by image id.
healthy = np.load('Normal.npy',mmap_mode='r')

In [None]:
def get_png_fps():
    """Return the de-duplicated PNG paths found one directory level below DATA_DIR.

    The result is built from a set, so its order is not guaranteed.
    """
    pattern = DATA_DIR + '/*/' + '*.png'
    unique_paths = set(glob.glob(pattern))
    return list(unique_paths)

def parse_dataset(anns):
    """Build image paths and per-image bounding-box records from an annotation table.

    Args:
        anns: DataFrame with at least the columns named in `cols`
            ('filename' and 'region_shape_attributes').

    Returns:
        (image_fps, image_annotations) where
        - image_fps is one DATA_DIR-joined path per annotation row (duplicates kept),
        - image_annotations maps each path to the list of `row[cols]` records whose
          'region_shape_attributes' literal is non-empty. Paths with no box keep
          an empty list.
    """
    image_fps = [os.path.join(DATA_DIR, name) for name in anns['filename']]
    image_annotations = {path: [] for path in image_fps}

    for _, record in anns.iterrows():
        selected = record[cols]
        # The shape attributes are stored as a literal dict of box coordinates.
        shape = ast.literal_eval(selected['region_shape_attributes'])
        if len(shape) == 0:
            # No bounding box on this row: nothing to record.
            continue
        path = os.path.join(DATA_DIR, record['filename'])
        image_annotations[path].append(selected)

    return image_fps, image_annotations

In [None]:
class DetectorConfig(Config):
    """Configuration for training granuloma detection with Mask R-CNN.

    Overrides values in the base Config class. Each training sample is a
    granuloma image stacked horizontally with a healthy reference image
    (see DetectorDataset.load_image), i.e. ORIG_SIZE high and 2 * ORIG_SIZE wide.
    """
    
    # Give the configuration a recognizable name
    NAME = 'Granuloma-Healthy-Attached-cv-optimize'
    
    # Train on 1 GPU with 2 images per GPU (GPU_COUNT * IMAGES_PER_GPU = 2).
    GPU_COUNT = 1
    IMAGES_PER_GPU = 2
    
    BACKBONE = 'resnet50'
    
    NUM_CLASSES = 2  # background + 1 granuloma class
    
    # 384 x 768 matches the side-by-side composite built in load_image.
    IMAGE_MIN_DIM = 384
    IMAGE_MAX_DIM = 768
    # NOTE(review): the scales are not in ascending order (16 precedes 8);
    # confirm this ordering is intentional.
    RPN_ANCHOR_SCALES = ( 4,16,8,32,64)
    TRAIN_ROIS_PER_IMAGE = 32
    MAX_GT_INSTANCES = 6
    DETECTION_MAX_INSTANCES = 5
    DETECTION_MIN_CONFIDENCE = 0.78  ## match target distribution
    DETECTION_NMS_THRESHOLD = 0.01

    STEPS_PER_EPOCH = 200

# Instantiate the configuration and print its values.
config = DetectorConfig()
config.display()

In [None]:
# Placeholder all-zero box for healthy images. In the visible code it is only
# referenced from the commented-out branch of DetectorDataset.__init__.
healthy_annotation = [{"x": 0,"y" : 0,"width" : 0,"height" : 0}]

In [None]:
# Second annotation table; its 'region_attributes' column is inspected below.
all_csv = pd.read_csv('All.csv')
all_csv.head()

In [None]:
# Row-wise array of (filename, region_attributes, region_shape_attributes).
attr = all_csv[['filename','region_attributes','region_shape_attributes']].values

In [None]:
attr[2][2]

In [None]:
# Collect the file names of rows whose 'End On Vessel' region attribute is
# marked PRESENT, printing the name of every attribute marked PRESENT on the way.
fname = []
for record in attr:
    region_attrs = json.loads(record[1])  # region_attributes is stored as JSON text
    for attr_name, attr_value in region_attrs.items():
        if 'PRESENT' not in attr_value and '[PRESENT]' not in attr_value:
            continue
        print(attr_name)
        if attr_name == 'End On Vessel':
            fname.append(record[0])

In [None]:
# Smoke-run the parser on the training annotations. parse_dataset requires the
# annotation frame, so calling it with no argument raised TypeError and stopped
# "Run All". The trailing semicolon suppresses the very large output.
parse_dataset(anns);

In [None]:
class DetectorDataset(utils.Dataset):
    """Mask R-CNN dataset of granuloma images paired with a healthy reference.

    Every sample is a granuloma image resized to ORIG_SIZE x ORIG_SIZE and
    stacked horizontally with an equally resized image taken from the
    module-level `healthy` array, giving an ORIG_SIZE x (2 * ORIG_SIZE) input.
    Masks are filled bounding-box rectangles on the granuloma half; the healthy
    half of every mask is empty.
    """

    def __init__(self, image_fps, image_annotations, orig_height, orig_width,img_indxs):
        """Register the single 'granul' class and every string path in image_fps.

        Args:
            image_fps: iterable of image paths; entries that are not strings are skipped.
            image_annotations: dict mapping path -> list of annotation records
                (each exposing 'region_shape_attributes'). Every string path
                must be present as a key.
            orig_height, orig_width: stored in the image info. The visible
                methods read the real image size from disk instead.
            img_indxs: sequence giving the external id recorded for each path,
                aligned with image_fps.
        """
        # The base initializer takes no dataset argument; the instance must not
        # be passed again as a positional parameter.
        super().__init__()
        
        # Add classes
        self.add_class('Granuloma', 1, 'granul')
        
        # add images 
        for i, fp in enumerate(image_fps):
            if isinstance(fp, str):
                annotations = image_annotations[fp]
                self.add_image('Granuloma', image_id=img_indxs[i], path=fp, 
                               annotations=annotations, orig_height=orig_height, orig_width=orig_width)
            
    def image_reference(self, image_id):
        """Return the file path registered for image_id."""
        info = self.image_info[image_id]
        return info['path']

    def load_image(self, image_id):
        """Return the granuloma image and its healthy counterpart, side by side.

        Raises:
            FileNotFoundError: if the image file cannot be read.
        """
        info = self.image_info[image_id]
        fp = info['path']

        image = cv2.imread(fp)
        if image is None:
            # cv2.imread signals failure by returning None; fail with the path
            # instead of an opaque resize error.
            raise FileNotFoundError("Could not read image: {}".format(fp))
        image = cv2.resize(image, (ORIG_SIZE, ORIG_SIZE))
        # If grayscale. Convert to RGB for consistency.
        if len(image.shape) != 3 or image.shape[2] != 3:
            image = np.stack((image,) * 3, -1)

        # The healthy reference is selected by the same index used for image_info.
        im_healthy = healthy[image_id]
        im_healthy = cv2.resize(im_healthy, (ORIG_SIZE, ORIG_SIZE))

        return np.hstack((image, im_healthy))

    def load_mask(self, image_id):
        """Return (masks, class_ids) for image_id.

        masks is a boolean array of shape (ORIG_SIZE, 2 * ORIG_SIZE, count) with
        one filled rectangle per annotation on the left (granuloma) half.
        class_ids is an int32 array of ones with length count.

        Raises:
            FileNotFoundError: if the image file cannot be read.
        """
        info = self.image_info[image_id]
        fp = info['path']

        # The file is read only to learn the original size the boxes refer to.
        image = cv2.imread(fp)
        if image is None:
            raise FileNotFoundError("Could not read image: {}".format(fp))
        orig_rows, orig_cols = image.shape[0], image.shape[1]

        annotations = info['annotations']
        count = len(annotations)

        res_mask = np.zeros((ORIG_SIZE, ORIG_SIZE, count), dtype=np.uint8)
        # The healthy half never contains an object.
        res_mask_healthy = np.zeros((ORIG_SIZE, ORIG_SIZE, count), dtype=np.uint8)
        class_ids = np.zeros((count,), dtype=np.int32)

        for i, a in enumerate(annotations):
            a = ast.literal_eval(a['region_shape_attributes'])
            x = int(a['x'])
            y = int(a['y'])
            w = int(a['width'])
            h = int(a['height'])
            # Draw the box at the original resolution, then resize it exactly
            # like the image so the two stay aligned.
            mask_instance = np.zeros((orig_rows, orig_cols), dtype=np.uint8)
            cv2.rectangle(mask_instance, (x, y), (x+w, y+h), 255, -1)
            mask_instance = cv2.resize(mask_instance, (ORIG_SIZE, ORIG_SIZE))
            res_mask[:, :, i] = mask_instance

            class_ids[i] = 1

        res_mask = np.hstack((res_mask, res_mask_healthy))

        # The builtin bool replaces the np.bool alias, which NumPy 1.24 removed.
        return res_mask.astype(bool), class_ids.astype(np.int32)

In [None]:
def find_unique(x):
    """Return the unique elements of x, keeping first-occurrence order.

    Args:
        x: any iterable.

    Returns:
        A new list with duplicates (by equality) removed.

    Hashable elements are de-duplicated in linear time through a set. If any
    element is unhashable, the whole input falls back to the quadratic
    list-membership scan so the result is still correct.
    """
    items = list(x)  # materialize once so a fallback can re-iterate generators
    try:
        seen = set()
        res = []
        for item in items:
            if item not in seen:
                seen.add(item)
                res.append(item)
        return res
    except TypeError:
        # Unhashable element encountered: use equality-only comparison.
        res = []
        for item in items:
            if item not in res:
                res.append(item)
        return res

In [None]:
## Test Set Creation
image_fps_test, image_annotations_test = parse_dataset(anns_test)


## REMOVE THOSE KEYS WHICH DONT HAVE ANY BOUNDING BOX
for k in list(image_annotations_test.keys()):
    v = image_annotations_test[k]
    if(len(v)==0):
        print("hello")
        print(v)
        del(image_annotations_test[k])
print(len(image_annotations_test)) 


# Unique paths in first-seen order, restricted to images that still have
# annotations. Without the filter, a path whose entry was deleted above would
# make DetectorDataset raise KeyError on image_annotations_test[fp].
image_fps_test = [fp for fp in find_unique(image_fps_test) if fp in image_annotations_test]
image_fps_list_test = list(image_fps_test)

# Test images are simply numbered 0..N-1.
test_index = list(range(len(image_fps_list_test)))

print("Test IDX : ", test_index)


dataset_test = DetectorDataset(image_fps_list_test, image_annotations_test, ORIG_SIZE, ORIG_SIZE, test_index)
dataset_test.prepare()

In [None]:
# Parse the training annotations: one path per row plus a path -> boxes mapping.
image_fps, image_annotations = parse_dataset(anns)

In [None]:
image_annotations[image_fps[0]]

In [None]:
# Sanity check: dict keys de-duplicate, so this prints the number of distinct
# training image paths.
test = {fp: [] for fp in image_fps}
print(len(test))

In [None]:
## Drop every image whose annotation list is empty (no bounding box).

# Decide what to remove first, so the dict is never resized while being iterated.
unannotated = [path for path, boxes in image_annotations.items() if len(boxes) == 0]

for path in unannotated:
    print("hello")
    print(image_annotations[path])
    del image_annotations[path]

print(len(image_annotations))

In [None]:
# Unique training paths in first-seen order, restricted to images that still
# have an entry in image_annotations. A path whose entry was removed for having
# no bounding box would otherwise make DetectorDataset raise KeyError.
image_fps = [fp for fp in find_unique(image_fps) if fp in image_annotations]
image_fps_list = list(image_fps)

In [None]:
# Preview the images at positions 68..101 (inclusive) of the training list.
# NOTE(review): presumably the validation indices of one cross-validation fold — confirm.
test_index = list(range(68, 102))

image_fps_val = [image_fps_list[i] for i in test_index]

image_fps_val

In [None]:
# Training-time augmentation: three stages applied in sequence, each picking
# exactly one of its alternatives.

# Stage 1: geometric transform (mild affine or piecewise-affine warp).
_geometric_aug = iaa.OneOf([
    iaa.Affine(
        scale={"x": (0.98, 1.02), "y": (0.98, 1.04)},
        translate_percent={"x": (-0.02, 0.02), "y": (-0.04, 0.04)},
        rotate=(-2, 2),
        shear=(-1, 1),
    ),
    iaa.PiecewiseAffine(scale=(0.001, 0.025)),
])

# Stage 2: brightness or contrast.
_intensity_aug = iaa.OneOf([
    iaa.Multiply((0.9, 1.1)),
    iaa.ContrastNormalization((0.9, 1.1)),
])

# Stage 3: blur or sharpen.
_sharpness_aug = iaa.OneOf([
    iaa.GaussianBlur(sigma=(0.0, 0.1)),
    iaa.Sharpen(alpha=(0.0, 0.1)),
])

augmentation = iaa.Sequential([_geometric_aug, _intensity_aug, _sharpness_aug])


In [None]:
### Fold 2
# Train a single cross-validation fold: zero-based fold TARGET_FOLD of 5.

TARGET_FOLD = 2
LEARNING_RATE = 0.006

# Folds are contiguous, unshuffled index ranges. random_state is deliberately
# not passed: it has no effect without shuffle=True, and current scikit-learn
# raises ValueError for that combination.
kf = KFold(n_splits=5)

for fold_idx, (train_index, test_index) in enumerate(kf.split(image_fps_list)):
    if fold_idx != TARGET_FOLD:
        continue

    print("\n\n")
    print("############### FOLD BEGINS {0} ##################################".format(fold_idx))
    print("\n\n")

    image_fps_train = [image_fps_list[i] for i in train_index]
    image_fps_val = [image_fps_list[i] for i in test_index]

    print(image_fps_val)

    print(len(image_fps_train), len(image_fps_val))

    print("VAL IDX : ", test_index)

    dataset_train = DetectorDataset(image_fps_train, image_annotations, ORIG_SIZE, ORIG_SIZE, train_index)
    dataset_train.prepare()

    dataset_val = DetectorDataset(image_fps_val, image_annotations, ORIG_SIZE, ORIG_SIZE, test_index)
    dataset_val.prepare()

    model = modellib.MaskRCNN(mode='training', config=config, model_dir='./optimize_model_data/')
    # Start from the COCO weights, but skip the head layers listed here.
    model.load_weights(COCO_WEIGHTS_PATH, by_name=True, exclude=[
        "mrcnn_class_logits", "mrcnn_bbox_fc",
        "mrcnn_bbox", "mrcnn_mask"])

    # Stage 1: heads only, doubled learning rate, no augmentation yet.
    model.train(dataset_train, dataset_val,
                learning_rate=LEARNING_RATE*2,
                epochs=2,
                layers='heads',
                augmentation=None)

    # Stage 2: all layers with augmentation at the base learning rate.
    model.train(dataset_train, dataset_val,
                learning_rate=LEARNING_RATE,
                epochs=6,
                layers='all',
                augmentation=augmentation)

    # Stage 3: continue on all layers with a 5x smaller learning rate.
    model.train(dataset_train, dataset_val,
                learning_rate=LEARNING_RATE/5,
                epochs=16,
                layers='all',
                augmentation=augmentation)

    print("OVER")
    
    