<a href="https://colab.research.google.com/github/pswaruppk/GL_CAPSTONE/blob/master/pneumonia_utility.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# UTILITY functions

In [None]:
# Notebook start-up cell: announce that the utility notebook is loading and
# install pydicom, which the import cell that follows requires.
print("Loading functions from Pneumonia utility notebook")
print("Loading pydicom package")
!pip install pydicom

In [None]:
import pickle
import matplotlib.pyplot as plt
import cv2
import pydicom
import tensorflow as tf
import numpy as np
from sklearn import metrics
from sklearn.metrics import classification_report

# History saving function

In [None]:
#utility functions to save & load the training history (loss & metrics per epoch)

import pandas as pd 
import os    



def saveHistory(history,history_file):
  """Append a training history to a CSV file and return the merged table.

  Args:
    history: object whose ``history`` attribute can be turned into a
      DataFrame (presumably a Keras ``History`` -- verify against caller).
    history_file: path of the CSV file that accumulates the history.

  Returns:
    DataFrame holding the rows already stored in ``history_file`` (if the
    file exists) followed by the new rows, with a fresh 0..n-1 index.
  """
  merged = pd.DataFrame(history.history)
  if os.path.isfile(history_file):
    # Earlier epochs come first, the new ones are appended after them.
    earlier = pd.read_csv(history_file)
    merged = pd.concat([earlier, merged]).reset_index(drop=True)

  merged.to_csv(history_file, index=False)
  return merged

def loadHistory(history_file):
  """Load a training-history CSV written by ``saveHistory``.

  Args:
    history_file: path of the CSV file to read.

  Returns:
    DataFrame with the contents of ``history_file``.

  Raises:
    FileNotFoundError: if ``history_file`` is not an existing file. The
      original code only printed the message and then let ``read_csv`` fail;
      the failure is now raised explicitly with the same message.
  """
  if not os.path.isfile(history_file):
    message = "History file:{} does not exist".format(history_file)
    print(message)
    raise FileNotFoundError(message)
  return pd.read_csv(history_file)



# History plot function

In [None]:
def plot_his(history, metric='mean_iou'):
  """Plot training vs. validation curves for the loss and one metric.

  Args:
    history: table indexed by epoch with the columns ``loss``, ``val_loss``,
      ``<metric>`` and ``val_<metric>`` (e.g. the frame returned by
      ``loadHistory``).
    metric: name of the metric column shown in the right-hand panel.
  """
  plt.figure(figsize=(15,6))
  epochs = history.index.tolist()

  # Left panel: loss, right panel: the requested metric.
  for position, column in ((121, 'loss'), (122, metric)):
    plt.subplot(position)
    val_column = 'val_' + column
    plt.plot(epochs, history[column].tolist(), label="Train " + column)
    plt.plot(epochs, history[val_column].tolist(), label="Valid " + val_column)
    plt.legend()

  plt.show()

# Load EDA data

In [None]:
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

In [None]:
def loadData():
  """Load the image/bbox table, the patient->bbox map and the class encoder.

  Reads ``stage_2_detailed_class_info.csv`` and ``image_bbox_full.csv`` from
  the working directory and ``generated_data/bbox_map.pkl``.

  Returns:
    Tuple ``(pd_hybrid, bbox_map, class_enc)``:
      * ``pd_hybrid``: contents of ``image_bbox_full.csv`` plus a
        ``class_vector`` column holding the one-hot encoded class.
      * ``bbox_map``: object unpickled from ``bbox_map.pkl``.
      * ``class_enc``: the ``LabelEncoder`` fitted on the ``class`` column.
  """
  print('Loading full CXR info')
  ## Read class info
  pd_class_info = pd.read_csv('stage_2_detailed_class_info.csv')
  class_enc = LabelEncoder()
  pd_class_info['class_id'] = class_enc.fit_transform(pd_class_info['class'])
  # The dense-output flag of OneHotEncoder was renamed (sparse ->
  # sparse_output), so passing sparse=False fails on current scikit-learn.
  # Taking the default sparse result and densifying it works on every version.
  oh_enc = OneHotEncoder()
  pd_class_info['class_vector'] = oh_enc.fit_transform(
    pd_class_info['class_id'].values.reshape(-1, 1)).toarray().tolist()

  pd_hybrid = pd.read_csv('image_bbox_full.csv')
  print('Loading bbox map')
  # NOTE(review): this assignment aligns on the row index, so it assumes both
  # CSV files list their rows in the same order -- confirm.
  pd_hybrid['class_vector'] = pd_class_info['class_vector']
  gen_data_dir = 'generated_data'
  bbox_map_file = gen_data_dir + '/bbox_map.pkl'
  # NOTE: pickle.load executes arbitrary code; only load files generated by
  # this project. The context manager closes the file even if loading fails.
  with open(bbox_map_file, "rb") as a_file:
    bbox_map = pickle.load(a_file)
  return pd_hybrid,bbox_map,class_enc

In [None]:
def get_class_map(df_pn,target='class_vector'):
  """Map each patient id to the target value of its first row.

  Args:
    df_pn: DataFrame with a ``patientId`` column and the ``target`` column.
    target: name of the column whose value is stored per patient.

  Returns:
    dict ``{patientId: value}``; when a patient occurs on several rows the
    value of the first one is kept.
  """
  class_map = {}
  for _, record in df_pn.iterrows():
    # setdefault keeps the value of the first row seen for a patient.
    class_map.setdefault(record['patientId'], record[target])
  return class_map

# Import Generic Models

In [None]:
from tensorflow.keras.layers import Layer
from tensorflow.keras.layers import Concatenate, UpSampling2D, Conv2D, Reshape, BatchNormalization, Activation
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau

from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout, Flatten, Input, multiply, LocallyConnected2D, Lambda, AvgPool2D
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential

In [None]:
# Per-architecture settings: expected input shape, the module name of the
# architecture and the name recorded as its last convolutional layer.
MODEL_MAP = {
    "VGG16": {
        "input_shape": (224, 224, 3),
        "module_name": "vgg16",
        "last_conv_layer": "block5_conv3",
    },
    "VGG19": {
        "input_shape": (224, 224, 3),
        "module_name": "vgg19",
        "last_conv_layer": "block5_conv4",
    },
    "DenseNet121": {
        "input_shape": (224, 224, 3),
        "module_name": "densenet",
        "last_conv_layer": "bn",
    },
    "ResNet50": {
        "input_shape": (224, 224, 3),
        "module_name": "resnet50",
        "last_conv_layer": "activation_49",
    },
    "InceptionV3": {
        "input_shape": (299, 299, 3),
        "module_name": "inception_v3",
        "last_conv_layer": "mixed10",
    },
    "InceptionResNetV2": {
        "input_shape": (299, 299, 3),
        "module_name": "inception_resnet_v2",
        "last_conv_layer": "conv_7b_ac",
    },
    "NASNetMobile": {
        "input_shape": (224, 224, 3),
        "module_name": "nasnet",
        "last_conv_layer": "activation_188",
    },
    "NASNetLarge": {
        "input_shape": (331, 331, 3),
        "module_name": "nasnet",
        "last_conv_layer": "activation_260",
    },
}

In [None]:


BaseModel = Model()
preprocess_input = 0
def selectBaseModel(BASE_MODEL):
  """Select the backbone class and its preprocessing function by name.

  Imports ``tensorflow.keras.applications.<module>`` for the requested
  architecture and stores its model class and ``preprocess_input`` function
  in the module-level globals ``BaseModel`` and ``preprocess_input``.

  The previous implementation ran ``exec`` on string-built assignments and
  bound the imported module to the local name ``np`` (shadowing the numpy
  alias); both are replaced by a plain import plus attribute lookup.

  Args:
    BASE_MODEL: one of 'InceptionResNetV2', 'InceptionV3', 'DenseNet121',
      'VGG16', 'ResNet50', 'Xception', 'DenseNet169', 'MobileNet'.

  Returns:
    Tuple ``(BaseModel, preprocess_input)`` -- the same objects that are
    stored in the globals.

  Raises:
    KeyError: if ``BASE_MODEL`` is not one of the supported names.
  """
  global BaseModel
  global preprocess_input
  import importlib

  model_map = {'InceptionResNetV2':'inception_resnet_v2',
               'InceptionV3':'inception_v3',
               'DenseNet121':'densenet',
               'VGG16':'vgg16',
               'ResNet50':'resnet50',
               'Xception':'xception',
               'DenseNet169':'densenet',
               'MobileNet':'mobilenet'}
  modelFrom = model_map[BASE_MODEL]

  print("Setting baseModel :",BASE_MODEL)
  app_module = importlib.import_module('tensorflow.keras.applications.' + modelFrom)
  print(app_module)
  # The model class carries the same name as the key passed by the caller.
  BaseModel = getattr(app_module, BASE_MODEL)
  preprocess_input = app_module.preprocess_input
  print(type(BaseModel))
  return BaseModel,preprocess_input

#Split data frame

In [None]:
from sklearn.model_selection import train_test_split
def splitData(df,target='Target',is_train_test_only=False):
  """Split a frame into train / validation / test parts, one row per patient.

  One random row is drawn for every ``patientId``; 80% of those rows become
  the training set (stratified on ``target``, ``random_state=11``). The
  remaining 20% are either used as-is for both validation and test, or split
  50/50 into validation and test.

  Args:
    df: DataFrame with a ``patientId`` column and the ``target`` column.
    target: column used for stratification.
    is_train_test_only: when it compares equal to ``False`` the hold-out
      part is split again into validation and test.

  Returns:
    Tuple ``(train_df, valid_df, test_df)``.
  """
  per_patient = df.groupby('patientId').apply(lambda rows: rows.sample(1))
  train_df, holdout_df = train_test_split(
      per_patient, test_size=0.20, random_state=11,
      stratify=per_patient[target])
  print(train_df.shape, 'training data')

  valid_df = holdout_df
  test_df = holdout_df
  # Kept as an equality test so that non-bool arguments behave as before.
  split_holdout = (is_train_test_only == False)
  if split_holdout:
    print(holdout_df.shape, 'raw validation data')
    valid_df, test_df = train_test_split(
        holdout_df, test_size=0.50, random_state=11,
        stratify=holdout_df[target])
    print(valid_df.shape, 'Validation data')
  print(test_df.shape, 'test data')
  return train_df,valid_df,test_df

#Balance dataframe

In [None]:
def balance_df(df, sample_count,target='Target'):
  """Draw the same number of rows from every class and plot the class sizes.

  Args:
    df: DataFrame containing the ``target`` column.
    sample_count: total number of rows wanted; each class contributes
      ``sample_count // number_of_classes`` rows.
    target: column that defines the classes.

  Returns:
    The balanced DataFrame with a fresh index. Two bar charts (class sizes
    before and after balancing) are drawn on a new figure.
  """
  n_classes = len(set(df[target]))
  _, (before_ax, after_ax) = plt.subplots(1, 2, figsize = (10, 5))
  df.groupby(target).size().plot.bar(ax=before_ax)

  balanced = (df.groupby(target)
                .apply(lambda group: group.sample(sample_count//n_classes))
                .reset_index(drop=True))
  balanced.groupby(target).size().plot.bar(ax=after_ax)
  return balanced

#Display masked Image

In [None]:
import copy
def showMaskedImage(imageSet, maskSet, index, predMaskSet=None) :
    """Show image ``index`` multiplied by its mask (and by a predicted mask).

    Args:
      imageSet: indexable collection of images with three channels on the
        last axis.
      maskSet: indexable collection of 2-D masks matching the images.
      index: position of the sample to display.
      predMaskSet: optional collection of predicted masks. When it is
        ``None`` or empty only the ground-truth view is drawn; otherwise the
        ground-truth and predicted views are drawn side by side. The default
        used to be a shared mutable ``[]``; ``None`` is equivalent for
        callers.
    """
    image = imageSet[index]

    def _masked_copy(mask):
        # Work on a copy so the caller's image is left untouched.
        masked = copy.deepcopy(image)
        for channel in range(3):
            masked[:,:,channel] = mask * image[:,:,channel]
        return masked

    truth_view = _masked_copy(maskSet[index])
    if predMaskSet is None or len(predMaskSet) == 0:
        plt.imshow(truth_view[:,:,0], cmap=plt.cm.bone)
        return

    f, ax = plt.subplots(1, 2, figsize=(10,7))
    ax[0].imshow(truth_view[:,:,0], cmap=plt.cm.bone)
    ax[0].set_title("Ground Truth")
    pred_view = _masked_copy(predMaskSet[index])
    ax[1].imshow(pred_view[:,:,0], cmap=plt.cm.bone)
    ax[1].set_title("Pred Truth")

  #   f, axarr = plt.subplots(1, 2, figsize=(10,7))
  # axarr[0].imshow(img[:,:,0], cmap=plt.cm.gist_gray)
  # axarr[0].axis('off')
  # # msk = comp = msk[:, :, 0] > 0.5
  # axarr[1].imshow(msk[:,:,0], cmap=plt.cm.gist_gray)
  # comp = msk
  # comp = msk[:,:,0] > 0.5

  
  # print(len(msk.shape))
  # # apply connected components
  # comp = measure.label(comp)
  # for region in measure.regionprops(comp):
  #   # retrieve x, y, height and width
  #   print(type(region))
  #   print(region.bbox)
  #   y, x, y2, x2 = region.bbox
  #   height = y2 - y
  #   width = x2 - x
  #   axarr[0].axis('off')
  #   axarr[0].add_patch(patches.Rectangle((x,y),width,height,linewidth=2,
  #                                                     edgecolor='b',facecolor='none'))
  # # axarr[1].imshow(msk, cmap=plt.cm.gist_gray)
  # plt.show()

In [None]:
from skimage import measure
import matplotlib.patches as patches
def showPredMaskedBatch(imgs,masks,preds):
  """Show up to eight images with ground-truth and predicted bounding boxes.

  Every mask is thresholded at 0.5, split into connected regions and each
  region's bounding box is drawn on the image: blue for the ground truth,
  red for the prediction.

  Fixes over the previous version: the predicted boxes used to be drawn
  inside the loop over ground-truth regions, so they were missing whenever
  an image had no ground-truth region and were redrawn once per ground-truth
  region otherwise. Unused panels are now also drawn without axes.

  Args:
    imgs: iterable of images; channel 0 is displayed.
    masks: iterable of ground-truth masks, 2-D or 3-D (channel 0 is used).
    preds: iterable of predicted masks, 2-D or 3-D (channel 0 is used).
  """
  plt.style.use('default')
  print("**Red** Bounding box: **PREDICTED**")
  print("**Blue** bouding box: **TRUTH**")
  panel_count = 8
  threshold = 0.5
  f, ax = plt.subplots(1, panel_count, figsize=(15,5))

  def _draw_region_boxes(axis, mask, color):
    # Outline every connected region of the thresholded mask.
    plane = mask[:, :, 0] if len(mask.shape) == 3 else mask[:, :]
    labelled = measure.label(plane > threshold)
    for region in measure.regionprops(labelled):
      y, x, y2, x2 = region.bbox
      axis.add_patch(patches.Rectangle((x,y), x2 - x, y2 - y, linewidth=2,
                                       edgecolor=color, facecolor='none'))

  # zip stops after the last panel, so at most panel_count samples are shown.
  for axis, img, msk, pred in zip(ax, imgs, masks, preds):
    axis.imshow(img[:, :, 0],cmap=plt.cm.gist_gray)
    _draw_region_boxes(axis, msk, 'b')
    _draw_region_boxes(axis, pred, 'r')

  for axis in ax:
    axis.axis('off')
  plt.show()



In [None]:
def show_top_prediction(pred_df, topCount) :
    """Plot the ``topCount`` predictions with the highest IoU.

    For every selected row the DICOM image ``stage_2_train_images/<id>.dcm``
    is read, and the ground-truth box (red) and the predicted box (blue) are
    drawn on it. Images are laid out four per row; only complete rows are
    shown.

    Fixes over the previous version: the ``np.int`` alias (removed in NumPy
    1.24) is replaced by the builtin ``int``, and the IoU title is set once
    per panel instead of only inside the predicted-region loop.

    Args:
      pred_df: DataFrame with the columns ``patientId``, ``iou``, ``x``,
        ``y``, ``width``, ``height``, ``x_pred``, ``y_pred``, ``width_pred``
        and ``height_pred``. It is sorted in place by ``iou`` (descending).
      topCount: number of rows to display. NOTE(review): values below 4 give
        zero subplot rows -- confirm callers never pass them.
    """
    pred_df.sort_values("iou", ascending=False, inplace=True)

    topPidsAry = np.array(pred_df["patientId"].head(topCount))
    topIOUsAry = np.array(pred_df["iou"].head(topCount))

    imageCollc = np.zeros((topCount, IMG_WIDTH, IMG_WIDTH), np.float32)
    gtCoordCollc = pred_df[["x", "y", "width", "height"]].to_numpy()
    predCoordCollc = pred_df[["x_pred", "y_pred", "width_pred", "height_pred"]].to_numpy()
    # Builtin int selects the same dtype the removed np.int alias did.
    gtMaskCollc = np.zeros((topCount, IMG_WIDTH, IMG_WIDTH), int)
    predMaskCollc = np.zeros((topCount, IMG_WIDTH, IMG_WIDTH), int)

    def _fill_box(mask, coords):
        # coords holds x, y, width, height of one box.
        x, y, width, height = (int(value) for value in coords)
        mask[y:y+height, x:x+width] = 1

    def _outline_regions(axis, mask, color):
        labelled = measure.label(mask[:, :] > 0.5)
        for region in measure.regionprops(labelled):
            y1, x1, y2, x2 = region.bbox
            axis.add_patch(patches.Rectangle((x1, y1), x2 - x1, y2 - y1,
                                             linewidth=1, edgecolor=color,
                                             facecolor='none'))

    folder = 'stage_2_train_images/'
    for indx in range(0, topCount) :
        dcm = folder + topPidsAry[indx] +'.dcm'
        img = pydicom.dcmread(dcm).pixel_array
        imageCollc[indx][:,:] = preprocess_input(np.array(img[:,:], dtype=np.float32))
        _fill_box(gtMaskCollc[indx], gtCoordCollc[indx, :4])
        _fill_box(predMaskCollc[indx], predCoordCollc[indx, :4])

    rowCount = int(topCount/4)
    imageArea, axesArry = plt.subplots(rowCount, 4, figsize=(18,18))
    axesArry = axesArry.ravel()
    for axidx in range(0, rowCount * 4) :
        axis = axesArry[axidx]
        axis.imshow(imageCollc[axidx][:, :], cmap=plt.cm.bone)
        _outline_regions(axis, gtMaskCollc[axidx], 'r')    # ground truth
        _outline_regions(axis, predMaskCollc[axidx], 'b')  # prediction
        axis.set_title('IoU : '+str(topIOUsAry[axidx]))

    plt.show()

# Training  & validation data generator

##Mask Train Generator

In [None]:
# Mask train generator
class MyDataGenerator(tf.keras.utils.Sequence):
    """Sequence yielding ``(images, masks)`` batches for mask training.

    Images are read from the DICOM paths in ``df["path"]``; the mask of an
    image is built from the boxes stored for its patient in the module-level
    ``bbox_map``.

    Args:
      df: DataFrame with a ``path`` column of DICOM file paths.
      batch_size: number of samples per batch.
      image_size: side length the images and masks are resized to.
      image_width: stored as ``self.image_width`` (original image size).
    """

    def __init__(self, df,batch_size=BATCH_SIZE,image_size=IMAGE_SIZE,image_width=IMG_WIDTH):
        # to_numpy() may return a view of the frame's own data; shuffling that
        # view in place would scramble (or fail on) the caller's DataFrame,
        # so the paths are copied first.
        self.filenames = df["path"].to_numpy().copy()
        print(len(self.filenames),':No of generator Image')
        np.random.shuffle(self.filenames)
        self.bbox_map = bbox_map #patient id to its all bounding box map
        self.batch_size = batch_size
        self.image_size = image_size #Resized image size
        self.image_width = image_width #Original image size

    def __len__(self):
        # Only complete batches are served.
        return int(len(self.filenames) / self.batch_size)

    def getImage(self,dcm):
        """Return the pixel array of the DICOM file ``dcm``."""
        return pydicom.dcmread(dcm).pixel_array

    def load(self,dcm):
        """Return ``(image, mask)`` for one DICOM path, both resized.

        The image is a float32 array of shape (image_size, image_size, 3)
        whose three channels hold the same preprocessed data.
        """
        img = self.getImage(dcm)
        msk = np.zeros(img.shape)
        # Patient id = file name without extension. The last path component
        # is used (as get_image does) so the directory depth does not matter.
        patient = dcm.split('/')[-1].split('.')[0]
        if patient in self.bbox_map:
            for y, x, h, w in self.bbox_map[patient].get('bboxes'):
                msk[y:y+h, x:x+w] = 1

        target_size = (self.image_size, self.image_size)
        resized_img = cv2.resize(img, target_size, interpolation=cv2.INTER_AREA)
        resized_msk = cv2.resize(msk, target_size, interpolation=cv2.INTER_AREA)

        # Preprocess once and replicate into the three channels.
        plane = preprocess_input(np.array(resized_img[:,:], dtype=np.float32))
        final_img = np.zeros((self.image_size, self.image_size, 3), dtype=np.float32)
        for channel in range(3):
            final_img[:,:,channel] = plane

        return final_img,resized_msk

    def __getitem__(self, index): # Get a batch
        batch_dcm_files = self.filenames[index*self.batch_size:(index+1)*self.batch_size]

        items = [self.load(dcm) for dcm in batch_dcm_files]
        imgs, msks = zip(*items)

        return np.array(imgs), np.array(msks)


## Label Train Generator

In [None]:

class MyLabelDataGenerator(tf.keras.utils.Sequence):
    """Sequence yielding ``(images, labels)`` batches for classification.

    Args:
      df: DataFrame with ``path``, ``patientId`` and the ``target`` column.
      target: column whose value is used as the label of an image.
      batch_size: number of samples per batch.
      image_size: side length the images are resized to.
      shuffle: reshuffle the file order at the end of every epoch.
      augment: flip the image left-right for about half of the samples.
      predict: stored as ``self.predict``; not read inside this class.
    """

    def __init__(self,df,target='class_vector',
                 batch_size=BATCH_SIZE, image_size=IMAGE_SIZE,
                 shuffle=False, augment=False, predict=False):
        # to_numpy() may return a view of the frame's own data; shuffling that
        # view in place would scramble (or fail on) the caller's DataFrame,
        # so the paths are copied first.
        self.filenames = df["path"].to_numpy().copy()
        self.df = df
        print(len(self.filenames),':No of generator Image')
        # NOTE(review): this initial shuffle happens even when shuffle=False.
        np.random.shuffle(self.filenames)
        self.batch_size = batch_size
        self.image_size = image_size
        self.shuffle = shuffle
        self.augment = augment
        self.predict = predict
        self.target = target
        self.on_epoch_end()

    def getImage(self,filename):
        """Return the pixel array of the DICOM file ``filename``."""
        return pydicom.dcmread(filename).pixel_array

    def load_class(self,filename):
        """Return ``(image, label)`` for one DICOM path.

        The image is a float32 array of shape (image_size, image_size, 3)
        whose three channels hold the same preprocessed data.
        """
        img = self.getImage(filename)
        resized_img = cv2.resize(img, (self.image_size, self.image_size), interpolation=cv2.INTER_AREA)

        # Preprocess once and replicate into the three channels.
        plane = preprocess_input(np.array(resized_img[:,:], dtype=np.float32))
        final_img = np.zeros((self.image_size, self.image_size, 3), dtype=np.float32)
        for channel in range(3):
            final_img[:,:,channel] = plane

        # if augment then horizontal flip half the time
        if self.augment and np.random.random() > 0.5:
            final_img = np.fliplr(final_img)

        # Patient id = file name without extension. The last path component
        # is used (as get_image does) so the directory depth does not matter.
        patient = filename.split('/')[-1].split('.')[0]

        # Label of the first row recorded for this patient.
        row = self.df[self.df['patientId'] == patient]
        label = row[self.target].to_numpy()[0]

        return final_img, label

    def __getitem__(self, index):
        # select batch
        filenames = self.filenames[index*self.batch_size:(index+1)*self.batch_size]

        items = [self.load_class(filename) for filename in filenames]
        imgs, labels = zip(*items)

        return np.array(imgs), np.array(labels)

    def on_epoch_end(self):
        if self.shuffle:
            np.random.shuffle(self.filenames)

    def __len__(self):
        # return full batches only
        return int(len(self.filenames) / self.batch_size)

# Test data generator

## Mask Test Generator

In [None]:
# test Mask  data generator
class MyTestDataGenerator(tf.keras.utils.Sequence):
    """Sequence yielding test batches for mask prediction.

    Every batch is the tuple ``(images, masks, patients, classes, targets)``.
    Masks are built from the boxes stored for the patient in the module-level
    ``bbox_map``; ``classes`` and ``targets`` come from the ``class`` and
    ``Target`` columns of the rows matching the patient.

    Args:
      df: DataFrame with ``path``, ``patientId``, ``class`` and ``Target``.
      batch_size: number of samples per batch.
      image_size: side length the images and masks are resized to.
      image_width: stored as ``self.image_width`` (original image size).
    """

    def __init__(self, df,batch_size=BATCH_SIZE,image_size=IMAGE_SIZE,image_width=IMG_WIDTH):
        # to_numpy() may return a view of the frame's own data; shuffling that
        # view in place would scramble (or fail on) the caller's DataFrame,
        # so the paths are copied first.
        self.filenames = df["path"].to_numpy().copy()
        print(len(self.filenames),':No of generator Image')
        np.random.shuffle(self.filenames) # Shuffling is not important for test generator
        self.test_df = df
        self.bbox_map = bbox_map #patient id to its all bounding box map
        self.batch_size = batch_size
        self.image_size = image_size #Resized image size
        self.image_width = image_width #Original image size

    def __len__(self):
        # Only complete batches are served.
        return int(len(self.filenames) / self.batch_size)

    def getImage(self,dcm):
        """Return the pixel array of the DICOM file ``dcm``."""
        return pydicom.dcmread(dcm).pixel_array

    def load(self,dcm):
        """Return ``(image, mask, patient, class rows, target rows)``.

        ``class rows`` and ``target rows`` are the ``class`` / ``Target``
        column slices of the frame rows whose ``patientId`` matches.
        """
        img = self.getImage(dcm)
        msk = np.zeros(img.shape)
        # Patient id = file name without extension. The last path component
        # is used (as get_image does) so the directory depth does not matter.
        patient = dcm.split('/')[-1].split('.')[0]
        if patient in self.bbox_map:
            for y, x, h, w in self.bbox_map[patient].get('bboxes'):
                msk[y:y+h, x:x+w] = 1

        target_size = (self.image_size, self.image_size)
        resized_img = cv2.resize(img, target_size, interpolation=cv2.INTER_AREA)
        resized_msk = cv2.resize(msk, target_size, interpolation=cv2.INTER_AREA)

        # Preprocess once and replicate into the three channels.
        plane = preprocess_input(np.array(resized_img[:,:], dtype=np.float32))
        final_img = np.zeros((self.image_size, self.image_size, 3), dtype=np.float32)
        for channel in range(3):
            final_img[:,:,channel] = plane

        row = self.test_df[self.test_df['patientId'] == patient]
        classLabel = row['class']
        target = row['Target']
        return final_img,resized_msk,patient,classLabel,target

    def __getitem__(self, index): # Get a batch
        batch_dcm_files = self.filenames[index*self.batch_size:(index+1)*self.batch_size]

        items = [self.load(dcm) for dcm in batch_dcm_files]
        imgs, msks,patients,classes,targets = zip(*items)

        return (np.array(imgs), np.array(msks), np.array(patients),
                np.array(classes), np.array(targets))


## Label Test Generator

In [None]:
class MyLabelTestDataGenerator(tf.keras.utils.Sequence):
  """Sequence yielding ``(images, filenames, labels)`` batches for prediction.

  The file order is kept as given (no shuffling) and the last, possibly
  smaller, batch is included.

  Args:
    df: DataFrame with ``path``, ``patientId`` and the ``target`` column.
    target: column whose value is used as the label of an image.
    batch_size: number of samples per batch.
    image_size: side length the images are resized to.
  """
  def __init__(self,df,target='class_vector',
                 batch_size=BATCH_SIZE, image_size=IMAGE_SIZE
                 ):
        self.filenames = df["path"].to_numpy()
        self.df = df
        print(len(self.filenames),':No of generator Image')
        self.batch_size = batch_size
        self.image_size = image_size
        self.target = target

  def getImage(self,dcm):
        """Return the pixel array of the DICOM file ``dcm``."""
        return pydicom.dcmread(dcm).pixel_array

  def __loadpredict__(self, dcm):
        """Return ``(image, label)`` for one DICOM path."""
        img = self.getImage(dcm)
        # Patient id = file name without extension. The last path component
        # is used (as get_image does) so the directory depth does not matter.
        patient = dcm.split('/')[-1].split('.')[0]

        resized_img = cv2.resize(img, (self.image_size, self.image_size), interpolation=cv2.INTER_AREA)
        # Preprocess once and replicate into the three channels.
        plane = preprocess_input(np.array(resized_img[:,:], dtype=np.float32))
        final_img = np.zeros((self.image_size, self.image_size, 3), dtype=np.float32)
        for channel in range(3):
            final_img[:,:,channel] = plane

        # Label of the first row recorded for this patient.
        row = self.df[self.df['patientId'] == patient]
        label = row[self.target].to_numpy()[0]

        return final_img,label

  def __getitem__(self, index):
        filenames = self.filenames[index*self.batch_size:(index+1)*self.batch_size]
        items = [self.__loadpredict__(filename) for filename in filenames]
        imgs,classes = zip(*items)
        # create numpy batch
        return np.array(imgs), filenames, np.array(classes)

  def __len__(self):
        # Round up so the final partial batch is served too.
        return int(np.ceil(len(self.filenames) / self.batch_size))

# LOSS functions

In [None]:
#define loss & mean iou function
def iou_loss(y_true, y_pred):
    """Return ``1 - IoU`` computed over all elements of both tensors.

    Both inputs are flattened; the intersection is the sum of their
    element-wise product and 1 is added to numerator and denominator.
    """
    flat_true = tf.reshape(y_true, [-1])
    flat_pred = tf.reshape(y_pred, [-1])
    overlap = tf.reduce_sum(flat_true * flat_pred)
    denominator = tf.reduce_sum(flat_true) + tf.reduce_sum(flat_pred) - overlap + 1.
    return 1 - (overlap + 1.) / denominator


def mean_iou(y_true, y_pred):
    """Return the mean smoothed IoU after rounding the predictions.

    Sums are taken along axis 1 only.
    NOTE(review): presumably the masks arrive flattened to (batch, pixels) --
    confirm against the model output shape.
    """
    rounded = tf.round(y_pred)
    overlap = tf.reduce_sum(y_true * rounded, axis=[1])
    total = tf.reduce_sum(y_true, axis=[1]) + tf.reduce_sum(rounded, axis=[1])
    ones = tf.ones(tf.shape(overlap))
    ratio = (overlap + ones) / (total - overlap + ones)
    return tf.reduce_mean(ratio)

# Test Mask Prediction

In [None]:

def computeIoU(boxA, boxB):
  """Return the intersection-over-union of two corner-format boxes.

  Args:
    boxA: sequence ``(x1, y1, x2, y2)``.
    boxB: sequence ``(x1, y1, x2, y2)``.

  Returns:
    ``0`` when the boxes do not overlap, otherwise the IoU as a float.
  """
  left, top = max(boxA[0], boxB[0]), max(boxA[1], boxB[1])
  right, bottom = min(boxA[2], boxB[2]), min(boxA[3], boxB[3])

  # Width and height of the overlap are clamped at zero.
  overlap = abs(max(right - left, 0) * max(bottom - top, 0))
  if overlap == 0:
    return 0

  area_a = abs((boxA[2] - boxA[0]) * (boxA[3] - boxA[1]))
  area_b = abs((boxB[2] - boxB[0]) * (boxB[3] - boxB[1]))
  return overlap / float(area_a + area_b - overlap)

In [None]:
def get_pred_iou(bb,strongPred):
  """Score every predicted region against one ground-truth box.

  Args:
    bb: ground-truth box as ``(y, x, h, w)``.
    strongPred: labelled mask passed to ``measure.regionprops``.

  Returns:
    DataFrame with one row per region and the columns ``iou``, ``x``, ``y``,
    ``width`` and ``height`` (box of the region, IoU against ``bb``).
  """
  top, left, box_h, box_w = bb
  truth_corners = np.array([left, top, left + box_w, top + box_h])

  iou_df = pd.DataFrame(columns=['iou', 'x', 'y', 'width', 'height'])
  for row_no, region in enumerate(measure.regionprops(strongPred)):
    y1, x1, y2, x2 = region.bbox
    region_corners = np.array([x1, y1, x2, y2])
    score = computeIoU(truth_corners, region_corners)
    iou_df.loc[row_no] = [score, x1, y1, x2 - x1, y2 - y1]
  return iou_df

In [None]:
from skimage.transform import resize
from tqdm import tqdm
def doMaskPrediction(test_df,bbox_map,model,pred_file,thresholdIoU) :
  """Predict masks for the test set and record one result row per truth box.

  Every predicted mask is resized to 1024x1024, thresholded at 0.5 and split
  into connected regions. For each ground-truth box of the patient (or one
  all-zero box when the patient is not in ``bbox_map``) the region with the
  highest IoU decides the outcome:

    * IoU above ``thresholdIoU``: the region's box, ``predTarget`` 1 and
      ``class_pred`` "Lung Opacity";
    * otherwise: zero box, ``predTarget`` 0 and ``class_pred`` "Normal";
    * no region at all: as above with ``iou`` set to 'NA'.

  Fix over the previous version: after sorting by IoU the rows were read
  with ``.loc`` on the labels ``0`` and ``len - 1``, so the decision was
  taken on the first region found instead of the best-matching one. The
  best region is now read by position. As before, exactly one row is written
  per ground-truth box.

  Args:
    test_df: DataFrame accepted by ``MyTestDataGenerator`` (needs
      ``patientId``).
    bbox_map: mapping ``patientId -> {'bboxes': [(y, x, h, w), ...]}``.
    model: object with a ``predict`` method taking an image batch.
    pred_file: path of the CSV file the result table is written to.
    thresholdIoU: IoU above which a region counts as a detection.

  Returns:
    Tuple ``(pred_df, test_y, pred_y)``: the result table and its ``Target``
    and ``predTarget`` columns converted to ``int``.
  """
  print('Number of Test Samples :', test_df["patientId"].nunique())
  test_gen = MyTestDataGenerator(test_df)
  print(len(test_gen), "# of iterations in one test epoch")

  pred_df = pd.DataFrame(columns=['patientId',
                                  'x', 'y', 'width', 'height', 'Target', 'class',
                                  'x_pred', 'y_pred', 'width_pred','height_pred', 'predTarget',
                                  'iou', 'class_pred'])

  def _outcome(iou_df):
    # Prediction part of a result row:
    # [x_pred, y_pred, width_pred, height_pred, predTarget, iou, class_pred].
    if len(iou_df) == 0:
      return [0, 0, 0, 0, 0, 'NA', "Normal"]
    best = iou_df.sort_values("iou", ascending=False).iloc[0]
    if best["iou"] > thresholdIoU:
      return [int(best["x"]), int(best["y"]),
              int(best["width"]), int(best["height"]),
              1, best["iou"], "Lung Opacity"]
    # NOTE(review): the IoU of a rejected region is stored truncated to an
    # int, as before -- confirm this is intended.
    return [0, 0, 0, 0, 0, int(best["iou"]), "Normal"]

  df_idx = 0
  for bimgs, bmsks, bpatients,bclasses, btargets in tqdm(test_gen):
    bpreds = model.predict(bimgs)

    for pred,pid,cls,tgt in zip(bpreds,bpatients,bclasses,btargets):
      # The generator delivers class/target as one-element-per-row arrays.
      tgt = tgt[0]
      cls = cls[0]
      # resize predicted mask, threshold it and label its components
      pred = resize(pred, (1024, 1024), mode='reflect')
      strongPred = measure.label(pred[:, :] > 0.5)

      if pid in bbox_map:
        bboxes = bbox_map[pid].get('bboxes')
      else:
        bboxes = [[0,0,0,0]] # Fill the no-pneumonia case with 0 coordinates

      for bb in bboxes:
        y, x, h, w = bb
        iou_df = get_pred_iou(bb,strongPred)
        pred_df.loc[df_idx] = [pid, x, y, w, h, tgt, cls] + _outcome(iou_df)
        df_idx += 1

  pred_df.to_csv(pred_file, index=False)
  print("Prediction Complete!")

  test_y = pred_df["Target"]
  pred_y = pred_df["predTarget"]

  return pred_df,test_y.apply(int), pred_y.apply(int)

#Test class Label Prediction

In [None]:
from tqdm import tqdm
def class_predict(gen,model,no_of_batch = 0):
  """Run the model over a generator and collect the predictions.

  Args:
    gen: iterable yielding ``(images, file names, labels)`` batches.
    model: object with a ``predict`` method taking an image batch.
    no_of_batch: stop after this many batches; with the default ``0`` the
      stop condition never matches and every batch is processed.

  Returns:
    Tuple ``(patients, class_vectors, class_ids)`` of flat lists: file names,
    predicted vectors and labels, in generator order.
  """
  patients, class_vectors, class_ids = [], [], []
  for batch_no, (imgs, dcms, cls) in enumerate(tqdm(gen), start=1):
      class_vectors.extend(model.predict(imgs))
      patients.extend(dcms)
      class_ids.extend(cls)
      if batch_no == no_of_batch:
        break

  return patients, class_vectors, class_ids

In [None]:
def get_prediction_map(patients,class_vecs):
  """Build a per-patient table of the three predicted class scores.

  The patient id is the file name without its extension. It is taken from
  the last path component (as ``get_image`` does); the previous version used
  the second component, which is wrong for nested paths and raised for bare
  file names.

  Args:
    patients: sequence of DICOM file paths.
    class_vecs: sequence of score vectors aligned with ``patients``; entries
      0, 1 and 2 are stored as PNEUMONIA, NOT_NORMAL and NORMAL.

  Returns:
    DataFrame indexed by patient id with the columns ``PNEUMONIA``,
    ``NOT_NORMAL`` and ``NORMAL``; the first occurrence of a patient wins.
  """
  pred_map = {}
  for filename, classes in zip(patients, class_vecs):
    patient = filename.split('/')[-1].split('.')[0]
    if patient not in pred_map:
      pred_map[patient] = {'PNEUMONIA':classes[0],'NOT_NORMAL':classes[1], "NORMAL":classes[2] }
  return pd.DataFrame(pred_map).T

In [None]:
def get_pred_merged_meta_data(pred_d, meta_d):
  """Join predicted class scores with patient metadata.

  Args:
    pred_d: DataFrame with ``patientId``, ``PNEUMONIA``, ``NOT_NORMAL`` and
      ``NORMAL`` columns.
    meta_d: DataFrame with ``patientId``, ``PatientAge``, ``PatientSex``,
      ``ViewPosition`` and ``Target`` columns.

  Returns:
    DataFrame indexed by patient id holding the three scores followed by the
    four metadata fields; for repeated patients the first prediction row and
    the first metadata row are used.
  """
  score_columns = ('PNEUMONIA', 'NOT_NORMAL', 'NORMAL')
  meta_columns = ('PatientAge', 'PatientSex', 'ViewPosition', 'Target')

  merged = {}
  for _, pred_row in pred_d.iterrows():
    pid = pred_row['patientId']
    if pid in merged:
      continue
    meta_rows = meta_d[meta_d.patientId == pid]
    entry = {column: pred_row[column] for column in score_columns}
    for column in meta_columns:
      entry[column] = meta_rows[column].values[0]
    merged[pid] = entry
  return pd.DataFrame(merged).T

# RetinaNet functions

In [None]:
def draw_detections(image, boxes, scores, labels):
  """Draw the detections of the first batch item onto ``image``.

  Iteration stops at the first detection whose score is below
  ``THRES_SCORE``; each earlier detection gets a box and a
  ``"<name> <score>"`` caption.

  Args:
    image: image passed to ``draw_box`` / ``draw_caption``.
    boxes, scores, labels: batched detector outputs; only index 0 is used.
  """
  detections = zip(boxes[0], scores[0], labels[0])
  for raw_box, confidence, class_id in detections:
    if confidence < THRES_SCORE:
      break

    corners = raw_box.astype(int)
    draw_box(image, corners, color=label_color(class_id))
    draw_caption(image, corners,
                 "{} {:.3f}".format(labels_to_names[class_id], confidence))
def get_predicted_box(ref_box,th_iou,boxes, scores, labels):
  """Collect detections that overlap ``ref_box`` by more than ``th_iou``.

  Only the first batch element (index 0) of ``boxes``, ``scores`` and
  ``labels`` is scanned, and scanning stops at the first score below
  ``THRES_SCORE``.

  Returns:
    dict with parallel lists under 'box' (integer-cast boxes), 'score' and
    'iou' (as returned by ``computeIoU(ref_box, box)``).
  """
  kept_boxes = []
  kept_scores = []
  kept_ious = []

  for raw_box, confidence, _class_id in zip(boxes[0], scores[0], labels[0]):
    if confidence < THRES_SCORE:
      break

    int_box = raw_box.astype(int)
    overlap = computeIoU(ref_box, int_box)
    if not (overlap > th_iou):
      continue

    kept_boxes.append(int_box)
    kept_scores.append(confidence)
    kept_ious.append(overlap)

  return {'box': kept_boxes, 'score': kept_scores, 'iou': kept_ious}


In [None]:
def get_image(dcm_path,isDcm=True):
  """Load an image via ``read_image_bgr`` and return it with its patient id.

  Args:
    dcm_path: '/'-separated path to the image. The patient id is the file
      name (last path component) up to its first '.'.
    isDcm: when True the file is read with pydicom, its pixel array is
      written to a temporary '<patient>.jpg' in the current directory, loaded
      from there and the temporary file is removed. When False ``dcm_path``
      is handed to ``read_image_bgr`` directly.

  Returns:
    (image, patient) -- the array returned by ``read_image_bgr`` and the
    patient id string.
  """
  # Derive the id up front: it is part of the return value for BOTH input
  # kinds (previously it was only set on the DICOM path, so isDcm=False
  # failed with UnboundLocalError).
  file_name = dcm_path.split('/')[-1]
  patient = file_name.split('.')[0]

  if not isDcm:
    return read_image_bgr(dcm_path), patient

  ds = pydicom.dcmread(dcm_path)
  jpg_path = patient + '.jpg'
  try:
    cv2.imwrite(jpg_path, ds.pixel_array)
    image = read_image_bgr(jpg_path)
  finally:
    # Always clean up the temporary JPEG, even if writing/reading failed.
    if os.path.exists(jpg_path):
      os.remove(jpg_path)

  return image, patient
  
def show_detected_objects(dcm_path,bbox_map,model):
  """Display an image with its ground-truth boxes and model detections.

  Args:
    dcm_path: path handed to ``get_image`` (DICOM input).
    bbox_map: dict keyed by patient id; each entry's 'bboxes' value is a
      sequence of boxes unpacked as (y, x, h, w).
    model: detector forwarded to ``retina_predict``.
  """
  image, patient = get_image(dcm_path)
  boxes, scores, labels = retina_predict(image, model)

  # Work on an RGB copy so matplotlib shows the colours correctly.
  canvas = cv2.cvtColor(image.copy(), cv2.COLOR_BGR2RGB)

  if patient in bbox_map:
    for y, x, h, w in bbox_map[patient].get('bboxes'):
      corners = [x, y, x+w, y+h]
      draw_box(canvas, corners, color=(255, 255, 0))

  draw_detections(canvas, boxes, scores, labels)

  plt.axis('off')
  plt.imshow(canvas)
  plt.show()

In [None]:
def retina_predict(image,mymodel):
  """Run ``mymodel`` on one image and return its detections.

  A copy of ``image`` is passed through ``preprocess_image`` and then
  ``resize_image`` (min and max side both ``IMAGE_SIZE``). The result is
  wrapped into a batch of one for ``predict_on_batch``. The returned boxes
  are divided by the resize scale, in place, before being returned.

  Returns:
    (boxes, scores, labels) as produced by ``mymodel.predict_on_batch``.
  """
  prepared = preprocess_image(image.copy())
  prepared, scale = resize_image(prepared,
                                 min_side=IMAGE_SIZE,
                                 max_side=IMAGE_SIZE)

  batch = np.expand_dims(prepared, axis=0)
  boxes, scores, labels = mymodel.predict_on_batch(batch)

  # Undo the resize scaling on the box coordinates.
  boxes /= scale

  return boxes, scores, labels

In [None]:
def doRetinaPrediction(df,bbox_map,model,file,th_iou):
  """Run the detector over every row of ``df`` and tabulate the matches.

  For each row the DICOM image 'stage_2_train_images/<patientId>.dcm' is
  loaded with ``get_image``, detections come from ``retina_predict`` and each
  ground-truth box is compared with them through ``get_predicted_box``.
  One output row is produced per (ground-truth box, matching detection)
  pair. A ground-truth box without any matching detection produces a single
  row with ``predTarget`` 0, a zero predicted box, ``iou`` 0 and
  ``confidence`` set to ``THRES_SCORE``. Patients absent from ``bbox_map``
  are evaluated against a single all-zero ground-truth box.

  Args:
    df: DataFrame with at least 'patientId' and 'Target' columns.
    bbox_map: dict keyed by patient id; each entry's 'bboxes' value is a
      sequence of boxes unpacked as (y, x, h, w).
    model: detector forwarded to ``retina_predict``.
    file: path the resulting table is written to as CSV (no index).
    th_iou: IoU threshold forwarded to ``get_predicted_box``.

  Returns:
    (pred_df, test_y, pred_y): the full result table, and its 'Target' and
    'predTarget' columns converted to int.
  """
  folder = 'stage_2_train_images'
  columns = ['patientId',
             'x', 'y', 'width', 'height', 'Target',
             'x_pred', 'y_pred', 'width_pred', 'height_pred', 'predTarget',
             'iou', 'confidence']
  # Rows are collected in a list and turned into a DataFrame once at the end;
  # growing a DataFrame row-by-row copies it on every insert.
  rows = []

  length = df.shape[0]
  # Progress is reported about every 1% of the rows. Clamp to 1 so inputs
  # with fewer than 100 rows do not trigger a modulo-by-zero.
  den = max(1, length // 100)

  for count, (_, row) in enumerate(df.iterrows()):
    if count % den == 0:
      print("Generating prediction:{} of {}".format(count,length))

    pid = row['patientId']
    tgt = row['Target']
    dcm_path = folder + '/' + pid + '.dcm'

    image, _ = get_image(dcm_path, isDcm=True)
    pred_boxes, confidence_scores, labels = retina_predict(image, model)

    if pid in bbox_map:
      bboxes = bbox_map[pid].get('bboxes')
    else:
      # Non-pneumonia case: compare against an empty reference box.
      bboxes = [[0, 0, 0, 0]]

    for y, x, h, w in bboxes:
      true_box = [x, y, x+w, y+h]
      matched = get_predicted_box(true_box, th_iou, pred_boxes,
                                  confidence_scores, labels)

      if not matched['box']:
        # Nothing overlapped this ground-truth box: record a negative row.
        rows.append([pid, x, y, w, h, tgt,
                     0, 0, 0, 0, 0, 0, THRES_SCORE])
        continue

      for box, confidence, pred_iou in zip(matched['box'],
                                           matched['score'],
                                           matched['iou']):
        px1, py1, px2, py2 = box
        rows.append([pid, x, y, w, h, tgt,
                     px1, py1, px2-px1, py2-py1, 1, pred_iou, confidence])

  pred_df = pd.DataFrame(rows, columns=columns)
  pred_df.to_csv(file, index=False)
  print("Prediction Complete!")

  test_y = pred_df["Target"]
  pred_y = pred_df["predTarget"]

  return pred_df, test_y.apply(int), pred_y.apply(int)
  
      

  