In [41]:
# Mount Google Drive inside the Colab runtime at /content/drive so the
# notebook can read files stored there.
from google.colab import drive

drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [42]:
# Change the working directory; later cells open files by relative path
# (e.g. 'data_mask.csv').
# NOTE(review): the path below looks like a placeholder (and the recorded output
# shows a slightly different string) -- replace it with the real data folder.
%cd 'filepath of dataI'

filepath of data


In [43]:
## Importing required modules
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import zipfile
import cv2
from skimage import io
import tensorflow as tf
from tensorflow.python.keras import Sequential
from tensorflow.keras import layers, optimizers
from tensorflow.keras.applications import DenseNet121
from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.initializers import glorot_uniform
from tensorflow.keras.utils import plot_model
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping, ModelCheckpoint, LearningRateScheduler
from IPython.display import display
from tensorflow.keras import backend as K
from sklearn.preprocessing import StandardScaler, normalize
import os
import random

In [44]:
# Load the index table from the current working directory and display its
# (rows, columns) shape.
df = pd.read_csv('data_mask.csv')
df.shape

(3929, 4)

In [45]:
# Keep only the rows whose 'mask' flag is 1.
# The flag is compared after an integer cast so this cell gives the same result
# when it is re-run: the statement below turns the column into strings, and
# comparing a string column against the integer 1 would silently select no rows.
mask_df = df[df['mask'].astype(int) == 1]

# Store the flag as a string in the original frame (a no-op on re-execution).
df['mask'] = df['mask'].apply(str)
mask_df.head()

Unnamed: 0,patient_id,image_path,mask_path,mask
445,TCGA_DU_5872_19950223,TCGA_CS_5393_19990606/TCGA_CS_5393_19990606_5.tif,TCGA_CS_5393_19990606/TCGA_CS_5393_19990606_5_...,1
507,TCGA_DU_5874_19950510,TCGA_HT_7680_19970202/TCGA_HT_7680_19970202_5.tif,TCGA_HT_7680_19970202/TCGA_HT_7680_19970202_5_...,1
551,TCGA_DU_5854_19951104,TCGA_CS_4944_20010208/TCGA_CS_4944_20010208_6.tif,TCGA_CS_4944_20010208/TCGA_CS_4944_20010208_6_...,1
555,TCGA_DU_5854_19951104,TCGA_CS_5393_19990606/TCGA_CS_5393_19990606_6.tif,TCGA_CS_5393_19990606/TCGA_CS_5393_19990606_6_...,1
617,TCGA_DU_5853_19950823,TCGA_HT_7680_19970202/TCGA_HT_7680_19970202_6.tif,TCGA_HT_7680_19970202/TCGA_HT_7680_19970202_6_...,1


In [46]:
## splitting data up into training and testing
from sklearn.model_selection import train_test_split

# Fixed seed so the three partitions are identical on every run. Without it each
# kernel restart reshuffles the rows, so a checkpoint saved by an earlier run
# could later be evaluated on images it was trained on.
SPLIT_SEED = 42

# 85% of the rows go to training; the held-out 15% is then halved into the
# testing and validation partitions.
X_train, X_holdout = train_test_split(mask_df, test_size = 0.15, random_state = SPLIT_SEED)
X_test, X_val = train_test_split(X_holdout, test_size = 0.5, random_state = SPLIT_SEED)

In [47]:
# Report the (rows, columns) shape of each partition, one line per split.
print(
    'Training ds: {}'.format(X_train.shape),
    'Validation ds: {}'.format(X_val.shape),
    'Testing ds: {}'.format(X_test.shape),
    sep = '\n',
)

Training ds: (1167, 4)
Validation ds: (103, 4)
Testing ds: (103, 4)


In [48]:
## Model inputs are the MRI image paths; targets are the matching segmentation-mask paths
train_ids = X_train['image_path'].tolist()
train_mask = X_train['mask_path'].tolist()

val_ids = X_val['image_path'].tolist()
val_mask = X_val['mask_path'].tolist()

In [49]:
from utilities import DataGenerator

# One generator per split. Each receives the list of image paths and the list
# of corresponding mask paths; every other constructor argument keeps its default.
training_gen = DataGenerator(train_ids, train_mask)
validation_gen = DataGenerator(val_ids, val_mask)

In [50]:
from tensorflow.keras.layers import Input, Conv2D, BatchNormalization, MaxPool2D, Activation, Add, UpSampling2D, Concatenate
## Building the residual block
def residual_block(X, filter):
  """Build one residual unit on top of the tensor ``X``.

  Main path:     1x1 conv -> batch norm -> ReLU -> 3x3 'same' conv -> batch norm.
  Shortcut path: 1x1 conv -> batch norm, applied to the untouched input so that
                 it ends up with the same number of channels as the main path.
  The two paths are added element-wise and passed through a final ReLU.

  Args:
    X: input tensor of the block.
    filter: number of filters used by every convolution in the block.

  Returns:
    The output tensor of the residual unit.
  """
  # Settings shared by all three convolutions of the block.
  conv_opts = dict(strides = (1, 1), kernel_initializer = 'he_normal')

  # Keep a handle on the raw input for the shortcut path.
  shortcut_in = X

  # Main path (layers are created in the same order as they are applied).
  main = Conv2D(filter, kernel_size = (1, 1), **conv_opts)(X)
  main = BatchNormalization()(main)
  main = Activation('relu')(main)
  main = Conv2D(filter, kernel_size = (3, 3), padding = 'same', **conv_opts)(main)
  main = BatchNormalization()(main)

  # Shortcut path: projected with its own 1x1 convolution because the main path
  # changes the channel count.
  shortcut = Conv2D(filter, kernel_size = (1, 1), **conv_opts)(shortcut_in)
  shortcut = BatchNormalization()(shortcut)

  # Merge both paths, then apply the closing non-linearity.
  merged = Add()([main, shortcut])
  return Activation('relu')(merged)

In [51]:
## Defining the upsampling 
def upsampling_concat(X, encoder_output):
  """Upsample ``X`` with a (2, 2) factor and concatenate it with ``encoder_output``.

  Args:
    X: decoder tensor to be upsampled.
    encoder_output: encoder tensor used as the skip connection.

  Returns:
    The concatenation of the upsampled tensor (first) and ``encoder_output`` (second).
  """
  upsampled = UpSampling2D((2, 2))(X)
  return Concatenate()([upsampled, encoder_output])

In [52]:

## Building ResUNet
# Input images are 256 x 256 with 3 channels.
input_size = (256, 256, 3)

X_input = Input(input_size)

## Encoder stage 1: two 3x3 'same' convolutions with 16 filters, each followed by
## batch normalisation, then 2x2 max pooling. The un-pooled `conv_input` is kept
## because the last decoder stage concatenates with it.
conv_input = Conv2D(16, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(X_input)
conv_input = BatchNormalization()(conv_input)
conv_input = Conv2D(16, 3, activation = 'relu', padding = 'same', kernel_initializer = 'he_normal')(conv_input)
conv_input = BatchNormalization()(conv_input)
conv_input_pooling = MaxPool2D(pool_size= (2, 2))(conv_input)

# Encoder stage 2: residual block with 32 filters, then 2x2 max pooling.
conv2_input = residual_block(conv_input_pooling, 32)
conv2_pooling = MaxPool2D(pool_size= (2, 2))(conv2_input)

# Encoder stage 3: residual block with 64 filters, then 2x2 max pooling.
conv3_input = residual_block(conv2_pooling, 64)
conv3_pooling = MaxPool2D(pool_size= (2, 2))(conv3_input)

# Encoder stage 4: residual block with 128 filters, then 2x2 max pooling.
conv4_input = residual_block(conv3_pooling, 128)
conv4_pooling = MaxPool2D(pool_size= (2, 2))(conv4_input)

## Bottleneck: residual block with 256 filters, no pooling afterwards
conv5_input = residual_block(conv4_pooling, 256)

## Decoder: every stage upsamples, concatenates with the un-pooled encoder output
## of the matching stage (skip connection) and applies a residual block whose
## filter count mirrors that encoder stage.
decode_1 = upsampling_concat(conv5_input, conv4_input)
decode_1 = residual_block(decode_1, 128)

decode_2 = upsampling_concat(decode_1, conv3_input)
decode_2 = residual_block(decode_2, 64)

decode_3 = upsampling_concat(decode_2, conv2_input)
decode_3 = residual_block(decode_3, 32)

decode_4 = upsampling_concat(decode_3, conv_input)
decode_4 = residual_block(decode_4, 16)

## Output head: 1x1 convolution with a single filter and sigmoid activation
conv_output = Conv2D(1, (1,1), padding = 'same', activation = 'sigmoid')(decode_4)

In [53]:
## Model architecture
# Wrap the graph running from X_input to conv_output in a Keras Model and print
# its layer-by-layer summary.
model = Model(inputs = X_input, outputs = conv_output)
model.summary()

Model: "functional_3"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            [(None, 256, 256, 3) 0                                            
__________________________________________________________________________________________________
conv2d_27 (Conv2D)              (None, 256, 256, 16) 448         input_2[0][0]                    
__________________________________________________________________________________________________
batch_normalization_26 (BatchNo (None, 256, 256, 16) 64          conv2d_27[0][0]                  
__________________________________________________________________________________________________
conv2d_28 (Conv2D)              (None, 256, 256, 16) 2320        batch_normalization_26[0][0]     
_______________________________________________________________________________________

In [54]:
## Helpers used for training: data generator, prediction routine and loss functions
## Training uses a custom loss function (focal Tversky)


import pandas as pd
import numpy as np
import seaborn as sns
import cv2
import tensorflow as tf
import os 
from skimage import io
from PIL import Image
from tensorflow.keras import backend as K
  
#creating a custom datagenerator:

class DataGenerator(tf.keras.utils.Sequence):
  """Keras ``Sequence`` producing batches of (image, binary mask) arrays read from disk.

  Args:
    ids: sequence of image paths, relative to ``image_dir``.
    mask: sequence of mask paths, relative to ``image_dir``; ``mask[i]`` belongs to ``ids[i]``.
    image_dir: directory prepended to every path (default ``'./'``).
    batch_size: number of samples per batch.
    img_h: height every image and mask is resized to.
    img_w: width every image and mask is resized to.
    shuffle: when true, the sample order is reshuffled at construction time and
      after every epoch.
  """

  def __init__(self, ids , mask, image_dir = './', batch_size = 16, img_h = 256, img_w = 256, shuffle = True):

    self.ids = ids
    self.mask = mask
    self.image_dir = image_dir
    self.batch_size = batch_size
    self.img_h = img_h
    self.img_w = img_w
    self.shuffle = shuffle
    # Builds self.indexes (and shuffles it when requested) before the first epoch.
    self.on_epoch_end()

  def __len__(self):
    'Get the number of full batches per epoch (a trailing partial batch is dropped)'

    return len(self.ids) // self.batch_size

  def __getitem__(self, index):
    'Generate the batch at position `index` as a tuple (X, y)'

    # positions (into ids / mask) of the samples belonging to this batch
    indexes = self.indexes[index * self.batch_size : (index + 1) * self.batch_size]

    # image paths and mask paths for those positions
    list_ids = [self.ids[i] for i in indexes]
    list_mask = [self.mask[i] for i in indexes]

    # load and preprocess the files
    X, y = self.__data_generation(list_ids, list_mask)

    return X, y

  def on_epoch_end(self):
    'Rebuild the sample order; called once from __init__ and then at the end of each epoch'

    self.indexes = np.arange(len(self.ids))

    if self.shuffle:
      np.random.shuffle(self.indexes)

  def __data_generation(self, list_ids, list_mask):
    '''Load, resize and normalise the given images and masks.

    Returns:
      X: float array of shape (len(list_ids), img_h, img_w, 3) with every image
         standardised to zero mean and, unless the image is constant, unit variance.
      y: int array of shape (len(list_ids), img_h, img_w, 1) holding 1 where the
         resized mask is above its own mean value and 0 elsewhere.
    '''

    # Size the buffers from the actual number of samples so that a short batch
    # never carries uninitialised rows.
    n_samples = len(list_ids)
    X = np.empty((n_samples, self.img_h, self.img_w, 3))
    y = np.empty((n_samples, self.img_h, self.img_w, 1))

    for i, (img_id, mask_id) in enumerate(zip(list_ids, list_mask)):
      # honour the configured image_dir (with the default './' the resulting
      # paths are the same as plain './' + path)
      img_path = os.path.join(self.image_dir, str(img_id))
      mask_path = os.path.join(self.image_dir, str(mask_id))

      # reading the original image and the corresponding mask image
      img = io.imread(img_path)
      mask = io.imread(mask_path)

      # cv2.resize expects the target size as (width, height)
      img = cv2.resize(img, (self.img_w, self.img_h))
      img = np.array(img, dtype = np.float64)

      mask = cv2.resize(mask, (self.img_w, self.img_h))
      mask = np.array(mask, dtype = np.float64)

      # standardise the image; a constant image has zero spread, so the division
      # is skipped instead of filling the batch with NaN
      img -= img.mean()
      img_std = img.std()
      if img_std > 0:
        img /= img_std

      X[i,] = img

      # Binarise the mask: pixels above the mask's mean become 1. This equals
      # the sign of the standardised mask, without dividing 0 by 0 when the
      # mask is constant. The mask is expanded from (h, w) to (h, w, 1).
      y[i,] = np.expand_dims(mask > mask.mean(), axis = 2)

    # integer labels
    y = (y > 0).astype(int)

    return X, y






def prediction(test, model, model_seg):
  '''
  Run a two-stage prediction on every image listed in ``test.image_path``.

  Stage 1: the image is multiplied by 1/255, resized to 256x256 and passed to the
  classification network ``model``. If the arg-max of its output is 0, the image
  is recorded as having no mask and stage 2 is skipped.

  Stage 2: the same raw image is resized to 256x256, standardised (zero mean,
  unit variance) and passed to the segmentation network ``model_seg``. If the
  rounded prediction contains no positive pixel, the image is recorded as having
  no mask; otherwise the raw prediction is stored as its mask.

  Args:
    test: object whose ``image_path`` attribute iterates over image paths
      (relative to "./"), e.g. a dataframe column.
    model: classification model exposing ``predict``.
    model_seg: segmentation model exposing ``predict``.

  Returns:
    (image_id, mask, has_mask): three parallel lists holding, per image, its path,
    either the predicted mask array or the string 'No mask', and 1 / 0.
  '''

  #directory
  directory = "./"

  #Creating empty list to store the results
  mask = []
  image_id = []
  has_mask = []

  #iterating through each image in the test data
  for i in test.image_path:

    path = directory + str(i)

    #reading the image once; both stages start from this unmodified array
    raw = io.imread(path)

    #Stage 1 input: rescale, resize, convert to float64 and add the batch axis
    img = raw * 1./255.
    img = cv2.resize(img,(256,256))
    img = np.array(img, dtype = np.float64)
    img = np.reshape(img, (1,256,256,3))

    #making prediction on the image
    is_defect = model.predict(img)

    #if tumour is not present we append the details of the image to the list
    if np.argmax(is_defect) == 0:
      image_id.append(i)
      has_mask.append(0)
      mask.append('No mask')
      continue

    #Creating an empty array of shape 1,256,256,3
    X = np.empty((1, 256, 256, 3))

    #resizing the raw image and converting it to an array of type float64
    img = cv2.resize(raw,(256,256))
    img = np.array(img, dtype = np.float64)

    #standardising the image; a constant image has zero spread, so the division
    #is skipped rather than feeding NaN to the segmentation network
    img -= img.mean()
    img_std = img.std()
    if img_std > 0:
      img /= img_std

    #converting the shape of image from 256,256,3 to 1,256,256,3
    X[0,] = img

    #make prediction
    predict = model_seg.predict(X)

    #if the sum of predicted values is equal to 0 then there is no tumour
    if predict.round().astype(int).sum() == 0:
        image_id.append(i)
        has_mask.append(0)
        mask.append('No mask')
    else:
    #if the sum of pixel values are more than 0, then there is tumour
        image_id.append(i)
        has_mask.append(1)
        mask.append(predict)


  return image_id, mask, has_mask
        




'''
We need a custom loss function to train this ResUNet, so we use the loss functions as they are from https://github.com/nabsabraham/focal-tversky-unet/blob/master/losses.py


@article{focal-unet,
  title={A novel Focal Tversky loss function with improved Attention U-Net for lesion segmentation},
  author={Abraham, Nabila and Khan, Naimul Mefraz},
  journal={arXiv preprint arXiv:1810.07842},
  year={2018}
}
'''
def tversky(y_true, y_pred, smooth = 1e-6, alpha = 0.7):
    """Tversky index between a ground-truth mask and a predicted mask.

    Computed over all elements after flattening:
        (TP + smooth) / (TP + alpha * FN + (1 - alpha) * FP + smooth)
    where TP = sum(y_true * y_pred), FN = sum(y_true * (1 - y_pred)) and
    FP = sum((1 - y_true) * y_pred).

    Args:
        y_true: ground-truth mask.
        y_pred: predicted mask.
        smooth: constant added to numerator and denominator so the ratio stays
            defined when both masks are empty.
        alpha: weight of the false negatives; false positives get 1 - alpha.

    Returns:
        Scalar tensor holding the Tversky index.
    """
    # Labels may arrive as integers while predictions are floats; bring both to
    # float32 (as focal_tversky does) so the products below are well-typed.
    y_true = tf.cast(y_true, tf.float32)
    y_pred = tf.cast(y_pred, tf.float32)

    y_true_pos = K.flatten(y_true)
    y_pred_pos = K.flatten(y_pred)
    true_pos = K.sum(y_true_pos * y_pred_pos)
    false_neg = K.sum(y_true_pos * (1-y_pred_pos))
    false_pos = K.sum((1-y_true_pos)*y_pred_pos)
    return (true_pos + smooth)/(true_pos + alpha*false_neg + (1-alpha)*false_pos + smooth)

def tversky_loss(y_true, y_pred):
    """Tversky loss: 1 minus the Tversky index of the two masks.

    Both inputs are cast to float32 first (as focal_tversky does) so the loss
    also works when the labels arrive as integers and the predictions as floats.

    Args:
        y_true: ground-truth mask.
        y_pred: predicted mask.

    Returns:
        Scalar tensor ``1 - tversky(y_true, y_pred)``.
    """
    y_true = tf.cast(y_true, tf.float32)
    y_pred = tf.cast(y_pred, tf.float32)
    return 1 - tversky(y_true,y_pred)

def focal_tversky(y_true,y_pred):
    """Focal Tversky loss: ``(1 - tversky(y_true, y_pred)) ** 0.75``.

    Args:
        y_true: ground-truth mask.
        y_pred: predicted mask.

    Returns:
        Scalar tensor holding the loss value.
    """
    # The two inputs can arrive with different dtypes (one float32, the other
    # int64); training only works once both are cast to the same type.
    truth, pred = (tf.cast(t, tf.float32) for t in (y_true, y_pred))

    gamma = 0.75
    return K.pow(1 - tversky(truth, pred), gamma)

In [55]:
## Compiling the model
# Adam with learning rate 0.05 and epsilon 0.1, focal Tversky as the loss and
# the plain Tversky index as the reported metric.
# `learning_rate` is the supported argument name; `lr` is only a deprecated
# alias that newer Keras releases warn about or reject.
model.compile(optimizer = tf.keras.optimizers.Adam(learning_rate = .05, epsilon = 0.1), loss = focal_tversky, metrics = [tversky])

In [56]:
## Stop training once val_loss has failed to improve for 20 epochs
earlystopping = EarlyStopping(
    monitor = 'val_loss',
    mode = 'min',
    patience = 20,
    verbose = 1,
)

## Save the model to disk only when it is the best one seen so far
checkpoint = ModelCheckpoint(
    filepath = 'ResUNet_mask.hdf5',
    save_best_only = True,
    verbose = 1,
)

In [57]:
# Train for at most 50 epochs on the training generator, validating on the
# validation generator after each epoch; the callbacks may stop the run early
# and save checkpoints.
history = model.fit(
    training_gen,
    validation_data = validation_gen,
    epochs = 50,
    callbacks = [earlystopping, checkpoint],
)

Epoch 1/50
Epoch 00001: val_loss improved from inf to 0.82022, saving model to ResUNet_mask.hdf5
Epoch 2/50
Epoch 00002: val_loss improved from 0.82022 to 0.71426, saving model to ResUNet_mask.hdf5
Epoch 3/50
Epoch 00003: val_loss improved from 0.71426 to 0.45119, saving model to ResUNet_mask.hdf5
Epoch 4/50
Epoch 00004: val_loss did not improve from 0.45119
Epoch 5/50
Epoch 00005: val_loss did not improve from 0.45119
Epoch 6/50
Epoch 00006: val_loss improved from 0.45119 to 0.35729, saving model to ResUNet_mask.hdf5
Epoch 7/50
Epoch 00007: val_loss improved from 0.35729 to 0.26354, saving model to ResUNet_mask.hdf5
Epoch 8/50
Epoch 00008: val_loss improved from 0.26354 to 0.25107, saving model to ResUNet_mask.hdf5
Epoch 9/50
Epoch 00009: val_loss improved from 0.25107 to 0.24319, saving model to ResUNet_mask.hdf5
Epoch 10/50
Epoch 00010: val_loss did not improve from 0.24319
Epoch 11/50
Epoch 00011: val_loss improved from 0.24319 to 0.23510, saving model to ResUNet_mask.hdf5
Epoch 12

In [58]:
import json
# Export the model's JSON description (model.to_json()) to a file in the
# current working directory.
model_json = model.to_json()
with open('ResUNet_architecture.json', mode = 'w') as arch_file:
  arch_file.write(model_json)