### Importing necessary modules

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
plt.style.use('ggplot')
%matplotlib inline

In [None]:
import os
from tqdm import tqdm
import random
from itertools import chain

In [None]:
import cv2
from glob import glob

from skimage.io import imread, imshow, concatenate_images
from skimage.transform import resize
from skimage.morphology import label
from skimage.color import rgb2gray

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
import tensorflow as tf
from tensorflow.keras import Input
from tensorflow.keras.models import Model, load_model,save_model
from tensorflow.keras.layers import (Input, Activation,
                                     BatchNormalization, 
                                     Dropout, Lambda, Conv2D,
                                     Conv2DTranspose, MaxPooling2D,
                                     concatenate)
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

from tensorflow.keras import backend as K

from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [None]:
from skimage import io
from tensorflow.keras.layers import *
from sklearn.preprocessing import StandardScaler, normalize


In [None]:
# List the top-level entries of the dataset directory.
os.listdir('../input/lgg-mri-segmentation/kaggle_3m/')

In [None]:
# Dataset root. The trailing slash matters: a later cell builds its glob
# pattern by plain string concatenation (root + '*/*_mask*').
root = '../input/lgg-mri-segmentation/kaggle_3m/'

In [None]:
# Side length (pixels) of the square inputs used for the generators and the U-Net.
HEIGHT = 256
WIDTH = HEIGHT

In [None]:

# Load the CSV that ships next to the patient folders for a quick look.
# NOTE(review): the previous comment said this file holds the MRI/mask paths,
# but the path table is rebuilt from the directory tree in later cells and
# `brain_df` is reassigned there — confirm what data.csv actually contains.
brain_df = pd.read_csv(root + 'data.csv')


In [None]:
# `info` is a method; without the call the cell only shows the bound-method
# repr instead of the column/dtype summary.
brain_df.info()

In [None]:
# Preview the first ten rows of the loaded CSV.
brain_df.head(10)

In [None]:
# Walk every patient folder and record (patient_id, file_path) pairs.
# `data_map` stays a flat list alternating patient id and path, because the
# next cell unpacks it with [::2] / [1::2] slicing.
data_map = []
for sub_dir_path in sorted(glob("/kaggle/input/lgg-mri-segmentation/kaggle_3m/" + "*")):
    # The dataset root also contains plain files (e.g. data.csv). Skip them
    # explicitly instead of letting os.listdir fail and swallowing every
    # exception, which would also hide genuine I/O errors.
    if not os.path.isdir(sub_dir_path):
        continue
    dir_name = os.path.basename(sub_dir_path)
    # Sorted so the listing order, and every positional index derived from
    # it later, is reproducible across file systems.
    for filename in sorted(os.listdir(sub_dir_path)):
        image_path = sub_dir_path + '/' + filename
        data_map.extend([dir_name, image_path])

In [None]:
# `data_map` alternates patient id and file path: even positions hold the
# ids, odd positions hold the paths.
patient_ids = data_map[0::2]
file_paths = data_map[1::2]
df = pd.DataFrame({"patient_id": patient_ids, "path": file_paths})
df.head()

In [None]:
# Split the listing into MRI slices and their segmentation masks.
is_mask = df['path'].str.contains("mask")
df_imgs = df[~is_mask]
df_masks = df[is_mask]


def _slice_number(path):
    """Return the integer slice index encoded at the end of a file name.

    Handles both ``<patient>_<n>.tif`` and ``<patient>_<n>_mask.tif``.
    Parsing the file name replaces fixed character offsets, which only
    worked for one specific absolute directory prefix.
    """
    stem = os.path.splitext(os.path.basename(path))[0]
    if stem.endswith("_mask"):
        stem = stem[:-len("_mask")]
    return int(stem.rsplit("_", 1)[-1])


# Data sorting: both lists are ordered by slice number. sorted() is stable,
# so slices with the same number keep the listing order, which is identical
# for images and masks — that is what keeps imgs[i] and masks[i] paired.
imgs = sorted(df_imgs["path"].values, key=_slice_number)
masks = sorted(df_masks["path"].values, key=_slice_number)
assert len(imgs) == len(masks), "every image needs exactly one mask"

# Sorting check: print one random image/mask pair for visual inspection.
idx = random.randint(0, len(imgs)-1)
print("Path to the Image:", imgs[idx], "\nPath to the Mask:", masks[idx])

In [None]:
# Final dataframe
# `imgs`/`masks` were re-ordered by slice number, so the patient id must be
# derived from each sorted image path (its parent folder name). Taking
# df_imgs.patient_id in its original listing order would attach the wrong
# patient to most rows.
brain_df = pd.DataFrame({"patient_id": [os.path.basename(os.path.dirname(p)) for p in imgs],
                         "image_path": imgs,
                         "mask_path": masks
                        })
def pos_neg_diagnosis(mask_path):
    """Return 1 if the mask image at ``mask_path`` has any non-zero pixel, else 0."""
    brightest = np.max(cv2.imread(mask_path))
    return 1 if brightest > 0 else 0
    
# Label every row: 1 when its mask file has non-zero pixels, 0 otherwise.
brain_df['mask'] = brain_df['mask_path'].apply(pos_neg_diagnosis)
brain_df

In [None]:
# Distinct label values found in the 'mask' column.
brain_df['mask'].value_counts().index

In [None]:
# Interactive bar chart of the label distribution, drawn with plotly.
import plotly.graph_objects as go

mask_counts = brain_df['mask'].value_counts()
label_bar = go.Bar(x=mask_counts.index, y=mask_counts)
fig = go.Figure([label_bar])
fig.update_traces(
    marker_color='rgb(0,200,0)',
    marker_line_color='rgb(0,255,0)',
    marker_line_width=7,
    opacity=0.6,
)
fig.show()

In [None]:
# Collect every mask file under the dataset root, then derive the matching
# MRI path by removing the '_mask' marker from the mask's path.
mask_files = glob(root + '*/*_mask*')
img_files = [mask_path.replace('_mask', '') for mask_path in mask_files]


In [None]:
# Number of image paths derived from the mask files.
len(img_files)

### Visualizing Images

In [None]:
# cv2.imread returns channels in BGR order while matplotlib expects RGB, so
# convert before display to avoid swapped colours.
plt.imshow(cv2.cvtColor(cv2.imread(brain_df.image_path[623]), cv2.COLOR_BGR2RGB))

In [None]:
# Show the mask stored on the same row (label 623) of brain_df.
plt.imshow(cv2.imread(brain_df.mask_path[623]))

In [None]:
# Largest pixel value in that mask image.
cv2.imread(brain_df.mask_path[623]).max()

In [None]:
# Basic visualizations: show random MRI slices next to their masks.
import random

N_SAMPLES = 4
fig, axs = plt.subplots(N_SAMPLES, 2, figsize=(10,18))
for mri_ax, mask_ax in axs:
    # randrange excludes its upper bound. random.randint(0, len(brain_df)) is
    # inclusive and could return len(brain_df), which is not a valid row.
    i = random.randrange(len(brain_df))
    mri_ax.title.set_text("Brain MRI")
    # cv2 loads BGR; matplotlib expects RGB.
    mri_ax.imshow(cv2.cvtColor(cv2.imread(brain_df.image_path[i]), cv2.COLOR_BGR2RGB))
    # The title carries the row's 0/1 label from the 'mask' column.
    mask_ax.title.set_text("Mask - " + str(brain_df['mask'][i]))
    mask_ax.imshow(cv2.imread(brain_df.mask_path[i]))

fig.tight_layout()



In [None]:
# display random iteration of images from the dataset with their masks
#fig = plt.figure(figsize=(13, 13))
#rnd_no = np.random.randint(0,len(img_files)-9)
#for ind, i in enumerate(range(rnd_no, rnd_no+9)):
    
 #   fig.add_subplot(3,3,ind+1)
    
    # get image & mask file paths
  #  img_path = img_files[i]
   # msk_path = mask_files[i]
    
    # read images
    #img = cv2.imread(img_path)
    #img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
    #msk = cv2.imread(msk_path)
    
    # display images
    #plt.imshow(img)
   # plt.imshow(msk, alpha=0.5)
    #plt.title(img_path.split('/')[-1].split('.')[0])#
#plt.show()

In [None]:

from skimage import io

# Show the first N_EXAMPLES rows labelled 1: the MRI, its mask, and the MRI
# with the mask painted on top.
N_EXAMPLES = 12
# Selecting the rows up front avoids iterating over the rest of the frame
# once enough examples have been drawn.
positive_rows = brain_df[brain_df['mask'] == 1].head(N_EXAMPLES)

fig, axs = plt.subplots(N_EXAMPLES, 3, figsize = (20, 50))
for count, (_, row) in enumerate(positive_rows.iterrows()):
    img = io.imread(row['image_path'])
    axs[count][0].title.set_text('Brain MRI')
    axs[count][0].imshow(img, cmap='hot')

    mask = io.imread(row['mask_path'])
    axs[count][1].title.set_text('Mask')
    axs[count][1].imshow(mask, cmap = 'hot')

    # Paint every pixel whose mask value is 255 pure red. This edits `img`
    # in place, so it must come after the untouched MRI has been drawn.
    img[mask == 255] = (255, 0, 0)
    axs[count][2].title.set_text('MRI with Mask')
    axs[count][2].imshow(img, cmap='hot')

fig.tight_layout()



### Loading dataset paths and splitting into train, valid and test sets

In [None]:
# loading the dataset paths
df = pd.DataFrame(data={"images": img_files,
                     "masks": mask_files})

# train-valid-test split: 10% held out for test, then 20% of the remainder
# for validation. A fixed random_state makes the split repeatable between
# runs; without it every restart trains and evaluates on different rows.
# NOTE(review): the row order of `df` comes from glob(), so full
# reproducibility across machines also needs a stable file order — confirm.
SPLIT_SEED = 42
df_train, df_test = train_test_split(df, test_size=.1, random_state=SPLIT_SEED)
df_train, df_val = train_test_split(df_train, test_size=.2, random_state=SPLIT_SEED)

### Image augmentation 

In [None]:
# Augmentation
def train_generator(df, batch_size, aug_dict,
                   image_color_mode = "rgb",
                   mask_color_mode = "grayscale",
                   image_save_prefix = "image",
                   mask_save_prefix = "mask",
                   save_to_dir = None,
                   target_size = (256, 256),
                   seed=1):
    """
    Yield (image_batch, mask_batch) pairs read from the paths in a dataframe.

    Parameters
    ----------
    df : dataframe with an 'images' and a 'masks' column of file paths.
    batch_size : number of samples per yielded batch.
    aug_dict : keyword arguments for ImageDataGenerator; the same dict is
        used for the image and the mask generator.
    image_color_mode, mask_color_mode : color_mode for each generator.
    image_save_prefix, mask_save_prefix : save_prefix for each generator.
    save_to_dir : forwarded to both generators as save_to_dir.
    target_size : size the generators are asked to load files at.
    seed : seed passed to both generators. Sharing it is what is meant to
        keep shuffling and random transforms of images and masks in step.

    Each pair is passed through adjust_data before being yielded.
    """

    image_datagen = ImageDataGenerator(**aug_dict)
    mask_datagen = ImageDataGenerator(**aug_dict)

    image_generator = image_datagen.flow_from_dataframe(
                        df,
                        x_col='images',
                        class_mode=None,
                        color_mode = image_color_mode,
                        target_size = target_size,
                        batch_size = batch_size,
                        # Was misspelled `save_to_ir`, so the caller's
                        # save_to_dir was never passed under its real name.
                        save_to_dir = save_to_dir,
                        save_prefix = image_save_prefix,
                        seed = seed
    )

    mask_generator = mask_datagen.flow_from_dataframe(
                        df,
                        x_col='masks',
                        class_mode=None,
                        color_mode = mask_color_mode,
                        target_size = target_size,
                        batch_size = batch_size,
                        save_to_dir = save_to_dir,
                        # Masks get their own prefix; previously the image
                        # prefix was reused and mask_save_prefix was ignored.
                        save_prefix = mask_save_prefix,
                        seed = seed
    )

    for (img, mask) in zip(image_generator, mask_generator):
        yield adjust_data(img, mask)

In [None]:
def adjust_data(img, mask):
    """
    Scale a batch for the network.

    The image is divided by 255. The mask is divided by 255 and then
    binarised: values above 0.5 become 1, values at or below 0.5 become 0.
    Both results are new arrays; the inputs are not modified.
    """
    scaled_img = img / 255
    binary_mask = mask / 255

    # Both selections are taken before writing; the two writes cannot
    # affect each other because the selections are disjoint.
    foreground = binary_mask > 0.5
    background = binary_mask <= 0.5
    binary_mask[foreground] = 1
    binary_mask[background] = 0

    return (scaled_img, binary_mask)

### Loss Metrics: Sørensen–Dice Loss & Jaccard Distance

In [None]:
# Sørensen–Dice coefficient
# Smoothing constant shared by dice_coef and iou.
smooth = 100

def dice_coef(y_true, y_pred):
    """
    Smoothed Dice coefficient between two tensors.

    Computes (2 * sum(y_true * y_pred) + smooth) /
             (sum(y_true) + sum(y_pred) + smooth).

    `smooth` is added to the numerator as well as the denominator, the same
    way `iou` applies it. With the constant only in the denominator the
    score could never reach 1, and an empty mask matched by an empty
    prediction scored 0 instead of 1.
    """
    intersection = K.sum(y_true * y_pred)
    return ((2 * intersection + smooth) /
            (K.sum(y_true) + K.sum(y_pred) + smooth))

def dice_coef_loss(y_true, y_pred):
    """Loss form of dice_coef: the coefficient with its sign flipped."""
    score = dice_coef(y_true, y_pred)
    return -score

# Jaccard index (intersection over union)
def iou(y_true, y_pred):
    """
    Smoothed intersection-over-union of two tensors.

    The union is sum(y_true + y_pred) minus the intersection; the
    module-level `smooth` is added to both numerator and denominator.
    """
    overlap = K.sum(y_true * y_pred)
    total = K.sum(y_true + y_pred)
    union = total - overlap

    return (overlap + smooth) / (union + smooth)

def jac_distance(y_true, y_pred):
    """
    Flatten both tensors and return the negated `iou` of the result.

    Note: despite the name this is -IoU, not 1 - IoU.
    """
    flat_true, flat_pred = K.flatten(y_true), K.flatten(y_pred)
    return -iou(flat_true, flat_pred)

### Model Architecture: U-NET

In [None]:
def _double_conv(x, filters):
    """Apply Conv -> ReLU -> Conv -> BatchNorm -> ReLU with `filters` channels.

    Returns (activated, second_conv): the block output and the raw output of
    the second convolution, which the decoder uses as skip connection.
    """
    x = Conv2D(filters, (3,3), padding='same')(x)
    x = Activation('relu')(x)
    second_conv = Conv2D(filters, (3,3), padding='same')(x)
    x = BatchNormalization(axis=3)(second_conv)
    x = Activation('relu')(x)
    return x, second_conv


def _up_concat(x, skip, filters):
    """Upsample `x` 2x with a transposed conv and concatenate `skip` on the channel axis."""
    upsampled = Conv2DTranspose(filters, (2,2),
                                strides=(2,2),
                                padding='same')(x)
    return concatenate([upsampled, skip], axis=3)


def unet(input_size=(HEIGHT, WIDTH, 3)):
    """
    Build the U-Net segmentation model.

    Four downscaling blocks (64, 128, 256, 512 filters, each followed by 2x2
    max pooling), a 1024-filter bottleneck, four upscaling blocks (512, 256,
    128, 64 filters) that concatenate the matching encoder output, and a 1x1
    sigmoid convolution producing a single-channel map.

    Parameters
    ----------
    input_size : shape passed to the Input layer.

    Returns
    -------
    An uncompiled Model.
    """
    inputs = Input(input_size)

    # Blocks 1-4 - Downscaling. Every block uses the same filter count for
    # both convolutions; block 4 previously dropped to 128 filters on its
    # second convolution, unlike every other block.
    skips = []
    x = inputs
    for filters in (64, 128, 256, 512):
        x, second_conv = _double_conv(x, filters)
        skips.append(second_conv)
        x = MaxPooling2D(pool_size=(2,2))(x)

    # Block 5 - bottleneck
    x, _ = _double_conv(x, 1024)

    # Blocks 6-9 - Upscaling, consuming the skip connections deepest first.
    for filters, skip in zip((512, 256, 128, 64), reversed(skips)):
        x = _up_concat(x, skip, filters)
        x, _ = _double_conv(x, filters)

    # Block 10 - Output layer
    outputs = Conv2D(1, (1,1), activation='sigmoid')(x)

    return Model(inputs=[inputs], outputs=[outputs])
    
    

In [None]:
# Build the network with its default input size and print the layer table.
# The model is built again right before compiling in a later cell.
model = unet()
model.summary()

In [None]:
# Hyper-parameters for the training run
EPOCHS = 100                  # upper bound on epochs passed to fit()
BATCH_SIZE = 32               # samples per generator batch
ALPHA = 1e-4                  # learning rate
DECAY_RATE = ALPHA / EPOCHS   # decay handed to the optimizer

In [None]:
# Spot-check: image path stored at position 726 of the training split.
df_train.iloc[726]['images']

In [None]:
# Spot-check: mask path stored at the same position of the training split.
df_train.iloc[726]['masks']

### Initializing 'train' and 'valid' augmented image generators

In [None]:
# Augmentation settings, expanded into ImageDataGenerator(**...) by
# train_generator for the training split.
train_generator_args = {
    'rotation_range': .2,
    'width_shift_range': .05,
    'height_shift_range': .05,
    'shear_range': .05,
    'zoom_range': .05,
    'horizontal_flip': True,
    'fill_mode': 'nearest',
}

In [None]:
# Training batches are augmented; validation batches go through the same
# pipeline with an empty augmentation dict.
input_hw = (HEIGHT, WIDTH)

train_gen = train_generator(df_train, BATCH_SIZE, train_generator_args, target_size=input_hw)

val_gen = train_generator(df_val, BATCH_SIZE, dict(), target_size=input_hw)

### Setting Model parameters

In [None]:
# Setting Model parameters and compiling
model = unet(input_size=(HEIGHT, WIDTH, 3))

# The deprecated `lr=` / `decay=` optimizer arguments are replaced by
# `learning_rate=` with an explicit schedule. InverseTimeDecay with
# decay_steps=1 gives ALPHA / (1 + DECAY_RATE * step), the same curve the
# `decay` argument produced. `epsilon=None` is dropped so Adam uses its own
# default epsilon.
lr_schedule = tf.keras.optimizers.schedules.InverseTimeDecay(
    initial_learning_rate=ALPHA,
    decay_steps=1,
    decay_rate=DECAY_RATE,
)
OPTIMIZER = Adam(learning_rate=lr_schedule)
model.compile(
    optimizer=OPTIMIZER,
    loss=dice_coef_loss,
    metrics=['binary_accuracy', iou, dice_coef]
)

In [None]:
# Callbacks: write a checkpoint file (save_best_only=True) and stop early
# after 4 epochs without val_loss improvement.
checkpoint_cb = ModelCheckpoint('brain_seg_unet.hdf5',
                                verbose=1,
                                save_best_only=True)
early_stop_cb = EarlyStopping(monitor='val_loss', verbose=1, patience=4)

callbacks = [checkpoint_cb, early_stop_cb]

## Training model

In [None]:
# Step counts must be whole numbers; len(...) / BATCH_SIZE is a float.
# Rounding up lets the final, partially filled batch count as a step.
train_steps = int(np.ceil(len(df_train) / BATCH_SIZE))
val_steps = int(np.ceil(len(df_val) / BATCH_SIZE))

history = model.fit(train_gen,
                   steps_per_epoch=train_steps,
                   epochs=EPOCHS,
                   callbacks=callbacks,
                   validation_data=val_gen,
                   validation_steps=val_steps)

In [None]:
# Plot training vs. validation curves for the loss and the Dice score.
history_dict = history.history

# (history key, y-axis label, figure title); the validation series lives
# under the same key prefixed with 'val_'.
curves = [
    ('loss', 'loss', 'loss graph'),
    ('dice_coef', 'dice score', 'dice score graph'),
]

for fig_num, (key, ylabel, title) in enumerate(curves, start=1):
    plt.figure(fig_num)
    plt.plot(history_dict[key], 'ro-')
    plt.plot(history_dict['val_' + key], 'bo-')
    plt.xlabel('iteration')
    plt.ylabel(ylabel)
    plt.title(title, fontsize = 15)
    # Every figure gets a legend (the loss figure had none). The second
    # series comes from validation_data, so it is labelled accordingly.
    plt.legend(['Training', 'Validation'])
plt.show()

In [None]:
# Reload the checkpoint written during training. The custom loss and metrics
# have to be supplied by name so the saved model can be deserialised.
custom_objects = {
    'dice_coef_loss': dice_coef_loss,
    'iou': iou,
    'dice_coef': dice_coef,
}
model = load_model('brain_seg_unet.hdf5', custom_objects=custom_objects)

In [None]:
# Show predictions for random test rows next to the image and its true mask.
N_PREVIEWS = 30
for _ in range(N_PREVIEWS):
    # Lower bound 0 so the first test row can be drawn too (high is exclusive).
    index = np.random.randint(0, len(df_test))
    img = cv2.imread(df_test['images'].iloc[index])
    # The training generator reads images with color_mode "rgb", while
    # cv2.imread yields BGR; convert so inference sees the same channel order.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    # cv2.resize takes (width, height).
    img = cv2.resize(img, (WIDTH, HEIGHT))
    img = img / 255
    img = img[np.newaxis, :, :, :]  # add the batch dimension
    pred = model.predict(img)

    plt.figure(figsize=(12,12))
    plt.subplot(1,3,1)
    plt.imshow(np.squeeze(img))
    plt.title('Original Image')
    plt.subplot(1,3,2)
    plt.imshow(np.squeeze(cv2.imread(df_test['masks'].iloc[index])))
    plt.title('Original Mask')
    plt.subplot(1,3,3)
    # Binarise the sigmoid output at 0.5 for display.
    plt.imshow(np.squeeze(pred) > .5)
    plt.title('Prediction')
    plt.show()