# Importing Libraries

In [None]:
import os
import pandas as pd
import numpy as np
import glob2
import random
import cv2
from skimage import io
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from model import *
from losses import *
from data_generator import DataGenerator

import tensorflow as tf
%matplotlib inline

In [None]:
# Root folder of the dataset, resolved to an absolute path relative to the
# current working directory. Every later cell builds file paths from it.
DATASET_PATH = os.path.abspath('../dataset/lgg-mri-segmentation/kaggle_3m/')

In [None]:
# Load the dataset's data.csv table. The path components are handed to
# os.path.join separately so the separator is chosen by the OS instead of
# being hard-coded in a concatenated string.
data = pd.read_csv(os.path.join(DATASET_PATH, 'data.csv'))
data.info()

In [None]:
# Preview the first rows of the metadata table.
data.head()

In [None]:
# Collect every .tif file below the dataset root (MRI slices and masks alike),
# sorted by path. The pattern is assembled with os.path.join rather than by
# gluing strings together with a literal '/'.
images = sorted(glob2.glob(os.path.join(DATASET_PATH, '**', '*.tif')))
len(images)

In [None]:
# The patient id is the name of the folder that directly contains each file.
# os.path handles the platform's separator, whereas splitting on '/' only
# works for POSIX-style paths.
patient_id = [os.path.basename(os.path.dirname(x)) for x in images]
patient_id[:5]

In [None]:
# One row per .tif file: the patient folder it belongs to and its full path.
df = pd.DataFrame({'patient_id': patient_id, 'image_path': images})
df.head()

In [None]:
MASK_SUFFIX = '_mask'


def _slice_number(path):
    """Return the integer slice index encoded at the end of a .tif file name.

    Works for both `<prefix>_<n>.tif` and `<prefix>_<n>_mask.tif`, whatever the
    length of the prefix or of the enclosing folder name.
    """
    stem = os.path.splitext(os.path.basename(path))[0]
    if stem.endswith(MASK_SUFFIX):
        stem = stem[:-len(MASK_SUFFIX)]
    return int(stem.rsplit('_', 1)[1])


def _pairing_key(path):
    """Sort key shared by images and masks: slice index first, then folder."""
    return _slice_number(path), os.path.dirname(path)


# A file is a mask when its *name* ends in "_mask.tif"; testing the file name
# instead of searching the whole path avoids misclassifying everything when
# some parent directory happens to contain the word "mask".
is_mask = df['image_path'].str.endswith(MASK_SUFFIX + '.tif')
df_imgs = df[~is_mask]  # MRI slices
df_masks = df[is_mask]  # segmentation masks

# Offsets of the slice number inside a path, valid only for patient folders
# whose name is as long as the sample one. They are no longer used for the
# sorting below and are kept only so other code that still slices paths by
# offset keeps working.
BASE_LEN = len(DATASET_PATH + '/TCGA_DU_6408_19860521/TCGA_DU_6408_19860521_')
END_IMG_LEN = 4
END_MASK_LEN = 9

# Data sorting: both lists use the same key, so position k in `imgs` and
# position k in `masks` refer to the same patient folder and slice.
imgs = sorted(df_imgs["image_path"].values, key=_pairing_key)
masks = sorted(df_masks["image_path"].values, key=_pairing_key)

if len(imgs) != len(masks):
    raise ValueError(
        "Found {} images but {} masks; they cannot be paired by position".format(
            len(imgs), len(masks)))

# Sorting check: print one random pair for a visual sanity check.
idx = random.randint(0, len(imgs)-1)
print("Path to the Image:", imgs[idx], "\nPath to the Mask:", masks[idx])

In [None]:
# Final dataframe
# Final dataframe: one row per MRI slice with its mask path.
# The patient id is derived from each image path itself. `imgs` has been
# re-sorted by slice number, so reusing df_imgs.patient_id (still in path
# order) would attach the wrong patient to most rows.
brain_df = pd.DataFrame({"patient_id": [os.path.basename(os.path.dirname(p)) for p in imgs],
                         "image_path": imgs,
                         "mask_path": masks
                        })

def has_mask(mask_path):
    """Return 1 if the mask image contains any non-zero pixel, else 0.

    Raises FileNotFoundError when OpenCV cannot read the file (cv2.imread
    signals that by returning None), so a bad path is reported directly.
    """
    mask_img = cv2.imread(mask_path)
    if mask_img is None:
        raise FileNotFoundError("Could not read mask image: {}".format(mask_path))
    return int(mask_img.max() > 0)

# 1 = slice has a tumour annotation, 0 = empty mask.
brain_df['mask'] = brain_df['mask_path'].apply(has_mask)
brain_df

# Data Visualization

In [None]:
# Pick the first slice with a non-empty mask. The 'mask' column already
# records that for every row, so there is no need to re-read mask files.
tumor_rows = brain_df.index[brain_df['mask'] == 1]
# With no annotated slice at all, fall back to the last row (what a full
# scan without a hit would have ended on).
i = tumor_rows[0] if len(tumor_rows) > 0 else brain_df.index[-1]

plt.figure(figsize=(8,8))
plt.subplot(1,2,1)
plt.imshow(cv2.imread(brain_df.mask_path[i]))
plt.title('Tumor Location')

plt.subplot(1,2,2)
# cv2.imread returns channels in BGR order while imshow expects RGB, so
# convert before displaying the MRI slice.
plt.imshow(cv2.cvtColor(cv2.imread(brain_df.image_path[i]), cv2.COLOR_BGR2RGB));

In [None]:
# Show six randomly chosen slices next to their masks.
fig, axs = plt.subplots(6,2, figsize=(16,26))
for mri_ax, mask_ax in axs:
    # randrange excludes the upper bound; random.randint(0, len(brain_df))
    # could return len(brain_df) itself and index past the last row.
    i = random.randrange(len(brain_df))
    mri_ax.set_title("Brain MRI")
    # cv2.imread yields BGR; convert so imshow renders the true channel order.
    mri_ax.imshow(cv2.cvtColor(cv2.imread(brain_df.image_path[i]), cv2.COLOR_BGR2RGB))
    mask_ax.set_title("Mask - " + str(brain_df['mask'][i]))  # 0 = empty, 1 = tumour
    mask_ax.imshow(cv2.imread(brain_df.mask_path[i]))

fig.tight_layout()

In [None]:
# For the first 12 slices that have a tumour, plot the MRI, the mask, and the
# MRI with the masked pixels recoloured.
N_EXAMPLES = 12
tumor_examples = brain_df[brain_df['mask'] == 1].head(N_EXAMPLES)

fig,axs = plt.subplots(N_EXAMPLES,3, figsize=(20,50))
# zip stops at the shorter sequence, so fewer than N_EXAMPLES tumour slices
# simply leaves the remaining axes empty.
for row_axes, image_path, mask_path in zip(axs,
                                           tumor_examples.image_path,
                                           tumor_examples.mask_path):
    img = io.imread(image_path)
    row_axes[0].set_title("Brain MRI")
    row_axes[0].imshow(img)

    mask_img = io.imread(mask_path)
    row_axes[1].set_title("Mask")
    row_axes[1].imshow(mask_img, cmap='gray')

    img[mask_img==255] = (0,255,150)  # change pixel color at the position of mask
    row_axes[2].set_title("MRI with Mask")
    row_axes[2].imshow(img)

fig.tight_layout()


# Data Split 

In [None]:
# Keep only the slices whose mask is non-empty (mask == 1); the split below
# works on this subset.
brain_df_mask = brain_df[brain_df['mask'] == 1]
brain_df_mask.shape

In [None]:
# Creating train, validation and test sets: 85% train, and the remaining 15%
# divided evenly between test and validation.
# A fixed seed makes the split (and therefore the reported scores) repeatable
# across notebook runs.
SPLIT_SEED = 42

# NOTE(review): rows are individual slices, so slices from one patient can end
# up in different sets. If patient-level isolation matters, split on
# patient_id instead.
X_train, X_holdout = train_test_split(brain_df_mask, test_size=0.15, random_state=SPLIT_SEED)
X_test, X_val = train_test_split(X_holdout, test_size=0.5, random_state=SPLIT_SEED)
print("Train size is {}, valid size is {} & test size is {}".format(len(X_train), len(X_val), len(X_test)))

train_ids = list(X_train.image_path)
train_mask = list(X_train.mask_path)

val_ids = list(X_val.image_path)
val_mask = list(X_val.mask_path)

# Data Generator

In [None]:
# Wrap the train/validation image and mask path lists in the project's
# DataGenerator; the results are fed to seg_model.fit further down.
# NOTE(review): batch size, shuffling and any resizing are whatever
# DataGenerator defaults to — confirm in data_generator.py.
train_data = DataGenerator(train_ids, train_mask)
val_data = DataGenerator(val_ids, val_mask)

# Modelling

In [None]:
# Build the segmentation network. get_model is not defined in this notebook;
# presumably it arrives through `from model import *` — verify.
seg_model = get_model()

In [None]:
# Compiling the model and defining the callbacks.
# `learning_rate` is the supported keyword; the old `lr` alias is deprecated
# and rejected by newer Keras releases.
adam = tf.keras.optimizers.Adam(learning_rate = 0.05, epsilon = 0.1)
seg_model.compile(optimizer = adam, 
                  loss = focal_tversky, 
                  metrics = [tversky, dice_coef]
                 )
# Callbacks
# NOTE(review): EarlyStopping, ModelCheckpoint and ReduceLROnPlateau are not
# imported explicitly in this notebook; presumably they come in through one
# of the star imports — verify.

# Stop training once the validation loss has not improved for 20 epochs.
earlystopping = EarlyStopping(monitor='val_loss',
                              mode='min', 
                              verbose=1, 
                              patience=20
                             )
# Save the model only when the validation loss reaches a new minimum.
# monitor/mode are spelled out to match the other two callbacks.
checkpointer = ModelCheckpoint(filepath="ResUNet-segModel-weights.hdf5", 
                               monitor='val_loss',
                               mode='min',
                               verbose=1, 
                               save_best_only=True
                              )
# Multiply the learning rate by 0.2 after 10 epochs without a validation-loss
# improvement of at least 1e-4.
reduce_lr = ReduceLROnPlateau(monitor='val_loss',
                              mode='min',
                              verbose=1,
                              patience=10,
                              min_delta=0.0001,
                              factor=0.2
                             )

In [None]:
# Train for up to 60 epochs, validating after each one; the callbacks handle
# checkpointing, early stopping and learning-rate reduction. The returned
# history object is kept in `h` for the plots below.
training_callbacks = [checkpointer, earlystopping, reduce_lr]
h = seg_model.fit(
    train_data,
    validation_data=val_data,
    epochs=60,
    callbacks=training_callbacks,
)

In [None]:
# Training curves: loss in the left panel, Dice coefficient in the right one.
# Each entry is (subplot position, history key, panel title, y-axis label).
curve_panels = [
    (1, 'loss', "SEG Model focal tversky Loss", "focal tversky loss"),
    (2, 'dice_coef', "SEG Model Dice Coef score", "Dice  Coeff"),
]

plt.figure(figsize=(12,5))
for position, metric, heading, y_label in curve_panels:
    plt.subplot(1, 2, position)
    plt.plot(h.history[metric])           # training curve
    plt.plot(h.history['val_' + metric])  # validation curve
    plt.title(heading)
    plt.ylabel(y_label)
    plt.xlabel("Epochs")
    plt.legend(['train', 'val'])

In [None]:
# Score the trained model on the held-out test split.
test_ids = X_test.image_path.tolist()
test_mask = X_test.mask_path.tolist()
test_data = DataGenerator(test_ids, test_mask)

# evaluate returns the loss followed by the compiled metrics, in order.
test_scores = seg_model.evaluate(test_data)
_, tv, dice = test_scores
print("Segmentation tversky is {:.2f}%".format(tv*100))
print("Segmentation Dice is {:.2f}".format(dice))