# UWMGI: UNet Keras [Inference]

### If this kernel is useful, <font color='red'>please upvote!</font>

In [None]:
import warnings
warnings.filterwarnings("ignore")

import pandas as pd
import numpy as np
import os
import gc
import cv2
import math
from PIL import Image 
import matplotlib.pyplot as plt
from tqdm import tqdm
from tqdm.notebook import tqdm
from datetime import datetime
import json,itertools
from typing import Optional
from glob import glob

from sklearn.model_selection import StratifiedKFold

from tensorflow import keras
import tensorflow as tf
import keras
from keras import backend as K
from keras.models import Model
from keras.layers import Input
from keras.layers.convolutional import Conv2D, Conv2DTranspose
from keras.layers.pooling import MaxPooling2D
from keras.layers.merge import concatenate
from keras.losses import binary_crossentropy
from keras.callbacks import Callback, ModelCheckpoint
from keras.models import load_model

import matplotlib.gridspec as gridspec
import matplotlib.patches as mpatches
import matplotlib as mpl

In [None]:
# Default number of samples per batch used by DataGenerator.
BATCH_SIZE = 16

# Training / cross-validation settings; the inference cells visible in this
# notebook do not reference them.
EPOCHS = 30
n_splits = 5
fold_selected = 1  # valid values: 1..5

In [None]:
df = pd.read_csv("../input/uw-madison-gi-tract-image-segmentation/sample_submission.csv")

# An empty sample submission switches the notebook into debug mode, where the
# first 3000 rows of train.csv stand in for the submission rows.
DEBUG = df.shape[0] == 0
if DEBUG:
    df = pd.read_csv("../input/uw-madison-gi-tract-image-segmentation/train.csv").iloc[:1000 * 3]
    # Mirror the submission layout: no ground truth, empty prediction column.
    del df['segmentation']
    df['predicted'] = ""

In [None]:
df.rename(columns={'class': 'class_name'}, inplace=True)

# Split each id on "_" once; token 0 carries the case number, token 1 the day
# number and token 3 the slice identifier.
id_tokens = df["id"].apply(lambda sample_id: sample_id.split("_"))
df["case"] = id_tokens.apply(lambda tokens: int(tokens[0].replace("case", "")))
df["day"] = id_tokens.apply(lambda tokens: int(tokens[1].replace("day", "")))
df["slice"] = id_tokens.apply(lambda tokens: tokens[3])

# Debug mode reads the training scans, otherwise the test scans.
TRAIN_DIR = (
    '../input/uw-madison-gi-tract-image-segmentation/train'
    if DEBUG
    else '../input/uw-madison-gi-tract-image-segmentation/test'
)

all_train_images = glob(os.path.join(TRAIN_DIR, "**", "*.png"), recursive=True)
# Strip the last four path components (case / case_day / scans / file name)
# from the first image to recover the directory that holds the case folders.
dataset_root = all_train_images[0].rsplit("/", 4)[0]

# Path prefix (everything up to "slice_<id>") expected for every dataframe row.
df["path_partial"] = [
    os.path.join(
        dataset_root,
        f"case{case}",
        f"case{case}_day{day}",
        "scans",
        f"slice_{slice_id}",
    )
    for case, day, slice_id in zip(df["case"].values, df["day"].values, df["slice"].values)
]

# Same prefix computed from the files on disk: drop the four trailing
# "_"-separated fields of each file name.
image_lookup = pd.DataFrame({
    'path_partial': [str(image_path.rsplit("_", 4)[0]) for image_path in all_train_images],
    'path': all_train_images,
})

# Attach the full image path to each row, then discard the join key.
df = df.merge(image_lookup, on="path_partial").drop(columns=["path_partial"])

# The file name (minus the ".png" suffix) ends in four "_"-separated fields;
# the first two of them are read as width and height.
size_tokens = df["path"].apply(lambda image_path: image_path[:-4].rsplit("_", 4))
df["width"] = size_tokens.apply(lambda tokens: int(tokens[1]))
df["height"] = size_tokens.apply(lambda tokens: int(tokens[2]))

del id_tokens, dataset_root, image_lookup, size_tokens
df.head(5)

In [None]:
# Keep every third row of `df` (presumably one row per image, since later cells
# emit three class rows per id) and copy over only the columns used below.
every_third = df.iloc[::3]
df_train = pd.DataFrame({'id': every_third['id']})
for column in ('path', 'predicted', 'case', 'day', 'slice', 'width', 'height'):
    df_train[column] = every_third[column].values

del df, every_third
df_train.reset_index(inplace=True, drop=True)
df_train.fillna('', inplace=True)
df_train.head(5)

In [None]:
# Report the frame's shape before and after the debug-only index reset.
print(df_train.shape)
if DEBUG:
    df_train.reset_index(drop=True, inplace=True)
print(df_train.shape)

In [None]:
# Force a garbage-collection pass before the helper definitions and inference.
gc.collect()

In [None]:
def rle_encode(img):
    '''
    Run-length encode a binary mask.

    img: numpy array, 1 - mask, 0 - background
    Returns the run lengths as a space-separated "start length ..." string,
    with 1-based starts counted over the row-major flattened array.
    '''
    flat = img.reshape(-1)
    # Pad with a zero on each side so runs touching either end are detected.
    padded = np.pad(flat, (1, 1), mode="constant", constant_values=0)
    # 1-based positions where the value changes: run starts and run ends alternate.
    boundaries = np.flatnonzero(padded[1:] != padded[:-1]) + 1
    # Turn every run end into a run length (end - start).
    lengths = boundaries[1::2] - boundaries[::2]
    boundaries[1::2] = lengths
    return ' '.join(map(str, boundaries))

def rle_decode(mask_rle, shape, color=1):
    '''
    Decode a run-length string into a mask array.

    mask_rle: run-length as string formatted "start length start length ..."
              (1-based starts over the row-major flattened mask); an empty
              string yields an all-background mask
    shape: (height, width) or (height, width, channels) of the array to return
    color: value written into mask pixels; a scalar, or a per-channel array
           when shape has a channel axis
    Returns a float32 numpy array of the given shape, color - mask, 0 - background
    '''
    tokens = mask_rle.split()
    starts, lengths = [np.asarray(x, dtype=int) for x in (tokens[0::2], tokens[1::2])]
    starts -= 1  # RLE starts are 1-based
    ends = starts + lengths
    # The docstring always advertised 2-D shapes, but shape[2] was read
    # unconditionally; fall back to a single channel when none is given.
    channels = shape[2] if len(shape) > 2 else 1
    img = np.zeros((shape[0] * shape[1], channels), dtype=np.float32)
    for lo, hi in zip(starts, ends):
        img[lo:hi] = color
    return img.reshape(shape)


def build_masks(labels, input_shape, colors=True):
    '''
    Combine several run-length encoded masks into one array.

    labels: iterable of run-length strings
    input_shape: (height, width) of the masks
    colors: if truthy, return a 3-channel array where each label is painted
            with a random color; otherwise return a 1-channel array
    Returns the summed masks clipped to the range [0, 1]
    '''
    height, width = input_shape
    channels = 3 if colors else 1
    combined = np.zeros((height, width, channels))
    for rle in labels:
        if colors:
            # A fresh random color is drawn for every label.
            combined += rle_decode(rle, shape=(height, width, 3), color=np.random.rand(3))
        else:
            combined += rle_decode(rle, shape=(height, width, 1))
    return combined.clip(0, 1)


In [None]:
import keras
class DataGenerator(tf.keras.utils.Sequence):
    """Keras Sequence yielding batches of 256x256 scans (and masks when training).

    Args:
        df: dataframe with a 'path' column; for subset "train" it must also
            provide 'width', 'height' and the run-length encoded columns
            'large_bowel', 'small_bowel' and 'stomach'.
        batch_size: maximum number of samples per batch.
        subset: "train" yields (X, y); any other value yields X only.
        shuffle: reshuffle the sample order at the end of every epoch.
    """

    def __init__(self, df, batch_size = BATCH_SIZE, subset="train", shuffle=False):
        super().__init__()
        self.df = df
        self.shuffle = shuffle
        self.subset = subset
        self.batch_size = batch_size
        self.on_epoch_end()

    def __len__(self):
        """Return the number of batches, counting a trailing partial batch."""
        # ceil, not floor: flooring silently dropped the last
        # len(df) % batch_size samples, i.e. lost predictions at inference.
        return int(np.ceil(len(self.df) / self.batch_size))

    def on_epoch_end(self):
        """Reset (and optionally shuffle) the positional sample order."""
        self.indexes = np.arange(len(self.df))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __getitem__(self, index):
        """Build batch number `index`: X, or (X, y) for the "train" subset."""
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]
        # Size the buffers by the actual batch so a short final batch does not
        # carry uninitialised rows.
        n_samples = len(indexes)
        is_train = self.subset == 'train'
        X = np.empty((n_samples, 256, 256, 3))
        y = np.empty((n_samples, 256, 256, 3)) if is_train else None

        for i, row_pos in enumerate(indexes):
            # The (256, 256, 1) image is broadcast across the 3 channels of X.
            X[i,] = self.__load_grayscale(self.df['path'].iloc[row_pos])
            if is_train:
                w = self.df['width'].iloc[row_pos]
                h = self.df['height'].iloc[row_pos]
                # One output channel per class, in this fixed order.
                for k, j in enumerate(["large_bowel", "small_bowel", "stomach"]):
                    rles = self.df[j].iloc[row_pos]
                    masks = rle_decode(rles, shape=(h, w, 1))
                    y[i, :, :, k] = cv2.resize(masks, (256, 256))
        if is_train:
            return X, y
        return X

    def __load_grayscale(self, img_path):
        """Read one scan, resize it to 256x256 and return it as (256, 256, 1)."""
        img = cv2.imread(img_path, cv2.IMREAD_ANYDEPTH)
        dsize = (256, 256)
        img = cv2.resize(img, dsize)
        # NOTE(review): casting to int8 wraps any value outside [-128, 127]
        # before scaling. Left unchanged because the saved model was presumably
        # trained with exactly this preprocessing - confirm before altering.
        img = img.astype(np.int8) / 255.
        img = np.expand_dims(img, axis=-1)
        return img
        

# UNET model

In [None]:
from keras import backend as K
from keras.losses import binary_crossentropy
import tensorflow as tf

def dice_coef(y_true, y_pred, smooth=1):
    """Return the smoothed Dice coefficient computed over all elements.

    Both tensors are flattened, so the score is taken over the whole batch
    at once; `smooth` is added to numerator and denominator.
    """
    truth = K.flatten(y_true)
    pred = K.flatten(y_pred)
    overlap = K.sum(truth * pred)
    numerator = 2. * overlap + smooth
    denominator = K.sum(truth) + K.sum(pred) + smooth
    return numerator / denominator

def iou_coef(y_true, y_pred, smooth=1):
    """Return the smoothed IoU, computed per sample and averaged over axis 0.

    Sums run over axes 1, 2 and 3 of the inputs; `smooth` is added to both
    the intersection and the union.
    """
    sample_axes = [1, 2, 3]
    intersection = K.sum(K.abs(y_true * y_pred), axis=sample_axes)
    total = K.sum(y_true, sample_axes) + K.sum(y_pred, sample_axes)
    union = total - intersection
    return K.mean((intersection + smooth) / (union + smooth), axis=0)

def dice_loss(y_true, y_pred):
    """Return 1 minus the Dice score (smoothing term 1.0) over all elements."""
    smooth = 1.
    truth = K.flatten(y_true)
    pred = K.flatten(y_pred)
    overlap = K.sum(truth * pred)
    score = (2. * overlap + smooth) / (K.sum(truth) + K.sum(pred) + smooth)
    return 1. - score

def bce_dice_loss(y_true, y_pred):
    """Return binary cross-entropy plus half the Dice loss.

    `y_true` is cast to float32 before both terms are evaluated.
    """
    target = tf.cast(y_true, tf.float32)
    bce_term = binary_crossentropy(target, y_pred)
    dice_term = dice_loss(target, y_pred)
    return bce_term + 0.5 * dice_term

class FixedDropout(keras.layers.Dropout):
    """Dropout layer that fills `None` entries of `noise_shape` from the input.

    Registered under the name 'FixedDropout' in `custom_objects` when the
    saved model is loaded.
    """

    def _get_noise_shape(self, inputs):
        """Return `noise_shape` with every None replaced by the input's dynamic size."""
        if self.noise_shape is None:
            return None

        runtime_shape = K.shape(inputs)
        resolved = []
        for axis, dim in enumerate(self.noise_shape):
            resolved.append(runtime_shape[axis] if dim is None else dim)
        return tuple(resolved)


## UNET


In [None]:
# Custom layer, metrics and loss that load_model needs in order to
# deserialize the saved U-Net.
custom_objects = {
    'FixedDropout': FixedDropout,
    'dice_coef': dice_coef,
    'iou_coef': iou_coef,
    'bce_dice_loss': bce_dice_loss,
}
gc.collect()

# Variants 1-3 are alternative inference strategies kept for reference.
# They are disabled by default: as originally written they relied on a global
# `model` that was never loaded (the load_model call was commented out), so
# the cell stopped with a NameError, and the LOGITS they produce are
# recomputed by variant 4 in the next cell anyway.
RUN_INFERENCE_EXPERIMENTS = False

if RUN_INFERENCE_EXPERIMENTS:
    model = load_model('../input/uwmgi-unet-keras/model.h5', custom_objects=custom_objects)

    ############ 1: one generator per chunk of 16 rows ##################
    LOGITS = []
    for i in tqdm(range(0, df_train.shape[0], 16)):
        # df_train carries a 0..n-1 index (reset_index above), so a positional
        # slice selects the same rows as the former index.isin(...) scan.
        chunk = df_train.iloc[i:i + 16]
        test_generator = DataGenerator(chunk, batch_size=len(chunk), subset="test", shuffle=False)
        LOGITS.append(model.predict(test_generator, verbose=0))

    ############ 2: predict one sample at a time ##################
    LOGITS = []
    pred_batches = DataGenerator(df_train, batch_size=1, subset="test", shuffle=False)
    gc.collect()
    for p in tqdm(range(len(pred_batches))):
        LOGITS.append(model.predict(pred_batches[p], verbose=0))

    ############ 3: a single predict call over the whole frame ##################
    pred_batches = DataGenerator(df_train, batch_size=64, subset="test", shuffle=False)
    # Model.predict accepts a Sequence directly; predict_generator is deprecated.
    LOGITS = model.predict(pred_batches, verbose=0)
    print(LOGITS.shape)

In [None]:
############ 4 ##################
def infer(pred_batches):
    """Load the saved model, predict on `pred_batches` and release the model.

    Args:
        pred_batches: a batch generator (e.g. DataGenerator with
            subset="test") accepted by `Model.predict`.

    Returns:
        The array returned by `model.predict` for all batches.
    """
    model = load_model('../input/uwmgi-unet-keras/model.h5', custom_objects=custom_objects)
    # Model.predict accepts generators directly; predict_generator is a
    # deprecated alias for the same call.
    LOGITS = model.predict(pred_batches, verbose=0)
    del model
    # The model is reloaded on every call; without clearing Keras' global
    # state each reload would keep accumulating memory.
    K.clear_session()
    gc.collect()
    return LOGITS

# Predict in chunks of 128 rows; infer() reloads and frees the model for every
# chunk. LOGITS ends up as a list with one prediction array per chunk.
LOGITS = []
for i in tqdm(range(0, df_train.shape[0], 128)):
    # df_train carries a 0..n-1 index (reset_index above), so a positional
    # slice picks the same rows as index.isin(range(i, i + 128)) without
    # rescanning the whole frame for every chunk.
    chunk = df_train.iloc[i:i + 128]
    pred_batches = DataGenerator(chunk, batch_size=len(chunk), subset="test", shuffle=False)
    LOGITS.append(infer(pred_batches))
    del pred_batches
    gc.collect()


    
    

In [None]:
# Force a garbage-collection pass once inference has finished.
gc.collect()

In [None]:
# Convert the predictions back to each image's original size and run-length
# encode them, one list per class (channel 0, 1, 2 of the model output).
lbs = []  # large bowel
sbs = []  # small bowel
sts = []  # stomach
class_outputs = (lbs, sbs, sts)

heights = df_train["height"].values
widths = df_train["width"].values

# LOGITS is a list of per-chunk arrays, so the position inside a chunk is not
# the row position in df_train; keep a running row counter across chunks.
# (Using the within-chunk index read the sizes of the wrong rows for every
# chunk after the first.)
row = 0
for chunk_logits in LOGITS:
    for sample_logits in tqdm(chunk_logits):
        # cv2.resize expects dsize as (width, height); passing (height, width)
        # transposed the output for non-square images.
        dsize = (int(widths[row]), int(heights[row]))
        for channel, encoded in enumerate(class_outputs):
            pred_arr = np.round(
                cv2.resize(sample_logits[:, :, channel], dsize, interpolation=cv2.INTER_NEAREST)
            ).astype('uint8')
            encoded.append(rle_encode(pred_arr))
        row += 1
del LOGITS
gc.collect()

In [None]:
# Only the ids are needed from here on; drop every other column.
df_train = df_train.loc[:, ['id']]
gc.collect()

In [None]:
# Expand every image id into three submission rows, one per class, pairing
# each with the encoded prediction stored at the same index label.
CLASS_NAMES = ['large_bowel', 'small_bowel', 'stomach']
ids, classes, rles = [], [], []
for index, sample_id in tqdm(df_train['id'].items(), total=df_train.shape[0]):
    ids += [sample_id] * 3
    classes += CLASS_NAMES
    rles += [lbs[index], sbs[index], sts[index]]

In [None]:
# Assemble the submission table (id, class, predicted) and write it to disk.
df_train = pd.DataFrame({
    'id': ids,
    'class': classes,
    'predicted': rles,
})
df_train.to_csv("submission.csv", index=False)

In [None]:
# Preview the first rows of the submission frame.
df_train.head()