In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and print the full path of every file found.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
from PIL import Image
import matplotlib.pyplot as plt
from tqdm import tqdm
from tqdm.notebook import tqdm
from datetime import datetime
import json,itertools
from typing import Optional
from glob import glob

from sklearn.model_selection import StratifiedKFold, KFold, StratifiedGroupKFold

from tensorflow import keras
import tensorflow as tf
import keras
from keras import backend as K
from keras.models import Model
from keras.layers import Input
from keras.layers.convolutional import Conv2D, Conv2DTranspose
from keras.layers.pooling import MaxPooling2D
from keras.layers.merge import concatenate
from keras.losses import binary_crossentropy
from keras.callbacks import Callback, ModelCheckpoint, EarlyStopping
from keras.models import load_model, save_model

import matplotlib.gridspec as gridspec
import matplotlib.patches as mpatches
import matplotlib as mpl

In [None]:
# Train set: read the competition's training annotations CSV.
train_df = pd.read_csv('../input/uw-madison-gi-tract-image-segmentation/train.csv')
print(train_df.shape)  # (rows, columns) of the raw frame
# Last expression of the cell: preview the first rows.
train_df.head()


In [None]:
# Test set: the sample submission lists the rows that have to be predicted.
test_df = pd.read_csv('../input/uw-madison-gi-tract-image-segmentation/sample_submission.csv')

# An empty sample submission switches the notebook to debug mode, where a tiny
# sample of the training rows stands in for the test set. preprocessing() reads
# this flag to decide which image folder to scan.
DEBUG = len(test_df) == 0
if DEBUG:
    # Fixed seed so that every run picks the same debug rows.
    test_df = train_df.sample(frac=.00001, random_state=42)
    print(test_df.shape)
    test_df["segmentation"] = ''
    # NOTE(review): later cells name the output column "predicted", not
    # "prediction" — confirm which one is intended here.
    test_df = test_df.rename(columns={"segmentation": "prediction"})

submission = test_df.copy()
test_df.head()

## Preprocessing

In [None]:
# Metadata
def preprocessing(df, subset="train"):
    """Attach the image path and its parsed dimensions to every row of ``df``.

    The ``id`` column is split on ``_`` to obtain ``case`` (int), ``day`` (int)
    and ``slice`` (str). Every ``*.png`` below the selected image folder is then
    globbed and matched to the rows through the path prefix
    ``<root>/case<case>/case<case>_day<day>/scans/slice_<slice>``. ``width`` and
    ``height`` are the first and second ``_``-separated integer fields that
    follow that prefix in the file name.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with an ``id`` column. It is not modified; the work is done on a
        copy.
    subset : str
        ``"train"`` scans the train image folder. Any other value scans the
        test folder, unless the module-level ``DEBUG`` flag is truthy, in which
        case the train folder is used as well.

    Returns
    -------
    pandas.DataFrame
        The input rows that have a matching image (inner merge), with ``case``,
        ``day``, ``slice``, ``path``, ``width`` and ``height`` columns added.

    Raises
    ------
    FileNotFoundError
        If no PNG file exists below the selected image folder.
    """
    # Work on a copy so the caller's frame does not silently gain columns.
    df = df.copy()
    #--------------------------------------------------------------------------
    df["case"] = df["id"].apply(lambda x: int(x.split("_")[0].replace("case", "")))
    df["day"] = df["id"].apply(lambda x: int(x.split("_")[1].replace("day", "")))
    df["slice"] = df["id"].apply(lambda x: x.split("_")[3])
    #--------------------------------------------------------------------------
    if (subset=="train") or (DEBUG):
        DIR="../input/uw-madison-gi-tract-image-segmentation/train"
    else:
        DIR="../input/uw-madison-gi-tract-image-segmentation/test"

    all_images = glob(os.path.join(DIR, "**", "*.png"), recursive=True)
    if not all_images:
        # Without this guard the next line fails with an opaque IndexError.
        raise FileNotFoundError("No PNG images found under " + repr(DIR))

    # Strip <case>/<case_day>/scans/<file> from the first hit to get the root.
    root = all_images[0].rsplit("/", 4)[0]

    df["path_partial"] = [
        os.path.join(root,
                     "case" + str(case),
                     "case" + str(case) + "_" + "day" + str(day),
                     "scans",
                     "slice_" + str(slice_))
        for case, day, slice_ in zip(df["case"].values, df["day"].values, df["slice"].values)
    ]
    #--------------------------------------------------------------------------
    # Same prefix computed from the real files: drop the last four "_" fields.
    image_index = pd.DataFrame({
        "path_partial": [str(path.rsplit("_", 4)[0]) for path in all_images],
        "path": all_images,
    })
    #--------------------------------------------------------------------------
    df = df.merge(image_index, on="path_partial").drop(columns=["path_partial"])
    print(df.columns)
    #--------------------------------------------------------------------------
    # x[:-4] removes the ".png" extension before the size fields are split off.
    df["width"] = df["path"].apply(lambda x: int(x[:-4].rsplit("_",4)[1]))
    df["height"] = df["path"].apply(lambda x: int(x[:-4].rsplit("_",4)[2]))
    #--------------------------------------------------------------------------

    return df

In [None]:
# Attach image path, width and height to the training rows; train_df is rebound
# to the merged frame that preprocessing() returns.
train_df = preprocessing(train_df, subset="train")


In [None]:
# Display the (rows, columns) of the training frame after preprocessing.
train_df.shape

In [None]:
# Same metadata step for the test frame; with subset="test", preprocessing()
# consults the global DEBUG flag to choose between the train and test folders.
test_df=preprocessing(test_df, subset="test")


In [None]:
# Restructure
def restructure(df, subset="train"):
    """Collapse every group of three consecutive rows of ``df`` into one row.

    The metadata columns (``id``, ``path``, ``case``, ``day``, ``slice``,
    ``width``, ``height``) are taken from the first row of each group. For
    ``subset == "train"`` the ``segmentation`` values of the 1st, 2nd and 3rd
    row of a group become the ``large_bowel``, ``small_bowel`` and ``stomach``
    columns, and ``count`` holds how many of those three are non-empty.
    Missing values are replaced by ``''`` and the index is renumbered from 0.
    """
    organ_columns = ['large_bowel', 'small_bowel', 'stomach']
    group_heads = df.iloc[::3]

    # Dict insertion order fixes the column order of the output frame.
    data = {'id': group_heads['id'].to_numpy()}
    if subset == "train":
        for offset, organ in enumerate(organ_columns):
            data[organ] = df['segmentation'].iloc[offset::3].to_numpy()
    for column in ('path', 'case', 'day', 'slice', 'width', 'height'):
        data[column] = group_heads[column].to_numpy()

    df_out = pd.DataFrame(data).fillna('')
    if subset == "train":
        df_out['count'] = df_out[organ_columns].ne('').sum(axis=1).to_numpy()

    return df_out

In [None]:
# Collapse each group of three consecutive rows into a single row with one
# segmentation column per organ (see restructure()), then preview the result.
train_df=restructure(train_df, subset="train")
train_df.head()

## Helper functions

In [None]:
# Run-length encoding
def rle_encode(img):
    """Run-length encode a mask.

    img: numpy array, 1 - mask, 0 - background
    Returns the encoding as a space-separated string of
    "start length start length ..." with 1-based starts over the flattened
    array ('' when nothing is set).
    """
    # Pad with a zero on both sides so runs touching either end still open and close.
    padded = np.concatenate([[0], img.flatten(), [0]])
    # Positions (1-based in the unpadded array) where the value changes.
    boundaries = np.flatnonzero(padded[1:] != padded[:-1]) + 1
    # Turn each run's end position into its length.
    boundaries[1::2] -= boundaries[::2]
    return ' '.join(map(str, boundaries))

def rle_decode(mask_rle, shape, color=1):
    '''
    Decode a run-length string into a mask array.

    mask_rle: run-length as a string of "start length" pairs, starts 1-based
              over the flattened array; an empty string gives an all-zero mask
    shape: (height, width) or (height, width, channels) of the array to return
    color: value written into the masked positions (default 1)
    Returns a float32 numpy array of the requested shape,
    color - mask, 0 - background
    '''
    tokens = mask_rle.split()
    starts = np.asarray(tokens[0::2], dtype=int) - 1   # convert to 0-based
    lengths = np.asarray(tokens[1::2], dtype=int)
    ends = starts + lengths

    # One row per pixel; any trailing dimensions (channels) are kept, so a run
    # fills every channel of the pixels it covers. A 2-D shape gives a flat
    # buffer, which the original code could not handle (it required shape[2]).
    flat_shape = (shape[0] * shape[1],) + tuple(shape[2:])
    img = np.zeros(flat_shape, dtype=np.float32)
    for lo, hi in zip(starts, ends):
        img[lo : hi] = color
    return img.reshape(shape)


In [None]:
# NOTE(review): neither constant is referenced by any later cell visible in this
# notebook (inference uses CFG.BATCH_SIZE instead) — confirm they are still needed.
BATCH_SIZE = 16
EPOCHS=10

In [None]:

from sklearn.model_selection import StratifiedKFold, KFold, StratifiedGroupKFold

## Do a folded split

In [None]:
n_splits = 5
fold_selected = 2   # fold held out as the validation set

# Label every row with a fold number (1..n_splits); rows sharing a `case`
# always land in the same fold and folds are stratified on `count`.
skf = StratifiedGroupKFold(n_splits=n_splits, shuffle=True, random_state=42)
fold_splits = skf.split(X=train_df, y=train_df['count'], groups=train_df['case'])
for fold, (_, val_idx) in enumerate(fold_splits, start=1):
    train_df.loc[val_idx, 'fold'] = fold

train_df['fold'] = train_df['fold'].astype(np.uint8)

# Rows of the selected fold form the validation frame, all others the training frame.
is_valid = train_df["fold"] == fold_selected
X_train = train_df[~is_valid]
X_valid = train_df[is_valid]
train_ids = X_train.index
valid_ids = X_valid.index

train_df.groupby('fold').size()

In [None]:
# Check submission format works
experimental = False
if experimental:
    # Restrict each split to its first few cases for a quick end-to-end run.
    train_cases = X_train.case.unique()[:5]
    valid_cases = X_valid.case.unique()[:2]
    X_train = X_train[X_train.case.isin(train_cases)]
    X_valid = X_valid[X_valid.case.isin(valid_cases)]

print('X_train shape:', X_train.shape)
print('X_valid shape:', X_valid.shape)

## Load trained model from JSON

In [None]:
from keras.models import model_from_json

In [None]:
from tensorflow.keras import backend, layers
class FixedDropout(layers.Dropout):
    """Dropout layer whose noise shape may contain ``None`` placeholders.

    Registered under the name 'FixedDropout' when the saved model is
    deserialized in a later cell.
    """

    def _get_noise_shape(self, inputs):
        """Return the noise shape with every ``None`` entry replaced by the
        matching dimension of ``inputs``; ``None`` if no noise shape was set."""
        template = self.noise_shape
        if template is None:
            return template

        runtime_shape = backend.shape(inputs)
        return tuple(
            runtime_shape[axis] if dim is None else dim
            for axis, dim in enumerate(template)
        )

In [None]:
# Load the architecture from JSON and rebuild the model.
# The context manager closes the file even if reading fails.
with open('../input/efficientb7-unet-model-file/model.json', 'r') as json_file:
    loaded_model_json = json_file.read()
# custom_objects maps the serialized class name to the class itself (not to an
# instance); the layer's settings come from the saved config.
unet_model = model_from_json(loaded_model_json, custom_objects={'FixedDropout': FixedDropout})
# Load the trained weights into the rebuilt model.
unet_model.load_weights("../input/efficientb7-unet-model-file/model.h5")
print("Loaded model from disk")

In [None]:
from glob import glob
sub_df = pd.read_csv('../input/uw-madison-gi-tract-image-segmentation/sample_submission.csv')

# An empty sample submission means debug mode: run inference on the first
# 100*3 preprocessed training rows instead of the test set.
debug = len(sub_df) == 0
if debug:
    sub_df = pd.read_csv('../input/uw-madison-gi-tract-image-segmentation/train.csv')
    test_df = preprocessing(sub_df, subset='train')[:100*3]
else:
    test_df = preprocessing(sub_df, subset='test')

test_df.head(5)

In [None]:
class CFG:
    """Notebook settings, read as plain class attributes."""

    # Batching and input geometry.
    BATCH_SIZE = 64
    img_size = (128, 128, 3)

    # Cross-validation.
    n_fold = 5
    fold_selected = 1

    # Training schedule.
    epochs = 100
    seed = 42
    steps_per_epoch_train = None
    steps_per_epoch_val = None

In [None]:
class DataGenerator(tf.keras.utils.Sequence):
    """Batch loader for the scan images listed in a DataFrame.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs ``path``, ``width`` and ``height``. With ``subset == "train"`` it
        also needs ``large_bowel``, ``small_bowel`` and ``stomach`` (RLE
        strings); otherwise it needs ``id`` and ``class``.
    batch_size : int
        Rows per batch.
    subset : str
        ``"train"`` makes ``__getitem__`` return ``(X, y)``; any other value
        makes it return ``(X, ids, widths, heights, classes)``.
    shuffle : bool
        Reshuffle the row order on construction and after every epoch.
    img_shape : tuple
        ``(height, width, channels)`` of the arrays that are produced.

    Only full batches are served: ``__len__`` is ``len(df) // batch_size``, so
    up to ``batch_size - 1`` trailing rows are never returned.
    """

    def __init__(self, df, batch_size = CFG.BATCH_SIZE, subset="train", shuffle=False , img_shape = CFG.img_size):
        super().__init__()
        self.df = df
        self.shuffle = shuffle
        self.subset = subset
        self.batch_size = batch_size
        self.img_shape = img_shape
        self.indexes = np.arange(len(df))
        self.on_epoch_end()

    def __len__(self):
        """Number of full batches."""
        return int(np.floor(len(self.df) / self.batch_size))

    def on_epoch_end(self):
        """Reshuffle the row order in place when shuffling is enabled."""
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __getitem__(self, index):
        """Build batch number ``index`` (see the class docstring for the layout)."""
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]
        is_train = self.subset == 'train'
        out_h, out_w, out_c = self.img_shape[0], self.img_shape[1], self.img_shape[2]

        # Sized by the rows actually selected, so no uninitialised rows are returned.
        X = np.empty((len(indexes), out_h, out_w, out_c))
        # Targets are only needed (and only returned) for training batches.
        y = np.empty((len(indexes), out_h, out_w, out_c)) if is_train else None
        id_, heights, widths, classes = [] , [] ,[], []

        for i, img_path in enumerate(self.df['path'].iloc[indexes]):
            row = indexes[i]
            if not is_train:
                id_.append(self.df['id'].iloc[row])
                heights.append(self.df['height'].iloc[row])
                widths.append(self.df['width'].iloc[row])
                classes.append(self.df['class'].iloc[row])

            # The single-channel image is broadcast over all channels of X.
            X[i,] = self.__load_grayscale(img_path)

            if is_train:
                w = self.df['width'].iloc[row]
                h = self.df['height'].iloc[row]
                for k, organ in enumerate(["large_bowel","small_bowel","stomach"]):
                    rles = self.df[organ].iloc[row]
                    mask = rle_decode(rles, shape=(h, w, 1))
                    # cv2.resize expects dsize as (width, height).
                    mask = cv2.resize(mask, (out_w, out_h))
                    y[i,:,:,k] = mask

        if is_train:
            return X, y
        else:
            return X , id_ , widths , heights , classes

    def __load_grayscale(self, img_path):
        """Read one image, resize it to the target size and min-max scale it.

        Returns a float array of shape ``(height, width, 1)`` with values in
        [0, 1], quantised to 256 levels. A constant image becomes all zeros.

        Raises
        ------
        FileNotFoundError
            If OpenCV cannot read ``img_path``.
        """
        img = cv2.imread(img_path, cv2.IMREAD_ANYDEPTH)
        if img is None:
            # cv2.imread signals failure by returning None, not by raising.
            raise FileNotFoundError("Could not read image: " + str(img_path))
        # cv2.resize expects dsize as (width, height); img_shape is (height, width, ...).
        dsize = (self.img_shape[1], self.img_shape[0])
        img = cv2.resize(img, dsize).astype('float32')
        lo, hi = img.min(), img.max()
        if hi > lo:
            img = (img - lo) / (hi - lo) * 255.0   # scale image to [0, 255]
        else:
            # A constant image would divide by zero and yield NaN.
            img = np.zeros_like(img)
        img = img.astype('uint8')/255
        img = np.expand_dims(img, axis=-1)
        return img


In [None]:
def infer(DF , model , batch_size = CFG.BATCH_SIZE) : 
    """Predict a run-length encoded mask for every row of ``DF``.

    Parameters
    ----------
    DF : pandas.DataFrame
        Rows to predict, with the columns DataGenerator needs for a non-train
        subset (``path``, ``id``, ``class``, ``width``, ``height``).
    model :
        Object with a ``predict(batch, verbose=0)`` method whose output is
        indexed as ``[sample, row, col, channel]``.
    batch_size : int
        Rows per forward pass. Rows left over after the last full batch are
        predicted in one extra, smaller batch instead of being dropped.

    Returns
    -------
    (pred_rle, pred_ids, pred_classes) : three lists of equal length, in the
    row order of ``DF``.
    """
    pred_rle = []; pred_ids = []; pred_classes = []
    # Output channel per class name; any other name uses channel 2.
    channel_of = {'large_bowel': 0, 'small_bowel': 1}

    # Split DF into a part made of full batches and a remainder served as one
    # smaller batch, so every row gets a prediction.
    n_rows = len(DF)
    n_full = (n_rows // batch_size) * batch_size
    parts = []
    if n_full:
        parts.append((DF.iloc[:n_full], batch_size))
    if n_rows > n_full:
        parts.append((DF.iloc[n_full:], n_rows - n_full))
    generators = [DataGenerator(part, batch_size=size, subset="test", shuffle=False)
                  for part, size in parts]

    def _batches():
        # Index explicitly so iteration relies only on __len__ and __getitem__.
        for generator in generators:
            for batch_index in range(len(generator)):
                yield generator[batch_index]

    n_batches = sum(len(generator) for generator in generators)
    for img, id_, widths, heights, classes in tqdm(_batches(), total=n_batches):
        preds = model.predict(img,verbose=0)

        # Rle encode
        for j in range(len(id_)):
            k = channel_of.get(classes[j], 2)

            # Resize probabilities back to the original image size; dsize is
            # (width, height).
            pred_img = cv2.resize(preds[j,:,:,k], (int(widths[j]), int(heights[j])),
                                  interpolation=cv2.INTER_NEAREST)
            pred_img = (pred_img>0.5).astype(dtype='uint8')    # classify

            pred_ids.append(id_[j])
            pred_classes.append(classes[j])
            pred_rle.append(rle_encode(pred_img))

    return pred_rle, pred_ids , pred_classes

In [None]:
import cv2
# Override the configured batch size, then run inference. The value is passed
# explicitly because infer()'s default was bound when the function was defined.
CFG.BATCH_SIZE = 3   
pred_rle, pred_ids , pred_classes = infer(test_df, unet_model, batch_size = CFG.BATCH_SIZE)

In [None]:
# Absolute locations of the competition files.
repertory = '/kaggle/input/'

DIR = f'{repertory}uw-madison-gi-tract-image-segmentation/'
TRAIN_DIR = f'{DIR}train'
TEST_DIR = f'{DIR}test'
train_csv = f'{DIR}train.csv'
sample_sub = f'{DIR}sample_submission.csv'

df_train = pd.read_csv(train_csv)


## Creating Submission file

In [None]:
# One row per prediction, in the order infer() produced them.
submission = pd.DataFrame({
    "id":pred_ids,
    "class":pred_classes,
    "predicted":pred_rle
})

# Reload the reference frame and drop its target column so that the merge
# below contributes the predicted values instead.
if debug :
    sub_df = pd.read_csv(train_csv)
    del sub_df['segmentation']
else:
    sub_df = pd.read_csv(sample_sub)
    del sub_df['predicted']

# Inner merge: only (id, class) pairs that received a prediction are kept.
sub_df = sub_df.merge(submission, on=['id','class'])
sub_df.to_csv('submission.csv',index=False)

# Preview up to 10 random rows; sample() raises if asked for more rows than exist.
submission.sample(min(10, len(submission)))

In [None]:
# Writes the raw prediction frame to the same path that the previous cell wrote
# the merged frame to, replacing that file.
# NOTE(review): confirm which of the two frames is meant to be the final submission.
submission.to_csv("submission.csv", index = False)
submission