In [None]:
import numpy as np
import pandas as pd
from PIL import Image
import matplotlib.pyplot as plt
import cv2
import os
from tqdm import tqdm

In [None]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Conv2D, BatchNormalization, Activation, Dropout, MaxPooling2D, Conv2DTranspose, Concatenate
from tensorflow.keras import Input, Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import binary_crossentropy

In [None]:
#https://www.kaggle.com/titericz/building-and-visualizing-masks
#https://www.kaggle.com/paulorzp/rle-functions-run-lenght-encode-decode

#defining function for converting EncodedPixels(rle: run length encoding) to mask
def rle2mask(rle_string, img_shape=(256,1600)):
    '''
    Decode a run-length-encoded (RLE) string into a binary mask.

    input:
        rle_string: space separated "start length start length ..." values.
            Starts are 1-based and count pixels from top to bottom, then left
            to right (column-major order). None, NaN and '' all decode to an
            empty (all-zero) mask.
        img_shape: (height, width) of the mask, i.e. (rows, columns).
    output: numpy.ndarray of dtype uint8 with shape img_shape; pixels inside a
        run are 1, all others 0.
    raises: ValueError if rle_string holds an odd number of values.
    '''
    # Treat a missing value (None / NaN, e.g. an empty cell read by pandas)
    # the same way as an empty encoding instead of failing on .split()
    if rle_string is None or (isinstance(rle_string, float) and np.isnan(rle_string)):
        tokens = []
    else:
        tokens = rle_string.split()
    if len(tokens) % 2:
        raise ValueError('rle_string must contain (start, length) pairs, got %d values' % len(tokens))

    rle_array = np.array([int(s) for s in tokens], dtype=np.int64)
    starts_array = rle_array[::2] - 1  # convert 1-based starts to 0-based offsets
    lengths_array = rle_array[1::2]
    mask_array = np.zeros(img_shape[0]*img_shape[1], dtype=np.uint8)
    for start, length in zip(starts_array, lengths_array):
        mask_array[start:start + length] = 1
    #order='F' because encoded pixels are numbered from top to bottom, then left to right
    return mask_array.reshape(img_shape, order = 'F')

#defining function for converting given mask to EncodedPixels(rle: run length encoding)
def mask2rle(mask_array):
    '''
    Encode a binary mask as a run-length-encoded (RLE) string.

    input: mask in numpy.ndarray format (non-zero runs are the encoded pixels)
    output: EncodedPixels string "start length start length ..." with 1-based
            starts in column-major pixel order; '' when the mask is all zeros
    '''
    # Column-major flattening: pixels are numbered top to bottom, then left to right
    flat = mask_array.flatten(order='F')
    # A zero on each side guarantees every run has both an opening and a closing edge
    padded = np.concatenate([[0], flat, [0]])
    # 1-based positions where the value changes: even entries open a run,
    # odd entries are one past the end of that run
    edges = np.flatnonzero(padded[1:] != padded[:-1]) + 1
    run_starts = edges[::2]
    run_lengths = edges[1::2] - run_starts
    pairs = np.column_stack((run_starts, run_lengths)).ravel()
    return ' '.join(str(value) for value in pairs)

#defining function for calculation of metric dice coefficient
def dice_coefficient(y_true, y_pred):
    """Return the Dice coefficient between two tensors, computed over all elements.

    Both tensors are flattened, so the score covers the whole batch and every
    channel at once: (2 * sum(y_true * y_pred)) / (sum(y_true) + sum(y_pred)).
    A tiny constant is added to numerator and denominator so the ratio stays
    defined (and evaluates to 1) when both tensors sum to zero.
    """
    eps = 1e-9
    flat_true = tf.reshape(y_true, [-1])
    flat_pred = tf.reshape(y_pred, [-1])
    overlap = tf.math.reduce_sum(flat_true * flat_pred)
    total = tf.math.reduce_sum(flat_true) + tf.math.reduce_sum(flat_pred)
    return (2. * overlap + eps) / (total + eps)

#defining function for calculation of dice loss (1 - dice coefficient)
def dice_loss(y_true, y_pred):
    """Return the Dice loss, i.e. 1 - dice_coefficient(y_true, y_pred).

    dice_coefficient flattens its inputs itself, so no reshaping is done here.
    """
    return 1 - dice_coefficient(y_true, y_pred)

#defining function for calculation of loss function: binary cross entropy + dice loss
def bce_dice_loss(y_true, y_pred):
    """Return binary cross-entropy plus Dice loss for the given tensors.

    binary_crossentropy is evaluated on the un-flattened tensors; the Dice
    term is a single value computed over all elements and is added to it by
    broadcasting.
    """
    return binary_crossentropy(y_true, y_pred) + dice_loss(y_true, y_pred)

In [None]:
# defining Unet architecture
# https://towardsdatascience.com/understanding-semantic-segmentation-with-unet-6be4f42d4b47
# https://github.com/hlamba28/UNET-TGS

def conv2D_block(input_tensor, n_filters, kernel_size = 3, batchnorm = True ):
    """Apply the convolution stage used at every U-Net level and return its output.

    Parameters:
        input_tensor: Keras tensor the block is applied to.
        n_filters: number of filters of each Conv2D layer.
        kernel_size: side length of the square convolution kernel.
        batchnorm: when True a BatchNormalization layer is inserted between
            each convolution and its ReLU.

    Returns:
        The activated output of the second convolution ('same' padding, so the
        spatial size of input_tensor is kept).
    """
    # first Conv2D layer
    x = Conv2D(filters = n_filters, kernel_size = (kernel_size, kernel_size),
               kernel_initializer = 'he_normal', padding = 'same')(input_tensor)
    if batchnorm:
        x = BatchNormalization()(x)
    # ReLU must follow the convolution whether or not batch normalisation is
    # enabled; otherwise batchnorm=False would leave the block purely linear.
    x = Activation('relu')(x)

    # second Conv2D layer
    # NOTE(review): this layer is fed `input_tensor`, not the output of the
    # first layer, so the first conv/BN/ReLU stack does not contribute to the
    # returned tensor. That looks unintended, but the wiring decides which
    # layers end up in the model, and weights saved from a model built with
    # this wiring would presumably no longer load if it changed - confirm
    # against the checkpoint before re-wiring to `x`.
    x = Conv2D(filters = n_filters, kernel_size = (kernel_size, kernel_size),
               kernel_initializer = 'he_normal', padding = 'same')(input_tensor)
    if batchnorm:
        x = BatchNormalization()(x)
    x = Activation('relu')(x)

    return x


def Unet_Model(input_image, n_filters = 16, dropout = 0.1, batchnorm = True):
    """Build a U-Net style Keras Model on top of `input_image`.

    Parameters:
        input_image: Keras Input tensor the network is built on. There are four
            2x2 poolings, so height and width should be multiples of 16 for the
            skip concatenations to line up.
        n_filters: filter count of the first level; deeper levels use 2x, 4x,
            8x and 16x this value.
        dropout: rate of the Dropout layer applied after every pooling and
            every skip concatenation.
        batchnorm: forwarded to conv2D_block.

    Returns:
        A Model mapping `input_image` to a 4-channel sigmoid output with the
        same spatial size as the input.
    """
    # Encoder (Contraction Path)
    # The network is built from the `input_image` argument, not from a
    # notebook-level global, so the function works for any Input passed in.
    E1 = conv2D_block(input_image, n_filters * 1, kernel_size = 3, batchnorm = batchnorm)
    p1 = MaxPooling2D((2, 2))(E1)
    p1 = Dropout(dropout)(p1)

    E2 = conv2D_block(p1, n_filters * 2, kernel_size = 3, batchnorm = batchnorm)
    p2 = MaxPooling2D((2, 2))(E2)
    p2 = Dropout(dropout)(p2)

    E3 = conv2D_block(p2, n_filters * 4, kernel_size = 3, batchnorm = batchnorm)
    p3 = MaxPooling2D((2, 2))(E3)
    p3 = Dropout(dropout)(p3)

    E4 = conv2D_block(p3, n_filters * 8, kernel_size = 3, batchnorm = batchnorm)
    p4 = MaxPooling2D((2, 2))(E4)
    p4 = Dropout(dropout)(p4)

    # Bottleneck
    E5 = conv2D_block(p4, n_filters = n_filters * 16, kernel_size = 3, batchnorm = batchnorm)

    # Decoder (Expansive Path): upsample, concatenate the encoder output of the
    # same resolution (skip connection), apply dropout, then convolve.
    D6 = Conv2DTranspose(n_filters * 8, kernel_size = (3, 3), strides = (2, 2), padding = 'same')(E5)
    D6 = Concatenate()([D6, E4])
    D6 = Dropout(dropout)(D6)
    E6 = conv2D_block(D6, n_filters * 8, kernel_size = 3, batchnorm = batchnorm)

    D7 = Conv2DTranspose(n_filters * 4, (3, 3), strides = (2, 2), padding = 'same')(E6)
    D7 = Concatenate()([D7, E3])
    D7 = Dropout(dropout)(D7)
    E7 = conv2D_block(D7, n_filters * 4, kernel_size = 3, batchnorm = batchnorm)

    D8 = Conv2DTranspose(n_filters * 2, (3, 3), strides = (2, 2), padding = 'same')(E7)
    D8 = Concatenate()([D8, E2])
    D8 = Dropout(dropout)(D8)
    E8 = conv2D_block(D8, n_filters * 2, kernel_size = 3, batchnorm = batchnorm)

    D9 = Conv2DTranspose(n_filters * 1, (3, 3), strides = (2, 2), padding = 'same')(E8)
    D9 = Concatenate()([D9, E1])
    D9 = Dropout(dropout)(D9)
    E9 = conv2D_block(D9, n_filters * 1, kernel_size = 3, batchnorm = batchnorm)

    # One sigmoid channel per output class (4 channels)
    outputs = Conv2D(4, (1, 1), activation='sigmoid')(E9)
    model = Model(inputs=[input_image], outputs=[outputs])
    return model

In [None]:
# Build the U-Net on 256x1600 3-channel inputs and attach the loss / metric
# defined above, then print the layer summary.
input_img = Input(shape=(256, 1600, 3), name='img')
model = Unet_Model(input_img, n_filters=8, dropout=0.2, batchnorm=True)
model.compile(
    optimizer=Adam(),
    loss=bce_dice_loss,
    metrics=[dice_coefficient],
)
model.summary()

In [None]:
# Restore the trained weights into the freshly built model.
# The architecture built above has to match the one the file was saved from.
weights_path = '../input/unet1/unet(trained-60epochs).h5'
model.load_weights(weights_path)

In [None]:
class PredictDataGenerator(tf.keras.utils.Sequence):
    """Keras Sequence that feeds batches of test images to `model.predict`.

    Parameters:
        dataframe: DataFrame with an 'ImageId' column holding image file names.
        list_idcs: index labels of `dataframe` (looked up with `.loc`) selecting
            the images this generator serves, in order.
        batch_size: maximum number of images per batch. The last batch is
            shorter when len(list_idcs) is not a multiple of batch_size.
        image_dir: directory the image files are read from.

    Each batch is a float32 array of shape (n, 256, 1600, 3).
    NOTE(review): images are assumed to be 256x1600 with 3 channels and are
    passed on with their raw pixel values (no rescaling) - confirm this
    matches the preprocessing used when the model was trained.
    """

    def __init__(self, dataframe, list_idcs, batch_size=32,
                 image_dir='../input/severstal-steel-defect-detection/test_images/'):
        # Lets the Keras base class initialise itself where it defines __init__
        super().__init__()
        self.batch_size = batch_size
        self.df = dataframe
        self.list_idcs = list_idcs
        self.image_dir = image_dir
        # Positions into list_idcs (not dataframe labels): __getitem__ slices
        # this list and maps every position to the label stored in list_idcs.
        self.indices = list(range(len(self.list_idcs)))
        self.rem = len(self.list_idcs) % (self.batch_size)
        self.on_epoch_end()

    def __len__(self):
        """Number of batches, counting a trailing partial batch."""
        return int(np.ceil(len(self.list_idcs) / self.batch_size))

    def __getitem__(self, index):
        """Return batch number `index` as an array of images."""
        positions = self.indices[index * self.batch_size:(index + 1) * self.batch_size]
        batch = [self.list_idcs[k] for k in positions]
        return self.__get_data(batch)

    def on_epoch_end(self):
        self.index = np.arange(len(self.indices))

    def __get_data(self, batch):
        """Load the images whose dataframe labels are listed in `batch`."""
        # Sized by the actual batch so a short final batch carries no
        # uninitialised rows.
        X = np.empty((len(batch), 256, 1600, 3), dtype=np.float32)  # image place-holders

        for i, image_label in enumerate(batch):
            image_path = os.path.join(self.image_dir, str(self.df['ImageId'].loc[image_label]))
            # Context manager closes the file handle once the pixels are copied
            with Image.open(image_path) as img:
                X[i] = np.asarray(img)  # input image

        return X

In [None]:
# Collect the file names of all test images into a one-column DataFrame
test_images_dir = '../input/severstal-steel-defect-detection/test_images'
test_img_IDs = os.listdir(test_images_dir)
test_imgsIds_df = pd.DataFrame({'ImageId': test_img_IDs})
print(len(test_imgsIds_df))
test_imgsIds_df.head()

In [None]:
# Empty frame with the columns the prediction rows will be collected under
submission_columns = ['ImageId', 'EncodedPixels', 'ClassId']
SubmissionDf = pd.DataFrame(columns=submission_columns)

In [None]:
# Predict the test set in chunks of 320 images and run-length-encode every
# predicted class mask. Rows are gathered in a plain list and turned into a
# DataFrame once at the end: DataFrame.append copied the whole frame on every
# call and no longer exists in pandas 2.x. Rebuilding SubmissionDf from scratch
# also makes this cell safe to re-run without duplicating rows.
submission_rows = []
for i in range(0, len(test_imgsIds_df), 320):
    batch_idcs = list(range(i, min(test_imgsIds_df.shape[0], i + 320)))
    # Full chunks are served in 32-image batches (320 = 10 * 32). A shorter
    # final chunk is sent as a single batch of its own size so that no image is
    # left out even if the generator only emits complete batches.
    # NOTE(review): that single batch can hold up to 319 full-size images -
    # confirm it fits in memory.
    batch_size = 32 if len(batch_idcs) == 320 else len(batch_idcs)
    test_subbatch = PredictDataGenerator(dataframe = test_imgsIds_df,
                                         list_idcs = batch_idcs,
                                         batch_size = batch_size)
    subbatch_pred_masks = model.predict(test_subbatch)
    # tqdm wraps the list itself (not enumerate) so it can show the total
    for j, idx in enumerate(tqdm(batch_idcs)):
        filename = test_imgsIds_df['ImageId'].iloc[idx]
        for channel in range(4):
            # Round the sigmoid output to {0, 1} before encoding
            rle = mask2rle(subbatch_pred_masks[j, :, :, channel].round().astype(int))
            submission_rows.append({'ImageId': filename,
                                    'EncodedPixels': rle,
                                    'ClassId': str(channel + 1)})

SubmissionDf = pd.DataFrame(submission_rows, columns = ['ImageId', 'EncodedPixels', 'ClassId'])

In [None]:
# Sanity check: the prediction loop adds four rows (one per class) per test image
SubmissionDf.shape

In [None]:
# Order the rows by image, then by class (the original row index is kept)
SubmissionDf = SubmissionDf.sort_values(by=['ImageId', 'ClassId'])
SubmissionDf.head(10)

In [None]:
# Build the combined "<ImageId>_<ClassId>" key column written to the submission file
image_class_key = SubmissionDf['ImageId'] + '_' + SubmissionDf['ClassId']
SubmissionDf['ImageId_ClassId'] = image_class_key
SubmissionDf

In [None]:
# Write only the key and the encoded masks, without the DataFrame index
submission_columns_out = ['ImageId_ClassId', 'EncodedPixels']
SubmissionDf[submission_columns_out].to_csv('submission.csv', index=False)