In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os
from tqdm import tqdm_notebook
import cv2
from tensorflow.python.keras import backend as K
from sklearn.model_selection import train_test_split
import tensorflow as tf

import keras
from keras.layers import UpSampling2D, Conv2D, Activation, Conv2DTranspose
from keras import Model

In [None]:
# Samples per training batch; passed to DataGenerator and used to derive steps_per_epoch.
BATCH_SIZE = 16
# Number of epochs passed to model.fit_generator.
EPOCHS = 10
# Side length in pixels of the square network input (VGG16 input_shape and test-image resize).
IMG_SIZE = 256

# Directory the training DataGenerator reads images from.
train_dir = '../input/severstal-steel-defect-detection/train_images'

In [None]:
class DataGenerator(keras.utils.Sequence):
    """Keras ``Sequence`` producing (image, binary mask) batches for defect class 4.

    Images are read from ``image_dir`` with OpenCV, resized to
    ``(img_h, img_w)`` and divided by 255. Masks are decoded from the
    ``EncodedPixels`` column of the module-level ``df_train`` frame with the
    module-level ``rle_to_mask`` helper, so both names must exist before the
    first batch is requested.

    Parameters
    ----------
    list_ids : sequence of str
        Image file names, indexed with the integers ``0 .. len(list_ids) - 1``.
    image_dir : str
        Directory containing the image files.
    batch_size : int
        Number of samples per batch; a trailing partial batch is dropped.
    img_h, img_w : int
        Output height and width of images and masks.
    shuffle : bool
        Reshuffle the sample order at construction and after every epoch.
    """

    def __init__(self, list_ids, image_dir, batch_size=32,
                 img_h=256, img_w=256, shuffle=True):
        self.list_ids = list_ids
        self.image_dir = image_dir
        self.batch_size = batch_size
        self.img_h = img_h
        self.img_w = img_w
        self.shuffle = shuffle
        # Build the initial (optionally shuffled) index order.
        self.on_epoch_end()

    def __len__(self):
        """Return the number of full batches per epoch."""
        # Integer division states the intent directly; the original applied
        # np.floor to len() instead of to the quotient.
        return len(self.list_ids) // self.batch_size

    def __getitem__(self, index):
        """Return batch number ``index`` as a tuple ``(X, y)``."""
        start = index * self.batch_size
        indexes = self.indexes[start:start + self.batch_size]
        list_ids_temp = [self.list_ids[k] for k in indexes]
        X, y = self.__data_generation(list_ids_temp)
        return X, y

    def on_epoch_end(self):
        """Reset the sample order, shuffling it when ``shuffle`` is set."""
        self.indexes = np.arange(len(self.list_ids))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __data_generation(self, list_ids_temp):
        """Load images and class-4 masks for the given image names.

        Returns
        -------
        X : ndarray, shape (n, img_h, img_w, 3)
            Images divided by 255.
        y : ndarray, shape (n, img_h, img_w, 1)
            Integer masks, 1 where the resized mask is > 0, else 0.

        Raises
        ------
        FileNotFoundError
            If OpenCV cannot read one of the image files.
        """
        n_samples = len(list_ids_temp)
        X = np.empty((n_samples, self.img_h, self.img_w, 3))
        y = np.empty((n_samples, self.img_h, self.img_w, 1))

        for idx, image_id in enumerate(list_ids_temp):
            file_path = os.path.join(self.image_dir, image_id)
            image = cv2.imread(file_path, cv2.IMREAD_COLOR)
            if image is None:
                # cv2.imread signals failure by returning None; fail loudly here
                # rather than with an opaque error inside cv2.resize.
                raise FileNotFoundError('cannot read image: ' + file_path)

            # The RLE indexes pixels of the un-resized image, so decode the mask
            # at the source resolution instead of a hard-coded 1600x256.
            src_h, src_w = image.shape[:2]

            image_resized = cv2.resize(image, (self.img_w, self.img_h))
            X[idx,] = np.array(image_resized, dtype=np.float64)

            rle_name = image_id + '_' + '4'
            rle = df_train[df_train['ImageId_ClassId'] == rle_name]['EncodedPixels'].values[0]

            class_mask = rle_to_mask(rle, width=src_w, height=src_h)
            class_mask_resized = cv2.resize(class_mask, (self.img_w, self.img_h))
            y[idx,] = np.expand_dims(class_mask_resized, -1)

        # Scale images to [0, 1] and binarise the masks.
        X = X / 255
        y = (y > 0).astype(int)

        return X, y

In [None]:
# Read the training annotation table, then report its row count and first rows.
train_csv_path = '../input/severstal-steel-defect-detection/train.csv'
df_train = pd.read_csv(train_csv_path)
print(df_train.shape[0])
df_train.head()

In [None]:
# Keep only rows that have an encoded mask and whose class suffix is '4'.
has_mask = df_train['EncodedPixels'].notnull()
is_class_4 = df_train['ImageId_ClassId'].apply(lambda name: name.split('_')[1] == '4')
df_train = df_train[has_mask & is_class_4].reset_index(drop=True)
print(df_train.shape[0])
df_train.head()

In [None]:
# Derive the image file name from the 'ImageId_ClassId' key (the part before '_').
df_train['ImageId'] = df_train['ImageId_ClassId'].apply(lambda x: x.split('_')[0])
listdir = df_train['ImageId'].values
# Fixed seed so the 80/20 split is the same on every Restart & Run All.
train, valid = train_test_split(listdir, train_size=0.8, random_state=42)
df_train.head()

In [None]:
def rle_to_mask(rle_string, height, width):
    """Decode a run-length-encoded string into a 2-D uint8 mask.

    The encoding is a whitespace-separated list of ``start length`` pairs.
    ``start`` is a 1-based index into the image flattened column by column.

    Parameters
    ----------
    rle_string : str or any
        The encoded mask. Non-string values (e.g. NaN, None) and strings with
        fewer than two tokens (``''``, ``' '``) decode to an empty mask.
    height, width : int
        Shape of the decoded mask.

    Returns
    -------
    ndarray of shape (height, width), dtype uint8
        255 inside the encoded runs, 0 elsewhere.

    Raises
    ------
    ValueError
        If the string holds an odd number of tokens or a non-integer token.
    """
    rows, cols = height, width
    img = np.zeros(rows * cols, dtype=np.uint8)

    # Missing annotations arrive as NaN/None and placeholder rows as blanks;
    # treat everything that is not a real token list as "no mask".
    tokens = rle_string.split() if isinstance(rle_string, str) else []
    if len(tokens) > 1:
        rle_pairs = np.array([int(token) for token in tokens]).reshape(-1, 2)
        for start, length in rle_pairs:
            start -= 1  # RLE positions are 1-based
            img[start:start + length] = 255

    # The flat buffer is column-major: reshape to (cols, rows), then transpose.
    return img.reshape(cols, rows).T

In [None]:
# Pull the first batch from a throwaway generator to sanity-check the shapes.
preview_generator = DataGenerator(
    df_train['ImageId'],
    '../input/severstal-steel-defect-detection/train_images',
    batch_size=32, img_h=256, img_w=256, shuffle=True)
for x, y in preview_generator:
    break

print(x.shape, y.shape)

In [None]:
# Show the sample at index 3 of the preview batch `x` (the generator has already divided it by 255).
plt.imshow(np.squeeze(x[3]))

In [None]:
# Show the binary mask at index 3 of the preview batch `y`; squeeze drops the trailing channel axis.
plt.imshow(np.squeeze(y[3]))

In [None]:
'metric and loss function for evaluation'
def dice_coef(y_true, y_pred, smooth=1):
    """Smoothed Dice coefficient computed over all elements of both tensors.

    Returns ``(2 * sum(t * p) + smooth) / (sum(t) + sum(p) + smooth)`` where
    ``t`` and ``p`` are the flattened ``y_true`` and ``y_pred``.
    """
    truth_flat = K.flatten(y_true)
    pred_flat = K.flatten(y_pred)
    overlap = K.sum(truth_flat * pred_flat)
    numerator = 2 * overlap + smooth
    denominator = K.sum(truth_flat) + K.sum(pred_flat) + smooth
    return numerator / denominator

def loss_dice_coef(y_true, y_pred, smooth=1):
    """Negative log of the smoothed Dice coefficient, used as the training loss.

    The ratio is ``(2 * sum(t * p) + smooth) / (sum(t) + sum(p) + smooth)``
    over the flattened tensors; the loss is ``-log(ratio)``.
    """
    truth_flat = K.flatten(y_true)
    pred_flat = K.flatten(y_pred)
    overlap = K.sum(truth_flat * pred_flat)
    numerator = 2 * overlap + smooth
    denominator = K.sum(truth_flat) + K.sum(pred_flat) + smooth
    return -K.log(numerator / denominator)

In [None]:
# Load the pretrained model: build VGG16 without its top layers, then read
# the weights from a local .h5 file instead of downloading them.
from keras.applications.vgg16 import VGG16

vgg_weights_path = '../input/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5'
base_model = VGG16(include_top=False, weights=None,
                   input_shape=(IMG_SIZE, IMG_SIZE, 3))
base_model.load_weights(vgg_weights_path)

In [None]:
# Mark the VGG16 base as non-trainable before the full model is built and compiled.
base_model.trainable = False

In [None]:
# Decoder head on top of the VGG16 feature map.
# Shape comments are channels-last (H, W, C) for a 256x256 input.
base_out = base_model.output  # (8, 8, C)
conv1 = Conv2DTranspose(8, (2, 2), strides=(2, 2), padding='same')(base_out)  # (16, 16, 8)
up = UpSampling2D(8, interpolation='bilinear')(conv1)  # (128, 128, 8)
conv2 = Conv2DTranspose(1, (2, 2), strides=(2, 2), padding='same')(up)  # (256, 256, 1)
conv3 = Conv2D(1, (1, 1))(conv2)
conv4 = Activation('sigmoid')(conv3)  # per-pixel value in (0, 1)

# `inputs`/`outputs` are the Keras 2 keyword names; `input`/`output` are legacy aliases.
model = Model(inputs=base_model.input, outputs=conv4)
model.compile(keras.optimizers.Adam(lr=0.0001), loss=loss_dice_coef, metrics=[dice_coef])
# Call summary(); the bare attribute only displayed the bound method.
model.summary()

In [None]:
# NOTE(review): the generator is fed `listdir` (every class-4 image), not the
# `train` part of the earlier train/valid split. Confirm this is intended.
train_size = len(listdir)
train_generator = DataGenerator(listdir, train_dir,
                                batch_size=BATCH_SIZE, shuffle=True)

In [None]:
%%time
# One epoch covers every full batch of the training generator.
steps_per_epoch = train_size // BATCH_SIZE
history = model.fit_generator(
    generator=train_generator,
    steps_per_epoch=steps_per_epoch,
    epochs=EPOCHS,
    verbose=1,
    shuffle=True,
)

In [None]:
# Use a separate name for the history: rebinding `dice_coef` to a list would
# shadow the metric function and break any later re-run of the compile cell.
train_dice = history.history['dice_coef']
ep = np.arange(len(train_dice)) + 1  # 1-based epoch numbers

fig, ax = plt.subplots()
ax.plot(ep, train_dice, label='train')
ax.set_xlabel('epoch')
ax.set_ylabel('dice_coef')
ax.legend()
plt.show()

In [None]:
# Predict on the preview batch and show sample 3 thresholded at 0.5.
pred = model.predict(x)
pred_mask = np.squeeze(pred[3] > 0.5).astype(int)
plt.imshow(pred_mask)

In [None]:
# List the test image file names and display how many there are.
test_dir = "../input/severstal-steel-defect-detection/test_images/"
testfiles = os.listdir(test_dir)
len(testfiles)

In [None]:
%%time
# Read every test image and resize it to the network input size.
# Pixels stay as OpenCV returns them; no /255 scaling happens in this cell.
test_img = []
for fn in tqdm_notebook(testfiles):
    raw = cv2.imread('../input/severstal-steel-defect-detection/test_images/' + fn)
    test_img.append(cv2.resize(raw, (IMG_SIZE, IMG_SIZE)))

In [None]:
%%time
# The network was trained on inputs divided by 255 (see DataGenerator), so the
# raw 0-255 test pixels need the same scaling before inference.
test_batch = np.asarray(test_img, dtype=np.float32) / 255
predict = model.predict(test_batch)
print(len(predict))

In [None]:
def mask_to_rle(mask):
    '''
    Convert a mask into RLE

    The mask is flattened column by column; each run of foreground pixels
    is emitted as a "start length" pair with a 1-based start.

    Parameters:
    mask (numpy.array): binary mask of numpy array where 1 - mask, 0 - background

    Returns:
    string: run length encoding (empty string when the mask has no foreground)
    '''
    flat = mask.T.flatten()
    # Pad with background so runs touching either end still yield two transitions.
    padded = np.concatenate([[0], flat, [0]])
    transitions = np.where(padded[1:] != padded[:-1])[0] + 1
    # Odd positions hold run ends; turn them into lengths in place.
    transitions[1::2] -= transitions[::2]
    return ' '.join(map(str, transitions))

In [None]:
%%time
# Upscale each predicted map to 1600x256, binarise at 0.5 and RLE-encode it.
pred_rle = []
for prob_map in tqdm_notebook(predict):
    full_size = cv2.resize(prob_map, (1600, 256))
    binary = np.copy(full_size)
    binary[binary < 0.5] = 0
    binary[binary > 0] = 1
    pred_rle.append(mask_to_rle(binary))

In [None]:
# Show the test image at index 4 of `testfiles`.
sample_test_path = '../input/severstal-steel-defect-detection/test_images/' + testfiles[4]
img_t = cv2.imread(sample_test_path)
plt.imshow(img_t)

In [None]:
# Decode the predicted RLE for the same test image and show it.
mask_t = rle_to_mask(pred_rle[4], height=256, width=1600)
plt.imshow(mask_t)

In [None]:
# Load the sample submission; the converter sets every 'EncodedPixels' cell to a single space.
sample_submission_path = '../input/severstal-steel-defect-detection/sample_submission.csv'
sub = pd.read_csv(sample_submission_path,
                  converters={'EncodedPixels': lambda e: ' '})
sub.head()

In [None]:
%%time
# Write each predicted RLE into the class-4 row of its image.
# Uses one vectorised .loc assignment: the chained form `sub[col][mask] = value`
# may write to a temporary copy and recomputed both masks for every file.
key_parts = sub['ImageId_ClassId'].str.split('_')
image_ids = key_parts.str[0]
class_ids = key_parts.str[1]

rle_by_image = dict(zip(testfiles, pred_rle))
target_rows = (class_ids == '4') & image_ids.isin(list(rle_by_image))
sub.loc[target_rows, 'EncodedPixels'] = image_ids[target_rows].map(rle_by_image)

In [None]:
# Show the image referenced by submission row 47 (file name is the part before '_').
row_image_name = sub['ImageId_ClassId'][47].split('_')[0]
img_s = cv2.imread('../input/severstal-steel-defect-detection/test_images/' + row_image_name)
plt.imshow(img_s)

In [None]:
# Decode the RLE stored in submission row 47 and show it.
row_rle = sub['EncodedPixels'][47]
mask_s = rle_to_mask(row_rle, height=256, width=1600)
plt.imshow(mask_s)

In [None]:
# Write the submission table to 'submission.csv' without the DataFrame index column.
sub.to_csv('submission.csv', index=False)