In [None]:
import os
import json
import gc

import cv2
import keras
from keras import backend as K
from keras import layers
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Model, load_model
from keras.layers import Input
from keras.layers.convolutional import Conv2D, Conv2DTranspose
from keras.layers.pooling import MaxPooling2D
from keras.layers.merge import concatenate
from keras.optimizers import Adam
from keras.callbacks import Callback, ModelCheckpoint
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from tqdm import tqdm
from sklearn.model_selection import train_test_split

# Preprocessing

In [None]:
# Read the competition's sample submission; it is only used to enumerate the test images.
sample_submission_path = '../input/severstal-steel-defect-detection/sample_submission.csv'
submission_df = pd.read_csv(sample_submission_path)
print(submission_df.shape)
submission_df.head(n=5)


In [None]:
# One row per distinct test image. The sample submission may list an image
# several times, so duplicates are collapsed with unique().
unique_test_images = submission_df['ImageId'].unique()
test_df = pd.DataFrame(unique_test_images, columns=['ImageId'])
# The shape printed here already reports the number of unique images; the
# former bare `len(unique_test_images)` expression was never displayed.
print(test_df.shape)
test_df.head()

In [None]:
# Directory that holds the competition's test images (note the trailing slash).
TEST_PATH = '../input/severstal-steel-defect-detection/test_images/'


In [None]:
# Batch size of the image generator that feeds the defect / no-defect classifier.
BATCH_SIZE = 32
# Images are resized to IMAGE_SIZE x IMAGE_SIZE pixels by that generator.
IMAGE_SIZE = 256

In [None]:
# Generator feeding the defect / no-defect classifier.
# - directory reuses TEST_PATH instead of repeating the same literal, so the
#   test-image location is defined in exactly one place.
# - class_mode=None: only images are yielded (there are no labels at test time).
# - shuffle=False: the prediction order must match the row order of test_df,
#   because the predictions are written back into test_df afterwards.
submit_test_gen = ImageDataGenerator(rescale=1/255.).flow_from_dataframe(
        test_df,
        directory=TEST_PATH,
        x_col='ImageId',
        class_mode=None,
        target_size=(IMAGE_SIZE, IMAGE_SIZE),
        batch_size=BATCH_SIZE,
        shuffle=False
    )

In [None]:
# Stage 1: binary classifier that flags which test images contain any defect.
model = load_model('../input/severstaldensenetmodel/CNNDenseNet2classes.h5_20210309')

import math


def compute_steps_per_epoch(x):
    """Return how many batches of BATCH_SIZE are needed to cover ``x`` samples.

    The ceiling is taken because the last, partially filled batch is not dropped.
    """
    return int(math.ceil(1. * x / BATCH_SIZE))


n_test_images = test_df.shape[0]
STEP_SIZE_TEST = compute_steps_per_epoch(n_test_images)
print('predicting...')

submit_test = model.predict(submit_test_gen, steps=STEP_SIZE_TEST, verbose=1)

# Threshold the model output at 0.5 to obtain a 0/1 label per image.
is_defect = submit_test > 0.5
test_df['defect_label'] = is_defect.astype("int32")
test_df.head()

In [None]:
# How many images the classifier put in each class (0 = no defect, 1 = defect).
test_df['defect_label'].value_counts()

In [None]:
# Images flagged as defective; only these are passed to the segmentation model.
has_defect = test_df['defect_label'] == 1
test_df_defect = test_df.loc[has_defect].copy()
test_df_defect.shape

In [None]:
# Images flagged as defect-free; they receive empty masks in the submission.
no_defect = test_df['defect_label'] == 0
test_df_noDefect = test_df.loc[no_defect].copy()
test_df_noDefect.shape

# Functions

In [None]:
def mask2rle(img):
    """Run-length encode a binary mask.

    img: 2-D numpy array where 1 marks the mask and 0 the background.
    Returns a string of space-separated ``start length`` pairs. Pixels are
    visited column by column (the array is transposed before flattening) and
    start positions are 1-based. An all-zero mask yields an empty string.
    """
    flat = img.T.flatten()
    # Pad with a background pixel on both ends so runs touching the borders
    # still produce an opening and a closing transition.
    padded = np.concatenate([[0], flat, [0]])
    boundaries = np.flatnonzero(padded[1:] != padded[:-1]) + 1
    # Even slots hold run starts, odd slots hold run ends; turn ends into lengths.
    boundaries[1::2] -= boundaries[::2]
    return ' '.join(map(str, boundaries))

def rle2mask(rle, input_shape):
    """Decode a run-length string into a binary mask (inverse of ``mask2rle``).

    rle: string of space-separated ``start length`` pairs with 1-based starts,
        pixels numbered column by column. An empty string, or a non-string
        value such as NaN/None, decodes to an all-zero mask.
    input_shape: shape of the mask to produce; only the first two entries are
        used and the returned array has exactly that 2-D shape.

    Returns a ``uint8`` numpy array of shape ``input_shape[:2]`` with 1 on the
    encoded pixels and 0 elsewhere.

    Raises ValueError if the string does not contain an even number of values.
    """
    width, height = input_shape[:2]
    mask = np.zeros(width * height, dtype=np.uint8)

    # A missing annotation means "no pixels": return the empty mask.
    if isinstance(rle, str) and rle.strip():
        values = np.asarray([int(token) for token in rle.split()])
        if len(values) % 2 != 0:
            raise ValueError('RLE string must contain start/length pairs.')

        # Starts are 1-based in the encoding produced by mask2rle, so shift
        # them to 0-based offsets before slicing. Without the shift every
        # run lands one pixel too late.
        starts = values[0::2] - 1
        lengths = values[1::2]
        for start, length in zip(starts, lengths):
            mask[int(start):int(start + length)] = 1

    # The flat buffer is in column-major order; reshape and transpose to undo it.
    return mask.reshape(height, width).T

def build_rles(masks):
    """Run-length encode every slice along the last axis of a 3-D mask stack.

    masks: 3-D numpy array; ``masks[:, :, k]`` is the binary mask of slice k.
    Returns a list with one RLE string (see ``mask2rle``) per slice, in order.
    """
    # Unpacking doubles as a check that the input really is 3-D.
    _, _, n_slices = masks.shape

    encoded = []
    for k in range(n_slices):
        encoded.append(mask2rle(masks[:, :, k]))
    return encoded

In [None]:
class DataGenerator(keras.utils.Sequence):
    """Keras ``Sequence`` that loads images (and, in 'fit' mode, masks) batch by batch.

    Parameters
    ----------
    list_IDs : sequence of int
        Positional row indices into ``df`` (they are used with ``.iloc``).
    df : pandas.DataFrame
        Frame with an ``ImageId`` column holding image file names.
    target_df : pandas.DataFrame, optional
        Frame with ``ImageId``, ``ClassId`` and ``EncodedPixels`` columns.
        Only read in 'fit' mode, to build the target masks.
    mode : str
        'fit' yields ``(X, y)`` batches, 'predict' yields ``X`` only.
    base_path : str
        Directory the image file names are resolved against.
    batch_size : int
        Number of samples per batch.
    dim : tuple
        Spatial shape of one image / mask.
    n_channels : int
        Channels per image; 1 selects the grayscale loader, anything else RGB.
    n_classes : int
        Number of mask channels produced in 'fit' mode.
    random_state : int
        Seed of the generator's private shuffling RNG.
    shuffle : bool
        Whether sample order is reshuffled at the end of every epoch.
    """

    def __init__(self, list_IDs, df, target_df=None, mode='fit',
                 base_path='../input/severstal-steel-defect-detection/train_images',
                 batch_size=32, dim=(256, 1600), n_channels=3,
                 n_classes=4, random_state=2021, shuffle=True):
        self.dim = dim
        self.batch_size = batch_size
        self.df = df
        self.mode = mode
        self.base_path = base_path
        self.target_df = target_df
        self.list_IDs = list_IDs
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.shuffle = shuffle
        self.random_state = random_state
        # Private RNG: reseeding the global numpy RNG on every epoch produced
        # the identical permutation each time and clobbered global RNG state.
        # The first permutation is the same one the global seed would give.
        self._rng = np.random.RandomState(random_state)

        self.on_epoch_end()

    def __len__(self):
        'Denotes the number of batches per epoch'
        n_samples = len(self.list_IDs)
        if self.mode == 'predict':
            # Every sample needs a prediction, so keep the trailing partial batch.
            return int(np.ceil(n_samples / self.batch_size))
        # Training keeps the original behaviour: only full batches are used.
        return int(np.floor(n_samples / self.batch_size))

    def __getitem__(self, index):
        'Generate one batch of data'
        # Reject an invalid mode before doing any image I/O.
        if self.mode not in ('fit', 'predict'):
            raise AttributeError('The mode parameter should be set to "fit" or "predict".')

        # Generate indexes of the batch
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]

        # Find list of IDs
        list_IDs_batch = [self.list_IDs[k] for k in indexes]

        X = self.__generate_X(list_IDs_batch)

        if self.mode == 'fit':
            y = self.__generate_y(list_IDs_batch)
            return X, y

        return X

    def on_epoch_end(self):
        'Updates indexes after each epoch'
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle:
            self._rng.shuffle(self.indexes)

    def __generate_X(self, list_IDs_batch):
        'Generates the image array for the given batch of IDs'
        # Size the array by the real batch length so a short final batch
        # never carries uninitialised rows.
        X = np.empty((len(list_IDs_batch), *self.dim, self.n_channels))

        # Pick the loader matching the requested channel count.
        load_image = self.__load_grayscale if self.n_channels == 1 else self.__load_rgb

        for i, ID in enumerate(list_IDs_batch):
            im_name = self.df['ImageId'].iloc[ID]
            img_path = f"{self.base_path}/{im_name}"
            X[i,] = load_image(img_path)

        return X

    def __generate_y(self, list_IDs_batch):
        'Generates the per-class mask array for the given batch of IDs'
        y = np.empty((len(list_IDs_batch), *self.dim, self.n_classes), dtype=int)

        for i, ID in enumerate(list_IDs_batch):
            im_name = self.df['ImageId'].iloc[ID]
            image_df = self.target_df[self.target_df['ImageId'] == im_name].copy().reset_index()

            # Classes without an annotation row keep an all-zero mask.
            masks = np.zeros((*self.dim, self.n_classes))

            for j in range(len(image_df)):
                rle = image_df.loc[j, 'EncodedPixels']
                cls = image_df.loc[j, 'ClassId']
                # Rows whose encoding is missing (e.g. NaN) describe no pixels.
                if not isinstance(rle, str):
                    continue
                # ClassId is 1-based, mask channels are 0-based.
                masks[:, :, int(cls) - 1] = rle2mask(rle, self.dim)

            y[i, ] = masks

        return y

    def __load_grayscale(self, img_path):
        'Loads an image as float32 in [0, 1] with a trailing channel axis of size 1'
        img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f"Could not read image: {img_path}")
        img = img.astype(np.float32) / 255.
        img = np.expand_dims(img, axis=-1)

        return img

    def __load_rgb(self, img_path):
        'Loads an image as float32 RGB in [0, 1]'
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f"Could not read image: {img_path}")
        # OpenCV decodes to BGR; convert to RGB channel order.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = img.astype(np.float32) / 255.

        return img

In [None]:
def dice_coef(y_true, y_pred, smooth=1):
    """Dice coefficient between two tensors, computed over all flattened elements.

    y_true, y_pred: tensors of matching size.
    smooth: constant added to numerator and denominator, which keeps the
        ratio defined when both tensors sum to zero.
    Returns ``(2 * sum(y_true * y_pred) + smooth) / (sum(y_true) + sum(y_pred) + smooth)``.
    """
    truth = K.flatten(y_true)
    pred = K.flatten(y_pred)
    overlap = K.sum(truth * pred)
    numerator = 2. * overlap + smooth
    denominator = K.sum(truth) + K.sum(pred) + smooth
    return numerator / denominator

# Load Model

In [None]:
# Stage 2: segmentation network. custom_objects maps the name 'dice_coef' to
# the function defined in this notebook so load_model can resolve it.
segmentation_model_path = '../input/severstal-segmentation-unetxception-20210312/Segmentation_UnetXception_20210312.h5'
dependencies = {'dice_coef': dice_coef}

model = load_model(segmentation_model_path, custom_objects=dependencies)

In [None]:
# Predict masks for the images flagged as defective, `step` images at a time,
# so only one chunk of full-resolution predictions is held in memory.
df = []          # one 4-row frame (one row per class) per image
step = 300       # images per prediction chunk
n_seg_classes = 4
n_defect_images = test_df_defect.shape[0]

for chunk_start in range(0, n_defect_images, step):
    # Positional indices (used with .iloc) of the images in this chunk.
    batch_idx = list(
        range(chunk_start, min(n_defect_images, chunk_start + step))
    )

    test_generator = DataGenerator(
        batch_idx,
        df=test_df_defect,
        shuffle=False,      # keep predictions aligned with batch_idx
        mode='predict',
        base_path='../input/severstal-steel-defect-detection/test_images',
        target_df=test_df_defect,
        batch_size=1,
        n_classes=n_seg_classes
    )

    batch_pred_masks = model.predict(
        test_generator,
        verbose=1,
        )

    # `total` lets tqdm show real progress; enumerate() alone has no length.
    for j, b in tqdm(enumerate(batch_idx), total=len(batch_idx)):
        filename = test_df_defect['ImageId'].iloc[b]

        # Round the network output to a 0/1 mask per class, then run-length encode.
        pred_masks = batch_pred_masks[j, ].round().astype(int)
        pred_rles = build_rles(pred_masks)

        # Build the per-image frame in one go. An all-zero mask encodes to ''.
        # A dedicated loop variable is no longer needed here, so the chunk
        # loop variable is never shadowed.
        image_df = pd.DataFrame(
            {
                'ImageId': [filename] * n_seg_classes,
                'ClassId': list(range(1, n_seg_classes + 1)),
                'EncodedPixels': pred_rles[:n_seg_classes],
            },
            columns=['ImageId', 'ClassId', 'EncodedPixels'],
        )

        df.append(image_df)

    # Drop the references first; otherwise gc.collect() cannot release the
    # chunk's prediction array before the next chunk is predicted.
    del batch_pred_masks, test_generator
    gc.collect()

# Submission

In [None]:
# `df` arrives as the list of per-image frames and leaves as one DataFrame.
# The isinstance guard keeps the cell re-runnable after that rebinding, and the
# empty-list branch avoids pd.concat raising when no image was flagged as defective.
if isinstance(df, list):
    if df:
        df = pd.concat(df)
    else:
        df = pd.DataFrame(columns=['ImageId', 'ClassId', 'EncodedPixels'])
print(df.shape)

In [None]:
# Preview the first 30 rows of the predicted run-length encodings.
df.head(n=30)

Now we combine the predicted masks with the remaining images, which our first CNN classified as having no defects (all 4 masks missing).

In [None]:
# Peek at the images the classifier marked as defect-free.
test_df_noDefect.head(n=5)

In [None]:
# Four rows (ClassId 1..4) with a missing EncodedPixels for every image the
# classifier marked as defect-free. Built in one vectorised step instead of
# growing a frame cell by cell with .loc, which was slow and raised in
# pd.concat when there were no such images.
no_defect_ids = test_df_noDefect['ImageId'].to_numpy()
n_no_defect = len(no_defect_ids)

tt = pd.DataFrame(
    {
        'ImageId': np.repeat(no_defect_ids, 4),          # a, a, a, a, b, b, ...
        'ClassId': np.tile(np.arange(1, 5), n_no_defect),  # 1, 2, 3, 4, 1, 2, ...
        'EncodedPixels': np.nan,
    },
    columns=['ImageId', 'ClassId', 'EncodedPixels'],
    # Same 0..3 per-image index the row-by-row construction produced.
    index=np.tile(np.arange(4), n_no_defect),
)
print(tt.shape)

In [None]:
# Stack the predicted-mask rows on top of the empty rows of defect-free images.
submission_parts = [df, tt]
final_submission_df = pd.concat(submission_parts)
print(final_submission_df.shape)
final_submission_df.head(n=5)

In [None]:
def _blank_to_nan(encoded):
    """Map an empty run-length string to NaN; return anything else unchanged."""
    return np.nan if encoded == '' else encoded


# An empty string marks an all-zero mask; store it as a missing value instead.
final_submission_df["EncodedPixels"] = final_submission_df["EncodedPixels"].map(_blank_to_nan)

In [None]:
# Build the combined "<ImageId>_<ClassId>" key written to the submission file.
class_id_text = final_submission_df["ClassId"].astype(str)
final_submission_df["ClassId"] = class_id_text
final_submission_df['ImageId_ClassId'] = final_submission_df['ImageId'] + "_" + class_id_text

In [None]:
# Check the key column and encodings before writing the file.
final_submission_df.head(n=5)

In [None]:
# Write only the two columns of the submission, without the DataFrame index.
submission_columns = ['ImageId_ClassId', 'EncodedPixels']
final_submission_df[submission_columns].to_csv('submission-xceptionUnet2.csv', index=False)

In [None]:
# `os` is already imported in the notebook's first cell, so the duplicate
# import that used to sit here was dropped.
print("Done.")
# List the working directory to confirm the submission file was written.
print(os.listdir())

In [None]:
# Create a separate file to submit the result.
# NOTE(review): the block below is a bare triple-quoted string, i.e. disabled
# code that is never executed. It would copy a previously generated
# submission.csv into the working directory. Delete it or re-enable it
# deliberately.
'''
import pandas as pd

s = pd.read_csv('../input/csvfiles/submission.csv')

s.to_csv('submission.csv',index=False)
s.head()
'''