In [1]:
import os
import gc
import cv2
import keras
from keras import backend as K
from keras import layers
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Model, load_model
from keras.layers import Input
from keras.layers.convolutional import Conv2D, Conv2DTranspose
from keras.layers.pooling import MaxPooling2D
from keras.layers.merge import concatenate
from keras.optimizers import Adam
from keras.callbacks import Callback, ModelCheckpoint
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from tqdm import tqdm
from sklearn.model_selection import train_test_split

# Read submission samples

In [2]:
# Load the competition's sample submission; it is the source of the test
# image names used in the following cells.
submission_df = pd.read_csv(
    '../input/severstal-steel-defect-detection/sample_submission.csv'
)
print(submission_df.shape)
submission_df.head()


(5506, 3)


Unnamed: 0,ImageId,EncodedPixels,ClassId
0,0000f269f.jpg,1 409600,0
1,000ccc2ac.jpg,1 409600,0
2,002451917.jpg,1 409600,0
3,003c5da97.jpg,1 409600,0
4,0042e163f.jpg,1 409600,0


In [3]:
# Build a one-column frame holding each distinct test image name once.
# (The former bare `len(unique_test_images)` line was a no-op: it was neither
# the last expression of the cell nor printed, so it has been dropped; the
# row count is visible in the printed shape below.)
unique_test_images = submission_df['ImageId'].unique()
test_df = pd.DataFrame({'ImageId': unique_test_images})
print(test_df.shape)
test_df.head()

(5506, 1)


Unnamed: 0,ImageId
0,0000f269f.jpg
1,000ccc2ac.jpg
2,002451917.jpg
3,003c5da97.jpg
4,0042e163f.jpg


In [4]:
# Directory containing the test images (note the trailing slash).
TEST_PATH = '../input/severstal-steel-defect-detection/test_images/'


In [5]:
# Number of images per batch for the classification generator; also used to
# derive the number of prediction steps.
BATCH_SIZE = 32
# Side length the classification generator resizes images to (used for both
# entries of target_size).
IMAGE_SIZE = 256

In [6]:
# Unlabelled, unshuffled generator over the test images for the classifier.
# shuffle=False keeps predictions aligned with the row order of test_df, and
# class_mode=None makes the generator yield images only.
# The directory comes from TEST_PATH instead of repeating the literal.
submit_test_datagen = ImageDataGenerator(rescale=1/255.)
submit_test_gen = submit_test_datagen.flow_from_dataframe(
        test_df,
        directory=TEST_PATH,
        x_col='ImageId',
        class_mode=None,
        target_size=(IMAGE_SIZE, IMAGE_SIZE),
        batch_size=BATCH_SIZE,
        shuffle=False
    )

Found 5506 validated image filenames.


# Classification Prediction

In [7]:
# NOTE(review): this cell still contains <YOUR_CODE_HERE> placeholders and is
# not runnable until they are filled in.
# Load the saved classification model from its .h5 file.
model = <YOUR_CODE_HERE>('../input/ncseverstal03classificationdensenet/severstal-classification-DenseNet.h5')

import math
# We take the ceiling because we do not drop the remainder of the batch
compute_steps_per_epoch = lambda x: int(math.ceil(1. * x / BATCH_SIZE))

# Number of batches needed to cover every row of test_df.
STEP_SIZE_TEST = compute_steps_per_epoch(test_df.shape[0])
print('predicting...')

# Run the classifier over the unshuffled test generator.
submit_test = model.<YOUR_CODE_HERE>(
    submit_test_gen,
    steps=STEP_SIZE_TEST,
    verbose=1
)

# Store a 0/1 label per image from a comparison result cast to int32.
# NOTE(review): both operands of the comparison are placeholders — presumably
# the prediction scores against a decision threshold; confirm the threshold.
test_df['defect_label'] = (<YOUR_CODE_HERE> > <YOUR_CODE_HERE>).astype("int32")
test_df.head()

predicting...


Unnamed: 0,ImageId,defect_label
0,0000f269f.jpg,1
1,000ccc2ac.jpg,1
2,002451917.jpg,1
3,003c5da97.jpg,1
4,0042e163f.jpg,1


In [8]:
# NOTE(review): the method name is a placeholder; the recorded output below
# looks like a per-label count of the 0/1 values — confirm before running.
test_df.defect_label.<YOUR_CODE_HERE>()

1    3430
0    2076
Name: defect_label, dtype: int64

In [9]:
# Independent copy of the rows the classifier labelled 1.
test_df_defect = test_df.loc[test_df['defect_label'].eq(1)].copy()
test_df_defect.shape

(3430, 2)

In [10]:
# Independent copy of the rows the classifier labelled 0.
test_df_noDefect = test_df.loc[test_df['defect_label'].eq(0)].copy()
test_df_noDefect.shape

(2076, 2)

# Segmentation Prediction
## Functions

In [11]:
def mask2rle(img):
    '''
    Run-length encode a binary mask.

    img: numpy array, 1 - mask, 0 - background
    The mask is read column by column (its transpose is flattened).
    Returns a space-separated string of "start length" pairs with 1-based
    starts; a mask without any foreground gives an empty string.
    '''
    column_major = img.T.flatten()
    # Pad with a zero on both sides so runs touching either end of the
    # flattened mask still produce an opening and a closing transition.
    padded = np.concatenate([[0], column_major, [0]])
    transitions = np.flatnonzero(padded[1:] != padded[:-1]) + 1
    # Even slots are run starts and odd slots are run ends; subtracting the
    # start from each end turns the ends into run lengths.
    transitions[1::2] -= transitions[::2]
    return ' '.join(map(str, transitions))

def rle2mask(rle, input_shape):
    '''
    Decode a run-length string into a binary mask (inverse of mask2rle).

    rle: space-separated "start length" pairs with 1-based starts, counted
         along the first axis fastest (column by column). A missing value
         (None / NaN) or an empty string decodes to an all-zero mask.
    input_shape: shape of the mask to build; only the first two entries are
         used.
    Returns a uint8 numpy array of shape input_shape[:2].
    Raises ValueError if the string holds an odd number of values.
    '''
    # The first entry is called `width` here, but the buffer is reshaped to
    # (height, width) and then transposed, so the result has exactly the
    # shape input_shape[:2].
    width, height = input_shape[:2]

    mask = np.zeros(width * height, dtype=np.uint8)

    # Anything that is not a non-blank string (e.g. the NaN pandas uses for a
    # missing annotation) means "no pixels set".
    if isinstance(rle, str) and rle.strip():
        values = np.asarray([int(x) for x in rle.split()])
        if values.size % 2 != 0:
            raise ValueError(
                f"RLE must contain start/length pairs, got {values.size} values"
            )
        starts = values[0::2] - 1  # convert 1-based starts to 0-based offsets
        lengths = values[1::2]

        for start, length in zip(starts, lengths):
            mask[int(start):int(start + length)] = 1

    return mask.reshape(height, width).T

def build_rles(masks):
    '''
    Run-length encode every channel of a 3-D mask stack.

    masks: numpy array whose last axis indexes the individual masks.
    Returns a list with one mask2rle string per channel, in channel order.
    '''
    _, _, n_channels = masks.shape

    encoded = []
    for channel in range(n_channels):
        encoded.append(mask2rle(masks[:, :, channel]))

    return encoded

In [12]:
class DataGenerator(keras.utils.Sequence):
    '''
    Keras Sequence that loads images (and, in 'fit' mode, masks) in batches.

    list_IDs: positional row indices into `df` (looked up with .iloc).
    df: DataFrame with an 'ImageId' column holding image file names.
    target_df: DataFrame with 'ImageId', 'ClassId' and 'EncodedPixels'
        columns; only read in 'fit' mode.
    mode: 'fit' yields (X, y) batches, 'predict' yields X only.
    base_path: directory the image file names are resolved against.
    batch_size: number of samples per batch.
    dim: first two dimensions of every image / mask array.
    n_channels: 1 loads images as grayscale, anything else as RGB.
    n_classes: number of mask channels produced for y.
    random_state: seed applied before each shuffle.
    shuffle: whether to shuffle the sample order at every epoch end.
    '''
    def __init__(self, list_IDs, df, target_df=None, mode='fit',
                 base_path='../input/severstal-steel-defect-detection/train_images',
                 batch_size=32, dim=(256, 1600), n_channels=1,
                 n_classes=4, random_state=2021, shuffle=True):
        self.dim = dim
        self.batch_size = batch_size
        self.df = df
        self.mode = mode
        self.base_path = base_path
        self.target_df = target_df
        self.list_IDs = list_IDs
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.shuffle = shuffle
        self.random_state = random_state

        # Builds self.indexes so the generator is usable before the first epoch ends.
        self.on_epoch_end()

    def __len__(self):
        'Denotes the number of batches per epoch'
        n_samples = len(self.list_IDs)
        if self.mode == 'predict':
            # Every sample needs a prediction, so keep the trailing partial
            # batch instead of silently dropping it.
            return int(np.ceil(n_samples / self.batch_size))
        # In the other modes only full batches are produced.
        return int(np.floor(n_samples / self.batch_size))

    def __getitem__(self, index):
        'Generate one batch of data'
        # Generate indexes of the batch
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]

        # Find list of IDs
        list_IDs_batch = [self.list_IDs[k] for k in indexes]

        X = self.__generate_X(list_IDs_batch)

        if self.mode == 'fit':
            y = self.__generate_y(list_IDs_batch)
            return X, y

        elif self.mode == 'predict':
            return X

        else:
            raise AttributeError('The mode parameter should be set to "fit" or "predict".')

    def on_epoch_end(self):
        'Updates indexes after each epoch'
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle == True:
            # NOTE(review): re-seeding with the same value before shuffling a
            # fresh arange yields the same order every epoch and resets the
            # global NumPy RNG; kept as is to preserve existing behavior.
            np.random.seed(self.random_state)
            np.random.shuffle(self.indexes)

    def __generate_X(self, list_IDs_batch):
        'Loads the images of one batch into a single array'
        # Size the buffer by the actual batch length so a short final batch
        # never carries uninitialised rows from np.empty.
        X = np.empty((len(list_IDs_batch), *self.dim, self.n_channels))

        # Generate data
        for i, ID in enumerate(list_IDs_batch):
            im_name = self.df['ImageId'].iloc[ID]
            img_path = f"{self.base_path}/{im_name}"
            if self.n_channels == 1:
                img = self.__load_grayscale(img_path)
            else:
                img = self.__load_rgb(img_path)

            # Store samples
            X[i,] = img

        return X

    def __generate_y(self, list_IDs_batch):
        'Builds the per-class mask stack for every image of one batch'
        y = np.empty((len(list_IDs_batch), *self.dim, self.n_classes), dtype=int)

        for i, ID in enumerate(list_IDs_batch):
            im_name = self.df['ImageId'].iloc[ID]
            image_df = self.target_df[self.target_df['ImageId'] == im_name].copy().reset_index(drop=True)

            masks = np.zeros((*self.dim, self.n_classes))

            for j in range(len(image_df)):
                rle = image_df.loc[j, 'EncodedPixels']
                if not isinstance(rle, str):
                    # Missing annotation (e.g. NaN): leave this channel empty.
                    continue
                class_id = int(image_df.loc[j, 'ClassId'])
                # Class ids are used 1-based; channel k holds class k + 1.
                masks[:, :, class_id - 1] = rle2mask(rle, self.dim)

            y[i, ] = masks

        return y

    def __load_grayscale(self, img_path):
        'Reads an image as float32 grayscale in [0, 1] with a trailing channel axis'
        img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        if img is None:
            # cv2.imread returns None instead of raising on unreadable files.
            raise FileNotFoundError(f"Could not read image: {img_path}")
        img = img.astype(np.float32) / 255.
        img = np.expand_dims(img, axis=-1)

        return img

    def __load_rgb(self, img_path):
        'Reads an image as float32 RGB in [0, 1]'
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread returns None instead of raising on unreadable files.
            raise FileNotFoundError(f"Could not read image: {img_path}")
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = img.astype(np.float32) / 255.

        return img

In [13]:
def dice_coef(y_true, y_pred, smooth=1):
    '''
    Dice coefficient over all elements of the two tensors.

    Both tensors are flattened, so the score is computed jointly across the
    whole batch. `smooth` is added to numerator and denominator, which keeps
    the ratio defined when both tensors sum to zero.
    '''
    truth = K.flatten(y_true)
    prediction = K.flatten(y_pred)
    overlap = K.sum(truth * prediction)
    numerator = 2. * overlap + smooth
    denominator = K.sum(truth) + K.sum(prediction) + smooth
    return numerator / denominator

## Load segmentation saved model

In [14]:
def load_segmentation_model_and_predict(model_path,submission_file,n_channels):
    '''
    Predict masks with a saved segmentation model and write a submission CSV.

    model_path: path of the saved Keras model to load.
    submission_file: path of the CSV file to write.
    n_channels: channel count passed to DataGenerator (1 loads grayscale
        images, anything else RGB).

    Reads the notebook-level frames `test_df_defect` (images to segment) and
    `test_df_noDefect` (images that get four empty rows). The CSV has the
    columns 'ImageId_ClassId' and 'EncodedPixels', with one row per image and
    class id 1-4; empty predictions are written as missing values.
    '''
    # Custom objects needed to deserialise the model.
    # NOTE(review): presumably the models were compiled with dice_coef — confirm.
    dependencies = {
        'dice_coef': dice_coef
    }

    model = load_model(model_path, custom_objects=dependencies)

    class_ids = [1, 2, 3, 4]
    columns = ['ImageId', 'ClassId', 'EncodedPixels']
    step = 300  # images predicted per chunk
    n_defect = test_df_defect.shape[0]

    # Prediction for the images the classifier flagged as defective.
    # Rows are collected as records and turned into a frame once, which also
    # works when there are no such images.
    defect_rows = []
    for chunk_start in range(0, n_defect, step):
        batch_idx = list(
            range(chunk_start, min(n_defect, chunk_start + step))
        )

        test_generator = DataGenerator(
            batch_idx,
            df=test_df_defect,
            shuffle=False,  # keep predictions aligned with batch_idx
            mode='predict',
            base_path='../input/severstal-steel-defect-detection/test_images',
            target_df=test_df_defect,
            batch_size=1,
            n_classes=len(class_ids),
            n_channels=n_channels
        )

        batch_pred_masks = model.predict(
            test_generator,
            verbose=1,
            )

        for j, b in tqdm(enumerate(batch_idx)):
            filename = test_df_defect['ImageId'].iloc[b]

            # Round the predicted values to 0/1 before run-length encoding.
            pred_masks = batch_pred_masks[j, ].round().astype(int)
            pred_rles = build_rles(pred_masks)
            for class_id, rle in zip(class_ids, pred_rles):
                defect_rows.append(
                    {'ImageId': filename, 'ClassId': class_id, 'EncodedPixels': rle}
                )

        # Release memory held by the finished chunk before the next one.
        gc.collect()
    df = pd.DataFrame(defect_rows, columns=columns)

    # Now, we combine results from the predicted masks with the rest of the
    # images, which the first CNN classified as having all 4 masks missing.
    no_defect_rows = [
        {'ImageId': image_id, 'ClassId': class_id, 'EncodedPixels': np.nan}
        for image_id in test_df_noDefect['ImageId']
        for class_id in class_ids
    ]
    tt = pd.DataFrame(no_defect_rows, columns=columns)

    # Merge the defect predictions and the no-defect rows.
    final_submission_df = pd.concat([df, tt], ignore_index=True)

    # An empty RLE string means "no pixels"; store it as a missing value.
    final_submission_df["EncodedPixels"] = final_submission_df["EncodedPixels"].apply(lambda x: np.nan if x == '' else x)
    final_submission_df["ClassId"] = final_submission_df["ClassId"].astype(str)
    final_submission_df['ImageId_ClassId'] = final_submission_df['ImageId']  + "_" + final_submission_df["ClassId"]

    final_submission_df[['ImageId_ClassId', 'EncodedPixels']].to_csv(submission_file, index=False)
    print("Model: ", model_path)
    print("Generated submission file: ", submission_file)

# Predictions and submission file generation for each model

In [15]:
# Saved model file and the submission file generated from it.
model_path = '../input/ncseverstalsegmentationunetdataaugloss/severstal-segmentation_unetDataAugLoss.h5'
submission_file = 'submission-ncseverstalsegmentationunetdataaugloss.csv'
n_channels = 1  # images are loaded as grayscale

load_segmentation_model_and_predict(
    model_path=model_path,
    submission_file=submission_file,
    n_channels=n_channels,
)




300it [00:04, 71.62it/s]




300it [00:04, 72.76it/s]




300it [00:04, 73.11it/s]




300it [00:04, 74.22it/s]




300it [00:03, 75.48it/s]




300it [00:04, 73.48it/s]




300it [00:04, 72.44it/s]




300it [00:04, 66.66it/s]




300it [00:04, 71.53it/s]




300it [00:04, 69.18it/s]




300it [00:04, 74.76it/s]




130it [00:01, 71.72it/s]


Model:  ../input/ncseverstalsegmentationunetdataaugloss/severstal-segmentation_unetDataAugLoss.h5
Generated submission file:  submission-ncseverstalsegmentationunetdataaugloss.csv


In [16]:
# Saved model file and the submission file generated from it.
model_path = '../input/nc-severstalsegmentationunetdataaugdice/severstal-segmentation_unetDataAugDice.h5'
submission_file = 'submission-ncseverstalsegmentationunetdataaugdice.csv'
n_channels = 1  # images are loaded as grayscale

load_segmentation_model_and_predict(
    model_path=model_path,
    submission_file=submission_file,
    n_channels=n_channels,
)



300it [00:04, 72.32it/s]




300it [00:04, 66.14it/s]




300it [00:04, 74.15it/s]




300it [00:03, 76.40it/s]




300it [00:03, 78.28it/s]




300it [00:03, 78.27it/s]




300it [00:04, 71.31it/s]




300it [00:04, 69.66it/s]




300it [00:04, 68.17it/s]




300it [00:04, 68.44it/s]




300it [00:03, 75.06it/s]




130it [00:01, 76.45it/s]


Model:  ../input/nc-severstalsegmentationunetdataaugdice/severstal-segmentation_unetDataAugDice.h5
Generated submission file:  submission-ncseverstalsegmentationunetdataaugdice.csv


In [17]:
# Saved model file and the submission file generated from it.
model_path = '../input/ncseverstal05segmentationunet/severstal-segmentation_UNET.h5'
submission_file = 'submission-ncseverstal05segmentationunet.csv'
n_channels = 1  # images are loaded as grayscale

load_segmentation_model_and_predict(
    model_path=model_path,
    submission_file=submission_file,
    n_channels=n_channels,
)



300it [00:04, 73.06it/s]




300it [00:04, 73.69it/s]




300it [00:04, 71.00it/s]




300it [00:03, 76.11it/s]




300it [00:03, 75.06it/s]




300it [00:04, 74.16it/s]




300it [00:04, 64.09it/s]




300it [00:04, 71.27it/s]




300it [00:04, 70.74it/s]




300it [00:04, 68.62it/s]




300it [00:04, 73.89it/s]




130it [00:01, 75.02it/s]


Model:  ../input/ncseverstal05segmentationunet/severstal-segmentation_UNET.h5
Generated submission file:  submission-ncseverstal05segmentationunet.csv


In [18]:
# Saved model file and the submission file generated from it.
model_path = '../input/ncseverstal06segmentationunetxception/severstal-segmentation_UnetXception.h5'
submission_file = 'submission-ncseverstal06segmentationunetxception.csv'
n_channels = 3  # images are loaded as RGB

load_segmentation_model_and_predict(
    model_path=model_path,
    submission_file=submission_file,
    n_channels=n_channels,
)



300it [00:04, 65.78it/s]




300it [00:03, 82.65it/s]




300it [00:03, 78.64it/s]




300it [00:03, 80.51it/s]




300it [00:03, 79.81it/s]




300it [00:03, 82.23it/s]




300it [00:03, 82.45it/s]




300it [00:04, 74.91it/s]




300it [00:03, 75.49it/s]




300it [00:03, 77.13it/s]




300it [00:04, 61.12it/s]




130it [00:01, 77.85it/s]


Model:  ../input/ncseverstal06segmentationunetxception/severstal-segmentation_UnetXception.h5
Generated submission file:  submission-ncseverstal06segmentationunetxception.csv


In [19]:
# Saved model file and the submission file generated from it.
model_path = '../input/ncseverstal07segmentationunetbn/severstal-segmentation_unetBN.h5'
submission_file = 'submission-ncseverstal07segmentationunetbn.csv'
n_channels = 1  # images are loaded as grayscale

load_segmentation_model_and_predict(
    model_path=model_path,
    submission_file=submission_file,
    n_channels=n_channels,
)



300it [00:03, 77.94it/s]




300it [00:03, 78.29it/s]




300it [00:05, 55.66it/s]




300it [00:03, 78.07it/s]




300it [00:03, 78.82it/s]




300it [00:04, 63.63it/s]




300it [00:03, 78.23it/s]




300it [00:03, 77.95it/s]




300it [00:04, 60.44it/s]




300it [00:03, 75.48it/s]




300it [00:04, 74.48it/s]




130it [00:01, 79.40it/s]


Model:  ../input/ncseverstal07segmentationunetbn/severstal-segmentation_unetBN.h5
Generated submission file:  submission-ncseverstal07segmentationunetbn.csv


In [20]:
# Create a separate file to submit the result.
# Disabled on purpose. Kept as real comments: a triple-quoted string here is
# evaluated as the cell's last expression and echoed back as output.
#
# import pandas as pd
#
# s = pd.read_csv('../input/csvfiles/submission.csv')
#
# s.to_csv('submission.csv',index=False)
# s.head()

"\nimport pandas as pd\n\ns = pd.read_csv('../input/csvfiles/submission.csv')\n\ns.to_csv('submission.csv',index=False)\ns.head()\n"