In [125]:
#! pip install segmentation-models
# Import libraries
import warnings
warnings.filterwarnings("ignore")

from datetime import datetime
from time import time
import pandas as pd
import numpy as np
import os 
import cv2
import shutil
from PIL import Image

%env SM_FRAMEWORK=tf.keras
import segmentation_models
print(segmentation_models.__version__)

import keras
from keras.preprocessing.image import ImageDataGenerator
from keras import backend as K
from keras.models import Model, load_model
import tensorflow as tf
from tensorflow.python.keras.callbacks import TensorBoard
from keras.callbacks import ModelCheckpoint
import segmentation_models as sm
from segmentation_models import Unet
from segmentation_models import get_preprocessing

env: SM_FRAMEWORK=tf.keras
1.0.1


In [5]:
from rle import mask2rle, rle2mask, area
from model_metric import dice_coef, recall_m, precision_m, f1_score_m

In [6]:
input_folder = '../Input/'
print('Input folder check', os.listdir(input_folder),'\n')
output_folder = '../Output/'
print('Output folder check', os.listdir(output_folder),'\n')
model_folder = '../Model/'
print('Model folder check', os.listdir(model_folder),'\n')

Input folder check ['0abfbfc69.jpg', '0ac9936af.jpg'] 

Output folder check [] 

Model folder check ['severstal_binary.h5', 'severstal_segmentation_Defect_1.h5', 'severstal_segmentation_Defect_2.h5', 'severstal_segmentation_Defect_3.h5', 'severstal_segmentation_Defect_4.h5'] 



In [7]:
def data_prep(input_folder):

    #read all file from train_folder
    img_files = os.listdir(input_folder)
    file_list = []

    if len(img_files)>0:
        print('Number of new image files detected:', len(img_files))
        print('List of image file', img_files)
        
        for file in img_files:
            
            file_list.append(file)
            output_df = pd.DataFrame(file_list, columns = ['ImageId'])
        
    else:
        print('No file inside input folder')
               
    return(output_df)

In [8]:
X = data_prep(input_folder)

Number of new image files detected: 2
List of image file ['0abfbfc69.jpg', '0ac9936af.jpg']
Models loaded


### Utility functions, metrics and DataGenerator

In [9]:
class test_DataGenerator_3(keras.utils.Sequence):
    
    def __init__(self, df, batch_size = 1, image_path = input_folder, preprocess=None, info={}):
        super().__init__()
        self.df = df
        self.batch_size = batch_size
        self.preprocess = preprocess
        self.info = info
        self.data_path = image_path
        self.on_epoch_end()
        
    def __len__(self):
        return int(np.floor(len(self.df) / self.batch_size))
    
    def on_epoch_end(self):
        self.indexes = np.arange(len(self.df))
        
    def __getitem__(self, index): 
        '''
        The DataGenerator takes ImageIds of batch size 1 and returns Image array to the model.
        With the help of ImageIds the DataGenerator locates the Image file in the path, the image is read and resized from
        256x1600 to 256x800.
        '''
        X = np.empty((self.batch_size,256,800,3),dtype=np.float32)
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]
        for i,f in enumerate(self.df['ImageId'].iloc[indexes]):
            self.info[index*self.batch_size+i]=f
            X[i,] = Image.open(self.data_path + f).resize((800,256))      
        if self.preprocess!=None: X = self.preprocess(X)
        return X

### Predictor functions

In [None]:
dependencies = {
                'recall_m':recall_m,
                'precision_m':precision_m,
                'dice_coef':dice_coef,
                'f1_score_m':f1_score_m,
                'dice_loss':sm.losses.dice_loss
                }

defect_model = load_model('../Model/Defect_model.h5', custom_objects=dependencies)
type1_model = load_model('../Model/Defect_type1_model.h5', custom_objects=dependencies)
type2_model = load_model('../Model/Defect_type2_model.h5', custom_objects=dependencies)
type3_model = load_model('../Model/Defect_type3_model.h5', custom_objects=dependencies)
type4_model = load_model('../Model/Defect_type4_model.h5', custom_objects=dependencies)

print("Models loaded")

In [141]:
def pred_classification(X):
    '''
    Input: ImageIds in form of a dataframe
    Return: Predictions of classification models
    '''
    X = X.reset_index().drop('index',axis=1)
    data_generator = ImageDataGenerator(rescale=1./255).flow_from_dataframe(dataframe=X, 
                                                                            directory='../Input/', 
                                                                            x_col="ImageId", 
                                                                            class_mode = None, 
                                                                            target_size=(256,512), 
                                                                            batch_size=1, 
                                                                            shuffle=False)

    data_preds_binary = defect_model.predict(data_generator, verbose=0)
    data_classification = pd.DataFrame(data_preds_binary, columns = ['hasDefect'])
    data_classification['hasDefect'] = np.where(data_preds_binary>0.6,1,0)
    data_classification['ImageId'] = X['ImageId']
    return data_classification[['ImageId', 'hasDefect']]

def pred_segmentation(X):
    '''
    Input: ImageIds in form of a dataframe
    Return: Predictions of segmentation models
    '''
    X = X.reset_index().drop('index',axis=1)
    preprocess = get_preprocessing('efficientnetb1')
    tmp=[]
    loop_num = 50
    for j in range((len(X)//loop_num)+1):
        test_dataf = X[loop_num*j:loop_num*j+loop_num]
        test_batches =  test_DataGenerator_3(test_dataf,preprocess=preprocess)
        test_preds_1 = type1_model.predict(test_batches,verbose=0)
        test_preds_2 = type2_model.predict(test_batches,verbose=0)
        test_preds_3 = type3_model.predict(test_batches,verbose=0)
        test_preds_4 = type4_model.predict(test_batches,verbose=0)
        
        for i in range(len(test_preds_1)):
            ep1 = mask2rle(np.array((Image.fromarray((test_preds_1[i][:,:,0])>=0.5)).resize((1600,256))).astype(int))
            ep2 = mask2rle(np.array((Image.fromarray((test_preds_2[i][:,:,0])>=0.5)).resize((1600,256))).astype(int))
            ep3 = mask2rle(np.array((Image.fromarray((test_preds_3[i][:,:,0])>=0.5)).resize((1600,256))).astype(int))
            ep4 = mask2rle(np.array((Image.fromarray((test_preds_4[i][:,:,0])>=0.5)).resize((1600,256))).astype(int))
            
            tmp.append([test_dataf.ImageId.iloc[i],ep1,ep2,ep3,ep4])
    
    seg_df = pd.DataFrame(tmp,columns=['ImageId','EncodedPixels_1','EncodedPixels_2','EncodedPixels_3','EncodedPixels_4'])            

    return(seg_df)

def pred_combined(X):
    '''
    Input: ImageId (dataframe)
    Return: Comdined dataframe of output of pred_classification function and pred_segmentation function
    '''
    X = X.reset_index().drop('index',axis=1)
    merge_df = pred_classification(X).merge(pred_segmentation(X),on=['ImageId'])
    
    def_list = ['Defect_1', 'Defect_2', 'Defect_3', 'Defect_4']

    for defect, i in zip(def_list, (range(1,5))):
        
        merge_df[defect] = merge_df['EncodedPixels_{}'.format(i)].str.count(' ')
        merge_df['Defect_{}'.format(i)] = (np.where((merge_df['Defect_{}'.format(i)]>0),i,0))

    merge_df[def_list] = merge_df[def_list].replace(0,'')
    merge_df['Defect_Type'] = merge_df.filter(regex='Defect_[1-4]').astype(str).apply(lambda x: x.str.cat(), axis=1)
    merge_df = merge_df.drop(columns=def_list)
    
    return merge_df

In [142]:
final_df = pred_combined(X)
final_df.head()

Found 2 validated image filenames.


Unnamed: 0,ImageId,hasDefect,EncodedPixels_1,EncodedPixels_2,EncodedPixels_3,EncodedPixels_4,Defect_Type
0,0abfbfc69.jpg,1,,6394 5 6650 5 6905 5 7161 5 97387 10 97643 10 ...,652 14 908 14 1150 70 1406 70 1659 77 1915 77 ...,142446 1 142702 1 142956 6 143212 6 143467 8 1...,234
1,0ac9936af.jpg,1,341104 36 341360 36 341613 46 341869 46 342123...,,339060 53 339316 53 339547 90 339803 90 340057...,339040 3 339046 111 339296 3 339302 111 339542...,134


In [199]:
def img_report(df):
    
    col_name = ['Timestamp', 'ImageId', 'hasDefect', 'Defect_Type']
    
    dateTimeObj = datetime.now()
    timestampStr = dateTimeObj.strftime("%d-%b-%Y %H:%M:%S")
    print('Current Timestamp : ', timestampStr)
    
    df['Timestamp'] = timestampStr
    df = df[col_name].copy()
    
    while True:
        
        try:
            record = pd.read_csv(output_folder + 'defect_report.csv')           
            print('Read record from defect_report.csv')  
            
            record_img = list(record['ImageId'])
            new_img = list(df['ImageId'])

            for img in new_img:
                if img in record_img:
                    print(img,'found in previous record')
            
            #remove row if record already found in report
            df = df[~df['ImageId'].isin(record_img)]
            record = record.append(df)
            record.to_csv(output_folder + 'defect_report.csv', index=False)
            
            if len(df)<1:
                print('No data to update')
            else:
                print('Report updated with latest data')
            
            return
            
        except FileNotFoundError:
            print('defect_report.csv not found in folder')
            new_df = pd.DataFrame(columns = col_name)
            new_df = new_df.append(df)
            new_df.to_csv(output_folder + 'defect_report.csv', index=False)
            print('Created defect_report.csv with latest data')
        
            break

In [200]:
img_report(final_df)

Current Timestamp :  16-Oct-2020 18:12:48
defect_report.csv not found in folder
Created defect_report.csv with latest data


In [21]:
def steel_prediction(X):
    '''
    Function-1:
    Input: ImageId(dataframe)
    Process: Calls pred_combined which calls pred_classification and pred_segmentation
            Applies thresholds -> area and classification probability
    Return: DataFrame (columns = ImageId_ClassId,EncodedPixels)
    
    '''
    X = X.reset_index().drop('index',axis=1)
    p = pred_combined(X)
    tmp = []
    for i in range(len(p)):      
        j, b, ep1, ep2, ep3, ep4 = p.iloc[i]  
        # j, b, m1, m2, m3, m4, ep1, ep2, ep3, ep4 = p.iloc[i]
        # randomly selected classification threshold values to get high recall 
        # for no defect binary classifier and high precision for multi-label classifier 
        # while not compromising much on other metrics
        
        # area thresholds are determined from EDA performed only on train dataset
        if area(ep1)>=500 and area(ep1)<=15500 and b>=0.95: 
            tmp.append([j+'_1',ep1])
        else:
            tmp.append([j+'_1',''])

        if area(ep2)>=700 and area(ep2)<=10000 and b>=0.855:
            tmp.append([j+'_2',ep2])
        else:
            tmp.append([j+'_2',''])

        if area(ep3)>=1100 and area(ep3)<=160000 and b>=0.85:
            tmp.append([j+'_3',ep3])
        else:
            tmp.append([j+'_3',''])
            
        if area(ep4)>=2800 and area(ep4)<=127000 and b>=0.85:
            tmp.append([j+'_4',ep4])
        else:
            tmp.append([j+'_4',''])
            
    return pd.DataFrame(tmp, columns = ['ImageId_ClassId','EncodedPixels'])

In [23]:
final_prediction = steel_prediction(X)
final_prediction.head(10)

Found 2 validated image filenames.


Unnamed: 0,ImageId_ClassId,EncodedPixels
0,0abfbfc69.jpg_1,
1,0abfbfc69.jpg_2,6394 5 6650 5 6905 5 7161 5 97387 10 97643 10 ...
2,0abfbfc69.jpg_3,652 14 908 14 1150 70 1406 70 1659 77 1915 77 ...
3,0abfbfc69.jpg_4,142446 1 142702 1 142956 6 143212 6 143467 8 1...
4,0ac9936af.jpg_1,341104 36 341360 36 341613 46 341869 46 342123...
5,0ac9936af.jpg_2,
6,0ac9936af.jpg_3,339060 53 339316 53 339547 90 339803 90 340057...
7,0ac9936af.jpg_4,339040 3 339046 111 339296 3 339302 111 339542...


**Generating Predictions on raw test dataset for submission on Kaggle**

In [14]:
def defect_detection_output():
    '''
    Used to generate X_train, X_val and X_test, each include corresponding Y data also
    (Raw Data)
    '''
    defect_database = pd.read_csv('./defect_detection/defect_detection_database.csv')

    imgs = defect_database[['ImageId']][2:3]
    preds = steel_prediction(imgs)

    print(preds.head)

defect_detection_output()


Found 1 validated image filenames.
<bound method NDFrame.head of    ImageId_ClassId EncodedPixels
0  000418bfc.jpg_1              
1  000418bfc.jpg_2              
2  000418bfc.jpg_3              
3  000418bfc.jpg_4              >


In [None]:
c2 = steel_prediction(raw_test)
c2.to_csv('./Output/severstal_final_test_preds_07022020.csv',index=False)
c2.shape

Found 5506 validated image filenames.


(22024, 2)