In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load:

import gzip
import os
import os.path as pth
import pickle
from pprint import pprint

import matplotlib.pyplot as plt
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

from sklearn.model_selection import train_test_split, GroupKFold

from tensorflow.keras import Input, Model
from tensorflow.keras.layers import Dense, Conv2D, MaxPool2D, UpSampling2D, Add, Flatten, Reshape
from tensorflow.keras.utils import Sequence

# Print the path of every file under the Kaggle input mount so the dataset
# layout is visible in the cell output.
# `os` has to be imported explicitly for `os.walk`: `import os.path as pth`
# binds only the name `pth`, not `os`.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(pth.join(dirname, filename))

In [None]:
# List the files in the convert-train input dataset (the path is relative to
# the notebook's working directory).
!ls -al ../input/convert-train/

In [None]:
# Image geometry in pixels; used for the model input and for the batch arrays.
HEIGHT = 256
WIDTH = 1600

# Input locations, all rooted at the Kaggle input mount.
ROOT_DIR = '/kaggle/input'
TRAIN_DATA_DIR = pth.join(ROOT_DIR, 'convert-train')
TRAIN_IMG_DIR = pth.join(ROOT_DIR, 'severstal-steel-defect-detection/train_images')

In [None]:
# Load the training table from the convert-train dataset. Later cells read its
# `imageid`, `classid` and `mask_present` columns.
data = pd.read_feather(pth.join(TRAIN_DATA_DIR, 'train.feather'))

In [None]:
# Preview the first rows of the training table.
data.head()

In [None]:
# Load the gzip-compressed pickle of masks. ImageLoader looks entries up with
# '<imageid>_<classid>' keys.
# NOTE(review): pickle.load can execute arbitrary code while deserializing;
# only load this file from a source you trust.
with gzip.open(pth.join(TRAIN_DATA_DIR, 'train_masks.pickle'), 'rb') as f:
    image_masks = pickle.load(f)

In [None]:
def create_inception_pair(num_filters, kernel_size, input_layer):
    """Build one inception branch: a 1x1 convolution followed by a square one.

    Args:
        num_filters: Number of filters used by both convolutions.
        kernel_size: Side length of the second, square kernel.
        input_layer: Tensor the branch is applied to.

    Returns:
        The output tensor of the second convolution.
    """
    conv_options = dict(padding='same', activation='relu')
    pointwise = Conv2D(num_filters, (1, 1), **conv_options)(input_layer)
    spatial_conv = Conv2D(num_filters, (kernel_size, kernel_size), **conv_options)
    return spatial_conv(pointwise)

def create_inception_layer(num_filters, input_layer):
    """Sum three parallel inception branches with 3x3, 5x5 and 7x7 kernels.

    Args:
        num_filters: Number of filters in every branch.
        input_layer: Tensor fed to all three branches.

    Returns:
        The element-wise sum (``Add``) of the three branch outputs.
    """
    branches = [
        create_inception_pair(num_filters, size, input_layer)
        for size in (3, 5, 7)
    ]
    return Add()(branches)

def up_sampler(num_filters, input_layer, output_activation='sigmoid'):
    """Decode a feature map into a single-channel mask prediction.

    The input is upsampled by (2, 4) twice, i.e. 4x along the first spatial
    axis and 16x along the second, with a 3x3 convolution after each step.

    Args:
        num_filters: Number of filters in the intermediate convolution.
        input_layer: Feature tensor to decode.
        output_activation: Activation of the final one-filter convolution.
            Defaults to 'sigmoid' so the output lies in (0, 1), which is what
            the 'binary_crossentropy' loss used by create_model expects; an
            unbounded 'relu' output is not a valid probability.

    Returns:
        The output tensor of the final one-filter convolution.
    """
    x = UpSampling2D((2, 4))(input_layer)
    x = Conv2D(num_filters, (3, 3), padding='same', activation='relu')(x)

    x = UpSampling2D((2, 4))(x)
    return Conv2D(1, (3, 3), padding='same', activation=output_activation)(x)

def create_model():
    """Build and compile the four-output segmentation network.

    The encoder applies two (inception layer, (2, 4) max-pool) stages to a
    single-channel ``(HEIGHT, WIDTH, 1)`` input. Four independent up_sampler
    heads are then attached to the pooled features, giving four outputs that
    line up positionally with the four mask arrays ImageLoader yields.

    Returns:
        A Model compiled with the 'adam' optimizer and 'binary_crossentropy'
        loss.
    """
    pool_size = (2, 4)
    input_img = Input((HEIGHT, WIDTH, 1), dtype='float32')

    # Encoder: two identical inception + pooling stages.
    features = input_img
    for _ in range(2):
        features = create_inception_layer(16, features)
        features = MaxPool2D(pool_size)(features)

    # Decoder: four separate heads on the same encoded features.
    heads = [up_sampler(16, features) for _ in range(4)]

    model = Model(inputs=input_img, outputs=heads)
    model.compile(optimizer='adam', loss='binary_crossentropy')
    return model

# Build the network once and print its layer-by-layer summary.
model = create_model()
model.summary()

In [None]:
# Group the table by image so all rows of one image can be fetched together.
image_groups = data.groupby('imageid')

# Work on a small subset of the images. The fixed seed makes the train/test
# split identical on every run; without it the split is reshuffled each time.
NUM_IMAGES = 100
SPLIT_SEED = 42

image_names = list(data.imageid.unique())[:NUM_IMAGES]
train_image_names, test_image_names = train_test_split(
    image_names, random_state=SPLIT_SEED)
len(train_image_names), len(test_image_names)

In [None]:
import math

class ImageLoader(Sequence):
    """Keras ``Sequence`` that serves batches of images with per-class masks.

    A batch is ``(images, [m1, m2, m3, m4])``. ``images`` has shape
    ``(batch, HEIGHT, WIDTH, 1)`` and every mask array has the same shape; the
    mask for ``classid`` is stored at list position ``classid - 1``.

    Decoded images are kept in an in-memory cache that is never evicted, so
    memory use grows with the number of distinct images requested.
    """

    # Number of mask arrays produced per batch (class ids 1..NUM_CLASSES).
    NUM_CLASSES = 4

    def __init__(self, image_names, image_groups, image_masks, batch_size):
        """Store the data sources and create an empty image cache.

        Args:
            image_names: Sliceable collection of image ids to iterate over.
            image_groups: Object whose ``get_group(imageid)`` returns the rows
                of one image; the rows expose ``classid`` and ``mask_present``.
            image_masks: Mapping from ``'<imageid>_<classid>'`` to a mask
                array (assumed to be HEIGHT x WIDTH - TODO confirm).
            batch_size: Maximum number of images per batch.
        """
        # Harmless for older Sequence classes, required by newer Keras ones.
        super().__init__()

        self.image_names = image_names
        self.image_groups = image_groups
        self.image_masks = image_masks

        self.batch_size = batch_size

        self.image_cache = {}

    def __len__(self):
        """Return the number of batches, counting a final partial batch."""
        return math.ceil(len(self.image_names) / self.batch_size)

    def __getitem__(self, idx):
        """Assemble batch number ``idx``.

        Returns:
            A tuple ``(images, masks)`` where ``images`` is a float32 array of
            shape ``(size, HEIGHT, WIDTH, 1)`` and ``masks`` is a list of
            NUM_CLASSES float32 arrays of that same shape. Classes without a
            mask for an image stay all-zero.
        """
        start = idx * self.batch_size
        end = min(start + self.batch_size, len(self.image_names))
        size = end - start

        img = np.zeros((size, HEIGHT, WIDTH, 1), dtype='float32')
        masks = np.zeros((self.NUM_CLASSES, size, HEIGHT, WIDTH, 1), dtype='float32')

        for i, imageid in enumerate(self.image_names[start:end]):
            # get_image already divides by 255 and returns the cached array
            # itself. Scaling it again here, in place, would shrink the cached
            # pixels by another factor of 255 on every access.
            im = self.get_image(imageid)
            # Use the first channel of a multi-channel image; take a 2-D
            # (grayscale) image as it is.
            img[i, :, :, 0] = im if im.ndim == 2 else im[:, :, 0]

            image_group = self.image_groups.get_group(imageid)
            for row in image_group.itertuples():
                if row.mask_present:
                    key = f'{imageid}_{row.classid}'
                    mask = self.image_masks[key].astype('float32')
                    masks[row.classid - 1, i, :, :, 0] = mask

        return img, list(masks)

    def get_image(self, imageid):
        """Return the image for ``imageid`` as float32, divided by 255.

        The file ``<imageid>.jpg`` is read from TRAIN_IMG_DIR on first use and
        cached. The cached array itself is returned, so callers must not
        modify it in place.
        """
        if imageid not in self.image_cache:
            im = plt.imread(pth.join(TRAIN_IMG_DIR, f'{imageid}.jpg')).astype('float32')
            im /= 255
            self.image_cache[imageid] = im

        return self.image_cache[imageid]
            


In [None]:
# Train on the training split. `Model.fit` accepts a keras Sequence directly;
# `fit_generator` is deprecated and is no longer available in recent Keras.
BATCH_SIZE = 128
EPOCHS = 5

train_loader = ImageLoader(train_image_names, image_groups, image_masks, BATCH_SIZE)
history = model.fit(train_loader, epochs=EPOCHS)