[Source](https://www.kaggle.com/christofhenkel/keras-baseline)

In [1]:
import os
import numpy as np
import cv2
from tqdm import tqdm
from sklearn.model_selection import train_test_split

In [2]:
# Dataset locations. File names are appended to these by plain string
# concatenation further down, so each directory must keep its trailing slash.
TRAIN_IMAGE_DIR = 'data/salt/train/images/'
TRAIN_MASK_DIR = 'data/salt/train/masks/'
TEST_IMAGE_DIR = 'data/salt/test/images/'

In [3]:
# os.listdir returns entries in arbitrary, platform-dependent order. Sorting
# makes the later seeded train/validation split select the same files on
# every machine and every run.
train_fns = sorted(os.listdir(TRAIN_IMAGE_DIR))

In [4]:
# Sanity check: how many training image files were found.
len(train_fns)

4000

In [5]:
# Peek at the first few file names to confirm the naming pattern.
train_fns[:10]

['000e218f21.png',
 '003c477d7c.png',
 '00441f1cf2.png',
 '0050766ae2.png',
 '005b452274.png',
 '0061281eea.png',
 '008a50a2ec.png',
 '00950d1627.png',
 '00a3af90ab.png',
 '00cda0328c.png']

In [6]:
def load_grayscale_stack(image_dir, file_names):
    """Read grayscale images and stack them into one scaled 4-D array.

    image_dir  -- directory prefix; file names are appended to it directly,
                  so it must end with a path separator.
    file_names -- iterable of file names located in ``image_dir``.

    Returns an array of shape (n_images, height, width, 1) with pixel values
    divided by 255, i.e. in the range [0, 1].

    Raises FileNotFoundError naming the offending path when OpenCV cannot
    read a file. cv2.imread signals failure by returning None rather than
    raising, which would otherwise surface as an obscure conversion error.
    """
    images = []
    for file_name in tqdm(file_names):
        path = image_dir + file_name
        image = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        if image is None:
            raise FileNotFoundError('could not read image: {}'.format(path))
        images.append(image)
    scaled = np.array(images, dtype=np.uint8) / 255
    # Add the trailing channel axis expected by the convolutional layers.
    return np.expand_dims(scaled, axis=3)


# Masks share their file names with the images, so the same list drives both.
X = load_grayscale_stack(TRAIN_IMAGE_DIR, train_fns)
y = load_grayscale_stack(TRAIN_MASK_DIR, train_fns)

# Fixed seed: 80 % training / 20 % validation.
X_train, X_valid, y_train, y_valid = train_test_split(X,y, random_state=23, test_size = 0.2)

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████| 4000/4000 [00:01<00:00, 2079.23it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████| 4000/4000 [00:01<00:00, 3965.13it/s]


In [7]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Conv2D, Input, Concatenate
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

In [8]:
def conv_block(num_layers,inp,units,kernel):
    """Chain ``num_layers`` identical convolutions on top of ``inp``.

    Every layer is a Conv2D with ``units`` filters, a ``kernel`` sized
    window, 'SAME' padding and ReLU activation. The output of the last
    layer is returned; with ``num_layers`` of 0 the input is returned as is.
    """
    tensor = inp
    for _ in range(num_layers):
        layer = Conv2D(units, kernel_size=kernel, padding='SAME', activation='relu')
        tensor = layer(tensor)
    return tensor

In [9]:
# Three parallel 4-layer convolution branches read the same input with
# different (filters, kernel size) settings; their feature maps are
# concatenated and reduced by two 1x1 convolutions to one sigmoid channel.
inp = Input(shape=(101, 101, 1))
cnn1, cnn2, cnn3 = [
    conv_block(4, inp, units, kernel)
    for units, kernel in ((32, 3), (24, 5), (16, 7))
]
concat = Concatenate()([cnn1, cnn2, cnn3])
d1 = Conv2D(16, 1, activation='relu')(concat)
out = Conv2D(1, 1, activation='sigmoid')(d1)

model = Model(inputs=inp, outputs=out)
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 101, 101, 1)  0                                            
__________________________________________________________________________________________________
conv2d (Conv2D)                 (None, 101, 101, 32) 320         input_1[0][0]                    
__________________________________________________________________________________________________
conv2d_4 (Conv2D)               (None, 101, 101, 24) 624         input_1[0][0]                    
__________________________________________________________________________________________________
conv2d_8 (Conv2D)               (None, 101, 101, 16) 800         input_1[0][0]                    
__________________________________________________________________________________________________
conv2d_1 (

In [10]:
from tensorflow.keras.utils import plot_model 
# Render the layer graph to a PNG file (path is relative to the working directory).
plot_model(model, to_file='keras-baseline-architecture.png')

![](keras-baseline-architecture.png)

In [11]:
# Binary cross-entropy against the model's sigmoid output, optimised with Adam.
model.compile(optimizer='adam',loss='binary_crossentropy')

# Stop training once the monitored quantity (left at the Keras default) has
# not improved for 5 epochs.
early_stop = EarlyStopping(patience=5)
# Write the model to model.hdf5, overwriting only when the monitored quantity improves.
# NOTE(review): the in-memory `model` keeps the last-epoch weights after
# training; the best-epoch weights exist only in this file.
check_point = ModelCheckpoint('model.hdf5',save_best_only=True)

In [12]:
# Train for at most 50 epochs, validating on the held-out split after each one.
model.fit(
    X_train,
    y_train,
    batch_size=32,
    epochs=50,
    validation_data=(X_valid, y_valid),
    callbacks=[early_stop, check_point],
)

Train on 3200 samples, validate on 800 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<tensorflow.python.keras.callbacks.History at 0x1437c81a588>

In [13]:
# Load every test image as grayscale, scale to [0, 1] and add a channel axis,
# mirroring the preprocessing applied to the training images.
test_fns = os.listdir(TEST_IMAGE_DIR)
X_test = [np.array(cv2.imread(TEST_IMAGE_DIR + p, cv2.IMREAD_GRAYSCALE), dtype=np.uint8) for p in tqdm(test_fns)]
X_test = np.array(X_test)/255
X_test = np.expand_dims(X_test,axis=3)

# After fit() the in-memory model holds the weights of the LAST epoch, while
# the checkpoint callback kept the best-scoring epoch in model.hdf5. Restore
# those weights so predictions come from the best model, not the final one.
model.load_weights('model.hdf5')
pred = model.predict(X_test, verbose = True)

100%|████████████████████████████████████████████████████████████████████████████████████████████████████| 18000/18000 [00:50<00:00, 356.88it/s]






In [14]:
def RLenc(img, order='F', format=True):
    """
    Run-length encode a binary mask.

    img is a binary mask image, shape (r,c); every non-zero pixel counts as set
    order is the flattening order; 'F' (Fortran) means down-then-right
    format determines whether the result is formatted as a string
    (according to submission rules) or returned as raw runs

    returns the runs as a list of (start, length) tuples with 1-based start
    positions, or as the space separated string "start length start length ..."
    (if format is True); a mask without set pixels gives [] or ''
    """
    flat = img.reshape(img.shape[0] * img.shape[1], order=order)
    mask = np.asarray(flat) != 0

    # Pad with an unset pixel on both sides so that every run, including one
    # touching the first or last pixel, has a rising and a falling edge.
    padded = np.concatenate(([False], mask, [False]))
    edges = np.flatnonzero(padded[1:] != padded[:-1])

    # Edges alternate rising/falling. A rising edge at index j is the 0-based
    # start of a run; the following falling edge is its exclusive end.
    starts = edges[0::2] + 1  # positions count from 1
    lengths = edges[1::2] - edges[0::2]
    runs = [(int(s), int(n)) for s, n in zip(starts, lengths)]

    if format:
        return ' '.join('{} {}'.format(s, n) for s, n in runs)
    return runs

In [15]:
# Map each image id (file name without extension) to the run-length encoding
# of its prediction rounded to 0/1. tqdm wraps the list itself, not the
# enumerate() generator, so it knows the total and can show percentage and ETA.
pred_dict = {
    os.path.splitext(fn)[0]: RLenc(np.round(pred[i, :, :, 0]))
    for i, fn in enumerate(tqdm(test_fns))
}

18000it [04:42, 63.71it/s]


In [16]:
import pandas as pd

# One row per test image: the index holds the image id, the single column
# holds the run-length encoded mask. Written out as the submission CSV.
sub = pd.DataFrame.from_dict(pred_dict, orient='index').rename_axis('id')
sub.columns = ['rle_mask']
sub.to_csv('keras-baseline-submission.csv')