This is a follow-up to my [previous kernel](https://www.kaggle.com/meaninglesslives/simple-classifier-train/notebook), which tried to predict whether an image contains salt or not. In this kernel, I use transfer learning (the Xception model) to improve the classifier's validation accuracy to around 87%. Happy Kaggling :-)

In [None]:
import os
import sys
import random
import warnings

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

from tqdm import tqdm_notebook, tnrange
from skimage.io import imread, imshow, concatenate_images
from skimage.transform import resize
from skimage.morphology import label
from sklearn.model_selection import train_test_split

from keras.utils import plot_model
from keras.models import Model, load_model
from keras.layers import Input
from keras.layers.core import Lambda, RepeatVector, Reshape
from keras.layers.convolutional import Conv2D, Conv2DTranspose
from keras.layers.pooling import MaxPooling2D
from keras.layers.merge import concatenate
from keras import models
from keras import layers
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from keras import backend as K
from keras.models import Model
from keras.layers import Input, Dense, Reshape, concatenate, Conv2D, Flatten, MaxPooling2D
from keras.layers import BatchNormalization, Dropout, GlobalMaxPooling2D
from keras import optimizers

import tensorflow as tf
import warnings
warnings.filterwarnings("ignore")

from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img

In [None]:
# Input geometry and dataset locations shared by the cells below.
im_width = 128
im_height = 128
border = 5  # not referenced anywhere else in the visible notebook
im_chan = 3  # the single grayscale channel is copied into all three channels
n_features = 1  # Number of extra features, like depth
path_train = '../input/train/'
path_test = '../input/test/'
df_depths = pd.read_csv('../input/depths.csv', index_col='id')
# os.walk yields (dirpath, dirnames, filenames); [2] keeps only the files that
# sit directly inside the images folder. The listing order is whatever the
# filesystem returns, so sort it: otherwise the random_state=42 split further
# down is not reproducible from one machine to the next.
train_ids = sorted(next(os.walk(path_train + "images"))[2])
test_ids = sorted(next(os.walk(path_test + "images"))[2])

In [None]:
# https://www.kaggle.com/bguberfain/unet-with-depth
# Build the classifier inputs: each training image is resized and stored in X,
# its depth in X_feat, and y holds 1 when its mask has any non-zero pixel
# (salt present) and 0 otherwise.
X = np.zeros((len(train_ids), im_height, im_width, im_chan), dtype=np.float32)
y = np.zeros((len(train_ids), ), dtype=np.float32)
X_feat = np.zeros((len(train_ids), n_features), dtype=np.float32)
print('resizing train images and masks ... ')
sys.stdout.flush()
for n, id_ in tqdm_notebook(enumerate(train_ids), total=len(train_ids)):
    path = path_train

    # Depth, looked up by the file name without its extension
    X_feat[n] = df_depths.loc[id_.replace('.png', ''), 'z']

    # Load X and resize it to the size X was allocated with (previously a
    # hard-coded 128 that could drift from im_height / im_width).
    img = load_img(path + '/images/' + id_, grayscale=True)
    x_img = img_to_array(img)
    x_img = resize(x_img, (im_height, im_width, 1), mode='constant', preserve_range=True)

    # Load Y
    mask = img_to_array(load_img(path + '/masks/' + id_, grayscale=True))
    mask = resize(mask, (im_height, im_width, 1), mode='constant', preserve_range=True)

    # Save images: divide by 255 and let NumPy broadcast the single gray
    # channel across every channel of X[n].
    X[n] = x_img / 255

    y[n] = 1 if np.sum(mask) > 0 else 0

# y is 1 for images WITH salt, so the no-salt share is the complement of its
# mean; the earlier version printed the salt share under this label.
print('Percentage of images with no salt in train set:', (1 - np.sum(y)/y.shape[0])*100)

In [None]:
# Show up to six training images labelled "no salt" (y == 0).
# The indices are selected up front so the loop cannot run past the end of y
# when fewer than six such images exist.
no_salt_idx = np.flatnonzero(y == 0)[:6]
plt.figure(figsize=(30,15))
plt.subplots_adjust(bottom=0.2, top=0.8, hspace=0.2)  #adjust this to change vertical and horiz. spacings..
for j, i in enumerate(no_salt_idx):
    no_salt = X[i,:,:,:]
    plt.subplot(1,6,j+1)
    plt.imshow(no_salt)
    # Drop the 4-character file extension from the displayed id
    plt.title('ID: '+ train_ids[i][:-4])

plt.suptitle('Train set Images that have no salt', y=0.7, fontsize=30)

In [None]:
# Show up to six training images labelled "salt" (y == 1).
# The indices are selected up front so the loop cannot run past the end of y
# when fewer than six such images exist.
with_salt_idx = np.flatnonzero(y == 1)[:6]
plt.figure(figsize=(30,15))
plt.subplots_adjust(bottom=0.2, top=0.8, hspace=0.2)  #adjust this to change vertical and horiz. spacings..
for j, i in enumerate(with_salt_idx):
    with_salt = X[i,:,:,:]
    plt.subplot(1,6,j+1)
    plt.imshow(with_salt)
    # Drop the 4-character file extension from the displayed id
    plt.title('ID: '+ train_ids[i][:-4])

plt.suptitle('Train set Images that have salt', y=0.7, fontsize=30)

In [None]:
# Pretrained ImageNet convolutional base without its classification head.
# Below it is only ever used through conv_base.predict, i.e. as a fixed
# feature extractor.
# uncomment to use the other models..
# from keras.applications import VGG16
from keras.applications import Xception
# from keras.applications import MobileNet
# from keras.applications import VGG19
# Derive the input shape from the constants the image arrays are allocated
# with, instead of repeating the literal (128, 128, 3).
base_input_shape = (im_height, im_width, im_chan)
# conv_base = VGG16(weights='imagenet',include_top=False,input_shape=base_input_shape)
conv_base = Xception(weights='imagenet',include_top=False,input_shape=base_input_shape)
# conv_base = MobileNet(weights='imagenet',include_top=False,input_shape=base_input_shape)
# conv_base = VGG19(weights='imagenet',include_top=False,input_shape=base_input_shape)

In [None]:
# Hold out 15% of the samples for validation; images, depth features and
# labels are split together so their rows stay aligned.
(X_train, X_valid,
 X_feat_train, X_feat_valid,
 y_train, y_valid) = train_test_split(X, X_feat, y, test_size=0.15, random_state=42)

# Training callbacks, all acting on the monitored quantity (Keras default):
#   - stop once it has not improved for 5 epochs,
#   - lower the learning rate once it has not improved for 3 epochs,
#   - keep only the best weights on disk.
early_stop = EarlyStopping(patience=5, verbose=1)
lr_on_plateau = ReduceLROnPlateau(patience=3, verbose=1)
best_weights = ModelCheckpoint(
    'model-tgs-salt-classifier.h5',
    verbose=1,
    save_best_only=True,
    save_weights_only=True,
)
callbacks = [early_stop, lr_on_plateau, best_weights]

In [None]:
# Run both splits through the frozen convolutional base once, then flatten
# each feature map into one vector per image for the dense classifier.
# NOTE(review): X is scaled to [0, 1] here, whereas Keras' Xception
# preprocess_input scales pixels to [-1, 1] -- confirm this is intentional.
train_features = conv_base.predict(X_train)
validation_features = conv_base.predict(X_valid)

# feat_shape is kept as an array of the un-flattened train feature shape; the
# model cell reads feat_shape[1:] to size its input layer.
feat_shape = np.array(train_features.shape)

# -1 lets NumPy compute feat_shape[1] * feat_shape[2] * feat_shape[3].
train_features = train_features.reshape(feat_shape[0], -1)
validation_features = validation_features.reshape(validation_features.shape[0], -1)

In [None]:
# Small fully-connected head trained on the flattened conv_base features.
# The single sigmoid unit outputs the probability of the positive class
# (y == 1, i.e. the image contains salt).
flat_dim = feat_shape[1]* feat_shape[2]*feat_shape[3]
model = models.Sequential([
    layers.Dense(256, activation='relu', input_dim=flat_dim),
    layers.Dropout(0.5),
    layers.Dense(128, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])

model.compile(
    optimizer=optimizers.RMSprop(lr=2e-5),
    loss='binary_crossentropy',
    metrics=['acc'],
)

# Train on the pre-computed features; the callbacks list handles early
# stopping, learning-rate reduction and checkpointing.
history = model.fit(
    train_features,
    y_train,
    epochs=30,
    batch_size=20,
    validation_data=(validation_features, y_valid),
    callbacks=callbacks,
)

In [None]:
# Print the layer-by-layer summary of the classifier head built above.
model.summary()

In [None]:
# Plot the per-epoch training curves recorded by model.fit: accuracy on the
# current figure, loss on a second one.
recorded = history.history
acc, val_acc = recorded['acc'], recorded['val_acc']
loss, val_loss = recorded['loss'], recorded['val_loss']
epochs = range(1, len(acc) + 1)

# (training curve, validation curve, legend suffix, title)
panels = [
    (acc, val_acc, 'acc', 'Training and validation accuracy'),
    (loss, val_loss, 'loss', 'Training and validation loss'),
]
for panel_no, (train_curve, valid_curve, suffix, title) in enumerate(panels):
    if panel_no > 0:
        # Every panel after the first gets its own figure.
        plt.figure()
    plt.plot(epochs, train_curve, 'bo', label='Training ' + suffix)
    plt.plot(epochs, valid_curve, 'b', label='Validation ' + suffix)
    plt.title(title)
    plt.legend()
plt.show()

In [None]:
# Get and resize test images
X_test = np.zeros((len(test_ids), im_height, im_width, im_chan), dtype=np.float32)
X_feat_test = np.zeros((len(test_ids), n_features), dtype=np.float32)  # allocated but not filled in this cell
sizes_test = []  # original [height, width] of every test image
print('Getting and resizing test images ... ')
sys.stdout.flush()
for n, id_ in tqdm_notebook(enumerate(test_ids), total=len(test_ids)):
    path = path_test

    # Load X, remember its original size, then resize it to the size X_test
    # was allocated with (previously a hard-coded 128 that could drift from
    # im_height / im_width).
    img = load_img(path + '/images/' + id_, grayscale=True)
    x = img_to_array(img)
    sizes_test.append([x.shape[0], x.shape[1]])
    x = resize(x, (im_height, im_width, 1), mode='constant', preserve_range=True)

    # Save images: divide by 255 and let NumPy broadcast the single gray
    # channel across every channel of X_test[n].
    X_test[n] = x / 255

In [None]:
# Predict on test data
test_features = conv_base.predict(X_test)
feat_shape = np.array(test_features.shape)
# Flatten each feature map into one vector per image, as for the training features.
test_features = test_features.reshape(feat_shape[0], -1)
test_predictions = model.predict(test_features)
# Round the sigmoid outputs to hard labels: 1 = salt, 0 = no salt.
test_predictions = np.round(test_predictions)
# 0.38 is fake, so predictions seem to be good..
# NOTE(review): the meaning of "0.38" above is not evident from this notebook -- confirm.
# The message reports the NO-salt share, so count the 0 predictions; the
# earlier version summed the 1s (salt) and printed them under this label.
no_salt_pct = np.mean(test_predictions == 0) * 100
print('Seems like',no_salt_pct,'% images of test set have no salt..')

In [None]:
# Show up to six test images the classifier labelled "salt" (prediction == 1).
# The indices are selected up front so the loop cannot run past the end of
# test_predictions when fewer than six such images exist. flatnonzero works
# on the flattened array, so an (N, 1) prediction array still yields row indices.
with_salt_idx = np.flatnonzero(test_predictions == 1)[:6]
plt.figure(figsize=(30,15))
plt.subplots_adjust(bottom=0.2, top=0.8, hspace=0.2)  #adjust this to change vertical and horiz. spacings..
for j, i in enumerate(with_salt_idx):
    with_salt = X_test[i,:,:,:]
    plt.subplot(1,6,j+1)
    plt.imshow(with_salt)
    # Drop the 4-character file extension from the displayed id
    plt.title('ID: '+ test_ids[i][:-4])

plt.suptitle('Predicted Test set Images that may have salt', y=0.7, fontsize=30)

In [None]:
# Show up to six test images the classifier labelled "no salt" (prediction == 0).
# The indices are selected up front so the loop cannot run past the end of
# test_predictions when fewer than six such images exist. flatnonzero works
# on the flattened array, so an (N, 1) prediction array still yields row indices.
no_salt_idx = np.flatnonzero(test_predictions == 0)[:6]
plt.figure(figsize=(30,15))
plt.subplots_adjust(bottom=0.2, top=0.8, hspace=0.2)  #adjust this to change vertical and horiz. spacings..
for j, i in enumerate(no_salt_idx):
    no_salt = X_test[i,:,:,:]
    plt.subplot(1,6,j+1)
    plt.imshow(no_salt)
    # Drop the 4-character file extension from the displayed id
    plt.title('ID: '+ test_ids[i][:-4])

plt.suptitle('Predicted Test set Images that may not have salt', y=0.7, fontsize=30)