# Table of Contents
 <p><div class="lev1 toc-item"><a href="#Loading-libraries" data-toc-modified-id="Loading-libraries-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Loading libraries</a></div><div class="lev1 toc-item"><a href="#Creating-the-model" data-toc-modified-id="Creating-the-model-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Creating the model</a></div><div class="lev1 toc-item"><a href="#Training-400x300" data-toc-modified-id="Training-400x300-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Training 400x300</a></div><div class="lev1 toc-item"><a href="#Predictions" data-toc-modified-id="Predictions-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Predictions</a></div><div class="lev1 toc-item"><a href="#Training-600x450" data-toc-modified-id="Training-600x450-5"><span class="toc-item-num">5&nbsp;&nbsp;</span>Training 600x450</a></div><div class="lev1 toc-item"><a href="#Predictions" data-toc-modified-id="Predictions-6"><span class="toc-item-num">6&nbsp;&nbsp;</span>Predictions</a></div>

# Loading libraries

In [1]:
import math
import os
from datetime import datetime

import numpy as np
import pandas as pd
from IPython.display import FileLink
from keras.applications.inception_v3 import InceptionV3
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.layers import Activation, Dropout, Flatten, Dense, GlobalAveragePooling2D
from keras.models import Sequential, load_model, Model
from keras.optimizers import SGD
from keras.preprocessing import image
from keras.preprocessing.image import ImageDataGenerator
from keras_tqdm import TQDMNotebookCallback

# Cap the number of rows pandas renders when a DataFrame is displayed.
pd.set_option("display.max_rows", 40)

Using TensorFlow backend.


# Creating the model

In [2]:
# InceptionV3 without its classification top, initialised with ImageNet weights.
base_model = InceptionV3(weights='imagenet', include_top=False)

Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.5/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5

In [3]:
# Print the layer-by-layer architecture of the pretrained base network.
base_model.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
input_1 (InputLayer)             (None, None, None, 3) 0                                            
____________________________________________________________________________________________________
conv2d_1 (Conv2D)                (None, None, None, 32 864         input_1[0][0]                    
____________________________________________________________________________________________________
batch_normalization_1 (BatchNorm (None, None, None, 32 96          conv2d_1[0][0]                   
____________________________________________________________________________________________________
activation_1 (Activation)        (None, None, None, 32 0           batch_normalization_1[0][0]      
___________________________________________________________________________________________

In [4]:
# Binary-classification head stacked on the convolutional base:
# global average pooling -> 1024-unit ReLU layer -> single sigmoid unit.
pooled = GlobalAveragePooling2D()(base_model.output)
hidden = Dense(1024, activation='relu')(pooled)
prediction = Dense(1, activation='sigmoid')(hidden)

# The full model maps the base network's input straight to the sigmoid output.
model_final = Model(inputs=base_model.input, outputs=prediction)

In [5]:
# SGD with momentum, a small learning rate and learning-rate decay;
# binary cross-entropy is the loss and accuracy is the reported metric.
optimizer = SGD(lr=1e-4, momentum=0.9, decay=1e-5)
model_final.compile(optimizer=optimizer,
                    loss='binary_crossentropy',
                    metrics=['accuracy'])

In [6]:
# Print the architecture of the complete model (base network plus new head).
model_final.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
input_1 (InputLayer)             (None, None, None, 3) 0                                            
____________________________________________________________________________________________________
conv2d_1 (Conv2D)                (None, None, None, 32 864         input_1[0][0]                    
____________________________________________________________________________________________________
batch_normalization_1 (BatchNorm (None, None, None, 32 96          conv2d_1[0][0]                   
____________________________________________________________________________________________________
activation_1 (Activation)        (None, None, None, 32 0           batch_normalization_1[0][0]      
___________________________________________________________________________________________

In [9]:
# Training-time augmentation: random rotations, horizontal/vertical shifts
# and horizontal flips.
# NOTE(review): no `rescale` or `preprocessing_function` is configured, so the
# generators presumably feed raw pixel values to the ImageNet-pretrained base —
# confirm this is intended.
datagen = ImageDataGenerator(horizontal_flip=True,
                             rotation_range=20,
                             height_shift_range=0.2,
                             width_shift_range=0.2)

# Validation and test images use a generator with no augmentation options set.
validgen = ImageDataGenerator()

# Training 400x300

In [10]:
# Candidate resolutions (width/height): 600/450, 500/375, 400/300, 300/225
img_width, img_height = 400, 300

train_data_dir = "data/train"
validation_data_dir = "data/valid"
test_data_dir = "data/test"

batch_size_train, batch_size_val = 16, 32

# Training batches: augmented and shuffled. target_size is given as
# (height, width).
train_gen = datagen.flow_from_directory(
    train_data_dir,
    target_size=(img_height, img_width),
    class_mode="binary",
    batch_size=batch_size_train,
    shuffle=True)

# Validation batches: no augmentation, shuffling disabled.
val_gen = validgen.flow_from_directory(
    validation_data_dir,
    target_size=(img_height, img_width),
    class_mode="binary",
    batch_size=batch_size_val,
    shuffle=False)

# Number of images found in each split; used to derive the steps per epoch.
train_samples = len(train_gen.filenames)
validation_samples = len(val_gen.filenames)

Found 1727 images belonging to 2 classes.
Found 568 images belonging to 2 classes.


In [None]:
# Save the weights (not the full model) to a per-epoch file.
# NOTE(review): with save_best_only=False, `monitor` presumably has no effect
# on what gets saved — confirm against the ModelCheckpoint documentation.
checkpoint = ModelCheckpoint(
    filepath="weights-iter-4-epoch-{epoch:02d}.hdf5",
    monitor='val_acc',
    save_best_only=False,
    save_weights_only=True,
    verbose=0)

# Stop once val_loss has failed to improve for 3 epochs.
early_stopping = EarlyStopping(
    monitor='val_loss',
    mode='auto',
    patience=3,
    verbose=1)

# Round up so the final, partially filled batch is included in every pass.
train_steps = math.ceil(train_samples / batch_size_train)
val_steps = math.ceil(validation_samples / batch_size_val)

model_final.fit_generator(
    train_gen,
    steps_per_epoch=train_steps,
    epochs=40,
    validation_data=val_gen,
    validation_steps=val_steps,
    callbacks=[early_stopping, TQDMNotebookCallback(), checkpoint],
    verbose=2)

Epoch 1/40
179s - loss: 0.6043 - acc: 0.6555 - val_loss: 0.5371 - val_acc: 0.6743


Epoch 2/40
135s - loss: 0.4859 - acc: 0.7778 - val_loss: 0.4101 - val_acc: 0.8169


Epoch 3/40
136s - loss: 0.4047 - acc: 0.8460 - val_loss: 0.3282 - val_acc: 0.9032


Epoch 4/40
135s - loss: 0.3402 - acc: 0.8743 - val_loss: 0.2764 - val_acc: 0.9278


Epoch 5/40
136s - loss: 0.2724 - acc: 0.9097 - val_loss: 0.2330 - val_acc: 0.9349


Epoch 6/40
135s - loss: 0.2426 - acc: 0.9120 - val_loss: 0.1971 - val_acc: 0.9472


Epoch 7/40
135s - loss: 0.2140 - acc: 0.9224 - val_loss: 0.1707 - val_acc: 0.9507


Epoch 8/40
135s - loss: 0.1856 - acc: 0.9387 - val_loss: 0.1532 - val_acc: 0.9595


Epoch 9/40
136s - loss: 0.1831 - acc: 0.9340 - val_loss: 0.1353 - val_acc: 0.9613


Epoch 10/40
134s - loss: 0.1711 - acc: 0.9416 - val_loss: 0.1240 - val_acc: 0.9613


Epoch 11/40
133s - loss: 0.1522 - acc: 0.9508 - val_loss: 0.1141 - val_acc: 0.9648


Epoch 12/40
133s - loss: 0.1498 - acc: 0.9432 - val_loss: 0.1058 - val_acc: 0.9665


Epoch 13/40
133s - loss: 0.1365 - acc: 0.9536 - val_loss: 0.0998 - val_acc: 0.9665


Epoch 14/40
132s - loss: 0.1395 - acc: 0.9502 - val_loss: 0.0947 - val_acc: 0.9683


Epoch 15/40
133s - loss: 0.1167 - acc: 0.9630 - val_loss: 0.0888 - val_acc: 0.9701


Epoch 16/40
132s - loss: 0.1377 - acc: 0.9507 - val_loss: 0.0875 - val_acc: 0.9683


Epoch 17/40
132s - loss: 0.1003 - acc: 0.9647 - val_loss: 0.0813 - val_acc: 0.9718


Epoch 18/40
134s - loss: 0.1114 - acc: 0.9646 - val_loss: 0.0781 - val_acc: 0.9754


Epoch 19/40
133s - loss: 0.1076 - acc: 0.9618 - val_loss: 0.0767 - val_acc: 0.9771


Epoch 20/40
133s - loss: 0.1120 - acc: 0.9630 - val_loss: 0.0748 - val_acc: 0.9736


Epoch 21/40
133s - loss: 0.1091 - acc: 0.9624 - val_loss: 0.0728 - val_acc: 0.9736


Epoch 22/40
133s - loss: 0.0940 - acc: 0.9693 - val_loss: 0.0723 - val_acc: 0.9754


Epoch 23/40
133s - loss: 0.0846 - acc: 0.9705 - val_loss: 0.0704 - val_acc: 0.9771


Epoch 24/40
134s - loss: 0.0901 - acc: 0.9664 - val_loss: 0.0675 - val_acc: 0.9754


Epoch 25/40
133s - loss: 0.0897 - acc: 0.9670 - val_loss: 0.0675 - val_acc: 0.9771


Epoch 26/40
133s - loss: 0.0797 - acc: 0.9733 - val_loss: 0.0650 - val_acc: 0.9754


Epoch 27/40
132s - loss: 0.0655 - acc: 0.9803 - val_loss: 0.0626 - val_acc: 0.9771


Epoch 28/40
133s - loss: 0.0797 - acc: 0.9733 - val_loss: 0.0631 - val_acc: 0.9771


Epoch 29/40
134s - loss: 0.0693 - acc: 0.9745 - val_loss: 0.0608 - val_acc: 0.9771


Epoch 30/40
133s - loss: 0.0649 - acc: 0.9821 - val_loss: 0.0593 - val_acc: 0.9771


Epoch 31/40
133s - loss: 0.0604 - acc: 0.9802 - val_loss: 0.0574 - val_acc: 0.9771


Epoch 32/40
133s - loss: 0.0657 - acc: 0.9780 - val_loss: 0.0561 - val_acc: 0.9789


Epoch 33/40
133s - loss: 0.0677 - acc: 0.9820 - val_loss: 0.0565 - val_acc: 0.9806


Epoch 34/40
133s - loss: 0.0549 - acc: 0.9838 - val_loss: 0.0556 - val_acc: 0.9806


Epoch 35/40
133s - loss: 0.0575 - acc: 0.9832 - val_loss: 0.0555 - val_acc: 0.9806


Epoch 36/40
132s - loss: 0.0505 - acc: 0.9850 - val_loss: 0.0550 - val_acc: 0.9789


Epoch 37/40
133s - loss: 0.0454 - acc: 0.9855 - val_loss: 0.0509 - val_acc: 0.9789


Epoch 40/40
132s - loss: 0.0412 - acc: 0.9861 - val_loss: 0.0505 - val_acc: 0.9789



<keras.callbacks.History at 0x7fde5e813160>

In [8]:
# Restore weights checkpointed during the 400x300 run (the file name follows the
# "weights-iter-4-epoch-{epoch:02d}.hdf5" pattern given to ModelCheckpoint).
# NOTE(review): the epoch number is hard-coded, and whether "39" is the 39th or
# the 40th epoch depends on how this Keras version numbers epochs in checkpoint
# file names — confirm.
model_final.load_weights('weights-iter-4-epoch-39.hdf5')

# Predictions

In [11]:
batch_size_test = 64

# Test images go through the non-augmenting generator with shuffling disabled,
# so that predictions can later be paired with test_gen.filenames.
test_gen = validgen.flow_from_directory(
    test_data_dir,
    target_size=(img_height, img_width),
    class_mode="binary",
    batch_size=batch_size_test,
    shuffle=False)

# Number of test images found on disk.
test_samples = len(test_gen.filenames)

Found 1531 images belonging to 1 classes.


In [12]:
# Rewind the iterator so batches start at the first file; otherwise re-running
# this cell could return predictions that no longer line up with
# test_gen.filenames.
test_gen.reset()

# Round the step count up so the final, partially filled batch is predicted too.
preds = model_final.predict_generator(test_gen, math.ceil(test_samples / batch_size_test))

# The submission needs exactly one prediction row per test image.
assert len(preds) == test_samples, "expected one prediction per test image"

In [13]:
# Turn each test image path (e.g. "unknown/12.jpg") into its integer id.
# basename/splitext avoid depending on the sub-directory name, the path
# separator or the exact spelling of the file extension.
preds_filenames = [int(os.path.splitext(os.path.basename(path))[0])
                   for path in test_gen.filenames]

# One row per image, indexed and ordered by image id; column 0 of `preds` is the
# model's single sigmoid output.
df_result = (pd.DataFrame({'name': preds_filenames, 'invasive': preds[:, 0]})
               .set_index("name")
               .sort_index())

# Write the submission file (the index becomes the "name" column) and show a
# download link as the cell output.
df_result.to_csv("submission_02.csv", encoding="utf8", index=True)
FileLink('submission_02.csv')

In [None]:
# submission_02.csv (400x300 model) scored 0.99246 on the leaderboard (LB)

# Training 600x450

In [14]:
# Candidate resolutions (width/height): 600/450, 500/375, 400/300, 300/225
img_width, img_height = 600, 450

train_data_dir = "data/train"
validation_data_dir = "data/valid"
test_data_dir = "data/test"

batch_size_train, batch_size_val = 16, 32

# Rebuild the training iterator at the larger resolution: augmented and
# shuffled. target_size is given as (height, width).
train_gen = datagen.flow_from_directory(
    train_data_dir,
    target_size=(img_height, img_width),
    class_mode="binary",
    batch_size=batch_size_train,
    shuffle=True)

# Validation batches: no augmentation, shuffling disabled.
val_gen = validgen.flow_from_directory(
    validation_data_dir,
    target_size=(img_height, img_width),
    class_mode="binary",
    batch_size=batch_size_val,
    shuffle=False)

# Number of images found in each split; used to derive the steps per epoch.
train_samples = len(train_gen.filenames)
validation_samples = len(val_gen.filenames)

Found 1727 images belonging to 2 classes.
Found 568 images belonging to 2 classes.


In [15]:
# Save the weights (not the full model) to a per-epoch file.
# NOTE(review): with save_best_only=False, `monitor` presumably has no effect
# on what gets saved — confirm against the ModelCheckpoint documentation.
checkpoint = ModelCheckpoint(
    filepath="weights-iter-5-epoch-{epoch:02d}.hdf5",
    monitor='val_acc',
    save_best_only=False,
    save_weights_only=True,
    verbose=0)

# Stop once val_loss has failed to improve for 3 epochs.
early_stopping = EarlyStopping(
    monitor='val_loss',
    mode='auto',
    patience=3,
    verbose=1)

# Round up so the final, partially filled batch is included in every pass.
train_steps = math.ceil(train_samples / batch_size_train)
val_steps = math.ceil(validation_samples / batch_size_val)

# Continues training the same model_final object on the 600x450 generators.
model_final.fit_generator(
    train_gen,
    steps_per_epoch=train_steps,
    epochs=40,
    validation_data=val_gen,
    validation_steps=val_steps,
    callbacks=[early_stopping, TQDMNotebookCallback(), checkpoint],
    verbose=2)

Epoch 1/40
301s - loss: 0.0905 - acc: 0.9704 - val_loss: 0.0670 - val_acc: 0.9824


Epoch 2/40
278s - loss: 0.0780 - acc: 0.9757 - val_loss: 0.0562 - val_acc: 0.9806


Epoch 3/40
278s - loss: 0.0670 - acc: 0.9763 - val_loss: 0.0514 - val_acc: 0.9824


Epoch 4/40
278s - loss: 0.0607 - acc: 0.9803 - val_loss: 0.0483 - val_acc: 0.9824


Epoch 5/40
278s - loss: 0.0658 - acc: 0.9780 - val_loss: 0.0452 - val_acc: 0.9824


Epoch 6/40
278s - loss: 0.0519 - acc: 0.9855 - val_loss: 0.0439 - val_acc: 0.9824


Epoch 7/40
277s - loss: 0.0491 - acc: 0.9855 - val_loss: 0.0422 - val_acc: 0.9824


Epoch 8/40
277s - loss: 0.0557 - acc: 0.9855 - val_loss: 0.0411 - val_acc: 0.9824


Epoch 9/40
278s - loss: 0.0417 - acc: 0.9873 - val_loss: 0.0404 - val_acc: 0.9824


Epoch 10/40
278s - loss: 0.0389 - acc: 0.9896 - val_loss: 0.0397 - val_acc: 0.9842


Epoch 11/40
278s - loss: 0.0456 - acc: 0.9872 - val_loss: 0.0372 - val_acc: 0.9842


Epoch 12/40
277s - loss: 0.0293 - acc: 0.9936 - val_loss: 0.0364 - val_acc: 0.9842


Epoch 13/40
278s - loss: 0.0376 - acc: 0.9896 - val_loss: 0.0363 - val_acc: 0.9859


Epoch 14/40
274s - loss: 0.0379 - acc: 0.9890 - val_loss: 0.0366 - val_acc: 0.9824


Epoch 15/40
273s - loss: 0.0378 - acc: 0.9878 - val_loss: 0.0354 - val_acc: 0.9877


Epoch 16/40
275s - loss: 0.0273 - acc: 0.9925 - val_loss: 0.0351 - val_acc: 0.9842


Epoch 17/40
276s - loss: 0.0396 - acc: 0.9873 - val_loss: 0.0332 - val_acc: 0.9894


Epoch 18/40
275s - loss: 0.0288 - acc: 0.9919 - val_loss: 0.0337 - val_acc: 0.9877


Epoch 19/40
275s - loss: 0.0327 - acc: 0.9913 - val_loss: 0.0334 - val_acc: 0.9877


Epoch 20/40
274s - loss: 0.0252 - acc: 0.9936 - val_loss: 0.0322 - val_acc: 0.9894


Epoch 21/40
275s - loss: 0.0231 - acc: 0.9925 - val_loss: 0.0320 - val_acc: 0.9877


Epoch 22/40
275s - loss: 0.0258 - acc: 0.9931 - val_loss: 0.0325 - val_acc: 0.9877


Epoch 23/40
275s - loss: 0.0271 - acc: 0.9913 - val_loss: 0.0323 - val_acc: 0.9894


Epoch 24/40
275s - loss: 0.0299 - acc: 0.9890 - val_loss: 0.0328 - val_acc: 0.9877


Epoch 25/40
275s - loss: 0.0374 - acc: 0.9861 - val_loss: 0.0317 - val_acc: 0.9877


Epoch 26/40
275s - loss: 0.0312 - acc: 0.9913 - val_loss: 0.0311 - val_acc: 0.9877


Epoch 27/40
275s - loss: 0.0154 - acc: 0.9971 - val_loss: 0.0309 - val_acc: 0.9877


Epoch 28/40
275s - loss: 0.0183 - acc: 0.9971 - val_loss: 0.0312 - val_acc: 0.9894


Epoch 29/40
275s - loss: 0.0205 - acc: 0.9942 - val_loss: 0.0309 - val_acc: 0.9894


Epoch 30/40
275s - loss: 0.0217 - acc: 0.9954 - val_loss: 0.0303 - val_acc: 0.9877


Epoch 31/40
275s - loss: 0.0167 - acc: 0.9948 - val_loss: 0.0307 - val_acc: 0.9877


Epoch 32/40
275s - loss: 0.0156 - acc: 0.9971 - val_loss: 0.0314 - val_acc: 0.9877


Epoch 33/40
275s - loss: 0.0259 - acc: 0.9925 - val_loss: 0.0308 - val_acc: 0.9894


Epoch 34/40
275s - loss: 0.0235 - acc: 0.9907 - val_loss: 0.0309 - val_acc: 0.9894
Epoch 00033: early stopping



<keras.callbacks.History at 0x7f5a6e082208>

In [16]:
# Restore a checkpoint from the 600x450 run before evaluating and predicting.
# NOTE(review): the epoch number is hard-coded; the training log shows the lowest
# val_loss (0.0303) at "Epoch 30/40" — confirm that file 32 is the intended one.
model_final.load_weights('weights-iter-5-epoch-32.hdf5')

In [17]:
# Evaluate the restored weights over the validation generator; the model was
# compiled with binary cross-entropy loss and an accuracy metric, so the result
# lists the loss followed by the accuracy.
model_final.evaluate_generator(val_gen, math.ceil(validation_samples / batch_size_val))

[0.030787440823574722, 0.98943661971830987]

# Predictions

In [18]:
batch_size_test = 32

# Test images go through the non-augmenting generator with shuffling disabled,
# so that predictions can later be paired with test_gen.filenames.
test_gen = validgen.flow_from_directory(
    test_data_dir,
    target_size=(img_height, img_width),
    class_mode="binary",
    batch_size=batch_size_test,
    shuffle=False)

# Number of test images found on disk.
test_samples = len(test_gen.filenames)

Found 1531 images belonging to 1 classes.


In [19]:
# Rewind the iterator so batches start at the first file; otherwise re-running
# this cell could return predictions that no longer line up with
# test_gen.filenames.
test_gen.reset()

# Round the step count up so the final, partially filled batch is predicted too.
preds = model_final.predict_generator(test_gen, math.ceil(test_samples / batch_size_test))

# The submission needs exactly one prediction row per test image.
assert len(preds) == test_samples, "expected one prediction per test image"

In [20]:
# Turn each test image path (e.g. "unknown/12.jpg") into its integer id.
# basename/splitext avoid depending on the sub-directory name, the path
# separator or the exact spelling of the file extension.
preds_filenames = [int(os.path.splitext(os.path.basename(path))[0])
                   for path in test_gen.filenames]

# One row per image, indexed and ordered by image id; column 0 of `preds` is the
# model's single sigmoid output.
df_result = (pd.DataFrame({'name': preds_filenames, 'invasive': preds[:, 0]})
               .set_index("name")
               .sort_index())

# Write the submission file (the index becomes the "name" column) and show a
# download link as the cell output.
df_result.to_csv("submission_03.csv", encoding="utf8", index=True)
FileLink('submission_03.csv')

In [None]:
# submission_03.csv (600x450 model) scored 0.99454 on the leaderboard (LB)