# Table of Contents
 <p>

Training on all the layers

In [1]:
from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing import image
from keras.models import Sequential, load_model, Model
from keras.layers import Activation, Dropout, Flatten, Dense, GlobalAveragePooling2D
from keras.optimizers import SGD
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.applications.vgg16 import VGG16

from keras_tqdm import TQDMNotebookCallback

from datetime import datetime
import os

import numpy as np
import pandas as pd
import math

pd.options.display.max_rows = 40

Using TensorFlow backend.


In [2]:
vgg16 = VGG16(weights = 'imagenet',include_top=False)
x = vgg16.get_layer('block5_conv3').output
x = GlobalAveragePooling2D()(x)
x = Dense(256, activation='relu')(x)
x = Dropout(0.5)(x)
x = Dense(1, activation='sigmoid')(x)
model_final = Model(inputs=vgg16.input, outputs=x)

In [3]:
model_final.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy'])

In [4]:
model_final.load_weights('./weights/weights-iter-3-epoch-05.hdf5')

In [5]:
img_width  = 600
img_height = 450

train_data_dir      = "data/train"
validation_data_dir = "data/valid"
test_data_dir       = "data/test"

validgen = ImageDataGenerator(rescale=1., featurewise_center=True)
validgen.mean=np.array([103.939, 116.779, 123.68], dtype=np.float32).reshape(1,1,3)

batch_size_val = 32

val_gen = validgen.flow_from_directory(
        directory   = validation_data_dir,
        target_size = (img_height, img_width),
        batch_size  = batch_size_val,
        class_mode  = "binary",
        shuffle     = False)

Found 568 images belonging to 2 classes.


In [6]:
model_final.evaluate_generator(val_gen,
                               math.ceil(len(val_gen.filenames)/batch_size_val))

[0.062558841629242398, 0.9859154929577465]

In [7]:
model_final.compile(optimizer = SGD(lr=0.0001, momentum=0.9, decay = 1e-5, nesterov=True),
                    loss      = 'binary_crossentropy',
                    metrics   = ['accuracy'])

In [8]:
for layer in model_final.layers:
   layer.trainable = True

In [9]:
datagen = ImageDataGenerator(
    rotation_range                = 20,
    width_shift_range             = 0.2,
    height_shift_range            = 0.2,
    horizontal_flip               = True)

validgen = ImageDataGenerator()

In [10]:
# 600/450 _ 500/375 _ 400/300 _ 300/225

img_width  = 600
img_height = 450

train_data_dir      = "data/train"
validation_data_dir = "data/valid"
test_data_dir       = "data/test"

batch_size_train = 16
batch_size_val   = 16

train_gen = datagen.flow_from_directory(
        directory   = train_data_dir,
        target_size = (img_height, img_width),
        batch_size  = batch_size_train,
        class_mode  = "binary",
        shuffle     = True)

val_gen = validgen.flow_from_directory(
        directory   = validation_data_dir,
        target_size = (img_height, img_width),
        batch_size  = batch_size_val,
        class_mode  = "binary",
        shuffle     = False)


train_samples      = len(train_gen.filenames)
validation_samples = len(val_gen.filenames)

Found 1727 images belonging to 2 classes.
Found 568 images belonging to 2 classes.


In [11]:
checkpoint = ModelCheckpoint("weights-iter-6-epoch-{epoch:02d}.hdf5",
                             monitor='val_acc',
                             verbose=0,
                             save_best_only=False,
                             save_weights_only=True)

early_stopping = EarlyStopping(monitor='val_loss', patience=3, verbose=1, mode='auto')


In [11]:
model_final.fit_generator(generator        = train_gen,
                          epochs           = 10, 
                          steps_per_epoch  = math.ceil(train_samples / batch_size_train), 
                          validation_data  = val_gen, 
                          validation_steps = math.ceil(validation_samples / batch_size_val), 
                          verbose          = 2,
                          callbacks        = [early_stopping, TQDMNotebookCallback(), checkpoint])

Epoch 1/10
564s - loss: 0.1795 - acc: 0.9606 - val_loss: 0.1686 - val_acc: 0.9595


Epoch 2/10
532s - loss: 0.0482 - acc: 0.9832 - val_loss: 0.1138 - val_acc: 0.9824


Epoch 3/10
533s - loss: 0.0870 - acc: 0.9774 - val_loss: 0.0856 - val_acc: 0.9771


Epoch 4/10
532s - loss: 0.0365 - acc: 0.9884 - val_loss: 0.0988 - val_acc: 0.9754


Epoch 5/10
532s - loss: 0.0288 - acc: 0.9902 - val_loss: 0.0921 - val_acc: 0.9771


Epoch 6/10
532s - loss: 0.0226 - acc: 0.9925 - val_loss: 0.0544 - val_acc: 0.9842


Epoch 7/10
532s - loss: 0.0369 - acc: 0.9896 - val_loss: 0.0847 - val_acc: 0.9789


Epoch 8/10
531s - loss: 0.0208 - acc: 0.9931 - val_loss: 0.1372 - val_acc: 0.9771


Epoch 9/10
531s - loss: 0.0106 - acc: 0.9954 - val_loss: 0.0818 - val_acc: 0.9842


Epoch 10/10
531s - loss: 0.0190 - acc: 0.9948 - val_loss: 0.1363 - val_acc: 0.9754
Epoch 00009: early stopping



<keras.callbacks.History at 0x7fad3d51b0f0>

In [13]:
model_final.load_weights('./weights/weights-iter-6-epoch-05.hdf5')

In [14]:
model_final.evaluate_generator(val_gen,
                               math.ceil(len(val_gen.filenames)/batch_size_val))

[0.054385563390516846, 0.98415492957746475]

In [15]:
batch_size_test = 32
test_gen = validgen.flow_from_directory(
        directory   = test_data_dir,
        target_size = (img_height, img_width),
        batch_size  = batch_size_test,
        class_mode  = "binary",
        shuffle     = False)
test_samples = len(test_gen.filenames)

Found 1531 images belonging to 1 classes.


In [16]:
preds = model_final.predict_generator(test_gen, math.ceil(test_samples / batch_size_test))

In [17]:
preds_filenames = test_gen.filenames
preds_filenames = [int(x.replace("unknown/", "").replace(".jpg", "")) for x in preds_filenames]
df_result = pd.DataFrame({'name': preds_filenames, 'invasive': preds[:,0]})
df_result = df_result.sort_values("name")
df_result.index = df_result["name"]
df_result = df_result.drop(["name"], axis=1)
df_result.to_csv("submission_04.csv", encoding="utf8", index=True)
from IPython.display import FileLink
FileLink('submission_04.csv')

In [None]:
# Got 0.98957 on LB