# Table of Contents
 <p><div class="lev1 toc-item"><a href="#Load-Libraries" data-toc-modified-id="Load-Libraries-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Load Libraries</a></div><div class="lev1 toc-item"><a href="#Load-data/Create-data-Generators" data-toc-modified-id="Load-data/Create-data-Generators-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Load data/Create data Generators</a></div><div class="lev1 toc-item"><a href="#AUC-callback-function" data-toc-modified-id="AUC-callback-function-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>AUC callback function</a></div><div class="lev1 toc-item"><a href="#Load-the-model-&amp;-weights" data-toc-modified-id="Load-the-model-&amp;-weights-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Load the model &amp; weights</a></div><div class="lev1 toc-item"><a href="#Training" data-toc-modified-id="Training-5"><span class="toc-item-num">5&nbsp;&nbsp;</span>Training</a></div><div class="lev1 toc-item"><a href="#Prediction" data-toc-modified-id="Prediction-6"><span class="toc-item-num">6&nbsp;&nbsp;</span>Prediction</a></div>

Training after specifying class weights. Also, calculating AUC after every epoch.

# Load Libraries

In [1]:
import keras
from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing import image
from keras.models import Sequential, load_model, Model
from keras.layers import Activation, Dropout, Flatten, Dense, GlobalAveragePooling2D
from keras.optimizers import SGD
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.applications.inception_v3 import InceptionV3

from keras_tqdm import TQDMNotebookCallback

from datetime import datetime
import os

import numpy as np
import pandas as pd
import math

pd.options.display.max_rows = 40

Using TensorFlow backend.


# Load data/Create data Generators

In [2]:
validgen = ImageDataGenerator()

In [3]:
# 600/450 _ 500/375 _ 400/300 _ 300/225
img_width  = 600
img_height = 450

train_data_dir      = "data/train"
validation_data_dir = "data/valid"
test_data_dir       = "data/test"

batch_size_train = 16
batch_size_val   = 64

In [4]:
val_data = validgen.flow_from_directory(
        directory   = validation_data_dir,
        target_size = (img_height, img_width),
        batch_size  = 568,
        class_mode  = "binary",
        shuffle     = False).next()

Found 568 images belonging to 2 classes.


In [5]:
train_data = validgen.flow_from_directory(
        directory   = train_data_dir,
        target_size = (img_height, img_width),
        batch_size  = 1727,
        class_mode  = "binary",
        shuffle     = False).next()

Found 1727 images belonging to 2 classes.


In [6]:
datagen = ImageDataGenerator(
    rotation_range                = 20,
    width_shift_range             = 0.2,
    height_shift_range            = 0.2,
    horizontal_flip               = True)

In [7]:
train_gen = datagen.flow_from_directory(
        directory   = train_data_dir,
        target_size = (img_height, img_width),
        batch_size  = batch_size_train,
        class_mode  = "binary",
        shuffle     = True)

train_samples      = len(train_gen.filenames)

Found 1727 images belonging to 2 classes.


# AUC callback function

In [8]:
from sklearn.metrics import roc_auc_score

In [9]:
from sklearn.metrics import accuracy_score

In [10]:
from sklearn.metrics import log_loss

In [11]:
class auc_callback(keras.callbacks.Callback):
    def __init__(self, val_data):
        
        self.val_x   = val_data[0]
        self.val_y   = val_data[1]
        
    
    def on_train_begin(self, logs={}):
        return
 
    def on_train_end(self, logs={}):
        return
 
    def on_epoch_begin(self, epoch, logs={}):
        return
 
    def on_epoch_end(self, epoch, logs={}):
        self.model.save_weights('epoch-'+str(epoch)+'.hdf5')
        
        val_pred = self.model.predict(self.val_x, batch_size=64, verbose=0)
        val_roc  = roc_auc_score(self.val_y, val_pred[:,0])
        val_loss = log_loss(self.val_y, np.append(1 - val_pred, val_pred, axis=1))log_loss(self.val_y, np.append(1 - val_pred, val_pred, axis=1))
        val_acc  = accuracy_score(self.val_y, val_pred >= 0.5)accuracy_score(self.val_y, val_pred >= 0.5)
        
        print('\nVal AUC: ' + str(val_roc))
        print('\nVal Los: ' + str(val_loss))
        print('\nVal Acc: ' + str(val_acc) + '\n')
        return
 
    def on_batch_begin(self, batch, logs={}):
        return
 
    def on_batch_end(self, batch, logs={}):
        return   

# Load the model & weights

In [12]:
base_model = InceptionV3(include_top = False, weights = 'imagenet')
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation = 'relu')(x)
x = Dense(1, activation = 'sigmoid')(x)
model_final = Model(inputs=base_model.input, outputs=x)
model_final.compile(loss = 'binary_crossentropy',
                   optimizer = SGD(lr = 0.0001, momentum = 0.9, decay = 1e-5),
                   metrics = ['accuracy'])

In [19]:
model_final.load_weights('./weights/weights-iter-5-epoch-32.hdf5')

In [20]:
val_pred = model_final.predict(val_data[0], batch_size=64)

In [21]:
log_loss(val_data[1], np.append(1 - val_pred, val_pred, axis=1))

0.030787439282007622

In [22]:
accuracy_score(val_data[1], val_pred >= 0.5)

0.98943661971830987

In [23]:
roc_auc_score(val_data[1], val_pred[:,0])

0.99940143655227454

# Training

In [24]:
model_final.compile(loss = 'binary_crossentropy',
                   optimizer = SGD(lr = 0.01, momentum = 0.9, decay = 1e-5, nesterov = True),
                   metrics = ['accuracy'])

In [25]:
model_final.fit_generator(generator        = train_gen,
                          epochs           = 5, 
                          steps_per_epoch  = math.ceil(1727 / batch_size_train), 
                          validation_data  = None, 
                          verbose          = 2,
                          callbacks        = [auc_callback(val_data), TQDMNotebookCallback()],
                          class_weight     = {0: 1090/1727, 1: 637/1727})

Epoch 1/5

Val AUC: 0.989219207236

Val Los: 0.331199838188

Val Acc: 0.897887323944

281s - loss: 0.0298 - acc: 0.9786


Epoch 2/5

Val AUC: 0.998097898377

Val Los: 0.341508335184

Val Acc: 0.929577464789

272s - loss: 0.0297 - acc: 0.9780


Epoch 3/5

Val AUC: 0.995683692471

Val Los: 0.173073417552

Val Acc: 0.975352112676

272s - loss: 0.0165 - acc: 0.9838


Epoch 4/5

Val AUC: 0.998350625166

Val Los: 0.0761135575132

Val Acc: 0.980633802817

272s - loss: 0.0278 - acc: 0.9832


Epoch 5/5

Val AUC: 0.998057994147

Val Los: 0.0639064348009

Val Acc: 0.978873239437

272s - loss: 0.0175 - acc: 0.9867



<keras.callbacks.History at 0x7f6194a25e80>

In [28]:
model_final.fit_generator(initial_epoch=6,
                          generator        = train_gen,
                          epochs           = 10, 
                          steps_per_epoch  = math.ceil(1727 / batch_size_train), 
                          validation_data  = None, 
                          verbose          = 2,
                          callbacks        = [auc_callback(val_data), TQDMNotebookCallback()],
                          class_weight     = {0: 1090/1727, 1: 637/1727})

Epoch 7/10

Val AUC: 0.999268422453

Val Los: 0.0925881472954

Val Acc: 0.970070422535

272s - loss: 0.0119 - acc: 0.9931


Epoch 8/10

Val AUC: 0.998762968875

Val Los: 0.0675169750969

Val Acc: 0.977112676056

272s - loss: 0.0054 - acc: 0.9965


Epoch 9/10

Val AUC: 0.999733971801

Val Los: 0.0293941304997

Val Acc: 0.991197183099

272s - loss: 0.0047 - acc: 0.9983


Epoch 10/10

Val AUC: 0.999627560521

Val Los: 0.0372696775255

Val Acc: 0.987676056338

272s - loss: 0.0045 - acc: 0.9971


<keras.callbacks.History at 0x7f6162b13390>

In [48]:
model_final.load_weights('./epoch-8.hdf5')

In [49]:
model_final.evaluate(val_data[0], val_data[1], batch_size=32)



[0.029394180233589074, 0.99119718309859151]

# Prediction

In [50]:
batch_size_test = 32

test_gen = validgen.flow_from_directory(
        directory   = test_data_dir,
        target_size = (img_height, img_width),
        batch_size  = batch_size_test,
        class_mode  = "binary",
        shuffle     = False)

test_samples       = len(test_gen.filenames)

Found 1531 images belonging to 1 classes.


In [51]:
preds = model_final.predict_generator(test_gen, math.ceil(test_samples / batch_size_test))

In [52]:
preds_filenames = test_gen.filenames
preds_filenames = [int(x.replace("unknown/", "").replace(".jpg", "")) for x in preds_filenames]
df_result = pd.DataFrame({'name': preds_filenames, 'invasive': preds[:,0]})
df_result = df_result.sort_values("name")
df_result.index = df_result["name"]
df_result = df_result.drop(["name"], axis=1)
df_result.to_csv("submission_08.csv", encoding="utf8", index=True)
from IPython.display import FileLink
FileLink('submission_08.csv')

In [None]:
# Got 0.99553 on LB