# Table of Contents
 <p><div class="lev1 toc-item"><a href="#Load-Libraries" data-toc-modified-id="Load-Libraries-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Load Libraries</a></div><div class="lev1 toc-item"><a href="#Load-data/Create-data-Generators" data-toc-modified-id="Load-data/Create-data-Generators-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Load data/Create data Generators</a></div><div class="lev1 toc-item"><a href="#AUC-callback-function" data-toc-modified-id="AUC-callback-function-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>AUC callback function</a></div><div class="lev1 toc-item"><a href="#Load-the-model-&amp;-weights" data-toc-modified-id="Load-the-model-&amp;-weights-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Load the model &amp; weights</a></div><div class="lev1 toc-item"><a href="#Training" data-toc-modified-id="Training-5"><span class="toc-item-num">5&nbsp;&nbsp;</span>Training</a></div><div class="lev1 toc-item"><a href="#Prediction" data-toc-modified-id="Prediction-6"><span class="toc-item-num">6&nbsp;&nbsp;</span>Prediction</a></div>

This notebook continues training the ResNet50 model with class weights specified, and computes the validation AUC (together with log loss and accuracy) after every epoch.

# Load Libraries

In [1]:
import keras
from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing import image
from keras.models import Sequential, load_model, Model
from keras.layers import Activation, Dropout, Flatten, Dense, GlobalAveragePooling2D
from keras.optimizers import SGD
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.applications.resnet50 import ResNet50

from keras_tqdm import TQDMNotebookCallback

from datetime import datetime
import os

import numpy as np
import pandas as pd
import math

# Cap the number of rows pandas prints when displaying a frame.
pd.set_option("display.max_rows", 40)

Using TensorFlow backend.


# Load data/Create data Generators

In [2]:
# Generator built with default arguments (no augmentation options are passed here);
# it is reused below for the validation, in-memory training and test iterators.
validgen = ImageDataGenerator()

In [3]:
# Input resolutions listed as width/height: 600/450 _ 500/375 _ 400/300 _ 300/225
img_width, img_height = 600, 450

# Image folders consumed by the flow_from_directory calls below.
train_data_dir, validation_data_dir, test_data_dir = (
    "data/train",
    "data/valid",
    "data/test",
)

# Mini-batch sizes for training and validation.
batch_size_train, batch_size_val = 12, 64

In [4]:
# Pull the whole validation set into memory as one (images, labels) batch:
# batch_size matches the 568 images found on disk, and shuffling is off so the
# batch follows directory order.
val_data = next(
    validgen.flow_from_directory(
        validation_data_dir,
        class_mode="binary",
        target_size=(img_height, img_width),
        batch_size=568,
        shuffle=False,
    )
)

Found 568 images belonging to 2 classes.


In [5]:
# Pull the un-augmented training set into memory as one (images, labels) batch:
# batch_size matches the 1727 images found on disk, and shuffling is off.
# NOTE(review): train_data does not appear to be referenced again in this notebook;
# confirm it is needed, since it holds every training image in memory at once.
train_data = next(
    validgen.flow_from_directory(
        train_data_dir,
        class_mode="binary",
        target_size=(img_height, img_width),
        batch_size=1727,
        shuffle=False,
    )
)

Found 1727 images belonging to 2 classes.


In [6]:
# Augmenting generator used for the training iterator: random rotations,
# horizontal/vertical shifts and horizontal flips.
datagen = ImageDataGenerator(
    horizontal_flip=True,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
)

In [7]:
# Shuffled, augmented mini-batches of training images with binary labels.
train_gen = datagen.flow_from_directory(
    train_data_dir,
    class_mode="binary",
    target_size=(img_height, img_width),
    batch_size=batch_size_train,
    shuffle=True,
)

# Number of training images discovered by the iterator.
train_samples = len(train_gen.filenames)

Found 1727 images belonging to 2 classes.


# AUC callback function

In [8]:
from sklearn.metrics import roc_auc_score

In [9]:
from sklearn.metrics import accuracy_score

In [10]:
from sklearn.metrics import log_loss

In [11]:
class auc_callback(keras.callbacks.Callback):
    """Checkpoint the weights and report hold-out metrics after every epoch.

    At the end of each epoch the callback:

    1. saves the model weights to ``<weights_prefix><init_epoch + epoch>.hdf5``
       (a path relative to the working directory), where ``epoch`` is the index
       Keras hands to ``on_epoch_end``. In this notebook that index is zero-based:
       the file ending in ``-6`` corresponds to the epoch logged as "Epoch 7/20";
    2. predicts on the held-out data and prints ROC AUC, log loss and accuracy
       (predictions thresholded at 0.5).

    Parameters
    ----------
    val_data : sequence
        ``val_data[0]`` holds the model inputs and ``val_data[1]`` the binary labels.
    init_epoch : int
        Offset added to the epoch index when naming checkpoint files, so a resumed
        run can continue an earlier numbering.
    batch_size : int, optional
        Batch size used for the per-epoch prediction pass (default 64, the value
        that used to be hard-coded).
    weights_prefix : str, optional
        File-name prefix for the saved weights (defaults to the previously
        hard-coded ``'resnet-class-weights-epoch-'``).
    """

    def __init__(self, val_data, init_epoch, batch_size=64,
                 weights_prefix='resnet-class-weights-epoch-'):
        # Let the Keras base class set up its own attributes first; skipping this
        # leaves the callback only partially initialised.
        super(auc_callback, self).__init__()

        self.val_x = val_data[0]
        self.val_y = val_data[1]
        self.init_epoch = init_epoch
        self.batch_size = batch_size
        self.weights_prefix = weights_prefix

    # The remaining hooks (train/epoch/batch begin, train/batch end) are inherited
    # from keras.callbacks.Callback, whose defaults already do nothing.

    def on_epoch_end(self, epoch, logs=None):
        """Save this epoch's weights, then print validation AUC, log loss and accuracy."""
        self.model.save_weights(self.weights_prefix + str(self.init_epoch + epoch) + '.hdf5')

        val_pred = self.model.predict(self.val_x, batch_size=self.batch_size, verbose=0)
        val_roc  = roc_auc_score(self.val_y, val_pred[:,0])
        # log_loss wants one probability column per class: build [1 - p, p] from the
        # single sigmoid output column.
        val_loss = log_loss(self.val_y, np.append(1 - val_pred, val_pred, axis=1))
        val_acc  = accuracy_score(self.val_y, val_pred >= 0.5)

        print('\nVal AUC: ' + str(val_roc))
        print('\nVal Los: ' + str(val_loss))
        print('\nVal Acc: ' + str(val_acc) + '\n')

# Load the model & weights

In [12]:
# ResNet50 convolutional base (ImageNet weights, classifier top removed) followed by
# a small head: global average pooling -> 1024-unit ReLU layer -> single sigmoid unit.
base_model = ResNet50(weights='imagenet', include_top=False)

pooled = GlobalAveragePooling2D()(base_model.output)
hidden = Dense(1024, activation='relu')(pooled)
x = Dense(1, activation='sigmoid')(hidden)

model_final = Model(inputs=base_model.input, outputs=x)

# Initial compile with plain momentum SGD; the model is compiled again with
# different optimizer settings in the Training section.
model_final.compile(
    optimizer=SGD(lr=0.0001, momentum=0.9, decay=1e-5),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [13]:
# Overwrite the model's weights with a stored checkpoint; judging by the file name it
# presumably comes from an earlier training iteration (TODO confirm provenance).
model_final.load_weights('./weights/weights-iter-8-epoch-06.hdf5')

In [14]:
# Baseline predictions of the loaded checkpoint on the in-memory validation images.
# Uses the configured validation batch size rather than repeating the literal 64.
val_pred = model_final.predict(val_data[0], batch_size=batch_size_val)

In [15]:
# Baseline validation log loss for the loaded checkpoint; the single sigmoid column p
# is expanded into two columns [1 - p, p] before being passed to log_loss.
log_loss(val_data[1], np.append(1 - val_pred, val_pred, axis=1))

0.059163292494019394

In [16]:
# Baseline validation accuracy, with predictions thresholded at 0.5.
accuracy_score(val_data[1], val_pred >= 0.5)

0.97535211267605637

In [17]:
# Baseline validation ROC AUC, computed from the model's single output column.
roc_auc_score(val_data[1], val_pred[:,0])

0.99829741952646989

# Training

In [18]:
# Re-compile for this training run: compared with the first compile, the learning
# rate is ten times higher (0.001 vs 0.0001) and Nesterov momentum is switched on.
model_final.compile(
    optimizer=SGD(lr=0.001, momentum=0.9, decay=1e-5, nesterov=True),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [19]:
# Fine-tune on the augmented training generator. Validation metrics are not computed
# by Keras itself (validation_data is None); auc_callback evaluates on the in-memory
# validation set and saves the weights at the end of every epoch.
#
# steps_per_epoch is derived from the number of images the generator actually found
# (train_samples) instead of a repeated literal, rounded up so the final partial
# batch is still visited.
#
# class_weight uses hard-coded per-class counts that sum to the 1727 training images
# (1090 + 637): class 0 is weighted by 1090/1727 and class 1 by 637/1727.
# NOTE(review): presumably each class is weighted by the *other* class's share to
# offset the imbalance; confirm the counts against train_gen.classes if the data changes.
model_final.fit_generator(generator        = train_gen,
                          epochs           = 20,
                          steps_per_epoch  = math.ceil(train_samples / batch_size_train),
                          validation_data  = None,
                          verbose          = 2,
                          callbacks        = [auc_callback(val_data, 0), TQDMNotebookCallback()],
                          class_weight     = {0: 1090/1727, 1: 637/1727})

Widget Javascript not detected.  It may not be installed or enabled properly.


Widget Javascript not detected.  It may not be installed or enabled properly.


Epoch 1/20

Val AUC: 0.998696461825

Val Los: 0.0590373511977

Val Acc: 0.973591549296

358s - loss: 0.0296 - acc: 0.9809


Widget Javascript not detected.  It may not be installed or enabled properly.


Epoch 2/20

Val AUC: 0.998696461825

Val Los: 0.0520537527442

Val Acc: 0.975352112676

346s - loss: 0.0236 - acc: 0.9815


Widget Javascript not detected.  It may not be installed or enabled properly.


Epoch 3/20

Val AUC: 0.998736366055

Val Los: 0.051399690589

Val Acc: 0.978873239437

347s - loss: 0.0172 - acc: 0.9902


Widget Javascript not detected.  It may not be installed or enabled properly.


Epoch 4/20

Val AUC: 0.998616653365

Val Los: 0.055566665846

Val Acc: 0.980633802817

347s - loss: 0.0137 - acc: 0.9902


Widget Javascript not detected.  It may not be installed or enabled properly.


Epoch 5/20

Val AUC: 0.999055599894

Val Los: 0.0552506701942

Val Acc: 0.975352112676

347s - loss: 0.0151 - acc: 0.9872


Widget Javascript not detected.  It may not be installed or enabled properly.


Epoch 6/20

Val AUC: 0.998510242086

Val Los: 0.0553507314519

Val Acc: 0.978873239437

347s - loss: 0.0132 - acc: 0.9896


Widget Javascript not detected.  It may not be installed or enabled properly.


Epoch 7/20

Val AUC: 0.998723064645

Val Los: 0.0488027291021

Val Acc: 0.984154929577

347s - loss: 0.0094 - acc: 0.9925


Widget Javascript not detected.  It may not be installed or enabled properly.


Epoch 8/20

Val AUC: 0.998669859005

Val Los: 0.0577758040417

Val Acc: 0.977112676056

347s - loss: 0.0098 - acc: 0.9931


Widget Javascript not detected.  It may not be installed or enabled properly.


Epoch 9/20

Val AUC: 0.998683160415

Val Los: 0.0530471734741

Val Acc: 0.985915492958

347s - loss: 0.0075 - acc: 0.9936


Widget Javascript not detected.  It may not be installed or enabled properly.


Epoch 10/20

Val AUC: 0.998563447725

Val Los: 0.0614362152359

Val Acc: 0.982394366197

346s - loss: 0.0069 - acc: 0.9954


Widget Javascript not detected.  It may not be installed or enabled properly.


Epoch 11/20


KeyboardInterrupt: 

# Prediction

In [34]:
# Restore a checkpoint written by auc_callback during training. The suffix is the
# callback's epoch index, which here is one lower than the number in the training log:
# "-6" matches the log entry "Epoch 7/20" (val loss 0.0488, val acc 0.9842).
model_final.load_weights('./resnet-class-weights-epoch-6.hdf5')

In [35]:
# Sanity-check the restored checkpoint on the in-memory validation set; the result
# holds the compiled loss (binary cross-entropy) followed by the accuracy metric.
model_final.evaluate(val_data[0], val_data[1])



[0.048802746971413284, 0.98415492957746475]

In [36]:
# Batch size for the prediction pass over the test images.
batch_size_test = 32

# Un-augmented, unshuffled iterator over the test folder, so predictions come back
# in the same order as test_gen.filenames.
test_gen = validgen.flow_from_directory(
    test_data_dir,
    class_mode="binary",
    target_size=(img_height, img_width),
    batch_size=batch_size_test,
    shuffle=False,
)

# Number of test images discovered by the iterator.
test_samples = len(test_gen.filenames)

Found 1531 images belonging to 1 classes.


In [37]:
# Predict over the test iterator; the step count is rounded up so that the final,
# partially filled batch is included.
preds = model_final.predict_generator(test_gen, math.ceil(test_samples / batch_size_test))

In [38]:
from IPython.display import FileLink

# Turn each test file path into its numeric image id. Taking the base name and
# stripping the extension works for any sub-folder name, path separator or image
# extension, instead of assuming the literal "unknown/" prefix and ".jpg" suffix.
preds_filenames = [
    int(os.path.splitext(os.path.basename(path))[0])
    for path in test_gen.filenames
]

# One row per test image, ordered by id, with the id as the index ("name") and the
# model's single output column as the "invasive" score.
df_result = (
    pd.DataFrame({'name': preds_filenames, 'invasive': preds[:,0]})
    .sort_values("name")
    .set_index("name")
)

df_result.to_csv("submission_09.csv", encoding="utf8", index=True)

# Render a download link for the written file as the cell output.
FileLink('submission_09.csv')

In [None]:
# Got 0.99113 on LB