Batch size: 64
Added seed

Commit 2:
- 8 epochs

Commit 3:
- added use of callbacks

Commit 4:
- change to val_loss

Commit 5:
- 10 epochs

Commit 6:
- batch_size: 64

In [1]:
import numpy as np 
import pandas as pd
from datetime import datetime

from keras.models import Sequential
import tensorflow as tf
from keras import backend as K
from keras.models import Model
from keras_preprocessing.image import ImageDataGenerator
from keras.layers import Input, Dense,Concatenate, GlobalMaxPooling2D, GlobalAveragePooling2D, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D, Dropout, BatchNormalization
from keras.losses import mae, sparse_categorical_crossentropy, binary_crossentropy
from keras import regularizers, optimizers
from keras.optimizers import Adam, SGD
from tensorflow.keras.applications import ResNet50
from keras.applications.vgg19 import VGG19

from keras.callbacks import ModelCheckpoint,EarlyStopping,TensorBoard,CSVLogger,ReduceLROnPlateau,LearningRateScheduler

import matplotlib.pyplot as plt 
plt.rcParams['figure.figsize'] = (20,10)

def append_ext(fn):
    """Return ``fn`` with the ``.tif`` file extension appended."""
    extension = ".tif"
    return fn + extension

# Input data files are available in the "../input/" directory.
# Listing that directory shows which files and folders this notebook can read.

import os
print(os.listdir("../input"))

# Any results written to the current directory are saved as output.

Using TensorFlow backend.


['train', 'test', 'train_labels.csv', 'sample_submission.csv']


In [2]:
# Reproducibility: fix the global NumPy and TensorFlow random seeds
# before any data pipeline or model is created.
from numpy.random import seed
from tensorflow import set_random_seed

seed(1)
set_random_seed(1)

In [3]:
# Read ids and labels as strings (dtype=str) so they can be used directly as
# file names and class labels by flow_from_dataframe.
traindf = pd.read_csv("../input/train_labels.csv", dtype=str)
train_size = 180000
half_size = train_size // 2

# Build a class-balanced subset: after sorting by label, the first rows hold
# the lowest label value and the last rows hold the highest one.
traindf = traindf.sort_values(by=['label', 'id'])
# pd.concat replaces DataFrame.append, which was deprecated and then removed
# from pandas.
traindf = pd.concat([traindf.iloc[:half_size], traindf.iloc[-half_size:]])
# Shuffle the rows with a fixed seed. The frame is otherwise ordered by label,
# and validation_split is understood to carve out a contiguous slice of rows,
# which would leave the validation subset with a single class
# (NOTE(review): confirm against the installed keras_preprocessing version).
traindf = traindf.sample(frac=1, random_state=1)

testdf = pd.read_csv("../input/sample_submission.csv", dtype=str)

# The CSV ids have no extension; the image files on disk are "<id>.tif".
traindf["id"] = traindf["id"].apply(append_ext)
testdf["id"] = testdf["id"].apply(append_ext)

# Scale pixel values by 1/255 and reserve 25% of the rows for validation.
datagen = ImageDataGenerator(rescale=1./255., validation_split=0.25)

In [4]:
batch_size = 64

# Settings shared by the training and validation iterators; only `subset`
# differs between the two.
_labelled_flow_kwargs = dict(
    dataframe=traindf,
    directory="../input/train/",
    x_col="id",
    y_col="label",
    batch_size=batch_size,
    seed=42,
    shuffle=True,
    class_mode="binary",
    target_size=(96, 96),
)

train_generator = datagen.flow_from_dataframe(subset="training", **_labelled_flow_kwargs)

valid_generator = datagen.flow_from_dataframe(subset="validation", **_labelled_flow_kwargs)

# The test images get the same rescaling but no validation split.
test_datagen = ImageDataGenerator(rescale=1./255.)

# No labels (y_col=None, class_mode=None) and no shuffling, so predictions
# stay aligned with test_generator.filenames.
test_generator = test_datagen.flow_from_dataframe(
    dataframe=testdf,
    directory="../input/test/",
    x_col="id",
    y_col=None,
    batch_size=batch_size,
    seed=42,
    shuffle=False,
    class_mode=None,
    target_size=(96, 96),
)

Found 135000 images belonging to 2 classes.
Found 45000 images belonging to 2 classes.
Found 57458 images.


In [5]:
# Number of whole training batches per epoch (floor division drops any final partial batch).
train_generator.n//train_generator.batch_size

2109

In [6]:
def auc(y_true, y_pred):
    """Keras-compatible AUC metric built on ``tf.metrics.auc``.

    ``tf.metrics.auc`` returns a pair; the second element (index 1) is what
    this metric returns. The metric creates TensorFlow local variables, so
    they are initialised in the current Keras session before the tensor is
    handed back.

    Parameters:
    - y_true: tensor of ground-truth labels
    - y_pred: tensor of predicted scores

    Output:
    - tensor taken from index 1 of the ``tf.metrics.auc`` result
    """
    metric_pair = tf.metrics.auc(y_true, y_pred)
    auc_update = metric_pair[1]
    K.get_session().run(tf.local_variables_initializer())
    return auc_update


In [7]:
def make_model(model_choice, input_shape):
    '''Attach a single-unit binary classification head to a base network.

    Parameters:
    - model_choice: an already constructed Keras model used as the feature
      extractor; its ``input`` and ``output`` tensors are reused. The output
      is assumed to be a flat feature vector (e.g. built with global pooling)
      -- TODO confirm for any new backbone.
    - input_shape: kept for backward compatibility with existing callers; the
      input shape is already fixed by ``model_choice`` and is not used here.

    Output:
    - model made with keras.models.Model (its summary is printed as a side
      effect)
    '''

    base_model = model_choice
    # Take the base network's symbolic output tensor. Calling the model on a
    # plain shape tuple (as before) is not a valid tensor input.
    x = base_model.output
    # One sigmoid unit yields a probability for the positive class. Softmax
    # over a single unit always evaluates to 1.0, so nothing could be learned.
    out = Dense(1, activation="sigmoid")(x)
    model = Model(base_model.input, out)

    model.summary()

    return model

In [None]:
input_shape = (96, 96, 3)

# Build the backbone from the standalone `keras` package, the same package
# that provides the Dense layer and Model class used by make_model. The
# top-of-notebook import takes ResNet50 from `tensorflow.keras`; combining
# layers from the two packages in one graph is a known source of errors.
from keras.applications.resnet50 import ResNet50

ResNet50_model = make_model(
    ResNet50(include_top=False, pooling='avg', input_tensor=None, input_shape=input_shape),
    input_shape,
)

Instructions for updating:
Colocations handled automatically by placer.
Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.2/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5


In [None]:
# Callbacks used during training.
best_model_weights = './base.model'

# Save the full model (not only the weights) whenever val_loss improves.
checkpoint = ModelCheckpoint(best_model_weights,
                             monitor='val_loss',
                             mode='min',
                             save_best_only=True,
                             save_weights_only=False,
                             period=1,
                             verbose=1)

# NOTE(review): earlyStop is created but is not part of the `callbacks` list
# below, so it has no effect on training -- confirm whether that is intended.
earlyStop = EarlyStopping(monitor='val_loss',
                          mode='min',
                          min_delta=0.01,
                          patience=5,
                          verbose=1)

# TensorBoard event files are written to ./logs.
tensorBoard = TensorBoard(log_dir='./logs',
                          histogram_freq=0,
                          batch_size=batch_size,
                          write_graph=True,
                          write_grads=True,
                          write_images=False)

# Halve the learning rate after 3 epochs without val_loss improvement.
reduce = ReduceLROnPlateau(monitor='val_loss',
                           mode='min',
                           factor=0.5,
                           patience=3,
                           verbose=1)

# Per-epoch metrics go to a fresh CSV file on every run (append=False).
csvlogger = CSVLogger(filename='training_csv.log',
                      separator=',',
                      append=False)

callbacks = [checkpoint, tensorBoard, csvlogger, reduce]

In [None]:
# Wall-clock reference for the elapsed-time report printed after training.
start = datetime.now()


def _whole_batches(gen):
    """Number of complete batches in one pass over ``gen`` (floor division)."""
    return gen.n // gen.batch_size


STEP_SIZE_TRAIN = _whole_batches(train_generator)
STEP_SIZE_VALID = _whole_batches(valid_generator)
STEP_SIZE_TEST = _whole_batches(test_generator)

In [None]:
# Compile with SGD + momentum and binary cross-entropy; track accuracy and the
# custom AUC metric.
sgd_optimizer = SGD(lr=1e-4, momentum=0.99)
ResNet50_model.compile(
    optimizer=sgd_optimizer,
    loss=binary_crossentropy,
    metrics=['accuracy', auc],
)

# Train from the generators for 6 epochs, validating after each epoch.
history = ResNet50_model.fit_generator(
    generator=train_generator,
    steps_per_epoch=STEP_SIZE_TRAIN,
    validation_data=valid_generator,
    validation_steps=STEP_SIZE_VALID,
    epochs=6,
    callbacks=callbacks,
)
end = datetime.now()

print("Elapsed time:", end - start)

In [None]:
# Training vs. validation accuracy per epoch (pyplot is already imported at
# the top of the notebook).
for history_key in ('acc', 'val_acc'):
    plt.plot(history.history[history_key])
plt.title('ResNet50 model accuracy')
plt.legend(['ResNet50 training', 'ResNet50 validation'])
plt.ylabel('accuracy')
plt.xlabel('epoch')

In [None]:
# Training vs. validation AUC per epoch, as recorded by the custom `auc` metric.
for history_key in ('auc', 'val_auc'):
    plt.plot(history.history[history_key])
plt.title('ResNet50 Model AUC')
plt.legend(['ResNet50 Training', 'ResNet50 Validation'])
plt.ylabel('AUC')
plt.xlabel('epoch')

In [None]:
# Restore the weights from the file written by the ModelCheckpoint callback,
# then score the restored model on the validation generator.
ResNet50_model.load_weights(best_model_weights)
ResNet50_model.evaluate_generator(generator=valid_generator, steps=STEP_SIZE_VALID)

In [None]:
def get_results(model_used):
    """Predict on the test generator and return a submission-style DataFrame.

    Parameters:
    - model_used: trained model exposing ``predict_generator``; each
      prediction row is expected to carry the score in its first element.

    Output:
    - pandas DataFrame with columns ``id`` (test file name without its
      4-character extension) and ``label`` (first element of each prediction)

    Relies on the module-level ``test_generator``, which is reset first so
    that predictions start from the first file.
    """

    test_generator.reset()

    # Round the step count up so a final partial batch is included, without
    # adding an extra step when the sample count is an exact multiple of the
    # batch size. The previous "floor + 1" would then over-run the data and
    # return more predictions than there are file names.
    n_steps = int(np.ceil(test_generator.n / float(test_generator.batch_size)))
    pred = model_used.predict_generator(test_generator,
                                        steps=n_steps,
                                        verbose=1)

    filenames = test_generator.filenames
    results = pd.DataFrame({"id": [f[:-4] for f in filenames],
                            "label": [x[0] for x in pred]})

    return results

In [None]:
# Score the test set with the trained ResNet50 model.
results = get_results(ResNet50_model)

In [None]:
# Preview the first 20 rows of the results table.
results.head(20)

In [None]:
# Write the results to results.csv without the DataFrame index column.
results.to_csv("results.csv", index=False)

In [None]:
# Serve the TensorBoard logs from this kernel and expose them through ngrok.
# NOTE(review): this downloads and runs a third-party binary and publishes the
# TensorBoard port through a public tunnel URL -- confirm this is acceptable
# before sharing or re-running the notebook.
!wget https://bin.equinox.io/c/4VmDzA7iaHb/ngrok-stable-linux-amd64.zip
!unzip ngrok-stable-linux-amd64.zip
LOG_DIR = './logs' # must match the log_dir given to the TensorBoard callback
# Start TensorBoard in the background on port 8080.
get_ipython().system_raw(
    'tensorboard --logdir {} --host 0.0.0.0 --port 8080 &'
    .format(LOG_DIR)
)
# Start an ngrok HTTP tunnel to port 8080 in the background.
get_ipython().system_raw('./ngrok http 8080 &')
# Ask the local ngrok API (port 4040) for the tunnel list and print the first public URL.
! curl -s http://localhost:4040/api/tunnels | python3 -c \
    "import sys, json; print(json.load(sys.stdin)['tunnels'][0]['public_url'])"