In [28]:
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ModelCheckpoint
from keras.callbacks import EarlyStopping
from keras import optimizers
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, Dropout, Flatten, Dense
from keras import backend as K

# Every input image is resized to this width and height
img_width = 150
img_height = 150

# Directories holding the training and validation images
train_data_dir = 'data/train'
validation_data_dir = 'data/validation'

# Training schedule; the sample counts are kept at whole multiples of the
# batch size so that as much of the data as possible is used
batch_size = 16
epochs = 200
nb_train_samples = batch_size * 1418
nb_validation_samples = batch_size * 354

# The channel axis comes first or last depending on the active backend setting
channels_first = K.image_data_format() == 'channels_first'
input_shape = (3, img_width, img_height) if channels_first else (img_width, img_height, 3)

# Assemble the network: five convolution / ReLU / max-pooling stages
# followed by a small fully connected classifier
model = Sequential()

# The first stage is the only one that declares the input shape
model.add(Conv2D(32, (3, 3), input_shape=input_shape))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

# The remaining four stages differ only in their number of filters
for filters in (32, 64, 128, 256):
    model.add(Conv2D(filters, (3, 3)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

# Classifier head: flatten, one hidden layer with dropout, then a single
# sigmoid unit for the two-class decision
classifier_head = (
    Flatten(),
    Dense(256),
    Activation('relu'),
    Dropout(0.5),
    Dense(1),
    Activation('sigmoid'),
)
for layer in classifier_head:
    model.add(layer)

# Candidate optimizers to experiment with; only one of them is handed to
# compile() below
adam = optimizers.Adam(
    lr=0.001,
    beta_1=0.9,
    beta_2=0.999,
    epsilon=None,
    decay=0.0,
    amsgrad=False,
)
rmsprop = optimizers.RMSprop(
    lr=0.0005,
    rho=0.9,
    epsilon=None,
    decay=0.0,
)
sgd = optimizers.SGD(lr=0.001, momentum=0.9)

# Compile for binary classification using SGD with momentum, tracking accuracy
model.compile(optimizer=sgd, loss='binary_crossentropy', metrics=['accuracy'])

# Both generators multiply every pixel value by this factor
pixel_scale = 1. / 255

# Training images are additionally perturbed at random (rotation, shifts,
# shear, zoom, horizontal flips) to generate new samples
augmentation = dict(
    rotation_range=20,
    height_shift_range=0.1,
    width_shift_range=0.1,
    shear_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True,
)
train_datagen = ImageDataGenerator(rescale=pixel_scale, **augmentation)

# Validation images are only rescaled, never augmented
test_datagen = ImageDataGenerator(rescale=pixel_scale)

# Settings shared by the training and validation directory iterators
flow_options = dict(
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode='binary',
)

# Training data generator
train_generator = train_datagen.flow_from_directory(train_data_dir, **flow_options)

# Validation data generator
validation_generator = test_datagen.flow_from_directory(validation_data_dir, **flow_options)

# After every epoch, write the full model to best.h5 whenever the
# validation loss reaches a new best
save_each_callback = ModelCheckpoint(
    'best.h5',
    monitor='val_loss',
    verbose=1,
    save_best_only=True,
    save_weights_only=False,
    mode='auto',
    period=1,
)

# Abort training once the validation loss has gone 10 epochs without improving
early_stopping = EarlyStopping(patience=10, monitor='val_loss')

# Number of batches drawn per epoch from each generator
train_steps = nb_train_samples // batch_size
validation_steps = nb_validation_samples // batch_size

# Start training
model.fit_generator(
    train_generator,
    epochs=epochs,
    steps_per_epoch=train_steps,
    callbacks=[save_each_callback, early_stopping],
    validation_data=validation_generator,
    validation_steps=validation_steps)

Found 22688 images belonging to 2 classes.
Found 5672 images belonging to 2 classes.
Epoch 1/200

Epoch 00001: val_loss improved from inf to 0.57398, saving model to best.h5
Epoch 2/200

Epoch 00002: val_loss improved from 0.57398 to 0.54724, saving model to best.h5
Epoch 3/200

Epoch 00003: val_loss improved from 0.54724 to 0.51964, saving model to best.h5
Epoch 4/200

Epoch 00004: val_loss improved from 0.51964 to 0.49478, saving model to best.h5
Epoch 5/200

Epoch 00005: val_loss improved from 0.49478 to 0.47721, saving model to best.h5
Epoch 6/200

Epoch 00006: val_loss improved from 0.47721 to 0.45152, saving model to best.h5
Epoch 7/200

Epoch 00007: val_loss improved from 0.45152 to 0.42384, saving model to best.h5
Epoch 8/200

Epoch 00008: val_loss improved from 0.42384 to 0.40632, saving model to best.h5
Epoch 9/200

Epoch 00009: val_loss improved from 0.40632 to 0.37013, saving model to best.h5
Epoch 10/200

Epoch 00010: val_loss improved from 0.37013 to 0.33500, saving model


Epoch 00040: val_loss did not improve from 0.20455
Epoch 41/200

Epoch 00041: val_loss did not improve from 0.20455
Epoch 42/200

Epoch 00042: val_loss improved from 0.20455 to 0.20333, saving model to best.h5
Epoch 43/200

Epoch 00043: val_loss improved from 0.20333 to 0.19355, saving model to best.h5
Epoch 44/200

Epoch 00044: val_loss did not improve from 0.19355
Epoch 45/200

Epoch 00045: val_loss did not improve from 0.19355
Epoch 46/200

Epoch 00046: val_loss did not improve from 0.19355
Epoch 47/200

Epoch 00047: val_loss did not improve from 0.19355
Epoch 48/200

Epoch 00048: val_loss did not improve from 0.19355
Epoch 49/200

Epoch 00049: val_loss did not improve from 0.19355
Epoch 50/200

Epoch 00050: val_loss improved from 0.19355 to 0.19227, saving model to best.h5
Epoch 51/200

Epoch 00051: val_loss did not improve from 0.19227
Epoch 52/200

Epoch 00052: val_loss improved from 0.19227 to 0.19099, saving model to best.h5
Epoch 53/200

Epoch 00053: val_loss did not improve 

<keras.callbacks.History at 0x7f7bfbb03710>

In [29]:
import numpy as np
import os
import re

from keras.models import load_model
from keras.preprocessing.image import img_to_array, load_img

# Directory containing the images to run predictions on
test_image_directory = 'data/test'

# Images are resized to this width and height before being fed to the model
img_width = 150
img_height = 150

# Restore the best model saved during the training session
model = load_model('best.h5')

# Set up a sort method which does "natural sorting", this way filenames are
# sorted "1, 2, 3... 10" instead of "1, 10, 11... 2, 21, 22..."
def natural_sort_key(s, _nsre=re.compile('([0-9]+)')):
    """Build a sort key that orders embedded numbers by value, not by text.

    The string is split on runs of ASCII digits. Digit runs become ints and
    everything in between is lower-cased, so 'img2' sorts before 'img10' and
    letter case is ignored.

    Args:
        s: The string (typically a file name) to build a key for.
        _nsre: Pre-compiled splitting pattern; it must contain exactly one
            capturing group so that captured runs land on odd positions.

    Returns:
        A list alternating lower-cased text pieces (even positions) and ints
        (odd positions).
    """
    # re.split with a single capturing group always yields
    # [text, digits, text, digits, ..., text], so position alone tells us
    # which pieces are numbers. Testing str.isdigit() instead is unsafe: it is
    # also true for characters such as superscript digits, which the pattern
    # never captures and which int() may reject or which would put an int
    # where other keys hold a string.
    return [int(piece) if position % 2 else piece.lower()
            for position, piece in enumerate(_nsre.split(s))]

# Walk the test directory in natural filename order, load every .jpg at the
# model's input size and collect the pixel arrays, divided by 255 to match
# the 1/255 rescaling applied by the training generators
images_array = []
for file in sorted(os.listdir(test_image_directory), key=natural_sort_key):
    if not file.endswith('.jpg'):
        continue
    image_path = os.path.join(test_image_directory, file)
    # Rely on load_img's default colour handling rather than passing False
    # positionally: a positional value binds to whichever parameter happens
    # to come second in the installed Keras release
    image = load_img(image_path, target_size=(img_width, img_height))
    images_array.append(img_to_array(image) / 255)

# np.stack would fail with an unhelpful message on an empty list, so report
# the actual problem instead
if not images_array:
    raise ValueError('no .jpg images found in ' + test_image_directory)

# Predict on all images in one batch-wise pass and save one value per line
images = np.stack(images_array)
predictions = model.predict(images).flatten()
np.savetxt('results.txt', predictions)

In [30]:
import csv

# Convert the raw predictions (one number per line, in scientific notation)
# into the CSV layout submitted to Kaggle: an Id column built from the
# 1-based line number and the prediction rounded to two decimals
with open('results.csv', 'w', newline='') as destination:
    writer = csv.writer(destination, delimiter=',')
    writer.writerow(['Id', 'Expected'])
    with open('results.txt', 'r') as source:
        for row_number, line in enumerate(source, start=1):
            prediction = round(float(line), 2)
            writer.writerow([f'test_{row_number}.jpg', prediction])