In [1]:
import h5py
import numpy as np
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential, Model
from keras.layers import Convolution2D, MaxPooling2D, ZeroPadding2D, GlobalAveragePooling2D
from keras.layers import Activation, Dropout, Flatten, Dense
from keras import optimizers
import keras
import pandas
import utils

Using TensorFlow backend.


In [2]:
# --- Dataset locations (relative to the notebook's working directory) ---
train_folder, validation_folder = 'data/train', 'data/validation'
test_folder = 'data/test/0'
labels_csv = 'train_labels.csv'

# Class labels, as strings; also passed to the utils folder helpers.
label_names = ['0', '1']

# --- Input pipeline settings ---
# Square input resolution used for both the generators and the network input.
IMG_WIDTH = IMG_HEIGHT = 450
batch_size = 8

In [3]:
# Last argument of create_validation_subfolders. Presumably the fraction of the
# training images that is moved into the validation folder -- confirm in utils.
validation_fraction = 0.2

# Prepare the on-disk layout expected by flow_from_directory (one sub-folder per
# label). NOTE(review): the exact behaviour lives in the project-local `utils`
# module, which is not visible here.
utils.create_labelled_folders(train_folder, labels_csv, label_names)
utils.create_validation_subfolders(
    train_folder, validation_folder, label_names, validation_fraction
)

Folders are already labelled


In [4]:
# Options shared by the training and validation directory iterators.
# NOTE: Keras interprets target_size as (height, width); both constants are equal
# here, and the same ordering is used for the network's input_shape.
flow_options = dict(
    batch_size=batch_size,
    target_size=(IMG_WIDTH, IMG_HEIGHT),
)

# Training images: rescaled to [0, 1] and randomly augmented
# (shifts, zoom, flips along both axes, rotations up to 90 degrees).
train_datagen = ImageDataGenerator(
    rescale=1./255.,
    rotation_range=90,
    width_shift_range=0.2,
    height_shift_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
)

# Validation images: rescaled only, no augmentation.
validation_datagen = ImageDataGenerator(rescale=1./255.)

train_generator = train_datagen.flow_from_directory(train_folder, **flow_options)

validation_generator = validation_datagen.flow_from_directory(validation_folder, **flow_options)

Found 1837 images belonging to 2 classes.
Found 458 images belonging to 2 classes.


In [None]:
# Convolutional base: VGG16 with ImageNet weights and without its dense classifier
# (include_top=False), so a custom head can be attached to its output.
#
# `classes` is deliberately not passed: Keras documents it as applicable only when
# include_top=True and no pre-trained weights are loaded, so the former `classes=2`
# had no effect on the constructed network and only suggested otherwise.
base_model = keras.applications.vgg16.VGG16(
    weights='imagenet',
    include_top=False,
    input_shape=(IMG_WIDTH, IMG_HEIGHT, 3),
)

In [None]:
# Print the layer-by-layer architecture of the convolutional base; the layer names
# listed here (e.g. block5_conv3) are what the freezing loop matches against.
base_model.summary()

In [None]:
# Classification head stacked on top of the VGG16 convolutional output.
x = base_model.output
# `input_shape` is not needed when a layer is applied to an existing tensor.
x = Flatten(name="finetune")(x)
x = Dense(256, activation='relu')(x)
x = Dropout(0.5)(x)
# flow_from_directory yields one-hot labels by default (class_mode='categorical')
# and the two classes are mutually exclusive, so the output is a 2-way softmax.
# Column 1 is still the score of class index 1, as the submission code expects.
x = Dense(2, activation='softmax')(x)

model = Model(inputs=base_model.inputs, outputs=x)

# Freeze every layer that comes before block5_conv3; block5_conv3 and everything
# after it (including the new head) stay trainable for fine-tuning.
for layer in model.layers:
    if layer.name == "block5_conv3":
        break
    layer.trainable = False

# categorical_crossentropy matches the softmax/one-hot setup. With the previous
# binary_crossentropy loss, Keras resolved 'accuracy' to element-wise binary
# accuracy, which is not the per-image classification accuracy.
model.compile(loss='categorical_crossentropy',
              optimizer=optimizers.SGD(lr=1e-4, momentum=0.9),
              metrics=['accuracy'])

In [31]:
num_training_samples = train_generator.n
num_validation_samples = validation_generator.n

# Round the step counts UP so the final, smaller batch is included. With floor
# division the validation pass skipped the remainder (458 % 8 == 2 images) and
# each training epoch stopped short of a full pass over the data.
steps_per_epoch = int(np.ceil(num_training_samples / float(batch_size)))
validation_steps = int(np.ceil(num_validation_samples / float(batch_size)))

# Keep the History object so per-epoch losses/metrics can be inspected afterwards.
history = model.fit_generator(
    train_generator,
    steps_per_epoch=steps_per_epoch,
    epochs=1,
    validation_data=validation_generator,
    validation_steps=validation_steps)

Epoch 1/1


<keras.callbacks.History at 0x7f1093e6fcc0>

In [32]:
# Persist the trained model to 'invasive_species.h5' in the working directory so
# it can be reloaded without retraining.
model.save('invasive_species.h5')

In [33]:
from keras.models import load_model

# Rebind `model` to the copy restored from the file written by the save cell;
# every later cell predicts with this reloaded model.
model = load_model('invasive_species.h5')

In [40]:
# Test images get the same [0, 1] rescaling as training/validation, no augmentation.
test_datagen = ImageDataGenerator(rescale=1./255.)

# shuffle=False keeps the batches in the order of test_generator.filenames, which
# is what lets predictions be matched back to image files later on.
test_generator = test_datagen.flow_from_directory(
    'data/test',
    target_size=(IMG_WIDTH, IMG_HEIGHT),
    batch_size=batch_size,
    shuffle=False,
)

Found 1531 images belonging to 1 classes.


In [47]:
from os.path import basename, splitext

# Numeric image id for every test file, in generator order.
# test_generator.filenames holds paths relative to the test root (class folder
# plus file name). Taking the basename strips the folder part whatever its name
# length or path separator, instead of assuming a fixed 2-character prefix.
filenames = np.array(
    [int(splitext(basename(f))[0]) for f in test_generator.filenames],
    dtype=np.int32,
)
    

In [49]:
num_test_samples = test_generator.n

# Ceiling division: exactly enough steps to see every test image once.
# The previous `n // batch_size + 1` ran one surplus batch (wrapping around to the
# start of the data) whenever n was an exact multiple of batch_size.
num_test_steps = int(np.ceil(num_test_samples / float(batch_size)))
test_predictions = model.predict_generator(test_generator, num_test_steps)

In [50]:
# Permutation that puts the numeric image ids in ascending order.
ii = filenames.argsort()

In [54]:
# Apply the same permutation to ids and predictions so rows stay paired.
filenames_sorted, predictions_sorted = filenames[ii], test_predictions[ii]

In [57]:
# Column 1 of the network output is the score for class index 1, used as the
# 'invasive' probability in the submission.
invasive = predictions_sorted[:, 1]

# Use the ids actually parsed from the file names. The former
# np.arange(1, n + 1) only matched them if the ids were exactly 1..n with no gaps;
# any missing or extra id would silently pair predictions with the wrong image.
names = filenames_sorted

# Two columns: image id, invasive score (stacking promotes both to float).
submission_array = np.stack((names, invasive), axis=1)

In [58]:
# Keep a binary copy of the (id, score) table next to the CSV submission.
np.save(file='submission.npy', arr=submission_array)

In [59]:
import csv

# Write the submission file: a header row followed by one "id,score" row per image.
# The id column is stored as float inside submission_array, so it is converted
# back to an integer string before writing.
with open('submission.csv', 'w', newline='') as csvfile:
    submission_writer = csv.writer(
        csvfile,
        delimiter=',',
        quotechar='|',
        quoting=csv.QUOTE_MINIMAL,
    )
    submission_writer.writerow(['name', 'invasive'])
    submission_writer.writerows(
        [str(int(image_id)), score] for image_id, score in submission_array
    )