In [None]:
# Export the account credentials as environment variables; the `kg` commands
# in later cells reference them as $USERNAME / $PASSWORD.
# The values below are placeholders: substitute real ones locally and do not
# commit them with the notebook.
%env USERNAME=username
%env PASSWORD=password

In [None]:
# Create a local data/ directory, make it the working directory, and download
# the "invasive-species-monitoring" competition files into it with the `kg` CLI.
# NOTE(review): `%mkdir data` reports an error when data/ already exists, so
# this cell is not cleanly re-runnable as written.
%mkdir data
%cd data
!kg download -u $USERNAME -p $PASSWORD -c invasive-species-monitoring

In [None]:
# Unpack the downloaded archives into the current directory
# (-bd presumably disables 7za's progress indicator; confirm against 7-Zip docs).
!7za x train.7z -bd
!7za x test.7z -bd
!unzip train_labels.csv.zip
# The archives are no longer needed once extracted; remove them.
!rm train.7z
!rm test.7z
!rm train_labels.csv.zip

In [None]:
import numpy as np
import os
import shutil

In [None]:
# Split configuration: share of each class kept for training, and how many
# images per folder are copied into the small "sample" dataset.
train_ratio = 0.70
sample_size = 10

# Label table read from the CSV (header row skipped). The rest of the cell
# uses column 0 as the image id and column 1 as the label
# (1 -> invasive, 0 -> noninvasive).
train_labels = np.genfromtxt('train_labels.csv', dtype=np.int16, delimiter=",", skip_header=1)

# Per-class validation-set sizes: the (1 - train_ratio) share of each class,
# truncated to an integer.
label_column = train_labels[:, 1]
valid_fraction = 1 - train_ratio
valid_invasive_size = int(valid_fraction * np.count_nonzero(label_column == 1))
valid_noninvasive_size = int(valid_fraction * np.count_nonzero(label_column == 0))

# Create the directory tree used by the rest of the notebook: per-class folders
# for the train / valid splits, a mirrored "sample" tree, and a single
# "unknown" folder for the unlabeled test images.
for folder in ["train/invasive", "train/noninvasive",
               "valid/invasive", "valid/noninvasive",
               "sample/train/invasive", "sample/train/noninvasive",
               "sample/valid/invasive", "sample/valid/noninvasive",
               "test/unknown"]:
    # os.makedirs raises if the leaf directory already exists; skipping
    # existing folders keeps the cell re-runnable.
    if not os.path.isdir(folder):
        os.makedirs(folder)

# Move every extracted training image into the folder of its class, using the
# label in column 1 of `train_labels` (1 -> invasive, 0 -> noninvasive) and
# the image id in column 0 as the file stem.
for label_value, class_dir in [(1, "train/invasive/"), (0, "train/noninvasive/")]:
    image_ids = train_labels[train_labels[:, 1] == label_value, 0]
    for image_id in image_ids:
        shutil.move("train/%s.jpg" % image_id, class_dir)

# Carve the validation set out of the training folders, class by class.
# os.listdir returns entries in arbitrary order, so the names are sorted and
# then shuffled with a fixed seed: the split is random but reproducible.
split_rng = np.random.RandomState(0)
for class_name, valid_size in [("invasive", valid_invasive_size),
                               ("noninvasive", valid_noninvasive_size)]:
    image_names = sorted(os.listdir("train/%s" % class_name))
    split_rng.shuffle(image_names)
    for image_name in image_names[:valid_size]:
        shutil.move("train/%s/%s" % (class_name, image_name),
                    "valid/%s/" % class_name)

# Populate the small "sample" dataset: from each train/valid class folder,
# copy the first `sample_size` entries (in whatever order os.listdir returns
# them) into the matching folder under sample/.
for split_dir in ("train/invasive", "train/noninvasive",
                  "valid/invasive", "valid/noninvasive"):
    sample_dir = "sample/%s/" % split_dir
    picked_names = os.listdir(split_dir)[:sample_size]
    for image_name in picked_names:
        shutil.copy("%s/%s" % (split_dir, image_name), sample_dir)
    
# Move the extracted test images into the single test/unknown/ folder.
for entry in os.listdir("test"):
    source = "test/%s" % entry
    # "test" also contains the "unknown" directory created earlier in this
    # cell; trying to move it into itself makes shutil.move raise, so only
    # regular files are moved.
    if os.path.isfile(source):
        shutil.move(source, "test/unknown/")

# Step back out of data/ (entered with `%cd data` in the download cell) so the
# "data/..." paths used by the following cells resolve from the parent directory.
%cd ..

In [None]:
from vgg16 import Vgg16
from keras.callbacks import ModelCheckpoint

In [None]:
# Root directory of the dataset; the train/, valid/ and test/ sub-folders are
# appended to it in the cells below. Switch to "data/sample/" for a quick run
# on the small sample subset.
# NOTE(review): the data-preparation cell only creates sample/train and
# sample/valid, so "data/sample/test/" would not exist for the test cell.
path = "data/" # "data/sample/"

In [None]:
# Training settings: `batch_size` is passed to every batch generator and to
# vgg.test; `epochs` is passed to fit_generator as nb_epoch.
batch_size = 32
epochs = 1

In [None]:
# Instantiate the project's Vgg16 wrapper and build batch generators over the
# train/ and valid/ directories under `path`.
vgg = Vgg16()

batches = vgg.get_batches(path + "train/", batch_size=batch_size)
valid_batches = vgg.get_valid_batches(path + "valid/", batch_size=batch_size)
# presumably adapts the network's output layer to the classes found in
# `batches` -- confirm in vgg16.py
vgg.finetune(batches)

# Keep only the weights of the epoch with the highest validation accuracy;
# they are written to weights_best.hdf5.
checkpoint = ModelCheckpoint("weights_best.hdf5", monitor="val_acc", verbose=1,
                             save_best_only=True, mode="max")
# The sample count attribute is `nb_sample` (the same attribute read from
# `valid_batches` below); `bn_sample` does not match it and would raise
# AttributeError.
vgg.model.fit_generator(batches, samples_per_epoch=batches.nb_sample,
                        nb_epoch=epochs, validation_data=valid_batches,
                        nb_val_samples=valid_batches.nb_sample, callbacks=[checkpoint])

In [None]:
# Restore the weights saved by the ModelCheckpoint callback in the training
# cell (weights_best.hdf5), then run the model over the images under
# path + "test/".
# vgg.test returns a pair: the batch generator (its `filenames` attribute is
# used in the next cell) and the predictions.
# NOTE(review): presumably `preds` has one row per test image and one column
# per class -- confirm in vgg16.py.
vgg.model.load_weights("weights_best.hdf5")
test_batches, preds = vgg.test(path + "test/", batch_size=batch_size)

In [None]:
# Build the submission file: one "id,probability" row per test image.
# The generator's filenames may carry a directory prefix (the images live in
# test/unknown/), so keep only the base name without its extension.
filenames = [os.path.splitext(os.path.basename(filename))[0]
             for filename in test_batches.filenames]
# Convert the ids to integers before stacking: mixing strings with the float
# predictions would produce a string array, which np.savetxt cannot format
# with "%d,%.5f".
image_ids = [int(name) for name in filenames]
# NOTE(review): column 0 of `preds` is taken as the "invasive" probability --
# presumably because class folders are indexed alphabetically; confirm.
subm = np.column_stack([image_ids, preds[:, 0]])
np.savetxt("submission.csv", subm, fmt="%d,%.5f", header="id,invasive", comments="")

In [None]:
# Upload submission.csv to the "invasive-species-monitoring" competition with
# the `kg` CLI, using the credentials from $USERNAME / $PASSWORD and an empty
# submission message (-m "").
# NOTE(review): the password is passed as a command-line argument, which is
# presumably visible in process listings -- confirm this is acceptable.
!kg submit submission.csv -u $USERNAME -p $PASSWORD -c invasive-species-monitoring -m ""