In [9]:
# Root of the competition data, relative to the notebook's working directory.
# The trailing slash is required: every other cell builds paths by plain
# string concatenation (e.g. DATA_DIR+'train_labels.csv').
DATA_DIR = 'data/invasive-species-monitoring/'

The training images are neither named according to their category nor grouped into subdirectories; instead, a CSV file of labels is provided along with them. The first field is the image ID, and the second field is 1 if the image contains an invasive plant.

In [19]:
import csv

# IDs of the training images whose label column is '1' (invasive).
invasive = set()

# NOTE(review): binary mode ('rb') is what the Python 2 csv module expects.
# Under Python 3, csv.reader needs a text-mode file opened with newline=''
# -- confirm which kernel this notebook targets before changing it.
with open(DATA_DIR+'train_labels.csv', 'rb') as labels_file:
    label_rows = csv.reader(labels_file)

    # The first row holds the column headings, not a label.
    next(label_rows)

    # fields[0] is the image ID, fields[1] the invasive flag.
    invasive.update(
        int(fields[0]) for fields in label_rows if fields[1] == '1'
    )

Using the set of invasive image IDs gathered above, the training images can be moved into `invasive`/`noninvasive` subdirectories.

In [27]:
import os, re

# Training images are named '<numeric id>.jpg'; group 1 captures the ID.
IMAGE_PATTERN = re.compile(r'(\d+)\.jpg$')
train_dir = DATA_DIR+'train/'

# Destination directory for each label, keyed by "is this image invasive?".
class_dirs = {
    True: os.path.join(train_dir, 'invasive'),
    False: os.path.join(train_dir, 'noninvasive'),
}

for class_dir in class_dirs.values():
    # os.mkdir raises if the directory already exists; guarding it lets this
    # cell be re-run (e.g. after new images are added) without crashing.
    if not os.path.isdir(class_dir):
        os.mkdir(class_dir)

for filename in os.listdir(train_dir):
    match = IMAGE_PATTERN.match(filename)

    if not match:
        # Skips the class subdirectories themselves and any non-image entry.
        continue

    # Named image_id rather than id so the builtin id() is not shadowed for
    # the rest of the notebook session.
    image_id = int(match.group(1))

    os.rename(
        os.path.join(train_dir, filename),
        os.path.join(class_dirs[image_id in invasive], filename),
    )

In [1]:
from vgg16 import Vgg16

Using Theano backend.
Using cuDNN version 5110 on context None
Preallocating 6553/8192 Mb (0.800000) on cuda0
Mapped name None to device cuda0: GeForce GTX 1080 (0000:01:00.0)


In [2]:
# Build the model wrapper provided by the local vgg16 module.
# NOTE(review): presumably this constructs VGG16 with pretrained weights --
# confirm in vgg16.py.
vgg = Vgg16()

In [39]:
# Images per batch for the train/validation iterators; the test cell uses
# twice this value.
batch_size = 64

In [29]:
# Batch iterators over the training and validation directories. The captured
# output reports 2 classes for each, i.e. one per subdirectory.
# NOTE(review): no cell shown here creates DATA_DIR+'valid'; it is assumed to
# have been split off from train/ separately -- confirm.
batches = vgg.get_batches(DATA_DIR+'train', batch_size=batch_size)
val_batches = vgg.get_batches(DATA_DIR+'valid', batch_size=batch_size)

Found 2145 images belonging to 2 classes.
Found 150 images belonging to 2 classes.


In [30]:
# Adapt the model to the classes found in `batches` before training.
# NOTE(review): finetune lives in the local vgg16 module; presumably it
# replaces the final layer with one sized to these classes -- confirm there.
vgg.finetune(batches)

In [35]:
# Train on `batches`, passing `val_batches` as the validation set. The
# captured output ('Epoch 1/1') shows a single epoch per call.
vgg.fit(batches, val_batches)

Epoch 1/1


In [34]:
# Checkpoint the fine-tuned weights.
# NOTE(review): assumes DATA_DIR+'results/' already exists; no cell shown
# here creates it.
vgg.model.save_weights(DATA_DIR+'results/ft2.h5')

In [36]:
# Restore the checkpointed weights into the model.
# NOTE(review): the recorded execution counts (save=34, fit=35, load=36)
# suggest this reload reverts the model to its state before the most recent
# fit -- confirm that is intended.
vgg.model.load_weights(DATA_DIR+'results/ft2.h5')

In [40]:
# Run the model over the test directory with a doubled batch size; yields the
# batch iterator and the predictions. The captured output reports a single
# class, i.e. the test images sit in one subdirectory of DATA_DIR+'test'.
test_batches, predictions = vgg.test(DATA_DIR+'test', batch_size=batch_size*2)

Found 1531 images belonging to 1 classes.


In [41]:
from utils import load_array, save_array

In [43]:
# File paths of the test images, later paired index-by-index with
# `predictions`.
# NOTE(review): assumes the test iterator is unshuffled so both share the
# same order -- confirm in vgg16.py.
paths = test_batches.filenames

In [44]:
# Store the test paths and predictions under results/ via the utils helper.
# NOTE(review): presumably so the submission can be rebuilt with load_array
# without re-running prediction -- confirm save_array's format in utils.py.
save_array(DATA_DIR+'results/paths.dat', paths)
save_array(DATA_DIR+'results/predictions.dat', predictions)

In [58]:
# Column of `predictions` that holds P(invasive). Keras assigns class indices
# to subdirectories in alphanumeric order, so with 'invasive'/'noninvasive'
# the invasive class is column 0, not 1; writing column 1 would submit
# P(noninvasive) under the 'invasive' header. Looking the index up avoids
# hard-coding either value.
# NOTE(review): assumes `batches` (the training iterator) is a Keras directory
# iterator exposing `class_indices` -- confirm against vgg16.get_batches.
invasive_column = batches.class_indices['invasive']

# Write the submission file: a header row, then one
# '<image id>,<probability invasive>' row per test image.
with open(DATA_DIR+'submission.csv', 'w') as submission:
    submission.write('name,invasive\n')

    for path, prediction in zip(paths, predictions):
        # The image ID is the file name without directory or extension.
        # splitext leaves a dot-less name intact, whereas slicing at
        # str.find('.') would drop its last character (find returns -1).
        image_id = os.path.splitext(os.path.basename(path))[0]
        submission.write('%s,%f\n' % (image_id, prediction[invasive_column]))