Configure matplotlib to use inline mode, so that when a plotting cell is executed (for example with SHIFT-ENTER) the image appears directly under that code cell.

In [7]:
%matplotlib inline

In [8]:
# Root directory of the image data; 'train', 'valid' and 'test1' are appended to it below.
# The line for the full dataset is kept commented out and the 'sample' subset is active
# (the runs below report 16 training images).
# path = "data/dogscats/"
path = "data/dogscats/sample/"

Use the division and print functions in the Python 3 style (so `1/2 == 0.5`, not `0`, and things like `print(x, sep=', ', end='')` work).

In [9]:
from __future__ import division,print_function

Import some other useful things:

In [10]:
import os, json
from glob import glob
import numpy as np
np.set_printoptions(precision=4, linewidth=100)
from matplotlib import pyplot as plt

# Directory the notebook kernel was started in.
current_dir = os.getcwd()
# Output directory for generated files. Built with os.path.join so the path
# separator is correct on every platform.
SAVE_DIR = os.path.join(current_dir, 'data')

In [11]:
# Batch size passed to get_batches below. Use the largest value your GPU memory
# allows; no more than 64 is recommended.
# An older or cheaper GPU will run out of memory at this size, so decrease it there.
batch_size=64

In [12]:
# Import our class, and instantiate
import vgg16; reload(vgg16)
from vgg16 import Vgg16

In [13]:
vgg = Vgg16()

In [14]:
batches = vgg.get_batches(path+'train', batch_size=batch_size)

Found 16 images belonging to 2 classes.


In [15]:
# Validation batches are requested at twice the training batch size.
# NOTE(review): presumably acceptable because validation needs less GPU memory
# per image than training; confirm on memory-constrained hardware.
val_batches = vgg.get_batches(path+'valid', batch_size=batch_size*2)

Found 8 images belonging to 2 classes.


In [16]:
vgg.finetune(batches)

In [17]:
vgg.fit(batches, val_batches, nb_epoch=1)

Epoch 1/1


* Download data
* Sort into folders
* Train and finetune model
* Get results and store in proper format
* Use kaggle cli to upload to kaggle

Get results and store in proper format
------------------------------------

In [18]:
%config IPCompleter.greedy=True 

In [19]:
# Run the model on the images under path + 'test1'; `preds` holds one row of
# two scores per image and `batches.filenames` the matching file names.
# NOTE: this rebinds `batches`, which until now held the training batches.
# Re-running the finetune/fit cells after this one would use the test batches instead.
batches, preds = vgg.test(path+'test1')

Found 11 images belonging to 1 classes.


In [20]:
preds[:5]

array([[ 0.2954,  0.7046],
       [ 0.3235,  0.6765],
       [ 0.7127,  0.2873],
       [ 0.2882,  0.7118],
       [ 0.9567,  0.0433]], dtype=float32)

In [21]:
batches.filenames[:5]

['unknown/999.jpg',
 'unknown/9990.jpg',
 'unknown/9991.jpg',
 'unknown/9992.jpg',
 'unknown/9993.jpg']

In [46]:
# Pair every test file name with its prediction row. The pairs are materialized
# as a list because on Python 3 `zip` returns a one-shot iterator: it would
# display as `<zip object ...>` and be empty after its first use.
results = list(zip(batches.filenames, preds))
results

[('unknown/999.jpg', array([ 0.2954,  0.7046], dtype=float32)),
 ('unknown/9990.jpg', array([ 0.3235,  0.6765], dtype=float32)),
 ('unknown/9991.jpg', array([ 0.7127,  0.2873], dtype=float32)),
 ('unknown/9992.jpg', array([ 0.2882,  0.7118], dtype=float32)),
 ('unknown/9993.jpg', array([ 0.9567,  0.0433], dtype=float32)),
 ('unknown/9994.jpg', array([ 0.8432,  0.1568], dtype=float32)),
 ('unknown/9995.jpg', array([ 0.9708,  0.0292], dtype=float32)),
 ('unknown/9996.jpg', array([ 0.3741,  0.6259], dtype=float32)),
 ('unknown/9997.jpg', array([ 0.4288,  0.5712], dtype=float32)),
 ('unknown/9998.jpg', array([ 0.9526,  0.0474], dtype=float32)),
 ('unknown/9999.jpg', array([ 0.7032,  0.2968], dtype=float32))]

In [63]:
import re

def format(x):
    """Convert one ``(filename, prediction)`` pair into an ``[id, label]`` row.

    NOTE: the name shadows the builtin ``format``; it is kept so that existing
    references to this helper keep working.

    Parameters
    ----------
    x : sequence
        ``x[0]`` is a file name of the form ``'unknown/<digits>.jpg'`` and
        ``x[1]`` is an indexable pair of scores whose second entry is treated
        as the "dog" score.

    Returns
    -------
    list
        ``[image_id, label]`` where ``image_id`` is the integer taken from the
        file name and ``label`` is 1 when the dog score exceeds 0.5, else 0.

    Raises
    ------
    ValueError
        If the file name does not have the expected form.
    """
    # Raw string so that the backslash is a regex escape, not a string escape.
    match = re.match(r'unknown/([0-9]+)\.jpg$', x[0])
    if match is None:
        raise ValueError('unexpected test filename: %r' % (x[0],))
    is_dog = x[1][1] > 0.5
    # Plain int replaces np.int, an alias that has been removed from NumPy.
    return [int(match.group(1)), 1 if is_dog else 0]
    
# Build an array with one [id, label] row per test image. `map` is wrapped in
# `list` because on Python 3 it returns an iterator, which np.array would store
# as a single 0-d object instead of converting it to rows.
formattedResults = np.array(list(map(format, results)))
formattedResults[:5]

array([[ 999,    1],
       [9990,    1],
       [9991,    0],
       [9992,    1],
       [9993,    0]])

In [64]:
# Write the submission file: a header line 'id,label' followed by one row per image.
# A single '%d' format is applied to every column and the columns are joined with
# `delimiter` (with a multi-format string such as '%d,%d' the delimiter is ignored).
# comments='' stops NumPy from prefixing the header line with '# '.
np.savetxt(os.path.join(SAVE_DIR, 'jp_catsdogs.csv'), formattedResults, fmt='%d', delimiter=',', header='id,label', comments='')