In [1]:
from __future__ import division, print_function
# Rather than importing everything manually, we'll make things easy
#   and load them all in utils_tri.py, and just import them from there.
%matplotlib inline
from importlib import reload  # Python 3
import utils_tri; reload(utils_tri)
from utils_tri import *

Using Theano backend.
Using cuDNN version 5110 on context None
Mapped name None to device cuda0: GeForce GTX 1060 6GB (0000:01:00.0)


In [2]:
import os, json
from glob import glob
import numpy as np
import scipy
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import confusion_matrix
np.set_printoptions(precision=4, linewidth=100)
from matplotlib import pyplot as plt
from utils_tri import plots, get_batches, plot_confusion_matrix, get_data

In [3]:
from numpy.random import random, permutation
from scipy import misc, ndimage
from scipy.ndimage.interpolation import zoom

import keras
from keras import backend as K
from keras.utils.data_utils import get_file
from keras.models import Sequential
from keras.layers import Input
from keras.layers.core import Flatten, Dense, Dropout, Lambda
from keras.layers.convolutional import Conv2D, MaxPooling2D, ZeroPadding2D
from keras.optimizers import SGD, RMSprop
from keras.preprocessing import image

In [3]:
import vgg16_tri; reload(vgg16_tri)
from vgg16_tri import Vgg16

In [4]:
# Root directory of the dataset. The 'train', 'valid' and 'test' folders and
# the submission file are all addressed as path + '<name>', so the trailing
# slash is required.
path = 'data/dogscats/'

#### Tasks

- Load sample dogs and cats data.
- Finetune VGG and train the last layer
- Run on the full dataset (later)
- Practice plotting images
- Plot confusion matrix


In [5]:
# Load the training and validation data as batch iterators.
# utils_tri provides both get_data and get_batches; batches are used here.
# The 'train' folder is read first, then 'valid'.
train_batches, valid_batches = [get_batches(path + split) for split in ('train', 'valid')]

Found 23000 images belonging to 2 classes.
Found 2000 images belonging to 2 classes.


In [6]:
# load VGG 16 model
# Vgg16 is the wrapper class imported from vgg16_tri; it is built with its defaults.
vgg = Vgg16()

In [7]:
# Show the class-name -> index mapping of the training batches.
# The recorded output is {'cats': 0, 'dogs': 1}.
train_batches.class_indices

{'cats': 0, 'dogs': 1}

In [8]:
# Adapt the model to the classes found in train_batches.
# NOTE(review): presumably this swaps the final layer for a 2-way output and
# freezes the rest -- confirm against vgg16_tri.Vgg16.finetune.
vgg.finetune(train_batches)

In [10]:
# Train for 3 epochs on train_batches, passing valid_batches for validation.
# NOTE(review): batch_size is passed here although the batch iterators were
# already created by get_batches -- confirm how vgg16_tri.Vgg16.fit uses it.
vgg.fit(train_batches, valid_batches, batch_size=32, nb_epoch=3)

Epoch 1/3
Epoch 2/3
Epoch 3/3


### Keras features

- Save model to file
- Create checkpoints for model and only save the best model

#### Next tasks
- Plot confusion matrix
- Improve my first score by retraining all fully connected layers.

In [12]:
# test set
# vgg.test returns the test batch iterator together with the predictions.
# NOTE(review): the submission cell pairs test_batches.filenames with the rows
# of preds by position, so vgg.test must not shuffle -- confirm in vgg16_tri.
test_batches, preds = vgg.test(path+'test', batch_size=32)

Found 12500 images belonging to 1 classes.


In [13]:
# Inspect the raw predictions: the recorded output shows two values per row.
# Presumably column 0 is 'cats' and column 1 is 'dogs', following
# train_batches.class_indices -- TODO confirm.
print(preds)

[[  6.09377508e-08   9.99999881e-01]
 [  9.46694983e-22   1.00000000e+00]
 [  1.00000000e+00   2.07615171e-27]
 ..., 
 [  1.00000000e+00   8.19032228e-33]
 [  1.00000000e+00   5.78458646e-35]
 [  1.00000000e+00   5.09689405e-23]]


In [14]:
def file2id(filenames):
    """Extract the integer image ids from a sequence of image file paths.

    Each entry is expected to look like ``'<subdir>/<id>.<ext>'``, for
    example ``'unknown/123.jpg'``. The id is the part of the file's base
    name that precedes the first ``'.'``.

    The directory part is removed with ``os.path.basename`` instead of a
    fixed 8-character slice, so the result no longer depends on the test
    sub-directory being named exactly ``'unknown'``.

    Parameters
    ----------
    filenames : iterable of str
        Relative or absolute paths of the image files.

    Returns
    -------
    numpy.ndarray
        1-D ``uint32`` array of ids, in the same order as ``filenames``
        (empty if ``filenames`` is empty).

    Raises
    ------
    ValueError
        If the text before the first ``'.'`` of a base name is not an integer.
    """
    ids = [int(os.path.basename(each).split('.')[0]) for each in filenames]
    return np.array(ids, dtype=np.uint32)

In [16]:
# Build the submission file: one "id,label" row per test image.
# Column 1 of preds is used as the label (presumably the 'dogs' probability,
# per train_batches.class_indices -- TODO confirm). It is clipped to
# [0.01, 0.99] so that no prediction is written as exactly 0 or 1.
isdog = np.clip(preds[:, 1], 0.01, 0.99)
# Ids come from the test file names, in the same order as the rows of preds.
ids = file2id(test_batches.filenames)
# Two columns: id and clipped label.
subm = np.column_stack((ids, isdog))
# comments='' stops numpy from prefixing the header line with '# '.
np.savetxt(path+'submission.csv', subm, header='id,label', fmt='%d,%.3f', comments='')