# Dogs vs. Cats Redux: Kernels Edition
### Distinguish images of dogs from cats


## Setting things up

The data needs to be organised in the following form:

- data
    - valid
        - cats
        - dogs
    - train
        - cats
        - dogs
    - test
        - unknown
    

In [1]:
# Setting things up
%matplotlib inline

In [2]:
# Required libraries
from __future__ import division,print_function

import os, json
from glob import glob
import numpy as np
np.set_printoptions(precision=4, linewidth=100)
from matplotlib import pyplot as plt

In [4]:
# importing utils.py and vgg16.py
import utils; reload(utils)
from utils import plots

import vgg16; reload(vgg16)
from vgg16 import Vgg16

In [5]:
# Setting path
# Root directory of the competition data. The cells below read the train/,
# valid/ and test/ subdirectories from it and store model weights under results/.
path = '../data/kg-dogs-vs-cats/'
# NOTE(review): the alternative below is labelled as the sample-data path but is
# identical to the full-data path above -- presumably it should point at a
# sample subdirectory; confirm before enabling it.
#path = '../data/kg-dogs-vs-cats/' # for working with sample data

## Training the Model

In [None]:
# Number of images per training batch. It has to be defined before the batch
# generators are created; the validation and test passes use twice this size.
# 64 is only a starting value -- lower it if the GPU runs out of memory.
batch_size = 64

vgg = Vgg16()
# Grab a few images at a time for training and validation.
# NB: They must be in subdirectories named based on their category
batches = vgg.get_batches(path+'train', batch_size=batch_size)
val_batches = vgg.get_batches(path+'valid', batch_size=batch_size*2)
# Fine-tune on the training batches, then fit for a single epoch, passing the
# validation batches alongside the training batches.
vgg.finetune(batches)
vgg.fit(batches, val_batches, nb_epoch=1)

In [None]:
# Saving the model
# The weights are written to a 'results' directory under the data path. That
# directory is not part of the required data layout, so create it when it is
# missing instead of letting the save fail on a fresh data directory.
results_dir = path+'results/'
if not os.path.isdir(results_dir):
    os.makedirs(results_dir)
vgg.model.save_weights(results_dir+'model-full.h5')

In [None]:
# Restore the weights previously stored in the results directory
weights_file = path + 'results/model-full.h5'
vgg.model.load_weights(weights_file)

## Predicting for new data

In [None]:
# Run the model over the images in the test directory.
# Note that `batches` is rebound here: from now on it refers to the test batches.
test_dir = path+'test'
print(test_dir+'/')
batches, preds = vgg.test(test_dir, batch_size=batch_size*2)

## Generating the submissions file

In [None]:
# vgg.test() produced two probabilities per image; which column belongs to
# which class follows from how the cats/dogs directories were ordered.
# From the output it looks like column one is cats and column two is dogs.
filenames = batches.filenames

# Peek at the first five predictions and the files they belong to
print(preds[:5])
print(filenames[:5])

In [None]:
# Extract the numeric image ids from the filenames in our test/unknown directory.
# Each filename is a path relative to the test directory (e.g. 'unknown/42.jpg'),
# so take the base name and keep the part before the first '.' instead of
# relying on a fixed-length directory prefix.
ids = np.array([int(os.path.basename(f).split('.')[0]) for f in filenames])
# Second prediction column, used as the probability that the image is a dog
isdog = preds[:,1]

In [None]:
# Pair every image id with its dog probability: one row per image,
# with the id in the first column and the probability in the second.
subm = np.column_stack((ids, isdog))
# Display the first rows as a quick sanity check
subm[:5]

In [None]:
# Write the submission as CSV: a header row followed by one line per image
# holding the integer id and the probability with five decimals.
submission_file_name = 'submission1.csv'
np.savetxt(
    submission_file_name,
    subm,
    fmt='%d,%.5f',
    header='id,label',
    comments=''  # empty prefix so the header line is written without a leading '# '
)

In [None]:
# generating link for submission file
# The FileLink object is the last expression of the cell, so the notebook
# displays it as the cell output; it points at the CSV written above.
from IPython.display import FileLink
FileLink(submission_file_name)