# Dogs vs Cats from Scratch

[Dogs vs. Cats Redux: Kernels Edition](https://www.kaggle.com/c/dogs-vs-cats-redux-kernels-edition)

Contents:
1. [Create validation and sample sets](#Create-validation-and-sample-sets)
2. [Rearrange image files into new directories](#Rearrange-image-files-into-new-directories)
3. [Fine-tuning](#Fine-tuning)
4. [Training](#Training)

In [19]:
%matplotlib inline
import os, sys

In [20]:
# Dataset root, derived from the kernel's current working directory.
# Later cells change directory with %cd, so re-running this cell after one of
# them would compute a different data_dir.
current_dir = os.getcwd()
data_dir = '{}/data/redux'.format(current_dir)

## Create validation and sample sets

In [None]:
# Work from the dataset root: the mkdir/rm cells that follow use relative paths.
%cd $data_dir

In [None]:
%mkdir valid
%mkdir results
%mkdir -p sample/train
%mkdir -p sample/test
%mkdir -p sample/valid
%mkdir -p sample/results
%mkdir -p test/unknown

In [2]:
import numpy as np
from glob import glob
from shutil import copyfile

In [None]:
# Reset cell: recursively deletes valid/, sample/ and test/ (relative to the
# current directory).
# NOTE(review): this cell sits right after the mkdir cell, so a top-to-bottom
# run removes the folders that later cells write into; presumably it is meant
# to be run by hand only. Confirm.
# NOTE(review): valid/ receives files moved out of train/ via os.rename, and
# test/ holds the images later moved into unknown/, so deleting them likely
# discards data that has to be re-extracted. Confirm before running.
%rm valid -R
%rm sample -R
%rm test -R

In [None]:
# The next cells glob '*.jpg' relative to the current directory, so switch into train/.
%cd $data_dir/train

In [None]:
# Hold out 2000 randomly chosen images from the current directory as the
# validation set. os.rename moves the files, so they leave this folder.
# The permutation is unseeded, so each run picks a different subset.
g = glob('*.jpg')
shuf = np.random.permutation(g)
for i in range(2000):
    fname = shuf[i]
    os.rename(fname, data_dir + '/valid/' + fname)

In [None]:
# Copy (not move) 200 randomly chosen images from the current directory into
# sample/train, leaving the originals in place.
g = glob('*.jpg')
shuf = np.random.permutation(g)
for i in range(200):
    fname = shuf[i]
    copyfile(fname, data_dir + '/sample/train/' + fname)

In [None]:
# Switch into valid/ so the following glob picks up the held-out images.
%cd $data_dir/valid

In [None]:
# Copy 50 randomly chosen images from the current directory into sample/valid.
g = glob('*.jpg')
shuf = np.random.permutation(g)
for i in range(50):
    fname = shuf[i]
    copyfile(fname, data_dir + '/sample/valid/' + fname)

In [None]:
# Split the sampled training images into one folder per class, keyed on the
# 'cat.' / 'dog.' filename prefix.
%cd $data_dir/sample/train
%mkdir cats
%mkdir dogs
%mv cat.*.jpg cats/
%mv dog.*.jpg dogs/

## Rearrange image files into new directories

In [None]:
# Same per-class layout for the sampled validation images: files are sorted
# into cats/ and dogs/ by filename prefix.
%cd $data_dir/sample/valid
%mkdir cats
%mkdir dogs
%mv cat.*.jpg cats/
%mv dog.*.jpg dogs/

In [None]:
# Sort the full validation set into cats/ and dogs/ by filename prefix.
%cd $data_dir/valid
%mkdir cats
%mkdir dogs
%mv cat.*.jpg cats/
%mv dog.*.jpg dogs/

In [None]:
# Sort the images still in train/ (the validation files were moved out
# earlier) into cats/ and dogs/ by filename prefix.
%cd $data_dir/train
%mkdir cats
%mkdir dogs
%mv cat.*.jpg cats/
%mv dog.*.jpg dogs/

In [None]:
# Put every test image under the single unknown/ sub-folder.
# Presumably so directory-based loaders see exactly one class folder; confirm.
%cd $data_dir/test
%mv *.jpg unknown/

## Fine-tuning

In [32]:
from keras.applications.vgg16 import VGG16
from keras.layers import Input, Flatten, Dense
from keras.models import Model
from keras.optimizers import Adam

In [4]:
# Load VGG16 with ImageNet weights, keeping the original classifier head
# (include_top=True) so the last layer can be swapped out below.
vgg = VGG16(weights='imagenet', include_top=True)

In [5]:
# Drop the last entry from the layer list (the cell output shows it is a Dense
# layer), so vgg.layers[-1] now refers to the layer before it.
vgg.layers.pop()

<keras.layers.core.Dense at 0x7f1d600e9990>

In [6]:
# Freeze every remaining VGG layer; only layers added afterwards stay trainable.
for layer in vgg.layers:
    layer.trainable = False

In [7]:
# New 2-way softmax head, attached to the output of what is now the last
# layer in vgg.layers.
head = Dense(2, activation='softmax', name='predictions')
x = head(vgg.layers[-1].output)

In [8]:
# Assemble the fine-tuning model: the original VGG input through to the new
# 2-unit softmax output.
model = Model(input=vgg.input, output=x)

In [9]:
# Display the layer table of the assembled model.
model.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
input_1 (InputLayer)             (None, 3, 224, 224)   0                                            
____________________________________________________________________________________________________
block1_conv1 (Convolution2D)     (None, 64, 224, 224)  0           input_1[0][0]                    
____________________________________________________________________________________________________
block1_conv2 (Convolution2D)     (None, 64, 224, 224)  0           block1_conv1[0][0]               
____________________________________________________________________________________________________
block1_pool (MaxPooling2D)       (None, 64, 112, 112)  0           block1_conv2[0][0]               
___________________________________________________________________________________________

In [30]:
# Learning rate handed to the Adam optimizer when the model is compiled.
lr = 0.01

In [33]:
# Configure training: Adam at the chosen learning rate, categorical
# cross-entropy over the two softmax outputs, and accuracy as the metric.
optimizer = Adam(lr=lr)
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

## Training

In [None]:
# Return to the dataset root (the last %cd left the kernel in test/).
%cd $data_dir

In [None]:
# Locations used for training. Swap which `path` assignment is active to
# switch between the full dataset and the small sample.
# Every path keeps its trailing '/', because later code appends file names by
# plain string concatenation.
# NOTE(review): test_path and results_path always point at the full dataset
# root, even when `path` selects the sample. Confirm that is intended.
#path         = data_dir + '/'
path         = data_dir + '/sample/'
test_path    = data_dir + '/test/'
results_path = data_dir + '/results/'
train_path   = path + 'train/'
valid_path   = path + 'valid/'

In [41]:
from keras.preprocessing import image

In [42]:
# Batch size passed to both the training and the validation generators.
batch_size=64

In [44]:
def get_batches(path, gen=None, shuffle=True, batch_size=8, class_mode='categorical'):
    """Return a directory iterator yielding batches of 224x224 images from `path`.

    Args:
        path: Directory handed to `flow_from_directory`.
        gen: Generator object providing `flow_from_directory`. When None, a
            fresh `image.ImageDataGenerator()` is created for this call.
        shuffle: Forwarded to `flow_from_directory`.
        batch_size: Number of images per batch.
        class_mode: Label format, forwarded to `flow_from_directory`.

    Returns:
        Whatever `gen.flow_from_directory(...)` returns.
    """
    # A default of `image.ImageDataGenerator()` in the signature is evaluated
    # once, at definition time, and then shared by every call. Building it
    # here gives each call its own generator.
    if gen is None:
        gen = image.ImageDataGenerator()
    return gen.flow_from_directory(
        path,
        target_size=(224, 224),
        class_mode=class_mode,
        shuffle=shuffle,
        batch_size=batch_size)

In [45]:
# Training batches read from train_path (shuffled, by get_batches' default).
batches = get_batches(train_path, batch_size=batch_size)

Found 200 images belonging to 2 classes.


In [46]:
# Validation batches read from valid_path, using the same batch size as training.
val_batches = get_batches(valid_path, batch_size=batch_size)

Found 50 images belonging to 2 classes.


In [51]:
# Number of epochs passed to fit_generator.
nb_epoch=1

In [53]:
# Train from the directory iterators: one epoch covers every training image,
# and validation runs over every validation image.
fit_options = dict(
    samples_per_epoch=batches.nb_sample,
    nb_epoch=nb_epoch,
    validation_data=val_batches,
    nb_val_samples=val_batches.nb_sample)
model.fit_generator(batches, **fit_options)

Epoch 1/1


<keras.callbacks.History at 0x7f1d4c191e50>

In [54]:
# Store the trained weights (weights only) in the results folder.
weights_file = results_path + 'ft3.h5'
model.save_weights(weights_file)