# DOGS VS CATS CODE

## Initial Imports and Setup

In [1]:
# Tip: type '%' and press Tab to list the magic commands that can be run in the notebook
# just as if they were typed in a terminal.
# %pwd prints the present working directory; the relative paths used later in this
# notebook (e.g. "data/dogscats/...") are resolved against it.
%pwd

u'/home/namitha/kaggle'

In [2]:
# This command helps us to make and see plots within the notebook [a python 2D plotting library] 
%matplotlib

#import modules that we will be using in the code
from utils import *
from vgg16 import Vgg16

# Enter the path to your data, relative to the working directory printed by %pwd above.
# Every later cell builds its train/valid/test locations from this value.
# NOTE(review): the setup cells below create their folders under data/dogscats/, not
# data/redux/ -- confirm which directory holds the full dataset before enabling the next line.
# path = "data/redux/" # Use this path if you are using the entire dataset
path = "data/dogscats/sample/" # Use this path if you are using sample dataset

Using matplotlib backend: Qt5Agg


Using Theano backend.


In [3]:
# Other imports. Add the imports as and when you are using any particular library
import os,sys
from shutil import copyfile

## Setting up Data Folders
### The cells in this section only need to be run once, the very first time you use the notebook: they are a one-time setup of the data folders.

In [4]:
# Create directories
%cd data/dogscats
%mkdir valid
%mkdir results
%mkdir -p sample/train
%mkdir -p sample/test
%mkdir -p sample/valid
%mkdir -p sample/results
%mkdir -p test/unknown

/home/namitha/kaggle/data/dogscats


In [20]:
# Step into train/ so that the '*.jpg' globs in the following cells match the training images
%cd train

/home/namitha/kaggle/data/dogscats/train


In [13]:
# Move up to 2000 randomly chosen images from the train folder into the valid folder.
# Expects the current directory to be train/ (see the %cd cell above).
# The shuffled list is sliced rather than indexed over a fixed range, so the cell no longer
# raises IndexError when fewer than 2000 images are present (e.g. when it is run again after
# the images have already been moved or sorted into sub-directories).
g = glob('*.jpg')
shuf = np.random.permutation(g)
for fname in shuf[:2000]:
    os.rename(fname, '../valid/' + fname)

In [14]:
# Copy up to 200 randomly chosen images from the train folder into sample/train.
# Expects the current directory to be train/. The originals stay in place (copy, not move).
# Slicing the shuffled list avoids the IndexError the fixed range(200) indexing raised
# when the folder holds fewer than 200 images.
g = glob('*.jpg')
shuf = np.random.permutation(g)
for fname in shuf[:200]:
    copyfile(fname, '../sample/train/' + fname)

In [15]:
# Switch to valid/ so the next cell samples from the validation images
%cd ../valid

/home/namitha/kaggle/data/dogscats/valid


In [16]:
# Copy up to 50 randomly chosen images from the valid folder into sample/valid.
# Expects the current directory to be valid/. The originals stay in place (copy, not move).
# Slicing the shuffled list avoids the IndexError the fixed range(50) indexing raised
# when the folder holds fewer than 50 images.
g = glob('*.jpg')
shuf = np.random.permutation(g)
for fname in shuf[:50]:
    copyfile(fname, '../sample/valid/' + fname)

In [17]:
# Rearrange all the images to separate dogs and cats directories 
# in the train, valid, sample/train, sample/valid directories
%cd ../sample/train
%mkdir cats
%mkdir dogs
%mv cat.*.jpg cats/
%mv dog.*.jpg dogs/

%cd ../valid
%mkdir cats
%mkdir dogs
%mv cat.*.jpg cats/
%mv dog.*.jpg dogs/

%cd ../../valid
%mkdir cats
%mkdir dogs
%mv cat.*.jpg cats/
%mv dog.*.jpg dogs/

%cd ../train
%mkdir cats
%mkdir dogs
%mv cat.*.jpg cats/
%mv dog.*.jpg dogs/

/home/namitha/kaggle/data/dogscats/sample/train
/home/namitha/kaggle/data/dogscats/sample/valid
/home/namitha/kaggle/data/dogscats/valid
/home/namitha/kaggle/data/dogscats/train


In [18]:
# Put every test image into a single 'unknown' class folder, because the code looks for
# sub-directories. test/unknown itself was created by the directory-setup cell (mkdir -p test/unknown).
%cd ../test
%mv *.jpg unknown/

/home/namitha/kaggle/data/dogscats/test


In [21]:
# Go back up three levels, from data/dogscats/test to the project root, so the relative
# `path` defined at the top of the notebook resolves correctly again
%cd ../../..

/home/namitha/kaggle


## Using a Trained Model and Finetuning it (VGG16)

In [22]:
# Instantiate the Vgg16 helper class (imported at the top of the notebook) and keep a
# direct reference to the model object it exposes, so both `vgg` and `model` can be used below
vgg = Vgg16()
model = vgg.model

  .format(self.name, input_shape))


In [23]:
# Set batch size: the number of images per training batch.
# Later cells derive the validation/test batch sizes from it (batch_size*2).
batch_size = 4

In [25]:
# Create batches of training and validation images.
# Validation uses a batch twice the size of the training batch.
batches = get_batches(path+'train', batch_size=batch_size)
val_batches = get_batches(path+'valid', batch_size=batch_size*2)


# get_batches is a helper pulled in by `from utils import *` (only Vgg16 is imported from
# vgg16). The same thing can be written directly against Keras as below.
# The validation batch size is batch_size*2 here as well, so that the two versions really
# are equivalent -- the original passed batch_size, silently overriding the helper call above.
gen=image.ImageDataGenerator()
batches = gen.flow_from_directory(path+'train', target_size=(224,224), 
                                  class_mode='categorical', shuffle=True, batch_size=batch_size)
val_batches = gen.flow_from_directory(path+'valid', target_size=(224,224), 
                                  class_mode='categorical', shuffle=True, batch_size=batch_size*2)

Found 200 images belonging to 2 classes.
Found 50 images belonging to 2 classes.


In [26]:
# Adapt the pretrained network to the classes found in our training batches
vgg.finetune(batches)

# Hand-written counterpart of the call above, applied to `model` (bound to vgg.model earlier).
# NOTE(review): both versions run in this cell, so the output layer is presumably replaced
# a second time here -- confirm that this is intended.
# 1. remove the current output layer
model.pop()
# 2. mark every remaining layer as non-trainable
for layer in model.layers:
    layer.trainable = False
# 3. append a softmax output layer with one unit per class in `batches`
model.add(Dense(batches.nb_class, activation='softmax'))
# 4. compile with the Adam optimizer, categorical cross-entropy loss and an accuracy metric
model.compile(
    optimizer=Adam(lr=1e-4),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

## Train the Finetuned Model

In [27]:
# Train the finetuned model for three epochs, validating against val_batches.
# Only the layers modified during finetuning are trained.
vgg.fit(batches, val_batches, nb_epoch=3)

# Direct Keras form of the call above: each epoch covers batches.nb_sample training
# samples and validation covers val_batches.nb_sample samples.
# NOTE(review): this runs in addition to vgg.fit, not instead of it.
model.fit_generator(
    batches,
    samples_per_epoch=batches.nb_sample,
    nb_epoch=3,
    validation_data=val_batches,
    nb_val_samples=val_batches.nb_sample,
)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x7f5b3f4e2350>

In [None]:
# We can further train it using different learning rates and number of epochs.
# The optimizer's learning rate is a backend variable that the compiled training function
# reads, so it has to be updated in place. The original `model.optimizer.lr = 0.001` merely
# rebound the Python attribute to a float and left an already-built training function on
# the old rate. set_value is the Theano shared-variable setter (this notebook reports
# "Using Theano backend." at import time).
model.optimizer.lr.set_value(0.001)
model.fit_generator(batches, samples_per_epoch=batches.nb_sample, nb_epoch=3,
                validation_data=val_batches, nb_val_samples=val_batches.nb_sample)

In [28]:
# Print a layer-by-layer table of the model: layer type, output shape, parameter count and inputs
model.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
lambda_1 (Lambda)                (None, 3, 224, 224)   0           lambda_input_1[0][0]             
____________________________________________________________________________________________________
zeropadding2d_1 (ZeroPadding2D)  (None, 3, 226, 226)   0           lambda_1[0][0]                   
____________________________________________________________________________________________________
convolution2d_1 (Convolution2D)  (None, 64, 224, 224)  1792        zeropadding2d_1[0][0]            
____________________________________________________________________________________________________
zeropadding2d_2 (ZeroPadding2D)  (None, 64, 226, 226)  0           convolution2d_1[0][0]            
___________________________________________________________________________________________

## Test the Model

In [None]:
# We test the model using the test data made available.
# `test_path` was never defined anywhere in the notebook (NameError); it is defined here and
# points at the same directory the hand-written version below reads: the test/ folder that
# sits next to sample/ (path is ".../sample/", so '../test' is one level up).
test_path = path + '../test'
test_batches, preds = vgg.test(test_path, batch_size = batch_size*2)

# The above code can be written as below.
# shuffle=False keeps the predictions in the same order as the files are read, otherwise a
# prediction cannot be matched back to its image. class_mode=None because the test images
# carry no labels (they all live in the single 'unknown' folder).
test_batches = gen.flow_from_directory(test_path, target_size=(224,224), 
                                  class_mode=None, shuffle=False, batch_size=batch_size*2)
preds = model.predict_generator(test_batches, val_samples = test_batches.nb_sample)

In [None]:
# Look at the first four predictions only, rather than displaying the whole array
preds[:4]

## How can we build our own Model step by step

In [None]:
# This is the VGG16 model if we want to model it from scratch (configuration D of the
# VGG paper): five convolutional blocks with 2, 2, 3, 3 and 3 conv layers respectively
# (3x3 kernels, each preceded by 1-pixel zero padding so the spatial size is preserved),
# every block followed by 2x2 max-pooling with stride 2 (224 -> 112 -> 56 -> 28 -> 14 -> 7),
# then two 4096-unit dense layers with dropout and a 1000-way softmax.
# The earlier version had only four blocks of two convolutions, 3x3 pooling and a 10-way
# output, which is not VGG16.
# NOTE: this rebinds `model` to a new, uncompiled and untrained network; call
# model.compile(...) before fitting it, and change the size of the last Dense layer to
# your own number of classes if you are not predicting the 1000 ImageNet classes.
model = Sequential([
            Lambda(vgg_preprocess, input_shape=(3,224,224)),
            # Block 1: 2 x conv-64
            ZeroPadding2D((1, 1)),
            Convolution2D(64,3,3, activation='relu'),
            ZeroPadding2D((1, 1)),
            Convolution2D(64,3,3, activation='relu'),
            MaxPooling2D((2,2), strides=(2,2)),
            # Block 2: 2 x conv-128
            ZeroPadding2D((1, 1)),
            Convolution2D(128,3,3, activation='relu'),
            ZeroPadding2D((1, 1)),
            Convolution2D(128,3,3, activation='relu'),
            MaxPooling2D((2,2), strides=(2,2)),
            # Block 3: 3 x conv-256
            ZeroPadding2D((1, 1)),
            Convolution2D(256,3,3, activation='relu'),
            ZeroPadding2D((1, 1)),
            Convolution2D(256,3,3, activation='relu'),
            ZeroPadding2D((1, 1)),
            Convolution2D(256,3,3, activation='relu'),
            MaxPooling2D((2,2), strides=(2,2)),
            # Block 4: 3 x conv-512
            ZeroPadding2D((1, 1)),
            Convolution2D(512,3,3, activation='relu'),
            ZeroPadding2D((1, 1)),
            Convolution2D(512,3,3, activation='relu'),
            ZeroPadding2D((1, 1)),
            Convolution2D(512,3,3, activation='relu'),
            MaxPooling2D((2,2), strides=(2,2)),
            # Block 5: 3 x conv-512
            ZeroPadding2D((1, 1)),
            Convolution2D(512,3,3, activation='relu'),
            ZeroPadding2D((1, 1)),
            Convolution2D(512,3,3, activation='relu'),
            ZeroPadding2D((1, 1)),
            Convolution2D(512,3,3, activation='relu'),
            MaxPooling2D((2,2), strides=(2,2)),
            # Classifier
            Flatten(),
            Dense(4096, activation='relu'),
            Dropout(0.5),
            Dense(4096, activation='relu'),
            Dropout(0.5),
            Dense(1000, activation='softmax')
        ])

## Data Augmentation

In [None]:
# Generate a set of images from train data using variations like rotate, height shift, width shift, etc.
gen_t = image.ImageDataGenerator(rotation_range=15, height_shift_range=0.05, 
                shear_range=0.1, channel_shift_range=20, width_shift_range=0.1)

# Build the augmented training batches from the generator defined just above.
# (The original called `get_t`, a typo for `gen_t`, which raised NameError.)
batches = gen_t.flow_from_directory(path+'train', target_size=(224,224), 
                                  class_mode='categorical', shuffle=True, batch_size=batch_size)

# And train the finetuned model on the augmented batches.
# vgg.model is used rather than `model` because the "build our own model" cell rebinds
# `model` to a fresh, uncompiled network that cannot be fitted; vgg.model is always the
# finetuned, compiled network (it is the object `model` originally pointed to).
vgg.model.fit_generator(batches, samples_per_epoch=batches.nb_sample, nb_epoch=3,
                validation_data=val_batches, nb_val_samples=val_batches.nb_sample)