In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# List the dataset files mounted under the Kaggle input directory.
# Walking '/' instead would traverse and print the entire filesystem.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Kaggle path of the zipped training images
folder_zip = '/kaggle/input/dogs-vs-cats/train.zip'
# Directory that holds the images once the archive is unpacked
folder = './train/'

# Unpack the whole archive into the current working directory
import zipfile
with zipfile.ZipFile(folder_zip, mode="r") as archive:
    archive.extractall(path=".")

# Plot Dog and Cat Photos

Looking at a few random photos in the directory, you can see that the photos are color and have different shapes and sizes. We can see that some photos are landscape format, some are portrait format, and some are square. We can also see a photo where the cat is barely visible (bottom left corner) and another that has two cats (lower right corner). This suggests that any classifier fit on this problem will have to be robust.

In [None]:
# plot dog photos from the dogs vs cats dataset
from matplotlib import pyplot
from matplotlib.image import imread

def plot9FirstPictures(ofDog):
    """
    Display the first nine dog or cat photos in a 3x3 grid.

    Parameters:
    ofDog (Boolean): True plots the dog pictures, False the cat ones
    """
    prefix = 'dog.' if ofDog else 'cat.'
    for position in range(1, 10):
        # subplot slots are numbered 1..9 while the picture files are numbered 0..8
        pyplot.subplot(3, 3, position)
        picture = imread(folder + prefix + str(position - 1) + '.jpg')
        pyplot.imshow(picture)
    # render the completed grid
    pyplot.show()

In [None]:
#Plot pictures of dog (ofDog=True)
plot9FirstPictures(True)

In [None]:
#Plot pictures of cat (ofDog=False)
plot9FirstPictures(False)

# Select Standardized Photo Size

We select a fixed size of 200×200 pixels. Smaller inputs mean a model that is faster to train.

Pre-Process Photos into Standard Directories using the Keras ImageDataGenerator class and flow_from_directory() API.

This API prefers data to be divided into separate train/ and test/ directories, and under each directory to have a subdirectory for each class, e.g. a train/dog/ and a train/cat/ subdirectories and the same for test. Images are then organized under the subdirectories.

Here is a script to create a copy of the dataset with this preferred structure. We will randomly select 25% of the images (or 6,250) to be used in a test dataset.

We create the directory structure as follows:
* dataset_dogs_vs_cats
    * test
        * dogs
        * cats
    * train
        * dogs
        * cats


In [None]:
# Root of the reorganised dataset, with the split and class folders beneath it
dataset_home = 'dataset_dogs_vs_cats/'
subdirs = ['train/', 'test/']
labeldirs = ['dogs/', 'cats/']
# Build every <split>/<class>/ directory; exist_ok keeps the cell re-runnable
for split_dir in subdirs:
    for class_dir in labeldirs:
        os.makedirs(dataset_home + split_dir + class_dir, exist_ok=True)

In [None]:
import random
from shutil import copyfile
# seed random number generator so that the train/test split is repeatable
random.seed(1)
# define ratio of pictures to use for validation
val_ratio = 0.25
# copy training dataset images into subdirectories
# os.listdir() returns entries in arbitrary order, so sort them: otherwise the
# seeded random draws would be paired with different files from one run to the next
for file in sorted(os.listdir(folder)):
    if file.startswith('cat'):
        label_dir = 'cats/'
    elif file.startswith('dog'):
        label_dir = 'dogs/'
    else:
        # not a labelled picture: leave it out of the new structure
        continue
    # send roughly val_ratio of the pictures to test/, the rest to train/
    dst_dir = 'test/' if random.random() < val_ratio else 'train/'
    src = os.path.join(folder, file)
    dst = dataset_home + dst_dir + label_dir + file
    copyfile(src, dst)

# Develop a Baseline CNN Model

In [None]:
# baseline model for the dogs vs cats dataset
import sys
from matplotlib import pyplot
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import Dense
from keras.layers import Flatten
from keras.models import Model
from keras.optimizers import SGD
from keras.layers import Dropout
from keras.applications.vgg16 import VGG16
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from keras.preprocessing.image import ImageDataGenerator

In [None]:
#Baseline
def define_model():
    """
    Build and compile the baseline CNN.

    One 3x3 convolution block with 32 filters and 2x2 max pooling, followed by a
    128-unit dense layer and a single sigmoid output, for 200x200 RGB inputs.
    Same architecture as define_model_one_block_vgg.

    Returns:
    the compiled Sequential model (SGD with momentum, binary cross-entropy loss)
    """
    model = Sequential()
    model.add(Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_uniform', padding='same', input_shape=(200, 200, 3)))
    model.add(MaxPooling2D((2, 2)))
    model.add(Flatten())
    model.add(Dense(128, activation='relu', kernel_initializer='he_uniform'))
    model.add(Dense(1, activation='sigmoid'))
    # compile model ('learning_rate' replaces the deprecated 'lr' argument name)
    opt = SGD(learning_rate=0.001, momentum=0.9)
    model.compile(optimizer=opt, loss='binary_crossentropy', metrics=['accuracy'])

    return model

In [None]:
# plot diagnostic learning curves
def summarize_diagnostics(history, filename=None):
    """
    Plot the learning curves of a fitted model and save them as a PNG file.

    Parameters:
    history: object returned by the model fitting call; its ``history`` dict must
        hold the 'loss', 'val_loss', 'accuracy' and 'val_accuracy' series

    filename (str): prefix of the output file, '_plot.png' is appended to it.
        Defaults to the last path component of sys.argv[0]; that default is the
        same on every call, so pass a distinct value to keep several plots.
    """
    # plot loss
    pyplot.subplot(211)
    pyplot.title('Cross Entropy Loss')
    pyplot.plot(history.history['loss'], color='blue', label='train')
    pyplot.plot(history.history['val_loss'], color='orange', label='test')
    # the line labels are only rendered once a legend is requested
    pyplot.legend()
    # plot accuracy
    pyplot.subplot(212)
    pyplot.title('Classification Accuracy')
    pyplot.plot(history.history['accuracy'], color='blue', label='train')
    pyplot.plot(history.history['val_accuracy'], color='orange', label='test')
    pyplot.legend()
    # keep the second title from overlapping the first subplot's axis
    pyplot.tight_layout()
    # save plot to file
    if filename is None:
        filename = sys.argv[0].split('/')[-1]
    pyplot.savefig(filename + '_plot.png')
    pyplot.close()
    
    
# run the test harness for evaluating a model
def run_test_harness(preloadWeights, model, number_epoch = 20, withDataAugmentation = False, imageSize = 200):
    """
    Train one of the VGG-style models on the prepared dataset and evaluate it.

    The model is fit on dataset_dogs_vs_cats/train/ and validated on
    dataset_dogs_vs_cats/test/. The best weights are checkpointed to
    train_ckpt/cp.ckpt, and training stops early when val_loss has not improved
    by at least 0.001 for 5 epochs. The test accuracy is printed and the
    learning curves are saved by summarize_diagnostics.

    Parameters:
    preloadWeights (Boolean): True to start from the weights stored in the
        train_ckpt/cp.ckpt checkpoint; when no checkpoint is found a message is
        printed and training starts from the freshly initialised model

    model (int): 1 = one_block_vgg / 2 = two_blocks_vgg / 3 = three_block_vgg / 4 = three_block_vgg with dropout

    number_epoch (int): maximum number of training epochs

    withDataAugmentation (Boolean): True to apply random 10% width/height shifts
        and horizontal flips to the training images

    imageSize (int): side of the square the images are resized to; the
        define_model_* builders are declared with input_shape=(200, 200, 3),
        so keep the default of 200 for them

    Raises:
    ValueError: if model is not 1, 2, 3 or 4
    """
    # define model (a separate local name keeps the integer id intact)
    if model == 1:
        network = define_model_one_block_vgg()
    elif model == 2:
        network = define_model_two_blocks_vgg()
    elif model == 3:
        network = define_model_three_blocks_vgg()
    elif model == 4:
        network = define_model_three_blocks_vgg_with_dropout()
    else:
        raise ValueError('Unknown model id: %r (expected 1, 2, 3 or 4)' % (model,))
    # create data generator
    if withDataAugmentation:
        train_datagen = ImageDataGenerator(rescale=1.0/255.0,
        width_shift_range=0.1, height_shift_range=0.1, horizontal_flip=True)
    else:
        train_datagen = ImageDataGenerator(rescale=1.0/255.0)

    # the test images are only rescaled, never augmented
    test_datagen = ImageDataGenerator(rescale=1.0/255.0)

    # prepare iterators
    #Explanation of batch size : https://stats.stackexchange.com/questions/153531/what-is-batch-size-in-neural-network
    #https://stats.stackexchange.com/questions/230120/neural-networks-is-an-epoch-in-sgd-the-same-as-an-epoch-in-mini-batch
    train_it = train_datagen.flow_from_directory('dataset_dogs_vs_cats/train/',
        class_mode='binary', batch_size=64, target_size=(imageSize, imageSize))
    test_it = test_datagen.flow_from_directory('dataset_dogs_vs_cats/test/',
        class_mode='binary', batch_size=64, target_size=(imageSize, imageSize))

    # fit model

    checkpoint_path = "train_ckpt/cp.ckpt"
    # make sure the checkpoint directory exists before anything is written to it
    os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)
    # Create a callback that saves the model's weights
    cp_callback = ModelCheckpoint(filepath=checkpoint_path,save_best_only=True, save_weights_only=True, verbose=1)

    #Early stopping
    early_stop = EarlyStopping(monitor = 'val_loss', min_delta = 0.001,
                           patience = 5, mode = 'min', verbose = 1,
                           restore_best_weights = True)

    if preloadWeights:
        # A weights checkpoint is either a single file at checkpoint_path or, for the
        # TensorFlow checkpoint format, a '<path>.index' file next to its data shards.
        if os.path.exists(checkpoint_path) or os.path.exists(checkpoint_path + '.index'):
            network.load_weights(checkpoint_path)
        else:
            print('No checkpoint found at %s, training from scratch' % checkpoint_path)

    # fit() accepts the directory iterators directly; fit_generator is deprecated
    history = network.fit(train_it, steps_per_epoch=len(train_it),
        validation_data=test_it, validation_steps=len(test_it), epochs=number_epoch, callbacks=[cp_callback,early_stop])

    # evaluate model (verbose=0 matches the silent default of evaluate_generator)
    _, acc = network.evaluate(test_it, steps=len(test_it), verbose=0)
    print('> %.3f' % (acc * 100.0))
    # learning curves
    summarize_diagnostics(history)

In [None]:
# Train and evaluate the one-block VGG model (model id 1), starting from the
# weights stored in the train_ckpt/cp.ckpt checkpoint (preloadWeights=True).
# NOTE(review): define_model_one_block_vgg is only defined in a later cell, so on
# a fresh kernel that cell has to be executed before this one.
run_test_harness(True,1)

# One Block VGG Model :
The one-block VGG model has a single convolutional layer with 32 filters followed by a max pooling layer.
The model achieved an accuracy of about 72% on the test dataset.

In [None]:
def define_model_one_block_vgg():
    """
    Build and compile the one-block VGG-style CNN for 200x200 RGB images.

    One 3x3 convolution with 32 filters and 2x2 max pooling, followed by a
    128-unit dense layer and a single sigmoid output.

    Returns:
    the compiled Sequential model (SGD with momentum, binary cross-entropy loss)
    """
    model = Sequential()
    model.add(Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_uniform', padding='same', input_shape=(200, 200, 3)))
    model.add(MaxPooling2D((2, 2)))
    model.add(Flatten())
    model.add(Dense(128, activation='relu', kernel_initializer='he_uniform'))
    model.add(Dense(1, activation='sigmoid'))
    # compile model ('learning_rate' replaces the deprecated 'lr' argument name)
    opt = SGD(learning_rate=0.001, momentum=0.9)
    model.compile(optimizer=opt, loss='binary_crossentropy', metrics=['accuracy'])

    return model

# Two Block VGG Model :
The two-block VGG model extends the one block model and adds a second block with 64 filters.
We can see that the model achieved a small improvement in performance from about 72% with one block to about 76% accuracy with two blocks

In [None]:
def define_model_two_blocks_vgg():
    """
    Build and compile the two-block VGG-style CNN for 200x200 RGB images.

    Two convolution + max-pooling blocks (32 then 64 filters), followed by a
    128-unit dense layer and a single sigmoid output.

    Returns:
    the compiled Sequential model (SGD with momentum, binary cross-entropy loss)
    """
    model = Sequential()

    #First block
    model.add(Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_uniform', padding='same', input_shape=(200, 200, 3)))
    model.add(MaxPooling2D((2, 2)))
    #Second Block
    model.add(Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_uniform', padding='same'))
    model.add(MaxPooling2D((2, 2)))

    #Classifier
    model.add(Flatten())
    model.add(Dense(128, activation='relu', kernel_initializer='he_uniform'))
    model.add(Dense(1, activation='sigmoid'))
    # compile model ('learning_rate' replaces the deprecated 'lr' argument name)
    opt = SGD(learning_rate=0.001, momentum=0.9)
    model.compile(optimizer=opt, loss='binary_crossentropy', metrics=['accuracy'])

    return model

In [None]:
# Train and evaluate the two-block VGG model (model id 2) without preloading weights
run_test_harness(False,2)

# Three Blocks VGG Model :
The three-block VGG model extends the two block model and adds a third block with 128 filters.
We can see that the model achieved a further improvement in performance, from about 76% with two blocks to about 80% accuracy with three blocks.

In [None]:
def define_model_three_blocks_vgg():
    """
    Build and compile the three-block VGG-style CNN for 200x200 RGB images.

    Three convolution + max-pooling blocks (32, 64 then 128 filters), followed
    by a 128-unit dense layer and a single sigmoid output.

    Returns:
    the compiled Sequential model (SGD with momentum, binary cross-entropy loss)
    """
    model = Sequential()
    # first block
    model.add(Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_uniform', padding='same', input_shape=(200, 200, 3)))
    model.add(MaxPooling2D((2, 2)))
    # second block
    model.add(Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_uniform', padding='same'))
    model.add(MaxPooling2D((2, 2)))
    # third block
    model.add(Conv2D(128, (3, 3), activation='relu', kernel_initializer='he_uniform', padding='same'))
    model.add(MaxPooling2D((2, 2)))
    # classifier
    model.add(Flatten())
    model.add(Dense(128, activation='relu', kernel_initializer='he_uniform'))
    model.add(Dense(1, activation='sigmoid'))
    # compile model ('learning_rate' replaces the deprecated 'lr' argument name)
    opt = SGD(learning_rate=0.001, momentum=0.9)
    model.compile(optimizer=opt, loss='binary_crossentropy', metrics=['accuracy'])
    return model

In [None]:
# Train and evaluate the three-block VGG model (model id 3) without preloading weights
run_test_harness(False,3)

Discussion: 
VGG 1: 72.331%
VGG 2: 76.646%
VGG 3: 80.184%

We observe a trend of improved performance with the increase in capacity, but also a similar case of overfitting occurring earlier and earlier in the run. -> The results suggest that the model will likely benefit from regularization techniques. Ex: dropout, weight decay, and data augmentation (can boost performance by encouraging the model to learn features that are further invariant to position by expanding the training dataset)?



# Model Improvements

Baseline : VGG 3
Learning curves of the model showed strong signs of overfitting.
We can explore two approaches to attempt to address this overfitting: dropout regularization and data augmentation.
We will increase the number of training epochs from 20 to 50 to give the model more space for refinement.

**Dropout :**
Reviewing the learning curves, we can see that dropout has had an effect on the rate of improvement of the model on both the train and test sets.

Overfitting has been reduced and delayed.

In [None]:
def define_model_three_blocks_vgg_with_dropout():
    """
    Build and compile the three-block VGG-style CNN with dropout regularization.

    Same layers as the three-block model for 200x200 RGB images, with a
    Dropout(0.2) after each max-pooling layer and a Dropout(0.5) after the
    128-unit dense layer.

    Returns:
    the compiled Sequential model (SGD with momentum, binary cross-entropy loss)
    """
    model = Sequential()
    # first block
    model.add(Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_uniform', padding='same', input_shape=(200, 200, 3)))
    model.add(MaxPooling2D((2, 2)))
    model.add(Dropout(0.2))
    # second block
    model.add(Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_uniform', padding='same'))
    model.add(MaxPooling2D((2, 2)))
    model.add(Dropout(0.2))
    # third block
    model.add(Conv2D(128, (3, 3), activation='relu', kernel_initializer='he_uniform', padding='same'))
    model.add(MaxPooling2D((2, 2)))
    model.add(Dropout(0.2))
    # classifier
    model.add(Flatten())
    model.add(Dense(128, activation='relu', kernel_initializer='he_uniform'))
    model.add(Dropout(0.5))
    model.add(Dense(1, activation='sigmoid'))
    # compile model ('learning_rate' replaces the deprecated 'lr' argument name)
    opt = SGD(learning_rate=0.001, momentum=0.9)
    model.compile(optimizer=opt, loss='binary_crossentropy', metrics=['accuracy'])
    return model

In [None]:
# Train and evaluate the three-block model with dropout (model id 4) for up to 50 epochs
run_test_harness(False,4,50)

**Data Augmentation:**

* acts as a regularization technique, adding noise to the training data and encouraging the model to learn the same features, invariant to their position in the input.
* can create variations of the images that improve the ability of the fit models to generalize what they have learned to new images.

Here we do small shifts and horizontal flips. Photos in the training dataset will be augmented with small (10%) random horizontal and vertical shifts and random horizontal flips that create a mirror image of a photo.

In [None]:
# Train and evaluate the dropout model (model id 4) with data augmentation for up to 100 epochs
run_test_harness(False,4,100,True)

Ideas for future improvements: other regularization techniques such as weight decay and early stopping / a change of the learning rate / an adaptive learning rate optimizer such as Adam

# Transfer Learning

Pre-trained model is comprised of two main parts, the feature extractor part of the model that is made up of VGG blocks, and the classifier part of the model that is made up of fully connected layers and the output layer.
We will use the feature extraction part of the model and add a new classifier part of the model that is tailored to the dogs and cats dataset.

The model also expects images to be centered, that is, to have the mean pixel values of each channel (red, green, and blue), as calculated on the ImageNet training dataset, subtracted from the input. We achieve this by setting the “featurewise_center” argument of ImageDataGenerator to “True” and manually specifying the mean pixel values to use when centering as the mean values from the ImageNet training dataset: [123.68, 116.779, 103.939].

We see that the model achieved very impressive results with a classification accuracy of about 97% on the holdout test dataset.

Reviewing the learning curves, we can see that the model fits the dataset quickly. It does not show strong overfitting, although the results suggest that perhaps additional capacity in the classifier and/or the use of regularization might be helpful.

There are many improvements that could be made to this approach, including adding dropout regularization to the classifier part of the model and perhaps even fine-tuning the weights of some or all of the layers in the feature detector part of the model.



In [None]:
def define_model_pretrained_vgg16(imageSize):
    """
    Build and compile a transfer-learning classifier on top of VGG16.

    The VGG16 network is loaded without its top classifier layers, all of its
    layers are marked as not trainable, and a new classifier (flatten, 128-unit
    dense layer, single sigmoid output) is attached to its last layer.

    Parameters:
    imageSize (int): side of the square RGB input images

    Returns:
    the compiled Model (SGD with momentum, binary cross-entropy loss)
    """
    # load model
    base = VGG16(include_top=False, input_shape=(imageSize, imageSize, 3))
    # mark loaded layers as not trainable
    for layer in base.layers:
        layer.trainable = False
    # add new classifier layers
    flat1 = Flatten()(base.layers[-1].output)
    class1 = Dense(128, activation='relu', kernel_initializer='he_uniform')(flat1)
    output = Dense(1, activation='sigmoid')(class1)
    # define new model
    model = Model(inputs=base.inputs, outputs=output)
    # compile model ('learning_rate' replaces the deprecated 'lr' argument name)
    opt = SGD(learning_rate=0.001, momentum=0.9)
    model.compile(optimizer=opt, loss='binary_crossentropy', metrics=['accuracy'])
    return model

# run the test harness for evaluating a model
def run_test_harness_pretrained():
    """
    Train the VGG16 transfer-learning model for 10 epochs and evaluate it.

    Images from dataset_dogs_vs_cats/train/ and dataset_dogs_vs_cats/test/ are
    resized to 224x224 and centered with fixed per-channel mean values. The
    test accuracy is printed and the learning curves are saved by
    summarize_diagnostics.
    """
    #Target Size, here we use 224 because the pre-trained model was trained on 224x224 images
    imageSize = 224
    # define model
    model = define_model_pretrained_vgg16(imageSize)
    # create data generator
    datagen = ImageDataGenerator(featurewise_center=True)
    # specify imagenet mean values for centering
    datagen.mean = [123.68, 116.779, 103.939]
    # prepare iterator
    train_it = datagen.flow_from_directory('dataset_dogs_vs_cats/train/',
        class_mode='binary', batch_size=64, target_size=(imageSize, imageSize))
    test_it = datagen.flow_from_directory('dataset_dogs_vs_cats/test/',
        class_mode='binary', batch_size=64, target_size=(imageSize, imageSize))
    # fit model (fit() accepts the directory iterators directly; fit_generator is deprecated)
    history = model.fit(train_it, steps_per_epoch=len(train_it),
        validation_data=test_it, validation_steps=len(test_it), epochs=10, verbose=1)
    # evaluate model
    _, acc = model.evaluate(test_it, steps=len(test_it), verbose=0)
    print('> %.3f' % (acc * 100.0))
    # learning curves
    summarize_diagnostics(history)

In [None]:
# Train and evaluate the VGG16 transfer-learning model
run_test_harness_pretrained()

# Finalize the Model and Make Predictions

We finalize our model by fitting a model on the entire training dataset and saving the model to file for later use. We then load the saved model and use it to make a prediction on a single image.

Our final model is fit on all available data(train and test datasets).

Let's create a new structure without train and test folder:

* finalize_dogs_vs_cats
    * cats
    * dogs


In [None]:
# organize dataset into a useful structure
from os import makedirs
from os import listdir
from shutil import copyfile
# destination root of the flat dataset (no train/test split)
dataset_home = 'finalize_dogs_vs_cats/'
# one sub-folder per class; exist_ok keeps the cell re-runnable
labeldirs = ['dogs/', 'cats/']
for class_dir in labeldirs:
    makedirs(dataset_home + class_dir, exist_ok=True)
# copy every extracted picture into the folder matching its filename prefix
src_directory = 'train/'
class_dir_by_prefix = (('cat', 'cats/'), ('dog', 'dogs/'))
for picture in listdir(src_directory):
    for prefix, class_dir in class_dir_by_prefix:
        if picture.startswith(prefix):
            copyfile(src_directory + '/' + picture, dataset_home + class_dir + picture)
            break

**Save Final Model**

In [None]:
# run the test harness for evaluating a model
def run_test_harness_final():
    """
    Fit the VGG16 transfer-learning model on all images and save it to disk.

    Images under finalize_dogs_vs_cats/ are resized to 224x224 and centered
    with fixed per-channel mean values. The model is trained for 10 epochs and
    written to final_model.h5.
    """
    #Target Size, here we use 224 because the pre-trained model was trained on 224x224 images
    imageSize = 224
    # define model
    model = define_model_pretrained_vgg16(imageSize)
    # create data generator
    datagen = ImageDataGenerator(featurewise_center=True)
    # specify imagenet mean values for centering
    datagen.mean = [123.68, 116.779, 103.939]
    # prepare iterator
    train_it = datagen.flow_from_directory('finalize_dogs_vs_cats/',
        class_mode='binary', batch_size=64, target_size=(imageSize, imageSize))
    # fit model (fit() accepts the directory iterator directly; fit_generator is deprecated)
    model.fit(train_it, steps_per_epoch=len(train_it), epochs=10, verbose=1)
    # save model
    model.save('final_model.h5')

In [None]:
# Fit the final model on the whole dataset and save it as final_model.h5
run_test_harness_final()

# Make prediction

In [None]:
# make a prediction for a new image.
from keras.preprocessing.image import load_img
from keras.preprocessing.image import img_to_array
from keras.models import load_model
 
# load and prepare the image
def load_image(filename):
    """
    Load one image and prepare it as a single-sample batch for prediction.

    Parameters:
    filename (str): path of the image file

    Returns:
    float32 array of shape (1, 224, 224, 3) with the per-channel mean values
    [123.68, 116.779, 103.939] subtracted
    """
    # load the image, resized to 224x224
    img = load_img(filename, target_size=(224, 224))
    # convert to array
    img = img_to_array(img)
    # reshape into a single sample with 3 channels
    img = img.reshape(1, 224, 224, 3)
    # center pixel data; the mean is given as a float32 array because subtracting
    # a plain Python list would promote the result to float64
    img = img.astype('float32')
    img = img - np.array([123.68, 116.779, 103.939], dtype='float32')
    return img
 
# load an image and predict the class
def run_example():
    """Classify the sample image with the saved final model and print the result."""
    # prepare the picture first, then restore the trained model from disk
    sample = load_image('../input/single-dog/sample_image.jpg')
    classifier = load_model('final_model.h5')
    # predict() returns one row per input sample; print the row of the only sample
    print(classifier.predict(sample)[0])
 
# entry point: classify the sample image with the saved final model
run_example()