### Import the required libraries

In [0]:

import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import os
import zipfile
import random
import shutil
from shutil import copyfile


### Retrieve the required dataset

In [0]:

# Download the dataset
!wget --no-check-certificate \
    "https://download.microsoft.com/download/3/E/1/3E1C3F21-ECDB-4869-8368-6DEBA77B919F/kagglecatsanddogs_3367a.zip" \
    -O "/tmp/cats-and-dogs.zip"

# Unzip the files and store them
local_zip = '/tmp/cats-and-dogs.zip'
zip_ref = zipfile.ZipFile(local_zip, 'r')
zip_ref.extractall('/tmp')
zip_ref.close()


### Define the dataset directories for the ImageDataGenerator

In [0]:

# Create the <split>/<class> directory tree that the split step copies into.
# exist_ok=True keeps the cell re-runnable while still surfacing real failures
# (e.g. permission errors). The previous try/except swallowed every OSError,
# and an already-existing root directory caused all the sub-directory
# creations to be skipped.
# NOTE: files left over from an earlier run are not removed here.
for split_name in ("training", "testing"):
    for class_name in ("cats", "dogs"):
        os.makedirs(os.path.join("/tmp/cats-v-dogs", split_name, class_name),
                    exist_ok=True)


### Split the data into training and validation sets

In [0]:

def split_data(SOURCE, TRAINING, TESTING, SPLIT_SIZE, seed=None):
    """Randomly split the files in SOURCE into training and test sets.

    Zero-length files are deleted from SOURCE and never copied. Entries that
    are not regular files (e.g. sub-directories) are skipped. The remaining
    files are shuffled; the first ``int(SPLIT_SIZE * count)`` of them are
    copied into TRAINING and the rest into TESTING.

    Args:
        SOURCE: Directory containing the files to split.
        TRAINING: Existing directory that receives the training share.
        TESTING: Existing directory that receives the test share.
        SPLIT_SIZE: Fraction of the usable files that goes to TRAINING.
        seed: Optional seed for a private random generator, making the split
            repeatable. When None, the module-level ``random`` state is used.
    """
    usable = []
    # Sort the listing so a seeded shuffle does not depend on the order in
    # which the filesystem happens to return entries.
    for name in sorted(os.listdir(SOURCE)):
        path = os.path.join(SOURCE, name)
        if not os.path.isfile(path):
            # copyfile/os.remove would raise on a directory; leave it alone.
            continue
        if os.path.getsize(path) == 0:
            # Empty files cannot be decoded as images; remove them from SOURCE.
            os.remove(path)
            continue
        usable.append(name)

    rng = random if seed is None else random.Random(seed)
    shuffled = rng.sample(usable, len(usable))

    # Computed once: number of files that belong to the training share.
    training_count = int(SPLIT_SIZE * len(shuffled))
    for position, name in enumerate(shuffled):
        destination = TRAINING if position < training_count else TESTING
        copyfile(os.path.join(SOURCE, name), os.path.join(destination, name))

# Define the paths for the training and test sets
CAT_SOURCE_DIR = "/tmp/PetImages/Cat/"
TRAINING_CATS_DIR = "/tmp/cats-v-dogs/training/cats/"
TESTING_CATS_DIR = "/tmp/cats-v-dogs/testing/cats/"
DOG_SOURCE_DIR = "/tmp/PetImages/Dog/"
TRAINING_DOGS_DIR = "/tmp/cats-v-dogs/training/dogs/"
TESTING_DOGS_DIR = "/tmp/cats-v-dogs/testing/dogs/"

# Seed Python's random module so the shuffle behind the split starts from the
# same state on every run (the result still depends on the order in which the
# source directory is listed).
SPLIT_SEED = 42
random.seed(SPLIT_SEED)

# Split the data: 90% of each class goes to training, the rest to testing
split_size = .9
split_data(CAT_SOURCE_DIR, TRAINING_CATS_DIR, TESTING_CATS_DIR, split_size)
split_data(DOG_SOURCE_DIR, TRAINING_DOGS_DIR, TESTING_DOGS_DIR, split_size)

# Print the sizes of the training and test sets, reusing the path constants
# instead of repeating the literals
for split_dir in (TRAINING_CATS_DIR, TRAINING_DOGS_DIR,
                  TESTING_CATS_DIR, TESTING_DOGS_DIR):
    print(len(os.listdir(split_dir)))

# Expected output:
# 11250
# 11250
# 1250
# 1250


### Pre-process the data and perform image augmentation

In [0]:

# Root folders of the two splits; each holds a cats/ and a dogs/ sub-folder
TRAINING_DIR = "/tmp/cats-v-dogs/training"
VALIDATION_DIR = "/tmp/cats-v-dogs/testing"

# Options shared by both directory iterators
flow_options = dict(batch_size = 50,
                    class_mode = 'binary',
                    target_size = (150, 150))

# Training set: rescale pixel values and apply random augmentation
augmentation_options = dict(rescale = 1.0/255,
                            rotation_range = 40,
                            width_shift_range = 0.2,
                            height_shift_range = 0.2,
                            shear_range = 0.2,
                            zoom_range = 0.2,
                            horizontal_flip = True,
                            fill_mode = 'nearest')
train_datagen = ImageDataGenerator(**augmentation_options)
train_generator = train_datagen.flow_from_directory(TRAINING_DIR, **flow_options)

# Validation set: rescale only, no augmentation
validation_datagen = ImageDataGenerator(rescale = 1./255.)
validation_generator = validation_datagen.flow_from_directory(VALIDATION_DIR, **flow_options)


### Define the model

In [0]:

# Four convolutional stages, each Conv2D -> BatchNormalization -> MaxPooling2D.
# Each entry is (conv layer name, pool layer name, number of filters).
conv_stages = (
    ("Conv1", "Pool1", 32),
    ("Conv2", "Pool2", 64),
    ("Conv3", "Pool3", 128),
    ("Conv4", "Pool4", 128),
)

model_layers = []
for conv_name, pool_name, n_filters in conv_stages:
    # Only the very first layer declares the input shape
    first_layer_options = {} if model_layers else {"input_shape": (150, 150, 3)}
    model_layers.append(tf.keras.layers.Conv2D(n_filters, (3,3), activation = 'relu',
                                               name = conv_name, **first_layer_options))
    model_layers.append(tf.keras.layers.BatchNormalization())
    model_layers.append(tf.keras.layers.MaxPooling2D((2,2), name = pool_name))

# Classifier head: flatten, one hidden dense layer, single sigmoid output
model_layers.append(tf.keras.layers.Flatten())
model_layers.append(tf.keras.layers.Dense(512, activation = 'relu', name = "Dense5"))
model_layers.append(tf.keras.layers.BatchNormalization())
model_layers.append(tf.keras.layers.Dense(1, activation = 'sigmoid', name = "Output6"))

cd_model = tf.keras.models.Sequential(model_layers)

# Configure training: Adam optimizer, binary cross-entropy loss, accuracy metric
adam_optimizer = tf.keras.optimizers.Adam(learning_rate = 0.005)
cd_model.compile(optimizer = adam_optimizer,
                 loss = 'binary_crossentropy',
                 metrics = ['accuracy'])


### Train the model

In [0]:

# Fit the model and store the per-epoch metrics in `history`.
# The step counts are taken from the generators' own length (batches per full
# pass) rather than hard-coded, so they stay correct if the dataset size or
# the batch size changes.
history = cd_model.fit(train_generator,
                       epochs = 30,
                       steps_per_epoch = len(train_generator),
                       validation_data = validation_generator,
                       validation_steps = len(validation_generator),
                       verbose = 1)


### Plot the loss and accuracy of the model w.r.t. the number of epochs

In [0]:

# Retrieve the per-epoch results on the training and validation data
acc      = history.history['accuracy']
val_acc  = history.history['val_accuracy']
loss     = history.history['loss']
val_loss = history.history['val_loss']

# One x value per recorded epoch
epochs   = range(len(acc))

# Plot training and validation accuracy per epoch.
# Each curve is labelled and a legend is shown so the two lines can be told
# apart. The white title colour is kept from the original notebook
# (presumably chosen for a dark notebook theme).
fig_acc, ax_acc = plt.subplots()
ax_acc.plot(epochs, acc, label = 'Training')
ax_acc.plot(epochs, val_acc, label = 'Validation')
ax_acc.set_title('Training and validation accuracy', color = "white")
ax_acc.set_xlabel('Epoch')
ax_acc.set_ylabel('Accuracy')
ax_acc.legend()

# Plot training and validation loss per epoch on a second figure
fig_loss, ax_loss = plt.subplots()
ax_loss.plot(epochs, loss, label = 'Training')
ax_loss.plot(epochs, val_loss, label = 'Validation')
ax_loss.set_title('Training and validation loss', color = "white")
ax_loss.set_xlabel('Epoch')
ax_loss.set_ylabel('Loss')
ax_loss.legend();


### Get a summary of the model

In [0]:

# Print the Keras summary of cd_model
cd_model.summary()
