<a href="https://colab.research.google.com/github/sproboticworks/ml-course/blob/master/Cats%20and%20Dogs%20Classification%20with%20Augmentation%20and%20Dropout.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Import Packages

In [0]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

# Download Data

In [0]:
# Fetch the filtered cats-vs-dogs archive; extract=True asks Keras to unpack it
# as well. `zip_dir` is the path returned by get_file and is used by later cells
# to locate the extracted folder.
url = 'https://storage.googleapis.com/mledu-datasets/cats_and_dogs_filtered.zip'
zip_dir = tf.keras.utils.get_file(
    fname='cats_and_dogs_filtered.zip',
    origin=url,
    extract=True,
)

The dataset we have downloaded has the following directory structure.

<pre style="font-size: 10.0pt; font-family: Arial; line-height: 2; letter-spacing: 1.0pt;" >
<b>cats_and_dogs_filtered</b>
|__ <b>train</b>
    |______ <b>cats</b>: [cat.0.jpg, cat.1.jpg, cat.2.jpg ...]
    |______ <b>dogs</b>: [dog.0.jpg, dog.1.jpg, dog.2.jpg ...]
|__ <b>validation</b>
    |______ <b>cats</b>: [cat.2000.jpg, cat.2001.jpg, cat.2002.jpg ...]
    |______ <b>dogs</b>: [dog.2000.jpg, dog.2001.jpg, dog.2002.jpg ...]
</pre>


## List the directories with the following terminal command:

In [0]:
import os
zip_dir_base = os.path.dirname(zip_dir)
!find $zip_dir_base -type d -print

## Assign Directory Variables

In [0]:
# Root of the extracted dataset, located next to the downloaded archive.
base_dir = os.path.join(os.path.dirname(zip_dir), 'cats_and_dogs_filtered')

# Top-level splits.
train_dir, validation_dir = (
    os.path.join(base_dir, split) for split in ('train', 'validation')
)

# Per-class folders holding the training cat/dog pictures.
train_cats_dir, train_dogs_dir = (
    os.path.join(train_dir, label) for label in ('cats', 'dogs')
)

# Per-class folders holding the validation cat/dog pictures.
validation_cats_dir, validation_dogs_dir = (
    os.path.join(validation_dir, label) for label in ('cats', 'dogs')
)

## Print Filenames

In [0]:
# File names found in each training class folder.
train_cat_fnames = os.listdir(train_cats_dir)
train_dog_fnames = os.listdir(train_dogs_dir)

# Show the first ten names of each class, one list per line.
print(train_cat_fnames[:10], train_dog_fnames[:10], sep='\n')

## Print number of Training and Validation images

In [0]:
# Count the files in each class folder once and reuse the counts below.
num_cats_tr = len(os.listdir(train_cats_dir))
num_dogs_tr = len(os.listdir(train_dogs_dir))

num_cats_val = len(os.listdir(validation_cats_dir))
num_dogs_val = len(os.listdir(validation_dogs_dir))

# Split totals; the training cell uses these to derive steps per epoch.
total_train = num_cats_tr + num_dogs_tr
total_val = num_cats_val + num_dogs_val

print('total training cat images :', num_cats_tr)
print('total training dog images :', num_dogs_tr)

print('total validation cat images :', num_cats_val)
print('total validation dog images :', num_dogs_val)

# Data Preparation

In [0]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

BATCH_SIZE = 20   # images per batch
IMG_SHAPE = 150   # images are resized to IMG_SHAPE x IMG_SHAPE

# Both generators only rescale pixel values by 1/255 (no augmentation yet).
train_datagen = ImageDataGenerator(rescale=1.0 / 255.0)
validation_datagen = ImageDataGenerator(rescale=1.0 / 255.0)

# Training images: read from train_dir in batches of BATCH_SIZE via
# train_datagen, with binary labels.
train_generator = train_datagen.flow_from_directory(
    directory=train_dir,
    target_size=(IMG_SHAPE, IMG_SHAPE),
    class_mode='binary',
    batch_size=BATCH_SIZE,
)

# Validation images: read from validation_dir in batches of BATCH_SIZE via
# validation_datagen, with binary labels.
validation_generator = validation_datagen.flow_from_directory(
    directory=validation_dir,
    target_size=(IMG_SHAPE, IMG_SHAPE),
    class_mode='binary',
    batch_size=BATCH_SIZE,
)


## Visualizing Training images

In [0]:
def plotImages(images_arr):
    """Show up to five images side by side in a single row.

    Args:
        images_arr: iterable of images accepted by ``Axes.imshow``. It is
            zipped against five subplot axes, so any images past the fifth
            are not drawn.
    """
    _, panels = plt.subplots(nrows=1, ncols=5, figsize=(20, 20))
    for panel, picture in zip(panels.flatten(), images_arr):
        panel.imshow(picture)
    plt.tight_layout()
    plt.show()

In [0]:
# Pull one batch from the training generator; the labels are discarded
# because only the images are needed for plotting.
sample_training_images, _ = next(train_generator)

In [0]:
# Display the first five images of the sampled batch in one row.
plotImages(sample_training_images[:5])  # Plot images 0-4

# Image Augmentation

## Flipping the image horizontally

In [0]:
# Augmentation demo: random horizontal flips on top of the 1/255 rescale.
train_datagen = ImageDataGenerator(rescale=1./255, horizontal_flip=True)

# class_mode is stated explicitly: when omitted, flow_from_directory falls back
# to 'categorical' (one-hot labels), which does not match the single-unit
# sigmoid / binary_crossentropy model used in this notebook if this generator
# is ever passed to training.
train_generator = train_datagen.flow_from_directory(batch_size=BATCH_SIZE,
                                                    directory=train_dir,
                                                    shuffle=True,
                                                    target_size=(IMG_SHAPE, IMG_SHAPE),
                                                    class_mode='binary')

In [0]:
# Read the first image of batch 0 five times. Each access goes through the
# generator again, so the random flip is presumably re-drawn per read.
augmented_images = [train_generator[0][0][0] for _ in range(5)]
plotImages(augmented_images)

## Rotating the image

In [0]:
# Augmentation demo: random rotations (rotation_range=45) on top of the
# 1/255 rescale.
train_datagen = ImageDataGenerator(rescale=1./255, rotation_range=45)

# class_mode is stated explicitly: when omitted, flow_from_directory falls back
# to 'categorical' (one-hot labels), which does not match the single-unit
# sigmoid / binary_crossentropy model used in this notebook if this generator
# is ever passed to training.
train_generator = train_datagen.flow_from_directory(batch_size=BATCH_SIZE,
                                                    directory=train_dir,
                                                    shuffle=True,
                                                    target_size=(IMG_SHAPE, IMG_SHAPE),
                                                    class_mode='binary')

In [0]:
# Read the first image of batch 0 five times. Each access goes through the
# generator again, so the random rotation is presumably re-drawn per read.
augmented_images = [train_generator[0][0][0] for _ in range(5)]
plotImages(augmented_images)

## Applying Zoom

In [0]:
# Augmentation demo: random zoom (zoom_range=0.5) on top of the 1/255 rescale.
train_datagen = ImageDataGenerator(rescale=1./255, zoom_range=0.5)

# class_mode is stated explicitly: when omitted, flow_from_directory falls back
# to 'categorical' (one-hot labels), which does not match the single-unit
# sigmoid / binary_crossentropy model used in this notebook if this generator
# is ever passed to training.
train_generator = train_datagen.flow_from_directory(batch_size=BATCH_SIZE,
                                                    directory=train_dir,
                                                    shuffle=True,
                                                    target_size=(IMG_SHAPE, IMG_SHAPE),
                                                    class_mode='binary')

In [0]:
# Read the first image of batch 0 five times. Each access goes through the
# generator again, so the random zoom is presumably re-drawn per read.
augmented_images = [train_generator[0][0][0] for _ in range(5)]
plotImages(augmented_images)

## Putting it all together

In [0]:
# Combined augmentation: rescale plus random flip, rotation, zoom, shear and
# width/height shifts. fill_mode='nearest' selects how pixels exposed by a
# transform are filled in.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    horizontal_flip=True,
    rotation_range=40,
    zoom_range=0.2,
    shear_range=0.2,
    width_shift_range=0.2,
    height_shift_range=0.2,
    fill_mode='nearest',
)

# Shuffled, binary-labelled training batches built from the augmented generator.
train_generator = train_datagen.flow_from_directory(
    directory=train_dir,
    target_size=(IMG_SHAPE, IMG_SHAPE),
    batch_size=BATCH_SIZE,
    class_mode='binary',
    shuffle=True,
)

In [0]:
# Read the first image of batch 0 five times. Each access goes through the
# generator again, so the combined random transforms are presumably re-drawn
# per read.
augmented_images = [train_generator[0][0][0] for _ in range(5)]
plotImages(augmented_images)

# Build Model

In [0]:
model = tf.keras.models.Sequential([
    # Input is an IMG_SHAPE x IMG_SHAPE image with 3 color channels; using
    # IMG_SHAPE keeps the model in step with the generators' target_size.
    tf.keras.layers.Conv2D(16, (3,3), padding='same', activation='relu',
                           input_shape=(IMG_SHAPE, IMG_SHAPE, 3)),
    tf.keras.layers.MaxPooling2D(2,2),
    tf.keras.layers.Conv2D(32, (3,3), padding='same', activation='relu'),
    tf.keras.layers.MaxPooling2D(2,2),
    tf.keras.layers.Conv2D(64, (3,3), padding='same', activation='relu'),
    tf.keras.layers.MaxPooling2D(2,2),
    # Dropout with rate 0.5 applied to the convolutional features
    tf.keras.layers.Dropout(0.5),
    # Flatten the results to feed into a DNN
    tf.keras.layers.Flatten(),
    # 512 neuron hidden layer
    tf.keras.layers.Dense(512, activation='relu'),
    # Classifier head. One option is a Dense layer with 2 output units and a
    # softmax activation. Another popular approach for binary classification,
    # used here, is a Dense layer with 1 output unit and a sigmoid activation:
    # the output is a value from 0-1, where 0 stands for one class ('cats')
    # and 1 for the other ('dogs').
    tf.keras.layers.Dense(1, activation='sigmoid')
])

In [0]:
# Print the model's layer summary.
model.summary()

In [0]:
from tensorflow.keras.optimizers import RMSprop

# `learning_rate` is the supported keyword; the older `lr` alias is deprecated
# and rejected by newer Keras releases.
# Binary cross-entropy pairs with the model's single sigmoid output unit.
model.compile(optimizer=RMSprop(learning_rate=0.001),
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Training Model

In [0]:
EPOCHS = 100

# Steps are the image count divided by the batch size, rounded up, so a
# partial final batch still counts as a step.
history = model.fit(
    train_generator,
    steps_per_epoch=int(np.ceil(total_train / BATCH_SIZE)),
    epochs=EPOCHS,
    validation_data=validation_generator,
    validation_steps=int(np.ceil(total_val / BATCH_SIZE)),
    verbose=2,
)

# Visualizing results of the training

In [0]:
# Per-epoch metrics recorded by model.fit.
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']

loss = history.history['loss']
val_loss = history.history['val_loss']

# Derive the x-axis from the recorded history rather than the EPOCHS constant,
# so the plot still works if the two differ (e.g. EPOCHS was edited after the
# fit cell ran, or training stopped early via a callback).
epochs_range = range(len(acc))

plt.figure(figsize=(8, 8))

# Left panel: accuracy curves.
plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc, label='Training Accuracy')
plt.plot(epochs_range, val_acc, label='Validation Accuracy')
plt.xlabel('Epoch')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')

# Right panel: loss curves.
plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss, label='Training Loss')
plt.plot(epochs_range, val_loss, label='Validation Loss')
plt.xlabel('Epoch')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')
plt.show()

# Prediction using the Model

Let's now run the trained model on one batch of validation images and compare its predictions with the true labels.

In [0]:
# Take one validation batch and score it. The model's output is flattened to
# a 1-D array of sigmoid scores.
test_images, test_labels = next(validation_generator)
classes = model.predict(test_images, batch_size=10).flatten()

print(classes)
print(test_labels)

In [0]:
# Show the batch in a 4x5 grid, each image titled with its predicted class.
# Title color: blue when the rounded score matches the label, red otherwise.
fig, axes = plt.subplots(4, 5, figsize=(20,20))
axes = axes.flatten()
# Iterate images, axes, scores and labels in lockstep instead of keeping a
# manual index counter.
for img, ax, score, label in zip(test_images, axes, classes, test_labels):
    ax.imshow(img)
    ax.axis('off')
    color = 'red' if round(score) != label else 'blue'
    # Scores above 0.5 are reported as "Dog", everything else as "Cat".
    title = "Dog" if score > 0.5 else "Cat"
    ax.set_title(title, fontdict={'size': 20, 'color': color})
plt.tight_layout()
plt.show()