# We will build a model that takes an image as input and determines whether the image contains a picture of a dog or a cat.

![](https://lirp.cdn-website.com/f499246c/dms3rep/multi/opt/Convolutional+Neural+Network-637w.jpg)

image credits: [Click Here](https://lirp.cdn-website.com/f499246c/dms3rep/multi/opt/Convolutional+Neural+Network-637w.jpg)

# About Dataset:
- ### The Asirra (animal species image recognition for restricting access) dataset was introduced in 2013 for a machine learning competition. The dataset includes 25,000 images with equal numbers of labels for cats and dogs.

# Cats vs Dogs Classification using a CNN with TensorFlow Keras
- A convolutional neural network (CNN) is a type of artificial neural network used in image recognition and processing that is specifically designed to process pixel data. A CNN uses a system much like a multilayer perceptron that has been designed for reduced processing requirements.

- image credits: [Click Here](https://media.geeksforgeeks.org/wp-content/uploads/cat-vs-dog.jpg)

![](https://media.geeksforgeeks.org/wp-content/uploads/cat-vs-dog.jpg)

In [None]:
# Import the libraries
import os
import torch
import numpy as np
import pandas as pd
from tensorflow.keras import Sequential
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator,load_img

In [None]:
# Check whether TensorFlow can see at least one GPU.
# tf.test.is_gpu_available() is deprecated; listing the physical GPU devices
# is the supported replacement. The comparison keeps the cell output a bool.
import tensorflow as tf
len(tf.config.list_physical_devices('GPU')) > 0

In [None]:
def get_default_device():
    """Return the PyTorch CUDA device when CUDA is usable, otherwise the CPU device."""
    device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
    return torch.device(device_name)

In [None]:
# Resolve the PyTorch device once via get_default_device().
# NOTE(review): `device` is not referenced by any later cell visible in this
# notebook (the models below are built with TensorFlow/Keras) -- confirm it is needed.
device = get_default_device()

In [None]:
def to_device(data, device):
    """Move a tensor, or a (possibly nested) list/tuple of tensors, to `device`.

    A list or tuple input is returned as a new list with every element moved
    recursively; any other input is moved with `.to(device, non_blocking=True)`.
    """
    if not isinstance(data, (list, tuple)):
        return data.to(device, non_blocking=True)

    moved = []
    for item in data:
        moved.append(to_device(item, device))
    return moved

- Prepare dataset for training model:

In [None]:
data_set = "dogs-vs-cats"

import zipfile

# Unpack each competition archive exactly once, straight into its destination
# under /kaggle/files/images. A second copy in the current working directory is
# not needed: every later cell visible in this notebook reads the images from
# /kaggle/files/images/..., so extracting to "." as well only doubles the
# extraction time and disk usage.
with zipfile.ZipFile("/kaggle/input/" + data_set + "/train.zip", "r") as z:
    # Later cells read the files from <destination>/train.
    destination = '/kaggle/files/images/train'
    z.extractall(destination)

with zipfile.ZipFile("/kaggle/input/" + data_set + "/test1.zip", "r") as z:
    # Later cells read the files from <destination>/test1.
    destination = '/kaggle/files/images/test'
    z.extractall(destination)

In [None]:
def list_full_paths(directory):
    """Return the entries of `directory` as joined paths, in sorted order.

    os.listdir() returns entries in arbitrary, filesystem-dependent order, so
    the names are sorted to make the resulting frames (and any split derived
    from them) reproducible across runs and machines.

    Args:
        directory: path of the directory to list.

    Returns:
        A list of `os.path.join(directory, name)` for every entry, sorted by name.
    """
    return [os.path.join(directory, file) for file in sorted(os.listdir(directory))]

# Labelled training images: one row per file, with the class derived from the
# file name. The label is taken from the base name only (files are named
# 'dog.<n>.jpg' / 'cat.<n>.jpg'), so a directory component that happens to
# contain "dog" can never mislabel every image as a dog.
train = pd.DataFrame({'filepath': list_full_paths('/kaggle/files/images/train/train')})
train['truth_label'] = np.where(
    train['filepath'].map(os.path.basename).str.startswith('dog'), 'dog', 'cat')

# Unlabelled competition test images (no truth_label column).
test = pd.DataFrame({'filepath': list_full_paths('/kaggle/files/images/test/test1')})

In [None]:
from sklearn.model_selection import train_test_split


# Hold out 20% of the labelled images. Despite its name, X_test is the
# validation frame used during training (the unlabelled competition images
# live in `test`). A fixed random_state makes the split reproducible and
# stratify keeps the cat/dog ratio identical in both parts.
X_train, X_test = train_test_split(
    train, test_size=0.2, random_state=42, stratify=train['truth_label'])

In [None]:
# (rows, columns) of the labelled training frame.
train.shape

In [None]:
# (rows, columns) of the unlabelled test frame.
test.shape

In [None]:
# Column names of the training frame (filepath and truth_label are set above).
train.columns

In [None]:
# Column names of the test frame (only filepath is set when it is created).
test.columns

In [None]:
# Preview the first rows of the training frame.
train.head()

In [None]:
# Training generator: rescale pixels by 1/255 and apply random augmentation
# (rotation, shifts, shear, zoom, horizontal flip) on the fly.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)

# Evaluation generator: rescaling only, no augmentation.
test_datagen = ImageDataGenerator(rescale=1./255)

In [None]:
# Augmented batches built from the training part of the split.
training_set = train_datagen.flow_from_dataframe(
    dataframe=X_train,
    x_col='filepath',
    y_col='truth_label',
    class_mode='categorical',
    target_size=(64, 64),
    batch_size=128,
)

# Rescale-only batches built from the held-out part of the split.
test_set = test_datagen.flow_from_dataframe(
    dataframe=X_test,
    x_col='filepath',
    y_col='truth_label',
    class_mode='categorical',
    target_size=(64, 64),
    batch_size=128,
)

- plotting images from dataset

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline


# Two generators over the same directory with the same seed, so the i-th image
# of the augmented batch is the i-th image of the plain batch with random
# augmentation applied.
# NOTE(review): flow_from_directory derives its classes from the immediate
# sub-folders of the given directory, which here appear to be 'test' and
# 'train' (not cat/dog) -- so index 1 of the one-hot label presumably means
# "came from the train folder". Confirm this is the intended title logic.
batches_augmented = train_datagen.flow_from_directory('/kaggle/files/images/', target_size = (512, 512), batch_size = 16, class_mode = 'categorical', seed=1234)
batches_real = test_datagen.flow_from_directory('/kaggle/files/images/', target_size = (512, 512), batch_size = 16, class_mode = 'categorical', seed=1234)

x_batch_augmented, y_batch_augmented = next(batches_augmented)
x_batch_real, y_batch_real = next(batches_real)

# zip() iterates over however many images the batch actually holds instead of
# assuming exactly 16.
for image_real, image_augmented, one_hot in zip(x_batch_real, x_batch_augmented, y_batch_augmented):
    title_add_on = "random image"
    if one_hot[1]:
        title_add_on = "dog vs cat"

    # One row, two columns: original on the left, augmented on the right.
    # (A 2x2 grid would leave the bottom half of every figure empty.)
    fig, (ax_real, ax_augmented) = plt.subplots(1, 2, figsize=(10, 5))

    ax_real.imshow(image_real)
    ax_real.set_title("original " + title_add_on)
    ax_real.axis("off")

    ax_augmented.imshow(image_augmented)
    ax_augmented.set_title("augmented " + title_add_on)
    ax_augmented.axis("off")

    plt.show()

- Create the neural net model:

In [None]:
# Small CNN for 64x64 RGB images: two conv + max-pool stages followed by a
# dense head with one output per class.
classifier = Sequential()

# Only the first layer declares the input shape; later layers infer theirs.
classifier.add(Conv2D(filters=32, kernel_size=(3, 3), activation="relu",
                padding="valid", input_shape=(64, 64, 3)))

classifier.add(MaxPooling2D(pool_size=2, strides=2, padding='valid'))

classifier.add(Conv2D(filters=32, kernel_size=(3, 3), activation="relu",
                padding="valid"))

classifier.add(MaxPooling2D(pool_size=2, strides=2, padding='valid'))

classifier.add(Flatten())
classifier.add(Dense(128, activation="relu"))
# The generators feed one-hot labels over two mutually exclusive classes, so
# the two outputs are normalised with softmax to form a probability
# distribution (two independent sigmoids could both be high or both be low).
classifier.add(Dense(2, activation="softmax"))

In [None]:
# Show the layer-by-layer summary of the CNN defined above.
classifier.summary()

In [None]:
# Configure training: Adam optimizer, binary cross-entropy loss, and accuracy
# reported as the metric.
classifier.compile(optimizer="adam",loss='binary_crossentropy',metrics=['accuracy'])

- Model Training 

In [None]:
# Train for 20 epochs on the augmented training batches; test_set (built from
# the 20% hold-out of the labelled data) is used as validation data. The
# returned History object is kept in `history`.
history = classifier.fit(training_set, validation_data = test_set, epochs=20)

In [None]:
# Unlabelled competition images, in frame order (shuffle=False) so that the
# predictions line up with the rows of `test`. The filepath column already
# holds full paths (built with os.path.join in list_full_paths), so no
# `directory` prefix is passed -- consistent with the training/validation
# generators.
test_set2 = test_datagen.flow_from_dataframe(dataframe=test,
    x_col = 'filepath',
    y_col = None,
    class_mode = None,
    target_size = (64, 64),
    batch_size = 32,
    shuffle = False)

In [None]:
# Predict every batch of the unlabelled generator. len(test_set2) is the
# integer number of batches, which avoids both the float produced by np.ceil
# and a second hard-coded copy of the generator's batch size.
test_preds = classifier.predict(test_set2, steps=len(test_set2))

# Highest-scoring output index per image ...
test["test_preds"] = np.argmax(test_preds, axis=1)
# ... mapped back to its class name by inverting the training generator's
# name -> index mapping.
labels = {index: name for name, index in training_set.class_indices.items()}

test['test_preds'] = test['test_preds'].map(labels)

In [None]:
# Show 64 randomly chosen test images with their predicted label.
sample_test = test.sample(64).reset_index(drop = True)

# Size the grid from the sample so that no empty rows are reserved
# (64 images -> 8 rows x 8 columns).
n_cols = 8
n_rows = (len(sample_test) + n_cols - 1) // n_cols

fig = plt.figure(1, figsize = (24, 20))
fig.suptitle("Sample Predictions")

for i in range(len(sample_test)):

    plt.subplot(n_rows, n_cols, i + 1)
    image = load_img(sample_test.loc[i, "filepath"])
    plt.imshow(image)
    plt.axis("off")
    plt.title(f"Predicted as {sample_test.loc[i, 'test_preds']}")

plt.tight_layout()
plt.show()

# Now we will be using VGG16

- VGG-16 is a convolutional neural network that is 16 layers deep. You can load a pretrained version of the network trained on more than a million images from the ImageNet database [1]. The pretrained network can classify images into 1000 object categories, such as keyboard, mouse, pencil, and many animals. 

- image credits: [Click Here](https://media.geeksforgeeks.org/wp-content/uploads/20200219152327/conv-layers-vgg16.jpg)

![](https://media.geeksforgeeks.org/wp-content/uploads/20200219152327/conv-layers-vgg16.jpg)

In [None]:
# Recursively delete everything in the current working directory before the
# second experiment rebuilds its own folders. Destructive: anything saved in
# this directory is removed.
!rm -r ./*

In [None]:
!unzip ../input/dogs-vs-cats/train.zip -d ./new_data

In [None]:
# Path to the directory where the original dataset was uncompressed
# (the train/ folder inside ./new_data, where train.zip was unpacked).
# The cells below copy cat.<n>.jpg / dog.<n>.jpg files out of it.
original_dataset_dir = './new_data/train'

In [None]:
# The directory where we will store our smaller dataset.
# os.makedirs(..., exist_ok=True) keeps this cell re-runnable: os.mkdir raises
# FileExistsError when the directory is already there.
base_dir = './new_data_base'
os.makedirs(base_dir, exist_ok=True)

# Directories for our training, validation and test splits
train_dir = os.path.join(base_dir, 'train')
os.makedirs(train_dir, exist_ok=True)
validation_dir = os.path.join(base_dir, 'validation')
os.makedirs(validation_dir, exist_ok=True)
test_dir = os.path.join(base_dir, 'test')
os.makedirs(test_dir, exist_ok=True)

In [None]:
# One 'cats' and one 'dogs' folder inside each split directory.
# os.makedirs(..., exist_ok=True) keeps this cell re-runnable: os.mkdir raises
# FileExistsError when the directory is already there.

# Directory with our training cat pictures
train_cats_dir = os.path.join(train_dir, 'cats')
os.makedirs(train_cats_dir, exist_ok=True)

# Directory with our training dog pictures
train_dogs_dir = os.path.join(train_dir, 'dogs')
os.makedirs(train_dogs_dir, exist_ok=True)

# Directory with our validation cat pictures
validation_cats_dir = os.path.join(validation_dir, 'cats')
os.makedirs(validation_cats_dir, exist_ok=True)

# Directory with our validation dog pictures
validation_dogs_dir = os.path.join(validation_dir, 'dogs')
os.makedirs(validation_dogs_dir, exist_ok=True)

# Directory with our test cat pictures
test_cats_dir = os.path.join(test_dir, 'cats')
os.makedirs(test_cats_dir, exist_ok=True)

# Directory with our test dog pictures
test_dogs_dir = os.path.join(test_dir, 'dogs')
os.makedirs(test_dogs_dir, exist_ok=True)

In [None]:
import shutil


def copy_image_range(prefix, index_range, src_dir, dst_dir):
    """Copy '<prefix>.<i>.jpg' from src_dir to dst_dir for every i in index_range.

    Args:
        prefix: file-name prefix, 'cat' or 'dog'.
        index_range: iterable of integer image indices to copy.
        src_dir: directory holding the original images.
        dst_dir: existing directory that receives the copies.
    """
    for i in index_range:
        fname = '{}.{}.jpg'.format(prefix, i)
        shutil.copyfile(os.path.join(src_dir, fname), os.path.join(dst_dir, fname))


# For each animal: images 0-999 go to training, 1000-1499 to validation and
# 1500-1999 to test. One table instead of six copy-pasted cells.
split_plan = [
    ('cat', range(1000), train_cats_dir),
    ('cat', range(1000, 1500), validation_cats_dir),
    ('cat', range(1500, 2000), test_cats_dir),
    ('dog', range(1000), train_dogs_dir),
    ('dog', range(1000, 1500), validation_dogs_dir),
    ('dog', range(1500, 2000), test_dogs_dir),
]

for prefix, index_range, dst_dir in split_plan:
    copy_image_range(prefix, index_range, original_dataset_dir, dst_dir)

In [None]:
# Sanity check: number of files now present in the training dogs folder.
dog_image_count = len(os.listdir(train_dogs_dir))
print('total training dog images:', dog_image_count)

In [None]:
from tensorflow.keras import layers
from tensorflow.keras import models

In [None]:
from tensorflow.keras.applications import VGG16

# Load the VGG16 network with ImageNet-pretrained weights as a feature
# extractor for 150x150 RGB inputs. include_top=False requests the model
# without its fully-connected classifier layers, leaving the convolutional base.
conv_base = VGG16(weights='imagenet',
                  include_top=False, 
                  input_shape=(150, 150, 3))

# Print the layer-by-layer summary of the loaded base.
conv_base.summary()

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Generator used for feature extraction: rescales pixels by 1/255, with no
# augmentation configured.
datagen = ImageDataGenerator(rescale=1./255)
# Batch size read by extract_features() both for the directory generator and
# for the slice arithmetic that fills the feature arrays.
batch_size = 20

In [None]:
def extract_features(directory, sample_count):
    """Run images from `directory` through `conv_base` and collect the outputs.

    Reads the module-level `datagen`, `batch_size` and `conv_base`.

    Args:
        directory: folder with one sub-folder per class, as expected by
            `flow_from_directory`.
        sample_count: number of images to process. It does not have to be a
            multiple of `batch_size`; the final batch is truncated as needed.

    Returns:
        A `(features, labels)` tuple: `features` has shape
        `(sample_count, 4, 4, 512)` and `labels` has shape `(sample_count,)`,
        holding the labels yielded by the generator in `class_mode='binary'`.
    """
    features = np.zeros(shape=(sample_count, 4, 4, 512))
    labels = np.zeros(shape=(sample_count))

    generator = datagen.flow_from_directory(
        directory,
        target_size=(150, 150),
        batch_size=batch_size,
        class_mode='binary')

    # Number of rows already written. Tracking the real offset (instead of
    # i * batch_size) stays correct when a batch is shorter than batch_size or
    # when sample_count is not a multiple of it.
    filled = 0

    for inputs_batch, labels_batch in generator:
        # verbose=0: one progress bar per batch would flood the cell output.
        features_batch = conv_base.predict(inputs_batch, verbose=0)

        take = min(len(features_batch), sample_count - filled)
        features[filled:filled + take] = features_batch[:take]
        labels[filled:filled + take] = labels_batch[:take]
        filled += take

        if filled >= sample_count:
            # Note that since generators yield data indefinitely in a loop,
            # we must `break` once the requested number of images was seen.
            break
    return features, labels


In [None]:
# Extract conv_base features for each split: 2000 training, 1000 validation
# and 1000 test images (the counts copied into the split folders earlier).
# Note the test array is named `test_feature` (singular), unlike the other two.
train_features, train_labels = extract_features(train_dir, 2000)
validation_features, validation_labels = extract_features(validation_dir, 1000)
test_feature, test_labels = extract_features(test_dir, 1000)

In [None]:
# flattening our input data for dense layers
train_features = np.reshape(train_features, (2000, 4 * 4 * 512))
validation_features = np.reshape(validation_features, (1000, 4 * 4 * 512))
test_feature = np.reshape(test_feature, (1000, 4 * 4 * 512))

In [None]:
# Dense classifier trained on the flattened conv_base features:
# 256 ReLU units -> dropout (regularization) -> a single sigmoid output.
model = models.Sequential([
    layers.Dense(256, activation="relu", input_dim=4 * 4 * 512),
    layers.Dropout(0.5),
    layers.Dense(1, activation="sigmoid"),
])

model.summary()

In [None]:
from tensorflow.keras import optimizers

# Binary cross-entropy matches the single sigmoid output. The optimizer
# argument is spelled `learning_rate`; the old `lr` alias is deprecated and is
# rejected by current Keras optimizers.
model.compile(
    loss="binary_crossentropy",
    optimizer=optimizers.RMSprop(learning_rate=2e-5),
    metrics=["acc"])

In [None]:
# Train the dense classifier on the precomputed features for 30 epochs with
# mini-batches of 20, validating on the validation features after each epoch.
# This rebinds `history`, replacing the History of the first CNN's training run.
history = model.fit(
    train_features, train_labels, 
    epochs=30, 
    batch_size=20,
    validation_data=(validation_features, validation_labels))

In [None]:
# Save the trained dense classifier to an .h5 file in the working directory.
# Only `model` is saved here; conv_base is a separate object.
model.save("./cats_and_dogs_vgg16.h5")

In [None]:
import matplotlib.pyplot as plt

# Per-epoch curves recorded by the last fit() call. The 'acc' / 'val_acc' keys
# follow the metric name passed to compile (metrics=["acc"]).
acc = history.history['acc']
val_acc = history.history['val_acc']
loss = history.history['loss']
val_loss = history.history['val_loss']

In [None]:
# Epoch numbers start at 1 so the x axis reads as "epoch 1, 2, ...".
epochs = range(1, len(acc) + 1)

# Accuracy curves: dots = training, line = validation.
plt.plot(epochs, acc, 'bo', label='Training acc')
plt.plot(epochs, val_acc, 'b', label='Validation acc')
plt.title('Training and validation accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()

# Start a second figure so the loss curves are not drawn on the accuracy axes.
plt.figure()

plt.plot(epochs, loss, 'bo', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()

# Conclusion:
- **Both models performed quite well.**