# Overview

## Image Classification using Fine-Tuned VGG16 Model - Cats and Dogs Image Classification with 99% accuracy (approx.)

This notebook demonstrates the process of building an image classification model using transfer learning with a fine-tuned VGG16 convolutional neural network (CNN). The objective is to classify images of cats and dogs. The dataset used for this task consists of images from the "Cats and Dogs" dataset.

1. **Import Libraries**: Import necessary libraries for data preprocessing, model building, and evaluation.
2. **Load Dataset**: Point to the original training and test directories of the dataset and count the images available in each class folder.
3. **Creating Sample Data**: Create sample data for training, validation, and testing by copying a subset of images from the original dataset.
4. **Data Preparation**: Prepare the data for model training by defining image size, batch size, and creating data generators.
5. **Build Fine-Tuned VGG16 Model**: Download the pre-trained VGG16 model and build a new sequential model by adding layers from the pre-trained model.
6. **Model Compilation**: Compile the model with appropriate optimizer, loss function, and metrics.
7. **Model Training**: Train the model using the training and validation datasets.
8. **Model Evaluation**: Evaluate the model's performance on the test dataset by predicting classes and calculating accuracy.
9. **Conclusion**: Summarize the results and discuss potential improvements or further steps.

This notebook provides a step-by-step guide to building and evaluating an image classification model for the Cats and Dogs dataset using transfer learning with VGG16. Let's get started!


# Import Libraries

In [None]:
import warnings

warnings.simplefilter(action ='ignore', category=FutureWarning)

import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Activation, Dense, Flatten, BatchNormalization, Conv2D, MaxPool2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import categorical_crossentropy
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import confusion_matrix, accuracy_score

import matplotlib.pyplot as plt
import os
import PIL
import shutil
import random
import glob
import itertools

%matplotlib inline

# Load Dataset

In [None]:
# Define the paths to the original training and test image directories
train_dir_main = '/kaggle/input/cat-and-dog/training_set/training_set/'
test_dir_main = '/kaggle/input/cat-and-dog/test_set/test_set/'

# Count files in subdirectories
def count_images_in_folders(directory):
    """Print the number of entries found in each immediate sub-folder of *directory*.

    Entries of *directory* that are not folders are ignored. Every entry inside
    a sub-folder is counted, whatever its type.
    """
    for folder in os.listdir(directory):
        sub_path = os.path.join(directory, folder)
        # Only class folders are reported; plain files at this level are skipped
        if not os.path.isdir(sub_path):
            continue
        images_count = len(os.listdir(sub_path))
        print(f"Folder: {folder}, Images Count: {images_count}")

# Report the per-folder file counts of both original splits
for heading, split_dir in (("Train Set:", train_dir_main), ("\nTest Set:", test_dir_main)):
    print(heading)
    count_images_in_folders(split_dir)

# Creating Sample Data

In [None]:
# Define destination directories for the sampled train / validation / test subsets
train_dir = "/kaggle/working/training_set"
valid_dir = "/kaggle/working/validation_set"
test_dir = "/kaggle/working/test_set"

# Create validation directory if it does not exist
# (only valid_dir is created here; exist_ok=True makes re-running the cell harmless)
os.makedirs(valid_dir, exist_ok=True)

# Function to copy files
def copy_files(src_dir, dst_dir, category, num_files, start=0):
    """Copy a slice of the files of one category folder into a destination folder.

    The files are taken from ``src_dir/category`` in the order returned by
    ``os.listdir`` (which is arbitrary) and written to ``dst_dir/category``,
    which is created if needed.

    Args:
        src_dir: Directory containing the ``category`` sub-folder to read from.
        dst_dir: Directory under which the ``category`` sub-folder is written.
        category: Name of the sub-folder (e.g. ``'cats'``).
        num_files: Maximum number of files to copy; fewer are copied when the
            source does not hold enough files.
        start: Index of the first listed file to copy. Defaults to 0, which
            copies the leading ``num_files`` files. Passing different offsets
            lets callers draw non-overlapping subsets from the same folder.

    Raises:
        ValueError: If ``start`` is negative.
    """
    if start < 0:
        raise ValueError("start must be non-negative")

    src_category = os.path.join(src_dir, category)
    dst_category = os.path.join(dst_dir, category)
    os.makedirs(dst_category, exist_ok=True)

    # Keep regular files only: shutil.copy raises on a directory, so a nested
    # folder among the selected entries would otherwise abort the whole copy.
    files = [
        name for name in os.listdir(src_category)
        if os.path.isfile(os.path.join(src_category, name))
    ]

    # Slicing already clamps to the number of available files
    for name in files[start:start + num_files]:
        shutil.copy(os.path.join(src_category, name),
                    os.path.join(dst_category, name))

# Copy up to 500 cats and 500 dogs (1000 files in total) from the train set to the training directory
copy_files(train_dir_main, train_dir, 'cats', 500)
copy_files(train_dir_main, train_dir, 'dogs', 500)

# Copy up to 250 cats and 250 dogs (500 files in total) from the test set to the test directory
copy_files(test_dir_main, test_dir, 'cats', 250)
copy_files(test_dir_main, test_dir, 'dogs', 250)

# Copy up to 250 cats and 250 dogs (500 files in total) from the test set to the validation directory
# NOTE(review): these calls use the same source folders and the same counts as
# the test copy above, and copy_files takes the leading entries of the folder
# listing, so the validation set is expected to hold the same files as the
# test set. Consider drawing a disjoint slice for validation.
copy_files(test_dir_main, valid_dir, 'cats', 250)
copy_files(test_dir_main, valid_dir, 'dogs', 250)

# Print the per-class file counts of the three sampled directories
print("Train Directory:")
count_images_in_folders(train_dir)
print("\nTest Directory:")
count_images_in_folders(test_dir)
print("\nValidation Directory:")
count_images_in_folders(valid_dir)

# Data Preparation

In [None]:
# Define the image size and batch size
image_size = (224, 224)  # passed as target_size to every generator below
batch_size = 10  # passed as batch_size to every generator below

# Create an ImageDataGenerator for preprocessing
# (each image is passed through VGG16's own preprocess_input function)
datagen = ImageDataGenerator(preprocessing_function=tf.keras.applications.vgg16.preprocess_input)

# Create the train_batches using flow_from_directory,
# reading the 'cats' and 'dogs' sub-folders of train_dir
train_batches = datagen.flow_from_directory(
    train_dir,
    target_size=image_size,
    batch_size=batch_size,
    classes=['cats', 'dogs']
)

# Create the test_batches using flow_from_directory.
# shuffle=False is required here: the evaluation cells compare the output of
# model.predict(test_batches) with test_batches.classes, which is listed in
# file order. flow_from_directory shuffles by default, which would pair each
# prediction with the label of a different image.
test_batches = datagen.flow_from_directory(
    test_dir,
    target_size=image_size,
    batch_size=batch_size,
    classes=['cats', 'dogs'],
    shuffle=False
)

# Build the validation iterator from the 'cats' and 'dogs' sub-folders of valid_dir
valid_batches = datagen.flow_from_directory(
    directory=valid_dir,
    classes=['cats', 'dogs'],
    batch_size=batch_size,
    target_size=image_size,
)


In [None]:
# Number of image files each iterator discovered, in train / valid / test order
train_total_images, valid_total_images, test_total_images = (
    len(batches.filenames)
    for batches in (train_batches, valid_batches, test_batches)
)

print(f"Total Train Images: {train_total_images}, Total Valid Images: {valid_total_images}, Total Test Images: {test_total_images}")


In [None]:
# Sanity-check the number of images and classes found by each iterator.
# NOTE(review): the expected counts are hard-coded. 499 (rather than the 500
# files requested per split) presumably means one copied entry was not picked
# up as an image; confirm against the dataset. The numbers also depend on
# which entries the directory listing returned first.
assert train_batches.n == 1000
assert test_batches.n == 499
assert valid_batches.n == 499
assert train_batches.num_classes == test_batches.num_classes == valid_batches.num_classes == 2

In [None]:
# Pull one batch of images and their labels from the training iterator
imgs, labels = next(train_batches)

# Data Visualization

In [None]:
# Create plot function
def plotImages(images_arr):
    """Show the given images side by side in one row of ten panels.

    At most ten images are drawn; each panel that receives an image has its
    axis decorations switched off.
    """
    _, panels = plt.subplots(nrows=1, ncols=10, figsize=(20, 20))
    for picture, panel in zip(images_arr, panels.flatten()):
        panel.imshow(picture)
        panel.axis('off')
    plt.tight_layout()
    plt.show()
    
# Print images & labels of the batch drawn from train_batches
# NOTE(review): imgs went through VGG16's preprocess_input in the generator,
# so the colours shown are presumably not the original ones; confirm.
plotImages(imgs)
print(labels)

# Build Fine-Tuned VGG16 Model

In [None]:
# Download model from "https://www.tensorflow.org/api_docs/python/tf/keras/applications/vgg16/VGG16"
# Called without arguments, so weights and input shape are whatever the library defaults to.
vgg16_model = tf.keras.applications.vgg16.VGG16()

## VGG16 Model Summary

In [None]:
# Print the structure of the pre-trained VGG16 model
vgg16_model.summary()

In [None]:
# Create function for count_params
def count_params(model):
    """Count the trainable and non-trainable parameters in the model.

    Args:
        model: Object exposing ``trainable_weights`` and
            ``non_trainable_weights``, each an iterable of weights that have
            a ``shape`` attribute (a TensorShape or a plain sequence of ints).

    Returns:
        dict with two integer entries, ``'non_trainable_params'`` and
        ``'trainable_params'``.
    """
    def _total_elements(weights):
        # Multiply the dimensions directly instead of calling
        # shape.num_elements(): that method exists on TensorShape only, while
        # Keras 3 variables expose their shape as a plain tuple.
        return sum(
            int(np.prod(tuple(weight.shape), dtype=np.int64))
            for weight in weights
        )

    return {
        'non_trainable_params': _total_elements(model.non_trainable_weights),
        'trainable_params': _total_elements(model.trainable_weights),
    }

# Call the function to get the counts for the pre-trained VGG16 model
params = count_params(vgg16_model)

# Print the counts
print("Total trainable parameters:", params['trainable_params'])
print("Total non-trainable parameters:", params['non_trainable_params'])

In [None]:
# Call the function to get the counts
params = count_params(vgg16_model)

# Check the counts: every weight of the freshly loaded model is expected to be trainable
assert params['non_trainable_params'] == 0
assert params['trainable_params'] == 138357544

# Build Sequential Model from VGG16 Model

In [None]:
# Create a new Sequential model
model = Sequential()

# Add layers from the VGG16 model to the new Sequential model.
# The slice [:-1] leaves out VGG16's last layer; the layer objects themselves
# are added (not copies), so both models refer to the same layers.
for layer in vgg16_model.layers[:-1]:
    model.add(layer)

# Print new Sequential Model Summary

In [None]:
# Show the structure of the new model (VGG16 without its last layer)
model.summary()

In [None]:
# Call the function to get the counts
params = count_params(model)

# Check the counts: nothing is frozen yet, so all remaining weights are expected to be trainable
assert params['non_trainable_params'] == 0
assert params['trainable_params'] == 134260544

In [None]:
# Freeze all layers in the model
# (every layer currently in the model is marked as not trainable)
for layer in model.layers:
    layer.trainable = False

In [None]:
# Add the output layer: 2 units with softmax activation.
# It is added after the freezing loop, so it is the only layer left trainable.
model.add(Dense(units=2, activation='softmax'))

In [None]:
# Show the structure of the final model, including the new output layer
model.summary()

In [None]:
# Call the function to get the counts
params = count_params(model)

# Check the new counts: the former trainable total is now expected to be
# non-trainable, and only the added output layer's parameters are trainable
assert params['trainable_params'] == 8194
assert params['non_trainable_params'] == 134260544

# Compile the Fine-Tuned VGG16 model

In [None]:
# Compile the model: Adam optimizer with learning rate 0.0001,
# categorical cross-entropy loss, accuracy as the reported metric
model.compile(optimizer=Adam(learning_rate=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])

# Train the Fine-Tuned VGG16 model

In [None]:
# Train the model for 5 epochs on train_batches, validating against valid_batches
model.fit(x=train_batches, validation_data=valid_batches, epochs=5, verbose=2)

In [None]:
# Check if accuracy exceeds 95%
# (reads the 'accuracy' entry of the training history and takes the best
# value over the epochs; validation accuracy is not checked here)
assert max(model.history.history['accuracy']) > 0.95

# Predict on test batches

In [None]:
# Predict using the model on every batch of the test iterator (no progress output)
predictions = model.predict(x = test_batches, verbose=0)

In [None]:
# Calculate confusion matrix
# y_pred is the index of the highest score in each prediction row.
# NOTE(review): this pairs predictions with test_batches.classes by position,
# so it is only meaningful if test_batches yields images in the same order as
# test_batches.classes (i.e. the iterator must not shuffle); verify.
cm = confusion_matrix(y_true=test_batches.classes, y_pred=np.argmax(predictions, axis=-1))

In [None]:
# Display the mapping from class name to label index used by the test iterator
test_batches.class_indices

# Check Metrics for evaluation

In [None]:
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    Print and plot a confusion matrix.

    Args:
        cm: 2-D NumPy array; rows are labelled as true classes and columns as
            predicted classes.
        classes: Sequence of class names used as tick labels on both axes.
        normalize: When True, each row is divided by its sum before the
            matrix is printed and drawn.
        title: Title placed above the plot.
        cmap: Matplotlib colormap used for the cells.
    """
    # Normalise BEFORE drawing so the colours, the colour bar and the cell
    # annotations all describe the same values.
    if normalize:
        row_sums = cm.sum(axis=1)[:, np.newaxis]
        # Rows without any sample stay at zero instead of becoming NaN
        cm = np.divide(cm.astype('float'), row_sums,
                       out=np.zeros(cm.shape, dtype='float'),
                       where=row_sums != 0)
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # Cells above half of the maximum get the alternate text colour
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        # Ratios are shortened to two decimals; raw counts are shown unchanged
        cell_text = f"{cm[i, j]:.2f}" if normalize else cm[i, j]
        plt.text(j, i, cell_text,
                 horizontalalignment="center",
                 color="blue" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    
# Tick labels for the plot, in the same order as the classes list given to the generators
cm_plot_labels = ['cats', 'dogs']
plot_confusion_matrix(cm=cm, classes=cm_plot_labels, title='Confusion Matrix', cmap=plt.cm.Greens)

In [None]:
# Calculate predicted classes (index of the highest score in each prediction row)
predicted_classes = np.argmax(predictions, axis=-1)

# Calculate accuracy
# NOTE(review): like the confusion matrix, this compares by position and so
# assumes predictions are in the same order as test_batches.classes; verify.
accuracy = accuracy_score(test_batches.classes, predicted_classes)

print("Accuracy on test dataset:", accuracy)