# **Transfer Learning Project**
# “Image Classification Using a Pretrained Model”

**Objective**

Use a pretrained CNN (trained on ImageNet) and adapt it to a new task with limited data.

**You will learn:**

- Transfer learning

- Feature extraction vs fine-tuning

- Freezing & unfreezing layers

- Why pretrained models work so well

**Why Transfer Learning Is Powerful**

- Requires less data

- Trains faster

- Achieves higher accuracy

- Standard practice in industry

# import libraries

In [1]:
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Load Pretrained Model

In [2]:
base_model = MobileNetV2(
    weights='imagenet',
    include_top=False,
    input_shape=(160, 160, 3)
)

base_model.trainable = False

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_160_no_top.h5
[1m9406464/9406464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


#Load dataset

In [4]:
import kagglehub
path = kagglehub.dataset_download("shaunthesheep/microsoft-catsvsdogs-dataset")

Using Colab cache for faster access to the 'microsoft-catsvsdogs-dataset' dataset.


# Data Preprocessing

In [6]:
import os

datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2
)

# Construct the correct path to the training data
train_dir = os.path.join(path, "PetImages") # Assuming "PetImages" is the folder containing "Cat" and "Dog"

train_data = datagen.flow_from_directory(
    train_dir,
    target_size=(160,160),
    batch_size=32,
    class_mode='binary',
    subset='training'
)

val_data = datagen.flow_from_directory(
    train_dir,
    target_size=(160,160),
    batch_size=32,
    class_mode='binary',
    subset='validation'
)

Found 20000 images belonging to 2 classes.
Found 5000 images belonging to 2 classes.


# Build Final Model

In [7]:
model = models.Sequential([
    base_model,
    layers.GlobalAveragePooling2D(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(1, activation='sigmoid')
])

# Dataset Cleaning and Preparation

To remove corrupted images by copying the dataset to a writable directory, ensuring error-free data loading and stable model training.

In [10]:
import os
from PIL import Image
import shutil

# Define the base directory for the images
# 'path' variable should be available from previous execution (kagglehub.dataset_download)
# train_dir is where the 'Cat' and 'Dog' folders are located
image_base_dir = train_dir # This is the original read-only path

# Define a writable temporary directory
writable_base_dir = '/tmp/PetImages_writable'

print(f"Copying dataset from {image_base_dir} to {writable_base_dir} for cleaning...")
# Ensure the writable base directory exists
if os.path.exists(writable_base_dir):
    shutil.rmtree(writable_base_dir)
shutil.copytree(image_base_dir, writable_base_dir)
print("Dataset copied successfully.")

def clean_image_directory(directory_path):
    print(f"Cleaning directory: {directory_path}")
    for root, _, files in os.walk(directory_path):
        for file in files:
            file_path = os.path.join(root, file)
            try:
                # Attempt to open the image to check for corruption
                img = Image.open(file_path)
                img.verify() # Verify if it's a valid image
                img.close()
            except (IOError, SyntaxError, Image.UnidentifiedImageError, OSError) as e:
                print(f"Deleting corrupted image: {file_path} - Error: {e}")
                os.remove(file_path) # Now this should work as it's in a writable location
                # No need for shutil.rmtree for a file, os.remove is sufficient

# Clean both 'Cat' and 'Dog' subdirectories in the writable location
cat_dir_writable = os.path.join(writable_base_dir, 'Cat')
dog_dir_writable = os.path.join(writable_base_dir, 'Dog')

clean_image_directory(cat_dir_writable)
clean_image_directory(dog_dir_writable)

print("Dataset cleaning complete on the writable copy.")

# Update train_dir and val_data to point to the cleaned, writable dataset
train_dir = writable_base_dir

# Re-initialize ImageDataGenerator and flow_from_directory to use the cleaned data
datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2
)

train_data = datagen.flow_from_directory(
    train_dir,
    target_size=(160,160),
    batch_size=32,
    class_mode='binary',
    subset='training'
)

val_data = datagen.flow_from_directory(
    train_dir,
    target_size=(160,160),
    batch_size=32,
    class_mode='binary',
    subset='validation'
)

Copying dataset from /kaggle/input/microsoft-catsvsdogs-dataset/PetImages to /tmp/PetImages_writable for cleaning...
Dataset copied successfully.
Cleaning directory: /tmp/PetImages_writable/Cat
Deleting corrupted image: /tmp/PetImages_writable/Cat/666.jpg - Error: cannot identify image file '/tmp/PetImages_writable/Cat/666.jpg'
Deleting corrupted image: /tmp/PetImages_writable/Cat/Thumbs.db - Error: cannot identify image file '/tmp/PetImages_writable/Cat/Thumbs.db'
Cleaning directory: /tmp/PetImages_writable/Dog
Deleting corrupted image: /tmp/PetImages_writable/Dog/11702.jpg - Error: cannot identify image file '/tmp/PetImages_writable/Dog/11702.jpg'




Deleting corrupted image: /tmp/PetImages_writable/Dog/Thumbs.db - Error: cannot identify image file '/tmp/PetImages_writable/Dog/Thumbs.db'
Dataset cleaning complete on the writable copy.
Found 20000 images belonging to 2 classes.
Found 4998 images belonging to 2 classes.


# Compile & Train

In [11]:
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)

model.fit(train_data, epochs=5, validation_data=val_data)

Epoch 1/5


  self._warn_if_super_not_called()


[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m559s[0m 885ms/step - accuracy: 0.9748 - loss: 0.0742 - val_accuracy: 0.9786 - val_loss: 0.0593
Epoch 2/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m549s[0m 877ms/step - accuracy: 0.9786 - loss: 0.0574 - val_accuracy: 0.9756 - val_loss: 0.0594
Epoch 3/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m559s[0m 874ms/step - accuracy: 0.9822 - loss: 0.0504 - val_accuracy: 0.9778 - val_loss: 0.0597
Epoch 4/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m551s[0m 883ms/step - accuracy: 0.9822 - loss: 0.0467 - val_accuracy: 0.9782 - val_loss: 0.0589
Epoch 5/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m550s[0m 880ms/step - accuracy: 0.9863 - loss: 0.0358 - val_accuracy: 0.9778 - val_loss: 0.0604


<keras.src.callbacks.history.History at 0x7f2efd8b4ef0>

# Fine-Tuning

In [12]:
base_model.trainable = True

for layer in base_model.layers[:100]:
    layer.trainable = False

In [13]:
model.compile(
    optimizer=tf.keras.optimizers.Adam(1e-5),
    loss='binary_crossentropy',
    metrics=['accuracy']
)

model.fit(train_data, epochs=5, validation_data=val_data)


Epoch 1/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m956s[0m 1s/step - accuracy: 0.9181 - loss: 0.2031 - val_accuracy: 0.9722 - val_loss: 0.0896
Epoch 2/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m897s[0m 1s/step - accuracy: 0.9679 - loss: 0.0810 - val_accuracy: 0.9720 - val_loss: 0.0844
Epoch 3/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m909s[0m 1s/step - accuracy: 0.9736 - loss: 0.0667 - val_accuracy: 0.9772 - val_loss: 0.0728
Epoch 4/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m878s[0m 1s/step - accuracy: 0.9791 - loss: 0.0523 - val_accuracy: 0.9778 - val_loss: 0.0723
Epoch 5/5
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m878s[0m 1s/step - accuracy: 0.9830 - loss: 0.0421 - val_accuracy: 0.9746 - val_loss: 0.0768


<keras.src.callbacks.history.History at 0x7f2efdde7e00>