This is a ResNet-based CNN for binary image classification (Cat vs Dog)

AI was used to help generate the codebase

Note: Make sure that the tensorflow package is installed in your device.

In [1]:
# Lib imports
import os
import tensorflow as tf
# Use the ImageDataGenerator from the tensorflow runtime to avoid static import resolution issues
ImageDataGenerator = tf.keras.preprocessing.image.ImageDataGenerator
# Access keras submodules via the tf object to avoid static import resolution issues
layers = tf.keras.layers
models = tf.keras.models
Model = tf.keras.Model
import numpy as np

In [2]:
train_dir = "./train"  
test_dir = "./test"   

In [3]:
# IMAGE PARAMETERS
# Used to resize the input images, also will determine the input size of your input layer.
IMG_SIZE = (224, 224)  # ResNet requires 224x224 input
BATCH_SIZE = 32

In [4]:
# DATA PREPROCESSING & AUGMENTATION
# Optional but recommended for image processing tasks, especially with limited data.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
    validation_split=0.2
)
test_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary',
    subset='training'
)
val_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary',
    subset='validation'
)
test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary',
    shuffle=False
)

Found 3788 images belonging to 2 classes.
Found 945 images belonging to 2 classes.
Found 945 images belonging to 2 classes.
Found 1184 images belonging to 2 classes.
Found 1184 images belonging to 2 classes.


In [6]:
# RESNET CNN MODEL ARCHITECTURE
# Transfer learning with ResNet50 pre-trained on ImageNet

# Load pre-trained ResNet50 without top layers
base_model = tf.keras.applications.ResNet50(
    weights='imagenet',
    include_top=False,
    input_shape=(IMG_SIZE[0], IMG_SIZE[1], 3)
)

# Freeze the base model initially
base_model.trainable = False

# Build custom top layers with regularization and dropout
inputs = layers.Input(shape=(IMG_SIZE[0], IMG_SIZE[1], 3))
x = base_model(inputs, training=False)
x = layers.GlobalAveragePooling2D()(x)
x = layers.BatchNormalization()(x)
x = layers.Dense(256, activation='relu')(x)
x = layers.Dropout(0.5)(x)
x = layers.Dense(128, activation='relu')(x)
x = layers.Dropout(0.3)(x)
outputs = layers.Dense(1, activation='sigmoid')(x)

# Create the complete model
model = Model(inputs=inputs, outputs=outputs)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m94765736/94765736[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m84s[0m 1us/step
[1m94765736/94765736[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m84s[0m 1us/step


In [7]:
# Display model architecture summary
print("[INFO] Model Architecture Summary:")
model.summary()

[INFO] Model Architecture Summary:


In [8]:
# Configure the model optimizer, loss function, and metrics
optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)
model.compile(
    optimizer=optimizer,
    loss='binary_crossentropy',
    metrics=['accuracy']
)

In [9]:
# TRAINING THE CNN - PHASE 1: FROZEN BASE MODEL
history_phase1 = model.fit(
    train_generator,
    epochs=10,
    validation_data=val_generator
)

Epoch 1/10
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m105s[0m 849ms/step - accuracy: 0.6993 - loss: 0.5879 - val_accuracy: 0.5016 - val_loss: 0.7609
Epoch 2/10
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m105s[0m 849ms/step - accuracy: 0.6993 - loss: 0.5879 - val_accuracy: 0.5016 - val_loss: 0.7609
Epoch 2/10
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m103s[0m 864ms/step - accuracy: 0.7474 - loss: 0.5317 - val_accuracy: 0.6931 - val_loss: 0.5720
Epoch 3/10
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m103s[0m 864ms/step - accuracy: 0.7474 - loss: 0.5317 - val_accuracy: 0.6931 - val_loss: 0.5720
Epoch 3/10
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m101s[0m 850ms/step - accuracy: 0.7629 - loss: 0.5109 - val_accuracy: 0.8180 - val_loss: 0.4867
Epoch 4/10
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m101s[0m 850ms/step - accuracy: 0.7629 - loss: 0.5109 - val_accuracy: 0.8180 - val_loss: 0.4867
Epoc

In [10]:
# FINE-TUNING - PHASE 2: UNFREEZE BASE MODEL
# Unfreeze the base model
base_model.trainable = True

# Freeze the first 100 layers to retain low-level features
for layer in base_model.layers[:100]:
    layer.trainable = False

# Recompile with a lower learning rate for fine-tuning
fine_tune_lr = 0.0001 / 10
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=fine_tune_lr),
    loss='binary_crossentropy',
    metrics=['accuracy']
)

# Continue training with unfrozen layers
history_phase2 = model.fit(
    train_generator,
    epochs=10,
    validation_data=val_generator
)

Epoch 1/10
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m190s[0m 2s/step - accuracy: 0.6972 - loss: 0.7455 - val_accuracy: 0.6603 - val_loss: 0.6148
Epoch 2/10
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m190s[0m 2s/step - accuracy: 0.6972 - loss: 0.7455 - val_accuracy: 0.6603 - val_loss: 0.6148
Epoch 2/10
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m179s[0m 2s/step - accuracy: 0.7186 - loss: 0.6722 - val_accuracy: 0.7249 - val_loss: 0.5442
Epoch 3/10
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m179s[0m 2s/step - accuracy: 0.7186 - loss: 0.6722 - val_accuracy: 0.7249 - val_loss: 0.5442
Epoch 3/10
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m180s[0m 2s/step - accuracy: 0.7355 - loss: 0.6343 - val_accuracy: 0.8032 - val_loss: 0.4231
Epoch 4/10
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m180s[0m 2s/step - accuracy: 0.7355 - loss: 0.6343 - val_accuracy: 0.8032 - val_loss: 0.4231
Epoch 4/10
[1m119/119

In [11]:
# EVALUATE THE MODEL
test_loss, test_acc = model.evaluate(test_generator)
print(f"Test Accuracy: {test_acc}")

[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 726ms/step - accuracy: 0.8269 - loss: 0.3914
Test Accuracy: 0.8268581032752991
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 726ms/step - accuracy: 0.8269 - loss: 0.3914
Test Accuracy: 0.8268581032752991


In [12]:
# SAVE THE MODEL
model.save('exercise_6_resnet_ledesma.h5')



In [32]:
# This is important to map the predicted class indices back to class labels
print(train_generator.class_indices)

{'chihuahua': 0, 'muffin': 1}


In [33]:
# SIMPLE INFERENCE SCRIPT
# Use the tensorflow runtime object 'tf' already imported in the notebook
# to access the preprocessing utilities to avoid static import resolution issues.
image = tf.keras.preprocessing.image

def predict_image(img_path, model_path='exercise_6_resnet_ledesma.h5'):
    model = tf.keras.models.load_model(model_path, compile=False)
    img = image.load_img(img_path, target_size=IMG_SIZE)
    img_array = image.img_to_array(img) / 255.0
    img_array = np.expand_dims(img_array, axis=0)
    pred = model.predict(img_array)[0,0]    

    # Since train_generator.class_indices is {'chihuahua': 0, 'muffin': 1}
    # my label layout should be:
    label = "Muffin" if pred >= 0.5 else "Chihuahua"
    print(f"Prediction: {label} (confidence: {pred:.2f})")

In [34]:
# Example usage:
predict_image("./predict/predict_1.jpg")
predict_image("./predict/predict_2.jpg")
predict_image("./predict/predict_3.jpg")
predict_image("./predict/predict_4.jpeg")
predict_image("./predict/predict_5.webp")
predict_image("./predict/predict_6.webp")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 930ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 930ms/step
Prediction: Muffin (confidence: 0.99)
Prediction: Muffin (confidence: 0.99)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 914ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 914ms/step
Prediction: Muffin (confidence: 0.98)
Prediction: Muffin (confidence: 0.98)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 933ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 933ms/step
Prediction: Muffin (confidence: 0.97)
Prediction: Muffin (confidence: 0.97)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 909ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 909ms/step
Prediction: Chihuahua (confidence: 0.39)
Prediction: Chihuahua (confidence: 0.39)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 968ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━