This is an example of a simple CNN developed, trained and utilized

AI was used to help generate the codebase

Note: Make sure that the tensorflow package is installed in your device.

In [1]:
# Lib imports
import os
import tensorflow as tf
# Use the ImageDataGenerator from the tensorflow runtime to avoid static import resolution issues
ImageDataGenerator = tf.keras.preprocessing.image.ImageDataGenerator
# Access keras submodules via the tf object to avoid static import resolution issues
layers = tf.keras.layers
models = tf.keras.models
import numpy as np

In [2]:
# DATASET DIRECTORY CONFIGURATION
# Download and unzip the dataset from Kaggle, set the directory paths accordingly.

# For muffin vs chihuahua version:
train_dir = "./train" 
test_dir = "./test"   

# For cat vs dog version:
# train_dir = "./train_catdog" 
# test_dir = "./test_catdog"   

In [3]:
# IMAGE PARAMETERS
# Used to resize the input images, also will determine the input size of your input layer.
IMG_SIZE = (128, 128)
BATCH_SIZE = 32

In [4]:
# DATA PREPROCESSING & AUGMENTATION
# Optional but recommended for image processing tasks, especially with limited data.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
    validation_split=0.2
)
test_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary',
    subset='training'
)
val_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary',
    subset='validation'
)
test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary',
    shuffle=False
)

Found 3788 images belonging to 2 classes.
Found 945 images belonging to 2 classes.
Found 945 images belonging to 2 classes.
Found 1184 images belonging to 2 classes.
Found 1184 images belonging to 2 classes.


In [5]:
# SIMPLE CNN MODEL ARCHITECTURE

# Some modifications are applied
initial_learning_rate = 0.001
# We are combining ExponentialDecay with Adam optimizer for better learning rate management
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate,
    decay_steps=10000,
    decay_rate=0.9,
    staircase=True
)

# Create the optimizer with the learning rate schedule
optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)

# Apply L2 regularization and stronger dropout (SpatialDropout2D for conv blocks)
l2_reg = tf.keras.regularizers.l2(1e-4)

model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', kernel_regularizer=l2_reg, input_shape=(IMG_SIZE[0], IMG_SIZE[1], 3)),
    layers.MaxPooling2D(2, 2),
    layers.SpatialDropout2D(0.2),

    layers.Conv2D(64, (3, 3), activation='relu', kernel_regularizer=l2_reg),
    layers.MaxPooling2D(2, 2),
    layers.SpatialDropout2D(0.2),

    layers.Conv2D(128, (3, 3), activation='relu', kernel_regularizer=l2_reg),
    layers.MaxPooling2D(2, 2),
    layers.SpatialDropout2D(0.2),

    layers.Flatten(),
    layers.Dense(128, activation='relu', kernel_regularizer=l2_reg),
    layers.Dropout(0.5),
    layers.Dense(1, activation='sigmoid')
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [6]:
# Configure the model optimizers, loss function, and metrics
# model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy']) # old
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

In [7]:
# TRAINING THE CNN
history = model.fit(
    train_generator,
    epochs=10,
    validation_data=val_generator
)

Epoch 1/10
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 203ms/step - accuracy: 0.6143 - loss: 0.6832 - val_accuracy: 0.7778 - val_loss: 0.5705
Epoch 2/10
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 233ms/step - accuracy: 0.7569 - loss: 0.5446 - val_accuracy: 0.7968 - val_loss: 0.4750
Epoch 3/10
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 247ms/step - accuracy: 0.7878 - loss: 0.4944 - val_accuracy: 0.8497 - val_loss: 0.4071
Epoch 4/10
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 251ms/step - accuracy: 0.8083 - loss: 0.4661 - val_accuracy: 0.8762 - val_loss: 0.3493
Epoch 5/10
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 249ms/step - accuracy: 0.8012 - loss: 0.4784 - val_accuracy: 0.8783 - val_loss: 0.3645
Epoch 6/10
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 216ms/step - accuracy: 0.8105 - loss: 0.4528 - val_accuracy: 0.8709 - val_loss: 0.3627
Epoch 7/10

In [8]:
# EVALUATE THE MODEL
test_loss, test_acc = model.evaluate(test_generator)
print(f"Test Accuracy: {test_acc}")

[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 98ms/step - accuracy: 0.8818 - loss: 0.3419
Test Accuracy: 0.8817567825317383


In [9]:
# SAVE THE MODEL
model.save('exercise_6_trained_model_improved.h5')
# model.save('exercise_6_catdog_ledesma.h5')



In [10]:
# This is important to map the predicted class indices back to class labels
print(train_generator.class_indices)

{'chihuahua': 0, 'muffin': 1}


In [11]:
# SIMPLE INFERENCE SCRIPT
# Use the tensorflow runtime object 'tf' already imported in the notebook
# to access the preprocessing utilities to avoid static import resolution issues.
image = tf.keras.preprocessing.image

def predict_image(img_path, model_path='exercise_6_trained_model_improved.h5'): # for muffin vs chihuahua
# def predict_image(img_path, model_path='exercise_6_catdog_ledesma.h5'): # for cat vs dog
    model = tf.keras.models.load_model(model_path, compile=False)
    img = image.load_img(img_path, target_size=IMG_SIZE)
    img_array = image.img_to_array(img) / 255.0
    img_array = np.expand_dims(img_array, axis=0)
    pred = model.predict(img_array)[0,0]
    
    # Version: cat vs dog classification
    # Since train_generator.class_indices is {'cat': 0, 'dog': 1}
    # my label layout should be:
    # label = "Dog" if pred >= 0.5 else "Cat"
    # print(f"Prediction: {label} (confidence: {pred:.2f})")
    
    # Version:  muffin vs chihuahua classification:
    # Since train_generator.class_indices is {'chihuahua': 0, 'muffin': 1}
    # my label layout should be:
    label = "Muffin" if pred >= 0.5 else "Chihuahua"
    print(f"Prediction: {label} (confidence: {pred:.2f})")

In [14]:
# Example usage:
predict_image("./predict/predict_1.jpg")
predict_image("./predict/predict_2.jpg")
predict_image("./predict/predict_3.jpg")
predict_image("./predict/predict_4.jpeg")
predict_image("./predict/predict_5.webp")
predict_image("./predict/predict_6.webp")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 64ms/step
Prediction: Muffin (confidence: 0.99)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step
Prediction: Muffin (confidence: 0.96)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step
Prediction: Muffin (confidence: 0.90)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step
Prediction: Muffin (confidence: 0.91)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step
Prediction: Chihuahua (confidence: 0.05)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step
Prediction: Chihuahua (confidence: 0.14)
