This is an example of a simple CNN developed, trained and utilized

AI was used to help generate the codebase

Note: Make sure that the tensorflow package is installed in your device.

In [8]:
# Lib imports
import os
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers, models, regularizers
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense, concatenate, Input, BatchNormalization
import numpy as np

2.	Apply improvements to the CNN architecture: regularization, and dropout, and retrain the CNN. 

In [7]:


# DATASET DIRECTORY CONFIGURATION
# Download and unzip the dataset from Kaggle, set the directory paths accordingly.
import kagglehub

# Download latest version
path = kagglehub.dataset_download("samuelcortinhas/muffin-vs-chihuahua-image-classification")

train_dir = kagglehub.dataset_download("samuelcortinhas/muffin-vs-chihuahua-image-classification") # e.g. './muffin-vs-chihuahua/train'
test_dir = kagglehub.dataset_download("samuelcortinhas/muffin-vs-chihuahua-image-classification")    # e.g. './muffin-vs-chihuahua/test'

In [6]:
# IMAGE PARAMETERS
# Used to resize the input images, also will determine the input size of your input layer.
IMG_SIZE = (128, 128)
BATCH_SIZE = 32

In [4]:
# DATA PREPROCESSING & AUGMENTATION
# Optional but recommended for image processing tasks, especially with limited data.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
    validation_split=0.2
)
test_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary',
    subset='training'
)
val_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary',
    subset='validation'
)
test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary',
    shuffle=False
)

Found 4735 images belonging to 2 classes.
Found 1182 images belonging to 2 classes.
Found 5917 images belonging to 2 classes.


In [5]:
# SIMPLE CNN MODEL ARCHITECTURE

# Some modifications are applied
initial_learning_rate = 0.001
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate,
    decay_steps=10000,
    decay_rate=0.9,
    staircase=True
)




# Create the optimizer with the learning rate schedule
optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)

# L2 regularization parameter
l2_reg = 0.001

# Applied dropout layers to reduce overfitting
model = models.Sequential([
    # First Conv Block
    layers.Conv2D(32, (3, 3), activation='relu', 
                  kernel_regularizer=regularizers.l2(l2_reg),
                  input_shape=(IMG_SIZE[0], IMG_SIZE[1], 3)),
    layers.BatchNormalization(),
    layers.MaxPooling2D(2, 2),
    layers.Dropout(0.25),
    
    # Second Conv Block
    layers.Conv2D(64, (3, 3), activation='relu',
                  kernel_regularizer=regularizers.l2(l2_reg)),
    layers.BatchNormalization(),
    layers.MaxPooling2D(2, 2),
    layers.Dropout(0.25),
    
    # Third Conv Block
    layers.Conv2D(128, (3, 3), activation='relu',
                  kernel_regularizer=regularizers.l2(l2_reg)),
    layers.BatchNormalization(),
    layers.MaxPooling2D(2, 2),
    layers.Dropout(0.3),
    
    # Dense Layers
    layers.Flatten(),
    layers.Dense(128, activation='relu',
                 kernel_regularizer=regularizers.l2(l2_reg)),
    layers.BatchNormalization(),
    layers.Dropout(0.5),
    layers.Dense(1, activation='sigmoid')
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [6]:
# Configure the model optimizers, loss function, and metrics
# model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy']) # old
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

In [7]:
# Display model architecture
model.summary()

In [26]:
# TRAINING THE CNN
# TRAINING WITH EARLY STOPPING AND MODEL CHECKPOINT
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True
)

model_checkpoint = tf.keras.callbacks.ModelCheckpoint(
    'best_muffin_vs_chihuahua_cnn.keras',
    monitor='val_accuracy',
    save_best_only=True,
    mode='max'
)

history = model.fit(
    train_generator,
    epochs=5,
    validation_data=val_generator,
    callbacks=[early_stopping, model_checkpoint]
)

Epoch 1/5
[1m148/148[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m91s[0m 616ms/step - accuracy: 0.7992 - loss: 0.5317 - val_accuracy: 0.8003 - val_loss: 0.5378
Epoch 2/5
[1m148/148[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m80s[0m 542ms/step - accuracy: 0.7998 - loss: 0.5304 - val_accuracy: 0.8003 - val_loss: 0.5181
Epoch 3/5
[1m148/148[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m74s[0m 497ms/step - accuracy: 0.7987 - loss: 0.5363 - val_accuracy: 0.7673 - val_loss: 0.7244
Epoch 4/5
[1m148/148[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m89s[0m 605ms/step - accuracy: 0.7998 - loss: 0.5324 - val_accuracy: 0.8003 - val_loss: 0.5349
Epoch 5/5
[1m148/148[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m95s[0m 640ms/step - accuracy: 0.7998 - loss: 0.5376 - val_accuracy: 0.8003 - val_loss: 0.5364


In [27]:
# EVALUATE THE MODEL
test_loss, test_acc = model.evaluate(test_generator)
print(f"\nTest Accuracy: {test_acc:.4f}")
print(f"Test Loss: {test_loss:.4f}")

[1m185/185[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 190ms/step - accuracy: 0.7999 - loss: 0.5190

Test Accuracy: 0.7999
Test Loss: 0.5190


In [None]:
# SAVE THE MODEL
model.save('exercise_6_trained_model_improved.h5')

In [28]:
# SIMPLE INFERENCE SCRIPT
from tensorflow.keras.preprocessing import image

def predict_image(img_path, model_path='muffin_vs_chihuahua_cnn_improved.keras'):
    model = tf.keras.models.load_model(model_path)
    img = image.load_img(img_path, target_size=IMG_SIZE)
    img_array = image.img_to_array(img) / 255.0
    img_array = np.expand_dims(img_array, axis=0)
    pred = model.predict(img_array)[0,0]
    label = "Chihuahua" if pred >= 0.5 else "Muffin"
    confidence = pred if pred >= 0.5 else 1 - pred
    print(f"Prediction: {label} (confidence: {confidence:.2%})")
    return label, confidence

 4.	(50 points) Using either the muffin vs Chihuahua dataset OR your chosen dataset, choose a CNN architecture from the following, and modify Jupyter Notebook OR create a new one.
 DATASET DIRECTORY CONFIGURATION,; 
 Chihuahua vs Muffin dataset from Kaggle
InceptionNet Architecture


In [10]:
# DATA PREPROCESSING & AUGMENTATION
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
    validation_split=0.2
)
test_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary',
    subset='training'
)
val_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary',
    subset='validation'
)
test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary',
    shuffle=False
)

Found 4735 images belonging to 2 classes.
Found 1182 images belonging to 2 classes.
Found 5917 images belonging to 2 classes.


In [11]:
def inception_module(x, filters_1x1, filters_3x3_reduce, filters_3x3, filters_5x5_reduce, filters_5x5, filters_pool, l2_reg=0.001):
    """
    Custom Inception module with parallel convolutions of different sizes
    """
    # 1x1 convolution branch
    conv_1x1 = Conv2D(filters_1x1, (1, 1), padding='same', activation='relu',
                      kernel_regularizer=regularizers.l2(l2_reg))(x)
    conv_1x1 = BatchNormalization()(conv_1x1)
    
    # 3x3 convolution branch
    conv_3x3 = Conv2D(filters_3x3_reduce, (1, 1), padding='same', activation='relu',
                      kernel_regularizer=regularizers.l2(l2_reg))(x)
    conv_3x3 = BatchNormalization()(conv_3x3)
    conv_3x3 = Conv2D(filters_3x3, (3, 3), padding='same', activation='relu',
                      kernel_regularizer=regularizers.l2(l2_reg))(conv_3x3)
    conv_3x3 = BatchNormalization()(conv_3x3)
    
    # 5x5 convolution branch
    conv_5x5 = Conv2D(filters_5x5_reduce, (1, 1), padding='same', activation='relu',
                      kernel_regularizer=regularizers.l2(l2_reg))(x)
    conv_5x5 = BatchNormalization()(conv_5x5)
    conv_5x5 = Conv2D(filters_5x5, (5, 5), padding='same', activation='relu',
                      kernel_regularizer=regularizers.l2(l2_reg))(conv_5x5)
    conv_5x5 = BatchNormalization()(conv_5x5)
    
    # Max pooling branch
    pool = MaxPooling2D((3, 3), strides=(1, 1), padding='same')(x)
    pool = Conv2D(filters_pool, (1, 1), padding='same', activation='relu',
                  kernel_regularizer=regularizers.l2(l2_reg))(pool)
    pool = BatchNormalization()(pool)
    
    # Concatenate all branches
    output = concatenate([conv_1x1, conv_3x3, conv_5x5, pool], axis=-1)
    
    return output

In [12]:
# Input layer
input_layer = Input(shape=(IMG_SIZE[0], IMG_SIZE[1], 3))

In [14]:

l2_reg = 0.001

# Initial convolution layers
x = Conv2D(64, (7, 7), strides=(2, 2), padding='same', activation='relu',
           kernel_regularizer=regularizers.l2(l2_reg))(input_layer)
x = BatchNormalization()(x)
x = MaxPooling2D((3, 3), strides=(2, 2), padding='same')(x)
x = Dropout(0.2)(x)

In [15]:
# First Inception Module
x = inception_module(x, filters_1x1=64, filters_3x3_reduce=96, filters_3x3=128,
                     filters_5x5_reduce=16, filters_5x5=32, filters_pool=32, l2_reg=l2_reg)
x = MaxPooling2D((3, 3), strides=(2, 2), padding='same')(x)
x = Dropout(0.25)(x)

In [16]:
# Second Inception Module
x = inception_module(x, filters_1x1=128, filters_3x3_reduce=128, filters_3x3=192,
                     filters_5x5_reduce=32, filters_5x5=96, filters_pool=64, l2_reg=l2_reg)
x = MaxPooling2D((3, 3), strides=(2, 2), padding='same')(x)
x = Dropout(0.3)(x)

In [17]:
# Third Inception Module
x = inception_module(x, filters_1x1=192, filters_3x3_reduce=96, filters_3x3=208,
                     filters_5x5_reduce=16, filters_5x5=48, filters_pool=64, l2_reg=l2_reg)
x = Dropout(0.3)(x)

In [18]:
# Global Average Pooling and Dense layers
x = layers.GlobalAveragePooling2D()(x)
x = Dense(256, activation='relu', kernel_regularizer=regularizers.l2(l2_reg))(x)
x = BatchNormalization()(x)
x = Dropout(0.5)(x)
output_layer = Dense(1, activation='sigmoid')(x)

In [19]:
# Create model
model = models.Model(inputs=input_layer, outputs=output_layer)

In [None]:
# COMPILE MODEL
initial_learning_rate = 0.001
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate,
    decay_steps=10000,
    decay_rate=0.9, 
    staircase=True
)
optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)

model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

In [21]:
# Display model architecture
model.summary()

In [24]:
# TRAINING WITH CALLBACKS
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=7,
    restore_best_weights=True
)

model_checkpoint = tf.keras.callbacks.ModelCheckpoint(
    'best_inceptionnet_muffin_chihuahua.keras',
    monitor='val_accuracy',
    save_best_only=True,
    mode='max'
)

reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=3,
    min_lr=1e-7
)

history = model.fit(
    train_generator,
    epochs=10,
    validation_data=val_generator,
    callbacks=[early_stopping, model_checkpoint, reduce_lr]
)

Epoch 1/10
[1m148/148[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m91s[0m 618ms/step - accuracy: 0.7973 - loss: 0.5384 - val_accuracy: 0.8003 - val_loss: 0.5689 - learning_rate: 0.0010
Epoch 2/10
[1m148/148[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m91s[0m 618ms/step - accuracy: 0.7992 - loss: 0.5363 - val_accuracy: 0.8003 - val_loss: 0.5416 - learning_rate: 0.0010
Epoch 3/10
[1m148/148[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m91s[0m 615ms/step - accuracy: 0.7987 - loss: 0.5337 - val_accuracy: 0.8003 - val_loss: 0.5310 - learning_rate: 0.0010
Epoch 4/10
[1m148/148[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m88s[0m 595ms/step - accuracy: 0.7981 - loss: 0.5291 - val_accuracy: 0.8003 - val_loss: 0.5321 - learning_rate: 0.0010
Epoch 5/10
[1m148/148[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m88s[0m 594ms/step - accuracy: 0.7996 - loss: 0.5319 - val_accuracy: 0.8003 - val_loss: 0.5768 - learning_rate: 0.0010
Epoch 6/10
[1m148/148[0m [32m━━━━━━━━━━━━━━━━━━━━[0

TypeError: This optimizer was created with a `LearningRateSchedule` object as its `learning_rate` constructor argument, hence its learning rate is not settable. If you need the learning rate to be settable, you should instantiate the optimizer with a float `learning_rate` argument.

In [29]:
# EVALUATE THE MODEL
test_loss, test_acc = model.evaluate(test_generator)
print(f"\nTest Accuracy: {test_acc:.4f}")
print(f"Test Loss: {test_loss:.4f}")

[1m185/185[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 151ms/step - accuracy: 0.7999 - loss: 0.5190

Test Accuracy: 0.7999
Test Loss: 0.5190


In [None]:
# SAVE THE FINAL MODEL
model.save('inceptionnet_muffin_chihuahua.keras')

In [None]:
# INFERENCE FUNCTION
from tensorflow.keras.preprocessing import image

def predict_image(img_path, model_path='inceptionnet_muffin_chihuahua.keras'):
    model = tf.keras.models.load_model(model_path)
    img = image.load_img(img_path, target_size=IMG_SIZE)
    img_array = image.img_to_array(img) / 255.0
    img_array = np.expand_dims(img_array, axis=0)
    pred = model.predict(img_array)[0,0]
    label = "Chihuahua" if pred >= 0.5 else "Muffin"
    confidence = pred if pred >= 0.5 else 1 - pred
    print(f"Prediction: {label} (confidence: {confidence:.2%})")
    return label, confidence

# DATASET DIRECTORY CONFIGURATION
# For Cats vs Dogs dataset from Kaggle:

In [30]:
train_dir =  kagglehub.dataset_download("karakaggle/kaggle-cat-vs-dog-dataset") # e.g. './muffin-vs-chihuahua/train'
test_dir =  kagglehub.dataset_download("karakaggle/kaggle-cat-vs-dog-dataset")    # e.g. './muffin-vs-chihuahua/test'

Downloading from https://www.kaggle.com/api/v1/datasets/download/karakaggle/kaggle-cat-vs-dog-dataset?dataset_version_number=1...


100%|██████████| 787M/787M [00:38<00:00, 21.7MB/s] 

Extracting files...





In [31]:
# DATA PREPROCESSING & AUGMENTATION

train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
    validation_split=0.2
)
test_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary',
    subset='training'
)
val_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary',
    subset='validation'
)
test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary',
    shuffle=False
)

Found 19968 images belonging to 1 classes.
Found 4991 images belonging to 1 classes.
Found 24959 images belonging to 1 classes.


In [32]:
# IMPROVED CNN MODEL ARCHITECTURE WITH REGULARIZATION
initial_learning_rate = 0.001
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate,
    decay_steps=10000,
    decay_rate=0.9,
    staircase=True
)

optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)

In [33]:
# L2 regularization parameter
l2_reg = 0.001

In [34]:
model = models.Sequential([
    # First Conv Block
    layers.Conv2D(32, (3, 3), activation='relu', 
                  kernel_regularizer=regularizers.l2(l2_reg),
                  input_shape=(IMG_SIZE[0], IMG_SIZE[1], 3)),
    layers.BatchNormalization(),
    layers.MaxPooling2D(2, 2),
    layers.Dropout(0.25),
    
    # Second Conv Block
    layers.Conv2D(64, (3, 3), activation='relu',
                  kernel_regularizer=regularizers.l2(l2_reg)),
    layers.BatchNormalization(),
    layers.MaxPooling2D(2, 2),
    layers.Dropout(0.25),
    
    # Third Conv Block
    layers.Conv2D(128, (3, 3), activation='relu',
                  kernel_regularizer=regularizers.l2(l2_reg)),
    layers.BatchNormalization(),
    layers.MaxPooling2D(2, 2),
    layers.Dropout(0.3),
    
    # Dense Layers
    layers.Flatten(),
    layers.Dense(128, activation='relu',
                 kernel_regularizer=regularizers.l2(l2_reg)),
    layers.BatchNormalization(),
    layers.Dropout(0.5),
    layers.Dense(1, activation='sigmoid')
])


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [35]:
# Compile with updated optimizer
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])


In [36]:
# Display model architecture
model.summary()

In [38]:
# TRAINING WITH EARLY STOPPING AND MODEL CHECKPOINT
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True
)

model_checkpoint = tf.keras.callbacks.ModelCheckpoint(
    'best_muffin_vs_chihuahua_cnn.keras',
    monitor='val_accuracy',
    save_best_only=True,
    mode='max'
)

history = model.fit(
    train_generator,
    epochs=5,
    validation_data=val_generator,
    callbacks=[early_stopping, model_checkpoint]
)

Epoch 1/5
[1m100/624[0m [32m━━━[0m[37m━━━━━━━━━━━━━━━━━[0m [1m7:09[0m 819ms/step - accuracy: 0.8915 - loss: 0.7581



[1m624/624[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m594s[0m 952ms/step - accuracy: 0.9860 - loss: 0.3290 - val_accuracy: 1.0000 - val_loss: 0.1250
Epoch 2/5
[1m624/624[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m309s[0m 495ms/step - accuracy: 0.9998 - loss: 0.0770 - val_accuracy: 1.0000 - val_loss: 0.0374
Epoch 3/5
[1m624/624[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m256s[0m 410ms/step - accuracy: 1.0000 - loss: 0.0247 - val_accuracy: 1.0000 - val_loss: 0.0192
Epoch 4/5
[1m624/624[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m309s[0m 495ms/step - accuracy: 0.9999 - loss: 0.0282 - val_accuracy: 1.0000 - val_loss: 0.0137
Epoch 5/5
[1m624/624[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m290s[0m 465ms/step - accuracy: 1.0000 - loss: 0.0150 - val_accuracy: 1.0000 - val_loss: 0.0098


In [39]:
# EVALUATE THE MODEL
test_loss, test_acc = model.evaluate(test_generator)
print(f"\nTest Accuracy: {test_acc:.4f}")
print(f"Test Loss: {test_loss:.4f}")

[1m780/780[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m70s[0m 90ms/step - accuracy: 0.9999 - loss: 0.0099

Test Accuracy: 0.9999
Test Loss: 0.0099


In [40]:
# SAVE THE FINAL MODEL
model.save('“exercise_6_custom_lastname.h5')



In [41]:
from tensorflow.keras.preprocessing import image

def predict_image(img_path, model_path='muffin_vs_chihuahua_cnn_improved.keras'):
    model = tf.keras.models.load_model(model_path)
    img = image.load_img(img_path, target_size=IMG_SIZE)
    img_array = image.img_to_array(img) / 255.0
    img_array = np.expand_dims(img_array, axis=0)
    pred = model.predict(img_array)[0,0]
    label = "Chihuahua" if pred >= 0.5 else "Muffin"
    confidence = pred if pred >= 0.5 else 1 - pred
    print(f"Prediction: {label} (confidence: {confidence:.2%})")
    return label, confidence