This is an example of a simple CNN developed, trained and utilized

AI was used to help generate the codebase

Note: Make sure that the tensorflow package is installed in your device.

In [24]:
# Lib imports
import os
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers, models
import numpy as np

In [25]:
# DATASET DIRECTORY CONFIGURATION
# Download and unzip the dataset from Kaggle, set the directory paths accordingly.
train_dir = "train_1"  # e.g. './muffin-vs-chihuahua/train'
test_dir = "test_1"    # e.g. './muffin-vs-chihuahua/test'

In [26]:
# IMAGE PARAMETERS
# Used to resize the input images, also will determine the input size of your input layer.
IMG_SIZE = (128, 128)
BATCH_SIZE = 32

In [27]:
# DATA PREPROCESSING & AUGMENTATION
# Optional but recommended for image processing tasks, especially with limited data.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
    validation_split=0.2
)
test_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary',
    subset='training'
)
val_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary',
    subset='validation'
)
test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary',
    shuffle=False
)

Found 3788 images belonging to 2 classes.


Found 945 images belonging to 2 classes.
Found 1184 images belonging to 2 classes.


# APPLIED IMPROVEMENTS

This cell lists the improvements applied to the model, training, inference and diagnostics so reviewers can see the changes next to the code.

- **Model architecture**
  - L2 weight regularization added (kernel_regularizer=regularizers.l2(1e-4)).
  - BatchNormalization layers added after convolutional and dense blocks.
  - Dropout layers inserted (rates: 0.25, 0.25, 0.3, 0.5) to reduce overfitting.
  - Dense layer increased to 256 units for better capacity.

- **Compile & optimizer**
  - Kept ExponentialDecay LR schedule with Adam optimizer (initial_learning_rate=0.001).
  - Compiled with `binary_crossentropy` and `accuracy` metric.

- **Training**
  - EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True).
  - ModelCheckpoint saves best model to `best_muffin_vs_chihuahua.h5`.
  - ReduceLROnPlateau to reduce LR on plateau.
  - Increased epochs to 30 (EarlyStopping short-circuits if needed).

- **Inference & predictions**
  - `load_best_model()` helper added (prefers checkpoint, falls back to final model).
  - `predict_image()` now returns `(label, confidence)` instead of printing.
  - Example usage cell prints label + confidence for `run_1/run_1.jpg` and `run_1/run_2.jpg`.

- **Diagnostics**
  - Added environment & sanity-check cell to print Python exec, TensorFlow status, dataset folder contents, and whether the model file exists.

(This summary is intentionally concise — open the code cells below to see the actual edits.)


In [None]:
# SIMPLE CNN MODEL ARCHITECTURE

# Some modifications are applied
initial_learning_rate = 0.001
# We are combining ExponentialDecay with Adam optimizer for better learning rate management
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate,
    decay_steps=10000,
    decay_rate=0.9,
    staircase=True,
)

# Create the optimizer with the learning rate schedule
optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)

# Regularization and improved architecture: use L2, BatchNorm and Dropout
from tensorflow.keras import regularizers

weight_decay = 1e-4
model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', kernel_regularizer=regularizers.l2(weight_decay), input_shape=(IMG_SIZE[0], IMG_SIZE[1], 3)),
    layers.BatchNormalization(),
    layers.MaxPooling2D(2, 2),
    layers.Dropout(0.25),

    layers.Conv2D(64, (3, 3), activation='relu', kernel_regularizer=regularizers.l2(weight_decay)),
    layers.BatchNormalization(),
    layers.MaxPooling2D(2, 2),
    layers.Dropout(0.25),

    layers.Conv2D(128, (3, 3), activation='relu', kernel_regularizer=regularizers.l2(weight_decay)),
    layers.BatchNormalization(),
    layers.MaxPooling2D(2, 2),
    layers.Dropout(0.3),

    layers.Flatten(),
    layers.Dense(256, activation='relu', kernel_regularizer=regularizers.l2(weight_decay)),
    layers.BatchNormalization(),
    layers.Dropout(0.5),
    layers.Dense(1, activation='sigmoid')
])


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [None]:
# Configure the model optimizers, loss function, and metrics
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

# Training callbacks will be setup in the training cell


In [None]:
# TRAINING THE CNN
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau

best_model_path = 'best_muffin_vs_chihuahua.h5'
callbacks = [
    EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True),
    ModelCheckpoint(best_model_path, monitor='val_loss', save_best_only=True),
    ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, verbose=1)
]

history = model.fit(
    train_generator,
    epochs=30,
    validation_data=val_generator,
    callbacks=callbacks
)

# After training, save the final best model
model.save('muffin_vs_chihuahua_cnn.h5')


  self._warn_if_super_not_called()


Epoch 1/10
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 287ms/step - accuracy: 0.7115 - loss: 0.5503 - val_accuracy: 0.8222 - val_loss: 0.4201
Epoch 2/10
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 284ms/step - accuracy: 0.8268 - loss: 0.3992 - val_accuracy: 0.8561 - val_loss: 0.3504
Epoch 3/10
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 283ms/step - accuracy: 0.8390 - loss: 0.3557 - val_accuracy: 0.8148 - val_loss: 0.3936
Epoch 4/10
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 280ms/step - accuracy: 0.8574 - loss: 0.3219 - val_accuracy: 0.8984 - val_loss: 0.2510
Epoch 5/10
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 283ms/step - accuracy: 0.8754 - loss: 0.2944 - val_accuracy: 0.9153 - val_loss: 0.2087
Epoch 6/10
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 282ms/step - accuracy: 0.8897 - loss: 0.2703 - val_accuracy: 0.9238 - val_loss: 0.2092
Epoch 7/10

In [31]:
# EVALUATE THE MODEL
test_loss, test_acc = model.evaluate(test_generator)
print(f"Test Accuracy: {test_acc}")

[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 124ms/step - accuracy: 0.8446 - loss: 0.3661
Test Accuracy: 0.8445945978164673


In [32]:
# SAVE THE MODEL
model.save('muffin_vs_chihuahua_cnn.h5')



In [None]:
# SIMPLE INFERENCE SCRIPT
from tensorflow.keras.preprocessing import image
import os


def load_best_model(model_path='best_muffin_vs_chihuahua.h5', fallback='muffin_vs_chihuahua_cnn.h5'):
    if os.path.exists(model_path):
        return tf.keras.models.load_model(model_path)
    elif os.path.exists(fallback):
        return tf.keras.models.load_model(fallback)
    else:
        raise FileNotFoundError('No trained model found. Please train the model first.')


def predict_image(img_path, model=None, model_path='best_muffin_vs_chihuahua.h5'):
    if model is None:
        model = load_best_model(model_path)
    img = image.load_img(img_path, target_size=IMG_SIZE)
    img_array = image.img_to_array(img) / 255.0
    img_array = np.expand_dims(img_array, axis=0)
    pred = model.predict(img_array)[0,0]
    label = "Chihuahua" if pred >= 0.5 else "Muffin"
    return label, float(pred)

# Example usage helper cell will call predict_image and print confidence


In [None]:
# Example usage: run predictions for the two example images and print label + confidence
try:
    model_for_pred = load_best_model()
except FileNotFoundError as e:
    print(e)
    model_for_pred = None

examples = [
    ("run_1/run_1.jpg", 'Run 1'),
    ("run_1/run_2.jpg", 'Run 2')
]

for path, name in examples:
    if os.path.exists(path):
        label, conf = predict_image(path, model=model_for_pred)
        print(f"{name} -> {label} (confidence: {conf:.3f})")
    else:
        print(f"{name} image not found at {path}")




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 67ms/step




Prediction: Muffin (confidence: 0.27)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 64ms/step
Prediction: Chihuahua (confidence: 0.57)


In [None]:
# ENVIRONMENT & SANITY CHECKS
import sys, os
print('Python executable:', sys.executable)
print('Python version:', sys.version)

try:
    import tensorflow as tf
    print('TensorFlow version:', tf.__version__)
except Exception as e:
    print('TensorFlow import failed:', repr(e))

# Show the dataset directories so you can confirm paths
TRAIN_DIR = 'train_1'
TEST_DIR = 'test_1'
for d in (TRAIN_DIR, TEST_DIR):
    print('\nContents of', d)
    if os.path.exists(d):
        for root, dirs, files in os.walk(d):
            print(root)
            print('  subdirs:', dirs[:5])
            print('  file sample:', files[:5])
            break
    else:
        print('  (does not exist)')

# Quick check for model file used in predict_image
print('\nModel file exists?:', os.path.exists('muffin_vs_chihuahua_cnn.h5'))
