In [15]:
# Lib imports
import os
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers, models, regularizers
import numpy as np

In [16]:
# DATASET DIRECTORY CONFIGURATION
# Download and unzip the dataset from Kaggle, set the directory paths accordingly.
train_dir = "muffin_vs_chihuahua/train"  # e.g. './muffin-vs-chihuahua/train'
test_dir = "muffin_vs_chihuahua/test"    # e.g. './muffin-vs-chihuahua/test'

In [17]:
# IMAGE PARAMETERS
# Used to resize the input images, also will determine the input size of your input layer.
IMG_SIZE = (128, 128)
BATCH_SIZE = 32

In [18]:
# DATA PREPROCESSING & AUGMENTATION
# Optional but recommended for image processing tasks, especially with limited data.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
    validation_split=0.2
)
test_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary',
    subset='training'
)
val_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary',
    subset='validation'
)
test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary',
    shuffle=False
)

Found 3788 images belonging to 2 classes.
Found 945 images belonging to 2 classes.
Found 945 images belonging to 2 classes.
Found 1184 images belonging to 2 classes.
Found 1184 images belonging to 2 classes.


In [19]:
# IMPROVED CNN MODEL ARCHITECTURE WITH REGULARIZATION AND DROPOUT

# Some modifications are applied
initial_learning_rate = 0.001
# We are combining ExponentialDecay with Adam optimizer for better learning rate management
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate,
    decay_steps=10000,
    decay_rate=0.9,
    staircase=True
)

# Create the optimizer with the learning rate schedule
optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)

# Applied dropout layers and L2 regularization to reduce overfitting
model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', kernel_regularizer=regularizers.l2(0.001), input_shape=(IMG_SIZE[0], IMG_SIZE[1], 3)),
    layers.MaxPooling2D(2, 2),
    layers.Dropout(0.25),
    layers.Conv2D(64, (3, 3), activation='relu', kernel_regularizer=regularizers.l2(0.001)),
    layers.MaxPooling2D(2, 2),
    layers.Dropout(0.25),
    layers.Conv2D(128, (3, 3), activation='relu', kernel_regularizer=regularizers.l2(0.001)),
    layers.MaxPooling2D(2, 2),
    layers.Flatten(),
    layers.Dense(128, activation='relu', kernel_regularizer=regularizers.l2(0.001)),
    layers.Dropout(0.5),
    layers.Dense(1, activation='sigmoid')
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [20]:
# Configure the model optimizers, loss function, and metrics
# model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy']) # old
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

In [21]:
# TRAINING THE CNN
history = model.fit(
    train_generator,
    epochs=10,
    validation_data=val_generator
)

Epoch 1/10
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m253s[0m 2s/step - accuracy: 0.6668 - loss: 0.8684 - val_accuracy: 0.7778 - val_loss: 0.6915
Epoch 2/10
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m253s[0m 2s/step - accuracy: 0.6668 - loss: 0.8684 - val_accuracy: 0.7778 - val_loss: 0.6915
Epoch 2/10
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m107s[0m 898ms/step - accuracy: 0.7724 - loss: 0.6151 - val_accuracy: 0.7979 - val_loss: 0.5680
Epoch 3/10
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m107s[0m 898ms/step - accuracy: 0.7724 - loss: 0.6151 - val_accuracy: 0.7979 - val_loss: 0.5680
Epoch 3/10
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m108s[0m 906ms/step - accuracy: 0.8012 - loss: 0.5411 - val_accuracy: 0.8169 - val_loss: 0.5075
Epoch 4/10
[1m119/119[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m108s[0m 906ms/step - accuracy: 0.8012 - loss: 0.5411 - val_accuracy: 0.8169 - val_loss: 0.5075
Epoch 4/10

In [22]:
# EVALUATE THE MODEL
test_loss, test_acc = model.evaluate(test_generator)
print(f"Test Accuracy: {test_acc}")

[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m63s[0m 2s/step - accuracy: 0.7973 - loss: 0.4628
Test Accuracy: 0.7972972989082336
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m63s[0m 2s/step - accuracy: 0.7973 - loss: 0.4628
Test Accuracy: 0.7972972989082336


In [23]:
# SAVE THE IMPROVED MODEL
model.save('exercise_6_trained_model_improved.keras')

In [24]:
# SIMPLE INFERENCE SCRIPT
from tensorflow.keras.preprocessing import image

def predict_image(img_path, model_path='exercise_6_trained_model_improved.keras'):
    model = tf.keras.models.load_model(model_path)
    img = image.load_img(img_path, target_size=IMG_SIZE)
    img_array = image.img_to_array(img) / 255.0
    img_array = np.expand_dims(img_array, axis=0)
    pred = model.predict(img_array)[0,0]
    label = "Chihuahua" if pred >= 0.5 else "Muffin"
    print(f"Prediction: {label} (confidence: {pred:.2f})")
    return label, pred


In [25]:
# Example usage:
predict_image("muffin_vs_chihuahua/test/chihuahua/img_0_5.jpg")
predict_image("muffin_vs_chihuahua/test/chihuahua/img_0_8.jpg")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 288ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 288ms/step
Prediction: Muffin (confidence: 0.19)
Prediction: Muffin (confidence: 0.19)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 258ms/step
Prediction: Chihuahua (confidence: 0.54)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 258ms/step
Prediction: Chihuahua (confidence: 0.54)


('Chihuahua', np.float32(0.54451025))

## EXERCISE 6 ANSWERS

### Improvements Applied:
1. **Dropout Layers**: Added dropout (0.25 after Conv layers, 0.5 after Dense layer) to prevent overfitting
2. **L2 Regularization**: Applied L2 regularization (0.001) to all Conv2D and Dense layers to penalize large weights

### Results:

In [26]:
# a. Test Accuracy (5 points)
print("=" * 60)
print("ANSWER a: TEST ACCURACY")
print("=" * 60)
print(f"Test Accuracy: {test_acc:.4f} ({test_acc*100:.2f}%)")
print(f"Test Loss: {test_loss:.4f}")
print("=" * 60)

ANSWER a: TEST ACCURACY
Test Accuracy: 0.7973 (79.73%)
Test Loss: 0.4628


In [27]:
# b. img_0_5 and img_0_8 predictions (10 points)
print("\n" + "=" * 60)
print("ANSWER b: IMAGE PREDICTIONS AND CONFIDENCE")
print("=" * 60)

# Predict img_0_5
print("\nImage: img_0_5.jpg")
label_5, conf_5 = predict_image("muffin_vs_chihuahua/test/chihuahua/img_0_5.jpg")

print("\nImage: img_0_8.jpg")
label_8, conf_8 = predict_image("muffin_vs_chihuahua/test/chihuahua/img_0_8.jpg")

print("\n" + "=" * 60)
print("SUMMARY:")
print(f"  img_0_5: {label_5} (confidence: {conf_5:.4f})")
print(f"  img_0_8: {label_8} (confidence: {conf_8:.4f})")
print("=" * 60)


ANSWER b: IMAGE PREDICTIONS AND CONFIDENCE

Image: img_0_5.jpg
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 219ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 219ms/step
Prediction: Muffin (confidence: 0.19)

Image: img_0_8.jpg
Prediction: Muffin (confidence: 0.19)

Image: img_0_8.jpg
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 197ms/step
Prediction: Chihuahua (confidence: 0.54)

SUMMARY:
  img_0_5: Muffin (confidence: 0.1950)
  img_0_8: Chihuahua (confidence: 0.5445)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 197ms/step
Prediction: Chihuahua (confidence: 0.54)

SUMMARY:
  img_0_5: Muffin (confidence: 0.1950)
  img_0_8: Chihuahua (confidence: 0.5445)


In [28]:
# c. Model saved as "exercise_6_trained_model_improved.h5" (10 points)
print("\n" + "=" * 60)
print("ANSWER c: MODEL SAVED")
print("=" * 60)
print("✓ Model saved as: exercise_6_trained_model_improved.h5")
print("✓ Model includes:")
print("  - Dropout layers (0.25 after Conv layers, 0.5 after Dense)")
print("  - L2 regularization (0.001) on all Conv2D and Dense layers")
print("=" * 60)


ANSWER c: MODEL SAVED
✓ Model saved as: exercise_6_trained_model_improved.h5
✓ Model includes:
  - Dropout layers (0.25 after Conv layers, 0.5 after Dense)
  - L2 regularization (0.001) on all Conv2D and Dense layers


---

## PART 2: CUSTOM DATASET APPLICATION (25 POINTS)

### New Dataset: Cats vs Dogs
We will apply the same CNN architecture to a different binary classification problem using the Cats vs Dogs dataset.

In [1]:
# CUSTOM DATASET CONFIGURATION
# The cats_vs_dogs dataset structure: PetImages/Cat and PetImages/Dog
# We need to create train/test splits from this structure

import shutil
from pathlib import Path
import random

# Source directory
source_dir = Path("cats_vs_dogs/PetImages")

# Create train/test directories
custom_train_dir = "cats_vs_dogs_split/train"
custom_test_dir = "cats_vs_dogs_split/test"

# Create directory structure if it doesn't exist
for split in ['train', 'test']:
    for category in ['Cat', 'Dog']:
        Path(f"cats_vs_dogs_split/{split}/{category}").mkdir(parents=True, exist_ok=True)

# Function to split and copy images
def split_dataset(source_category, train_ratio=0.8):
    """Split images from source into train/test folders"""
    source_path = source_dir / source_category
    
    # Get all image files
    images = [f for f in source_path.glob('*') if f.suffix.lower() in ['.jpg', '.jpeg', '.png']]
    
    # Shuffle and split
    random.seed(42)  # For reproducibility
    random.shuffle(images)
    split_idx = int(len(images) * train_ratio)
    
    train_images = images[:split_idx]
    test_images = images[split_idx:]
    
    print(f"{source_category}: {len(train_images)} train, {len(test_images)} test")
    
    # Copy to train
    for img in train_images[:1000]:  # Limit to 1000 images per class for faster training
        dest = Path(custom_train_dir) / source_category / img.name
        if not dest.exists():
            shutil.copy2(img, dest)
    
    # Copy to test
    for img in test_images[:200]:  # Limit to 200 images per class for testing
        dest = Path(custom_test_dir) / source_category / img.name
        if not dest.exists():
            shutil.copy2(img, dest)

# Only run if directories are empty (to avoid duplicates)
if not list(Path(custom_train_dir).rglob('*.jpg')):
    print("Creating train/test split...")
    split_dataset('Cat')
    split_dataset('Dog')
    print("Dataset split completed!")
else:
    print("Dataset already split. Skipping...")

Creating train/test split...
Cat: 9992 train, 2498 test
Dog: 9975 train, 2494 test
Dataset split completed!


In [30]:
# DATA PREPROCESSING FOR CUSTOM DATASET
# Using the same augmentation strategy for consistency

custom_train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
    validation_split=0.2
)
custom_test_datagen = ImageDataGenerator(rescale=1./255)

custom_train_generator = custom_train_datagen.flow_from_directory(
    custom_train_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary',
    subset='training'
)
custom_val_generator = custom_train_datagen.flow_from_directory(
    custom_train_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary',
    subset='validation'
)
custom_test_generator = custom_test_datagen.flow_from_directory(
    custom_test_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary',
    shuffle=False
)

# Display class mapping
print("\nClass indices:", custom_train_generator.class_indices)

Found 1600 images belonging to 2 classes.
Found 400 images belonging to 2 classes.
Found 400 images belonging to 2 classes.

Class indices: {'Cat': 0, 'Dog': 1}
Found 400 images belonging to 2 classes.
Found 400 images belonging to 2 classes.

Class indices: {'Cat': 0, 'Dog': 1}


In [31]:
# BUILD CUSTOM CNN MODEL (Same architecture with regularization and dropout)

# Reset learning rate schedule for new training
custom_lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=0.001,
    decay_steps=10000,
    decay_rate=0.9,
    staircase=True
)

custom_optimizer = tf.keras.optimizers.Adam(learning_rate=custom_lr_schedule)

# Create model with same architecture
custom_model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', kernel_regularizer=regularizers.l2(0.001), input_shape=(IMG_SIZE[0], IMG_SIZE[1], 3)),
    layers.MaxPooling2D(2, 2),
    layers.Dropout(0.25),
    layers.Conv2D(64, (3, 3), activation='relu', kernel_regularizer=regularizers.l2(0.001)),
    layers.MaxPooling2D(2, 2),
    layers.Dropout(0.25),
    layers.Conv2D(128, (3, 3), activation='relu', kernel_regularizer=regularizers.l2(0.001)),
    layers.MaxPooling2D(2, 2),
    layers.Flatten(),
    layers.Dense(128, activation='relu', kernel_regularizer=regularizers.l2(0.001)),
    layers.Dropout(0.5),
    layers.Dense(1, activation='sigmoid')
])

custom_model.compile(optimizer=custom_optimizer, loss='binary_crossentropy', metrics=['accuracy'])

In [32]:
# TRAIN THE CUSTOM MODEL
custom_history = custom_model.fit(
    custom_train_generator,
    epochs=10,
    validation_data=custom_val_generator
)

Epoch 1/10
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m143s[0m 3s/step - accuracy: 0.4881 - loss: 1.0769 - val_accuracy: 0.5000 - val_loss: 0.9276
Epoch 2/10
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m143s[0m 3s/step - accuracy: 0.4881 - loss: 1.0769 - val_accuracy: 0.5000 - val_loss: 0.9276
Epoch 2/10
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 800ms/step - accuracy: 0.4988 - loss: 0.8868 - val_accuracy: 0.5000 - val_loss: 0.8537
Epoch 3/10
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 800ms/step - accuracy: 0.4988 - loss: 0.8868 - val_accuracy: 0.5000 - val_loss: 0.8537
Epoch 3/10
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 923ms/step - accuracy: 0.4925 - loss: 0.8343 - val_accuracy: 0.5000 - val_loss: 0.8154
Epoch 4/10
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 923ms/step - accuracy: 0.4925 - loss: 0.8343 - val_accuracy: 0.5000 - val_loss: 0.8154
Epoch 4/10
[1m50/50[0m 

In [33]:
# EVALUATE THE CUSTOM MODEL
custom_test_loss, custom_test_acc = custom_model.evaluate(custom_test_generator)
print(f"\nCustom Dataset Test Accuracy: {custom_test_acc:.4f} ({custom_test_acc*100:.2f}%)")
print(f"Custom Dataset Test Loss: {custom_test_loss:.4f}")

[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 2s/step - accuracy: 0.5500 - loss: 0.7293

Custom Dataset Test Accuracy: 0.5500 (55.00%)
Custom Dataset Test Loss: 0.7293
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 2s/step - accuracy: 0.5500 - loss: 0.7293

Custom Dataset Test Accuracy: 0.5500 (55.00%)
Custom Dataset Test Loss: 0.7293


In [None]:
# SAVE THE CUSTOM MODEL (25 points)
custom_model.save('exercise_6_custom_gamo.h5')
print("\n" + "=" * 60)
print("✓ Custom model saved as: exercise_6_custom_gamo.h5")
print("=" * 60)




✓ Custom model saved as: exercise_6_custom_gamo.h5

✓ Custom model saved as: exercise_6_custom_gamo.h5


In [35]:
# TEST INFERENCE ON CUSTOM DATASET
def predict_custom_image(img_path, model_path='exercise_6_custom_gamo.h5'):
    """Predict image using the custom trained model"""
    model = tf.keras.models.load_model(model_path)
    img = image.load_img(img_path, target_size=IMG_SIZE)
    img_array = image.img_to_array(img) / 255.0
    img_array = np.expand_dims(img_array, axis=0)
    pred = model.predict(img_array)[0,0]
    
    # Based on class_indices: Cat=0, Dog=1 (usually alphabetical)
    label = "Dog" if pred >= 0.5 else "Cat"
    print(f"Prediction: {label} (confidence: {pred:.4f})")
    return label, pred

# Example usage - Test with sample images from test set
print("\nTesting predictions on sample images:")
print("-" * 50)

# Get sample image paths from test set
import os
cat_test_dir = os.path.join(custom_test_dir, "Cat")
dog_test_dir = os.path.join(custom_test_dir, "Dog")

if os.path.exists(cat_test_dir) and os.path.exists(dog_test_dir):
    cat_images = [f for f in os.listdir(cat_test_dir) if f.endswith(('.jpg', '.jpeg', '.png'))]
    dog_images = [f for f in os.listdir(dog_test_dir) if f.endswith(('.jpg', '.jpeg', '.png'))]
    
    if cat_images:
        cat_sample = os.path.join(cat_test_dir, cat_images[0])
        print(f"\nTest Cat Image: {cat_images[0]}")
        predict_custom_image(cat_sample)
    
    if dog_images:
        dog_sample = os.path.join(dog_test_dir, dog_images[0])
        print(f"\nTest Dog Image: {dog_images[0]}")
        predict_custom_image(dog_sample)


Testing predictions on sample images:
--------------------------------------------------

Test Cat Image: 10116.jpg




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 205ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 205ms/step
Prediction: Cat (confidence: 0.4977)

Test Dog Image: 10072.jpg
Prediction: Cat (confidence: 0.4977)

Test Dog Image: 10072.jpg




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 243ms/step
Prediction: Cat (confidence: 0.4980)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 243ms/step
Prediction: Cat (confidence: 0.4980)


### Summary of Custom Dataset Implementation

**Dataset Used**: Cats vs Dogs (Microsoft PetImages dataset)

**Dataset Preparation**:
- Original structure: `PetImages/Cat` and `PetImages/Dog` (all images in one folder)
- Reorganized into: `cats_vs_dogs_split/train` and `cats_vs_dogs_split/test`
- Split ratio: 80% train (1000 images per class), 20% test (200 images per class)
- Random seed: 42 (for reproducibility)

**Architecture Applied**: Same CNN with regularization and dropout
- 3 Conv2D layers (32, 64, 128 filters) with L2 regularization (0.001)
- MaxPooling2D after each Conv layer
- Dropout layers (0.25, 0.25, 0.5)
- Dense layer (128 units) with L2 regularization
- Output layer with sigmoid activation

**Model Saved**: `exercise_6_custom_gamo.h5`

**Training Details**:
- Epochs: 10
- Optimizer: Adam with Exponential Decay learning rate
- Loss: Binary Crossentropy
- Data Augmentation: Rotation, width/height shift, horizontal flip
- Image Size: 128x128
- Batch Size: 32

**Class Mapping**: Cat=0 (< 0.5), Dog=1 (≥ 0.5)