# CS4287 Neural Computing - Assignment 2: Convolutional Neural Networks

**Team Members:** [INSERT NAMES AND ID NUMBERS]  
**Student ID 1:** [INSERT]  
**Student ID 2:** [INSERT]

**Code Execution Status:** [Comment on whether the code executes to completion without errors]

**Third Party Source:** [Provide link to any existing implementation used]


## Table of Contents
1. [Imports and Setup](#imports)
2. [Data Loading and Preprocessing](#data)
3. [Network Architecture](#architecture)
4. [Cost Function](#loss)
5. [Optimizer](#optimizer)
6. [Training with Callbacks](#training)
7. [Results and Visualization](#results)
8. [Hyperparameter Analysis](#hyperparameters)
9. [Evaluation](#evaluation)


## 1. Imports and Setup {#imports}


In [None]:
# Standard libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
import warnings
warnings.filterwarnings('ignore')

# Deep learning frameworks
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models, optimizers, losses, metrics
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping, ModelCheckpoint

# Sklearn for evaluation and preprocessing
from sklearn.model_selection import StratifiedKFold, cross_val_score
from sklearn.metrics import classification_report, confusion_matrix, precision_recall_fscore_support

# Seed NumPy's and TensorFlow's global random generators with one shared value
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Report the framework versions this notebook is running against
print(f"TensorFlow version: {tf.__version__}")
print(f"Keras version: {keras.__version__}")


## 2. Data Loading and Preprocessing {#data}


In [None]:
# Fruit Detection Dataset from Kaggle (lakshaytyagi01/fruit-detection),
# reorganized from YOLO format into classification folders for 6 classes:
# Apple, Banana, Grape, Orange, Pineapple, Watermelon

DATASET_PATH = "data/fruits_classification"
IMAGE_SIZE = (224, 224)
BATCH_SIZE = 32


def _load_split(directory, shuffle):
    """Load one split directory as a batched dataset.

    Labels are inferred from the sub-folder names and one-hot encoded
    (``label_mode='categorical'``); images are resized to ``IMAGE_SIZE``.
    """
    return keras.utils.image_dataset_from_directory(
        directory,
        labels='inferred',
        label_mode='categorical',
        image_size=IMAGE_SIZE,
        batch_size=BATCH_SIZE,
        shuffle=shuffle,
        seed=42
    )


# Only the training split is shuffled; test and validation are not
train_dataset = _load_split(f'{DATASET_PATH}/train', shuffle=True)
test_dataset = _load_split(f'{DATASET_PATH}/test', shuffle=False)
val_dataset = _load_split(f'{DATASET_PATH}/valid', shuffle=False)

# Class names come from the training split's folder names
class_names = train_dataset.class_names
num_classes = len(class_names)

print(f"\nDataset loaded successfully!")
print(f"Number of classes: {num_classes}")
print(f"Fruit classes: {class_names}")

# Batch counts per split
n_train_batches = tf.data.experimental.cardinality(train_dataset).numpy()
print(f"Number of training batches: {n_train_batches}")
n_test_batches = tf.data.experimental.cardinality(test_dataset).numpy()
print(f"Number of test batches: {n_test_batches}")
n_val_batches = tf.data.experimental.cardinality(val_dataset).numpy()
print(f"Number of validation batches: {n_val_batches}")

# Height, width and 3 colour channels - reused when building the model
input_shape = IMAGE_SIZE + (3,)
print(f"Input shape: {input_shape}")


### 2.1 Data Visualization

Visualize sample fruit images and their distribution


In [None]:
# Visualize sample fruits from training set
plt.figure(figsize=(15, 15))

# Show up to 16 images from the first training batch in a 4x4 grid
for images, labels in train_dataset.take(1):
    for i in range(min(16, len(images))):
        plt.subplot(4, 4, i + 1)
        # Cast to uint8 so imshow treats the pixel values as 0-255
        plt.imshow(images[i].numpy().astype("uint8"))
        # Labels are one-hot encoded; argmax recovers the class index
        label_idx = np.argmax(labels[i].numpy())
        plt.title(f"{class_names[label_idx]}", fontsize=10)
        plt.axis('off')

plt.suptitle('Sample Fruits from Training Set', fontsize=16, fontweight='bold')
plt.tight_layout()
plt.show()

# Count samples per class.
# Each image must contribute exactly 1 to its own class, so the one-hot labels
# of a batch are reduced to class indices and tallied with bincount.
per_class_totals = np.zeros(len(class_names), dtype=np.int64)
total_samples = 0

for images, labels in train_dataset:
    batch_class_indices = np.argmax(labels.numpy(), axis=1)
    per_class_totals += np.bincount(batch_class_indices, minlength=len(class_names))
    total_samples += len(images)

# Map class name -> number of training images (classes with no images show 0)
class_counts = {name: int(count) for name, count in zip(class_names, per_class_totals)}

# Visualize distribution
plt.figure(figsize=(14, 6))
bars = plt.bar(class_counts.keys(), class_counts.values(), color='steelblue', edgecolor='black')
plt.title('Distribution of Fruit Classes in Training Set', fontsize=14, fontweight='bold')
plt.xlabel('Fruit Class', fontsize=12)
plt.ylabel('Number of Images', fontsize=12)
plt.xticks(rotation=45, ha='right')

# Add count labels on bars
for bar in bars:
    height = bar.get_height()
    plt.text(bar.get_x() + bar.get_width()/2., height,
            f'{int(height)}',
            ha='center', va='bottom')

plt.tight_layout()
plt.show()

print("\nClass Distribution:")
for class_name, count in sorted(class_counts.items()):
    # Guard against an empty training set to avoid dividing by zero
    percentage = (count / total_samples) * 100 if total_samples else 0.0
    print(f"  {class_name}: {count} images ({percentage:.1f}%)")


## 3. CNN Architecture - Transfer Learning with ResNet50 {#architecture}

We use **Transfer Learning** with the pre-trained **ResNet50** architecture from ImageNet.

**Why ResNet50?**
- **Residual Learning**: Uses skip connections to mitigate the vanishing gradient problem in very deep networks
- **Deep Architecture**: 50 layers (deep enough for complex patterns)
- **Pre-trained Weights**: Already learned features from ImageNet (1.4M images)
- **Industry Standard**: Used in production systems worldwide

**Transfer Learning Strategy**:
1. Use ResNet50 as feature extractor (freeze base layers)
2. Add custom classification head for 6 fruit classes
3. Fine-tune if needed for better performance

**Reference**: He et al. (2015) - "Deep Residual Learning for Image Recognition"


In [None]:
# Build Transfer Learning Model using ResNet50
def build_transfer_learning_model(input_shape, num_classes):
    """
    Build a transfer learning classifier on a frozen, ImageNet-pretrained ResNet50.

    Layer order: input -> random flip / rotation / zoom augmentation ->
    ResNet50 input preprocessing -> frozen ResNet50 with global average
    pooling -> BatchNorm / Dropout / Dense head -> softmax output.

    The ResNet50 preprocessing step is part of the model, so callers feed
    raw images (as loaded from disk) and must NOT preprocess them again.

    Args:
        input_shape: Shape of a single input image, e.g. (224, 224, 3).
            Passed to both the Input layer and the ResNet50 base.
        num_classes: Number of units in the softmax output layer.

    Returns:
        A tuple ``(model, base_model)``: the complete Sequential classifier
        and the frozen ResNet50 instance embedded in it (handy e.g. for
        unfreezing layers later when fine-tuning).
    """

    # Load pre-trained ResNet50 (without top classification layer)
    base_model = tf.keras.applications.ResNet50(
        weights='imagenet',           # Use ImageNet pre-trained weights
        include_top=False,             # Exclude original classification head
        input_shape=input_shape,       # Our input: 224x224x3
        pooling='avg'                  # Global average pooling at the end
    )

    # Freeze base model layers - we use it as fixed feature extractor
    base_model.trainable = False
    print(f"✓ Loaded ResNet50 base model (frozen)")
    print(f"  - Total layers in base: {len(base_model.layers)}")
    print(f"  - Base model parameters: {base_model.count_params():,}")

    # Build custom classification head on top of the frozen base
    model = models.Sequential([
        # Input layer
        layers.Input(shape=input_shape, name='input_layer'),

        # Data augmentation layers
        layers.RandomFlip('horizontal', name='augmentation_flip'),
        layers.RandomRotation(0.2, name='augmentation_rotation'),
        layers.RandomZoom(0.1, name='augmentation_zoom'),

        # The ImageNet weights were trained on inputs transformed by
        # resnet50.preprocess_input; without this step the frozen base sees
        # inputs it was never trained on and its features degrade.
        # NOTE(review): this is a Lambda layer - confirm that reloading a
        # saved checkpoint of this model works in your Keras version.
        layers.Lambda(tf.keras.applications.resnet50.preprocess_input,
                      name='resnet50_preprocess'),

        # Pre-trained ResNet50 feature extractor
        base_model,

        # Custom classification head
        layers.BatchNormalization(name='bn_1'),       # Normalize features
        layers.Dropout(0.3, name='dropout_1'),        # Regularization

        layers.Dense(512, activation='relu',          # Dense layer 1
                    kernel_initializer='he_normal', name='fc_1'),
        layers.BatchNormalization(name='bn_2'),
        layers.Dropout(0.4, name='dropout_2'),

        layers.Dense(256, activation='relu',          # Dense layer 2
                    kernel_initializer='he_normal', name='fc_2'),
        layers.BatchNormalization(name='bn_3'),
        layers.Dropout(0.3, name='dropout_3'),

        # Output layer - softmax for multi-class classification
        layers.Dense(num_classes, activation='softmax', name='output_layer')
    ], name='ResNet50_FruitClassifier')

    return model, base_model

# Build the model
rule = "=" * 60


def _total_elements(weights):
    """Return the summed element count of the given weight tensors."""
    return sum(tf.size(w).numpy() for w in weights)


print(rule)
print("Building Transfer Learning Model with ResNet50...")
print(rule)

model, base_model = build_transfer_learning_model(input_shape, num_classes)

# Layer-by-layer overview of the assembled network
print("\n" + rule)
print("MODEL ARCHITECTURE SUMMARY")
print(rule)
model.summary()

# Split the parameter count into what will and will not be updated by training
trainable_params = _total_elements(model.trainable_weights)
non_trainable_params = _total_elements(model.non_trainable_weights)

print("\n" + rule)
print("PARAMETER BREAKDOWN")
print(rule)
print(f"Total parameters:        {model.count_params():,}")
print(f"Trainable parameters:    {trainable_params:,}")
print(f"Non-trainable parameters: {non_trainable_params:,}")
print(f"\n✓ Using ResNet50 (frozen) + Custom Head")
print(f"✓ Only training classification head ({trainable_params:,} params)")
print(rule)


## 4. Loss Function {#loss}

**Categorical Cross-Entropy Loss**

For multi-class classification with one-hot encoded labels:
- Measures difference between predicted probability distribution and true distribution
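- Formula: $L = -\sum_{i=1}^{C} y_i \log(\hat{y}_i)$, where $y$ is the one-hot true label, $\hat{y}$ is the predicted probability vector, and $C$ is the number of classes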
- Well-suited for softmax output layer
- Provides strong gradients for faster learning

**Alternatives considered:**
- Sparse Categorical Cross-Entropy (for integer labels)
- Focal Loss (for class imbalance - not needed here)


## 5. Optimizer {#optimizer}

**Adam Optimizer** (Adaptive Moment Estimation)

**Why Adam?**
- Combines momentum (moving average of gradients) and RMSProp (adaptive learning rates)
- Automatically adjusts learning rate for each parameter
- Works well with sparse gradients and noisy data
- Industry standard for deep learning

**Configuration:**
- Learning rate: 0.001 (default, will be adjusted with ReduceLROnPlateau)
- Beta1: 0.9 (momentum)
- Beta2: 0.999 (RMSProp)

**Alternatives:**
- SGD with momentum: Slower but sometimes better final accuracy
- RMSProp: Good but Adam is more stable
- AdamW: Adam with weight decay (could improve regularization)


In [None]:
# Attach optimizer, loss and tracked metrics to the model
divider = "=" * 60
print(divider)
print("COMPILING MODEL")
print(divider)

# Adam optimizer with an initial learning rate of 0.001
adam_optimizer = optimizers.Adam(learning_rate=0.001)

# Categorical cross-entropy matches the one-hot labels and softmax output
cce_loss = losses.CategoricalCrossentropy()

# Metrics reported for every training and validation epoch
tracked_metrics = [
    metrics.CategoricalAccuracy(name='accuracy'),
    # Counts a hit when the true class is among the 2 highest-scoring predictions
    metrics.TopKCategoricalAccuracy(k=2, name='top_2_accuracy'),
    metrics.Precision(name='precision'),
    metrics.Recall(name='recall'),
]

model.compile(optimizer=adam_optimizer, loss=cce_loss, metrics=tracked_metrics)

print("✓ Model compiled successfully!")
print(f"  - Optimizer: Adam (lr=0.001)")
print(f"  - Loss: Categorical Cross-Entropy")
print(f"  - Metrics: Accuracy, Top-2 Accuracy, Precision, Recall")
print(divider)


## 6. Training with Callbacks {#training}

**Training Strategy:**
1. **Early Stopping**: Stop if validation loss doesn't improve for 5 epochs
2. **ReduceLROnPlateau**: Halve the learning rate (factor 0.5) if validation loss does not improve for 3 epochs
3. **ModelCheckpoint**: Save best model based on validation accuracy

This prevents overfitting and optimizes training efficiency.


In [None]:
# Setup callbacks for training optimization

# Single source of truth for the training length: used by both the banner
# below and model.fit(), so the two can never disagree.
EPOCHS = 25
# File the best model (by validation accuracy) is written to
CHECKPOINT_PATH = 'best_fruit_model.h5'

callbacks_list = [
    # Halve the learning rate when validation loss stops improving
    ReduceLROnPlateau(
        monitor='val_loss',        # Watch validation loss
        factor=0.5,                 # Reduce LR by 50%
        patience=3,                 # Wait 3 epochs before reducing
        min_lr=1e-7,               # Don't go below this
        verbose=1
    ),

    # Stop training early if validation loss stops improving
    EarlyStopping(
        monitor='val_loss',        # Watch validation loss
        patience=5,                 # Stop after 5 epochs with no improvement
        restore_best_weights=True,  # Restore best model weights
        verbose=1
    ),

    # Save best model during training
    ModelCheckpoint(
        CHECKPOINT_PATH,            # Filename
        monitor='val_accuracy',     # Watch validation accuracy
        save_best_only=True,        # Only save if improved
        mode='max',                 # Maximize accuracy
        verbose=1
    )
]

# Sample counts are approximate: batches * batch size over-counts when the
# last batch of a split is not full.
approx_train_samples = tf.data.experimental.cardinality(train_dataset).numpy() * BATCH_SIZE
approx_val_samples = tf.data.experimental.cardinality(val_dataset).numpy() * BATCH_SIZE

print("=" * 60)
print("STARTING TRAINING")
print("=" * 60)
print(f"Configuration:")
print(f"  - Epochs: {EPOCHS}")
print(f"  - Batch Size: {BATCH_SIZE}")
print(f"  - Training samples: ~{approx_train_samples}")
print(f"  - Validation samples: ~{approx_val_samples}")
print(f"  - Callbacks: Early Stopping, ReduceLROnPlateau, ModelCheckpoint")
print("=" * 60)
print("\nTraining will take approximately 10-20 minutes with GPU...\n")

# Train the model
history = model.fit(
    train_dataset,                # Training data
    validation_data=val_dataset,  # Validation data
    epochs=EPOCHS,                # Maximum epochs (early stopping may end sooner)
    callbacks=callbacks_list,     # Apply our callbacks
    verbose=1                     # Show progress bar
)

print("\n" + "=" * 60)
print("✓ TRAINING COMPLETED!")
print("=" * 60)


## 7. Results and Visualization {#results}

Plot training history and evaluate model performance on test set.


In [None]:
# Plot Training History - one panel per tracked metric in a 2x2 grid
fig, axes = plt.subplots(2, 2, figsize=(16, 12))

# One entry per panel, in row-major order. A colour of None means no colour
# is passed, so matplotlib picks its default for that line.
panel_specs = [
    {'keys': ('accuracy', 'val_accuracy'),
     'labels': ('Training Accuracy', 'Validation Accuracy'),
     'title': 'Model Accuracy Over Epochs', 'ylabel': 'Accuracy',
     'colors': (None, None)},
    {'keys': ('loss', 'val_loss'),
     'labels': ('Training Loss', 'Validation Loss'),
     'title': 'Model Loss Over Epochs', 'ylabel': 'Loss',
     'colors': ('coral', 'red')},
    {'keys': ('precision', 'val_precision'),
     'labels': ('Training Precision', 'Validation Precision'),
     'title': 'Model Precision Over Epochs', 'ylabel': 'Precision',
     'colors': ('green', 'darkgreen')},
    {'keys': ('recall', 'val_recall'),
     'labels': ('Training Recall', 'Validation Recall'),
     'title': 'Model Recall Over Epochs', 'ylabel': 'Recall',
     'colors': ('purple', 'indigo')},
]

for ax, spec in zip(axes.flat, panel_specs):
    # Training curve uses circle markers, validation curve uses squares
    for key, label, color, marker in zip(spec['keys'], spec['labels'], spec['colors'], ('o', 's')):
        extra_style = {} if color is None else {'color': color}
        ax.plot(history.history[key], label=label, linewidth=2, marker=marker, **extra_style)
    ax.set_title(spec['title'], fontsize=14, fontweight='bold')
    ax.set_xlabel('Epoch', fontsize=12)
    ax.set_ylabel(spec['ylabel'], fontsize=12)
    ax.legend(fontsize=11)
    ax.grid(True, alpha=0.3)

plt.suptitle('Transfer Learning Training Results - ResNet50', fontsize=16, fontweight='bold', y=1.00)
plt.tight_layout()
plt.show()

# Print the value each metric had in the last recorded epoch
final_rows = [
    ("Final Training Accuracy:   ", 'accuracy'),
    ("Final Validation Accuracy: ", 'val_accuracy'),
    ("Final Training Loss:       ", 'loss'),
    ("Final Validation Loss:     ", 'val_loss'),
    ("Final Training Precision:  ", 'precision'),
    ("Final Validation Precision: ", 'val_precision'),
    ("Final Training Recall:     ", 'recall'),
    ("Final Validation Recall:   ", 'val_recall'),
]

print("\n" + "=" * 60)
print("FINAL TRAINING METRICS")
print("=" * 60)
for prefix, key in final_rows:
    print(f"{prefix}{history.history[key][-1]:.4f}")
print("=" * 60)
