# CNN Implementation for Cat Breed Classification

This notebook implements a Convolutional Neural Network (CNN) to classify cat breeds.

**Objectives:**
1.  Define a CNN with Batch Normalization and Dropout layers.
2.  Compare the performance of different kernel sizes (e.g., 3x3, 5x5, 7x7).
3.  Select the best kernel size and visualize Training vs. Validation loss and accuracy.

In [1]:
import os
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import datasets, transforms
from torchvision import models
from torch.utils.data import DataLoader
import time, sys

# Device configuration: use the GPU when CUDA is available, otherwise the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f'Using device: {device}')

Using device: cpu


In [2]:
# helper for estimated completion time
def format_time(seconds):
    """Render a duration given in seconds as a zero-padded ``HH:MM:SS`` string.

    The input is truncated to a whole number of seconds with ``int`` before
    being split into hours, minutes and seconds. Hours are not capped, so long
    durations simply use more than two hour digits.
    """
    whole_seconds = int(seconds)
    hours = whole_seconds // 3600
    minutes = whole_seconds // 60 % 60
    secs = whole_seconds % 60
    return "{:02d}:{:02d}:{:02d}".format(hours, minutes, secs)

def _progress_bar(current, total, start_time, prefix=''):
    """Redraw a single-line text progress bar with an ETA on stdout.

    Args:
        current: Number of steps completed so far.
        total: Total number of steps; a non-positive value draws an empty bar.
        start_time: ``time.time()`` timestamp taken when the phase started.
        prefix: Text written in front of the bar.

    The line starts with a carriage return so successive calls overwrite each
    other; a newline is written once ``current`` equals ``total``.
    """
    width = 30
    done = int(round(width * current / float(total))) if total > 0 else 0
    bar = ''.join(('=' * done, '-' * (width - done)))

    # ETA = remaining steps divided by the average rate observed so far
    elapsed = time.time() - start_time
    remaining_seconds = 0
    if elapsed > 0 and current > 0:
        remaining_seconds = (total - current) / (current / elapsed)

    sys.stdout.write(f'\r{prefix} [{bar}] {current}/{total} | ETA: {format_time(remaining_seconds)}')
    sys.stdout.flush()
    if current == total:
        sys.stdout.write('\n')

## 1. Data Loading

In [3]:
# Hyperparameters
BATCH_SIZE = 64
LEARNING_RATE = 0.001
NUM_EPOCHS = 10
TARGET_SIZE = (224, 224)

# Data Directories
TRAIN_DIR = 'data/train'
TEST_DIR = 'data/test'

# Per-channel normalization; these are the channel statistics commonly used
# for ImageNet-trained torchvision models.
NORMALIZE_MEAN = [0.485, 0.456, 0.406]
NORMALIZE_STD = [0.229, 0.224, 0.225]

# Training transform: deterministic resize/crop plus a random horizontal flip
# as data augmentation.
train_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(TARGET_SIZE),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=NORMALIZE_MEAN, std=NORMALIZE_STD),
])

# Evaluation transform: identical preprocessing WITHOUT the random flip, so
# that validation/test metrics are deterministic and measured on unaugmented
# images.
eval_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(TARGET_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=NORMALIZE_MEAN, std=NORMALIZE_STD),
])

# Backward-compatible alias for code that still refers to `transform`
transform = train_transform

# Load Datasets
train_dataset = datasets.ImageFolder(root=TRAIN_DIR, transform=train_transform)
test_dataset = datasets.ImageFolder(root=TEST_DIR, transform=eval_transform)

# ImageFolder assigns label indices from each root's own sorted sub-folder
# names, so differing class folders would silently misalign the labels.
if train_dataset.classes != test_dataset.classes:
    raise ValueError(
        'Train and test directories contain different class folders; '
        'label indices would not match between the two datasets.'
    )

# Data Loaders
train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE, shuffle=False)

print(f'Classes: {len(train_dataset.classes)}')
print(f'Training samples: {len(train_dataset)}')
print(f'Testing samples: {len(test_dataset)}')

Classes: 66
Training samples: 18054
Testing samples: 2257


## 2. Model Definition
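The network stacks four convolutional blocks (each with two Conv → BatchNorm → ReLU layers followed by max pooling) and a three-layer fully connected classifier that uses dropout.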

In [4]:
class CatBreedCNN(nn.Module):
    """Convolutional classifier with a configurable kernel size.

    Layout: four blocks of two Conv -> BatchNorm -> ReLU layers, each block
    followed by 2x2 max pooling (channel widths 64, 128, 256, 512); adaptive
    average pooling to 7x7; then three fully connected layers, with dropout
    applied to the inputs of the first two.

    Args:
        num_classes: Size of the output logit vector.
        kernel_size: Kernel size shared by every convolution; the padding is
            ``kernel_size // 2``.
    """

    def __init__(self, num_classes=66, kernel_size=3):
        super().__init__()

        half_kernel = kernel_size // 2

        # (in_channels, out_channels) for conv1 .. conv8; two entries per block
        conv_channels = [
            (3, 64), (64, 64),        # Block 1
            (64, 128), (128, 128),    # Block 2
            (128, 256), (256, 256),   # Block 3
            (256, 512), (512, 512),   # Block 4
        ]
        # Register conv{i}/bn{i} pairs in order so attribute names and
        # parameter order stay conv1, bn1, conv2, bn2, ...
        for layer_no, (c_in, c_out) in enumerate(conv_channels, start=1):
            setattr(self, f'conv{layer_no}',
                    nn.Conv2d(c_in, c_out, kernel_size, padding=half_kernel))
            setattr(self, f'bn{layer_no}', nn.BatchNorm2d(c_out))

        # Parameter-free layers shared across the network
        self.pool = nn.MaxPool2d(2, 2)
        self.dropout = nn.Dropout(0.5)
        self.relu = nn.ReLU()

        # Fixes the feature-map size fed to the classifier at 7x7
        self.adaptive_pool = nn.AdaptiveAvgPool2d((7, 7))

        # Classifier head
        self.fc1 = nn.Linear(512 * 7 * 7, 1024)
        self.fc2 = nn.Linear(1024, 512)
        self.fc3 = nn.Linear(512, num_classes)

    def forward(self, x):
        """Return class logits for a batch of images."""
        for layer_no in range(1, 9):
            conv = getattr(self, f'conv{layer_no}')
            norm = getattr(self, f'bn{layer_no}')
            x = self.relu(norm(conv(x)))
            # Every second conv layer closes a block: downsample by 2
            if layer_no % 2 == 0:
                x = self.pool(x)

        x = self.adaptive_pool(x)

        # Flatten everything except the batch dimension
        x = x.view(x.size(0), -1)

        x = self.relu(self.fc1(self.dropout(x)))
        x = self.relu(self.fc2(self.dropout(x)))
        return self.fc3(x)

## 3. Training Function
The training loop evaluates the model on the validation loader after every epoch and records the loss and accuracy of both phases.

In [5]:
def train_and_validate(model, train_loader, val_loader, criterion, optimizer, num_epochs=NUM_EPOCHS, scheduler=None):
    """Train `model` and evaluate it on `val_loader` after every epoch.

    Args:
        model: Network to train; it is moved to the global `device`.
        train_loader: Iterable of ``(images, labels)`` training batches.
        val_loader: Iterable of ``(images, labels)`` validation batches.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: Optimizer that updates the model parameters.
        num_epochs: Number of epochs to run.
        scheduler: Optional learning-rate scheduler, stepped once per epoch
            after validation. A ``ReduceLROnPlateau`` receives the validation
            accuracy when its mode is ``'max'`` and the validation loss
            otherwise; any other scheduler is stepped without arguments.

    Returns:
        dict with the keys ``'train_loss'``, ``'train_acc'``, ``'val_loss'``
        and ``'val_acc'``, each a list holding one value per epoch
        (accuracies are percentages).
    """
    model.to(device)
    history = {
        'train_loss': [], 'train_acc': [],
        'val_loss': [], 'val_acc': []
    }

    print(f"Training on {device}...")

    for epoch in range(num_epochs):

        # --- Training Phase ---
        print(f"\nEpoch [{epoch+1}/{num_epochs}] Training: ", end='', flush=True)
        avg_train_loss, avg_train_acc = _run_phase(
            model, train_loader, criterion, optimizer=optimizer, prefix='Training')

        # --- Validation Phase ---
        print("\nValidating: ", end='', flush=True)
        avg_val_loss, avg_val_acc = _run_phase(
            model, val_loader, criterion, prefix='Validation')

        # --- learning-rate schedule ---
        if scheduler is not None:
            if isinstance(scheduler, torch.optim.lr_scheduler.ReduceLROnPlateau):
                # Plateau schedulers need the monitored metric; pick the one
                # that matches the direction the scheduler was configured for.
                monitored = avg_val_acc if scheduler.mode == 'max' else avg_val_loss
                scheduler.step(monitored)
            else:
                scheduler.step()

        # --- record history ---
        history['train_loss'].append(avg_train_loss)
        history['train_acc'].append(avg_train_acc)
        history['val_loss'].append(avg_val_loss)
        history['val_acc'].append(avg_val_acc)

        print(
        f"\n  ── Epoch {epoch+1}/{num_epochs} Summary ─────────────────────────────"
        f"\n  Train | Loss: {avg_train_loss:.4f} | Acc: {avg_train_acc:6.2f}%"
        f"\n  Valid | Loss: {avg_val_loss:.4f} | Acc: {avg_val_acc:6.2f}%"
        f"\n  ──────────────────────────────────────────────────────────────────────"
        )

    return history


def _run_phase(model, loader, criterion, optimizer=None, prefix=''):
    """Run one pass over `loader` and return ``(mean_loss, accuracy_percent)``.

    The pass is a training pass (train mode, gradients, optimizer updates) when
    `optimizer` is given and an evaluation pass (eval mode, no gradients)
    otherwise. Both results are ``0.0`` when the loader yields no samples.
    """
    is_training = optimizer is not None
    model.train(is_training)

    loss_sum = 0.0
    correct = 0
    seen = 0
    num_batches = len(loader)
    phase_start = time.time()

    with torch.set_grad_enabled(is_training):
        for batch_idx, (images, labels) in enumerate(loader, 1):
            images, labels = images.to(device), labels.to(device)

            if is_training:
                optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            if is_training:
                loss.backward()
                optimizer.step()

            batch_size = labels.size(0)
            # Weight each batch by its size so a smaller final batch does not
            # skew the epoch mean (assumes the criterion returns a per-batch
            # mean, as nn.CrossEntropyLoss does by default).
            loss_sum += loss.item() * batch_size
            correct += (outputs.argmax(dim=1) == labels).sum().item()
            seen += batch_size

            _progress_bar(batch_idx, num_batches, phase_start, prefix=prefix)

    if seen == 0:
        return 0.0, 0.0
    return loss_sum / seen, 100 * correct / seen

## 4. Experiments: Kernel Sizes
Comparing Kernel Sizes: 3, 5, and 7.

In [6]:
kernel_sizes = [3, 5, 7]
results = {}

# Seed re-applied before every experiment so each kernel size starts from the
# same RNG state (weight init stream, shuffle order, dropout masks).
SEED = 42

total_experiments = len(kernel_sizes)

for exp_idx, k in enumerate(kernel_sizes, 1):
    print(f"\n{'='*60}")
    print(f"EXPERIMENT {exp_idx}/{total_experiments}: Kernel Size = {k}")
    print(f"{'='*60}")

    torch.manual_seed(SEED)

    # Size the output layer from the dataset instead of relying on the
    # hard-coded default of the model class.
    model = CatBreedCNN(num_classes=len(train_dataset.classes), kernel_size=k)

    # The network is trained from scratch with a small learning rate.
    # NOTE(review): LEARNING_RATE from the config cell is not used here;
    # confirm which value is intended.
    optimizer = optim.Adam(model.parameters(), lr=0.0001, weight_decay=1e-4)
    # Halve the learning rate when validation accuracy has not improved for
    # more than `patience` epochs.
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max',
                                                   factor=0.5, patience=2)
    criterion = nn.CrossEntropyLoss()

    # Register the history up front so a partially finished (e.g. interrupted)
    # experiment still leaves its completed epochs in `results`.
    history = {'train_loss': [], 'train_acc': [], 'val_loss': [], 'val_acc': []}
    results[k] = history

    # NOTE(review): the test split doubles as the validation set, so the
    # kernel size is selected on test data; consider a separate validation split.
    #
    # train_and_validate is invoked one epoch at a time so the plateau
    # scheduler can be stepped with that epoch's validation accuracy;
    # previously the scheduler was created but never stepped.
    for epoch in range(1, NUM_EPOCHS + 1):
        current_lr = optimizer.param_groups[0]['lr']
        print(f"\n>>> Kernel {k} | epoch {epoch}/{NUM_EPOCHS} | lr = {current_lr:.2e}")

        epoch_history = train_and_validate(model, train_loader, test_loader,
                                           criterion, optimizer, num_epochs=1)
        for metric, values in epoch_history.items():
            history.setdefault(metric, []).extend(values)

        scheduler.step(epoch_history['val_acc'][-1])

    print(f"\n{'='*60}")
    print(f"Experiment {exp_idx} Complete!")
    print(f"{'='*60}")


EXPERIMENT 1/3: Kernel Size = 3
Training on cpu...


KeyboardInterrupt: 

## 5. Results & Visualization

In [None]:
# comparing kernel sizes based on validation accuracy
# Only experiments that recorded at least one epoch can be plotted or ranked;
# an interrupted run may leave some kernel sizes missing from `results`.
completed = {size: hist for size, hist in results.items() if hist['val_acc']}
if not completed:
    raise RuntimeError(
        'No completed experiments found in `results`; run the experiment cell first.'
    )

missing = [size for size in kernel_sizes if size not in completed]
if missing:
    print(f"Warning: no results for kernel size(s) {missing}; they are skipped.")

fig, ax = plt.subplots(figsize=(10, 6))
for size, hist in completed.items():
    # 1-based x values so the axis matches the epoch numbers printed during training
    epochs = range(1, len(hist['val_acc']) + 1)
    ax.plot(epochs, hist['val_acc'], label=f'Kernel Size {size}')
ax.set_title('Validation Accuracy vs. Epochs for Different Kernel Sizes')
ax.set_xlabel('Epoch')
ax.set_ylabel('Validation Accuracy (%)')
ax.legend()
ax.grid(True)
plt.show()

# find best kernel size: the one whose best epoch reached the highest validation accuracy
best_k = max(completed, key=lambda size: max(completed[size]['val_acc']))
print(f"\nBest Kernel Size based on max validation accuracy: {best_k}")

In [None]:
# plot best model loss and accuracy
best_history = results[best_k]

# One row, two panels: loss on the left, accuracy on the right
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# (metric name, y-axis label, training key, validation key) for each panel
panels = (
    ('Loss', 'Loss', 'train_loss', 'val_loss'),
    ('Accuracy', 'Accuracy (%)', 'train_acc', 'val_acc'),
)

for ax, (metric_name, y_label, train_key, val_key) in zip(axes, panels):
    ax.plot(best_history[train_key], label=f'Train {metric_name}')
    ax.plot(best_history[val_key], label=f'Validation {metric_name}')
    ax.set_title(f'Best Model (Kernel {best_k}): {metric_name}')
    ax.set_xlabel('Epoch')
    ax.set_ylabel(y_label)
    ax.legend()
    ax.grid(True)

plt.show()