# Week 5: Deep Learning Essentials & Vision (Solution)

## Overview
This notebook contains complete solutions for Week 5. Use it as a reference after attempting the starter notebook.

---

## Setup and Imports

In [None]:
# Core libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from typing import Dict, List, Tuple, Optional
from pathlib import Path
import logging

# Deep Learning
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchvision
import torchvision.transforms as transforms
from torchvision import models

# Image processing
from PIL import Image

# Scikit-learn utilities
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, precision_score, recall_score, f1_score

# Logging: timestamped, INFO-level messages for every cell below
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Reproducibility: seed NumPy and PyTorch with the same fixed value
torch.manual_seed(42)
np.random.seed(42)

# Device selection: CUDA when PyTorch reports it as available, otherwise the CPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(f"Using device: {device}")

## Part 1: Neural Network Fundamentals - SOLUTION

In [None]:
class SimpleNeuralLayer:
    """A single dense layer implemented with NumPy (forward pass only).

    Attributes:
        weights: Array of shape (input_size, output_size).
        bias: Array of shape (output_size,).
    """

    def __init__(self, input_size: int, output_size: int):
        # Weights are standard-normal draws multiplied by a fixed factor of 0.01
        # (the factor does not depend on input_size).
        self.weights = 0.01 * np.random.randn(input_size, output_size)
        # The bias starts at zero for every output unit.
        self.bias = np.zeros(output_size)

    def forward(self, x: np.ndarray) -> np.ndarray:
        """Return the affine map of ``x``: ``x . weights + bias``."""
        projected = np.dot(x, self.weights)
        return projected + self.bias

    def relu(self, x: np.ndarray) -> np.ndarray:
        """Return ``x`` with ReLU applied element-wise, i.e. ``max(x, 0)``."""
        return np.maximum(x, 0)

# Smoke test: feed a random batch of 3 samples through a 10-input, 5-output layer
layer = SimpleNeuralLayer(input_size=10, output_size=5)
test_input = np.random.randn(3, 10)
output = layer.forward(x=test_input)
print(f"Input shape: {test_input.shape}")
print(f"Output shape: {output.shape}")
# ReLU is element-wise, so the printed shape matches the pre-activation output
print(f"Output with ReLU shape: {layer.relu(output).shape}")

In [None]:
class MLPClassifier(nn.Module):
    """Multi-Layer Perceptron for classification.

    The network is a stack of ``Linear -> ReLU -> Dropout`` blocks (one per
    entry in ``hidden_sizes``) followed by a final ``Linear`` layer that emits
    one raw score (logit) per class.

    Args:
        input_size: Number of features in each input vector.
        hidden_sizes: Width of each hidden layer, in order. May be empty, in
            which case the model is a single linear layer.
        num_classes: Number of output logits.
        dropout: Dropout probability applied after every hidden activation.
    """

    def __init__(self, input_size: int, hidden_sizes: List[int], num_classes: int, dropout: float = 0.3):
        super().__init__()

        # Chain of layer widths: input first, then every hidden width.
        # Building from this chain avoids indexing hidden_sizes[0] / [-1],
        # which raised IndexError when no hidden layers were requested.
        layer_sizes = [input_size, *hidden_sizes]

        layers: List[nn.Module] = []
        for in_features, out_features in zip(layer_sizes[:-1], layer_sizes[1:]):
            layers.append(nn.Linear(in_features, out_features))
            layers.append(nn.ReLU())
            layers.append(nn.Dropout(dropout))

        # Output layer maps the last width in the chain to the class logits
        layers.append(nn.Linear(layer_sizes[-1], num_classes))

        self.network = nn.Sequential(*layers)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run ``x`` through the sequential stack and return the logits."""
        return self.network(x)

# Smoke test: a 784 -> 256 -> 128 -> 10 MLP applied to a random batch of 32 vectors
model = MLPClassifier(784, [256, 128], 10)
print(model)
test_input = torch.randn((32, 784))
output = model(test_input)
print(f"Output shape: {output.shape}")

## Part 2: Convolutional Neural Networks - SOLUTION

In [None]:
class BasicCNN(nn.Module):
    """Basic CNN for image classification.

    Three ``Conv(3x3, padding=1) -> ReLU -> MaxPool(2)`` blocks (32, 64 and 128
    channels) followed by a two-layer fully connected head with dropout.

    The feature map is adaptively average-pooled to 4x4 before the head, so the
    network accepts inputs whose spatial size is not 32x32. For 32x32 inputs
    the three max-pools already yield 4x4, so the adaptive pool leaves the
    values unchanged. The adaptive pool has no parameters, so the state_dict
    layout is unaffected.

    Args:
        num_classes: Number of output logits.
        input_channels: Number of channels in the input images.
    """

    def __init__(self, num_classes: int = 10, input_channels: int = 3):
        super(BasicCNN, self).__init__()

        # Convolutional layers (padding=1 keeps the spatial size; each pool halves it)
        self.conv1 = nn.Conv2d(input_channels, 32, kernel_size=3, padding=1)
        self.pool1 = nn.MaxPool2d(2, 2)

        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.pool2 = nn.MaxPool2d(2, 2)

        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.pool3 = nn.MaxPool2d(2, 2)

        # Forces a 4x4 spatial size regardless of the input resolution, so the
        # flattened feature vector always has 128 * 4 * 4 entries.
        self.adaptive_pool = nn.AdaptiveAvgPool2d((4, 4))

        # Fully connected layers
        # For 32x32 input: after 3 pooling layers -> 4x4 spatial size
        self.fc1 = nn.Linear(128 * 4 * 4, 512)
        self.fc2 = nn.Linear(512, num_classes)

        self.dropout = nn.Dropout(0.5)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return class logits of shape (batch, num_classes) for image batch ``x``."""
        # Conv block 1
        x = self.pool1(torch.relu(self.conv1(x)))

        # Conv block 2
        x = self.pool2(torch.relu(self.conv2(x)))

        # Conv block 3
        x = self.pool3(torch.relu(self.conv3(x)))

        # Normalise the spatial size to 4x4 (identity when it already is 4x4)
        x = self.adaptive_pool(x)

        # Flatten everything except the batch dimension
        x = x.view(x.size(0), -1)

        # FC layers
        x = torch.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)

        return x

# Smoke test: a 10-class CNN applied to a random batch of eight 3x32x32 images
model = BasicCNN(10, 3)
print(model)
test_input = torch.randn((8, 3, 32, 32))
output = model(test_input)
print(f"Output shape: {output.shape}")

## Part 2.2: Data Augmentation - SOLUTION

In [None]:
class ImageDataset(Dataset):
    """Custom dataset with augmentation support.

    Wraps parallel arrays of images and labels. Each item is converted to a PIL
    image and then passed through the optional ``transform``.

    Args:
        images: Indexable collection of image arrays; ``images[i]`` must be a
            NumPy array accepted by ``PIL.Image.fromarray`` after conversion
            to uint8.
        labels: Indexable collection of labels, same length as ``images``.
        transform: Optional callable applied to the PIL image.

    Raises:
        ValueError: If ``images`` and ``labels`` have different lengths.
    """

    def __init__(self, images: np.ndarray, labels: np.ndarray, transform=None):
        if len(images) != len(labels):
            raise ValueError(
                f"images and labels must have the same length, "
                f"got {len(images)} and {len(labels)}"
            )
        self.images = images
        self.labels = labels
        self.transform = transform

    def __len__(self) -> int:
        return len(self.images)

    @staticmethod
    def _to_uint8(image: np.ndarray) -> np.ndarray:
        """Return ``image`` as uint8, scaling non-uint8 input by 255."""
        if image.dtype == np.uint8:
            return image
        if np.issubdtype(image.dtype, np.floating):
            # Float images are treated as [0, 1] intensities. Clip first so
            # values slightly out of range saturate instead of wrapping
            # around when cast to uint8.
            image = np.clip(image, 0.0, 1.0)
        return (image * 255).astype(np.uint8)

    def __getitem__(self, idx: int) -> Tuple[torch.Tensor, int]:
        """Return ``(image, label)`` for index ``idx``.

        The image is whatever ``transform`` returns; without a transform it is
        the PIL image itself.
        """
        image = self.images[idx]
        label = self.labels[idx]

        # Convert numpy array to PIL Image
        image = Image.fromarray(self._to_uint8(image))

        # Apply transformations (explicit None check: a transform object's
        # truthiness should not decide whether it runs)
        if self.transform is not None:
            image = self.transform(image)

        return image, label

def get_data_transforms(train: bool = True):
    """Build the torchvision preprocessing pipeline.

    Args:
        train: When truthy, random flip / rotation / colour-jitter steps are
            placed ahead of the tensor conversion; otherwise only tensor
            conversion and normalisation are applied.

    Returns:
        A ``transforms.Compose`` ending in ``ToTensor`` and ``Normalize``.
    """
    steps = []

    if train:
        # Random augmentations operate on the PIL image, before ToTensor
        steps.append(transforms.RandomHorizontalFlip(p=0.5))
        steps.append(transforms.RandomRotation(15))
        steps.append(transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2))

    # Both modes finish with the same tensor conversion and per-channel normalisation
    steps.append(transforms.ToTensor())
    steps.append(transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]))

    return transforms.Compose(steps)

## Part 3: Training Pipeline - SOLUTION

In [None]:
class ModelTrainer:
    """Trainer class for CNN models.

    Moves the model to ``device`` and records per-epoch loss / accuracy for the
    training and validation sets in ``train_losses``, ``val_losses``,
    ``train_accuracies`` and ``val_accuracies``.
    """

    def __init__(self, model: nn.Module, device: torch.device):
        self.model = model.to(device)
        self.device = device
        self.train_losses = []
        self.val_losses = []
        self.train_accuracies = []
        self.val_accuracies = []

    def _run_epoch(self, loader: DataLoader, criterion, optimizer, training: bool) -> Tuple[float, float]:
        """Make one pass over ``loader``; update weights only when ``training``.

        The loss is accumulated weighted by batch size, so a smaller final
        batch does not skew the epoch average. This assumes ``criterion``
        returns the mean loss over the batch (the default reduction).

        Returns:
            ``(average loss per sample, accuracy in percent)``.

        Raises:
            ValueError: If ``loader`` yields no samples.
        """
        self.model.train(training)
        loss_sum = 0.0
        correct = 0
        total = 0

        # Gradients are tracked only while training
        with torch.set_grad_enabled(training):
            for images, labels in loader:
                images = images.to(self.device)
                labels = labels.to(self.device)

                if training:
                    optimizer.zero_grad()
                outputs = self.model(images)
                loss = criterion(outputs, labels)
                if training:
                    loss.backward()
                    optimizer.step()

                batch_size = labels.size(0)
                loss_sum += loss.item() * batch_size
                # Predicted class = index of the largest logit in each row
                _, predicted = torch.max(outputs.detach(), 1)
                total += batch_size
                correct += (predicted == labels).sum().item()

        if total == 0:
            raise ValueError("The data loader yielded no samples; cannot compute loss/accuracy.")

        return loss_sum / total, 100 * correct / total

    def train_epoch(self, train_loader: DataLoader, criterion, optimizer) -> Tuple[float, float]:
        """Train for one epoch and return ``(average loss, accuracy %)``."""
        return self._run_epoch(train_loader, criterion, optimizer, training=True)

    def validate(self, val_loader: DataLoader, criterion) -> Tuple[float, float]:
        """Evaluate without gradient tracking; return ``(average loss, accuracy %)``."""
        return self._run_epoch(val_loader, criterion, None, training=False)

    def train(self, train_loader: DataLoader, val_loader: DataLoader,
              num_epochs: int, learning_rate: float = 0.001, early_stopping=None):
        """Full training pipeline (cross-entropy loss, Adam optimizer).

        Args:
            train_loader: Batches used for weight updates.
            val_loader: Batches used for per-epoch validation.
            num_epochs: Maximum number of epochs.
            learning_rate: Learning rate passed to Adam.
            early_stopping: Optional callable taking the epoch's validation
                loss and returning True when training should stop. When
                omitted, all ``num_epochs`` epochs run.
        """
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(self.model.parameters(), lr=learning_rate)

        for epoch in range(num_epochs):
            train_loss, train_acc = self.train_epoch(train_loader, criterion, optimizer)
            val_loss, val_acc = self.validate(val_loader, criterion)

            self.train_losses.append(train_loss)
            self.val_losses.append(val_loss)
            self.train_accuracies.append(train_acc)
            self.val_accuracies.append(val_acc)

            logger.info(f"Epoch [{epoch+1}/{num_epochs}] - "
                       f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}% - "
                       f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}%")

            if early_stopping is not None and early_stopping(val_loss):
                logger.info(f"Stopping early after epoch {epoch+1}")
                break

    def plot_training_history(self):
        """Plot training and validation metrics (loss left, accuracy right)."""
        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))

        ax1.plot(self.train_losses, label='Train Loss')
        ax1.plot(self.val_losses, label='Validation Loss')
        ax1.set_xlabel('Epoch')
        ax1.set_ylabel('Loss')
        ax1.set_title('Training and Validation Loss')
        ax1.legend()
        ax1.grid(True)

        ax2.plot(self.train_accuracies, label='Train Accuracy')
        ax2.plot(self.val_accuracies, label='Validation Accuracy')
        ax2.set_xlabel('Epoch')
        ax2.set_ylabel('Accuracy (%)')
        ax2.set_title('Training and Validation Accuracy')
        ax2.legend()
        ax2.grid(True)

        plt.tight_layout()
        plt.show()

## Part 4: Transfer Learning - SOLUTION

In [None]:
class TransferLearningModel:
    """Transfer learning wrapper using pre-trained models.

    Loads a torchvision backbone ('resnet18', 'resnet50' or 'vgg16'),
    optionally freezes its parameters, and replaces the final classification
    layer with a new ``Linear(512) -> ReLU -> Dropout(0.3) -> Linear`` head.
    The head is created after freezing, so its parameters remain trainable.

    Args:
        model_name: One of 'resnet18', 'resnet50', 'vgg16'.
        num_classes: Number of logits produced by the new head.
        freeze_layers: When True, every backbone parameter has
            ``requires_grad`` set to False before the head is attached.

    Raises:
        ValueError: If ``model_name`` is not supported.
    """

    def __init__(self, model_name: str, num_classes: int, freeze_layers: bool = True):
        if model_name == 'resnet18':
            self.model = models.resnet18(pretrained=True)
        elif model_name == 'resnet50':
            self.model = models.resnet50(pretrained=True)
        elif model_name == 'vgg16':
            self.model = models.vgg16(pretrained=True)
        else:
            raise ValueError(f"Unsupported model: {model_name}")

        # ResNets expose the final layer as `.fc`; VGG keeps it at `.classifier[6]`
        is_resnet = 'resnet' in model_name
        if is_resnet:
            num_features = self.model.fc.in_features
        else:
            num_features = self.model.classifier[6].in_features

        if freeze_layers:
            for param in self.model.parameters():
                param.requires_grad = False

        # Replace final layer (new modules default to requires_grad=True)
        head = self._build_head(num_features, num_classes)
        if is_resnet:
            self.model.fc = head
        else:
            self.model.classifier[6] = head

    @staticmethod
    def _build_head(num_features: int, num_classes: int) -> nn.Module:
        """Create the replacement classification head."""
        return nn.Sequential(
            nn.Linear(num_features, 512),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(512, num_classes)
        )

    def get_model(self) -> nn.Module:
        """Return the wrapped ``nn.Module``."""
        return self.model

    def unfreeze_layers(self, num_layers: Optional[int] = None):
        """Unfreeze layers for fine-tuning.

        Args:
            num_layers: ``None`` unfreezes every parameter. Otherwise the last
                ``num_layers`` entries of ``model.parameters()`` are unfrozen.
                The count is in parameter tensors (a layer's weight and bias
                count separately), not in modules. ``0`` unfreezes nothing.

        Raises:
            ValueError: If ``num_layers`` is negative.
        """
        params = list(self.model.parameters())

        if num_layers is None:
            selected = params
        elif num_layers < 0:
            raise ValueError(f"num_layers must be non-negative, got {num_layers}")
        elif num_layers == 0:
            # params[-0:] would be the whole list, so handle zero explicitly
            selected = []
        else:
            selected = params[-num_layers:]

        for param in selected:
            param.requires_grad = True

# Smoke test: a frozen ResNet-18 backbone with a fresh 5-class head
tl_model = TransferLearningModel(model_name='resnet18', num_classes=5, freeze_layers=True)
model = tl_model.get_model()
print(model)

## Part 5: Regularization & Early Stopping - SOLUTION

In [None]:
class EarlyStopping:
    """Signal when validation loss has stopped improving.

    Call the instance once per epoch with the validation loss. It returns True
    once ``patience`` calls in a row have failed to beat the best loss by more
    than ``min_delta``; the flag stays set on later calls.

    Attributes:
        counter: Number of consecutive non-improving calls.
        best_loss: Lowest accepted validation loss so far (None before the first call).
        early_stop: True once the patience budget has been used up.
    """

    def __init__(self, patience: int = 5, min_delta: float = 0.0, verbose: bool = True):
        self.patience = patience
        self.min_delta = min_delta
        self.verbose = verbose
        self.counter = 0
        self.best_loss = None
        self.early_stop = False

    def _report(self, message: str) -> None:
        """Print ``message`` only when verbose output is enabled."""
        if self.verbose:
            print(message)

    def __call__(self, val_loss: float) -> bool:
        """Record ``val_loss`` and return the current stop flag."""
        # Very first observation: it simply becomes the baseline
        if self.best_loss is None:
            self.best_loss = val_loss
            self._report(f"Initial validation loss: {val_loss:.4f}")
            return self.early_stop

        # Improvement must beat the best loss by more than min_delta
        threshold = self.best_loss - self.min_delta
        if val_loss < threshold:
            self.best_loss = val_loss
            self.counter = 0
            self._report(f"Validation loss improved to {val_loss:.4f}")
            return self.early_stop

        # Otherwise this epoch counts against the patience budget
        self.counter += 1
        self._report(f"No improvement for {self.counter} epoch(s)")
        if self.counter >= self.patience:
            self.early_stop = True
            self._report("Early stopping triggered")

        return self.early_stop

## Part 6: Visual Defect Detection System - SOLUTION

In [None]:
class DefectDetectionSystem:
    """Complete system for visual defect detection.

    Bundles a two-class model (either ``BasicCNN`` or a ResNet-18 transfer
    learning model) with data preparation, training, evaluation, single-image
    prediction and visualisation helpers. The model is placed on the
    notebook-level ``device`` at construction time, so ``predict`` and
    ``evaluate`` also work before ``train`` has been called.

    Args:
        model_type: Stored on the instance as ``model_type``; the architecture
            is chosen by ``use_transfer_learning``, not by this value.
        use_transfer_learning: When True use a frozen pre-trained ResNet-18
            with a new head; otherwise use ``BasicCNN``.
    """

    # Index 0 / 1 of the model output map to these display names
    CLASS_NAMES = ['Non-Defective', 'Defective']

    def __init__(self, model_type: str = 'cnn', use_transfer_learning: bool = False):
        self.model_type = model_type
        self.model = None
        self.trainer = None

        if use_transfer_learning:
            tl_model = TransferLearningModel('resnet18', num_classes=2, freeze_layers=True)
            self.model = tl_model.get_model()
        else:
            self.model = BasicCNN(num_classes=2, input_channels=3)

        # Inputs are sent to `device` in predict/evaluate, so the model must
        # live there from the start, not only after train() has run.
        self.model = self.model.to(device)

    def prepare_data(self, images: np.ndarray, labels: np.ndarray,
                     test_size: float = 0.2, val_size: float = 0.1, batch_size: int = 32):
        """Split the data and wrap each split in a DataLoader.

        Args:
            images: Image arrays, one per sample.
            labels: Class label per sample (used for stratified splitting).
            test_size: Fraction of all samples held out for testing.
            val_size: Fraction of all samples held out for validation.
            batch_size: Batch size for all three loaders.

        Returns:
            ``(train_loader, val_loader, test_loader)``. Only the training
            loader shuffles and uses the augmenting transforms.
        """
        # Split data: carve off the test set first
        X_temp, X_test, y_temp, y_test = train_test_split(
            images, labels, test_size=test_size, random_state=42, stratify=labels
        )

        # val_size is a fraction of the full data set; rescale it to a
        # fraction of what remains after removing the test split
        val_size_adjusted = val_size / (1 - test_size)
        X_train, X_val, y_train, y_val = train_test_split(
            X_temp, y_temp, test_size=val_size_adjusted, random_state=42, stratify=y_temp
        )

        # Create datasets
        train_dataset = ImageDataset(X_train, y_train, transform=get_data_transforms(train=True))
        val_dataset = ImageDataset(X_val, y_val, transform=get_data_transforms(train=False))
        test_dataset = ImageDataset(X_test, y_test, transform=get_data_transforms(train=False))

        # Create dataloaders
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
        val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
        test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

        return train_loader, val_loader, test_loader

    def train(self, train_loader, val_loader, num_epochs: int = 50, learning_rate: float = 0.001):
        """Train the model; the trainer (with its history) is kept in ``self.trainer``."""
        self.trainer = ModelTrainer(self.model, device)
        self.trainer.train(train_loader, val_loader, num_epochs, learning_rate)

    def evaluate(self, test_loader) -> Dict[str, float]:
        """Evaluate model on test set.

        Returns:
            Dict with 'accuracy', 'precision', 'recall' and 'f1'; the last
            three use binary averaging.
        """
        self.model.eval()
        all_preds = []
        all_labels = []

        with torch.no_grad():
            for images, labels in test_loader:
                images = images.to(device)
                outputs = self.model(images)
                _, predicted = torch.max(outputs, 1)

                all_preds.extend(predicted.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())

        all_preds = np.array(all_preds)
        all_labels = np.array(all_labels)

        metrics = {
            'accuracy': accuracy_score(all_labels, all_preds),
            'precision': precision_score(all_labels, all_preds, average='binary'),
            'recall': recall_score(all_labels, all_preds, average='binary'),
            'f1': f1_score(all_labels, all_preds, average='binary')
        }

        return metrics

    def predict(self, image: np.ndarray) -> Tuple[str, float]:
        """Predict on single image.

        Args:
            image: Image array; non-uint8 input is scaled by 255 before
                conversion to a PIL image.

        Returns:
            ``(class name, softmax probability of that class)``.
        """
        self.model.eval()

        # Prepare image
        transform = get_data_transforms(train=False)
        if image.dtype != np.uint8:
            if np.issubdtype(image.dtype, np.floating):
                # Saturate instead of wrapping when values fall outside [0, 1]
                image = np.clip(image, 0.0, 1.0)
            image = (image * 255).astype(np.uint8)
        image_pil = Image.fromarray(image)
        # unsqueeze(0) adds the batch dimension expected by the model
        image_tensor = transform(image_pil).unsqueeze(0).to(device)

        # Predict
        with torch.no_grad():
            output = self.model(image_tensor)
            probabilities = torch.softmax(output, dim=1)
            confidence, predicted = torch.max(probabilities, 1)

        prediction = self.CLASS_NAMES[predicted.item()]
        confidence = confidence.item()

        return prediction, confidence

    def visualize_predictions(self, images: np.ndarray, labels: np.ndarray, num_samples: int = 8):
        """Visualize predictions on sample images.

        Draws up to ``num_samples`` randomly chosen images, four per row, each
        titled with its true and predicted class. The grid grows with the
        number of samples, and unused cells are left blank.
        """
        n_show = min(num_samples, len(images))
        if n_show <= 0:
            print("No images to visualize.")
            return

        indices = np.random.choice(len(images), n_show, replace=False)

        # Size the grid from the sample count (2x4 for the default of 8)
        n_cols = min(4, n_show)
        n_rows = -(-n_show // n_cols)  # ceiling division
        fig, axes = plt.subplots(n_rows, n_cols, figsize=(4 * n_cols, 4 * n_rows), squeeze=False)
        axes = axes.ravel()

        # Hide every cell up front so unused ones do not show empty frames
        for ax in axes:
            ax.axis('off')

        for i, idx in enumerate(indices):
            image = images[idx]
            true_label = labels[idx]
            prediction, confidence = self.predict(image)

            axes[i].imshow(image)
            axes[i].set_title(f"True: {self.CLASS_NAMES[true_label]}\nPred: {prediction} ({confidence:.2f})")

        plt.tight_layout()
        plt.show()

## Part 7: Model Evaluation - SOLUTION

In [None]:
class ModelEvaluator:
    """Comprehensive model evaluation utilities (all static methods)."""

    @staticmethod
    def compute_metrics(y_true: np.ndarray, y_pred: np.ndarray, class_names: Optional[List[str]] = None) -> Dict:
        """Compute comprehensive metrics.

        Also prints scikit-learn's classification report as a side effect.

        Args:
            y_true: Ground-truth labels.
            y_pred: Predicted labels.
            class_names: Optional display names passed to the report.

        Returns:
            Dict with 'accuracy', weighted 'precision' / 'recall' / 'f1', and
            the 'confusion_matrix'.
        """
        metrics = {
            'accuracy': accuracy_score(y_true, y_pred),
            'precision': precision_score(y_true, y_pred, average='weighted'),
            'recall': recall_score(y_true, y_pred, average='weighted'),
            'f1': f1_score(y_true, y_pred, average='weighted'),
            'confusion_matrix': confusion_matrix(y_true, y_pred)
        }

        # Classification report
        print("\nClassification Report:")
        print(classification_report(y_true, y_pred, target_names=class_names))

        return metrics

    @staticmethod
    def plot_confusion_matrix(y_true: np.ndarray, y_pred: np.ndarray, class_names: List[str]):
        """Plot confusion matrix as an annotated heatmap (rows: true, columns: predicted)."""
        cm = confusion_matrix(y_true, y_pred)

        plt.figure(figsize=(8, 6))
        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                    xticklabels=class_names, yticklabels=class_names)
        plt.xlabel('Predicted')
        plt.ylabel('True')
        plt.title('Confusion Matrix')
        plt.show()

    @staticmethod
    def visualize_misclassifications(images: np.ndarray, y_true: np.ndarray,
                                     y_pred: np.ndarray, class_names: List[str], num_samples: int = 9):
        """Visualize misclassified samples.

        Shows up to ``num_samples`` randomly chosen misclassified images, three
        per row, each titled with its true and predicted class. Prints a
        message and returns when there is nothing to show.
        """
        # Convert first so plain lists are compared element-wise, not as whole objects
        y_true = np.asarray(y_true)
        y_pred = np.asarray(y_pred)
        misclassified_idx = np.where(y_true != y_pred)[0]

        if len(misclassified_idx) == 0:
            print("No misclassifications!")
            return

        n_show = min(num_samples, len(misclassified_idx))
        if n_show <= 0:
            return

        indices = np.random.choice(misclassified_idx, n_show, replace=False)

        # Size the grid from the sample count (3x3 for the default of 9)
        n_cols = min(3, n_show)
        n_rows = -(-n_show // n_cols)  # ceiling division
        fig, axes = plt.subplots(n_rows, n_cols, figsize=(4 * n_cols, 4 * n_rows), squeeze=False)
        axes = axes.ravel()

        # Hide every cell up front so unused ones do not show empty frames
        for ax in axes:
            ax.axis('off')

        for i, idx in enumerate(indices):
            axes[i].imshow(images[idx])
            axes[i].set_title(f"True: {class_names[y_true[idx]]}\nPred: {class_names[y_pred[idx]]}")

        plt.tight_layout()
        plt.show()

## Part 8: Model Persistence - SOLUTION

In [None]:
class ModelCheckpoint:
    """Helpers for writing a model's weights to disk and reading them back."""

    @staticmethod
    def save_model(model: nn.Module, path: str, metadata: Dict = None):
        """Write the model's state_dict and optional metadata to ``path``.

        The file holds a dict with keys 'model_state_dict' and 'metadata'
        (an empty dict when no metadata is supplied).
        """
        payload = {}
        payload['model_state_dict'] = model.state_dict()
        payload['metadata'] = metadata if metadata else {}

        torch.save(payload, path)
        logger.info(f"Model saved to {path}")

    @staticmethod
    def load_model(model: nn.Module, path: str) -> nn.Module:
        """Load weights stored at ``path`` into ``model`` and return it.

        Tensors are mapped to the notebook-level ``device`` while loading.
        Any metadata stored in the file is logged.
        """
        restored = torch.load(path, map_location=device)
        weights = restored['model_state_dict']
        model.load_state_dict(weights)
        logger.info(f"Model loaded from {path}")

        has_metadata = 'metadata' in restored
        if has_metadata:
            logger.info(f"Metadata: {restored['metadata']}")

        return model

# Example usage
# ModelCheckpoint.save_model(model, 'defect_detector.pth', metadata={'accuracy': 0.95, 'epoch': 50})
# loaded_model = ModelCheckpoint.load_model(BasicCNN(num_classes=2), 'defect_detector.pth')

## Example: Complete Workflow with CIFAR-10

In [None]:
# Preprocessing: tensor conversion, then per-channel normalisation with mean 0.5 and std 0.5
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

# CIFAR-10 train / test splits (downloaded into ./data when missing)
trainset = torchvision.datasets.CIFAR10(
    root='./data', train=True, transform=transform, download=True
)
testset = torchvision.datasets.CIFAR10(
    root='./data', train=False, transform=transform, download=True
)

# Batches of 64; only the training loader shuffles
train_loader = DataLoader(trainset, shuffle=True, batch_size=64, num_workers=2)
test_loader = DataLoader(testset, shuffle=False, batch_size=64, num_workers=2)

# Ten-class CNN for 3-channel images
model = BasicCNN(input_channels=3, num_classes=10)

# Train for 10 epochs; the CIFAR-10 test split is passed as the validation loader
trainer = ModelTrainer(model, device)
trainer.train(train_loader, test_loader, num_epochs=10, learning_rate=0.001)

# Loss / accuracy curves for both loaders
trainer.plot_training_history()

# Persist the weights; the stored 'accuracy' is the final validation accuracy in percent
ModelCheckpoint.save_model(model, 'cifar10_cnn.pth', metadata={'accuracy': trainer.val_accuracies[-1]})

---

## Summary

This solution notebook demonstrates:
1. Complete implementations of neural networks and CNNs
2. Data augmentation and preprocessing pipelines
3. Training loops with proper evaluation
4. Transfer learning implementation
5. Regularization techniques including early stopping
6. Complete defect detection system
7. Model evaluation and visualization
8. Model persistence and checkpointing

Use this as a reference after completing the starter notebook exercises.