In [None]:
# Import necessary libraries
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.metrics import (
    confusion_matrix,
    classification_report,
    f1_score,
    accuracy_score,
)
import time
from tqdm.notebook import tqdm

# Seed the global NumPy and PyTorch random number generators so that a
# fresh "Restart & Run All" repeats the same random draws.
np.random.seed(42)
torch.manual_seed(42)

# Run on the GPU when PyTorch can see one, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

# Load and Explore MNIST Dataset

In [None]:
# Load MNIST dataset
# Each image is turned into a float tensor and then standardised with the
# mean/std pair given below.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.1307,), std=(0.3081,)),  # MNIST mean and std
    ]
)

# The training and test splits share the cache directory and the transform.
mnist_full, mnist_test = (
    datasets.MNIST("./data", train=is_train, download=True, transform=transform)
    for is_train in (True, False)
)

# Explore the dataset
print(f"MNIST dataset size: {len(mnist_full)} training, {len(mnist_test)} test")

# Show the first ten training samples on a 2 x 5 grid.
fig, axes = plt.subplots(nrows=2, ncols=5, figsize=(12, 5))
for idx, ax in enumerate(axes.ravel()):
    sample, digit = mnist_full[idx]
    # Drop the channel axis so imshow receives a 2-D array.
    ax.imshow(sample.squeeze().numpy(), cmap="gray")
    ax.set_title(f"Label: {digit}")
    ax.axis("off")
fig.tight_layout()
plt.show()

# Analyze Class Distribution

In [None]:
# Check class distribution
# Read the labels from the datasets' ``targets`` tensors. Iterating over the
# datasets would run the image transform on every sample only to throw the
# image away.
train_labels = mnist_full.targets.tolist()
test_labels = mnist_test.targets.tolist()

# One bin per digit, centred on the integer value, so that every bar sits
# directly above its tick (``bins=10`` spreads ten bins over [0, 9] and
# shifts the bars away from the ticks).
digit_bins = np.arange(11) - 0.5

fig, axes = plt.subplots(1, 2, figsize=(12, 4))
for ax, split_labels, title in zip(
    axes,
    (train_labels, test_labels),
    ("Training Set Class Distribution", "Test Set Class Distribution"),
):
    ax.hist(split_labels, bins=digit_bins, rwidth=0.8)
    ax.set_title(title)
    ax.set_xlabel("Digit")
    ax.set_ylabel("Count")
    ax.set_xticks(range(10))

fig.tight_layout()
plt.show()

# Calculate and print dataset statistics
train_labels_np = np.array(train_labels)
test_labels_np = np.array(test_labels)

for heading, split_labels_np in (
    ("Training set statistics:", train_labels_np),
    ("\nTest set statistics:", test_labels_np),
):
    print(heading)
    for digit in range(10):
        count = np.sum(split_labels_np == digit)
        percentage = count / len(split_labels_np) * 100
        print(f"Digit {digit}: {count} samples ({percentage:.2f}%)")

# Define Custom Dataset Class

In [None]:
# Define a custom MNIST dataset class
class MNISTDataset(Dataset):
    """Thin wrapper around an indexable collection of ``(image, label)`` pairs.

    Args:
        data: Any object supporting ``len()`` and integer indexing that
            yields ``(image, label)`` tuples.
        transform: Optional callable applied to the image (not the label)
            each time an item is fetched.
    """

    def __init__(self, data, transform=None):
        self.data = data  # Sequence of (image, label) tuples
        self.transform = transform

    def __len__(self):
        """Return the number of wrapped samples."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for ``idx``, applying ``transform`` if set."""
        img, label = self.data[idx]

        if self.transform:
            img = self.transform(img)

        return img, label

    def _labels(self):
        """Return every label as a 1-D NumPy array.

        When the wrapped object exposes a ``targets`` attribute of matching
        length and no ``target_transform``, the labels are taken from it
        directly so that no image has to be loaded. Otherwise each sample is
        fetched and its label collected.
        """
        targets = getattr(self.data, "targets", None)
        if (
            targets is not None
            and getattr(self.data, "target_transform", None) is None
            and len(targets) == len(self.data)
        ):
            return np.asarray(targets).reshape(-1)
        return np.asarray([label for _, label in self.data])

    def get_stats(self):
        """Summarise the label distribution.

        Returns:
            dict with keys ``total_samples`` (int), ``num_classes`` (int),
            ``class_distribution`` (label -> count) and ``class_percentages``
            (label -> share of the dataset in percent).
        """
        unique_labels, counts = np.unique(self._labels(), return_counts=True)

        stats = {
            "total_samples": len(self.data),
            "num_classes": len(unique_labels),
            "class_distribution": {
                int(label): int(count) for label, count in zip(unique_labels, counts)
            },
            "class_percentages": {
                int(label): float(count / len(self.data) * 100)
                for label, count in zip(unique_labels, counts)
            },
        }

        return stats


# Wrap both MNIST splits in the custom dataset class.
mnist_dataset = MNISTDataset(mnist_full)
test_dataset = MNISTDataset(mnist_test)

# Gather the label statistics of each split.
train_stats = mnist_dataset.get_stats()
test_stats = test_dataset.get_stats()

# Print the same report layout for both splits.
for heading, split_stats in (
    ("Training Dataset Statistics:", train_stats),
    ("\nTest Dataset Statistics:", test_stats),
):
    print(heading)
    print(f"Total samples: {split_stats['total_samples']}")
    print(f"Number of classes: {split_stats['num_classes']}")
    print("Class distribution:")
    for label, count in split_stats["class_distribution"].items():
        print(
            f"  Class {label}: {count} samples ({split_stats['class_percentages'][label]:.2f}%)"
        )

# Split Data and Create DataLoaders

In [None]:
# Split the training data into train and validation sets.
# A dedicated, explicitly seeded generator pins the partition. With the
# global RNG the split depends on how many random numbers were consumed
# before this cell ran, so re-running the cell would change which samples
# are held out for validation.
split_generator = torch.Generator().manual_seed(42)
train_dataset, val_dataset = random_split(
    mnist_dataset, [0.8, 0.2], generator=split_generator
)

print(f"Training set size: {len(train_dataset)}")
print(f"Validation set size: {len(val_dataset)}")
print(f"Test set size: {len(test_dataset)}")

# Create data loaders; only the training loader reshuffles every epoch.
batch_size = 64

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size)
test_loader = DataLoader(test_dataset, batch_size=batch_size)

# Display a batch of images
dataiter = iter(train_loader)
images, labels = next(dataiter)

print(f"Batch shape: {images.shape}")
print(f"Labels shape: {labels.shape}")

# Display the first 8 images from the batch. zip() stops at the shorter of
# the 2 x 4 grid and the batch, so a batch smaller than 8 cannot raise.
fig, axes = plt.subplots(2, 4, figsize=(10, 5))
for ax, img, label in zip(axes.flat, images, labels):
    ax.imshow(img.squeeze().numpy(), cmap="gray")
    ax.set_title(f"Label: {label.item()}")
    ax.axis("off")
plt.tight_layout()
plt.show()

# Define MLP Model

In [None]:
# Define the MLP model
class MLP(nn.Module):
    """Fully connected classifier with layer widths 784 -> 512 -> 256 -> 128 -> 10.

    The three hidden layers use ReLU; the output layer returns raw logits.
    """

    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(28 * 28, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, 128)
        self.fc4 = nn.Linear(128, 10)

    def forward(self, x):
        """Flatten ``x`` per sample and return one logit per class."""
        hidden = self.flatten(x)
        for layer in (self.fc1, self.fc2, self.fc3):
            hidden = F.relu(layer(hidden))
        return self.fc4(hidden)


# Build the network and move its parameters to the selected device.
model = MLP()
model.to(device)
print(model)

# Cross-entropy on the logits, optimised with Adam.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

# Define Training Function

In [7]:
# Training function
def train_model(
    model,
    train_loader,
    val_loader,
    criterion,
    optimizer,
    num_epochs=30,
    checkpoint_path="best_mlp_model.pth",
):
    """Train ``model`` and checkpoint the weights with the lowest validation loss.

    Each epoch runs one optimisation pass over ``train_loader`` followed by a
    gradient-free pass over ``val_loader``. Batches are moved to the
    module-level ``device``.

    Args:
        model: Network to optimise in place.
        train_loader: Iterable of ``(images, labels)`` training batches.
        val_loader: Iterable of ``(images, labels)`` validation batches.
        criterion: Loss called as ``criterion(outputs, labels)``. It is assumed
            to return the batch mean, which is why every batch loss is
            re-weighted by the batch size before averaging.
        optimizer: Optimiser stepped once per training batch.
        num_epochs: Number of epochs to run.
        checkpoint_path: File that receives ``model.state_dict()`` whenever the
            validation loss improves on the best value seen so far.

    Returns:
        ``(train_losses, val_losses, train_accs, val_accs)``: four lists with
        one entry per epoch.

    Raises:
        ZeroDivisionError: If a loader yields no samples.
    """
    train_losses = []
    val_losses = []
    train_accs = []
    val_accs = []

    best_val_loss = float("inf")

    for epoch in range(num_epochs):
        # Training phase
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0

        train_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs} [Train]")
        for images, labels in train_bar:
            images, labels = images.to(device), labels.to(device)

            # Zero the parameter gradients
            optimizer.zero_grad()

            # Forward pass
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Backward pass and optimize
            loss.backward()
            optimizer.step()

            # Statistics
            running_loss += loss.item() * images.size(0)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            train_bar.set_postfix(loss=loss.item(), acc=correct / total)

        # Average over the samples actually processed so that loss and
        # accuracy share one denominator, even if the loader drops or
        # subsamples part of its dataset.
        epoch_train_loss = running_loss / total
        epoch_train_acc = correct / total
        train_losses.append(epoch_train_loss)
        train_accs.append(epoch_train_acc)

        # Validation phase
        model.eval()
        running_loss = 0.0
        correct = 0
        total = 0

        with torch.no_grad():
            val_bar = tqdm(val_loader, desc=f"Epoch {epoch+1}/{num_epochs} [Val]")
            for images, labels in val_bar:
                images, labels = images.to(device), labels.to(device)

                outputs = model(images)
                loss = criterion(outputs, labels)

                running_loss += loss.item() * images.size(0)
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

                val_bar.set_postfix(loss=loss.item(), acc=correct / total)

        epoch_val_loss = running_loss / total
        epoch_val_acc = correct / total
        val_losses.append(epoch_val_loss)
        val_accs.append(epoch_val_acc)

        print(
            f"Epoch {epoch+1}/{num_epochs} - "
            f"Train Loss: {epoch_train_loss:.4f}, Train Acc: {epoch_train_acc:.4f}, "
            f"Val Loss: {epoch_val_loss:.4f}, Val Acc: {epoch_val_acc:.4f}"
        )

        # Save the best model
        if epoch_val_loss < best_val_loss:
            best_val_loss = epoch_val_loss
            torch.save(model.state_dict(), checkpoint_path)
            print(
                f"Model saved at epoch {epoch+1} with validation loss: {best_val_loss:.4f}"
            )

    return train_losses, val_losses, train_accs, val_accs

# Train the Model

In [None]:
# Train the baseline model and time the whole run.
start_time = time.time()
train_losses, val_losses, train_accs, val_accs = train_model(
    model, train_loader, val_loader, criterion, optimizer, num_epochs=30
)
end_time = time.time()
print(f"Training completed in {end_time - start_time:.2f} seconds")

# Learning curves: loss on the left panel, accuracy on the right.
fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(12, 5))

loss_ax.plot(train_losses, label="Training Loss")
loss_ax.plot(val_losses, label="Validation Loss")
loss_ax.set_xlabel("Epoch")
loss_ax.set_ylabel("Loss")
loss_ax.set_title("Training and Validation Loss")
loss_ax.legend()

acc_ax.plot(train_accs, label="Training Accuracy")
acc_ax.plot(val_accs, label="Validation Accuracy")
acc_ax.set_xlabel("Epoch")
acc_ax.set_ylabel("Accuracy")
acc_ax.set_title("Training and Validation Accuracy")
acc_ax.legend()

fig.tight_layout()
plt.show()

# Model Evaluation Function

In [None]:
# Load the best model
best_model = MLP().to(device)
# map_location remaps the stored tensors onto the current device, so a
# checkpoint written on a GPU can still be loaded on a CPU-only kernel.
# weights_only=True restricts unpickling to tensors and plain containers,
# which is all a state_dict holds.
best_state = torch.load("best_mlp_model.pth", map_location=device, weights_only=True)
best_model.load_state_dict(best_state)
best_model.eval()


# Evaluate on test set
def evaluate_model(model, data_loader, criterion):
    """Score ``model`` on every batch of ``data_loader`` without tracking gradients.

    Batches are moved to the module-level ``device``.

    Returns:
        ``(loss, accuracy, f1_micro, f1_macro, predictions, labels)`` where
        ``loss`` is the summed batch-size-weighted loss divided by
        ``len(data_loader.dataset)`` and the last two items are flat lists
        of predicted and true class indices.
    """
    model.eval()
    loss_sum = 0.0
    predictions, targets = [], []

    with torch.no_grad():
        for batch_images, batch_labels in tqdm(data_loader, desc="Evaluating"):
            batch_images = batch_images.to(device)
            batch_labels = batch_labels.to(device)

            logits = model(batch_images)
            batch_loss = criterion(logits, batch_labels)
            loss_sum += batch_loss.item() * batch_images.size(0)

            # Index of the largest logit is the predicted class.
            predicted = torch.max(logits, 1)[1]
            predictions.extend(predicted.cpu().numpy())
            targets.extend(batch_labels.cpu().numpy())

    # Calculate metrics
    mean_loss = loss_sum / len(data_loader.dataset)
    accuracy = accuracy_score(targets, predictions)
    f1_micro = f1_score(targets, predictions, average="micro")
    f1_macro = f1_score(targets, predictions, average="macro")

    return mean_loss, accuracy, f1_micro, f1_macro, predictions, targets


# Score the best baseline checkpoint on the held-out test set.
(
    test_loss,
    test_acc,
    test_f1_micro,
    test_f1_macro,
    all_preds,
    all_labels,
) = evaluate_model(best_model, test_loader, criterion)

# Headline metrics, four decimals each.
print(f"Test Loss: {test_loss:.4f}")
print(f"Test Accuracy: {test_acc:.4f}")
print(f"Test F1 Score (Micro): {test_f1_micro:.4f}")
print(f"Test F1 Score (Macro): {test_f1_macro:.4f}")

# Per-class precision / recall / F1 table.
print("\nClassification Report:")
print(classification_report(y_true=all_labels, y_pred=all_preds, digits=4))

# Per-Class Evaluation and Confusion Matrix

In [None]:
# Compute confusion matrix (rows: true digit, columns: predicted digit)
cm = confusion_matrix(all_labels, all_preds)

# Plot confusion matrix
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(
    cm,
    annot=True,
    fmt="d",
    cmap="Blues",
    xticklabels=range(10),
    yticklabels=range(10),
    ax=ax,
)
ax.set_xlabel("Predicted")
ax.set_ylabel("True")
ax.set_title("Confusion Matrix")
plt.show()

# Per-class accuracy: correct predictions of a digit / samples of that digit
per_class_acc = cm.diagonal() / cm.sum(axis=1)

# Plot per-class accuracy
fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(range(10), per_class_acc)
ax.set_xlabel("Digit")
ax.set_ylabel("Accuracy")
ax.set_title("Per-Class Accuracy")
ax.set_xticks(range(10))
# Leave headroom above 1.0: the value labels sit just above the bars and
# would otherwise run out of the axes for accuracies close to 1.
ax.set_ylim(0, 1.05)
for digit, acc in enumerate(per_class_acc):
    ax.text(digit, acc + 0.01, f"{acc:.4f}", ha="center")
plt.show()

# Calculate per-class precision, recall, and F1 score
report = classification_report(all_labels, all_preds, output_dict=True)
class_metrics = {
    digit: {
        key: report[str(digit)][key]
        for key in ("precision", "recall", "f1-score", "support")
    }
    for digit in range(10)
}

# Convert to DataFrame for better visualization
df_metrics = pd.DataFrame(class_metrics).T
print("Per-class metrics:")
print(df_metrics)

# Plot per-class metrics. The axes are created first and handed to pandas:
# calling DataFrame.plot() without ``ax`` opens a figure of its own, which
# left the figure created beforehand empty.
fig, ax = plt.subplots(figsize=(12, 6))
df_metrics[["precision", "recall", "f1-score"]].plot(kind="bar", ax=ax)
ax.set_title("Per-Class Metrics")
ax.set_xlabel("Digit")
ax.set_ylabel("Score")
ax.set_ylim(0, 1)
ax.legend(loc="lower right")
fig.tight_layout()
plt.show()

# Define Regularized MLP Model

In [11]:
# Define MLP model with regularization
class RegularizedMLP(nn.Module):
    """MLP (784 -> 512 -> 256 -> 128 -> 10) with optional batch norm and dropout.

    Each hidden stage is Linear -> (BatchNorm1d | Identity) -> ReLU -> Dropout.

    Args:
        dropout_rate: Drop probability shared by the three dropout layers.
        use_batchnorm: When False, the normalisation slots hold ``nn.Identity``.
    """

    def __init__(self, dropout_rate=0.2, use_batchnorm=True):
        super().__init__()
        self.flatten = nn.Flatten()
        self.use_batchnorm = use_batchnorm

        def make_norm(width):
            # Identity keeps the forward pass uniform when batch norm is off.
            return nn.BatchNorm1d(width) if use_batchnorm else nn.Identity()

        # First layer
        self.fc1 = nn.Linear(28 * 28, 512)
        self.bn1 = make_norm(512)
        self.dropout1 = nn.Dropout(dropout_rate)

        # Second layer
        self.fc2 = nn.Linear(512, 256)
        self.bn2 = make_norm(256)
        self.dropout2 = nn.Dropout(dropout_rate)

        # Third layer
        self.fc3 = nn.Linear(256, 128)
        self.bn3 = make_norm(128)
        self.dropout3 = nn.Dropout(dropout_rate)

        # Output layer
        self.fc4 = nn.Linear(128, 10)

    def forward(self, x):
        """Flatten ``x`` per sample and return one logit per class."""
        hidden = self.flatten(x)
        stages = (
            (self.fc1, self.bn1, self.dropout1),
            (self.fc2, self.bn2, self.dropout2),
            (self.fc3, self.bn3, self.dropout3),
        )
        for linear, norm, dropout in stages:
            # ``norm`` is nn.Identity when batch norm is disabled, so it can
            # be applied unconditionally.
            hidden = dropout(F.relu(norm(linear(hidden))))
        return self.fc4(hidden)

# Data Augmentation and Early Stopping

In [None]:
# Training-time pipeline: a small random rotation / shift / zoom on the PIL
# image, followed by tensor conversion and standardisation.
augmentation_transform = transforms.Compose(
    [
        transforms.RandomAffine(degrees=5, translate=(0.1, 0.1), scale=(0.9, 1.1)),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
    ]
)

# Validation/test pipeline: the same conversion and standardisation, with no
# random augmentation.
standard_transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
    ]
)

# Create augmented dataset class
class AugmentedMNIST(Dataset):
    """Re-apply a PIL-based transform pipeline to already-normalised samples.

    The wrapped dataset is expected to yield ``(tensor, label)`` pairs whose
    tensors were standardised as ``(pixel / 255 - mean) / std``. Before the
    transform runs, that standardisation is undone and the 8-bit pixel values
    are restored. Handing the standardised float tensor straight to
    ``ToPILImage`` would multiply it by 255 and cast to uint8, wrapping
    negative and >255 values around, and the pipeline's own ``Normalize``
    would then standardise the image a second time.

    Args:
        dataset: Indexable collection of ``(tensor, label)`` pairs.
        transform: Optional callable that takes a PIL image. When omitted,
            samples are returned unchanged.
        mean: Mean that was used to standardise the wrapped tensors.
        std: Standard deviation that was used to standardise them.
    """

    def __init__(self, dataset, transform=None, mean=0.1307, std=0.3081):
        self.dataset = dataset
        self.transform = transform
        self.mean = mean
        self.std = std

    def __len__(self):
        """Return the number of wrapped samples."""
        return len(self.dataset)

    def _to_uint8(self, img):
        """Undo the standardisation and return the image as a uint8 tensor."""
        pixels = (img * self.std + self.mean) * 255
        # Round before casting: the float round trip lands marginally below
        # the original integer for some values and truncation would lose 1.
        return pixels.round().clamp(0, 255).to(torch.uint8)

    def __getitem__(self, idx):
        """Return ``(image, label)``, passing the image through ``transform`` if set."""
        img, label = self.dataset[idx]

        # Convert tensor to PIL image for transforms
        if self.transform:
            img = transforms.ToPILImage()(self._to_uint8(img).squeeze(0))
            img = self.transform(img)

        return img, label

# Wrap the existing train/validation subsets: only the training subset gets
# the random augmentation pipeline.
aug_train_dataset = AugmentedMNIST(train_dataset, transform=augmentation_transform)
aug_val_dataset = AugmentedMNIST(
    val_dataset, transform=standard_transform
)  # No augmentation for validation

# Create data loaders; the training loader reshuffles every epoch.
aug_train_loader = DataLoader(aug_train_dataset, batch_size=batch_size, shuffle=True)
aug_val_loader = DataLoader(aug_val_dataset, batch_size=batch_size)

# Initialize the regularized model
reg_model = RegularizedMLP(dropout_rate=0.3, use_batchnorm=True)
reg_model.to(device)
print(reg_model)

# Loss and optimizer. These rebind the ``criterion`` / ``optimizer`` names
# used for the baseline model; the optimizer now adds weight decay.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(
    reg_model.parameters(), lr=0.001, weight_decay=1e-4
)  # L2 regularization

# Early stopping class
class EarlyStopping:
    """Signal a stop once the validation loss has stalled for ``patience`` calls.

    A call counts as an improvement only when the loss is lower than the best
    loss seen so far by more than ``min_delta``. Once raised, the
    ``early_stop`` flag is never cleared.
    """

    def __init__(self, patience=5, min_delta=0):
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0
        self.best_loss = float('inf')
        self.early_stop = False

    def __call__(self, val_loss):
        """Record ``val_loss`` and return whether training should stop."""
        threshold = self.best_loss - self.min_delta
        if val_loss < threshold:
            # Improvement: remember it and restart the stall counter.
            self.best_loss, self.counter = val_loss, 0
            return self.early_stop

        self.counter += 1
        if self.counter >= self.patience:
            self.early_stop = True
        return self.early_stop

# Pull one batch from the augmented training loader for a visual check.
dataiter = iter(aug_train_loader)
images, labels = next(dataiter)

# Show the first 8 images of that batch on a 2 x 4 grid (the grid itself has
# exactly eight cells).
fig, axes = plt.subplots(nrows=2, ncols=4, figsize=(10, 5))
for i, ax in enumerate(axes.flat):
    picture = images[i].squeeze().numpy()
    ax.imshow(picture, cmap='gray')
    ax.set_title(f'Label: {labels[i].item()}')
    ax.axis('off')
fig.suptitle('Augmented Training Images')
fig.tight_layout()
plt.show()

# Training Function with Early Stopping

In [13]:
# Training function with early stopping
def train_with_early_stopping(
    model,
    train_loader,
    val_loader,
    criterion,
    optimizer,
    num_epochs=30,
    patience=5,
    min_delta=0,
    checkpoint_path="best_regularized_mlp_model.pth",
):
    """Train ``model`` with best-loss checkpointing and early stopping.

    Each epoch runs one optimisation pass over ``train_loader`` followed by a
    gradient-free pass over ``val_loader``. Batches are moved to the
    module-level ``device``. Training ends after ``num_epochs`` epochs or as
    soon as the ``EarlyStopping`` tracker reports that the validation loss has
    stalled.

    Args:
        model: Network to optimise in place.
        train_loader: Iterable of ``(images, labels)`` training batches.
        val_loader: Iterable of ``(images, labels)`` validation batches.
        criterion: Loss called as ``criterion(outputs, labels)``. It is assumed
            to return the batch mean, which is why every batch loss is
            re-weighted by the batch size before averaging.
        optimizer: Optimiser stepped once per training batch.
        num_epochs: Upper bound on the number of epochs.
        patience: Number of non-improving epochs tolerated before stopping.
        min_delta: Margin by which the validation loss must drop to count as
            an improvement for early stopping.
        checkpoint_path: File that receives ``model.state_dict()`` whenever the
            validation loss falls below the best value seen so far.

    Returns:
        ``(train_losses, val_losses, train_accs, val_accs)``: four lists with
        one entry per completed epoch.

    Raises:
        ZeroDivisionError: If a loader yields no samples.
    """
    train_losses = []
    val_losses = []
    train_accs = []
    val_accs = []

    early_stopping = EarlyStopping(patience=patience, min_delta=min_delta)
    best_val_loss = float("inf")

    for epoch in range(num_epochs):
        # Training phase
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0

        train_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs} [Train]")
        for images, labels in train_bar:
            images, labels = images.to(device), labels.to(device)

            # Zero the parameter gradients
            optimizer.zero_grad()

            # Forward pass
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Backward pass and optimize
            loss.backward()
            optimizer.step()

            # Statistics
            running_loss += loss.item() * images.size(0)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            train_bar.set_postfix(loss=loss.item(), acc=correct / total)

        # Average over the samples actually processed so that loss and
        # accuracy share one denominator, even if the loader drops or
        # subsamples part of its dataset.
        epoch_train_loss = running_loss / total
        epoch_train_acc = correct / total
        train_losses.append(epoch_train_loss)
        train_accs.append(epoch_train_acc)

        # Validation phase
        model.eval()
        running_loss = 0.0
        correct = 0
        total = 0

        with torch.no_grad():
            val_bar = tqdm(val_loader, desc=f"Epoch {epoch+1}/{num_epochs} [Val]")
            for images, labels in val_bar:
                images, labels = images.to(device), labels.to(device)

                outputs = model(images)
                loss = criterion(outputs, labels)

                running_loss += loss.item() * images.size(0)
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

                val_bar.set_postfix(loss=loss.item(), acc=correct / total)

        epoch_val_loss = running_loss / total
        epoch_val_acc = correct / total
        val_losses.append(epoch_val_loss)
        val_accs.append(epoch_val_acc)

        print(
            f"Epoch {epoch+1}/{num_epochs} - "
            f"Train Loss: {epoch_train_loss:.4f}, Train Acc: {epoch_train_acc:.4f}, "
            f"Val Loss: {epoch_val_loss:.4f}, Val Acc: {epoch_val_acc:.4f}"
        )

        # Save the best model
        if epoch_val_loss < best_val_loss:
            best_val_loss = epoch_val_loss
            torch.save(model.state_dict(), checkpoint_path)
            print(
                f"Model saved at epoch {epoch+1} with validation loss: {best_val_loss:.4f}"
            )

        # Check for early stopping
        if early_stopping(epoch_val_loss):
            print(f"Early stopping triggered at epoch {epoch+1}")
            break

    return train_losses, val_losses, train_accs, val_accs

# Train Regularized Model

In [None]:
# Train the regularized model on the augmented loaders and time the run.
start_time = time.time()
reg_train_losses, reg_val_losses, reg_train_accs, reg_val_accs = (
    train_with_early_stopping(
        reg_model,
        aug_train_loader,
        aug_val_loader,
        criterion,
        optimizer,
        num_epochs=150,
        patience=5,
    )
)
end_time = time.time()
print(f"Training completed in {end_time - start_time:.2f} seconds")

# Learning curves: loss on the left panel, accuracy on the right.
fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(12, 5))

loss_ax.plot(reg_train_losses, label="Training Loss")
loss_ax.plot(reg_val_losses, label="Validation Loss")
loss_ax.set_xlabel("Epoch")
loss_ax.set_ylabel("Loss")
loss_ax.set_title("Regularized Model: Training and Validation Loss")
loss_ax.legend()

acc_ax.plot(reg_train_accs, label="Training Accuracy")
acc_ax.plot(reg_val_accs, label="Validation Accuracy")
acc_ax.set_xlabel("Epoch")
acc_ax.set_ylabel("Accuracy")
acc_ax.set_title("Regularized Model: Training and Validation Accuracy")
acc_ax.legend()

fig.tight_layout()
plt.show()

# Evaluate Regularized Model

In [None]:
# Load the best regularized model
best_reg_model = RegularizedMLP(dropout_rate=0.3, use_batchnorm=True).to(device)
# map_location remaps the stored tensors onto the current device, so a
# checkpoint written on a GPU can still be loaded on a CPU-only kernel.
# weights_only=True restricts unpickling to tensors and plain containers,
# which is all a state_dict holds.
best_reg_state = torch.load(
    "best_regularized_mlp_model.pth", map_location=device, weights_only=True
)
best_reg_model.load_state_dict(best_reg_state)
best_reg_model.eval()

# Evaluate the regularized model
(
    reg_test_loss,
    reg_test_acc,
    reg_test_f1_micro,
    reg_test_f1_macro,
    reg_all_preds,
    reg_all_labels,
) = evaluate_model(best_reg_model, test_loader, criterion)

print(f"Regularized Model Test Loss: {reg_test_loss:.4f}")
print(f"Regularized Model Test Accuracy: {reg_test_acc:.4f}")
print(f"Regularized Model Test F1 Score (Micro): {reg_test_f1_micro:.4f}")
print(f"Regularized Model Test F1 Score (Macro): {reg_test_f1_macro:.4f}")

# Print classification report for regularized model
print("\nRegularized Model Classification Report:")
print(classification_report(reg_all_labels, reg_all_preds, digits=4))

# Compute confusion matrix for regularized model
# (rows: true digit, columns: predicted digit)
reg_cm = confusion_matrix(reg_all_labels, reg_all_preds)

# Plot confusion matrix for regularized model
plt.figure(figsize=(10, 8))
sns.heatmap(
    reg_cm,
    annot=True,
    fmt="d",
    cmap="Blues",
    xticklabels=range(10),
    yticklabels=range(10),
)
plt.xlabel("Predicted")
plt.ylabel("True")
plt.title("Regularized Model: Confusion Matrix")
plt.show()

# Compare Models

In [None]:
# Put the test metrics of both models side by side.
comparison = {
    "Model": ["Base MLP", "Regularized MLP"],
    "Test Loss": [test_loss, reg_test_loss],
    "Test Accuracy": [test_acc, reg_test_acc],
    "Test F1 (Micro)": [test_f1_micro, reg_test_f1_micro],
    "Test F1 (Macro)": [test_f1_macro, reg_test_f1_macro],
}

comparison_df = pd.DataFrame(comparison)
print("Model Comparison:")
print(comparison_df)

# One bar chart per metric on a 2 x 2 grid.
metrics = ["Test Loss", "Test Accuracy", "Test F1 (Micro)", "Test F1 (Macro)"]
fig, axes = plt.subplots(2, 2, figsize=(14, 8))

for ax, metric in zip(axes.flat, metrics):
    values = comparison_df[metric]
    ax.bar(["Base MLP", "Regularized MLP"], values)
    ax.set_title(metric)

    # Scores are bounded by 1; the loss panel scales to its largest bar.
    if metric != "Test Loss":
        upper = 1
    else:
        upper = max(values) * 1.2
    ax.set_ylim(0, upper)

    # Add value labels
    for position, value in enumerate(values):
        ax.text(position, value + 0.01, f"{value:.4f}", ha="center")

fig.tight_layout()
plt.show()

# Summary of regularization techniques used
summary_lines = (
    "\nSummary of Regularization Techniques Used:",
    "1. Dropout (rate=0.3): Prevents overfitting by randomly deactivating neurons",
    "2. Batch Normalization: Normalizes layer inputs, stabilizes and accelerates training",
    "3. Weight Decay (L2 regularization, 1e-4): Penalizes large weights",
    "4. Early Stopping (patience=5): Prevents overfitting by stopping when validation loss stops improving",
    "5. Data Augmentation: Random affine transformations (rotation, translation, scaling)",
)
for line in summary_lines:
    print(line)