# In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from torch.optim.lr_scheduler import StepLR

# This script is GPU-only: pick the CUDA device when PyTorch can see one,
# otherwise abort immediately instead of continuing on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    raise RuntimeError("CUDA is not available. Please ensure a GPU is available and CUDA is properly installed.")

# CIFAR-10 (used as an example dataset).
# Preprocessing: convert each image to a tensor, then normalise every one of
# the three channels with mean 0.5 and standard deviation 0.5.
_normalize = transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
transform = transforms.Compose([transforms.ToTensor(), _normalize])

# Both splits share the storage location, the download flag and the transform;
# only the `train` flag differs.
_cifar_kwargs = {"root": './data', "download": True, "transform": transform}
train_dataset = datasets.CIFAR10(train=True, **_cifar_kwargs)
test_dataset = datasets.CIFAR10(train=False, **_cifar_kwargs)

# Define CNN model
class CNN(nn.Module):
    """Small two-block convolutional classifier producing 10 raw logits.

    Layout: (conv -> ReLU -> 2x2 pool) applied twice, followed by a 256-unit
    hidden linear layer and a 10-way linear output layer. ``fc1`` is sized for
    64 feature maps of 8 x 8, i.e. 3-channel 32 x 32 inputs after the two
    2 x 2 pooling steps.

    Args:
        kernel_size: Side length of the square kernel used by both
            convolutions; each convolution pads by ``kernel_size // 2``.
        pooling_type: ``"max"`` selects ``nn.MaxPool2d(2, 2)``, ``"avg"``
            selects ``nn.AvgPool2d(2, 2)``.

    Raises:
        ValueError: If ``pooling_type`` is neither ``"max"`` nor ``"avg"``.
    """

    def __init__(self, kernel_size, pooling_type="max"):
        super().__init__()
        if pooling_type == "max":
            self.pool = nn.MaxPool2d(2, 2)
        elif pooling_type == "avg":
            self.pool = nn.AvgPool2d(2, 2)
        else:
            # Fail at construction time; otherwise ``self.pool`` would be
            # missing and the error would only surface inside ``forward``.
            raise ValueError(
                f"Unsupported pooling_type {pooling_type!r}; expected 'max' or 'avg'."
            )

        padding = kernel_size // 2
        self.conv1 = nn.Conv2d(3, 32, kernel_size=kernel_size, padding=padding)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=kernel_size, padding=padding)
        self.fc1 = nn.Linear(64 * 8 * 8, 256)
        self.fc2 = nn.Linear(256, 10)

    def forward(self, x):
        """Return the class logits for a batch of images ``x``."""
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        # Flatten everything except the batch dimension. Unlike
        # ``view(-1, 64 * 8 * 8)`` this never folds surplus spatial elements
        # into the batch dimension: a wrongly sized input fails at ``fc1``.
        x = torch.flatten(x, 1)
        x = torch.relu(self.fc1(x))
        return self.fc2(x)

# Training function
def train_model(model, optimizer, scheduler, epochs=10, early_stopping_patience=5):
    """Train ``model`` on the module-level ``train_dataset`` with cross-entropy loss.

    The model is moved to the module-level ``device``. Batches of 64 shuffled
    samples are used, and ``scheduler.step()`` is called once per epoch.

    Args:
        model: Network to optimise; updated in place.
        optimizer: Optimiser already bound to ``model``'s parameters.
        scheduler: Learning-rate scheduler wrapping ``optimizer``.
        epochs: Maximum number of passes over the training data.
        early_stopping_patience: Stop once the mean training loss has failed
            to improve for this many consecutive epochs. The training loss is
            monitored because this function receives no validation data.
            ``None`` or a non-positive value disables early stopping.

    Returns:
        A list with the mean training loss of every completed epoch.
    """
    criterion = nn.CrossEntropyLoss()
    model.to(device)

    # Build the loader once; with shuffle=True the sample order is still
    # re-drawn every time the loader is iterated.
    train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

    use_early_stopping = (
        early_stopping_patience is not None and early_stopping_patience > 0
    )
    epoch_losses = []
    best_loss = float("inf")
    epochs_without_improvement = 0

    for epoch in range(epochs):
        model.train()
        # Accumulate on the device so the host only synchronises once per epoch.
        loss_sum = torch.zeros((), device=device)
        samples_seen = 0

        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            batch_size = labels.size(0)
            # The criterion returns the batch mean; weight it by the batch
            # size so a smaller final batch does not skew the epoch average.
            loss_sum += loss.detach() * batch_size
            samples_seen += batch_size

        scheduler.step()

        mean_loss = loss_sum.item() / max(samples_seen, 1)
        epoch_losses.append(mean_loss)
        print(f"Epoch {epoch + 1}/{epochs} - train loss: {mean_loss:.4f}")

        if not use_early_stopping:
            continue
        if mean_loss < best_loss:
            best_loss = mean_loss
            epochs_without_improvement = 0
        else:
            epochs_without_improvement += 1
            if epochs_without_improvement >= early_stopping_patience:
                print(
                    f"Early stopping after epoch {epoch + 1}: no improvement "
                    f"in {early_stopping_patience} consecutive epochs."
                )
                break

    return epoch_losses

# Hyperparameter grid: every combination of the three lists below is trained.
kernel_sizes = [3, 5, 7]
pooling_types = ["max", "avg"]
epochs_options = [5, 50, 100]

# SGD sweep. Runs are enumerated kernel size first, then pooling type, then
# epoch budget; each run starts from a freshly initialised model, optimiser
# and learning-rate schedule.
_sgd_runs = [
    (k, p, e)
    for k in kernel_sizes
    for p in pooling_types
    for e in epochs_options
]

for kernel_size, pooling, epochs in _sgd_runs:
    model = CNN(kernel_size=kernel_size, pooling_type=pooling).to(device)
    optimizer = optim.SGD(model.parameters(), lr=0.01)
    # Multiply the learning rate by 0.1 every 10 scheduler steps.
    scheduler = StepLR(optimizer, step_size=10, gamma=0.1)

    print(f"Training with Kernel Size: {kernel_size}, Pooling: {pooling}, Epochs: {epochs}")
    train_model(model, optimizer, scheduler, epochs=epochs)
