# In [None]:

import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from sklearn.model_selection import ParameterGrid

# Define the model
class NeuralNetwork(nn.Module):
    """Fully connected classifier with a single ReLU hidden layer.

    Each input is flattened into rows of ``in_features`` values, passed
    through ``fc1 -> ReLU -> fc2`` and returned as raw class scores; no
    softmax is applied here.

    Args:
        hidden_units: Width of the hidden layer.
        in_features: Number of values per flattened sample. Defaults to 784,
            the size this file uses for its MNIST inputs.
        num_classes: Number of scores produced per sample. Defaults to 10.
    """

    def __init__(self, hidden_units: int, in_features: int = 784, num_classes: int = 10):
        super().__init__()
        self.fc1 = nn.Linear(in_features, hidden_units)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_units, num_classes)

    def forward(self, x):
        """Return a ``(rows, num_classes)`` tensor of scores for ``x``.

        ``x`` may have any shape whose total element count is a multiple of
        ``in_features``; it is flattened to ``(-1, in_features)`` first.
        """
        # reshape() rather than view(): view() raises on non-contiguous
        # tensors (e.g. a transposed batch), reshape() copies when it has to.
        x = x.reshape(-1, self.fc1.in_features)
        x = self.relu(self.fc1(x))
        return self.fc2(x)

# Fetch both MNIST splits (downloading if needed) and wrap them in batch loaders
_MNIST_ROOT = '~/.pytorch/MNIST_data/'
_BATCH_SIZE = 64

transform = transforms.Compose([transforms.ToTensor()])

train_dataset = datasets.MNIST(
    _MNIST_ROOT, train=True, download=True, transform=transform
)
test_dataset = datasets.MNIST(
    _MNIST_ROOT, train=False, download=True, transform=transform
)

# Only the training loader shuffles; the test loader is built with shuffle=False
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=_BATCH_SIZE, shuffle=True
)
test_loader = torch.utils.data.DataLoader(
    test_dataset, batch_size=_BATCH_SIZE, shuffle=False
)

# Define the training function
def train(model, device, loader, optimizer, criterion) -> float:
    """Run one optimisation pass over ``loader`` and return the mean batch loss.

    Args:
        model: Module to optimise; it is switched to training mode.
        device: Device each batch is moved to before the forward pass.
        loader: Iterable yielding ``(data, target)`` batches. It does not
            need to support ``len()``, so generators are accepted.
        optimizer: Optimizer that is zeroed and stepped once per batch.
        criterion: Callable ``criterion(output, target)`` returning a
            single-element loss tensor.

    Returns:
        The sum of the per-batch loss values divided by the number of
        batches that were processed.

    Raises:
        ZeroDivisionError: If ``loader`` yields no batches.
    """
    model.train()
    total_loss = 0.0
    num_batches = 0
    for data, target in loader:
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        loss = criterion(model(data), target)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        # Count while iterating instead of calling len(loader), which is
        # unavailable for generators and some iterable-style loaders.
        num_batches += 1
    return total_loss / num_batches

# Define the testing function
def test(model, device, loader, criterion):
    """Evaluate ``model`` on ``loader`` without tracking gradients.

    Args:
        model: Module to evaluate; it is switched to evaluation mode.
        device: Device each batch is moved to before the forward pass.
        loader: Iterable yielding ``(data, target)`` batches. It does not
            need to support ``len()``, so generators are accepted.
        criterion: Callable ``criterion(output, target)`` returning a
            single-element loss tensor.

    Returns:
        A ``(mean_batch_loss, accuracy)`` tuple. The loss is the sum of the
        per-batch loss values divided by the number of batches; accuracy is
        the fraction of samples whose highest-scoring class (along dim 1)
        equals the target.

    Raises:
        ZeroDivisionError: If ``loader`` yields no batches.
    """
    model.eval()
    total_loss = 0.0
    num_batches = 0
    correct = 0
    total = 0
    with torch.no_grad():
        for data, target in loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            total_loss += criterion(output, target).item()
            # Count while iterating instead of calling len(loader), which is
            # unavailable for generators and some iterable-style loaders.
            num_batches += 1
            predicted = output.argmax(dim=1)
            correct += (predicted == target).sum().item()
            total += target.size(0)
    accuracy = correct / total
    return total_loss / num_batches, accuracy


# The name matches pytest's default "test*" pattern; opt out of collection so
# importing this helper into a test module does not register it as a test.
test.__test__ = False

# Search space: candidate hidden-layer widths tried by the grid search
param_grid = dict(hidden_units=[64, 128, 256, 512])

# Grid search: train a fresh model for every configuration, keep the best one
criterion = nn.CrossEntropyLoss()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

best_accuracy, best_hidden_units = 0, None

# NOTE(review): configurations are ranked on test_loader, i.e. the same split
# whose accuracy is reported as "best", so that figure is optimistically
# biased; a held-out validation split would avoid this.
for params in ParameterGrid(param_grid):
    # New weights and a new optimizer for each candidate width
    model = NeuralNetwork(params['hidden_units'])
    model = model.to(device)
    optimizer = optim.SGD(model.parameters(), lr=0.01)

    # Fixed budget of 10 passes over the training data; only the loss of the
    # final pass remains bound to train_loss afterwards
    for epoch in range(10):
        train_loss = train(model, device, train_loader, optimizer, criterion)

    test_loss, test_accuracy = test(model, device, test_loader, criterion)
    print(f"Hidden Units: {params['hidden_units']}, Test Accuracy: {test_accuracy:.4f}")

    # Strict comparison: on a tie the earlier configuration is kept
    if test_accuracy > best_accuracy:
        best_accuracy, best_hidden_units = test_accuracy, params['hidden_units']

print(f"Best Hidden Units: {best_hidden_units}, Best Accuracy: {best_accuracy:.4f}")
