In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Pick the compute device: CUDA when PyTorch reports one, otherwise the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Define the neural network model
class MyModel(nn.Module):
    def __init__(self):
        super(MyModel, self).__init__()
        self.fc1 = nn.Linear(28*28, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 10)

    def forward(self, x):
        x = x.view(-1, 28*28)  # Flatten the 28x28 images to a 1D tensor
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# Fix PyTorch's global seed so repeated runs start from the same RNG state
torch.manual_seed(42)

# Mini-batch size shared by the training and test loaders
batch_size = 8

# Preprocessing applied to every image: convert to a tensor, then normalize
# the single channel with mean 0.5 and std 0.5
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5,), std=(0.5,)),
    ]
)

# MNIST splits stored under ./data; only the training split requests a download
train_dataset = datasets.MNIST(
    './data',
    train=True,
    download=True,
    transform=transform,
)
test_dataset = datasets.MNIST(
    './data',
    train=False,
    transform=transform,
)

# Batched iterators: training batches are reshuffled, test batches keep dataset order
train_loader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=batch_size)
test_loader = DataLoader(dataset=test_dataset, shuffle=False, batch_size=batch_size)

# Build the network, then place its parameters on the selected device
# (GPU or CPU, whichever `device` resolved to)
model = MyModel()
model = model.to(device)

# Cross-entropy objective on the model's logits, optimized with Adam (lr = 0.001)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=0.001)

# Training loop: each epoch runs one optimization pass over train_loader,
# then one evaluation pass over test_loader, and prints the epoch metrics.
n_epochs = 20

for epoch in range(n_epochs):
    # ---- Training pass: one optimizer step per mini-batch ----
    model.train()
    train_loss = 0.0
    train_samples = 0
    for data, target in train_loader:
        data, target = data.to(device), target.to(device)  # Move batch to the model's device
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        # criterion yields the mean loss of the batch; weight it by the batch
        # size so a smaller final batch does not skew the epoch average.
        n_batch = data.size(0)
        train_loss += loss.item() * n_batch
        train_samples += n_batch

    # ---- Evaluation pass: no gradients, accumulate loss and correct predictions ----
    model.eval()
    correct = 0
    test_loss = 0.0
    test_samples = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)  # Move batch to the model's device
            output = model(data)
            n_batch = data.size(0)
            test_loss += criterion(output, target).item() * n_batch
            # Predicted class = index of the largest logit in each row
            correct += (output.argmax(dim=1) == target).sum().item()
            test_samples += n_batch

    # Per-sample averages over everything seen in this epoch
    train_loss /= train_samples
    test_loss /= test_samples
    test_accuracy = 100.0 * correct / test_samples

    print(f'Epoch {epoch + 1}/{n_epochs}, Train Loss: {train_loss:.6f}, Test Loss: {test_loss:.6f}, Test Accuracy: {test_accuracy:.2f}%')

# Save the trained model's state dictionary to a file
torch.save(model.state_dict(), "mnist_model_weights.pth")


Epoch 1/20, Train Loss: 0.313197, Test Loss: 0.218849, Test Accuracy: 93.07%
Epoch 2/20, Train Loss: 0.170207, Test Loss: 0.180970, Test Accuracy: 94.39%
Epoch 3/20, Train Loss: 0.136954, Test Loss: 0.116570, Test Accuracy: 96.38%
Epoch 4/20, Train Loss: 0.118577, Test Loss: 0.192564, Test Accuracy: 94.32%
Epoch 5/20, Train Loss: 0.106691, Test Loss: 0.130408, Test Accuracy: 96.05%
Epoch 6/20, Train Loss: 0.098725, Test Loss: 0.127012, Test Accuracy: 96.41%
Epoch 7/20, Train Loss: 0.094272, Test Loss: 0.144748, Test Accuracy: 95.78%
Epoch 8/20, Train Loss: 0.089560, Test Loss: 0.138768, Test Accuracy: 96.23%
Epoch 9/20, Train Loss: 0.082821, Test Loss: 0.114834, Test Accuracy: 96.95%
Epoch 10/20, Train Loss: 0.080498, Test Loss: 0.135793, Test Accuracy: 96.46%
Epoch 11/20, Train Loss: 0.076011, Test Loss: 0.124832, Test Accuracy: 96.71%
Epoch 12/20, Train Loss: 0.072820, Test Loss: 0.102025, Test Accuracy: 97.30%
Epoch 13/20, Train Loss: 0.069944, Test Loss: 0.112996, Test Accuracy: 96