In [2]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim


In [3]:
# Step 2: Build the preprocessing pipeline applied to every image:
# convert to a tensor, then normalize with mean 0.5 and std 0.5.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5,), std=(0.5,)),
    ]
)


In [4]:
# Step 3: Fetch the MNIST training split (downloaded into ./data if absent)
# and wrap it in a loader that yields shuffled mini-batches of 64 samples.
trainset = torchvision.datasets.MNIST(
    root='./data',
    train=True,
    transform=transform,
    download=True,
)
trainloader = torch.utils.data.DataLoader(
    dataset=trainset,
    batch_size=64,
    shuffle=True,
)


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to ./data\MNIST\raw\train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 11237529.12it/s]


Extracting ./data\MNIST\raw\train-images-idx3-ubyte.gz to ./data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to ./data\MNIST\raw\train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 294946.47it/s]


Extracting ./data\MNIST\raw\train-labels-idx1-ubyte.gz to ./data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to ./data\MNIST\raw\t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 3257287.04it/s]


Extracting ./data\MNIST\raw\t10k-images-idx3-ubyte.gz to ./data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to ./data\MNIST\raw\t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<?, ?it/s]

Extracting ./data\MNIST\raw\t10k-labels-idx1-ubyte.gz to ./data\MNIST\raw






In [5]:
# Step 4: Fetch the MNIST test split (downloaded into ./data if absent)
# and wrap it in a loader that yields mini-batches of 64 in a fixed order.
testset = torchvision.datasets.MNIST(
    root='./data',
    train=False,
    transform=transform,
    download=True,
)
testloader = torch.utils.data.DataLoader(
    dataset=testset,
    batch_size=64,
    shuffle=False,
)


In [6]:
# Step 5: Define the neural network architecture
class Net(nn.Module):
    """Three-layer fully connected classifier that outputs raw class logits.

    Args:
        input_size: Number of features per sample once flattened
            (default 28 * 28).
        hidden1: Output width of the first fully connected layer (default 128).
        hidden2: Output width of the second fully connected layer (default 64).
        num_classes: Number of output logits, one per class (default 10).
    """

    def __init__(self, input_size=28 * 28, hidden1=128, hidden2=64, num_classes=10):
        super().__init__()
        # First fully connected layer: input_size -> hidden1
        self.fc1 = nn.Linear(input_size, hidden1)
        # Second fully connected layer: hidden1 -> hidden2
        self.fc2 = nn.Linear(hidden1, hidden2)
        # Output layer: hidden2 -> num_classes
        self.fc3 = nn.Linear(hidden2, num_classes)

    def forward(self, x):
        """Flatten ``x`` to rows of ``input_size`` features and return logits.

        Returns:
            Tensor of shape ``(rows, num_classes)`` holding unnormalized scores.
        """
        # reshape (rather than view) so non-contiguous inputs are accepted too
        x = x.reshape(-1, self.fc1.in_features)
        # ReLU after each hidden layer
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        # No activation on the output: nn.CrossEntropyLoss applies log-softmax itself
        return self.fc3(x)


# Step 6: Instantiate the neural network
# Seed PyTorch's global RNG first so the initial weights (and any later draws
# from the global RNG) repeat on every Restart & Run All.
SEED = 0
torch.manual_seed(SEED)
net = Net()


In [7]:
# Step 7: Set up the training objective and the parameter update rule
# Cross-entropy loss for multi-class classification
criterion = nn.CrossEntropyLoss()
# Stochastic gradient descent with momentum over all of the network's parameters
optimizer = optim.SGD(
    params=net.parameters(),
    lr=0.01,
    momentum=0.9,
)


In [10]:

# Step 8: Train the network
# NOTE: `net` and `optimizer` are created in earlier cells, so re-running this
# cell continues training from the current weights rather than starting over.
# Re-run the model and optimizer cells first to train from scratch.
NUM_EPOCHS = 5  # Number of full passes over the training set

net.train()  # Make training mode explicit before updating weights
for epoch in range(NUM_EPOCHS):
    running_loss = 0.0
    for inputs, labels in trainloader:
        # Zero the parameter gradients left over from the previous step
        optimizer.zero_grad()
        # Forward pass: compute predicted outputs by passing inputs to the model
        outputs = net(inputs)
        # Calculate the loss
        loss = criterion(outputs, labels)
        # Backward pass: compute gradient of the loss with respect to model parameters
        loss.backward()
        # Perform a single optimization step (parameter update)
        optimizer.step()
        # Accumulate the batch loss so the epoch average can be reported
        running_loss += loss.item()
    # Report the mean per-batch loss for this epoch
    print(f'Epoch {epoch + 1}, Loss: {running_loss / len(trainloader)}')

print('Finished Training')

Epoch 1, Loss: 0.07732138251746769
Epoch 2, Loss: 0.0663474664965303
Epoch 3, Loss: 0.05847364484602069
Epoch 4, Loss: 0.05160121161734928
Epoch 5, Loss: 0.04575641802114957
Finished Training


In [13]:

# Step 9: Test the network
correct = 0
total = 0
# Switch to evaluation mode. Net has only Linear layers, so this changes nothing
# today, but it keeps the cell correct if dropout or batch norm is added later.
net.eval()
with torch.no_grad():  # Disable gradient calculation for testing
    for inputs, labels in testloader:
        outputs = net(inputs)
        # Predicted class = index of the largest logit in each row
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

# Report the number of images actually evaluated instead of a hard-coded count
print(f'Accuracy of the network on the {total} test images: {100 * correct / total} %')

Accuracy of the network on the 10000 test images: 97.67 %
