In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [2]:
# Per-image preprocessing: convert to a tensor, then normalize the single
# channel with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5,), std=(0.5,)),
])

# MNIST train and test splits, stored under ./data (download requested for both).
trainset = torchvision.datasets.MNIST(
    root='./data', train=True, download=True, transform=transform
)
testset = torchvision.datasets.MNIST(
    root='./data', train=False, download=True, transform=transform
)

# Mini-batches of 64 samples; only the training loader shuffles.
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)
testloader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=False)


In [3]:
class MNet(nn.Module):
    """Three-layer fully connected classifier: 784 -> 128 -> 64 -> 10.

    The input is reshaped to rows of 28*28 = 784 values, passed through two
    ReLU-activated hidden layers, and the 10-way output is returned as
    log-probabilities (log_softmax over dim 1).
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(28 * 28, 128)  # flattened image -> hidden 1
        self.fc2 = nn.Linear(128, 64)       # hidden 1 -> hidden 2
        self.fc3 = nn.Linear(64, 10)        # hidden 2 -> one score per class

    def forward(self, x):
        """Return a (rows, 10) tensor of log-probabilities for input `x`."""
        flat = x.view(-1, 784)
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        # The output layer has no ReLU; its scores go straight to log_softmax.
        scores = self.fc3(hidden)
        return F.log_softmax(scores, dim=1)

In [4]:
# NOTE(review): this cell declares MNet a second time; the definition in the
# cell executed last is the one `MNet()` resolves to. Consider keeping one cell.
class MNet(nn.Module):
    """MNIST-sized multilayer perceptron.

    Layers: Linear(784, 128) -> ReLU -> Linear(128, 64) -> ReLU ->
    Linear(64, 10) -> log_softmax over dim 1.
    """

    def __init__(self):
        super().__init__()
        # 28x28 pixels flattened into 784 input features.
        self.fc1 = nn.Linear(784, 128)
        self.fc2 = nn.Linear(128, 64)
        # Ten outputs, one per class.
        self.fc3 = nn.Linear(64, 10)

    def forward(self, x):
        """Flatten `x` to rows of 784 values and return log-probabilities."""
        x = x.view(-1, 28 * 28)

        # Both hidden layers are followed by ReLU.
        for hidden_layer in (self.fc1, self.fc2):
            x = F.relu(hidden_layer(x))

        # The output layer feeds log_softmax directly, with no ReLU in between.
        return F.log_softmax(self.fc3(x), dim=1)

In [5]:
# Seed the global torch RNG before building the model so the random weight
# initialisation is repeatable across "Restart & Run All".
# NOTE(review): DataLoader shuffling presumably also draws from this RNG - confirm.
torch.manual_seed(0)

net = MNet()

# MNet.forward already returns log-probabilities (it ends in log_softmax), so
# the matching loss is NLLLoss. CrossEntropyLoss would apply log_softmax a
# second time on top of the model's own.
criterion = nn.NLLLoss()

# Adam over all trainable parameters of the network, learning rate 1e-3.
optimizer = optim.Adam(net.parameters(), lr=0.001)

In [7]:
num_epochs = 10
log_interval = 100  # report the mean loss once per this many batches

# Train `net` for `num_epochs` passes over `trainloader`. The mean loss of each
# `log_interval`-batch window is printed and written to run_log.txt (the file
# is truncated at the start of every run because of mode "w").
with open("run_log.txt", "w") as log_file:
    for epoch in range(num_epochs):
        net.train()  # make sure the model is in training mode for this epoch
        running_loss = 0.0
        for i, (inputs, labels) in enumerate(trainloader):
            optimizer.zero_grad()

            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            if (i + 1) % log_interval == 0:
                # Build the message once and reuse it for the console and the file.
                log_message = f'Epoch {epoch + 1}, Batch {i + 1}, Loss: {running_loss / log_interval:.3f}'
                print(log_message)
                log_file.write(log_message + '\n')
                running_loss = 0.0

        # Checkpoint once per epoch, after the last batch, so the file holds the
        # end-of-epoch weights. Saving inside the logging branch rewrote the same
        # file on every report and left it at the last multiple-of-100 batch.
        # The file name keeps the 0-based epoch index used before.
        torch.save(net.state_dict(), f'epoch_{epoch}.pth')
print('Finished Training')


Epoch 1, Batch 100, Loss: 0.449
Epoch 1, Batch 200, Loss: 0.369
Epoch 1, Batch 300, Loss: 0.344
Epoch 1, Batch 400, Loss: 0.334
Epoch 1, Batch 500, Loss: 0.295
Epoch 1, Batch 600, Loss: 0.291
Epoch 1, Batch 700, Loss: 0.257
Epoch 1, Batch 800, Loss: 0.238
Epoch 1, Batch 900, Loss: 0.220
Epoch 2, Batch 100, Loss: 0.224
Epoch 2, Batch 200, Loss: 0.199
Epoch 2, Batch 300, Loss: 0.184
Epoch 2, Batch 400, Loss: 0.177
Epoch 2, Batch 500, Loss: 0.195
Epoch 2, Batch 600, Loss: 0.171
Epoch 2, Batch 700, Loss: 0.163
Epoch 2, Batch 800, Loss: 0.160
Epoch 2, Batch 900, Loss: 0.157
Epoch 3, Batch 100, Loss: 0.140
Epoch 3, Batch 200, Loss: 0.149
Epoch 3, Batch 300, Loss: 0.160
Epoch 3, Batch 400, Loss: 0.143
Epoch 3, Batch 500, Loss: 0.144
Epoch 3, Batch 600, Loss: 0.133
Epoch 3, Batch 700, Loss: 0.130
Epoch 3, Batch 800, Loss: 0.110
Epoch 3, Batch 900, Loss: 0.117
Epoch 4, Batch 100, Loss: 0.116
Epoch 4, Batch 200, Loss: 0.103
Epoch 4, Batch 300, Loss: 0.122
Epoch 4, Batch 400, Loss: 0.106
Epoch 4,

In [8]:
# Measure top-1 accuracy of `net` over `testloader` and append the result to a log file.
net.eval()  # inference mode; a no-op for Linear-only models but the standard guard

correct = 0
total = 0
with torch.no_grad():  # no gradients are needed for evaluation
    for images, labels in testloader:
        outputs = net(images)
        # Predicted class = index of the largest output along the class dimension.
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total

# Take the image count, optimizer name and epoch count from the live objects so
# the message cannot drift from the run (it used to hard-code "20 epochs" while
# the training cell sets num_epochs = 10).
accuracy_message = (
    f'Accuracy of the network on the {total} test images '
    f'with {type(optimizer).__name__} optimizer and {num_epochs} epochs: {accuracy:.2f} %\n'
)

# NOTE(review): the training cell logs to "run_log.txt" while this appends to
# 'training_log.txt' - confirm that two separate log files are intended.
with open('training_log.txt', 'a') as f:
    print(accuracy_message)
    f.write(accuracy_message)


Accuracy of the network on the 10000 test images with Adam optimizer and 20 epochs: 96.17 %

