In [1]:
import torch
import torch.nn as nn 
import torch.optim as optim
import torch.nn.functional as F 
from torch.utils.data import DataLoader 
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import time

In [2]:
# Mini-batch size shared by the training and evaluation loaders.
batch_size = 64

# MNIST training split, stored under dataset/ (fetched when missing because
# download=True) and converted to tensors by ToTensor().
train_dataset = datasets.MNIST(
    root="dataset/",
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)

# Training batches are reshuffled on every pass over the loader.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
)

# MNIST evaluation split (train=False) with the same preprocessing.
test_dataset = datasets.MNIST(
    root="dataset/",
    train=False,
    transform=transforms.ToTensor(),
    download=True,
)

# Evaluation only accumulates totals over the whole split, so the sample
# order does not matter; keep it fixed rather than shuffling.
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
    shuffle=False,
)

In [3]:
class NN(nn.Module):
    """Fully connected classifier with one hidden ReLU layer.

    Args:
        input_size: Number of features in each (flattened) input sample.
        num_classes: Number of scores produced for each sample.
        hidden_size: Width of the hidden layer. Defaults to 100, the value
            that was previously hard-coded, so existing calls are unaffected.
    """

    def __init__(self, input_size, num_classes, hidden_size=100):
        super().__init__()
        # Attribute names and creation order are kept (fc1, then fc2) so
        # state_dict keys and parameter initialisation order do not change.
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return the raw (un-normalised) class scores for ``x``.

        ``x`` is passed straight to ``fc1``, so its last dimension must be
        ``input_size``; no softmax is applied to the result.
        """
        hidden = F.relu(self.fc1(x))
        return self.fc2(hidden)

In [4]:
# Choose the compute backend in order of preference: CUDA first, then MPS,
# and the CPU when neither accelerator reports itself as available.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"Using {device} device")

Using mps device


In [5]:
def train_loop(dataloader, model, criterion, optimizer, device=None):
    """Train ``model`` for one full pass over ``dataloader``.

    Args:
        dataloader: Iterable yielding ``(data, targets)`` batches.
        model: Module to optimise; it is switched to training mode.
        criterion: Callable invoked as ``criterion(scores, targets)`` whose
            result is back-propagated.
        optimizer: Optimizer used to zero the gradients and apply the step.
        device: Device each batch is moved to before the forward pass. When
            omitted, the device of the model's first parameter is used, so
            the function no longer depends on a notebook-global ``device``.
            If the model has no parameters the batches are left where they
            are.

    Returns:
        float: The per-batch loss values averaged with each batch weighted
        by its number of samples, or ``nan`` if ``dataloader`` yielded no
        batches.
    """
    model.train()

    if device is None:
        first_param = next(model.parameters(), None)
        if first_param is not None:
            device = first_param.device

    loss_sum = 0.0
    num_samples = 0
    for data, targets in dataloader:
        if device is not None:
            data = data.to(device=device)
            targets = targets.to(device=device)

        # Flatten every sample into a vector (e.g. 28x28 -> 784).
        batch_len = data.shape[0]
        data = data.reshape(batch_len, -1)

        # Forward propagation.
        scores = model(data)
        loss = criterion(scores, targets)

        # Clear gradients left over from the previous step, back-propagate,
        # then let the optimizer update the parameters.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Accumulate as a detached tensor so no graph is retained and the
        # value is only converted to a Python float once, after the loop.
        loss_sum = loss_sum + loss.detach() * batch_len
        num_samples += batch_len

    if num_samples == 0:
        return float("nan")
    return float(loss_sum) / num_samples

In [6]:
def check_accuracy(loader, model, device=None):
    """Print and return the classification accuracy of ``model`` on ``loader``.

    Args:
        loader: Iterable yielding ``(x, y)`` batches; each ``x`` is flattened
            to one vector per sample before being passed to the model.
        model: Module producing one score per class for each sample.
        device: Device each batch is moved to. When omitted, the device of
            the model's first parameter is used, so the function no longer
            depends on a notebook-global ``device``. If the model has no
            parameters the batches are left where they are.

    Returns:
        float: Fraction of correctly classified samples in ``[0, 1]`` (the
        printed figure is this value times 100). ``0.0`` is returned when the
        loader yields no samples, instead of raising ``ZeroDivisionError``.
    """
    # Remember the caller's mode so evaluation does not silently flip a
    # model that was deliberately left in eval mode back to train mode.
    was_training = model.training

    if device is None:
        first_param = next(model.parameters(), None)
        if first_param is not None:
            device = first_param.device

    num_correct = 0
    num_samples = 0
    model.eval()
    try:
        with torch.no_grad():
            for x, y in loader:
                if device is not None:
                    x = x.to(device=device)
                    y = y.to(device=device)
                x = x.reshape(x.shape[0], -1)

                # The predicted class is the index of the highest score.
                predictions = model(x).argmax(dim=1)
                num_correct += (predictions == y).sum()
                num_samples += predictions.size(0)
    finally:
        model.train(was_training)

    # num_correct is a tensor once at least one batch was seen; make it a
    # plain int for the arithmetic and the message below.
    num_correct = int(num_correct)
    accuracy = num_correct / num_samples if num_samples else 0.0
    print(
        f"Got {num_correct} / {num_samples} with accuracy"
        f" {accuracy * 100:.2f}"
    )
    return accuracy

In [7]:
# Hyperparameters read by the model-construction and training cells.
input_size = 28 * 28    # features per flattened 28x28 sample (784)
num_classes = 10        # number of scores the network outputs per sample
learning_rate = 1e-3    # step size handed to each optimizer
num_epochs = 3          # full passes over the training loader

### ADAM

In [9]:
# Seed the global RNG so the initial weights (and the shuffling drawn from
# the same generator) are repeatable from run to run.
torch.manual_seed(0)

model = NN(input_size=input_size, num_classes=num_classes).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# perf_counter() is a monotonic clock meant for measuring intervals, unlike
# time.time(), which follows the (adjustable) system clock.
# NOTE(review): accelerator backends may run work asynchronously, so this
# wall-clock figure could under-count - confirm before comparing timings.
start_time = time.perf_counter()

for epoch in range(num_epochs):
    print(f"Epoch: {epoch}")
    train_loop(train_loader, model, criterion, optimizer)

end_time = time.perf_counter()
elapsed_time = end_time - start_time
print(f"Total execution time: {elapsed_time:.2f} seconds")

# Accuracy on the data the model was fitted to, then on the evaluation split.
print('ADAM train:')
check_accuracy(train_loader, model)
print('ADAM test:')
check_accuracy(test_loader, model)

Epoch: 0
Epoch: 1
Epoch: 2
Total execution time: 12.09 seconds
ADAM train:
Got 58265 / 60000 with accuracy 97.11
ADAM test:
Got 9656 / 10000 with accuracy 96.56


### SGD

In [11]:
# Seed the global RNG so the initial weights (and the shuffling drawn from
# the same generator) are repeatable from run to run.
torch.manual_seed(0)

model = NN(input_size=input_size, num_classes=num_classes).to(device)
criterion = nn.CrossEntropyLoss()
# NOTE(review): learning_rate is the value shared by all optimizer cells;
# plain SGD may need a larger step to be competitive - confirm.
optimizer = optim.SGD(model.parameters(), lr=learning_rate)

# perf_counter() is a monotonic clock meant for measuring intervals, unlike
# time.time(), which follows the (adjustable) system clock.
# NOTE(review): accelerator backends may run work asynchronously, so this
# wall-clock figure could under-count - confirm before comparing timings.
start_time = time.perf_counter()

for epoch in range(num_epochs):
    print(f"Epoch: {epoch}")
    train_loop(train_loader, model, criterion, optimizer)

end_time = time.perf_counter()
elapsed_time = end_time - start_time
print(f"Total execution time: {elapsed_time:.2f} seconds")

# Accuracy on the data the model was fitted to, then on the evaluation split.
print('SGD train:')
check_accuracy(train_loader, model)
print('SGD test:')
check_accuracy(test_loader, model)

Epoch: 0
Epoch: 1
Epoch: 2
Total execution time: 7.38 seconds
SGD train:
Got 43809 / 60000 with accuracy 73.02
SGD test:
Got 7351 / 10000 with accuracy 73.51


### RMSProp

In [13]:
# Seed the global RNG so the initial weights (and the shuffling drawn from
# the same generator) are repeatable from run to run.
torch.manual_seed(0)

model = NN(input_size=input_size, num_classes=num_classes).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.RMSprop(model.parameters(), lr=learning_rate)

# perf_counter() is a monotonic clock meant for measuring intervals, unlike
# time.time(), which follows the (adjustable) system clock.
# NOTE(review): accelerator backends may run work asynchronously, so this
# wall-clock figure could under-count - confirm before comparing timings.
start_time = time.perf_counter()

for epoch in range(num_epochs):
    print(f"Epoch: {epoch}")
    train_loop(train_loader, model, criterion, optimizer)

end_time = time.perf_counter()
elapsed_time = end_time - start_time
print(f"Total execution time: {elapsed_time:.2f} seconds")

# Accuracy on the data the model was fitted to, then on the evaluation split.
print('RMSProp train:')
check_accuracy(train_loader, model)
print('RMSProp test:')
check_accuracy(test_loader, model)

Epoch: 0
Epoch: 1
Epoch: 2
Total execution time: 10.57 seconds
RMSProp train:
Got 58388 / 60000 with accuracy 97.31
RMSProp test:
Got 9671 / 10000 with accuracy 96.71
