In [1]:
import torch, torchvision
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import torch.nn.functional as F
import time

In [20]:
# Training settings
bs = 64
lr = 0.001
num_epoch = 5
num_classes = 10
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print('Training using ' + device)

Training using cuda


In [30]:
# Load MNIST dataset
mnist_transforms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

train_dataset = torchvision.datasets.MNIST(root='./data/mnist/', train=True,
                                          download=False,
                                          transform=mnist_transforms)

test_dataset = torchvision.datasets.MNIST(root='./data/mnist', train=False,
                                         download=False,
                                         transform=mnist_transforms)

train_loader = DataLoader(train_dataset, batch_size=bs, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=bs)

In [31]:
# Build the model
class Model(nn.Module):
    def __init__(self, num_classes=10):
        super(Model, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))
        self.layer2 = nn.Sequential(
            nn.Conv2d(16, 32, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        
        self.fc = nn.Linear(7*7*32, num_classes)
        
        
    def forward(self, x):
    
        x = self.layer1(x)
        x = self.layer2(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x

In [32]:
model = Model()
model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters())

In [33]:
model.parameters

<bound method Module.parameters of Model(
  (layer1): Sequential(
    (0): Conv2d(1, 16, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer2): Sequential(
    (0): Conv2d(16, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc): Linear(in_features=1568, out_features=10, bias=True)
)>

In [34]:
model = Model(num_classes).to(device)

# Optimzer and loss
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=lr)

In [55]:
# Training loop
def train(num_epochs=5):
    model.train()
    for epoch in range(num_epochs):
        for idx, (images, labels) in enumerate(train_loader):
            images, labels = images.to(device), labels.to(device)

            # Forward pass
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Backward and Update weights
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Print the loss
            if (idx) % bs-1 == 0:
                print(f'Epoch [{epoch+1}/{num_epochs}] | Batch [{idx*len(images)}/{len(train_loader.dataset)}] | Loss: {loss.item():.4f}')

In [56]:
def test():
    model.eval()
    with torch.no_grad():
        correct = 0
        test_loss = 0
        
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            out = model(images)
            # total loss
            test_loss += criterion(out, labels)
            # get the index of the max value, calculate how many accurate predictions
            pred = out.data.max(1, keepdim=True)[1]
            correct += pred.eq(labels.data.view_as(pred)).cpu().sum()
            
        # Average loss for the whole test 10000 images    
        test_loss /= len(test_loader.dataset)
        print("==========================")
        print(f"Test set: Average Loss: {test_loss:.4f}, Accuracy: {correct}/{len(test_loader.dataset)}, {100*correct/len(test_loader.dataset):.0f}%")

In [57]:
# Run() aka main
def run():
    # Training
    train_time = time.time()
    train(num_epoch)
    m, s = divmod(time.time() - train_time, 60)
    print(f'Training Time: {m:.0f}m {s:.0f}s')
    # Testing
    test_time = time.time()
    test()
    m, s = divmod(time.time() - test_time, 60)
    print(f'Testing Time: {m:.0f}m {s:.0f}s')
    # Total
    m, s = divmod(time.time() - train_time, 60)
    print(f'Total Time: {m:.0f}m {s:.0f}s\nTrained on {device}')

In [58]:
run()

Epoch [1/5] | Batch [64/60000] | Loss: 0.0006
Epoch [1/5] | Batch [4160/60000] | Loss: 0.0004
Epoch [1/5] | Batch [8256/60000] | Loss: 0.0019
Epoch [1/5] | Batch [12352/60000] | Loss: 0.0002
Epoch [1/5] | Batch [16448/60000] | Loss: 0.0007
Epoch [1/5] | Batch [20544/60000] | Loss: 0.0002
Epoch [1/5] | Batch [24640/60000] | Loss: 0.0001
Epoch [1/5] | Batch [28736/60000] | Loss: 0.0000
Epoch [1/5] | Batch [32832/60000] | Loss: 0.0006
Epoch [1/5] | Batch [36928/60000] | Loss: 0.0118
Epoch [1/5] | Batch [41024/60000] | Loss: 0.0162
Epoch [1/5] | Batch [45120/60000] | Loss: 0.0115
Epoch [1/5] | Batch [49216/60000] | Loss: 0.0373
Epoch [1/5] | Batch [53312/60000] | Loss: 0.0101
Epoch [1/5] | Batch [57408/60000] | Loss: 0.0001
Epoch [2/5] | Batch [64/60000] | Loss: 0.0004
Epoch [2/5] | Batch [4160/60000] | Loss: 0.0000
Epoch [2/5] | Batch [8256/60000] | Loss: 0.0002
Epoch [2/5] | Batch [12352/60000] | Loss: 0.0007
Epoch [2/5] | Batch [16448/60000] | Loss: 0.0003
Epoch [2/5] | Batch [20544/600