In [2]:
#imports
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torch.utils.tensorboard import SummaryWriter
import matplotlib.pyplot as plt
import numpy as np

In [3]:
#gpu config
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print("Using device:", device)

Using device: cpu


In [4]:
#hyperparams
learning_rate = 0.01
batch_size = 128
num_epochs = 30

#load data
transform = transforms.Compose([transforms.ToTensor()])
train_dataset = datasets.MNIST(root='./data', train=True, transform=transform, download=True)
test_dataset = datasets.MNIST(root='./data', train=False, transform=transform, download=True)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 78155415.70it/s]


Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 30996851.03it/s]


Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 20268249.03it/s]


Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 7344074.31it/s]


Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw



In [5]:
#LeNet5
class LeNet5(nn.Module):
    def __init__(self):
        super(LeNet5, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, 5, padding=2)
        self.pool1 = nn.MaxPool2d(2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.pool2 = nn.MaxPool2d(2)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)
        
    def forward(self, x):
        x = self.pool1(torch.relu(self.conv1(x)))
        x = self.pool2(torch.relu(self.conv2(x)))
        x = x.view(-1, 16 * 5 * 5)
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

In [6]:
#initialize
model = LeNet5().to(device)
writer = SummaryWriter()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=learning_rate)

In [7]:
#Training

#train net
for epoch in range(num_epochs):
    train_loss = 0.0
    for i, (images, labels) in enumerate(train_loader):
        
        # Send data to GPU
        images, labels = images.to(device), labels.to(device)

        # Forward pass and compute loss
        outputs = model(images)
        loss = nn.CrossEntropyLoss()(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Compute running train loss
        train_loss += loss.item()
        
    #validation
    model.eval()
    valid_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            loss = criterion(output, target)
            valid_loss += loss.item()*data.size(0)
            _, predicted = torch.max(output.data, 1)
            total += target.size(0)
            correct += (predicted == target).sum().item()
    
    valid_loss = valid_loss/len(test_loader.dataset)
    accuracy = 100 * correct / total

    writer.add_scalar('Training Loss', train_loss / len(train_loader), epoch)
    writer.add_scalar('Validation Loss', valid_loss / len(test_loader), epoch)
    
    print(f"Epoch {epoch+1}/{30}, Training Loss: {train_loss:.6f}, Validation Loss: {valid_loss:.6f}, Accuracy: {accuracy:.2f}")
writer.close()

Epoch 1/30, Training Loss: 1076.056131, Validation Loss: 2.279579, Accuracy: 24.01
Epoch 2/30, Training Loss: 979.493338, Validation Loss: 1.237790, Accuracy: 67.50
Epoch 3/30, Training Loss: 294.776769, Validation Loss: 0.439615, Accuracy: 86.73
Epoch 4/30, Training Loss: 167.476224, Validation Loss: 0.290538, Accuracy: 91.00
Epoch 5/30, Training Loss: 130.827157, Validation Loss: 0.245816, Accuracy: 92.63
Epoch 6/30, Training Loss: 107.164198, Validation Loss: 0.190167, Accuracy: 94.24
Epoch 7/30, Training Loss: 88.343301, Validation Loss: 0.164396, Accuracy: 95.26
Epoch 8/30, Training Loss: 74.774019, Validation Loss: 0.134009, Accuracy: 95.86
Epoch 9/30, Training Loss: 64.908770, Validation Loss: 0.123512, Accuracy: 96.19
Epoch 10/30, Training Loss: 58.289058, Validation Loss: 0.105220, Accuracy: 96.81
Epoch 11/30, Training Loss: 52.675351, Validation Loss: 0.096826, Accuracy: 97.10
Epoch 12/30, Training Loss: 48.022073, Validation Loss: 0.089624, Accuracy: 97.18
Epoch 13/30, Train