In [12]:
import datetime
import os

import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

# Pick the compute device once for the whole notebook: the GPU when PyTorch
# can see one, the CPU otherwise. Kept as a plain string because later cells
# compare it against 'cuda'.
if torch.cuda.is_available():
  device = "cuda"
else:
  device = "cpu"
device

'cuda'

In [2]:
# hyper-parameters
batch_size = 100

# MNIST: 28x28x1 images, 10 classes, 60,000 training / 10,000 test samples.
# Each split is downloaded into ./data if needed and converted to tensors.
train = datasets.MNIST(
  root='./data',
  train=True,
  download=True,
  transform=transforms.ToTensor(),
)
test = datasets.MNIST(
  root='./data',
  train=False,
  download=True,
  transform=transforms.ToTensor(),
)

# Fix the global PyTorch seed before the loaders are created.
torch.manual_seed(42)

# Worker processes and pinned memory are only requested when running on CUDA.
if device == 'cuda':
  kwargs = {'num_workers': 2, 'pin_memory': True}
else:
  kwargs = {}

# Training batches are shuffled; the test set is iterated in its stored order.
train_loader = torch.utils.data.DataLoader(train, batch_size=batch_size, shuffle=True, **kwargs)
test_loader = torch.utils.data.DataLoader(test, batch_size=batch_size, shuffle=False, **kwargs)

In [3]:
# Sanity check on the loaded data: the label of the first training sample
# should be 5. Enforce it instead of relying on a visual check of the output.
first_label = train_loader.dataset.targets[0]
assert int(first_label) == 5, f"unexpected first training label: {first_label}"
print(first_label)

tensor(5)


In [10]:
class Net(nn.Module):
  """Fully connected classifier with a single hidden ReLU layer.

  Args:
    in_features: number of values per sample after flattening
      (default 28*28*1, i.e. one 28x28 single-channel image).
    hidden_features: width of the hidden layer (default 64).
    num_classes: number of output classes (default 10).

  The layers are stored as ``fc1`` and ``fc2``; those names are kept so that
  state dicts saved from earlier versions of this class still load.
  """
  def __init__(self, in_features=28*28*1, hidden_features=64, num_classes=10):
    super().__init__()
    self.fc1 = nn.Linear(in_features, hidden_features)
    self.fc2 = nn.Linear(hidden_features, num_classes)

  def forward(self, x):
    """Return per-class log-probabilities of shape (batch, num_classes).

    Every dimension after the first (batch) dimension is flattened, so the
    input may be given either as images or as already flat vectors.
    """
    x = torch.flatten(x, 1)
    x = F.relu(self.fc1(x))
    # log_softmax output: each row exponentiates to a probability distribution.
    return F.log_softmax(self.fc2(x), dim=1)

def train_model(data_loader, model, num_epochs, lr=0.01):
  """Train `model` with Adam and cross-entropy loss.

  Args:
    data_loader: iterable yielding (data, target) batches.
    model: the nn.Module to optimise; it is switched to training mode.
    num_epochs: number of full passes over `data_loader`.
    lr: Adam learning rate (default 0.01).

  Returns:
    (mean_losses, mean_accuracy): two lists with one 0-dim tensor per epoch,
    holding the per-sample average loss and accuracy of that epoch. An epoch
    that saw no samples is reported as NaN.
  """
  # initialization
  model.train()
  # optimization
  opt = optim.Adam(model.parameters(), lr=lr)
  criterion = nn.CrossEntropyLoss()
  # Move batches to wherever the model's parameters live instead of relying
  # on a notebook-level global.
  model_device = next(model.parameters()).device
  # training
  mean_losses = []
  mean_accuracy = []
  for epoch in range(num_epochs):
    # Running totals as plain Python numbers: nothing per-batch is kept
    # around, and no tensors are held on the compute device between batches.
    loss_sum = 0.0
    num_correct = 0
    num_seen = 0
    for data, target in data_loader:
      data = data.to(model_device)
      target = target.to(model_device)
      opt.zero_grad()
      output = model(data)
      loss = criterion(output, target)
      loss.backward()
      opt.step()
      batch_len = target.size(0)
      # `loss` is the batch mean; weight it by the batch size so that a
      # smaller final batch does not skew the epoch average.
      loss_sum += loss.item() * batch_len
      num_correct += (output.argmax(dim=1) == target).sum().item()
      num_seen += batch_len
    # Dividing by NaN yields NaN for an empty epoch instead of raising.
    denom = num_seen or float('nan')
    mean_losses.append(torch.tensor(loss_sum / denom))
    mean_accuracy.append(torch.tensor(num_correct / denom))
    print(f'Epoch: {epoch+1}, Loss: {mean_losses[epoch]:.6f}, Accuracy: {mean_accuracy[epoch]:.6f}')
  return mean_losses, mean_accuracy

def test_model(data_loader, model):
  # initialization
  model.eval()
  # testing
  test_loss = []
  test_accuracy = []
  correct = 0
  L = nn.CrossEntropyLoss()
  with torch.no_grad():
    for data, target in data_loader:
      data = data.to(device)
      target = target.to(device)
      # data = data.view(data.shape[0], -1)
      output = model(data)
      test_loss.append(L(output, target).item())
      pred = output.argmax(dim=1, keepdim=True)
      correct += pred.eq(target.view_as(pred)).sum().item()
      test_accuracy.append((output.argmax(dim=1)==target).float().mean())
  mean_loss = torch.tensor(test_loss).mean()
  mean_accuracy = torch.tensor(test_accuracy).mean()
  print(f'Average loss: {mean_loss:.4f}, Average Accuracy: {mean_accuracy:.4f}, Accuracy: {correct}/{len(data_loader.dataset)} ({100. * correct / len(data_loader.dataset):.0f}%)')
  return mean_loss, mean_accuracy

In [5]:
# params
model = Net().to(device)
num_epochs = 5
model_dir = "models"

# train loop
print("Training")
# Keep the per-epoch history instead of discarding the return value.
train_losses, train_accuracy = train_model(train_loader, model, num_epochs)
# save model
# torch.save does not create missing directories; without this a fresh
# checkout would fail here, after training has already finished.
os.makedirs(model_dir, exist_ok=True)
# NOTE(review): the timestamp contains ':' which is not a valid filename
# character on Windows; format left unchanged to match existing checkpoints.
torch.save(model.state_dict(), os.path.join(model_dir, datetime.datetime.now().strftime("%Y-%m-%d-%H:%M:%S") + ".pt"))

# load model
# model = Net().to(device)
# model.load_state_dict(torch.load("models/2021-03-07-16:56:56.pt"))


# test loop
print("Testing")
test_model(test_loader, model)

Epoch: 1, Loss: 0.241071, Accuracy: 0.928300
Epoch: 2, Loss: 0.126403, Accuracy: 0.962267
Epoch: 3, Loss: 0.102688, Accuracy: 0.968917
Epoch: 4, Loss: 0.085066, Accuracy: 0.973817
Epoch: 5, Loss: 0.076701, Accuracy: 0.976867


([tensor(0.2411),
  tensor(0.1264),
  tensor(0.1027),
  tensor(0.0851),
  tensor(0.0767)],
 [tensor(0.9283),
  tensor(0.9623),
  tensor(0.9689),
  tensor(0.9738),
  tensor(0.9769)])