In [1]:
import torch as t
import numpy as np

Let's make sure pytorch works first:

In [2]:
val = t.rand(10)
print (val)

tensor([0.8563, 0.4238, 0.8172, 0.4546, 0.4803, 0.9739, 0.2816, 0.9366, 0.9170,
        0.2062])


And now let's do something a bit more interesting. In this notebook we will be training a very simple MLP to learn the sin() function.

In [3]:
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor, Lambda, Compose
import matplotlib.pyplot as plt
import seaborn as sn

In [4]:
train = datasets.FashionMNIST(
    root='data',
    train=True,
    download=True,
    transform=ToTensor()
)

test = datasets.FashionMNIST(
    root="data",
    train=False,
    download=True,
    transform=ToTensor(),
)

print(train, test)

Dataset FashionMNIST
    Number of datapoints: 60000
    Root location: data
    Split: Train
    StandardTransform
Transform: ToTensor() Dataset FashionMNIST
    Number of datapoints: 10000
    Root location: data
    Split: Test
    StandardTransform
Transform: ToTensor()


In [5]:
batch_size = 32

train_dl = DataLoader(train, batch_size=batch_size)
test_dl = DataLoader(test, batch_size=batch_size)
for X, y in train_dl:
    print("Shape or X [samples, channels, height, width]:", X.shape)
    print("Shape of y:", y.shape)
    break 

Shape or X [samples, channels, height, width]: torch.Size([32, 1, 28, 28])
Shape of y: torch.Size([32])


Now we select a cuda device if one is available:

In [6]:
device = "cuda" if t.cuda.is_available() else "cpu"
print(device)

cuda


And proceed to define our neural network. A neural network is an subclass of **```nn.Module```**. Layers in a neural network are also subclasses of **```nn.Module```**. In the constructor we define the layers, and in the **```forward(self, x)```** method we implement the forward pass. Pytorch has autodiff capabilities, so we don't have to define the backwards pass.

In [7]:
class MyNN(nn.Module):
    def __init__(self):
        super(MyNN, self).__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.BatchNorm1d(28*28),
            nn.Linear(28*28, 512),
            nn.ReLU(),
            nn.BatchNorm1d(512),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.BatchNorm1d(512),
            nn.Linear(512, 10),
            nn.ReLU()
        )
        #nn.init.xavier_normal(self.linear_relu_stack[1].weight)
        #nn.init.xavier_normal(self.linear_relu_stack[4].weight)
        #nn.init.xavier_normal(self.linear_relu_stack[7].weight)
        
    def forward(self, x):
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

model = MyNN().to(device)
print(model)

MyNN(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): BatchNorm1d(784, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (1): Linear(in_features=784, out_features=512, bias=True)
    (2): ReLU()
    (3): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (4): Linear(in_features=512, out_features=512, bias=True)
    (5): ReLU()
    (6): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (7): Linear(in_features=512, out_features=10, bias=True)
    (8): ReLU()
  )
)


Now we need to define the optimizer

In [8]:
# This loss function actually takes the output of the network (N-dimensional)
# and executes Log(SoftMax(...)), and the passes that to a Negative log likelihood
# It expect the labels to be given as the index of the expected class, not as a 
# vector of probabilities
loss_fn = nn.CrossEntropyLoss()
optimizer = t.optim.SGD(model.parameters(), lr=1e-3)
optimizer = t.optim.Adam(model.parameters(), lr=1e-3)

and finally we define the train/test loops as functions that we will invoke in our outer trainning loop

In [9]:
def train(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    train_loss = 0.0
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)
        # Forward pass
        pred = model(X)
        loss = loss_fn(pred, y)
        train_loss += loss

        # Backprop
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 100 == 0:
            loss, current = loss.item(), batch * len(X)
            #print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")
    train_loss /= size
    print(f"Train Avg loss: {train_loss:>5f}")

def test(dataloader, model):
    size = len(dataloader.dataset)
    model.eval()
    test_loss, correct = 0, 0
    with t.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(t.float).sum().item()
    test_loss /= size
    correct /= size
    print(f"Test Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f}")

In [10]:
epochs = 10
for e in range(epochs):
    print(f"Epoch {e + 1}\n------------------")
    train(train_dl, model, loss_fn, optimizer)
    test(test_dl, model)
print("Done deal!")

Epoch 1
------------------
Train Avg loss: 0.015082
Test Accuracy: 85.3%, Avg loss: 0.012734
Epoch 2
------------------
Train Avg loss: 0.010693
Test Accuracy: 86.9%, Avg loss: 0.011518
Epoch 3
------------------
Train Avg loss: 0.009109
Test Accuracy: 87.5%, Avg loss: 0.011278
Epoch 4
------------------
Train Avg loss: 0.008009
Test Accuracy: 87.5%, Avg loss: 0.011507
Epoch 5
------------------
Train Avg loss: 0.006936
Test Accuracy: 88.0%, Avg loss: 0.011993
Epoch 6
------------------
Train Avg loss: 0.006077
Test Accuracy: 87.8%, Avg loss: 0.012871
Epoch 7
------------------
Train Avg loss: 0.005345
Test Accuracy: 87.6%, Avg loss: 0.014127
Epoch 8
------------------
Train Avg loss: 0.004675
Test Accuracy: 87.7%, Avg loss: 0.014518
Epoch 9
------------------
Train Avg loss: 0.004119
Test Accuracy: 88.1%, Avg loss: 0.014764
Epoch 10
------------------
Train Avg loss: 0.003705
Test Accuracy: 88.0%, Avg loss: 0.016525
Done deal!
