<a href="https://colab.research.google.com/github/mariyahendriksen/DBD/blob/master/MNIST_nn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
from torch import nn
from torch import optim
from torchvision import datasets, transforms
from torch.utils.data import random_split, DataLoader

In [None]:
# GPU sanity check: draw 5 standard-normal samples, then copy them to the CUDA device.
# Kept as the last expression of the cell so the resulting tensor is displayed.
torch.randn(5).to("cuda")

tensor([-1.4032, -1.5811, -1.4371,  0.8190,  1.5651], device='cuda:0')

In [None]:
# Baseline classifier: a fully connected network taking a flattened 28x28 input
# (784 values) through two 64-unit ReLU layers down to 10 output logits.
model = nn.Sequential(
    nn.Linear(28 * 28, 64),  # input  -> hidden
    nn.ReLU(),
    nn.Linear(64, 64),       # hidden -> hidden
    nn.ReLU(),
    nn.Linear(64, 10),       # hidden -> logits (no softmax applied here)
)

In [None]:
# define the optimizer
# NOTE: model.parameters() returns a one-shot generator. The optimizer consumes
# it here, so `params` is exhausted afterwards and must not be iterated again.
params = model.parameters()
optimizer = optim.SGD(params, lr=1e-2)

# define the loss (applied to the model's logits and the integer class labels)
loss = nn.CrossEntropyLoss()

# train, val split
batch_size = 32
train_data = datasets.MNIST('data', train=True, download=True, transform=transforms.ToTensor())
# hold out 5,000 samples for validation; the two sizes must add up to len(train_data)
train, val = random_split(train_data, [55000, 5000])
# Reshuffle the training samples at every epoch so SGD does not replay the
# batches in the same order each time. The order of the validation samples
# is kept fixed; it is only used for evaluation.
train_loader = DataLoader(train, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val, batch_size=batch_size)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to data/MNIST/raw/train-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting data/MNIST/raw/train-images-idx3-ubyte.gz to data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to data/MNIST/raw/train-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting data/MNIST/raw/train-labels-idx1-ubyte.gz to data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to data/MNIST/raw/t10k-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting data/MNIST/raw/t10k-images-idx3-ubyte.gz to data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to data/MNIST/raw/t10k-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting data/MNIST/raw/t10k-labels-idx1-ubyte.gz to data/MNIST/raw
Processing...
Done!


  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


In [None]:
# the training and validation loops
# Each epoch makes one optimisation pass over train_loader, then one
# evaluation-only pass over val_loader, and prints the mean per-batch loss of each.
nb_epochs = 5

for epoch in range(nb_epochs):

    # training loops
    model.train()  # make sure every layer is in training mode
    losses = list()
    for batch in train_loader:
        x, y = batch

        # x: batch_size * channel * 28 * 28
        # x: batch_size * 1 * 28 * 28
        # flatten every image into one row so it fits the first Linear layer
        b = x.size(0)
        x = x.view(b, -1)

        # 1 forward
        l = model(x)   # l: logits

        # 2 compute the objective function
        J = loss(l, y)

        # 3 cleaning the gradients (left over from the previous step)
        model.zero_grad()
        # alternative to:
        # optimizer.zero_grad()

        # 4 accumulate the partial derivative of J wrt params
        J.backward()

        # 5 step in the opposite direction of the gradient
        optimizer.step()

        # .item() stores a plain float, so the autograd graph is not kept alive
        losses.append(J.item())

    print(f"Epoch {epoch + 1}, train loss: {torch.tensor(losses).mean():.2f}")

    # validation loops
    model.eval()  # evaluation mode: no training-only layer behaviour
    losses = list()
    # no parameter is updated here, so skip building autograd graphs
    with torch.no_grad():
        for batch in val_loader:
            x, y = batch

            # x: batch_size * 1 * 28 * 28 -> batch_size * 784
            b = x.size(0)
            x = x.view(b, -1)

            # 1 forward
            l = model(x)   # l: logits

            # 2 compute the objective function
            J = loss(l, y)

            losses.append(J.item())

    print(f"Epoch {epoch + 1}, validation loss: {torch.tensor(losses).mean():.2f}")

Epoch 1, train losss: 1.27
Epoch 1, validation losss: 0.48
Epoch 2, train losss: 0.41
Epoch 2, validation losss: 0.33
Epoch 3, train losss: 0.33
Epoch 3, validation losss: 0.29
Epoch 4, train losss: 0.29
Epoch 4, validation losss: 0.27
Epoch 5, train losss: 0.27
Epoch 5, validation losss: 0.24


In [None]:
class ResNet(nn.Module):
  """Small fully connected classifier with a single residual (skip) connection.

  The input goes through two ReLU-activated linear layers. The output of the
  first layer is added to the output of the second one, dropout is applied
  to that sum, and a final linear layer produces the logits.

  Args:
    in_features: size of each (already flattened) input sample.
    hidden: width of the two hidden layers. They share one width because
      their outputs are summed.
    n_classes: number of output logits.
    dropout: probability passed to ``nn.Dropout``.

  The defaults reproduce the original hard-coded configuration
  (784 -> 64 -> 64 -> 10, dropout 0.1).
  """

  def __init__(self, in_features=28 * 28, hidden=64, n_classes=10, dropout=0.1):
    super().__init__()
    self.l1 = nn.Linear(in_features, hidden)
    self.l2 = nn.Linear(hidden, hidden)
    self.l3 = nn.Linear(hidden, n_classes)
    self.do = nn.Dropout(dropout)

  def forward(self, x):
    """Return the logits for ``x``, whose last dimension must be ``in_features``."""
    h1 = nn.functional.relu(self.l1(x))
    h2 = nn.functional.relu(self.l2(h1))
    # residual connection: add the first hidden activation back before dropout
    do = self.do(h2 + h1)
    logits = self.l3(do)
    return logits
  
# build the residual network, then move its parameters onto the GPU
model = ResNet()
model = model.cuda()

In [None]:
# loss: cross-entropy between the model's logits and the class labels
loss = nn.CrossEntropyLoss()

# optimizer: plain SGD over the parameters of the current `model`, learning rate 0.01
params = model.parameters()
optimizer = optim.SGD(params, lr=0.01)

In [None]:
# Training and validation loops for the model that lives on an accelerator.
# The model and the tensors (x, y) must be on the same device, so every batch
# is moved to the device that holds the model's parameters.

# the training and validation loops
nb_epochs = 5

# derive the target device from the model instead of hard-coding it
device = next(model.parameters()).device

for epoch in range(nb_epochs):

    # training loops
    model.train()  # training mode: dropout is active
    losses = list()
    accuracies = list()
    for batch in train_loader:
        x, y = batch

        # x: batch_size * channel * 28 * 28
        # x: batch_size * 1 * 28 * 28
        # flatten every image into one row, then move the batch to the model's device
        b = x.size(0)
        x = x.view(b, -1).to(device)
        y = y.to(device)

        # 1 forward
        l = model(x)   # l: logits

        # 2 compute the objective function
        J = loss(l, y)

        # 3 cleaning the gradients (left over from the previous step)
        model.zero_grad()
        # alternative to:
        # optimizer.zero_grad()

        # 4 accumulate the partial derivative of J wrt params
        J.backward()

        # 5 step in the opposite direction of the gradient
        optimizer.step()

        losses.append(J.item())
        # fraction of the batch whose highest logit matches the label;
        # compared on the model's device, only the resulting scalar is copied back
        accuracies.append(y.eq(l.detach().argmax(dim=1)).float().mean().item())

    print(f"Epoch {epoch + 1}, train loss: {torch.tensor(losses).mean():.2f}")
    print(f"Training accuracy : {torch.tensor(accuracies).mean():.2f}")

    # validation loops
    # evaluation mode switches dropout off, so the validation metrics are
    # computed on the full network rather than on a randomly perturbed one
    model.eval()
    losses = list()
    accuracies = list()
    # no parameter is updated here, so skip building autograd graphs
    with torch.no_grad():
        for batch in val_loader:
            x, y = batch

            # x: batch_size * 1 * 28 * 28 -> batch_size * 784
            b = x.size(0)
            x = x.view(b, -1).to(device)
            y = y.to(device)

            # 1 forward
            l = model(x)   # l: logits

            # 2 compute the objective function
            J = loss(l, y)

            losses.append(J.item())
            accuracies.append(y.eq(l.argmax(dim=1)).float().mean().item())

    print(f"Epoch {epoch + 1}, validation loss: {torch.tensor(losses).mean():.2f}")
    print(f"Validation accuracy : {torch.tensor(accuracies).mean():.2f}")

Epoch 1, train loss: 0.21
Training accuracy : 0.94
Epoch 1, validation loss: 0.21
Validation accuracy : 0.94
Epoch 2, train loss: 0.19
Training accuracy : 0.94
Epoch 2, validation loss: 0.19
Validation accuracy : 0.95
Epoch 3, train loss: 0.18
Training accuracy : 0.95
Epoch 3, validation loss: 0.17
Validation accuracy : 0.95
Epoch 4, train loss: 0.16
Training accuracy : 0.95
Epoch 4, validation loss: 0.17
Validation accuracy : 0.95
Epoch 5, train loss: 0.15
Training accuracy : 0.96
Epoch 5, validation loss: 0.16
Validation accuracy : 0.95
