In [None]:
import numpy as np

In [None]:
import torch

# MNIST dataset

In [None]:
import tensorflow as tf

# Fetch MNIST through the Keras dataset helper (downloaded on first use).
# The call yields one (inputs, labels) pair of arrays for the training split
# and one for the test split.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [None]:
# Flatten every image into a 784-long row vector and divide the raw pixel
# values by 255. The sample axis is inferred with -1 instead of being
# hard-coded (60000 / 10000), so the cell keeps working if the split sizes
# ever differ.
x_train = torch.tensor(x_train.reshape(-1, 784) / 255, dtype=torch.float32)
x_test = torch.tensor(x_test.reshape(-1, 784) / 255, dtype=torch.float32)

# Labels as 64-bit integers (torch.long and torch.int64 are the same dtype;
# one spelling is used for both splits).
y_train = torch.tensor(y_train, dtype=torch.long)
y_test = torch.tensor(y_test, dtype=torch.long)

### Model, Parameters, Loss Function

In [None]:
import torch.nn.functional as F

# Parameters of a single linear layer, drawn from a standard normal and
# tracked by autograd so that backward() fills in their .grad fields.
w = torch.randn(784, 10, requires_grad=True)
b = torch.randn(10, requires_grad=True)

# Loss applied to the raw scores returned by `model` and the integer labels.
loss_fn = F.cross_entropy


def model(xb):
    """Return one row of 10 raw class scores per row of `xb` (xb @ w + b)."""
    scores = torch.matmul(xb, w)
    return scores + b

### Hyperparameters

In [None]:
bs = 64  # mini-batch size: number of rows taken from x_train per update
lr = 0.1  # learning rate: factor applied to the gradient in the manual SGD step
epochs = 10 # number of full passes over the training set

# n = number of training rows, c = number of columns (features) per row.
n, c = x_train.shape

In [None]:
# Number of mini-batches per epoch: ceil(n / bs) written with integer
# arithmetic, so a final, shorter batch is counted as well.
(n-1) // bs + 1

938

In [None]:
# Hand-written mini-batch SGD: walk the training set in consecutive slices of
# `bs` rows, `epochs` times over, updating w and b after every slice.
for epoch in range(epochs):
    for start_i in range(0, n, bs):
        end_i = start_i + bs  # slicing clips the final, shorter batch
        xb, yb = x_train[start_i:end_i], y_train[start_i:end_i]
        pred = model(xb)
        loss = loss_fn(pred, yb)

        loss.backward()
        # The parameter update itself must not be recorded by autograd.
        with torch.no_grad():
            for param in (w, b):
                param -= param.grad * lr
                # Gradients accumulate across backward() calls; reset them.
                param.grad.zero_()

# Loss of the very last mini-batch only, not an average over the epoch.
print(loss)

tensor(0.0687, grad_fn=<NllLossBackward0>)


In [None]:
# exp(-cross_entropy) is the geometric mean of the probabilities the model
# assigned to the correct classes. Read the value from `loss` (the last
# mini-batch of the training loop) rather than pasting the printed number,
# so the cell stays correct when the notebook is re-run.
np.exp(-loss.item())

0.9336067200861263

In [None]:
# Training accuracy: share of rows whose highest-scoring class equals the label.
model(x_train).argmax(dim=-1).eq(y_train).float().mean()

tensor(0.8921)

In [None]:
# Show the ground-truth training labels.
y_train

tensor([5, 0, 4,  ..., 5, 6, 8])

In [None]:
def accuracy(out, yb):
    """Return the fraction of rows in `out` whose arg-max equals the label.

    out: 2-D tensor of per-class scores, one row per sample.
    yb:  1-D tensor of integer class labels, one per row of `out`.
    Returns a 0-dim float tensor.
    """
    hits = out.argmax(dim=1).eq(yb)
    return hits.float().mean()

In [None]:
# Same training accuracy as the inline expression earlier, now via the helper.
accuracy(model(x_train), y_train)

tensor(0.8921)

In [None]:
# Predicted class per training row, for comparison with the labels in y_train.
model(x_train).argmax(dim=1)

tensor([5, 0, 3,  ..., 5, 6, 8])

# Refactoring using `nn.Module`

In [None]:
from torch import nn

class MyModel(nn.Module):
    """Linear classifier that holds its weight and bias as raw nn.Parameters."""

    def __init__(self):
        super().__init__()
        # Wrapping the tensors in nn.Parameter registers them with the module,
        # which is what makes them show up in model.parameters().
        weight = torch.randn(784, 10)
        bias = torch.randn(10)
        self.w = nn.Parameter(weight)
        self.b = nn.Parameter(bias)

    def forward(self, x):
        """Return x @ w + b: 10 raw class scores per input row."""
        return torch.matmul(x, self.w) + self.b

In [None]:
# Rebind the global name `model` (until now a plain function) to a module instance.
model = MyModel()

In [None]:
# Display the module's repr.
model

MyModel()

In [None]:
# Loss of the freshly created, untrained module over the whole training set.
loss = loss_fn(model(x_train), y_train)
loss

tensor(14.0516, grad_fn=<NllLossBackward0>)

In [None]:
# Back-propagate the full-batch loss so every parameter of `model` gets a .grad.
loss.backward()

In [None]:
# One manual SGD step over whatever parameters the module has registered;
# the update is kept out of the autograd graph.
with torch.no_grad():
    for param in model.parameters():
        param.sub_(param.grad * lr)
    # Clear the gradients of all parameters in one call.
    model.zero_grad()

In [None]:
def fit():
    """Train the global `model` with hand-written mini-batch SGD.

    Reads the globals `epochs`, `n`, `bs`, `lr`, `x_train`, `y_train`,
    `loss_fn` and `model` at call time, updates the model's parameters in
    place, and prints the loss of the last mini-batch processed.
    """
    for epoch in range(epochs):
        for start_i in range(0, n, bs):
            stop_i = start_i + bs  # slicing clips the final, shorter batch
            xb, yb = x_train[start_i:stop_i], y_train[start_i:stop_i]
            loss = loss_fn(model(xb), yb)

            loss.backward()
            # Parameter updates must not be tracked by autograd.
            with torch.no_grad():
                for param in model.parameters():
                    param.sub_(param.grad * lr)
                model.zero_grad()

    print(loss)

In [None]:
# Train the nn.Module-based model; prints the loss of the last mini-batch.
fit()

tensor(0.0733, grad_fn=<NllLossBackward0>)


In [None]:
# exp(-loss) for the final mini-batch loss printed by fit() (0.0733): the
# geometric-mean probability given to the correct classes on that batch.
# The constant previously used here (0.0437) did not match the printed loss.
np.exp(-0.0733)

0.9572410867275233

In [None]:
# Accuracy of the trained model on the test split.
accuracy(model(x_test), y_test)

tensor(0.8863)

# Refactoring using `nn.Linear`

In [None]:
class MyModel(nn.Module):
    """Single fully connected layer that produces raw class scores.

    Args:
        in_features: length of each input row (default 784).
        out_features: number of scores produced per row (default 10).

    Calling MyModel() with no arguments builds the same 784 -> 10 layer as
    before; the sizes are parameters only so the class can be reused.
    """

    def __init__(self, in_features=784, out_features=10):
        super().__init__()
        # nn.Linear creates, initialises and registers its own weight and bias.
        self.linear = nn.Linear(in_features, out_features)

    def forward(self, x):
        """Apply the linear layer to `x` and return the resulting scores."""
        return self.linear(x)

In [None]:
# Fresh, untrained nn.Linear-based model ...
model = MyModel()
# ... and its loss over the whole training set before any update.
loss_fn(model(x_train), y_train)

tensor(2.3338, grad_fn=<NllLossBackward0>)

In [None]:
# exp(-loss) of the untrained model, computed from the model itself instead
# of a pasted number. A value near 1/10 means the model is about as good as
# guessing uniformly among the 10 classes.
np.exp(-loss_fn(model(x_train), y_train).item())

0.09692672483692916

In [None]:
# fit() looks up the global `model` when it runs, so this call trains the
# nn.Linear-based model created above.
fit()

tensor(0.0936, grad_fn=<NllLossBackward0>)


In [None]:
# exp(-loss) for the last-mini-batch loss printed by fit().
# NOTE(review): 0.0936 is copied by hand from that printed output and must be
# updated whenever the notebook is re-run.
np.exp(-0.0936)

0.9106469481779949

In [None]:
# Loss over the full test split.
loss_fn(model(x_test), y_test)

tensor(0.2780, grad_fn=<NllLossBackward0>)

In [None]:
# Loss over the full training split, for comparison with the test loss.
loss_fn(model(x_train), y_train)

tensor(0.2751, grad_fn=<NllLossBackward0>)

In [None]:
# exp(-loss) over the full training split, computed from the model. The
# constant used before (0.2704) matched neither the training nor the test
# loss displayed in the preceding cells.
np.exp(-loss_fn(model(x_train), y_train).item())

0.7630742036013362

In [None]:
# Test-split accuracy of the nn.Linear-based model after training.
accuracy(model(x_test), y_test)

tensor(0.9204)

# Refactoring using `torch.optim`

In [None]:
from torch import optim

In [None]:
# The optimizer is bound to the parameters of whatever `model` refers to
# when this cell runs.
# NOTE(review): no learning rate is passed, so Adam runs with its library
# default and the `lr` hyperparameter defined earlier is not used here.
opt = optim.Adam(model.parameters())


def fit():
    """Train the global `model` using the global optimizer `opt`.

    Iterates over `x_train` / `y_train` in consecutive slices of `bs` rows
    for `epochs` passes and prints the loss of the last mini-batch.
    """
    for epoch in range(epochs):
        for start_i in range(0, n, bs):
            stop_i = start_i + bs  # slicing clips the final, shorter batch
            xb, yb = x_train[start_i:stop_i], y_train[start_i:stop_i]
            loss = loss_fn(model(xb), yb)

            loss.backward()
            opt.step()       # let the optimizer update every parameter
            opt.zero_grad()  # reset gradients before the next batch

    print(loss)

In [None]:
# Run the optimizer-based training loop; prints the loss of the last mini-batch.
fit()

tensor(0.0762, grad_fn=<NllLossBackward0>)


In [None]:
# exp(-loss) for the last-mini-batch loss printed by fit().
# NOTE(review): 0.0762 is copied by hand from that printed output and must be
# updated whenever the notebook is re-run.
np.exp(-0.0762)

0.9266308618531588

# Refactoring using `Dataset` and `DataLoader`

In [None]:
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader

In [None]:
# Pair inputs and labels so that indexing the dataset returns (x_train[i], y_train[i]).
train_ds = TensorDataset(x_train, y_train)

In [None]:
# Pair inputs with labels, then let DataLoader do the batching.
train_ds = TensorDataset(x_train, y_train)
# Use the `bs` hyperparameter instead of repeating the literal 64, so the
# batch size is defined in one place. shuffle=True draws the samples in a
# new random order on every pass over the loader.
train_dl = DataLoader(train_ds, batch_size=bs, shuffle=True)

In [None]:
# Inspect a single mini-batch instead of printing one identical line for
# every batch in the loader: each item yielded by train_dl is an
# (inputs, labels) pair, and their shapes show the batch size and the
# number of features per sample.
xb, yb = next(iter(train_dl))
xb.shape, yb.shape

In [None]:
# Start over with a fresh model and an optimizer bound to its parameters.
model = MyModel()
opt = optim.Adam(model.parameters())


def fit():
    """Train the global `model` on mini-batches drawn from `train_dl`.

    Runs `epochs` passes over the loader, taking one optimizer step per
    batch, and prints the loss of the last mini-batch.
    """
    for epoch in range(epochs):
        for batch in train_dl:
            xb, yb = batch  # the loader yields (inputs, labels) pairs
            loss = loss_fn(model(xb), yb)

            loss.backward()
            opt.step()
            opt.zero_grad()

    print(loss)

In [None]:
# Train with the DataLoader-based loop; prints the loss of the last mini-batch.
fit()

tensor(0.0823, grad_fn=<NllLossBackward0>)


In [None]:
# Test-split accuracy of the model trained through the DataLoader.
accuracy(model(x_test), y_test)

tensor(0.9261)

# Summary

In [None]:
import torch
import torch.nn.functional as F
from torch.nn import Module, Linear
from torch import optim
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader

In [None]:
import tensorflow as tf

# Fetch MNIST: one (inputs, labels) pair of arrays per split.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

# Flatten every image into a 784-long row vector and divide the raw pixel
# values by 255. The sample axis is inferred with -1 instead of being
# hard-coded (60000 / 10000).
x_train = torch.tensor(x_train.reshape(-1, 784) / 255, dtype=torch.float32)
x_test = torch.tensor(x_test.reshape(-1, 784) / 255, dtype=torch.float32)

# Labels as 64-bit integers (torch.long and torch.int64 are the same dtype;
# one spelling is used for both splits).
y_train = torch.tensor(y_train, dtype=torch.long)
y_test = torch.tensor(y_test, dtype=torch.long)

In [None]:
class MyModel(Module):
    """Single fully connected layer that produces raw class scores.

    Args:
        in_features: length of each input row (default 784).
        out_features: number of scores produced per row (default 10).

    Calling MyModel() with no arguments builds the same 784 -> 10 layer as
    before; the sizes are parameters only so the class can be reused.
    """

    def __init__(self, in_features=784, out_features=10):
        super().__init__()
        # Linear creates, initialises and registers its own weight and bias.
        self.linear = Linear(in_features, out_features)

    def forward(self, x):
        """Apply the linear layer to `x` and return the resulting scores."""
        return self.linear(x)

In [None]:
# Pair inputs with labels, then batch them.
train_ds = TensorDataset(x_train, y_train)
# shuffle=True matches the loader built earlier in the notebook: samples are
# drawn in a new random order on every epoch instead of the same fixed
# order each time.
train_dl = DataLoader(train_ds, batch_size=64, shuffle=True)

In [None]:
# Everything the training loop needs: model, optimizer, loss and epoch count.
model = MyModel()
opt = optim.Adam(model.parameters())
loss_fn = F.cross_entropy
epochs = 10


def fit():
    """Train the global `model` on mini-batches drawn from `train_dl`.

    Runs `epochs` passes over the loader, taking one optimizer step per
    batch, and prints the loss of the last mini-batch.
    """
    for epoch in range(epochs):
        for batch in train_dl:
            xb, yb = batch  # the loader yields (inputs, labels) pairs
            loss = loss_fn(model(xb), yb)

            loss.backward()
            opt.step()
            opt.zero_grad()

    print(loss)

In [None]:
# Run the complete training loop; prints the loss of the last mini-batch.
fit()

tensor(0.0834, grad_fn=<NllLossBackward0>)
