<a href="https://colab.research.google.com/github/sk-gr/ml/blob/main/Building_a_DNN_using_torch_Tensor_methods_and_Python.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
from pathlib import Path
import requests


def download_mnist(path):
    """Download the pickled MNIST archive into ``path`` unless it is already there.

    Args:
        path: Directory (a ``pathlib.Path``) in which ``mnist.pkl.gz`` is kept.

    Returns:
        The ``Path`` of the archive inside ``path``.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an error status.
    """
    url = "https://github.com/pytorch/tutorials/raw/main/_static/"
    filename = "mnist.pkl.gz"
    target = path / filename

    if not target.exists():
        response = requests.get(url + filename, timeout=60)
        # Fail loudly instead of caching an error page under the dataset's name,
        # which would make every later run skip the download and break on read.
        response.raise_for_status()
        # write_bytes opens, writes and closes the file in a single call.
        target.write_bytes(response.content)

    return target


# Use ./data when it exists, otherwise fall back to ../data.
data_path = Path("data")
if not data_path.exists():
    data_path = Path("../data")

# Directory that holds the downloaded archive; created on demand.
path = data_path.joinpath("downloaded", "vector-mnist")
path.mkdir(parents=True, exist_ok=True)

datafile = download_mnist(path)

In [4]:
import gzip
import pickle


def read_mnist(path):
    """Load the training and validation splits from a gzipped pickle.

    The pickle must hold three ``(inputs, labels)`` pairs; the third pair is
    discarded.

    Args:
        path: Location of the gzip-compressed pickle file.

    Returns:
        The tuple ``(x_train, y_train, x_valid, y_valid)``.
    """
    # NOTE(review): pickle.load can execute arbitrary code, so only point this
    # at archives obtained from a source you trust.
    with gzip.open(path, "rb") as archive:
        train_split, valid_split, _ = pickle.load(archive, encoding="latin-1")
    x_train, y_train = train_split
    x_valid, y_valid = valid_split
    return x_train, y_train, x_valid, y_valid

# Unpack the training and validation inputs/labels from the downloaded archive.
x_train, y_train, x_valid, y_valid = read_mnist(datafile)

In [20]:
import torch


# This cell rebinds the very names it reads, so it must be safe to run twice.
# torch.as_tensor converts array-like data on the first run and returns an
# existing tensor unchanged on later runs, whereas torch.tensor would
# copy-construct from a tensor and emit a UserWarning.
# NOTE(review): for NumPy input as_tensor shares memory with the array instead
# of copying it; the arrays are not referenced under any other name here.
x_train, y_train, x_valid, y_valid = map(
    torch.as_tensor, (x_train, y_train, x_valid, y_valid)
)

print(x_train, y_train, sep="\n")

# n: number of training examples, c: number of features per example.
n, c = x_train.shape
print(x_train.shape)
print(y_train.shape)

tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])
tensor([5, 0, 4,  ..., 8, 4, 8])
torch.Size([50000, 784])
torch.Size([50000])


  x_train, y_train, x_valid, y_valid = map(


In [7]:
import math
import torch


# Weight matrix mapping 784 inputs to 10 outputs: standard-normal draws scaled
# by 1/sqrt(784), then marked as a leaf tensor that tracks gradients.
weights = (torch.randn(784, 10) / math.sqrt(784)).requires_grad_()
# One bias per output, starting at zero and also tracking gradients.
bias = torch.zeros(10).requires_grad_()

def linear(x: torch.Tensor) -> torch.Tensor:
    """Apply the affine map defined by the module-level ``weights`` and ``bias``.

    Args:
        x: Input whose last dimension matches the first dimension of ``weights``.

    Returns:
        ``x`` matrix-multiplied by ``weights`` with ``bias`` added.
    """
    projected = torch.matmul(x, weights)
    return projected + bias

def log_softmax(x: torch.Tensor) -> torch.Tensor:
    return x - torch.log(torch.sum(torch.exp(x), axis=1))[:, None]

def model(xb: torch.Tensor) -> torch.Tensor:
    """Run a batch through ``linear`` and then ``log_softmax``.

    Args:
        xb: Batch of inputs accepted by ``linear``.

    Returns:
        The output of ``log_softmax`` applied to the linear layer's result.
    """
    logits = linear(xb)
    return log_softmax(logits)

# Batch size shared by the cells that follow.
bs = 64

# Forward pass on the first `bs` training examples.
xb = x_train[:bs]
outs = model(xb)

# Show the model output for the first example and the shape of the whole batch.
print(outs[0], outs.shape)

tensor([-1.9845, -2.2457, -2.2202, -2.9017, -2.4166, -2.3481, -2.3291, -2.3609,
        -1.9366, -2.6244], grad_fn=<SelectBackward0>) torch.Size([64, 10])


In [9]:
def accuracy(out: torch.Tensor, yb: torch.Tensor) -> torch.Tensor:
    """Compute the fraction of rows whose arg-max index equals the label.

    Args:
        out: Per-class scores, one row per example.
        yb: Label index for each row of ``out``.

    Returns:
        A scalar float tensor: the mean of the element-wise matches.
    """
    hits = out.argmax(dim=1) == yb
    return hits.float().mean()

In [13]:
# Labels belonging to the first batch, then the accuracy of its outputs.
yb = y_train[:bs]
acc = accuracy(outs, yb)

print(acc)

tensor(0.0469)


In [14]:
# Demonstration: calling backward() on the accuracy value raises a
# RuntimeError; catch it and show the message instead of aborting the run.
try:
    acc.backward()
except RuntimeError as err:
    print(err)

element 0 of tensors does not require grad and does not have a grad_fn


In [15]:
def cross_entropy(output: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
    """Return the negated mean of the entries of ``output`` selected by ``target``.

    For every row ``i`` the entry ``output[i, target[i]]`` is picked; the
    picked values are averaged and the sign is flipped.

    Args:
        output: Two-dimensional tensor with one row per example.
        target: Column index to pick for each row.

    Returns:
        A scalar tensor.
    """
    row_ids = range(target.shape[0])
    picked = output[row_ids, target]
    return picked.mean().neg()

# Alias used by the cells below, so they do not name a specific loss function.
loss_func = cross_entropy

In [16]:
# -log(1/10): the loss of a prediction that gives each of the 10 outputs equal
# probability, printed next to the loss on the first batch for comparison.
uniform_guess_loss = -torch.log(torch.tensor(1 / 10))
print(loss_func(outs, yb), uniform_guess_loss)

tensor(2.3562, grad_fn=<NegBackward0>) tensor(2.3026)


In [17]:
# Loss of the first batch's outputs against its labels.
loss = loss_func(outs, yb)

# Backward pass from the loss; the next cell inspects the resulting bias.grad.
loss.backward()

In [18]:
# Display the gradient stored on `bias` by the backward pass above.
bias.grad

tensor([ 0.0320, -0.0325,  0.0466, -0.0593, -0.0263,  0.0282,  0.0103,  0.0199,
         0.0320, -0.0509])

In [21]:
lr = 0.5  # learning rate hyperparameter
epochs = 2  # how many epochs to train for

# An earlier cell already called loss.backward() and never reset the gradients.
# Gradients accumulate across backward passes, so clear whatever is left over;
# otherwise the first SGD step would use stale gradients on top of the new ones.
for param in (weights, bias):
    if param.grad is not None:
        param.grad.zero_()

for epoch in range(epochs):  # loop over the data repeatedly
    for ii in range((n - 1) // bs + 1):  # ceil(n / bs) batches, the last may be short
        start_idx = ii * bs  # we are ii batches in, each of size bs
        end_idx = start_idx + bs  # and we want the next bs entries

        # pull batches from x and from y
        xb = x_train[start_idx:end_idx]
        yb = y_train[start_idx:end_idx]

        # run model
        pred = model(xb)

        # get loss
        loss = loss_func(pred, yb)

        # calculate the gradients with a backwards pass
        loss.backward()

        # update the parameters
        with torch.no_grad():  # we don't want to track gradients through this part!
            # SGD learning rule: update with negative gradient scaled by lr
            weights -= weights.grad * lr
            bias -= bias.grad * lr

            # WARNING: PyTorch doesn't assume you're done with gradients
            #          until you say so -- by explicitly "deleting" them,
            #          i.e. setting the gradients to 0.
            weights.grad.zero_()
            bias.grad.zero_()

In [22]:
# xb and yb still hold the last minibatch drawn by the training loop, so this
# reports loss and accuracy on that final training batch only.
final_pred = model(xb)
print(loss_func(final_pred, yb), accuracy(final_pred, yb))

tensor(0.0827, grad_fn=<NegBackward0>) tensor(1.)
