In [9]:
from pathlib import Path
import requests

DATA_PATH = Path("data")
PATH = DATA_PATH / "mnist"

PATH.mkdir(parents=True, exist_ok=True)

URL = "https://github.com/pytorch/tutorials/raw/main/_static/"
FILENAME = "mnist.pkl.gz"

if not (PATH / FILENAME).exists():
        content = requests.get(URL + FILENAME).content
        (PATH / FILENAME).open("wb").write(content)

In [10]:
import pickle
import gzip

with gzip.open((PATH / FILENAME).as_posix(), "rb") as f:
        ((x_train, y_train), (x_valid, y_valid), _) = pickle.load(f, encoding="latin-1")

In [15]:
from matplotlib import pyplot
import numpy as np

pyplot.imshow(x_train[0].reshape((28, 28)), cmap="gray")
# ``pyplot.show()`` only if not on Colab
try:
    import google.colab
except ImportError:
    pyplot.show()
print(x_train.shape)

ModuleNotFoundError: No module named 'matplotlib'

In [None]:
import torch

x_train, y_train, x_valid, y_valid = map(
    torch.tensor, (x_train, y_train, x_valid, y_valid)
)
n, c = x_train.shape
print(x_train, y_train)
print(x_train.shape)
print(y_train.min(), y_train.max())

tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]]) tensor([5, 0, 4,  ..., 8, 4, 8])
torch.Size([50000, 784])
tensor(0) tensor(9)


In [None]:
import math

weights = torch.randn(784, 10) / math.sqrt(784)
weights.requires_grad_()
bias = torch.zeros(10, requires_grad=True)

In [None]:
def log_softmax(x):
    return x - x.exp().sum(-1).log().unsqueeze(-1)

def model(xb):
    return log_softmax(xb @ weights + bias)

In [None]:
bs = 64  # batch size

xb = x_train[0:bs]  # a mini-batch from x
preds = model(xb)  # predictions
preds[0], preds.shape
print(preds[0], preds.shape)

tensor([-2.0231, -2.2133, -2.2606, -2.1099, -3.0831, -2.9108, -2.4805, -2.1830,
        -2.3185, -1.9820], grad_fn=<SelectBackward0>) torch.Size([64, 10])


In [None]:
def nll(input, target):
    return -input[range(target.shape[0]), target].mean()

loss_func = nll

In [None]:
yb = y_train[0:bs]
print(loss_func(preds, yb))

tensor(2.3312, grad_fn=<NegBackward0>)


In [None]:
def accuracy(out, yb):
    preds = torch.argmax(out, dim=1)
    return (preds == yb).float().mean()

In [None]:
print(accuracy(preds, yb))

tensor(0.1562)


In [None]:
print(n)

50000


In [None]:
from IPython.core.debugger import set_trace

lr = 0.5  # learning rate
epochs = 2  # how many epochs to train for
print(n)

for epoch in range(epochs):
    for i in range((n - 1) // bs + 1):
        #         set_trace()
        start_i = i * bs
        end_i = start_i + bs
        xb = x_train[start_i:end_i]
        yb = y_train[start_i:end_i]
        pred = model(xb)
        loss = loss_func(pred, yb)

        loss.backward()
        with torch.no_grad():
            weights -= weights.grad * lr
            bias -= bias.grad * lr
            weights.grad.zero_()
            bias.grad.zero_()

50000


In [None]:
print(loss_func(model(xb), yb), accuracy(model(xb), yb))

tensor(0.0820, grad_fn=<NegBackward0>) tensor(1.)


Torch NN

In [None]:
import torch.nn.functional as F

loss_func = F.cross_entropy

def model(xb):
    return xb @ weights + bias

In [None]:
print(loss_func(model(xb), yb), accuracy(model(xb), yb))

tensor(0.0820, grad_fn=<NllLossBackward0>) tensor(1.)


In [None]:
from torch import nn

class Mnist_Logistic(nn.Module):
    def __init__(self):
        super().__init__()
        self.weights = nn.Parameter(torch.randn(784, 10) / math.sqrt(784))
        self.bias = nn.Parameter(torch.zeros(10))

    def forward(self, xb):
        return xb @ self.weights + self.bias

In [None]:
model = Mnist_Logistic()

In [None]:
print(loss_func(model(xb), yb))

tensor(2.5738, grad_fn=<NllLossBackward0>)


In [None]:
def fit():
    for epoch in range(epochs):
        for i in range((n - 1) // bs + 1):
            start_i = i * bs
            end_i = start_i + bs
            xb = x_train[start_i:end_i]
            yb = y_train[start_i:end_i]
            pred = model(xb)
            loss = loss_func(pred, yb)

            loss.backward()
            with torch.no_grad():
                for p in model.parameters():
                    p -= p.grad * lr
                model.zero_grad()

fit()

In [None]:
print(loss_func(model(xb), yb))

tensor(0.0816, grad_fn=<NllLossBackward0>)


In [None]:
from torch import optim
model = Mnist_Logistic()
optimizer = optim.SGD(model.parameters(), lr=lr)

In [None]:
for epoch in range(epochs):
        for i in range((n - 1) // bs + 1):
            start_i = i * bs
            end_i = start_i + bs
            xb = x_train[start_i:end_i]
            yb = y_train[start_i:end_i]
            pred = model(xb)
            loss = loss_func(pred, yb)

            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

In [None]:
from torch.utils.data import TensorDataset

In [None]:
train_ds = TensorDataset(x_train, y_train)

In [None]:
model = Mnist_Logistic()
optimizer = optim.SGD(model.parameters(), lr=lr)

for epoch in range(epochs):
    for i in range((n - 1) // bs + 1):
        xb, yb = train_ds[i * bs: i * bs + bs]
        pred = model(xb)
        loss = loss_func(pred, yb)

        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

print(loss_func(model(xb), yb))

tensor(0.0786, grad_fn=<NllLossBackward0>)


In [None]:
from torch.utils.data import DataLoader

train_ds = TensorDataset(x_train, y_train)
train_dl = DataLoader(train_ds, batch_size=bs)

In [None]:
for xb,yb in train_dl:
    pred = model(xb)

In [None]:
for epoch in range(epochs):
        for xb,yb in train_dl:
            pred = model(xb)
            loss = loss_func(pred, yb)

            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

In [None]:
print(loss_func(model(xb), yb))

tensor(0.0641, grad_fn=<NllLossBackward0>)


In [None]:
print(x_valid.shape)

torch.Size([10000, 784])


In [4]:
train_ds = TensorDataset(x_train, y_train)
train_dl = DataLoader(train_ds, batch_size=bs, shuffle=True)

valid_ds = TensorDataset(x_valid, y_valid)
valid_dl = DataLoader(valid_ds, batch_size=bs * 2)

NameError: name 'TensorDataset' is not defined

In [None]:
for epoch in range(epochs):
        model.train()
        for xb,yb in train_dl:
            pred = model(xb)
            loss = loss_func(pred, yb)

            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

        model.eval()

        sum = 0
        with torch.no_grad():
          for xb, yb in valid_dl:
            pred = model(xb)
            loss = loss_func(pred, yb)
            sum = sum + loss
        print(epoch, sum / len(valid_dl))

0 tensor(0.3192)
1 tensor(0.2710)


In [None]:
def loss_batch(model, loss_func, xb, yb, opt=None):
    loss = loss_func(model(xb), yb)

    if opt is not None:
        loss.backward()
        opt.step()
        opt.zero_grad()

    return loss.item(), len(xb)

In [None]:
import numpy as np

def fit(epochs, model, loss_func, opt, train_dl, valid_dl):
    for epoch in range(epochs):
        model.train()
        for xb, yb in train_dl:
            loss_batch(model, loss_func, xb, yb, opt)

        model.eval()
        with torch.no_grad():
            losses, nums = zip(
                *[loss_batch(model, loss_func, xb, yb) for xb, yb in valid_dl]
            )
        val_loss = np.sum(np.multiply(losses, nums)) / np.sum(nums)

        print(epoch, val_loss)

In [None]:
def get_data(train_ds, valid_ds, bs):
    return (
        DataLoader(train_ds, batch_size=bs, shuffle=True),
        DataLoader(valid_ds, batch_size=bs * 2),
    )

In [None]:
train_dl, valid_dl = get_data(train_ds, valid_ds, bs)
model = Mnist_Logistic()
optimizer = optim.SGD(model.parameters(), lr=lr)
fit(epochs, model, loss_func, optimizer, train_dl, valid_dl)

NameError: name 'train_ds' is not defined

In [None]:
class Mnist_CNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 16, kernel_size=3, stride=2, padding=1)
        self.conv2 = nn.Conv2d(16, 16, kernel_size=3, stride=2, padding=1)
        self.conv3 = nn.Conv2d(16, 10, kernel_size=3, stride=2, padding=1)
    def forward(self,xb):
        xb = xb.view(-1,1,28,28)
        xb = F.relu(self.conv1(xb))
        xb = F.relu(self.conv2(xb))
        xb = F.relu(self.conv3(xb))
        xb = F.avg_pool2d(xb,4)
        return xb.view(-1,xb.size(1))
lr = 0.1

In [None]:
model = Mnist_CNN()
opt = optimizer.SGD(model.parameters(), lr=lr, momentum=0.9)

fit(epochs, model, loss_func, opt, train_dl, valid_dl)

NameError: name 'optimizer' is not defined