In [29]:
from pathlib import Path
import requests

# Local cache location for the pickled MNIST archive.
DATA_PATH = Path("data")
PATH = DATA_PATH / "mnist"

PATH.mkdir(parents=True, exist_ok=True)

URL = "https://github.com/pytorch/tutorials/raw/master/_static/"
FILENAME = "mnist.pkl.gz"

# Download only when the archive is not already cached on disk.
if not (PATH / FILENAME).exists():
    response = requests.get(URL + FILENAME, timeout=60)
    # Fail loudly on an HTTP error so that an error page is never saved as the
    # dataset (the exists() guard above would otherwise prevent any retry).
    response.raise_for_status()
    # write_bytes opens, writes and closes the file, so no handle is leaked.
    (PATH / FILENAME).write_bytes(response.content)

In [30]:
import pickle
import gzip

# NOTE(review): pickle.load can execute arbitrary code; only load this archive
# from a source you trust.
# The pickled object unpacks into two (inputs, labels) pairs plus a third
# element that is discarded here.
with gzip.open(PATH / FILENAME, "rb") as f:
    (x_train, y_train), (x_valid, y_valid), _ = pickle.load(
        f, encoding="latin-1"
    )

import torch

# Convert each loaded array into a torch tensor, keeping the same names.
x_train, y_train, x_valid, y_valid = (
    torch.tensor(array) for array in (x_train, y_train, x_valid, y_valid)
)

In [31]:
import math

# Linear-layer parameters: 784 inputs (pixels) -> 10 outputs (classes).
# The random weights are scaled by 1/sqrt(784); gradient tracking is switched
# on only after the scaling so that the scaling itself is not recorded.
weights = (torch.randn(784, 10) / math.sqrt(784)).requires_grad_()
bias = torch.zeros(10).requires_grad_()

## Model
### Log Softmax Function
$$ \mathrm{LogSoftmax}(x_i) = \log\left(\frac{\exp(x_i)}{\sum_{j}\exp(x_j)}\right) $$

In [32]:
def log_softmax(x):
    return x - x.exp().sum(-1).log().unsqueeze(-1)

In [33]:
def model(xb):
    """Apply the linear layer to ``xb`` and normalise the output with log-softmax.

    Args:
        xb: Batch of inputs that is matrix-multiplied with ``weights``.

    Returns:
        The result of ``log_softmax`` applied to ``xb @ weights + bias``.
    """
    logits = xb @ weights + bias
    # Normalise the raw scores with LogSoftmax.
    return log_softmax(logits)

In [34]:
bs = 64  # batch size

# Take the first mini-batch of inputs from the training set.
xb = x_train[:bs]
print(xb.shape)

# Run the model on the mini-batch to get predictions.
preds = model(xb)
print(preds.shape)
print(preds[0])

torch.Size([64, 784])
torch.Size([64, 10])
tensor([-2.6399, -2.7490, -1.8466, -1.6916, -2.4674, -2.1626, -2.0120, -2.4993,
        -2.7318, -3.1736], grad_fn=<SelectBackward0>)


## Loss Function
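### NLL (Negative Log-Likelihood) Function
$$ \mathrm{Loss}_i = -\log(y_i) $$

Here $y_i$ is the probability the model assigns to the correct class of sample $i$. The model already outputs log-probabilities, so the loss only needs to select the entry for the correct class and negate it.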

In [35]:
from torch import Tensor


def nll(input: Tensor, target):
    """Negative log-likelihood averaged over a batch.

    For each row ``i`` the entry ``input[i, target[i]]`` is selected; the
    result is the negated mean of those entries.

    Args:
        input: 2-D tensor indexed as ``[row, class]``.
        target: Tensor of class indices, one per row of ``input``.

    Returns:
        Scalar tensor with the mean negative selected value.
    """
    rows = range(target.shape[0])
    # Pair every row index with its target class to pick one value per row.
    picked = input[rows, target]
    return -picked.mean()


# Name under which the rest of the notebook refers to the loss.
loss_func = nll

In [36]:
# Labels matching the mini-batch, then the loss of the current predictions.
yb = y_train[:bs]
print(loss_func(preds, yb))

tensor(2.3855, grad_fn=<NegBackward0>)


### TODO ...