In [588]:
! pip install -Uqq fastbook

In [589]:
import torch
from fastbook import *

In [590]:
path = untar_data(URLs.MNIST_SAMPLE)

In [591]:
Path.BASE_PATH = path

In [592]:
path.ls()

(#3) [Path('valid'),Path('labels.csv'),Path('train')]

In [593]:
(path/'train').ls()

(#2) [Path('train/7'),Path('train/3')]

In [594]:
threes = [tensor(Image.open(o)) for o in (path/'train'/'3').ls().sorted()]
sevens = [tensor(Image.open(o)) for o in (path/'train'/'7').ls().sorted()]

In [595]:
train_x = torch.cat([torch.stack(threes).float()/255, torch.stack(sevens).float()/255]).view(-1, 28*28)
train_x.shape

torch.Size([12396, 784])

In [596]:
train_y = tensor([1.,0.]*len(threes) + [0., 1.]*len(sevens)).view(-1, 2)
train_y.shape

torch.Size([12396, 2])

In [597]:
def binary_cross_entropy(preds, targ):
    preds = preds.sigmoid()
    return -torch.where(targ==1., preds, 1-preds).log().mean()

In [598]:
def init_params(size, std=1.):
    return (torch.randn(size)*std).requires_grad_()

In [599]:
w = init_params((28*28, 2))
b = init_params(2)

In [600]:
w.shape

torch.Size([784, 2])

In [601]:
def mk_linear(w, b, xb): return xb@w + b

In [602]:
linear = partial(mk_linear, w, b)

In [603]:
preds = linear(train_x[:5])
preds, preds.sigmoid()

(tensor([[-3.2130, 13.8309],
         [-3.3259, -3.4895],
         [-4.0412, -2.0192],
         [-5.7149,  6.4236],
         [-8.0018,  7.8936]], grad_fn=<AddBackward0>),
 tensor([[3.8680e-02, 1.0000e+00],
         [3.4692e-02, 2.9613e-02],
         [1.7273e-02, 1.1720e-01],
         [3.2858e-03, 9.9838e-01],
         [3.3476e-04, 9.9963e-01]], grad_fn=<SigmoidBackward0>))

In [604]:
train_y[:5]

tensor([[1., 0.],
        [1., 0.],
        [1., 0.],
        [1., 0.],
        [1., 0.]])

In [620]:
xb, yb = train_x[:5], train_y[:5]
acts = linear(xb)

binary_cross_entropy(acts, yb), torch.nn.BCEWithLogitsLoss()(acts, yb)

(tensor(4.9873, grad_fn=<NegBackward0>),
 tensor(4.9891, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>))

In [621]:
((acts.sigmoid() > 0.5) == yb.bool()).float()

tensor([[0., 0.],
        [0., 1.],
        [0., 1.],
        [0., 0.],
        [0., 0.]])

In [607]:
def step(xb, yb, model):
    preds = model(xb)
    loss = binary_cross_entropy(preds, yb)
    loss.backward()

In [673]:
step(xb,yb,linear)

w.data -= w.grad * 1e-2
b.data -= b.grad * 1e-2

w.grad.zero_()
b.grad.zero_()

binary_cross_entropy(linear(xb), yb)

tensor(0.4431, grad_fn=<NegBackward0>)

In [674]:
def batch_accuracy(preds, targ):
    preds = preds.sigmoid()
    return ((preds > 0.5) == targ.bool()).float().mean()

In [675]:
batch_accuracy(linear(xb), yb)

tensor(0.9000)

In [677]:
preds = linear(xb)
preds, preds.sigmoid()

(tensor([[  4.5735,   3.7344],
         [  4.1176, -11.8216],
         [  3.2663, -10.0291],
         [  3.2561,  -4.0170],
         [  0.6096,  -2.0676]], grad_fn=<AddBackward0>),
 tensor([[9.8978e-01, 9.7667e-01],
         [9.8398e-01, 7.3441e-06],
         [9.6326e-01, 4.4094e-05],
         [9.6289e-01, 1.7688e-02],
         [6.4784e-01, 1.1229e-01]], grad_fn=<SigmoidBackward0>))

In [708]:
tl = DataLoader(list(zip(train_x, train_y)), batch_size=256)

In [709]:
v_threes = [tensor(Image.open(o)) for o in (path/'valid'/'3').ls().sorted()]
v_sevens = [tensor(Image.open(o)) for o in (path/'valid'/'7').ls().sorted()]

valid_x = torch.cat([torch.stack(v_threes).float()/255, torch.stack(v_sevens).float()/255]).view(-1, 28*28)
valid_y = tensor([1.,0.]*len(v_threes) + [0., 1.]*len(v_sevens)).view(-1, 2)

vl = DataLoader(list(zip(valid_x, valid_y)), batch_size=256)

In [686]:
dls = DataLoaders(tl, vl)

In [705]:
def validate_epoch(model):
  acc = [batch_accuracy(model(xb), yb) for xb,yb in vl]
  loss = [binary_cross_entropy(model(xb), yb) for xb,yb in vl]
  return torch.stack(acc).mean(), torch.stack(loss).mean()

In [688]:
def train(dl, model, params, lr=1e-3, num_epochs=5):
  for i in range(num_epochs):
    for xb, yb in dl:
      step(xb, yb, model)
      for p in params:
        p.data -= p.grad * lr
        p.grad.zero_()
  
    print(validate_epoch(model), end='\n')
  

In [690]:
xb, yb = tl.one_batch()
xb.shape, yb.shape

(torch.Size([15, 784]), torch.Size([15, 2]))

In [692]:
preds = linear(xb)
preds

tensor([[  4.5735,   3.7344],
        [  4.1176, -11.8216],
        [  3.2663, -10.0291],
        [  3.2561,  -4.0170],
        [  0.6096,  -2.0676],
        [  1.6932,  -1.5450],
        [  8.8809, -16.2097],
        [ 12.5846,  -6.5463],
        [ 11.1319,  -7.3350],
        [ 11.9663,  -3.2735],
        [ -0.0646,  -3.6699],
        [  2.9784,  -2.7221],
        [  4.9295,  -7.9564],
        [ -0.4258,  -5.9329],
        [  6.0316,  -1.2440]], grad_fn=<AddBackward0>)

In [693]:
preds = preds.sigmoid()
preds

tensor([[9.8978e-01, 9.7667e-01],
        [9.8398e-01, 7.3441e-06],
        [9.6326e-01, 4.4094e-05],
        [9.6289e-01, 1.7688e-02],
        [6.4784e-01, 1.1229e-01],
        [8.4465e-01, 1.7581e-01],
        [9.9986e-01, 9.1247e-08],
        [1.0000e+00, 1.4334e-03],
        [9.9999e-01, 6.5189e-04],
        [9.9999e-01, 3.6493e-02],
        [4.8385e-01, 2.4847e-02],
        [9.5159e-01, 6.1682e-02],
        [9.9282e-01, 3.5031e-04],
        [3.9513e-01, 2.6438e-03],
        [9.9760e-01, 2.2374e-01]], grad_fn=<SigmoidBackward0>)

In [710]:
w1 = init_params((28*28, 2))
b1 = init_params(2)
linear1 = partial(mk_linear, w1, b1)
params = w1,b1

In [726]:
torch.sigmoid(torch.tensor([15]))

tensor([1.0000])

In [706]:
train(tl, linear1, params, num_epochs=5, lr=1.)

(tensor(0.5000), tensor(nan, grad_fn=<MeanBackward0>))
(tensor(0.5000), tensor(nan, grad_fn=<MeanBackward0>))
(tensor(0.5000), tensor(nan, grad_fn=<MeanBackward0>))
(tensor(0.5000), tensor(nan, grad_fn=<MeanBackward0>))
(tensor(0.5000), tensor(nan, grad_fn=<MeanBackward0>))


In [489]:
batch_accuracy(linear(xb), yb)

tensor(0.3000)