In [1]:
! pip install -Uqq fastbook

In [2]:
import torch
from fastbook import *

In [3]:
path = untar_data(URLs.MNIST_SAMPLE)

In [4]:
Path.BASE_PATH = path

In [5]:
path.ls()

(#3) [Path('valid'),Path('labels.csv'),Path('train')]

In [6]:
(path/'train').ls()

(#2) [Path('train/7'),Path('train/3')]

In [7]:
threes = [tensor(Image.open(o)) for o in (path/'train'/'3').ls().sorted()]
sevens = [tensor(Image.open(o)) for o in (path/'train'/'7').ls().sorted()]

In [8]:
train_x = torch.cat([torch.stack(threes).float()/255, torch.stack(sevens).float()/255]).view(-1, 28*28)
train_x.shape

torch.Size([12396, 784])

In [9]:
train_y = tensor([1.,0.]*len(threes) + [0., 1.]*len(sevens)).view(-1, 2)
train_y.shape

torch.Size([12396, 2])

In [10]:
def mnist_loss(preds, targ):
    preds = preds.sigmoid()
    return torch.where(targ==1., 1-preds, preds).mean()

In [11]:
def init_params(size, std=1.):
    return (torch.randn(size)*std).requires_grad_()

In [12]:
w = init_params((28*28, 2))
b = init_params(2)

In [13]:
w.shape

torch.Size([784, 2])

In [14]:
def mk_linear(w, b, xb): return xb@w + b

In [15]:
linear = partial(mk_linear, w, b)

In [16]:
preds = linear(train_x[:5])
preds, preds.sigmoid()

(tensor([[ 7.5428, 10.1940],
         [ 4.8822, -1.9816],
         [ 2.5037, 13.9184],
         [-3.2935, -5.6898],
         [10.5705,  9.6727]], grad_fn=<AddBackward0>),
 tensor([[0.9995, 1.0000],
         [0.9925, 0.1211],
         [0.9244, 1.0000],
         [0.0358, 0.0034],
         [1.0000, 0.9999]], grad_fn=<SigmoidBackward0>))

In [17]:
train_y[:5]

tensor([[1., 0.],
        [1., 0.],
        [1., 0.],
        [1., 0.],
        [1., 0.]])

In [18]:
xb, yb = train_x[:5], train_y[:5]
acts = linear(xb)

mnist_loss(acts, yb), torch.nn.BCEWithLogitsLoss()(acts, yb)

(tensor(0.4172, grad_fn=<MeanBackward0>),
 tensor(3.7334, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>))

In [19]:
((acts.sigmoid() > 0.5) == yb.bool()).float().mean()

tensor(0.6000)

In [20]:
def step(xb, yb, model):
    preds = model(xb)
    loss = mnist_loss(preds, yb)
    loss.backward()

In [21]:
step(xb,yb,linear)

w.data -= w.grad * 1e-2
b.data -= b.grad * 1e-2

w.grad.zero_()
b.grad.zero_()

mnist_loss(linear(xb), yb)

tensor(0.4170, grad_fn=<MeanBackward0>)

In [22]:
def batch_accuracy(preds, targ):
    preds = preds.sigmoid()
    return ((preds > 0.5) == targ.bool()).float().mean()

In [23]:
batch_accuracy(linear(xb), yb)

tensor(0.6000)

In [24]:
preds = linear(xb)
preds, preds.sigmoid()

(tensor([[ 7.5511, 10.1861],
         [ 4.8905, -1.9923],
         [ 2.5139, 13.9108],
         [-3.2829, -5.6991],
         [10.5783,  9.6652]], grad_fn=<AddBackward0>),
 tensor([[0.9995, 1.0000],
         [0.9925, 0.1200],
         [0.9251, 1.0000],
         [0.0362, 0.0033],
         [1.0000, 0.9999]], grad_fn=<SigmoidBackward0>))

In [25]:
tl = DataLoader(list(zip(train_x, train_y)), batch_size=256)

In [26]:
v_threes = [tensor(Image.open(o)) for o in (path/'valid'/'3').ls().sorted()]
v_sevens = [tensor(Image.open(o)) for o in (path/'valid'/'7').ls().sorted()]

valid_x = torch.cat([torch.stack(v_threes).float()/255, torch.stack(v_sevens).float()/255]).view(-1, 28*28)
valid_y = tensor([1.,0.]*len(v_threes) + [0., 1.]*len(v_sevens)).view(-1, 2)

vl = DataLoader(list(zip(valid_x, valid_y)), batch_size=256)

In [27]:
dls = DataLoaders(tl, vl)

In [28]:
def validate_epoch(model):
  acc = [batch_accuracy(model(xb), yb) for xb,yb in vl]
  loss = [mnist_loss(model(xb), yb) for xb,yb in vl]
  return torch.stack(acc).mean(), torch.stack(loss).mean()

In [29]:
def train(dl, model, params, lr=1e-3, num_epochs=5):
  for i in range(num_epochs):
    for xb, yb in dl:
      step(xb, yb, model)
      for p in params:
        p.data -= p.grad * lr
        p.grad.zero_()
  
    print(validate_epoch(model), end='\n')
  

In [30]:
xb, yb = tl.one_batch()
xb.shape, yb.shape

(torch.Size([256, 784]), torch.Size([256, 2]))

In [31]:
preds = linear(xb)
preds

tensor([[ 7.5511e+00,  1.0186e+01],
        [ 4.8905e+00, -1.9923e+00],
        [ 2.5139e+00,  1.3911e+01],
        [-3.2829e+00, -5.6991e+00],
        [ 1.0578e+01,  9.6652e+00],
        [ 1.7330e+01,  8.1297e+00],
        [ 1.1385e+01,  1.4575e+00],
        [-3.3012e+00, -1.2315e+01],
        [ 5.0960e+00,  1.5900e+01],
        [-6.0887e+00, -1.5689e+00],
        [-5.2297e+00,  6.9850e+00],
        [-5.1381e+00,  3.9972e-01],
        [-7.9173e+00,  1.1940e+01],
        [ 7.9454e+00, -9.0266e+00],
        [-1.2563e+00,  5.1990e+00],
        [-2.3799e-01,  6.4348e+00],
        [-4.3496e+00,  1.9372e+00],
        [-7.0414e-01,  4.1155e+00],
        [-6.9421e+00,  1.6317e+01],
        [-7.5485e-01,  4.7218e+00],
        [ 7.9345e+00,  4.7472e+00],
        [-5.7633e+00,  7.2462e+00],
        [ 6.8864e+00, -1.3617e+01],
        [-2.6496e+00,  4.2659e+00],
        [ 1.9912e+01, -1.6159e+00],
        [ 8.4884e+00, -1.7366e+01],
        [ 6.4987e+00,  9.3237e+00],
        [-1.1876e+00,  4.022

In [32]:
preds = preds.sigmoid()
preds

tensor([[9.9947e-01, 9.9996e-01],
        [9.9254e-01, 1.2001e-01],
        [9.2511e-01, 1.0000e+00],
        [3.6164e-02, 3.3379e-03],
        [9.9997e-01, 9.9994e-01],
        [1.0000e+00, 9.9971e-01],
        [9.9999e-01, 8.1116e-01],
        [3.5529e-02, 4.4853e-06],
        [9.9392e-01, 1.0000e+00],
        [2.2633e-03, 1.7237e-01],
        [5.3268e-03, 9.9908e-01],
        [5.8345e-03, 5.9862e-01],
        [3.6424e-04, 9.9999e-01],
        [9.9965e-01, 1.2015e-04],
        [2.2161e-01, 9.9451e-01],
        [4.4078e-01, 9.9840e-01],
        [1.2747e-02, 8.7404e-01],
        [3.3090e-01, 9.8394e-01],
        [9.6535e-04, 1.0000e+00],
        [3.1976e-01, 9.9118e-01],
        [9.9964e-01, 9.9140e-01],
        [3.1309e-03, 9.9929e-01],
        [9.9898e-01, 1.2193e-06],
        [6.6015e-02, 9.8616e-01],
        [1.0000e+00, 1.6577e-01],
        [9.9979e-01, 2.8723e-08],
        [9.9850e-01, 9.9991e-01],
        [2.3368e-01, 9.8241e-01],
        [6.8168e-01, 6.8056e-02],
        [9.873

In [33]:
w1 = init_params((28*28, 2))
b1 = init_params(2)
linear1 = partial(mk_linear, w1, b1)
params = w1,b1

In [34]:
train(tl, linear1, params, num_epochs=5, lr=1.)

(tensor(0.6815), tensor(0.3212, grad_fn=<MeanBackward0>))
(tensor(0.8018), tensor(0.2006, grad_fn=<MeanBackward0>))
(tensor(0.8578), tensor(0.1454, grad_fn=<MeanBackward0>))
(tensor(0.9191), tensor(0.0862, grad_fn=<MeanBackward0>))
(tensor(0.9399), tensor(0.0651, grad_fn=<MeanBackward0>))


In [36]:
nums = DataBlock(
  blocks=(ImageBlock, MultiCategoryBlock),
  get_items=get_image_files,
  get_y=parent_label,
  splitter=RandomSplitter(seed=42),
)

dls = nums.dataloaders(path/'train')

In [65]:
learn = Learner(dls, torch.nn.Linear(28*28, 2), loss_func=mnist_loss, metrics=batch_accuracy)

learn.fit(5, 1.)

epoch,train_loss,valid_loss,batch_accuracy,time


RuntimeError: mat1 and mat2 shapes cannot be multiplied (5376x28 and 784x2)