In [None]:
#hide
# When /content exists (presumably a Google Colab runtime — confirm), quietly
# install/upgrade fastbook; elsewhere the shell test fails and pip is skipped.
! [ -e /content ] && pip install -Uqq fastbook
import fastbook
# Run fastbook's notebook setup helper.
fastbook.setup_book()

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m719.8/719.8 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m47.1/47.1 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m547.8/547.8 kB[0m [31m20.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m8.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m64.9/64.9 kB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m194.1/194.1 kB[0m [31m12.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m9.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m316.1/316.1 kB[0m [31m17.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━

In [None]:
#hide
from fastai.vision.all import *
from fastbook import *

# Make 'Greys' the default colormap for images drawn by matplotlib.
matplotlib.rc('image', cmap='Greys')

In [None]:
# Fetch the full MNIST dataset and keep its root path.
# MNIST_SAMPLE is not used because it doesn't have 2s / 4s.
path = untar_data(URLs.MNIST)

In [None]:
# List the dataset root. Note the folder names: "training" and "testing",
# not "train" and "test".
path.ls()

(#2) [Path('/root/.fastai/data/mnist_png/training'),Path('/root/.fastai/data/mnist_png/testing')]

In [None]:
#hide
# Register the dataset root as Path.BASE_PATH
# (presumably so displayed paths are shown relative to it — confirm).
Path.BASE_PATH = path

In [None]:
# Sorted file listings of the training images for the digits 4 and 2.
fours = (path/'training'/'4').ls().sorted()
twos = (path/'training'/'2').ls().sorted()

In [None]:
# Validation set: open every image in the held-out 'testing' folder of each
# digit, stack the images into one tensor per digit, and divide by 255.
valid_2_tens = torch.stack(
    [tensor(Image.open(img)) for img in (path/'testing'/'2').ls()]
).float()/255

valid_4_tens = torch.stack(
    [tensor(Image.open(img)) for img in (path/'testing'/'4').ls()]
).float()/255

In [None]:
# Training set, one digit at a time: a list of per-image tensors, then a single
# stacked float tensor with the pixel values divided by 255.
two_tensors = [tensor(Image.open(img)) for img in twos]
stacked_twos = torch.stack(two_tensors).float()/255

four_tensors = [tensor(Image.open(img)) for img in fours]
stacked_fours = torch.stack(four_tensors).float()/255

In [None]:
# hyperparameters
# NOTE: this value is never used as-is — `lr` is reassigned to 5e-4 in the
# training-loop cell further down before anything reads it.
lr = 1e-6

In [None]:
# Inputs: all of the twos, then all of the fours, each image flattened to a
# row of 28*28 values.
train_x = torch.cat([stacked_twos, stacked_fours]).view(-1, 28*28)
# Labels in the same order, as a column: 1 if a two, 0 if a four.
train_y = tensor([1]*len(stacked_twos) + [0]*len(stacked_fours))[:, None]

# Pair every input row with its label.
dset = [(x, y) for x, y in zip(train_x, train_y)]

In [None]:
# Validation inputs/labels, built like the training ones: twos first, then
# fours; rows of 28*28 values; label 1 for a two and 0 for a four.
valid_x = torch.cat([valid_2_tens, valid_4_tens]).view(-1, 28*28)
valid_y = tensor([1]*len(valid_2_tens) + [0]*len(valid_4_tens))[:, None]
valid_dset = [(x, y) for x, y in zip(valid_x, valid_y)]

In [None]:
def init_params(size, std=1.0):
    """Return a gradient-tracking tensor of shape `size` filled with normal noise.

    The values are standard-normal samples multiplied by `std`.
    """
    noise = torch.randn(size)
    scaled = noise * std
    return scaled.requires_grad_()

In [None]:
# Parameters of the hand-written linear model: a (28*28, 1) weight column and a
# single bias, both created by init_params.
# NOTE(review): no random seed is set before this point, so these starting
# values (and the accuracies printed later) change from run to run.
weights = init_params((28*28, 1))
bias = init_params(1)

In [None]:
def linear1(xb):
    """Linear model on the module-level parameters: `xb @ weights + bias`."""
    projected = xb.matmul(weights)
    return projected + bias

In [None]:
def mnist_loss(predictions, targets):
    """Mean distance between the sigmoid of `predictions` and the 0/1 `targets`.

    Where the target is 1 the per-item loss is `1 - sigmoid(prediction)`;
    everywhere else it is `sigmoid(prediction)`.
    """
    probs = predictions.sigmoid()
    is_positive = targets == 1
    per_item = torch.where(is_positive, 1 - probs, probs)
    return per_item.mean()

In [None]:
# Training loader: shuffle every epoch. `dset` holds all of the twos followed
# by all of the fours, so without shuffling every minibatch would contain a
# single class and each SGD step would pull the model towards that class only.
dl = DataLoader(dset, batch_size=256, shuffle=True)
# Validation loader: only used for evaluation, so it keeps its original order.
valid_dl = DataLoader(valid_dset, batch_size=256)

In [None]:
# minibatch for testing: the first four rows of train_x
batch = train_x[:4]
batch.shape

torch.Size([4, 784])

In [None]:
def calc_grad(xb, yb, model):
    """Backpropagate `mnist_loss` of `model` on one minibatch.

    Gradients are left on the parameters that `model` uses; nothing is returned.

    xb: minibatch of inputs passed straight to `model`.
    yb: matching targets passed to `mnist_loss`.
    model: callable producing predictions from `xb`.
    """
    # Zero out gradients first. Use the model's own parameters when it exposes
    # them (e.g. an nn.Module); otherwise fall back to the module-level
    # `weights`/`bias` used by the hand-written `linear1`.
    params = model.parameters() if hasattr(model, 'parameters') else (weights, bias)
    for p in params:
        # `.grad` is None until the first backward pass (or after an optimizer
        # resets it to None), so there is nothing to zero in that case.
        if p.grad is not None:
            p.grad.zero_()
    preds = model(xb)
    loss = mnist_loss(preds, yb)
    loss.backward()

In [None]:
def train_epoch(model, lr, params):
    """Run one pass over `dl`, taking a plain gradient-descent step per minibatch.

    model: callable handed to `calc_grad`.
    lr: step size applied to every gradient.
    params: iterable of the tensors to update.
    """
    for inputs, targets in dl:
        calc_grad(inputs, targets, model)
        for param in params:
            # Update through `.data` so the step itself is not tracked by autograd.
            param.data -= param.grad * lr
            # Reset the gradient so the next minibatch does not accumulate onto it.
            param.grad.zero_()

In [None]:
def batch_accuracy(xb, yb):
    """Fraction of items whose thresholded prediction equals the label.

    `xb` holds raw model outputs: they are passed through a sigmoid and
    compared to 0.5, and the resulting booleans are compared with `yb`.
    """
    predicted_positive = xb.sigmoid().gt(0.5)
    hits = predicted_positive.eq(yb)
    return hits.float().mean()

In [None]:
def validate_epoch(model):
    """Return the accuracy of `model` over `valid_dl`, rounded to 4 decimals.

    Each batch's accuracy is weighted by the number of items in that batch, so
    a smaller final batch does not count as much as a full one and the result
    is the accuracy over all validation items.
    """
    weighted_correct, n_items = 0.0, 0
    # Evaluation only: no autograd graph is needed for these forward passes.
    with torch.no_grad():
        for xb, yb in valid_dl:
            batch_len = len(yb)
            weighted_correct += batch_accuracy(model(xb), yb).item() * batch_len
            n_items += batch_len
    return round(weighted_correct / n_items, 4)

In [None]:
# Train the hand-written linear model for 200 epochs, printing the validation
# accuracy after each one.
params = weights, bias
lr = 5e-4  # replaces the module-level lr set in the hyperparameters cell
for i in range(200):
    train_epoch(linear1, lr, params)
    print(validate_epoch(linear1), end=' ')

0.6134 0.6139 0.6139 0.6139 0.6143 0.6152 0.6162 0.6162 0.6162 0.6162 0.6161 0.6161 0.6161 0.6176 0.6176 0.6176 0.6186 0.6186 0.6191 0.62 0.621 0.622 0.6225 0.6235 0.6235 0.623 0.623 0.6235 0.624 0.6249 0.6259 0.6264 0.6264 0.6269 0.6264 0.6264 0.6269 0.6269 0.6269 0.6279 0.6288 0.6298 0.6298 0.6298 0.6303 0.6303 0.6308 0.6308 0.6308 0.6308 0.6313 0.6308 0.6313 0.6318 0.6312 0.6312 0.6317 0.6317 0.6322 0.6327 0.6322 0.6322 0.6322 0.6327 0.6327 0.6327 0.6332 0.6337 0.6342 0.6342 0.6347 0.6351 0.6351 0.6361 0.6366 0.6366 0.6371 0.6376 0.6381 0.64 0.64 0.64 0.64 0.64 0.64 0.6396 0.6396 0.6396 0.6396 0.6396 0.6396 0.6396 0.6415 0.642 0.643 0.643 0.644 0.6444 0.6444 0.6444 0.6449 0.6449 0.6455 0.6455 0.6474 0.6479 0.6489 0.6499 0.6508 0.6508 0.6518 0.6523 0.6528 0.6528 0.6528 0.6528 0.6528 0.6528 0.6533 0.6538 0.6547 0.6552 0.6557 0.6557 0.6557 0.6557 0.6567 0.6567 0.6572 0.6577 0.6577 0.6571 0.6581 0.6581 0.6595 0.66 0.6605 0.661 0.6615 0.6615 0.662 0.6639 0.6639 0.6644 0.6644 0.6649 0.664

From here on, we swap our hand-written functionality for the idiomatic PyTorch equivalents. I'm doing this step by step, so the code is less minimal than it could be.

In [None]:
# nn.Linear creates and owns its own weight and bias, replacing the
# hand-written `linear1` together with `weights`/`bias`.
linear_model = nn.Linear(28*28, 1)
w,b = linear_model.parameters()
# Show the parameter shapes (this is what produces the cell's output).
w.shape, b.shape

(torch.Size([1, 784]), torch.Size([1]))

In [None]:
class BasicOptim:
    """Minimal gradient-descent optimizer: `p -= p.grad * lr` for every parameter.

    params: iterable of parameter tensors; it is copied into a list, so a
        one-shot iterator such as `nn.Module.parameters()` is fine.
    lr: step size applied to every gradient.
    """
    def __init__(self,params,lr): self.params,self.lr = list(params),lr

    def step(self, *args, **kwargs):
        """Apply one update to every parameter that currently has a gradient."""
        for p in self.params:
            # A parameter has no gradient before the first backward pass, right
            # after `zero_grad()`, or when it did not take part in the loss.
            # Skip it instead of failing on `None.data`.
            if p.grad is None: continue
            p.data -= p.grad.data * self.lr

    def zero_grad(self, *args, **kwargs):
        """Drop all stored gradients so the next backward pass starts fresh."""
        for p in self.params: p.grad = None

In [None]:
# Hand-rolled optimizer over the nn.Linear parameters, using the module-level lr.
opt = BasicOptim(linear_model.parameters(), lr)

In [None]:
def train_epoch(model):
    """Run one pass over `dl`, delegating the update to the module-level `opt`.

    NOTE: this redefinition replaces the earlier three-argument `train_epoch`.
    """
    for inputs, targets in dl:
        calc_grad(inputs, targets, model)
        # Apply the update, then clear the gradients before the next minibatch.
        opt.step()
        opt.zero_grad()

In [None]:
# Baseline: validation accuracy of linear_model before it has been trained.
validate_epoch(linear_model)

0.3013

In [None]:
def train_model(model, epochs):
    """Train `model` for `epochs` epochs, printing validation accuracy after each.

    The accuracies are printed on one line, separated by spaces.
    """
    for _ in range(epochs):
        train_epoch(model)
        accuracy = validate_epoch(model)
        print(accuracy, end=' ')

In [None]:
# Train linear_model for 20 epochs with the BasicOptim instance held in `opt`.
train_model(linear_model, 20)


0.3789 0.4734 0.5744 0.6578 0.7414 0.8051 0.8453 0.8708 0.8959 0.9118 0.9227 0.9323 0.9417 0.9446 0.9465 0.9489 0.9514 0.9538 0.9568 0.9595 

In [None]:

# Same experiment with a fresh linear layer, using the library's SGD optimizer
# instead of BasicOptim. `train_epoch` reads the module-level `opt`, so
# rebinding `opt` here is what switches the optimizer used by `train_model`.
linear_model = nn.Linear(28*28,1)
opt = SGD(linear_model.parameters(), lr)
train_model(linear_model, 20)

0.597 0.6791 0.7533 0.8065 0.8454 0.8707 0.8873 0.902 0.914 0.9227 0.9281 0.9324 0.9372 0.9388 0.9423 0.9453 0.9478 0.9492 0.9507 0.9512 

In [None]:
# Bundle the training and validation loaders into one object for Learner.
dls = DataLoaders(dl, valid_dl)


In [None]:

# Let Learner drive the training loop: a fresh linear layer, SGD as the
# optimizer, our mnist_loss as the loss and batch_accuracy as the reported metric.
learn = Learner(dls, nn.Linear(28*28,1), opt_func=SGD,
                loss_func=mnist_loss, metrics=batch_accuracy)

In [None]:
# Train for 10 epochs using the learning rate currently held in `lr`.
learn.fit(10, lr=lr)


epoch,train_loss,valid_loss,batch_accuracy,time
0,0.507328,0.507725,0.443893,00:00
1,0.502179,0.499544,0.513623,00:00
2,0.496231,0.491343,0.594774,00:00
3,0.48975,0.483142,0.669098,00:00
4,0.482957,0.47496,0.737525,00:00
5,0.476005,0.466818,0.792111,00:00
6,0.468991,0.458736,0.834192,00:00
7,0.461967,0.45073,0.864852,00:00
8,0.454967,0.442818,0.890144,00:00
9,0.44801,0.435016,0.90203,00:00


In [None]:

# Two linear layers with a ReLU between them: 28*28 inputs -> 30 hidden units -> 1 output.
simple_net = nn.Sequential(nn.Linear(28*28, 30), nn.ReLU(), nn.Linear(30, 1))

In [None]:

# Same Learner setup as for the linear layer, now wrapping simple_net.
learn = Learner(dls, simple_net, opt_func=SGD,
                loss_func=mnist_loss, metrics=batch_accuracy)

In [None]:
# Train for 40 epochs; 0.1 is passed as fit's second positional argument
# (the learning rate in fastai's Learner.fit — confirm against the installed version).
learn.fit(40, 0.1)


epoch,train_loss,valid_loss,batch_accuracy,time
0,0.330891,0.440409,0.49007,00:00
1,0.171102,0.270927,0.738456,00:00
2,0.097241,0.133704,0.900912,00:00
3,0.064138,0.085564,0.939176,00:00
4,0.048829,0.0654,0.953078,00:00
5,0.041163,0.05508,0.956585,00:00
6,0.036946,0.048976,0.960092,00:00
7,0.034381,0.044953,0.965554,00:00
8,0.032657,0.042119,0.967043,00:00
9,0.031398,0.040015,0.969029,00:00
