In this notebook I will attempt to build a model that classifies handwritten digits.

In [1]:
import torch
from fastai.vision.all import *

Download the MNIST dataset.

In [2]:
# Fetch the MNIST archive; the returned folder is used as the dataset root below.
path = untar_data(URLs.MNIST)
# Make Path objects display relative to the dataset folder (keeps listings short).
Path.BASE_PATH = path

In [4]:
# List the top-level folders of the dataset.
path.ls()

(#2) [Path('training'),Path('testing')]

Collect the image file paths into lists, one list per digit.

In [110]:
# train[d] / valid[d] hold the file paths found in the folder named after digit d.
train = [(path/'training'/f'{digit}').ls() for digit in range(10)]
valid = [(path/'testing'/f'{digit}').ls() for digit in range(10)]

Convert the images into tensors of floating-point numbers, dividing each pixel value by 255.

In [111]:
def _stack_digit_images(files):
    """Open every image in `files`, stack them into one tensor and divide the floats by 255."""
    return torch.stack([tensor(Image.open(f)) for f in files]).float()/255

# One stacked tensor per digit, in the same order as the `train` / `valid` file lists.
train_t = [_stack_digit_images(files) for files in train]
valid_t = [_stack_digit_images(files) for files in valid]

The output of our model will be an array of length 10, where the value at index i represents the model's confidence that the picture being classified shows the digit i. The targets use the same layout: 1 at the index of the true digit and -1 everywhere else.

In [112]:
# Flatten every 28x28 image into a 784-element row; digits 0..9 are concatenated in order.
train_x = torch.cat(train_t).view(-1, 28*28)
valid_x = torch.cat(valid_t).view(-1, 28*28)

# Build one target row per image: 1 at the index of the true digit, -1 everywhere else.
train_y, valid_y = [], []
for digit, (train_files, valid_files) in enumerate(zip(train, valid)):
    target_row = [1 if k == digit else -1 for k in range(10)]
    train_y.append(tensor([target_row] * len(train_files)))
    valid_y.append(tensor([target_row] * len(valid_files)))

# Concatenate in digit order so targets line up with the rows of train_x / valid_x.
train_y = torch.cat(train_y)
valid_y = torch.cat(valid_y)

print(train_y.shape)
print(train_x.shape)

torch.Size([60000, 10])
torch.Size([60000, 784])


In [113]:
# Pair each flattened image with its target row, giving a list of (x, y) samples.
dset = list(zip(train_x, train_y))
valid_dset = list(zip(valid_x, valid_y))
# Peek at the first validation sample to check the input shape and target layout.
x,y = valid_dset[0]
x.shape, y

(torch.Size([784]), tensor([ 1, -1, -1, -1, -1, -1, -1, -1, -1, -1]))

In [114]:
def init_params(size, std=1.0):
    """Create a trainable tensor of normally distributed random values.

    size: shape passed to torch.randn (an int or a tuple of ints).
    std:  factor the standard-normal samples are multiplied by.
    Returns the scaled tensor with gradient tracking switched on.
    """
    noise = torch.randn(size)
    scaled = noise * std
    return scaled.requires_grad_()

In [115]:
# One weight column per digit class (784 pixel inputs -> 10 outputs) and one bias per class.
# NOTE(review): with the default std=1.0 the initial outputs are large (see the preds[0]
# values displayed after linear1), so the tanh in mnist_loss presumably starts close to
# saturation; a smaller std may train faster — confirm before changing.
weights = init_params((28*28, 10))
bias = init_params(10)

In [116]:
def linear1(xb):
    """Linear model: multiply the batch `xb` by the global `weights` and add the global `bias`."""
    return torch.matmul(xb, weights) + bias

# Raw (untrained) outputs for the whole training set; show the first row as a sanity check.
preds = linear1(train_x)
preds[0]

tensor([ -4.7701,   2.4745,  12.3881,   9.0519,  15.5660, -11.8583,  10.3728,
          9.5017,   6.8035,  16.8012], grad_fn=<SelectBackward0>)

In [117]:
def mnist_loss(predictions, targets):
    """Mean squared error between tanh-squashed predictions and the targets.

    predictions: raw model outputs; tanh maps them into (-1, 1) to match the
                 +1 / -1 target encoding used in this notebook.
    targets:     tensor broadcastable against `predictions`.
    Returns a scalar tensor.
    """
    squashed = predictions.tanh()
    residual = targets - squashed
    return residual.square().mean()

In [118]:
# Mini-batches of 500 samples; only the training loader is shuffled.
dl = DataLoader(dset, batch_size=500, shuffle=True)
valid_dl = DataLoader(valid_dset, batch_size=500)

In [119]:
def calc_grad(xb, yb, model):
    """Run `model` on the batch `xb`, score it against `yb` with mnist_loss and backpropagate.

    Returns nothing; the effect is the `.backward()` call on the loss.
    """
    mnist_loss(model(xb), yb).backward()

In [120]:
def train_epoch(model, lr, params):
    """Run one pass over the global training loader `dl`, taking an SGD step per batch.

    model:  callable mapping a batch of inputs to predictions.
    lr:     learning rate multiplying each gradient.
    params: iterable of tensors to update in place.
    """
    for inputs, targets in dl:
        calc_grad(inputs, targets, model)
        # The update itself must not be recorded by autograd.
        with torch.no_grad():
            for param in params:
                param.sub_(param.grad * lr)
                # Reset so the next batch's gradients do not accumulate on top.
                param.grad.zero_()

In [121]:
def batch_accuracy(xb, yb):
    """Fraction of rows whose highest-scoring class matches the target row.

    xb: 2-D tensor of model outputs, one row per sample.
    yb: target rows in the notebook's encoding (1 at the true class, -1 elsewhere).
    Returns a scalar float tensor.
    """
    # Column index of the largest score in each row, kept 2-D for scatter_.
    top_class = xb.argmax(dim=1, keepdim=True)
    # Re-encode the prediction like the targets: -1 everywhere, 1 at the winning class.
    encoded = torch.full_like(xb, -1).scatter_(1, top_class, 1)
    # A row is correct only if every position agrees with the target row.
    row_is_correct = torch.eq(encoded, yb).all(dim=1)
    return row_is_correct.float().mean()

In [122]:
# Accuracy of the still-untrained linear model on the full training set.
batch_accuracy(preds, train_y)

tensor(0.0721)

In [123]:
def validate_epoch(model):
    """Return the accuracy of `model` over the global `valid_dl`, rounded to 4 decimals.

    Each batch's accuracy is weighted by the number of rows in that batch, so a
    final partial batch cannot skew the overall figure (a plain mean of per-batch
    accuracies would over-weight it). With equal-sized batches the result is the
    same as the plain mean.

    model: callable mapping a batch of inputs to predictions.
    """
    weighted_hits, n_rows = 0.0, 0
    # Validation only reads predictions, so skip building the autograd graph.
    with torch.no_grad():
        for xb, yb in valid_dl:
            batch_rows = len(yb)
            weighted_hits += batch_accuracy(model(xb), yb).item() * batch_rows
            n_rows += batch_rows
    return round(weighted_hits / n_rows, 4)

In [124]:
# Baseline: validation accuracy of linear1 before any training step.
validate_epoch(linear1)

0.0716

In [130]:
# Train for a single epoch at learning rate 1 and check the validation accuracy afterwards.
lr = 1
params = (weights, bias)
train_epoch(linear1, lr, params)
validate_epoch(linear1)

0.147

In [131]:
# Twenty more epochs, printing the validation accuracy after each one on a single line.
for epoch in range(20):
    train_epoch(linear1, lr, params)
    print(validate_epoch(linear1), end=' ')

0.1474 0.1485 0.151 0.1547 0.1579 0.1591 0.1604 0.163 0.165 0.1685 0.1715 0.1753 0.184 0.1917 0.2043 0.2191 0.2238 0.2276 0.2324 0.236 

In [133]:
# Two-layer network: 784 pixel inputs -> 50 hidden units (ReLU) -> 10 class outputs.
simple_net = nn.Sequential(
    nn.Linear(in_features=28*28, out_features=50),
    nn.ReLU(),
    nn.Linear(in_features=50, out_features=10),
)

In [141]:
# Bundle the training and validation loaders into the single object passed to Learner.
dls = DataLoaders(dl, valid_dl)

In [135]:
# Train simple_net with plain SGD, reusing the custom tanh/MSE loss and the
# batch_accuracy metric defined earlier in this notebook.
learn = Learner(dls, simple_net, opt_func=SGD,
                loss_func=mnist_loss, metrics=batch_accuracy)

In [136]:
# NOTE(review): the recorded output below lists only epochs 0-9 although 40 epochs are
# requested here — confirm which is intended and re-run the cell.
learn.fit(40, lr=1)

epoch,train_loss,valid_loss,batch_accuracy,time
0,0.120882,0.079376,0.896,00:01
1,0.074608,0.062428,0.9124,00:01
2,0.060312,0.053781,0.9242,00:00
3,0.052527,0.04867,0.9312,00:00
4,0.04889,0.045478,0.9351,00:00
5,0.04625,0.044958,0.9382,00:00
6,0.042524,0.042607,0.9407,00:00
7,0.040437,0.039444,0.9446,00:00
8,0.038681,0.038011,0.9468,00:00
9,0.036797,0.037917,0.9464,00:00


In [7]:
# Describe the dataset declaratively: image inputs, categorical labels.
# Files live under <split>/<digit>/<file>, so the grandparent folder selects the
# train/validation split and the parent folder name supplies the label.
mnist_data_block = DataBlock(
    blocks=(ImageBlock, CategoryBlock),
    get_items=get_image_files,
    get_y=parent_label,
    splitter=GrandparentSplitter(train_name='training', valid_name='testing'),
)

# Build the DataLoaders from the dataset root folder.
dls = mnist_data_block.dataloaders(path)

# resnet18 trained from scratch (no pretrained weights) for one one-cycle epoch.
learn = vision_learner(
    dls,
    resnet18,
    pretrained=False,
    loss_func=F.cross_entropy,
    metrics=accuracy,
)
learn.fit_one_cycle(1, lr_max=0.1)

epoch,train_loss,valid_loss,accuracy,time
0,0.088483,0.043582,0.9868,01:17
