# MNIST Digit Classification

## Setup

In [None]:
#hide
# Optional install step, left disabled: it installs fastbook only when /content
# exists (presumably a Google Colab runtime — confirm before enabling).
# ! [ -e /content ] && pip install -Uqq fastbook
import fastbook
# Run fastbook's notebook setup helper before anything else in the notebook.
fastbook.setup_book()

In [None]:
# Mini-batch size handed to both the training and the validation DataLoader.
batch_size = 256

In [None]:
#hide
from fastai.vision.all import *
from fastbook import *

# Make 'Greys' the default colormap for images drawn with matplotlib.
matplotlib.rc('image', cmap='Greys')

In [None]:
# Root directory of the MNIST dataset as returned by fastai's untar_data
# (presumably downloads and extracts the archive on first use — see fastai docs).
path = untar_data(URLs.MNIST)

In [None]:
#hide
# Register the dataset root as the base path; the path listings shown later are
# relative to it (e.g. 'testing/0/10.png').
Path.BASE_PATH = path

## Load Data & Prepare DataLoaders

In [None]:
# Collect every PNG exactly one directory below training/ and testing/.
# Sorting fixes the file order, which the label lists built later rely on
# staying aligned with the image tensors.
digits_training = sorted((path/'training/').glob('*/*.png'))
digits_testing = sorted((path/'testing/').glob('*/*.png'))
# Show the first ten test paths as a sanity check.
digits_testing[:10]

[Path('testing/0/10.png'),
 Path('testing/0/1001.png'),
 Path('testing/0/1009.png'),
 Path('testing/0/101.png'),
 Path('testing/0/1034.png'),
 Path('testing/0/1047.png'),
 Path('testing/0/1061.png'),
 Path('testing/0/1084.png'),
 Path('testing/0/1094.png'),
 Path('testing/0/1121.png')]

In [None]:
# Open every image file and convert it to a tensor (one tensor per file), in
# the same order as the sorted path lists.
training_tensors = [tensor(Image.open(o)) for o in digits_training]
testing_tensors = [tensor(Image.open(o)) for o in digits_testing]
# Number of images loaded for each split.
len(training_tensors),len(testing_tensors)

(60000, 10000)

In [None]:
# Shell cross-check: count the regular files under the training directory.
!find {path/'training'} -type f | wc -l

60000


In [None]:
# Shell cross-check: count the regular files under the testing directory.
!find {path/'testing'} -type f | wc -l

10000


In [None]:
# Stack the per-image tensors into one tensor per split, cast to float and
# divide by 255 (presumably rescaling 8-bit pixel values to [0, 1] — confirm
# the source images are 8-bit).
train_stacked = torch.stack(training_tensors).float()/255
valid_stacked = torch.stack(testing_tensors).float()/255
# Expect (number of images, height, width).
train_stacked.shape

torch.Size([60000, 28, 28])

In [None]:
# Flatten every 28x28 image into a single row of 784 values.
train_x = train_stacked.view(-1, 28*28)
# Each image lives in a directory named after its digit (e.g. training/3/x.png),
# so the label is the parent directory's name. Using Path.parent.name instead of
# splitting the string on '/' keeps this independent of the OS path separator.
train_y = tensor([int(p.parent.name) for p in digits_training])
train_x.shape,train_y.shape

(torch.Size([60000, 784]), torch.Size([60000]))

In [None]:
# Peek at the first ten training labels; the paths were sorted, so these all
# come from the first class directory.
print(train_y[:10])

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])


In [None]:
# Flatten every 28x28 validation image into a single row of 784 values.
valid_x = valid_stacked.view(-1, 28*28)
# Each image lives in a directory named after its digit (e.g. testing/3/x.png),
# so the label is the parent directory's name. Using Path.parent.name instead of
# splitting the string on '/' keeps this independent of the OS path separator.
valid_y = tensor([int(p.parent.name) for p in digits_testing])
valid_x.shape,valid_y.shape

(torch.Size([10000, 784]), torch.Size([10000]))

In [None]:
# Peek at the first ten validation labels; the paths were sorted, so these all
# come from the first class directory.
print(valid_y[:10])

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])


In [None]:
print(batch_size)
# Each dataset is a list of (flattened image, label) pairs.
# Training batches are reshuffled so every epoch sees the samples in a new order.
train_dl = DataLoader(list(zip(train_x, train_y)), batch_size=batch_size, shuffle=True)
# Validation only measures the model, so shuffling buys nothing; a fixed order
# keeps the per-batch composition (including the short last batch) reproducible.
valid_dl = DataLoader(list(zip(valid_x, valid_y)), batch_size=batch_size, shuffle=False)

256


In [None]:
# Bundle the training and validation loaders into one fastai DataLoaders object;
# the learners below read them back as dls.train and dls.valid.
dls = DataLoaders(train_dl, valid_dl)
dls

<fastai.data.core.DataLoaders>

In [None]:
# Spot check: label (second element of the pair) of validation sample 3000.
dls.valid_ds[3000][1].item()

2

In [None]:
# Number of samples in the validation dataset.
len(dls.valid_ds)

10000

In [None]:
# Number of samples in the training dataset.
len(dls.train_ds)

60000

In [None]:
# Number of batches the training loader reports per epoch.
print(len(dls.train))

235


In [None]:
# Walk the training loader once and count the batches it actually yields, as a
# cross-check of the length it reports. The counter starts at 0 so it is still
# defined if the loader yields nothing.
num_train_batches = 0
for num_train_batches, (xb, yb) in enumerate(dls.train, start=1):
    pass

print(f'num_train_batches: {num_train_batches}')

num_train_batches: 235


In [None]:
# Number of batches the validation loader reports per pass.
print(len(dls.valid))

40


In [None]:
# Walk the validation loader once and count the batches it actually yields, as
# a cross-check of the length it reports. The counter starts at 0 so it is
# still defined if the loader yields nothing.
num_valid_batches = 0
for num_valid_batches, (xb, yb) in enumerate(dls.valid, start=1):
    pass

print(f'num_valid_batches: {num_valid_batches}')

num_valid_batches: 40


## Create Model Architecture

In [None]:
import torch
# Report whether PyTorch can see a CUDA device in this runtime.
torch.cuda.is_available()

True

In [None]:
# from tqdm.auto import tqdm
import time
import pandas as pd
import ipywidgets as widgets
import IPython.display as dsp
# from IPython.display import HTML, display

class MyLearner:
    """Bare-bones training loop in the spirit of fastai's ``Learner``.

    Every epoch runs one pass over ``dls.train`` (forward, backward, optimizer
    step) followed by one gradient-free pass over ``dls.valid``, then refreshes
    a progress bar and an HTML table of per-epoch results in the notebook.
    The table is also kept as a DataFrame in ``training_summary``.
    """

    # Column order of the per-epoch summary table.
    _SUMMARY_COLUMNS = ['epoch', 'train_loss', 'valid_loss', 'metric', 'time']

    def __init__(self,
                 dls,
                 model: 'callable',
                 opt_func: 'callable',
                 metrics: 'callable',
                 loss_func: 'callable | None' = None,
                 lr: float = 0.001,
                 device: str = 'cpu'):
        """Store the training components and move the model to ``device``.

        Args:
            dls: Object exposing ``train`` and ``valid``, each an iterable of
                ``(xb, yb)`` batches.
            model: Module called as ``model(xb)``; it must also support
                ``to``, ``train``, ``eval`` and ``parameters``.
            opt_func: Called as ``opt_func(params, lr=lr)``; the result must
                provide ``step()`` and ``zero_grad()``.
            metrics: Called as ``metrics(preds, yb)``; expected to return the
                mean over the batch as a float or a 0-dim tensor.
            loss_func: Called as ``loss_func(preds, yb)``; expected to return
                the mean loss over the batch as a scalar tensor. May be left
                as ``None`` at construction time but is required by ``fit``.
            lr: Learning rate used when ``fit`` is not given one.
            device: Device string passed to ``torch.device``.
        """
        self.dls = dls
        self.metrics = metrics
        self.loss_func = loss_func
        self.opt_func = opt_func
        self.lr = lr
        self.training_summary = pd.DataFrame(columns=self._SUMMARY_COLUMNS)
        self.device = torch.device(device)
        self.model = model.to(self.device)

    def _train_epoch(self, optimizer):
        """Run one optimisation pass over ``dls.train``; return the mean loss.

        Batch losses are weighted by batch size so a short final batch does
        not count as much as a full one. Returns NaN if no sample was seen.
        """
        # Enable training-time behaviour of layers such as dropout/batch norm.
        self.model.train()
        loss_sum = 0.0
        n_samples = 0
        for xb, yb in self.dls.train:
            xb = xb.to(self.device)
            yb = yb.to(self.device)
            loss = self.loss_func(self.model(xb), yb)
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
            n_batch = len(yb)
            loss_sum += loss.item() * n_batch
            n_samples += n_batch
        return loss_sum / n_samples if n_samples else float('nan')

    def _validate_epoch(self):
        """Evaluate the model on ``dls.valid``.

        Runs without gradient tracking and with the model in eval mode; the
        model's previous train/eval mode is restored afterwards.

        Returns:
            ``(mean_loss, mean_metric)`` as Python floats. Both are averaged
            per sample (batch values weighted by batch size), so a short final
            batch does not skew the result. Both are NaN if no sample was seen.
        """
        loss_sum = 0.0
        metric_sum = 0.0
        n_samples = 0
        was_training = self.model.training
        self.model.eval()
        try:
            with torch.no_grad():
                for xb, yb in self.dls.valid:
                    xb = xb.to(self.device)
                    yb = yb.to(self.device)
                    preds = self.model(xb)
                    n_batch = len(yb)
                    loss_sum += self.loss_func(preds, yb).item() * n_batch
                    # float() accepts both plain floats and 0-dim tensors.
                    metric_sum += float(self.metrics(preds, yb)) * n_batch
                    n_samples += n_batch
        finally:
            self.model.train(was_training)
        if n_samples == 0:
            return float('nan'), float('nan')
        return loss_sum / n_samples, metric_sum / n_samples

    def debug(self):
        """Print the object returned by ``model.parameters()``."""
        print(self.model.parameters())

    def fit(self, n_epoch: int = 10, lr: 'float | None' = None):
        """Train for ``n_epoch`` epochs and display progress in the notebook.

        Args:
            n_epoch: Number of passes over the training data.
            lr: Learning rate for this run; ``None`` falls back to the value
                given at construction time.

        Raises:
            ValueError: If the learner was created without a ``loss_func``.

        Side effects:
            Resets ``training_summary`` and then rebuilds it with one row per
            completed epoch (0-based epoch number, training loss, validation
            loss, validation metric, wall-clock seconds).
        """
        # `is None` rather than truthiness so an explicit lr of 0 is honoured.
        if lr is None:
            lr = self.lr
        if self.loss_func is None:
            raise ValueError('MyLearner.fit requires a loss_func')

        # Initialize training progress display
        epoch_rows = []
        self.training_summary = pd.DataFrame(columns=self._SUMMARY_COLUMNS)
        progress_bar = widgets.IntProgress(value=0, min=0, max=n_epoch, step=1,
                                           description=f'[0 / {n_epoch}]')
        dsp.display(progress_bar, dsp.HTML(self.training_summary.to_html(index=False)))

        # Initialize optimizer
        optimizer = self.opt_func(self.model.parameters(), lr=lr)

        # Training loop
        for i in range(n_epoch):
            t0 = time.perf_counter()
            epoch_train_loss = self._train_epoch(optimizer)
            epoch_valid_loss, epoch_valid_metric = self._validate_epoch()
            elapsed = time.perf_counter() - t0

            # Rebuild the table from the row list: avoids concatenating onto
            # an empty frame and gives each row its own index.
            epoch_rows.append({'epoch': i,
                               'train_loss': epoch_train_loss,
                               'valid_loss': epoch_valid_loss,
                               'metric': epoch_valid_metric,
                               'time': elapsed})
            self.training_summary = pd.DataFrame(epoch_rows, columns=self._SUMMARY_COLUMNS)

            # Update training progress display
            progress_bar.value = i + 1
            progress_bar.description = f'[{i+1} / {n_epoch}]'
            # wait=True defers clearing until the new output arrives, which
            # avoids the table flickering between epochs.
            dsp.clear_output(wait=True)
            dsp.display(progress_bar, dsp.HTML(self.training_summary.to_html(index=False)))
            

In [None]:
def batch_accuracy(pred_tensor, actual):
    """Return the share of rows whose highest-scoring column matches ``actual``.

    Args:
        pred_tensor: 2-D tensor of scores, one row per sample; the predicted
            class of a row is the index of its largest value along dim 1.
        actual: Tensor of target class indices, compared element-wise with
            the predicted indices.

    Returns:
        The mean of the element-wise matches as a Python float.
    """
    _, predicted_labels = pred_tensor.max(dim=1)
    hits = predicted_labels.eq(actual)
    return hits.float().mean().item()

In [None]:
# Small fully connected classifier: 784 inputs -> 30 -> 30 -> 10 outputs.
simple_net = nn.Sequential(
    nn.Linear(28*28,30),
    nn.ReLU(),
    nn.Linear(30, 30),
    nn.ReLU(),
    nn.Linear(30,10)
)
# Train on the GPU when one is available and fall back to the CPU otherwise,
# so the cell also runs on machines without CUDA.
learner = MyLearner(dls, simple_net, opt_func=SGD, loss_func=nn.CrossEntropyLoss(), metrics=batch_accuracy,
                    device='cuda' if torch.cuda.is_available() else 'cpu')
learner.fit(20, 0.1)

IntProgress(value=21, description='[20 / 20]', max=21, min=1)

epoch,train_loss,valid_loss,metric,time
0,1.152907,0.467667,0.844727,1.224382
1,0.379791,0.322802,0.902051,0.903247
2,0.31499,0.304635,0.907715,0.759126
3,0.276943,0.260111,0.922949,0.759755
4,0.247037,0.229006,0.934375,0.819127
5,0.223366,0.206633,0.941211,0.829346
6,0.201739,0.213195,0.938184,0.817453
7,0.183708,0.201639,0.937695,0.823342
8,0.169019,0.17741,0.948535,0.845161
9,0.156672,0.167596,0.950488,1.019285


In [None]:
# Fresh copy of the same architecture (784 -> 30 -> 30 -> 10), so this run does
# not start from the weights trained in the previous cell.
simple_net = nn.Sequential(
    nn.Linear(28*28,30),
    nn.ReLU(),
    nn.Linear(30, 30),
    nn.ReLU(),
    nn.Linear(30,10)
)
# Reference run with fastai's own Learner, using the same optimizer, loss,
# epoch count and learning rate, for comparison with MyLearner.
learner2 = Learner(dls, simple_net, opt_func=SGD, loss_func=nn.CrossEntropyLoss(), metrics=accuracy)
learner2.fit(20, 0.1)

epoch,train_loss,valid_loss,accuracy,time
0,0.531983,0.434135,0.875,00:00
1,0.354307,0.336662,0.9047,00:00
2,0.289925,0.281631,0.9167,00:00
3,0.262873,0.249166,0.9274,00:01
4,0.239947,0.254365,0.9237,00:00
5,0.215922,0.212549,0.9353,00:00
6,0.194177,0.202376,0.9386,00:00
7,0.184403,0.208364,0.9389,00:00
8,0.173474,0.189639,0.9444,00:00
9,0.163192,0.17772,0.9466,00:00
