# Model training

#### Training a model to recognize digit from 0 to 9

In [5]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
from torch.utils.data import DataLoader
from datasets import load_dataset
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from transformers import PretrainedConfig, PreTrainedModel
from sklearn.preprocessing import OneHotEncoder

### 1. Loading the MNIST dataset

In [6]:
ds = load_dataset("ylecun/mnist")
ds = ds.with_format('torch')
ds

Downloading readme: 100%|██████████| 6.97k/6.97k [00:00<00:00, 21.5kB/s]


DatasetDict({
    train: Dataset({
        features: ['image', 'label'],
        num_rows: 60000
    })
    test: Dataset({
        features: ['image', 'label'],
        num_rows: 10000
    })
})

### 2. Training a sklearn OneHotEncoder

In [7]:
labels = [[item["label"].item()] for item in ds["train"]]
labels = np.array(labels)
ohe = OneHotEncoder(handle_unknown="ignore", sparse_output=False).fit(labels)
print(ohe.categories_)
y = ohe.transform(labels)

[array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])]


### 3. Defining a classifier and config

We should keep `model.py` and the cell below exactly same codes.

In [10]:
class DigitClassifier(nn.Module):
    def __init__(self, image_size=28, num_classes=10, **kwargs):
        super().__init__(**kwargs)
        fl_size_s = ((image_size - 4) / 2 - 2) / 2
        fl_size = int(fl_size_s * fl_size_s * 5)
        self.fl = nn.Sequential(
            nn.Conv2d(1, 16, 5),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(16, 5, 3),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Flatten(1),
            nn.Linear(fl_size, 256),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Dropout(0.2),
        )
        self.out = nn.Sequential(
            nn.Linear(128, 64), nn.ReLU(), nn.Linear(64, num_classes)
        )

    def forward(self, x):
        x = self.fl(x)
        x = self.out(x)
        return x


class DigitConfig(PretrainedConfig):
    model_type = "Classifier"

    def __init__(self, image_size=28, num_classes=10, **kwargs):
        self.image_size = image_size
        self.num_classes = num_classes
        super().__init__(**kwargs)


class DigitClassifierModel(PreTrainedModel):
    config_class = DigitConfig

    def __init__(self, config):
        super().__init__(config)
        self.model = DigitClassifier(config.image_size, config.num_classes)

    def forward(self, tensor):
        return self.model.forward(tensor)

### 4. Starting out training

In [15]:
config = DigitConfig()
model = DigitClassifierModel(config)
dataloader = DataLoader(ds['train'], batch_size=32)
testloader = DataLoader(ds['test'], batch_size=32)
criterion = nn.CrossEntropyLoss()

In [19]:
optimizer = optim.Adam(model.parameters(), lr=1e-4)

In [20]:
epoch_count = 5
for epoch in range(epoch_count):

    running_loss = 0.0
    for i, data in enumerate(dataloader, 0):
        inputs = data["image"].to(torch.float32)
        labels = data["label"].numpy()
        labels = [ohe.transform(np.array([[label]])) for label in labels]
        labels = torch.Tensor(np.array(labels)).squeeze()

        optimizer.zero_grad()
        outputs = model(inputs)

        loss = criterion(outputs, labels)
        loss.backward()

        optimizer.step()
        running_loss += loss.item()
        if i % 500 == 0:
            print(f"[{epoch + 1}, {i + 1:5d}] loss: {running_loss:.5f}")
        running_loss = 0.0

print("Finished Training")

[1,     1] loss: 0.25510
[1,   501] loss: 0.45574
[1,  1001] loss: 0.29064
[1,  1501] loss: 0.21022
[2,     1] loss: 0.13032
[2,   501] loss: 0.19126
[2,  1001] loss: 0.15267
[2,  1501] loss: 0.09094
[3,     1] loss: 0.00756
[3,   501] loss: 0.12089
[3,  1001] loss: 0.07706
[3,  1501] loss: 0.12296
[4,     1] loss: 0.05153
[4,   501] loss: 0.05989
[4,  1001] loss: 0.01597
[4,  1501] loss: 0.07095
[5,     1] loss: 0.03301
[5,   501] loss: 0.09065
[5,  1001] loss: 0.08669
[5,  1501] loss: 0.04821
Finished Training


### 5. Evaluating out model

In [21]:
def calculate_metrics(model, data_loader):
    model.eval()

    all_preds = []
    all_labels = []

    with torch.no_grad():
        for inputs, labels in data_loader:
            inputs = data["image"].to(torch.float32)
            labels = data["label"]

            outputs = model(inputs)
            preds = torch.argmax(outputs, dim=1)

            all_preds.extend(preds.numpy())
            all_labels.extend(labels.numpy())

    acc = accuracy_score(all_labels, all_preds)
    precision = precision_score(all_labels, all_preds, average="weighted")
    recall = recall_score(all_labels, all_preds, average="weighted")
    f1 = f1_score(all_labels, all_preds, average="weighted")

    return acc, precision, recall, f1

In [22]:
acc, precision, recall, f1 = calculate_metrics(model, testloader)
print(acc, precision, recall, f1)

1.0 1.0 1.0 1.0


### 6. Saving out model to path

In [23]:
model.save_pretrained('./model')