In [1]:
import torch
import random

In [2]:
SEED = 19
BATCH_SIZE = 32

In [3]:

random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
device = "cuda" if torch.cuda.is_available() else "cpu"

In [4]:
print(f"Torch version: {torch.__version__}")
print((f"CUDA: {device}"))

Torch version: 2.5.1
CUDA: cpu


In [5]:
from torchvision import datasets, transforms

  warn(


In [9]:
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

train_dataset = datasets.MNIST(root='../data', train=True, download=True, transform=transform)
test_dataset = datasets.MNIST(root='../data', train=False, download=True, transform=transform)

In [10]:
len(train_dataset)

60000

In [11]:
from torch.utils.data import DataLoader

In [12]:
print(f"Batch size: {BATCH_SIZE}")

Batch size: 32


In [13]:
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [14]:
from model import Classifier
model = Classifier().to(device=device)

### training loop

In [43]:
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)

In [44]:
from tqdm.auto import tqdm

In [45]:
import torch.utils.data.dataloader


def train_step(model: torch.nn.Module,
               dataloader: torch.utils.data.dataloader,
               loss_fn: torch.nn.Module) -> list:
    
    model.train()    
    # Setup train loss and train accuracy values
    train_loss, train_acc = 0, 0
    for batch, (X, y) in enumerate(dataloader):
        # Send data to target device
        X, y = X.to(device), y.to(device)
        
        y_pred = model(X)

        loss = loss_fn(y_pred, y)
        train_loss += loss.item() 

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        y_pred_class = torch.argmax(torch.softmax(y_pred, dim=1), dim=1)
        train_acc += (y_pred_class == y).sum().item()/len(y_pred)

    train_loss = train_loss / len(dataloader)
    train_acc = train_acc / len(dataloader)
    return train_acc, train_loss

def test_step(model: torch.nn.Module,
              dataloader: torch.utils.data.dataloader,
              loss_fn: torch.nn.Module) -> list:
    model.eval() 
    
    test_loss, test_acc = 0, 0
    
    # Turn on inference context manager
    with torch.inference_mode():
        for batch, (X, y) in enumerate(test_loader):
            # Send data to target device
            X, y = X.to(device), y.to(device)
    
            test_pred_logits = model(X)
            loss = loss_fn(test_pred_logits, y)
            test_loss += loss.item()
            
            test_pred_labels = test_pred_logits.argmax(dim=1)
            test_acc += ((test_pred_labels == y).sum().item()/len(test_pred_labels))
            
    test_loss = test_loss / len(test_loader)
    test_acc = test_acc / len(test_loader)
    return test_acc, test_loss

    

In [46]:

NUM_EPOCHS = 10

In [47]:
import pandas as pd

results_df = pd.DataFrame(columns=["epochs", "train_acc", "train_loss", "test_acc", "test_loss"])

from timeit import default_timer as timer 
start_time = timer()

for epoch in tqdm(range(NUM_EPOCHS)):
    # train
    train_acc, train_loss = train_step(model=model,
                                       dataloader=train_loader,
                                       loss_fn=loss_fn)
    # test
    test_acc, test_loss = test_step(model=model,
                                    dataloader=test_loader,
                                    loss_fn=loss_fn)
    row = pd.DataFrame({"epoch": [epoch], 
                       "train_loss": [train_loss], 
                       "train_acc": [train_acc], 
                       "test_loss": [test_loss], 
                       "test_acc": [test_acc]})
    results_df = pd.concat([results_df, row] , ignore_index=True)

    print(
            f"epoch: {epoch+1} | "
            f"train_loss: {train_loss:.4f} | "
            f"train_acc: {train_acc:.4f} | "
            f"test_loss: {test_loss:.4f} | "
            f"test_acc: {test_acc:.4f}"
        )

end_time = timer()

print(f"Total time of training {end_time-start_time:.3f} seconds")


  0%|          | 0/10 [00:00<?, ?it/s]

  results_df = pd.concat([results_df, row] , ignore_index=True)
 10%|█         | 1/10 [02:23<21:33, 143.67s/it]

Epoch: 1 | train_loss: 0.1428 | train_acc: 0.9553 | test_loss: 0.0396 | test_acc: 0.9882


 20%|██        | 2/10 [05:06<20:38, 154.82s/it]

Epoch: 2 | train_loss: 0.0534 | train_acc: 0.9835 | test_loss: 0.0385 | test_acc: 0.9888


 30%|███       | 3/10 [07:18<16:52, 144.65s/it]

Epoch: 3 | train_loss: 0.0411 | train_acc: 0.9872 | test_loss: 0.0360 | test_acc: 0.9873


 40%|████      | 4/10 [09:35<14:10, 141.68s/it]

Epoch: 4 | train_loss: 0.0329 | train_acc: 0.9892 | test_loss: 0.0297 | test_acc: 0.9906


 50%|█████     | 5/10 [11:40<11:17, 135.53s/it]

Epoch: 5 | train_loss: 0.0289 | train_acc: 0.9906 | test_loss: 0.0276 | test_acc: 0.9907


 60%|██████    | 6/10 [13:47<08:50, 132.63s/it]

Epoch: 6 | train_loss: 0.0228 | train_acc: 0.9924 | test_loss: 0.0251 | test_acc: 0.9924


 70%|███████   | 7/10 [16:14<06:51, 137.24s/it]

Epoch: 7 | train_loss: 0.0222 | train_acc: 0.9925 | test_loss: 0.0191 | test_acc: 0.9940


 80%|████████  | 8/10 [18:31<04:34, 137.15s/it]

Epoch: 8 | train_loss: 0.0180 | train_acc: 0.9943 | test_loss: 0.0203 | test_acc: 0.9937


 90%|█████████ | 9/10 [21:00<02:21, 141.03s/it]

Epoch: 9 | train_loss: 0.0162 | train_acc: 0.9950 | test_loss: 0.0227 | test_acc: 0.9928


100%|██████████| 10/10 [23:21<00:00, 140.13s/it]

Epoch: 10 | train_loss: 0.0158 | train_acc: 0.9949 | test_loss: 0.0199 | test_acc: 0.9932
Total time of training 1401.307 seconds





In [48]:
results_df

Unnamed: 0,epochs,train_acc,train_loss,test_acc,test_loss,Epoch
0,,0.955333,0.142797,0.988219,0.039648,0.0
1,,0.98345,0.053412,0.988818,0.038516,1.0
2,,0.98725,0.041081,0.98732,0.035978,2.0
3,,0.989167,0.032939,0.990615,0.0297,3.0
4,,0.99055,0.028926,0.990715,0.027587,4.0
5,,0.992417,0.022771,0.992412,0.025123,5.0
6,,0.992467,0.022188,0.99401,0.019118,6.0
7,,0.994283,0.018044,0.99371,0.020316,7.0
8,,0.995033,0.01625,0.992812,0.022698,8.0
9,,0.994917,0.015754,0.993211,0.019929,9.0


In [49]:
results_df.to_csv("../results/results.csv", sep=";")

In [53]:
torch.save({
    'model_state_dict': model.state_dict(),
    'class_names': train_dataset.classes
}, '../models/model_with_classes.pth')

In [50]:

print(f"Total time of training {end_time-start_time:.3f} seconds")

Total time of training 1401.307 seconds
