# Exploration: training a model

We create a PyTorch-based model for image classification using the public Fashion-MNIST dataset.

https://github.com/zalandoresearch/fashion-mnist

In [None]:
# Install the exact package versions this notebook was written against.
# %pip (rather than !pip) targets the environment of the running kernel.
%pip install torch==2.9.1 torchinfo==1.8.0 torchmetrics==1.8.2 torchvision==0.24.1

In [None]:
from torchvision import datasets
from torchvision.transforms import ToTensor

# Training split of FashionMNIST, downloaded into the local "data" directory
# if it is not already there. No transform is passed on purpose: the samples
# are shown with display() further down, which needs the untransformed images.
dataset = datasets.FashionMNIST(root="data", train=True, download=True)

# Human-readable class names, keyed by the integer target (0-9) of each sample.
labels = dict(
    enumerate(
        (
            "T-shirt/top",
            "Trouser",
            "Pullover",
            "Dress",
            "Coat",
            "Sandal",
            "Shirt",
            "Sneaker",
            "Bag",
            "Ankle boot",
        )
    )
)

In [None]:
# Show the first three samples: the class name, then the image itself.
for index in range(3):
    image, target = dataset[index]
    print(labels.get(target))
    display(image)
    

## A simple image classifier

Let's train a network for classifying images.

In [None]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchinfo import summary
from torchvision import datasets
from torchvision.transforms import ToTensor
import numpy as np

In [None]:
# Network definition: how is it composed?

class ImageClassifier(nn.Module):
    """Small convolutional network that maps images to per-class scores.

    Two unpadded 3x3 convolutions, each followed by a ReLU, extract features.
    The feature maps are then flattened and projected to one raw score
    (logit) per class by a final linear layer.

    Args:
        in_channels: Number of channels of the input images
            (the default, 1, matches grayscale images).
        num_classes: Number of scores produced per image (default 10).
    """

    def __init__(self, in_channels: int = 1, num_classes: int = 10):
        super().__init__()
        self.model = nn.Sequential(
            nn.Conv2d(in_channels, 8, kernel_size=3),
            nn.ReLU(),
            nn.Conv2d(8, 16, kernel_size=3),
            nn.ReLU(),
            nn.Flatten(),
            # LazyLinear infers its input size from the first batch it sees,
            # so the flattened feature size does not have to be hard-coded.
            nn.LazyLinear(num_classes),
        )

    def forward(self, x):
        """Return the raw class scores for a batch ``x`` of shape (N, C, H, W)."""
        return self.model(x)



In [None]:
# Load the same training split again, this time with ToTensor() as the
# transform so that every image comes back as a tensor the network can consume.
training_data = datasets.FashionMNIST(
    "data", train=True, download=True, transform=ToTensor()
)

# Inspect the first (image, target) pair.
print(training_data[0])

In [None]:
# The training loop consumes mini-batches; the DataLoader groups the dataset
# into batches of 128 samples.
train_dataloader = DataLoader(dataset=training_data, batch_size=128)


In [None]:
# Train on a CUDA GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Build the network and move it to the selected device.
model = ImageClassifier()
model = model.to(device)


In [None]:
# Loss to minimise: cross-entropy between the predicted scores and the targets.
loss_fn = nn.CrossEntropyLoss()

# Stochastic gradient descent over all of the model's parameters.
optimizer = torch.optim.SGD(params=model.parameters(), lr=0.001)


In [None]:
# Put the model in training mode before running the training loop.
model.train(True)


In [None]:
# does it work? Let's try
# Take the first training image and add a leading batch dimension.
sample_input = training_data[0][0][None, :].numpy()
with torch.no_grad():
    # The model lives on `device`, so the input tensor must be created there
    # too; otherwise the forward pass fails when running on a GPU.
    output = model(torch.tensor(sample_input, device=device))
    # .numpy() only works on CPU tensors, so copy the scores back first.
    sample_output = output.cpu().numpy()
    print(sample_output)

# what is the output?
# what was the expected (true) value?

In [None]:
# we iterate over the dataset to train the network (one full pass)
for batch, (X, y) in enumerate(train_dataloader):
    # Inputs and targets must be on the same device as the model.
    X = X.to(device)
    y = y.to(device)

    # Forward pass: predicted scores and their loss against the targets.
    pred = model(X)
    loss = loss_fn(pred, y)

    # Backpropagation: compute gradients, update the weights, then clear the
    # gradients so they do not accumulate into the next batch.
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()

    # Report the loss once every 100 batches.
    if batch % 100 == 0:
        loss_value = loss.item()
        step = batch // 100
        # Fixed-point notation with six decimals.
        print(f"step {step} loss: {loss_value:.6f}")

In [None]:
# Bare expression: the notebook renders the module's layer-by-layer repr.
model

The resulting model is structured as follows:
```
Input: Grayscale image (1xHxW)
↓
Conv2d(1→8, 3x3): Extracts 8 basic features (edges/textures)
↓
ReLU(): Adds non-linearity
↓
Conv2d(8→16, 3x3): Learns 16 complex features (patterns/shapes)
↓
ReLU(): Non-linearity
↓
Flatten(): Converts 16xHxW → 9216-dim vector (16 * 24 * 24 for 28x28 input: each unpadded 3x3 convolution shrinks H and W by 2, so 28 → 26 → 24)
↓
Linear(9216→10): Outputs class scores for 10 categories
```

## Evaluate results

We can directly test the model against a sample from the training dataset.

In [None]:
# First training image with a leading batch dimension added, as a numpy array.
sample_input = training_data[0][0].unsqueeze(0).numpy()


In [None]:

# Get model output - convert tensor to numpy
with torch.no_grad():
    # Create the input on the model's device; a CPU tensor fed to a model on
    # the GPU raises a device-mismatch error.
    output = model(torch.tensor(sample_input, device=device))
    # .numpy() only works on CPU tensors, so copy the scores back first.
    sample_output = output.cpu().numpy()
    print(sample_output)

What is the output?
What is the true value?