In [1]:
import torch
from torch import nn
import torchvision
from torchvision import transforms

import numpy as np
import matplotlib.pyplot as plt
import torchmetrics
from torch.utils.data import DataLoader

In [2]:
# Training split of FashionMNIST, stored under ./data; every image is
# converted to a tensor by the transform, labels are left untouched.
train_data = torchvision.datasets.FashionMNIST(
    root="data",
    train=True,
    download=True,
    transform=transforms.ToTensor(),
    target_transform=None,
)

In [3]:
# Held-out split of FashionMNIST (train=False), stored under ./data; every
# image is converted to a tensor by the transform, labels are left untouched.
test_data = torchvision.datasets.FashionMNIST(
    root="data",
    train=False,
    download=True,
    transform=transforms.ToTensor(),
    target_transform=None,
)

In [4]:
class FashionMNISTModel(nn.Module):
    """Two-block convolutional classifier for 28x28 images.

    Each block is Conv-ReLU-Conv-ReLU followed by a 2x2 max-pool. The 3x3
    convolutions use stride 1 and padding 1, so they keep the spatial size;
    only the pooling layers shrink it (28 -> 14 -> 7).

    Args:
        input_features: Number of channels in the input image.
        hidden_units: Number of channels produced by every convolution.
        output_features: Number of logits produced per sample.
    """

    def __init__(self, input_features: int, hidden_units: int, output_features: int):
        super().__init__()
        self.conv_block1 = nn.Sequential(
            nn.Conv2d(in_channels=input_features, out_channels=hidden_units,
                      kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=hidden_units, out_channels=hidden_units,
                      kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )

        self.conv_block2 = nn.Sequential(
            nn.Conv2d(in_channels=hidden_units, out_channels=hidden_units,
                      kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=hidden_units, out_channels=hidden_units,
                      kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )

        self.classifier = nn.Sequential(
            nn.Flatten(),
            # Two 2x2 pools reduce a 28x28 input to 7x7, hence 7 * 7 = 49
            # values per channel after flattening. Other input sizes would
            # need a different in_features.
            nn.Linear(in_features=hidden_units * 7 * 7, out_features=output_features),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the class logits for a batch of images ``x``."""
        return self.classifier(self.conv_block2(self.conv_block1(x)))

In [5]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
device

'cuda'

In [6]:
# Baseline network: 1 input channel, 10 channels per convolution, 10 logits.
model = FashionMNISTModel(1, 10, 10)
# Move the parameters onto the selected device.
model = model.to(device)

In [7]:
# Print a layer-by-layer summary of `model` for a single 1x28x28 input;
# the table that follows this cell is the captured output.
from torchsummary import summary
summary(model, (1, 28, 28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 10, 28, 28]             100
              ReLU-2           [-1, 10, 28, 28]               0
            Conv2d-3           [-1, 10, 28, 28]             910
              ReLU-4           [-1, 10, 28, 28]               0
         MaxPool2d-5           [-1, 10, 14, 14]               0
            Conv2d-6           [-1, 10, 14, 14]             910
              ReLU-7           [-1, 10, 14, 14]               0
            Conv2d-8           [-1, 10, 14, 14]             910
              ReLU-9           [-1, 10, 14, 14]               0
        MaxPool2d-10             [-1, 10, 7, 7]               0
          Flatten-11                  [-1, 490]               0
           Linear-12                   [-1, 10]           4,910
Total params: 7,740
Trainable params: 7,740
Non-trainable params: 0
-----------------------------------

In [8]:
import wandb
# Authenticate with Weights & Biases before any run is started.
wandb.login()
# NOTE(review): presumably here so the cell does not echo the return value
# of wandb.login() as its output — confirm before removing.
print()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
wandb: Currently logged in as: stefansu3526. Use `wandb login --relogin` to force relogin





In [9]:
def model_pipeline(hyperparameters):
    """Run one tracked experiment end to end and return the trained model.

    A wandb run is opened for the "FashionMNIST" project with
    ``hyperparameters`` as its config. Inside that run the model, data
    loaders, loss and optimiser are built via ``make``, the model is printed,
    trained via ``train`` and evaluated via ``test``.
    """
    run = wandb.init(project="FashionMNIST", config=hyperparameters)
    with run:
        # Read the hyperparameters back through wandb so the values used are
        # the ones recorded for the run.
        run_config = wandb.config

        components = make(run_config)
        model, train_loader, test_loader, loss_fn, optimiser = components
        print(model)

        train(model, train_loader, loss_fn, optimiser, run_config)
        test(model, test_loader)

    return model

In [10]:
def make(config):
    """Build everything one training run needs from ``config``.

    Reads ``batch_size``, ``kernels``, ``classes`` and ``learning_rate`` from
    ``config`` and returns the tuple
    ``(model, train_dataloader, test_dataloader, loss_fn, optimiser)``.
    """
    batch_size = config.batch_size

    # Only the training data is shuffled; evaluation order stays fixed.
    train_dataloader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
    test_dataloader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

    # Single input channel; widths and class count come from the config.
    network = FashionMNISTModel(1, config.kernels, config.classes)
    network = network.to(device)

    criterion = nn.CrossEntropyLoss()
    adam = torch.optim.Adam(network.parameters(), lr=config.learning_rate)
    print(adam)

    return network, train_dataloader, test_dataloader, criterion, adam



In [11]:
from tqdm.auto import tqdm
def train(model, dataloader, loss_fn, optimiser, config):
    """Train ``model`` for ``config.epochs`` passes over ``dataloader``.

    The model is registered with ``wandb.watch`` and, after every 500th
    batch (counted across epochs), that batch's loss and accuracy are
    handed to ``train_log`` together with the running example count.
    """
    wandb.watch(model, loss_fn, log="all", log_freq=10)
    model.train()

    log_every = 500      # batches between two train_log calls
    seen_examples = 0    # number of samples processed so far
    seen_batches = 0     # number of batches processed so far

    for epoch in tqdm(range(config.epochs)):
        for X, y in dataloader:
            loss, acc = train_batch(X, y, model, loss_fn, optimiser)
            seen_examples += len(X)
            seen_batches += 1

            if seen_batches % log_every:
                continue
            train_log(loss, acc, seen_examples, epoch)

def train_batch(X, y, model, loss_fn, optimiser):
    """Run one optimisation step on a single batch.

    Returns a ``(loss, accuracy)`` pair: the loss tensor produced by
    ``loss_fn`` and the fraction of samples in the batch whose predicted
    class equals the label (both computed before the parameter update).
    """
    inputs = X.to(device)
    targets = y.to(device)

    # Forward pass.
    logits = model(inputs)
    loss = loss_fn(logits, targets)

    # Batch accuracy from the most probable class of each sample.
    predicted = torch.softmax(logits, dim=1).argmax(dim=1)
    n_correct = (predicted == targets).sum().item()
    batch_accuracy = n_correct / len(targets)

    # Backward pass and parameter update.
    optimiser.zero_grad()
    loss.backward()
    optimiser.step()

    return loss, batch_accuracy


  from .autonotebook import tqdm as notebook_tqdm


In [12]:
def train_log(loss, accuracy, example_count, epoch):
    """Send one training data point to wandb and echo the loss to stdout.

    ``loss`` is converted to a plain float first; ``example_count`` is used
    as the wandb step for the logged metrics.
    """
    loss_value = float(loss)

    metrics = {"epoch": epoch, "loss": loss_value, "accuracy": accuracy}
    wandb.log(metrics, step=example_count)

    # Left-pad the counter with zeros to at least five characters.
    padded_count = str(example_count).zfill(5)
    print(f"Loss After {padded_count} examples: {loss_value:.3f}")

In [13]:
def test(model, dataloader):
    """Evaluate ``model`` on ``dataloader`` and log the accuracy to wandb.

    Accuracy is the number of correctly classified samples divided by the
    total number of samples seen. Counting samples, rather than averaging
    per-batch accuracies, keeps a smaller final batch from being
    over-weighted.

    Args:
        model: Network that maps a batch of inputs to per-class logits.
        dataloader: Iterable yielding ``(X, y)`` batches.

    Returns:
        The accuracy as a float in ``[0, 1]``; ``0.0`` when the dataloader
        yields no samples. The same value is logged as ``"test_accuracy"``.
    """
    model.eval()

    correct = 0
    total = 0
    with torch.inference_mode():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            # Softmax is monotonic, so the argmax of the raw logits already
            # identifies the most probable class.
            y_pred = model(X).argmax(dim=1)
            correct += (y_pred == y).sum().item()
            total += len(y)

    # Guard against an empty dataloader instead of dividing by zero.
    test_acc = correct / total if total else 0.0
    wandb.log({"test_accuracy": test_acc})
    return test_acc


In [14]:
# Hyperparameters and run metadata handed to model_pipeline / wandb.
config = {
    "epochs": 5,
    "classes": 10,
    # Channel widths used by the model: [first conv block, second conv block].
    "kernels": [32, 64],
    "batch_size": 32,
    "learning_rate": 0.001,
    "dataset": "FashionMNIST",
    # NOTE(review): "ON" looks like it may be a typo (e.g. for "CNN") — confirm.
    "architecture": "ON",
}

In [15]:
class FashionMNISTModel(nn.Module):
    """Two-block convolutional classifier with configurable channel widths.

    Each block is Conv-ReLU-Conv-ReLU followed by a 2x2 max-pool. The 3x3
    convolutions use stride 1 and padding 1, so only the two pooling layers
    change the spatial size (28 -> 14 -> 7 for 28x28 inputs).

    Args:
        input_features: Number of channels in the input image.
        kernels: Channel widths. ``kernels[0]`` is the width of the first
            conv block, ``kernels[1]`` the width of the second, and
            ``kernels[-1]`` the width of the hidden layer in the classifier
            (the same entry as ``kernels[1]`` for a two-element list).
        output_features: Number of logits produced per sample.
    """

    def __init__(self, input_features: int, kernels: list[int], output_features: int):
        super().__init__()
        block1_channels = kernels[0]
        block2_channels = kernels[1]
        head_width = kernels[-1]

        self.conv_block1 = nn.Sequential(
            nn.Conv2d(in_channels=input_features, out_channels=block1_channels,
                      kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=block1_channels, out_channels=block1_channels,
                      kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )

        self.conv_block2 = nn.Sequential(
            nn.Conv2d(in_channels=block1_channels, out_channels=block2_channels,
                      kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=block2_channels, out_channels=block2_channels,
                      kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )

        self.classifier = nn.Sequential(
            nn.Flatten(),
            # The flattened size must follow the channel count that
            # conv_block2 actually emits (kernels[1]), not kernels[-1];
            # otherwise any list longer than two entries fails with a shape
            # mismatch. 7 * 7 is the spatial size left after two 2x2 pools
            # on a 28x28 input.
            nn.Linear(in_features=block2_channels * 7 * 7, out_features=head_width),
            # NOTE(review): there is no activation between these two Linear
            # layers, so together they compose to a single affine map;
            # consider inserting nn.ReLU() if a non-linear head is intended.
            nn.Linear(in_features=head_width, out_features=output_features)
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the class logits for a batch of images ``x``."""
        return self.classifier(self.conv_block2(self.conv_block1(x)))

In [None]:
# Build, train and evaluate a model inside one tracked wandb run using
# `config`; this rebinds `model`, replacing the baseline instance created
# earlier in the notebook.
model = model_pipeline(config)