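Let's build a simple fully connected neural network with PyTorch Lightning to classify images from the FashionMNIST dataset, and run a small grid search over its learning rate, layer sizes, and number of epochs.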

**1. Import Libraries**

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

In [None]:
! pip install lightning

Collecting lightning
  Downloading lightning-2.4.0-py3-none-any.whl.metadata (38 kB)
Collecting lightning-utilities<2.0,>=0.10.0 (from lightning)
  Downloading lightning_utilities-0.11.7-py3-none-any.whl.metadata (5.2 kB)
Collecting torchmetrics<3.0,>=0.7.0 (from lightning)
  Downloading torchmetrics-1.4.2-py3-none-any.whl.metadata (19 kB)
Collecting pytorch-lightning (from lightning)
  Downloading pytorch_lightning-2.4.0-py3-none-any.whl.metadata (21 kB)
Downloading lightning-2.4.0-py3-none-any.whl (810 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m811.0/811.0 kB[0m [31m12.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading lightning_utilities-0.11.7-py3-none-any.whl (26 kB)
Downloading torchmetrics-1.4.2-py3-none-any.whl (869 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m869.2/869.2 kB[0m [31m37.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pytorch_lightning-2.4.0-py3-none-any.whl (815 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
import lightning as L

*Checking for GPU Availability*

This code checks if a CUDA-enabled GPU is available and sets the `device` accordingly. If no GPU is available, it defaults to the CPU.

In [None]:
# Prefer a CUDA GPU when PyTorch can see one; otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f'Using device: {device}')

Using device: cuda


**2. Data Preparation**

In [None]:
# Images are converted to tensors as they are read from the datasets
transform = transforms.ToTensor()

# Training split: downloaded into ./data if needed and reshuffled on every pass
train_set = datasets.FashionMNIST(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)
train_loader = torch.utils.data.DataLoader(
    train_set,
    batch_size=256,
    shuffle=True,
)

# Test split: same storage location, iterated in a fixed order
test_set = datasets.FashionMNIST(
    root='./data',
    train=False,
    download=True,
    transform=transform,
)
test_loader = torch.utils.data.DataLoader(
    test_set,
    batch_size=256,
    shuffle=False,
)

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to ./data/FashionMNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 26421880/26421880 [00:06<00:00, 3781629.10it/s]


Extracting ./data/FashionMNIST/raw/train-images-idx3-ubyte.gz to ./data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 29515/29515 [00:00<00:00, 137449.23it/s]


Extracting ./data/FashionMNIST/raw/train-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to ./data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 4422102/4422102 [00:01<00:00, 2508511.78it/s]


Extracting ./data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to ./data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 5148/5148 [00:00<00:00, 5824730.78it/s]

Extracting ./data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw






**3. Neural Network Model**

In [None]:
def get_model(layers: list[int]) -> nn.Sequential:
    """Build a fully connected network from a list of layer widths.

    The network starts with ``nn.Flatten`` and then adds one ``nn.Linear`` per
    consecutive pair of widths. Every linear layer except the last one is
    followed by ``nn.ReLU``.

    Args:
        layers: Layer widths, input size first and output size last.

    Returns:
        The assembled ``nn.Sequential``; with fewer than two widths it
        contains only the flatten layer.
    """
    modules = [nn.Flatten()]
    widths = list(zip(layers, layers[1:]))
    for position, (fan_in, fan_out) in enumerate(widths, start=1):
        modules.append(nn.Linear(fan_in, fan_out))
        # No activation after the final (output) layer
        if position != len(widths):
            modules.append(nn.ReLU())
    return nn.Sequential(*modules)

In [None]:
class DenseNN(L.LightningModule):
    """Lightning module that trains a wrapped network with cross-entropy loss and Adam.

    Args:
        model: Module called on the flattened batch; its output is passed to
            ``F.cross_entropy`` together with the labels.
        learning_rate: Learning rate handed to the Adam optimizer.
    """

    def __init__(self, model, learning_rate):
        super().__init__()
        self.model = model
        self.learning_rate = learning_rate

    def _common_eval(self, batch, batch_idx):
        """Return the cross-entropy loss of the wrapped model on one batch."""
        inputs, targets = batch
        # Collapse every dimension except the batch dimension before the forward pass
        flat_inputs = inputs.view(inputs.size(0), -1)
        logits = self.model(flat_inputs)
        return F.cross_entropy(logits, targets)

    def training_step(self, batch, batch_idx):
        """Compute, log (as ``train_loss``) and return the loss for one training batch."""
        train_loss = self._common_eval(batch, batch_idx)
        self.log('train_loss', train_loss)
        return train_loss

    def test_step(self, batch, batch_idx):
        """Compute the loss for one test batch and log it as ``test_loss``."""
        self.log('test_loss', self._common_eval(batch, batch_idx))

    def configure_optimizers(self):
        """Return an Adam optimizer over all parameters of this module."""
        return optim.Adam(self.parameters(), lr=self.learning_rate)

# Example usage (learning_rate is a required argument):
# dense_model = DenseNN(get_model([28 * 28, 128, 10]), learning_rate=1e-3).to(device)

In [None]:
def evaluate(model, test_loader, device=None):
    """Return the fraction of samples in ``test_loader`` that ``model`` classifies correctly.

    The model is switched to evaluation mode (and left in it), and gradient
    tracking is disabled while the loader is consumed.

    Args:
        model: Module mapping a batch of images to per-class scores.
        test_loader: Iterable yielding ``(images, labels)`` batches.
        device: Device the batches are moved to. When omitted, the device of
            the model's first parameter is used so inputs always end up where
            the model lives; for a model without parameters the batches are
            left where they are.

    Returns:
        Accuracy as a float between 0 and 1.

    Raises:
        ValueError: If ``test_loader`` yields no samples.
    """
    if device is None:
        # Follow the model instead of a notebook-level global to avoid device mismatches
        first_param = next(model.parameters(), None)
        if first_param is not None:
            device = first_param.device

    correct = 0
    total = 0
    model.eval()
    with torch.no_grad():  # Disable gradient calculation for evaluation
        for images, labels in test_loader:
            if device is not None:
                images = images.to(device)
                labels = labels.to(device)

            # Predicted class = index of the highest score in each row
            predicted = model(images).argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    if total == 0:
        raise ValueError('test_loader yielded no samples; accuracy is undefined')
    return correct / total

In [None]:
from sklearn.model_selection import ParameterGrid

# Search space: every combination of these values is trained and scored once
param_grid = dict(
    learning_rate=[1e-4, 5e-3, 1e-3, 5e-2, 1e-2],
    # Layer widths per candidate network: 28 * 28 inputs down to 10 outputs
    layers=[
        [28 * 28, 128, 10],
        [28 * 28, 128, 64, 10],
    ],
    epochs=[5, 10],
)

grid = ParameterGrid(param_grid)

In [None]:
import logging

# Only let errors from Lightning through so the search output stays readable
lightning_logger = logging.getLogger("lightning.pytorch")
lightning_logger.setLevel(logging.ERROR)


In [None]:
import time

# Seed shared by every configuration so runs differ only in their hyperparameters
SEARCH_SEED = 42
results = []

# NOTE(review): configurations are scored on the test set, so the best score is
# an optimistic estimate; a held-out validation split would avoid this.
for params in grid:
    # Re-seed before the model is built so weight initialisation and batch
    # shuffling start from the same random state for each configuration and re-run.
    torch.manual_seed(SEARCH_SEED)
    # perf_counter is monotonic, unlike time.time(), so the elapsed time cannot
    # be distorted by system clock adjustments.
    start = time.perf_counter()
    model = DenseNN(
        model=get_model(params['layers']),
        learning_rate=params['learning_rate']
    ).to(device)
    trainer = L.Trainer(max_epochs=params['epochs'], enable_model_summary=False)
    trainer.fit(model=model, train_dataloaders=train_loader)
    # Make sure the wrapped network is on `device` before scoring it
    acc = evaluate(model.model.to(device), test_loader)
    end = time.perf_counter()
    print(f'Accuracy: {acc} in {end - start} s with params: {params}', flush=True)
    results.append((acc, params))


Training: |          | 0/? [00:00<?, ?it/s]

Accuracy: 0.8151 in 37.214606046676636 s with params: {'epochs': 5, 'layers': [784, 128, 10], 'learning_rate': 0.0001}


Training: |          | 0/? [00:00<?, ?it/s]

Accuracy: 0.8709 in 37.1301634311676 s with params: {'epochs': 5, 'layers': [784, 128, 10], 'learning_rate': 0.005}


Training: |          | 0/? [00:00<?, ?it/s]

Accuracy: 0.845 in 37.15149426460266 s with params: {'epochs': 5, 'layers': [784, 128, 10], 'learning_rate': 0.001}


Training: |          | 0/? [00:00<?, ?it/s]

Accuracy: 0.8174 in 37.56699013710022 s with params: {'epochs': 5, 'layers': [784, 128, 10], 'learning_rate': 0.05}


Training: |          | 0/? [00:00<?, ?it/s]

Accuracy: 0.8609 in 37.61487364768982 s with params: {'epochs': 5, 'layers': [784, 128, 10], 'learning_rate': 0.01}


Training: |          | 0/? [00:00<?, ?it/s]

Accuracy: 0.8185 in 38.0311553478241 s with params: {'epochs': 5, 'layers': [784, 128, 64, 10], 'learning_rate': 0.0001}


Training: |          | 0/? [00:00<?, ?it/s]

Accuracy: 0.8729 in 37.68246865272522 s with params: {'epochs': 5, 'layers': [784, 128, 64, 10], 'learning_rate': 0.005}


Training: |          | 0/? [00:00<?, ?it/s]

Accuracy: 0.863 in 37.89706087112427 s with params: {'epochs': 5, 'layers': [784, 128, 64, 10], 'learning_rate': 0.001}


Training: |          | 0/? [00:00<?, ?it/s]

Accuracy: 0.829 in 37.985838413238525 s with params: {'epochs': 5, 'layers': [784, 128, 64, 10], 'learning_rate': 0.05}


Training: |          | 0/? [00:00<?, ?it/s]

Accuracy: 0.8658 in 38.21307158470154 s with params: {'epochs': 5, 'layers': [784, 128, 64, 10], 'learning_rate': 0.01}


Training: |          | 0/? [00:00<?, ?it/s]

Accuracy: 0.8366 in 73.87126207351685 s with params: {'epochs': 10, 'layers': [784, 128, 10], 'learning_rate': 0.0001}


Training: |          | 0/? [00:00<?, ?it/s]

Accuracy: 0.8692 in 73.8295669555664 s with params: {'epochs': 10, 'layers': [784, 128, 10], 'learning_rate': 0.005}


Training: |          | 0/? [00:00<?, ?it/s]

Accuracy: 0.8682 in 72.9492998123169 s with params: {'epochs': 10, 'layers': [784, 128, 10], 'learning_rate': 0.001}


Training: |          | 0/? [00:00<?, ?it/s]

Accuracy: 0.8267 in 73.4336404800415 s with params: {'epochs': 10, 'layers': [784, 128, 10], 'learning_rate': 0.05}


Training: |          | 0/? [00:00<?, ?it/s]

Accuracy: 0.8604 in 76.29915118217468 s with params: {'epochs': 10, 'layers': [784, 128, 10], 'learning_rate': 0.01}


Training: |          | 0/? [00:00<?, ?it/s]

Accuracy: 0.8406 in 76.6804609298706 s with params: {'epochs': 10, 'layers': [784, 128, 64, 10], 'learning_rate': 0.0001}


Training: |          | 0/? [00:00<?, ?it/s]

Accuracy: 0.8774 in 74.1878833770752 s with params: {'epochs': 10, 'layers': [784, 128, 64, 10], 'learning_rate': 0.005}


Training: |          | 0/? [00:00<?, ?it/s]

Accuracy: 0.8754 in 73.85212421417236 s with params: {'epochs': 10, 'layers': [784, 128, 64, 10], 'learning_rate': 0.001}


Training: |          | 0/? [00:00<?, ?it/s]

Accuracy: 0.8198 in 73.7701461315155 s with params: {'epochs': 10, 'layers': [784, 128, 64, 10], 'learning_rate': 0.05}


Training: |          | 0/? [00:00<?, ?it/s]

Accuracy: 0.8716 in 74.79340887069702 s with params: {'epochs': 10, 'layers': [784, 128, 64, 10], 'learning_rate': 0.01}


In [None]:
# Sort on accuracy only: when two runs tie, plain tuple ordering would go on to
# compare the params dicts, which raises TypeError because dicts are unorderable.
print('\n'.join(map(str, sorted(results, key=lambda run: run[0]))))

(0.8151, {'epochs': 5, 'layers': [784, 128, 10], 'learning_rate': 0.0001})
(0.8174, {'epochs': 5, 'layers': [784, 128, 10], 'learning_rate': 0.05})
(0.8185, {'epochs': 5, 'layers': [784, 128, 64, 10], 'learning_rate': 0.0001})
(0.8198, {'epochs': 10, 'layers': [784, 128, 64, 10], 'learning_rate': 0.05})
(0.8267, {'epochs': 10, 'layers': [784, 128, 10], 'learning_rate': 0.05})
(0.829, {'epochs': 5, 'layers': [784, 128, 64, 10], 'learning_rate': 0.05})
(0.8366, {'epochs': 10, 'layers': [784, 128, 10], 'learning_rate': 0.0001})
(0.8406, {'epochs': 10, 'layers': [784, 128, 64, 10], 'learning_rate': 0.0001})
(0.845, {'epochs': 5, 'layers': [784, 128, 10], 'learning_rate': 0.001})
(0.8604, {'epochs': 10, 'layers': [784, 128, 10], 'learning_rate': 0.01})
(0.8609, {'epochs': 5, 'layers': [784, 128, 10], 'learning_rate': 0.01})
(0.863, {'epochs': 5, 'layers': [784, 128, 64, 10], 'learning_rate': 0.001})
(0.8658, {'epochs': 5, 'layers': [784, 128, 64, 10], 'learning_rate': 0.01})
(0.8682, {'epoc

In [None]:
# Pick the run with the highest accuracy (the first one wins on a tie)
best_run = max(results, key=lambda run: run[0])
best_acc, best_params = best_run
print(f'Best accuracy: {best_acc} with params: {best_params}')

In [None]:
# Rebuild a fresh network from the winning hyperparameters
best_network = get_model(best_params['layers'])
best_model = DenseNN(model=best_network, learning_rate=best_params['learning_rate']).to(device)
trainer = L.Trainer(
    max_epochs=best_params['epochs'],
    enable_model_summary=False,
)