<a href="https://colab.research.google.com/github/sargent-mg/ml-zoomcamp/blob/main/08-deep-learning/notebook_w8.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Clothing Classification with PyTorch (With Hyperparameter Tuning)

This notebook trains a neural network to classify 10 different types of clothing. It includes a dedicated step to automatically find the best model configuration.

### Workflow:
1.  **Data Setup:** Load images and apply augmentations.
2.  **Model Architecture:** Define a flexible MobileNetV2 wrapper.
3.  **Hyperparameter Tuning:** Systematically test different Learning Rates, Dropout Rates, and Layer Sizes to find the best combination.
4.  **Final Training:** Train the best model for a longer period.
5.  **Deployment:** Export to ONNX.

## 1. Setup and Imports

In [None]:
import os
import glob
import numpy as np
from PIL import Image

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader

# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

Using device: cuda


## 2. Download Data

In [None]:
!git clone https://github.com/alexeygrigorev/clothing-dataset-small.git

Cloning into 'clothing-dataset-small'...
remote: Enumerating objects: 3839, done.[K
remote: Counting objects: 100% (400/400), done.[K
remote: Compressing objects: 100% (400/400), done.[K
remote: Total 3839 (delta 9), reused 385 (delta 0), pack-reused 3439 (from 1)[K
Receiving objects: 100% (3839/3839), 100.58 MiB | 36.63 MiB/s, done.
Resolving deltas: 100% (10/10), done.


## 3. Dataset & Transforms

In [None]:
class ClothingDataset(Dataset):
    """Image dataset laid out as one sub-directory per class under `data_dir`.

    Class names are the sorted names of the (non-hidden) sub-directories of
    `data_dir`; the integer label of a class is its position in that sorted
    list. Files inside each class directory are indexed in sorted order so
    that sample indices are reproducible across runs and file systems.

    Args:
        data_dir: Root directory that contains one folder per class.
        transform: Optional callable applied to each PIL image before it is
            returned from `__getitem__`.

    Attributes set by the constructor: `classes`, `class_to_idx`,
    `image_paths`, `labels` (the last two are parallel lists).
    """

    def __init__(self, data_dir, transform=None):
        self.data_dir = data_dir
        self.transform = transform
        self.image_paths = []
        self.labels = []

        # Only real, non-hidden directories are classes. Stray files
        # (README, .DS_Store) or tool folders such as .ipynb_checkpoints
        # would otherwise crash the scan or become bogus classes.
        self.classes = sorted(
            entry for entry in os.listdir(data_dir)
            if not entry.startswith('.')
            and os.path.isdir(os.path.join(data_dir, entry))
        )
        self.class_to_idx = {cls: i for i, cls in enumerate(self.classes)}

        for label_name in self.classes:
            label_dir = os.path.join(data_dir, label_name)
            label_idx = self.class_to_idx[label_name]
            # os.listdir order is arbitrary; sort for a deterministic index.
            for img_name in sorted(os.listdir(label_dir)):
                img_path = os.path.join(label_dir, img_name)
                if img_name.startswith('.') or not os.path.isfile(img_path):
                    continue
                self.image_paths.append(img_path)
                self.labels.append(label_idx)

    def __len__(self):
        """Return the number of indexed images."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return `(image, label)` for sample `idx`.

        The image is loaded as RGB and passed through `transform` when one
        was given; `label` is the integer class index.
        """
        img_path = self.image_paths[idx]
        # The context manager closes the underlying file as soon as the RGB
        # copy has been made, instead of waiting for garbage collection.
        with Image.open(img_path) as img:
            image = img.convert('RGB')
        label = self.labels[idx]

        if self.transform:
            image = self.transform(image)

        return image, label

In [None]:
# Side length the images are resized to, and the per-channel statistics used
# by Normalize (presumably the ImageNet statistics expected by the pretrained
# backbone - confirm against the weights in use).
input_size = 224
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

# Building blocks shared by both pipelines.
resize_step = transforms.Resize((input_size, input_size))
tensor_steps = [
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std),
]

# Random augmentations, applied to training images only.
augmentation_steps = [
    transforms.RandomRotation(10),
    transforms.RandomResizedCrop(input_size, scale=(0.9, 1.0)),
    transforms.RandomHorizontalFlip(),
]

train_transforms = transforms.Compose([resize_step, *augmentation_steps, *tensor_steps])
val_transforms = transforms.Compose([resize_step, *tensor_steps])

train_dataset = ClothingDataset(
    './clothing-dataset-small/train',
    transform=train_transforms,
)
val_dataset = ClothingDataset(
    './clothing-dataset-small/validation',
    transform=val_transforms,
)

# Training batches are reshuffled every epoch; validation order stays fixed.
batch_size = 32
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=batch_size, shuffle=False)

## 4. Model Architecture
We use a class that allows us to easily change the `size_inner` and `droprate` for tuning.

In [None]:
class ClothingClassifierMobileNet(nn.Module):
    """MobileNetV2 feature extractor with a small trainable classification head.

    The pretrained backbone is frozen: its parameters do not receive
    gradients and it is kept permanently in eval mode, so its BatchNorm
    running statistics are not modified while the head is trained.

    Args:
        num_classes: Number of output logits.
        size_inner: Width of the hidden linear layer in the head.
        droprate: Dropout probability applied after the hidden layer.
    """

    def __init__(self, num_classes=10, size_inner=100, droprate=0.2):
        super().__init__()
        self.base_model = models.mobilenet_v2(weights='IMAGENET1K_V1')
        for param in self.base_model.parameters():
            param.requires_grad = False
        # The original ImageNet classifier is unused; forward() calls
        # base_model.features directly.
        self.base_model.classifier = nn.Identity()
        self.base_model.eval()

        self.global_avg_pooling = nn.AdaptiveAvgPool2d((1, 1))
        # 1280 is the input width of the head; it must equal the number of
        # channels produced by base_model.features.
        self.inner = nn.Linear(1280, size_inner)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(droprate)
        self.output_layer = nn.Linear(size_inner, num_classes)

    def train(self, mode=True):
        """Set train/eval mode on the head while keeping the backbone in eval.

        `requires_grad = False` only stops gradient updates; BatchNorm layers
        in training mode would still update their running mean/variance on
        every forward pass. Forcing the backbone back to eval mode keeps the
        pretrained statistics intact.
        """
        super().train(mode)
        self.base_model.eval()
        return self

    def forward(self, x):
        """Return raw class logits (no softmax) for a batch of images `x`."""
        x = self.base_model.features(x)
        x = self.global_avg_pooling(x)
        x = torch.flatten(x, 1)
        x = self.inner(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.output_layer(x)
        return x

## 5. Training Utilities
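`make_model` builds a model/optimizer pair for a given configuration. `train_and_evaluate` trains it and **returns** the best validation accuracy seen across epochs, so that different configurations can be compared.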

In [None]:
def make_model(learning_rate=0.001, size_inner=100, droprate=0.2, num_classes=10):
    """Create a classifier on the global `device` plus an Adam optimizer for it.

    Args:
        learning_rate: Learning rate passed to Adam.
        size_inner: Width of the classifier's hidden layer.
        droprate: Dropout probability used in the classifier head.
        num_classes: Number of output classes.

    Returns:
        A `(model, optimizer)` tuple.
    """
    head_config = dict(
        num_classes=num_classes,
        size_inner=size_inner,
        droprate=droprate,
    )
    model = ClothingClassifierMobileNet(**head_config)
    model.to(device)

    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    return model, optimizer

def _evaluate_accuracy(model, loader, device):
    """Return the fraction of samples in `loader` that `model` classifies correctly.

    Puts the model in eval mode and runs without gradient tracking.
    Returns 0.0 when the loader yields no samples.
    """
    model.eval()
    hits = 0
    seen = 0
    with torch.no_grad():
        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)
            predicted = model(inputs).argmax(dim=1)
            seen += labels.size(0)
            hits += (predicted == labels).sum().item()
    return hits / seen if seen else 0.0


def train_and_evaluate(model, optimizer, train_loader, val_loader, criterion, num_epochs, device, save_name=None, verbose=True):
    """Train `model` for `num_epochs` and return the best validation accuracy.

    Each epoch runs one pass over `train_loader` (with optimizer updates)
    followed by one evaluation pass over `val_loader`.

    Args:
        model: Module mapping an input batch to per-class scores.
        optimizer: Optimizer that updates the model's parameters.
        train_loader: Iterable of `(inputs, labels)` training batches.
        val_loader: Iterable of `(inputs, labels)` validation batches.
        criterion: Loss function called as `criterion(outputs, labels)`.
        num_epochs: Number of epochs to run.
        device: Device the batches are moved to before the forward pass.
        save_name: Optional path. When given, the model's `state_dict` is
            saved after the first epoch and again whenever validation
            accuracy improves, so the file always exists after training and
            holds the best-scoring weights.
        verbose: When true, print per-epoch accuracies and save events.

    Returns:
        The highest validation accuracy observed (0.0 if `num_epochs` is 0).
        The model is left in eval mode after the last validation pass.
    """
    best_val_accuracy = 0.0
    checkpoint_written = False

    for epoch in range(num_epochs):
        # Training pass
        model.train()
        correct = 0
        total = 0

        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # detach(): the predictions are only used for bookkeeping.
            predicted = outputs.detach().argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        # Guard against an empty loader instead of dividing by zero.
        train_acc = correct / total if total else 0.0

        # Validation pass
        val_acc = _evaluate_accuracy(model, val_loader, device)

        if verbose:
            print(f'Epoch {epoch+1}/{num_epochs} | Train Acc: {train_acc:.4f} | Val Acc: {val_acc:.4f}')

        improved = val_acc > best_val_accuracy
        if improved:
            best_val_accuracy = val_acc

        # Always write a checkpoint on the first epoch: otherwise a run whose
        # accuracy never rises above 0 would leave no file (or a stale one
        # from an earlier run) for later cells to load.
        if save_name and (improved or not checkpoint_written):
            torch.save(model.state_dict(), save_name)
            checkpoint_written = True
            if verbose:
                print(f'  --> Saved {save_name}')

    return best_val_accuracy

## 6. Hyperparameter Tuning

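We perform a **Grid Search** over learning rate, inner-layer size, and dropout rate (2 × 2 × 3 = 12 combinations). Each combination is trained for only 5 epochs, which is enough to compare configurations by validation accuracy without paying for a full training run.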

In [None]:
# Candidate values for each hyperparameter
learning_rates = [0.01, 0.001]
inner_sizes = [100, 256]
dropouts = [0.0, 0.2, 0.5]

# Every (lr, size, drop) combination, listed in the order in which nested
# loops over the three lists (learning rate outermost) would visit them.
search_grid = [
    (lr, size, drop)
    for lr in learning_rates
    for size in inner_sizes
    for drop in dropouts
]

results = {}          # (lr, size, drop) -> best validation accuracy of that run
best_accuracy = 0.0
best_params = {}

print("Starting Hyperparameter Tuning...\n")

criterion = nn.CrossEntropyLoss()

for lr, size, drop in search_grid:
    print(f"Testing: LR={lr}, Size={size}, Drop={drop}")

    # Fresh model and optimizer for this configuration
    model, optimizer = make_model(learning_rate=lr, size_inner=size, droprate=drop)

    # Short 5-epoch run; per-epoch logging is silenced to keep the output compact
    acc = train_and_evaluate(
        model,
        optimizer,
        train_loader,
        val_loader,
        criterion,
        num_epochs=5,
        device=device,
        verbose=False,
    )

    print(f"  -> Val Accuracy: {acc:.4f}")
    results[(lr, size, drop)] = acc

    # Strict '>' means the earliest configuration wins a tie.
    if acc > best_accuracy:
        best_accuracy = acc
        best_params = {'lr': lr, 'size': size, 'drop': drop}

print("\n--- Tuning Complete ---")
print(f"Best Accuracy: {best_accuracy:.4f}")
print(f"Best Parameters: {best_params}")

Starting Hyperparameter Tuning...

Testing: LR=0.01, Size=100, Drop=0.0
Downloading: "https://download.pytorch.org/models/mobilenet_v2-b0353104.pth" to /root/.cache/torch/hub/checkpoints/mobilenet_v2-b0353104.pth


100%|██████████| 13.6M/13.6M [00:00<00:00, 146MB/s]


  -> Val Accuracy: 0.7830
Testing: LR=0.01, Size=100, Drop=0.2
  -> Val Accuracy: 0.7361
Testing: LR=0.01, Size=100, Drop=0.5
  -> Val Accuracy: 0.4897
Testing: LR=0.01, Size=256, Drop=0.0
  -> Val Accuracy: 0.7507
Testing: LR=0.01, Size=256, Drop=0.2
  -> Val Accuracy: 0.7243
Testing: LR=0.01, Size=256, Drop=0.5
  -> Val Accuracy: 0.3783
Testing: LR=0.001, Size=100, Drop=0.0
  -> Val Accuracy: 0.8006
Testing: LR=0.001, Size=100, Drop=0.2
  -> Val Accuracy: 0.8094
Testing: LR=0.001, Size=100, Drop=0.5
  -> Val Accuracy: 0.7889
Testing: LR=0.001, Size=256, Drop=0.0
  -> Val Accuracy: 0.8094
Testing: LR=0.001, Size=256, Drop=0.2
  -> Val Accuracy: 0.7947
Testing: LR=0.001, Size=256, Drop=0.5


KeyboardInterrupt: 

## 7. Final Training
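Now we take the `best_params` found above and train for a full 10 epochs to get our production model; the weights from the epoch with the best validation accuracy are saved to `clothing_model_tuned_best.pth`. Note that the tuning run recorded above was interrupted during the last combination, so `best_params` reflects the 11 combinations that finished.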

In [None]:
print(f"Training final model with: {best_params}")

# Rebuild a fresh model/optimizer pair from the winning configuration.
final_model, final_optimizer = make_model(
    best_params['lr'],
    best_params['size'],
    best_params['drop'],
)
criterion = nn.CrossEntropyLoss()

# Longer run than during tuning; save_name makes train_and_evaluate write
# checkpoints to disk as validation accuracy improves.
final_run_options = dict(
    num_epochs=10,
    device=device,
    save_name='clothing_model_tuned_best.pth',
)
train_and_evaluate(
    final_model, final_optimizer,
    train_loader, val_loader,
    criterion,
    **final_run_options,
)

## 8. Inference

In [None]:
!pip install keras_image_helper
from keras_image_helper import create_preprocessor

# Load the best model structure based on tuning results
best_model = ClothingClassifierMobileNet(
    size_inner=best_params['size'],
    droprate=best_params['drop']
)
best_model.load_state_dict(torch.load('clothing_model_tuned_best.pth'))
best_model.to(device)
best_model.eval()

def preprocess_pytorch_style(X):
    """Convert a channels-last image batch into a normalized channels-first float32 array.

    Steps: divide by 255, move the last axis to position 1, then subtract
    the per-channel mean and divide by the per-channel std.

    Args:
        X: 4-D array indexed as (batch, height, width, 3); values are
            presumably pixel intensities in 0-255 - confirm with the caller.

    Returns:
        A float32 numpy array indexed as (batch, 3, height, width).
    """
    channel_mean = np.array([0.485, 0.456, 0.406]).reshape(1, 3, 1, 1)
    channel_std = np.array([0.229, 0.224, 0.225]).reshape(1, 3, 1, 1)

    scaled = X / 255.0
    channels_first = scaled.transpose(0, 3, 1, 2)
    normalized = (channels_first - channel_mean) / channel_std
    return normalized.astype(np.float32)

# Preprocessor that fetches an image and applies preprocess_pytorch_style
# (target_size presumably controls the resize - see keras_image_helper).
preprocessor = create_preprocessor(preprocess_pytorch_style, target_size=(224, 224))
url = 'http://bit.ly/mlbookcamp-pants'
X = torch.Tensor(preprocessor.from_url(url)).to(device)

# Forward pass without gradient tracking; keep the scores of the single image.
with torch.no_grad():
    logits = best_model(X)
pred = logits.cpu().numpy()[0]

# Class names in label-index order; the values are the model's raw outputs
# (no softmax is applied).
classes = ["dress", "hat", "longsleeve", "outwear", "pants", "shirt", "shoes", "shorts", "skirt", "t-shirt"]
result = {name: score for name, score in zip(classes, pred.tolist())}
ranked = sorted(result.items(), key=lambda item: item[1], reverse=True)
sorted_result = dict(ranked)
print(sorted_result)

## 9. Export to ONNX

In [None]:
!pip install onnx onnxscript
dummy_input = torch.randn(1, 3, 224, 224).to(device)
onnx_path = "clothing_classifier_tuned.onnx"

torch.onnx.export(
    best_model,
    dummy_input,
    onnx_path,
    verbose=False,
    input_names=['input'],
    output_names=['output'],
    dynamic_axes={'input': {0: 'batch_size'}, 'output': {0: 'batch_size'}}
)
print(f"Model exported to {onnx_path}")