In [1]:
import argparse
import os
from datetime import datetime

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from tqdm import tqdm


class FashionCNN(nn.Module):
    """Simple CNN for Fashion‑MNIST (28×28 grayscale, 10 classes)."""

    def __init__(self, dropout: float = 0.25):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3, padding=1),  # (N, 1, 28, 28) → (N, 32, 28, 28)
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2),  # (N, 32, 14, 14)

            nn.Conv2d(32, 64, kernel_size=3, padding=1),  # (N, 64, 14, 14)
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2),  # (N, 64, 7, 7)
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),  # (N, 64*7*7)
            nn.Dropout(dropout),
            nn.Linear(64 * 7 * 7, 128),
            nn.ReLU(inplace=True),
            nn.Dropout(dropout),
            nn.Linear(128, 10)
        )

    def forward(self, x):
        x = self.features(x)
        return self.classifier(x)


def train_one_epoch(model: nn.Module, loader: DataLoader, criterion, optimizer, device):
    model.train()
    running_loss, correct, total = 0.0, 0, 0
    for inputs, labels in tqdm(loader, desc="Training", leave=False):
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * inputs.size(0)
        _, preds = outputs.max(1)
        correct += preds.eq(labels).sum().item()
        total += labels.size(0)

    return running_loss / total, correct / total


def evaluate(model: nn.Module, loader: DataLoader, criterion, device):
    model.eval()
    running_loss, correct, total = 0.0, 0, 0
    with torch.no_grad():
        for inputs, labels in tqdm(loader, desc="Validation", leave=False):
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            running_loss += loss.item() * inputs.size(0)
            _, preds = outputs.max(1)
            correct += preds.eq(labels).sum().item()
            total += labels.size(0)

    return running_loss / total, correct / total


def save_checkpoint(model: nn.Module, path: str):
    torch.save(model.state_dict(), path)
    print(f"\nModel checkpoint saved to {path}\n")


def main():
    parser = argparse.ArgumentParser(description="Train a CNN on Fashion‑MNIST with PyTorch")
    parser.add_argument("--batch-size", type=int, default=128)
    parser.add_argument("--epochs", type=int, default=10)
    parser.add_argument("--lr", type=float, default=1e-3)
    parser.add_argument("--dropout", type=float, default=0.25)
    parser.add_argument("--data-dir", type=str, default="./data")
    parser.add_argument("--save-dir", type=str, default="./checkpoints")
    parser.add_argument("--no-cuda", action="store_true", help="disable CUDA training")
    args = parser.parse_args()

    use_cuda = torch.cuda.is_available() and not args.no_cuda
    device = torch.device("cuda" if use_cuda else "cpu")
    torch.manual_seed(42)

    # Data transforms
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,)),
    ])

    # Dataset & loaders
    train_dataset = datasets.FashionMNIST(root=args.data_dir, train=True, download=True, transform=transform)
    test_dataset = datasets.FashionMNIST(root=args.data_dir, train=False, download=True, transform=transform)

    train_loader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True, num_workers=4, pin_memory=use_cuda)
    test_loader = DataLoader(test_dataset, batch_size=args.batch_size, shuffle=False, num_workers=4, pin_memory=use_cuda)

    # Model, criterion, optimizer
    model = FashionCNN(dropout=args.dropout).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    os.makedirs(args.save_dir, exist_ok=True)
    best_acc = 0.0

    for epoch in range(1, args.epochs + 1):
        print(f"\nEpoch {epoch}/{args.epochs}")
        train_loss, train_acc = train_one_epoch(model, train_loader, criterion, optimizer, device)
        val_loss, val_acc = evaluate(model, test_loader, criterion, device)
        print(f"Train loss: {train_loss:.4f} | Train acc: {train_acc*100:.2f}%")
        print(f"Val   loss: {val_loss:.4f} | Val   acc: {val_acc*100:.2f}%")

        # Save best checkpoint
        if val_acc > best_acc:
            best_acc = val_acc
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            ckpt_path = os.path.join(args.save_dir, f"fashion_cnn_acc{best_acc*100:.2f}_{timestamp}.pth")
            save_checkpoint(model, ckpt_path)

    print(f"\nTraining finished. Best validation accuracy: {best_acc*100:.2f}%")


if __name__ == "__main__":
    main()


usage: ipykernel_launcher.py [-h] [--batch-size BATCH_SIZE] [--epochs EPOCHS]
                             [--lr LR] [--dropout DROPOUT]
                             [--data-dir DATA_DIR] [--save-dir SAVE_DIR]
                             [--no-cuda]
ipykernel_launcher.py: error: unrecognized arguments: --f=/run/user/1000/jupyter/runtime/kernel-v3ee74f6bbd386df6471721ad79a4bf81eacd1228c.json


SystemExit: 2

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
