# Mini-Beispiel für CNN mit PyTorch

In [1]:
# train_cnn.py
import os
from pathlib import Path
import numpy as np
import torch
from torch import nn
from torch.utils.data import DataLoader, SubsetRandomSampler
from torchvision import datasets, transforms

def get_loaders(root, img_size=224, batch_size=32, val_split=0.2, seed=42, num_workers=1):
    root = Path(root)

    # Transforms: leichte Augmentierung für Training, nur Resize+Norm fürs Validieren
    mean = [0.485, 0.456, 0.406]
    std  = [0.229, 0.224, 0.225]

    train_tfms = transforms.Compose([
        transforms.Resize((img_size, img_size)),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(10),
        transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1),
        transforms.ToTensor(),
        transforms.Normalize(mean, std),
    ])

    val_tfms = transforms.Compose([
        transforms.Resize((img_size, img_size)),
        transforms.ToTensor(),
        transforms.Normalize(mean, std),
    ])

    # Zwei ImageFolder-Instanzen auf das gleiche Root, aber mit unterschiedlichen Transforms
    train_dataset = datasets.ImageFolder(root=str(root), transform=train_tfms)
    val_dataset   = datasets.ImageFolder(root=str(root), transform=val_tfms)

    # Reproduzierbarer Index-Split
    num_samples = len(train_dataset)
    print(f"{num_samples=}")
    indices = np.arange(num_samples)
    rng = np.random.default_rng(seed)
    rng.shuffle(indices)

    split = int(np.floor(val_split * num_samples))
    val_idx = indices[:split]
    train_idx = indices[split:]

    train_sampler = SubsetRandomSampler(train_idx)
    val_sampler   = SubsetRandomSampler(val_idx)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, sampler=train_sampler,
                              num_workers=num_workers, pin_memory=True)
    val_loader   = DataLoader(val_dataset, batch_size=batch_size, sampler=val_sampler,
                              num_workers=num_workers, pin_memory=True)
    return train_loader, val_loader, train_dataset.classes

class SimpleCNN(nn.Module):
    def __init__(self, num_classes: int):
        super().__init__()
        # Kleines, effizientes CNN
        self.features = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=1), nn.BatchNorm2d(32), nn.ReLU(inplace=True),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 64, kernel_size=3, padding=1), nn.BatchNorm2d(64), nn.ReLU(inplace=True),
            nn.MaxPool2d(2),
            nn.Conv2d(64, 128, kernel_size=3, padding=1), nn.BatchNorm2d(128), nn.ReLU(inplace=True),
            nn.MaxPool2d(2),
            nn.Conv2d(128, 256, kernel_size=3, padding=1), nn.BatchNorm2d(256), nn.ReLU(inplace=True),
            nn.AdaptiveAvgPool2d((1, 1)),
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Dropout(0.3),
            nn.Linear(256, num_classes)
        )

    def forward(self, x):
        x = self.features(x)
        x = self.classifier(x)
        return x

@torch.no_grad()
def evaluate(model, loader, device):
    model.eval()
    correct, total, running_loss = 0, 0, 0.0
    criterion = nn.CrossEntropyLoss()
    for imgs, labels in loader:
        imgs, labels = imgs.to(device, non_blocking=True), labels.to(device, non_blocking=True)
        outputs = model(imgs)
        loss = criterion(outputs, labels)
        running_loss += loss.item() * imgs.size(0)
        preds = outputs.argmax(1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)
    return running_loss / total, correct / total

def train(root="data", epochs=10, img_size=224, batch_size=32, lr=1e-3, val_split=0.2, seed=42):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Device: {device}")

    train_loader, val_loader, classes = get_loaders(
        root=root, img_size=img_size, batch_size=batch_size, val_split=val_split, seed=seed
    )
    num_classes = len(classes)
    print(f"Klassen ({num_classes}): {classes}")

    model = SimpleCNN(num_classes).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=1e-4)
    criterion = nn.CrossEntropyLoss()

    best_val_acc = 0.0
    best_path = "best_cnn.pth"

    for epoch in range(1, epochs + 1):
        model.train()
        running_loss, running_correct, running_total = 0.0, 0, 0

        for imgs, labels in train_loader:
            imgs, labels = imgs.to(device, non_blocking=True), labels.to(device, non_blocking=True)

            optimizer.zero_grad()
            outputs = model(imgs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item() * imgs.size(0)
            preds = outputs.argmax(1)
            running_correct += (preds == labels).sum().item()
            running_total += labels.size(0)

        train_loss = running_loss / running_total
        train_acc = running_correct / running_total
        val_loss, val_acc = evaluate(model, val_loader, device)

        print(f"Epoch {epoch:02d}/{epochs} | "
              f"Train Loss: {train_loss:.4f} Acc: {train_acc:.3f} | "
              f"Val Loss: {val_loss:.4f} Acc: {val_acc:.3f}")

        if val_acc > best_val_acc:
            best_val_acc = val_acc
            torch.save({"model_state": model.state_dict(),
                        "classes": classes,
                        "img_size": img_size}, best_path)
            print(f"✓ Bestes Modell aktualisiert: {best_path} (Val Acc {best_val_acc:.3f})")

    print("Training fertig.")
    print(f"Bestes Val-Accuracy: {best_val_acc:.3f}")

if __name__ == "__main__":
    # Passe 'root' an deinen Datenordner an:
    # Struktur: root/
    #   ├── klasse_a/
    #   │     ├── img1.jpg
    #   │     └── ...
    #   └── klasse_b/
    #         └── ...
    dataset_root = "/home/juebrauer/link_to_vcd/10_datasets/57_vehicle_image_classification/Vehicles"
    train(root=dataset_root,
          epochs=10,
          img_size=224,
          batch_size=32,
          lr=1e-3,
          val_split=0.2,
          seed=42)


Device: cuda
num_samples=5589
Klassen (7): ['Auto Rickshaws', 'Bikes', 'Cars', 'Motorcycles', 'Planes', 'Ships', 'Trains']




Epoch 01/10 | Train Loss: 1.3530 Acc: 0.502 | Val Loss: 1.1047 Acc: 0.594
✓ Bestes Modell aktualisiert: best_cnn.pth (Val Acc 0.594)




Epoch 02/10 | Train Loss: 1.0688 Acc: 0.624 | Val Loss: 1.6293 Acc: 0.442




Epoch 03/10 | Train Loss: 0.9246 Acc: 0.680 | Val Loss: 0.8254 Acc: 0.705
✓ Bestes Modell aktualisiert: best_cnn.pth (Val Acc 0.705)




Epoch 04/10 | Train Loss: 0.8567 Acc: 0.702 | Val Loss: 0.9264 Acc: 0.653




Epoch 05/10 | Train Loss: 0.7748 Acc: 0.737 | Val Loss: 1.0822 Acc: 0.583




Epoch 06/10 | Train Loss: 0.7335 Acc: 0.748 | Val Loss: 0.9480 Acc: 0.658




Epoch 07/10 | Train Loss: 0.6821 Acc: 0.763 | Val Loss: 0.7631 Acc: 0.724
✓ Bestes Modell aktualisiert: best_cnn.pth (Val Acc 0.724)




Epoch 08/10 | Train Loss: 0.6362 Acc: 0.782 | Val Loss: 0.9450 Acc: 0.694




Epoch 09/10 | Train Loss: 0.6007 Acc: 0.785 | Val Loss: 0.5945 Acc: 0.782
✓ Bestes Modell aktualisiert: best_cnn.pth (Val Acc 0.782)




Epoch 10/10 | Train Loss: 0.5843 Acc: 0.798 | Val Loss: 0.5220 Acc: 0.833
✓ Bestes Modell aktualisiert: best_cnn.pth (Val Acc 0.833)
Training fertig.
Bestes Val-Accuracy: 0.833


# Qualitative Evaluierung

In [None]:
# viz_25_samples.py
import argparse
from pathlib import Path
import random
import torch
from torch import nn
from torchvision import datasets, transforms
from PIL import Image
import matplotlib.pyplot as plt

# --- SimpleCNN (wie im Trainingsskript) -------------------
class SimpleCNN(nn.Module):
    def __init__(self, num_classes: int):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=1), nn.BatchNorm2d(32), nn.ReLU(inplace=True),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 64, kernel_size=3, padding=1), nn.BatchNorm2d(64), nn.ReLU(inplace=True),
            nn.MaxPool2d(2),
            nn.Conv2d(64, 128, kernel_size=3, padding=1), nn.BatchNorm2d(128), nn.ReLU(inplace=True),
            nn.MaxPool2d(2),
            nn.Conv2d(128, 256, kernel_size=3, padding=1), nn.BatchNorm2d(256), nn.ReLU(inplace=True),
            nn.AdaptiveAvgPool2d((1, 1)),
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Dropout(0.3),
            nn.Linear(256, num_classes)
        )

    def forward(self, x):
        x = self.features(x)
        x = self.classifier(x)
        return x
# -----------------------------------------------------------

def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--root", type=str, default="data", help="Wurzelordner mit Unterordnern pro Klasse")
    parser.add_argument("--ckpt", type=str, default="best_cnn.pth", help="Pfad zum gespeicherten Checkpoint")
    parser.add_argument("--seed", type=int, default=42, help="Zufallssamen")
    parser.add_argument("--num", type=int, default=25, help="Anzahl zufälliger Bilder (zeigt 5x5 Grid)")
    args = parser.parse_args()

    random.seed(args.seed)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Checkpoint laden (enthält img_size & Klassen)
    ckpt = torch.load(args.ckpt, map_location="cpu")
    classes = ckpt["classes"]
    img_size = ckpt["img_size"]

    # Transforms: fürs Inferenz-Input (wie Validation)
    mean = [0.485, 0.456, 0.406]
    std  = [0.229, 0.224, 0.225]
    infer_tfms = transforms.Compose([
        transforms.Resize((img_size, img_size)),
        transforms.ToTensor(),
        transforms.Normalize(mean, std),
    ])

    # Fürs Anzeigen (ohne Norm, gleiche Größe)
    display_tfms = transforms.Compose([
        transforms.Resize((img_size, img_size)),
    ])

    # Dataset (liest Dateipfade & Targets)
    ds = datasets.ImageFolder(root=args.root)

    # Modell wiederherstellen
    model = SimpleCNN(num_classes=len(classes))
    model.load_state_dict(ckpt["model_state"])
    model.to(device).eval()

    # 25 (oder weniger) zufällige Indizes ziehen
    n = min(args.num, len(ds))
    indices = random.sample(range(len(ds)), n)

    # Vorbereiten der Figur 5x5
    rows, cols = 5, 5
    fig, axes = plt.subplots(rows, cols, figsize=(cols*3, rows*3))
    axes = axes.flatten()

    # Durch zufällige Bilder iterieren
    with torch.no_grad():
        for i, idx in enumerate(indices):
            path, true_label = ds.samples[idx]  # (Pfad, Klassenindex)
            # Bild fürs Modell
            img_in = infer_tfms(Image.open(path).convert("RGB")).unsqueeze(0).to(device)
            logits = model(img_in)
            pred_idx = logits.argmax(1).item()
            pred_name = classes[pred_idx]

            # Bild fürs Anzeigen
            disp_img = display_tfms(Image.open(path).convert("RGB"))
            ax = axes[i]
            ax.imshow(disp_img)
            # Titel mit Vorhersage (und wahrem Label)
            title = f"Pred: {pred_name}\nTrue: {classes[true_label]}"
            ax.set_title(title, fontsize=9)
            ax.axis("off")

        # Leere Achsen ausblenden, falls < 25 Bilder
        for j in range(i+1, rows*cols):
            axes[j].axis("off")

    fig.suptitle("25 zufällige Vorhersagen (5x5)", fontsize=14)
    plt.tight_layout()
    out_path = Path("predictions_5x5.png")
    plt.savefig(out_path, dpi=150)
    print(f"Gespeichert unter: {out_path.resolve()}")
    plt.show()

main()


usage: ipykernel_launcher.py [-h] [--root ROOT] [--ckpt CKPT] [--seed SEED]
                             [--num NUM]
ipykernel_launcher.py: error: unrecognized arguments: --f=/run/user/1000/jupyter/runtime/kernel-v37bb6a0e21f1955c1ab94e176c95d4443d7714bb7.json


SystemExit: 2

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
