In [2]:
"""
README: DeepLabV3 Worst-10 IoU Visualizer by Class and City

This script:
- Loads a trained DeepLabV3+MobileNetV3 segmentation model,
- Runs inference on a test image set,
- Calculates per-class IoU between ground truth and predictions for each city,
- Saves the **worst 10** results (lowest non-zero IoU; images whose ground truth lacks the class or whose IoU is exactly 0 are skipped) for each class and city as visualizations:
    - original.jpg (original image)
    - label.jpg    (ground truth mask overlay, always green)
    - prediction.jpg (predicted mask overlay, class color)

How to use:
- Set MODEL_PATH, IMAGE_DIR, and OUTPUT_DIR as needed.
- Edit city and class definitions as appropriate.
- Run the script; images will be saved under OUTPUT_DIR/[class]/[city]/[rank]_[image name]/.

Requirements:
- Python 3.8+
- torch, torchvision
- opencv-python
- numpy
- tqdm

Author: Bahadir Akin Akgul
Date: 13.07.2025
"""

import os
import cv2
import torch
import numpy as np
from tqdm import tqdm
from pathlib import Path
from torchvision import transforms
from torchvision.models.segmentation import deeplabv3_mobilenet_v3_large

# === SETTINGS ===
# Run on the GPU when one is visible to torch, otherwise fall back to the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Placeholders: point these at the trained checkpoint and the test image folder.
MODEL_PATH = "PATH_TO_TRAINED_MODEL/trained_model.pth"
IMAGE_DIR = Path("PATH_TO_TEST_IMAGES")
# Root folder for the saved visualizations; created up front if it is missing.
OUTPUT_DIR = Path("deeplabv3-worst-10-by-class-city")
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# NOTE(review): class_names is not referenced anywhere else in this cell;
# presumably the list index is the class id used in the masks — confirm.
class_names = ['Background', 'Person', 'Road', 'Vehicle']
class_ids = [1, 2, 3]  # Only evaluated classes

# Substrings looked up in an image's file stem to decide which city it belongs
# to. Dict order matters: the first city with a matching keyword wins.
city_keywords = {
    'istanbul': ['libadiye', 'levent', 'taksim', 'ciragan', 'barbaros', 'dolmabahce', 'bagdat', 'muallim', 'katar'],
    'paris': ['paris-champs'],
    'munich': ['munih'],
    'marseille': ['marsilya']
}
# Internal city key -> folder name used in the output tree. 'unknown' is the
# fallback bucket for file names that match no keyword.
city_translation = {
    'istanbul': 'Istanbul',
    'paris': 'Paris',
    'munich': 'Munich',
    'marseille': 'Marseille',
    'unknown': 'Unknown'
}

# === LOAD MODEL ===
# Build the architecture without pretrained weights and with 4 output classes,
# then restore the trained parameters from MODEL_PATH.
model = deeplabv3_mobilenet_v3_large(weights=None, num_classes=4)
# NOTE(review): torch.load unpickles the file, so only load checkpoints from a
# trusted source (consider weights_only=True on torch versions that support it).
model.load_state_dict(torch.load(MODEL_PATH, map_location=DEVICE))
# Move to the selected device and switch to evaluation mode for inference.
model.to(DEVICE).eval()

# === TRANSFORM ===
# Convert the image array to a tensor and normalise each channel.
# NOTE(review): these are the commonly used ImageNet mean/std values; presumably
# they match the preprocessing used during training — confirm.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.485, 0.456, 0.406),
                         std=(0.229, 0.224, 0.225)),
])

def compute_iou(gt, pred, cls):
    """Return the intersection-over-union of class ``cls`` for two label maps.

    Args:
        gt: Array of ground-truth class ids.
        pred: Array of predicted class ids, comparable element-wise with ``gt``.
        cls: Class id to evaluate.

    Returns:
        The number of positions labelled ``cls`` in both arrays divided by the
        number labelled ``cls`` in at least one of them, or ``0.0`` when the
        class appears in neither array.
    """
    in_gt = gt == cls
    in_pred = pred == cls

    covered = (in_gt | in_pred).sum()
    if covered == 0:
        # Class absent from both maps: report 0.0 rather than dividing by zero.
        return 0.0

    shared = (in_gt & in_pred).sum()
    return shared / covered

def get_city_from_name(name):
    """Map an image name to a city key via the keywords in ``city_keywords``.

    Args:
        name: Image name to search for keyword substrings (case-sensitive).

    Returns:
        The first city, in ``city_keywords`` order, that has at least one
        keyword contained in ``name``; ``'unknown'`` if no city matches.
    """
    matching_cities = (
        city
        for city, keywords in city_keywords.items()
        if any(keyword in name for keyword in keywords)
    )
    return next(matching_cities, 'unknown')

# Number of lowest-IoU samples retained per (class, city) while scanning the
# test set. The saving step only ever uses the 10 worst entries, so keeping
# more than that just holds full-resolution images in memory for nothing.
# Keep this >= the slice size used when saving.
WORST_K = 10

# results_by_class_city[class_id][city_key] -> list of result dicts
# (filename, iou, image, gt_mask, pred_mask), capped at WORST_K entries.
results_by_class_city = {cls: {city: [] for city in city_translation} for cls in class_ids}
# Sorted so the processing order (and therefore tie-breaking between equal
# IoU values) does not depend on the filesystem's directory order.
image_files = sorted(IMAGE_DIR.glob("*.jpg"))

print(f"Device: {DEVICE}")
print("Inferencing:")
for image_file in tqdm(image_files):
    name = image_file.stem
    city = get_city_from_name(name)
    # The ground-truth mask is expected next to the image as <stem>_mask.png.
    mask_file = IMAGE_DIR / f"{name}_mask.png"

    if not mask_file.exists():
        continue

    img = cv2.imread(str(image_file))
    mask = cv2.imread(str(mask_file), cv2.IMREAD_GRAYSCALE)
    # cv2.imread signals an unreadable/corrupt file by returning None instead
    # of raising; skip such pairs rather than crashing mid-run.
    if img is None or mask is None:
        tqdm.write(f"Skipping {name}: image or mask could not be read")
        continue

    # OpenCV loads BGR; the transform pipeline is fed RGB.
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    inp = transform(img_rgb).unsqueeze(0).to(DEVICE)
    with torch.no_grad():
        output = model(inp)["out"]
    # Drop only the batch dimension, then take the highest-scoring class per pixel.
    output = torch.argmax(output.squeeze(0), dim=0).cpu().numpy()

    # IoU is computed element-wise, so the two label maps must line up.
    if mask.shape != output.shape:
        tqdm.write(f"Skipping {name}: mask shape {mask.shape} != prediction shape {output.shape}")
        continue

    for cls_id in class_ids:
        # Only score classes that actually occur in the ground truth.
        if np.sum(mask == cls_id) == 0:
            continue
        iou = compute_iou(mask, output, cls_id)
        # NOTE(review): samples with IoU exactly 0 (class present in the ground
        # truth but completely missed) are excluded here, although they are the
        # worst possible cases — confirm this is intended.
        if iou > 0:
            bucket = results_by_class_city[cls_id][city]
            # img/mask/output are rebound to fresh arrays on every iteration
            # and never modified in place, so they can be stored without copying.
            bucket.append({
                "filename": name,
                "iou": iou,
                "image": img,
                "gt_mask": mask,
                "pred_mask": output
            })
            if len(bucket) > WORST_K:
                # Stable sort keeps earlier images first among equal IoUs, so
                # trimming here selects the same entries as sorting the full
                # list at the end would.
                bucket.sort(key=lambda entry: entry["iou"])
                del bucket[WORST_K:]

# === SAVE WORST 10 VISUALIZATIONS ===
# Overlay colour used for the prediction of each evaluated class id. The tuples
# are in OpenCV's BGR channel order, because they are blended onto images that
# were loaded with cv2.imread.
colors = {
    1: (0, 255, 255),  # pedestrian (yellow)
    2: (255, 0, 255),  # road (magenta)
    3: (0, 0, 255),    # vehicle (red)
}

def overlay_mask(img, mask, cls, color):
    """Tint the pixels of ``img`` that ``mask`` labels as class ``cls``.

    Args:
        img: Image array to draw on; it is not modified.
            Assumes a 3-channel uint8 image as returned by cv2.imread — TODO confirm.
        mask: 2-D label map with the same height and width as ``img``.
        cls: Class id whose pixels are highlighted.
        color: Per-channel colour tuple, in the same channel order as ``img``.

    Returns:
        A new array equal to ``img`` except where ``mask == cls``; there each
        pixel is a 40 % ``color`` / 60 % ``img`` blend.
    """
    # Solid-colour canvas with the same shape as the image.
    tint = np.zeros_like(img, dtype=np.uint8)
    tint[:, :] = color

    # Blend the whole frame once, then keep the blend only inside the class region.
    blended = cv2.addWeighted(tint, 0.4, img, 0.6, 0)
    selected = (mask == cls)[..., np.newaxis]  # broadcasts across the channels
    return np.where(selected, blended, img)

# Output folder name for each evaluated class id (built once, not per iteration).
class_dir_names = {1: "pedestrian", 2: "road", 3: "vehicle"}

# cv2.imwrite reports failure through its boolean return value; collect the
# paths that could not be written so the run does not claim success silently.
failed_writes = []

for cls_id, city_results in results_by_class_city.items():
    cls_name = class_dir_names[cls_id]

    for city, results in city_results.items():
        if not results:
            continue

        # Lowest IoU first; keep at most the 10 worst samples for this class/city.
        worst10 = sorted(results, key=lambda x: x["iou"])[:10]
        out_dir = OUTPUT_DIR / cls_name / city_translation[city]
        for i, item in enumerate(worst10, 1):
            # One folder per sample: <rank>_<image name>/
            base = out_dir / f"{i:02d}_{item['filename']}"
            base.mkdir(parents=True, exist_ok=True)

            image = item["image"]
            gt = item["gt_mask"]
            pred = item["pred_mask"]
            color = colors[cls_id]

            renders = (
                ("original.jpg", image),
                ("label.jpg", overlay_mask(image, gt, cls_id, (0, 255, 0))),  # green for GT
                ("prediction.jpg", overlay_mask(image, pred, cls_id, color)),
            )
            for file_name, picture in renders:
                target = base / file_name
                if not cv2.imwrite(str(target), picture):
                    failed_writes.append(target)

if failed_writes:
    print(f"WARNING: {len(failed_writes)} image(s) could not be written:")
    for target in failed_writes:
        print(f"  {target}")
else:
    print("Worst-case visualizations saved.")


Device: cuda
Inferencing:


100%|██████████| 1316/1316 [06:20<00:00,  3.46it/s]


✅ Kötü sonuçlar başarıyla kaydedildi.


In [1]:
import os
import cv2
import torch
import numpy as np
from tqdm import tqdm
from pathlib import Path
from torchvision import transforms
from torchvision.models.segmentation import deeplabv3_mobilenet_v3_large
from PIL import Image

# === SETTINGS ===
# Run on the GPU when one is visible to torch, otherwise fall back to the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Trained checkpoint and the folder holding the test images and their masks.
MODEL_PATH = "road-tr-od-ss-70-30-results/trained_model.pth"
IMAGE_DIR = Path("yolo-seg-11042025/road-tr-od-ss/test")
OUTPUT_DIR = Path("deeplabv3-worst-10-by-class-city-300dpi")  # UPDATED: separate folder for the 300 DPI export
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# NOTE(review): class_names is not referenced anywhere else in this cell;
# presumably the list index is the class id used in the masks — confirm.
class_names = ['Background', 'Person', 'Road', 'Vehicle']
class_ids = [1, 2, 3]  # Only the classes that are analysed

# Substrings looked up in an image's file stem to decide which city it belongs
# to. Dict order matters: the first city with a matching keyword wins.
city_keywords = {
    'istanbul': ['libadiye', 'levent', 'taksim', 'ciragan', 'barbaros', 'dolmabahce', 'bagdat', 'muallim', 'katar'],
    'paris': ['paris-champs'],
    'munih': ['munih'],
    'marsilya': ['marsilya']
}
# Internal city key -> English folder name used in the output tree. 'unknown'
# is the fallback bucket for file names that match no keyword.
city_translation = {
    'istanbul': 'Istanbul',
    'paris': 'Paris',
    'munih': 'Munich',
    'marsilya': 'Marseille',
    'unknown': 'Unknown'
}

# === LOAD MODEL ===
# Build the architecture without pretrained weights and with 4 output classes,
# then restore the trained parameters from MODEL_PATH.
model = deeplabv3_mobilenet_v3_large(weights=None, num_classes=4)
# NOTE(review): torch.load unpickles the file, so only load checkpoints from a
# trusted source (consider weights_only=True on torch versions that support it).
model.load_state_dict(torch.load(MODEL_PATH, map_location=DEVICE))
# Move to the selected device and switch to evaluation mode for inference.
model.to(DEVICE).eval()

# === PREPARATION ===
# Convert the image array to a tensor and normalise each channel.
# NOTE(review): these are the commonly used ImageNet mean/std values; presumably
# they match the preprocessing used during training — confirm.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.485, 0.456, 0.406),
                         std=(0.229, 0.224, 0.225)),
])

def compute_iou(gt, pred, cls):
    """Compute the IoU of a single class between ground truth and prediction.

    Args:
        gt: Array of ground-truth class ids.
        pred: Array of predicted class ids, comparable element-wise with ``gt``.
        cls: Class id to score.

    Returns:
        ``|gt == cls AND pred == cls| / |gt == cls OR pred == cls|``, or
        ``0.0`` if neither array contains ``cls``.
    """
    gt_hits, pred_hits = (gt == cls), (pred == cls)

    union_count = np.logical_or(gt_hits, pred_hits).sum()
    if union_count == 0:
        # Nothing to compare: avoid a division by zero.
        return 0.0

    overlap_count = np.logical_and(gt_hits, pred_hits).sum()
    return overlap_count / union_count

def get_city_from_name(name):
    """Return the city key whose keyword occurs in ``name``.

    Cities are tried in ``city_keywords`` order and the first one with a
    keyword that is a (case-sensitive) substring of ``name`` is returned.
    Names matching no keyword yield ``'unknown'``.
    """
    for city in city_keywords:
        for keyword in city_keywords[city]:
            if keyword in name:
                return city
    return 'unknown'

def overlay_mask(img, mask, cls, color):
    """Highlight class ``cls`` on a copy of ``img`` with a translucent colour.

    Args:
        img: Image array to draw on; left untouched.
            Assumes a 3-channel uint8 image as returned by cv2.imread — TODO confirm.
        mask: 2-D label map matching the image's height and width.
        cls: Class id to highlight.
        color: Per-channel colour tuple in the image's channel order.

    Returns:
        New array in which pixels with ``mask == cls`` are a 0.4/0.6 blend of
        ``color`` and the image, and all other pixels equal ``img``.
    """
    base = img.copy()

    # Uniform colour layer of the same shape as the image.
    color_layer = np.zeros_like(img, dtype=np.uint8)
    color_layer[:, :] = color
    tinted = cv2.addWeighted(color_layer, 0.4, base, 0.6, 0)

    # Replicate the 2-D class mask across the three colour channels.
    is_cls = np.repeat((mask == cls)[:, :, None], 3, axis=2)
    return np.where(is_cls, tinted, base)

def save_with_dpi(img_array, path, dpi=(300, 300)):
    """Write a BGR image array to ``path`` through PIL with DPI metadata.

    Args:
        img_array: Image in BGR channel order (it is converted to RGB first).
        path: Destination file path handed to ``PIL.Image.Image.save``.
        dpi: ``(horizontal, vertical)`` resolution passed to the save call.
    """
    # PIL expects RGB, so swap the channel order before wrapping the array.
    rgb_pixels = cv2.cvtColor(img_array, cv2.COLOR_BGR2RGB)
    Image.fromarray(rgb_pixels).save(path, dpi=dpi, quality=95)

# Number of lowest-IoU samples retained per (class, city) while scanning the
# test set. The saving step only ever uses the 10 worst entries, so keeping
# more than that just holds full-resolution images in memory for nothing.
# Keep this >= the slice size used when saving.
WORST_K = 10

# results_by_class_city[class_id][city_key] -> list of result dicts
# (filename, iou, image, gt_mask, pred_mask), capped at WORST_K entries.
results_by_class_city = {cls: {city: [] for city in city_translation} for cls in class_ids}
# Sorted so the processing order (and therefore tie-breaking between equal
# IoU values) does not depend on the filesystem's directory order.
image_files = sorted(IMAGE_DIR.glob("*.jpg"))

print(f"Device: {DEVICE}")
print("Inferencing:")
for image_file in tqdm(image_files):
    name = image_file.stem
    city = get_city_from_name(name)
    # The ground-truth mask is expected next to the image as <stem>_mask.png.
    mask_file = IMAGE_DIR / f"{name}_mask.png"

    if not mask_file.exists():
        continue

    img = cv2.imread(str(image_file))
    mask = cv2.imread(str(mask_file), cv2.IMREAD_GRAYSCALE)
    # cv2.imread signals an unreadable/corrupt file by returning None instead
    # of raising; skip such pairs rather than crashing mid-run.
    if img is None or mask is None:
        tqdm.write(f"Skipping {name}: image or mask could not be read")
        continue

    # OpenCV loads BGR; the transform pipeline is fed RGB.
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    inp = transform(img_rgb).unsqueeze(0).to(DEVICE)
    with torch.no_grad():
        output = model(inp)["out"]
    # Drop only the batch dimension, then take the highest-scoring class per pixel.
    output = torch.argmax(output.squeeze(0), dim=0).cpu().numpy()

    # IoU is computed element-wise, so the two label maps must line up.
    if mask.shape != output.shape:
        tqdm.write(f"Skipping {name}: mask shape {mask.shape} != prediction shape {output.shape}")
        continue

    for cls_id in class_ids:
        # Only score classes that actually occur in the ground truth.
        if np.sum(mask == cls_id) == 0:
            continue
        iou = compute_iou(mask, output, cls_id)
        # NOTE(review): samples with IoU exactly 0 (class present in the ground
        # truth but completely missed) are excluded here, although they are the
        # worst possible cases — confirm this is intended.
        if iou > 0:
            bucket = results_by_class_city[cls_id][city]
            # img/mask/output are rebound to fresh arrays on every iteration
            # and never modified in place, so they can be stored without copying.
            bucket.append({
                "filename": name,
                "iou": iou,
                "image": img,
                "gt_mask": mask,
                "pred_mask": output
            })
            if len(bucket) > WORST_K:
                # Stable sort keeps earlier images first among equal IoUs, so
                # trimming here selects the same entries as sorting the full
                # list at the end would.
                bucket.sort(key=lambda entry: entry["iou"])
                del bucket[WORST_K:]

# === SAVE THE 10 WORST IMAGES ===
# Prediction overlay colour per evaluated class id, in BGR channel order
# (the overlays are drawn on images loaded with cv2.imread).
colors = {
    1: (0, 255, 255),  # pedestrian
    2: (255, 0, 255),  # road
    3: (0, 0, 255),    # vehicle
}

for cls_id, city_results in results_by_class_city.items():
    # Folder name used for this class in the output tree.
    cls_name = {1: "pedestrian", 2: "road", 3: "vehicle"}[cls_id]

    for city, results in city_results.items():
        if len(results) == 0:
            continue

        # Ascending IoU: the first entries are the worst predictions.
        ranked = sorted(results, key=lambda entry: entry["iou"])
        worst10 = ranked[:10]
        out_dir = OUTPUT_DIR / cls_name / city_translation[city]

        for rank, item in enumerate(worst10, start=1):
            # One folder per sample: <rank>_<image name>/
            base = out_dir / f"{rank:02d}_{item['filename']}"
            base.mkdir(parents=True, exist_ok=True)

            image = item["image"]
            overlays = (
                ("label.jpg", item["gt_mask"], (0, 255, 0)),  # ground truth - green
                ("prediction.jpg", item["pred_mask"], colors[cls_id]),
            )

            save_with_dpi(image, str(base / "original.jpg"))
            for file_name, label_map, bgr in overlays:
                save_with_dpi(overlay_mask(image, label_map, cls_id, bgr), str(base / file_name))

print("✅ Kötü sonuçlar başarıyla kaydedildi (300 DPI).")


Device: cpu
Inferencing:


100%|██████████| 1316/1316 [14:23<00:00,  1.52it/s]


✅ Kötü sonuçlar başarıyla kaydedildi (300 DPI).
