# Carga de librerías y métodos

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import os
import cv2
import glob
import torch
from PIL import Image
from tqdm import tqdm
import torch.nn.functional as F
from transformers import AutoImageProcessor, Mask2FormerForUniversalSegmentation


In [None]:
# Report whether PyTorch can see a CUDA device.
cuda_available = torch.cuda.is_available()
print("CUDA:", cuda_available)

In [None]:
# Folder scanned for the images to segment.
input_dir = "/home/joan_ds/Sandbox/UOC/TFM/trials_imgs"
# Folder that receives the ID maps and overlay images.
output_dir = "/home/joan_ds/Sandbox/UOC/TFM/tfm_mask2former/m2f_out"


In [None]:
# Checkpoint identifier handed to the `from_pretrained` loaders.
model_id = "facebook/mask2former-swin-small-cityscapes-semantic"

# Use the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"


In [None]:
# Class IDs of the two classes this notebook reports on.
ROAD_ID = 0
SIDEWALK_ID = 1

# RGB colour used to paint each class in the overlays.
COLOR = {}
COLOR[ROAD_ID] = (128, 64, 128)      # road
COLOR[SIDEWALK_ID] = (244, 35, 232)  # sidewalk


In [None]:
# Make sure the output folder exists before anything is written to it.
os.makedirs(output_dir, exist_ok=True)

# Pre-processor paired with the checkpoint.
proc = AutoImageProcessor.from_pretrained(model_id)

# Load the segmentation model, move it to the chosen device and switch it
# to evaluation mode.
model = Mask2FormerForUniversalSegmentation.from_pretrained(model_id)
model = model.to(device)
model = model.eval()

In [None]:
def infer_one(img: Image.Image):
    """
    Run the segmentation model on one image and return per-pixel results.

    Args:
        img: PIL image to segment. Its `size` fixes the output resolution.

    Returns:
        pred_ids: np.uint8 array [H, W] with the arg-max class ID per pixel.
        probs_chw: np.float32 array [C, H, W] with the per-class scores
            normalised to sum to 1 over the class axis at every pixel.
    """
    width, height = img.size  # PIL reports (width, height)
    inputs = proc(images=img, return_tensors="pt").to(device)

    with torch.no_grad():
        outputs = model(**inputs)

    # 1) Per-query class probabilities; the last logit is dropped, as the
    #    original code treats it as the 'no object' slot.
    class_probs = outputs.class_queries_logits[0].softmax(dim=-1)[:, :-1]  # [Q, C]

    # 2) Per-query mask probabilities at the model's native mask resolution.
    mask_probs = outputs.masks_queries_logits[0].sigmoid()  # [Q, Hm, Wm]

    # 3) Combine queries into per-class score maps BEFORE upsampling.
    #    Bilinear interpolation is linear, so this equals upsampling every
    #    query mask first and combining afterwards, but only C maps (not Q)
    #    are materialised at full image resolution.
    scores_lowres = torch.einsum('qc,qhw->chw', class_probs, mask_probs)  # [C, Hm, Wm]
    class_scores = F.interpolate(
        scores_lowres.unsqueeze(0),
        size=(height, width),
        mode="bilinear",
        align_corners=False,
    )[0]  # [C, H, W]

    # 4) Class ID per pixel (arg-max over the class axis).
    pred_ids = class_scores.argmax(dim=0)  # [H, W]

    # 5) Normalise scores per pixel so they can be read as class confidence;
    #    the clamp avoids dividing by zero where every score is ~0.
    denom = class_scores.sum(dim=0, keepdim=True).clamp_min(1e-6)  # [1, H, W]
    probs_chw = (class_scores / denom).detach()  # [C, H, W]

    return (
        pred_ids.cpu().numpy().astype(np.uint8),
        probs_chw.cpu().numpy().astype(np.float32),
    )

In [None]:
def make_overlay_from_ids(img_rgb, ids_map, alpha=0.5):
    """Paint the classes listed in COLOR and blend them over the image.

    Args:
        img_rgb: image to blend onto; passed straight to cv2.addWeighted.
        ids_map: 2-D array of class IDs.
        alpha: weight of the colour layer; the image gets `1 - alpha`.

    Returns:
        (overlay, mask_rgb): the blended image and the colour layer alone.
    """
    height, width = ids_map.shape
    class_layer = np.zeros((height, width, 3), dtype=np.uint8)

    # Pixels whose ID is not in COLOR stay black in the colour layer.
    for class_id in COLOR:
        class_layer[ids_map == class_id] = COLOR[class_id]

    blended = cv2.addWeighted(img_rgb, 1 - alpha, class_layer, alpha, 0)
    return blended, class_layer

def _format_metric(value, spec, suffix=""):
    """Format a numeric metric with `spec`, or return '—' when it is missing."""
    is_number = isinstance(value, (float, int, np.floating, np.integer))
    # pandas stores a missing value (None) in a float column as NaN.
    if not is_number or pd.isna(value):
        return "—"
    return f"{value:{spec}}{suffix}"


def _index_originals(input_dir, extensions):
    """Map (stem, lower-cased extension) to the path of each image in input_dir."""
    index = {}
    if not os.path.isdir(input_dir):
        return index
    for fname in sorted(os.listdir(input_dir)):
        stem, ext = os.path.splitext(fname)
        key = (stem, ext.lower())
        if key[1] in extensions and key not in index:
            index[key] = os.path.join(input_dir, fname)
    return index


def show_overlays(
    input_dir: str,
    output_dir: str,
    df_results: 'pd.DataFrame | None' = None,
    alpha: float = 0.5
):
    """Show each original image next to its road/sidewalk overlay.

    Every `<stem>_ids.png` in `output_dir` is paired with `<stem>.jpg`,
    `<stem>.jpeg` or `<stem>.png` in `input_dir` (extension matched
    case-insensitively, in that order of preference). One matplotlib figure
    is displayed per pair.

    Args:
        input_dir: folder holding the original images.
        output_dir: folder holding the `*_ids.png` class-ID maps.
        df_results: optional table indexed through its `image_name` column;
            when a row exists for an image, its `n_classes`, `road_pct`,
            `sidewalk_pct`, `road_conf` and `sidewalk_conf` values are shown
            in the overlay title.
        alpha: blending weight of the colour layer.

    Returns:
        None. Prints a message and returns early when no pair is found.
    """
    ids_suffix = '_ids.png'
    extensions = ('.jpg', '.jpeg', '.png')

    # Quick lookup of the metrics by image file name.
    stats = None
    if df_results is not None and len(df_results):
        stats = df_results.set_index('image_name').to_dict(orient='index')

    # Find (original, *_ids.png) pairs.
    originals = _index_originals(input_dir, extensions)
    pairs = []
    for fname in sorted(os.listdir(output_dir)):
        if not fname.endswith(ids_suffix):
            continue
        stem = fname[:-len(ids_suffix)]
        for ext in extensions:
            orig_path = originals.get((stem, ext))
            if orig_path is not None:
                pairs.append((orig_path, os.path.join(output_dir, fname)))
                break

    if not pairs:
        print("No s’ha trobat cap parella (original, *_ids.png).")
        return

    # Legend shared by every figure.
    legend_patches = [
        mpatches.Patch(color=np.array(COLOR[ROAD_ID]) / 255.0, label='road'),
        mpatches.Patch(color=np.array(COLOR[SIDEWALK_ID]) / 255.0, label='sidewalk'),
    ]

    for orig_path, ids_path in pairs:
        name = os.path.basename(orig_path)

        # Load the original image and the class-ID map.
        orig_bgr = cv2.imread(orig_path)
        if orig_bgr is None:
            print(f"[avis] No s’ha pogut llegir: {orig_path}")
            continue
        orig_rgb = cv2.cvtColor(orig_bgr, cv2.COLOR_BGR2RGB)

        pred_ids = cv2.imread(ids_path, cv2.IMREAD_GRAYSCALE)
        if pred_ids is None:
            print(f"[avis] No s’ha pogut llegir: {ids_path}")
            continue

        # Nearest-neighbour keeps the IDs intact if the sizes differ.
        if pred_ids.shape[:2] != orig_rgb.shape[:2]:
            pred_ids = cv2.resize(
                pred_ids,
                (orig_rgb.shape[1], orig_rgb.shape[0]),
                interpolation=cv2.INTER_NEAREST
            )

        overlay, _ = make_overlay_from_ids(orig_rgb, pred_ids, alpha=alpha)

        # Title with metrics when a results row exists for this image.
        title_overlay = name
        if stats and name in stats:
            s = stats[name]
            n = s.get('n_classes')
            rp_txt = _format_metric(s.get('road_pct', None), ".2f", "%")
            sp_txt = _format_metric(s.get('sidewalk_pct', None), ".2f", "%")
            rc_txt = _format_metric(s.get('road_conf', None), ".3f")
            sc_txt = _format_metric(s.get('sidewalk_conf', None), ".3f")

            title_overlay = (
                f"número de clases: {n}\n"
                f"road: {rp_txt} (conf {rc_txt}) | "
                f"sidewalk: {sp_txt} (conf {sc_txt})"
            )

        # One figure per image: original on the left, overlay on the right.
        fig, ax = plt.subplots(1, 2, figsize=(12, 6))
        ax[0].imshow(orig_rgb)
        ax[0].set_title(f"Imagen original {name}")
        ax[0].axis('off')
        ax[1].imshow(overlay)
        ax[1].set_title(title_overlay)
        ax[1].axis('off')
        fig.legend(handles=legend_patches, loc='lower center', ncol=2)
        plt.tight_layout(rect=[0, 0.05, 1, 1])
        plt.show()
        # Release the figure so long runs do not accumulate open figures.
        plt.close(fig)

# Experimentos con imágenes del dataset

In [None]:
# Segment every image in `input_dir`, save its ID map and overlay, and
# collect one summary row per image.
results_dataset = []
files = sorted(
    f for f in glob.glob(os.path.join(input_dir, "*"))
    if f.lower().endswith((".jpg", ".jpeg", ".png"))
)

# The output folder is the same for every image, so create it once.
os.makedirs(output_dir, exist_ok=True)

for img_path in tqdm(files, desc="Mask2Former"):
    name = os.path.splitext(os.path.basename(img_path))[0]
    # Close the file handle as soon as the pixels have been decoded.
    with Image.open(img_path) as raw_img:
        img = raw_img.convert("RGB")

    pred, probs_chw = infer_one(img)  # pred: [H,W], probs_chw: [C,H,W]

    road_mask = (pred == ROAD_ID)
    swlk_mask = (pred == SIDEWALK_ID)

    # Share of pixels assigned to each class.
    road_pct     = float(road_mask.mean() * 100.0)
    sidewalk_pct = float(swlk_mask.mean() * 100.0)

    # Mean confidence over the pixels labelled as the class; None when the
    # class is absent from the prediction.
    road_conf     = float(probs_chw[ROAD_ID][road_mask].mean())     if road_mask.any() else None
    sidewalk_conf = float(probs_chw[SIDEWALK_ID][swlk_mask].mean()) if swlk_mask.any() else None

    # Road/sidewalk colour layer blended 50/50 over the image.
    overlay, _ = make_overlay_from_ids(np.array(img), pred, alpha=0.5)

    # Save the ID map (single-channel uint8) and the overlay (OpenCV expects BGR).
    ids_path  = os.path.join(output_dir, f"{name}_ids.png")
    over_path = os.path.join(output_dir, f"{name}_overlay.jpg")
    cv2.imwrite(ids_path, pred)
    cv2.imwrite(over_path, cv2.cvtColor(overlay, cv2.COLOR_RGB2BGR))

    torch.cuda.empty_cache()

    # Summary row for this image.
    results_dataset.append({
        "image_name": os.path.basename(img_path),
        "n_classes": int(np.unique(pred).size),
        "road_pct": round(road_pct, 3),
        "sidewalk_pct": round(sidewalk_pct, 3),
        "road_conf": None if road_conf is None else round(road_conf, 3),
        "sidewalk_conf": None if sidewalk_conf is None else round(sidewalk_conf, 3),
    })

df_dataset = pd.DataFrame(results_dataset)
# DataFrame.info() prints by itself and returns None, so it is not wrapped in print().
df_dataset.info()
display(df_dataset.head(10))

In [None]:
# Display the overlays of the dataset images; pass df_results=None to drop
# the metrics from the titles.
show_overlays(input_dir, output_dir, df_results=df_dataset, alpha=0.5)


# Experimentos con imágenes de los benchmarks

In [None]:
# One descriptor per benchmark: a display name and the folder with its images.
bdd100k = dict(
    name='BDD100K',
    path='/home/joan_ds/Sandbox/UOC/TFM/benchmarks/BDD100K',
)
cityscapes = dict(
    name='Cityscapes',
    path='/home/joan_ds/Sandbox/UOC/TFM/benchmarks/Cityscapes',
)
mapillary_vistas = dict(
    name='Mapillary Vistas',
    path='/home/joan_ds/Sandbox/UOC/TFM/benchmarks/Mapillary_Vistas',
)

# Order in which the benchmarks are processed.
benchmark_lst = [bdd100k, cityscapes, mapillary_vistas]


In [None]:
# Segment the images of every benchmark, save the outputs in a per-benchmark
# sub-folder, and keep one results table per benchmark.
results_benchmarks_lst = []

for benchmark in benchmark_lst:
    print (f"Segmentación con el submuestreo del dataset {benchmark['name']}.")

    # Resolve and create the output folder before the image loop, so it is
    # defined (and exists) even when the benchmark folder has no images.
    output_bmrk_dir = os.path.join(output_dir, benchmark['name'])
    os.makedirs(output_bmrk_dir, exist_ok=True)

    results_benchmark = []
    files = sorted(
        f for f in glob.glob(os.path.join(benchmark['path'], "*"))
        if f.lower().endswith((".jpg", ".jpeg", ".png"))
    )

    for img_path in tqdm(files, desc="Mask2Former"):
        name = os.path.splitext(os.path.basename(img_path))[0]
        # Close the file handle as soon as the pixels have been decoded.
        with Image.open(img_path) as raw_img:
            img = raw_img.convert("RGB")

        pred, probs_chw = infer_one(img)  # pred: [H,W], probs_chw: [C,H,W]

        road_mask = (pred == ROAD_ID)
        swlk_mask = (pred == SIDEWALK_ID)

        # Share of pixels assigned to each class.
        road_pct     = float(road_mask.mean() * 100.0)
        sidewalk_pct = float(swlk_mask.mean() * 100.0)

        # Mean confidence over the pixels labelled as the class; None when
        # the class is absent from the prediction.
        road_conf     = float(probs_chw[ROAD_ID][road_mask].mean())     if road_mask.any() else None
        sidewalk_conf = float(probs_chw[SIDEWALK_ID][swlk_mask].mean()) if swlk_mask.any() else None

        # Road/sidewalk colour layer blended 50/50 over the image.
        overlay, _ = make_overlay_from_ids(np.array(img), pred, alpha=0.5)

        # Save the ID map (single-channel uint8) and the overlay (OpenCV expects BGR).
        ids_path  = os.path.join(output_bmrk_dir, f"{name}_ids.png")
        over_path = os.path.join(output_bmrk_dir, f"{name}_overlay.jpg")
        cv2.imwrite(ids_path, pred)
        cv2.imwrite(over_path, cv2.cvtColor(overlay, cv2.COLOR_RGB2BGR))

        torch.cuda.empty_cache()

        # Summary row for this image.
        results_benchmark.append({
            "image_name": os.path.basename(img_path),
            "n_classes": int(np.unique(pred).size),
            "road_pct": round(road_pct, 3),
            "sidewalk_pct": round(sidewalk_pct, 3),
            "road_conf": None if road_conf is None else round(road_conf, 3),
            "sidewalk_conf": None if sidewalk_conf is None else round(sidewalk_conf, 3),
        })

    df_benchmark = pd.DataFrame(results_benchmark)
    results_benchmarks_lst.append(df_benchmark)

    # Pass df_results=None to drop the metrics from the titles.
    show_overlays(
        input_dir=benchmark['path'],
        output_dir=output_bmrk_dir,
        df_results=df_benchmark,
        alpha=0.5
    )
    

    



# Output

In [None]:
# Stack the per-benchmark tables, then append them to the dataset table;
# ignore_index renumbers the rows from 0 in both results.
df_benchmarks = pd.concat(results_benchmarks_lst, ignore_index=True)
df = pd.concat([df_dataset, df_benchmarks], ignore_index=True)

df.info()

In [None]:
# Rows whose file name equals that of the first row.
df[df["image_name"] == df["image_name"][0]]

In [None]:
# Write the combined results table to an Excel workbook.
df.to_excel(excel_writer='20250814_results_analysis.xlsx')