In [1]:

import os, sys, torch
print('Python:', sys.version)
print('PyTorch:', torch.__version__)
print('CUDA available:', torch.cuda.is_available())
if torch.cuda.is_available():
    print('CUDA device:', torch.cuda.get_device_name(0))

# Check presence of mamba_ssm (optional) and mamba_model (required)
try:
    import importlib.util
    spec = importlib.util.find_spec('mamba_ssm')
    print('mamba_ssm found:', spec is not None)
except Exception as e:
    print('mamba_ssm check error:', e)

try:
    spec2 = importlib.util.find_spec('mamba_model')
    print('mamba_model found:', spec2 is not None)
except Exception as e:
    print('mamba_model check error:', e)


Python: 3.10.18 (main, Jun  5 2025, 13:14:17) [GCC 11.2.0]
PyTorch: 2.8.0+cu129
CUDA available: True
CUDA device: NVIDIA GeForce RTX 4060 Ti
mamba_ssm found: True
mamba_model found: True


In [3]:
#imports
import os
import h5py
import numpy as np
import cv2
import torch
import random
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
import torch.nn.functional as F
from torch.optim import AdamW


In [4]:
# Constants
PATCH_SIZE = 256
TARGET_SIZE = (256, 256)
LABEL_KEY = 'outlines'
MODALITIES_3 = ['dem', 'optical', 'bright_dark_outlines']
CHANNEL_INFO_3 = {'dem': 2, 'optical': 6, 'bright_dark_outlines': 3}

def normalize(arr):
    arr = arr.astype(np.float32)
    return (arr - arr.mean()) / (arr.std() + 1e-5)

In [5]:
def augment_patch(img, label):
    if random.random() < 0.5:
        img = np.flip(img, axis=0)
        label = np.flip(label, axis=0)
    if random.random() < 0.5:
        img = np.flip(img, axis=1)
        label = np.flip(label, axis=1)
    return img, label

class GlacierHDF5PatchDataset3(Dataset):
    def __init__(self, hdf5_file_path, patch_size=PATCH_SIZE, target_size=TARGET_SIZE, length = 600):
        self.hdf5 = h5py.File(hdf5_file_path, 'r')
        self.tiles = [name for name in self.hdf5.keys() if all(m in self.hdf5[name] for m in MODALITIES_3)]
        self.patch_size = patch_size
        self.target_size = target_size
        self.length = length

    def __len__(self):
        return self.length

    def __getitem__(self, idx):
        tile_name = random.choice(self.tiles)
        tile = self.hdf5[tile_name]
        h, w = tile[MODALITIES_3[0]].shape[:2]
        y = random.randint(0, h - self.patch_size)
        x = random.randint(0, w - self.patch_size)

        input_channels = []
        for key in MODALITIES_3:
            arr = tile[key][y:y+self.patch_size, x:x+self.patch_size, :]
            arr = normalize(arr)
            input_channels.append(arr)
        input_patch = np.concatenate(input_channels, axis=2)
        label = tile[LABEL_KEY][y:y+self.patch_size, x:x+self.patch_size]
        if label.ndim == 3:
            label = label[:, :, 0] if label.shape[2] == 1 else np.argmax(label, axis=2)
        input_patch, label = augment_patch(input_patch, label)
        input_tensor = torch.tensor(np.ascontiguousarray(input_patch)).permute(2, 0, 1).float()
        label_tensor = torch.tensor(np.ascontiguousarray(label), dtype=torch.long)
        return input_tensor, label_tensor

    def close(self):
        self.hdf5.close()


In [6]:
from sklearn.metrics import (
    accuracy_score, 
    f1_score, 
    jaccard_score, 
    precision_score, 
    recall_score, 
    confusion_matrix
)

def segmentation_metrics(y_true, y_pred, num_classes=2):
    y_true = y_true.ravel()
    y_pred = y_pred.ravel()

    labels = list(range(num_classes))
    cm = confusion_matrix(y_true, y_pred, labels=labels)

    # Safe scoring: missing classes won't crash
    per_class_iou  = jaccard_score (y_true, y_pred, average=None, labels=labels, zero_division=0)
    mean_iou       = jaccard_score (y_true, y_pred, average="macro", labels=labels, zero_division=0)
    per_class_dice = f1_score      (y_true, y_pred, average=None, labels=labels, zero_division=0)
    mean_dice      = f1_score      (y_true, y_pred, average="macro", labels=labels, zero_division=0)
    precision      = precision_score(y_true, y_pred, average="macro", labels=labels, zero_division=0)
    recall         = recall_score   (y_true, y_pred, average="macro", labels=labels, zero_division=0)
    pixel_acc      = accuracy_score (y_true, y_pred)

    return {
        "pixel_acc": pixel_acc,
        "mean_iou": mean_iou,
        "mean_dice": mean_dice,
        "per_class_iou": per_class_iou,     # np.ndarray, length=num_classes
        "per_class_dice": per_class_dice,   # np.ndarray, length=num_classes
        "precision": precision,
        "recall": recall,
        "confusion_matrix": cm              # shape: (num_classes, num_classes)
    }


In [7]:
class EarlyStopping:
    """
    Early-stop when monitored metric doesn't improve after `patience` epochs.
    mode='max' for metrics like mIoU; set delta to require minimal improvement.
    """
    def __init__(self, patience=10, mode='max', min_delta=0.0):
        self.patience = patience
        self.mode = mode
        self.min_delta = min_delta
        self.best = None
        self.counter = 0
        self.should_stop = False

    def step(self, current):
        if self.best is None:
            self.best = current
            return False

        improved = (current > self.best + self.min_delta) if self.mode == 'max' else (current < self.best - self.min_delta)
        if improved:
            self.best = current
            self.counter = 0
        else:
            self.counter += 1
            if self.counter >= self.patience:
                self.should_stop = True
        return self.should_stop


In [8]:
import torch.optim as optim

def build_optimizer(model, opt_cfg=None):
    opt_cfg = opt_cfg or {"name": "AdamW", "lr": 3e-6, "weight_decay": 1e-4}
    name = opt_cfg.get("name", "AdamW").lower()
    lr = opt_cfg.get("lr", 3e-6)
    wd = opt_cfg.get("weight_decay", 1e-4)
    momentum = opt_cfg.get("momentum", 0.9)

    if name == "sgd":
        return optim.SGD(model.parameters(), lr=lr, weight_decay=wd, momentum=momentum, nesterov=True)
    elif name == "adam":
        return optim.Adam(model.parameters(), lr=lr, weight_decay=wd)
    else:
        return optim.AdamW(model.parameters(), lr=lr, weight_decay=wd)

def build_scheduler(optimizer, sch_cfg=None):
    """
    Default: ReduceLROnPlateau on val mIoU (mode='max').
    If you want cosine/step later, we can expand this switch.
    """
    sch_cfg = sch_cfg or {"name": "ReduceLROnPlateau", "factor": 0.5, "patience": 5, "threshold": 1e-4, "min_lr": 1e-7}
    name = sch_cfg.get("name", "ReduceLROnPlateau").lower()

    if name == "reducelronplateau":
        return optim.lr_scheduler.ReduceLROnPlateau(
            optimizer,
            mode="max",
            factor=sch_cfg.get("factor", 0.5),
            patience=sch_cfg.get("patience", 5),
            threshold=sch_cfg.get("threshold", 1e-4),
            cooldown=sch_cfg.get("cooldown", 0),
            min_lr=sch_cfg.get("min_lr", 1e-7)
        )
    elif name == "cosineannealing":
        return optim.lr_scheduler.CosineAnnealingLR(
            optimizer, T_max=sch_cfg.get("T_max", 50), eta_min=sch_cfg.get("min_lr", 1e-7)
        )
    elif name == "steplr":
        return optim.lr_scheduler.StepLR(
            optimizer, step_size=sch_cfg.get("step_size", 30), gamma=sch_cfg.get("gamma", 0.1)
        )
    else:
        return None


In [9]:
from tqdm import tqdm
def train_epoch(loader, model, criterion, optimizer, device):
    model.train()
    total_loss = 0
    for x, y in tqdm(loader):
        x, y = x.to(device), y.to(device)
        optimizer.zero_grad()
        outputs = model(x)
        loss = criterion(outputs, y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    return total_loss/len(loader)

def eval_epoch(loader, model, criterion, device):
    model.eval()
    total_loss = 0
    correct, total = 0, 0
    with torch.no_grad():
        for x, y in loader:
            x, y = x.to(device), y.to(device)
            outputs = model(x)
            loss = criterion(outputs, y)
            total_loss += loss.item()
            preds = torch.argmax(outputs, dim=1)
            correct += (preds == y).sum().item()
            total += y.numel()
    return total_loss/len(loader), correct/total


In [10]:
TRAIN_PATH = "/home/goblin/dataset/20230905_train_global_ps384.hdf5"
VAL_PATH = "/home/goblin/dataset/20230905_val_global_ps384.hdf5"
TEST_PATH = "/home/goblin/dataset/20230905_test_global_ps384.hdf5"
WEIGHT_DIR = "weights/NIRD_Config"
os.makedirs(WEIGHT_DIR, exist_ok=True)

dataset_train = GlacierHDF5PatchDataset3(TRAIN_PATH, length=2000)
dataset_val = GlacierHDF5PatchDataset3(VAL_PATH, length = 500)
dataset_test = GlacierHDF5PatchDataset3(TEST_PATH, length = 800)
train_loader= DataLoader(dataset_train, batch_size=30, shuffle=True)
dev_loader = DataLoader(dataset_val, batch_size=30, shuffle=False)
dev_loader = DataLoader(dataset_test, batch_size=30, shuffle=False)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')



In [None]:
import torch
import time
from thop import profile
from torchinfo import summary

def analyze_model(model, input_size=(1, 11, 256, 256), device=None):
    # --- Device selection ---
    device = device or ("cuda" if torch.cuda.is_available() else "cpu")
    print(f"[INFO] Using device: {device}")

    # --- Move model and input to device ---
    model = model.to(device)
    dummy_input = torch.randn(*input_size).to(device)

    # --- Verify device consistency ---
    if next(model.parameters()).device != dummy_input.device:
        raise RuntimeError(f"Model and input device mismatch: model on {next(model.parameters()).device}, input on {dummy_input.device}")

    # --- FLOPs & MACs ---
    try:
        macs, params = profile(model, inputs=(dummy_input,), verbose=False)
        flops = 2 * macs  # 1 MAC = 2 FLOPs
    except Exception as e:
        print(f"[ERROR] thop profiling failed: {e}")
        macs, params, flops = 0, 0, 0  # Fallback values

    # --- Torchinfo summary ---
    try:
        print(summary(model, input_size=input_size, device=device))
    except Exception as e:
        print(f"[ERROR] torchinfo summary failed: {e}")

    # --- Inference time ---
    model.eval()
    with torch.no_grad():
        # Warm-up
        for _ in range(5):
            _ = model(dummy_input)
        torch.cuda.synchronize() if device.startswith("cuda") else None  # Sync for GPU
        start = time.time()
        for _ in range(20):
            _ = model(dummy_input)
        torch.cuda.synchronize() if device.startswith("cuda") else None  # Sync for GPU
        end = time.time()
    avg_time = (end - start) / 20 * 1000  # Convert to ms/sample

    return {
        "device": device,
        "params": params,
        "macs": macs / 1e6,  # Convert to millions
        "flops": flops / 1e6,  # Convert to millions
        "avg_inference_time_ms": avg_time
    }

# Example usage
try:
    model = SegFormer(num_classes=2, in_chans=11)  # Ensure SegFormer is defined
    stats = analyze_model(model, input_size=(1, 11, 256, 256), device="cpu")
    print(stats)
except Exception as e:
    print(f"[ERROR] Analysis failed: {e}")

[INFO] Using device: cpu
[ERROR] thop profiling failed: Pointer argument (at 0) cannot be accessed from Triton (cpu tensor?)
[ERROR] torchinfo summary failed: Failed to run torchinfo. See above stack traces for more details. Executed layers up to: [OverlapPatchEmbedInvo: 4, Involution2D: 5, AvgPool2d: 6, Conv2d: 6, ReLU: 6, Conv2d: 6, Conv2d: 5, BatchNorm2d: 5]
[ERROR] Analysis failed: Pointer argument (at 0) cannot be accessed from Triton (cpu tensor?)


In [11]:
model_configs = [
    {
        'name': 'Mamba-Involution-B0-default',
        'mixer_type': 'Mamba',
        'stem_type': 'Involution',
        'backbone_variant': 'mit_b0',
        'embed_dims': [16, 32, 64, 128],
        'depths': [1, 1, 1, 1],
        'use_invo_stages': (True, True, True, True) # Use Involution for all stages with Involution stem
    },
    {
        'name': 'Mamba-Convolution-B0-default',
        'mixer_type': 'Mamba',
        'stem_type': 'Convolution',
        'backbone_variant': 'mit_b0',
        'embed_dims': [16, 32, 64, 128],
        'depths': [1, 1, 1, 1],
        'use_invo_stages': (False, False, False, False) # Use Convolution for all stages with Convolution stem
    },
    {
        'name': 'Mamba-Involution-B0-shallow',
        'mixer_type': 'Mamba',
        'stem_type': 'Involution',
        'backbone_variant': 'mit_b0', # Still based on B0 structure
        'embed_dims': [16, 32, 64, 128],
        'depths': [1, 1, 1, 1], # Example of a slightly shallower variant
        'use_invo_stages': (True, True, True, True)
    },
     {
        'name': 'Mamba-Involution-B0-deep',
        'mixer_type': 'Mamba',
        'stem_type': 'Involution',
        'backbone_variant': 'mit_b0', # Still based on B0 structure
        'embed_dims': [16, 32, 64, 128],
        'depths': [2, 2, 2, 2], # Example of a slightly deeper variant
        'use_invo_stages': (True, True, True, True)
    },
    {
        'name': 'Mamba-Involution-B0-wider',
        'mixer_type': 'Mamba',
        'stem_type': 'Involution',
        'backbone_variant': 'mit_b0', # Still based on B0 structure
        'embed_dims': [32, 64, 128, 256], # Example of a wider variant
        'depths': [1, 1, 1, 1],
        'use_invo_stages': (True, True, True, True)
    },
     {
        'name': 'Mamba-Convolution-B0-wider',
        'mixer_type': 'Mamba',
        'stem_type': 'Convolution',
        'backbone_variant': 'mit_b0', # Still based on B0 structure
        'embed_dims': [32, 64, 128, 256], # Example of a wider variant
        'depths': [1, 1, 1, 1],
        'use_invo_stages': (False, False, False, False)
    },
]

print(model_configs)

[{'name': 'Mamba-Involution-B0-default', 'mixer_type': 'Mamba', 'stem_type': 'Involution', 'backbone_variant': 'mit_b0', 'embed_dims': [16, 32, 64, 128], 'depths': [1, 1, 1, 1], 'use_invo_stages': (True, True, True, True)}, {'name': 'Mamba-Convolution-B0-default', 'mixer_type': 'Mamba', 'stem_type': 'Convolution', 'backbone_variant': 'mit_b0', 'embed_dims': [16, 32, 64, 128], 'depths': [1, 1, 1, 1], 'use_invo_stages': (False, False, False, False)}, {'name': 'Mamba-Involution-B0-shallow', 'mixer_type': 'Mamba', 'stem_type': 'Involution', 'backbone_variant': 'mit_b0', 'embed_dims': [16, 32, 64, 128], 'depths': [1, 1, 1, 1], 'use_invo_stages': (True, True, True, True)}, {'name': 'Mamba-Involution-B0-deep', 'mixer_type': 'Mamba', 'stem_type': 'Involution', 'backbone_variant': 'mit_b0', 'embed_dims': [16, 32, 64, 128], 'depths': [2, 2, 2, 2], 'use_invo_stages': (True, True, True, True)}, {'name': 'Mamba-Involution-B0-wider', 'mixer_type': 'Mamba', 'stem_type': 'Involution', 'backbone_varian

In [None]:
import os
import time
import numpy as np
from torch.utils.tensorboard import SummaryWriter

def _format_list(vals):
    # pipe-separated with 4-decimal precision
    return "[" + "|".join(f"{float(v):.4f}" for v in vals) + "]"

def _flatten_cm(cm):
    return "[" + "|".join(str(int(x)) for x in cm.flatten().tolist()) + "]"

def train_model_for_config(
    config,
    dataset_train,
    dataset_val,
    device="cuda",
    epochs_default=50,
    batch_size_default=30,
    num_classes=2,
    base_run_dir="runs",
    base_weight_dir="weights",
    save_every=5,              # e.g., 10 or None
    es_patience_default=10
):
    name = config['name']
    run_dir = os.path.join(base_run_dir, name)
    weight_dir = os.path.join(base_weight_dir, name)
    os.makedirs(run_dir, exist_ok=True)
    os.makedirs(weight_dir, exist_ok=True)

    # Build loaders (allow config overrides)
    epochs = config.get("epochs", epochs_default)
    batch_size = config.get("batch_size", batch_size_default)

    train_loader = DataLoader(dataset_train, batch_size=batch_size, shuffle=True)
    val_loader   = DataLoader(dataset_val,   batch_size=batch_size, shuffle=False)

    # Build model
    in_chans = dataset_train[0][0].shape[0]
    model = SegFormer(
        num_classes=num_classes,
        variant=config['backbone_variant'],
        drop_path_rate=0.1,
        in_chans=in_chans,
        embed_dims=config['embed_dims'],
        depths=config['depths'],
        use_invo_stages=config['use_invo_stages']
    ).to(device)

    # Optimizer & scheduler
    optimizer = build_optimizer(model, config.get("optimizer"))
    scheduler = build_scheduler(optimizer, config.get("scheduler"))

    # Criterion
    criterion = nn.CrossEntropyLoss()

    # TensorBoard writer
    writer = SummaryWriter(run_dir)

    # Optional: computational analysis once (safe formatting)
    try:
        computational_metrics = analyze_model(model, input_size=(1, in_chans, 256, 256), device=device)
    except Exception as e:
        print(f"[WARN] analyze_model failed: {e}")
        computational_metrics = {}
    # Log static compute metrics if present
    if computational_metrics:
        if 'params' in computational_metrics:       writer.add_scalar("Computational/Params", computational_metrics['params'], 0)
        if 'macs' in computational_metrics:         writer.add_scalar("Computational/MACs_M", computational_metrics['macs'], 0)
        if 'flops' in computational_metrics:        writer.add_scalar("Computational/FLOPs_M", computational_metrics['flops'], 0)
        if 'avg_inference_time_ms' in computational_metrics:
            writer.add_scalar("Computational/AvgInference_ms", computational_metrics['avg_inference_time_ms'], 0)

    # Text log
    log_path = os.path.join(weight_dir, "training_log.txt")
    if not os.path.exists(log_path):
        with open(log_path, "w") as f:
            f.write("epoch,train_loss,val_pixel_acc,val_mIoU,val_dice,val_precision,val_recall,lr,per_class_iou[],per_class_dice[],confusion_matrix_flat\n")

    # Early stopping
    es_cfg = config.get("early_stopping", {"monitor": "val_mIoU", "patience": es_patience_default})
    es_patience = es_cfg.get("patience", es_patience_default)
    early_stopper = EarlyStopping(patience=es_patience, mode='max', min_delta=0.0)

    best_iou = -1.0
    best_epoch = -1
    start_time = time.time()

    for epoch in range(1, epochs + 1):
        # --- Train ---
        train_loss = train_epoch(train_loader, model, criterion, optimizer, device)

        # --- Validate ---
        model.eval()
        all_preds, all_labels = [], []
        with torch.no_grad():
            for x, y in val_loader:
                x, y = x.to(device), y.to(device)
                logits = model(x)
                preds  = torch.argmax(logits, dim=1)
                all_preds.append(preds.cpu().numpy())
                all_labels.append(y.cpu().numpy())

        y_true = np.concatenate(all_labels, axis=None)
        y_pred = np.concatenate(all_preds, axis=None)
        metrics = segmentation_metrics(y_true, y_pred, num_classes=num_classes)

        val_acc   = float(metrics["pixel_acc"])
        val_miou  = float(metrics["mean_iou"])
        val_dice  = float(metrics["mean_dice"])
        val_prec  = float(metrics["precision"])
        val_rec   = float(metrics["recall"])
        cls_iou   = metrics["per_class_iou"]
        cls_dice  = metrics["per_class_dice"]
        cm        = metrics["confusion_matrix"]

        # --- LR & Scheduler ---
        current_lr = optimizer.param_groups[0]['lr']
        if scheduler is not None:
            # ReduceLROnPlateau expects a value; we monitor mIoU
            scheduler.step(val_miou)
            current_lr = optimizer.param_groups[0]['lr']

        # --- TensorBoard ---
        writer.add_scalar("Loss/train",    train_loss, epoch)
        writer.add_scalar("Val/PixelAcc",  val_acc,    epoch)
        writer.add_scalar("Val/mIoU",      val_miou,   epoch)
        writer.add_scalar("Val/Dice",      val_dice,   epoch)
        writer.add_scalar("Val/Precision", val_prec,   epoch)
        writer.add_scalar("Val/Recall",    val_rec,    epoch)
        writer.add_scalar("LR/lr",         current_lr, epoch)

        # --- Text log (CSV line) ---
        with open(log_path, "a") as f:
            f.write(f"{epoch},{train_loss:.6f},{val_acc:.6f},{val_miou:.6f},{val_dice:.6f},{val_prec:.6f},{val_rec:.6f},{current_lr:.8e},{_format_list(cls_iou)},{_format_list(cls_dice)},{_flatten_cm(cm)}\n")

        # --- Checkpointing ---
        if val_miou > best_iou:
            best_iou = val_miou
            best_epoch = epoch
            best_ckpt_path = os.path.join(weight_dir, "best_model.pth")
            torch.save(model.state_dict(), best_ckpt_path)
            print(f"[{name}] ✔ New best mIoU={best_iou:.4f} at epoch {epoch} → saved {best_ckpt_path}")

        if save_every is not None and epoch % save_every == 0:
            ep_path = os.path.join(weight_dir, f"epoch_{epoch:03d}.pth")
            torch.save(model.state_dict(), ep_path)

        # --- Early stopping ---
        if early_stopper.step(val_miou):
            print(f"[{name}] ⏹ Early stopping at epoch {epoch} (no improvement for {es_patience} epochs).")
            break

    total_time = time.time() - start_time
    # Final summary in log file
    with open(log_path, "a") as f:
        f.write(f"BEST_EPOCH={best_epoch}\nBEST_mIoU={best_iou:.6f}\nTOTAL_TIME_SEC={total_time:.2f}\n")

    writer.flush()
    writer.close()

    # Print run summary
    print(f"[{name}] Finished. Best mIoU={best_iou:.4f} at epoch {best_epoch}. Log: {log_path}")


In [14]:
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
NUM_CLASSES = 2

# Default knobs (used if a config doesn’t specify them)
DEFAULTS = {
    "epochs": 30,
    "batch_size": 30,
    "optimizer": {"name": "AdamW", "lr": 1e-4, "weight_decay": 1e-4},
    "scheduler": {"name": "ReduceLROnPlateau", "factor": 0.5, "patience": 5, "min_lr": 1e-7},
    "early_stopping": {"patience": 12},
    "save_every": 5,
}

for config in model_configs:
    print(f"\n=== Processing configuration: {config['name']} ===")

    # Merge defaults with per-config overrides
    merged_cfg = {**DEFAULTS, **config}

    train_model_for_config(
        config=merged_cfg,
        dataset_train=dataset_train,
        dataset_val=dataset_val,
        device=DEVICE,
        epochs_default=merged_cfg["epochs"],
        batch_size_default=merged_cfg["batch_size"],
        num_classes=NUM_CLASSES,
        base_run_dir="runs",
        base_weight_dir=WEIGHT_DIR,
        save_every=merged_cfg.get("save_every", None),
        es_patience_default=merged_cfg["early_stopping"]["patience"]
    )



=== Processing configuration: Mamba-Involution-B0-default ===
[INFO] Using device: cuda
Layer (type:depth-idx)                             Output Shape              Param #
SegFormer                                          [1, 2, 256, 256]          --
├─MixVisionTransformer: 1-1                        [1, 16, 128, 128]         --
│    └─ModuleList: 2-1                             --                        --
│    │    └─MiTStage: 3-1                          [1, 16, 128, 128]         5,795
│    │    └─MiTStage: 3-2                          [1, 32, 64, 64]           19,057
│    │    └─MiTStage: 3-3                          [1, 64, 32, 32]           68,441
│    │    └─MiTStage: 3-4                          [1, 128, 16, 16]          258,217
├─SegFormerHead: 1-2                               [1, 2, 128, 128]          --
│    └─ModuleList: 2-2                             --                        --
│    │    └─Sequential: 3-5                        [1, 128, 128, 128]        2,304
│    │ 

100%|██████████| 67/67 [01:40<00:00,  1.49s/it]


[Mamba-Involution-B0-default] ✔ New best mIoU=0.6161 at epoch 1 → saved weights/NIRD_Config/Mamba-Involution-B0-default/best_model.pth


100%|██████████| 67/67 [01:46<00:00,  1.59s/it]


[Mamba-Involution-B0-default] ✔ New best mIoU=0.6280 at epoch 2 → saved weights/NIRD_Config/Mamba-Involution-B0-default/best_model.pth


100%|██████████| 67/67 [01:37<00:00,  1.45s/it]


[Mamba-Involution-B0-default] ✔ New best mIoU=0.6581 at epoch 3 → saved weights/NIRD_Config/Mamba-Involution-B0-default/best_model.pth


100%|██████████| 67/67 [01:35<00:00,  1.43s/it]
100%|██████████| 67/67 [01:33<00:00,  1.40s/it]


[Mamba-Involution-B0-default] ✔ New best mIoU=0.7267 at epoch 5 → saved weights/NIRD_Config/Mamba-Involution-B0-default/best_model.pth


100%|██████████| 67/67 [01:34<00:00,  1.41s/it]


[Mamba-Involution-B0-default] ✔ New best mIoU=0.9447 at epoch 6 → saved weights/NIRD_Config/Mamba-Involution-B0-default/best_model.pth


100%|██████████| 67/67 [01:34<00:00,  1.41s/it]


[Mamba-Involution-B0-default] ✔ New best mIoU=0.9452 at epoch 7 → saved weights/NIRD_Config/Mamba-Involution-B0-default/best_model.pth


100%|██████████| 67/67 [01:36<00:00,  1.44s/it]


[Mamba-Involution-B0-default] ✔ New best mIoU=0.9665 at epoch 8 → saved weights/NIRD_Config/Mamba-Involution-B0-default/best_model.pth


100%|██████████| 67/67 [01:33<00:00,  1.40s/it]


[Mamba-Involution-B0-default] ✔ New best mIoU=0.9816 at epoch 9 → saved weights/NIRD_Config/Mamba-Involution-B0-default/best_model.pth


100%|██████████| 67/67 [01:34<00:00,  1.41s/it]


[Mamba-Involution-B0-default] ✔ New best mIoU=0.9845 at epoch 10 → saved weights/NIRD_Config/Mamba-Involution-B0-default/best_model.pth


100%|██████████| 67/67 [01:34<00:00,  1.41s/it]
100%|██████████| 67/67 [01:33<00:00,  1.40s/it]


[Mamba-Involution-B0-default] ✔ New best mIoU=0.9858 at epoch 12 → saved weights/NIRD_Config/Mamba-Involution-B0-default/best_model.pth


100%|██████████| 67/67 [01:34<00:00,  1.41s/it]


[Mamba-Involution-B0-default] ✔ New best mIoU=0.9903 at epoch 13 → saved weights/NIRD_Config/Mamba-Involution-B0-default/best_model.pth


100%|██████████| 67/67 [01:35<00:00,  1.43s/it]
100%|██████████| 67/67 [01:46<00:00,  1.59s/it]
100%|██████████| 67/67 [01:34<00:00,  1.40s/it]
100%|██████████| 67/67 [01:33<00:00,  1.40s/it]
100%|██████████| 67/67 [01:34<00:00,  1.41s/it]
100%|██████████| 67/67 [01:35<00:00,  1.42s/it]
100%|██████████| 67/67 [01:33<00:00,  1.39s/it]


[Mamba-Involution-B0-default] ✔ New best mIoU=0.9921 at epoch 20 → saved weights/NIRD_Config/Mamba-Involution-B0-default/best_model.pth


100%|██████████| 67/67 [01:33<00:00,  1.39s/it]
100%|██████████| 67/67 [01:34<00:00,  1.41s/it]
100%|██████████| 67/67 [01:34<00:00,  1.41s/it]


[Mamba-Involution-B0-default] ✔ New best mIoU=0.9922 at epoch 23 → saved weights/NIRD_Config/Mamba-Involution-B0-default/best_model.pth


100%|██████████| 67/67 [01:35<00:00,  1.42s/it]
100%|██████████| 67/67 [01:33<00:00,  1.40s/it]
100%|██████████| 67/67 [01:34<00:00,  1.41s/it]
100%|██████████| 67/67 [01:46<00:00,  1.59s/it]
100%|██████████| 67/67 [01:50<00:00,  1.64s/it]
100%|██████████| 67/67 [01:35<00:00,  1.42s/it]
100%|██████████| 67/67 [01:50<00:00,  1.66s/it]


[Mamba-Involution-B0-default] Finished. Best mIoU=0.9922 at epoch 23. Log: weights/NIRD_Config/Mamba-Involution-B0-default/training_log.txt

=== Processing configuration: Mamba-Convolution-B0-default ===
[INFO] Using device: cuda
Layer (type:depth-idx)                             Output Shape              Param #
SegFormer                                          [1, 2, 256, 256]          --
├─MixVisionTransformer: 1-1                        [1, 16, 128, 128]         --
│    └─ModuleList: 2-1                             --                        --
│    │    └─MiTStage: 3-1                          [1, 16, 128, 128]         7,136
│    │    └─MiTStage: 3-2                          [1, 32, 64, 64]           23,008
│    │    └─MiTStage: 3-3                          [1, 64, 32, 32]           84,416
│    │    └─MiTStage: 3-4                          [1, 128, 16, 16]          322,432
├─SegFormerHead: 1-2                               [1, 2, 128, 128]          --
│    └─ModuleList: 2-2       

100%|██████████| 67/67 [01:53<00:00,  1.70s/it]


[Mamba-Convolution-B0-default] ✔ New best mIoU=0.9734 at epoch 1 → saved weights/NIRD_Config/Mamba-Convolution-B0-default/best_model.pth


100%|██████████| 67/67 [01:34<00:00,  1.41s/it]


[Mamba-Convolution-B0-default] ✔ New best mIoU=0.9814 at epoch 2 → saved weights/NIRD_Config/Mamba-Convolution-B0-default/best_model.pth


100%|██████████| 67/67 [01:56<00:00,  1.74s/it]


[Mamba-Convolution-B0-default] ✔ New best mIoU=0.9862 at epoch 3 → saved weights/NIRD_Config/Mamba-Convolution-B0-default/best_model.pth


100%|██████████| 67/67 [01:33<00:00,  1.40s/it]


[Mamba-Convolution-B0-default] ✔ New best mIoU=0.9890 at epoch 4 → saved weights/NIRD_Config/Mamba-Convolution-B0-default/best_model.pth


100%|██████████| 67/67 [01:33<00:00,  1.40s/it]


[Mamba-Convolution-B0-default] ✔ New best mIoU=0.9897 at epoch 5 → saved weights/NIRD_Config/Mamba-Convolution-B0-default/best_model.pth


100%|██████████| 67/67 [01:33<00:00,  1.40s/it]


[Mamba-Convolution-B0-default] ✔ New best mIoU=0.9901 at epoch 6 → saved weights/NIRD_Config/Mamba-Convolution-B0-default/best_model.pth


100%|██████████| 67/67 [01:33<00:00,  1.40s/it]


[Mamba-Convolution-B0-default] ✔ New best mIoU=0.9902 at epoch 7 → saved weights/NIRD_Config/Mamba-Convolution-B0-default/best_model.pth


100%|██████████| 67/67 [01:33<00:00,  1.40s/it]


[Mamba-Convolution-B0-default] ✔ New best mIoU=0.9913 at epoch 8 → saved weights/NIRD_Config/Mamba-Convolution-B0-default/best_model.pth


100%|██████████| 67/67 [01:33<00:00,  1.39s/it]


[Mamba-Convolution-B0-default] ✔ New best mIoU=0.9922 at epoch 9 → saved weights/NIRD_Config/Mamba-Convolution-B0-default/best_model.pth


100%|██████████| 67/67 [01:33<00:00,  1.39s/it]


[Mamba-Convolution-B0-default] ✔ New best mIoU=0.9928 at epoch 10 → saved weights/NIRD_Config/Mamba-Convolution-B0-default/best_model.pth


100%|██████████| 67/67 [01:33<00:00,  1.39s/it]


[Mamba-Convolution-B0-default] ✔ New best mIoU=0.9932 at epoch 11 → saved weights/NIRD_Config/Mamba-Convolution-B0-default/best_model.pth


100%|██████████| 67/67 [01:34<00:00,  1.41s/it]


[Mamba-Convolution-B0-default] ✔ New best mIoU=0.9949 at epoch 12 → saved weights/NIRD_Config/Mamba-Convolution-B0-default/best_model.pth


100%|██████████| 67/67 [01:34<00:00,  1.40s/it]
100%|██████████| 67/67 [01:49<00:00,  1.63s/it]
100%|██████████| 67/67 [01:33<00:00,  1.40s/it]
100%|██████████| 67/67 [01:32<00:00,  1.38s/it]
100%|██████████| 67/67 [01:34<00:00,  1.40s/it]


[Mamba-Convolution-B0-default] ✔ New best mIoU=0.9952 at epoch 17 → saved weights/NIRD_Config/Mamba-Convolution-B0-default/best_model.pth


100%|██████████| 67/67 [01:32<00:00,  1.38s/it]
100%|██████████| 67/67 [01:34<00:00,  1.40s/it]


[Mamba-Convolution-B0-default] ✔ New best mIoU=0.9953 at epoch 19 → saved weights/NIRD_Config/Mamba-Convolution-B0-default/best_model.pth


100%|██████████| 67/67 [01:32<00:00,  1.39s/it]


[Mamba-Convolution-B0-default] ✔ New best mIoU=0.9956 at epoch 20 → saved weights/NIRD_Config/Mamba-Convolution-B0-default/best_model.pth


100%|██████████| 67/67 [01:33<00:00,  1.39s/it]
100%|██████████| 67/67 [01:34<00:00,  1.41s/it]


[Mamba-Convolution-B0-default] ✔ New best mIoU=0.9961 at epoch 22 → saved weights/NIRD_Config/Mamba-Convolution-B0-default/best_model.pth


100%|██████████| 67/67 [01:33<00:00,  1.39s/it]
100%|██████████| 67/67 [01:33<00:00,  1.39s/it]


[Mamba-Convolution-B0-default] ✔ New best mIoU=0.9965 at epoch 24 → saved weights/NIRD_Config/Mamba-Convolution-B0-default/best_model.pth


100%|██████████| 67/67 [01:33<00:00,  1.39s/it]
100%|██████████| 67/67 [01:32<00:00,  1.38s/it]
100%|██████████| 67/67 [01:45<00:00,  1.58s/it]
100%|██████████| 67/67 [01:45<00:00,  1.57s/it]


[Mamba-Convolution-B0-default] ✔ New best mIoU=0.9968 at epoch 28 → saved weights/NIRD_Config/Mamba-Convolution-B0-default/best_model.pth


100%|██████████| 67/67 [01:45<00:00,  1.57s/it]
100%|██████████| 67/67 [01:32<00:00,  1.38s/it]


[Mamba-Convolution-B0-default] Finished. Best mIoU=0.9968 at epoch 28. Log: weights/NIRD_Config/Mamba-Convolution-B0-default/training_log.txt

=== Processing configuration: Mamba-Involution-B0-shallow ===
[INFO] Using device: cuda
Layer (type:depth-idx)                             Output Shape              Param #
SegFormer                                          [1, 2, 256, 256]          --
├─MixVisionTransformer: 1-1                        [1, 16, 128, 128]         --
│    └─ModuleList: 2-1                             --                        --
│    │    └─MiTStage: 3-1                          [1, 16, 128, 128]         5,795
│    │    └─MiTStage: 3-2                          [1, 32, 64, 64]           19,057
│    │    └─MiTStage: 3-3                          [1, 64, 32, 32]           68,441
│    │    └─MiTStage: 3-4                          [1, 128, 16, 16]          258,217
├─SegFormerHead: 1-2                               [1, 2, 128, 128]          --
│    └─ModuleList: 2-2      

100%|██████████| 67/67 [01:33<00:00,  1.40s/it]


[Mamba-Involution-B0-shallow] ✔ New best mIoU=0.9245 at epoch 1 → saved weights/NIRD_Config/Mamba-Involution-B0-shallow/best_model.pth


100%|██████████| 67/67 [01:34<00:00,  1.41s/it]


[Mamba-Involution-B0-shallow] ✔ New best mIoU=0.9652 at epoch 2 → saved weights/NIRD_Config/Mamba-Involution-B0-shallow/best_model.pth


100%|██████████| 67/67 [01:33<00:00,  1.40s/it]


[Mamba-Involution-B0-shallow] ✔ New best mIoU=0.9684 at epoch 3 → saved weights/NIRD_Config/Mamba-Involution-B0-shallow/best_model.pth


100%|██████████| 67/67 [01:33<00:00,  1.40s/it]


[Mamba-Involution-B0-shallow] ✔ New best mIoU=0.9863 at epoch 4 → saved weights/NIRD_Config/Mamba-Involution-B0-shallow/best_model.pth


100%|██████████| 67/67 [02:01<00:00,  1.81s/it]


[Mamba-Involution-B0-shallow] ✔ New best mIoU=0.9902 at epoch 5 → saved weights/NIRD_Config/Mamba-Involution-B0-shallow/best_model.pth


100%|██████████| 67/67 [01:34<00:00,  1.41s/it]


[Mamba-Involution-B0-shallow] ✔ New best mIoU=0.9907 at epoch 6 → saved weights/NIRD_Config/Mamba-Involution-B0-shallow/best_model.pth


100%|██████████| 67/67 [01:48<00:00,  1.62s/it]
100%|██████████| 67/67 [01:33<00:00,  1.40s/it]


[Mamba-Involution-B0-shallow] ✔ New best mIoU=0.9912 at epoch 8 → saved weights/NIRD_Config/Mamba-Involution-B0-shallow/best_model.pth


100%|██████████| 67/67 [01:34<00:00,  1.41s/it]


[Mamba-Involution-B0-shallow] ✔ New best mIoU=0.9920 at epoch 9 → saved weights/NIRD_Config/Mamba-Involution-B0-shallow/best_model.pth


100%|██████████| 67/67 [01:34<00:00,  1.42s/it]
100%|██████████| 67/67 [01:33<00:00,  1.40s/it]
100%|██████████| 67/67 [01:34<00:00,  1.41s/it]
100%|██████████| 67/67 [01:44<00:00,  1.56s/it]
100%|██████████| 67/67 [01:45<00:00,  1.58s/it]


[Mamba-Involution-B0-shallow] ✔ New best mIoU=0.9927 at epoch 14 → saved weights/NIRD_Config/Mamba-Involution-B0-shallow/best_model.pth


100%|██████████| 67/67 [01:33<00:00,  1.39s/it]


[Mamba-Involution-B0-shallow] ✔ New best mIoU=0.9933 at epoch 15 → saved weights/NIRD_Config/Mamba-Involution-B0-shallow/best_model.pth


100%|██████████| 67/67 [01:47<00:00,  1.60s/it]
100%|██████████| 67/67 [01:46<00:00,  1.58s/it]


[Mamba-Involution-B0-shallow] ✔ New best mIoU=0.9935 at epoch 17 → saved weights/NIRD_Config/Mamba-Involution-B0-shallow/best_model.pth


100%|██████████| 67/67 [01:33<00:00,  1.40s/it]
100%|██████████| 67/67 [01:33<00:00,  1.40s/it]
100%|██████████| 67/67 [01:33<00:00,  1.40s/it]


[Mamba-Involution-B0-shallow] ✔ New best mIoU=0.9939 at epoch 20 → saved weights/NIRD_Config/Mamba-Involution-B0-shallow/best_model.pth


100%|██████████| 67/67 [01:34<00:00,  1.41s/it]


[Mamba-Involution-B0-shallow] ✔ New best mIoU=0.9940 at epoch 21 → saved weights/NIRD_Config/Mamba-Involution-B0-shallow/best_model.pth


100%|██████████| 67/67 [01:33<00:00,  1.39s/it]
100%|██████████| 67/67 [02:01<00:00,  1.81s/it]
100%|██████████| 67/67 [01:34<00:00,  1.41s/it]


[Mamba-Involution-B0-shallow] ✔ New best mIoU=0.9941 at epoch 24 → saved weights/NIRD_Config/Mamba-Involution-B0-shallow/best_model.pth


100%|██████████| 67/67 [01:33<00:00,  1.39s/it]
100%|██████████| 67/67 [01:44<00:00,  1.56s/it]


[Mamba-Involution-B0-shallow] ✔ New best mIoU=0.9943 at epoch 26 → saved weights/NIRD_Config/Mamba-Involution-B0-shallow/best_model.pth


100%|██████████| 67/67 [01:34<00:00,  1.40s/it]
100%|██████████| 67/67 [01:33<00:00,  1.39s/it]


[Mamba-Involution-B0-shallow] ✔ New best mIoU=0.9945 at epoch 28 → saved weights/NIRD_Config/Mamba-Involution-B0-shallow/best_model.pth


100%|██████████| 67/67 [01:44<00:00,  1.55s/it]
100%|██████████| 67/67 [01:47<00:00,  1.60s/it]


[Mamba-Involution-B0-shallow] ✔ New best mIoU=0.9948 at epoch 30 → saved weights/NIRD_Config/Mamba-Involution-B0-shallow/best_model.pth
[Mamba-Involution-B0-shallow] Finished. Best mIoU=0.9948 at epoch 30. Log: weights/NIRD_Config/Mamba-Involution-B0-shallow/training_log.txt

=== Processing configuration: Mamba-Involution-B0-deep ===
[INFO] Using device: cuda
Layer (type:depth-idx)                             Output Shape              Param #
SegFormer                                          [1, 2, 256, 256]          --
├─MixVisionTransformer: 1-1                        [1, 16, 128, 128]         --
│    └─ModuleList: 2-1                             --                        --
│    │    └─MiTStage: 3-1                          [1, 16, 128, 128]         11,315
│    │    └─MiTStage: 3-2                          [1, 32, 64, 64]           37,393
│    │    └─MiTStage: 3-3                          [1, 64, 32, 32]           134,297
│    │    └─MiTStage: 3-4                          [1, 128, 

100%|██████████| 67/67 [01:38<00:00,  1.48s/it]


[Mamba-Involution-B0-deep] ✔ New best mIoU=0.8826 at epoch 1 → saved weights/NIRD_Config/Mamba-Involution-B0-deep/best_model.pth


100%|██████████| 67/67 [01:40<00:00,  1.49s/it]


[Mamba-Involution-B0-deep] ✔ New best mIoU=0.9633 at epoch 2 → saved weights/NIRD_Config/Mamba-Involution-B0-deep/best_model.pth


100%|██████████| 67/67 [01:40<00:00,  1.50s/it]


[Mamba-Involution-B0-deep] ✔ New best mIoU=0.9754 at epoch 3 → saved weights/NIRD_Config/Mamba-Involution-B0-deep/best_model.pth


100%|██████████| 67/67 [01:52<00:00,  1.68s/it]


[Mamba-Involution-B0-deep] ✔ New best mIoU=0.9777 at epoch 4 → saved weights/NIRD_Config/Mamba-Involution-B0-deep/best_model.pth


100%|██████████| 67/67 [01:41<00:00,  1.51s/it]


[Mamba-Involution-B0-deep] ✔ New best mIoU=0.9813 at epoch 5 → saved weights/NIRD_Config/Mamba-Involution-B0-deep/best_model.pth


100%|██████████| 67/67 [01:40<00:00,  1.49s/it]


[Mamba-Involution-B0-deep] ✔ New best mIoU=0.9842 at epoch 6 → saved weights/NIRD_Config/Mamba-Involution-B0-deep/best_model.pth


100%|██████████| 67/67 [01:53<00:00,  1.70s/it]


[Mamba-Involution-B0-deep] ✔ New best mIoU=0.9850 at epoch 7 → saved weights/NIRD_Config/Mamba-Involution-B0-deep/best_model.pth


100%|██████████| 67/67 [01:40<00:00,  1.50s/it]
100%|██████████| 67/67 [01:40<00:00,  1.50s/it]


[Mamba-Involution-B0-deep] ✔ New best mIoU=0.9861 at epoch 9 → saved weights/NIRD_Config/Mamba-Involution-B0-deep/best_model.pth


100%|██████████| 67/67 [01:53<00:00,  1.69s/it]
100%|██████████| 67/67 [01:40<00:00,  1.50s/it]
100%|██████████| 67/67 [01:52<00:00,  1.67s/it]


[Mamba-Involution-B0-deep] ✔ New best mIoU=0.9874 at epoch 12 → saved weights/NIRD_Config/Mamba-Involution-B0-deep/best_model.pth


100%|██████████| 67/67 [01:53<00:00,  1.69s/it]


[Mamba-Involution-B0-deep] ✔ New best mIoU=0.9881 at epoch 13 → saved weights/NIRD_Config/Mamba-Involution-B0-deep/best_model.pth


100%|██████████| 67/67 [02:04<00:00,  1.86s/it]


[Mamba-Involution-B0-deep] ✔ New best mIoU=0.9914 at epoch 14 → saved weights/NIRD_Config/Mamba-Involution-B0-deep/best_model.pth


100%|██████████| 67/67 [01:39<00:00,  1.49s/it]
100%|██████████| 67/67 [01:40<00:00,  1.49s/it]
100%|██████████| 67/67 [01:52<00:00,  1.68s/it]
100%|██████████| 67/67 [01:39<00:00,  1.49s/it]
100%|██████████| 67/67 [01:41<00:00,  1.51s/it]
100%|██████████| 67/67 [01:40<00:00,  1.50s/it]


[Mamba-Involution-B0-deep] ✔ New best mIoU=0.9914 at epoch 20 → saved weights/NIRD_Config/Mamba-Involution-B0-deep/best_model.pth


100%|██████████| 67/67 [01:52<00:00,  1.67s/it]
100%|██████████| 67/67 [02:07<00:00,  1.90s/it]
100%|██████████| 67/67 [01:40<00:00,  1.50s/it]
100%|██████████| 67/67 [02:04<00:00,  1.85s/it]
100%|██████████| 67/67 [01:39<00:00,  1.49s/it]
100%|██████████| 67/67 [01:53<00:00,  1.70s/it]
100%|██████████| 67/67 [01:39<00:00,  1.48s/it]
100%|██████████| 67/67 [01:41<00:00,  1.52s/it]
100%|██████████| 67/67 [01:42<00:00,  1.53s/it]


[Mamba-Involution-B0-deep] ✔ New best mIoU=0.9917 at epoch 29 → saved weights/NIRD_Config/Mamba-Involution-B0-deep/best_model.pth


100%|██████████| 67/67 [01:52<00:00,  1.68s/it]


[Mamba-Involution-B0-deep] ✔ New best mIoU=0.9918 at epoch 30 → saved weights/NIRD_Config/Mamba-Involution-B0-deep/best_model.pth
[Mamba-Involution-B0-deep] Finished. Best mIoU=0.9918 at epoch 30. Log: weights/NIRD_Config/Mamba-Involution-B0-deep/training_log.txt

=== Processing configuration: Mamba-Involution-B0-wider ===
[INFO] Using device: cuda
Layer (type:depth-idx)                             Output Shape              Param #
SegFormer                                          [1, 2, 256, 256]          --
├─MixVisionTransformer: 1-1                        [1, 32, 128, 128]         --
│    └─ModuleList: 2-1                             --                        --
│    │    └─MiTStage: 3-1                          [1, 32, 128, 128]         18,835
│    │    └─MiTStage: 3-2                          [1, 64, 64, 64]           68,441
│    │    └─MiTStage: 3-3                          [1, 128, 32, 32]          258,217
│    │    └─MiTStage: 3-4                          [1, 256, 16, 16]    

100%|██████████| 67/67 [02:18<00:00,  2.07s/it]


[Mamba-Involution-B0-wider] ✔ New best mIoU=0.9029 at epoch 1 → saved weights/NIRD_Config/Mamba-Involution-B0-wider/best_model.pth


100%|██████████| 67/67 [01:40<00:00,  1.50s/it]
100%|██████████| 67/67 [02:06<00:00,  1.89s/it]


[Mamba-Involution-B0-wider] ✔ New best mIoU=0.9102 at epoch 3 → saved weights/NIRD_Config/Mamba-Involution-B0-wider/best_model.pth


100%|██████████| 67/67 [01:41<00:00,  1.51s/it]
100%|██████████| 67/67 [01:41<00:00,  1.51s/it]


[Mamba-Involution-B0-wider] ✔ New best mIoU=0.9668 at epoch 5 → saved weights/NIRD_Config/Mamba-Involution-B0-wider/best_model.pth


100%|██████████| 67/67 [01:40<00:00,  1.49s/it]
100%|██████████| 67/67 [02:03<00:00,  1.85s/it]


[Mamba-Involution-B0-wider] ✔ New best mIoU=0.9672 at epoch 7 → saved weights/NIRD_Config/Mamba-Involution-B0-wider/best_model.pth


100%|██████████| 67/67 [01:40<00:00,  1.50s/it]
100%|██████████| 67/67 [01:53<00:00,  1.70s/it]
100%|██████████| 67/67 [01:41<00:00,  1.51s/it]


[Mamba-Involution-B0-wider] ✔ New best mIoU=0.9771 at epoch 10 → saved weights/NIRD_Config/Mamba-Involution-B0-wider/best_model.pth


100%|██████████| 67/67 [02:15<00:00,  2.02s/it]


[Mamba-Involution-B0-wider] ✔ New best mIoU=0.9895 at epoch 11 → saved weights/NIRD_Config/Mamba-Involution-B0-wider/best_model.pth


100%|██████████| 67/67 [02:05<00:00,  1.87s/it]
100%|██████████| 67/67 [01:41<00:00,  1.51s/it]


[Mamba-Involution-B0-wider] ✔ New best mIoU=0.9917 at epoch 13 → saved weights/NIRD_Config/Mamba-Involution-B0-wider/best_model.pth


100%|██████████| 67/67 [01:40<00:00,  1.51s/it]
100%|██████████| 67/67 [01:40<00:00,  1.51s/it]


[Mamba-Involution-B0-wider] ✔ New best mIoU=0.9923 at epoch 15 → saved weights/NIRD_Config/Mamba-Involution-B0-wider/best_model.pth


100%|██████████| 67/67 [01:51<00:00,  1.66s/it]
100%|██████████| 67/67 [01:40<00:00,  1.51s/it]


[Mamba-Involution-B0-wider] ✔ New best mIoU=0.9930 at epoch 17 → saved weights/NIRD_Config/Mamba-Involution-B0-wider/best_model.pth


100%|██████████| 67/67 [01:51<00:00,  1.67s/it]


[Mamba-Involution-B0-wider] ✔ New best mIoU=0.9931 at epoch 18 → saved weights/NIRD_Config/Mamba-Involution-B0-wider/best_model.pth


100%|██████████| 67/67 [02:17<00:00,  2.05s/it]


[Mamba-Involution-B0-wider] ✔ New best mIoU=0.9931 at epoch 19 → saved weights/NIRD_Config/Mamba-Involution-B0-wider/best_model.pth


100%|██████████| 67/67 [01:53<00:00,  1.70s/it]
100%|██████████| 67/67 [01:54<00:00,  1.71s/it]


[Mamba-Involution-B0-wider] ✔ New best mIoU=0.9936 at epoch 21 → saved weights/NIRD_Config/Mamba-Involution-B0-wider/best_model.pth


100%|██████████| 67/67 [01:53<00:00,  1.70s/it]


[Mamba-Involution-B0-wider] ✔ New best mIoU=0.9943 at epoch 22 → saved weights/NIRD_Config/Mamba-Involution-B0-wider/best_model.pth


100%|██████████| 67/67 [01:51<00:00,  1.66s/it]
100%|██████████| 67/67 [01:40<00:00,  1.50s/it]
100%|██████████| 67/67 [02:03<00:00,  1.84s/it]


[Mamba-Involution-B0-wider] ✔ New best mIoU=0.9949 at epoch 25 → saved weights/NIRD_Config/Mamba-Involution-B0-wider/best_model.pth


100%|██████████| 67/67 [01:40<00:00,  1.49s/it]
100%|██████████| 67/67 [01:40<00:00,  1.50s/it]
100%|██████████| 67/67 [01:49<00:00,  1.64s/it]
100%|██████████| 67/67 [01:52<00:00,  1.68s/it]
100%|██████████| 67/67 [01:53<00:00,  1.69s/it]


[Mamba-Involution-B0-wider] Finished. Best mIoU=0.9949 at epoch 25. Log: weights/NIRD_Config/Mamba-Involution-B0-wider/training_log.txt

=== Processing configuration: Mamba-Convolution-B0-wider ===
[INFO] Using device: cuda
Layer (type:depth-idx)                             Output Shape              Param #
SegFormer                                          [1, 2, 256, 256]          --
├─MixVisionTransformer: 1-1                        [1, 32, 128, 128]         --
│    └─ModuleList: 2-1                             --                        --
│    │    └─MiTStage: 3-1                          [1, 32, 128, 128]         21,568
│    │    └─MiTStage: 3-2                          [1, 64, 64, 64]           84,416
│    │    └─MiTStage: 3-3                          [1, 128, 32, 32]          322,432
│    │    └─MiTStage: 3-4                          [1, 256, 16, 16]          1,259,264
├─SegFormerHead: 1-2                               [1, 2, 128, 128]          --
│    └─ModuleList: 2-2         

100%|██████████| 67/67 [02:09<00:00,  1.94s/it]


[Mamba-Convolution-B0-wider] ✔ New best mIoU=0.9795 at epoch 1 → saved weights/NIRD_Config/Mamba-Convolution-B0-wider/best_model.pth


100%|██████████| 67/67 [02:16<00:00,  2.04s/it]


[Mamba-Convolution-B0-wider] ✔ New best mIoU=0.9870 at epoch 2 → saved weights/NIRD_Config/Mamba-Convolution-B0-wider/best_model.pth


100%|██████████| 67/67 [01:39<00:00,  1.48s/it]


[Mamba-Convolution-B0-wider] ✔ New best mIoU=0.9884 at epoch 3 → saved weights/NIRD_Config/Mamba-Convolution-B0-wider/best_model.pth


100%|██████████| 67/67 [01:38<00:00,  1.48s/it]


[Mamba-Convolution-B0-wider] ✔ New best mIoU=0.9905 at epoch 4 → saved weights/NIRD_Config/Mamba-Convolution-B0-wider/best_model.pth


100%|██████████| 67/67 [01:39<00:00,  1.49s/it]


[Mamba-Convolution-B0-wider] ✔ New best mIoU=0.9921 at epoch 5 → saved weights/NIRD_Config/Mamba-Convolution-B0-wider/best_model.pth


100%|██████████| 67/67 [01:37<00:00,  1.46s/it]


[Mamba-Convolution-B0-wider] ✔ New best mIoU=0.9928 at epoch 6 → saved weights/NIRD_Config/Mamba-Convolution-B0-wider/best_model.pth


100%|██████████| 67/67 [01:38<00:00,  1.47s/it]


[Mamba-Convolution-B0-wider] ✔ New best mIoU=0.9935 at epoch 7 → saved weights/NIRD_Config/Mamba-Convolution-B0-wider/best_model.pth


100%|██████████| 67/67 [01:58<00:00,  1.77s/it]


[Mamba-Convolution-B0-wider] ✔ New best mIoU=0.9939 at epoch 8 → saved weights/NIRD_Config/Mamba-Convolution-B0-wider/best_model.pth


100%|██████████| 67/67 [02:00<00:00,  1.80s/it]


[Mamba-Convolution-B0-wider] ✔ New best mIoU=0.9945 at epoch 9 → saved weights/NIRD_Config/Mamba-Convolution-B0-wider/best_model.pth


100%|██████████| 67/67 [01:39<00:00,  1.49s/it]


[Mamba-Convolution-B0-wider] ✔ New best mIoU=0.9945 at epoch 10 → saved weights/NIRD_Config/Mamba-Convolution-B0-wider/best_model.pth


100%|██████████| 67/67 [02:25<00:00,  2.17s/it]


[Mamba-Convolution-B0-wider] ✔ New best mIoU=0.9957 at epoch 11 → saved weights/NIRD_Config/Mamba-Convolution-B0-wider/best_model.pth


100%|██████████| 67/67 [01:38<00:00,  1.48s/it]
100%|██████████| 67/67 [01:51<00:00,  1.66s/it]


[Mamba-Convolution-B0-wider] ✔ New best mIoU=0.9959 at epoch 13 → saved weights/NIRD_Config/Mamba-Convolution-B0-wider/best_model.pth


100%|██████████| 67/67 [01:51<00:00,  1.67s/it]


[Mamba-Convolution-B0-wider] ✔ New best mIoU=0.9962 at epoch 14 → saved weights/NIRD_Config/Mamba-Convolution-B0-wider/best_model.pth


100%|██████████| 67/67 [01:39<00:00,  1.49s/it]


[Mamba-Convolution-B0-wider] ✔ New best mIoU=0.9965 at epoch 15 → saved weights/NIRD_Config/Mamba-Convolution-B0-wider/best_model.pth


100%|██████████| 67/67 [01:38<00:00,  1.48s/it]
100%|██████████| 67/67 [01:39<00:00,  1.49s/it]


[Mamba-Convolution-B0-wider] ✔ New best mIoU=0.9966 at epoch 17 → saved weights/NIRD_Config/Mamba-Convolution-B0-wider/best_model.pth


100%|██████████| 67/67 [01:38<00:00,  1.47s/it]
100%|██████████| 67/67 [01:39<00:00,  1.48s/it]


[Mamba-Convolution-B0-wider] ✔ New best mIoU=0.9969 at epoch 19 → saved weights/NIRD_Config/Mamba-Convolution-B0-wider/best_model.pth


100%|██████████| 67/67 [01:38<00:00,  1.47s/it]
100%|██████████| 67/67 [01:58<00:00,  1.76s/it]


[Mamba-Convolution-B0-wider] ✔ New best mIoU=0.9970 at epoch 21 → saved weights/NIRD_Config/Mamba-Convolution-B0-wider/best_model.pth


100%|██████████| 67/67 [01:39<00:00,  1.48s/it]


[Mamba-Convolution-B0-wider] ✔ New best mIoU=0.9973 at epoch 22 → saved weights/NIRD_Config/Mamba-Convolution-B0-wider/best_model.pth


100%|██████████| 67/67 [01:39<00:00,  1.48s/it]


[Mamba-Convolution-B0-wider] ✔ New best mIoU=0.9976 at epoch 23 → saved weights/NIRD_Config/Mamba-Convolution-B0-wider/best_model.pth


100%|██████████| 67/67 [01:38<00:00,  1.47s/it]
100%|██████████| 67/67 [01:38<00:00,  1.48s/it]


[Mamba-Convolution-B0-wider] ✔ New best mIoU=0.9977 at epoch 25 → saved weights/NIRD_Config/Mamba-Convolution-B0-wider/best_model.pth


100%|██████████| 67/67 [01:39<00:00,  1.48s/it]


[Mamba-Convolution-B0-wider] ✔ New best mIoU=0.9980 at epoch 26 → saved weights/NIRD_Config/Mamba-Convolution-B0-wider/best_model.pth


100%|██████████| 67/67 [01:39<00:00,  1.48s/it]
100%|██████████| 67/67 [01:38<00:00,  1.48s/it]


[Mamba-Convolution-B0-wider] ✔ New best mIoU=0.9981 at epoch 28 → saved weights/NIRD_Config/Mamba-Convolution-B0-wider/best_model.pth


100%|██████████| 67/67 [01:39<00:00,  1.48s/it]


[Mamba-Convolution-B0-wider] ✔ New best mIoU=0.9982 at epoch 29 → saved weights/NIRD_Config/Mamba-Convolution-B0-wider/best_model.pth


100%|██████████| 67/67 [01:39<00:00,  1.48s/it]


[Mamba-Convolution-B0-wider] Finished. Best mIoU=0.9982 at epoch 29. Log: weights/NIRD_Config/Mamba-Convolution-B0-wider/training_log.txt


In [13]:
# %% [markdown]
# # Evaluation Pipeline for SegFormer-Mamba Models
# This script loads best models, evaluates metrics, collects computational stats,
# and generates comparison tables + visualizations.

# %%
import os
import time
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
import torch.nn.functional as F
from torch.utils.data import DataLoader

from sklearn.metrics import (
    accuracy_score, 
    f1_score, 
    jaccard_score, 
    precision_score, 
    recall_score, 
    confusion_matrix
)

# Ensure reproducibility
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(SEED)

# Directories
RESULTS_DIR = "results"
os.makedirs(RESULTS_DIR, exist_ok=True)
os.makedirs(os.path.join(RESULTS_DIR, "plots"), exist_ok=True)
os.makedirs(os.path.join(RESULTS_DIR, "visualizations"), exist_ok=True)


In [14]:
def save_results_as_csv_md(results, csv_path, md_path):
    """
    Save evaluation results to CSV and Markdown table.
    """
    import pandas as pd

    df = pd.DataFrame(results)
    df.to_csv(csv_path, index=False)

    # Build Markdown table
    with open(md_path, "w") as f:
        f.write(df.to_markdown(index=False))

    print(f"[INFO] Saved results → {csv_path}, {md_path}")
    return df


In [15]:
# %%
from thop import profile
from torchinfo import summary

def collect_computational_metrics(model, input_size=(1, 11, 256, 256), device="cuda"):
    """Collect Params, FLOPs, Inference Time for a model."""
    device = device if torch.cuda.is_available() else "cpu"
    model = model.to(device)
    dummy_input = torch.randn(*input_size).to(device)

    # --- Params & FLOPs ---
    try:
        macs, params = profile(model, inputs=(dummy_input,), verbose=False)
        flops = 2 * macs   # 1 MAC = 2 FLOPs
    except Exception as e:
        print(f"[WARN] thop profiling failed: {e}")
        macs, params, flops = 0, 0, 0

    # --- Inference time ---
    model.eval()
    with torch.no_grad():
        for _ in range(5):  # warm-up
            _ = model(dummy_input)
        if device.startswith("cuda"):
            torch.cuda.synchronize()
        start = time.time()
        for _ in range(20):
            _ = model(dummy_input)
        if device.startswith("cuda"):
            torch.cuda.synchronize()
        end = time.time()
    avg_time = (end - start) / 20 * 1000  # ms per sample

    return {
        "params": params,
        "macs": macs / 1e6,
        "flops": flops / 1e6,
        "inference_ms": avg_time
    }


In [16]:
# %%
def segmentation_metrics(y_true, y_pred, num_classes=2):
    """Compute IoU, Dice, Precision, Recall, Pixel Accuracy for segmentation."""
    y_true = y_true.ravel()
    y_pred = y_pred.ravel()

    labels = list(range(num_classes))
    cm = confusion_matrix(y_true, y_pred, labels=labels)

    per_class_iou  = jaccard_score (y_true, y_pred, average=None, labels=labels, zero_division=0)
    mean_iou       = jaccard_score (y_true, y_pred, average="macro", labels=labels, zero_division=0)
    per_class_dice = f1_score      (y_true, y_pred, average=None, labels=labels, zero_division=0)
    mean_dice      = f1_score      (y_true, y_pred, average="macro", labels=labels, zero_division=0)
    precision      = precision_score(y_true, y_pred, average="macro", labels=labels, zero_division=0)
    recall         = recall_score   (y_true, y_pred, average="macro", labels=labels, zero_division=0)
    pixel_acc      = accuracy_score (y_true, y_pred)

    return {
        "pixel_acc": pixel_acc,
        "mean_iou": mean_iou,
        "mean_dice": mean_dice,
        "per_class_iou": per_class_iou,
        "per_class_dice": per_class_dice,
        "precision": precision,
        "recall": recall,
        "confusion_matrix": cm
    }


In [17]:
# %%
def visualize_samples(model, dataset, device, config_name, out_dir="results/visualizations", num_samples=3):
    """
    Visualize RGB (from optical bands), Ground Truth, Prediction, and Overlay.
    Saves PNGs in results/visualizations/<config_name>/.
    """
    model.eval()
    os.makedirs(os.path.join(out_dir, config_name), exist_ok=True)

    for i in range(num_samples):
        # --- Pick a random sample ---
        idx = random.randint(0, len(dataset)-1)
        x, y = dataset[idx]

        # Extract RGB correctly (optical channels: 2,3,4)
        rgb = x[2:5].numpy().transpose(1, 2, 0)  
        rgb = (rgb - rgb.min()) / (rgb.max() - rgb.min() + 1e-6)

        # --- Model prediction ---
        with torch.no_grad():
            inp = x.unsqueeze(0).to(device)
            logits = model(inp)
            pred = torch.argmax(logits, dim=1).squeeze().cpu().numpy()

        gt = y.numpy()

        # --- Create figure ---
        fig, axs = plt.subplots(1, 4, figsize=(16, 4))
        axs[0].imshow(rgb)
        axs[0].set_title("RGB Image")
        axs[1].imshow(gt, cmap="nipy_spectral")
        axs[1].set_title("Ground Truth")
        axs[2].imshow(pred, cmap="nipy_spectral")
        axs[2].set_title("Prediction")
        axs[3].imshow(rgb)
        axs[3].imshow(pred, cmap="nipy_spectral", alpha=0.5)
        axs[3].set_title("Overlay")
        for ax in axs:
            ax.axis("off")

        # --- Save figure ---
        save_path = os.path.join(out_dir, config_name, f"sample_{i+1}.png")
        plt.savefig(save_path, bbox_inches="tight")
        plt.close(fig)


In [18]:
# %%
def evaluate_model(model, dataloader, device, num_classes=2):
    """
    Run evaluation on a dataset and compute segmentation metrics.
    Returns a dict with accuracy, IoU, Dice, Precision, Recall.
    """
    model.eval()
    all_preds, all_labels = [], []

    with torch.no_grad():
        for x, y in dataloader:
            x, y = x.to(device), y.to(device)
            logits = model(x)
            preds = torch.argmax(logits, dim=1)

            all_preds.append(preds.cpu().numpy())
            all_labels.append(y.cpu().numpy())

    y_true = np.concatenate(all_labels, axis=None)
    y_pred = np.concatenate(all_preds, axis=None)

    return segmentation_metrics(y_true, y_pred, num_classes=num_classes)


In [26]:
# %%
def run_full_evaluation(model_configs, dataset_val, device="cuda", num_classes=2):
    """
    Loop through all model configs, load best weights, 
    evaluate metrics, collect compute stats, and visualize.
    """
    results = []
    val_loader = DataLoader(dataset_val, batch_size=30, shuffle=False)

    for config in model_configs:
        name = config["name"]
        print(f"\n=== Evaluating {name} ===")

        # Paths
        weight_dir = os.path.join("weights","NIRD_Config", name)
        best_ckpt = os.path.join(weight_dir, "best_model.pth")

        if not os.path.exists(best_ckpt):
            print(f"[WARN] Best model not found for {name}, skipping.")
            continue

        # --- Build model ---
        in_chans = dataset_val[0][0].shape[0]
        model = SegFormer(
            num_classes=num_classes,
            variant=config["backbone_variant"],
            in_chans=in_chans,
            embed_dims=config["embed_dims"],
            depths=config["depths"],
            use_invo_stages=config["use_invo_stages"]
        ).to(device)

        # Load weights
        state_dict = torch.load(best_ckpt, map_location=device)
        model.load_state_dict(state_dict, strict=False)
        print(f"[INFO] Loaded best model from {best_ckpt}")

        # --- Computational metrics ---
        comp_stats = collect_computational_metrics(model, input_size=(1, in_chans, 256, 256), device=device)

        # --- Evaluation metrics ---
        eval_stats = evaluate_model(model, val_loader, device, num_classes=num_classes)

        # --- Combine results ---
        result = {
            "Model": name,
            "Params(M)": comp_stats["params"] / 1e6,
            "FLOPs(M)": comp_stats["flops"],
            "InferTime(ms)": comp_stats["inference_ms"],
            "mIoU": eval_stats["mean_iou"],
            "Dice": eval_stats["mean_dice"],
            "Precision": eval_stats["precision"],
            "Recall": eval_stats["recall"],
            "PixelAcc": eval_stats["pixel_acc"],
        }
        results.append(result)

        # --- Save visualizations ---
        visualize_samples(model, dataset_val, device, config_name=name, out_dir="results/visualizations")

    # --- Save results table ---
    csv_path = os.path.join("results", "metrics.csv")
    md_path  = os.path.join("results", "metrics.md")
    df = save_results_as_csv_md(results, csv_path, md_path)

    return df


In [24]:
# %%
def plot_results_table(df, out_dir="results/plots"):
    """
    Generate bar plots comparing models on accuracy and efficiency metrics.
    """
    os.makedirs(out_dir, exist_ok=True)

    # Metrics groups
    accuracy_metrics = ["mIoU", "Dice", "Precision", "Recall", "PixelAcc"]
    efficiency_metrics = ["Params(M)", "FLOPs(M)", "InferTime(ms)"]

    # --- Accuracy plots ---
    for metric in accuracy_metrics:
        plt.figure(figsize=(8, 5))
        plt.bar(df["Model"], df[metric], color="skyblue")
        plt.xticks(rotation=45, ha="right")
        plt.ylabel(metric)
        plt.title(f"Comparison of {metric}")
        plt.tight_layout()
        save_path = os.path.join(out_dir, f"{metric}_comparison.png")
        plt.savefig(save_path, dpi=150)
        plt.close()
        print(f"[INFO] Saved {metric} plot → {save_path}")

    # --- Efficiency plots ---
    for metric in efficiency_metrics:
        plt.figure(figsize=(8, 5))
        plt.bar(df["Model"], df[metric], color="salmon")
        plt.xticks(rotation=45, ha="right")
        plt.ylabel(metric)
        plt.title(f"Comparison of {metric}")
        plt.tight_layout()
        save_path = os.path.join(out_dir, f"{metric}_comparison.png")
        plt.savefig(save_path, dpi=150)
        plt.close()
        print(f"[INFO] Saved {metric} plot → {save_path}")


In [27]:
# %%
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
NUM_CLASSES = 2   # adjust if more classes

# Run full evaluation across all configs
df_results = run_full_evaluation(
    model_configs=model_configs,
    dataset_val=dataset_test,
    device=DEVICE,
    num_classes=NUM_CLASSES
)

# Plot results
plot_results_table(df_results, out_dir="results/plots")

# Show Markdown table inline
from IPython.display import Markdown
display(Markdown(df_results.to_markdown(index=False)))



=== Evaluating Mamba-Involution-B0-default ===
[INFO] Loaded best model from weights/NIRD_Config/Mamba-Involution-B0-default/best_model.pth

=== Evaluating Mamba-Convolution-B0-default ===
[INFO] Loaded best model from weights/NIRD_Config/Mamba-Convolution-B0-default/best_model.pth

=== Evaluating Mamba-Involution-B0-shallow ===
[INFO] Loaded best model from weights/NIRD_Config/Mamba-Involution-B0-shallow/best_model.pth

=== Evaluating Mamba-Involution-B0-deep ===
[INFO] Loaded best model from weights/NIRD_Config/Mamba-Involution-B0-deep/best_model.pth

=== Evaluating Mamba-Involution-B0-wider ===
[INFO] Loaded best model from weights/NIRD_Config/Mamba-Involution-B0-wider/best_model.pth

=== Evaluating Mamba-Convolution-B0-wider ===
[INFO] Loaded best model from weights/NIRD_Config/Mamba-Convolution-B0-wider/best_model.pth
[INFO] Saved results → results/metrics.csv, results/metrics.md
[INFO] Saved mIoU plot → results/plots/mIoU_comparison.png
[INFO] Saved Dice plot → results/plots/Dic

| Model                        |   Params(M) |   FLOPs(M) |   InferTime(ms) |     mIoU |     Dice |   Precision |   Recall |   PixelAcc |
|:-----------------------------|------------:|-----------:|----------------:|---------:|---------:|------------:|---------:|-----------:|
| Mamba-Involution-B0-default  |    0.434136 |    7464.08 |         2.85507 | 0.99231  | 0.996138 |    0.996233 | 0.996043 |   0.996679 |
| Mamba-Convolution-B0-default |    0.519618 |    7606.89 |         2.40961 | 0.996676 | 0.998335 |    0.998296 | 0.998374 |   0.998574 |
| Mamba-Involution-B0-shallow  |    0.434136 |    7464.08 |         2.81612 | 0.994556 | 0.997269 |    0.997638 | 0.996903 |   0.9977   |
| Mamba-Involution-B0-deep     |    0.609416 |    7732.52 |         4.99783 | 0.990888 | 0.99542  |    0.99586  | 0.994985 |   0.995976 |
| Mamba-Involution-B0-wider    |    1.02576  |    8448.13 |         2.84063 | 0.994255 | 0.997118 |    0.996884 | 0.997352 |   0.997591 |
| Mamba-Convolution-B0-wider   |    1.36615  |    8933.61 |         2.45694 | 0.99821  | 0.999104 |    0.999103 | 0.999104 |   0.999222 |

In [29]:
# %%
import os
import pandas as pd
import matplotlib.pyplot as plt

BASE_DIR = "weights/NIRD_Config"   # parent directory containing all model subfolders
OUT_DIR = "results/plots"
os.makedirs(OUT_DIR, exist_ok=True)

def load_all_logs(base_dir=BASE_DIR):
    """Load all training_log.txt files under base_dir/*/"""
    logs = {}
    for model in os.listdir(base_dir):
        log_path = os.path.join(base_dir, model, "training_log.txt")
        if os.path.exists(log_path):
            try:
                df = pd.read_csv(log_path)
                logs[model] = df
            except Exception as e:
                print(f"[WARN] Could not parse {log_path}: {e}")
    return logs

def plot_metric(logs, metric, out_dir=OUT_DIR, ylabel=None):
    """Plot one metric across all models with bold lines and larger font."""
    plt.figure(figsize=(10, 6))
    for model, df in logs.items():
        if metric in df.columns:
            plt.plot(
                df["epoch"], df[metric],
                label=model, linewidth=2
            )
    plt.xlabel("Epoch", fontsize=12)
    plt.ylabel(ylabel or metric, fontsize=12)
    plt.title(f"{metric} Comparison Across Models", fontsize=14, fontweight="bold")
    plt.legend(fontsize=9)
    plt.grid(True, linestyle="--", alpha=0.6)
    plt.tight_layout()
    save_path = os.path.join(out_dir, f"{metric}_comparison.png")
    plt.savefig(save_path, bbox_inches="tight", dpi=150)
    plt.close()
    print(f"[INFO] Saved plot → {save_path}")

def summarize_best(logs, out_path="results/summary.csv"):
    """Save best mIoU per model and other final stats into CSV + Markdown."""
    rows = []
    for model, df in logs.items():
        if "val_mIoU" in df.columns:
            best_row = df.loc[df["val_mIoU"].idxmax()]
            row = {
                "Model": model,
                "BestEpoch": int(best_row["epoch"]),
                "TrainLoss": best_row["train_loss"] if "train_loss" in best_row else None,
                "ValLoss": best_row["val_loss"] if "val_loss" in best_row else None,
                "Best_mIoU": best_row["val_mIoU"],
                "Best_Dice": best_row["val_dice"],
                "Best_Acc": best_row["val_pixel_acc"],
                "Best_Precision": best_row["val_precision"],
                "Best_Recall": best_row["val_recall"]
            }
            rows.append(row)
    summary = pd.DataFrame(rows)
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    summary.to_csv(out_path, index=False)
    md_path = out_path.replace(".csv", ".md")
    summary.to_markdown(md_path, index=False)
    print(f"[INFO] Saved summary → {out_path}, {md_path}")
    return summary

# --- Run everything ---
logs = load_all_logs(BASE_DIR)

metrics = {
    "train_loss": "Training Loss",
    "val_loss": "Validation Loss",          # 🔹 added
    "val_mIoU": "Validation mIoU",
    "val_dice": "Validation Dice",
    "val_pixel_acc": "Validation Pixel Accuracy",
    "val_precision": "Validation Precision",
    "val_recall": "Validation Recall"
}
for m, label in metrics.items():
    plot_metric(logs, m, ylabel=label)

summary_df = summarize_best(logs)
print(summary_df)


[INFO] Saved plot → results/plots/train_loss_comparison.png
[INFO] Saved plot → results/plots/val_loss_comparison.png


  plt.legend(fontsize=9)


[INFO] Saved plot → results/plots/val_mIoU_comparison.png
[INFO] Saved plot → results/plots/val_dice_comparison.png
[INFO] Saved plot → results/plots/val_pixel_acc_comparison.png
[INFO] Saved plot → results/plots/val_precision_comparison.png
[INFO] Saved plot → results/plots/val_recall_comparison.png
[INFO] Saved summary → results/summary.csv, results/summary.md
                          Model  BestEpoch  TrainLoss ValLoss  Best_mIoU  \
0      Mamba-Involution-B0-deep         30   0.018755    None   0.991763   
1     Mamba-Involution-B0-wider         25   0.011387    None   0.994926   
2  Mamba-Convolution-B0-default         28   0.005104    None   0.996816   
3    Mamba-Convolution-B0-wider         29   0.002449    None   0.998154   
4   Mamba-Involution-B0-default         23   0.021569    None   0.992243   
5   Mamba-Involution-B0-shallow         30   0.009784    None   0.994787   

   Best_Dice  Best_Acc  Best_Precision  Best_Recall  
0   0.995862  0.996509        0.996152     0.995