In [1]:
# import gc, torch

# gc.collect()
# torch.cuda.empty_cache()
# torch.cuda.reset_peak_memory_stats()

In [2]:
# SEGRESNET RUN (with less noise probability in pre processing)
!pip install -q monai einops nibabel
from tqdm.auto import tqdm
from pathlib import Path
import matplotlib
import matplotlib.pyplot as plt
from monai.transforms import (
    Compose, LoadImaged, EnsureChannelFirstd, EnsureTyped, Orientationd,
    Spacingd, CropForegroundd, SpatialPadd, DivisiblePadd,
    ScaleIntensityRangePercentilesd, AsDiscreted,
    RandCropByPosNegLabeld, RandFlipd, RandAffined, RandRotated,
    RandGaussianNoised, RandGaussianSmoothd, DeleteItemsd,
    Activations, AsDiscrete, KeepLargestConnectedComponent
)
from monai.data import PersistentDataset, DataLoader, decollate_batch, list_data_collate, Dataset
from monai.metrics import DiceMetric
from monai.inferers import sliding_window_inference
from monai.networks.nets import SegResNet
from monai.losses import DiceCELoss
from torch.optim.lr_scheduler import LinearLR, CosineAnnealingLR, SequentialLR
from sklearn.model_selection import train_test_split
import os
import random
import warnings
import shutil
import numpy as np
import torch
import sys
import math
import json
import gc
# imports


matplotlib.use("Agg")


# some cuda settings

os.environ.setdefault("PYTORCH_CUDA_ALLOC_CONF", "expandable_segments:True,max_split_size_mb:128")
warnings.filterwarnings("ignore", message="single channel prediction")
warnings.filterwarnings("ignore", category=FutureWarning)

torch.backends.cudnn.benchmark = True
torch.backends.cuda.matmul.allow_tf32 = True
torch.backends.cudnn.allow_tf32 = True
try:
    torch.set_float32_matmul_precision("high")
except Exception:
    pass

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
NGPU = torch.cuda.device_count() if torch.cuda.is_available() else 0
print(f"CUDA: {torch.cuda.get_device_name(0) if NGPU else 'CPU'} | GPUs={NGPU}")
if NGPU > 0:
    print(f"GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")

# Configs, all hyperparams, dir paths, model parameters etc...
CONFIG = {
    # dirs
    #
    "images_dir": "/kaggle/input/imagess/volumes/",
    "labels_dir": "/kaggle/input/labels/masks/",
    "cache_dir": "/kaggle/working/cache_swin",
    "best_model_path": "/kaggle/working/best_segres.pth",
    "clear_cache": True,

    # Training hyperparams
    "seed": 121,
    "epochs": 130,
    "batch_size": 1,
    "accum_steps": 4,
    "val_every": 1,

    # optimizer
    "base_lr": 1e-5,
    "max_lr": 3e-4,
    "weight_decay": 2e-5,
    "warmup_epochs": 20,

    # for SwinUNETR
    "feature_size": 24,
    "drop_rate": 0.2,

    # transform settings
    "spacing": (2.0, 2.0, 3.0),
    "roi_size": (96, 96, 64),
    "crop_margin": 8,
    "divisible_pad": (32, 32, 16),

    # val
    "swi_batch_size": 1,
    "overlap": 0.8,
    "init_threshold": 0.5,
    "thr_sweep_every": 5,
    "thr_grid": np.linspace(0.35, 0.65, 7).tolist(),

    # loss weights
    "dice_weight": 1.0,
    "ce_weight": 1.0,


    "patience": 30,
    "min_delta": 0.0,

    # change in pos/neg analogy
    "curriculum_stages": [
        {"epoch_start": 0, "pos": 1, "neg": 0, "desc": "Foreground-only"},
        {"epoch_start": 50, "pos": 3, "neg": 1, "desc": "Mixed sampling"},
        {"epoch_start": 125, "pos": 1, "neg": 1, "desc": "Balanced"},
    ],

    # expo moving average
    "ema_decay": 0.995,
}

os.makedirs(CONFIG["cache_dir"], exist_ok=True)
if CONFIG["clear_cache"]:
    shutil.rmtree(os.path.join(CONFIG["cache_dir"], "train"), ignore_errors=True)
    shutil.rmtree(os.path.join(CONFIG["cache_dir"], "val"), ignore_errors=True)

# Set seeds
random.seed(CONFIG["seed"])
np.random.seed(CONFIG["seed"])
torch.manual_seed(CONFIG["seed"])
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(CONFIG["seed"])

# transforms


def get_transforms():
    base_transform = Compose([
        LoadImaged(keys=["image", "label"]),
        EnsureChannelFirstd(keys=["image", "label"]),
        EnsureTyped(keys=["image", "label"], dtype=torch.float32, track_meta=True),
        Orientationd(keys=["image", "label"], axcodes="RAS"),
        Spacingd(keys=["image", "label"], pixdim=CONFIG["spacing"], mode=("bilinear", "nearest")),
        ScaleIntensityRangePercentilesd(keys=["image"], lower=2.0, upper=99.9, b_min=0.0, b_max=1.0, clip=True),
        AsDiscreted(keys=["label"], threshold=0.5),
        CropForegroundd(keys=["image", "label"], source_key="label", margin=CONFIG["crop_margin"]),
        SpatialPadd(keys=["image", "label"], spatial_size=CONFIG["roi_size"], method="symmetric"),
        DivisiblePadd(keys=["image", "label"], k=CONFIG["divisible_pad"]),
        EnsureTyped(keys=["image", "label"], dtype=torch.float32, track_meta=False),
        DeleteItemsd(keys=["image_meta_dict", "label_meta_dict"]),
    ])

    def rand_transform(pos_num, neg_num):
        return Compose([
            RandCropByPosNegLabeld(keys=["image", "label"], label_key="label",
                                   spatial_size=CONFIG["roi_size"], pos=pos_num, neg=neg_num, num_samples=4, allow_smaller=True),
            RandFlipd(keys=["image", "label"], prob=0.5, spatial_axis=[0]),
            RandFlipd(keys=["image", "label"], prob=0.5, spatial_axis=[1]),
            RandFlipd(keys=["image", "label"], prob=0.5, spatial_axis=[2]),
            RandRotated(keys=["image", "label"], prob=0.5, range_x=0.15, range_y=0.15, range_z=0.15,
                        mode=("bilinear", "nearest"), padding_mode="zeros"),
            RandAffined(keys=["image", "label"], prob=0.35,
                        rotate_range=(0.0, 0.0, np.pi/6),
                        scale_range=(0.2, 0.2, 0.2),
                        mode=("bilinear", "nearest")),
            RandGaussianNoised(keys=["image"], prob=0.1, mean=0.0, std=0.02),
            RandGaussianSmoothd(keys=["image"], prob=0.2, sigma_x=(0.5, 1.0), sigma_y=(0.5, 1.0), sigma_z=(0.5, 1.0)),
            EnsureTyped(keys=["image", "label"], dtype=torch.float32, track_meta=False),
        ])

    return base_transform, rand_transform


base_transform, rand_transform = get_transforms()

# dataset and dataloadeers


class PlacentaDataset(Dataset):
    def __init__(self, images_dir, labels_dir, transform=None, limit=None):
        self.images_dir = Path(images_dir)
        self.labels_dir = Path(labels_dir)
        self.transform = transform

        image_files = sorted([f for f in self.images_dir.glob("*.nii*")])
        label_files = sorted([f for f in self.labels_dir.glob("*.nii*")])

        label_map = {f.stem.replace("_mask", ""): f for f in label_files}

        self.pairs = []
        missing = []
        for img_path in image_files:
            stem = img_path.stem
            lbl_path = label_map.get(stem)
            if lbl_path is None or not lbl_path.exists():
                missing.append(img_path.name)
                continue
            self.pairs.append({"image": str(img_path), "label": str(lbl_path)})

        if limit is not None:
            self.pairs = self.pairs[:limit]

        if missing:
            print(f"[Dataset] WARNING: {len(missing)} images missing labels (showing first 3): {missing[:3]}")
        print(f"[Dataset] Loaded {len(self.pairs)} valid image-label pairs")

    def __len__(self):
        return len(self.pairs)

    def __getitem__(self, idx):
        sample = dict(self.pairs[idx])
        if self.transform:
            return self.transform(sample)
        return sample


full_dataset = PlacentaDataset(
    CONFIG["images_dir"],
    CONFIG["labels_dir"],
    transform=None
)

all_indices = np.arange(len(full_dataset))
train_idx, val_idx = train_test_split(all_indices, test_size=0.2, random_state=CONFIG["seed"])

train_items = [full_dataset.pairs[i] for i in train_idx]
val_items = [full_dataset.pairs[i] for i in val_idx]

print(f"[Split] Train: {len(train_items)} | Val: {len(val_items)}")

train_cache = PersistentDataset(train_items, transform=base_transform, cache_dir=os.path.join(CONFIG["cache_dir"], "train"))
val_cache = PersistentDataset(val_items, transform=base_transform, cache_dir=os.path.join(CONFIG["cache_dir"], "val"))

# apply the random transforms in train set
train_dataset = Dataset(train_cache, transform=rand_transform(1, 0))
val_dataset = Dataset(val_cache, transform=EnsureTyped(keys=["image", "label"], dtype=torch.float32, track_meta=False))

train_loader = DataLoader(
    train_dataset,
    batch_size=CONFIG["batch_size"],
    shuffle=True,
    num_workers=4,  # Reduced for memory
    persistent_workers=True,
    prefetch_factor=2,
    pin_memory=True,
    collate_fn=list_data_collate,
)

val_loader = DataLoader(
    val_dataset,
    batch_size=1,
    shuffle=False,
    num_workers=4,
    persistent_workers=True,
    pin_memory=True,
    collate_fn=list_data_collate,
)


model = SegResNet(
    in_channels=1,
    out_channels=1,
    init_filters=32,
    spatial_dims=3,
    dropout_prob=CONFIG["drop_rate"],
    num_groups=8,
    blocks_down=(2,2,2,4),
    blocks_up=(1,1,1),
    norm='INSTANCE'
    
).to(device).to(memory_format=torch.channels_last_3d)

print(f"[Model] SegResNet | Parameters: {sum(p.numel() for p in model.parameters()):,}")

# loss and opts
criterion = DiceCELoss(
    include_background=False,
    to_onehot_y=False,
    sigmoid=True,
    squared_pred=True,
    lambda_dice=CONFIG["dice_weight"],
    lambda_ce=CONFIG["ce_weight"],
)

dice_only_loss = DiceCELoss(
    include_background=False,
    to_onehot_y=False,
    sigmoid=True,
    squared_pred=True,
    lambda_dice=1.0,
    lambda_ce=0.0,
)

ce_only_loss = DiceCELoss(
    include_background=False,
    to_onehot_y=False,
    sigmoid=True,
    squared_pred=True,
    lambda_dice=0.0,
    lambda_ce=1.0,
)

# model


optimizer = torch.optim.AdamW(
    model.parameters(),
    lr=CONFIG["base_lr"],
    weight_decay=CONFIG["weight_decay"],
    betas=(0.9, 0.999),
)


def compute_opt_steps_per_epoch(dloader_len, accum_steps):
    return max(1, math.ceil(dloader_len / max(1, accum_steps)))


opt_steps_per_epoch = compute_opt_steps_per_epoch(len(train_loader), CONFIG["accum_steps"])
cycle_epochs = 10

scheduler = torch.optim.lr_scheduler.CyclicLR(
    optimizer,
    base_lr=CONFIG["base_lr"],
    max_lr=CONFIG["max_lr"],
    step_size_up=opt_steps_per_epoch * (cycle_epochs // 2),
    step_size_down=opt_steps_per_epoch * (cycle_epochs // 2),
    mode='triangular2',
    cycle_momentum=True
)


class EMA:
    def __init__(self, model, decay=0.999):
        self.decay = decay
        self.shadow = {k: p.detach().clone() for k, p in model.named_parameters() if p.requires_grad}
        self.backup = None

    @torch.no_grad()
    def update(self, model):
        for k, p in model.named_parameters():
            if not p.requires_grad:
                continue
            self.shadow[k].mul_(self.decay).add_(p.detach(), alpha=1 - self.decay)

    @torch.no_grad()
    def apply(self, model):
        self.backup = {k: p.detach().clone() for k, p in model.named_parameters() if p.requires_grad}
        for k, p in model.named_parameters():
            if not p.requires_grad:
                continue
            p.data.copy_(self.shadow[k])

    @torch.no_grad()
    def restore(self, model):
        for k, p in model.named_parameters():
            if not p.requires_grad:
                continue
            p.data.copy_(self.backup[k])


ema = EMA(model, decay=CONFIG["ema_decay"])

# scaler
scaler = torch.cuda.amp.GradScaler(enabled=torch.cuda.is_available())

post_label = AsDiscrete(threshold=0.5)
keep_lcc = KeepLargestConnectedComponent(connectivity=3, num_components=1)


@torch.no_grad()
def postprocess_mask(bin_pred: torch.Tensor, min_size=500):
    x = keep_lcc(bin_pred)
    # Remove very small components if any remain
    if x.sum() > 0 and min_size > 0:
        components = torch.unique(x)
        if len(components) > 2:  # More than background + one component
            # This is a simple size filter - in practice you might want connected_components
            pass
    return x


@torch.no_grad()
def dice_score(y_pred_bin: torch.Tensor, y_true_bin: torch.Tensor):
    inter = (y_pred_bin * y_true_bin).sum()
    union = y_pred_bin.sum() + y_true_bin.sum()
    return float((2.0 * inter + 1e-7) / (union + 1e-7))


# val with SWI function
@torch.no_grad()
def validate_model(model, loader, threshold=0.5, do_threshold_sweep=False, calc_components=False):
    """Validation with memory-optimized sliding window inference"""
    model.eval()
    dice_metric = DiceMetric(include_background=False, reduction="mean", ignore_empty=True)
    dice_metric.reset()

    total_loss = 0.0
    total_dice_loss = 0.0
    total_ce_loss = 0.0
    iou_sum = 0.0
    n_cases = 0

    thr_grid = CONFIG["thr_grid"] if do_threshold_sweep else []
    sweep_scores = np.zeros(len(thr_grid))

    pbar = tqdm(loader, desc="Validation", leave=False)

    for batch in pbar:
        x = batch["image"].to(device, non_blocking=True).float()
        y = batch["label"].to(device, non_blocking=True).float()

        try:
            x = x.to(memory_format=torch.channels_last_3d)
            y = y.to(memory_format=torch.channels_last_3d)
        except Exception:
            pass

        # Memory-efficient sliding window with OOM recovery
        sw_bs = CONFIG["swi_batch_size"]
        logits = None

        while sw_bs >= 1:
            try:
                with torch.cuda.amp.autocast(enabled=True):
                    logits = sliding_window_inference(
                        x,
                        roi_size=CONFIG["roi_size"],
                        sw_batch_size=sw_bs,
                        predictor=model,
                        overlap=CONFIG["overlap"],
                        mode="gaussian",
                    )
                break  # Success
            except torch.cuda.OutOfMemoryError:
                torch.cuda.empty_cache()
                sw_bs //= 2
                if sw_bs < 1:
                    print(f"[OOM] Skipping batch at size {x.shape} - cannot process")
                    continue

        if logits is None:
            continue

        # Calculate losses
        loss = criterion(logits, y)
        total_loss += float(loss)

        if calc_components:
            total_dice_loss += float(dice_only_loss(logits, y))
            total_ce_loss += float(ce_only_loss(logits, y))

        probs = torch.sigmoid(logits)
        preds_bin = (probs >= threshold).float()

        # Post-process
        preds_list = decollate_batch(preds_bin)
        trues_list = decollate_batch(y > 0.5)

        preds_pp = [postprocess_mask(p) for p in preds_list]

        # Update metrics
        dice_metric(y_pred=preds_pp, y=trues_list)

        for p, t in zip(preds_pp, trues_list):
            inter = (p * t).sum()
            union = (p + t).sum()
            iou_sum += float((inter + 1e-7) / (union - inter + 1e-7))

        # Threshold sweep
        if do_threshold_sweep:
            y_bin = (y > 0.5).float()
            for i, thr in enumerate(thr_grid):
                sweep_preds = [(probs >= thr).float() for probs in decollate_batch(probs)]
                sweep_pp = [postprocess_mask(p) for p in sweep_preds]
                sweep_scores[i] += np.mean([dice_score(p, t) for p, t in zip(sweep_pp, decollate_batch(y_bin))])

        n_cases += 1

        # Clear cache periodically
        if n_cases % 10 == 0:
            torch.cuda.empty_cache()

        pbar.set_postfix({"cases": n_cases})

    # Aggregate metrics
    avg_loss = total_loss / max(n_cases, 1)
    avg_dice_loss = total_dice_loss / max(n_cases, 1) if calc_components else float('nan')
    avg_ce_loss = total_ce_loss / max(n_cases, 1) if calc_components else float('nan')
    avg_dice = dice_metric.aggregate().item()
    avg_iou = iou_sum / max(n_cases, 1)

    # Best threshold from sweep
    best_thr, best_dice_sweep = None, None
    if do_threshold_sweep and n_cases > 0:
        sweep_scores /= n_cases
        best_idx = int(np.argmax(sweep_scores))
        best_thr = float(thr_grid[best_idx])
        best_dice_sweep = float(sweep_scores[best_idx])

    # Final cleanup
    del x, y, logits, probs
    torch.cuda.empty_cache()

    return avg_loss, avg_dice_loss, avg_ce_loss, avg_dice, avg_iou, best_thr, best_dice_sweep


# training history
history = {
    "epoch": [], "train_loss": [], "val_loss": [], "val_loss_dice": [],
    "val_loss_ce": [], "dice": [], "iou": [], "lr": [], "thr": [], "validated": []
}

print("Starting Training...")

best_metric = -1.0
best_epoch = -1
epochs_no_improve = 0
current_thr = CONFIG["init_threshold"]


# get the analogy based on the epoch
def get_curriculum(epoch):
    for stage in reversed(CONFIG["curriculum_stages"]):
        if epoch >= stage["epoch_start"]:
            return stage
    return CONFIG["curriculum_stages"][0]


# loop
for epoch in range(CONFIG["epochs"]):
    # Curriculum update
    curr_stage = get_curriculum(epoch)
    if epoch == curr_stage["epoch_start"]:
        train_dataset.transform = rand_transform(curr_stage["pos"], curr_stage["neg"])
        print(f"\n[Curriculum] Epoch {epoch+1}: {curr_stage['desc']} (pos={curr_stage['pos']}, neg={curr_stage['neg']})")

    # Training
    model.train()
    train_loss_sum = 0.0
    train_batches = 0

    # Memory monitoring
    if epoch == 0:
        print(f"[Memory] Before training: {torch.cuda.memory_allocated()/1e9:.2f} GB allocated")

    pbar_train = tqdm(train_loader, desc=f"Epoch {epoch+1}/{CONFIG['epochs']} [Train]", leave=False)
    optimizer.zero_grad(set_to_none=True)

    for batch_idx, batch in enumerate(pbar_train, 1):
        x = batch["image"].to(device, non_blocking=True).float()
        y = batch["label"].to(device, non_blocking=True).float()

        try:
            x = x.to(memory_format=torch.channels_last_3d)
            y = y.to(memory_format=torch.channels_last_3d)
        except Exception:
            pass

        # Skip bad batches
        if not (torch.isfinite(x).all() and torch.isfinite(y).all()):
            print(f"[Warning] Skipping batch {batch_idx} - non-finite values")
            continue

        # Forward pass
        with torch.cuda.amp.autocast(enabled=True):
            logits = model(x)
            loss = criterion(logits, y)

        if not torch.isfinite(loss):
            print(f"[Warning] Skipping batch {batch_idx} - NaN/Inf loss")
            continue

        # Backward with accumulation
        loss_scaled = loss / CONFIG["accum_steps"]
        scaler.scale(loss_scaled).backward()

        if batch_idx % CONFIG["accum_steps"] == 0:
            scaler.unscale_(optimizer)
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0, error_if_nonfinite=False)
            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad(set_to_none=True)
            ema.update(model)

        train_loss_sum += float(loss.detach())
        train_batches += 1

        pbar_train.set_postfix({"loss": f"{float(loss):.4f}"})

    # Handle remaining gradients
    if (batch_idx % CONFIG["accum_steps"]) != 0:
        scaler.unscale_(optimizer)
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0, error_if_nonfinite=False)
        scaler.step(optimizer)
        scaler.update()
        optimizer.zero_grad(set_to_none=True)
        ema.update(model)

    avg_train_loss = train_loss_sum / max(train_batches, 1)

    # Validation
    do_validate = (epoch + 1) % CONFIG["val_every"] == 0
    val_metrics = None

    if do_validate:
        # Use EMA model for validation
        ema.apply(model)

        # Determine if we should sweep thresholds
        do_sweep = (epoch + 1) % CONFIG["thr_sweep_every"] == 0
        # Calculate component losses in final epochs
        calc_components = epoch > CONFIG["epochs"] - 20

        try:
            val_metrics = validate_model(
                model, val_loader,
                threshold=current_thr,
                do_threshold_sweep=do_sweep,
                calc_components=calc_components
            )
        except torch.cuda.OutOfMemoryError:
            print(f"[OOM] Validation failed at epoch {epoch+1}, skipping...")
            torch.cuda.empty_cache()
            val_metrics = (float('nan'), float('nan'), float('nan'), 0.0, 0.0, None, None)

        ema.restore(model)

        # Unpack metrics
        vloss, vloss_dice, vloss_ce, vdice, viou, best_thr, best_dice_sweep = val_metrics

        # Update threshold if sweep found a better one
        threshold_changed = ""
        if best_thr is not None and best_dice_sweep is not None and (best_dice_sweep - vdice) > 1e-3:
            old_thr = current_thr
            current_thr = best_thr
            threshold_changed = f" | thr {old_thr:.3f}→{current_thr:.3f}"

        # Check for improvement
        is_best = vdice > best_metric + CONFIG["min_delta"]
        if is_best:
            best_metric = vdice
            best_epoch = epoch + 1
            ema.apply(model)
            torch.save(model.state_dict(), CONFIG["best_model_path"])
            ema.restore(model)
            epochs_no_improve = 0
        else:
            epochs_no_improve += 1

        # Print summary
        print(f"[Epoch {epoch+1:03d}] "
              f"TrainLoss: {avg_train_loss:.4f} | "
              f"ValLoss: {vloss:.4f} | "
              f"ValDice@thr{current_thr:.2f}: {vdice:.4f} | "
              f"ValIoU: {viou:.4f} | "
              f"LR: {optimizer.param_groups[0]['lr']:.2e}{threshold_changed} "
              f"{'| **NEW BEST**' if is_best else ''}")

        if calc_components and not np.isnan(vloss_dice):
            print(f"          └─ Loss Components - Dice: {vloss_dice:.4f}, CE: {vloss_ce:.4f}")

        # Record history
        history["epoch"].append(epoch + 1)
        history["train_loss"].append(avg_train_loss)
        history["val_loss"].append(vloss)
        history["val_loss_dice"].append(vloss_dice)
        history["val_loss_ce"].append(vloss_ce)
        history["dice"].append(vdice)
        history["iou"].append(viou)
        history["lr"].append(optimizer.param_groups[0]["lr"])
        history["thr"].append(current_thr)
        history["validated"].append(True)

    else:
        print(f"[Epoch {epoch+1:03d}] TrainLoss: {avg_train_loss:.4f} | Validation: Skipped")

        history["epoch"].append(epoch + 1)
        history["train_loss"].append(avg_train_loss)
        history["val_loss"].append(float('nan'))
        history["val_loss_dice"].append(float('nan'))
        history["val_loss_ce"].append(float('nan'))
        history["dice"].append(float('nan'))
        history["iou"].append(float('nan'))
        history["lr"].append(optimizer.param_groups[0]["lr"])
        history["thr"].append(current_thr)
        history["validated"].append(False)

    # Step scheduler
    scheduler.step()

    # Early stopping
    if epochs_no_improve >= CONFIG["patience"]:
        print(f"\n[Early Stopping] No improvement for {CONFIG['patience']} epochs. Best Dice: {best_metric:.4f} @ epoch {best_epoch}")
        break

    # Memory cleanup
    if (epoch + 1) % 10 == 0:
        torch.cuda.empty_cache()

print("")
print(f"Training Complete!")
print(f"Best Validation Dice: {best_metric:.4f} @ Epoch {best_epoch}")
print(f"Best Model Saved: {CONFIG['best_model_path']}")
print(f"Final Threshold: {current_thr:.3f}")
print("")

# plots
print("Generating plots...")

epochs = history["epoch"]
val_epochs = [e for e, v in zip(epochs, history["validated"]) if v]

fig, axes = plt.subplots(3, 1, figsize=(14, 12), dpi=150)
fig.suptitle('Training History', fontsize=16, fontweight='bold')

# dice
ax = axes[0]
ax.plot(val_epochs, [history["dice"][i-1] for i in val_epochs], 'o-',
        color='#1f77b4', linewidth=2, markersize=5, label='Validation Dice')
if best_epoch != -1:
    ax.axvline(x=best_epoch, color='gold', linestyle='--', alpha=0.7,
               label=f'Best Dice ({best_metric:.4f}) @ Epoch {best_epoch}')
    ax.scatter(best_epoch, best_metric, s=150, c='gold', marker='*',
               edgecolors='black', linewidth=1.5, zorder=5)
ax.set_title('Validation Dice Score', fontsize=12, fontweight='bold')
ax.set_xlabel('Epoch')
ax.set_ylabel('Dice Score')
ax.grid(True, alpha=0.3)
ax.legend(loc='lower right')
ax.set_ylim(0, 1)

# losses
ax = axes[1]
ax.plot(epochs, history["train_loss"], 'o-', color='#ff7f0e',
        linewidth=1.5, markersize=3, label='Training Loss')
ax.plot(val_epochs, [history["val_loss"][i-1] for i in val_epochs], 's-',
        color='#2ca02c', linewidth=2, markersize=4, label='Validation Loss')
ax.set_title('Training & Validation Loss', fontsize=12, fontweight='bold')
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.grid(True, alpha=0.3)
ax.legend()

# other plots
ax = axes[2]
ax_twin = ax.twinx()
line1 = ax.plot(epochs, history["lr"], 'o-', color='#d62728',
                linewidth=1.5, markersize=3, label='Learning Rate')
line2 = ax_twin.plot(val_epochs, [history["thr"][i-1] for i in val_epochs], 'D-',
                     color='#9467bd', linewidth=1.5, markersize=3, label='Threshold')
ax.set_title('Learning Rate & Optimal Threshold', fontsize=12, fontweight='bold')
ax.set_xlabel('Epoch')
ax.set_ylabel('Learning Rate', color='#d62728')
ax_twin.set_ylabel('Threshold', color='#9467bd')
ax.grid(True, alpha=0.3)
lines = line1 + line2
labs = [l.get_label() for l in lines]
ax.legend(lines, labs, loc='center right')

plt.tight_layout()
plt.savefig("training_history.png", dpi=300, bbox_inches='tight')
print("Plot saved: training_history.png")
plt.show()

model.load_state_dict(torch.load(CONFIG["best_model_path"]))
final_metrics = validate_model(model, val_loader, threshold=current_thr, do_threshold_sweep=True, calc_components=True)
print(f"Final Dice: {final_metrics[3]:.4f} | Final IoU: {final_metrics[4]:.4f}")

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.7/2.7 MB[0m [31m32.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m5.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m100.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m85.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m49.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m21.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━

2025-11-30 15:17:42.435216: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1764515862.859449      20 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1764515862.959617      20 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


CUDA: Tesla T4 | GPUs=2
GPU Memory: 15.83 GB
[Dataset] Loaded 137 valid image-label pairs
[Split] Train: 109 | Val: 28
[Model] SegResNet | Parameters: 18,844,545
Starting Training...

[Curriculum] Epoch 1: Foreground-only (pos=1, neg=0)
[Memory] Before training: 0.15 GB allocated


Epoch 1/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 001] TrainLoss: 1.1630 | ValLoss: 1.3330 | ValDice@thr0.50: 0.2713 | ValIoU: 0.1581 | LR: 1.00e-05 | **NEW BEST**


Epoch 2/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 002] TrainLoss: 1.0381 | ValLoss: 1.2645 | ValDice@thr0.50: 0.4763 | ValIoU: 0.3155 | LR: 1.21e-05 | **NEW BEST**


Epoch 3/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 003] TrainLoss: 0.9840 | ValLoss: 1.1949 | ValDice@thr0.50: 0.4992 | ValIoU: 0.3356 | LR: 1.41e-05 | **NEW BEST**


Epoch 4/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 004] TrainLoss: 0.9500 | ValLoss: 1.1284 | ValDice@thr0.50: 0.5159 | ValIoU: 0.3511 | LR: 1.62e-05 | **NEW BEST**


Epoch 5/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 005] TrainLoss: 0.9251 | ValLoss: 1.0717 | ValDice@thr0.65: 0.5402 | ValIoU: 0.3743 | LR: 1.83e-05 | thr 0.500→0.650 | **NEW BEST**


Epoch 6/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 006] TrainLoss: 0.9097 | ValLoss: 1.0259 | ValDice@thr0.65: 0.6636 | ValIoU: 0.5051 | LR: 2.04e-05 | **NEW BEST**


Epoch 7/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 007] TrainLoss: 0.8921 | ValLoss: 0.9904 | ValDice@thr0.65: 0.6806 | ValIoU: 0.5247 | LR: 2.24e-05 | **NEW BEST**


Epoch 8/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 008] TrainLoss: 0.8813 | ValLoss: 0.9615 | ValDice@thr0.65: 0.6963 | ValIoU: 0.5432 | LR: 2.45e-05 | **NEW BEST**


Epoch 9/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 009] TrainLoss: 0.8655 | ValLoss: 0.9389 | ValDice@thr0.65: 0.7088 | ValIoU: 0.5581 | LR: 2.66e-05 | **NEW BEST**


Epoch 10/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 010] TrainLoss: 0.8585 | ValLoss: 0.9204 | ValDice@thr0.65: 0.7185 | ValIoU: 0.5700 | LR: 2.86e-05 | **NEW BEST**


Epoch 11/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 011] TrainLoss: 0.8458 | ValLoss: 0.9044 | ValDice@thr0.65: 0.7289 | ValIoU: 0.5823 | LR: 3.07e-05 | **NEW BEST**


Epoch 12/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 012] TrainLoss: 0.8362 | ValLoss: 0.8908 | ValDice@thr0.65: 0.7382 | ValIoU: 0.5936 | LR: 3.28e-05 | **NEW BEST**


Epoch 13/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 013] TrainLoss: 0.8226 | ValLoss: 0.8781 | ValDice@thr0.65: 0.7455 | ValIoU: 0.6022 | LR: 3.49e-05 | **NEW BEST**


Epoch 14/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 014] TrainLoss: 0.8070 | ValLoss: 0.8665 | ValDice@thr0.65: 0.7526 | ValIoU: 0.6103 | LR: 3.69e-05 | **NEW BEST**


Epoch 15/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 015] TrainLoss: 0.7997 | ValLoss: 0.8551 | ValDice@thr0.65: 0.7597 | ValIoU: 0.6187 | LR: 3.90e-05 | **NEW BEST**


Epoch 16/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 016] TrainLoss: 0.7939 | ValLoss: 0.8448 | ValDice@thr0.65: 0.7655 | ValIoU: 0.6256 | LR: 4.11e-05 | **NEW BEST**


Epoch 17/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 017] TrainLoss: 0.7785 | ValLoss: 0.8344 | ValDice@thr0.65: 0.7718 | ValIoU: 0.6333 | LR: 4.31e-05 | **NEW BEST**


Epoch 18/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 018] TrainLoss: 0.7701 | ValLoss: 0.8244 | ValDice@thr0.65: 0.7758 | ValIoU: 0.6381 | LR: 4.52e-05 | **NEW BEST**


Epoch 19/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 019] TrainLoss: 0.7585 | ValLoss: 0.8146 | ValDice@thr0.65: 0.7799 | ValIoU: 0.6433 | LR: 4.73e-05 | **NEW BEST**


Epoch 20/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 020] TrainLoss: 0.7485 | ValLoss: 0.8053 | ValDice@thr0.60: 0.7825 | ValIoU: 0.6464 | LR: 4.94e-05 | thr 0.650→0.600 | **NEW BEST**


Epoch 21/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 021] TrainLoss: 0.7371 | ValLoss: 0.7958 | ValDice@thr0.60: 0.7911 | ValIoU: 0.6579 | LR: 5.14e-05 | **NEW BEST**


Epoch 22/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 022] TrainLoss: 0.7264 | ValLoss: 0.7861 | ValDice@thr0.60: 0.7942 | ValIoU: 0.6619 | LR: 5.35e-05 | **NEW BEST**


Epoch 23/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 023] TrainLoss: 0.7173 | ValLoss: 0.7764 | ValDice@thr0.60: 0.7970 | ValIoU: 0.6654 | LR: 5.56e-05 | **NEW BEST**


Epoch 24/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 024] TrainLoss: 0.7090 | ValLoss: 0.7661 | ValDice@thr0.60: 0.7997 | ValIoU: 0.6689 | LR: 5.76e-05 | **NEW BEST**


Epoch 25/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 025] TrainLoss: 0.6965 | ValLoss: 0.7562 | ValDice@thr0.55: 0.8022 | ValIoU: 0.6722 | LR: 5.97e-05 | thr 0.600→0.550 | **NEW BEST**


Epoch 26/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 026] TrainLoss: 0.6859 | ValLoss: 0.7470 | ValDice@thr0.55: 0.8056 | ValIoU: 0.6769 | LR: 6.18e-05 | **NEW BEST**


Epoch 27/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 027] TrainLoss: 0.6711 | ValLoss: 0.7372 | ValDice@thr0.55: 0.8080 | ValIoU: 0.6801 | LR: 6.39e-05 | **NEW BEST**


Epoch 28/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 028] TrainLoss: 0.6664 | ValLoss: 0.7270 | ValDice@thr0.55: 0.8105 | ValIoU: 0.6834 | LR: 6.59e-05 | **NEW BEST**


Epoch 29/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 029] TrainLoss: 0.6544 | ValLoss: 0.7170 | ValDice@thr0.55: 0.8122 | ValIoU: 0.6856 | LR: 6.80e-05 | **NEW BEST**


Epoch 30/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 030] TrainLoss: 0.6510 | ValLoss: 0.7065 | ValDice@thr0.55: 0.8144 | ValIoU: 0.6886 | LR: 7.01e-05 | **NEW BEST**


Epoch 31/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 031] TrainLoss: 0.6490 | ValLoss: 0.6966 | ValDice@thr0.55: 0.8158 | ValIoU: 0.6905 | LR: 7.21e-05 | **NEW BEST**


Epoch 32/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 032] TrainLoss: 0.6301 | ValLoss: 0.6863 | ValDice@thr0.55: 0.8178 | ValIoU: 0.6933 | LR: 7.42e-05 | **NEW BEST**


Epoch 33/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 033] TrainLoss: 0.6168 | ValLoss: 0.6764 | ValDice@thr0.55: 0.8183 | ValIoU: 0.6940 | LR: 7.63e-05 | **NEW BEST**


Epoch 34/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 034] TrainLoss: 0.6034 | ValLoss: 0.6662 | ValDice@thr0.55: 0.8192 | ValIoU: 0.6953 | LR: 7.84e-05 | **NEW BEST**


Epoch 35/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 035] TrainLoss: 0.5994 | ValLoss: 0.6553 | ValDice@thr0.55: 0.8210 | ValIoU: 0.6977 | LR: 8.04e-05 | **NEW BEST**


Epoch 36/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 036] TrainLoss: 0.5857 | ValLoss: 0.6448 | ValDice@thr0.55: 0.8235 | ValIoU: 0.7012 | LR: 8.25e-05 | **NEW BEST**


Epoch 37/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 037] TrainLoss: 0.5744 | ValLoss: 0.6340 | ValDice@thr0.55: 0.8241 | ValIoU: 0.7021 | LR: 8.46e-05 | **NEW BEST**


Epoch 38/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 038] TrainLoss: 0.5632 | ValLoss: 0.6230 | ValDice@thr0.55: 0.8250 | ValIoU: 0.7034 | LR: 8.66e-05 | **NEW BEST**


Epoch 39/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 039] TrainLoss: 0.5604 | ValLoss: 0.6124 | ValDice@thr0.55: 0.8256 | ValIoU: 0.7043 | LR: 8.87e-05 | **NEW BEST**


Epoch 40/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 040] TrainLoss: 0.5510 | ValLoss: 0.6016 | ValDice@thr0.55: 0.8263 | ValIoU: 0.7053 | LR: 9.08e-05 | **NEW BEST**


Epoch 41/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 041] TrainLoss: 0.5385 | ValLoss: 0.5911 | ValDice@thr0.55: 0.8268 | ValIoU: 0.7060 | LR: 9.29e-05 | **NEW BEST**


Epoch 42/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 042] TrainLoss: 0.5185 | ValLoss: 0.5801 | ValDice@thr0.55: 0.8274 | ValIoU: 0.7070 | LR: 9.49e-05 | **NEW BEST**


Epoch 43/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 043] TrainLoss: 0.5134 | ValLoss: 0.5686 | ValDice@thr0.55: 0.8290 | ValIoU: 0.7093 | LR: 9.70e-05 | **NEW BEST**


Epoch 44/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 044] TrainLoss: 0.5086 | ValLoss: 0.5580 | ValDice@thr0.55: 0.8295 | ValIoU: 0.7100 | LR: 9.91e-05 | **NEW BEST**


Epoch 45/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 045] TrainLoss: 0.4931 | ValLoss: 0.5472 | ValDice@thr0.55: 0.8298 | ValIoU: 0.7105 | LR: 1.01e-04 | **NEW BEST**


Epoch 46/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 046] TrainLoss: 0.4860 | ValLoss: 0.5366 | ValDice@thr0.55: 0.8308 | ValIoU: 0.7118 | LR: 1.03e-04 | **NEW BEST**


Epoch 47/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 047] TrainLoss: 0.4791 | ValLoss: 0.5254 | ValDice@thr0.55: 0.8314 | ValIoU: 0.7128 | LR: 1.05e-04 | **NEW BEST**


Epoch 48/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 048] TrainLoss: 0.4650 | ValLoss: 0.5149 | ValDice@thr0.55: 0.8321 | ValIoU: 0.7138 | LR: 1.07e-04 | **NEW BEST**


Epoch 49/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 049] TrainLoss: 0.4632 | ValLoss: 0.5037 | ValDice@thr0.55: 0.8332 | ValIoU: 0.7155 | LR: 1.09e-04 | **NEW BEST**


Epoch 50/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 050] TrainLoss: 0.4509 | ValLoss: 0.4930 | ValDice@thr0.50: 0.8340 | ValIoU: 0.7166 | LR: 1.12e-04 | thr 0.550→0.500 | **NEW BEST**

[Curriculum] Epoch 51: Mixed sampling (pos=3, neg=1)


Epoch 51/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 051] TrainLoss: 0.4493 | ValLoss: 0.4832 | ValDice@thr0.50: 0.8361 | ValIoU: 0.7198 | LR: 1.14e-04 | **NEW BEST**


Epoch 52/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 052] TrainLoss: 0.4360 | ValLoss: 0.4734 | ValDice@thr0.50: 0.8367 | ValIoU: 0.7207 | LR: 1.16e-04 | **NEW BEST**


Epoch 53/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 053] TrainLoss: 0.4246 | ValLoss: 0.4634 | ValDice@thr0.50: 0.8374 | ValIoU: 0.7218 | LR: 1.18e-04 | **NEW BEST**


Epoch 54/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 054] TrainLoss: 0.4158 | ValLoss: 0.4537 | ValDice@thr0.50: 0.8380 | ValIoU: 0.7227 | LR: 1.20e-04 | **NEW BEST**


Epoch 55/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 055] TrainLoss: 0.4116 | ValLoss: 0.4441 | ValDice@thr0.50: 0.8388 | ValIoU: 0.7238 | LR: 1.22e-04 | **NEW BEST**


Epoch 56/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 056] TrainLoss: 0.4076 | ValLoss: 0.4348 | ValDice@thr0.50: 0.8394 | ValIoU: 0.7247 | LR: 1.24e-04 | **NEW BEST**


Epoch 57/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 057] TrainLoss: 0.3981 | ValLoss: 0.4264 | ValDice@thr0.50: 0.8396 | ValIoU: 0.7251 | LR: 1.26e-04 | **NEW BEST**


Epoch 58/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 058] TrainLoss: 0.3891 | ValLoss: 0.4175 | ValDice@thr0.50: 0.8401 | ValIoU: 0.7257 | LR: 1.28e-04 | **NEW BEST**


Epoch 59/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 059] TrainLoss: 0.3800 | ValLoss: 0.4087 | ValDice@thr0.50: 0.8409 | ValIoU: 0.7269 | LR: 1.30e-04 | **NEW BEST**


Epoch 60/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 060] TrainLoss: 0.3674 | ValLoss: 0.4005 | ValDice@thr0.50: 0.8414 | ValIoU: 0.7276 | LR: 1.32e-04 | **NEW BEST**


Epoch 61/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 061] TrainLoss: 0.3692 | ValLoss: 0.3928 | ValDice@thr0.50: 0.8421 | ValIoU: 0.7287 | LR: 1.34e-04 | **NEW BEST**


Epoch 62/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 062] TrainLoss: 0.3630 | ValLoss: 0.3856 | ValDice@thr0.50: 0.8426 | ValIoU: 0.7294 | LR: 1.36e-04 | **NEW BEST**


Epoch 63/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 063] TrainLoss: 0.3538 | ValLoss: 0.3787 | ValDice@thr0.50: 0.8433 | ValIoU: 0.7304 | LR: 1.38e-04 | **NEW BEST**


Epoch 64/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 064] TrainLoss: 0.3457 | ValLoss: 0.3720 | ValDice@thr0.50: 0.8434 | ValIoU: 0.7306 | LR: 1.41e-04 | **NEW BEST**


Epoch 65/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 065] TrainLoss: 0.3424 | ValLoss: 0.3643 | ValDice@thr0.50: 0.8436 | ValIoU: 0.7309 | LR: 1.43e-04 | **NEW BEST**


Epoch 66/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 066] TrainLoss: 0.3397 | ValLoss: 0.3571 | ValDice@thr0.50: 0.8436 | ValIoU: 0.7308 | LR: 1.45e-04 


Epoch 67/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 067] TrainLoss: 0.3449 | ValLoss: 0.3500 | ValDice@thr0.50: 0.8432 | ValIoU: 0.7301 | LR: 1.47e-04 


Epoch 68/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 068] TrainLoss: 0.3381 | ValLoss: 0.3426 | ValDice@thr0.50: 0.8441 | ValIoU: 0.7315 | LR: 1.49e-04 | **NEW BEST**


Epoch 69/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 069] TrainLoss: 0.3300 | ValLoss: 0.3364 | ValDice@thr0.50: 0.8441 | ValIoU: 0.7315 | LR: 1.51e-04 | **NEW BEST**


Epoch 70/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 070] TrainLoss: 0.3258 | ValLoss: 0.3297 | ValDice@thr0.50: 0.8445 | ValIoU: 0.7321 | LR: 1.53e-04 | **NEW BEST**


Epoch 71/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 071] TrainLoss: 0.3220 | ValLoss: 0.3248 | ValDice@thr0.50: 0.8438 | ValIoU: 0.7311 | LR: 1.55e-04 


Epoch 72/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 072] TrainLoss: 0.3176 | ValLoss: 0.3202 | ValDice@thr0.50: 0.8426 | ValIoU: 0.7293 | LR: 1.57e-04 


Epoch 73/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 073] TrainLoss: 0.3150 | ValLoss: 0.3170 | ValDice@thr0.50: 0.8403 | ValIoU: 0.7264 | LR: 1.59e-04 


Epoch 74/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 074] TrainLoss: 0.3111 | ValLoss: 0.3129 | ValDice@thr0.50: 0.8395 | ValIoU: 0.7256 | LR: 1.61e-04 


Epoch 75/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 075] TrainLoss: 0.3075 | ValLoss: 0.3074 | ValDice@thr0.50: 0.8400 | ValIoU: 0.7261 | LR: 1.63e-04 


Epoch 76/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 076] TrainLoss: 0.2971 | ValLoss: 0.3025 | ValDice@thr0.50: 0.8407 | ValIoU: 0.7270 | LR: 1.65e-04 


Epoch 77/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 077] TrainLoss: 0.2956 | ValLoss: 0.2994 | ValDice@thr0.50: 0.8394 | ValIoU: 0.7256 | LR: 1.67e-04 


Epoch 78/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 078] TrainLoss: 0.2943 | ValLoss: 0.2957 | ValDice@thr0.50: 0.8393 | ValIoU: 0.7256 | LR: 1.69e-04 


Epoch 79/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 079] TrainLoss: 0.2872 | ValLoss: 0.2910 | ValDice@thr0.50: 0.8402 | ValIoU: 0.7267 | LR: 1.72e-04 


Epoch 80/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 080] TrainLoss: 0.2877 | ValLoss: 0.2866 | ValDice@thr0.50: 0.8405 | ValIoU: 0.7271 | LR: 1.74e-04 


Epoch 81/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 081] TrainLoss: 0.2908 | ValLoss: 0.2827 | ValDice@thr0.50: 0.8412 | ValIoU: 0.7280 | LR: 1.76e-04 


Epoch 82/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 082] TrainLoss: 0.2864 | ValLoss: 0.2773 | ValDice@thr0.50: 0.8433 | ValIoU: 0.7306 | LR: 1.78e-04 


Epoch 83/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 083] TrainLoss: 0.2856 | ValLoss: 0.2714 | ValDice@thr0.50: 0.8463 | ValIoU: 0.7348 | LR: 1.80e-04 | **NEW BEST**


Epoch 84/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 084] TrainLoss: 0.2737 | ValLoss: 0.2676 | ValDice@thr0.50: 0.8471 | ValIoU: 0.7359 | LR: 1.82e-04 | **NEW BEST**


Epoch 85/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 085] TrainLoss: 0.2712 | ValLoss: 0.2648 | ValDice@thr0.50: 0.8466 | ValIoU: 0.7352 | LR: 1.84e-04 


Epoch 86/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 086] TrainLoss: 0.2710 | ValLoss: 0.2620 | ValDice@thr0.50: 0.8463 | ValIoU: 0.7348 | LR: 1.86e-04 


Epoch 87/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 087] TrainLoss: 0.2728 | ValLoss: 0.2586 | ValDice@thr0.50: 0.8469 | ValIoU: 0.7356 | LR: 1.88e-04 


Epoch 88/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 088] TrainLoss: 0.2793 | ValLoss: 0.2557 | ValDice@thr0.50: 0.8471 | ValIoU: 0.7360 | LR: 1.90e-04 | **NEW BEST**


Epoch 89/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 089] TrainLoss: 0.2686 | ValLoss: 0.2529 | ValDice@thr0.50: 0.8472 | ValIoU: 0.7362 | LR: 1.92e-04 | **NEW BEST**


Epoch 90/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 090] TrainLoss: 0.2707 | ValLoss: 0.2516 | ValDice@thr0.45: 0.8457 | ValIoU: 0.7339 | LR: 1.94e-04 | thr 0.500→0.450 


Epoch 91/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 091] TrainLoss: 0.2612 | ValLoss: 0.2518 | ValDice@thr0.45: 0.8439 | ValIoU: 0.7320 | LR: 1.96e-04 


Epoch 92/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 092] TrainLoss: 0.2587 | ValLoss: 0.2510 | ValDice@thr0.45: 0.8427 | ValIoU: 0.7306 | LR: 1.98e-04 


Epoch 93/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 093] TrainLoss: 0.2628 | ValLoss: 0.2473 | ValDice@thr0.45: 0.8442 | ValIoU: 0.7323 | LR: 2.01e-04 


Epoch 94/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 094] TrainLoss: 0.2672 | ValLoss: 0.2447 | ValDice@thr0.45: 0.8442 | ValIoU: 0.7325 | LR: 2.03e-04 


Epoch 95/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 095] TrainLoss: 0.2524 | ValLoss: 0.2440 | ValDice@thr0.45: 0.8427 | ValIoU: 0.7307 | LR: 2.05e-04 


Epoch 96/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 096] TrainLoss: 0.2495 | ValLoss: 0.2427 | ValDice@thr0.45: 0.8422 | ValIoU: 0.7303 | LR: 2.07e-04 


Epoch 97/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 097] TrainLoss: 0.2447 | ValLoss: 0.2403 | ValDice@thr0.45: 0.8426 | ValIoU: 0.7310 | LR: 2.09e-04 


Epoch 98/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 098] TrainLoss: 0.2459 | ValLoss: 0.2379 | ValDice@thr0.45: 0.8430 | ValIoU: 0.7314 | LR: 2.11e-04 


Epoch 99/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 099] TrainLoss: 0.2444 | ValLoss: 0.2359 | ValDice@thr0.45: 0.8432 | ValIoU: 0.7317 | LR: 2.13e-04 


Epoch 100/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 100] TrainLoss: 0.2409 | ValLoss: 0.2362 | ValDice@thr0.45: 0.8415 | ValIoU: 0.7300 | LR: 2.15e-04 


Epoch 101/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 101] TrainLoss: 0.2380 | ValLoss: 0.2345 | ValDice@thr0.45: 0.8416 | ValIoU: 0.7302 | LR: 2.17e-04 


Epoch 102/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 102] TrainLoss: 0.2371 | ValLoss: 0.2332 | ValDice@thr0.45: 0.8372 | ValIoU: 0.7262 | LR: 2.19e-04 


Epoch 103/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 103] TrainLoss: 0.2426 | ValLoss: 0.2326 | ValDice@thr0.45: 0.8367 | ValIoU: 0.7260 | LR: 2.21e-04 


Epoch 104/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 104] TrainLoss: 0.2424 | ValLoss: 0.2302 | ValDice@thr0.45: 0.8371 | ValIoU: 0.7264 | LR: 2.23e-04 


Epoch 105/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 105] TrainLoss: 0.2436 | ValLoss: 0.2267 | ValDice@thr0.45: 0.8434 | ValIoU: 0.7323 | LR: 2.25e-04 


Epoch 106/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 106] TrainLoss: 0.2351 | ValLoss: 0.2239 | ValDice@thr0.45: 0.8444 | ValIoU: 0.7334 | LR: 2.27e-04 


Epoch 107/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 107] TrainLoss: 0.2467 | ValLoss: 0.2198 | ValDice@thr0.45: 0.8471 | ValIoU: 0.7365 | LR: 2.30e-04 


Epoch 108/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 108] TrainLoss: 0.2500 | ValLoss: 0.2156 | ValDice@thr0.45: 0.8502 | ValIoU: 0.7408 | LR: 2.32e-04 | **NEW BEST**


Epoch 109/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 109] TrainLoss: 0.2420 | ValLoss: 0.2125 | ValDice@thr0.45: 0.8520 | ValIoU: 0.7433 | LR: 2.34e-04 | **NEW BEST**


Epoch 110/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 110] TrainLoss: 0.2419 | ValLoss: 0.2116 | ValDice@thr0.45: 0.8518 | ValIoU: 0.7431 | LR: 2.36e-04 


Epoch 111/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 111] TrainLoss: 0.2324 | ValLoss: 0.2106 | ValDice@thr0.45: 0.8517 | ValIoU: 0.7430 | LR: 2.38e-04 


Epoch 112/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 112] TrainLoss: 0.2365 | ValLoss: 0.2082 | ValDice@thr0.45: 0.8528 | ValIoU: 0.7445 | LR: 2.40e-04 | **NEW BEST**
          └─ Loss Components - Dice: 0.1256, CE: 0.0826


Epoch 113/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 113] TrainLoss: 0.2290 | ValLoss: 0.2063 | ValDice@thr0.45: 0.8536 | ValIoU: 0.7457 | LR: 2.42e-04 | **NEW BEST**
          └─ Loss Components - Dice: 0.1247, CE: 0.0816


Epoch 114/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 114] TrainLoss: 0.2257 | ValLoss: 0.2050 | ValDice@thr0.45: 0.8538 | ValIoU: 0.7461 | LR: 2.44e-04 | **NEW BEST**
          └─ Loss Components - Dice: 0.1241, CE: 0.0808


Epoch 115/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 115] TrainLoss: 0.2212 | ValLoss: 0.2037 | ValDice@thr0.45: 0.8541 | ValIoU: 0.7465 | LR: 2.46e-04 | **NEW BEST**
          └─ Loss Components - Dice: 0.1236, CE: 0.0801


Epoch 116/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 116] TrainLoss: 0.2224 | ValLoss: 0.2029 | ValDice@thr0.45: 0.8540 | ValIoU: 0.7463 | LR: 2.48e-04 
          └─ Loss Components - Dice: 0.1234, CE: 0.0795


Epoch 117/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 117] TrainLoss: 0.2226 | ValLoss: 0.2023 | ValDice@thr0.45: 0.8538 | ValIoU: 0.7460 | LR: 2.50e-04 
          └─ Loss Components - Dice: 0.1233, CE: 0.0790


Epoch 118/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 118] TrainLoss: 0.2234 | ValLoss: 0.2017 | ValDice@thr0.45: 0.8535 | ValIoU: 0.7455 | LR: 2.52e-04 
          └─ Loss Components - Dice: 0.1233, CE: 0.0784


Epoch 119/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 119] TrainLoss: 0.2225 | ValLoss: 0.2005 | ValDice@thr0.45: 0.8538 | ValIoU: 0.7460 | LR: 2.54e-04 
          └─ Loss Components - Dice: 0.1228, CE: 0.0777


Epoch 120/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 120] TrainLoss: 0.2243 | ValLoss: 0.1999 | ValDice@thr0.45: 0.8535 | ValIoU: 0.7456 | LR: 2.57e-04 
          └─ Loss Components - Dice: 0.1228, CE: 0.0771


Epoch 121/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 121] TrainLoss: 0.2219 | ValLoss: 0.1992 | ValDice@thr0.45: 0.8535 | ValIoU: 0.7456 | LR: 2.59e-04 
          └─ Loss Components - Dice: 0.1227, CE: 0.0766


Epoch 122/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 122] TrainLoss: 0.2131 | ValLoss: 0.1992 | ValDice@thr0.45: 0.8528 | ValIoU: 0.7445 | LR: 2.61e-04 
          └─ Loss Components - Dice: 0.1230, CE: 0.0762


Epoch 123/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 123] TrainLoss: 0.2158 | ValLoss: 0.1977 | ValDice@thr0.45: 0.8533 | ValIoU: 0.7453 | LR: 2.63e-04 
          └─ Loss Components - Dice: 0.1223, CE: 0.0754


Epoch 124/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 124] TrainLoss: 0.2160 | ValLoss: 0.1958 | ValDice@thr0.45: 0.8544 | ValIoU: 0.7469 | LR: 2.65e-04 | **NEW BEST**
          └─ Loss Components - Dice: 0.1212, CE: 0.0746


Epoch 125/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 125] TrainLoss: 0.2140 | ValLoss: 0.1946 | ValDice@thr0.45: 0.8547 | ValIoU: 0.7474 | LR: 2.67e-04 | **NEW BEST**
          └─ Loss Components - Dice: 0.1207, CE: 0.0740

[Curriculum] Epoch 126: Balanced (pos=1, neg=1)


Epoch 126/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 126] TrainLoss: 0.2093 | ValLoss: 0.1949 | ValDice@thr0.45: 0.8536 | ValIoU: 0.7457 | LR: 2.69e-04 
          └─ Loss Components - Dice: 0.1212, CE: 0.0737


Epoch 127/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 127] TrainLoss: 0.2091 | ValLoss: 0.1945 | ValDice@thr0.45: 0.8532 | ValIoU: 0.7452 | LR: 2.71e-04 
          └─ Loss Components - Dice: 0.1213, CE: 0.0732


Epoch 128/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 128] TrainLoss: 0.2098 | ValLoss: 0.1948 | ValDice@thr0.45: 0.8522 | ValIoU: 0.7437 | LR: 2.73e-04 
          └─ Loss Components - Dice: 0.1218, CE: 0.0729


Epoch 129/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 129] TrainLoss: 0.2059 | ValLoss: 0.1947 | ValDice@thr0.45: 0.8515 | ValIoU: 0.7428 | LR: 2.75e-04 
          └─ Loss Components - Dice: 0.1221, CE: 0.0726


Epoch 130/130 [Train]:   0%|          | 0/109 [00:00<?, ?it/s]

Validation:   0%|          | 0/28 [00:00<?, ?it/s]

[Epoch 130] TrainLoss: 0.2082 | ValLoss: 0.1939 | ValDice@thr0.45: 0.8516 | ValIoU: 0.7430 | LR: 2.77e-04 
          └─ Loss Components - Dice: 0.1219, CE: 0.0721

Training Complete!
Best Validation Dice: 0.8547 @ Epoch 125
Best Model Saved: /kaggle/working/best_segres.pth
Final Threshold: 0.450

Generating plots...
Plot saved: training_history.png


Validation:   0%|          | 0/28 [00:00<?, ?it/s]

Final Dice: 0.8547 | Final IoU: 0.7474
