In [2]:
from pathlib import Path
import os
import logging
from typing import Tuple, List, Dict

import numpy as np
from PIL import Image
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torchvision.ops import box_iou
from torchvision.models._api import WeightsEnum
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator

from torchgeo.models import scalemae_large_patch16, ScaleMAELarge16_Weights
from torchmetrics.detection.mean_ap import MeanAveragePrecision
from tqdm import tqdm


In [3]:


class ScaleMAEBackbone(nn.Module):
    
    """
    Wraps TorchGeo ScaleMAE as a single-scale detection backbone for torchvision FasterRCNN.

    Assumptions:
      - ViT patch size = 16 (true for scalemae_large_patch16)
      - model.forward_features(x) returns tokens or an embedding with a CLS token first
      - We reshape tokens to (B, C, H', W') where H'=W'=img_size//16
    """
    def __init__(self, img_size=224, weights=ScaleMAELarge16_Weights.FMOW_RGB, out_channels=768):
        super().__init__()
        self.img_size = img_size
        self.patch = 16
        assert img_size % self.patch == 0, "img_size must be a multiple of patch size (16)."
        self.grid = img_size // self.patch  # e.g., 8 for 128x128

        # Load pretrained ScaleMAE model; num_classes=0 to avoid creating a classifier head
        self.body = scalemae_large_patch16(weights=weights)

        # Figure out the embedding dimension from the model head or norm
        # Commonly ViT-L/16 has embed_dim=1024, but we derive it programmatically.
        # Try a forward pass to infer embed dim robustly.
        with torch.no_grad():
            dummy = torch.zeros(1, 3, img_size, img_size)
            feats = self._forward_tokens(dummy)  # (1, grid*grid, embed_dim)
            embed_dim = feats.shape[-1]

        # Project to the out_channels expected by detection heads
        self.proj = nn.Conv2d(embed_dim, out_channels, kernel_size=1)
        self.out_channels = out_channels  # torchvision convention

    def _forward_tokens(self, x: torch.Tensor) -> torch.Tensor:
        """
        Returns token sequence of shape (B, N, C). Removes CLS token if present.
        We try the common ViT/Timm-style APIs.
        """
        # 1) Preferred: forward_features
        if hasattr(self.body, "forward_features"):
            t = self.body.forward_features(x)  # could be (B, N, C) or dict
            if isinstance(t, dict):
                # If a dict (rare), try a common key
                if "x" in t:
                    t = t["x"]
                elif "tokens" in t:
                    t = t["tokens"]
                else:
                    # fall back: try to find the first tensor
                    t = next(v for v in t.values() if torch.is_tensor(v))
        else:
            # 2) Fallback: call the model to get logits, then look for intermediate attr — not ideal.
            # We strongly prefer forward_features, but keep a fallback that raises a clear error.
            raise RuntimeError("ScaleMAE model does not expose forward_features; cannot extract tokens.")

        # t is now expected to be (B, N, C) with optional CLS token at t[:,0]
        if t.dim() != 3:
            # Some models return (B, C, H, W) already
            if t.dim() == 4:
                B, C, H, W = t.shape
                return t.permute(0, 2, 3, 1).reshape(B, H*W, C)  # -> (B, N, C)
            raise RuntimeError(f"Unexpected feature shape from forward_features: {tuple(t.shape)}")

        # If there is a CLS token, N should be grid*grid + 1
        B, N, C = t.shape
        expected_N_with_cls = self.grid * self.grid + 1
        expected_N_no_cls   = self.grid * self.grid
        if N == expected_N_with_cls:
            t = t[:, 1:, :]  # drop CLS
        elif N != expected_N_no_cls:
            # Some models may use different token layouts; try to reshape anyway and error if impossible
            if N != expected_N_no_cls:
                raise RuntimeError(
                    f"Token count {N} doesn't match expected {expected_N_no_cls} or {expected_N_with_cls} "
                    f"for grid={self.grid} (img={self.img_size}, patch={self.patch})."
                )
        return t  # (B, grid*grid, C)

    def forward(self, x: torch.Tensor):
        """
        Returns a dict of feature maps. torchvision's FasterRCNN will use keys in insertion order.
        """
        t = self._forward_tokens(x)  # (B, grid*grid, C)
        B, N, C = t.shape
        H = W = self.grid
        fmap = t.transpose(1, 2).reshape(B, C, H, W)  # (B, C, H, W)
        fmap = self.proj(fmap)  # (B, out_channels, H, W)
        return {"0": fmap}



In [4]:
backbone = ScaleMAEBackbone(weights=ScaleMAELarge16_Weights.FMOW_RGB, out_channels=768)
total_params = sum(p.numel() for p in backbone.parameters() if p.requires_grad)
print(f"Total trainable parameters: {total_params:,}")
trainable_params = sum(p.numel() for p in backbone.parameters() if p.requires_grad)
non_trainable_params = sum(p.numel() for p in backbone.parameters() if not p.requires_grad)

print(f"Trainable parameters:     {trainable_params:,}")
print(f"Non-trainable parameters: {non_trainable_params:,}")
print(f"Total parameters:         {trainable_params + non_trainable_params:,}")

Total trainable parameters: 304,912,104
Trainable parameters:     304,912,104
Non-trainable parameters: 201,728
Total parameters:         305,113,832


In [5]:
def build_scalemae_fasterrcnn(num_classes: int,
                              img_size: int = 224,
                              weights=ScaleMAELarge16_Weights.FMOW_RGB):
    """
    Constructs a FasterRCNN detector that uses ScaleMAE as backbone.
    """
    backbone = ScaleMAEBackbone(img_size=img_size, weights=weights, out_channels=768)

    # Anchors: stride is 16 (ViT patch size). With 8×8 fmap at 128 input,
    # choose relatively small anchor sizes. You can tune these based on object scale.
    anchor_generator = AnchorGenerator(
        sizes=((16, 24, 32, 48, 64),),       # pixels at input scale
        aspect_ratios=((0.5, 1.0, 2.0),)
    )

    # ROI heads params can be tweaked; defaults are fine to start
    model = FasterRCNN(
        backbone=backbone,
        num_classes=num_classes,    # include background as class 0 internally
        rpn_anchor_generator=anchor_generator,
        min_size=224, 
        max_size=224,
        box_detections_per_img=300
    )
    return model


In [6]:
model=build_scalemae_fasterrcnn(num_classes=4)
print("Model built successfully with ScaleMAE backbone.")
print("model footprint", model)

Model built successfully with ScaleMAE backbone.
model footprint FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(224,), max_size=224, mode='bilinear')
  )
  (backbone): ScaleMAEBackbone(
    (body): ScaleMAE(
      (patch_embed): PatchEmbed(
        (proj): Conv2d(3, 1024, kernel_size=(16, 16), stride=(16, 16))
        (norm): Identity()
      )
      (pos_drop): Dropout(p=0.0, inplace=False)
      (patch_drop): Identity()
      (norm_pre): Identity()
      (blocks): Sequential(
        (0): Block(
          (norm1): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)
          (attn): Attention(
            (qkv): Linear(in_features=1024, out_features=3072, bias=True)
            (q_norm): Identity()
            (k_norm): Identity()
            (attn_drop): Dropout(p=0.0, inplace=False)
            (proj): Linear(in_features=1024, out_features=1024, bias=True)
            (proj_drop):

In [7]:
logging.basicConfig(level=logging.INFO)

# -----------------------
# Config
# -----------------------
IMG_SIZE = 224
BATCH_SIZE = 128
NUM_WORKERS = 16
LR = 1e-4
WEIGHT_DECAY = 0.05
EPOCHS = 10
CLASS_AGNOSTIC_EVAL = True  # set False for per-class AP

# Paths
TRAIN_ROOT = "/home/rishabh.mondal/Brick-Kilns-project/ijcai_2025_kilns/data/processed_data/sentinel/final_data_neurips_2025/train"
TEST_ROOT  = "/home/rishabh.mondal/Brick-Kilns-project/ijcai_2025_kilns/data/processed_data/sentinel/final_data_neurips_2025/test"

# Classes (your labels are 0-based in files, we add +1 so background=0 internally)
# Adjust if you have different mapping
CLASSES = ["CFCBK", "FCBK", "Zigzag"]
NUM_CLASSES = len(CLASSES) + 1  # + background


# -----------------------
# Dataset
# -----------------------
class BrickKilnDataset(Dataset):
    """
    Expects:
      <root>/<split>/images/*.png
      <root>/<split>/labels/*.txt
    or if split is '' (empty), then <root>/images, <root>/labels

    Label format assumed YOLO-OBB-like with 9 tokens per line (class + 8 coords normalized 0..1).
    We convert OBB -> AABB (xyxy) after resizing to IMG_SIZE.
    """

    def __init__(self, root: str, split: str, input_size: int = IMG_SIZE,
                 mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
        self.root = Path(root)
        self.split = split
        self.img_dir = self.root / split / 'images' if split else self.root / 'images'
        self.label_dir = self.root / split / 'yolo_obb_labels' if split else self.root / 'yolo_obb_labels'

        self.transform = transforms.Compose([
            transforms.Resize((input_size, input_size)),
            transforms.ToTensor(),
            transforms.Normalize(mean=mean, std=std),
        ])

        self.img_files = []
        all_files = sorted(os.listdir(self.img_dir))
        logging.info(f"Scanning {len(all_files)} images in {self.img_dir} for valid annotations...")
        for img_name in all_files:
            if img_name.lower().endswith('.png') and self._has_valid_annotations(img_name):
                self.img_files.append(img_name)
        logging.info(f"Found {len(self.img_files)} images with valid annotations in {self.img_dir}")

    def _has_valid_annotations(self, img_name: str) -> bool:
        label_path = self.label_dir / f"{Path(img_name).stem}.txt"
        if not label_path.exists():
            return False
        try:
            with open(label_path, 'r') as f:
                for line in f:
                    if len(line.strip().split()) == 9:
                        return True
        except Exception:
            return False
        return False

    def __len__(self) -> int:
        return len(self.img_files)

    def __getitem__(self, idx: int) -> Tuple[torch.Tensor, Dict[str, torch.Tensor]]:
        img_name = self.img_files[idx]
        img_path = self.img_dir / img_name
        label_path = self.label_dir / f"{Path(img_name).stem}.txt"

        try:
            img = Image.open(img_path).convert("RGB")
            img_tensor = self.transform(img)  # (3, H, W)
        except Exception as e:
            logging.error(f"Error opening image {img_path}: {e}")
            # Return an empty target so collate can drop it
            return torch.zeros(3, IMG_SIZE, IMG_SIZE), {
                "boxes": torch.empty((0, 4), dtype=torch.float32),
                "labels": torch.empty((0,), dtype=torch.int64),
            }

        _, new_h, new_w = img_tensor.shape
        boxes, labels = [], []

        try:
            with open(label_path, 'r') as f:
                for line in f:
                    parts = line.strip().split()
                    if len(parts) != 9:
                        continue
                    class_id = int(parts[0]) + 1  # shift by +1, background=0
                    obb = np.array([float(p) for p in parts[1:]])
                    x_coords = obb[0::2] * new_w
                    y_coords = obb[1::2] * new_h
                    xmin, ymin = float(np.min(x_coords)), float(np.min(y_coords))
                    xmax, ymax = float(np.max(x_coords)), float(np.max(y_coords))
                    if xmax > xmin and ymax > ymin:
                        boxes.append([xmin, ymin, xmax, ymax])
                        labels.append(class_id)
        except Exception:
            pass

        target = {
            "boxes": torch.tensor(boxes, dtype=torch.float32),
            "labels": torch.tensor(labels, dtype=torch.int64),
        }
        return img_tensor, target


def collate_fn(batch):
    # drop images with no boxes
    batch = [b for b in batch if b[1]["boxes"].numel() > 0]
    if not batch:
        return [], []
    images, targets = zip(*batch)
    return list(images), list(targets)


In [8]:
# build_scalemae_fasterrcnn

In [9]:
# def build_model(num_classes: int, img_size: int = IMG_SIZE):
#     # Use ScaleMAE pretrained mean/std in the dataset; so set identity stats here
#     weights = ScaleMAELarge16_Weights.FMOW_RGB
#     backbone = ScaleMAEBackbone(img_size=img_size, weights=weights, out_channels=768)

#     # Anchor sizes tuned to stride=16 & 224 input (feature map 14x14). Tweak as needed.
#     anchor_generator = AnchorGenerator(sizes=((16, 24, 32, 48, 64),),
#                                        aspect_ratios=((0.5, 1.0, 2.0),))

#     model = FasterRCNN(
#         backbone=backbone,
#         num_classes=num_classes,
#         rpn_anchor_generator=anchor_generator,
#         # Transform: keep 224x224 and identity normalization (dataset handles norm)
#         min_size=img_size,
#         max_size=img_size,
#         image_mean=[0.0, 0.0, 0.0],
#         image_std=[1.0, 1.0, 1.0],
#         box_detections_per_img=300,
#     )
#     return model

In [10]:
# final_model=build_model(num_classes=NUM_CLASSES, img_size=IMG_SIZE)


In [11]:
# final_model

In [12]:
# -----------------------
# Training & Evaluation
# -----------------------
def train_one_epoch(model, loader, optimizer, device):
    model.train()
    running = 0.0
    for images, targets in loader:
        if len(images) == 0:
            continue
        images = [img.to(device) for img in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        losses = model(images, targets)
        loss = sum(v for v in losses.values())

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running += loss.item()
    return running / max(1, len(loader))

In [13]:
from torchmetrics.detection.mean_ap import MeanAveragePrecision
from tqdm import tqdm
import logging, numpy as np
import torch

@torch.no_grad()
def validate_report(model, data_loader, device, epoch=0, class_names=None, writer=None, desc_suffix=""):
    """
    Returns:
      {
        "ca_ap50": float,                    # class-agnostic AP@50
        "ca_ap5095": float,                  # class-agnostic AP@[.50:.95]
        "cw_ap50": dict[name_or_id -> ap],   # class-wise AP@50
        "cw_ap5095": dict[name_or_id -> ap], # class-wise AP@[.50:.95]
      }
    """
    model.eval()
    loop = tqdm(data_loader, desc=f"Epoch {epoch} [Validation{desc_suffix}]")

    # Class-aware (per-class metrics across .50:.95)
    tm_cls = MeanAveragePrecision(iou_type="bbox", box_format="xyxy", class_metrics=True)
    # Class-agnostic (collapse all labels)
    tm_agn = MeanAveragePrecision(iou_type="bbox", box_format="xyxy", class_metrics=False)

    for images, targets in loop:
        if not images: 
            continue
        images = [img.to(device) for img in images]
        outputs = model(images)

        # Move to CPU for TM
        preds = [{k: v.detach().cpu() for k, v in o.items()} for o in outputs]
        tgts  = [{k: v.detach().cpu() for k, v in t.items()} for t in targets]

        # Class-aware update
        try:
            tm_cls.update(preds, tgts)
        except Exception as e:
            logging.warning(f"[validate_report] class-aware update failed: {e}")

        # Class-agnostic copies (labels -> 0)
        preds_agn = [{"boxes": d["boxes"], "scores": d["scores"], "labels": d["labels"].new_zeros(d["labels"].shape)} for d in preds]
        tgts_agn  = [{"boxes": d["boxes"], "labels": d["labels"].new_zeros(d["labels"].shape)} for d in tgts]
        try:
            tm_agn.update(preds_agn, tgts_agn)
        except Exception as e:
            logging.warning(f"[validate_report] class-agnostic update failed: {e}")

    # Compute
    res_cls = tm_cls.compute()
    res_agn = tm_agn.compute()

    # ---- (1) CA AP@50 and (2) CA AP@50:95 ----
    ca_ap50   = float(res_agn["map_50"].item()) if res_agn["map_50"].numel() else 0.0
    ca_ap5095 = float(res_agn["map"].item())    if res_agn["map"].numel()    else 0.0

    # ---- (3) Class-wise AP@50 ----
    cw_ap50 = {}
    if res_cls.get("map_50_per_class") is not None:
        vals = res_cls["map_50_per_class"].tolist()
        classes = res_cls.get("classes")
        if classes is not None and classes.numel() == len(vals):
            for cid, m in zip(classes.tolist(), vals):
                if cid == 0:  # skip background if present
                    continue
                name = class_names[cid-1] if class_names and (cid-1) < len(class_names) else f"class_{cid}"
                cw_ap50[name] = float(m) if np.isfinite(m) else float("nan")
        else:
            for i, m in enumerate(vals, start=1):
                name = class_names[i-1] if class_names and (i-1) < len(class_names) else f"class_{i}"
                cw_ap50[name] = float(m) if np.isfinite(m) else float("nan")

    # ---- (4) Class-wise AP@50:95 ----
    cw_ap5095 = {}
    if res_cls.get("map_per_class") is not None:
        vals = res_cls["map_per_class"].tolist()
        classes = res_cls.get("classes")
        if classes is not None and classes.numel() == len(vals):
            for cid, m in zip(classes.tolist(), vals):
                if cid == 0:
                    continue
                name = class_names[cid-1] if class_names and (cid-1) < len(class_names) else f"class_{cid}"
                cw_ap5095[name] = float(m) if np.isfinite(m) else float("nan")
        else:
            for i, m in enumerate(vals, start=1):
                name = class_names[i-1] if class_names and (i-1) < len(class_names) else f"class_{i}"
                cw_ap5095[name] = float(m) if np.isfinite(m) else float("nan")

    # Logging / TensorBoard
    logging.info(
        f"CA AP@50: {ca_ap50:.4f} | CA AP@[.50:.95]: {ca_ap5095:.4f} | "
        f"CW AP@50: {cw_ap50} | CW AP@[.50:.95]: {cw_ap5095}"
    )
    if writer is not None:
        writer.add_scalar(f"mAP/ca_ap50{desc_suffix}", ca_ap50, epoch)
        writer.add_scalar(f"mAP/ca_ap5095{desc_suffix}", ca_ap5095, epoch)
        for k, v in cw_ap50.items():
            writer.add_scalar(f"mAP/cw_ap50_{k}{desc_suffix}", v, epoch)
        for k, v in cw_ap5095.items():
            writer.add_scalar(f"mAP/cw_ap5095_{k}{desc_suffix}", v, epoch)

    return {
        "ca_ap50": ca_ap50,
        "ca_ap5095": ca_ap5095,
        "cw_ap50": cw_ap50,
        "cw_ap5095": cw_ap5095,
    }


In [None]:
import hashlib

def param_fingerprint(model) -> dict:
    """Return quick fingerprints of current model params."""
    # sum of L2 norms (fast), and a tiny SHA256 over a small sample (cheap)
    l2_sum = 0.0
    hasher = hashlib.sha256()
    with torch.no_grad():
        for i, p in enumerate(model.parameters()):
            if p.requires_grad and p.numel() > 0:
                l2_sum += float(torch.linalg.vector_norm(p.detach()).cpu())
                # sample first 1024 elements to keep it cheap
                s = p.detach().view(-1).float().cpu()
                hasher.update(s[: min(1024, s.numel())].numpy().tobytes())
    return {"l2": l2_sum, "sha": hasher.hexdigest()}


In [18]:
def main():
    device = "cuda:1" if torch.cuda.is_available() else "cpu"

    train_ds = BrickKilnDataset(TRAIN_ROOT, split="", input_size=IMG_SIZE)
    test_ds  = BrickKilnDataset(TEST_ROOT,  split="", input_size=IMG_SIZE)

    train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True,
                              num_workers=NUM_WORKERS, pin_memory=True, collate_fn=collate_fn)
    test_loader  = DataLoader(test_ds,  batch_size=BATCH_SIZE, shuffle=False,
                              num_workers=NUM_WORKERS, pin_memory=True, collate_fn=collate_fn)

    model = build_scalemae_fasterrcnn(NUM_CLASSES, img_size=IMG_SIZE).to(device)

    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.AdamW(params, lr=LR, weight_decay=WEIGHT_DECAY)

    # (optional) baseline eval BEFORE any training
    base_fp = param_fingerprint(model)
    logging.info(f"[E0/before-train] fp: {base_fp}")

    best_ca_ap50 = 0.0
    for epoch in tqdm(range(1, EPOCHS + 1), desc="Training"):
        # ---- fingerpint BEFORE training this epoch
        fp_before = param_fingerprint(model)
        logging.info(f"[E{epoch}/pre-train] fp: {fp_before}")

        # ---- train (this should call optimizer.step() internally)
        train_loss = train_one_epoch(model, train_loader, optimizer, device)

        # ---- fingerprint AFTER training this epoch
        fp_after = param_fingerprint(model)
        logging.info(f"[E{epoch}/post-train] fp: {fp_after}")

        # Sanity: did weights change?
        if abs(fp_after["l2"] - fp_before["l2"]) < 1e-6 or fp_after["sha"] == fp_before["sha"]:
            logging.warning(f"[E{epoch}] WARNING: weights fingerprint unchanged after training. "
                            "Check that gradients are non-zero, the LR isn’t zero, "
                            "and optimizer.step() is executed.")

        # ---- right BEFORE validation (should match post-train)
        fp_preval = param_fingerprint(model)
        if fp_preval["sha"] != fp_after["sha"]:
            logging.warning(f"[E{epoch}] Model changed between post-train and pre-val (unexpected).")

        # ---- Evaluate (CA AP@50, CA AP@50:95, CW AP@50, CW AP@50:95)
        metrics = validate_report(model, test_loader, device, epoch, class_names=CLASSES, writer=None)
        ca_ap50   = metrics["ca_ap50"]
        ca_ap5095 = metrics["ca_ap5095"]
        cw_ap50   = metrics["cw_ap50"]
        cw_ap5095 = metrics["cw_ap5095"]

        cw50_str   = " | ".join(f"{k}: {v:.4f}" for k, v in cw_ap50.items())
        cw5095_str = " | ".join(f"{k}: {v:.4f}" for k, v in cw_ap5095.items())

        print(
            f"Epoch {epoch:02d} | loss={train_loss:.4f} | "
            f"CA AP@50={ca_ap50:.4f} | CA AP@[.50:.95]={ca_ap5095:.4f}\n"
            f"  CW AP@50      -> {cw50_str}\n"
            f"  CW AP@[.50:.95]-> {cw5095_str}"
        )
        logging.info(
            f"[E{epoch}] CA AP@50={ca_ap50:.4f} | CA AP@[.50:.95]={ca_ap5095:.4f} | "
            f"CW AP@50={cw_ap50} | CW AP@[.50:.95]={cw_ap5095}"
        )

        # Save best by CA AP@50
        if ca_ap50 > best_ca_ap50:
            best_ca_ap50 = ca_ap50
            ckpt = {
                "model": model.state_dict(),
                "epoch": epoch,
                "metrics": metrics,
                "fp_post_train": fp_after,  # store the fingerprint used for this eval
                "cfg": {
                    "IMG_SIZE": IMG_SIZE,
                    "CLASSES": CLASSES,
                    "NUM_CLASSES": NUM_CLASSES,
                    "LR": LR,
                    "WEIGHT_DECAY": WEIGHT_DECAY,
                }
            }
            os.makedirs("checkpoints", exist_ok=True)
            torch.save(ckpt, "checkpoints/scalemae_frcnn_best.pth")
            logging.info(f"Saved best checkpoint with CA AP@50={ca_ap50:.4f}")

    logging.info(f"Training done. Best CA AP@50={best_ca_ap50:.4f}")


In [19]:

if __name__ == "__main__":
    main()

INFO:root:Scanning 47214 images in /home/rishabh.mondal/Brick-Kilns-project/ijcai_2025_kilns/data/processed_data/sentinel/final_data_neurips_2025/train/images for valid annotations...
INFO:root:Found 47214 images with valid annotations in /home/rishabh.mondal/Brick-Kilns-project/ijcai_2025_kilns/data/processed_data/sentinel/final_data_neurips_2025/train/images
INFO:root:Scanning 15738 images in /home/rishabh.mondal/Brick-Kilns-project/ijcai_2025_kilns/data/processed_data/sentinel/final_data_neurips_2025/test/images for valid annotations...
INFO:root:Found 15738 images with valid annotations in /home/rishabh.mondal/Brick-Kilns-project/ijcai_2025_kilns/data/processed_data/sentinel/final_data_neurips_2025/test/images
INFO:root:[E0/before-train] fp: {'l2': 9875.787951219827, 'sha': '6288aadfc651b46fc0a87e01d620da2acfe5904295b0317a988819ff6bd486c2'}
Training:   0%|          | 0/10 [00:00<?, ?it/s]INFO:root:[E1/pre-train] fp: {'l2': 9875.787951219827, 'sha': '6288aadfc651b46fc0a87e01d620da2a

Epoch 01 | loss=0.4202 | CA AP@50=0.0000 | CA AP@[.50:.95]=0.0000
  CW AP@50      -> 
  CW AP@[.50:.95]-> CFCBK: 0.0000 | FCBK: 0.0000 | Zigzag: 0.0000


INFO:root:[E2/pre-train] fp: {'l2': 9865.51096666744, 'sha': '73f94d2ae0e788ea11d09030feaba7f4b0ce454934aa9599126579ec3d52f88f'}
INFO:root:[E2/post-train] fp: {'l2': 9855.05748561304, 'sha': '13c3d0348a3241bbe615c1305ea87a031ff296ba495d97e762474638a5e7e93f'}
Epoch 2 [Validation]: 100%|██████████| 123/123 [03:31<00:00,  1.72s/it]
INFO:root:CA AP@50: 0.0000 | CA AP@[.50:.95]: 0.0000 | CW AP@50: {} | CW AP@[.50:.95]: {'CFCBK': 0.0, 'FCBK': 0.0, 'Zigzag': 0.0}
INFO:root:[E2] CA AP@50=0.0000 | CA AP@[.50:.95]=0.0000 | CW AP@50={} | CW AP@[.50:.95]={'CFCBK': 0.0, 'FCBK': 0.0, 'Zigzag': 0.0}
Training:  20%|██        | 2/10 [57:51<3:51:21, 1735.15s/it]

Epoch 02 | loss=0.2715 | CA AP@50=0.0000 | CA AP@[.50:.95]=0.0000
  CW AP@50      -> 
  CW AP@[.50:.95]-> CFCBK: 0.0000 | FCBK: 0.0000 | Zigzag: 0.0000


INFO:root:[E3/pre-train] fp: {'l2': 9855.05748561304, 'sha': '13c3d0348a3241bbe615c1305ea87a031ff296ba495d97e762474638a5e7e93f'}
INFO:root:[E3/post-train] fp: {'l2': 9849.297889551148, 'sha': '814844b59e71aea46adb34496cdac018d2cd2b1fc0f61db31d855b0424dd07ef'}
Epoch 3 [Validation]: 100%|██████████| 123/123 [03:38<00:00,  1.78s/it]
INFO:root:CA AP@50: 0.0000 | CA AP@[.50:.95]: 0.0000 | CW AP@50: {} | CW AP@[.50:.95]: {'CFCBK': 0.0, 'FCBK': 0.0, 'Zigzag': 0.0}
INFO:root:[E3] CA AP@50=0.0000 | CA AP@[.50:.95]=0.0000 | CW AP@50={} | CW AP@[.50:.95]={'CFCBK': 0.0, 'FCBK': 0.0, 'Zigzag': 0.0}
Training:  30%|███       | 3/10 [1:26:49<3:22:34, 1736.39s/it]

Epoch 03 | loss=0.2058 | CA AP@50=0.0000 | CA AP@[.50:.95]=0.0000
  CW AP@50      -> 
  CW AP@[.50:.95]-> CFCBK: 0.0000 | FCBK: 0.0000 | Zigzag: 0.0000


INFO:root:[E4/pre-train] fp: {'l2': 9849.297889551148, 'sha': '814844b59e71aea46adb34496cdac018d2cd2b1fc0f61db31d855b0424dd07ef'}
Training:  30%|███       | 3/10 [1:29:02<3:27:46, 1780.95s/it]


KeyboardInterrupt: 