In [1]:
!pip -q install albumentations==1.4.7 imgaug==0.4.0 opencv-python-headless

import os, math, gc, sys, json, random, warnings
from pathlib import Path
from typing import List, Tuple, Dict
import numpy as np
import pandas as pd
from PIL import Image, ImageOps
import cv2

import torch, torch.nn as nn, torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from albumentations import (
    Compose, HorizontalFlip, RandomBrightnessContrast, GaussianBlur,
    ShiftScaleRotate, Resize, Normalize
)
from albumentations.pytorch import ToTensorV2

warnings.filterwarnings("ignore")
SEED = 42
random.seed(SEED); np.random.seed(SEED); torch.manual_seed(SEED)
torch.backends.cudnn.benchmark = True
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", DEVICE)

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m155.7/155.7 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m948.0/948.0 kB[0m [31m12.4 MB/s[0m eta [36m0:00:00[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.7/9.7 MB[0m [31m101.5 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25h[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
category-encoders 2.7.0 requires scikit-learn<1.6.0,>=1.0.0, but you have scikit-learn 1.7.1 which is incompatible.
cesium 0.12.4 requires numpy<3.0,>=2.0, but you have numpy 1.26.4 which is incompatible.
sklearn-compat 0.1.3 requires scikit-learn<1.7,>=1.2, but you have scikit-learn 1.7.1 which is incompatible.[0m[31m
[0mDevice: cuda


In [2]:
# ---- dataset roots (Kaggle input) ----
VOC_ROOT   = Path("/kaggle/input/pascal-voc-2012-dataset/VOC2012_train_val/VOC2012_train_val")
IMG_ROOT   = VOC_ROOT / "JPEGImages"
GT_ROOT    = VOC_ROOT / "SegmentationClass"   # 21-class masks (255=ignore)
SPLIT_ROOT = VOC_ROOT / "ImageSets" / "Segmentation"

# ---- experiment I/O ----
EXP_NAME = "A_rotate"                  # <— only change for other experiments
BASE_OUT = Path("/kaggle/working/outputs") / EXP_NAME
SEEDS_DIR   = BASE_OUT / "seeds"           # Grad-CAM seed overlays
PSEUDO_DIR  = BASE_OUT / "pseudo_masks"    # binary masks from seeds
CKPT_DIR    = BASE_OUT / "ckpt"
RESULTS_DIR = BASE_OUT / "results"
for d in [SEEDS_DIR, PSEUDO_DIR, CKPT_DIR, RESULTS_DIR]:
    d.mkdir(parents=True, exist_ok=True)

# quick check that VOC is visible
print("VOC exists:", VOC_ROOT.exists(), "| JPEGImages:", IMG_ROOT.exists())
print("Saving to:", BASE_OUT)

VOC exists: True | JPEGImages: True
Saving to: /kaggle/working/outputs/A_rotate


In [3]:
def read_ids(txt: Path) -> List[str]:
    return [x.strip() for x in open(txt) if x.strip()]

train_ids = read_ids(SPLIT_ROOT / "train.txt")
val_ids   = read_ids(SPLIT_ROOT / "val.txt")

print("Counts  → train:", len(train_ids), " val:", len(val_ids))
print("Sample  →", val_ids[0])
print("Sample image path →", (IMG_ROOT / f"{val_ids[0]}.jpg"))

Counts  → train: 1464  val: 1449
Sample  → 2007_000033
Sample image path → /kaggle/input/pascal-voc-2012-dataset/VOC2012_train_val/VOC2012_train_val/JPEGImages/2007_000033.jpg


In [4]:
import os, sys, time, csv, math, warnings
from pathlib import Path
import numpy as np
import torch, torchvision
import cv2
from PIL import Image
from tqdm import tqdm
from torchvision.models import resnet50, ResNet50_Weights


try:
    DEVICE
except NameError:
    DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

PSEUDO_DIR = Path(PSEUDO_DIR) if 'PSEUDO_DIR' in globals() else Path('./outputs/pseudo_masks')
PSEUDO_DIR.mkdir(parents=True, exist_ok=True)

SEEDS_DIR = Path(SEEDS_DIR) if 'SEEDS_DIR' in globals() else Path('./outputs/seeds')  # not required but kept

assert 'IMG_ROOT' in globals(), "IMG_ROOT must be defined (folder with VOC JPEGImages)."
assert 'train_ids' in globals(), "train_ids must be defined (list of VOC image ids)."

class CAMHelper:
    def __init__(self, device=DEVICE):
        m = resnet50(weights=ResNet50_Weights.IMAGENET1K_V1).to(device).eval()
        self.model = m
        self.device = device
        self._feat = []
        self._grad = []

        def f_hook(_, __, out): self._feat = [out]
        def b_hook(_, grad_in, grad_out): self._grad = [grad_out[0]]

        # hook last conv in layer4
        self.h1 = m.layer4[-1].conv3.register_forward_hook(f_hook)
        self.h2 = m.layer4[-1].conv3.register_full_backward_hook(b_hook)

        self.pre = torchvision.transforms.Compose([
            torchvision.transforms.ToTensor(),
            torchvision.transforms.Normalize(mean=[0.485,0.456,0.406],
                                             std=[0.229,0.224,0.225])
        ])

        # AMP for speed
        self.use_amp = True
        self.scaler = torch.cuda.amp.GradScaler(enabled=self.use_amp)

    @torch.inference_mode(False)
    def run(self, pil_img: Image.Image) -> np.ndarray:
        self._feat.clear(); self._grad.clear()
        x = self.pre(pil_img).unsqueeze(0).to(self.device)
        x.requires_grad_(True)

        with torch.cuda.amp.autocast(enabled=self.use_amp):
            logits = self.model(x)              # [1,1000]
            cls = logits.argmax(1)
            score = logits[0, cls]

        # backward for CAM weights
        self.model.zero_grad(set_to_none=True)
        (score).backward()

        A = self._feat[0][0]                   # [C,H,W]
        G = self._grad[0][0]                   # [C,H,W]
        w = G.mean(dim=(1,2))                  # [C]
        cam = (w[:, None, None] * A).sum(0)
        cam = torch.relu(cam)
        cam = (cam - cam.min()) / (cam.max() - cam.min() + 1e-6)
        return cam.detach().float().cpu().numpy()   # [Hc, Wc]

    def close(self):
        self.h1.remove(); self.h2.remove()

cam_helper = CAMHelper()

# --------- main writer ----------
def seeds_to_pseudo(ids, rot_deg=10.0, th=0.30,
                    overwrite=False,
                    log_csv=PSEUDO_DIR.parent / "seed_build_log.csv"):
    """
    Build binary pseudo masks from Grad-CAM with an optional rotation.
    - ids: list of VOC ids (strings)
    - rot_deg: rotate input before CAM, then center-crop back
    - th: threshold on upsampled CAM to produce FG (>=th)
    - overwrite: if False, skip if output PNG already exists
    - log_csv: write per-image stats; will append if exists
    """
    ids = list(ids)
    if len(ids) == 0:
        print("No ids to process.")
        return

    # CSV logger (append-safe)
    log_csv = Path(log_csv)
    header = ["img_id", "saved_path", "time_sec", "cam_mean", "cam_max", "threshold", "skipped", "error"]
    new_file = not log_csv.exists()
    f_log = open(log_csv, "a", newline="")
    wlog = csv.writer(f_log)
    if new_file: wlog.writerow(header)

    t0 = time.time()
    t_last = t0
    proc, skipped, errors = 0, 0, 0

    pbar = tqdm(ids, desc="Grad-CAM→pseudo", unit="img")
    for img_id in pbar:
        out_png = PSEUDO_DIR / f"{img_id}.png"
        if out_png.exists() and not overwrite:
            skipped += 1
            wlog.writerow([img_id, str(out_png), 0.0, "", "", th, True, ""])
            continue

        # resolve input image (jpg/jpeg)
        img_p = IMG_ROOT / f"{img_id}.jpg"
        if not img_p.exists():
            alt = IMG_ROOT / f"{img_id}.jpeg"
            if alt.exists(): img_p = alt
        if not img_p.exists():
            errors += 1
            wlog.writerow([img_id, "", 0.0, "", "", th, False, "missing input"])
            continue

        try:
            # load + rotate + center-crop back to original size
            img = Image.open(img_p).convert("RGB")
            W, H = img.size
            if abs(rot_deg) > 1e-3:
                rot = img.rotate(rot_deg, resample=Image.BILINEAR, expand=True, fillcolor=(0,0,0))
                left = max((rot.width - W)//2, 0); top = max((rot.height - H)//2, 0)
                rot = rot.crop((left, top, left+W, top+H))
            else:
                rot = img

            # CAM
            t1 = time.time()
            cmap = cam_helper.run(rot)                   # [h,w]
            cam_up = cv2.resize(cmap, (W, H), interpolation=cv2.INTER_LINEAR)

            # stats + mask
            cmean = float(cam_up.mean()); cmax = float(cam_up.max())
            mask = (cam_up >= th).astype(np.uint8) * 255

            # write (fast: no compression)
            Image.fromarray(mask).save(out_png, compress_level=0)

            # bookkeeping
            dt = time.time() - t1
            proc += 1
            # rolling ETA
            elapsed = time.time() - t0
            per_img = elapsed / max(proc, 1)
            remain = per_img * (len(ids) - proc - skipped)
            pbar.set_postfix(proc=proc, skip=skipped, err=errors,
                             sec_img=f"{per_img:.2f}", eta=f"{remain/60:.1f}m")

            # log
            wlog.writerow([img_id, str(out_png), f"{dt:.3f}", f"{cmean:.5f}", f"{cmax:.5f}", th, False, ""])

            # flush every 100
            if (proc + skipped) % 100 == 0:
                f_log.flush()
                sys.stdout.flush()

        except Exception as e:
            errors += 1
            wlog.writerow([img_id, "", 0.0, "", "", th, False, repr(e)])
            continue

    f_log.close()
    cam_helper.close()
    print(f"\nDone. Written: {proc} | Skipped: {skipped} | Errors: {errors}")
    print(f"Log: {log_csv}")
    print(f"Output dir: {PSEUDO_DIR.resolve()}")
    # quick sample check
    print("Sample outputs:", list(map(str, list(PSEUDO_DIR.glob('*.png'))[:5])))

seeds_to_pseudo(train_ids, rot_deg=10.0, th=0.30, overwrite=False)

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 166MB/s] 
Grad-CAM→pseudo: 100%|██████████| 1464/1464 [01:55<00:00, 12.64img/s, err=0, eta=0.0m, proc=1464, sec_img=0.08, skip=0]


Done. Written: 1464 | Skipped: 0 | Errors: 0
Log: /kaggle/working/outputs/A_rotate/seed_build_log.csv
Output dir: /kaggle/working/outputs/A_rotate/pseudo_masks
Sample outputs: ['/kaggle/working/outputs/A_rotate/pseudo_masks/2011_002590.png', '/kaggle/working/outputs/A_rotate/pseudo_masks/2008_002258.png', '/kaggle/working/outputs/A_rotate/pseudo_masks/2007_003000.png', '/kaggle/working/outputs/A_rotate/pseudo_masks/2009_004661.png', '/kaggle/working/outputs/A_rotate/pseudo_masks/2007_009889.png']





In [9]:
IGNORE_IDX = 255
IMG_SIZE = 256
BATCH_TRAIN = 8
BATCH_VAL = 8

class VOCPseudoBinary(Dataset):
    """
    Returns x (FloatTensor CxHxW), g (LongTensor HxW in {0,1}), 
            q (LongTensor HxW in {0,255}), id (str)
    g: pseudo labels (binary), q: GT for eval (255=ignore)
    """
    def __init__(self, ids, img_root, pseudo_root, gt_root, train=True, size=256):
        self.ids = ids
        self.img_root = Path(img_root)
        self.pseudo_root = Path(pseudo_root)
        self.gt_root = Path(gt_root)
        self.train = bool(train)
        self.size = int(size)

        aug_train = Compose([
            HorizontalFlip(p=0.5),
            RandomBrightnessContrast(p=0.2),
            GaussianBlur(blur_limit=(3,5), p=0.15),
            ShiftScaleRotate(shift_limit=0.05, scale_limit=0.05, rotate_limit=15, border_mode=cv2.BORDER_CONSTANT, p=0.5),
            Resize(self.size, self.size),
            Normalize(mean=[0.485,0.456,0.406], std=[0.229,0.224,0.225]),
            ToTensorV2()
        ])
        aug_val = Compose([
            Resize(self.size, self.size),
            Normalize(mean=[0.485,0.456,0.406], std=[0.229,0.224,0.225]),
            ToTensorV2()
        ])
        self.tr = aug_train if self.train else aug_val

    def __len__(self): return len(self.ids)

    def __getitem__(self, i):
        img_id = self.ids[i]
        # image
        ip = self.img_root / f"{img_id}.jpg"
        if not ip.exists(): ip = self.img_root / f"{img_id}.jpeg"
        img = np.array(Image.open(ip).convert("RGB"))

        # pseudo (binary)
        pp = self.pseudo_root / f"{img_id}.png"
        if pp.exists():
            pm = np.array(Image.open(pp))
        else:
            pm = np.zeros(img.shape[:2], np.uint8)

        # GT (21-class → we only use 'is not background' as fg; 255 ignore)
        gp = self.gt_root / f"{img_id}.png"
        if gp.exists():
            gt = np.array(Image.open(gp))
        else:
            gt = np.full(img.shape[:2], IGNORE_IDX, np.uint8)

        # resize masks to image size before joint transforms
        H, W = img.shape[:2]
        if pm.shape != (H,W): pm = cv2.resize(pm, (W,H), interpolation=cv2.INTER_NEAREST)
        if gt.shape != (H,W): gt = cv2.resize(gt, (W,H), interpolation=cv2.INTER_NEAREST)

        # binary gt for eval (fg = any class ≠ 0 & ≠ 255)
        gbin = np.where(gt==IGNORE_IDX, IGNORE_IDX, (gt!=0).astype(np.uint8)*1)

        # joint transform
        out = self.tr(image=img, mask=pm, masks=[gbin])
        x = out["image"].float()
        pm2 = out["mask"]                       # (H,W) in {0..255}
        gt2 = out["masks"][0]                   # (H,W) in {0,1,255}

        # tensor types
        g = (torch.as_tensor(pm2).long() > 0).to(torch.long)        # 0/1
        q = torch.as_tensor(gt2).to(torch.long)                     # 0/1/255

        return x, g, q, img_id

def make_loaders():
    train_ds = VOCPseudoBinary(train_ids, IMG_ROOT, PSEUDO_DIR, GT_ROOT, train=True, size=IMG_SIZE)
    val_ds   = VOCPseudoBinary(val_ids, IMG_ROOT, PSEUDO_DIR, GT_ROOT, train=False, size=IMG_SIZE)
    train_dl = DataLoader(train_ds, batch_size=BATCH_TRAIN, shuffle=True, num_workers=2, pin_memory=True)
    val_dl   = DataLoader(val_ds, batch_size=BATCH_VAL,   shuffle=False, num_workers=2, pin_memory=True)
    return train_dl, val_dl

train_dl, val_dl = make_loaders()

# smoke test
bx, bg, bq, bids = next(iter(train_dl))
print("Shapes:", bx.shape, bg.shape, bq.shape, "| batches →", len(train_dl), len(val_dl))

Shapes: torch.Size([8, 3, 256, 256]) torch.Size([8, 256, 256]) torch.Size([8, 256, 256]) | batches → 183 182


In [10]:
import torchvision
from torchvision.models.segmentation import deeplabv3_resnet50, DeepLabV3_ResNet50_Weights

def build_deeplab_binary(num_classes=2):
    try:
        m = deeplabv3_resnet50(weights_backbone=DeepLabV3_ResNet50_Weights.DEFAULT.backbone)
    except Exception:
        m = deeplabv3_resnet50(weights=None)
    m.classifier[-1] = nn.Conv2d(256, num_classes, kernel_size=1)
    return m.to(DEVICE)

model = build_deeplab_binary(num_classes=2)

# optimizer / loss
opt = torch.optim.AdamW(model.parameters(), lr=3e-4, weight_decay=1e-4)
scaler = torch.cuda.amp.GradScaler(enabled=torch.cuda.is_available())

def ce_loss(logits, target):
    # target in {0,1}; ignore 255
    return F.cross_entropy(logits, target, ignore_index=IGNORE_IDX)

def fast_confusion(pred, gt, num_classes=2, ignore_index=255):
    # pred, gt: (H,W) ints
    mask = gt != ignore_index
    n = num_classes
    k = (gt[mask] * n + pred[mask]).to(torch.int64)
    binc = torch.bincount(k, minlength=n*n).float()
    return binc.reshape(n, n)

def iou_from_cm(cm):
    inter = torch.diag(cm)
    union = cm.sum(1) + cm.sum(0) - inter
    iou = (inter / (union + 1e-7))
    miou = iou.mean().item()
    return {"IoU_bg": iou[0].item(), "IoU_fg": iou[1].item(), "mIoU": miou}

@torch.no_grad()
def evaluate(dl):
    model.eval()
    cm = torch.zeros(2,2, device=DEVICE)
    for x, _, q, _ in dl:
        x = x.to(DEVICE); q = q.to(DEVICE)
        with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):
            out = model(x)["out"].argmax(1)
        for b in range(out.size(0)):
            cm += fast_confusion(out[b], q[b])
    return iou_from_cm(cm)

def train_one_epoch():
    model.train()
    tot = 0.0
    for x, g, _, _ in train_dl:
        x = x.to(DEVICE); g = g.to(DEVICE)
        opt.zero_grad(set_to_none=True)
        with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):
            logits = model(x)["out"]
            loss = ce_loss(logits, g)
        scaler.scale(loss).backward()
        scaler.step(opt); scaler.update()
        tot += loss.item() * x.size(0)
    return tot / len(train_dl.dataset)

# train quick (match baseline: 15 epochs + early stop 3)
EPOCHS, PATIENCE = 15, 3
best, wait = 0.0, 0
for ep in range(1, EPOCHS+1):
    tl = train_one_epoch()
    mets = evaluate(val_dl)
    print(f"Epoch {ep:02d}: loss={tl:.4f}  mIoU={mets['mIoU']:.3f} (bg={mets['IoU_bg']:.3f}, fg={mets['IoU_fg']:.3f})")
    if mets['mIoU'] > best + 1e-4:
        best, wait = mets['mIoU'], 0
        torch.save(model.state_dict(), CKPT_DIR / "best.pth")
    else:
        wait += 1
        if wait > PATIENCE:
            print("Early stop.")
            break

print("Best mIoU:", round(best,3))

Epoch 01: loss=0.4952  mIoU=0.557 (bg=0.716, fg=0.398)
Epoch 02: loss=0.4583  mIoU=0.541 (bg=0.757, fg=0.324)
Epoch 03: loss=0.4502  mIoU=0.506 (bg=0.738, fg=0.273)
Epoch 04: loss=0.4418  mIoU=0.514 (bg=0.730, fg=0.297)
Epoch 05: loss=0.4294  mIoU=0.528 (bg=0.752, fg=0.305)
Early stop.
Best mIoU: 0.557


In [11]:
# simple perturbations (eval only)
def apply_blur(img):     return cv2.GaussianBlur(img, (5,5), 1.0)
def apply_bright(img):   return np.clip(img*1.25, 0, 255).astype(np.uint8)
def apply_noise(img):    return np.clip(img + np.random.normal(0, 10, img.shape), 0, 255).astype(np.uint8)
def apply_hflip(img):    return cv2.flip(img, 1)
def apply_rotate(img):   # 10 degrees
    h,w = img.shape[:2]
    M = cv2.getRotationMatrix2D((w/2,h/2), 10, 1.0)
    return cv2.warpAffine(img, M, (w,h), flags=cv2.INTER_LINEAR, borderMode=cv2.BORDER_REFLECT_101)

PERTURBS = {
    "clean":    lambda im: im,
    "blur":     apply_blur,
    "brightness": apply_bright,
    "gauss":    apply_noise,
    "hflip":    apply_hflip,
    "rotation": apply_rotate,
}

@torch.no_grad()
def eval_perturbations(val_ids):
    rows = []
    model.eval()
    for name, fn in PERTURBS.items():
        cm = torch.zeros(2,2, device=DEVICE)
        for img_id in val_ids:
            ip = IMG_ROOT / f"{img_id}.jpg"
            if not ip.exists(): ip = IMG_ROOT / f"{img_id}.jpeg"
            img = np.array(Image.open(ip).convert("RGB"))
            gt_p = GT_ROOT / f"{img_id}.png"
            gt = np.array(Image.open(gt_p)) if gt_p.exists() else np.full(img.shape[:2], IGNORE_IDX, np.uint8)

            pim = fn(img)
            pim = cv2.resize(pim, (IMG_SIZE, IMG_SIZE))
            x = torch.from_numpy((pim/255.0 - np.array([0.485,0.456,0.406])) / np.array([0.229,0.224,0.225])).permute(2,0,1).float().unsqueeze(0).to(DEVICE)
            q = cv2.resize(gt, (IMG_SIZE, IMG_SIZE), interpolation=cv2.INTER_NEAREST)
            q = torch.from_numpy(np.where(q==IGNORE_IDX, IGNORE_IDX, (q!=0).astype(np.uint8))).long().to(DEVICE)

            out = model(x)["out"].argmax(1)[0]
            cm += fast_confusion(out, q)
        mets = iou_from_cm(cm)
        rows.append({"perturb": name, **mets})
        print(f"{name:10s}  mIoU={mets['mIoU']:.3f}  (bg={mets['IoU_bg']:.3f}, fg={mets['IoU_fg']:.3f})")

    df = pd.DataFrame(rows)
    df.to_csv(RESULTS_DIR / "voc_val_robustness.csv", index=False)
    print("Saved:", RESULTS_DIR / "voc_val_robustness.csv")
    return df

df_rob = eval_perturbations(val_ids)
df_rob

clean       mIoU=0.528  (bg=0.752, fg=0.305)
blur        mIoU=0.532  (bg=0.749, fg=0.315)
brightness  mIoU=0.515  (bg=0.752, fg=0.277)
gauss       mIoU=0.508  (bg=0.753, fg=0.263)
hflip       mIoU=0.474  (bg=0.713, fg=0.234)
rotation    mIoU=0.527  (bg=0.741, fg=0.312)
Saved: /kaggle/working/outputs/A_rotate/results/voc_val_robustness.csv


Unnamed: 0,perturb,IoU_bg,IoU_fg,mIoU
0,clean,0.751711,0.305164,0.528438
1,blur,0.749218,0.31453,0.531874
2,brightness,0.751709,0.27732,0.514515
3,gauss,0.752617,0.263381,0.507999
4,hflip,0.713244,0.234416,0.47383
5,rotation,0.741232,0.311783,0.526508


In [12]:
SAMPLE_DIR = BASE_OUT / "samples"; SAMPLE_DIR.mkdir(parents=True, exist_ok=True)

@torch.no_grad()
def save_samples(n=6):
    model.eval()
    picks = val_ids[:n]
    for img_id in picks:
        ip = IMG_ROOT / f"{img_id}.jpg"
        if not ip.exists(): ip = IMG_ROOT / f"{img_id}.jpeg"
        img = Image.open(ip).convert("RGB"); arr = np.array(img)
        x = torchvision.transforms.functional.normalize(
                torchvision.transforms.functional.to_tensor(img),
                mean=[0.485,0.456,0.406], std=[0.229,0.224,0.225]
            ).unsqueeze(0).to(DEVICE)
        pred = model(x)["out"].argmax(1)[0].cpu().numpy()  # 0/1
        overlay = arr.copy()
        overlay[pred==1] = (0.0*overlay[pred==1] + np.array([0,255,0])*0.8).astype(np.uint8)
        Image.fromarray(overlay).save(SAMPLE_DIR / f"{img_id}_overlay.png")

save_samples(n=8)
print("Samples at:", SAMPLE_DIR)

Samples at: /kaggle/working/outputs/A_rotate/samples


In [None]:
!zip -r pseudo_masks_gradcam.zip outputs/A_rotate/pseudo_masks/

In [20]:
!zip -r A_Rotate_samples_gradcam.zip outputs/A_rotate/samples/

updating: outputs/A_rotate/samples/ (stored 0%)
  adding: outputs/A_rotate/samples/2007_000033_overlay.png (deflated 0%)
  adding: outputs/A_rotate/samples/2007_000123_overlay.png (deflated 1%)
  adding: outputs/A_rotate/samples/2007_000061_overlay.png (deflated 1%)
  adding: outputs/A_rotate/samples/2007_000175_overlay.png (deflated 0%)
  adding: outputs/A_rotate/samples/2007_000042_overlay.png (deflated 0%)
  adding: outputs/A_rotate/samples/2007_000323_overlay.png (deflated 0%)
  adding: outputs/A_rotate/samples/2007_000129_overlay.png (deflated 0%)
  adding: outputs/A_rotate/samples/2007_000187_overlay.png (deflated 0%)
