In [5]:
from google.colab import drive
import os

# Mount Google Drive at /content/drive (Colab only). The dataset, checkpoint
# and output paths used by the later cells all live under this mount point.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [6]:
# !pip uninstall -y torch torchvision torchaudio xformers

In [7]:
!pip install -q torchmetrics segmentation-models-pytorch
# 2. Install a modern stable trio that exists in the index
!pip install torch==2.4.0 torchvision==0.19.0 torchaudio==2.4.0 --index-url https://download.pytorch.org/whl/cu121

# 3. Install compatible xformers
!pip install -q xformers==0.0.27.post2

[0m[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
fastai 2.8.6 requires torch<2.10,>=1.10, but you have torch 2.10.0 which is incompatible.[0m[31m
[0mLooking in indexes: https://download.pytorch.org/whl/cu121
Collecting torch==2.4.0
  Using cached https://download.pytorch.org/whl/cu121/torch-2.4.0%2Bcu121-cp312-cp312-linux_x86_64.whl (799.0 MB)
Collecting torchvision==0.19.0
  Using cached https://download.pytorch.org/whl/cu121/torchvision-0.19.0%2Bcu121-cp312-cp312-linux_x86_64.whl (7.1 MB)
Collecting torchaudio==2.4.0
  Using cached https://download.pytorch.org/whl/cu121/torchaudio-2.4.0%2Bcu121-cp312-cp312-linux_x86_64.whl (3.4 MB)
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch==2.4.0)
  Using cached https://download.pytorch.org/whl/cu121/nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtim

In [3]:
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader

In [None]:
import torch
from torch.utils.data import Dataset, DataLoader
import numpy as np
from torch import nn
import torch.nn.functional as F
import torch.optim as optim
from PIL import Image
import os
import albumentations as A
from albumentations.pytorch import ToTensorV2
from tqdm import tqdm
from torch.amp import autocast, GradScaler

# ============================================================
# CONFIG
# ============================================================
# Run on the GPU when one is visible, otherwise fall back to the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# One segmentation head is trained per seed; the ensemble size is derived
# from the seed list so the two values can never drift apart.
SEEDS = [42, 123, 999]
N_ENSEMBLE = len(SEEDS)

BATCH_SIZE = 12
# Input resolution. Both sides are multiples of 14 (896 = 64 * 14,
# 504 = 36 * 14), which matches the W//14 x H//14 token grid the head uses.
W, H = 896, 504
LR = 5e-4
EPOCHS = 10

# Dataset location on the mounted drive; "train" and "val" are sub-folders.
BASE_PATH = "/content/drive/MyDrive/data/Offroad_Segmentation_Training_Dataset/Offroad_Segmentation_Training_Dataset"
TRAIN_DIR = os.path.join(BASE_PATH, "train")
VAL_DIR = os.path.join(BASE_PATH, "val")

# ============================================================
# LOSS
# ============================================================
class DiceCELoss(nn.Module):
    """Equal-weight blend of cross-entropy and soft Dice loss.

    Args:
        num_classes: number of channels in the logits / size of the one-hot
            encoding built from the integer target.
    """

    def __init__(self, num_classes):
        super().__init__()
        self.num_classes = num_classes
        self.ce = nn.CrossEntropyLoss()

    def forward(self, pred, target):
        """Return ``0.5 * CE + 0.5 * (1 - mean Dice)``.

        ``pred`` holds raw logits with the class axis at dim 1 and two
        trailing spatial axes; ``target`` holds integer class ids.
        """
        probs = F.softmax(pred, dim=1)

        # One-hot encode the target and move the class axis next to batch so
        # it lines up with the probabilities.
        one_hot = F.one_hot(target, self.num_classes)
        one_hot = one_hot.permute(0, 3, 1, 2).float()

        # Per-sample, per-class sums over the two spatial axes.
        overlap = (probs * one_hot).sum((2, 3))
        total = (probs + one_hot).sum((2, 3))

        # The epsilon keeps the ratio finite when a class has no mass at all.
        dice_score = (2 * overlap / (total + 1e-6)).mean()
        dice_term = 1 - dice_score

        return 0.5 * self.ce(pred, target) + 0.5 * dice_term

# ============================================================
# MASK UTILS
# ============================================================
# Raw mask pixel value -> contiguous class index.
value_map = {
    0: 0,
    100: 1,
    200: 2,
    300: 3,
    500: 4,
    550: 5,
    700: 6,
    800: 7,
    7100: 8,
    10000: 9,
}
N_CLASSES = len(value_map)


def convert_mask(mask):
    """Translate a raw mask into a uint8 array of class indices.

    ``mask`` is read as uint16; every pixel equal to a key of ``value_map``
    receives the mapped index, and every other pixel stays 0.
    """
    raw = np.array(mask, dtype=np.uint16)
    class_ids = np.zeros(raw.shape, dtype=np.uint8)
    for raw_value, class_id in value_map.items():
        np.putmask(class_ids, raw == raw_value, class_id)
    return class_ids

# ============================================================
# DATASET
# ============================================================
class MaskDataset(Dataset):
    """Paired image / segmentation-mask dataset rooted at ``root``.

    Images are read from ``root/Color_Images`` and masks from
    ``root/Segmentation``; the mask for an image is looked up under the same
    file name as the image.

    Args:
        root: directory containing the two sub-folders above.
        h, w: output height and width every sample is resized to.
        train: when True, random augmentations are applied before
            normalisation; otherwise samples are only resized and normalised.
    """

    def __init__(self, root, h, w, train=True):
        self.img_dir = os.path.join(root, "Color_Images")
        self.mask_dir = os.path.join(root, "Segmentation")

        # Shared tail of both pipelines: normalise, then convert to tensors.
        to_model_input = [
            A.Normalize(mean=(0.485,0.456,0.406),
                        std=(0.229,0.224,0.225)),
            ToTensorV2(transpose_mask=True),
        ]

        if train:
            # NOTE(review): RandomRotate90 runs before Resize, so a rotated
            # non-square frame is squeezed back into (h, w) - confirm this
            # aspect-ratio distortion is intended.
            self.tf = A.Compose([
                A.HorizontalFlip(p=0.5),
                A.RandomRotate90(p=0.5),
                A.ShiftScaleRotate(shift_limit=0.1, scale_limit=0.1,
                                   rotate_limit=15, p=0.5),
                A.Resize(height=h, width=w),
                A.ColorJitter(brightness=0.4, contrast=0.4,
                              saturation=0.4, hue=0.1, p=0.8),
                A.RandomBrightnessContrast(p=0.5),
                *to_model_input,
            ])
        else:
            self.tf = A.Compose([
                A.Resize(height=h, width=w),
                *to_model_input,
            ])

        # Keep only regular, non-hidden files: a raw listing can also contain
        # directories or dot-files (e.g. ".ipynb_checkpoints"), which would
        # make Image.open fail in the middle of an epoch.
        self.ids = sorted(
            name for name in os.listdir(self.img_dir)
            if not name.startswith(".")
            and os.path.isfile(os.path.join(self.img_dir, name))
        )

    def __len__(self):
        return len(self.ids)

    def __getitem__(self, i):
        """Return ``(image_tensor, mask_tensor)``; the mask is cast to long."""
        name = self.ids[i]
        # Context managers release the file handles as soon as the pixel data
        # has been copied into numpy arrays.
        with Image.open(os.path.join(self.img_dir, name)) as img_file:
            img = np.array(img_file.convert("RGB"))
        with Image.open(os.path.join(self.mask_dir, name)) as mask_file:
            mask = convert_mask(mask_file)
        aug = self.tf(image=img, mask=mask)
        return aug["image"], aug["mask"].long()

# ============================================================
# MODEL
# ============================================================
class SegmentationHeadConvNeXt(nn.Module):
    """Convolutional head that turns a flat token sequence into class logits.

    Args:
        in_c: channel width of the incoming tokens.
        out_c: number of output classes.
        w, h: width and height of the token grid the sequence is folded into.
    """

    def __init__(self, in_c, out_c, w, h):
        super().__init__()
        self.w = w
        self.h = h

        width = 256  # hidden channel count used throughout the head

        self.stem = nn.Sequential(
            nn.Conv2d(in_c, width, kernel_size=7, padding=3),
            nn.BatchNorm2d(width),
            nn.GELU(),
        )
        # Depthwise 7x7 conv followed by a pointwise 1x1 conv.
        self.block = nn.Sequential(
            nn.Conv2d(width, width, kernel_size=7, padding=3, groups=width),
            nn.GELU(),
            nn.Conv2d(width, width, kernel_size=1),
            nn.GELU(),
            nn.Dropout(p=0.1),
        )
        self.cls = nn.Conv2d(width, out_c, kernel_size=1)

    def forward(self, x):
        """Map tokens of shape (batch, h*w, channels) to (batch, out_c, h, w)."""
        batch, _, channels = x.shape
        # Fold the token axis into the (h, w) grid, then go channels-first.
        grid = x.reshape(batch, self.h, self.w, channels)
        grid = grid.permute(0, 3, 1, 2)
        hidden = self.stem(grid)
        hidden = self.block(hidden)
        return self.cls(hidden)

# ============================================================
# METRIC
# ============================================================
def compute_iou(pred, gt, num_classes):
    """Mean intersection-over-union across the classes present in the batch.

    ``pred`` carries per-class scores along dim 1 and is reduced with argmax;
    ``gt`` carries integer labels. A class that appears in neither the
    prediction nor the ground truth is left out of the average.
    """
    predicted = torch.argmax(pred, 1).view(-1)
    actual = gt.view(-1)

    per_class = []
    for cls in range(num_classes):
        pred_hit = predicted == cls
        true_hit = actual == cls
        union = (pred_hit | true_hit).sum().float()
        if union <= 0:
            # Class absent from both tensors: skip it.
            continue
        inter = (pred_hit & true_hit).sum().float()
        per_class.append((inter / union).item())
    return np.mean(per_class)

# ============================================================
# TRAIN + ENSEMBLE
# ============================================================
def main():
    """Train one segmentation head per seed, then validate the ensemble.

    A DINOv2 ViT-B/14 backbone is loaded once and kept in eval mode; its patch
    tokens are computed under ``torch.no_grad()`` and only the
    ``SegmentationHeadConvNeXt`` heads are optimised. Each trained head is
    saved to Drive, and the heads' upsampled logits are averaged to score the
    validation split with ``compute_iou``.
    """
    train_loader = DataLoader(
        MaskDataset(TRAIN_DIR, H, W, True),
        batch_size=BATCH_SIZE, shuffle=True, num_workers=4
    )
    val_loader = DataLoader(
        MaskDataset(VAL_DIR, H, W, False),
        batch_size=BATCH_SIZE, shuffle=False, num_workers=4
    )

    backbone = torch.hub.load(
        "facebookresearch/dinov2","dinov2_vitb14"
    ).to(DEVICE).eval()

    loss_fn = DiceCELoss(N_CLASSES).to(DEVICE)
    ensemble = []

    # Mixed precision is only meaningful on CUDA; disabling it explicitly on
    # CPU avoids the "CUDA is not available" warnings from autocast/GradScaler.
    use_amp = DEVICE.type == "cuda"

    for idx, seed in enumerate(SEEDS):
        # Report progress against the list that is actually iterated.
        print(f"\n===== TRAINING MODEL {idx+1}/{len(SEEDS)} | SEED {seed} =====")
        torch.manual_seed(seed); np.random.seed(seed)

        model = SegmentationHeadConvNeXt(
            768, N_CLASSES, W//14, H//14
        ).to(DEVICE)

        opt = optim.AdamW(model.parameters(), lr=LR, weight_decay=1e-4)
        # OneCycleLR is stepped once per batch, hence steps_per_epoch.
        sch = optim.lr_scheduler.OneCycleLR(
            opt, max_lr=LR,
            steps_per_epoch=len(train_loader),
            epochs=EPOCHS
        )
        scaler = GradScaler("cuda", enabled=use_amp)

        for ep in range(EPOCHS):
            model.train()
            losses = []

            pbar = tqdm(train_loader, desc=f"Epoch {ep+1}/{EPOCHS}")
            for imgs, masks in pbar:
                imgs, masks = imgs.to(DEVICE), masks.to(DEVICE)

                with autocast("cuda", enabled=use_amp):
                    # The backbone is frozen: no graph is built for it.
                    with torch.no_grad():
                        feat = backbone.forward_features(imgs)["x_norm_patchtokens"]
                    logits = model(feat)
                    # Upsample token-grid logits back to the input resolution.
                    out = F.interpolate(logits, imgs.shape[2:], mode="bilinear", align_corners=False)
                    loss = loss_fn(out, masks)

                opt.zero_grad(set_to_none=True)
                scaler.scale(loss).backward()
                scaler.step(opt)
                scaler.update()
                sch.step()

                losses.append(loss.item())
                pbar.set_postfix(loss=f"{loss.item():.4f}")

            print(f"Epoch {ep+1} | Train Loss: {np.mean(losses):.4f}")

        ensemble.append(model.eval())
        torch.save(model.state_dict(), f"/content/drive/MyDrive/offroad_model_seed_{seed}.pth")

    print("\n===== ENSEMBLE VALIDATION =====")
    ious = []
    with torch.no_grad():
        for imgs, masks in tqdm(val_loader):
            imgs, masks = imgs.to(DEVICE), masks.to(DEVICE)
            # The backbone output does not depend on the head, so it is
            # computed once per batch and shared by every ensemble member.
            feat = backbone.forward_features(imgs)["x_norm_patchtokens"]
            logits_sum = 0
            for model in ensemble:
                logits = model(feat)
                logits = F.interpolate(logits, imgs.shape[2:], mode="bilinear", align_corners=False)
                logits_sum += logits
            logits_mean = logits_sum / len(ensemble)
            ious.append(compute_iou(logits_mean, masks, N_CLASSES))

    print(f"\nENSEMBLE Val IoU: {np.mean(ious):.4f}")
    print("DONE.")

if __name__ == "__main__":
    main()


  original_init(self, **validated_kwargs)
Using cache found in /root/.cache/torch/hub/facebookresearch_dinov2_main



===== TRAINING MODEL 1/3 | SEED 42 =====


Epoch 1/10 [Train]: 100%|██████████| 179/179 [05:31<00:00,  1.85s/it, loss=0.6891]


Epoch 1/10 | Train Loss: 1.1954 | Val Loss: 0.6808 | Train IoU: 0.3899 | Val IoU: 0.3438


Epoch 2/10 [Train]: 100%|██████████| 179/179 [03:28<00:00,  1.16s/it, loss=0.5007]


KeyboardInterrupt: 

In [6]:
"""
Segmentation Testing Script - FINAL FIXED
- Correct mask encoding (FIXES IoU = 0)
- Ensemble + Full TTA (8 variants)
- Meaningful test-time graphs
"""

import torch
from torch.utils.data import Dataset, DataLoader
import numpy as np
from torch import nn
import torch.nn.functional as F
import torchvision.transforms.functional as TF
from PIL import Image
import cv2
import os, shutil
from tqdm import tqdm
import matplotlib.pyplot as plt

# ============================================================
# CONFIG
# ============================================================
# Prefer the GPU when available.
_device_name = "cuda" if torch.cuda.is_available() else "cpu"
DEVICE = torch.device(_device_name)

# Inference resolution (height, width), class count and token channel width.
H = 504
W = 896
N_CLASSES = 10
EMBED_DIM = 768

# One checkpoint per ensemble member.
MODEL_PATHS = [
    f"/content/drive/MyDrive/offroad_model_seed_{seed}.pth"
    for seed in (42, 123, 999)
]

# Input images and the folder that receives predictions and plots.
DATA_DIR = "/content/drive/MyDrive/data/Offroad_Segmentation_testImages/Offroad_Segmentation_testImages"
OUTPUT_DIR = "/content/drive/MyDrive/test_results"

# ============================================================
# MASK CONVERSION (CRITICAL FIX)
# ============================================================
# Raw mask pixel value -> contiguous class index.
VALUE_MAP = {
    0: 0,
    100: 1,
    200: 2,
    300: 3,
    500: 4,
    550: 5,
    700: 6,
    800: 7,
    7100: 8,
    10000: 9,
}


def convert_mask(mask):
    """Re-encode a raw mask as a uint8 array of class indices.

    The input is read as uint16. Pixels equal to a key of ``VALUE_MAP`` get
    the mapped index; all remaining pixels are 0.
    """
    raw = np.array(mask, dtype=np.uint16)
    encoded = np.zeros(raw.shape, dtype=np.uint8)
    for raw_value, class_id in VALUE_MAP.items():
        encoded = np.where(raw == raw_value, np.uint8(class_id), encoded)
    return encoded

# ============================================================
# COLOR MAP
# ============================================================
# RGB colour used to paint each class index (row i -> class i).
COLOR_PALETTE = np.array(
    [
        [0, 0, 0],        # class 0
        [34, 139, 34],    # class 1
        [0, 255, 0],      # class 2
        [210, 180, 140],  # class 3
        [139, 90, 43],    # class 4
        [128, 128, 0],    # class 5
        [139, 69, 19],    # class 6
        [128, 128, 128],  # class 7
        [160, 82, 45],    # class 8
        [135, 206, 235],  # class 9
    ],
    dtype=np.uint8,
)

# ============================================================
# DATASET
# ============================================================
class TestDataset(Dataset):
    """Test images from ``root/Color_Images`` with optional ground truth.

    When ``root/Segmentation`` exists and contains a file with the same name
    as the image, that file is used as the mask; otherwise an all-zero mask is
    returned in its place.
    """

    def __init__(self, root):
        self.img_dir = os.path.join(root, "Color_Images")
        self.mask_dir = os.path.join(root, "Segmentation")
        # Keep only regular, non-hidden files: a raw listing can also contain
        # directories or dot-files (e.g. ".ipynb_checkpoints"), which would
        # make Image.open fail part-way through inference.
        self.ids = sorted(
            entry for entry in os.listdir(self.img_dir)
            if not entry.startswith(".")
            and os.path.isfile(os.path.join(self.img_dir, entry))
        )
        self.has_masks = os.path.exists(self.mask_dir)

    def __len__(self):
        return len(self.ids)

    def __getitem__(self, i):
        """Return ``(image, mask, name)`` for sample ``i``.

        ``image`` is resized to (H, W), converted to a tensor and normalised;
        ``mask`` is a long tensor of shape (H, W).
        """
        name = self.ids[i]

        # The context manager releases the file handle once the pixels have
        # been decoded by convert().
        with Image.open(os.path.join(self.img_dir, name)) as img_file:
            img = img_file.convert("RGB")
        img = TF.resize(img, (H, W))
        img = TF.to_tensor(img)
        img = TF.normalize(img, mean=[0.485,0.456,0.406],
                                 std=[0.229,0.224,0.225])

        # Placeholder used when no ground-truth file is available.
        mask = torch.zeros((H, W), dtype=torch.long)
        if self.has_masks:
            mp = os.path.join(self.mask_dir, name)
            if os.path.exists(mp):
                with Image.open(mp) as mask_file:
                    m = convert_mask(mask_file)
                # Nearest-neighbour keeps class ids intact while resizing.
                m = TF.resize(
                    Image.fromarray(m),
                    (H, W),
                    interpolation=TF.InterpolationMode.NEAREST
                )
                mask = torch.from_numpy(np.array(m)).long()

        return img, mask, name

# ============================================================
# MODEL
# ============================================================
class SegmentationHeadConvNeXt(nn.Module):
    """Convolutional head that turns a flat token sequence into class logits.

    Args:
        in_c: channel width of the incoming tokens.
        out_c: number of output classes.
    """

    def __init__(self, in_c, out_c):
        super().__init__()

        width = 256  # hidden channel count used throughout the head

        self.stem = nn.Sequential(
            nn.Conv2d(in_c, width, kernel_size=7, padding=3),
            nn.BatchNorm2d(width),
            nn.GELU(),
        )
        # Depthwise 7x7 conv followed by a pointwise 1x1 conv.
        self.block = nn.Sequential(
            nn.Conv2d(width, width, kernel_size=7, padding=3, groups=width),
            nn.GELU(),
            nn.Conv2d(width, width, kernel_size=1),
            nn.GELU(),
            nn.Dropout(p=0.1),
        )
        self.cls = nn.Conv2d(width, out_c, kernel_size=1)

    def forward(self, x, h, w):
        """Fold tokens into an (h//14, w//14) grid and return class logits.

        ``h`` and ``w`` are the pixel height and width of the image the
        tokens were extracted from.
        """
        batch, _, channels = x.shape
        rows = h // 14
        cols = w // 14
        grid = x.reshape(batch, rows, cols, channels).permute(0, 3, 1, 2)
        hidden = self.stem(grid)
        hidden = self.block(hidden)
        return self.cls(hidden)

# ============================================================
# TTA
# ============================================================
def apply_tta(x, k):
    """Return test-time-augmentation variant ``k`` of ``x``.

    Variants 0-3 rotate the last two axes by ``k`` quarter turns; for
    ``k >= 4`` the last two axes are swapped first and the rotation count is
    ``k - 4``.
    """
    swap_axes = k >= 4
    source = x.transpose(-1, -2) if swap_axes else x
    quarter_turns = k - 4 if swap_axes else k
    return torch.rot90(source, quarter_turns, dims=(-2, -1))

def undo_tta(x, k):
    """Invert augmentation variant ``k``: rotate back, then undo the axis swap.

    The steps run in the reverse order of the forward augmentation (swap,
    then rotate), so the rotation is undone first.
    """
    unrotated = torch.rot90(x, -(k % 4), dims=(-2, -1))
    return unrotated.transpose(-1, -2) if k >= 4 else unrotated

# ============================================================
# METRIC
# ============================================================
def compute_iou(pred, gt, num_classes=None):
    """Mean intersection-over-union across the classes present in the inputs.

    Args:
        pred: tensor of predicted class ids.
        gt: tensor of ground-truth class ids with the same number of elements.
        num_classes: number of class ids to score (``0 .. num_classes - 1``).
            Defaults to the module-level ``N_CLASSES`` so existing two-argument
            calls keep working.

    Returns:
        The mean IoU over classes that occur in ``pred`` or ``gt``, or
        ``None`` when no class occurs in either tensor.
    """
    if num_classes is None:
        num_classes = N_CLASSES

    pred = pred.view(-1)
    gt = gt.view(-1)
    ious = []
    for c in range(num_classes):
        pred_c = pred == c
        gt_c = gt == c
        union = (pred_c | gt_c).sum().float()
        # A class absent from both tensors has no defined IoU; skip it.
        if union > 0:
            inter = (pred_c & gt_c).sum().float()
            ious.append((inter/union).item())
    return np.mean(ious) if ious else None

# ============================================================
# MAIN
# ============================================================
def main():
    """Run ensemble + 8-way TTA inference over the test set.

    For every image, the upsampled logits of all heads are averaged for each
    of the 8 augmented views, mapped back with ``undo_tta`` and averaged
    again. The argmax is written as a colour mask under ``OUTPUT_DIR/masks``.
    Per-image IoU is collected where a ground-truth mask is available, two
    summary plots are saved, and ``OUTPUT_DIR`` is zipped at the end.
    """
    os.makedirs(os.path.join(OUTPUT_DIR,"masks"), exist_ok=True)

    backbone = torch.hub.load(
        "facebookresearch/dinov2","dinov2_vitb14"
    ).to(DEVICE).eval()

    models = []
    for p in MODEL_PATHS:
        m = SegmentationHeadConvNeXt(EMBED_DIM, N_CLASSES).to(DEVICE)
        # The checkpoints are plain state dicts, so restrict unpickling to
        # tensors / containers instead of arbitrary Python objects.
        m.load_state_dict(torch.load(p, map_location=DEVICE, weights_only=True))
        m.eval()
        models.append(m)

    # batch_size must stay 1: the logit accumulator below is allocated for a
    # single image.
    loader = DataLoader(TestDataset(DATA_DIR), batch_size=1)
    all_ious = []

    print("Running Ensemble + TTA inference...")
    with torch.no_grad():
        for img, gt, name in tqdm(loader):
            img, gt = img.to(DEVICE), gt.to(DEVICE)
            logits_sum = torch.zeros((1,N_CLASSES,H,W), device=DEVICE)

            for k in range(8):
                aug = apply_tta(img, k)
                h,w = aug.shape[-2:]
                # The backbone output depends only on the augmented view, not
                # on the head, so compute it once and share it across the
                # ensemble instead of once per model.
                feat = backbone.forward_features(aug)["x_norm_patchtokens"]
                view_logits = 0
                for model in models:
                    log = model(feat, h, w)
                    log = F.interpolate(log, (h,w), mode="bilinear", align_corners=False)
                    view_logits += log
                view_logits /= len(models)
                # Map the prediction back to the original orientation.
                logits_sum += undo_tta(view_logits, k)

            pred = torch.argmax(logits_sum/8,1)[0]

            # TestDataset returns an all-zero mask when no ground truth file
            # exists, so an all-zero mask is treated as "no ground truth".
            if gt.sum() > 0:
                iou = compute_iou(pred, gt[0])
                if iou is not None:
                    all_ious.append(iou)

            color = COLOR_PALETTE[pred.cpu().numpy()]
            cv2.imwrite(
                os.path.join(OUTPUT_DIR,"masks",name[0]),
                cv2.cvtColor(color, cv2.COLOR_RGB2BGR)
            )

    # =========================
    # GRAPHS
    # =========================
    if all_ious:
        plt.hist(all_ious, bins=20)
        plt.title("Test IoU Distribution")
        plt.xlabel("IoU")
        plt.ylabel("Images")
        plt.savefig(os.path.join(OUTPUT_DIR,"iou_hist.png"))
        plt.close()

        # Cumulative mean after each image, in dataset order.
        plt.plot(np.cumsum(all_ious)/np.arange(1,len(all_ious)+1))
        plt.title("Running Mean IoU")
        plt.xlabel("Images")
        plt.ylabel("Mean IoU")
        plt.savefig(os.path.join(OUTPUT_DIR,"iou_running_mean.png"))
        plt.close()

        print(f"\nTEST MEAN IoU: {np.mean(all_ious):.4f}")

    shutil.make_archive(OUTPUT_DIR,"zip",OUTPUT_DIR)
    print("DONE.")

if __name__ == "__main__":
    main()


Using cache found in /root/.cache/torch/hub/facebookresearch_dinov2_main


Running Ensemble + TTA inference...


100%|██████████| 1004/1004 [1:39:52<00:00,  5.97s/it]



TEST MEAN IoU: 0.3243
DONE.
