In [None]:
!pip uninstall -y numpy
!pip install --quiet numpy==1.26.4

!pip uninstall -y opencv-python opencv-python-headless
!pip install --quiet opencv-python-headless==4.8.1.78

!pip uninstall -y albumentations qudida scikit-image scipy scikit-learn imgaug
!pip install --quiet albumentations==1.3.1


In [None]:
import os; os._exit(0)

In [2]:
print(torch.cuda.device_count(), "GPUs available")

2 GPUs available


In [3]:
torch.backends.cudnn.benchmark = True
!nvidia-smi

Sun Nov  9 10:29:57 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 560.35.03              Driver Version: 560.35.03      CUDA Version: 12.6     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   64C    P8             10W /   70W |       1MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
|   1  Tesla T4                       Off |   00

In [1]:
import os
import torch
import torch.nn as nn
import torchvision
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import numpy as np
import albumentations as A
from albumentations.pytorch import ToTensorV2
from tqdm import tqdm
import cv2

In [4]:
device = ('cuda' if torch.cuda.is_available else 'cpu')

In [6]:
mask_dir = r"/kaggle/input/fs2020-runway-dataset/labels/labels/areas/train_labels_1920x1080"
test_mask_dir = r"/kaggle/input/fs2020-runway-dataset/labels/labels/areas/test_labels_1920x1080"
binary_mask_dir = r"/kaggle/working/binary_mask"
binary_test_mask_dir = r"/kaggle/working/binary_test_mask"

def convert2Binary(mask_dir, binary_mask_dir):
    os.makedirs(binary_mask_dir, exist_ok = True)
    for filename in os.listdir(mask_dir):
        if not filename.lower().endswith(('.png', '.jpg', '.jpeg')):
            continue
        img_path = os.path.join(mask_dir, filename)
        img = cv2.imread(img_path)
    
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    
        binary = np.where(gray > 0, 255, 0).astype(np.uint8)
    
        cv2.imwrite(os.path.join(binary_mask_dir, filename), binary)

#convert2Binary(test_mask_dir, binary_test_mask_dir)
#convert2Binary(mask_dir, binary_mask_dir)

In [7]:

class RunwayDataset(Dataset):
    def __init__(self, img_dir, mask_dir, transform=None):
        self.img_dir = img_dir
        self.mask_dir = mask_dir
        self.transform = transform
        self.images = os.listdir(img_dir)

    def __len__(self):
        return len(self.images)
    
    def __getitem__(self, index):
        img_path = os.path.join(self.img_dir, self.images[index])
        mask_path = os.path.join(self.mask_dir, self.images[index])
        image = np.array(Image.open(img_path).convert("RGB"))
        mask = np.array(Image.open(mask_path).convert("L"), dtype=np.float32) / 255.0

        if self.transform is not None:
            aug = self.transform(image=image, mask=mask)
            image = aug["image"]
            mask = aug["mask"]
            
        return image, mask


In [8]:
import torchvision.transforms.functional as TF

class DoubleConv(nn.Module):
    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, 3, padding=1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, out_channels, 3, padding=1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
        )

    def forward(self, x):
        return self.conv(x)

class UNET(nn.Module):
    def __init__(self, in_channels=3, out_channels=1, features=[64,128,256,512]):
        super().__init__()
        self.downs = nn.ModuleList()
        self.ups = nn.ModuleList()
        self.pool = nn.MaxPool2d(2)

        for f in features:
            self.downs.append(DoubleConv(in_channels, f))
            in_channels = f

        for f in reversed(features):
            self.ups.append(nn.ConvTranspose2d(f*2, f, kernel_size=2, stride=2))
            self.ups.append(DoubleConv(f*2, f))

        self.bottleneck = DoubleConv(features[-1], features[-1]*2)
        self.final_conv = nn.Conv2d(features[0], out_channels, kernel_size=1)

    def forward(self, x):
        skips = []
        for down in self.downs:
            x = down(x)
            skips.append(x)
            x = self.pool(x)

        x = self.bottleneck(x)
        skips = skips[::-1]

        for idx in range(0, len(self.ups), 2):
            x = self.ups[idx](x)
            skip = skips[idx//2]
            if x.shape != skip.shape:
                x = TF.resize(x, skip.shape[2:])
            x = torch.cat((skip, x), dim=1)
            x = self.ups[idx+1](x)

        return self.final_conv(x)


In [13]:
class DiceLoss(nn.Module):
    def __init__(self, smooth=1e-8):
        super().__init__()
        self.smooth = smooth

    def forward(self, preds, targets):
        preds = torch.sigmoid(preds)
        intersection = (preds * targets).sum()
        dice = 1 - (2 * intersection + self.smooth) / (preds.sum() + targets.sum() + self.smooth)
        return dice.mean()


class TrevskyLoss(nn.Module):
    def __init__(self, alpha = 0.5, beta = 0.5, smooth = 1e-6):
        super(TrevskyLoss, self).__init__()
        self.alpha = alpha
        self.beta = beta
        self.smooth = smooth

    def forward(self, inputs, targets):
        probs = torch.sigmoid(inputs)
        targets = targets.float()

        tp = (probs*targets).sum(dim = (1,2,3))
        fp = ((1 - targets)*probs).sum(dim = (1,2,3))
        fn = ((1-probs)*targets).sum(dim = (1,2,3))

        ti = (self.smooth + tp)/(tp+ self.alpha*fp + self.beta*fn)
        loss = 1 - ti
        return loss.mean()


class FocalTrevsky(nn.Module):
    def __init__(self, alpha = 0.5, beta = 0.5, gamma = 1.33, smooth = 1e-6):
        super(FocalTrevsky, self).__init__()
        self.alpha = alpha
        self.beta = beta
        self.smooth = smooth
        self.gamma = gamma

    def forward(self, inputs, targets):
        probs = torch.sigmoid(inputs)
        targets = targets.float()

        tp = (probs*targets).sum(dim = (1,2,3))
        fp = ((1 - targets)*probs).sum(dim = (1,2,3))
        fn = ((1-probs)*targets).sum(dim = (1,2,3))

        ti = (self.smooth + tp)/(tp+ self.alpha*fp + self.beta*fn)
        loss = (1 - ti)**self.gamma
        focalloss = loss.mean()
        return focalloss





def check_accuracy(loader, model, device="cuda"):
    dice = 0
    with torch.no_grad():
        for x, y in loader:
            x = x.to(device)
            y = y.to(device).unsqueeze(1)
            preds = torch.sigmoid(model(x))
            preds = (preds > 0.5).float()
            intersection = (preds * y).sum()
            dice += (2*intersection + 1e-8) / (preds.sum() + y.sum() + 1e-8)
    print("DICE SCORE:", dice/len(loader))
    model.train()

def save_sample_predictions(loader, model, folder="saved_images/", device="cuda", num_images_to_save = 10):
    os.makedirs(folder, exist_ok=True)
    model.eval()

    try:
        x, y =  next(iter(loader))
    except StopIteration:
        print("TEST LOADER IS EMPTY")
        return
    x.to(device)
    y.to(device)

    with torch.no_grad():
        preds = (torch.sigmoid(model(x)) > 0.5).float()
    
    num_to_save = min(num_images_to_save, x.shape[0])
    print(f"Saving {num_to_save} detailed prediction sets...")
    for i in range(num_to_save):
        img_tensor = x[i]
        mask_tensor = y[i].unsqueeze(0)
        pred_tensor = preds[i]


        mask_rgb = mask_tensor.repeat(3, 1, 1)
        pred_rgb = pred_tensor.repeat(3, 1, 1)

        combined_image = torch.cat([img_tensor, mask_rgb, pred_rgb], dim=2)

        save_path = os.path.join(folder, f"comparison_sample_{i}.png")

        torchvision.utils.save_image(combined_image, save_path)
    model.train()

def auto_find_max_batch(model, dataset, start=4, max_try=64, device="cuda"):
    """
    Tries batch sizes: start, start+2, start+4, ... until OOM.
    Returns the largest stable batch size.
    """
    batch = start
    last_good = start

    while batch <= max_try:
        try:
            loader = DataLoader(dataset, batch_size=batch, shuffle=True)
            x, y = next(iter(loader))
            x = x.to(device)
            y = y.float().unsqueeze(1).to(device)

            # Run a small forward & backward to test capacity
            with torch.cuda.amp.autocast(enabled=(device=="cuda")):
                out = model(x)
                loss = (out.mean() - y.mean()).abs()

            loss.backward()
            torch.cuda.empty_cache()

            print(f"✅ Batch {batch} worked")
            last_good = batch
            batch += 2  # Increase step size

        except RuntimeError as e:
            if "out of memory" in str(e).lower():
                print(f"❌ OOM at batch {batch}")
                torch.cuda.empty_cache()
                break
            else:
                raise e

    return last_good

def save_predictions_as_imgs(loader, model, folder="saved_images/", device="cuda"):
    os.makedirs(folder, exist_ok=True)
    model.eval()
    for idx, (x, y) in enumerate(loader):
        x = x.to(device)
        with torch.no_grad():
            preds = (torch.sigmoid(model(x)) > 0.5).float()
        torchvision.utils.save_image(preds, f"{folder}/pred_{idx}.png")
        torchvision.utils.save_image(y.unsqueeze(1), f"{folder}/mask_{idx}.png")
    model.train()

pos_weight = torch.tensor([5.0]).to(device)
bce_loss = nn.BCEWithLogitsLoss(pos_weight = pos_weight)
dice_loss = DiceLoss()
focalTrevsky = FocalTrevsky(alpha = 0.7, beta = 0.3)

def loss_fn(preds, targets):
    bce = bce_loss(preds, targets)
    dice = dice_loss(preds, targets)
    trevsky = focalTrevsky(preds, targets)
    return 0.8*bce + 0.2*trevsky




In [14]:
# Paths must be changed to match your Kaggle dataset paths:
TRAIN_IMG_DIR = "/kaggle/input/fs2020-runway-dataset/1920x1080/1920x1080/train"
TRAIN_MASK_DIR = r"/kaggle/working/binary_mask"
TEST_IMG_DIR = "/kaggle/input/fs2020-runway-dataset/1920x1080/1920x1080/test"
TEST_MASK_DIR = "/kaggle/working/binary_test_mask"

BATCH_SIZE = 58
LR = 1e-4
EPOCHS = 25
IMG_H, IMG_W = 288, 512

device = "cuda" if torch.cuda.is_available() else "cpu"

train_tf = A.Compose([
    A.Resize(IMG_H, IMG_W),
    A.Rotate(limit=35, p=1.0),
    A.HorizontalFlip(p=0.5),
    A.Normalize(mean=0, std=1, max_pixel_value=255),
    ToTensorV2(),
])

test_tf = A.Compose([
    A.Resize(IMG_H, IMG_W),
    A.Normalize(mean=0, std=1, max_pixel_value=255),
    ToTensorV2(),
])

train_ds = RunwayDataset(TRAIN_IMG_DIR, TRAIN_MASK_DIR, train_tf)
test_ds = RunwayDataset(TEST_IMG_DIR, TEST_MASK_DIR, test_tf)

train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True, num_workers = 4, pin_memory = True)
test_loader = DataLoader(test_ds, batch_size=BATCH_SIZE, num_workers = 4, pin_memory = True)

model = UNET()
model = torch.nn.DataParallel(model) 
model = model.to(device)

opt = torch.optim.Adam(model.parameters(), lr=LR)
scaler = torch.amp.GradScaler(device)

for epoch in range(EPOCHS):
    loop = tqdm(train_loader, desc=f"Epoch {epoch+1}/{EPOCHS}")
    for x, y in loop:
        x, y = x.to(device), y.float().unsqueeze(1).to(device)
        with torch.amp.autocast(device):
            preds = model(x)
            loss = loss_fn(preds, y)
        opt.zero_grad()
        scaler.scale(loss).backward()
        scaler.step(opt)
        scaler.update()
        loop.set_postfix(loss=loss.item())

    if ((epoch+1)%5 == 0) or ((epoch+1) == EPOCHS):
        print(f"\n--- Running Validation for Epoch {epoch+1} ---\n")
        check_accuracy(test_loader, model, device)
        print("SAVING PREDICIONS........")
        save_sample_predictions(test_loader, model, folder="saved_images", device=device)
        print("\n---------PREDICTIONS SAVED---------\n")


Epoch 1/25: 100%|██████████| 69/69 [13:15<00:00, 11.52s/it, loss=0.522]
Epoch 2/25: 100%|██████████| 69/69 [13:15<00:00, 11.53s/it, loss=0.505]
Epoch 3/25: 100%|██████████| 69/69 [13:15<00:00, 11.53s/it, loss=0.421]
Epoch 4/25: 100%|██████████| 69/69 [13:14<00:00, 11.52s/it, loss=0.445]
Epoch 5/25: 100%|██████████| 69/69 [13:13<00:00, 11.50s/it, loss=0.434]


--- Running Validation for Epoch 5 ---






DICE SCORE: tensor(0.6652, device='cuda:0')
SAVING PREDICIONS........
Saving 10 detailed prediction sets...


RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cpu and cuda:0! (when checking argument for argument tensors in method wrapper_CUDA_cat)