In [1]:
'''!pip uninstall -y numpy
!pip install --quiet numpy==1.26.4

!pip uninstall -y opencv-python opencv-python-headless
!pip install --quiet opencv-python-headless==4.8.1.78

!pip uninstall -y albumentations qudida scikit-image scipy scikit-learn imgaug
!pip install --quiet albumentations==1.3.1'''

'!pip uninstall -y numpy\n!pip install --quiet numpy==1.26.4\n\n!pip uninstall -y opencv-python opencv-python-headless\n!pip install --quiet opencv-python-headless==4.8.1.78\n\n!pip uninstall -y albumentations qudida scikit-image scipy scikit-learn imgaug\n!pip install --quiet albumentations==1.3.1'

In [2]:
#import os; os._exit(0)

In [3]:
import os
import torch
import torch.nn as nn
import torchvision
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import numpy as np
import albumentations as A
from albumentations.pytorch import ToTensorV2
from tqdm import tqdm
import torchvision.transforms.functional as TF
import cv2
from torch.optim.lr_scheduler import CosineAnnealingLR
import json
import torchvision.models as models
import torch.nn.functional as F

In [4]:
DEVICE = ('cuda' if torch.cuda.is_available else 'cpu')

In [5]:

class RunwayKeypointDataset(Dataset):
    def __init__(self, img_dir, json_path, transform=None):
        self.img_dir = img_dir
        with open(json_path, 'r') as f:
            self.labels = json.load(f)
        self.filenames = list(self.labels.keys())
        
        self.line_order = ['LEDG', 'REDG', 'CTL']
        
        self.width = 640
        self.height = 360

    def __len__(self):
        return len(self.filenames)

    def __getitem__(self, idx):
        # 1. Load Image
        fname = self.filenames[idx]
        img_path = os.path.join(self.img_dir, fname)
        image = cv2.imread(img_path) 
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        
        raw_points = []
        validity_mask = []
        image_data = self.labels[fname]
        lines_dict = {item['label']: item['points'] for item in image_data}
        
        for label in self.line_order:
            if label in lines_dict:
                pts = lines_dict[label]
                raw_points.extend(pts[0]) 
                raw_points.extend(pts[1]) 
                validity_mask.extend([1.0, 1.0, 1.0, 1.0])
            else:
                raw_points.extend([0, 0, 0, 0]) 
                validity_mask.extend([0.0, 0.0, 0.0, 0.0])

        coords = np.array(raw_points, dtype=np.float32)

        coords[0::2] /= self.width  
        coords[1::2] /= self.height 

        image = image.transpose(2, 0, 1)
        image = torch.tensor(image, dtype=torch.float32) / 255.0
        target = torch.tensor(coords, dtype=torch.float32)
        mask = torch.tensor(validity_mask, dtype=torch.float32)

        return image, target, mask

class RunwayDataset(Dataset):
    def __init__(self, img_dir, mask_dir, transform=None):
        self.img_dir = img_dir
        self.mask_dir = mask_dir
        self.transform = transform
        self.images = os.listdir(img_dir)

    def __len__(self):
        return len(self.images)
    
    def __getitem__(self, index):
        img_path = os.path.join(self.img_dir, self.images[index])
        mask_path = os.path.join(self.mask_dir, self.images[index])
        image = np.array(Image.open(img_path).convert("RGB"))
        mask = np.array(Image.open(mask_path).convert("L"), dtype=np.int64)

        if self.transform is not None:
            aug = self.transform(image=image, mask=mask)
            image = aug["image"]
            mask = aug["mask"]

        if isinstance(mask, torch.Tensor):
            mask = mask.long()
        else:
            mask = torch.as_tensor(mask, dtype=torch.long)
            
        return image, mask


def masks_from_json(json_path, output_dir, width = 640, height = 360, thickness = 2):
    os.makedirs(output_dir, exist_ok = True)
    class_map = {
        'LEDG' : 1,
        'REDG' : 2,
        'CTL' : 3
    }
    with open(json_path, 'r') as f:
        data = json.load(f)

    print(f"GENERATING MASK IN THE DIRECTORY {output_dir}")
    for filename, lines in tqdm(data.items()):
        mask = np.zeros((height, width), dtype=np.uint8)
        
        for line_item in lines:
            label = line_item['label']
            
            if label in class_map:
                points = line_item['points']
                pts = np.array(points, dtype=np.int32).reshape((-1, 1, 2))
                cv2.polylines(mask, [pts], isClosed=False, color=class_map[label], thickness=thickness)
        
        save_path = os.path.join(output_dir, filename)
        cv2.imwrite(save_path, mask)

JSON_TRAIN_PATH = "/kaggle/input/fs2020-runway-dataset/labels/labels/lines/train_labels_640x360.json"
JSON_TEST_PATH = "/kaggle/input/fs2020-runway-dataset/labels/labels/lines/test_labels_640x360.json"

TRAIN_MASK_OUTPUT = "/kaggle/working/masks_train"
TEST_MASK_OUTPUT = "/kaggle/working/masks_test"

#masks_from_json(JSON_TRAIN_PATH, TRAIN_MASK_OUTPUT)
#masks_from_json(JSON_TEST_PATH, TEST_MASK_OUTPUT)

In [6]:
class RunwayRegressor(nn.Module):
    def __init__(self):
        super().__init__()
        self.backbone = models.resnet18(pretrained = True)
        in_features = self.backbone.fc.in_features

        self.backbone.fc = nn.Sequential(
            nn.Linear(in_features,256),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(256, 12),
            nn.Sigmoid()
        )

    def forward(self, x):
        return self.backbone(x)


In [7]:
import torchvision.transforms.functional as TF

class DoubleConv(nn.Module):
    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, 3, padding=1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, out_channels, 3, padding=1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
        )

    def forward(self, x):
        return self.conv(x)

class UNET(nn.Module):
    def __init__(self, in_channels=3, out_channels=1, features=[64,128,256,512]):
        super().__init__()
        self.downs = nn.ModuleList()
        self.ups = nn.ModuleList()
        self.pool = nn.MaxPool2d(2)

        for f in features:
            self.downs.append(DoubleConv(in_channels, f))
            in_channels = f

        for f in reversed(features):
            self.ups.append(nn.ConvTranspose2d(f*2, f, kernel_size=2, stride=2))
            self.ups.append(DoubleConv(f*2, f))

        self.bottleneck = DoubleConv(features[-1], features[-1]*2)
        self.final_conv = nn.Conv2d(features[0], out_channels, kernel_size=1)

    def forward(self, x):
        skips = []
        for down in self.downs:
            x = down(x)
            skips.append(x)
            x = self.pool(x)

        x = self.bottleneck(x)
        skips = skips[::-1]

        for idx in range(0, len(self.ups), 2):
            x = self.ups[idx](x)
            skip = skips[idx//2]
            if x.shape != skip.shape:
                x = TF.resize(x, skip.shape[2:])
            x = torch.cat((skip, x), dim=1)
            x = self.ups[idx+1](x)

        return self.final_conv(x)

In [8]:
def save_visual_predictions(loader, model, folder, device):
    os.makedirs(folder, exist_ok=True)
    model.eval()
    
    x, y, valid_mask = next(iter(loader))
    x = x.to(device)
    y = y.to(device)
    valid_mask = valid_mask.to(device)
    
    with torch.no_grad():
        preds = model(x).cpu().numpy()
        y = y.cpu().numpy()
        valid_mask = valid_mask.cpu().numpy()
    
    x = x.cpu().numpy().transpose(0, 2, 3, 1)
    x = (x * 255).astype(np.uint8)
    
    def draw_lines_on_image(coords, background_img, validity=None):
        img = background_img.copy()
        height, width = img.shape[:2] 
        
        c = coords.copy()
        c[0::2] *= width 
        c[1::2] *= height 
        c = c.astype(int)
        
        colors = [(255, 0, 0), (0, 0, 255), (0, 255, 0)]
        
        for i in range(3):
            start_idx = i * 4
            
            if validity is not None and validity[start_idx] == 0:
                continue
                
            pt1 = (c[start_idx], c[start_idx+1])
            pt2 = (c[start_idx+2], c[start_idx+3])
            
            cv2.line(img, pt1, pt2, colors[i], 3)
            
        return img

    num_to_save = min(5, x.shape[0])
    
    for i in range(num_to_save):
        original_img = x[i].copy()
        
        gt_overlay = draw_lines_on_image(y[i], original_img, validity=valid_mask[i])
        
        pred_overlay = draw_lines_on_image(preds[i], original_img)
        
        combined = np.hstack((original_img, gt_overlay, pred_overlay))
        
        # E. Save
        combined_bgr = cv2.cvtColor(combined, cv2.COLOR_RGB2BGR)
        cv2.imwrite(f"{folder}/val_pred_{i}.png", combined_bgr)
        
    model.train()


def save_multiclass_predictions(loader, model, folder="saved_images/", device="cuda"):
    os.makedirs(folder, exist_ok=True)
    model.eval()
    
    x, y = next(iter(loader))
    x = x.to(device)
    y = y.to(device) 

    with torch.no_grad():
        preds = model(x)          
        pred_labels = torch.argmax(preds, dim=1) 

    mean = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1).to(device)
    std = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1).to(device)

    num_to_save = min(5, x.shape[0])
    print(f"Saving {num_to_save} comparison images to {folder}...")

    for i in range(num_to_save):
        img_tensor = x[i] * std + mean
        img_vis = img_tensor.cpu().numpy().transpose(1, 2, 0)
        img_vis = (img_vis * 255).clip(0, 255).astype(np.uint8)

        gt_mask_rgb = visualize_multiclass_prediction(y[i])
        
        pred_mask_rgb = visualize_multiclass_prediction(pred_labels[i])
        
        combined = np.hstack((img_vis, gt_mask_rgb, pred_mask_rgb))
        
        combined_bgr = cv2.cvtColor(combined, cv2.COLOR_RGB2BGR)
        cv2.imwrite(f"{folder}/comparison_{i}.png", combined_bgr)
        
    model.train()

In [9]:
class DiceLoss(nn.Module):
    def __init__(self, smooth=1e-8):
        super().__init__()
        self.smooth = smooth

    def forward(self, preds, targets):
        preds = torch.sigmoid(preds)
        intersection = (preds * targets).sum()
        dice = 1 - (2 * intersection + self.smooth) / (preds.sum() + targets.sum() + self.smooth)
        return dice.mean()


class MultiClassTverskyLoss(nn.Module):
    def __init__(self, alpha=0.7, beta=0.3, smooth=1.0):
        super(MultiClassTverskyLoss, self).__init__()
        self.alpha = alpha
        self.beta = beta    
        self.smooth = smooth

    def forward(self, inputs, targets):
        
        probs = F.softmax(inputs, dim=1)
        
        targets_one_hot = F.one_hot(targets, num_classes=inputs.shape[1]).permute(0, 3, 1, 2).float()
        
        tp = (probs * targets_one_hot).sum(dim=(0, 2, 3))
        fp = (probs * (1 - targets_one_hot)).sum(dim=(0, 2, 3))
        fn = ((1 - probs) * targets_one_hot).sum(dim=(0, 2, 3))
        
        tversky = (tp + self.smooth) / (tp + self.alpha * fp + self.beta * fn + self.smooth)
        
        return 1 - tversky.mean()


class_weights = torch.tensor([1.0, 50.0, 50.0, 50.0]).to(DEVICE)
ce_loss_fn = nn.CrossEntropyLoss(weight=class_weights)
tversky_loss_fn = MultiClassTverskyLoss(alpha=0.7, beta=0.3)

def loss_fn(preds, targets):
    ce = ce_loss_fn(preds, targets)
    
    tversky = tversky_loss_fn(preds, targets)
    
    return 0.5 * ce + 0.5 * tversky

In [10]:
def check_accuracy(loader, model, device="cuda"):
    dice = 0
    with torch.no_grad():
        for x, y in loader:
            x = x.to(device)
            y = y.to(device).unsqueeze(1)
            raw_scores = model(x)
            probs = torch.softmax(raw_scores, dim = 1)
            preds = torch.argmax(probs, dim = 1)
            intersection = (preds * y).sum()
            dice += (2*intersection + 1e-8) / (preds.sum() + y.sum() + 1e-8)
    print("DICE SCORE:", dice/len(loader))
    model.train()

def save_checkpoint(state, filename="my_checkpoint.pth.tar"):
    print("SAVING CHECK POINT......")
    torch.save(state, filename)

def load_checkpoint(checkpoint, model):
    print("......LOADING CHECKPOINT")
    model.load_state_dict(checkpoint["state_dict"])

In [None]:
TRAIN_IMG_DIR = r"/kaggle/input/fs2020-runway-dataset/640x360/640x360/train"
TEST_IMG_DIR = r"/kaggle/input/fs2020-runway-dataset/640x360/640x360/test"
TRAIN_MASK_OUTPUT = "/kaggle/working/masks_train"
TEST_MASK_OUTPUT = "/kaggle/working/masks_test"
JSON_TRAIN_PATH = r"/kaggle/input/fs2020-runway-dataset/labels/labels/lines/train_labels_640x360.json"
JSON_TEST_PATH = r"/kaggle/input/fs2020-runway-dataset/labels/labels/lines/test_labels_640x360.json"

BATCH_SIZE = 16
LR = 1e-4
EPOCHS = 25
IMG_H = 360
IMG_W = 640

train_tf = A.Compose([
    A.RandomBrightnessContrast(brightness_limit=0.3, contrast_limit=0.3, p=0.5),
    A.HueSaturationValue(hue_shift_limit=20, sat_shift_limit=30, val_shift_limit=20, p=0.5),
    A.GaussNoise(var_limit=(10.0, 50.0), p=0.3),
    A.RGBShift(r_shift_limit=15, g_shift_limit=15, b_shift_limit=15, p=0.3),
])

test_tf = A.Compose([
    A.Resize(IMG_H, IMG_W),
    A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225), max_pixel_value=255.0),
    ToTensorV2(),
])


train_ds = RunwayKeypointDataset(TRAIN_IMG_DIR, JSON_TRAIN_PATH, train_tf)
test_ds = RunwayKeypointDataset(TEST_IMG_DIR, JSON_TEST_PATH)


train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True, num_workers=4, pin_memory=True, drop_last=True)
test_loader = DataLoader(test_ds, batch_size=BATCH_SIZE, num_workers=4, pin_memory=True)


model = RunwayRegressor().to(DEVICE)
optimizer = torch.optim.Adam(model.parameters(), lr=LR)

scaler = torch.amp.GradScaler('cuda')

print(f"Starting Training with Batch Size {BATCH_SIZE}...")
best_loss = float('inf')

for epoch in range(EPOCHS):
    model.train()
    loop = tqdm(train_loader, desc=f"Epoch {epoch+1}/{EPOCHS}")
    train_loss = 0
    
    for x, y, mask in loop:
        x, y, mask = x.to(DEVICE), y.to(DEVICE), mask.to(DEVICE)
        
        preds = model(x)
        
        raw_error = torch.abs(preds - y)
        masked_error = raw_error * mask
        loss = masked_error.sum() / (mask.sum() + 1e-6)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        train_loss += loss.item()
        loop.set_postfix(loss=loss.item())
        
    model.eval()
    val_loss = 0
    with torch.no_grad():
        for x, y, mask in test_loader:
            x, y, mask = x.to(DEVICE), y.to(DEVICE), mask.to(DEVICE)
            preds = model(x)
            
            raw_error = torch.abs(preds - y)
            masked_error = raw_error * mask
            loss = masked_error.sum() / (mask.sum() + 1e-6)
            
            val_loss += loss.item()
            
    val_loss /= len(test_loader)
    print(f"Epoch {epoch+1} | Train Loss: {train_loss/len(train_loader):.5f} | Val Loss: {val_loss:.5f}")

    if val_loss < best_loss:
        best_loss = val_loss
        torch.save(model.state_dict(), "best_regressor.pth")
        
    if (epoch+1) % 5 == 0:
        save_visual_predictions(test_loader, model, f"output_images/epoch_{epoch+1}", DEVICE)

  A.GaussNoise(var_limit=(10.0, 50.0), p=0.3),
Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 177MB/s]


Starting Training with Batch Size 16...


Epoch 1/25: 100%|██████████| 249/249 [00:50<00:00,  4.97it/s, loss=0.0873]


Epoch 1 | Train Loss: 0.08213 | Val Loss: 0.06256


Epoch 2/25: 100%|██████████| 249/249 [00:53<00:00,  4.66it/s, loss=0.0527]


Epoch 2 | Train Loss: 0.06714 | Val Loss: 0.05757


Epoch 3/25: 100%|██████████| 249/249 [00:52<00:00,  4.73it/s, loss=0.089] 


Epoch 3 | Train Loss: 0.06095 | Val Loss: 0.05170


Epoch 4/25: 100%|██████████| 249/249 [00:52<00:00,  4.71it/s, loss=0.0507]


Epoch 4 | Train Loss: 0.05488 | Val Loss: 0.04919


Epoch 5/25: 100%|██████████| 249/249 [00:53<00:00,  4.69it/s, loss=0.0478]


Epoch 5 | Train Loss: 0.04823 | Val Loss: 0.04290


Epoch 6/25: 100%|██████████| 249/249 [00:52<00:00,  4.71it/s, loss=0.0432]


Epoch 6 | Train Loss: 0.04285 | Val Loss: 0.03643


Epoch 7/25: 100%|██████████| 249/249 [00:52<00:00,  4.72it/s, loss=0.0265]


Epoch 7 | Train Loss: 0.03870 | Val Loss: 0.03180


Epoch 8/25: 100%|██████████| 249/249 [00:52<00:00,  4.73it/s, loss=0.0322]


Epoch 8 | Train Loss: 0.03554 | Val Loss: 0.02866


Epoch 9/25: 100%|██████████| 249/249 [00:52<00:00,  4.72it/s, loss=0.0308]


Epoch 9 | Train Loss: 0.03362 | Val Loss: 0.03096


Epoch 10/25: 100%|██████████| 249/249 [00:52<00:00,  4.71it/s, loss=0.0472]


Epoch 10 | Train Loss: 0.03163 | Val Loss: 0.02862


Epoch 11/25: 100%|██████████| 249/249 [00:52<00:00,  4.72it/s, loss=0.0364]


Epoch 11 | Train Loss: 0.03018 | Val Loss: 0.02647


Epoch 12/25: 100%|██████████| 249/249 [00:52<00:00,  4.74it/s, loss=0.0384]


Epoch 12 | Train Loss: 0.02893 | Val Loss: 0.02505


Epoch 13/25:  57%|█████▋    | 143/249 [00:30<00:22,  4.74it/s, loss=0.0239]