In [None]:
!pip install segmentation-models-pytorch albumentations opencv-python matplotlib --quiet

In [None]:
!sudo rm -rf *

In [None]:
# Download the RUGD frames archive, unpack it, and expose it as ./raw.
zip_url = "http://rugd.vision/data/RUGD_frames-with-annotations.zip"
zip_name = "terrain_dataset.zip"
# NOTE(review): the archive is extracted under "/contents" while later cells
# read from "/content/..."; this only lines up because the rename below moves
# the folder to "raw" relative to the working directory — confirm that the
# working directory is /content.
extract_to = "/contents"
rename_from = "RUGD_frames-with-annotations"
rename_to = "raw"

import os
import zipfile

import requests

# Stream the archive to disk. The context manager releases the connection,
# raise_for_status() stops an HTTP error page from being saved as a "zip",
# and 1 MiB chunks avoid the per-chunk overhead of 1 KiB writes.
with requests.get(zip_url, stream=True) as r:
    r.raise_for_status()
    with open(zip_name, 'wb') as f:
        for chunk in r.iter_content(chunk_size=1024 * 1024):
            if chunk:
                f.write(chunk)

with zipfile.ZipFile(zip_name, 'r') as zip_ref:
    zip_ref.extractall(extract_to)

# Move the extracted top-level folder to its final name.
os.rename(os.path.join(extract_to, rename_from), rename_to)

# The archive is no longer needed once extracted.
os.remove(zip_name)

In [None]:
# Download the RUGD annotation archive, unpack it under ./data, and expose it
# as ./annotations.
zip_url = "http://rugd.vision/data/RUGD_annotations.zip"
zip_name = "RUGD_annotations.zip"
extract_to = "data"
rename_from = "RUGD_annotations"
rename_to = "annotations"

import os
import zipfile

import requests

# Stream the archive to disk. The context manager releases the connection,
# raise_for_status() stops an HTTP error page from being saved as a "zip",
# and 1 MiB chunks avoid the per-chunk overhead of 1 KiB writes.
with requests.get(zip_url, stream=True) as r:
    r.raise_for_status()
    with open(zip_name, 'wb') as f:
        for chunk in r.iter_content(chunk_size=1024 * 1024):
            if chunk:
                f.write(chunk)

with zipfile.ZipFile(zip_name, 'r') as zip_ref:
    zip_ref.extractall(extract_to)

# Move the extracted top-level folder to its final name.
os.rename(os.path.join(extract_to, rename_from), rename_to)

# The archive is no longer needed once extracted.
os.remove(zip_name)

In [None]:
import os
import shutil
from glob import glob

# Flatten the per-scene folders into data/images and data/masks, keeping only
# frames that have an annotation with the same file stem.
raw_root = '/content/raw'
ann_root = '/content/annotations'

os.makedirs('data/images', exist_ok=True)
os.makedirs('data/masks', exist_ok=True)

scene_folders = os.listdir(raw_root)

copied = 0
for scene in scene_folders:
    raw_scene = os.path.join(raw_root, scene)
    ann_scene = os.path.join(ann_root, scene)

    # A scene is usable only when it exists as a folder on both sides.
    if not os.path.isdir(raw_scene) or not os.path.isdir(ann_scene):
        print(f"Skipping non-folder: {scene}")
        continue

    image_paths = sorted(glob(os.path.join(raw_scene, '*.*')))
    mask_paths = sorted(glob(os.path.join(ann_scene, '*.*')))

    # Index masks by file stem once per scene instead of rescanning the whole
    # mask list for every image. setdefault keeps the first path in sorted
    # order, which is the one the previous linear search selected.
    masks_by_stem = {}
    for candidate in mask_paths:
        stem = os.path.splitext(os.path.basename(candidate))[0]
        masks_by_stem.setdefault(stem, candidate)

    for img_path in image_paths:
        img_name = os.path.splitext(os.path.basename(img_path))[0]

        mask_path = masks_by_stem.get(img_name)
        if mask_path is None:
            print(f"⚠️ No matching mask for {img_name}")
            continue

        # Prefix with the scene name so files from different scenes cannot
        # collide in the flat output folders.
        new_img_name = f"{scene}_{os.path.basename(img_path)}".replace(" ", "_")
        new_mask_name = f"{scene}_{os.path.basename(mask_path)}".replace(" ", "_")

        shutil.copy(img_path, os.path.join('data/images', new_img_name))
        shutil.copy(mask_path, os.path.join('data/masks', new_mask_name))
        copied += 1

# Report the tally that the loop has been keeping.
print(f"Copied {copied} image/mask pairs")

In [None]:
import os
import random

# Build a reproducible 70 / 15 / 15 train / val / test split of the image files.
image_dir = 'data/images'
mask_dir = 'data/masks'
split_dir = 'data/splits'
os.makedirs(split_dir, exist_ok=True)

# Sorted listing first, so the seeded shuffle below always starts from the
# same order; only files with an image extension are kept.
image_files = [
    entry
    for entry in sorted(os.listdir(image_dir))
    if entry.endswith(('.jpg', '.jpeg', '.png'))
]

random.seed(42)
random.shuffle(image_files)

# Cut points are absolute indices into the shuffled list.
total = len(image_files)
train_split = int(0.7 * total)
val_split = int(0.85 * total)

train_files, val_files, test_files = (
    image_files[:train_split],
    image_files[train_split:val_split],
    image_files[val_split:],
)

def save_list(file_list, path):
    """Write every name in ``file_list`` to ``path``, one per line.

    The file is opened in write mode, so existing content is replaced.
    """
    with open(path, 'w') as handle:
        handle.writelines(entry + '\n' for entry in file_list)

# Persist each split as <split_dir>/<name>.txt.
for split_name, split_files in (
    ('train', train_files),
    ('val', val_files),
    ('test', test_files),
):
    save_list(split_files, os.path.join(split_dir, split_name + '.txt'))

In [None]:
import os
from torch.utils.data import Dataset
from PIL import Image
import numpy as np
import torch
import albumentations as A
from albumentations.pytorch import ToTensorV2

class TerrainDataset(Dataset):
    """Image / mask pairs listed in a split file, with masks bucketed into 5 classes.

    Each line of ``list_file`` is an image file name inside ``image_dir``; the
    mask is the file with the same stem and a ``.png`` extension inside
    ``mask_dir``. The mask is read as 8-bit grayscale and each gray level is
    mapped to a class id by range (see ``_to_class_ids``).

    NOTE(review): the buckets are applied to the grayscale-converted
    annotation image; confirm this matches how the labels are encoded.

    Args:
        list_file: Text file with one image file name per line.
        image_dir: Directory containing the images.
        mask_dir: Directory containing the masks.
        transform: Optional callable invoked as ``transform(image=..., mask=...)``
            that returns a mapping with ``'image'`` and ``'mask'`` entries.
    """

    # Fill value for pixels that no bucket claims.
    IGNORE_INDEX = 255
    # Inclusive upper gray-level bounds of classes 0..3; anything above the
    # last bound is class 4.
    CLASS_UPPER_BOUNDS = (50, 100, 150, 200)

    def __init__(self, list_file, image_dir, mask_dir, transform=None):
        with open(list_file, 'r') as f:
            # Drop blank lines (e.g. a trailing newline) so they do not turn
            # into bogus samples.
            self.filenames = [line.strip() for line in f if line.strip()]

        self.image_dir = image_dir
        self.mask_dir = mask_dir
        self.transform = transform

    def __len__(self):
        """Return the number of samples listed in the split file."""
        return len(self.filenames)

    @staticmethod
    def _mask_name(image_name):
        """Return the mask file name for ``image_name``.

        Only a trailing ``.jpg`` / ``.jpeg`` extension (any case) is swapped
        for ``.png``; other names are returned unchanged. Working on the
        extension avoids rewriting a ``.jpg`` that appears mid-name.
        """
        stem, ext = os.path.splitext(image_name)
        if ext.lower() in ('.jpg', '.jpeg'):
            return stem + '.png'
        return image_name

    @classmethod
    def _to_class_ids(cls, mask):
        """Map gray levels to class ids: 0-50 -> 0, 51-100 -> 1, 101-150 -> 2,
        151-200 -> 3, >200 -> 4. Negative values keep ``IGNORE_INDEX``."""
        class_ids = np.full_like(mask, fill_value=cls.IGNORE_INDEX)
        valid = mask >= 0
        # right=True makes each upper bound inclusive.
        class_ids[valid] = np.digitize(mask[valid], cls.CLASS_UPPER_BOUNDS, right=True)
        return class_ids

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, mask)``.

        ``image`` is whatever ``transform`` produces, or the raw RGB array
        when no transform is set. ``mask`` is always an int64 tensor of class
        ids.
        """
        image_name = self.filenames[idx]
        mask_name = self._mask_name(image_name)

        image_path = os.path.join(self.image_dir, image_name)
        mask_path = os.path.join(self.mask_dir, mask_name)

        image = np.array(Image.open(image_path).convert('RGB'))
        mask = np.array(Image.open(mask_path).convert('L')).astype(np.int64)

        mask = self._to_class_ids(mask)

        if self.transform:
            augmented = self.transform(image=image, mask=mask)
            image = augmented['image']
            mask = augmented['mask']

        # as_tensor accepts both the tensor returned by a tensor-producing
        # transform and the NumPy array left when no transform is given
        # (calling .long() directly on an ndarray raises AttributeError).
        return image, torch.as_tensor(mask).long()

In [None]:
def get_transforms(train=True):
    """Build the Albumentations pipeline for training or evaluation.

    Both pipelines resize to 512x512, normalise with the ImageNet mean/std and
    finish with ``ToTensorV2``. When ``train`` is truthy, a random horizontal
    flip and a random brightness/contrast jitter are inserted after the resize.
    """
    steps = [A.Resize(512, 512)]

    if train:
        # Augmentations are applied before normalisation.
        steps.append(A.HorizontalFlip(p=0.5))
        steps.append(A.RandomBrightnessContrast(p=0.2))

    steps.append(
        A.Normalize(mean=(0.485, 0.456, 0.406),
                    std=(0.229, 0.224, 0.225))
    )
    steps.append(ToTensorV2())

    return A.Compose(steps)

In [None]:
from torch.utils.data import DataLoader

# Both splits read from the same flat image / mask folders.
dataset_dirs = dict(image_dir='data/images', mask_dir='data/masks')

# Training split: augmenting transforms, shuffled batches.
train_dataset = TerrainDataset(
    list_file='data/splits/train.txt',
    transform=get_transforms(train=True),
    **dataset_dirs,
)

# Validation split: deterministic transforms, fixed order.
val_dataset = TerrainDataset(
    list_file='data/splits/val.txt',
    transform=get_transforms(train=False),
    **dataset_dirs,
)

train_loader = DataLoader(dataset=train_dataset, batch_size=4, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=4)

In [None]:
import segmentation_models_pytorch as smp

# DeepLabV3+ on an ImageNet-pretrained ResNet-34 encoder: RGB input, one
# output channel per terrain class.
deeplab_kwargs = dict(
    encoder_name="resnet34",
    encoder_weights="imagenet",
    in_channels=3,
    classes=5,
    decoder_use_batchnorm=False,
)
model = smp.DeepLabV3Plus(**deeplab_kwargs)

import torch.nn as nn

def convert_batchnorm_to_groupnorm(module):
    """Recursively replace every ``nn.BatchNorm2d`` under ``module`` with ``nn.GroupNorm``.

    The replacement happens in place. Each new layer uses the largest group
    count from 32, 16, 8, 4, 2, 1 that divides the channel count. The
    GroupNorm layers are freshly constructed; nothing is copied from the
    BatchNorm layers they replace.
    """
    group_candidates = (32, 16, 8, 4, 2, 1)

    for child_name, child_module in list(module.named_children()):
        if not isinstance(child_module, nn.BatchNorm2d):
            # Not a BatchNorm layer: descend and convert whatever it contains.
            convert_batchnorm_to_groupnorm(child_module)
            continue

        channels = child_module.num_features
        # 1 divides everything, so a match always exists.
        groups = next(g for g in group_candidates if channels % g == 0)
        replacement = nn.GroupNorm(num_groups=groups, num_channels=channels)
        setattr(module, child_name, replacement)

# Swap every BatchNorm2d in the freshly built model for GroupNorm, in place.
# The new GroupNorm layers are newly initialised, so the affine parameters of
# the BatchNorm layers they replace are not carried over.
convert_batchnorm_to_groupnorm(model)

In [None]:
# Dump the module tree, e.g. to check that the normalisation layers now show
# up as GroupNorm. The output is long.
print(model)

DeepLabV3Plus(
  (encoder): ResNetEncoder(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): GroupNorm(32, 64, eps=1e-05, affine=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): GroupNorm(32, 64, eps=1e-05, affine=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): GroupNorm(32, 64, eps=1e-05, affine=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): GroupNorm(32, 64, eps=1e-05, affine=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias

In [None]:
import torch.nn as nn
import torch.optim as optim

# Pixel-wise cross-entropy over the class logits. 255 is the fill value
# TerrainDataset gives to pixels that no class bucket claims; ignoring it
# keeps such a pixel out of the loss instead of raising an out-of-range
# target error. Targets without 255 are unaffected.
loss_fn = nn.CrossEntropyLoss(ignore_index=255)

optimizer = optim.Adam(model.parameters(), lr=1e-4)

# Halve the learning rate every 10 scheduler steps (stepped once per epoch in
# the training loop).
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5)

In [None]:
import torch
from tqdm import tqdm

def train_one_epoch(model, dataloader, optimizer, loss_fn):
    """Run one optimisation pass over ``dataloader`` and return the mean loss.

    Args:
        model: Network to train; batches are moved to the device of its
            parameters.
        dataloader: Iterable yielding ``(images, masks)`` batches.
        optimizer: Optimizer stepped once per processed batch.
        loss_fn: Callable ``loss_fn(outputs, masks)`` returning a scalar tensor.

    Returns:
        The per-sample mean loss over the batches that were actually
        processed, or ``nan`` when every batch was skipped.
    """
    model.train()

    # Take the device from the model rather than from a notebook global that
    # is only defined in a later cell.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    running_loss = 0.0
    samples_seen = 0

    for images, masks in tqdm(dataloader):
        # Single-sample batches and inputs smaller than 32 px are skipped
        # (presumably to avoid degenerate statistics / feature maps — confirm).
        # Checking before the device transfer avoids copying data that is
        # thrown away.
        if images.size(0) == 1 or images.shape[-1] < 32 or images.shape[-2] < 32:
            continue

        images, masks = images.to(device), masks.to(device)

        # Drop an explicit channel axis, if present, so the target has the
        # (batch, height, width) layout the loss is called with.
        if masks.dim() == 4:
            masks = masks.squeeze(1)

        optimizer.zero_grad()
        outputs = model(images)
        loss = loss_fn(outputs, masks)
        loss.backward()
        optimizer.step()

        batch_size = images.size(0)
        running_loss += loss.item() * batch_size
        samples_seen += batch_size

    # Average over the samples that contributed: dividing by the full dataset
    # size would under-report the loss whenever batches were skipped.
    if samples_seen == 0:
        return float('nan')
    return running_loss / samples_seen

def validate(model, dataloader, loss_fn):
    """Evaluate ``model`` on ``dataloader`` without gradient tracking.

    Args:
        model: Network to evaluate; batches are moved to the device of its
            parameters.
        dataloader: Iterable yielding ``(images, masks)`` batches.
        loss_fn: Callable ``loss_fn(outputs, masks)`` returning a scalar tensor.

    Returns:
        ``(mean_loss, pixel_accuracy)``: the per-sample mean loss and the
        fraction of mask elements whose arg-max prediction equals the label.
        For an empty loader the result is ``(nan, 0.0)``.
    """
    model.eval()

    # Take the device from the model rather than from a notebook global that
    # is only defined in a later cell.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    running_loss = 0.0
    samples_seen = 0
    total_pixels = 0
    correct_pixels = 0

    with torch.no_grad():
        for images, masks in dataloader:
            images, masks = images.to(device), masks.to(device)

            # Same target layout as the training path: drop an explicit
            # channel axis if the loader provides one.
            if masks.dim() == 4:
                masks = masks.squeeze(1)

            outputs = model(images)
            loss = loss_fn(outputs, masks)

            batch_size = images.size(0)
            running_loss += loss.item() * batch_size
            samples_seen += batch_size

            # Class prediction per pixel = arg-max over the channel axis.
            preds = torch.argmax(outputs, dim=1)
            correct_pixels += (preds == masks).sum().item()
            total_pixels += masks.numel()

    # An empty loader would otherwise raise ZeroDivisionError.
    if samples_seen == 0 or total_pixels == 0:
        return float('nan'), 0.0

    accuracy = correct_pixels / total_pixels
    return running_loss / samples_seen, accuracy

In [None]:
# Use the GPU when one is available, otherwise fall back to the CPU, and put
# the model there.
use_cuda = torch.cuda.is_available()
device = torch.device('cuda' if use_cuda else 'cpu')
model = model.to(device)

In [None]:
# Train for a fixed number of epochs, checkpointing whenever the validation
# loss improves on the best value seen so far.
num_epochs = 20
best_val_loss = float('inf')

for epoch in range(num_epochs):
    print(f"\nEpoch {epoch + 1}/{num_epochs}")

    train_loss = train_one_epoch(model, train_loader, optimizer, loss_fn)
    val_loss, val_acc = validate(model, val_loader, loss_fn)

    print(f"Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.4f}")

    # Only the weights (state_dict) are saved; each new best overwrites the
    # previous checkpoint file.
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        torch.save(model.state_dict(), "best_model.pth")
        print("Saved best model.")

    # Advance the learning-rate schedule once per epoch.
    scheduler.step()


Epoch 1/20


100%|██████████| 1302/1302 [07:17<00:00,  2.98it/s]


Train Loss: 0.4334 | Val Loss: 0.3247 | Val Acc: 0.8876
Saved best model.

Epoch 2/20


100%|██████████| 1302/1302 [07:16<00:00,  2.98it/s]


Train Loss: 0.3019 | Val Loss: 0.2770 | Val Acc: 0.9026
Saved best model.

Epoch 3/20


100%|██████████| 1302/1302 [07:14<00:00,  3.00it/s]


Train Loss: 0.2686 | Val Loss: 0.2587 | Val Acc: 0.9086
Saved best model.

Epoch 4/20


100%|██████████| 1302/1302 [07:15<00:00,  2.99it/s]


Train Loss: 0.2439 | Val Loss: 0.2547 | Val Acc: 0.9102
Saved best model.

Epoch 5/20


100%|██████████| 1302/1302 [07:15<00:00,  2.99it/s]


Train Loss: 0.2285 | Val Loss: 0.2354 | Val Acc: 0.9171
Saved best model.

Epoch 6/20


100%|██████████| 1302/1302 [07:15<00:00,  2.99it/s]


Train Loss: 0.2215 | Val Loss: 0.2433 | Val Acc: 0.9152

Epoch 7/20


100%|██████████| 1302/1302 [07:14<00:00,  3.00it/s]


Train Loss: 0.2086 | Val Loss: 0.2210 | Val Acc: 0.9214
Saved best model.

Epoch 8/20


100%|██████████| 1302/1302 [07:14<00:00,  3.00it/s]


Train Loss: 0.2016 | Val Loss: 0.2259 | Val Acc: 0.9199

Epoch 9/20


100%|██████████| 1302/1302 [07:21<00:00,  2.95it/s]


Train Loss: 0.1961 | Val Loss: 0.2162 | Val Acc: 0.9235
Saved best model.

Epoch 10/20


100%|██████████| 1302/1302 [07:25<00:00,  2.92it/s]


Train Loss: 0.1975 | Val Loss: 0.2036 | Val Acc: 0.9278
Saved best model.

Epoch 11/20


100%|██████████| 1302/1302 [07:21<00:00,  2.95it/s]


Train Loss: 0.1731 | Val Loss: 0.1989 | Val Acc: 0.9292
Saved best model.

Epoch 12/20


100%|██████████| 1302/1302 [07:21<00:00,  2.95it/s]


Train Loss: 0.1714 | Val Loss: 0.1994 | Val Acc: 0.9288

Epoch 13/20


100%|██████████| 1302/1302 [07:25<00:00,  2.92it/s]


Train Loss: 0.1670 | Val Loss: 0.1966 | Val Acc: 0.9296
Saved best model.

Epoch 14/20


100%|██████████| 1302/1302 [07:26<00:00,  2.92it/s]


Train Loss: 0.1637 | Val Loss: 0.1919 | Val Acc: 0.9322
Saved best model.

Epoch 15/20


100%|██████████| 1302/1302 [07:24<00:00,  2.93it/s]


Train Loss: 0.1629 | Val Loss: 0.1953 | Val Acc: 0.9297

Epoch 16/20


100%|██████████| 1302/1302 [07:24<00:00,  2.93it/s]


Train Loss: 0.1602 | Val Loss: 0.1902 | Val Acc: 0.9338
Saved best model.

Epoch 17/20


100%|██████████| 1302/1302 [07:20<00:00,  2.95it/s]


Train Loss: 0.1568 | Val Loss: 0.1893 | Val Acc: 0.9341
Saved best model.

Epoch 18/20


100%|██████████| 1302/1302 [07:29<00:00,  2.90it/s]


Train Loss: 0.1552 | Val Loss: 0.1856 | Val Acc: 0.9345
Saved best model.

Epoch 19/20


100%|██████████| 1302/1302 [07:24<00:00,  2.93it/s]


Train Loss: 0.1532 | Val Loss: 0.1876 | Val Acc: 0.9345

Epoch 20/20


100%|██████████| 1302/1302 [07:21<00:00,  2.95it/s]


Train Loss: 0.1517 | Val Loss: 0.1896 | Val Acc: 0.9336


In [None]:
from google.colab import files
# Hand the best checkpoint to the Colab file-download helper (Colab-only).
files.download("best_model.pth")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# RGB colour used to draw each predicted class id.
# NOTE(review): the class names in the comments below do not match the bucket
# comments in TerrainDataset (0 road, 1 grass, 2 rocks, 3 mud, 4 water).
# Confirm which naming is correct before using these colours as a legend.
CLASS_COLORS = {
    0: (0, 0, 0),         # black  — "background" here, "road" in TerrainDataset
    1: (0, 255, 0),       # green  — "grass" in both places
    2: (0, 0, 255),       # blue   — "water" here, "rocks" in TerrainDataset
    3: (255, 0, 0),       # red    — "rocks" here, "mud" in TerrainDataset
    4: (255, 255, 0)      # yellow — "mud/others" here, "water" in TerrainDataset
}

In [None]:
import numpy as np

def decode_segmap(mask, colors=None):
    """Turn a 2-D array of class ids into an RGB image.

    Args:
        mask: 2-D array of class ids with shape ``(height, width)``.
        colors: Optional mapping ``class_id -> (r, g, b)``. Defaults to the
            notebook-level ``CLASS_COLORS`` palette.

    Returns:
        A ``(height, width, 3)`` ``uint8`` array. Pixels whose id is not in
        the palette stay black.

    Raises:
        ValueError: If ``mask`` is not 2-D (from unpacking ``mask.shape``).
    """
    if colors is None:
        # Resolved at call time so later edits to the palette are picked up.
        colors = CLASS_COLORS

    h, w = mask.shape
    color_mask = np.zeros((h, w, 3), dtype=np.uint8)
    for class_id, color in colors.items():
        # The boolean mask selects all pixels of this class; the RGB triple is
        # broadcast over them.
        color_mask[mask == class_id] = color
    return color_mask

In [None]:
from torchvision.transforms.functional import to_pil_image
import matplotlib.pyplot as plt
import torch
from PIL import Image
import torchvision.transforms as T

# Restore the best checkpoint for inference. map_location remaps the stored
# tensors onto the current device, so a checkpoint saved on a GPU runtime
# also loads on a CPU-only one.
model.load_state_dict(torch.load("best_model.pth", map_location=device))
model.eval()
model.to(device)

DeepLabV3Plus(
  (encoder): ResNetEncoder(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): GroupNorm(32, 64, eps=1e-05, affine=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): GroupNorm(32, 64, eps=1e-05, affine=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): GroupNorm(32, 64, eps=1e-05, affine=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): GroupNorm(32, 64, eps=1e-05, affine=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias

In [None]:
# Image to run the model on.
# NOTE(review): this path is under /content/dataset/raw, whereas the data
# preparation cells read the frames from /content/raw/<scene>/ — confirm the
# file exists before running the next cell.
sample_path = "/content/dataset/raw/Park/park_001.jpg"  # change as needed

In [None]:
# Load the sample and bring it to the 512x512 size used during training.
img = Image.open(sample_path).convert('RGB').resize((512, 512))

# Apply the same ImageNet mean/std normalisation as get_transforms(); without
# it the network sees inputs on a different scale than it was trained on.
preprocess = T.Compose([
    T.ToTensor(),
    T.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
])
input_tensor = preprocess(img).unsqueeze(0).to(device)

with torch.no_grad():
    output = model(input_tensor)
    # squeeze(0) drops only the batch axis; arg-max over the remaining first
    # axis (classes) gives one class id per pixel.
    pred = torch.argmax(output.squeeze(0), dim=0).cpu().numpy()