In [2]:
# Install latest torch/vision (Colab usually has them)
!pip -q install --upgrade torch torchvision torchaudio

In [1]:
# Install kaggle and download the dataset
!pip -q install kaggle kagglehub

from google.colab import files
print("👉 Upload your kaggle.json (create from https://www.kaggle.com/settings/account)")
files.upload()  # select kaggle.json

!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

import kagglehub
rtk_path = kagglehub.dataset_download("tallwinkingstan/road-traversing-knowledge-rtk-dataset")
print("Path to dataset files:", rtk_path)


👉 Upload your kaggle.json (create from https://www.kaggle.com/settings/account)


Saving kaggle.json to kaggle.json
Downloading from https://www.kaggle.com/api/v1/datasets/download/tallwinkingstan/road-traversing-knowledge-rtk-dataset?dataset_version_number=1...


100%|██████████| 723M/723M [00:09<00:00, 79.0MB/s]

Extracting files...





Path to dataset files: /root/.cache/kagglehub/datasets/tallwinkingstan/road-traversing-knowledge-rtk-dataset/versions/1


In [3]:
import os, shutil, random
from pathlib import Path
random.seed(42)

# Try to locate the root RTK directory (case/typo tolerant)
candidates = [
    "RTK_Dataset", "RTK_dataset", "RTK_dataset/",
    "RTK_Dataset/", "RTK_dataset/RTK_Dataset", "RTK_Dataset/RTK_dataset"
]
DATASET_DIR = None
for c in candidates:
    p = os.path.join(rtk_path, c)
    if os.path.isdir(p):
        DATASET_DIR = p
        break

# If none of the above worked, try directly scanning one level down
if DATASET_DIR is None:
    for name in os.listdir(rtk_path):
        full = os.path.join(rtk_path, name)
        if os.path.isdir(full) and "rtk" in name.lower():
            DATASET_DIR = full
            break

print("Detected DATASET_DIR:", DATASET_DIR)
assert DATASET_DIR and os.path.isdir(DATASET_DIR), "Could not find RTK dataset folder. Inspect rtk_path printed above."

# Build a tolerant mapping for source → target class
def first_existing(*parts):
    """Return the first subpath that actually exists inside DATASET_DIR."""
    for rel in parts:
        full = os.path.join(DATASET_DIR, rel)
        if os.path.isdir(full):
            return rel
    return None

label_map = {}

# Asphalt
label_map[first_existing("asphalt/asphaltGood", "asphalt/Good", "asphalt/good")] = "asphalt_good"
label_map[first_existing("asphalt/asphaltRegular", "asphalt/Regular", "asphalt/regular")] = "asphalt_regular"
label_map[first_existing("asphalt/asphaltBad", "asphalt/Bad", "asphalt/bad")] = "asphalt_bad"

# Paved
label_map[first_existing("paved/pavedRegular", "paved/Regular", "paved/regular")] = "paved_regular"
label_map[first_existing("paved/pavedBad", "paved/Bad", "paved/bad")] = "paved_bad"

# Unpaved (handle common 'upaved' typo)
label_map[first_existing("unpaved/unpavedRegular", "upaved/unpavedRegular", "unpaved/Regular", "upaved/Regular", "unpaved/regular")] = "unpaved_regular"
label_map[first_existing("unpaved/unpavedBad", "upaved/unpavedBad", "unpaved/Bad", "upaved/Bad", "unpaved/bad")] = "unpaved_bad"

# Clean None keys if any didn't exist
label_map = {k:v for k,v in label_map.items() if k is not None}
print("Resolved source → target classes:")
for k,v in label_map.items():
    print(" ", k, "→", v)

expected_targets = {
    "asphalt_good","asphalt_regular","asphalt_bad",
    "paved_regular","paved_bad",
    "unpaved_regular","unpaved_bad"
}
assert set(label_map.values()) == expected_targets, f"Missing classes. Got {set(label_map.values())}"

# Output dir
OUTPUT_DIR = "/content/prepared_dataset"
for split in ["train", "val", "test"]:
    for cls in expected_targets:
        Path(f"{OUTPUT_DIR}/{split}/{cls}").mkdir(parents=True, exist_ok=True)

train_ratio, val_ratio, test_ratio = 0.70, 0.15, 0.15

def split_and_copy(src_dir, dst_label):
    files = [f for f in os.listdir(src_dir) if not f.startswith(".")]
    random.shuffle(files)
    n = len(files)
    n_train = int(n*train_ratio)
    n_val   = int(n*val_ratio)
    splits = {
        "train": files[:n_train],
        "val":   files[n_train:n_train+n_val],
        "test":  files[n_train+n_val:]
    }
    for split, split_files in splits.items():
        for f in split_files:
            src = os.path.join(src_dir, f)
            dst = os.path.join(OUTPUT_DIR, split, dst_label, f)
            if os.path.isfile(src):
                shutil.copy(src, dst)

# Do the split
for rel_src, tgt in label_map.items():
    split_and_copy(os.path.join(DATASET_DIR, rel_src), tgt)

print("✅ Prepared at:", OUTPUT_DIR)

# Show quick counts
from collections import Counter
def count_images(root):
    counts = {}
    for split in ["train","val","test"]:
        c = Counter()
        for cls in expected_targets:
            d = os.path.join(root, split, cls)
            c[cls] = len([x for x in os.listdir(d) if not x.startswith(".")])
        counts[split] = dict(c)
    return counts

counts = count_images(OUTPUT_DIR)
counts

Detected DATASET_DIR: /root/.cache/kagglehub/datasets/tallwinkingstan/road-traversing-knowledge-rtk-dataset/versions/1/RTK_Dataset
Resolved source → target classes:
  asphalt/asphaltGood → asphalt_good
  asphalt/asphaltRegular → asphalt_regular
  asphalt/asphaltBad → asphalt_bad
  paved/pavedRegular → paved_regular
  paved/pavedBad → paved_bad
  upaved/unpavedRegular → unpaved_regular
  upaved/unpavedBad → unpaved_bad
✅ Prepared at: /content/prepared_dataset


{'train': {'unpaved_regular': 557,
  'paved_regular': 226,
  'paved_bad': 86,
  'asphalt_bad': 324,
  'unpaved_bad': 415,
  'asphalt_regular': 587,
  'asphalt_good': 1384},
 'val': {'unpaved_regular': 119,
  'paved_regular': 48,
  'paved_bad': 18,
  'asphalt_bad': 69,
  'unpaved_bad': 88,
  'asphalt_regular': 125,
  'asphalt_good': 296},
 'test': {'unpaved_regular': 120,
  'paved_regular': 50,
  'paved_bad': 20,
  'asphalt_bad': 71,
  'unpaved_bad': 90,
  'asphalt_regular': 127,
  'asphalt_good': 298}}

In [10]:
import os, time, shutil
CKPT_PATH = "/content/driveA/MyDrive/rtk_resnet/resnet18_rtk_best.pth"
TS_PATH   = "/content/driveA/MyDrive/rtk_resnet/resnet18_rtk_scripted.pt"
LABELS_PATH = "/content/driveA/MyDrive/rtk_resnet/class_names.json"

stamp = time.strftime("%Y%m%d-%H%M%S")
backup_dir = f"/content/driveA/MyDrive/rtk_resnet/_backup_{stamp}"
os.makedirs(backup_dir, exist_ok=True)
for p in [CKPT_PATH, TS_PATH, LABELS_PATH]:
    if os.path.isfile(p):
        shutil.copy2(p, backup_dir)
backup_dir


'/content/driveA/MyDrive/rtk_resnet/_backup_20250908-010957'

In [9]:
from google.colab import drive; drive.mount('/content/driveA')
CKPT_PATH = "/content/driveA/MyDrive/rtk_resnet/resnet18_rtk_best.pth"      # overwrite here
TS_PATH   = "/content/driveA/MyDrive/rtk_resnet/resnet18_rtk_scripted.pt"   # overwrite here
LABELS_PATH = "/content/driveA/MyDrive/rtk_resnet/class_names.json"         # MUST keep same order

DATA_DIR = "/content/prepared_dataset"   # your updated dataset (train/val/…)
IMG_SIZE = 224
BATCH_SIZE = 32
EPOCHS_MORE = 10
LR = 5e-5            # lower LR for resume
WEIGHT_DECAY = 1e-4


In [11]:
!pip -q install --upgrade torch torchvision pillow


In [12]:
import os, json, numpy as np
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, WeightedRandomSampler

device = "cuda" if torch.cuda.is_available() else "cpu"

train_tf = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.RandomHorizontalFlip(0.5),
    transforms.RandomApply([transforms.ColorJitter(0.3,0.3,0.2,0.05)], p=0.5),
    transforms.RandomAffine(10, translate=(0.05,0.05), scale=(0.95,1.05)),
    transforms.ToTensor(),
    transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225]),
])
eval_tf = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225]),
])

train_ds = datasets.ImageFolder(os.path.join(DATA_DIR,"train"), transform=train_tf)
val_ds   = datasets.ImageFolder(os.path.join(DATA_DIR,"val"),   transform=eval_tf)

# load saved class order and assert same
with open(LABELS_PATH,"r") as f:
    saved_classes = json.load(f)
current_classes = sorted(train_ds.class_to_idx.keys())
assert saved_classes == current_classes, "Class names/order changed – keep the same 7 classes/order."

# weighted sampler for imbalance
labels = [y for _,y in train_ds.samples]
counts = np.bincount(labels, minlength=len(saved_classes)).astype(np.float32)
class_weights = 1.0 / np.maximum(counts, 1)
sample_w = [class_weights[y] for y in labels]
sampler = WeightedRandomSampler(sample_w, num_samples=len(sample_w), replacement=True)

train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, sampler=sampler, num_workers=2, pin_memory=True)
val_loader   = DataLoader(val_ds, batch_size=BATCH_SIZE, shuffle=False, num_workers=2, pin_memory=True)


In [None]:
import torch.nn as nn, torch.optim as optim
from torchvision import models
from time import time

# load checkpoint
ckpt = torch.load(CKPT_PATH, map_location=device)
best_val = ckpt.get("best_val_acc", 0.0)

# build model and load weights
model = models.resnet18(weights=None)
model.fc = nn.Linear(model.fc.in_features, len(saved_classes))
model.load_state_dict(ckpt["model_state"])
model.to(device)

# optimizer (resume if present, but force new LR)
optimizer = optim.AdamW(model.parameters(), lr=LR, weight_decay=WEIGHT_DECAY)
if "optimizer_state" in ckpt:
    try:
        optimizer.load_state_dict(ckpt["optimizer_state"])
        for g in optimizer.param_groups: g["lr"] = LR
    except Exception:
        pass

criterion = nn.CrossEntropyLoss(weight=torch.tensor(class_weights, dtype=torch.float32, device=device))
scaler = torch.cuda.amp.GradScaler(enabled=(device=="cuda"))

def run_epoch(loader, train=True):
    model.train(train)
    total, correct, loss_sum = 0, 0, 0.0
    for x,y in loader:
        x,y = x.to(device), y.to(device)
        if train:
            optimizer.zero_grad(set_to_none=True)
            with torch.cuda.amp.autocast(enabled=(device=="cuda")):
                out = model(x); loss = criterion(out,y)
            scaler.scale(loss).backward(); scaler.step(optimizer); scaler.update()
        else:
            with torch.no_grad():
                out = model(x); loss = criterion(out,y)
        loss_sum += loss.item()*x.size(0)
        correct  += (out.argmax(1)==y).sum().item()
        total    += x.size(0)
    return loss_sum/total, correct/total

patience, bad = 5, 0
for e in range(1, EPOCHS_MORE+1):
    t0=time()
    tr_loss,tr_acc = run_epoch(train_loader, True)
    va_loss,va_acc = run_epoch(val_loader, False)
    print(f"[+{e:02d}] train {tr_loss:.4f}/{tr_acc:.4f} | val {va_loss:.4f}/{va_acc:.4f}  ({time()-t0:.1f}s)")
    if va_acc > best_val:
        best_val = va_acc; bad = 0
        torch.save({
            "epoch": ckpt.get("epoch",0)+e,
            "model_state": model.state_dict(),
            "optimizer_state": optimizer.state_dict(),
            "class_names": saved_classes,
            "best_val_acc": best_val
        }, CKPT_PATH)  # <-- overwrite same checkpoint path
        print("  ✔ saved improved checkpoint (overwrote CKPT_PATH)")
    else:
        bad += 1
        if bad >= patience:
            print("Early stopping."); break


  scaler = torch.cuda.amp.GradScaler(enabled=(device=="cuda"))
  with torch.cuda.amp.autocast(enabled=(device=="cuda")):


[+01] train 0.0360/0.9835 | val 0.0449/0.9882  (1049.6s)
  ✔ saved improved checkpoint (overwrote CKPT_PATH)


In [None]:
# Overwrite TorchScript file as well
dummy = torch.randn(1,3,IMG_SIZE,IMG_SIZE).to(device)
model.eval()
scripted = torch.jit.trace(model, dummy)
scripted.save(TS_PATH)   # <-- overwrite same TorchScript path
print("Overwrote TorchScript:", TS_PATH)
