# 0. Setup: mount Drive, install dependencies, create dataset folders



In [None]:
# Mount Google Drive under /content/drive; the raw dataset path used below
# lives inside this mount.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install -q ultralytics supervision opencv-python pycocotools tqdm

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m10.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m207.2/207.2 kB[0m [31m14.6 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
from pathlib import Path

# Location of the raw UIT-ADrone data on the mounted Drive.
ROOT = Path("/content/drive/MyDrive/Colab Notebooks/UIT-ADrone")

# Create the folder layout that will hold the converted dataset:
# images/ and labels/, each with train / val / test sub-folders.
OUT = Path("/content/datasets")
for subdir in (
    "images/train",
    "images/val",
    "images/test",
    "labels/train",
    "labels/val",
    "labels/test",
):
    (OUT / subdir).mkdir(parents=True, exist_ok=True)

print("ROOT exists:", ROOT.exists(), "\nOUT:", OUT)

ROOT exists: True 
OUT: /content/datasets


# 1. Import libraries and split dataset

In [None]:
import random, re, json, shutil
from tqdm import tqdm
from pathlib import Path

# Seed the global RNG so the shuffles below always pick the same videos.
random.seed(42)

TRAIN_FRAMES_DIR = ROOT / "train" / "frames"
TEST_FRAMES_DIR = ROOT / "test" / "frames"

# Every sub-directory of a frames folder is treated as one video.
train_videos = sorted(entry.name for entry in TRAIN_FRAMES_DIR.iterdir() if entry.is_dir())
test_videos = sorted(entry.name for entry in TEST_FRAMES_DIR.iterdir() if entry.is_dir())

# Number of videos to take for each split.
N_TRAIN_VIDS, N_VAL_VIDS, N_TEST_VIDS = 12, 4, 4

assert len(train_videos) >= N_TRAIN_VIDS + N_VAL_VIDS, "Không đủ video trong train/frames"
assert len(test_videos)  >= N_TEST_VIDS, "Không đủ video trong test/frames"

# Random but seed-fixed selection: train and val are disjoint slices of the
# shuffled train videos; test comes from the separately shuffled test videos.
random.shuffle(train_videos)
val_end = N_TRAIN_VIDS + N_VAL_VIDS
train_split_videos = train_videos[:N_TRAIN_VIDS]
val_split_videos = train_videos[N_TRAIN_VIDS:val_end]

random.shuffle(test_videos)
test_split_videos = test_videos[:N_TEST_VIDS]

for label, vids in (
    ("Train vids:", train_split_videos),
    ("Val   vids:", val_split_videos),
    ("Test  vids:", test_split_videos),
):
    print(label, vids[:5], "...", len(vids))

# Frame-rate reduction: keep 1 frame out of every STRIDE frames.
STRIDE = 6  # ≈ 5 FPS if the source is 30 FPS

def list_sampled_frames(video_dir: Path, stride: int):
    """Return every ``stride``-th ``*.jpg`` file of ``video_dir``, in frame order.

    Files are ordered with a natural sort (runs of digits compare as integers),
    so frame numbers without zero padding (``frame_2`` vs ``frame_10``) are
    sampled in numeric order. Consistently zero-padded names sort exactly as
    they do lexicographically.

    Args:
        video_dir: Directory holding the frames of one video.
        stride: Keep one frame out of every ``stride``; must be >= 1.

    Returns:
        List of ``Path`` objects: the frames at positions 0, stride, 2*stride, ...

    Raises:
        ValueError: If ``stride`` is smaller than 1.
    """
    if stride < 1:
        raise ValueError(f"stride must be >= 1, got {stride}")

    def natural_key(path: Path):
        # re.split with a capturing group alternates text / digit tokens, so
        # odd positions are always digit runs: ints are only ever compared
        # with ints and strings with strings.
        tokens = re.split(r"(\d+)", path.name)
        numeric_aware = [int(tok) if pos % 2 else tok for pos, tok in enumerate(tokens)]
        # The raw name breaks ties such as "a01.jpg" vs "a1.jpg" deterministically.
        return numeric_aware, path.name

    frames = sorted(video_dir.glob("*.jpg"), key=natural_key)
    return frames[::stride]

# Sampled frame paths per split. Train and val videos both live under the
# train frames folder; test videos live under the test frames folder.
selected = {"train": [], "val": [], "test": []}

split_sources = (
    ("train", TRAIN_FRAMES_DIR, train_split_videos),
    ("val", TRAIN_FRAMES_DIR, val_split_videos),
    ("test", TEST_FRAMES_DIR, test_split_videos),
)
for split_name, frames_root, video_names in split_sources:
    for video_name in video_names:
        sampled = list_sampled_frames(frames_root / video_name, STRIDE)
        selected[split_name].extend(sampled)

for split_name, frame_paths in selected.items():
    print(split_name, "frames:", len(frame_paths))

Train vids: ['DJI_0071', '50m_90d_morning_ngatuanninh_22_3', 'DJI_0073', 'DJI_0105', 'DJI_0092-034'] ... 12
Val   vids: ['DJI_0090-035', 'DJI_0084', 'DJI_0075', 'DJI_0103'] ... 4
Test  vids: ['DJI_0066', 'DJI_0067', 'DJI_0105', '50m_90d_morning_congkhuA_22_3'] ... 4
train frames: 3000
val frames: 852
test frames: 1608


# 2. Map classes and convert COCO annotations to YOLO format

In [None]:
import json, cv2, shutil
from collections import defaultdict
from pathlib import Path
from tqdm import tqdm

# 5 lớp chuẩn
# The five target classes; position in the tuple is the YOLO class id.
NAME2NEW = {
    name: class_id
    for class_id, name in enumerate(("motorbike", "car", "bus", "truck", "bicycle"))
}
# Source category names that are folded into one of the target classes.
ALIAS = dict([("van", "car"), ("bike", "bicycle")])
# Source category names whose annotations are discarded.
DROP = set(["ped", "tricycle"])

def load_coco(json_path: Path):
    """Read a COCO annotation file and return the parsed JSON object.

    Args:
        json_path: Path of the ``.json`` file to read.

    Returns:
        Whatever ``json.load`` produces for the file (a dict for COCO files).
    """
    # Decode explicitly as UTF-8 instead of the platform default encoding, so
    # non-ASCII text in the file is read the same way on every machine.
    with open(json_path, "r", encoding="utf-8") as f:
        return json.load(f)

# Parse the two COCO annotation files located directly under ROOT.
coco_train, coco_test = (
    load_coco(ROOT / json_name) for json_name in ("train.json", "test.json")
)

def build_id_maps(coco_obj):
    """Index a parsed COCO object for fast lookup.

    Args:
        coco_obj: Mapping with ``"images"``, ``"annotations"`` and
            ``"categories"`` lists, as found in a COCO file.

    Returns:
        A 3-tuple ``(images_by_id, annotations_by_image, category_names)``:
        image id -> image record, image id -> list of its annotation records
        (a ``defaultdict(list)``), and category id -> category name.
    """
    images_by_id = {}
    for image in coco_obj["images"]:
        images_by_id[image["id"]] = image

    annotations_by_image = defaultdict(list)
    for annotation in coco_obj["annotations"]:
        annotations_by_image[annotation["image_id"]].append(annotation)

    category_names = dict(
        (category["id"], category["name"]) for category in coco_obj["categories"]
    )
    return images_by_id, annotations_by_image, category_names

# Lookup tables (images, annotations per image, category names) for the
# train and test annotation files, in that order.
(imgs_tr, anns_tr, cats_tr), (imgs_te, anns_te, cats_te) = map(
    build_id_maps, (coco_train, coco_test)
)

def to_key_from_path(p: Path) -> str:
    """Return the ``"<parent folder>/<file name>"`` key for a frame path."""
    folder_name = p.parent.name
    return "/".join((folder_name, p.name))

def to_key_from_coco_file(file_name: str) -> str:
    """Return the ``"<parent folder>/<file name>"`` key for a COCO ``file_name``.

    Both ``/`` and ``\\`` are accepted as separators, so an annotation file
    that stores Windows-style paths yields the same keys when read on a POSIX
    host.

    Args:
        file_name: The ``file_name`` value of a COCO image record.

    Returns:
        The last two path components joined with ``/``, or just the file name
        when there is no parent component.
    """
    # On POSIX, Path() does not treat "\" as a separator, so normalise it
    # explicitly after the usual as_posix() clean-up.
    normalized = Path(file_name).as_posix().replace("\\", "/")
    parts = normalized.split("/")
    if len(parts) >= 2:
        return f"{parts[-2]}/{parts[-1]}"
    return parts[-1]  # fallback: bare file name (rare)

# Per split, the set of "<video>/<frame file>" keys of the sampled frames;
# used to filter COCO image records during conversion.
selected_key = {
    split_name: {to_key_from_path(frame_path) for frame_path in frame_paths}
    for split_name, frame_paths in selected.items()
}

def resolve_src_image(split: str, file_name: str, key: str) -> Path | None:
    """Find the image file on disk for a COCO image record.

    Three locations are probed and the first one that exists is returned:
    the key under the frames folder matching ``split`` (train frames for
    ``"train"``/``"val"``, test frames otherwise), then ``ROOT / file_name``,
    then the key under the other frames folder.

    Args:
        split: Name of the split being converted.
        file_name: The record's ``file_name`` as stored in the COCO file.
        key: ``"<video>/<frame file>"`` key derived from ``file_name``.

    Returns:
        The first existing candidate path, or ``None`` if none exists.
    """
    in_train_frames = TRAIN_FRAMES_DIR / key
    in_test_frames = TEST_FRAMES_DIR / key
    as_listed = ROOT / file_name

    if split in ("train", "val"):
        search_order = (in_train_frames, as_listed, in_test_frames)
    else:
        search_order = (in_test_frames, as_listed, in_train_frames)

    return next((cand for cand in search_order if cand.exists()), None)

def category_to_new_id(cat_name: str):
    """Map a source category name to its new class id.

    Returns ``None`` when the name is listed in ``DROP`` or, after applying
    ``ALIAS``, is not a key of ``NAME2NEW``.
    """
    if cat_name not in DROP:
        canonical_name = ALIAS.get(cat_name, cat_name)
        return NAME2NEW.get(canonical_name, None)
    return None

def _coco_bbox_to_yolo(bbox, img_w, img_h):
    """Convert a COCO ``[x, y, w, h]`` pixel box to normalised YOLO ``(xc, yc, w, h)``.

    The box corners are clipped to the image rectangle first, so a box that
    crosses the border keeps only its visible part. Returns ``None`` when
    nothing of the box remains inside the image (zero or negative area).
    """
    x, y, bw, bh = bbox
    x1 = min(max(x, 0.0), img_w)
    y1 = min(max(y, 0.0), img_h)
    x2 = min(max(x + bw, 0.0), img_w)
    y2 = min(max(y + bh, 0.0), img_h)
    if x2 <= x1 or y2 <= y1:
        return None
    return (
        (x1 + x2) / 2 / img_w,
        (y1 + y2) / 2 / img_h,
        (x2 - x1) / img_w,
        (y2 - y1) / img_h,
    )


def write_yolo_for_split(split: str, coco_obj, imgs_map, anns_map, cats_map):
    """Copy the sampled images of ``split`` into OUT and write their YOLO labels.

    For every COCO image record whose key is in ``selected_key[split]`` the
    source image is copied to ``OUT/images/<split>/<video>_<frame>.jpg`` and a
    label file with the same stem is written to ``OUT/labels/<split>``. A label
    file is written even when it ends up empty.

    Args:
        split: Split name; selects the output folders and the key filter.
        coco_obj: Parsed COCO object. Not read here; kept so existing calls
            keep working.
        imgs_map: Image id -> COCO image record.
        anns_map: Image id -> list of COCO annotation records.
        cats_map: Category id -> category name.

    Returns:
        Number of images (and label files) written.
    """
    img_out_dir = OUT / f"images/{split}"
    lbl_out_dir = OUT / f"labels/{split}"
    img_out_dir.mkdir(parents=True, exist_ok=True)
    lbl_out_dir.mkdir(parents=True, exist_ok=True)

    kept = 0
    miss_file = 0
    # Sampled frames not yet matched to any COCO image record; whatever is
    # left after the loop was selected on disk but is absent from the COCO file.
    unmatched_keys = set(selected_key[split])

    for img_id, img in tqdm(imgs_map.items(), desc=f"Convert {split}"):
        file_rel = img.get("file_name")
        key = to_key_from_coco_file(file_rel)

        # Keep only the frames that were sampled with STRIDE.
        if key not in selected_key[split]:
            continue
        unmatched_keys.discard(key)

        src_img = resolve_src_image(split, file_rel, key)
        if src_img is None:
            miss_file += 1
            continue

        # Image size: prefer the COCO record, fall back to reading the file.
        h, w = img.get("height"), img.get("width")
        if not h or not w:
            im = cv2.imread(str(src_img))
            if im is None:
                miss_file += 1
                continue
            h, w = im.shape[:2]

        # Short flat output name: <video>_<frame>.jpg
        video_name = src_img.parent.name
        frame_name = src_img.stem
        new_name   = f"{video_name}_{frame_name}.jpg"

        shutil.copy2(src_img, img_out_dir / new_name)

        # One YOLO label line per annotation that maps to a kept class.
        lines = []
        for a in anns_map.get(img_id, []):
            old_cat_name = cats_map.get(a["category_id"])
            new_id = category_to_new_id(old_cat_name)
            if new_id is None:
                continue

            yolo_box = _coco_bbox_to_yolo(a["bbox"], w, h)
            if yolo_box is None:
                # Box lies entirely outside the image or has no area.
                continue
            xc, yc, nw, nh = yolo_box

            lines.append(f"{new_id} {xc:.6f} {yc:.6f} {nw:.6f} {nh:.6f}")

        with open(lbl_out_dir / (Path(new_name).stem + ".txt"), "w") as f:
            f.write("\n".join(lines))

        kept += 1

    miss_key = len(unmatched_keys)

    print(f"{split}: wrote {kept} images + labels | missing_files: {miss_file}")
    if miss_key:
        # Surface frames that were sampled but silently produced no output.
        print(f"{split}: {miss_key} sampled frames have no entry in the COCO file and were skipped")
    return kept

# === run the conversion ===
# train and val both read from the train annotation file; test reads from
# the test annotation file.
split_jobs = (
    ("train", coco_train, imgs_tr, anns_tr, cats_tr),
    ("val", coco_train, imgs_tr, anns_tr, cats_tr),
    ("test", coco_test, imgs_te, anns_te, cats_te),
)
k1, k2, k3 = (write_yolo_for_split(*job) for job in split_jobs)
print("Totals:", k1, k2, k3)

Convert train: 100%|██████████| 59186/59186 [32:01<00:00, 30.80it/s]


train: wrote 3000 images + labels | missing_files: 0


Convert val: 100%|██████████| 59186/59186 [09:07<00:00, 108.07it/s]


val: wrote 852 images + labels | missing_files: 0


Convert test: 100%|██████████| 84265/84265 [10:23<00:00, 135.23it/s] 

test: wrote 947 images + labels | missing_files: 0
Totals: 3000 852 947





# 4. Create data.yaml file

In [None]:
# Build the class list from NAME2NEW so data.yaml cannot drift from the class
# ids that were written into the label files.
names_block = "\n".join(
    f"  {class_id}: {name}"
    for name, class_id in sorted(NAME2NEW.items(), key=lambda item: item[1])
)

data_yaml = f"""# UIT-ADrone → YOLO ({len(NAME2NEW)} classes, subset)
path: {OUT.as_posix()}
train: images/train
val: images/val
test: images/test

names:
{names_block}
"""

# The header line contains a non-ASCII arrow, so pin the encoding for both
# the write and the read-back instead of relying on the platform default.
yaml_path = OUT / "data.yaml"
yaml_path.write_text(data_yaml, encoding="utf-8")

print(yaml_path.read_text(encoding="utf-8"))

# UIT-ADrone → YOLO (5 classes, subset)
path: /content/datasets
train: images/train
val: images/val
test: images/test

names:
  0: motorbike
  1: car
  2: bus
  3: truck
  4: bicycle



# 5. Download dataset

In [None]:
!zip -r -q /content/datasets.zip /content/datasets

from google.colab import files
files.download("datasets.zip")

4.4G	/content/datasets
/content/datasets
/content/datasets/images
/content/datasets/images/train
/content/datasets/images/val
/content/datasets/images/test
/content/datasets/.ipynb_checkpoints
/content/datasets/labels
/content/datasets/labels/train
/content/datasets/labels/val
/content/datasets/labels/test
/content


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>