In [None]:

import os
# Sanity check: print every 5000th file (by position within its folder) found under the input mount.
for folder, _subdirs, names in os.walk('/kaggle/input'):
    for idx in range(0, len(names), 5000):
        print(idx, os.path.join(folder, names[idx]))



In [None]:
# Inputs supplied by the attached Kaggle dataset
CSV_PATH   = "/kaggle/input/militaryaircraftdetectiondataset/labels_with_split.csv"
IMAGES_DIR = "/kaggle/input/militaryaircraftdetectiondataset/dataset"  # root folder holding all images

# Output area; the YOLO-formatted dataset is assembled underneath it
WORK_DIR   = "/kaggle/working"
YOLO_DIR   = WORK_DIR + "/yolo_data"

In [None]:

!pip -q install ultralytics

In [None]:
import os, shutil, glob
from pathlib import Path
import pandas as pd
import numpy as np
from tqdm import tqdm

# Lay out <YOLO_DIR>/<split>/{images,labels} for each of the three splits
for split in ("train", "val", "test"):
    for subfolder in ("images", "labels"):
        (Path(YOLO_DIR) / split / subfolder).mkdir(parents=True, exist_ok=True)

# Load the annotation table; header names are trimmed and lower-cased on the way in
df = pd.read_csv(CSV_PATH).rename(columns=lambda col: col.strip().lower())

# Fold alternative spellings of the validation split onto 'val'
split_aliases = {"valid": "val", "validation": "val"}
df["split"] = df["split"].str.lower().replace(split_aliases)

# Index every image under IMAGES_DIR twice: by stem and by full file name,
# so CSV entries resolve whether or not they carry an extension.
valid_exts = {".jpg", ".jpeg", ".png", ".bmp", ".tif", ".tiff", ".webp"}
all_image_paths = [
    candidate
    for candidate in Path(IMAGES_DIR).rglob("*")
    if candidate.suffix.lower() in valid_exts
]

by_stem, by_basename = {}, {}
for image_path in all_image_paths:
    # On a repeated key the later path overwrites the earlier one
    by_stem[image_path.stem] = image_path
    by_basename[image_path.name] = image_path

def resolve_image_path(name: str):
    """Look up the on-disk image for a filename entry.

    The entry (converted to ``str``) is tried against ``by_basename`` as an
    exact file name first; if that misses, its stem is tried against
    ``by_stem``. Returns the mapped path, or None when neither lookup matches.
    """
    key = str(name)
    try:
        return by_basename[key]
    except KeyError:
        # No exact file-name match: fall back to matching on the stem alone
        return by_stem.get(Path(key).stem)

# Every row is given the single label "plane"
df["class"] = "plane"
# Normalise labels to stripped strings, then number the distinct names in
# sorted order so the ids are deterministic
df["class"] = df["class"].astype(str).str.strip()
class_names = sorted(set(df["class"]))
name_to_id  = dict(zip(class_names, range(len(class_names))))
print("Classes:", name_to_id)

In [None]:
# Clip boxes helper
def xyxy_to_yolo(xmin, ymin, xmax, ymax, W, H):
    """Convert a pixel-space corner box into a normalised (cx, cy, w, h) tuple.

    Parameters
    ----------
    xmin, ymin, xmax, ymax : scalar
        Box corners in pixels.
    W, H : scalar
        Image width and height in pixels.

    Returns
    -------
    tuple or None
        ``(x_center, y_center, width, height)``, each divided by the image
        size, or None when the image size is not positive or the clipped box
        has no area.
    """
    if W <= 0 or H <= 0:
        return None
    # Clip to the image extent [0, W] x [0, H]. Clipping to W-1 / H-1 would
    # shrink every box that touches the right or bottom edge, so a full-frame
    # box could never normalise to a size of 1.0.
    xmin = np.clip(xmin, 0, W)
    ymin = np.clip(ymin, 0, H)
    xmax = np.clip(xmax, 0, W)
    ymax = np.clip(ymax, 0, H)
    bw = max(0.0, xmax - xmin)
    bh = max(0.0, ymax - ymin)
    if bw <= 0 or bh <= 0:
        # Degenerate, inverted, or fully out-of-frame box
        return None
    x_center = (xmin + xmax) / 2.0 / W
    y_center = (ymin + ymax) / 2.0 / H
    bw /= W
    bh /= H
    return x_center, y_center, bw, bh

def _place_image(src, dst):
    """Make ``src`` available at ``dst`` via hard link, symlink, or copy (tried in that order).

    Anything already present at ``dst`` is removed first. Without that, a
    second run of this cell fails: os.link and os.symlink raise
    FileExistsError, and shutil.copy2 then raises SameFileError because the
    existing link already refers to ``src``.
    """
    if os.path.lexists(dst):           # lexists also detects dangling symlinks
        os.remove(dst)
    try:
        os.link(src, dst)              # hard link (fast, saves space)
    except (OSError, NotImplementedError):
        try:
            os.symlink(src, dst)       # symlink fallback
        except (OSError, NotImplementedError):
            shutil.copy2(src, dst)     # last resort: copy


# Group rows per (split, filename): one label file and one image per group
bad, written = 0, 0
groups = df.groupby(["split", "filename"])  # built once; also supplies the progress-bar total
for (split, fname), g in tqdm(groups, total=groups.ngroups):
    img_path = resolve_image_path(fname)
    if img_path is None:
        # No image could be resolved for this filename
        bad += 1
        continue

    # Label file path
    stem = img_path.stem
    lbl_fp = Path(YOLO_DIR)/split/"labels"/f"{stem}.txt"

    # Build label lines; the image size is taken from the group's first row
    lines = []
    W = float(g.iloc[0]["width"])
    H = float(g.iloc[0]["height"])
    for cls, xmin, ymin, xmax, ymax in zip(g["class"], g["xmin"], g["ymin"], g["xmax"], g["ymax"]):
        cid = name_to_id[str(cls)]
        xywh = xyxy_to_yolo(float(xmin), float(ymin), float(xmax), float(ymax), W, H)
        if xywh is None:
            continue
        lines.append(f"{cid} " + " ".join(f"{v:.6f}" for v in xywh))

    if not lines:
        # Skip images that ended up without any valid boxes
        continue

    # Write label file
    with open(lbl_fp, "w") as f:
        f.write("\n".join(lines))
    written += 1

    # Copy/link image to split/images
    out_img = Path(YOLO_DIR)/split/"images"/img_path.name
    _place_image(img_path, out_img)

print(f"Images with labels written: {written}, missing/failed: {bad}")

In [None]:
# Dataset description file: dataset root, per-split image folders, class count and class names
yaml_text = f"""# Auto-generated for YOLOv8
path: {Path(YOLO_DIR).resolve()}    # dataset root
train: train/images
val:   val/images
test:  test/images

nc: {len(class_names)}
names: {class_names}
"""

# Path.write_text / read_text open and close the file themselves, so no file
# handle is left open the way a bare open(...).read() leaves one.
data_yaml = Path(YOLO_DIR)/"data.yaml"
data_yaml.write_text(yaml_text)

# Echo what actually landed on disk as a quick check
print(data_yaml.read_text())

In [None]:
from ultralytics import YOLO

# Choose a model size: yolov8n.pt / yolov8s.pt / yolov8m.pt / yolov8l.pt / yolov8x.pt
model = YOLO("yolov8s.pt")   # pretrained COCO weights

# NOTE(review): patience (30) and close_mosaic (10) are both larger than
# epochs (5), so neither looks able to take effect in a 5-epoch run -
# confirm whether epochs was meant to be higher.
results = model.train(
    data=str(Path(YOLO_DIR)/"data.yaml"),
    imgsz=640,
    epochs=5,
    batch=-1,          # auto batch
    device=0,
    workers=8,
    project=f"{WORK_DIR}/runs",   # absolute, so output lands in /kaggle/working/runs regardless of the current directory
    name="yolov8s-military",      # run name matches the yolov8s weights loaded above
    cos_lr=True,
    patience=30,
    close_mosaic=10,   # stabilize late training
)


In [None]:
import shutil

# Pack the contents of the runs folder into a zip archive named "out"
dir_to_zip = "/kaggle/working/runs"
shutil.make_archive("out", "zip", root_dir=dir_to_zip)