In [4]:
def pip_install(pkg):
    try:
        subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", "--no-input", pkg])
        print("Installed:", pkg)
        return True
    except Exception as e:
        print("Failed:", pkg, "→", e)
        return False




# Multi-label stratification (try options in order)
ok_iter = pip_install("iterative-stratification==0.1.7")
if not ok_iter:
    ok_iter = pip_install("scikit-multilearn==0.2.0")


Failed: iterative-stratification==0.1.7 → name 'subprocess' is not defined
Failed: scikit-multilearn==0.2.0 → name 'subprocess' is not defined


In [5]:
pip install ultralytics==8.*

Collecting ultralytics==8.*
  Downloading ultralytics-8.3.178-py3-none-any.whl.metadata (37 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics==8.*)
  Downloading ultralytics_thop-2.0.15-py3-none-any.whl.metadata (14 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.8.0->ultralytics==8.*)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.8.0->ultralytics==8.*)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.8.0->ultralytics==8.*)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.8.0->ultralytics==8.*)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.8.0-

In [6]:
# ==================== 1) CONFIG ======================
import os, glob, shutil
from pathlib import Path
import pandas as pd
from ultralytics import YOLO

Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.


In [8]:
# --- Paths from your "Input" panel ---
BEST_MODEL_DIR = "/kaggle/input/best-model"
TEST_DIR       = "/kaggle/input/vehicle-detection-dataset/test1/test"

In [10]:
# Expect these exact filenames (as per your screenshot)
WEIGHTS  = f"{BEST_MODEL_DIR}/phase2_epoch59.pt"
DATA_YAML = f"{BEST_MODEL_DIR}/data.yaml"  # only for sanity/class order; not required for predict()


In [11]:
# Output
OUT_DIR  = "/kaggle/working/vehdet/preds_test"
RUN_NAME = "epoch59_infer"

In [12]:
# Inference knobs (precision-leaning defaults)
IMG_SIZE_PREF = 1024     # falls back to 896 if OOM
CONF_BASE     = 0.30     # raise to cut false positives (try 0.30–0.35)
NMS_IOU       = 0.55     # more aggressive NMS to remove near-duplicates
USE_TTA       = True     # mild bump; disable if in a rush

In [14]:
# Optional: class-specific minimum conf (post-filter in CSV)
# Example: tighten motorbike only:
PER_CLASS_MIN_CONF = {
    # "motorbike": 0.33,
}

In [15]:
# ==================== 2) CHECKS ======================
assert os.path.isdir(TEST_DIR), f"Test folder not found: {TEST_DIR}"
assert os.path.exists(WEIGHTS), f"Weights not found: {WEIGHTS}"
os.makedirs(OUT_DIR, exist_ok=True)

print("Using weights:", WEIGHTS)
if os.path.exists(DATA_YAML):
    print("Found data.yaml:", DATA_YAML)

Using weights: /kaggle/input/best-model/phase2_epoch59.pt
Found data.yaml: /kaggle/input/best-model/data.yaml


In [16]:
# ==================== 3) LOAD MODEL ==================
model = YOLO(WEIGHTS)
print("Model classes (order matters):", model.names)

Model classes (order matters): {0: 'car', 1: 'rickshaw', 2: 'bus', 3: 'motorbike'}


In [17]:
# ==================== 4) INFERENCE ===================
def run_predict(imgsz):
    return model.predict(
        source=TEST_DIR,
        imgsz=imgsz,
        conf=CONF_BASE,
        iou=NMS_IOU,
        device=0,
        half=True,
        augment=USE_TTA,
        save=True,        # annotated images
        save_txt=True,    # YOLO txt with conf
        save_conf=True,
        project=OUT_DIR,
        name=RUN_NAME,
        exist_ok=True
    )

try:
    print(f"Running inference @ imgsz={IMG_SIZE_PREF} ...")
    results = run_predict(IMG_SIZE_PREF)
    IMG_USED = IMG_SIZE_PREF
except Exception as e:
    print("Hit error at 1024, trying 896. Error:", e)
    results = run_predict(896)
    IMG_USED = 896

pred_dir = os.path.join(OUT_DIR, RUN_NAME)
print("✅ Predictions saved to:", pred_dir)

Running inference @ imgsz=1024 ...

image 1/500 /kaggle/input/vehicle-detection-dataset/test1/test/Asraf_50_jpg.rf.7026694f0b9f37a6790982295c7e8663.jpg: 1024x1024 1 car, 1 motorbike, 165.8ms
image 2/500 /kaggle/input/vehicle-detection-dataset/test1/test/Asraf_51_jpg.rf.0e3516baf7509bc2c4a4aa8deea494c2.jpg: 1024x1024 5 cars, 2 buss, 3 motorbikes, 90.4ms
image 3/500 /kaggle/input/vehicle-detection-dataset/test1/test/Asraf_52_jpg.rf.867869f276e6db3a09a84b99643df316.jpg: 1024x1024 1 car, 1 bus, 1 motorbike, 89.7ms
image 4/500 /kaggle/input/vehicle-detection-dataset/test1/test/Sabiha_(01)_jpg.rf.f935c7fc51a14c64e34c17a17c41cb1f.jpg: 1024x1024 2 cars, 2 buss, 83.9ms
image 5/500 /kaggle/input/vehicle-detection-dataset/test1/test/Sabiha_(10)_jpg.rf.d163e2a53d5ed22112e9ead62807acf4.jpg: 1024x1024 4 cars, 4 rickshaws, 1 bus, 82.9ms
image 6/500 /kaggle/input/vehicle-detection-dataset/test1/test/Sabiha_(11)_jpg.rf.36299290e6d6ce12423051e1f5084f05.jpg: 1024x1024 5 cars, 6 rickshaws, 82.7ms
image 7/

In [18]:
# ==================== 5) BUILD CSV ===================
OUT_DIR_CSV  = "/kaggle/working/vehdet/csv_test"
OUT_DIR_zip  = "/kaggle/working/vehdet/zip_test"
rows = []
for r in results:
    W, H = r.orig_shape[1], r.orig_shape[0]
    img_name = Path(r.path).name
    for b in r.boxes:
        cls_id   = int(b.cls)
        cls_name = model.names.get(cls_id, str(cls_id))
        conf     = float(b.conf)
        # Optional per-class tightening:
        min_need = PER_CLASS_MIN_CONF.get(cls_name, -1.0)
        if min_need > 0 and conf < min_need:
            continue
        x1, y1, x2, y2 = b.xyxy[0].tolist()
        rows.append({
            "image": img_name,
            "class_id": cls_id,
            "class_name": cls_name,
            "conf": round(conf, 6),
            "xmin": round(x1, 1), "ymin": round(y1, 1),
            "xmax": round(x2, 1), "ymax": round(y2, 1),
            "width": W, "height": H,
            "imgsz_used": IMG_USED
        })

df = pd.DataFrame(rows)
csv_path = os.path.join(OUT_DIR_CSV, f"{RUN_NAME}_predictions.csv")
df.to_csv(csv_path, index=False)
print("✅ CSV:", csv_path, f"({len(df)} rows)")

# ==================== 6) ZIP THE RUN =================
shutil.make_archive(os.path.join(OUT_DIR, f"{RUN_NAME}_all"), "zip", pred_dir)
print("📦 Zip created:", os.path.join(OUT_DIR, f"{RUN_NAME}_all.zip"))

print("\nNext: File → Save Version → check 'Save output' so your images/CSV/ZIP persist.")


✅ CSV: /kaggle/working/vehdet/preds_test/epoch59_infer_predictions.csv (1548 rows)
📦 Zip created: /kaggle/working/vehdet/preds_test/epoch59_infer_all.zip

Next: File → Save Version → check 'Save output' so your images/CSV/ZIP persist.


In [21]:
import os, glob, zipfile

# --- If you used my earlier names, these exist already ---
OUT_DIR  = "/kaggle/working/vehdet/preds_test"
RUN_NAME = "epoch59_infer"   # change if you used a different name

PRED_DIR = os.path.join(OUT_DIR, RUN_NAME)
OUT_DIR_ZIP = "/kaggle/working/vehdet/zip_test"
os.makedirs(OUT_DIR_ZIP, exist_ok=True)

zip_path = os.path.join(OUT_DIR_ZIP, f"{RUN_NAME}_annotated_images.zip")

# Gather ONLY images (skip label txts and any files inside 'labels/')
img_exts = {".jpg", ".jpeg", ".png", ".JPG", ".JPEG", ".PNG"}
files_to_zip = []
for p in glob.glob(os.path.join(PRED_DIR, "**", "*"), recursive=True):
    if os.path.isdir(p): 
        continue
    if "/labels/" in p.replace("\\", "/"):
        continue
    if os.path.splitext(p)[1] in img_exts:
        files_to_zip.append(p)

# Create the ZIP
with zipfile.ZipFile(zip_path, "w", compression=zipfile.ZIP_DEFLATED) as zf:
    for p in files_to_zip:
        zf.write(p, arcname=os.path.relpath(p, PRED_DIR))

print(f"📦 Created: {zip_path}")
print(f"🖼️  Images zipped: {len(files_to_zip)}")

# Tip: File → Save Version → check “Save output” so the zip persists.


📦 Created: /kaggle/working/vehdet/zip_test/epoch59_infer_annotated_images.zip
🖼️  Images zipped: 500


In [23]:
# ====== CLEAN + REGENERATE LEAN EDA SET ======
import os, glob, xml.etree.ElementTree as ET
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# ---- Paths ----
SRC_TRAIN = "/kaggle/input/vehicle-detection-dataset/train/Final Train Dataset"
OUT_DIR   = "/kaggle/working/vehdet/eda"
os.makedirs(OUT_DIR, exist_ok=True)

# ---- Delete any old EDA ZIPs to recover space ----
for z in glob.glob(os.path.join(OUT_DIR, "*.zip")):
    try:
        os.remove(z)
        print("Deleted:", z)
    except Exception as e:
        print("Could not delete", z, "->", e)

# ---- Classes & canonicalization (match training) ----
TARGET_CLASSES = ["car","rickshaw","bus","motorbike"]
CANON_MAP = {
    "minivan": "car",
    "policecar": "car",
    "suv": "car",
    "three wheelers (cng)": "rickshaw",
    "auto rickshaw": "rickshaw",
    "scooter": "motorbike",
}

def clip(v, lo, hi): return max(lo, min(hi, v))

# ---- Parse Pascal-VOC XMLs into a dataframe ----
records = []
xml_files = sorted(glob.glob(os.path.join(SRC_TRAIN, "*.xml")))
print("Found XML files:", len(xml_files))
for xp in xml_files:
    try:
        root = ET.parse(xp).getroot()
    except Exception:
        continue
    fname = (root.findtext("filename") or "").strip()
    img_stem = Path(fname).stem if fname else Path(xp).stem
    try:
        W = int(root.findtext("size/width") or 0)
        H = int(root.findtext("size/height") or 0)
    except:
        continue
    if W <= 0 or H <= 0: 
        continue
    for obj in root.findall("object"):
        cls = (obj.findtext("name") or "").strip().lower()
        cls = CANON_MAP.get(cls, cls)
        if cls not in TARGET_CLASSES:
            continue
        bb = obj.find("bndbox")
        if bb is None: 
            continue
        try:
            xmin = float(bb.findtext("xmin")); ymin = float(bb.findtext("ymin"))
            xmax = float(bb.findtext("xmax")); ymax = float(bb.findtext("ymax"))
        except:
            continue
        xmin = clip(xmin, 0, W-1); xmax = clip(xmax, 0, W-1)
        ymin = clip(ymin, 0, H-1); ymax = clip(ymax, 0, H-1)
        bw = max(1.0, xmax - xmin); bh = max(1.0, ymax - ymin)

        x = (xmin + bw/2) / W
        y = (ymin + bh/2) / H
        w = bw / W
        h = bh / H
        area = w * h
        records.append({"image": img_stem, "class": cls, "x": x, "y": y, "w": w, "h": h, "area": area})

df = pd.DataFrame(records)
print("Parsed objects:", len(df))

# ------- 1) Objects per class -------
fig = plt.figure(figsize=(6,4), dpi=150, constrained_layout=True)
ax = fig.add_subplot(111)
counts = df["class"].value_counts().reindex(TARGET_CLASSES).fillna(0).astype(int)
ax.bar(counts.index, counts.values)
ax.set_title("Objects per Class")
ax.set_ylabel("instances")
for i,v in enumerate(counts.values): ax.text(i, v, str(v), ha="center", va="bottom", fontsize=8)
fig.savefig(os.path.join(OUT_DIR, "01_objects_per_class.png"))
plt.close(fig)

# ------- 2) Width hist -------
fig = plt.figure(figsize=(6,4), dpi=150, constrained_layout=True)
ax = fig.add_subplot(111)
ax.hist(df["w"].clip(0,1), bins=60)
ax.set_xlim(0,1); ax.set_title("BBox Width (normalized)"); ax.set_xlabel("w"); ax.set_ylabel("count")
fig.savefig(os.path.join(OUT_DIR, "02_width_hist.png"))
plt.close(fig)

# ------- 3) Height hist -------
fig = plt.figure(figsize=(6,4), dpi=150, constrained_layout=True)
ax = fig.add_subplot(111)
ax.hist(df["h"].clip(0,1), bins=60)
ax.set_xlim(0,1); ax.set_title("BBox Height (normalized)"); ax.set_xlabel("h"); ax.set_ylabel("count")
fig.savefig(os.path.join(OUT_DIR, "03_height_hist.png"))
plt.close(fig)

# ------- 4) Area hist (log y) -------
fig = plt.figure(figsize=(6,4), dpi=150, constrained_layout=True)
ax = fig.add_subplot(111)
ax.hist(df["area"].clip(0,1), bins=60)
ax.set_yscale("log")
ax.set_xlim(0,1); ax.set_title("BBox Area (normalized)"); ax.set_xlabel("area"); ax.set_ylabel("count (log)")
fig.savefig(os.path.join(OUT_DIR, "04_area_hist_log.png"))
plt.close(fig)

# ------- 5) Center heatmap -------
fig = plt.figure(figsize=(7,5), dpi=150, constrained_layout=True)
ax = fig.add_subplot(111)
h = ax.hist2d(df["x"], df["y"], bins=60, range=[[0,1],[0,1]])
ax.set_title("Object Center Heatmap"); ax.set_xlabel("x"); ax.set_ylabel("y")
cb = fig.colorbar(h[3], ax=ax); cb.set_label("count")
fig.savefig(os.path.join(OUT_DIR, "05_center_heatmap.png"))
plt.close(fig)

# ------- 6) (Optional) Width vs Height hexbin -------
fig = plt.figure(figsize=(6,6), dpi=150, constrained_layout=True)
ax = fig.add_subplot(111)
hb = ax.hexbin(df["w"], df["h"], gridsize=40, extent=(0,1,0,1), mincnt=1, cmap=None)  # default color map
ax.set_title("Width vs Height (hexbin)"); ax.set_xlabel("w"); ax.set_ylabel("h")
cb = fig.colorbar(hb, ax=ax); cb.set_label("count")
fig.savefig(os.path.join(OUT_DIR, "06_width_vs_height_hexbin.png"))
plt.close(fig)

print("✅ Saved minimal EDA plots to:", OUT_DIR)


Deleted: /kaggle/working/vehdet/eda/eda_and_flowchart.zip
Found XML files: 3003
Parsed objects: 19853
✅ Saved minimal EDA plots to: /kaggle/working/vehdet/eda
