In [2]:
pip install ultralytics==8.*

Collecting ultralytics==8.*
  Downloading ultralytics-8.3.177-py3-none-any.whl.metadata (37 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics==8.*)
  Downloading ultralytics_thop-2.0.15-py3-none-any.whl.metadata (14 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.8.0->ultralytics==8.*)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.8.0->ultralytics==8.*)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.8.0->ultralytics==8.*)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.8.0->ultralytics==8.*)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.8.0-

In [1]:
# ==== CLASS CENSUS: COUNT LABEL FREQUENCIES ACROSS ALL XMLs ====
import os, glob, xml.etree.ElementTree as ET
from collections import Counter, defaultdict
import pandas as pd

In [2]:
# ---- adjust this path to your dataset root ----
SRC_TRAIN = "/kaggle/input/vehicle-detection-dataset/test1/test"

# Find all XMLs (non-recursive if all files are flat; set recursive=True if nested)
xml_files = sorted(glob.glob(os.path.join(SRC_TRAIN, "*.xml")))
print(f"Found XML files: {len(xml_files)}")

Found XML files: 3003


In [3]:
instances = Counter()               # total bbox instances per class
images_per_class = defaultdict(set) # unique image ids containing the class
parse_errors = []

for xp in xml_files:
    try:
        root = ET.parse(xp).getroot()
    except Exception as e:
        parse_errors.append((xp, str(e)))
        continue

    # Prefer <filename>, else fallback to XML filename stem
    fname = (root.findtext("filename") or "").strip()
    image_id = os.path.splitext(os.path.basename(fname))[0] if fname else os.path.splitext(os.path.basename(xp))[0]

    for obj in root.findall("object"):
        cls = (obj.findtext("name") or "").strip().lower()
        if not cls:
            continue
        instances[cls] += 1
        images_per_class[cls].add(image_id)


In [4]:
# Summarize
rows = []
for cls, cnt in instances.items():
    rows.append({
        "class": cls,
        "instances": cnt,
        "images": len(images_per_class[cls])
    })

In [5]:
df = pd.DataFrame(rows).sort_values(["instances","images"], ascending=False).reset_index(drop=True)
display(df.head(25))
print(f"\nUnique classes: {df.shape[0]}")
print(f"Total instances (all classes): {int(df['instances'].sum())}")


Unnamed: 0,class,instances,images
0,car,5476,1621
1,rickshaw,3549,1021
2,bus,3340,1558
3,three wheelers (cng),2990,1170
4,motorbike,2284,1186
5,truck,1492,842
6,pickup,1225,792
7,minivan,935,576
8,suv,860,539
9,van,756,449



Unique classes: 21
Total instances (all classes): 24368


In [6]:
# Show a quick top-50 text summary
print("\nTop classes (instances, images):")
for cls, cnt in instances.most_common(50):
    print(f"{cls:28s}  inst={cnt:5d}  imgs={len(images_per_class[cls]):4d}")


Top classes (instances, images):
car                           inst= 5476  imgs=1621
rickshaw                      inst= 3549  imgs=1021
bus                           inst= 3340  imgs=1558
three wheelers (cng)          inst= 2990  imgs=1170
motorbike                     inst= 2284  imgs=1186
truck                         inst= 1492  imgs= 842
pickup                        inst= 1225  imgs= 792
minivan                       inst=  935  imgs= 576
suv                           inst=  860  imgs= 539
van                           inst=  756  imgs= 449
bicycle                       inst=  459  imgs= 352
auto rickshaw                 inst=  372  imgs= 149
human hauler                  inst=  169  imgs= 128
wheelbarrow                   inst=  120  imgs=  93
minibus                       inst=   95  imgs=  58
ambulance                     inst=   70  imgs=  69
taxi                          inst=   60  imgs=  52
army vehicle                  inst=   43  imgs=  39
scooter                       

In [7]:
# Optional: inspect any XML parse issues
if parse_errors:
    print(f"\nXML parse errors: {len(parse_errors)} (showing up to 5)")
    for p, e in parse_errors[:5]:
        print(" -", os.path.basename(p), "->", e)


XML parse errors: 1 (showing up to 5)
 - 231.xml -> syntax error: line 1, column 0


In [8]:
# ----- OPTIONAL: canonicalized counts (uncomment and edit the map) -----
CANON_MAP = {
     "minivan": "car",
     "policecar" : "car",
     "three wheelers (cng)": "rickshaw",
     "auto rickshaw": "rickshaw",
     "scooter": "motorbike",
    "suv": "car"
    
 }

In [9]:
inst_canon = Counter()
imgs_canon = defaultdict(set)

In [11]:
for cls, cnt in instances.items():
    canon = CANON_MAP.get(cls, cls)
    inst_canon[canon] += cnt
    imgs_canon[canon] |= images_per_class[cls]

In [12]:
rows_c = [{"class": c, "instances": n, "images": len(imgs_canon[c])} for c, n in inst_canon.items()]
df_c = pd.DataFrame(rows_c).sort_values(["instances","images"], ascending=False).reset_index(drop=True)
display(df_c.head(25))

Unnamed: 0,class,instances,images
0,car,14606,1816
1,rickshaw,13822,1751
2,bus,6680,1558
3,motorbike,4644,1196
4,truck,2984,842
5,pickup,2450,792
6,van,1512,449
7,bicycle,918,352
8,human hauler,338,128
9,wheelbarrow,240,93


In [14]:
import importlib

package_name = "iterstrat"  # change this to the package you want to check
spec = importlib.util.find_spec(package_name)

if spec is not None:
    print(f"✅ '{package_name}' is installed.")
else:
    print(f"❌ '{package_name}' is NOT installed.")


❌ 'iterstrat' is NOT installed.


In [18]:
import sys, subprocess

In [20]:
def pip_install(pkg):
    try:
        subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", "--no-input", pkg])
        print("Installed:", pkg)
        return True
    except Exception as e:
        print("Failed:", pkg, "→", e)
        return False




In [21]:
# Multi-label stratification (try options in order)
ok_iter = pip_install("iterative-stratification==0.1.7")
if not ok_iter:
    ok_iter = pip_install("scikit-multilearn==0.2.0")


Installed: iterative-stratification==0.1.7


In [24]:
# ---------------------------
# 1) IMPORTS & CONFIG
# ---------------------------
import os, glob, shutil, random, warnings, json
from pathlib import Path
from collections import defaultdict, Counter
import numpy as np
from PIL import Image
import xml.etree.ElementTree as ET
warnings.filterwarnings("ignore")

# lxml (robust XML parser) – optional
try:
    from lxml import etree as LET
    HAS_LXML = True
except Exception:
    HAS_LXML = False

# iterative stratification or fallbacks
SPLITTER = None
try:
    from iterstrat.ml_stratifiers import MultilabelStratifiedKFold
    SPLITTER = "iterstrat"
    print("Using: iterative-stratification")
except Exception:
    try:
        from skmultilearn.model_selection import IterativeStratification
        SPLITTER = "skmultilearn"
        print("Using: scikit-multilearn IterativeStratification")
    except Exception:
        SPLITTER = "fallback"
        print("Using: simple fallback multilabel split")

from tqdm import tqdm
from ultralytics import YOLO

Using: iterative-stratification
Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.


In [23]:
# pip_install("ultralytics==8.*")

   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.0/1.0 MB 16.4 MB/s eta 0:00:00
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 363.4/363.4 MB 4.7 MB/s eta 0:00:00
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 13.8/13.8 MB 96.5 MB/s eta 0:00:00
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 24.6/24.6 MB 79.2 MB/s eta 0:00:00
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 883.7/883.7 kB 38.9 MB/s eta 0:00:00
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 664.8/664.8 MB 2.5 MB/s eta 0:00:00
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 211.5/211.5 MB 7.8 MB/s eta 0:00:00
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 56.3/56.3 MB 30.8 MB/s eta 0:00:00
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 127.9/127.9 MB 9.1 MB/s eta 0:00:00
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 207.5/207.5 MB 8.1 MB/s eta 0:00:00
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 21.1/21.1 MB 83.8 MB/s eta 0:00:00
Installed: ultralytics==8.*


True

In [25]:
SEED = 42
random.seed(SEED); np.random.seed(SEED)

In [26]:
# ---- paths (edit as needed) ----
SRC_TRAIN = "/kaggle/input/vehicle-detection-dataset/train/Final Train Dataset"
SRC_TEST  = "/kaggle/input/vehicle-detection-dataset/test/Final Test Dataset"  # if not present, it's fine
WORKDIR   = "/kaggle/working/vehdet"

In [27]:

# ---- experiment settings ----
VAL_RATIO        = 0.15            # 0.10 for 90/10
TARGET_CLASSES   = ["car","rickshaw","bus","motorbike"]
OVERSAMPLE_CLASS = "motorbike"
OVERSAMPLE_FACTOR= 2               # 2x; set 3 if needed
IMG_SIZE_PREF    = 1024            # accuracy-first; auto-fallback to 896 if OOM
MODEL_WEIGHTS    = "yolov8m.pt"    # try 'yolov8l.pt' if VRAM allows

# ---- canonicalization map (extend if you want) ----
CANON_MAP = {
    "minivan": "car",
    "policecar": "car",
    "suv": "car",
    "three wheelers (cng)": "rickshaw",
    "auto rickshaw": "rickshaw",
    "scooter": "motorbike",
}

In [28]:
# skip known-bad XMLs
SKIP_XML_BASENAMES = {"231"}   # stems without extension

In [29]:
# ---------------------------
# 2) UTILS
# ---------------------------
def ensure_dirs(base):
    for p in [
        base,
        f"{base}/images/train", f"{base}/labels/train",
        f"{base}/images/val",   f"{base}/labels/val",
        f"{base}/images/full_train", f"{base}/labels/full_train",
        f"{base}/preds_test"
    ]:
        Path(p).mkdir(parents=True, exist_ok=True)

def parse_xml_any(xml_path):
    try:
        if HAS_LXML:
            return LET.parse(xml_path).getroot()
        return ET.parse(xml_path).getroot()
    except Exception:
        return None

def get_img_size(path):
    try:
        with Image.open(path) as im:
            return im.size  # (w,h)
    except Exception:
        return None, None

def voc_to_yolo(xmin,ymin,xmax,ymax,w,h):
    # clip to image and normalize
    xmin = max(0, min(xmin, w-1)); xmax = max(0, min(xmax, w-1))
    ymin = max(0, min(ymin, h-1)); ymax = max(0, min(ymax, h-1))
    bw = xmax - xmin; bh = ymax - ymin
    if bw <= 1 or bh <= 1: return None
    x_c = xmin + bw/2; y_c = ymin + bh/2
    return x_c/w, y_c/h, bw/w, bh/h

def write_label_txt(path, items):
    with open(path, "w") as f:
        for (cid, x,y,w,h) in items:
            f.write(f"{cid} {x:.6f} {y:.6f} {w:.6f} {h:.6f}\n")

def copy_and_label(split_stems, img_by_stem, image_ann, img_out, lbl_out):
    n_img = 0; n_box = 0
    for s in tqdm(split_stems, desc=f"Write {Path(img_out).name}"):
        src_img = img_by_stem.get(s)
        if not src_img: 
            continue
        shutil.copy2(src_img, os.path.join(img_out, Path(src_img).name))
        write_label_txt(os.path.join(lbl_out, f"{s}.txt"), image_ann[s]["items"])
        n_img += 1; n_box += len(image_ann[s]["items"])
    return n_img, n_box

def stems_with_class(stems, image_ann, cname):
    return [s for s in stems if cname in image_ann[s]["present"]]

def duplicate_images(stems_to_dup, factor, img_dir, lbl_dir, class_name="minority"):
    made = 0
    for s in tqdm(stems_to_dup, desc=f"Oversample {class_name} x{factor}"):
        for k in range(1, factor):  # original already exists
            # locate copied image in out dir by stem
            candidates = [p for p in glob.glob(os.path.join(img_dir, "*")) if Path(p).stem == s]
            if not candidates: 
                continue
            src_img = candidates[0]
            ext = Path(src_img).suffix
            src_lbl = os.path.join(lbl_dir, f"{s}.txt")
            if not os.path.exists(src_lbl): 
                continue
            new_stem = f"{s}_dup{k}"
            shutil.copy2(src_img, os.path.join(img_dir, f"{new_stem}{ext}"))
            shutil.copy2(src_lbl, os.path.join(lbl_dir, f"{new_stem}.txt"))
            made += 1
    return made

def simple_iterative_split(stems, Y, val_ratio=0.15, seed=42):
    """
    Fallback multilabel split:
    - take ~val_ratio positives per class into val,
    - fill remainder randomly (includes negatives).
    """
    rng = np.random.RandomState(seed)
    N = len(stems)
    target_val = max(1, int(round(N * val_ratio)))
    idx_all = np.arange(N)

    val_idx_set = set()
    for c in range(Y.shape[1]):
        pos = np.where(Y[:, c] == 1)[0]
        if len(pos) == 0: 
            continue
        k = max(1, int(round(len(pos) * val_ratio)))
        rng.shuffle(pos)
        val_idx_set.update(pos[:k])

    remaining = [i for i in idx_all if i not in val_idx_set]
    rng.shuffle(remaining)
    need = target_val - len(val_idx_set)
    if need > 0:
        val_idx_set.update(remaining[:need])

    val_idx = np.array(sorted(val_idx_set))
    train_idx = np.array([i for i in idx_all if i not in val_idx_set])
    return train_idx, val_idx

In [30]:
# ---------------------------
# 3) PREP WORKDIR & DISCOVER FILES
# ---------------------------
ensure_dirs(WORKDIR)

xml_files = sorted(glob.glob(os.path.join(SRC_TRAIN, "*.xml")))
img_files = []
for ext in ("*.jpg","*.jpeg","*.png","*.JPG","*.JPEG","*.PNG"):
    img_files += glob.glob(os.path.join(SRC_TRAIN, ext))

print(f"Found: {len(img_files)} images, {len(xml_files)} xmls")
img_by_stem = {Path(p).stem: p for p in img_files}

Found: 3003 images, 3003 xmls


In [31]:
# ---------------------------
# 4) PARSE VOC → MEMORY (canonicalize + keep 4 classes)
# ---------------------------
cls2id = {c:i for i,c in enumerate(TARGET_CLASSES)}
image_ann = {}   # stem -> {"w":w,"h":h,"items":[(cid,x,y,w,h)], "present": set()}
bad_xml = []

for xp in tqdm(xml_files, desc="Parsing XML"):
    stem = Path(xp).stem
    if stem in SKIP_XML_BASENAMES:
        bad_xml.append((xp, "listed_skip"))
        continue

    root = parse_xml_any(xp)
    if root is None:
        bad_xml.append((xp, "parse_error"))
        continue

    fname = (root.findtext("filename") or "").strip()
    img_stem = Path(fname).stem if fname else stem
    img_path = img_by_stem.get(img_stem) or img_by_stem.get(stem)
    if not img_path:
        bad_xml.append((xp, "missing_image"))
        continue

    w = root.findtext("size/width"); h = root.findtext("size/height")
    try:
        w = int(w) if w else None; h = int(h) if h else None
    except Exception:
        w, h = None, None
    if not w or not h:
        w, h = get_img_size(img_path)
        if not w or not h:
            bad_xml.append((xp, "no_size"))
            continue

    rec = image_ann.get(img_stem, {"w":w, "h":h, "items":[], "present":set()})
    for obj in root.findall("object"):
        name = (obj.findtext("name") or "").strip().lower()
        name = CANON_MAP.get(name, name)   # canonicalize
        if name not in cls2id:
            continue
        bb = obj.find("bndbox")
        if bb is None: 
            continue
        try:
            xmin = float(bb.findtext("xmin")); ymin = float(bb.findtext("ymin"))
            xmax = float(bb.findtext("xmax")); ymax = float(bb.findtext("ymax"))
        except Exception:
            continue
        yline = voc_to_yolo(xmin,ymin,xmax,ymax,w,h)
        if yline is None:
            continue
        rec["items"].append((cls2id[name], *yline))
        rec["present"].add(name)

    image_ann[img_stem] = rec

# include negatives (images with no target-class labels)
for stem, p in img_by_stem.items():
    if stem not in image_ann:
        image_ann[stem] = {"w":None,"h":None,"items":[], "present":set()}

print(f"Usable images: {len(image_ann)} | Skipped XMLs: {len(bad_xml)}")
if bad_xml:
    print("Examples of skipped:", bad_xml[:3])

Parsing XML: 100%|██████████| 3003/3003 [00:06<00:00, 451.68it/s]

Usable images: 3263 | Skipped XMLs: 1
Examples of skipped: [('/kaggle/input/vehicle-detection-dataset/train/Final Train Dataset/231.xml', 'listed_skip')]





In [32]:
# ---------------------------
# 5) MULTILABEL STRATIFIED SPLIT (with fallbacks)
# ---------------------------
stems = sorted(image_ann.keys())
Y = np.zeros((len(stems), len(TARGET_CLASSES)), dtype=int)
for i, s in enumerate(stems):
    for c in image_ann[s]["present"]:
        Y[i, cls2id[c]] = 1

if SPLITTER == "iterstrat":
    n_splits = max(2, int(round(1/VAL_RATIO)))
    mskf = MultilabelStratifiedKFold(n_splits=n_splits, shuffle=True, random_state=SEED)
    train_idx, val_idx = next(mskf.split(stems, Y))
elif SPLITTER == "skmultilearn":
    n_splits = max(2, int(round(1/VAL_RATIO)))
    istrat = IterativeStratification(n_splits=n_splits, order=1)
    # scikit-multilearn's split API differs; take first split
    splits = list(istrat.split(np.zeros((len(stems),1)), Y))
    train_idx, val_idx = splits[0]
else:
    train_idx, val_idx = simple_iterative_split(stems, Y, val_ratio=VAL_RATIO, seed=SEED)

train_stems = [stems[i] for i in train_idx]
val_stems   = [stems[i] for i in val_idx]
print(f"Split → train: {len(train_stems)} | val: {len(val_stems)} (≈ {len(val_stems)/len(stems):.2f} val)")


Split → train: 2797 | val: 466 (≈ 0.14 val)


In [33]:

# quick class presence report
def presence_counts(split_stems):
    cnt = Counter()
    for s in split_stems:
        for c in image_ann[s]["present"]:
            cnt[c] += 1
    return cnt

print("Train presence:", presence_counts(train_stems))
print("Val presence:  ", presence_counts(val_stems))

Train presence: Counter({'car': 1556, 'rickshaw': 1500, 'bus': 1336, 'motorbike': 1025})
Val presence:   Counter({'car': 260, 'rickshaw': 251, 'bus': 222, 'motorbike': 171})


In [34]:
# ---------------------------
# 6) WRITE YOLO FILES (train/val)
# ---------------------------
tr_img, tr_lbl = f"{WORKDIR}/images/train", f"{WORKDIR}/labels/train"
vl_img, vl_lbl = f"{WORKDIR}/images/val",   f"{WORKDIR}/labels/val"
Path(tr_img).mkdir(parents=True, exist_ok=True)
Path(tr_lbl).mkdir(parents=True, exist_ok=True)
Path(vl_img).mkdir(parents=True, exist_ok=True)
Path(vl_lbl).mkdir(parents=True, exist_ok=True)

ntr, btr = copy_and_label(train_stems, img_by_stem, image_ann, tr_img, tr_lbl)
nvl, bvl = copy_and_label(val_stems,   img_by_stem, image_ann, vl_img, vl_lbl)
print(f"Train: {ntr} imgs / {btr} boxes | Val: {nvl} imgs / {bvl} boxes")

Write train: 100%|██████████| 2797/2797 [00:43<00:00, 64.29it/s] 
Write val: 100%|██████████| 466/466 [00:07<00:00, 64.24it/s]

Train: 2579 imgs / 13596 boxes | Val: 424 imgs / 2118 boxes





In [35]:
# ---------------------------
# 7) OVERSAMPLE MINORITY CLASS (motorbike)
# ---------------------------
minority_train = stems_with_class(train_stems, image_ann, OVERSAMPLE_CLASS)
print(f"Minority '{OVERSAMPLE_CLASS}' train images: {len(minority_train)}")

if OVERSAMPLE_FACTOR > 1 and minority_train:
    made = duplicate_images(minority_train, OVERSAMPLE_FACTOR, tr_img, tr_lbl, class_name=OVERSAMPLE_CLASS)
    print(f"Created {made} duplicates for '{OVERSAMPLE_CLASS}'")

Minority 'motorbike' train images: 1025


Oversample motorbike x2: 100%|██████████| 1025/1025 [00:18<00:00, 55.15it/s]

Created 887 duplicates for 'motorbike'





In [36]:
# ---------------------------
# 8) data.yaml
# ---------------------------
yaml_path = f"{WORKDIR}/data.yaml"
with open(yaml_path, "w") as f:
    f.write(f"path: {WORKDIR}\n")
    f.write("train: images/train\n")
    f.write("val: images/val\n")
    f.write(f"names: {TARGET_CLASSES}\n")
print("data.yaml:\n", open(yaml_path).read())

data.yaml:
 path: /kaggle/working/vehdet
train: images/train
val: images/val
names: ['car', 'rickshaw', 'bus', 'motorbike']



In [37]:

# ---------------------------
# 9) TRAIN YOLO — PHASE 1 (freeze) → PHASE 2 (unfreeze)
# ---------------------------
def train_yolo(imgsz):
    model = YOLO(MODEL_WEIGHTS)   # COCO-pretrained
    # Warm-up with frozen backbone (stabilize transfer)
    model.train(
        data=yaml_path, imgsz=imgsz, epochs=10, batch=-1, device=0, workers=2,
        cache=True, patience=20, freeze=10, cos_lr=True, amp=True,
        project=WORKDIR, name=f"y8_phase1_{imgsz}"
    )
    # Continue training unfrozen
    run = model.train(
        data=yaml_path, imgsz=imgsz, epochs=100, batch=-1, device=0, workers=2,
        cache=True, patience=20, freeze=0, cos_lr=True, amp=True,
        project=WORKDIR, name=f"y8_phase2_{imgsz}", resume=True
    )
    return model, run

try:
    model, run = train_yolo(IMG_SIZE_PREF)
    IMG_SIZE_USED = IMG_SIZE_PREF
except Exception as e:
    print("Hit OOM/other at", IMG_SIZE_PREF, "→ fallback to 896. Err:", e)
    model, run = train_yolo(896)
    IMG_SIZE_USED = 896

print("Training complete @", IMG_SIZE_USED)
runs_dir = os.path.join(WORKDIR, f"y8_phase2_{IMG_SIZE_USED}")
print("Results saved to:", runs_dir)

Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8m.pt to 'yolov8m.pt': 100%|██████████| 49.7M/49.7M [00:00<00:00, 107MB/s] 


Ultralytics 8.3.177 🚀 Python-3.11.13 torch-2.6.0+cu124 CUDA:0 (Tesla P100-PCIE-16GB, 16269MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=-1, bgr=0.0, box=7.5, cache=True, cfg=None, classes=None, close_mosaic=10, cls=0.5, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=True, cutmix=0.0, data=/kaggle/working/vehdet/data.yaml, degrees=0.0, deterministic=True, device=0, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=10, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=10, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=1024, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolov8m.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=y8_phase1_1024, nbs=64, nms=False, opset=None, optimize=False, optimizer=auto, overlap_mask=True, patience=20, perspective=0.0, pl

Downloading https://ultralytics.com/assets/Arial.ttf to '/root/.config/Ultralytics/Arial.ttf': 100%|██████████| 755k/755k [00:00<00:00, 17.5MB/s]


Overriding model.yaml nc=80 with nc=4

                   from  n    params  module                                       arguments                     
  0                  -1  1      1392  ultralytics.nn.modules.conv.Conv             [3, 48, 3, 2]                 
  1                  -1  1     41664  ultralytics.nn.modules.conv.Conv             [48, 96, 3, 2]                
  2                  -1  2    111360  ultralytics.nn.modules.block.C2f             [96, 96, 2, True]             
  3                  -1  1    166272  ultralytics.nn.modules.conv.Conv             [96, 192, 3, 2]               
  4                  -1  4    813312  ultralytics.nn.modules.block.C2f             [192, 192, 4, True]           
  5                  -1  1    664320  ultralytics.nn.modules.conv.Conv             [192, 384, 3, 2]              
  6                  -1  4   3248640  ultralytics.nn.modules.block.C2f             [384, 384, 4, True]           
  7                  -1  1   1991808  ultralytics

Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n.pt to 'yolo11n.pt': 100%|██████████| 5.35M/5.35M [00:00<00:00, 67.2MB/s]


[34m[1mAMP: [0mchecks passed ✅
[34m[1mtrain: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 2182.4±900.9 MB/s, size: 211.9 KB)


[34m[1mtrain: [0mScanning /kaggle/working/vehdet/labels/train... 3466 images, 379 backgrounds, 0 corrupt: 100%|██████████| 3466/3466 [00:09<00:00, 355.27it/s] 

[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/144.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/146.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/147.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/148.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/149.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/150.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/151.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/152.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/154.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/155.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/wo




[34m[1mtrain: [0mNew cache created: /kaggle/working/vehdet/labels/train.cache
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, method='weighted_average', num_output_channels=3), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))
[34m[1mAutoBatch: [0mComputing optimal batch size for imgsz=1024 at 60.0% CUDA memory utilization.
[34m[1mAutoBatch: [0mCUDA:0 (Tesla P100-PCIE-16GB) 15.89G total, 0.26G reserved, 0.25G allocated, 15.38G free
      Params      GFLOPs  GPU_mem (GB)  forward (ms) backward (ms)                   input                  output
    25858636       202.4         4.383         47.76         143.9      (1, 3, 1024, 1024)                    list
    25858636       404.9         4.998         61.72         108.9      (2, 3, 1024, 1024)                    list
    25858636       809.7         6.115         116.9         134.4      (4, 3, 1024, 1024)                    list
    25858636     

[34m[1mtrain: [0mScanning /kaggle/working/vehdet/labels/train.cache... 3466 images, 379 backgrounds, 0 corrupt: 100%|██████████| 3466/3466 [00:00<?, ?it/s]

[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/144.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/146.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/147.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/148.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/149.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/150.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/151.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/152.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/154.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/155.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/wo






[34m[1mtrain: [0mCaching images (5.9GB RAM): 100%|██████████| 3466/3466 [00:31<00:00, 110.08it/s]

[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, method='weighted_average', num_output_channels=3), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))





[34m[1mval: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 867.5±806.4 MB/s, size: 648.5 KB)


[34m[1mval: [0mScanning /kaggle/working/vehdet/labels/val... 424 images, 59 backgrounds, 0 corrupt: 100%|██████████| 424/424 [00:01<00:00, 270.56it/s]

[34m[1mval: [0m/kaggle/working/vehdet/images/val/145.jpg: corrupt JPEG restored and saved
[34m[1mval: [0m/kaggle/working/vehdet/images/val/153.jpg: corrupt JPEG restored and saved
[34m[1mval: [0m/kaggle/working/vehdet/images/val/159.jpg: corrupt JPEG restored and saved
[34m[1mval: [0m/kaggle/working/vehdet/images/val/169.jpg: corrupt JPEG restored and saved
[34m[1mval: [0m/kaggle/working/vehdet/images/val/170.jpg: corrupt JPEG restored and saved
[34m[1mval: [0m/kaggle/working/vehdet/images/val/182.jpg: corrupt JPEG restored and saved
[34m[1mval: [0m/kaggle/working/vehdet/images/val/187.jpg: corrupt JPEG restored and saved
[34m[1mval: [0m/kaggle/working/vehdet/images/val/Dipto_366.jpg: corrupt JPEG restored and saved
[34m[1mval: [0mNew cache created: /kaggle/working/vehdet/labels/val.cache







[34m[1mval: [0mCaching images (0.7GB RAM): 100%|██████████| 424/424 [00:04<00:00, 105.90it/s]


Plotting labels to /kaggle/working/vehdet/y8_phase1_1024/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.00125, momentum=0.9) with parameter groups 77 weight(decay=0.0), 84 weight(decay=0.0004921875), 83 bias(decay=0.0)
Image sizes 1024 train, 1024 val
Using 2 dataloader workers
Logging results to [1m/kaggle/working/vehdet/y8_phase1_1024[0m
Starting training for 10 epochs...
Closing dataloader mosaic
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, method='weighted_average', num_output_channels=3), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/10      6.43G      1.257       1.79      1.233          7       1024: 100%|██████████| 386/386 [03:20<00:00,  1.92it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 24/24 [00:10<00:00,  2.24it/s]

                   all        424       2118      0.585      0.618      0.616      0.377






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       2/10      6.44G      1.266      1.259      1.258          4       1024: 100%|██████████| 386/386 [03:17<00:00,  1.96it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 24/24 [00:09<00:00,  2.47it/s]

                   all        424       2118       0.64      0.617      0.644      0.392






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       3/10      6.48G      1.241      1.158      1.246          6       1024: 100%|██████████| 386/386 [03:16<00:00,  1.96it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 24/24 [00:09<00:00,  2.48it/s]

                   all        424       2118      0.642      0.642      0.669      0.416






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       4/10      6.53G      1.189       1.06      1.206          2       1024: 100%|██████████| 386/386 [03:16<00:00,  1.97it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 24/24 [00:09<00:00,  2.48it/s]

                   all        424       2118      0.662      0.677      0.687      0.438






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       5/10      6.58G      1.141     0.9628      1.183          8       1024: 100%|██████████| 386/386 [03:16<00:00,  1.97it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 24/24 [00:09<00:00,  2.48it/s]

                   all        424       2118      0.729      0.658      0.717      0.458






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       6/10      6.63G      1.083     0.8888       1.14         10       1024: 100%|██████████| 386/386 [03:16<00:00,  1.97it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 24/24 [00:09<00:00,  2.49it/s]

                   all        424       2118      0.716      0.683      0.734      0.481






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       7/10      6.68G       1.05     0.8279      1.125          1       1024: 100%|██████████| 386/386 [03:16<00:00,  1.96it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 24/24 [00:09<00:00,  2.49it/s]

                   all        424       2118      0.756      0.676       0.76      0.504






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       8/10      6.72G     0.9947     0.7515      1.088          0       1024: 100%|██████████| 386/386 [03:16<00:00,  1.97it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 24/24 [00:09<00:00,  2.51it/s]

                   all        424       2118      0.731      0.726      0.775       0.52






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       9/10      6.77G     0.9564      0.702      1.068          1       1024: 100%|██████████| 386/386 [03:16<00:00,  1.97it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 24/24 [00:09<00:00,  2.51it/s]

                   all        424       2118      0.738      0.725      0.782       0.53






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      10/10      6.82G     0.9367     0.6639      1.055          0       1024: 100%|██████████| 386/386 [03:16<00:00,  1.97it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 24/24 [00:09<00:00,  2.50it/s]

                   all        424       2118      0.745      0.741       0.79      0.536






10 epochs completed in 0.577 hours.
Optimizer stripped from /kaggle/working/vehdet/y8_phase1_1024/weights/last.pt, 52.1MB
Optimizer stripped from /kaggle/working/vehdet/y8_phase1_1024/weights/best.pt, 52.1MB

Validating /kaggle/working/vehdet/y8_phase1_1024/weights/best.pt...
Ultralytics 8.3.177 🚀 Python-3.11.13 torch-2.6.0+cu124 CUDA:0 (Tesla P100-PCIE-16GB, 16269MiB)
Model summary (fused): 92 layers, 25,842,076 parameters, 0 gradients, 78.7 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 24/24 [00:10<00:00,  2.33it/s]


                   all        424       2118      0.746      0.741       0.79      0.537
                   car        234        712      0.765      0.778      0.832      0.601
              rickshaw        218        749      0.735      0.746      0.764      0.527
                   bus        198        370      0.745      0.751      0.818      0.605
             motorbike        148        287      0.737      0.686      0.744      0.413
Speed: 0.3ms preprocess, 19.4ms inference, 0.0ms loss, 1.4ms postprocess per image
Results saved to [1m/kaggle/working/vehdet/y8_phase1_1024[0m
Ultralytics 8.3.177 🚀 Python-3.11.13 torch-2.6.0+cu124 CUDA:0 (Tesla P100-PCIE-16GB, 16269MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=-1, bgr=0.0, box=7.5, cache=disk, cfg=None, classes=None, close_mosaic=10, cls=0.5, conf=0.001, copy_paste=0.1, copy_paste_mode=flip, cos_lr=False, crop_fraction=1.0, cutmix=0.0, data=/kaggle/working/vehdet/d

[34m[1mtrain: [0mScanning /kaggle/working/vehdet/labels/train.cache... 3466 images, 379 backgrounds, 0 corrupt: 100%|██████████| 3466/3466 [00:00<?, ?it/s]

[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/144.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/146.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/147.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/148.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/149.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/150.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/151.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/152.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/154.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/155.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/wo




      Params      GFLOPs  GPU_mem (GB)  forward (ms) backward (ms)                   input                  output
    25858636       202.4         5.205         37.17         179.6      (1, 3, 1024, 1024)                    list
    25858636       404.9         6.468         63.33         260.2      (2, 3, 1024, 1024)                    list
    25858636       809.7         8.672         118.6         347.1      (4, 3, 1024, 1024)                    list
    25858636        1619        12.973         234.2         563.3      (8, 3, 1024, 1024)                    list
    25858636        3239        19.139           428          1004     (16, 3, 1024, 1024)                    list
[34m[1mAutoBatch: [0mUsing batch-size 4 for CUDA:0 10.21G/15.89G (64%) ✅
[34m[1mtrain: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 2258.5±1055.9 MB/s, size: 656.1 KB)


[34m[1mtrain: [0mScanning /kaggle/working/vehdet/labels/train.cache... 3466 images, 379 backgrounds, 0 corrupt: 100%|██████████| 3466/3466 [00:00<?, ?it/s]

[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/144.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/146.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/147.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/148.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/149.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/150.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/151.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/152.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/154.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/155.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/wo




[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, method='weighted_average', num_output_channels=3), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))
[34m[1mval: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 1013.9±1061.5 MB/s, size: 648.5 KB)


[34m[1mval: [0mScanning /kaggle/working/vehdet/labels/val.cache... 424 images, 59 backgrounds, 0 corrupt: 100%|██████████| 424/424 [00:00<?, ?it/s]

[34m[1mval: [0m/kaggle/working/vehdet/images/val/145.jpg: corrupt JPEG restored and saved
[34m[1mval: [0m/kaggle/working/vehdet/images/val/153.jpg: corrupt JPEG restored and saved
[34m[1mval: [0m/kaggle/working/vehdet/images/val/159.jpg: corrupt JPEG restored and saved
[34m[1mval: [0m/kaggle/working/vehdet/images/val/169.jpg: corrupt JPEG restored and saved
[34m[1mval: [0m/kaggle/working/vehdet/images/val/170.jpg: corrupt JPEG restored and saved
[34m[1mval: [0m/kaggle/working/vehdet/images/val/182.jpg: corrupt JPEG restored and saved
[34m[1mval: [0m/kaggle/working/vehdet/images/val/187.jpg: corrupt JPEG restored and saved
[34m[1mval: [0m/kaggle/working/vehdet/images/val/Dipto_366.jpg: corrupt JPEG restored and saved



[34m[1mval: [0mCaching images (2.7GB Disk): 100%|██████████| 424/424 [00:04<00:00, 93.71it/s] 


[34m[1moptimizer:[0m SGD(lr=0.01, momentum=0.937) with parameter groups 77 weight(decay=0.0), 84 weight(decay=0.001), 83 bias(decay=0.0)
Hit OOM/other at 1024 → fallback to 896. Err: yolov8m.pt training to 500 epochs is finished, nothing to resume.
Start a new training without resuming, i.e. 'yolo train model=yolov8m.pt'
Ultralytics 8.3.177 🚀 Python-3.11.13 torch-2.6.0+cu124 CUDA:0 (Tesla P100-PCIE-16GB, 16269MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=-1, bgr=0.0, box=7.5, cache=True, cfg=None, classes=None, close_mosaic=10, cls=0.5, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=True, cutmix=0.0, data=/kaggle/working/vehdet/data.yaml, degrees=0.0, deterministic=True, device=0, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=10, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=10, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=896, int8=F

[34m[1mtrain: [0mScanning /kaggle/working/vehdet/labels/train.cache... 3466 images, 379 backgrounds, 0 corrupt: 100%|██████████| 3466/3466 [00:00<?, ?it/s]

[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/144.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/146.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/147.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/148.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/149.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/150.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/151.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/152.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/154.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/155.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/wo




      Params      GFLOPs  GPU_mem (GB)  forward (ms) backward (ms)                   input                  output
    25858636         155         2.636         38.17         53.81        (1, 3, 896, 896)                    list
    25858636         310         3.058         50.28         68.98        (2, 3, 896, 896)                    list
    25858636         620         4.058         92.07         91.37        (4, 3, 896, 896)                    list
    25858636        1240         5.587         181.1         153.1        (8, 3, 896, 896)                    list
    25858636        2480         9.599         322.3         267.9       (16, 3, 896, 896)                    list
[34m[1mAutoBatch: [0mUsing batch-size 14 for CUDA:0 10.23G/15.89G (64%) ✅
[34m[1mtrain: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 2756.7±1141.4 MB/s, size: 656.1 KB)


[34m[1mtrain: [0mScanning /kaggle/working/vehdet/labels/train.cache... 3466 images, 379 backgrounds, 0 corrupt: 100%|██████████| 3466/3466 [00:00<?, ?it/s]

[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/144.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/146.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/147.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/148.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/149.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/150.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/151.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/152.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/154.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/155.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/wo






[34m[1mtrain: [0mCaching images (4.5GB RAM): 100%|██████████| 3466/3466 [00:35<00:00, 98.01it/s] 

[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, method='weighted_average', num_output_channels=3), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))





[34m[1mval: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 557.9±224.0 MB/s, size: 648.5 KB)


[34m[1mval: [0mScanning /kaggle/working/vehdet/labels/val.cache... 424 images, 59 backgrounds, 0 corrupt: 100%|██████████| 424/424 [00:00<?, ?it/s]

[34m[1mval: [0m/kaggle/working/vehdet/images/val/145.jpg: corrupt JPEG restored and saved
[34m[1mval: [0m/kaggle/working/vehdet/images/val/153.jpg: corrupt JPEG restored and saved
[34m[1mval: [0m/kaggle/working/vehdet/images/val/159.jpg: corrupt JPEG restored and saved
[34m[1mval: [0m/kaggle/working/vehdet/images/val/169.jpg: corrupt JPEG restored and saved
[34m[1mval: [0m/kaggle/working/vehdet/images/val/170.jpg: corrupt JPEG restored and saved
[34m[1mval: [0m/kaggle/working/vehdet/images/val/182.jpg: corrupt JPEG restored and saved
[34m[1mval: [0m/kaggle/working/vehdet/images/val/187.jpg: corrupt JPEG restored and saved
[34m[1mval: [0m/kaggle/working/vehdet/images/val/Dipto_366.jpg: corrupt JPEG restored and saved







[34m[1mval: [0mCaching images (0.5GB RAM): 100%|██████████| 424/424 [00:01<00:00, 216.01it/s]


Plotting labels to /kaggle/working/vehdet/y8_phase1_896/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.00125, momentum=0.9) with parameter groups 77 weight(decay=0.0), 84 weight(decay=0.000546875), 83 bias(decay=0.0)
Image sizes 896 train, 896 val
Using 2 dataloader workers
Logging results to [1m/kaggle/working/vehdet/y8_phase1_896[0m
Starting training for 10 epochs...
Closing dataloader mosaic
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, method='weighted_average', num_output_channels=3), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/10      5.17G      1.245      1.731      1.192         69        896: 100%|██████████| 248/248 [02:18<00:00,  1.79it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 16/16 [00:06<00:00,  2.30it/s]

                   all        424       2118      0.598      0.557      0.561      0.337






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       2/10      6.93G      1.284      1.251      1.235         31        896: 100%|██████████| 248/248 [02:16<00:00,  1.81it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 16/16 [00:06<00:00,  2.34it/s]

                   all        424       2118      0.627      0.645      0.638      0.392






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       3/10      6.96G      1.262      1.143      1.217         42        896: 100%|██████████| 248/248 [02:16<00:00,  1.82it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 16/16 [00:06<00:00,  2.34it/s]

                   all        424       2118      0.702      0.608      0.672      0.416






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       4/10      6.96G      1.206       1.03      1.181         35        896: 100%|██████████| 248/248 [02:15<00:00,  1.83it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 16/16 [00:06<00:00,  2.35it/s]

                   all        424       2118      0.695      0.629      0.698      0.436






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       5/10      6.96G      1.144     0.9509      1.158         50        896: 100%|██████████| 248/248 [02:15<00:00,  1.83it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 16/16 [00:06<00:00,  2.35it/s]

                   all        424       2118      0.686      0.651      0.706      0.458






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       6/10      6.96G      1.092     0.8829      1.123         25        896: 100%|██████████| 248/248 [02:15<00:00,  1.83it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 16/16 [00:06<00:00,  2.32it/s]

                   all        424       2118      0.738       0.67      0.722      0.471






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       7/10      6.96G      1.051     0.8096      1.098         33        896: 100%|██████████| 248/248 [02:16<00:00,  1.82it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 16/16 [00:06<00:00,  2.34it/s]

                   all        424       2118      0.751      0.675      0.745      0.494






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       8/10      6.96G      1.007      0.748      1.078         34        896: 100%|██████████| 248/248 [02:15<00:00,  1.82it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 16/16 [00:06<00:00,  2.35it/s]

                   all        424       2118      0.716      0.708      0.756      0.512






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       9/10      6.96G     0.9706     0.6972      1.055         27        896: 100%|██████████| 248/248 [02:15<00:00,  1.82it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 16/16 [00:06<00:00,  2.34it/s]

                   all        424       2118      0.747      0.692      0.766      0.522






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      10/10      6.96G      0.947     0.6603      1.041        149        896: 100%|██████████| 248/248 [02:16<00:00,  1.82it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 16/16 [00:06<00:00,  2.31it/s]

                   all        424       2118      0.756      0.706      0.775      0.533






10 epochs completed in 0.400 hours.
Optimizer stripped from /kaggle/working/vehdet/y8_phase1_896/weights/last.pt, 52.0MB
Optimizer stripped from /kaggle/working/vehdet/y8_phase1_896/weights/best.pt, 52.0MB

Validating /kaggle/working/vehdet/y8_phase1_896/weights/best.pt...
Ultralytics 8.3.177 🚀 Python-3.11.13 torch-2.6.0+cu124 CUDA:0 (Tesla P100-PCIE-16GB, 16269MiB)
Model summary (fused): 92 layers, 25,842,076 parameters, 0 gradients, 78.7 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 16/16 [00:08<00:00,  1.97it/s]


                   all        424       2118      0.756      0.705      0.775      0.532
                   car        234        712      0.782      0.763      0.821      0.588
              rickshaw        218        749      0.752      0.704      0.763      0.532
                   bus        198        370      0.759       0.75      0.825      0.618
             motorbike        148        287      0.731      0.603      0.691      0.392
Speed: 0.2ms preprocess, 12.9ms inference, 0.0ms loss, 2.3ms postprocess per image
Results saved to [1m/kaggle/working/vehdet/y8_phase1_896[0m
Ultralytics 8.3.177 🚀 Python-3.11.13 torch-2.6.0+cu124 CUDA:0 (Tesla P100-PCIE-16GB, 16269MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=-1, bgr=0.0, box=7.5, cache=disk, cfg=None, classes=None, close_mosaic=10, cls=0.5, conf=0.001, copy_paste=0.1, copy_paste_mode=flip, cos_lr=False, crop_fraction=1.0, cutmix=0.0, data=/kaggle/working/vehdet/da

[34m[1mtrain: [0mScanning /kaggle/working/vehdet/labels/train.cache... 3466 images, 379 backgrounds, 0 corrupt: 100%|██████████| 3466/3466 [00:00<?, ?it/s]

[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/144.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/146.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/147.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/148.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/149.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/150.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/151.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/152.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/154.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/155.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/wo




      Params      GFLOPs  GPU_mem (GB)  forward (ms) backward (ms)                   input                  output
    25858636         155         3.238         39.83           111        (1, 3, 896, 896)                    list
    25858636         310         4.255          52.6         172.7        (2, 3, 896, 896)                    list
    25858636         620         5.992         94.91         239.1        (4, 3, 896, 896)                    list
    25858636        1240         9.441         186.5         397.3        (8, 3, 896, 896)                    list
    25858636        2480        16.316         333.2         686.5       (16, 3, 896, 896)                    list
[34m[1mAutoBatch: [0mUsing batch-size 6 for CUDA:0 9.65G/15.89G (61%) ✅
[34m[1mtrain: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 2031.0±838.4 MB/s, size: 656.1 KB)


[34m[1mtrain: [0mScanning /kaggle/working/vehdet/labels/train.cache... 3466 images, 379 backgrounds, 0 corrupt: 100%|██████████| 3466/3466 [00:00<?, ?it/s]

[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/144.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/146.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/147.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/148.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/149.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/150.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/151.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/152.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/154.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/155.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/wo




[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, method='weighted_average', num_output_channels=3), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))
[34m[1mval: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 827.0±978.6 MB/s, size: 648.5 KB)


[34m[1mval: [0mScanning /kaggle/working/vehdet/labels/val.cache... 424 images, 59 backgrounds, 0 corrupt: 100%|██████████| 424/424 [00:00<?, ?it/s]

[34m[1mval: [0m/kaggle/working/vehdet/images/val/145.jpg: corrupt JPEG restored and saved
[34m[1mval: [0m/kaggle/working/vehdet/images/val/153.jpg: corrupt JPEG restored and saved
[34m[1mval: [0m/kaggle/working/vehdet/images/val/159.jpg: corrupt JPEG restored and saved
[34m[1mval: [0m/kaggle/working/vehdet/images/val/169.jpg: corrupt JPEG restored and saved
[34m[1mval: [0m/kaggle/working/vehdet/images/val/170.jpg: corrupt JPEG restored and saved
[34m[1mval: [0m/kaggle/working/vehdet/images/val/182.jpg: corrupt JPEG restored and saved
[34m[1mval: [0m/kaggle/working/vehdet/images/val/187.jpg: corrupt JPEG restored and saved
[34m[1mval: [0m/kaggle/working/vehdet/images/val/Dipto_366.jpg: corrupt JPEG restored and saved



[34m[1mval: [0mCaching images (2.7GB Disk): 100%|██████████| 424/424 [00:00<00:00, 41875.88it/s]


[34m[1moptimizer:[0m SGD(lr=0.01, momentum=0.937) with parameter groups 77 weight(decay=0.0), 84 weight(decay=0.00103125), 83 bias(decay=0.0)


AssertionError: yolov8m.pt training to 500 epochs is finished, nothing to resume.
Start a new training without resuming, i.e. 'yolo train model=yolov8m.pt'

In [41]:
WORKDIR = "/kaggle/working/vehdet"

def find_latest(prefix):
    cands = sorted(glob.glob(os.path.join(WORKDIR, f"{prefix}_*")), key=os.path.getmtime, reverse=True)
    return cands[0] if cands else None

p1_dir = find_latest("y8_phase1")
p2_dir = find_latest("y8_phase2")

print("Phase-1 run:", p1_dir)
print("Phase-2 run:", p2_dir)

Phase-1 run: /kaggle/working/vehdet/y8_phase1_896
Phase-2 run: None


In [None]:
# ================= SAFEST PHASE-2 START/RESUME (robust Phase-1 finder + per-epoch saves) ================
import os, glob, time, json
from pathlib import Path
import pandas as pd
from ultralytics import YOLO

WORKDIR = "/kaggle/working/vehdet"
DATA_YAML = f"{WORKDIR}/data.yaml"
TARGET_PHASE2_EPOCHS = 100
FORCE_SAVE_PERIOD = 1  # save a checkpoint every epoch

def list_phase_dirs(prefix, roots):
    """Return candidate run dirs matching prefix under given roots, newest first."""
    cands = []
    for root in roots:
        for d in glob.glob(os.path.join(root, f"{prefix}_*")):
            if os.path.isdir(d):
                cands.append((d, os.path.getmtime(d)))
    return [d for d,_ in sorted(cands, key=lambda x: x[1], reverse=True)]

def find_ckpt(run_dir):
    """Return (ckpt_path, type) where type in {'last','best'} if present, else (None,None)."""
    for nm in ("last.pt","best.pt"):
        p = os.path.join(run_dir, "weights", nm)
        if os.path.exists(p): return p, nm.split(".")[0]
    return None, None

def find_phase1_ckpt():
    """Find a Phase-1 run that actually has weights, prefer newest with last.pt then best.pt."""
    # 1) Prefer RUNS under WORKDIR
    for d in list_phase_dirs("y8_phase1", [WORKDIR]):
        ck, kind = find_ckpt(d)
        if ck: return d, ck
    # 2) Fallback: search anywhere under /kaggle/working
    for d in list_phase_dirs("y8_phase1", ["/kaggle/working"]):
        ck, kind = find_ckpt(d)
        if ck: return d, ck
    return None, None

def find_phase2_dir():
    """Return newest Phase-2 dir (may or may not have weights)."""
    dirs = list_phase_dirs("y8_phase2", [WORKDIR])
    return dirs[0] if dirs else None

def epochs_done(run_dir):
    csv = os.path.join(run_dir, "results.csv")
    if not os.path.exists(csv): return 0
    try:
        df = pd.read_csv(csv)
        return (int(df["epoch"].max()) + 1) if "epoch" in df.columns else len(df)
    except Exception:
        return 0

def get_imgsz_from_args(run_dir, default=None):
    for name in ("args.yaml","args.json"):
        p = os.path.join(run_dir, name)
        if os.path.exists(p):
            try:
                if p.endswith(".yaml"):
                    with open(p,"r") as f:
                        for line in f:
                            if line.strip().startswith("imgsz:"):
                                return int(line.split(":",1)[1].strip())
                else:
                    with open(p,"r") as f:
                        j = json.load(f)
                        if "imgsz" in j: return int(j["imgsz"])
            except: pass
    # fallback: parse suffix e.g. y8_phase1_896
    try:
        return int(Path(run_dir).name.split("_")[-1])
    except:
        return default

# -------- 1) Robust Phase-1 check (NO re-running Phase-1) --------
p1_dir, p1_ckpt = find_phase1_ckpt()
assert p1_dir, "❌ No Phase-1 run folder found under /kaggle/working. Please (re)train Phase-1 once."
assert p1_ckpt and os.path.exists(p1_ckpt), f"❌ Phase-1 run found but no weights file: {p1_dir}"
print(f"✅ Phase-1 OK → {p1_dir}\n   Using checkpoint: {p1_ckpt}")

imgsz = get_imgsz_from_args(p1_dir, default=896)

# -------- 2) Phase-2 start/continue with per-epoch saves --------
p2_dir = find_phase2_dir()
if p2_dir:
    done = epochs_done(p2_dir)
    p2_ckpt, _ = find_ckpt(p2_dir)
    print(f"🔎 Phase-2 found → {p2_dir} | epochs logged: {done} | ckpt: {p2_ckpt}")

    remain = max(1, TARGET_PHASE2_EPOCHS - done)
    # To enforce save_period=1 going forward, continue in a NEW run that we control:
    if p2_ckpt:
        new_name = f"{Path(p2_dir).name}_cont_{int(time.time())}"
        print(f"➡️  Continuing Phase-2 for {remain} epochs with per-epoch saving → {new_name} (imgsz={imgsz})")
        base = YOLO("yolov8m.pt")  # arch placeholder; resume path loads your exact state
        base.train(
            data=DATA_YAML,
            imgsz=imgsz,
            epochs=remain,
            batch=-1, device=0, workers=2,
            cache=True,
            patience=max(5, min(20, remain//2)),
            freeze=0, cos_lr=True, amp=True,
            deterministic=True,
            project=os.path.dirname(p2_dir),
            name=new_name,
            resume=p2_ckpt,                 # resume from existing Phase-2 checkpoint
            save=True,
            save_period=FORCE_SAVE_PERIOD,  # save EVERY epoch
            plots=True,
        )
        print(f"✅ Continued run at: {os.path.join(os.path.dirname(p2_dir), new_name)}")
    else:
        # No weights in the found Phase-2 dir (maybe a stub). Start fresh from Phase-1 weights.
        target_name = f"y8_phase2_{imgsz}"
        print(f"ℹ️ Found Phase-2 folder without weights. Starting fresh from Phase-1 → {target_name}")
        model = YOLO(p1_ckpt)
        model.train(
            data=DATA_YAML,
            imgsz=imgsz,
            epochs=TARGET_PHASE2_EPOCHS,
            batch=-1, device=0, workers=2,
            cache=True,
            patience=20,
            freeze=0,
            cos_lr=True, amp=True,
            deterministic=True,
            project=WORKDIR, name=target_name,
            save=True,
            save_period=FORCE_SAVE_PERIOD,
            plots=True,
        )
        print(f"✅ New Phase-2 run at: {os.path.join(WORKDIR, target_name)}")
else:
    # No Phase-2 yet → start it from Phase-1 weights
    target_name = f"y8_phase2_{imgsz}"
    print(f"ℹ️ No Phase-2 run found. Starting Phase-2 from Phase-1 → {target_name} (imgsz={imgsz})")
    model = YOLO(p1_ckpt)
    model.train(
        data=DATA_YAML,
        imgsz=imgsz,
        epochs=TARGET_PHASE2_EPOCHS,
        batch=-1, device=0, workers=2,
        cache=True,
        patience=20,
        freeze=0,
        cos_lr=True, amp=True,
        deterministic=True,
        project=WORKDIR, name=target_name,
        save=True,
        save_period=FORCE_SAVE_PERIOD,
        plots=True,
    )
    print(f"✅ New Phase-2 run at: {os.path.join(WORKDIR, target_name)}")


✅ Phase-1 OK → /kaggle/working/vehdet/y8_phase1_896
   Using checkpoint: /kaggle/working/vehdet/y8_phase1_896/weights/last.pt
ℹ️ No Phase-2 run found. Starting Phase-2 from Phase-1 → y8_phase2_896 (imgsz=896)
Ultralytics 8.3.177 🚀 Python-3.11.13 torch-2.6.0+cu124 CUDA:0 (Tesla P100-PCIE-16GB, 16269MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=-1, bgr=0.0, box=7.5, cache=True, cfg=None, classes=None, close_mosaic=10, cls=0.5, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=True, cutmix=0.0, data=/kaggle/working/vehdet/data.yaml, degrees=0.0, deterministic=True, device=0, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=100, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=0, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=896, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode

[34m[1mtrain: [0mScanning /kaggle/working/vehdet/labels/train.cache... 3466 images, 379 backgrounds, 0 corrupt: 100%|██████████| 3466/3466 [00:00<?, ?it/s]

[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/144.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/146.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/147.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/148.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/149.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/150.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/151.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/152.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/154.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/155.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/wo




    25858636         155         2.867         41.95           120        (1, 3, 896, 896)                    list
    25858636         310         3.930         51.15         173.1        (2, 3, 896, 896)                    list
    25858636         620         5.675         94.62         239.3        (4, 3, 896, 896)                    list
    25858636        1240         9.022           185         397.2        (8, 3, 896, 896)                    list
    25858636        2480        15.899         338.3         686.5       (16, 3, 896, 896)                    list
[34m[1mAutoBatch: [0mUsing batch-size 6 for CUDA:0 9.35G/15.89G (59%) ✅
[34m[1mtrain: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 2132.8±980.1 MB/s, size: 656.1 KB)


[34m[1mtrain: [0mScanning /kaggle/working/vehdet/labels/train.cache... 3466 images, 379 backgrounds, 0 corrupt: 100%|██████████| 3466/3466 [00:00<?, ?it/s]

[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/144.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/146.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/147.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/148.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/149.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/150.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/151.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/152.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/154.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/working/vehdet/images/train/155.jpg: corrupt JPEG restored and saved
[34m[1mtrain: [0m/kaggle/wo






[34m[1mtrain: [0mCaching images (4.5GB RAM): 100%|██████████| 3466/3466 [00:30<00:00, 113.76it/s]

[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, method='weighted_average', num_output_channels=3), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))





[34m[1mval: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 412.5±121.2 MB/s, size: 648.5 KB)


[34m[1mval: [0mScanning /kaggle/working/vehdet/labels/val.cache... 424 images, 59 backgrounds, 0 corrupt: 100%|██████████| 424/424 [00:00<?, ?it/s]

[34m[1mval: [0m/kaggle/working/vehdet/images/val/145.jpg: corrupt JPEG restored and saved
[34m[1mval: [0m/kaggle/working/vehdet/images/val/153.jpg: corrupt JPEG restored and saved
[34m[1mval: [0m/kaggle/working/vehdet/images/val/159.jpg: corrupt JPEG restored and saved
[34m[1mval: [0m/kaggle/working/vehdet/images/val/169.jpg: corrupt JPEG restored and saved
[34m[1mval: [0m/kaggle/working/vehdet/images/val/170.jpg: corrupt JPEG restored and saved
[34m[1mval: [0m/kaggle/working/vehdet/images/val/182.jpg: corrupt JPEG restored and saved
[34m[1mval: [0m/kaggle/working/vehdet/images/val/187.jpg: corrupt JPEG restored and saved
[34m[1mval: [0m/kaggle/working/vehdet/images/val/Dipto_366.jpg: corrupt JPEG restored and saved







[34m[1mval: [0mCaching images (0.5GB RAM): 100%|██████████| 424/424 [00:16<00:00, 25.96it/s]


Plotting labels to /kaggle/working/vehdet/y8_phase2_896/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.00125, momentum=0.9) with parameter groups 77 weight(decay=0.0), 84 weight(decay=0.000515625), 83 bias(decay=0.0)
Image sizes 896 train, 896 val
Using 2 dataloader workers
Logging results to [1m/kaggle/working/vehdet/y8_phase2_896[0m
Starting training for 100 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      1/100      5.24G      1.126      1.012      1.146         30        896: 100%|██████████| 578/578 [04:43<00:00,  2.04it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 36/36 [00:07<00:00,  4.84it/s]

                   all        424       2118      0.658      0.562      0.604      0.378






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      2/100      5.24G      1.257      1.216      1.232         19        896: 100%|██████████| 578/578 [04:38<00:00,  2.07it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 36/36 [00:07<00:00,  4.78it/s]

                   all        424       2118      0.619      0.527      0.538      0.324






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      3/100      5.24G      1.279      1.234      1.244         66        896: 100%|██████████| 578/578 [04:38<00:00,  2.07it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 36/36 [00:07<00:00,  4.78it/s]

                   all        424       2118      0.657      0.579      0.619      0.387






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      4/100      5.24G      1.241      1.154      1.219         55        896: 100%|██████████| 578/578 [04:37<00:00,  2.08it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 36/36 [00:07<00:00,  4.83it/s]

                   all        424       2118        0.7       0.59      0.648      0.411






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      5/100      5.24G      1.208      1.102      1.205         43        896: 100%|██████████| 578/578 [04:37<00:00,  2.08it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 36/36 [00:07<00:00,  4.82it/s]

                   all        424       2118      0.698      0.589      0.662      0.423






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      6/100      5.29G      1.178      1.042      1.184          9        896: 100%|██████████| 578/578 [04:37<00:00,  2.08it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 36/36 [00:07<00:00,  4.80it/s]

                   all        424       2118       0.74      0.578       0.66      0.422






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      7/100      6.03G      1.166          1      1.173         30        896: 100%|██████████| 578/578 [04:37<00:00,  2.08it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 36/36 [00:07<00:00,  4.83it/s]

                   all        424       2118      0.697      0.644      0.694      0.448






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      8/100      6.03G      1.144     0.9696      1.155         43        896: 100%|██████████| 578/578 [04:37<00:00,  2.08it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 36/36 [00:07<00:00,  4.80it/s]

                   all        424       2118       0.69      0.661      0.698      0.447






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      9/100       6.1G      1.109     0.9265       1.14         55        896: 100%|██████████| 578/578 [04:37<00:00,  2.08it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 36/36 [00:07<00:00,  4.83it/s]

                   all        424       2118      0.727      0.648      0.706      0.457






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     10/100       6.1G        1.1     0.9122       1.14         84        896: 100%|██████████| 578/578 [04:37<00:00,  2.08it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 36/36 [00:07<00:00,  4.83it/s]

                   all        424       2118      0.718      0.656      0.712      0.462






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     11/100      6.16G      1.088     0.8941      1.125        100        896: 100%|██████████| 578/578 [04:37<00:00,  2.08it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 36/36 [00:07<00:00,  4.82it/s]

                   all        424       2118      0.732      0.638      0.709      0.464






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     12/100      6.16G      1.076      0.851      1.121         38        896: 100%|██████████| 578/578 [04:38<00:00,  2.08it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 36/36 [00:07<00:00,  4.81it/s]

                   all        424       2118      0.716      0.673      0.713      0.469






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     13/100      6.16G      1.052     0.8299      1.108        181        896: 100%|██████████| 578/578 [04:37<00:00,  2.08it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 36/36 [00:07<00:00,  4.87it/s]

                   all        424       2118      0.735      0.671      0.735      0.483






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     14/100      6.16G      1.038     0.8063      1.102         13        896: 100%|██████████| 578/578 [04:37<00:00,  2.08it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 36/36 [00:07<00:00,  4.84it/s]

                   all        424       2118      0.699      0.693      0.736      0.486






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     15/100      6.16G      1.032     0.8129      1.097         43        896: 100%|██████████| 578/578 [04:37<00:00,  2.08it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 36/36 [00:07<00:00,  4.84it/s]

                   all        424       2118      0.722      0.696      0.737      0.489






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     16/100      6.16G      1.017     0.7704      1.085         61        896: 100%|██████████| 578/578 [04:37<00:00,  2.08it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 36/36 [00:07<00:00,  4.87it/s]

                   all        424       2118      0.784       0.66      0.741      0.493






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     17/100      6.16G     0.9943      0.749      1.076         44        896: 100%|██████████| 578/578 [04:37<00:00,  2.08it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 36/36 [00:07<00:00,  4.83it/s]

                   all        424       2118      0.755      0.674      0.738      0.488






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     18/100      6.16G      1.001     0.7459      1.076         73        896:  40%|███▉      | 229/578 [01:50<02:47,  2.08it/s]

In [51]:
# ================== PHASE-2 STATUS + CHECKPOINT INTEGRITY ==================
import os, glob, json, time
from pathlib import Path
import pandas as pd

WORKDIR = "/kaggle/working/vehdet"
CHECK_LOAD = True   # set False if you don't want to actually load weights (saves a bit of time)

def latest_phase2(root):
    runs = glob.glob(os.path.join(root, "y8_phase2_*"))
    if not runs: 
        return None
    return sorted(runs, key=os.path.getmtime, reverse=True)[0]

def epochs_from_results_csv(run_dir):
    csv = os.path.join(run_dir, "results.csv")
    if not os.path.exists(csv):
        return 0, None, None, None
    try:
        df = pd.read_csv(csv)
        # completed epochs (CSV is 0-indexed)
        done = int(df["epoch"].max()) + 1 if "epoch" in df.columns else len(df)
        # try to compute best epoch by fitness or mAP
        best_epoch = None; best_col = None; best_val = None
        for col in ["fitness", "metrics/mAP50-95(B)", "metrics/mAP50(B)", "metrics/mAP50-95", "metrics/mAP50"]:
            if col in df.columns:
                idx = int(df[col].idxmax())
                best_epoch = int(df.loc[idx, "epoch"]) if "epoch" in df.columns else idx
                best_col = col
                best_val = float(df.loc[idx, col])
                break
        return done, best_epoch, best_col, best_val
    except Exception as e:
        print("⚠️ Could not parse results.csv:", e)
        return 0, None, None, None

def list_weights(run_dir):
    wdir = os.path.join(run_dir, "weights")
    ep_files = []
    for p in glob.glob(os.path.join(wdir, "epoch*.pt")):
        try:
            n = int(Path(p).stem.replace("epoch", ""))
            ep_files.append((n, p))
        except:
            pass
    ep_files.sort(key=lambda x: x[0])
    best = os.path.join(wdir, "best.pt") if os.path.exists(os.path.join(wdir, "best.pt")) else None
    last = os.path.join(wdir, "last.pt") if os.path.exists(os.path.join(wdir, "last.pt")) else None
    return ep_files, best, last

def file_info(p):
    try:
        st = os.stat(p)
        return f"{Path(p).name}  |  {st.st_size/1e6:.1f} MB  |  {time.ctime(st.st_mtime)}"
    except Exception as e:
        return f"{Path(p).name}  |  <stat error: {e}>"

p2_dir = latest_phase2(WORKDIR)
assert p2_dir, "❌ No Phase-2 run directory found."

print(f"🔎 Phase-2 run: {p2_dir}")

done, best_epoch, best_col, best_val = epochs_from_results_csv(p2_dir)
print(f"📈 Epochs completed (from results.csv): {done}")
if best_epoch is not None:
    print(f"🏆 Best epoch (by {best_col}): {best_epoch}  (value={best_val:.4f})")

ep_files, best, last = list_weights(p2_dir)
print(f"\n💾 Saved epoch checkpoints: {len(ep_files)} file(s)")
if ep_files:
    print("  First 5:", [f"epoch{n}" for n,_ in ep_files[:5]])
    print("  Last  5:", [f"epoch{n}" for n,_ in ep_files[-5:]])

print("\nBest/Last:")
print("  best.pt:", file_info(best) if best else "missing")
print("  last.pt:", file_info(last) if last else "missing")

if ep_files:
    latest_epoch, latest_path = ep_files[-1]
    print("  latest epoch*.pt:", file_info(latest_path))
else:
    latest_epoch, latest_path = None, None

# Optional: try to load a few weights to ensure they are usable
if CHECK_LOAD:
    try:
        from ultralytics import YOLO
        for tag, p in [("best", best), ("last", last), (f"epoch{latest_epoch}", latest_path)]:
            if p and os.path.exists(p):
                try:
                    _ = YOLO(p)  # just construct; no inference
                    print(f"✅ Load OK → {tag}: {p}")
                except Exception as e:
                    print(f"❌ Load FAILED → {tag}: {p}  |  {e}")
    except Exception as e:
        print("⚠️ Skipped load checks:", e)

print("\nDone.")


🔎 Phase-2 run: /kaggle/working/vehdet/y8_phase2_896
📈 Epochs completed (from results.csv): 80
🏆 Best epoch (by metrics/mAP50-95(B)): 59  (value=0.5462)

💾 Saved epoch checkpoints: 79 file(s)
  First 5: ['epoch0', 'epoch1', 'epoch2', 'epoch3', 'epoch4']
  Last  5: ['epoch74', 'epoch75', 'epoch76', 'epoch77', 'epoch78']

Best/Last:
  best.pt: best.pt  |  52.0 MB  |  Tue Aug 12 19:44:24 2025
  last.pt: last.pt  |  52.0 MB  |  Tue Aug 12 19:44:24 2025
  latest epoch*.pt: epoch78.pt  |  155.7 MB  |  Tue Aug 12 19:44:24 2025
✅ Load OK → best: /kaggle/working/vehdet/y8_phase2_896/weights/best.pt
✅ Load OK → last: /kaggle/working/vehdet/y8_phase2_896/weights/last.pt
✅ Load OK → epoch78: /kaggle/working/vehdet/y8_phase2_896/weights/epoch78.pt

Done.


In [52]:
# ================== EXPORT PHASE-2 ARTIFACTS (PERSIST THEM!) ==================
import os, glob, time, zipfile, shutil
from pathlib import Path
import pandas as pd

WORKDIR = "/kaggle/working/vehdet"

def latest_phase2(root):
    runs = glob.glob(os.path.join(root, "y8_phase2_*"))
    return sorted(runs, key=os.path.getmtime, reverse=True)[0] if runs else None

def pick_best_epoch(results_csv):
    df = pd.read_csv(results_csv)
    for col in ["metrics/mAP50-95(B)", "metrics/mAP50-95", "fitness", "metrics/mAP50(B)", "metrics/mAP50"]:
        if col in df.columns:
            idx = int(df[col].idxmax())
            return int(df.loc[idx, "epoch"]), col, float(df.loc[idx, col])
    # fallback
    return int(df["epoch"].max()), "epoch(max)", float("nan")

p2_dir = latest_phase2(WORKDIR)
assert p2_dir, "No Phase-2 run found."

wdir = os.path.join(p2_dir, "weights")
best = os.path.join(wdir, "best.pt")
last = os.path.join(wdir, "last.pt")
res_csv = os.path.join(p2_dir, "results.csv")
assert os.path.exists(res_csv), f"Missing results.csv in {p2_dir}"

best_epoch, col, val = pick_best_epoch(res_csv)
epN = os.path.join(wdir, f"epoch{best_epoch}.pt")
print(f"Best epoch = {best_epoch} by {col} (value={val:.4f})")

export = os.path.join(WORKDIR, "export_phase2")
os.makedirs(export, exist_ok=True)

def keep(src, newname=None):
    if src and os.path.exists(src):
        dst = os.path.join(export, newname or Path(src).name)
        shutil.copy2(src, dst)
        print("✓", Path(dst).name)
        return dst

kept = []
kept += [keep(best, "phase2_best.pt")]
kept += [keep(last, "phase2_last.pt")]
kept += [keep(epN, f"phase2_epoch{best_epoch}.pt")]
kept += [keep(res_csv, "phase2_results.csv")]
for nm in ["args.yaml","args.json","hyp.yaml"]:
    kept += [keep(os.path.join(p2_dir, nm), f"phase2_{nm}")]

# also keep data.yaml for evaluation/inference consistency
kept += [keep(os.path.join(WORKDIR, "data.yaml"), "data.yaml")]

# zip everything so you can download or save as output
zip_path = os.path.join(WORKDIR, f"phase2_export_{int(time.time())}.zip")
with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as z:
    for p in filter(None, kept):
        z.write(p, arcname=Path(p).name)

print("\nExport folder:", export)
print("ZIP archive  :", zip_path)
print("→ Now either: (A) Save Version with 'Save output' checked, or (B) download the ZIP.")


Best epoch = 59 by metrics/mAP50-95(B) (value=0.5462)
✓ phase2_best.pt
✓ phase2_last.pt
✓ phase2_epoch59.pt
✓ phase2_results.csv
✓ phase2_args.yaml
✓ data.yaml

Export folder: /kaggle/working/vehdet/export_phase2
ZIP archive  : /kaggle/working/vehdet/phase2_export_1755031467.zip
→ Now either: (A) Save Version with 'Save output' checked, or (B) download the ZIP.
