# OrangePi_passangers — YOLOv8 head training (Colab)

Этот ноутбук запускает обучение через **скрипты проекта** (а не через отдельный `train.py`).

Runbook (подробнее): `Docs/Проект/Операции/ML - Дообучение YOLOv8 head (Google Colab).md`.


In [None]:
# Attach Google Drive to the Colab runtime; the Drive paths configured in
# this notebook live under /content/drive.
from google.colab import drive

drive.mount(mountpoint='/content/drive')

In [None]:
# === EDIT ME ===
# Git repository and branch that the clone cell checks out.
GITHUB_REPO = 'https://github.com/<USER_OR_ORG>/OrangePi_passangers.git'
BRANCH = 'main'

# Dataset folder name. Every dataset path below is derived from it, so the
# Drive folder, the Drive archive and the runtime copy cannot drift apart.
# The dataset-preparation cell expects an archive to unpack into a folder
# with exactly this name.
DATASET_NAME = 'brainwash.v1i.yolov8'  # <-- change this

# Drive layout (persistent)
DRIVE_BASE = '/content/drive/MyDrive/OrangePi_passangers_ml'
# Dataset in Drive: either a folder OR a packed archive.
# Option A (folder): upload dataset tree to this path.
DATASET_DRIVE_DIR = f"{DRIVE_BASE}/datasets/{DATASET_NAME}"
# Option B (archive): pack locally (tar -czf ...) and upload archive to Drive.
# Change the extension here if the archive is not a .tgz.
DATASET_DRIVE_ARCHIVE = f"{DRIVE_BASE}/datasets_archives/{DATASET_NAME}.tgz"

# Local (Colab runtime) dataset location (fast I/O)
DATASET_ROOT = f'/content/datasets/{DATASET_NAME}'
# Training artifacts are kept on Drive so they outlive the runtime.
ARTIFACTS_ROOT = f"{DRIVE_BASE}/artifacts/yolov8-head"

# Run params (passed to scripts/yolo_train_run.sh by the training cell)
RUN_LABEL = 'smoke_colab_e1_640'
IMGSZ = 640
EPOCHS = 1
WORKERS = 2
DEVICE = 0

# Echo the resolved paths so a wrong setting is visible before anything runs.
print('DATASET_ROOT (runtime):', DATASET_ROOT)
print('DATASET_DRIVE_DIR:', DATASET_DRIVE_DIR)
print('DATASET_DRIVE_ARCHIVE:', DATASET_DRIVE_ARCHIVE)
print('ARTIFACTS_ROOT:', ARTIFACTS_ROOT)

In [None]:
# Fresh checkout of the project into /content/OrangePi_passangers.
# Any previous clone is deleted first so re-running this cell always yields
# a clean tree; the clone is shallow (--depth 1) and limited to one branch.
# "$BRANCH" and "$GITHUB_REPO" are filled in by IPython from the Python
# variables of the same name (set in the config cell).
%cd /content
!rm -rf OrangePi_passangers
!git clone --depth 1 --branch "$BRANCH" "$GITHUB_REPO" OrangePi_passangers
# The remaining cells use paths relative to the repository root.
%cd /content/OrangePi_passangers
# Print the short commit hash that was checked out.
!git rev-parse --short HEAD

In [None]:
# Prepare dataset in /content (recommended for speed)
#
# Sources, in order of preference:
#   1. DATASET_DRIVE_ARCHIVE - a tar archive whose root folder is named like
#      the last component of DATASET_ROOT;
#   2. DATASET_DRIVE_DIR     - a plain dataset folder on Drive.
#
# Both paths stage their output in a sibling "<name>.partial" directory and
# rename it to DATASET_ROOT only after the copy/extract has finished. An
# interrupted run therefore never leaves a half-filled DATASET_ROOT that a
# re-run would mistake for "Dataset already present".
import os
import shutil
import subprocess
from itertools import islice
from pathlib import Path

dst = Path(DATASET_ROOT)
dst.parent.mkdir(parents=True, exist_ok=True)

# Staging area on the same filesystem as dst, so the final rename is cheap.
staging = dst.with_name(dst.name + '.partial')

if dst.exists():
    print('Dataset already present:', dst)
else:
    arc = Path(DATASET_DRIVE_ARCHIVE)
    src_dir = Path(DATASET_DRIVE_DIR)
    # Drop leftovers of a previously interrupted attempt.
    shutil.rmtree(staging, ignore_errors=True)
    if arc.exists():
        print('Extracting archive from Drive:', arc)
        # Pick tar flags from the archive extension; anything else is left
        # to tar's own format handling.
        if arc.name.endswith(('.tgz', '.tar.gz')):
            tar_flags = ['-xzf']
        elif arc.name.endswith(('.tar.zst', '.tar.zstd')):
            tar_flags = ['--zstd', '-xf']
        else:
            tar_flags = ['-xf']
        staging.mkdir()
        try:
            subprocess.check_call(['tar', *tar_flags, str(arc), '-C', str(staging)])
            # The archive must contain a root folder named like dst; only that
            # folder is kept, any other top-level entries are discarded.
            extracted = staging / dst.name
            if not extracted.exists():
                raise RuntimeError(f'Archive extracted but expected dataset dir not found: {dst}. Check archive root folder name.')
            extracted.rename(dst)
        finally:
            shutil.rmtree(staging, ignore_errors=True)
    elif src_dir.exists():
        print('Copying dataset folder from Drive to runtime disk (rsync-like):', src_dir)
        try:
            shutil.copytree(src_dir, staging)
            staging.rename(dst)
        finally:
            # No-op after a successful rename; removes the partial copy otherwise.
            shutil.rmtree(staging, ignore_errors=True)
    else:
        raise FileNotFoundError('Dataset not found in Drive. Upload either DATASET_DRIVE_DIR folder or DATASET_DRIVE_ARCHIVE file.')

print('Dataset ready:', dst)
# Show a few training images as a sanity check without listing the whole folder.
print('Sample:', list(islice((dst/'train'/'images').glob('*'), 3)))

In [None]:
# Report the runtime: Python version and the GPUs visible to the driver.
!python -V
!nvidia-smi -L

# Project-tested Ultralytics version (repo .venv): 8.4.14
# Pinned so the Colab run uses the same Ultralytics release as the project.
!pip -q install ultralytics==8.4.14
# Confirm the imports work and print the torch / ultralytics / numpy versions.
!python -c "import torch, ultralytics, numpy as np; print('torch', torch.__version__); print('ultralytics', ultralytics.__version__); print('numpy', np.__version__)"

In [None]:
# Persist artifacts by symlinking project dirs to Drive
# Paths are relative to the repository root (the clone cell cd's there).
# Create the Drive-side target directories first.
!mkdir -p "$ARTIFACTS_ROOT/Docs_auto" "$ARTIFACTS_ROOT/runs"

# Replace Docs/auto in the checkout with a symlink to the Drive directory.
# The rm makes the cell safe to re-run (ln -s would fail on an existing path).
!rm -rf Docs/auto
!ln -s "$ARTIFACTS_ROOT/Docs_auto" Docs/auto

# Same for the training runs directory; mkdir -p guarantees the parent exists.
!rm -rf ml/yolov8_head_finetune/runs
!mkdir -p ml/yolov8_head_finetune
!ln -s "$ARTIFACTS_ROOT/runs" ml/yolov8_head_finetune/runs

# List both parents so the symlinks can be checked by eye.
!ls -la Docs | head -n 20
!ls -la ml/yolov8_head_finetune | head -n 50

In [None]:
# Write Colab-specific data.yaml (DO NOT use ml/yolov8_head_finetune/data_head.yaml as-is; it has local paths)
# The file is stored under ARTIFACTS_ROOT and points the dataset root at the
# runtime copy (DATASET_ROOT); it declares a single class, "head".
from pathlib import Path

dataset_dir = str(Path(DATASET_ROOT))
yaml_lines = (
    'path: ' + dataset_dir,
    'train: train/images',
    'val: valid/images',
    'test: test/images',
    '',
    'names:',
    '  0: head',
    '',  # trailing empty entry -> file ends with a newline
)

data_yaml = Path(ARTIFACTS_ROOT) / 'data_head_colab.yaml'
data_yaml.write_text('\n'.join(yaml_lines), encoding='utf-8')
print('Wrote:', str(data_yaml))

In [None]:
# Launch training through the project's wrapper script, run from the
# repository root. The "$NAME" arguments are taken from the notebook
# variables of the same name; --data points at the Colab-specific
# data_head_colab.yaml under ARTIFACTS_ROOT rather than the repo's
# data_head.yaml.
# NOTE(review): --execute presumably switches the script from a dry run to a
# real run - confirm in scripts/yolo_train_run.sh.
!bash scripts/yolo_train_run.sh \
  --name "$RUN_LABEL" \
  --dataset "$DATASET_ROOT" \
  --data "$ARTIFACTS_ROOT/data_head_colab.yaml" \
  --model ml/yolov8_head_finetune/weights/base/yolov8_head_scut_nano.pt \
  --imgsz "$IMGSZ" --epochs "$EPOCHS" --batch auto --device "$DEVICE" --workers "$WORKERS" \
  --execute

# Print the contents of _latest.txt under Docs/auto.
# NOTE(review): presumably written by the training script - confirm.
!echo 'Latest run id:'
!cat Docs/auto/ml-training/yolov8-head/_latest.txt