In [None]:
# Install dependencies into the running kernel's environment (%pip targets the
# kernel, whereas !pip may hit a different interpreter). ultralytics is pinned
# to the version this notebook was executed with. `lap` is listed explicitly
# because Ultralytics otherwise auto-installs it in the middle of the first
# tracking run.
%pip install -q ultralytics==8.3.199 "lap>=0.5.12" opencv-python tqdm numpy

Collecting ultralytics
  Downloading ultralytics-8.3.199-py3-none-any.whl.metadata (37 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.17-py3-none-any.whl.metadata (14 kB)
Downloading ultralytics-8.3.199-py3-none-any.whl (1.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m27.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading ultralytics_thop-2.0.17-py3-none-any.whl (28 kB)
Installing collected packages: ultralytics-thop, ultralytics
Successfully installed ultralytics-8.3.199 ultralytics-thop-2.0.17


In [None]:
import os
import cv2
from pathlib import Path
from ultralytics import YOLO
from tqdm import tqdm
import numpy as np

# === CONFIG ===
VIDEO_PATH = r"/content/project_train_video.mp4"  # <-- replace with your video path
MODEL_PATH = "yolov8x.pt"            # pretrained model; fine-tuned weights improve accuracy
OUTPUT_DIR = "train_output"
CONF_THRESHOLD = 0.35
# =================

# Create output dirs (exist_ok keeps the cell safe to re-run)
os.makedirs(OUTPUT_DIR, exist_ok=True)
clips_dir = Path(OUTPUT_DIR) / "clips"
frames_dir = Path(OUTPUT_DIR) / "frames"
for out_dir in (clips_dir, frames_dir):
    out_dir.mkdir(parents=True, exist_ok=True)

# Open the video before loading the model so that a wrong path fails
# immediately instead of after the weights have been downloaded.
cap = cv2.VideoCapture(VIDEO_PATH)
if not cap.isOpened():
    raise FileNotFoundError(f"Could not open input video: {VIDEO_PATH}")

# Load YOLOv8 model
model = YOLO(MODEL_PATH)

# Video properties; fall back to 25 fps when the container reports 0.
fps = cap.get(cv2.CAP_PROP_FPS) or 25
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fourcc = cv2.VideoWriter_fourcc(*"mp4v")
annotated_video_path = str(Path(OUTPUT_DIR) / "annotated_train.mp4")
out_video = cv2.VideoWriter(annotated_video_path, fourcc, fps, (width, height))
if not out_video.isOpened():
    # Without this check every write() would be silently dropped and the
    # notebook would still report success.
    cap.release()
    raise RuntimeError(f"Could not open video writer for: {annotated_video_path}")
# Frame count as reported by the container (may be 0 or approximate).
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

# Tracking data
track_frames = {}      # track_id -> list of (frame_idx, crop)
track_first_frame = {} # track_id -> first seen
track_last_frame = {}  # track_id -> last seen

print("Running detection + tracking (manual streaming)...")
# The container-reported frame count only sizes the progress bar. Reading
# continues until cap.read() fails, so an under-reported count cannot truncate
# the video and a zero count does not skip processing.
progress = tqdm(total=total_frames if total_frames > 0 else None, desc="Processing video")
frame_idx = 0
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        frame_h, frame_w = frame.shape[:2]
        # Draw on a copy so that crops taken from `frame` stay free of boxes/labels.
        annotated = frame.copy()

        # Run YOLO tracking on this frame
        results = model.track(
            frame,
            conf=CONF_THRESHOLD,
            persist=True,
            tracker="bytetrack.yaml",
            verbose=False
        )

        boxes = getattr(results[0], "boxes", None)
        if boxes is not None:
            for box in boxes:
                conf = float(box.conf.cpu().numpy().item())
                if conf < CONF_THRESHOLD:
                    continue

                # Clamp to the frame: coordinates outside the image would make the
                # slice below empty or wrap around (negative indices).
                x1, y1, x2, y2 = (int(v) for v in box.xyxy.cpu().numpy().ravel())
                x1, x2 = max(0, min(x1, frame_w)), max(0, min(x2, frame_w))
                y1, y2 = max(0, min(y1, frame_h)), max(0, min(y2, frame_h))

                # A detection without a tracker ID is drawn but never recorded:
                # inventing an ID for it would count it as a new coach and could
                # collide with a real track ID.
                track_id = int(box.id.cpu().numpy().item()) if box.id is not None else None
                id_text = "?" if track_id is None else str(track_id)

                label = f"coach {id_text} {conf:.2f}"
                cv2.rectangle(annotated, (x1, y1), (x2, y2), (0, 200, 0), 2)
                cv2.putText(annotated, label, (x1, max(15, y1 - 5)),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)

                if track_id is None or x2 <= x1 or y2 <= y1:
                    continue

                crop = frame[y1:y2, x1:x2].copy()
                track_frames.setdefault(track_id, []).append((frame_idx, crop))
                if track_id not in track_first_frame:
                    track_first_frame[track_id] = frame_idx
                track_last_frame[track_id] = frame_idx

        out_video.write(annotated)
        frame_idx += 1
        progress.update(1)
finally:
    # Release handles even if inference fails part-way, so the partial output
    # video is finalized and the input file is not left locked.
    progress.close()
    cap.release()
    out_video.release()

# Step 2: Count unique coaches (one per distinct track ID)
unique_ids = sorted(track_frames.keys())
coach_count = len(unique_ids)
print(f"\n✅ Total number of coaches detected: {coach_count}")
print(f"Annotated video saved at: {annotated_video_path}")

# Step 3: Save per-coach clips and representative frames
print("Saving per-coach clips and representative frames...")
for tid in unique_ids:
    # Ignore zero-area crops: cv2.resize raises on them, and a zero-sized first
    # crop would produce an unusable writer.
    frames_list = [(idx, crop) for idx, crop in track_frames[tid] if crop.size > 0]
    if not frames_list:
        continue

    # Save clip; every crop is resized to the first crop's size because a
    # VideoWriter needs a fixed frame size.
    h, w = frames_list[0][1].shape[:2]
    clip_path = clips_dir / f"coach_{tid:03d}.mp4"
    writer = cv2.VideoWriter(str(clip_path), fourcc, fps, (w, h))
    try:
        for _, crop in frames_list:
            writer.write(cv2.resize(crop, (w, h)))
    finally:
        writer.release()

    # Representative frame: middle frame of the track
    mid_idx = len(frames_list) // 2
    frame_number, crop_mid = frames_list[mid_idx]
    frame_file = frames_dir / f"coach_{tid:03d}_frame_{frame_number}.jpg"
    cv2.imwrite(str(frame_file), crop_mid)

print(f"Per-coach clips saved at: {clips_dir}")
print(f"Representative frames saved at: {frames_dir}")
print("\n✅ Pipeline finished successfully!")


Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.
[KDownloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8x.pt to 'yolov8x.pt': 100% ━━━━━━━━━━━━ 130.5MB 341.1MB/s 0.4s
Running detection + tracking (manual streaming)...


Processing video:   0%|          | 0/2204 [00:00<?, ?it/s]

[31m[1mrequirements:[0m Ultralytics requirement ['lap>=0.5.12'] not found, attempting AutoUpdate...

[31m[1mrequirements:[0m AutoUpdate success ✅ 0.5s



Processing video: 100%|██████████| 2204/2204 [02:16<00:00, 16.12it/s]



✅ Total number of coaches detected: 35
Annotated video saved at: train_output/annotated_train.mp4
Saving per-coach clips and representative frames...
Per-coach clips saved at: train_output/clips
Representative frames saved at: train_output/frames

✅ Pipeline finished successfully!


In [None]:
# Upload the dataset archive from the local machine via Colab's file picker.
# NOTE: when a file with the same name already exists in the working directory,
# Colab stores the new upload under a suffixed name (e.g. "dataset (1).zip"),
# so later cells should not assume the archive is literally "dataset.zip".
from google.colab import files
uploaded = files.upload()  # Then select dataset.zip from your PC


Saving dataset.zip to dataset (1).zip


In [None]:
# Extract the archive straight from the bytes returned by files.upload().
# A hard-coded "dataset.zip" is fragile: Colab renames a re-uploaded file
# (e.g. "dataset (1).zip"), so the old archive would be extracted instead.
# extractall() also overwrites without prompting, so the cell can be re-run.
import io
import zipfile

if not uploaded:
    raise RuntimeError("No file was uploaded; re-run the upload cell and select dataset.zip")

_archive_name, _archive_bytes = next(iter(uploaded.items()))
with zipfile.ZipFile(io.BytesIO(_archive_bytes)) as _archive:
    _archive.extractall("/content/my_data")
print(f"Extracted {_archive_name} to /content/my_data")

In [None]:
from ultralytics import YOLO

# Start from the pre-trained YOLOv8-medium checkpoint
# (swap in "yolov8s.pt" for a smaller, faster model).
model = YOLO("yolov8m.pt")

# Fine-tune on the custom dataset. The return value is bound to a name so the
# cell does not print the entire metrics object (including its curve arrays).
train_metrics = model.train(
    data="/content/my_data/data.yaml",  # path to your YAML
    epochs=50,                          # number of epochs
    imgsz=640,                          # resize images
    batch=16,                           # adjust batch size depending on GPU
    workers=2,                          # dataloader workers
    name="train_coach_engine"           # folder to save results
)

# Ultralytics appends a numeric suffix to the run folder when the name already
# exists, so print the actual weights path to copy into MODEL_PATH downstream.
print(f"Best weights saved at: {model.trainer.best}")


Ultralytics 8.3.199 🚀 Python-3.12.11 torch-2.8.0+cu126 CUDA:0 (Tesla T4, 15095MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=/content/my_data/data.yaml, degrees=0.0, deterministic=True, device=None, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=50, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolov8m.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=train_coach_engine, nbs=64, nms=False, opset=None, optimize=False, optimizer=auto, overlap_mask=True, patience=100, perspective

ultralytics.utils.metrics.DetMetrics object with attributes:

ap_class_index: array([0, 1])
box: ultralytics.utils.metrics.Metric object
confusion_matrix: <ultralytics.utils.metrics.ConfusionMatrix object at 0x7e043e521e50>
curves: ['Precision-Recall(B)', 'F1-Confidence(B)', 'Precision-Confidence(B)', 'Recall-Confidence(B)']
curves_results: [[array([          0,    0.001001,    0.002002,    0.003003,    0.004004,    0.005005,    0.006006,    0.007007,    0.008008,    0.009009,     0.01001,    0.011011,    0.012012,    0.013013,    0.014014,    0.015015,    0.016016,    0.017017,    0.018018,    0.019019,     0.02002,    0.021021,    0.022022,    0.023023,
          0.024024,    0.025025,    0.026026,    0.027027,    0.028028,    0.029029,     0.03003,    0.031031,    0.032032,    0.033033,    0.034034,    0.035035,    0.036036,    0.037037,    0.038038,    0.039039,     0.04004,    0.041041,    0.042042,    0.043043,    0.044044,    0.045045,    0.046046,    0.047047,
          0.04804

In [None]:
import os
import cv2
from pathlib import Path
from ultralytics import YOLO
from tqdm import tqdm
import numpy as np

# === CONFIG ===
VIDEO_PATH = r"/content/project_train_video.mp4"  # Replace with your video path
# NOTE(review): this points at run folder "train5", while the training cell in
# this notebook saves under the run name "train_coach_engine" — confirm that
# this is the checkpoint you intend to evaluate.
MODEL_PATH = "/content/runs/detect/train5/weights/best.pt"            # Use your fine-tuned model here
OUTPUT_DIR = "train_output"
CONF_THRESHOLD = 0.64
# =================

# Create output dirs (exist_ok keeps the cell safe to re-run)
os.makedirs(OUTPUT_DIR, exist_ok=True)
clips_dir = Path(OUTPUT_DIR) / "clips"
frames_dir = Path(OUTPUT_DIR) / "frames"
for out_dir in (clips_dir, frames_dir):
    out_dir.mkdir(parents=True, exist_ok=True)

# Fail fast with a clear message when either input is missing.
if not Path(MODEL_PATH).is_file():
    raise FileNotFoundError(f"Model weights not found: {MODEL_PATH}")
cap = cv2.VideoCapture(VIDEO_PATH)
if not cap.isOpened():
    raise FileNotFoundError(f"Could not open input video: {VIDEO_PATH}")

# Load YOLOv8 model
model = YOLO(MODEL_PATH)

# Video properties; fall back to 25 fps when the container reports 0.
fps = cap.get(cv2.CAP_PROP_FPS) or 25
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fourcc = cv2.VideoWriter_fourcc(*"mp4v")
annotated_video_path = str(Path(OUTPUT_DIR) / "annotated_train.mp4")
out_video = cv2.VideoWriter(annotated_video_path, fourcc, fps, (width, height))
if not out_video.isOpened():
    # Without this check every write() would be silently dropped and the
    # notebook would still report success.
    cap.release()
    raise RuntimeError(f"Could not open video writer for: {annotated_video_path}")
# Frame count as reported by the container (may be 0 or approximate).
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

# Tracking data
track_frames = {}      # track_id -> list of (frame_idx, crop, class_name)
track_first_frame = {} # track_id -> first seen
track_last_frame = {}  # track_id -> last seen

print("Running detection + tracking (manual streaming)...")
# The container-reported frame count only sizes the progress bar. Reading
# continues until cap.read() fails, so an under-reported count cannot truncate
# the video and a zero count does not skip processing.
progress = tqdm(total=total_frames if total_frames > 0 else None, desc="Processing video")
frame_idx = 0
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        frame_h, frame_w = frame.shape[:2]
        # Draw on a copy so that crops taken from `frame` stay free of
        # boxes, labels and the counter overlay.
        annotated = frame.copy()

        # Run YOLO tracking on this frame
        results = model.track(
            frame,
            conf=CONF_THRESHOLD,
            persist=True,
            tracker="bytetrack.yaml",
            verbose=False
        )

        boxes = getattr(results[0], "boxes", None)
        if boxes is not None:
            for box in boxes:
                conf = float(box.conf.cpu().numpy().item())
                if conf < CONF_THRESHOLD:
                    continue

                # Get class label
                cls_id = int(box.cls.cpu().numpy().item())
                class_name = model.names[cls_id]

                # Clamp to the frame: coordinates outside the image would make the
                # slice below empty or wrap around (negative indices).
                x1, y1, x2, y2 = (int(v) for v in box.xyxy.cpu().numpy().ravel())
                x1, x2 = max(0, min(x1, frame_w)), max(0, min(x2, frame_w))
                y1, y2 = max(0, min(y1, frame_h)), max(0, min(y2, frame_h))

                # A detection without a tracker ID is drawn but never recorded:
                # inventing an ID for it would count it as a new object and could
                # collide with a real track ID.
                track_id = int(box.id.cpu().numpy().item()) if box.id is not None else None
                id_text = "?" if track_id is None else str(track_id)

                label = f"{class_name} {id_text} {conf:.2f}"
                cv2.rectangle(annotated, (x1, y1), (x2, y2), (0, 200, 0), 2)
                cv2.putText(annotated, label, (x1, max(15, y1 - 5)),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)

                if track_id is None or x2 <= x1 or y2 <= y1:
                    continue

                crop = frame[y1:y2, x1:x2].copy()
                track_frames.setdefault(track_id, []).append((frame_idx, crop, class_name))
                if track_id not in track_first_frame:
                    track_first_frame[track_id] = frame_idx
                track_last_frame[track_id] = frame_idx

        # Running totals, computed once per frame. A track's class is the class
        # of its first recorded detection, the same rule the final tally uses.
        coach_count = sum(1 for frames in track_frames.values() if frames[0][2] == "coach")
        engine_count = sum(1 for frames in track_frames.values() if frames[0][2] == "engine")

        # Overlay counts at top-left on every frame, including frames with no detections
        cv2.putText(annotated, f"Coaches: {coach_count}", (10, 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        cv2.putText(annotated, f"Engines: {engine_count}", (10, 70),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

        out_video.write(annotated)
        frame_idx += 1
        progress.update(1)
finally:
    # Release handles even if inference fails part-way, so the partial output
    # video is finalized and the input file is not left locked.
    progress.close()
    cap.release()
    out_video.release()

# Step 2: Count unique coaches and engines.
# A track's class is taken from its first recorded detection.
coach_count = sum(1 for frames in track_frames.values() if frames and frames[0][2] == "coach")
engine_count = sum(1 for frames in track_frames.values() if frames and frames[0][2] == "engine")

print(f"\n✅ Total coaches detected: {coach_count}")
print(f"✅ Total engines detected: {engine_count}")
print(f"Annotated video saved at: {annotated_video_path}")

# Step 3: Save per-object clips and representative frames
print("Saving per-object clips and representative frames...")
for tid, all_frames in track_frames.items():
    if not all_frames:
        continue
    # Same first-detection rule as the counts above, so file names match the tally.
    class_name = all_frames[0][2]

    # Ignore zero-area crops: cv2.resize raises on them, and a zero-sized first
    # crop would produce an unusable writer.
    frames_list = [entry for entry in all_frames if entry[1].size > 0]
    if not frames_list:
        continue

    # Clip; every crop is resized to the first crop's size because a
    # VideoWriter needs a fixed frame size.
    h, w = frames_list[0][1].shape[:2]
    clip_path = clips_dir / f"{class_name}_{tid:03d}.mp4"
    writer = cv2.VideoWriter(str(clip_path), fourcc, fps, (w, h))
    try:
        for _, crop, _ in frames_list:
            writer.write(cv2.resize(crop, (w, h)))
    finally:
        writer.release()

    # Representative frame: middle frame of the track
    mid_idx = len(frames_list) // 2
    frame_number, crop_mid, _ = frames_list[mid_idx]
    frame_file = frames_dir / f"{class_name}_{tid:03d}_frame_{frame_number}.jpg"
    cv2.imwrite(str(frame_file), crop_mid)

print(f"Per-object clips saved at: {clips_dir}")
print(f"Representative frames saved at: {frames_dir}")
print("\n✅ Pipeline finished successfully!")


Running detection + tracking (manual streaming)...


Processing video: 100%|██████████| 2204/2204 [01:31<00:00, 24.03it/s]



✅ Total coaches detected: 49
✅ Total engines detected: 2
Annotated video saved at: train_output/annotated_train.mp4
Saving per-object clips and representative frames...
Per-object clips saved at: train_output/clips
Representative frames saved at: train_output/frames

✅ Pipeline finished successfully!


In [None]:
# 1. Mount Google Drive
# `drive` must be imported here: no earlier cell imports it, so on a fresh
# kernel the mount call would otherwise raise NameError.
from google.colab import drive

drive.mount('/content/drive')

# 2. Define your root folder path (change "MyDrive" if your Drive path is different)
root_path = "/content/drive/MyDrive/train_project"


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
