# **TEST MODEL WITH A .MP4 VİDEO FILE**

DEVICE: A100 GPU VEYA L4 GPU SEÇİNİZ. (Çalışma Zamanı Türünü Değiştir)

!!! KULLANICI MUTLAKA VIDEO_NAME İSMİNİ DEĞİŞTİRMELİ !!!

ARDINDAN .mp4 dosyasını ve best.pt dosyasını yükleyiniz. Sonra kodu çalıştırınız.

In [2]:
# ============================
# VIDEO INFERENCE (YOLO-CLS) — best.pt ile tek mp4 değerlendirme
# ============================
!pip -q install ultralytics pandas opencv-python

import os, gc, glob, json, math, shutil
from datetime import datetime
from collections import Counter, defaultdict

import cv2
import numpy as np
import pandas as pd
from ultralytics import YOLO
import torch

# -------------------
# USER CONFIG
# -------------------
VIDEO_NAME = "29_2_crop.mp4"  # <-- KENDİ MP4 ADINI YAZ

VIDEO_PATH   = "/content/" + VIDEO_NAME
CKPT_PATH    = "/content/best.pt"
OUT_ROOT     = "/content"  # çıktıların kök klasörü
COPY_TO_DRIVE = True
IMG_SIZE     = 224        # eğitimle uyumlu
BATCH_INFER  = 64         # GPU'na göre artır/azalt
TARGET_FPS   = 15         # videodan örneklenecek efektif FPS (yaklaşık)
SMOOTH_WIN   = 5          # tekil kare flicker'ını azaltmak için çoğunluk filtresi (tek sayı; 1=kapalı)
WRITE_ANNOTATED = True    # anotasyonlu video kaydet
HALF         = False      # FP16 (destekleyen GPU’da hızlanabilir)

# -------------------
# checkpoint & model
# -------------------
assert CKPT_PATH and os.path.exists(CKPT_PATH), f"best.pt bulunamadı: {CKPT_PATH}"

device = 0 if torch.cuda.is_available() else "cpu"
model = YOLO(CKPT_PATH)
if HALF and device != "cpu":
    model.model.half()

print("Using checkpoint:", CKPT_PATH)
print("Device:", device)

# -------------------
# video okuma / örnekleme
# -------------------
assert os.path.exists(VIDEO_PATH), f"Video yok: {VIDEO_PATH}"
cap = cv2.VideoCapture(VIDEO_PATH)
src_fps = cap.get(cv2.CAP_PROP_FPS) or 30.0
src_w   = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
src_h   = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
src_n   = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
stride  = max(1, int(round(src_fps / TARGET_FPS)))
eff_fps = src_fps / stride
print(f"Video: {VIDEO_PATH} | fps_in={src_fps:.3f}, stride={stride} -> eff_fps≈{eff_fps:.3f}, frames={src_n}")

# -------------------
# çıktı klasörleri
# -------------------
ts = datetime.now().strftime("%Y%m%d_%H%M%S")
EVAL_DIR = os.path.join(OUT_ROOT, f"VIDEO_EVAL_{ts}")
os.makedirs(EVAL_DIR, exist_ok=True)

CSV_FRAMES   = os.path.join(EVAL_DIR, "per_frame_predictions.csv")
CSV_SEGMENTS = os.path.join(EVAL_DIR, "segments.csv")
JSON_SUMMARY = os.path.join(EVAL_DIR, "summary.json")
VID_OUT      = os.path.join(EVAL_DIR, "annotated.mp4")

# -------------------
# renkler (sınıf id -> BGR)
# -------------------
def _color_for(i):
    # basit sabit palet
    palette = [
        (60,180,255), (80,220,100), (240,180,70),
        (200,120,255), (60,60,255), (60,255,255),
        (120,120,120), (180,180,60), (255,120,120),
    ]
    return palette[i % len(palette)]

# -------------------
# toplu (batch) inference
# -------------------
names = None
frame_idxs_src = []   # orijinal frame index (0-based)
times_sec      = []   # frame zamanı (s)
top1_ids       = []
top1_names     = []
top1_confs     = []
top5_list      = []   # [(id, name, conf), ...]

batch_imgs = []
batch_meta = []  # (src_idx, t_sec)

def run_batch():
    global batch_imgs, batch_meta, names
    if not batch_imgs:
        return
    preds = model.predict(
        batch_imgs,
        imgsz=IMG_SIZE,
        device=device,
        verbose=False
    )
    if names is None:
        # Ultralytics classification isimleri
        names = preds[0].names
    for pr, (src_idx, t_sec) in zip(preds, batch_meta):
        probs = pr.probs  # tensor
        # top-1
        tid = int(probs.top1)
        conf1 = float(probs.data[tid])
        tname = names.get(tid, str(tid)) if isinstance(names, dict) else (names[tid] if tid < len(names) else str(tid))
        # top-5
        t5_ids = list(map(int, probs.top5))
        t5 = []
        for k in t5_ids:
            nm = names.get(k, str(k)) if isinstance(names, dict) else (names[k] if k < len(names) else str(k))
            t5.append((k, nm, float(probs.data[k])))

        frame_idxs_src.append(src_idx)
        times_sec.append(t_sec)
        top1_ids.append(tid)
        top1_names.append(tname)
        top1_confs.append(conf1)
        top5_list.append(t5)

    batch_imgs = []
    batch_meta = []
    gc.collect()

# oku + örnekle
src_idx = -1
while True:
    ok, img = cap.read()
    if not ok:
        break
    src_idx += 1
    if (src_idx % stride) != 0:
        continue
    t_sec = src_idx / src_fps  # videonun gerçek zamanına göre
    batch_imgs.append(img)
    batch_meta.append((src_idx, t_sec))
    if len(batch_imgs) >= BATCH_INFER:
        run_batch()

run_batch()
cap.release()

assert len(top1_ids) > 0, "Hiç örnek frame çıkarılamadı; TARGET_FPS veya stride’ı kontrol et."

# -------------------
# opsiyonel temporal smoothing (çoğunluk)
# -------------------
def smooth_majority(ids, win):
    if win <= 1 or win % 2 == 0:
        return ids
    r = []
    half = win // 2
    n = len(ids)
    for i in range(n):
        s = max(0, i - half)
        e = min(n, i + half + 1)
        seg = ids[s:e]
        c = Counter(seg).most_common(1)[0][0]
        r.append(int(c))
    return r

ids_raw = list(top1_ids)
ids_smooth = smooth_majority(ids_raw, SMOOTH_WIN)
names_map = names if isinstance(names, dict) else {i:n for i,n in enumerate(names)}

# -------------------
# segmentlere (zaman aralığı) birleştir
# -------------------
segments = []  # dict: class_id, class_name, start_idx_src, end_idx_src, start_sec, end_sec, duration_sec
if ids_smooth:
    cur_id = ids_smooth[0]
    start_i = 0
    for i in range(1, len(ids_smooth)):
        if ids_smooth[i] != cur_id:
            st_src = frame_idxs_src[start_i]
            en_src = frame_idxs_src[i-1]
            st_t   = times_sec[start_i]
            en_t   = times_sec[i-1]
            segments.append({
                "class_id":   int(cur_id),
                "class_name": names_map.get(int(cur_id), str(cur_id)),
                "start_frame_src": int(st_src),
                "end_frame_src":   int(en_src),
                "start_sec":  float(st_t),
                "end_sec":    float(en_t),
                "duration_sec": float(max(0.0, en_t - st_t + (1.0/eff_fps)))  # yaklaşık
            })
            cur_id = ids_smooth[i]
            start_i = i
    # son segment
    st_src = frame_idxs_src[start_i]
    en_src = frame_idxs_src[len(ids_smooth)-1]
    st_t   = times_sec[start_i]
    en_t   = times_sec[len(ids_smooth)-1]
    segments.append({
        "class_id":   int(cur_id),
        "class_name": names_map.get(int(cur_id), str(cur_id)),
        "start_frame_src": int(st_src),
        "end_frame_src":   int(en_src),
        "start_sec":  float(st_t),
        "end_sec":    float(en_t),
        "duration_sec": float(max(0.0, en_t - st_t + (1.0/eff_fps)))
    })

# sınıf bazında özet
dur_by_cls = defaultdict(float)
count_by_cls = defaultdict(int)
for seg in segments:
    dur_by_cls[seg["class_name"]]  += seg["duration_sec"]
    count_by_cls[seg["class_name"]] += 1

# -------------------
# per-frame CSV
# -------------------
rows = []
for i in range(len(times_sec)):
    t5 = [{"id": k, "name": n, "conf": c} for (k,n,c) in top5_list[i]]
    rows.append({
        "frame_src": frame_idxs_src[i],
        "time_sec":  times_sec[i],
        "top1_id_raw": int(ids_raw[i]),
        "top1_name_raw": names_map.get(int(ids_raw[i]), str(ids_raw[i])),
        "top1_id_smooth": int(ids_smooth[i]),
        "top1_name_smooth": names_map.get(int(ids_smooth[i]), str(ids_smooth[i])),
        "top1_conf": float(top1_confs[i]),
        "top5": json.dumps(t5, ensure_ascii=False)
    })
pd.DataFrame(rows).to_csv(CSV_FRAMES, index=False)

# segment CSV
pd.DataFrame(segments).to_csv(CSV_SEGMENTS, index=False)

# summary JSON
summary = {
    "video_path": VIDEO_PATH,
    "checkpoint": CKPT_PATH,
    "img_size": IMG_SIZE,
    "target_fps": TARGET_FPS,
    "effective_fps": eff_fps,
    "stride": stride,
    "smoothing_window": SMOOTH_WIN,
    "classes": names_map,
    "per_class": {
        k: {"segments": int(count_by_cls[k]), "total_sec": float(dur_by_cls[k])}
        for k in sorted(dur_by_cls.keys())
    }
}
with open(JSON_SUMMARY, "w") as f:
    json.dump(summary, f, indent=2, ensure_ascii=False)

print("Saved:")
print(" - per-frame CSV   :", CSV_FRAMES)
print(" - segments CSV    :", CSV_SEGMENTS)
print(" - summary JSON    :", JSON_SUMMARY)

# -------------------
# Anotasyonlu video
# -------------------
if WRITE_ANNOTATED:
    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    out = cv2.VideoWriter(VID_OUT, fourcc, eff_fps, (src_w, src_h))
    for i in range(len(times_sec)):
        # yeniden görüntüyü inputtan okumak yerine hızlıca tekrar oku (örneklenen frame’i çekeceğiz)
        # Not: büyük videolarda ikinci okuma maliyetli olabilir; pratikte yeterli.
        cap = cv2.VideoCapture(VIDEO_PATH)
        cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idxs_src[i])
        ok, frame = cap.read()
        cap.release()
        if not ok:
            continue
        cid = int(ids_smooth[i])
        cname = names_map.get(cid, str(cid))
        conf = top1_confs[i]
        tsec = times_sec[i]

        # üst sol etiket
        label = f"{cname}  {conf:.2f}  t={tsec:6.2f}s"
        (tw, th), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.8, 2)
        cv2.rectangle(frame, (10, 10), (10+tw+10, 10+th+10), _color_for(cid), -1)
        cv2.putText(frame, label, (15, 10+th+2), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0,0,0), 2, cv2.LINE_AA)

        # alt zaman çizgisi
        progress = i / max(1, (len(times_sec)-1))
        bar_w = int(progress * src_w)
        cv2.rectangle(frame, (0, src_h-12), (bar_w, src_h), _color_for(cid), -1)

        out.write(frame)
    out.release()
    print(" - annotated video :", VID_OUT)

print("\nÖZET (segment bazında):")
for cls in sorted(dur_by_cls.keys()):
    print(f" - {cls:18s} -> segments: {count_by_cls[cls]:3d}  total: {dur_by_cls[cls]:7.2f}s")


### DOWNLOAD RESULTS FOLDER ####################################################################

import os, shutil
from google.colab import files

base = os.path.basename(EVAL_DIR.rstrip("/"))
zip_path = f"/content/{base}.zip"
shutil.make_archive(f"/content/{base}", "zip",
                    root_dir=os.path.dirname(EVAL_DIR),
                    base_dir=base)
files.download(zip_path)


Using checkpoint: /content/best.pt
Device: 0
Video: /content/29_2_crop.mp4 | fps_in=30.000, stride=2 -> eff_fps≈15.000, frames=4571
Saved:
 - per-frame CSV   : /content/VIDEO_EVAL_20251015_190245/per_frame_predictions.csv
 - segments CSV    : /content/VIDEO_EVAL_20251015_190245/segments.csv
 - summary JSON    : /content/VIDEO_EVAL_20251015_190245/summary.json
 - annotated video : /content/VIDEO_EVAL_20251015_190245/annotated.mp4

ÖZET (segment bazında):
 - Hand_In_Shelf      -> segments:   8  total:    8.33s
 - Inspect_Product    -> segments:  49  total:   93.67s
 - Inspect_Shelf      -> segments:  43  total:   49.67s
 - Reach_To_Shelf     -> segments:   2  total:    0.27s
 - Retract_From_Shelf -> segments:   2  total:    0.47s


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>