
# Phase 1 MVP Demo (vLLM, Qwen3-VL-4B)

- 依赖：`pip install -r requirements.txt`（已合并 vLLM 依赖）。
- 先启动 vLLM：`./deploy/start_vllm.sh`（默认 0.0.0.0:8000，模型 Qwen/Qwen3-VL-4B-Instruct）。
- 本示例使用内置视频 `data/raw/semantic/MOT17-12.mp4`（相对于项目根目录，与下方代码中的 `video_path` 一致），会在 `output/demo_run/` 下生成两段视频：
  - `tracking_<question>.mp4`：VLM 选中轨迹高亮
  - `tracking_all_tracks_<question>.mp4`：ByteTrack 调试版，所有 ID 标注


In [None]:
import os
import sys
from pathlib import Path

# Make the project's ``src`` directory importable from this kernel.
# PYTHONPATH is only consulted when an interpreter starts, so assigning it
# here does not affect the already-running process; sys.path must be
# extended directly for the imports below to resolve.
# NOTE(review): the relative "src" path assumes the notebook is run from the
# project root -- confirm.
src_dir = os.path.abspath("src")
if src_dir not in sys.path:
    sys.path.insert(0, src_dir)
# Still export PYTHONPATH, as the original cell did, so that any child
# process inheriting this environment sees the same import root.
os.environ["PYTHONPATH"] = src_dir

from core.config import SystemConfig
from pipeline.video_semantic_search import VideoSemanticSystem

# Demo configuration: input video, output directory, and the vLLM backend.
# The endpoint and model name are expected to match the server started
# beforehand with ./deploy/start_vllm.sh (port 8000, Qwen3-VL-4B-Instruct).
config = SystemConfig(
    video_path=Path("data/raw/semantic/MOT17-12.mp4"),
    output_dir=Path("output/demo_run"),
    vlm_backend="vllm",
    vllm_endpoint="http://localhost:8000/v1",
    vllm_model_name="Qwen/Qwen3-VL-4B-Instruct",
)

# Shared system object used by the indexing and query cells that follow.
system = VideoSemanticSystem(config=config)


In [None]:
# Step 1: build the index (Perception -> Features -> Evidence).
system.build_index()

# Report how many evidence packages the system holds after indexing.
evidence_count = len(system.evidence_map)
print("索引完成，证据包数量:", evidence_count)


In [None]:
# Step 2: run one sample query; per the notebook intro this also produces
# the two output videos.
sample_question = "找穿蓝色衣服的人"
results = system.question_search(sample_question, top_k=3)

# Summarise the matches: total count, then one line per matched track with
# its time span, score and the reason attached to the result.
match_count = len(results)
print("匹配结果数量:", match_count)
for match in results:
    print(f"track {match.track_id}: {match.start_s:.1f}s–{match.end_s:.1f}s | score={match.score:.2f} | {match.reason}")
