# DeepEyes Image & Video Demo

This notebook demonstrates how to reuse the Python utilities from `eval/video_batch_demo.py`
for single image or video reasoning demos. The notebook focuses on interactive use cases,
while the standalone script also supports batch JSON processing and multi-GPU execution.

In [None]:
import os
from eval import video_batch_demo as demo

# Endpoint settings handed to demo.create_client below; edit them for your setup.
API_URL = "http://127.0.0.1:8000/v1"  # TODO: update if needed
API_KEY = "EMPTY"  # TODO: replace with your real key

# Output location that is passed to demo.process_single_media by the demo cells.
OUTPUT_ROOT = "notebook_demo_outputs"

client = demo.create_client(API_KEY, API_URL)

# Ask the helper for a model name. Any failure is reported as a warning and a
# placeholder name is used instead, so the cell itself never aborts here.
try:
    MODEL_NAME = demo.fetch_model_name(client, explicit_name=None)
except Exception as fetch_error:
    MODEL_NAME = "your-model-name"
    print(f"Warning: failed to fetch model name automatically: {fetch_error}")

print(f"Using model: {MODEL_NAME}")

In [None]:
# Opt-in guard: nothing below runs until this flag is flipped to True.
RUN_IMAGE_DEMO = False  # Switch to True after filling in the paths/question

if RUN_IMAGE_DEMO:
    # User-supplied inputs for the single-image run.
    question = "What is happening in the photo?"
    options = None  # or a list such as ['A. Option 1', 'B. Option 2']
    image_path = "/path/to/your/image.jpg"

    # Bundle the inputs into the config object expected by the demo helper.
    image_config = demo.SingleRunConfig(
        sample_id="image_demo",
        media_type="image",
        media_path=image_path,
        question=question,
        options=options,
    )

    # Reuses `client`, `MODEL_NAME` and `OUTPUT_ROOT` from the setup cell.
    image_result = demo.process_single_media(
        image_config,
        client,
        MODEL_NAME,
        OUTPUT_ROOT,
        max_rounds=16,
        max_video_frames=512,
    )

    # Report the final answer and where the run's output directory is.
    print(f"Answer: {image_result.reasoning.final_answer}")
    print(f"Visualizations saved to: {image_result.reasoning.output_dir}")

In [None]:
# Opt-in guard: nothing below runs until this flag is flipped to True.
RUN_VIDEO_DEMO = False  # Switch to True after setting VIDEO_ROOT and the video ID/question

# Directory that holds the "<video_id>.mp4" files. The default is a
# machine-specific absolute path and will not exist on other machines.
# TODO: point this at your local video directory before enabling the demo.
VIDEO_ROOT = "/remote-home/zhangkc/data/zhangkc/video-mme-bench/data/"

if RUN_VIDEO_DEMO:
    video_id = "fFjv93ACGo8"  # example video ID
    video_path = os.path.join(VIDEO_ROOT, f"{video_id}.mp4")

    # Multiple-choice question about the example video.
    question = "When demonstrating the Germany modern Christmas tree is initially decorated with apples, candles and berries, which kind of the decoration has the largest number?"
    options = [
        "A. Apples.",
        "B. Candles.",
        "C. Berries.",
        "D. The three kinds are of the same number.",
    ]

    # `answer` is passed alongside the question; presumably the reference
    # choice used for scoring -- confirm against demo.SingleRunConfig.
    video_config = demo.SingleRunConfig(
        media_path=video_path,
        question=question,
        options=options,
        sample_id=f"{video_id}_demo",
        media_type="video",
        answer="C",
        extra={"video_id": video_id},
    )

    # Reuses `client`, `MODEL_NAME` and `OUTPUT_ROOT` from the setup cell.
    video_result = demo.process_single_media(
        video_config,
        client,
        MODEL_NAME,
        OUTPUT_ROOT,
        max_video_frames=512,
        max_rounds=16,
    )

    # Report the predicted choice and where the run's output directory is.
    print(f"Predicted choice: {video_result.pred_choice}")
    print(f"Visualizations saved to: {video_result.reasoning.output_dir}")