In [1]:
%load_ext autoreload
%autoreload 2

from dotenv import load_dotenv
load_dotenv()

True

In [2]:
from fastmcp import Client
import asyncio
from pathlib import Path
import base64

# URL handed to the fastmcp `Client`; the server is assumed to be running locally before the notebook is executed.
MCP_SERVER = "http://localhost:9090/mcp"
# Video sent to the `process_video` tool. The path is relative, so it resolves against the notebook's working directory.
TEST_VIDEO = "../../shared_media/videos/uploads/97aa67b6_pass_the_butter_rick_and_morty.mp4"  # Change this

In [3]:
client = Client(MCP_SERVER)

async def test_connection():
    async with client:
        tools = await client.list_tools()
        print(f"✓ Connected! Found {len(tools)} tools")
        for tool in tools:
            print(f"  - {tool.name}")

await test_connection()

✓ Connected! Found 5 tools
  - process_video
  - delete_video
  - get_video_clip_from_user_query
  - get_video_clip_from_image
  - ask_question_about_video


In [22]:
async def check_prompts():
    async with client:
        prompts = await client.list_prompts()
        print(f"Found {len(prompts)} prompts:")
        for p in prompts:
            print(f"  - {p.name}")

await check_prompts()

Found 3 prompts:
  - prompt_routing_system
  - prompt_tool_use_system
  - prompt_general_system


In [24]:
# Make sure your video file exists first
video_path = Path(TEST_VIDEO)
print(f"Video exists: {video_path.exists()}")
print(f"Video path: {video_path.absolute()}")

async def process_video():
    async with client:
        result = await client.call_tool(
            "process_video",
            {"video_path": str(video_path.absolute())}
        )
        print(f"✓ Processing result: {result}")
        return result

result = await process_video()

Video exists: True
Video path: E:\python projects\multi-modal-rag\multimodal-mcp\notebooks\..\..\shared_media\videos\uploads\97aa67b6_pass_the_butter_rick_and_morty.mp4
✓ Processing result: CallToolResult(content=[TextContent(type='text', text='true', annotations=None, meta=None)], structured_content={'result': True}, data=True, is_error=False)


In [5]:
# Destructive reset: drop the global video table if it currently exists.
#
# `pxt` and `settings` are resolved inside this cell so that it works on a fresh
# kernel (Restart & Run All). Previously it only worked when a later cell, which
# imports pixeltable and builds the settings object, had already been executed.
import pixeltable as pxt

from multimodal_mcp.config import get_settings

settings = get_settings()

if settings.GLOBAL_VIDEO_TABLE_NAME in pxt.list_tables():
    # NOTE(review): force=True is kept from the original; per the Pixeltable docs it
    # also drops dependent views — confirm that is intended before running on real data.
    pxt.drop_table(settings.GLOBAL_VIDEO_TABLE_NAME, force=True)

In [14]:
import pixeltable as pxt

from multimodal_mcp.config import get_settings

# Project settings object; other cells read table/view names from it
# (GLOBAL_VIDEO_TABLE_NAME, GLOBAL_AUDIO_CHUNKS_VIEW_NAME).
settings = get_settings()

# Optional inspection helpers, left disabled: list all tables and preview the global video table.
# pxt.list_tables()
# video_table = pxt.get_table(settings.GLOBAL_VIDEO_TABLE_NAME)
# video_table.show()

In [11]:
# Make sure your video file exists first
video_id = "a3a2db32"

async def delete_video():
    async with client:
        result = await client.call_tool(
            "delete_video",
            {"video_id": video_id}
        )
        print(f"✓ Delete result: {result}")
        return result

result = await delete_video()
video_table.show()

✓ Delete result: CallToolResult(content=[TextContent(type='text', text='true', annotations=None, meta=None)], structured_content={'result': True}, data=True, is_error=False)


video_id,video,video_name,status,processed_at,audio


In [13]:
# Preview the audio-chunks view whose name is configured in the project settings.
audio_view_name = settings.GLOBAL_AUDIO_CHUNKS_VIEW_NAME
audio_table = pxt.get_table(audio_view_name)
audio_table.show()

In [6]:
async def search_by_text(query):
    async with client:
        result = await client.call_tool(
            "get_video_clip_from_user_query",
            {"user_query": query}
        )
        print(f"Search result: {result}")
        return result

result = await search_by_text("show me the robot")

Search result: CallToolResult(content=[TextContent(type='text', text='{"type":"video","content":"videos/ai_responses/194cc500-e0fb-4ba4-8fc2-1fde3f8bcb1b.mp4"}', annotations=None, meta=None)], structured_content={'type': 'video', 'content': 'videos/ai_responses/194cc500-e0fb-4ba4-8fc2-1fde3f8bcb1b.mp4'}, data=Root(type='video', content='videos/ai_responses/194cc500-e0fb-4ba4-8fc2-1fde3f8bcb1b.mp4'), is_error=False)


In [7]:
async def ask_question(question):
    async with client:
        result = await client.call_tool(
            "ask_question_about_video",
            {"user_query": question}
        )
        print(f"Answer: {result}")
        return result

result = await ask_question("is there someone discussing about a science project?")

Answer: CallToolResult(content=[TextContent(type='text', text='{"type":"text","content":"Video: Unknown\\nContent: The image depicts a scene from the animated television series Rick and Morty. The main subject of the image is Rick Sanchez, a mad scientist with spiky blue hair, wearing a white lab coat, sitting at a table with his family.\\n\\n*   **Rick Sanchez**\\n    *   Wearing a white lab coat\\n    *   Has spiky blue hair\\n    *   Sitting on the left side of the table\\n    *   Holding a device that appears to be emitting green laser beams\\n*   **Morty Smith**\\n    *   Wearing a green shirt\\n    *   Sitting to the right of Rick\\n    *   Looking worried or scared\\n    *   Holding a black object\\n*   **Summer Smith**\\n    *   Wearing a red shirt\\n    *   Sitting to the right of Morty\\n    *   Looking worried or scared\\n    *   Holding a fork and knife\\n*   **Table Setting**\\n    *   Yellow plaid tablecloth\\n    *   Plates with pancakes\\n    *   Glasses of orange juice

In [17]:
import base64

# Image file that is base64-encoded below and used as the query for the image-based clip search.
# The path is relative, so it resolves against the notebook's working directory.
test_image_path = "../../shared_media/images/4d2fd3c0_7.png"  # Change this

def load_image_base64(image_path) -> str:
    """Read the file at `image_path` in binary mode and return its contents as a base64 text string."""
    with open(image_path, "rb") as image_file:
        raw_bytes = image_file.read()
    encoded_bytes = base64.b64encode(raw_bytes)
    return encoded_bytes.decode("utf-8")

# Encode the test image only when the file is present; otherwise just warn and leave
# `image_b64` unassigned by this cell.
if not Path(test_image_path).exists():
    print("⚠ Test image not found, skip this test")
else:
    image_b64 = load_image_base64(test_image_path)
    print(f"✓ Loaded image ({len(image_b64)} chars)")

✓ Loaded image (1831380 chars)


In [18]:
# Only if you loaded image in previous cell
async def search_by_image(image_base64):
    async with client:
        result = await client.call_tool(
            "get_video_clip_from_image",
            {"image_base64": image_base64}
        )
        print(f"Image search result: {result}")
        return result

if 'image_b64' in locals():
    result = await search_by_image(image_b64)

Image search result: CallToolResult(content=[TextContent(type='text', text='{"type":"video","content":"videos/ai_responses/d54bb0a8-b78d-4e9e-b2d4-c115ddd3a3e2.mp4"}', annotations=None, meta=None)], structured_content={'type': 'video', 'content': 'videos/ai_responses/d54bb0a8-b78d-4e9e-b2d4-c115ddd3a3e2.mp4'}, data=Root(type='video', content='videos/ai_responses/d54bb0a8-b78d-4e9e-b2d4-c115ddd3a3e2.mp4'), is_error=False)
