In [1]:
%load_ext autoreload
%autoreload 2

from dotenv import load_dotenv
# Runs before get_settings() is called in the next cell. Presumably this loads
# variables from a local .env file into the environment — confirm the file's location.
load_dotenv()

True

In [2]:
from fastmcp import Client
import asyncio
from pathlib import Path
import base64

from loguru import logger

from multimodal_mcp.config import get_settings 

# Project settings; SHARED_MEDIA_DIR from this object is used to locate the test media.
settings = get_settings()
# Replace `logger` with the result of bind(name="MCPServer"); presumably this tags
# log records with that name for filtering/formatting — confirm against the sink config.
logger = logger.bind(name="MCPServer")

# URL handed to Client(...) when the MCP client is created.
MCP_SERVER = "http://localhost:9090/mcp"
# Video file whose absolute path is sent to the `process_video` tool.
TEST_VIDEO = f"{settings.SHARED_MEDIA_DIR}/videos/uploads/97aa67b6_pass_the_butter_rick_and_morty.mp4"  # Change this

In [3]:
client = Client(MCP_SERVER)

async def test_connection():
    async with client:
        tools = await client.list_tools()
        print(f"✓ Connected! Found {len(tools)} tools")
        for tool in tools:
            print(f"  - {tool.name}")

await test_connection()

✓ Connected! Found 4 tools
  - process_video
  - get_video_clip_from_user_query
  - get_video_clip_from_image
  - ask_question_about_video


In [4]:
async def check_prompts():
    async with client:
        prompts = await client.list_prompts()
        print(f"Found {len(prompts)} prompts:")
        for p in prompts:
            print(f"  - {p.name}")

await check_prompts()

Found 3 prompts:
  - prompt_routing_system
  - prompt_tool_use_system
  - prompt_general_system


In [5]:
# Make sure your video file exists first
video_path = Path(TEST_VIDEO)
print(f"Video exists: {video_path.exists()}")
print(f"Video path: {video_path.absolute()}")

async def process_video():
    async with client:
        result = await client.call_tool(
            "process_video",
            {"video_path": str(video_path.absolute())}
        )
        print(f"✓ Processing result: {result}")
        return result

result = await process_video()

Video exists: True
Video path: E:\python projects\multi-modal-rag\shared_media\videos\uploads\97aa67b6_pass_the_butter_rick_and_morty.mp4
✓ Processing result: CallToolResult(content=[TextContent(type='text', text='true', annotations=None, meta=None)], structured_content={'result': True}, data=True, is_error=False)


In [6]:
async def search_by_text(query):
    async with client:
        result = await client.call_tool(
            "get_video_clip_from_user_query",
            {"user_query": query}
        )
        print(f"Search result: {result}")
        return result

result = await search_by_text("show me sad robot")

Search result: CallToolResult(content=[TextContent(type='text', text='{"type":"video","content":"videos/ai_responses/a237e0a2-b87d-47a0-b869-7118a0add86e.mp4"}', annotations=None, meta=None)], structured_content={'type': 'video', 'content': 'videos/ai_responses/a237e0a2-b87d-47a0-b869-7118a0add86e.mp4'}, data=Root(type='video', content='videos/ai_responses/a237e0a2-b87d-47a0-b869-7118a0add86e.mp4'), is_error=False)


In [7]:
async def ask_question(question):
    async with client:
        result = await client.call_tool(
            "ask_question_about_video",
            {"user_query": question}
        )
        print(f"Answer: {result}")
        return result

result = await ask_question("what is happening in the video?")

Answer: CallToolResult(content=[TextContent(type='text', text='{"type":"text","content":"Video: Unknown\\nContent: The image shows a woman looking directly at the camera with a worried and tense expression. She appears to be in a dimly lit room with a focused look on her face. The image seems to be a still from a movie, as indicated by the \\"FANDANGO MOVIECLIPS\\" watermark. The woman\'s expression suggests she is experiencing some form of distress or concern.\\n\\nVideo: Unknown\\nContent: The image depicts a scene from a movie or video game, showcasing a character in mid-air, navigating through a dimly lit corridor with slanted ceilings and walls lined with thin, rectangular panels.\\n\\n* The character is dressed in dark attire and appears to be jumping or falling.\\n\\t+ The character\'s body is positioned at an angle, with their head facing downwards.\\n\\t+ Their arms are outstretched, and their legs are bent.\\n* The background features a wall and ceiling composed of thin, rect

In [8]:
# Path of the image that is base64-encoded below and sent to the image-based clip search.
# It is built under settings.SHARED_MEDIA_DIR; replace the filename with an image you have.
test_image_path = f"{settings.SHARED_MEDIA_DIR}/images/4d2fd3c0_7.png"  # Change this

def load_image_base64(image_path):
    """Return the contents of ``image_path`` as a base64 string.

    Args:
        image_path: Location of the file to read; it is opened in binary mode.

    Returns:
        The file's bytes, base64-encoded and decoded to ``str`` as UTF-8.
    """
    with open(image_path, "rb") as image_file:
        raw_bytes = image_file.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")

# Encode the test image only when it is present on disk.
if Path(test_image_path).exists():
    image_b64 = load_image_base64(test_image_path)
    print(f"✓ Loaded image ({len(image_b64)} chars)")
else:
    # Drop any payload left over from an earlier run of this cell; otherwise a
    # later `'image_b64' in locals()` check would still pass and the image
    # search would silently run with a stale image despite the warning below.
    globals().pop("image_b64", None)
    print("⚠ Test image not found, skip this test")

✓ Loaded image (1831380 chars)


In [9]:
# Only if you loaded image in previous cell
async def search_by_image(image_base64):
    async with client:
        result = await client.call_tool(
            "get_video_clip_from_image",
            {"image_base64": image_base64}
        )
        print(f"Image search result: {result}")
        return result

if 'image_b64' in locals():
    result = await search_by_image(image_b64)

Image search result: CallToolResult(content=[TextContent(type='text', text='{"type":"video","content":"videos/ai_responses/d50d041b-3c5e-493f-b1ab-03083a7581f0.mp4"}', annotations=None, meta=None)], structured_content={'type': 'video', 'content': 'videos/ai_responses/d50d041b-3c5e-493f-b1ab-03083a7581f0.mp4'}, data=Root(type='video', content='videos/ai_responses/d50d041b-3c5e-493f-b1ab-03083a7581f0.mp4'), is_error=False)
