In [1]:
%load_ext autoreload
%autoreload 2

from dotenv import load_dotenv
load_dotenv()

True

In [2]:
import requests

API_BASE = "http://localhost:8080"

response = requests.get(f"{API_BASE}/")
print(f"✓ API Status: {response.status_code}")
print(response.json())

✓ API Status: 200
{'message': 'Welcome to Multimodal API. Visit /docs for documentation'}


In [3]:
video_file = "E:/TEST DATA/videos-multimodal testing/inception_ending_scene.mp4"  # Local file to upload

with open(video_file, "rb") as f:
    files = {"file": f}
    response = requests.post(f"{API_BASE}/upload-video", files=files)
    
print(f"Upload status: {response.status_code}")
result = response.json()
print(f"Video path: {result.get('video_path')}")
print(f"Media id: {result.get('media_id')}")

Upload status: 200
Video path: videos/uploads/89e1906b-0c06-4938-a172-b6c9a602a647.mp4
Media id: 89e1906b-0c06-4938-a172-b6c9a602a647


In [4]:
# Use video_path from upload response
video_path = result.get('video_path')  # From previous cell
media_id = result.get('media_id')  # From previous cell

response = requests.post(
    f"{API_BASE}/process-video",
    json={"video_path": video_path,
         "media_id": media_id }
)

print(f"Process status: {response.status_code}")
task_info = response.json()
print(f"Task ID: {task_info.get('media_id')}")

Process status: 200
Task ID: 89e1906b-0c06-4938-a172-b6c9a602a647


In [5]:
print(f"Task ID: {task_info.get('media_id')}")

Task ID: 89e1906b-0c06-4938-a172-b6c9a602a647


In [7]:
import time

task_id = task_info.get('media_id')  # From previous cell

for i in range(10):  # Check 10 times
    response = requests.get(f"{API_BASE}/task-status/{task_id}")
    status = response.json()
    print(f"[{i+1}] Status: {status.get('status')}")
    
    if status.get('status') in ['completed', 'failed']:
        break
    time.sleep(5)  # Wait 5 seconds

[1] Status: completed


In [8]:
response = requests.post(
    f"{API_BASE}/chat",
    json={
        "message": "show me where someone is talkin about a science project?",
        # "video_path": video_path,  # From upload
        "image_base64": None
    }
)

print(f"Chat status: {response.status_code}")
result = response.json()
print(f"Message: {result.get('message')}")
print(f"Video clip: {result.get('output_video_path')}")

Chat status: 200
Message: Found it. The clip matching your query has been extracted.
Video clip: videos/ai_responses/55bbfc33-9d97-4993-bce5-b247bd22c344.mp4


In [9]:
from multimodal_api.config.config import get_settings
from pathlib import Path

settings = get_settings()

video_path = Path(settings.SHARED_MEDIA_DIR) / result.get('output_video_path').lstrip("/\\")
print(video_path)

E:\python projects\multi-modal-rag\shared_media\videos\ai_responses\55bbfc33-9d97-4993-bce5-b247bd22c344.mp4


In [10]:
image_file = "E:/TEST DATA/images-multimodal testing/sad_robot.png"

with open(image_file, "rb") as f:
    files = {"file": f}
    response = requests.post(f"{API_BASE}/upload-image", files=files)

print(f"Upload status: {response.status_code}")
image_result = response.json()
print(f"Image path: {image_result.get('image_path')}")

Upload status: 200
Image path: images/abd070f5-cdc6-43eb-9d4c-9f50adf40665.png


In [11]:
import base64

# Load image as base64
with open(image_file, "rb") as f:
    image_b64 = base64.b64encode(f.read()).decode("utf-8")

response = requests.post(
    f"{API_BASE}/chat",
    json={
        "message": "find this scene in the video",
        # "video_path": video_path,
        "image_base64": image_b64
    }
)

result = response.json()
print(f"Message: {result.get('message')}")
print(f"Video clip: {result.get('output_video_path')}")

Message: Found it. The clip matching your query has been extracted.
Video clip: videos/ai_responses/05caba22-9efc-499a-bb5b-b2ea3caae827.mp4


In [12]:
response = requests.get(f"{API_BASE}/media-files")
result = response.json()

print(f"Total files: {result.get('total_count')}")
for file in result.get('files', [])[:5]:  # Show first 5
    print(f"  - {file.get('media_type')}: {file.get('media_url')}")

Total files: 16
  - video: videos/ai_responses/05caba22-9efc-499a-bb5b-b2ea3caae827.mp4
  - image: images/abd070f5-cdc6-43eb-9d4c-9f50adf40665.png
  - video: videos/ai_responses/55bbfc33-9d97-4993-bce5-b247bd22c344.mp4
  - video: videos/uploads/89e1906b-0c06-4938-a172-b6c9a602a647.mp4
  - video: videos/ai_responses/f15a06cd-8add-4643-8259-705b6e3de0ca.mp4


In [13]:
response = requests.get(f"{API_BASE}/media-files/stats")
stats = response.json()

print(f"Total files: {stats.get('total_files')}")
print(f"Images: {stats.get('images')}")
print(f"Videos: {stats.get('videos')}")
print(f"Total size: {stats.get('total_size_mb')} MB")

Total files: 16
Images: 4
Videos: 12
Total size: 49.53 MB


In [19]:
import gc

def _finalize_processing():
    # pxt.commit()
    gc.collect()  # forces PyAV container finalization

_finalize_processing()

In [27]:
# Use video_path from upload response
video_path = "videos/uploads/441e28ad-13ff-4a03-aa04-9db909eaddf1.mp4"

response = requests.delete(
    f"{API_BASE}/media-file",
    json={"media_id": "4bb8a93c-1aa3-4cd2-8ae6-09c569ad6eb8",
          "file_path": video_path}
)

print(f"Delete status: {response.status_code}")
task_info = response.json()
print(f"Task ID: {task_info.get('task_id')}")

Delete status: 404
Task ID: None


In [26]:
import time

task_id = task_info.get('task_id')  # From previous cell

for i in range(10):  # Check 10 times
    response = requests.get(f"{API_BASE}/task-status/{task_id}")
    status = response.json()
    print(f"[{i+1}] Status: {status.get('status')}")
    
    if status.get('status') in ['completed', 'failed']:
        break
    time.sleep(5)  # Wait 5 seconds

[1] Status: failed
