<a target="_blank" href="https://colab.research.google.com/github/tldw-ai/example-playbooks/blob/main/get_started_video_embedding.ipynb">
  <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>

step #1: Submit videos for embedding


In [None]:
import os

# Get API key from `https://trytldw.ai/account?tab=api`.
# Either paste it below or, preferably, export it as the TLDW_API_KEY
# environment variable so the secret is never saved inside the notebook.
API_KEY = ''  # Insert API key here!!!
if not API_KEY:
    # Fall back to the environment when nothing was pasted above.
    API_KEY = os.environ.get('TLDW_API_KEY', '')
if not API_KEY:
    # Warn early: an empty key would only produce a bare 'Bearer ' header.
    print('Warning: API_KEY is empty; set it above or export TLDW_API_KEY '
          'before running the cells below.')

# Base URL that every request in this notebook is built from.
API_ENDPOINT = 'https://api.trytldw.ai/v1'

# Headers sent with every request; carries the bearer token.
headers = {
    'Authorization': f'Bearer {API_KEY}',
}

In [None]:
import requests

# Submit three sample videos for embedding.
# API reference: https://docs.trytldw.ai/tldw/embed-media-media-embed-post
res = requests.post(f'{API_ENDPOINT}/media/embed', headers=headers, json={
    'media_list': [
        {
            'title': 'Destroyed Building',
            'external_id': 'video_0',
            'url': 'https://storage.googleapis.com/test-only-public/Aerial%20footage%20shows%20the%20destruction%20in%20Asheville%20and%20parts%20of%20western%20North%20Carolina%20%5BiLlebC7vuJw%5D.webm'
        },
        {
            'title': 'Explosion',
            'external_id': 'video_1',
            'url': 'https://storage.googleapis.com/test-only-public/Drone%20video%20shows%20deadly%20fireworks%20explosion%20in%20the%20Honolulu%20area%20%5BQbbXlMyq3Cs%5D.webm'
        },
        {
            'title': 'Wildfire Smoke',
            'external_id': 'video_2',
            'url': 'https://storage.googleapis.com/test-only-public/Watch%20from%20above%EF%BC%9A%20Aerial%20footage%20of%20natural%20disasters%20in%202017%20%5BmKWq9m3rkTU%5D.mp4'
        }
    ]
})
# Fail fast on HTTP errors (e.g. a missing or invalid API key) instead of
# hitting a confusing KeyError on 'media_list' below.
res.raise_for_status()

# Parse the response body once and reuse it.
submitted_media = res.json()['media_list']
print(submitted_media)

# IDs assigned by the service; later cells use them to poll and fetch results.
media_ids = [media['id'] for media in submitted_media]

[{'id': 'b653fbda-a7a9-4f7b-90e3-8a62537507dd', 'external_id': 'video_0', 'status': 'PENDING'}, {'id': 'f5abb9d3-282f-4236-a42b-de2c62846a3e', 'external_id': 'video_1', 'status': 'PENDING'}, {'id': 'ab68c9f4-8ee9-44e5-a370-c40882e227a2', 'external_id': 'video_2', 'status': 'PENDING'}]


step #3: Wait until all media reach the COMPLETED state


In [None]:
import time
import requests

# Optional: to poll media from an earlier submission instead of the ones
# submitted above, uncomment the next line and replace the IDs with your own.
# media_ids = ['5e5f88f5-80cb-43a2-997c-94db0fba5828', 'a715c8c3-f18f-4654-a336-ecaf214b2ccf', '7c0fcef3-b5b4-4010-9a93-eaa9f78ad067']
# Seconds to sleep between polling rounds in wait_for_processing_completes.
POLL_INTERVAL_SECONDS = 5


def wait_for_processing_completes(media_ids, headers, timeout_seconds=None,
                                  failure_statuses=('FAILED',)):
    """Poll the media endpoint until every media item reports COMPLETED.

    Args:
        media_ids: Iterable of media IDs to wait for. It is copied into a
            set, so the caller's collection is never mutated.
        headers: HTTP headers sent with every status request.
        timeout_seconds: Optional overall time budget in seconds. ``None``
            (the default) waits indefinitely, as the original loop did.
        failure_statuses: Status values treated as terminal failures.
            NOTE(review): 'FAILED' is an assumed name -- confirm the real
            terminal status values against the API reference.

    Returns:
        None. Progress messages are printed while waiting.

    Raises:
        requests.HTTPError: If a status request returns an HTTP error code.
        RuntimeError: If a media item reports one of ``failure_statuses``.
        TimeoutError: If ``timeout_seconds`` elapses with media still pending.
    """
    # Work on a copy so the caller's list stays intact.
    pending_media = set(media_ids)
    deadline = (None if timeout_seconds is None
                else time.monotonic() + timeout_seconds)

    while pending_media:
        # Iterate over a snapshot because the set shrinks inside the loop.
        for media_id in list(pending_media):
            # API reference: https://docs.trytldw.ai/tldw/get-media-media-media-id-get
            response = requests.get(
                f'{API_ENDPOINT}/media/{media_id}', headers=headers)
            # Surface auth / not-found errors instead of polling forever on
            # a payload that carries no 'status' field.
            response.raise_for_status()
            status = response.json().get('status', 'UNKNOWN')

            if status == 'COMPLETED':
                pending_media.remove(media_id)
            elif status in failure_statuses:
                # A terminal failure never becomes COMPLETED; stop waiting.
                raise RuntimeError(
                    f'Media {media_id} ended in status {status}')

        if not pending_media:
            print('All media are in COMPLETED state.')
            break

        if deadline is not None and time.monotonic() >= deadline:
            raise TimeoutError(
                f'Timed out waiting for media: {sorted(pending_media)}')

        print('Waiting for remaining media to reach COMPLETED state...')
        time.sleep(POLL_INTERVAL_SECONDS)


# Block here until every media item in media_ids reports COMPLETED.
wait_for_processing_completes(media_ids, headers)

All media are in COMPLETED state.


step #4: Retrieve embedding for videos


In [None]:
import json
# One response payload per media item, in the same order as media_ids.
media_embedding = []

for media_id in media_ids:
    # API reference: https://docs.trytldw.ai/tldw/get-media-media-media-id-get
    response = requests.get(
        f'{API_ENDPOINT}/media/{media_id}?retrieve_embedding=true', headers=headers)
    # Fail fast on HTTP errors so an error payload is never stored as if it
    # were a media record (it would break the similarity step later on).
    response.raise_for_status()
    media_embedding.append(response.json())

# Inspect the result without flooding the output: keep at most the first
# 5 segments per media and the first 5 dimensions of each segment embedding.
inspect_embedding = [{
    **item,
    "embeddings": [{**embedding, "embedding": embedding["embedding"][:5]} for embedding in item.get("embeddings", [])[:5]]
} for item in media_embedding]
print(json.dumps(inspect_embedding, indent=2))

[
  {
    "id": "b653fbda-a7a9-4f7b-90e3-8a62537507dd",
    "external_id": "Destroyed Building",
    "status": "COMPLETED",
    "collection_id": null,
    "title": "Destroyed Building",
    "embeddings": [
      {
        "start_ms": 0,
        "end_ms": 2000,
        "embedding": [
          0.057333193719387054,
          0.006780697964131832,
          -0.038048598915338516,
          0.045717742294073105,
          0.057266347110271454
        ]
      },
      {
        "start_ms": 2000,
        "end_ms": 4000,
        "embedding": [
          0.058070261031389236,
          0.004891926888376474,
          -0.04078308492898941,
          0.044884152710437775,
          0.05748013034462929
        ]
      },
      {
        "start_ms": 4000,
        "end_ms": 6000,
        "embedding": [
          0.017752336338162422,
          0.02872983179986477,
          -0.02223474159836769,
          0.09323515743017197,
          0.07714306563138962
        ]
      },
      {
        "start_

step #5: Generate embedding from text query


In [None]:
# Embed the text query with the same service used for the videos.
# API reference: https://docs.trytldw.ai/tldw/embed-text-text-embed-post
res = requests.post(f'{API_ENDPOINT}/text/embed', headers=headers, json={
    'text': "building collapsed"
})
# Fail fast on HTTP errors instead of hitting a KeyError on 'embedding'.
res.raise_for_status()

text_embedding = res.json()['embedding']
# Print first 10 dimensions of text embedding
print(text_embedding[:10])

[-0.04794558510184288, 0.049645740538835526, 0.02569637820124626, 0.038467127829790115, -0.016627313569188118, 0.028614701703190804, -0.05325068533420563, 0.015509650111198425, 0.06710607558488846, 0.049288492649793625]


step #6: Find the video segments most similar to the text embedding


In [None]:
import numpy as np
import json


def cosine_similarity(a, b):
    """Return the cosine similarity between two 1-D vectors.

    Args:
        a: First vector (anything ``np.dot`` / ``np.linalg.norm`` accept).
        b: Second vector, same length as ``a``.

    Returns:
        The dot product of ``a`` and ``b`` divided by the product of their
        Euclidean norms, or ``0.0`` when either vector has zero norm.
    """
    denominator = np.linalg.norm(a) * np.linalg.norm(b)
    # A zero-norm vector has no direction; dividing would yield NaN (and a
    # runtime warning), which in turn breaks sorting by similarity.
    if denominator == 0:
        return 0.0
    return np.dot(a, b) / denominator


# Build the query vector a single time; every segment is compared against it.
text_embedding = np.array(text_embedding)

closest_segments = []

for media in media_embedding:
    # Each media record is expected to carry a list of per-segment embeddings.
    for segment in media.get('embeddings', []):
        segment_vector = np.array(segment.get('embedding', []))

        # Guard clause: report and skip segments that carry no vector.
        if segment_vector.size == 0:
            print(
                f"Warning: No embedding found for segment in media {media['id']}")
            continue

        score = cosine_similarity(text_embedding, segment_vector)
        closest_segments.append({
            'external_id': media['external_id'],
            'title': media['title'],
            'segment_start_ms': segment['start_ms'],
            'segment_end_ms': segment['end_ms'],
            'similarity': score
        })

# Rank segments from most to least similar to the query.
closest_segments = sorted(
    closest_segments, key=lambda entry: entry['similarity'], reverse=True)

# Show only the five best matches.
print(json.dumps(closest_segments[:5], indent=2))

[
  {
    "external_id": "Destroyed Building",
    "title": "Destroyed Building",
    "segment_start_ms": 70000,
    "segment_end_ms": 72000,
    "similarity": 0.3690587971955406
  },
  {
    "external_id": "Destroyed Building",
    "title": "Destroyed Building",
    "segment_start_ms": 72000,
    "segment_end_ms": 74000,
    "similarity": 0.36605721396028484
  },
  {
    "external_id": "Destroyed Building",
    "title": "Destroyed Building",
    "segment_start_ms": 68000,
    "segment_end_ms": 70000,
    "similarity": 0.36581484976292433
  },
  {
    "external_id": "Destroyed Building",
    "title": "Destroyed Building",
    "segment_start_ms": 256000,
    "segment_end_ms": 258000,
    "similarity": 0.3633373006543512
  },
  {
    "external_id": "Destroyed Building",
    "title": "Destroyed Building",
    "segment_start_ms": 244000,
    "segment_end_ms": 246000,
    "similarity": 0.361978773623242
  }
]
