<a target="_blank" href="https://colab.research.google.com/github/tldw-ai/example-playbooks/blob/main/get_started_video_embedding.ipynb">
  <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>

step #1: Submit videos for embedding


In [11]:
import os

# Get an API key from `https://trytldw.ai/account?tab=api`.
# Preferred: expose it as the TLDW_API_KEY environment variable so the secret
# is never saved inside the notebook. Alternatively, insert the key between
# the quotes of the fallback below (and clear it again before sharing).
API_KEY = os.environ.get('TLDW_API_KEY') or ''
API_ENDPOINT = 'https://api.trytldw.ai/v1'

# Sent with every request below; the API key travels as a bearer token.
headers = {
    'Authorization': f'Bearer {API_KEY}',
}

In [12]:
import requests

# API reference: https://docs.trytldw.ai/tldw/embed-media-media-embed-post
# Submit three videos for embedding. `external_id` is our own label for each
# video; the response pairs it with the server-assigned `id`.
res = requests.post(f'{API_ENDPOINT}/media/embed', headers=headers, timeout=30, json={
    'media_list': [
        {
            'title': 'Destroyed Building',
            'external_id': 'video_0',
            'url': 'https://storage.googleapis.com/test-only-public/Aerial%20footage%20shows%20the%20destruction%20in%20Asheville%20and%20parts%20of%20western%20North%20Carolina%20%5BiLlebC7vuJw%5D.webm'
        },
        {
            'title': 'Explosion',
            'external_id': 'video_1',
            'url': 'https://storage.googleapis.com/test-only-public/Drone%20video%20shows%20deadly%20fireworks%20explosion%20in%20the%20Honolulu%20area%20%5BQbbXlMyq3Cs%5D.webm'
        },
        {
            'title': 'Wildfire Smoke',
            'external_id': 'video_2',
            'url': 'https://storage.googleapis.com/test-only-public/Watch%20from%20above%EF%BC%9A%20Aerial%20footage%20of%20natural%20disasters%20in%202017%20%5BmKWq9m3rkTU%5D.mp4'
        }
    ]
})

# Fail here with a clear HTTP error (for example a missing or invalid API key)
# instead of a confusing KeyError on 'media_list' further down.
res.raise_for_status()

# Parse the response body once and reuse it.
submitted_media = res.json()['media_list']
print(submitted_media)

# Server-assigned ids; the later steps use them to poll status and fetch embeddings.
media_ids = [media['id'] for media in submitted_media]

[{'id': '9194c065-fb01-4f62-a86f-17b7f6412114', 'external_id': 'video_0', 'status': 'PENDING'}, {'id': '1d36d2ad-8bb9-4dfc-9c12-a17182fc2443', 'external_id': 'video_1', 'status': 'PENDING'}, {'id': '473afc78-2ff2-4dc6-a480-561495a3c3ef', 'external_id': 'video_2', 'status': 'PENDING'}]


step #3: Wait until all media are in completed state


In [15]:
import time
import requests

# Seconds to sleep between two polling rounds.
POLL_INTERVAL_SECONDS = 5


def wait_for_processing_completes(media_ids, headers, timeout_seconds=None,
                                  failure_statuses=('FAILED', 'ERROR'),
                                  request_timeout=30):
    """Poll the media endpoint until every media item reports COMPLETED.

    Args:
        media_ids: Iterable of media ids to wait for. It is copied into a set,
            so the caller's collection is never mutated.
        headers: HTTP headers (including authorization) sent with each request.
        timeout_seconds: Optional overall time budget in seconds. ``None``
            (the default) waits indefinitely, as before.
        failure_statuses: Status values treated as terminal failures.
            NOTE(review): the default names are an assumption; confirm them
            against the API reference. A status outside this collection is
            simply polled again.
        request_timeout: Per-request timeout in seconds handed to ``requests``.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status. Without
            this check an invalid API key would make the loop spin forever.
        RuntimeError: If a media item reports one of ``failure_statuses``.
        TimeoutError: If ``timeout_seconds`` elapses with media still pending.
    """
    # Create a copy to avoid mutating the original list
    pending_media = set(media_ids)
    deadline = None
    if timeout_seconds is not None:
        deadline = time.monotonic() + timeout_seconds

    while pending_media:
        # Create a list to avoid modifying set during iteration
        for media_id in list(pending_media):
            # API reference: https://docs.trytldw.ai/tldw/get-media-media-media-id-get
            response = requests.get(
                f'{API_ENDPOINT}/media/{media_id}', headers=headers,
                timeout=request_timeout)
            # An error response carries no usable status; stop instead of
            # treating it as "still pending".
            response.raise_for_status()
            status = response.json().get('status', 'UNKNOWN')

            if status == 'COMPLETED':
                pending_media.remove(media_id)
            elif status in failure_statuses:
                raise RuntimeError(
                    f'Media {media_id} ended in status {status!r} '
                    'and will not reach COMPLETED.')

        if not pending_media:
            print('All media are in COMPLETED state.')
            break

        if deadline is not None and time.monotonic() >= deadline:
            raise TimeoutError(
                f'Timed out after {timeout_seconds} seconds; '
                f'still pending: {sorted(pending_media)}')

        print('Waiting for remaining media to reach COMPLETED state...')
        time.sleep(POLL_INTERVAL_SECONDS)


# Block until every submitted media item reports COMPLETED; the helper polls
# the API and sleeps POLL_INTERVAL_SECONDS seconds between rounds.
wait_for_processing_completes(media_ids, headers)

All media are in COMPLETED state.


step #4: Retrieve embedding for videos


In [20]:
import json
# One full media record (metadata plus per-segment embeddings) per submitted video.
media_embedding = []

for media_id in media_ids:
    # API reference: https://docs.trytldw.ai/tldw/get-media-media-media-id-get
    response = requests.get(
        f'{API_ENDPOINT}/media/{media_id}',
        params={'retrieve_embedding': 'true'},  # encoded as ?retrieve_embedding=true
        headers=headers,
        timeout=60)
    # Do not store an error body as if it were a media record.
    response.raise_for_status()
    media_embedding.append(response.json())

print(json.dumps(media_embedding[0], indent=2))

{
  "id": "9194c065-fb01-4f62-a86f-17b7f6412114",
  "external_id": "video_0",
  "status": "COMPLETED",
  "collection_id": null,
  "title": "Destroyed Building",
  "segments": [
    {
      "start_ms": 0,
      "end_ms": 3641,
      "embedding": [
        0.0572599321603775,
        0.007384188938885927,
        -0.03984015807509422,
        0.04586324468255043,
        0.05828879401087761,
        0.05092426761984825,
        -0.009534257464110851,
        -0.0031139724887907505,
        -0.035703256726264954,
        0.026544537395238876,
        -0.03366890177130699,
        -0.008890689350664616,
        -0.09055044502019882,
        -0.018642177805304527,
        0.018955420702695847,
        -0.013447659090161324,
        -0.12498047202825546,
        0.055443331599235535,
        0.1871180534362793,
        0.044840481132268906,
        -0.006857766304165125,
        0.03948589041829109,
        0.016131216660141945,
        0.05351639539003372,
        -0.0030773247126489878,
  

step #5: Generate embedding from text query


In [18]:
# API reference: https://docs.trytldw.ai/tldw/embed-text-text-embed-post
# Embed the text query so it can be compared against the video segment embeddings.
res = requests.post(f'{API_ENDPOINT}/text/embed', headers=headers, timeout=30, json={
    'text': "building collapsed"
})

# Surface HTTP errors here rather than as a KeyError on 'embedding'.
res.raise_for_status()

text_embedding = res.json()['embedding']
print(text_embedding)

[-0.04794558510184288, 0.049645740538835526, 0.02569637820124626, 0.038467127829790115, -0.016627313569188118, 0.028614701703190804, -0.05325068533420563, 0.015509650111198425, 0.06710607558488846, 0.049288492649793625, -0.04292348027229309, 0.006029452662914991, -0.13395671546459198, 0.0028951717540621758, 0.046169716864824295, -0.04023966193199158, 0.006891637109220028, 0.024754071608185768, 0.04605504125356674, 0.036996353417634964, -0.05672892928123474, 0.0018656929023563862, -0.016361739486455917, 0.12086210399866104, -0.03646036610007286, -0.006132841110229492, 0.03351479768753052, 0.037415534257888794, 0.0009295081254094839, 0.009246827103197575, 0.019983885809779167, -0.04793570563197136, 0.02938525192439556, 0.029309947043657303, -0.11105753481388092, 0.0004374962591100484, -0.07557809352874756, 0.009797724895179272, -0.1441250741481781, -0.006480111740529537, -0.035732921212911606, 0.00847533904016018, -0.054477937519550323, 0.05528617650270462, -0.007706732023507357, 0.01183

step #6: Find the video segments closest to the text embedding


In [22]:
import numpy as np
import json


def cosine_similarity(a, b):
    """Return the cosine similarity between vectors ``a`` and ``b``.

    Args:
        a: First vector (array-like of numbers).
        b: Second vector, same length as ``a``.

    Returns:
        ``dot(a, b) / (||a|| * ||b||)``. When either vector has zero norm the
        similarity is undefined; ``0.0`` is returned instead of ``nan`` so
        that results stay sortable and JSON-friendly.
    """
    denominator = np.linalg.norm(a) * np.linalg.norm(b)
    if denominator == 0:
        # Avoid 0/0 -> nan (plus a RuntimeWarning) for all-zero vectors.
        return 0.0
    return np.dot(a, b) / denominator


# Turn the query embedding into a numpy array up front so the conversion
# happens once rather than once per segment.
text_embedding = np.array(text_embedding)

closest_segments = []

for media in media_embedding:
    # Each media record carries a list of segments, each with its own embedding.
    for segment in media.get('segments', []):
        segment_vector = np.array(segment.get('embedding', []))

        # A segment without an embedding cannot be scored: report it and move on.
        if segment_vector.size == 0:
            print(
                f"Warning: No embedding found for segment in media {media['id']}")
            continue

        score = cosine_similarity(text_embedding, segment_vector)
        closest_segments.append({
            'external_id': media['external_id'],
            'title': media['title'],
            'segment_start_ms': segment['start_ms'],
            'segment_end_ms': segment['end_ms'],
            'similarity': score,
        })

# Best matches first (highest cosine similarity at the top).
closest_segments = sorted(
    closest_segments, key=lambda match: match['similarity'], reverse=True)

print(json.dumps(closest_segments[:5], indent=2))

[
  {
    "external_id": "video_0",
    "title": "Destroyed Building",
    "segment_start_ms": 67767,
    "segment_end_ms": 73714,
    "similarity": 0.36606795553203925
  },
  {
    "external_id": "video_0",
    "title": "Destroyed Building",
    "segment_start_ms": 244437,
    "segment_end_ms": 246807,
    "similarity": 0.36103018683923943
  },
  {
    "external_id": "video_0",
    "title": "Destroyed Building",
    "segment_start_ms": 253807,
    "segment_end_ms": 258795,
    "similarity": 0.35466425751181835
  },
  {
    "external_id": "video_0",
    "title": "Destroyed Building",
    "segment_start_ms": 50158,
    "segment_end_ms": 57169,
    "similarity": 0.3543983433660968
  },
  {
    "external_id": "video_0",
    "title": "Destroyed Building",
    "segment_start_ms": 230448,
    "segment_end_ms": 234626,
    "similarity": 0.3510168216215839
  }
]
