In [1]:
# TwelveLabs

In [6]:
from grp import struct_group
from typing import List
from dotenv import dotenv_values
from twelvelabs import TwelveLabs
from twelvelabs.types import VideoSegment
from twelvelabs.embed import TasksStatusResponse
from pinecone import Pinecone, ServerlessSpec

# Read settings from the local .env file; the API keys used throughout this
# notebook are looked up in `config` by name.
config = dotenv_values(".env")

In [4]:

# 1. Initialize the client
# The TwelveLabs API key is read from the .env-backed `config` mapping.
client = TwelveLabs(api_key=config["TWELVELABS_API_KEY"])

# 2. Upload a video
# The file is opened in binary mode and handed to an embedding task that uses
# the "marengo3.0" model; the handle is closed when the `with` block exits.
with open("pushup.MOV", "rb") as video_file:
    task = client.embed.tasks.create(
        model_name="marengo3.0",
        video_file=video_file, # Open handle of the local file to upload
        # Optional arguments, currently disabled:
        # video_clip_length=5,
        # video_start_offset_sec=30,
        # video_end_offset_sec=60,
        # video_embedding_scope=["clip", "video"]
    )
print(f"Created video embedding task: id={task.id}")

# 3. Monitor the status
def on_task_update(task: TasksStatusResponse):
    """Progress callback: print the current status of the embedding task."""
    status_line = f"  Status={task.status}"
    print(status_line)

# Wait for the embedding task to finish; on_task_update is passed as the
# progress callback. sleep_interval=5 is presumably the polling delay in
# seconds -- TODO confirm against the SDK.
status = client.embed.tasks.wait_for_done(sleep_interval=5, task_id=task.id, callback=on_task_update)
print(f"Embedding done: {status.status}")

# 4. Retrieve the embeddings
# NOTE: `task` is rebound here from the creation response to the retrieval
# response; the code below reads `task.video_embedding` from this object.
task = client.embed.tasks.retrieve(
    task_id=task.id,
    embedding_option=["visual", "audio", "transcription"]
)

# 5. Process the results
def print_segments(segments: List[VideoSegment], max_elements: int = 5):
    """Print a short, human-readable summary of each embedding segment.

    Args:
        segments: Segments to summarise. Each one is read for
            ``embedding_scope``, ``embedding_option``, ``start_offset_sec``,
            ``end_offset_sec`` and ``float_``.
        max_elements: How many leading embedding values to show per segment.

    A segment whose ``float_`` is ``None`` is reported as having 0 values
    instead of raising ``TypeError``.
    """
    for segment in segments:
        print(f"  embedding_scope={segment.embedding_scope} embedding_option={segment.embedding_option} start_offset_sec={segment.start_offset_sec} end_offset_sec={segment.end_offset_sec}")
        # A segment may carry no vector; treat that as an empty one so the
        # summary of the remaining segments is still printed.
        values = segment.float_ if segment.float_ is not None else []
        preview = ", ".join(str(x) for x in values[:max_elements])
        print(
            f"  embeddings: [{preview}...] (total: {len(values)} values)"
        )


# Fail fast: later cells read `segments`, so a missing embedding should stop
# the notebook here instead of surfacing further down as a NameError.
if task.video_embedding is None or task.video_embedding.segments is None:
    raise RuntimeError(
        "The embedding task returned no video segments; cannot build vectors."
    )

print_segments(task.video_embedding.segments)

# Kept as notebook-level names for the cells that build the Pinecone vectors.
segments = task.video_embedding.segments
metadata = task.video_embedding.metadata



Created video embedding task: id=692eb2cd54939c83c1d476c8
  Status=processing
  Status=ready
Embedding done: ready
  embedding_scope=clip embedding_option=audio start_offset_sec=0.0 end_offset_sec=6.0
  embeddings: [0.067871094, -0.032958984, -0.11230469, 0.040039062, 0.02722168...] (total: 512 values)
  embedding_scope=clip embedding_option=audio start_offset_sec=6.0 end_offset_sec=12.0
  embeddings: [0.040039062, -0.076171875, -0.046142578, 0.018676758, 0.053466797...] (total: 512 values)
  embedding_scope=clip embedding_option=audio start_offset_sec=12.0 end_offset_sec=19.5
  embeddings: [0.06591797, -0.048583984, -0.083984375, 0.045898438, 0.037597656...] (total: 512 values)
  embedding_scope=clip embedding_option=visual start_offset_sec=0.0 end_offset_sec=6.0
  embeddings: [0.026489258, 0.026855469, -0.0138549805, 0.078125, -0.0022735596...] (total: 512 values)
  embedding_scope=clip embedding_option=visual start_offset_sec=6.0 end_offset_sec=12.0
  embeddings: [0.0064086914, 0.05

In [2]:
# Load to Pinecone

In [12]:
# Pinecone client; the API key comes from the .env-backed `config` mapping.
pc = Pinecone(api_key=config["PINECONE_API_KEY"])

# Handle to the index at this host; the upsert and query cells below use it.
# NOTE(review): the host is hard-coded -- consider moving it into .env.
index = pc.Index(host="https://gymogul-videos-kuv1rfi.svc.aped-4627-b74a.pinecone.io")

  from .autonotebook import tqdm as notebook_tqdm


In [14]:
# Build one Pinecone record per embedding segment

video_id = "review"

# Each record carries an id of the form "<video_id>-<position>", the segment's
# embedding values, and the segment's time range as metadata.
vectors = [
    {
        "id": f"{video_id}-{idx}",
        "values": seg.float_,
        "metadata": {
            "video_id": video_id,
            "start": seg.start_offset_sec,
            "end": seg.end_offset_sec
        }
    }
    for idx, seg in enumerate(segments)
]


In [16]:
# Send the prepared records to the Pinecone index

index.upsert(vectors=vectors)

{'upserted_count': 8}

In [19]:
from typing import List

from twelvelabs import TwelveLabs
from twelvelabs.types import BaseSegment

# Create a text embedding for the question, using the same "marengo3.0" model
# name that was used for the video embedding task.
res = client.embed.create(
    model_name="marengo3.0",
    text="What is the video about? What does the man say?",
)




In [20]:
# Despite the plural name this holds a single vector: the float values of the
# first segment of the text embedding.
question_vectors = res.text_embedding.segments[0].float_

In [21]:
# Query the index with the question vector: ask for top_k=3 matches, with
# their metadata but without the stored vector values.
index.query(
    namespace="__default__",
    vector=question_vectors,
    top_k=3,
    include_metadata=True,
    include_values=False
)

{'matches': [{'id': 'pushup:7',
              'metadata': {'end': 19.5,
                           'start': 12.0,
                           'transcript': 'The man holds the plank position for '
                                         'several seconds before pushing off '
                                         'the ground with his hands. He '
                                         'returns to a standing position, '
                                         'completing the exercise.',
                           'video_id': 'pushup'},
              'score': 0.483910918,
              'values': []},
             {'id': 'pushup:6',
              'metadata': {'end': 6.0,
                           'start': 0.0,
                           'transcript': 'A man in black shorts and a black '
                                         't-shirt stands with his feet '
                                         'together, then bends down and places '
                                         'his ha

In [41]:
# To delete Pinecone vectors

# for count in range(0,8):
#     print(count)
#     index.delete(ids=f"pushup-{count}", namespace="__default__")

0
1
2
3
4
5
6
7


In [4]:
# Create a Pegasus-enabled index

from twelvelabs import TwelveLabs
from twelvelabs.indexes import IndexesCreateRequestModelsItem
from twelvelabs.tasks import TasksRetrieveResponse

# Initialize the client
client = TwelveLabs(api_key=config["TWELVELABS_API_KEY"])

# Bind the TwelveLabs index to its own name. Reusing `index` here would
# overwrite the Pinecone index handle that the `index.query(...)` cells rely
# on, which breaks the notebook when it is run top to bottom.
pegasus_index = client.indexes.create(
    index_name="pegasus-index",
    models=[
        IndexesCreateRequestModelsItem(
            model_name="pegasus1.2", model_options=["visual", "audio"]
        )
    ]
)
print(f"Created index: id={pegasus_index.id}")

# task = client.tasks.create(
#     index_id=pegasus_index.id,
#     video_url="<YOUR_VIDEO_URL>" # Or use video_file to upload a file from the local file system
#     )
# print(f"Created task: id={task.id}")
#
# def on_task_update(task: TasksRetrieveResponse):
#     print(f"  Status={task.status}")
#
# task = client.tasks.wait_for_done(task_id=task.id, callback=on_task_update)
# if task.status != "ready":
#     raise RuntimeError(f"Indexing failed with status {task.status}")
# print(
#     f"Upload complete. The unique identifier of your video is {task.video_id}.")
#
# gist = client.gist(video_id=task.video_id,types=["title", "topic", "hashtag"])
# print(f"Title={gist.title}\nTopics={gist.topics}\nHashtags={gist.hashtags}")


In [12]:
import json

def get_pegasus_transcript(client: TwelveLabs, pegasus_index_id: str, video_path: str):
    """Upload a video to a Pegasus-enabled index and return a time-coded summary.

    Args:
        client: TwelveLabs client used for the upload and the analyze call.
        pegasus_index_id: ID of the index the video is uploaded to.
        video_path: Path of the local video file to upload.

    Returns:
        The ``"segments"`` list from the model's JSON answer. Each item is
        requested (via the JSON schema below) to have ``start_sec``,
        ``end_sec`` and ``text``.

    Raises:
        RuntimeError: If indexing does not end in status ``"ready"`` or the
            analyze call returns no data.
        ValueError: If the returned data is not valid JSON (``json.loads``).
        KeyError: If the parsed JSON has no ``"segments"`` key.
    """
    # 1) Upload the video to the Pegasus-enabled index
    with open(video_path, "rb") as video_file:
        task = client.tasks.create(
            index_id=pegasus_index_id,
            video_file=video_file
        )

    # 2) Wait for indexing to finish before asking questions about the video
    task = client.tasks.wait_for_done(task_id=task.id)

    if task.status != "ready":
        raise RuntimeError(f"Pegasus indexing failed or not ready. Status={task.status}")

    video_id = task.video_id  # this is what analyze() uses

    # 3) Ask Pegasus for a rich, time-stamped transcript in JSON
    # We'll use open-ended analyze() with a JSON schema so it returns structured text.
    schema = {
        "type": "object",
        "properties": {
            "segments": {
                "type": "array",
                "items": {
                    "type": "object",
                    "properties": {
                        "start_sec": {"type": "number"},
                        "end_sec": {"type": "number"},
                        "text": {"type": "string"},
                    },
                    "required": ["start_sec", "end_sec", "text"],
                },
            }
        },
        "required": ["segments"],
    }

    # Adjacent literals are concatenated, so every piece except the last must
    # end with a space or the sentences run together.
    prompt = (
        "Create a detailed summary of this video. "
        "For each segment, return JSON with: start_sec, end_sec, text. "
        "Use the original language, keep sentences complete, and ensure timestamps "
        "are monotonically increasing and aligned with the video timeline."
    )

    res = client.analyze(
        video_id=video_id,
        prompt=prompt,
        temperature=0.1,
        response_format={"type": "json_schema", "json_schema": schema},
        max_tokens=4000,
    )

    # res.data is expected to be a JSON string shaped like `schema`.
    if not res.data:
        raise RuntimeError(f"Pegasus returned no data for video_id={video_id}")

    parsed = json.loads(res.data)
    return parsed["segments"]  # list of {start_sec, end_sec, text}

In [13]:
def transcript_for_segment(transcript_segments, start, end):
    """Concatenate transcript text that overlaps the time window (start, end).

    Args:
        transcript_segments: Iterable of dicts with ``start_sec``, ``end_sec``
            and ``text`` keys. They do not need to be sorted; ``None`` is
            treated as empty.
        start: Window start, in the same unit as ``start_sec``/``end_sec``.
        end: Window end, in the same unit as ``start_sec``/``end_sec``.

    Returns:
        The texts of all overlapping entries, ordered by ``start_sec`` and
        joined with single spaces. An entry that merely touches the window
        (``end_sec == start`` or ``start_sec == end``) does not count.
    """
    # Sort instead of assuming order: the entries come from a model response,
    # and an early exit on the first late entry would drop overlapping text
    # whenever that response is out of order.
    ordered = sorted(transcript_segments or [], key=lambda t: t["start_sec"])
    chunks = [
        t["text"]
        for t in ordered
        if t["end_sec"] > start and t["start_sec"] < end
    ]
    return " ".join(chunks).strip()

In [14]:
from pinecone import Pinecone

def ingest_video_to_pinecone(
    client: TwelveLabs,
    pegasus_index_id: str,
    video_path: str,
    video_id: str,   # your own ID (filename, UUID, etc.)
    embedding_segments: "List[VideoSegment] | None" = None,
):
    """Build Pinecone records that pair each embedding segment with transcript text.

    Args:
        client: TwelveLabs client, passed on to ``get_pegasus_transcript``.
        pegasus_index_id: ID of the Pegasus-enabled index to upload to.
        video_path: Path of the local video file.
        video_id: Caller-chosen identifier; used as the record id prefix and
            stored in the metadata.
        embedding_segments: Embedding segments of the same video (each read
            for ``start_offset_sec``, ``end_offset_sec`` and ``float_``).
            When omitted, the notebook-level ``segments`` variable is used,
            which matches the previous behaviour.

    Returns:
        A list of dicts with ``id`` (``"<video_id>:<position>"``), ``values``
        and ``metadata`` (``video_id``, ``start``, ``end``, ``transcript``).

    Note:
        Nothing is sent to Pinecone here; the upsert call is commented out.
    """
    # 1) Resolve the embedding segments first, so a missing notebook variable
    # fails before the slow upload/analyze round trip rather than after it.
    if embedding_segments is None:
        embedding_segments = segments  # notebook-level fallback

    # 2) Pegasus transcript (time-coded)
    transcript_segments = get_pegasus_transcript(client, pegasus_index_id, video_path)

    # 3) Build vectors with per-segment transcript
    vectors = []
    for idx, seg in enumerate(embedding_segments):
        start = seg.start_offset_sec
        end = seg.end_offset_sec
        snippet = transcript_for_segment(transcript_segments, start, end)

        vectors.append({
            "id": f"{video_id}:{idx}",
            "values": seg.float_,
            "metadata": {
                "video_id": video_id,
                "start": start,
                "end": end,
                "transcript": snippet,
            },
        })

    # 4) Upsert to Pinecone
    # index.upsert(vectors=vectors)

    return vectors

In [15]:
# Build transcript-enriched records for pushup.MOV. This rebinds the
# notebook-level `vectors`; nothing is uploaded here because the upsert inside
# ingest_video_to_pinecone is commented out.
vectors = ingest_video_to_pinecone(client=client, pegasus_index_id="69220af621611291fd429500", video_path="pushup.MOV", video_id="review")

In [9]:
# Feed matches to LLM chat model
# `index` must be the Pinecone index handle here (the object that provides
# `.query`), not a TwelveLabs index object.

matches = index.query(
    namespace="__default__",
    vector=question_vectors,
    top_k=3,
    include_metadata=True,
    include_values=False
)

In [10]:
# Display the raw query response.
matches

{'matches': [{'id': 'pushup:7',
              'metadata': {'end': 19.5,
                           'start': 12.0,
                           'transcript': 'The man holds the plank position for '
                                         'several seconds before pushing off '
                                         'the ground with his hands. He '
                                         'returns to a standing position, '
                                         'completing the exercise.',
                           'video_id': 'pushup'},
              'score': 0.483910918,
              'values': []},
             {'id': 'pushup:6',
              'metadata': {'end': 6.0,
                           'start': 0.0,
                           'transcript': 'A man in black shorts and a black '
                                         't-shirt stands with his feet '
                                         'together, then bends down and places '
                                         'his ha

In [7]:
import langchain
# Switch LangChain's module-level verbose flag off for this session.
langchain.verbose = False
# langchain.debug = False
# langchain.llm_cache = False
from langchain.chat_models import init_chat_model

# Chat model handed to create_agent further down; the OpenAI key is read from
# the .env-backed `config` mapping under OPEN_AI_KEY.
model = init_chat_model("gpt-4.1-mini", model_provider="openai", api_key=config["OPEN_AI_KEY"])

In [18]:
# Embed a text query with the TwelveLabs Marengo model, so the result can be passed to Pinecone index.query on the video index

from openai import OpenAI

def get_embedding_video(text, model="text-embedding-3-large"):
    """
    Embed ``text`` with the TwelveLabs "marengo3.0" model and return the vector.

    The call goes through the notebook-level ``client``. ``model`` is accepted
    but not used; the model name is fixed to "marengo3.0".
    """
    # Flatten newlines to spaces before embedding.
    flattened = text.replace("\n", " ")
    marengo_response = client.embed.create(model_name="marengo3.0", text=flattened)
    # The vector is the float list of the first text-embedding segment.
    first_segment = marengo_response.text_embedding.segments[0]
    return first_segment.float_

# Example usage: embed a sample question and inspect the resulting vector.
sentence = "What is the review about? Which product are they talking about?"
embedding_vector = get_embedding_video(sentence)

print(f"Original sentence: {sentence}")
# Only the first 50 values are printed to keep the output short.
print(f"Embedding vector (first 50 dimensions): {embedding_vector[:50]}...")
print(f"Vector dimension: {len(embedding_vector)}")


Original sentence: What is the review about? Which product are they talking about?
Embedding vector (first 50 dimensions): [-0.02319336, -0.13574219, -0.10888672, 0.029785156, -0.035888672, 0.055908203, -0.09863281, -0.08691406, 0.043701172, 0.013916016, 0.028442383, 0.06347656, 0.012145996, 0.00793457, 0.061767578, 0.059326172, -0.033935547, -0.041748047, -0.033447266, -0.1015625, 0.04638672, 0.092285156, -0.000831604, 0.037109375, -0.0234375, -0.004425049, -0.02355957, 0.041992188, -0.012390137, -0.007446289, -0.0546875, 0.04736328, -0.055908203, 0.006072998, -0.09033203, 0.0037994385, -0.053466797, 0.045410156, 0.02722168, 0.033203125, 0.010986328, -0.09863281, 0.09863281, -0.052246094, 0.010375977, 0.012390137, 0.04248047, -0.036865234, 0.056396484, 0.034179688]...
Vector dimension: 512


In [21]:
# Embed a text query with an OpenAI embedding model, so the result can be passed to Pinecone index.query on the text index

from openai import OpenAI

def get_embedding_text(text, model="text-embedding-3-large"):
    """
    Embed ``text`` with an OpenAI embedding model and return the vector.

    A new OpenAI client is created on every call, using the OPEN_AI_KEY entry
    of the notebook-level ``config``. ``model`` selects the embedding model.
    """
    openai_client = OpenAI(api_key=config["OPEN_AI_KEY"])

    # Flatten newlines to spaces before embedding.
    flattened = text.replace("\n", " ")
    embedding_response = openai_client.embeddings.create(model=model, input=[flattened])
    # One input was sent, so the vector is on the first entry of `data`.
    first_item = embedding_response.data[0]
    return first_item.embedding

# Example usage: embed a sample product description and inspect the vector.
# NOTE: this rebinds `sentence` and `embedding_vector` from the previous cell.
sentence = "The Rolex Yachtmaster 40 was first released in 1992. The current reference number 126622 is $12,500 USD. It features a 40 mm oyster case in oystersteel and platinum."
embedding_vector = get_embedding_text(sentence)

print(f"Original sentence: {sentence}")
# Only the first 50 values are printed to keep the output short.
print(f"Embedding vector (first 50 dimensions): {embedding_vector[:50]}...")
print(f"Vector dimension: {len(embedding_vector)}")


Original sentence: The Rolex Yachtmaster 40 was first released in 1992. The current reference number 126622 is $12,500 USD. It features a 40 mm oyster case in oystersteel and platinum.
Embedding vector (first 50 dimensions): [0.006935261655598879, 0.037173446267843246, -0.013058535754680634, 0.03287992626428604, -0.036461565643548965, -0.048808224499225616, -0.016795901581645012, 0.03497106954455376, -0.029454005882143974, -0.01055583544075489, -0.00029424112290143967, 0.0012916716514155269, 0.03216804563999176, 0.002591685624793172, 0.037039969116449356, -0.017574520781636238, -0.008014203980565071, -0.0106058893725276, -0.09645964205265045, -5.852669710293412e-05, -0.03975400701165199, -0.022457566112279892, -0.041266750544309616, 0.009348977357149124, -0.02776329219341278, -0.008164365775883198, -0.006896330509334803, -0.002861421089619398, 0.005124974530190229, -0.018219660967588425, 0.0003378645924385637, -0.009421277791261673, -0.04707301780581474, 0.03474860638380051, -0.0082311

In [22]:
# v2

from langchain.agents.middleware import dynamic_prompt, ModelRequest
from langchain.agents import create_agent


@dynamic_prompt
def prompt_with_context(request: ModelRequest) -> str:
    """Build a system prompt that embeds search results for the latest message.

    The text of the last message in the request state is embedded twice (once
    with get_embedding_video, once with get_embedding_text). Each vector is
    used to query its own Pinecone index, and the combined matches are placed
    verbatim into the returned system prompt.
    """
    last_query = request.state["messages"][-1].text

    # One embedding per index, since each index is queried with its own vector
    embedding_video = get_embedding_video(last_query)
    embedding_text = get_embedding_text(last_query)

    # Options shared by both Pinecone queries
    shared_options = dict(top_k=3, include_metadata=True, include_values=False)

    video_index = pc.Index(host="https://gymogul-videos-kuv1rfi.svc.aped-4627-b74a.pinecone.io")
    video_hits = video_index.query(
        namespace="__default__",
        vector=embedding_video,
        **shared_options,
    )

    text_index = pc.Index(host="https://test-index-3-kuv1rfi.svc.aped-4627-b74a.pinecone.io")
    text_hits = text_index.query(
        namespace="text",
        vector=embedding_text,
        **shared_options,
    )

    # Video matches first, then text matches
    context = video_hits['matches'] + text_hits['matches']

    return (
        "You are a helpful assistant. Use the following context in your response:"
        f"\n\n{context}"
    )


# Agent with no tools; prompt_with_context is registered as middleware and
# returns the system prompt string with the Pinecone matches in it.
agent = create_agent(model, tools=[], middleware=[prompt_with_context])

In [25]:
from langchain.messages import SystemMessage, HumanMessage, AIMessage
from langchain_core.output_parsers import StrOutputParser

# Input state for the agent: a single user message. prompt_with_context reads
# the text of the last message in this list to build its search context.
messages = {
    "messages": [
        {"role": "user", "content": "What is the review about? Which product are they talking about? How much does it cost?"}
    ]
}


# Run the agent once on that state.
result = agent.invoke(messages)

In [26]:
# Content of the last message in the result, i.e. the agent's reply.
result['messages'][-1].content

'The review is about the Rolex Yachtmaster 40 watch. It features a 40 mm diameter face, a two-tone (polished center link) bracelet, and a design that sits beautifully on the wrist with curved edges and subtle crown details. The current reference number for the Rolex Yachtmaster 40 is 126622, and it costs $12,500 USD. It has an oyster case made of oystersteel and platinum.'