In [4]:
import numpy as np
from tqdm import tqdm
import os
import json

from videograph import VideoGraph
from utils.general import *
from utils.video_processing import *
from utils.chat_api import *
from prompts import *

from face_processing import process_faces
from voice_processing import process_voices
from memory_processing import (
    process_captions,
    generate_captions_and_thinkings_with_ids,
)
from retrieve import answer_with_retrieval

# Load the pipeline settings. The context managers close each JSON file as
# soon as it has been parsed instead of leaving the handle open until the
# garbage collector gets to it.
with open("configs/processing_config.json") as config_file:
    processing_config = json.load(config_file)
with open("configs/memory_config.json") as config_file:
    memory_config = json.load(config_file)

In [5]:
def process_segment(video_graph, base64_video, base64_frames, base64_audio):
    """Run the full per-segment pipeline: voices, faces, then captions.

    The steps run in a fixed order because caption generation consumes the
    results of both the voice and the face step.

    Args:
        video_graph: Graph object handed to every processing helper
            (presumably updated in place by them; confirm in the helpers).
        base64_video: Video payload of the segment.
        base64_frames: Sized collection of the segment's frames.
        base64_audio: Audio payload of the segment.

    Returns:
        None.
    """
    # Voices first, then faces; both mappings feed the caption step below.
    voices_by_id = process_voices(video_graph, base64_audio, base64_video)
    print("Finish processing voices")

    print(f"processing {len(base64_frames)} frames...")
    faces_by_id = process_faces(video_graph, base64_frames)
    print("Finish processing faces")

    episodic, semantic = generate_captions_and_thinkings_with_ids(
        video_graph,
        base64_video,
        base64_frames,
        base64_audio,
        faces_by_id,
        voices_by_id,
    )

    # Store episodic captions before semantic ones, as two separate passes.
    for memory_type, captions in (("episodic", episodic), ("semantic", semantic)):
        process_captions(video_graph, captions, type=memory_type)

    print("Finish processing segment")


def streaming_process_video(
    video_graph, video_path, interval_seconds, fps, segment_limit=None
):
    """Process a video segment by segment, feeding each one to ``process_segment``.

    Two input layouts are supported:

    * A single video file is cut into consecutive windows of
      ``interval_seconds``. Only full-length windows are processed; a shorter
      remainder at the end of the video is skipped.
    * A directory is treated as a set of pre-cut clips (``.mp4``, ``.avi``,
      ``.mov``). Clips are processed in the numeric order given by the digits
      in their base name; ``interval_seconds`` is not used in this mode.

    If ``video_path`` is neither a file nor a directory, nothing is processed.

    Args:
        video_graph (VideoGraph): Graph object passed to ``process_segment``
            for every segment.
        video_path (str): Path to the video file or directory containing clips
        interval_seconds (float): Length of each segment in seconds
            (single-file mode only)
        fps (float): Frames per second to extract from each segment
        segment_limit (int, optional): Stop after this many clips have been
            loaded. Clips that yield no frames still count. ``None`` (the
            default) means no limit.

    Returns:
        None: all results are handed to ``process_segment``.
    """
    if os.path.isfile(video_path):
        # Process single video file
        video_info = get_video_info(video_path)
        print(video_info)

        # Process each interval
        count = 0
        for start_time in np.arange(0, video_info["duration"], interval_seconds):
            # Drop the trailing window if it would run past the end of the video.
            if start_time + interval_seconds > video_info["duration"]:
                break

            print("=" * 20)
            count += 1

            print(f"Loading {count}-th clip starting at {start_time} seconds...")
            base64_video, base64_frames, base64_audio = process_video_clip(
                video_path, start_time, interval_seconds, fps, audio_format="wav"
            )

            # Process frames for this interval (clips without frames are skipped)
            if base64_frames:
                print(
                    f"Starting processing {count}-th clip starting at {start_time} seconds..."
                )
                process_segment(
                    video_graph,
                    base64_video,
                    base64_frames,
                    base64_audio,
                )

            if segment_limit is not None and count >= segment_limit:
                break

    elif os.path.isdir(video_path):
        # Process directory of numbered clips, ordered by the number in the
        # base name (see _clip_sort_key for why the extension is excluded).
        video_files = sorted(
            (
                f
                for f in os.listdir(video_path)
                if f.endswith((".mp4", ".avi", ".mov"))
            ),
            key=_clip_sort_key,
        )

        for count, video_file in enumerate(video_files, 1):
            print("=" * 20)
            full_path = os.path.join(video_path, video_file)
            print(f"Processing clip {count}: {full_path}")

            # start=0 / length=None: the whole pre-cut clip is passed through.
            base64_video, base64_frames, base64_audio = process_video_clip(
                full_path, 0, None, fps, audio_format="wav"
            )

            if base64_frames:
                process_segment(
                    video_graph,
                    base64_video,
                    base64_frames,
                    base64_audio,
                )

            if segment_limit is not None and count >= segment_limit:
                break


def _clip_sort_key(file_name):
    """Return a sort key that orders clip files by the digits in their base name.

    The extension is stripped first so that the "4" in ".mp4" does not become
    part of the number (otherwise "1.mp4" would sort as 14, after "2.avi").
    Names without any digit sort after all numbered clips, alphabetically,
    instead of failing on ``int("")``.
    """
    stem = os.path.splitext(file_name)[0]
    digits = "".join(filter(str.isdigit, stem))
    if digits:
        return (0, int(digits), file_name)
    return (1, 0, file_name)

In [6]:
# Build and save one graph per configured video. Each entry of "video_paths"
# may be a directory of clips or a single video file.
video_paths = processing_config["video_paths"]
save_dir = "data/video_graphs"

for video_path in video_paths:
    # Every video gets its own VideoGraph instance built from memory_config.
    video_graph = VideoGraph(**memory_config)

    streaming_process_video(
        video_graph,
        video_path,
        interval_seconds=processing_config["interval_seconds"],
        fps=processing_config["fps"],
        segment_limit=processing_config["segment_limit"],
    )

    # Refresh equivalences once per video, after all segments are processed.
    video_graph.refresh_equivalences()

    # Both configs are handed to save_video_graph along with the graph.
    configs = (processing_config, memory_config)
    save_video_graph(video_graph, video_path, save_dir, configs)

Processing clip 1: data/videos/clipped/5 Poor People vs 1 Secret Millionaire/1.mp4
{'video_found': True, 'audio_found': True, 'metadata': {'major_brand': 'isom', 'minor_version': '512', 'compatible_brands': 'isomiso2avc1mp41', 'encoder': 'Lavf61.1.100'}, 'inputs': [{'streams': [{'input_number': 0, 'stream_number': 0, 'stream_type': 'video', 'language': None, 'default': True, 'size': [1280, 720], 'bitrate': 1044, 'fps': 23.976023976023978, 'codec_name': 'h264', 'profile': '(High)', 'metadata': {'Metadata': '', 'handler_name': 'VideoHandler', 'vendor_id': '[0][0][0][0]', 'encoder': 'Lavc61.3.100 libx264'}}, {'input_number': 0, 'stream_number': 1, 'stream_type': 'audio', 'language': None, 'default': True, 'fps': 44100, 'bitrate': 129, 'metadata': {'Metadata': '', 'handler_name': 'SoundHandler', 'vendor_id': '[0][0][0][0]'}}], 'input_number': 0}], 'duration': 30.0, 'bitrate': 1179, 'start': 0.0, 'default_video_input_number': 0, 'default_video_stream_number': 0, 'video_codec_name': 'h264', 

2025-03-31 09:34:22,366 - httpx - INFO - HTTP Request: POST https://search-va.byteintl.net/gpt/openapi/online/multimodal/crawl/openai/deployments/gemini-1.5-pro-002/chat/completions?api-version=2024-03-01-preview "HTTP/1.1 200 OK"


Voice node added with ID 0
Voice node added with ID 1
Voice node added with ID 2
Voice node added with ID 3
Node 3 updated with 1 embeddings
Node 0 updated with 1 embeddings
Finish processing voices
processing 150 frames...


100%|██████████| 38/38 [00:10<00:00,  3.53it/s]


Image node added with ID 4
Image node added with ID 5
Image node added with ID 6
Image node added with ID 7
Image node added with ID 8
Image node added with ID 9
Image node added with ID 10
Image node added with ID 11
Finish processing faces
8 faces detected
Generating captions 0 times


2025-03-31 09:34:43,724 - httpx - INFO - HTTP Request: POST https://search-va.byteintl.net/gpt/openapi/online/multimodal/crawl/openai/deployments/gemini-1.5-pro-002/chat/completions?api-version=2024-03-01-preview "HTTP/1.1 200 OK"


Generating thinkings 0 times


2025-03-31 09:34:50,588 - httpx - INFO - HTTP Request: POST https://search-va.byteintl.net/gpt/openapi/online/multimodal/crawl/openai/deployments/gemini-1.5-pro-002/chat/completions?api-version=2024-03-01-preview "HTTP/1.1 200 OK"


Text node of type episodic added with ID 12 and contents: ["In a brightly lit studio setting, four individuals, <face_4>, <face_7>, <face_6>, and <face_5>, are seated at a black table covered with a black tablecloth. They face five other individuals, <face_10>, <face_8>, <face_9>, <face_11>. On the table, there's an 'ON AIR' sign, water bottles, paper cups, markers, and paper."]
Edge added between 12 and 4
Edge added between 12 and 7
Edge added between 12 and 6
Edge added between 12 and 5
Edge added between 12 and 10
Edge added between 12 and 8
Edge added between 12 and 9
Edge added between 12 and 11
Text node of type episodic added with ID 13 and contents: ['<voice_0> introduces the individuals seated at the table as Denny, Herm, Aaron, and JC, and then introduces the other five, stating that they claim to be millionaires but only one actually is.']
Edge added between 13 and 0
Text node of type episodic added with ID 14 and contents: ['<face_10> wears a black bomber jacket with a plai

2025-03-31 09:35:05,867 - httpx - INFO - HTTP Request: POST https://search-va.byteintl.net/gpt/openapi/online/multimodal/crawl/openai/deployments/gemini-1.5-pro-002/chat/completions?api-version=2024-03-01-preview "HTTP/1.1 200 OK"


Voice node added with ID 25
Node 3 updated with 1 embeddings
Node 25 updated with 1 embeddings
Node 25 updated with 1 embeddings
Node 0 updated with 1 embeddings
Node 0 updated with 1 embeddings
Node 0 updated with 1 embeddings
Finish processing voices
processing 150 frames...


100%|██████████| 38/38 [00:13<00:00,  2.86it/s]


Node 11 updated with 3 embeddings
Node 7 updated with 3 embeddings
Node 5 updated with 3 embeddings
Node 6 updated with 3 embeddings
Node 4 updated with 3 embeddings
Image node added with ID 26
Node 10 updated with 3 embeddings
Node 9 updated with 3 embeddings
Finish processing faces
8 faces detected
Generating captions 0 times


2025-03-31 09:35:32,315 - httpx - INFO - HTTP Request: POST https://search-va.byteintl.net/gpt/openapi/online/multimodal/crawl/openai/deployments/gemini-1.5-pro-002/chat/completions?api-version=2024-03-01-preview "HTTP/1.1 200 OK"


Generating thinkings 0 times


2025-03-31 09:35:37,661 - httpx - INFO - HTTP Request: POST https://search-va.byteintl.net/gpt/openapi/online/multimodal/crawl/openai/deployments/gemini-1.5-pro-002/chat/completions?api-version=2024-03-01-preview "HTTP/1.1 200 OK"


Text node of type episodic added with ID 27 and contents: ["In a studio setting, four commentators, <face_7>, <face_26>, <face_4>, and <face_5>, sit at a table facing five individuals, <face_10>, <face_8>, <face_9>, <face_11>, and <face_12>. The table has an 'ON AIR' sign, water bottles, and note-taking materials. <face_7> wears a black hoodie. <face_26> wears a dark blue Drexel hoodie and glasses. <face_4> wears a baseball-style shirt. <face_5> wears a white t-shirt with tattoos on his arms."]
Edge added between 27 and 7
Edge added between 27 and 26
Edge added between 27 and 4
Edge added between 27 and 5
Edge added between 27 and 10
Edge added between 27 and 8
Edge added between 27 and 9
Edge added between 27 and 11
Edge added between 27 and 7
Edge added between 27 and 26
Edge added between 27 and 4
Edge added between 27 and 5
Text node of type episodic added with ID 28 and contents: ['<face_10> is wearing a black bomber jacket, plaid shirt, and jeans. <face_8>, a woman, wears a purpl

2025-03-31 09:35:50,614 - httpx - INFO - HTTP Request: POST https://search-va.byteintl.net/gpt/openapi/online/multimodal/crawl/openai/deployments/gemini-1.5-pro-002/chat/completions?api-version=2024-03-01-preview "HTTP/1.1 200 OK"


Voice node added with ID 38
Voice node added with ID 39
Node 3 updated with 1 embeddings
Voice node added with ID 40
Node 0 updated with 1 embeddings
Node 0 updated with 1 embeddings
Node 3 updated with 1 embeddings
Voice node added with ID 41
Finish processing voices
processing 150 frames...


100%|██████████| 38/38 [00:17<00:00,  2.13it/s]


Node 9 updated with 3 embeddings
Node 11 updated with 3 embeddings
Node 26 updated with 3 embeddings
Node 4 updated with 3 embeddings
Node 6 updated with 3 embeddings
Node 5 updated with 3 embeddings
Node 7 updated with 1 embeddings
Node 10 updated with 3 embeddings
Finish processing faces
8 faces detected
Generating captions 0 times


2025-03-31 09:36:19,870 - httpx - INFO - HTTP Request: POST https://search-va.byteintl.net/gpt/openapi/online/multimodal/crawl/openai/deployments/gemini-1.5-pro-002/chat/completions?api-version=2024-03-01-preview "HTTP/1.1 200 OK"


Generating thinkings 0 times


2025-03-31 09:36:25,586 - httpx - INFO - HTTP Request: POST https://search-va.byteintl.net/gpt/openapi/online/multimodal/crawl/openai/deployments/gemini-1.5-pro-002/chat/completions?api-version=2024-03-01-preview "HTTP/1.1 200 OK"


Text node of type episodic added with ID 42 and contents: ["In a studio setting, <face_7>, <face_26>, <face_4>, and <face_5> sit at a table covered with a black tablecloth, facing five standing individuals. On the table are water bottles, paper, pens, and an 'ON AIR' sign."]
Edge added between 42 and 7
Edge added between 42 and 26
Edge added between 42 and 4
Edge added between 42 and 5
Text node of type episodic added with ID 43 and contents: ['<face_26> wears a dark blue Drexel University hoodie. <face_4> wears a black and white baseball-style jersey. <face_5> wears a light beige t-shirt and has noticeable tattoos on his arms.']
Edge added between 43 and 26
Edge added between 43 and 4
Edge added between 43 and 5
Text node of type episodic added with ID 44 and contents: ['<face_7> wears a dark-colored hoodie. <face_4> raises his hands, gesturing to <face_10>. <face_6> looks at and gestures to <face_10> as well. <face_26> crosses his arms and leans back in his chair. <face_5> appears re

2025-03-31 09:36:42,081 - httpx - INFO - HTTP Request: POST https://search-va.byteintl.net/gpt/openapi/online/multimodal/crawl/openai/deployments/gemini-1.5-pro-002/chat/completions?api-version=2024-03-01-preview "HTTP/1.1 200 OK"


Voice node added with ID 56
Voice node added with ID 57
Voice node added with ID 58
Voice node added with ID 59
Finish processing voices
processing 150 frames...


100%|██████████| 38/38 [00:08<00:00,  4.23it/s]


Node 8 updated with 3 embeddings
Node 10 updated with 3 embeddings
Node 5 updated with 3 embeddings
Node 6 updated with 3 embeddings
Node 4 updated with 3 embeddings
Node 7 updated with 2 embeddings
Node 26 updated with 3 embeddings
Node 9 updated with 3 embeddings
Finish processing faces
8 faces detected
Generating captions 0 times


2025-03-31 09:37:02,571 - httpx - INFO - HTTP Request: POST https://search-va.byteintl.net/gpt/openapi/online/multimodal/crawl/openai/deployments/gemini-1.5-pro-002/chat/completions?api-version=2024-03-01-preview "HTTP/1.1 200 OK"


Generating thinkings 0 times


2025-03-31 09:37:07,865 - httpx - INFO - HTTP Request: POST https://search-va.byteintl.net/gpt/openapi/online/multimodal/crawl/openai/deployments/gemini-1.5-pro-002/chat/completions?api-version=2024-03-01-preview "HTTP/1.1 200 OK"


Text node of type episodic added with ID 60 and contents: ["In a brightly lit studio, four commentators, <face_7>, <face_26>, <face_4>, and <face_5>, sit at a table facing five standing individuals, <face_10> through <face_12>.  The table is covered in a black tablecloth and has water bottles, paper, pens, and an 'ON AIR' sign."]
Edge added between 60 and 7
Edge added between 60 and 26
Edge added between 60 and 4
Edge added between 60 and 5
Edge added between 60 and 10
Text node of type episodic added with ID 61 and contents: ["<face_7>, wearing a dark hoodie, listens and reacts to the others' commentary. <face_26>, wearing a Drexel University hoodie, watches the standing individuals and speaks to them. <face_4>, in a black and white baseball jersey, leans in and participates actively in the discussion."]
Edge added between 61 and 7
Edge added between 61 and 26
Edge added between 61 and 4
Text node of type episodic added with ID 62 and contents: ['<face_5>, wearing a beige t-shirt and 

2025-03-31 09:37:23,263 - httpx - INFO - HTTP Request: POST https://search-va.byteintl.net/gpt/openapi/online/multimodal/crawl/openai/deployments/gemini-1.5-pro-002/chat/completions?api-version=2024-03-01-preview "HTTP/1.1 200 OK"


Node 3 updated with 1 embeddings
Node 59 updated with 1 embeddings
Node 57 updated with 1 embeddings
Node 59 updated with 1 embeddings
Voice node added with ID 72
Node 58 updated with 1 embeddings
Voice node added with ID 73
Finish processing voices
processing 150 frames...


100%|██████████| 38/38 [00:12<00:00,  3.04it/s]


Node 5 updated with 3 embeddings
Node 6 updated with 3 embeddings
Node 4 updated with 3 embeddings
Node 26 updated with 3 embeddings
Node 7 updated with 3 embeddings
Node 9 updated with 3 embeddings
Node 10 updated with 3 embeddings
Node 8 updated with 2 embeddings
Node 11 updated with 3 embeddings
Finish processing faces
9 faces detected
Generating captions 0 times


2025-03-31 09:37:49,490 - httpx - INFO - HTTP Request: POST https://search-va.byteintl.net/gpt/openapi/online/multimodal/crawl/openai/deployments/gemini-1.5-pro-002/chat/completions?api-version=2024-03-01-preview "HTTP/1.1 200 OK"


Generating thinkings 0 times


2025-03-31 09:37:55,248 - httpx - INFO - HTTP Request: POST https://search-va.byteintl.net/gpt/openapi/online/multimodal/crawl/openai/deployments/gemini-1.5-pro-002/chat/completions?api-version=2024-03-01-preview "HTTP/1.1 200 OK"


Text node of type episodic added with ID 74 and contents: ["In a studio setting, four commentators, <face_7>, <face_26>, <face_4>, and <face_5>, sit at a table facing five standing individuals.  On the table is an 'ON AIR' sign, water bottles, paper, and pens."]
Edge added between 74 and 7
Edge added between 74 and 26
Edge added between 74 and 4
Edge added between 74 and 5
Text node of type episodic added with ID 75 and contents: ['<face_26> wears a Drexel University hoodie. <face_4> sports a black and white baseball jersey.  <face_5> wears a light beige t-shirt. <face_7> is wearing a dark hoodie.']
Edge added between 75 and 26
Edge added between 75 and 4
Edge added between 75 and 5
Edge added between 75 and 7
Text node of type episodic added with ID 76 and contents: ['<face_4> laughs heartily, throwing his head down on the table in response to a comment. <face_26> watches the standing individuals with folded arms. <face_5> covers his mouth with his hand, smiling broadly. <face_7> watc

  0%|          | 0/88 [00:00<?, ?it/s]

Generating equivalences 0 times


2025-03-31 09:37:56,890 - httpx - INFO - HTTP Request: POST https://search-va.byteintl.net/gpt/openapi/online/v2/crawl/openai/deployments/gpt-4o-2024-11-20/chat/completions?api-version=2024-03-01-preview "HTTP/1.1 200 OK"
  1%|          | 1/88 [00:01<01:28,  1.01s/it]

Generating equivalences 0 times


2025-03-31 09:37:57,758 - httpx - INFO - HTTP Request: POST https://search-va.byteintl.net/gpt/openapi/online/v2/crawl/openai/deployments/gpt-4o-2024-11-20/chat/completions?api-version=2024-03-01-preview "HTTP/1.1 200 OK"
  2%|▏         | 2/88 [00:01<01:19,  1.08it/s]

Generating equivalences 0 times


2025-03-31 09:37:58,373 - httpx - INFO - HTTP Request: POST https://search-va.byteintl.net/gpt/openapi/online/v2/crawl/openai/deployments/gpt-4o-2024-11-20/chat/completions?api-version=2024-03-01-preview "HTTP/1.1 200 OK"
  5%|▍         | 4/88 [00:02<00:45,  1.86it/s]

Generating equivalences 0 times


2025-03-31 09:37:59,233 - httpx - INFO - HTTP Request: POST https://search-va.byteintl.net/gpt/openapi/online/v2/crawl/openai/deployments/gpt-4o-2024-11-20/chat/completions?api-version=2024-03-01-preview "HTTP/1.1 200 OK"
 30%|██▉       | 26/88 [00:03<00:05, 11.75it/s]

Generating equivalences 0 times


2025-03-31 09:37:59,810 - httpx - INFO - HTTP Request: POST https://search-va.byteintl.net/gpt/openapi/online/v2/crawl/openai/deployments/gpt-4o-2024-11-20/chat/completions?api-version=2024-03-01-preview "HTTP/1.1 200 OK"
 48%|████▊     | 42/88 [00:03<00:02, 16.31it/s]

Generating equivalences 0 times


2025-03-31 09:38:00,424 - httpx - INFO - HTTP Request: POST https://search-va.byteintl.net/gpt/openapi/online/v2/crawl/openai/deployments/gpt-4o-2024-11-20/chat/completions?api-version=2024-03-01-preview "HTTP/1.1 200 OK"
 65%|██████▍   | 57/88 [00:04<00:01, 18.76it/s]

Generating equivalences 0 times


2025-03-31 09:38:01,014 - httpx - INFO - HTTP Request: POST https://search-va.byteintl.net/gpt/openapi/online/v2/crawl/openai/deployments/gpt-4o-2024-11-20/chat/completions?api-version=2024-03-01-preview "HTTP/1.1 200 OK"
 68%|██████▊   | 60/88 [00:05<00:01, 14.76it/s]

Generating equivalences 0 times


2025-03-31 09:38:01,929 - httpx - INFO - HTTP Request: POST https://search-va.byteintl.net/gpt/openapi/online/v2/crawl/openai/deployments/gpt-4o-2024-11-20/chat/completions?api-version=2024-03-01-preview "HTTP/1.1 200 OK"
100%|██████████| 88/88 [00:06<00:00, 14.54it/s]


Saving video graph to data/video_graphs/5-Poor-People-vs-1-Secret-Millionaire_60_5_5_10_20_0.3_0.6_0.75.pkl


In [10]:
# Reload a previously saved graph and answer one question against it.
# NOTE(review): the file is a .pkl; presumably load_video_graph unpickles it,
# so only load graph files from a trusted source. Confirm in utils.general.
video_graph_path = "data/video_graphs/5-Poor-People-vs-1-Secret-Millionaire_60_5_5_10_20_0.3_0.6_0.75.pkl"
video_graph = load_video_graph(video_graph_path)
# Disabled debugging aids: dump all text-node contents, and list the text
# attached to every edge whose weight is greater than 1.
# for text_node in video_graph.text_nodes:
#     print(video_graph.nodes[text_node].metadata['contents'])
# for nodes, weight in video_graph.edges.items():
#     if weight > 1:
#         if video_graph.nodes[nodes[0]].type in ["episodic", "semantic"]:
#            print(video_graph.nodes[nodes[0]].metadata['contents'])
#         else:
#            print(video_graph.nodes[nodes[1]].metadata['contents'])
#         print(weight)

# Override the loaded graph's text_matching_threshold for this session only;
# the saved file is not modified. 0.4 is a hand-picked value (TODO: move to config).
video_graph.text_matching_threshold = 0.4
# video_graph.refresh_equivalences()

# Alternative questions kept for manual experiments.
# question = 'What does Demar Randy wear?'
# question = "Who has an OnlyFans account?"
question = "What are the people doing in the video?"
# presumably query_num is the number of generated search queries and topk the
# number of hits kept; verify against retrieve.answer_with_retrieval.
answer = answer_with_retrieval(video_graph, question, query_num=10, topk=15)

# Disabled: summarize the graph, save it under a new file name, and visualize it.
# video_graph.summarize(logging=True)
# save_dir = "data/video_graphs"
# save_video_graph(
#     video_graph, None, save_dir, None, file_name='5-Poor-People-vs-1-Secret-Millionaire_60_5_5_10_20_0.3_0.6_0.75_augmented.pkl'
# )
# video_graph.visualize()

Loading video graph from data/video_graphs/5-Poor-People-vs-1-Secret-Millionaire_60_5_5_10_20_0.3_0.6_0.75.pkl
Generating queries 0 times


2025-03-31 09:42:08,880 - httpx - INFO - HTTP Request: POST https://search-va.byteintl.net/gpt/openapi/online/v2/crawl/openai/deployments/gpt-4o-2024-11-20/chat/completions?api-version=2024-03-01-preview "HTTP/1.1 200 OK"


Queries: ['What actions are the individuals performing in the video?', 'Are there any group activities or interactions between the people?', 'What objects or tools are the people using in the video?', 'Are there any conversations or dialogues happening between the individuals?', 'What gestures or body language are the people displaying?', 'Are there any notable events or key moments involving the individuals?', 'How are the individuals positioned or arranged in the scene?', 'Are there any signs of collaboration or conflict among the people?', 'What is the setting or environment where the people are located?', 'Are there any specific tasks or roles assigned to the individuals in the video?']
["In a brightly lit studio setting, four individuals, <face_4>, <face_7>, <face_6>, and <face_5>, are seated at a black table covered with a black tablecloth. They face five other individuals, <face_10>, <face_8>, <face_9>, <character_0>. On the table, there's an 'ON AIR' sign, water bottles, paper 

2025-03-31 09:42:11,527 - httpx - INFO - HTTP Request: POST https://search-va.byteintl.net/gpt/openapi/online/v2/crawl/openai/deployments/gpt-4o-2024-11-20/chat/completions?api-version=2024-03-01-preview "HTTP/1.1 200 OK"


[FINAL] The people in the video are participating in a show where four commentators—Face 7, Face 26, Character 2, and Face 5—are seated at a table analyzing and discussing the clothing, demeanor, and appearance of five standing individuals—Face 10, Face 8, Face 9, Stewart Thompson, and Face 12—to guess their financial status or social standing.


In [None]:
from utils.chat_api import *
from utils.general import plot_cosine_similarity_distribution

# Compare one probe sentence against the embeddings of every text node in the
# saved graph and plot the resulting cosine-similarity distribution.
video_graph_path = "data/video_graphs/5-Poor-People-vs-1-Secret-Millionaire_60_5_5_10_20_0.3_0.6_0.75.pkl"
video_graph = load_video_graph(video_graph_path)

# Gather the embeddings of all episodic/semantic nodes. Iterating over
# values() avoids binding a kernel-global variable named `id`, which would
# shadow the built-in id() for every cell executed afterwards.
graph_embeddings = []
for node in video_graph.nodes.values():
    if node.type in ("episodic", "semantic"):
        graph_embeddings.extend(node.embeddings)

# texts = ["Clothing style of Demar Randy", "<voice_44> introduces himself as Demar Randy."]
texts = ["<face_4> points at <face_9>."]
# Index [0] of the helper's result is what gets plotted; the remaining
# elements (if any) are ignored here.
embs = parallel_get_embedding("text-embedding-3-large", texts)[0]

plot_cosine_similarity_distribution(graph_embeddings, embs)

In [11]:
# Print the stored contents of every text node, one node per line.
for node_id in video_graph.text_nodes:
    contents = video_graph.nodes[node_id].metadata["contents"]
    print(contents)

["In a brightly lit studio setting, four individuals, <face_4>, <face_7>, <face_6>, and <face_5>, are seated at a black table covered with a black tablecloth. They face five other individuals, <face_10>, <face_8>, <face_9>, <face_11>. On the table, there's an 'ON AIR' sign, water bottles, paper cups, markers, and paper."]
['<voice_0> introduces the individuals seated at the table as Denny, Herm, Aaron, and JC, and then introduces the other five, stating that they claim to be millionaires but only one actually is.']
['<face_10> wears a black bomber jacket with a plaid shirt underneath and jeans. <face_8> wears a purple velvet blazer and black pants. <face_9> wears a white long-sleeved shirt and khaki pants. <face_11> wears a blue button-down shirt, gray pants, and a camo jacket. All individuals are standing and facing the seated commentators.']
["<face_4> addresses the group, stating that the first person he speaks to is 'broke,' and <face_10> responds with laughter. <face_6>, wearing a

In [None]:
# Visualize the graph currently bound to `video_graph` (loaded in an earlier
# cell); this cell was saved without output.
video_graph.visualize()

In [None]:
# from retrieve import retrieve_from_videograph
# from videograph import VideoGraph
# from utils.chat_api import (
#     generate_messages,
#     get_response_with_retry,
#     parallel_get_embedding,
# )
# from utils.general import validate_and_fix_python_list
# from prompts import prompt_memory_retrieval

# MAX_RETRIES = 3


# def generate_queries(question, existing_knowledge=None, query_num=1):
#     input = [
#         {
#             "type": "text",
#             "content": prompt_memory_retrieval.format(
#                 question=question,
#                 query_num=query_num,
#                 existing_knowledge=existing_knowledge,
#             ),
#         }
#     ]
#     messages = generate_messages(input)
#     model = "gpt-4o-2024-11-20"
#     queries = None
#     for i in range(MAX_RETRIES):
#         print(f"Generating queries {i} times")
#         queries = get_response_with_retry(model, messages)[0]
#         queries = validate_and_fix_python_list(queries)
#         if queries is not None:
#             break
#     if queries is None:
#         raise Exception("Failed to generate queries")
#     return queries


# def retrieve_from_videograph(videograph, question, topk=3):
#     queries = generate_queries(question)
#     print(f"Queries: {queries}")

#     model = "text-embedding-3-large"
#     query_embeddings = parallel_get_embedding(model, queries)[0]

#     related_nodes = []

#     for query_embedding in query_embeddings:
#         nodes = videograph.search_text_nodes(query_embedding)
#         related_nodes.extend(nodes)

#     related_nodes = list(set(related_nodes))
#     return related_nodes


# question = "Denny"
# retrieved_nodes = retrieve_from_videograph(video_graph, question)
# print(retrieved_nodes)