In [1]:
import numpy as np
from tqdm import tqdm
import os
import json

from videograph import VideoGraph
from utils.general import *
from utils.video_processing import *
from utils.chat_api import *
from prompts import *

from face_processing import process_faces
from voice_processing import process_voices
from memory_processing import (
    process_captions,
    generate_captions_and_thinkings_with_ids,
)
from retrieve import answer_with_retrieval



In [None]:
def process_segment(video_graph, base64_video, base64_frames, base64_audio):
    """Run one clip through voice, face and caption processing.

    The steps run in a fixed order because the caption step consumes the
    results of the two before it:

    1. ``process_voices`` on the audio and video payloads.
    2. ``process_faces`` on the frame list.
    3. ``generate_captions_and_thinkings_with_ids`` with both results, which
       yields a pair ``(episodic_captions, semantic_captions)``.
    4. ``process_captions`` once per caption set, tagged with its type.

    Args:
        video_graph: Graph object handed to every processing helper.
        base64_video: Video payload of the clip (presumably base64-encoded,
            judging by the name -- confirm against ``process_video_clip``).
        base64_frames: Sized collection of frame payloads for the clip.
        base64_audio: Audio payload of the clip.

    Returns:
        None. Progress messages are printed along the way.
    """
    voice_map = process_voices(video_graph, base64_audio, base64_video)
    print("Finish processing voices")

    frame_count = len(base64_frames)
    print(f"processing {frame_count} frames...")
    face_map = process_faces(video_graph, base64_frames)
    print("Finish processing faces")

    episodic, semantic = generate_captions_and_thinkings_with_ids(
        video_graph, base64_video, base64_frames, base64_audio, face_map, voice_map
    )

    # Episodic captions are stored before semantic ones.
    for memory_type, captions in (("episodic", episodic), ("semantic", semantic)):
        process_captions(video_graph, captions, type=memory_type)

    print("Finish processing segment")


def _clip_sort_key(file_name):
    """Return a sort key that orders clip files by the number in their name.

    Only the part before the extension is scanned, so the "4" of ".mp4" can
    no longer leak into the number (which mis-ordered directories mixing
    ".mp4" with ".avi"/".mov"). Names without any digit sort after all
    numbered clips, alphabetically, instead of raising on ``int("")``.
    """
    stem = os.path.splitext(file_name)[0]
    digits = "".join(filter(str.isdigit, stem))
    if digits:
        return (0, int(digits), file_name)
    return (1, 0, file_name)


def streaming_process_video(
    video_graph, video_path, interval_seconds, fps, segment_limit=None
):
    """Process a video clip by clip and feed each clip into ``video_graph``.

    Two input layouts are supported:

    * A single video file: it is cut into consecutive windows of
      ``interval_seconds``. A trailing window that would run past the end of
      the video is skipped.
    * A directory of clips: every ``.mp4`` / ``.avi`` / ``.mov`` file is
      processed in full, ordered by the number in its file name.

    Args:
        video_graph (VideoGraph): Graph object to store video information
        video_path (str): Path to the video file or directory containing clips
        interval_seconds (float): Length of each segment in seconds (only used
            for the single-file layout)
        fps (float): Frames per second to extract from each segment
        segment_limit (int | None): Stop after this many clips have been
            loaded. Clips that yield no frames still count. ``None`` means
            no limit.

    Returns:
        None: Updates video_graph in place with processed segments
    """
    if os.path.isfile(video_path):
        # Process single video file
        video_info = get_video_info(video_path)
        print(video_info)

        duration = video_info["duration"]

        # Process each interval
        count = 0
        for start_time in np.arange(0, duration, interval_seconds):
            # Drop the last window if it is shorter than a full interval.
            if start_time + interval_seconds > duration:
                break

            print("=" * 20)
            count += 1

            print(f"Loading {count}-th clip starting at {start_time} seconds...")
            base64_video, base64_frames, base64_audio = process_video_clip(
                video_path, start_time, interval_seconds, fps, audio_format="wav"
            )

            # Process frames for this interval
            if base64_frames:
                print(
                    f"Starting processing {count}-th clip starting at {start_time} seconds..."
                )
                process_segment(
                    video_graph,
                    base64_video,
                    base64_frames,
                    base64_audio,
                )

            if segment_limit is not None and count >= segment_limit:
                break

    elif os.path.isdir(video_path):
        # Process directory of numbered clips
        video_files = sorted(
            (
                name
                for name in os.listdir(video_path)
                if name.endswith((".mp4", ".avi", ".mov"))
            ),
            key=_clip_sort_key,
        )

        for count, video_file in enumerate(video_files, 1):
            print("=" * 20)
            full_path = os.path.join(video_path, video_file)
            print(f"Processing clip {count}: {full_path}")

            # start=0 and interval=None: hand the whole clip to the loader.
            base64_video, base64_frames, base64_audio = process_video_clip(
                full_path, 0, None, fps, audio_format="wav"
            )

            if base64_frames:
                process_segment(
                    video_graph,
                    base64_video,
                    base64_frames,
                    base64_audio,
                )

            if segment_limit is not None and count >= segment_limit:
                break

    else:
        # Previously a missing or mistyped path was skipped without any trace.
        print(f"Skipping {video_path}: not an existing file or directory")

In [None]:
# Read both JSON configs. The context managers close the files right away
# instead of leaving the handles open until garbage collection.
with open("configs/processing_config.json") as config_file:
    processing_config = json.load(config_file)
with open("configs/memory_config.json") as config_file:
    memory_config = json.load(config_file)

# video paths can be paths to directories or paths to mp4 files
video_paths = processing_config["video_paths"]

# Every graph is written to the same output directory.
save_dir = "data/video_graphs"

for video_path in video_paths:

    # Start from an empty graph for each video so results do not mix.
    video_graph = VideoGraph(**memory_config)

    streaming_process_video(
        video_graph,
        video_path,
        processing_config["interval_seconds"],
        processing_config["fps"],
        processing_config["segment_limit"],
    )

    # Both configs are passed along with the graph when saving.
    save_video_graph(
        video_graph, video_path, save_dir, (processing_config, memory_config)
    )

In [5]:
# Load a previously saved video graph instead of re-processing the video.
# NOTE(review): the path is hard-coded; the numeric suffix in the file name
# presumably encodes the config values used when the graph was built -- confirm
# against save_video_graph.
video_graph_path = "data/video_graphs/5-Poor-People-vs-1-Secret-Millionaire_60_5_10_10_20_0.3_0.6_0.75.pkl"
video_graph = load_video_graph(video_graph_path)

# Debug snippets kept for reference: dump text-node contents, and print the
# contents attached to edges whose weight is greater than 1.
# for text_node in video_graph.text_nodes:
#     print(video_graph.nodes[text_node].metadata['contents'])
# for nodes, weight in video_graph.edges.items():
#     if weight > 1:
#         if video_graph.nodes[nodes[0]].type in ["episodic", "semantic"]:
#            print(video_graph.nodes[nodes[0]].metadata['contents'])
#         else:
#            print(video_graph.nodes[nodes[1]].metadata['contents'])
#         print(weight)

# Override the text-matching threshold on the loaded graph, then rebuild the
# equivalences.
# NOTE(review): in the recorded run this step issued chat-completion HTTP
# requests, so it appears to need API access -- confirm before running offline.
video_graph.text_matching_threshold = 0.20
video_graph.refresh_equivalences()

# Ask one question against the graph. answer_with_retrieval returns a pair:
# the related memories and the answer.
# question = 'What does Demar Randy wear?'
question = "Who is wearing a black bomber jacket and jeans?"
related_memories, answer = answer_with_retrieval(video_graph, question, topk=10)

# Optional follow-ups, disabled: summarize the graph, save an augmented copy,
# visualize it.
# video_graph.summarize(logging=True)
# save_dir = "data/video_graphs"
# save_video_graph(
#     video_graph, None, save_dir, None, file_name='5-Poor-People-vs-1-Secret-Millionaire_60_5_5_10_20_0.3_0.6_0.75_augmented.pkl'
# )
# video_graph.visualize()

Loading video graph from data/video_graphs/5-Poor-People-vs-1-Secret-Millionaire_60_5_10_10_20_0.3_0.6_0.75.pkl


  0%|          | 0/212 [00:00<?, ?it/s]

Generating equivalences 0 times


2025-03-28 06:06:42,879 - httpx - INFO - HTTP Request: POST https://search-va.byteintl.net/gpt/openapi/online/v2/crawl/openai/deployments/gpt-4o-2024-11-20/chat/completions?api-version=2024-03-01-preview "HTTP/1.1 200 OK"
  0%|          | 1/212 [00:01<04:01,  1.14s/it]

Generating equivalences 0 times


2025-03-28 06:06:43,368 - httpx - INFO - HTTP Request: POST https://search-va.byteintl.net/gpt/openapi/online/v2/crawl/openai/deployments/gpt-4o-2024-11-20/chat/completions?api-version=2024-03-01-preview "HTTP/1.1 200 OK"
  1%|          | 2/212 [00:01<02:39,  1.32it/s]

Generating equivalences 0 times


2025-03-28 06:06:45,078 - httpx - INFO - HTTP Request: POST https://search-va.byteintl.net/gpt/openapi/online/v2/crawl/openai/deployments/gpt-4o-2024-11-20/chat/completions?api-version=2024-03-01-preview "HTTP/1.1 200 OK"
  1%|▏         | 3/212 [00:03<04:09,  1.19s/it]

Generating equivalences 0 times


2025-03-28 06:06:45,833 - httpx - INFO - HTTP Request: POST https://search-va.byteintl.net/gpt/openapi/online/v2/crawl/openai/deployments/gpt-4o-2024-11-20/chat/completions?api-version=2024-03-01-preview "HTTP/1.1 200 OK"
  2%|▏         | 4/212 [00:04<03:32,  1.02s/it]

Generating equivalences 0 times


2025-03-28 06:06:46,453 - httpx - INFO - HTTP Request: POST https://search-va.byteintl.net/gpt/openapi/online/v2/crawl/openai/deployments/gpt-4o-2024-11-20/chat/completions?api-version=2024-03-01-preview "HTTP/1.1 200 OK"
 23%|██▎       | 49/212 [00:04<00:08, 19.27it/s]

Generating equivalences 0 times


2025-03-28 06:06:47,426 - httpx - INFO - HTTP Request: POST https://search-va.byteintl.net/gpt/openapi/online/v2/crawl/openai/deployments/gpt-4o-2024-11-20/chat/completions?api-version=2024-03-01-preview "HTTP/1.1 200 OK"
 25%|██▍       | 52/212 [00:05<00:11, 13.37it/s]

Generating equivalences 0 times


2025-03-28 06:06:47,940 - httpx - INFO - HTTP Request: POST https://search-va.byteintl.net/gpt/openapi/online/v2/crawl/openai/deployments/gpt-4o-2024-11-20/chat/completions?api-version=2024-03-01-preview "HTTP/1.1 200 OK"
 33%|███▎      | 69/212 [00:06<00:08, 17.62it/s]

Generating equivalences 0 times


2025-03-28 06:06:48,574 - httpx - INFO - HTTP Request: POST https://search-va.byteintl.net/gpt/openapi/online/v2/crawl/openai/deployments/gpt-4o-2024-11-20/chat/completions?api-version=2024-03-01-preview "HTTP/1.1 200 OK"


Generating equivalences 0 times


2025-03-28 06:06:49,091 - httpx - INFO - HTTP Request: POST https://search-va.byteintl.net/gpt/openapi/online/v2/crawl/openai/deployments/gpt-4o-2024-11-20/chat/completions?api-version=2024-03-01-preview "HTTP/1.1 200 OK"
 34%|███▍      | 72/212 [00:07<00:12, 11.49it/s]

Generating equivalences 0 times


2025-03-28 06:06:49,895 - httpx - INFO - HTTP Request: POST https://search-va.byteintl.net/gpt/openapi/online/v2/crawl/openai/deployments/gpt-4o-2024-11-20/chat/completions?api-version=2024-03-01-preview "HTTP/1.1 200 OK"
 42%|████▏     | 90/212 [00:08<00:08, 14.64it/s]

Generating equivalences 0 times


2025-03-28 06:06:51,258 - httpx - INFO - HTTP Request: POST https://search-va.byteintl.net/gpt/openapi/online/v2/crawl/openai/deployments/gpt-4o-2024-11-20/chat/completions?api-version=2024-03-01-preview "HTTP/1.1 200 OK"
 55%|█████▌    | 117/212 [00:09<00:05, 16.78it/s]

Generating equivalences 0 times


2025-03-28 06:06:51,851 - httpx - INFO - HTTP Request: POST https://search-va.byteintl.net/gpt/openapi/online/v2/crawl/openai/deployments/gpt-4o-2024-11-20/chat/completions?api-version=2024-03-01-preview "HTTP/1.1 200 OK"
 57%|█████▋    | 120/212 [00:10<00:06, 14.38it/s]

Generating equivalences 0 times


2025-03-28 06:06:52,590 - httpx - INFO - HTTP Request: POST https://search-va.byteintl.net/gpt/openapi/online/v2/crawl/openai/deployments/gpt-4o-2024-11-20/chat/completions?api-version=2024-03-01-preview "HTTP/1.1 200 OK"
 94%|█████████▍| 199/212 [00:10<00:00, 39.02it/s]

Generating equivalences 0 times


2025-03-28 06:06:53,557 - httpx - INFO - HTTP Request: POST https://search-va.byteintl.net/gpt/openapi/online/v2/crawl/openai/deployments/gpt-4o-2024-11-20/chat/completions?api-version=2024-03-01-preview "HTTP/1.1 200 OK"
100%|██████████| 212/212 [00:11<00:00, 17.94it/s]


Generating queries 0 times


2025-03-28 06:06:54,408 - httpx - INFO - HTTP Request: POST https://search-va.byteintl.net/gpt/openapi/online/v2/crawl/openai/deployments/gpt-4o-2024-11-20/chat/completions?api-version=2024-03-01-preview "HTTP/1.1 200 OK"


Queries: ['Person wearing a black bomber jacket', 'Individual dressed in jeans and a black bomber jacket']
yes
face_6 character_7
yes
face_10 character_2
yes
face_11 character_6
yes
face_10 character_2
yes
face_10 character_2
yes
face_11 character_6
yes
face_10 character_2
yes
face_8 character_4
yes
face_7 character_3
['<character_7> is wearing a blue denim jacket and a white beanie.', '<character_2> is wearing a black bomber jacket and jeans.', '<face_120> wears a camouflage jacket and an orange baseball cap.', '<character_6> is wearing a blue button-down shirt, a dark jacket and gray pants.', '<character_2> is wearing a black bomber jacket.', '<character_2> is wearing a black jacket and jeans.', '<character_6> is wearing a blue button-down shirt, a dark jacket, and gray pants.', '<character_2> is wearing a black bomber jacket and jeans.', '<character_4> wears a purple crop top, purple jacket, and dark pants.', '<character_3> is wearing a black hoodie.']


2025-03-28 06:06:55,754 - httpx - INFO - HTTP Request: POST https://search-va.byteintl.net/gpt/openapi/online/v2/crawl/openai/deployments/gpt-4o-2024-11-20/chat/completions?api-version=2024-03-01-preview "HTTP/1.1 200 OK"


<character_2> is wearing a black bomber jacket and jeans.


In [3]:
# Print the stored contents of every text node, in the order the graph lists them.
node_table = video_graph.nodes
for text_node in video_graph.text_nodes:
    print(node_table[text_node].metadata['contents'])

['<voice_0> introduces four individuals named Denny, Herm, Aaron, and JC, seated at a table.']
['<voice_0> mentions five other individuals claiming to be millionaires, with only one being truthful.']
['<face_10> is wearing a black jacket and jeans.']
['<face_4> is wearing a baseball jersey.']
['<face_10> points at someone off-screen.']
['<voice_2> accuses <face_10> of being broke.']
['<face_5> laughs and points.']
['<voice_2> says that he had to pay <face_10> because he was a guest on the Black President show.']
['<face_9> wears sunglasses.']
['<face_10> is not wearing socks with his loafers.']
['Equivalence: <face_4>, <voice_0>']
['Equivalence: <face_7>, <voice_2>']
['Equivalence: <face_5>, <voice_1>']
['<face_10> is perceived as wealthy due to his shoes.']
['<face_10> not wearing socks is interpreted as a sign of comfort and confidence by <voice_2>.']
['The video depicts a game show or competition to identify a millionaire among a group of individuals.']
['<face_7> is wearing a black

In [None]:
# Visualize the graph held in `video_graph` (set by an earlier cell).
# NOTE(review): what gets rendered is defined by VideoGraph.visualize, which is
# not shown in this notebook.
video_graph.visualize()

In [None]:
# from retrieve import retrieve_from_videograph
# from videograph import VideoGraph
# from utils.chat_api import (
#     generate_messages,
#     get_response_with_retry,
#     parallel_get_embedding,
# )
# from utils.general import validate_and_fix_python_list
# from prompts import prompt_memory_retrieval

# MAX_RETRIES = 3


# def generate_queries(question, existing_knowledge=None, query_num=1):
#     input = [
#         {
#             "type": "text",
#             "content": prompt_memory_retrieval.format(
#                 question=question,
#                 query_num=query_num,
#                 existing_knowledge=existing_knowledge,
#             ),
#         }
#     ]
#     messages = generate_messages(input)
#     model = "gpt-4o-2024-11-20"
#     queries = None
#     for i in range(MAX_RETRIES):
#         print(f"Generating queries {i} times")
#         queries = get_response_with_retry(model, messages)[0]
#         queries = validate_and_fix_python_list(queries)
#         if queries is not None:
#             break
#     if queries is None:
#         raise Exception("Failed to generate queries")
#     return queries


# def retrieve_from_videograph(videograph, question, topk=3):
#     queries = generate_queries(question)
#     print(f"Queries: {queries}")

#     model = "text-embedding-3-large"
#     query_embeddings = parallel_get_embedding(model, queries)[0]

#     related_nodes = []

#     for query_embedding in query_embeddings:
#         nodes = videograph.search_text_nodes(query_embedding)
#         related_nodes.extend(nodes)

#     related_nodes = list(set(related_nodes))
#     return related_nodes


# question = "Denny"
# retrieved_nodes = retrieve_from_videograph(video_graph, question)
# print(retrieved_nodes)