In [None]:
# Run this code to analyse the whole database of videos and process it

### 1) Imports and setup

In [1]:
import os
import base64
import aiohttp
import asyncio
import json
import imageio
import re
import time
from PIL import Image
import numpy as np
import colorsys
import aiofiles
import nest_asyncio
from tqdm.asyncio import tqdm
from dotenv import load_dotenv
from scenedetect import VideoManager, SceneManager
from scenedetect.detectors import ContentDetector
import logging
from datetime import datetime
import csv
from rapidfuzz import process, fuzz


# Configure logging
logging.basicConfig(level=logging.INFO)

# Load OpenAI API key from .env file
load_dotenv()
api_key = os.getenv("OPENAI_API_KEY")
if not api_key:
    # Surface the problem once, up front, instead of discovering it through a
    # failed request for every single frame later in the notebook.
    logging.warning("OPENAI_API_KEY is not set; requests to the OpenAI API will not be authenticated.")

# Apply nest_asyncio to handle the running event loop
nest_asyncio.apply()

# Concurrency limit: maximum number of scenes processed at the same time
MAX_CONCURRENT_SCENES = 5
semaphore = asyncio.Semaphore(MAX_CONCURRENT_SCENES)

# A dictionary to store characters across frames
character_frames = {}


In [2]:
#TRACK API USAGE CALLS

# Running totals for OpenAI API consumption. "model_used" is a static label
# stored alongside the counters, not a value read back from API responses.
api_usage = dict(
    total_api_calls=0,
    total_tokens_used=0,
    model_used="gpt-4",
)


### 2) Video Analysis Functions

In [3]:
def analyze_video(video_path, threshold=27.0):
    """Detect scene boundaries in a video using a content-change detector.

    Args:
        video_path: Path to the video file on disk.
        threshold: Sensitivity passed to ``ContentDetector``.

    Returns:
        The scene list produced by ``SceneManager.get_scene_list()``; each entry
        is a ``(start, end)`` pair whose items expose ``get_timecode()`` and
        ``get_frames()``.

    Raises:
        FileNotFoundError: If ``video_path`` does not exist.
    """
    if not os.path.exists(video_path):
        raise FileNotFoundError(f"The video file {video_path} does not exist.")

    video_manager = VideoManager([video_path])
    scene_manager = SceneManager()
    scene_manager.add_detector(ContentDetector(threshold=threshold))

    try:
        video_manager.set_downscale_factor()
        video_manager.start()

        scene_manager.detect_scenes(frame_source=video_manager)
        scene_list = scene_manager.get_scene_list()
    finally:
        # Always give the video handle back, even when detection fails midway.
        video_manager.release()

    logging.info(f'Detected {len(scene_list)} scenes:')
    for i, scene in enumerate(scene_list):
        logging.info(f'Scene {i + 1}: Start {scene[0].get_timecode()} / Frame {scene[0].get_frames()}, '
              f'End {scene[1].get_timecode()} / Frame {scene[1].get_frames()}')

    return scene_list

def get_video_length(video_path):
    """Return the duration of a video in seconds (frame count divided by FPS).

    Args:
        video_path: Path to the video file.

    Returns:
        The video length in seconds as a float.

    Raises:
        ValueError: If the capture reports a non-positive frame rate, which is
            what happens when the file cannot be read; previously this surfaced
            as an unexplained ZeroDivisionError.
    """
    # You can use a tool like OpenCV, ffmpeg, or similar to calculate video length
    import cv2
    cap = cv2.VideoCapture(video_path)
    try:
        fps = cap.get(cv2.CAP_PROP_FPS)
        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    finally:
        # Release the capture even if reading a property raises.
        cap.release()

    if not fps or fps <= 0:
        raise ValueError(f"Could not determine the frame rate of {video_path}; is it a readable video file?")

    return frame_count / fps

### 3) Frame Extraction Function

In [4]:
def extract_frames_imageio(video_path, scenes, output_dir):
    """Save the middle frame of every scene as a JPEG in ``output_dir``.

    Args:
        video_path: Path to the source video.
        scenes: Iterable of ``(start, end)`` pairs; each item must be
            convertible to a frame number with ``int()``.
        output_dir: Directory that receives the images. It is created if it
            does not exist yet.

    Files are named ``scene_<n>_frame_<middle_frame>.jpg`` with ``n`` starting at 1.
    """
    os.makedirs(output_dir, exist_ok=True)

    reader = imageio.get_reader(video_path)
    try:
        for i, scene in enumerate(scenes):
            start_frame, end_frame = scene

            # Convert the scene boundaries to integer frame numbers
            start_frame_num = int(start_frame)
            end_frame_num = int(end_frame)

            # Calculate the middle frame of the scene
            middle_frame = (start_frame_num + end_frame_num) // 2

            # Set the reader to the middle frame and extract it
            reader.set_image_index(middle_frame)
            frame = reader.get_next_data()

            # Save the frame as an image with frame number in the filename
            output_path = os.path.join(output_dir, f'scene_{i + 1}_frame_{middle_frame}.jpg')
            imageio.imwrite(output_path, frame)
            print(f"Extracted and saved middle frame of scene {i + 1} as {output_path}", flush=True)
    finally:
        # The reader holds the video open; close it even if a frame fails to decode.
        reader.close()


### 4) Image Processing Function

In [5]:
async def encode_image(image_path):
    """Read ``image_path`` asynchronously and return its bytes as a base64 text string."""
    async with aiofiles.open(image_path, "rb") as handle:
        raw_bytes = await handle.read()
        encoded = base64.b64encode(raw_bytes)
        return str(encoded, 'utf-8')

def get_color_category(color):
    """Classify an RGB colour into a coarse colour family.

    Args:
        color: Sequence of three 0-255 channel values ``(r, g, b)``.

    Returns:
        One of ``"red"``, ``"yellow"``, ``"green"``, ``"blue"``, ``"white"``,
        ``"black"`` or ``"non-primary"``.
    """
    r, g, b = [x / 255.0 for x in color]
    h, l, s = colorsys.rgb_to_hls(r, g, b)

    # Achromatic checks must run BEFORE the hue lookup: colorsys reports hue 0.0
    # for every grey level, so testing hue first labelled white, black and all
    # greys as "red".
    if (l >= 0.9 and s <= 0.1):
        return "white"
    if (l <= 0.1 and s <= 0.1):
        return "black"
    if s == 0:
        # Pure grey: the hue is undefined, so it cannot belong to a hue family.
        return "non-primary"

    primary_hues = {
        "red": (0.0, 0.1),
        "yellow": (0.1, 0.18),
        "green": (0.25, 0.4),
        "blue": (0.55, 0.75),
    }

    for color_name, hue_range in primary_hues.items():
        if hue_range[0] <= h <= hue_range[1]:
            return color_name

    return "non-primary"

def analyze_image_colors(image_path):
    """Count how many pixels of an image fall into each colour family.

    The image is loaded as RGB, every distinct colour is classified with
    ``get_color_category`` and the pixel counts are summed per family.

    Returns:
        ``{"Color Analysis": {"Colors Found": {...}, "Dominance": ...}}`` where
        each family maps to its pixel count and a percentage string, and
        "Dominance" compares Red + Yellow + Blue against Non-primary.
    """
    pixels = np.array(Image.open(image_path).convert('RGB'))

    # Flatten to a list of RGB triples and count each distinct colour once
    flat_pixels = pixels.reshape(-1, pixels.shape[2])
    distinct_colors, occurrences = np.unique(flat_pixels, axis=0, return_counts=True)
    total_pixels = int(occurrences.sum())

    color_counts = dict.fromkeys(
        ("Red", "Yellow", "Green", "Blue", "White", "Black", "Non-primary"), 0
    )
    for rgb, occurrence in zip(distinct_colors, occurrences):
        family = get_color_category(tuple(rgb)).capitalize()
        color_counts[family] += int(occurrence)

    colors_found = {}
    for family, pixel_count in color_counts.items():
        share = (pixel_count / total_pixels) * 100
        colors_found[family] = {
            "Pixel Count": pixel_count,
            "Percentage": f"{share:.2f}%"
        }

    primary_total = color_counts["Red"] + color_counts["Yellow"] + color_counts["Blue"]
    if primary_total > color_counts["Non-primary"]:
        color_dominance = "Primary colors"
    else:
        color_dominance = "Non-primary colors"

    return {
        "Color Analysis": {
            "Colors Found": colors_found,
            "Dominance": color_dominance
        }
    }


### 5) OpenAI API Interaction

In [6]:
def _strip_code_fence(text):
    """Remove a surrounding Markdown code fence (``` or ```json) from ``text``, if present."""
    text = text.strip()
    if text.startswith("```"):
        text = re.sub(r'^```[A-Za-z0-9_-]*\s*', '', text)
        text = re.sub(r'\s*```$', '', text)
    return text


async def send_image_to_openai(image_path, base64_image, retries=3):
    """Ask the OpenAI chat completions endpoint for a JSON analysis of one frame.

    Args:
        image_path: Path of the image; used for log messages and to pick the
            media type announced in the data URL.
        base64_image: The image content, base64-encoded as text.
        retries: Maximum number of attempts. Rate-limit (429) and server (5xx)
            responses as well as client/unexpected errors are retried with an
            exponential back-off of ``2 ** attempt`` seconds.

    Returns:
        The parsed JSON reply, or ``None`` when the request fails, the reply has
        an unexpected shape, or the reply is not valid JSON.
    """
    import mimetypes  # stdlib; imported locally so the cell needs no new top-level import

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }

    # Announce the real media type: the frames are written as .jpg files, so a
    # hard-coded "image/png" mislabelled them. Unknown extensions keep the old value.
    guessed_type = mimetypes.guess_type(image_path)[0]
    mime_type = guessed_type if guessed_type and guessed_type.startswith("image/") else "image/png"

    payload = {
        "model": "gpt-4o-mini",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": """
                        Analyze the following image and provide a detailed description in the format of JSON only. Ensure the output is strictly in JSON format without any additional text or code block formatting. The JSON should include the following standardized labels:

                        1. **Image Analysis**: The root dictionary containing all analysis data.
                        
                        2. **Suitability**:
                            - "Nudity": Boolean indicating the presence of nudity.
                            - "Obscene Gestures": Boolean indicating the presence of obscene gestures.
                            - "Alcohol": Boolean indicating the presence of alcohol.
                            - "Drugs": Boolean indicating the presence of drugs.
                            - "Addictions": Boolean indicating the presence of addictions.

                        3. **Objects**:
                            - "Total Objects Identified": Integer representing the total number of objects identified.
                            - "Average Features Per Object": Float representing the average number of features per object.
                            - "Objects Details": Dictionary containing details of each object, where each object is labeled as "Object_1", "Object_2", etc., with the following structure:
                                - "Name": The name of the object - as simplest and descriptive mossible.
                                - "Portion Boolean": 0-1 output indicating if the object is a portion of a larger object (1) or a complete object (0). For example, a leg is a portion of a human. However, if the object is just cropped but clearly identifiable as a complete object, it should be considered a complete object.
                                - "Color": The color of the object.
                                - "Features": List of features of the object.
                                - "Total Features": Integer representing the number of features for the object.

                        4. **Place**:
                            - "Name": The name of the place - as simplest and descriptive mossible.
                            - "Certainty Boolean": 0-1 output indicating if the place is clearly identifiable (1) or not (0).
                            - "Fantasy/Adventurous Place": Boolean (0-1) indicating whether the place is classified as a fantasy/adventurous place or not.
                            - "Explanation": Detailed explanation of why the place is classified as fantasy/adventurous or not. Fantasy places are those that do not exist in reality, and adventurous places are defined as those involving clear statements of traveling to space or another country.

                        5. **Characters**:
                            - "Total Characters Identified": Integer representing the total number of characters identified.
                            - "Average Features Per Character": Float representing the average number of features per character.
                            - "Character Details": Dictionary containing details of each character, where each character is labeled as "Character_1", "Character_2", etc., with the following structure:
                                - "Name": The name of the character - as simplest and descriptive mossible.
                                - "Portion Boolean": 0-1 output indicating if the character is a portion of a larger character (1) or a complete character (0). For example, a leg is a portion of a human. However, if the character is just cropped but clearly identifiable as a complete character, it should be considered a complete character.
                                - "Human or Non-Human": 0-1 output indicating if the character appears human (1) or non-human (0). Anthropomorphized characters or any other combination not fully human are considered non-human.
                                - "Physical Features": List of physical features of the character.
                                - "Explanation": Explanation for why the character is classified as human or non-human, and why these physical features are inferred.
                                - "Age": Expected age range of the character (a single number).
                            **Note**: If the "character" consists of only a part of a body (such as a hand, leg, or face without enough distinguishing features to identify it as a complete character), do not count it as a "character."

                        Ensure that the structure of the JSON output strictly adheres to these standardized labels.
                        """
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:{mime_type};base64,{base64_image}"
                        }
                    }
                ]
            }
        ],
        "max_tokens": 750
    }

    for attempt in range(retries):
        try:
            async with aiohttp.ClientSession() as session:
                async with session.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload) as response:
                    status = response.status

                    if status == 429:
                        print("Rate limit exceeded, retrying...")
                        await asyncio.sleep(2 ** attempt)
                        continue
                    if status >= 500:
                        # Server-side failures are usually transient; back off and retry.
                        print(f"Server error {status} for {image_path}, retrying...")
                        await asyncio.sleep(2 ** attempt)
                        continue
                    if status != 200:
                        print(f"Request failed with status code {status} for {image_path}.")
                        return None

                    content = await response.json()

                    choices = content.get('choices') if isinstance(content, dict) else None
                    if not choices:
                        print(f"Unexpected response format from OpenAI API for {image_path}.")
                        return None

                    # "content" may be missing or null; treat both as an empty reply.
                    message_content = (choices[0].get('message') or {}).get('content') or ''
                    try:
                        # Models sometimes wrap JSON in a Markdown fence despite the prompt.
                        return json.loads(_strip_code_fence(message_content))
                    except json.JSONDecodeError as e:
                        print(f"Error decoding JSON from OpenAI response for {image_path}: {e}")
                        return None
        except aiohttp.ClientError as e:
            print(f"Request failed due to a client error: {e}")
            await asyncio.sleep(2 ** attempt)
        except Exception as e:
            print(f"Unexpected error occurred: {e}")
            await asyncio.sleep(2 ** attempt)
    return None


### 6) Scene Processing Functions

In [7]:
async def process_scenes_output(output_dir, json_output_dir):
    """Analyse every ``.jpg`` frame in ``output_dir`` concurrently.

    Frames are ordered by ``extract_scene_number`` and each one is handed to
    ``process_single_scene``; results are written under ``json_output_dir``,
    which is created when missing. A progress bar tracks completed scenes.
    """
    os.makedirs(json_output_dir, exist_ok=True)

    frame_files = [name for name in os.listdir(output_dir) if name.endswith('.jpg')]
    frame_files.sort(key=extract_scene_number)

    with tqdm(total=len(frame_files), desc="Processing Scenes", unit="scene") as pbar:
        jobs = []
        for index, frame_file in enumerate(frame_files):
            jobs.append(process_single_scene(index, frame_file, output_dir, json_output_dir, pbar))
        await asyncio.gather(*jobs)


async def process_single_scene(i, scene, output_dir, json_output_dir, pbar):
    """Analyse one extracted frame and write the combined result as JSON.

    Args:
        i: Position of the scene in the sorted list (not used in the body).
        scene: File name of the frame image inside ``output_dir``.
        output_dir: Directory containing the frame images.
        json_output_dir: Directory that receives ``<frame name>_analysis.json``.
        pbar: Progress bar; advanced by one when this scene is finished,
            whether it succeeded, was skipped, or raised.
    """
    async with semaphore:  # Limit concurrent execution
        try:
            scene_path = os.path.join(output_dir, scene)

            # Encode image in base64
            base64_image = await encode_image(scene_path)

            # Perform color analysis
            color_analysis_result = analyze_image_colors(scene_path)

            # Send image to OpenAI for further analysis
            openai_response = await send_image_to_openai(scene_path, base64_image)

            # A usable response is a non-empty JSON object; anything else is skipped
            if not openai_response or not isinstance(openai_response, dict):
                print(f"Skipping {scene} due to invalid OpenAI response.")
                return

            if "Image Analysis" in openai_response:
                model_analysis = openai_response["Image Analysis"]
                if not isinstance(model_analysis, dict):
                    model_analysis = {}
            else:
                # The model left out the requested root key: keep its sections
                # instead of silently discarding the whole analysis.
                model_analysis = openai_response

            # Combine both results, and include the reference to the image file
            final_output = {
                "Image File": scene,
                "Image Analysis": {
                    **color_analysis_result["Color Analysis"],
                    **model_analysis
                }
            }

            # The filename already includes the scene number and frame number
            output_filename = os.path.splitext(scene)[0] + '_analysis.json'
            output_path = os.path.join(json_output_dir, output_filename)

            try:
                async with aiofiles.open(output_path, 'w') as json_file:
                    await json_file.write(json.dumps(final_output, indent=4))
                    print(f"Saved analysis for {scene} as {output_filename}")
            except Exception as e:
                print(f"Failed to save analysis for {scene}: {e}")
        finally:
            # Single exit point for progress reporting, including error paths.
            pbar.update(1)


def extract_scene_number(filename):
    """Return the first run of digits found in ``filename`` as an int, or -1 if there is none."""
    first_digits = re.search(r'\d+', filename)
    if first_digits is None:
        return -1
    return int(first_digits.group())

def extract_frame_number(filename):
    """Return the number following ``_frame_`` in ``filename`` as an int, or -1 if absent."""
    found = re.search(r'_frame_(\d+)', filename)
    if not found:
        return -1
    return int(found.group(1))



### 7) Run whole analysis of each json output

Image Path Construction: get_image_path generates the correct path to the image file based on the JSON filename.

Entity Extraction: extract_entities_from_json pulls characters, objects, and places from the JSON data.

Image-to-Image Comparison: perform_image_to_image_comparison compares partial objects with full objects using the OpenAI API.

Entity Comparison: compare_entities handles both name-based and image-based comparisons to decide whether two entities should be consolidated.

Consolidation: Entities across frames are consolidated into a single summary file that tracks where each entity was found.

Main Execution: The script runs through all JSON files, processes the entities, and saves the consolidated results to a summary JSON file.

Key Features of This Implementation:
Text-Based Comparison: The code first attempts to merge entities based on exact name matches. If no match is found, it uses the OpenAI API to determine if two entities with different names should be merged.

Image-to-Image Comparison: If one of the entities is flagged as a portion, or if names don't match but the entities might still be the same, the code performs an image-to-image comparison using the OpenAI API.

Efficient Processing: The code processes each frame sequentially and logs all merges into merged_entities_log, ensuring you have a record of what entities were merged, including their original names and frames.

No Overwritten Functionality: The original image analysis functionality is preserved and integrated smoothly with the text-based comparisons.

Number of Unique Characters, Objects, and Places: This can be done by counting the keys in the consolidated_data dictionary.
Average Characters per Frame: This can be calculated by summing up all instances of characters found across frames and dividing by the total number of frames where characters appear.
Average Features per Character/Object: Calculate this by summing the features of all characters/objects and dividing by the total number of characters/objects.
Overall Color Analysis: Aggregate the color data from all JSON files.
Filter Compliance: Check for any instances where the filters (e.g., nudity, drugs) are not compliant and log the frame numbers.

In [15]:
# import os
# import json
# import nest_asyncio
# import asyncio
# import time
# import base64
# import aiohttp

# # Set your directories and variables
# json_output_dir = '/Users/santiagowon/Dropbox/Santiago/01. Maestria/Tesis/11_Project_Analysed_DB/Bananas_in_pyjamas/json_output'
# image_output_dir = '/Users/santiagowon/Dropbox/Santiago/01. Maestria/Tesis/11_Project_Analysed_DB/Bananas_in_pyjamas/scenes_output'
# summary_json_path = '/Users/santiagowon/Dropbox/Santiago/01. Maestria/Tesis/11_Project_Analysed_DB/Bananas_in_pyjamas_summary.json'
# video_title = "Bananas_in_pyjamas.mp4"
# video_file_path = '/Users/santiagowon/Dropbox/Santiago/01. Maestria/Tesis/02_Video_DB/Bananas_in_pyjamas.mp4'
# api_key = os.getenv("OPENAI_API_KEY")

# # Apply nest_asyncio to handle the running event loop
# nest_asyncio.apply()

# # Dummy API usage tracking
# api_usage = {
#     "total_api_calls": 0,
#     "total_tokens_used": 0,
#     "model_used": "gpt-4"  # Assuming you're using GPT-4
# }

# # Function to encode image to base64
# def encode_image_to_base64(image_path):
#     if not os.path.isfile(image_path):
#         raise ValueError(f"Image path is invalid: {image_path}")
#     with open(image_path, "rb") as image_file:
#         return base64.b64encode(image_file.read()).decode("utf-8")

# # Function to perform image-to-image comparison
# async def perform_image_to_image_comparison(entity1, entity2, image_path1, image_path2, api_key, api_usage):
#     base64_image1 = encode_image_to_base64(image_path1)
#     base64_image2 = encode_image_to_base64(image_path2)

#     prompt = """
#     You are an expert in image analysis. Compare the two provided images and determine if they represent the same object or character, even if one is a partial view. Consider features, colors, and context.

#     Return 'True' if the images depict the same object or character, 'False' if they are different, and 'Uncertain' if unsure.
#     """

#     headers = {
#         "Content-Type": "application/json",
#         "Authorization": f"Bearer {api_key}"
#     }

#     payload = {
#         "model": api_usage.get("model_used", "gpt-4"),
#         "messages": [{"role": "user", "content": prompt}],
#         "images": [{"image": base64_image1}, {"image": base64_image2}],
#         "max_tokens": 100
#     }

#     async with aiohttp.ClientSession() as session:
#         try:
#             async with session.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload) as response:
#                 response_json = await response.json()

#                 # Increment API usage counters
#                 api_usage["total_api_calls"] += 1
#                 api_usage["total_tokens_used"] += response_json.get("usage", {}).get("total_tokens", 0)

#                 answer = response_json.get('choices', [{}])[0].get('message', {}).get('content', '').strip().lower()

#                 if "uncertain" in answer:
#                     return "uncertain"
#                 elif "true" in answer:
#                     return "true"
#                 else:
#                     return "false"
#         except KeyError as e:
#             print(f"Error accessing API response: {e}")
#             return "false"
#         except Exception as e:
#             print(f"Unexpected error: {e}")
#             return "false"

# # Function to consolidate data from JSON
# def consolidate_from_json(json_data, consolidated_data):
#     if "Image Analysis" in json_data:
#         characters = json_data["Image Analysis"].get("Characters", {}).get("Character Details", {})
#         objects = json_data["Image Analysis"].get("Objects", {}).get("Objects Details", {})
#         place = json_data["Image Analysis"].get("Place", {})

#         # Consolidate characters
#         for char_id, char_data in characters.items():
#             if char_data["Name"] not in consolidated_data["characters"]:
#                 consolidated_data["characters"][char_data["Name"]] = char_data

#         # Consolidate objects
#         for obj_id, obj_data in objects.items():
#             if obj_data["Name"] not in consolidated_data["objects"]:
#                 consolidated_data["objects"][obj_data["Name"]] = obj_data

#         # Consolidate places
#         if place["Name"] not in consolidated_data["places"]:
#             consolidated_data["places"][place["Name"]] = place

# # Function to perform image comparisons on portions
# async def perform_image_to_image_comparisons(consolidated_data):
#     # Example logic for image-to-image comparisons
#     for entity_type in ["characters", "objects"]:
#         for entity_name, entity_data in consolidated_data[entity_type].items():
#             # Implement your image comparison logic here if needed
#             pass

# # Function to save the final summary to a JSON file
# def save_summary_to_json(consolidated_data, output_path, video_title, file_size, processing_time, merged_entities_log, all_json_files_data, api_usage, scenes_per_minute, number_of_scenes, video_length):
#     summary_statistics = {
#         "Number of Unique Characters": len(consolidated_data["characters"]),
#         "Number of Unique Objects": len(consolidated_data["objects"]),
#         "Number of Unique Places": len(consolidated_data["places"]),
#         # Add more statistics here based on your actual data
#     }

#     summary = {
#         "Video Title": video_title,
#         "File Size (bytes)": file_size,
#         "Video Length (seconds)": video_length,
#         "Number of Scenes": number_of_scenes,
#         "Scenes per Minute": scenes_per_minute,
#         "Processing Time (seconds)": processing_time,
#         "API Usage": api_usage,
#         "Summary Statistics": summary_statistics,
#         "Consolidated Data": consolidated_data,
#         "Merged Entities Log": merged_entities_log
#     }

#     with open(output_path, 'w') as f:
#         json.dump(summary, f, indent=4)


In [55]:
import os
import json
import aiohttp
import asyncio
import nest_asyncio
import openai

# Set your directories and variables
# Root folder of the analysed-video database. Set the ANALYSED_DB_DIR environment
# variable to run this cell on another machine without editing the paths below;
# the default keeps the original locations.
analysed_db_dir = os.getenv(
    "ANALYSED_DB_DIR",
    '/Users/santiagowon/Dropbox/Santiago/01. Maestria/Tesis/11_Project_Analysed_DB',
)
json_output_dir = f'{analysed_db_dir}/Bananas_in_pyjamas/json_output'
final_entities_json_path = f'{analysed_db_dir}/final_entities.json'
final_final_json_path = f'{analysed_db_dir}/final_final_entities.json'
api_key = os.getenv("OPENAI_API_KEY")

# Apply nest_asyncio to handle the running event loop
nest_asyncio.apply()

# Initialize OpenAI API
openai.api_key = api_key

# Function to save entities to JSON
def save_entities_to_json(entities, path):
    """Serialise ``entities`` to ``path`` as JSON indented by four spaces, overwriting the file."""
    with open(path, mode='w') as handle:
        json.dump(entities, handle, indent=4)

# Function to consolidate entities from a JSON file
def _frame_number_from_filename(json_file_name):
    """Return the frame number (as a string) encoded in an analysis file name."""
    import re  # stdlib; local so this cell does not rely on imports from other cells

    match = re.search(r'_frame_(\d+)', json_file_name)
    if match:
        return match.group(1)
    # Fall back to the original positional rule for names without a "_frame_<n>" part.
    return json_file_name.split('_')[3]


def _entity_details(analysis, section_key, details_key):
    """Return the list of per-entity detail records of one section (empty if absent or malformed)."""
    section = analysis.get(section_key)
    container = section.get(details_key) if isinstance(section, dict) else None
    if isinstance(container, dict):
        return list(container.values())
    if isinstance(container, list):
        return container
    return []


def _record_entity(kind, details, frame_number, consolidated_data, merge_tracking):
    """Register one entity of ``kind`` as seen in ``frame_number``."""
    if not isinstance(details, dict):
        return
    name = details.get("Name")
    if not name:
        return

    bucket = consolidated_data[kind]
    if name not in bucket:
        # Copy the record so the caller's parsed JSON is not modified.
        entry = dict(details)
        entry["merged_from"] = []
        bucket[name] = entry
        merge_tracking[kind][name] = {"merged_from": []}

    seen_frames = bucket[name].setdefault("merged_from", [])
    tracked_frames = merge_tracking[kind].setdefault(name, {"merged_from": []})["merged_from"]
    if frame_number not in seen_frames:
        seen_frames.append(frame_number)
        tracked_frames.append(frame_number)


# Function to consolidate entities from a JSON file
def consolidate_from_json(json_data, consolidated_data, merge_tracking, json_file_name):
    """Fold the entities of one frame analysis into the running consolidation.

    Args:
        json_data: Parsed analysis of one frame; entities are read from its
            "Image Analysis" entry. It is not modified.
        consolidated_data: Dict with "characters", "objects" and "places"
            sub-dicts keyed by entity name; updated in place. The first record
            seen for a name is kept and gains a "merged_from" list of frames.
        merge_tracking: Dict with the same three sub-dicts, holding only the
            "merged_from" list per name; updated in place.
        json_file_name: Name of the analysis file; the frame number is taken
            from its ``_frame_<n>`` part.

    Entities without a "Name" and malformed records are skipped.
    """
    frame_number = _frame_number_from_filename(json_file_name)

    analysis = json_data.get("Image Analysis") if isinstance(json_data, dict) else None
    if not isinstance(analysis, dict):
        return

    for details in _entity_details(analysis, "Characters", "Character Details"):
        _record_entity("characters", details, frame_number, consolidated_data, merge_tracking)

    for details in _entity_details(analysis, "Objects", "Objects Details"):
        _record_entity("objects", details, frame_number, consolidated_data, merge_tracking)

    _record_entity("places", analysis.get("Place"), frame_number, consolidated_data, merge_tracking)

# Function to cluster entities using OpenAI API and name the clusters
async def cluster_entities(api_key, entities):
    """Ask the OpenAI chat completions endpoint to group entity names that refer to the same thing.

    Args:
        api_key: Bearer token sent in the Authorization header.
        entities: Dict with "characters", "objects" and "places" sub-dicts;
            only their keys (the entity names) are sent to the model.

    Returns:
        The model's reply as text (expected to be a JSON object with
        "Characters Clusters", "Objects Clusters" and "Places Clusters"),
        the message "No entities available to cluster." when all three lists
        are empty, or an empty string when the request fails or carries no text.
    """
    # Generate lists for characters, objects, and places from entities
    character_list = ', '.join(entities['characters'].keys())
    object_list = ', '.join(entities['objects'].keys())
    place_list = ', '.join(entities['places'].keys())

    if not character_list and not object_list and not place_list:
        return "No entities available to cluster."

    # Adjusted OpenAI prompt to return structured output (dictionaries)
    prompt = f"""
    You are tasked with clustering and naming entities from a TV show. Below are lists of characters, objects, and places extracted from different scenes. These lists contain multiple labels for the same entity.

    **Instructions:**

    1. Group the characters, objects, and places that refer to the same entity and suggest a single **final name** for each group.
    2. Return the result as a dictionary where each cluster (key) contains the entities (values) that belong to that cluster.
    3. Use this format:

    {{
      "Characters Clusters": {{
        "Final Name 1": ["Character 1", "Character 2", ...],
        "Final Name 2": ["Character 3", "Character 4", ...]
      }},
      "Objects Clusters": {{
        "Final Name 1": ["Object 1", "Object 2", ...],
        "Final Name 2": ["Object 3", "Object 4", ...]
      }},
      "Places Clusters": {{
        "Final Name 1": ["Place 1", "Place 2", ...],
        "Final Name 2": ["Place 3", "Place 4", ...]
      }}
    }}

    **Characters:**
    {character_list}

    **Objects:**
    {object_list}

    **Places:**
    {place_list}
    """

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }

    payload = {
        "model": "gpt-4",
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": 2000
    }

    async with aiohttp.ClientSession() as session:
        try:
            async with session.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload) as response:
                response_json = await response.json()

                # Debugging: Log the raw OpenAI response before parsing
                print("Raw OpenAI response:")
                print(response_json)

                if response.status != 200:
                    print(f"OpenAI request failed with status code {response.status}.")
                    return ""

                choices = response_json.get('choices') or [{}]
                first_choice = choices[0]

                # A reply cut off at max_tokens is incomplete JSON; say so explicitly
                # because the later decode error would otherwise be hard to diagnose.
                if first_choice.get('finish_reason') == 'length':
                    print("Warning: OpenAI response was truncated at max_tokens; the clusters JSON is likely incomplete.")

                # "content" can be missing or null; always hand the caller a string.
                clusters = (first_choice.get('message') or {}).get('content') or ''
                return clusters
        except Exception as e:
            print(f"Error accessing API response: {e}")
            return ""

# Function to merge clusters with final_final_entities.json, keeping names and frame numbers
def merge_clusters_with_entities(final_entities, clusters):
    merged_entities = {"characters": {}, "objects": {}, "places": {}}

    # Process characters
    character_clusters = clusters.get("Characters Clusters", {})
    for final_name, cluster_items in character_clusters.items():
        merged_entities["characters"][final_name] = {"merged_from": [], "merged_names": []}
        for item in cluster_items:
            if item in final_entities["characters"]:
                # Merge entity details
                entity_data = final_entities["characters"][item]
                merged_entities["characters"][final_name] = {
                    **entity_data,
                    "Name": final_name,
                    "merged_from": list(set(merged_entities["characters"][final_name]["merged_from"] + entity_data["merged_from"])),
                    "merged_names": list(set(merged_entities["characters"][final_name]["merged_names"] + [item]))
                }

    # Process objects
    object_clusters = clusters.get("Objects Clusters", {})
    for final_name, cluster_items in object_clusters.items():
        merged_entities["objects"][final_name] = {"merged_from": [], "merged_names": []}
        for item in cluster_items:
            if item in final_entities["objects"]:
                # Merge entity details
                entity_data = final_entities["objects"][item]
                merged_entities["objects"][final_name] = {
                    **entity_data,
                    "Name": final_name,
                    "merged_from": list(set(merged_entities["objects"][final_name]["merged_from"] + entity_data["merged_from"])),
                    "merged_names": list(set(merged_entities["objects"][final_name]["merged_names"] + [item]))
                }

    # Process places
    place_clusters = clusters.get("Places Clusters", {})
    for final_name, cluster_items in place_clusters.items():
        merged_entities["places"][final_name] = {"merged_from": [], "merged_names": []}
        for item in cluster_items:
            if item in final_entities["places"]:
                # Merge entity details
                entity_data = final_entities["places"][item]
                merged_entities["places"][final_name] = {
                    **entity_data,
                    "Name": final_name,
                    "merged_from": list(set(merged_entities["places"][final_name]["merged_from"] + entity_data["merged_from"])),
                    "merged_names": list(set(merged_entities["places"][final_name]["merged_names"] + [item]))
                }

    return merged_entities

# Main async function for clustering and merging entities
async def cluster_and_merge_entities():
    """Cluster the consolidated entities and save the merged result.

    Reads the entity JSON at ``final_final_json_path``, awaits
    ``cluster_entities`` to obtain the clustering (a JSON string is expected,
    since the result is passed to ``json.loads``), merges it into the entities
    with ``merge_clusters_with_entities`` and writes the outcome with
    ``save_entities_to_json``.

    Returns:
        None. If the clustering response is missing, is not valid JSON, or
        does not decode to a JSON object, an error is printed and nothing is
        saved.
    """
    # Load final final entities
    with open(final_final_json_path, 'r') as file:
        final_entities = json.load(file)

    # Cluster entities using OpenAI API and get the final names for the clusters
    clusters_json = await cluster_entities(api_key, final_entities)

    # Debugging: Print the raw clusters JSON before attempting to parse it
    print("Raw Clusters JSON:", clusters_json)

    # Parse the OpenAI output to a dictionary.
    # TypeError is caught too: json.loads raises it (not JSONDecodeError) when
    # the value is not str/bytes/bytearray, e.g. when no response came back.
    try:
        clusters = json.loads(clusters_json)
    except (json.JSONDecodeError, TypeError):
        print("Error decoding OpenAI response into JSON.")
        return

    # The merge step looks clusters up with .get(), so anything other than a
    # JSON object (list, string, number, null) cannot be merged.
    if not isinstance(clusters, dict):
        print("Error decoding OpenAI response into JSON.")
        return

    # Merge clusters into final final JSON
    merged_final_entities = merge_clusters_with_entities(final_entities, clusters)

    # Save final merged entities to a new JSON
    merged_final_entities_path = '/Users/santiagowon/Dropbox/Santiago/01. Maestria/Tesis/11_Project_Analysed_DB/merged_final_final_entities.json'
    save_entities_to_json(merged_final_entities, merged_final_entities_path)

    print(f"Merged final entities saved to: {merged_final_entities_path}")

# Run the async function to cluster and merge entities.
# This executes as soon as the cell runs (it reads the entities file, calls the
# clustering step and writes the merged JSON). It drives the coroutine on the
# current event loop; the setup cell applies nest_asyncio to handle the
# notebook's already-running loop.
asyncio.get_event_loop().run_until_complete(cluster_and_merge_entities())


Raw OpenAI response:
{'id': 'chatcmpl-A3nSrTNbViqhWGBNkDJQ29YMttuOO', 'object': 'chat.completion', 'created': 1725468489, 'model': 'gpt-4-0613', 'choices': [{'index': 0, 'message': {'role': 'assistant', 'content': '{\n  "Characters Clusters": {\n    "Banana Character": ["Banana character", "Banana 1", "Banana 2", "Banana Man", "Banana Character 1", "Banana Character 2", "Cartoon Banana Character", "Banana Character", "Banana Character A", "Banana Character B", "Yellow Character", "Banana Character Left", "Banana Character Right", "Banana Character Standing", "Fallen Banana Character", "Banana character 1", "Banana character 2"], \n    "Bear Character": ["Bear Boy", "Bear Girl", "Bear Character 1", "Bear Character 2", "Bear Character 3", "Bear Character", "Bear Child", "Bear 1", "Bear 2", "Bear 3", "Bear", "Light Brown Bear", "Dark Brown Bear", "Bear Cub 1", "Bear Cub 2", "Bear Cub 3", "Bear One", "Bear Two", "Bear Three", "Girl 1", "Girl 2", "Animated bear", "Male Bear", "Female Bear",

### 8) Main Function Execution

In [17]:
import os
import time
import asyncio
from tqdm import tqdm

# Function to process videos in a directory
def process_videos_in_directory(directory_path, output_base_dir):
    """Run the analysis pipeline on every video file found in a directory.

    For each video this calls, in order, ``analyze_video``,
    ``extract_frames_imageio``, ``process_scenes_output`` and
    ``run_additional_processing``. Per-video output goes under
    ``output_base_dir/<video name>/`` in the ``scenes_output`` and
    ``json_output`` sub-directories.

    Args:
        directory_path: Directory scanned (non-recursively) for ``.mp4``,
            ``.avi`` and ``.mkv`` files. The extension match is
            case-insensitive.
        output_base_dir: Directory under which one folder per video is created.

    Returns:
        None. An error on one video is printed and the loop moves on to the
        next video.
    """
    video_extensions = ('.mp4', '.avi', '.mkv')
    # Sorted so the processing order does not depend on os.listdir ordering;
    # lower() so files such as "CLIP.MP4" are not silently ignored.
    video_files = sorted(
        f for f in os.listdir(directory_path)
        if f.lower().endswith(video_extensions)
    )

    if not video_files:
        print("No video files found in the directory.", flush=True)
        return

    with tqdm(total=len(video_files), desc="Processing Videos", unit="video") as pbar:
        for i, video_file in enumerate(video_files):
            start_time = time.time()

            try:
                video_path = os.path.join(directory_path, video_file)
                video_name = os.path.splitext(video_file)[0]
                video_size = os.path.getsize(video_path)

                video_output_dir = os.path.join(output_base_dir, video_name)
                scenes_output_dir = os.path.join(video_output_dir, 'scenes_output')
                json_output_dir = os.path.join(video_output_dir, 'json_output')

                os.makedirs(scenes_output_dir, exist_ok=True)

                print(f"Processing video {i + 1}/{len(video_files)}: {video_file}", flush=True)

                # Scene analysis and frame extraction
                scenes = analyze_video(video_path)
                extract_frames_imageio(video_path, scenes, scenes_output_dir)
                asyncio.run(process_scenes_output(scenes_output_dir, json_output_dir))

                # Time spent so far; run_additional_processing adds its own share
                processing_time = time.time() - start_time

                video_length = get_video_length(video_path)
                # Guard against a zero/unknown length instead of raising
                # ZeroDivisionError and discarding the whole video.
                if video_length:
                    scenes_per_minute = len(scenes) / (video_length / 60)
                else:
                    scenes_per_minute = 0.0

                asyncio.run(run_additional_processing(json_output_dir, scenes_output_dir, video_file, video_size, processing_time, scenes_per_minute, len(scenes), video_length))

                print(f"Finished processing video: {video_file}", flush=True)
            except Exception as e:
                print(f"Error processing video {video_file}: {e}", flush=True)
            finally:
                # Advance for failed videos too, so the bar reflects how many
                # videos have been attempted rather than stalling at 0.
                pbar.update(1)

# Run additional processing for videos
async def run_additional_processing(json_output_dir, image_output_dir, video_file, video_size, processing_time, scenes_per_minute, number_of_scenes, video_length):
    """Consolidate the per-scene analysis of one video and save its summary.

    Runs four steps in order: ``initial_consolidation``,
    ``consolidate_all_entities``, ``handle_portion_comparisons`` and
    ``final_image_to_image_comparison_for_characters``. If a step raises, the
    error is printed and the data produced by the steps that did complete is
    still written with ``save_summary_to_json``.

    Args:
        json_output_dir: Directory holding the per-scene ``.json`` files.
        image_output_dir: Directory passed to ``initial_consolidation``
            together with ``json_output_dir``.
        video_file: Video file name, used in messages and in the summary.
        video_size: Passed through to the summary.
        processing_time: Seconds already spent on this video; the time spent
            in this function is added before saving.
        scenes_per_minute: Passed through to the summary.
        number_of_scenes: Passed through to the summary.
        video_length: Passed through to the summary.

    Returns:
        None. The summary is written next to ``json_output_dir`` as
        ``<json_output_dir name>_summary.json``. Nothing is written when even
        the first step failed.
    """
    api_key = os.getenv("OPENAI_API_KEY")
    api_usage = {"total_api_calls": 0, "total_tokens_used": 0, "model_used": "gpt-4"}
    old_to_new_name_map = {}

    start_time = time.time()
    all_json_files_data = []
    merged_entities_log = []  # Initialize merged_entities_log here
    # Stays None until step 1 succeeds; without this a failure in step 1 left
    # the name unbound and the save call below raised UnboundLocalError.
    consolidated_data = None

    # Load all JSON files data (sorted for a reproducible order in the summary)
    json_files = sorted(f for f in os.listdir(json_output_dir) if f.endswith('.json'))
    for json_file in json_files:
        with open(os.path.join(json_output_dir, json_file), 'r') as file:
            all_json_files_data.append(json.load(file))

    with tqdm(total=4, desc="Overall Progress", unit="step") as overall_pbar:
        try:
            # Step 1: Initial extraction and consolidation
            consolidated_data = await initial_consolidation(json_output_dir, image_output_dir)
            print_consolidated_data(consolidated_data, title="Initial")
            overall_pbar.update(1)

            # Step 2: Consolidate entities using OpenAI
            consolidated_data, consolidated_merged_entities_log = await consolidate_all_entities(consolidated_data, api_key, api_usage, old_to_new_name_map)
            merged_entities_log.extend(consolidated_merged_entities_log)  # Combine logs
            print_consolidated_data(consolidated_data, title="After OpenAI Consolidation")
            overall_pbar.update(1)

            # Step 3: Handle image-to-image comparisons for portions
            consolidated_data, portion_merged_log = await handle_portion_comparisons(consolidated_data, api_key, api_usage)
            merged_entities_log.extend(portion_merged_log)  # Combine logs
            print_consolidated_data(consolidated_data, title="After Portion Comparisons")
            overall_pbar.update(1)

            # Step 4: Final image-to-image comparison for characters
            consolidated_data, final_character_merged_log = await final_image_to_image_comparison_for_characters(consolidated_data, api_key, api_usage)
            merged_entities_log.extend(final_character_merged_log)  # Combine logs
            print_consolidated_data(consolidated_data, title="Final After Image Comparisons")
            overall_pbar.update(1)

        except Exception as e:
            # Best effort: keep whatever the completed steps produced
            print(f"Error during additional processing for video {video_file}: {e}", flush=True)

    end_time = time.time()
    processing_time += (end_time - start_time)

    if consolidated_data is None:
        # Step 1 itself failed, so there is nothing meaningful to summarise
        print(f"No consolidated data for video {video_file}; summary not saved.", flush=True)
        return

    # Save the final summary using the comprehensive `save_summary_to_json` function
    summary_output_path = os.path.join(os.path.dirname(json_output_dir), f"{os.path.basename(json_output_dir)}_summary.json")
    save_summary_to_json(consolidated_data, summary_output_path, video_file, video_size, processing_time, merged_entities_log, all_json_files_data, api_usage, scenes_per_minute, number_of_scenes, video_length)


# Main script execution: process every video in `video_directory` and write
# the per-video results under `output_base_directory`.
# NOTE(review): this module-level map is not read by the functions defined in
# this cell (run_additional_processing builds its own local map) — presumably
# helpers in other cells rely on it; confirm before removing.
old_to_new_name_map = {}
# NOTE(review): absolute, machine-specific paths; adjust before running on
# another machine.
video_directory = '/Users/santiagowon/Dropbox/Santiago/01. Maestria/Tesis/02_Video_DB'
output_base_directory = '/Users/santiagowon/Dropbox/Santiago/01. Maestria/Tesis/11_Project_Analysed_DB'

process_videos_in_directory(video_directory, output_base_directory)
# Printed even when individual videos failed: per-video errors are reported
# inside process_videos_in_directory and do not propagate to this cell.
print("FINISHED PROCESSING ALL VIDEOS.", flush=True)

Processing Videos:   0%|          | 0/1 [00:00<?, ?video/s]

Processing video 1/1: Bananas_in_pyjamas.mp4


ERROR:pyscenedetect:VideoManager is deprecated and will be removed.
INFO:pyscenedetect:Loaded 1 video, framerate: 25.000 FPS, resolution: 640 x 360
INFO:pyscenedetect:Downscale factor set to 2, effective resolution: 320 x 180
INFO:pyscenedetect:Detecting scenes...
INFO:root:Detected 159 scenes:
INFO:root:Scene 1: Start 00:00:00.000 / Frame 0, End 00:00:08.360 / Frame 209
INFO:root:Scene 2: Start 00:00:08.360 / Frame 209, End 00:00:19.320 / Frame 483
INFO:root:Scene 3: Start 00:00:19.320 / Frame 483, End 00:00:24.040 / Frame 601
INFO:root:Scene 4: Start 00:00:24.040 / Frame 601, End 00:00:24.800 / Frame 620
INFO:root:Scene 5: Start 00:00:24.800 / Frame 620, End 00:00:26.600 / Frame 665
INFO:root:Scene 6: Start 00:00:26.600 / Frame 665, End 00:00:28.600 / Frame 715
INFO:root:Scene 7: Start 00:00:28.600 / Frame 715, End 00:00:29.960 / Frame 749
INFO:root:Scene 8: Start 00:00:29.960 / Frame 749, End 00:00:31.880 / Frame 797
INFO:root:Scene 9: Start 00:00:31.880 / Frame 797, End 00:00:35.24

Extracted and saved middle frame of scene 1 as /Users/santiagowon/Dropbox/Santiago/01. Maestria/Tesis/11_Project_Analysed_DB/Bananas_in_pyjamas/scenes_output/scene_1_frame_104.jpg
Extracted and saved middle frame of scene 2 as /Users/santiagowon/Dropbox/Santiago/01. Maestria/Tesis/11_Project_Analysed_DB/Bananas_in_pyjamas/scenes_output/scene_2_frame_346.jpg
Extracted and saved middle frame of scene 3 as /Users/santiagowon/Dropbox/Santiago/01. Maestria/Tesis/11_Project_Analysed_DB/Bananas_in_pyjamas/scenes_output/scene_3_frame_542.jpg
Extracted and saved middle frame of scene 4 as /Users/santiagowon/Dropbox/Santiago/01. Maestria/Tesis/11_Project_Analysed_DB/Bananas_in_pyjamas/scenes_output/scene_4_frame_610.jpg
Extracted and saved middle frame of scene 5 as /Users/santiagowon/Dropbox/Santiago/01. Maestria/Tesis/11_Project_Analysed_DB/Bananas_in_pyjamas/scenes_output/scene_5_frame_642.jpg
Extracted and saved middle frame of scene 6 as /Users/santiagowon/Dropbox/Santiago/01. Maestria/Tesi



Saved analysis for scene_1_frame_104.jpg as scene_1_frame_104_analysis.json




Saved analysis for scene_4_frame_610.jpg as scene_4_frame_610_analysis.json




Saved analysis for scene_3_frame_542.jpg as scene_3_frame_542_analysis.json




Saved analysis for scene_5_frame_642.jpg as scene_5_frame_642_analysis.json




Saved analysis for scene_2_frame_346.jpg as scene_2_frame_346_analysis.json




Saved analysis for scene_6_frame_690.jpg as scene_6_frame_690_analysis.json




Saved analysis for scene_7_frame_732.jpg as scene_7_frame_732_analysis.json




Saved analysis for scene_8_frame_773.jpg as scene_8_frame_773_analysis.json




Saved analysis for scene_11_frame_1003.jpg as scene_11_frame_1003_analysis.json




Saved analysis for scene_10_frame_914.jpg as scene_10_frame_914_analysis.json




Saved analysis for scene_12_frame_1066.jpg as scene_12_frame_1066_analysis.json




Saved analysis for scene_9_frame_839.jpg as scene_9_frame_839_analysis.json




Saved analysis for scene_14_frame_1306.jpg as scene_14_frame_1306_analysis.json




Saved analysis for scene_13_frame_1162.jpg as scene_13_frame_1162_analysis.json




Saved analysis for scene_15_frame_1411.jpg as scene_15_frame_1411_analysis.json
Saved analysis for scene_16_frame_1587.jpg as scene_16_frame_1587_analysis.json




Saved analysis for scene_17_frame_1738.jpg as scene_17_frame_1738_analysis.json




Saved analysis for scene_19_frame_1907.jpg as scene_19_frame_1907_analysis.json




Saved analysis for scene_20_frame_2000.jpg as scene_20_frame_2000_analysis.json




Saved analysis for scene_18_frame_1812.jpg as scene_18_frame_1812_analysis.json




Saved analysis for scene_21_frame_2129.jpg as scene_21_frame_2129_analysis.json




Saved analysis for scene_22_frame_2286.jpg as scene_22_frame_2286_analysis.json




Saved analysis for scene_23_frame_2429.jpg as scene_23_frame_2429_analysis.json




Saved analysis for scene_24_frame_2517.jpg as scene_24_frame_2517_analysis.json




Saved analysis for scene_25_frame_2622.jpg as scene_25_frame_2622_analysis.json




Saved analysis for scene_26_frame_2747.jpg as scene_26_frame_2747_analysis.json




Saved analysis for scene_27_frame_2868.jpg as scene_27_frame_2868_analysis.json




Saved analysis for scene_28_frame_2995.jpg as scene_28_frame_2995_analysis.json




Saved analysis for scene_29_frame_3126.jpg as scene_29_frame_3126_analysis.json
Rate limit exceeded, retrying...




Saved analysis for scene_30_frame_3237.jpg as scene_30_frame_3237_analysis.json
Rate limit exceeded, retrying...




Saved analysis for scene_32_frame_3465.jpg as scene_32_frame_3465_analysis.json
Saved analysis for scene_33_frame_3548.jpg as scene_33_frame_3548_analysis.json




Saved analysis for scene_35_frame_3780.jpg as scene_35_frame_3780_analysis.json
Saved analysis for scene_34_frame_3647.jpg as scene_34_frame_3647_analysis.json




Saved analysis for scene_31_frame_3342.jpg as scene_31_frame_3342_analysis.json
Rate limit exceeded, retrying...




Saved analysis for scene_38_frame_4098.jpg as scene_38_frame_4098_analysis.json




Saved analysis for scene_37_frame_3974.jpg as scene_37_frame_3974_analysis.json




Saved analysis for scene_39_frame_4220.jpg as scene_39_frame_4220_analysis.json




Error decoding JSON from OpenAI response for /Users/santiagowon/Dropbox/Santiago/01. Maestria/Tesis/11_Project_Analysed_DB/Bananas_in_pyjamas/scenes_output/scene_36_frame_3897.jpg: Unterminated string starting at: line 114 column 11 (char 3100)
Skipping scene_36_frame_3897.jpg due to invalid OpenAI response.
Saved analysis for scene_40_frame_4316.jpg as scene_40_frame_4316_analysis.json




Saved analysis for scene_42_frame_4503.jpg as scene_42_frame_4503_analysis.json




Saved analysis for scene_41_frame_4430.jpg as scene_41_frame_4430_analysis.json




Saved analysis for scene_43_frame_4559.jpg as scene_43_frame_4559_analysis.json




Error decoding JSON from OpenAI response for /Users/santiagowon/Dropbox/Santiago/01. Maestria/Tesis/11_Project_Analysed_DB/Bananas_in_pyjamas/scenes_output/scene_44_frame_4613.jpg: Unterminated string starting at: line 89 column 48 (char 2936)
Skipping scene_44_frame_4613.jpg due to invalid OpenAI response.




Saved analysis for scene_45_frame_4652.jpg as scene_45_frame_4652_analysis.json




Saved analysis for scene_47_frame_4763.jpg as scene_47_frame_4763_analysis.json




Saved analysis for scene_46_frame_4707.jpg as scene_46_frame_4707_analysis.json




Saved analysis for scene_49_frame_4844.jpg as scene_49_frame_4844_analysis.json




Saved analysis for scene_48_frame_4802.jpg as scene_48_frame_4802_analysis.json




Saved analysis for scene_50_frame_4914.jpg as scene_50_frame_4914_analysis.json




Saved analysis for scene_51_frame_5101.jpg as scene_51_frame_5101_analysis.json




Saved analysis for scene_52_frame_5265.jpg as scene_52_frame_5265_analysis.json




Saved analysis for scene_53_frame_5317.jpg as scene_53_frame_5317_analysis.json




Saved analysis for scene_54_frame_5364.jpg as scene_54_frame_5364_analysis.json
Saved analysis for scene_55_frame_5460.jpg as scene_55_frame_5460_analysis.json




Saved analysis for scene_57_frame_5596.jpg as scene_57_frame_5596_analysis.json




Saved analysis for scene_56_frame_5553.jpg as scene_56_frame_5553_analysis.json
Saved analysis for scene_59_frame_5777.jpg as scene_59_frame_5777_analysis.json




Saved analysis for scene_58_frame_5687.jpg as scene_58_frame_5687_analysis.json




Saved analysis for scene_60_frame_5863.jpg as scene_60_frame_5863_analysis.json




Saved analysis for scene_62_frame_6137.jpg as scene_62_frame_6137_analysis.json




Saved analysis for scene_61_frame_6014.jpg as scene_61_frame_6014_analysis.json




Saved analysis for scene_64_frame_6271.jpg as scene_64_frame_6271_analysis.json




Saved analysis for scene_66_frame_6442.jpg as scene_66_frame_6442_analysis.json




Saved analysis for scene_63_frame_6196.jpg as scene_63_frame_6196_analysis.json




Saved analysis for scene_65_frame_6365.jpg as scene_65_frame_6365_analysis.json




Saved analysis for scene_67_frame_6525.jpg as scene_67_frame_6525_analysis.json




Saved analysis for scene_68_frame_6606.jpg as scene_68_frame_6606_analysis.json




Saved analysis for scene_70_frame_6862.jpg as scene_70_frame_6862_analysis.json




Error decoding JSON from OpenAI response for /Users/santiagowon/Dropbox/Santiago/01. Maestria/Tesis/11_Project_Analysed_DB/Bananas_in_pyjamas/scenes_output/scene_69_frame_6707.jpg: Unterminated string starting at: line 92 column 19 (char 2876)
Skipping scene_69_frame_6707.jpg due to invalid OpenAI response.




Saved analysis for scene_72_frame_7089.jpg as scene_72_frame_7089_analysis.json




Saved analysis for scene_73_frame_7186.jpg as scene_73_frame_7186_analysis.json




Saved analysis for scene_71_frame_6997.jpg as scene_71_frame_6997_analysis.json




Error decoding JSON from OpenAI response for /Users/santiagowon/Dropbox/Santiago/01. Maestria/Tesis/11_Project_Analysed_DB/Bananas_in_pyjamas/scenes_output/scene_74_frame_7306.jpg: Expecting value: line 83 column 17 (char 2863)
Skipping scene_74_frame_7306.jpg due to invalid OpenAI response.




Saved analysis for scene_76_frame_7485.jpg as scene_76_frame_7485_analysis.json




Saved analysis for scene_77_frame_7715.jpg as scene_77_frame_7715_analysis.json




Error decoding JSON from OpenAI response for /Users/santiagowon/Dropbox/Santiago/01. Maestria/Tesis/11_Project_Analysed_DB/Bananas_in_pyjamas/scenes_output/scene_75_frame_7413.jpg: Unterminated string starting at: line 89 column 26 (char 2897)
Skipping scene_75_frame_7413.jpg due to invalid OpenAI response.




Saved analysis for scene_78_frame_8014.jpg as scene_78_frame_8014_analysis.json




Saved analysis for scene_79_frame_8136.jpg as scene_79_frame_8136_analysis.json




Saved analysis for scene_82_frame_8637.jpg as scene_82_frame_8637_analysis.json




Saved analysis for scene_80_frame_8265.jpg as scene_80_frame_8265_analysis.json




Saved analysis for scene_81_frame_8497.jpg as scene_81_frame_8497_analysis.json




Saved analysis for scene_84_frame_8745.jpg as scene_84_frame_8745_analysis.json




Saved analysis for scene_83_frame_8673.jpg as scene_83_frame_8673_analysis.json




Saved analysis for scene_86_frame_8886.jpg as scene_86_frame_8886_analysis.json




Saved analysis for scene_85_frame_8821.jpg as scene_85_frame_8821_analysis.json




Rate limit exceeded, retrying...
Saved analysis for scene_87_frame_8952.jpg as scene_87_frame_8952_analysis.json




Saved analysis for scene_89_frame_9155.jpg as scene_89_frame_9155_analysis.json




Saved analysis for scene_88_frame_9013.jpg as scene_88_frame_9013_analysis.json




Saved analysis for scene_91_frame_9391.jpg as scene_91_frame_9391_analysis.json




Saved analysis for scene_90_frame_9277.jpg as scene_90_frame_9277_analysis.json




Saved analysis for scene_92_frame_9529.jpg as scene_92_frame_9529_analysis.json




Saved analysis for scene_93_frame_9634.jpg as scene_93_frame_9634_analysis.json




Saved analysis for scene_94_frame_9733.jpg as scene_94_frame_9733_analysis.json




Saved analysis for scene_95_frame_9886.jpg as scene_95_frame_9886_analysis.json




Saved analysis for scene_96_frame_10046.jpg as scene_96_frame_10046_analysis.json




Error decoding JSON from OpenAI response for /Users/santiagowon/Dropbox/Santiago/01. Maestria/Tesis/11_Project_Analysed_DB/Bananas_in_pyjamas/scenes_output/scene_97_frame_10198.jpg: Expecting property name enclosed in double quotes: line 87 column 32 (char 2947)
Skipping scene_97_frame_10198.jpg due to invalid OpenAI response.




Saved analysis for scene_99_frame_10399.jpg as scene_99_frame_10399_analysis.json




Saved analysis for scene_98_frame_10350.jpg as scene_98_frame_10350_analysis.json




Saved analysis for scene_101_frame_10492.jpg as scene_101_frame_10492_analysis.json




Saved analysis for scene_100_frame_10438.jpg as scene_100_frame_10438_analysis.json




Saved analysis for scene_103_frame_10616.jpg as scene_103_frame_10616_analysis.json




Saved analysis for scene_104_frame_10709.jpg as scene_104_frame_10709_analysis.json




Error decoding JSON from OpenAI response for /Users/santiagowon/Dropbox/Santiago/01. Maestria/Tesis/11_Project_Analysed_DB/Bananas_in_pyjamas/scenes_output/scene_102_frame_10537.jpg: Unterminated string starting at: line 90 column 11 (char 2867)
Skipping scene_102_frame_10537.jpg due to invalid OpenAI response.




Saved analysis for scene_105_frame_10772.jpg as scene_105_frame_10772_analysis.json




Saved analysis for scene_108_frame_11178.jpg as scene_108_frame_11178_analysis.json




Saved analysis for scene_107_frame_11084.jpg as scene_107_frame_11084_analysis.json




Saved analysis for scene_110_frame_11458.jpg as scene_110_frame_11458_analysis.json




Error decoding JSON from OpenAI response for /Users/santiagowon/Dropbox/Santiago/01. Maestria/Tesis/11_Project_Analysed_DB/Bananas_in_pyjamas/scenes_output/scene_106_frame_10921.jpg: Expecting property name enclosed in double quotes: line 89 column 68 (char 2954)
Skipping scene_106_frame_10921.jpg due to invalid OpenAI response.




Error decoding JSON from OpenAI response for /Users/santiagowon/Dropbox/Santiago/01. Maestria/Tesis/11_Project_Analysed_DB/Bananas_in_pyjamas/scenes_output/scene_109_frame_11325.jpg: Expecting value: line 96 column 31 (char 2911)
Skipping scene_109_frame_11325.jpg due to invalid OpenAI response.




Saved analysis for scene_112_frame_11702.jpg as scene_112_frame_11702_analysis.json




Error decoding JSON from OpenAI response for /Users/santiagowon/Dropbox/Santiago/01. Maestria/Tesis/11_Project_Analysed_DB/Bananas_in_pyjamas/scenes_output/scene_111_frame_11582.jpg: Unterminated string starting at: line 89 column 61 (char 2889)
Skipping scene_111_frame_11582.jpg due to invalid OpenAI response.




Saved analysis for scene_114_frame_12113.jpg as scene_114_frame_12113_analysis.json




Saved analysis for scene_113_frame_11811.jpg as scene_113_frame_11811_analysis.json




Saved analysis for scene_115_frame_12382.jpg as scene_115_frame_12382_analysis.json




Saved analysis for scene_117_frame_12584.jpg as scene_117_frame_12584_analysis.json




Error decoding JSON from OpenAI response for /Users/santiagowon/Dropbox/Santiago/01. Maestria/Tesis/11_Project_Analysed_DB/Bananas_in_pyjamas/scenes_output/scene_116_frame_12485.jpg: Expecting ',' delimiter: line 83 column 19 (char 2947)
Skipping scene_116_frame_12485.jpg due to invalid OpenAI response.




Saved analysis for scene_118_frame_12768.jpg as scene_118_frame_12768_analysis.json




Saved analysis for scene_121_frame_13098.jpg as scene_121_frame_13098_analysis.json




Saved analysis for scene_120_frame_12998.jpg as scene_120_frame_12998_analysis.json




Error decoding JSON from OpenAI response for /Users/santiagowon/Dropbox/Santiago/01. Maestria/Tesis/11_Project_Analysed_DB/Bananas_in_pyjamas/scenes_output/scene_119_frame_12935.jpg: Unterminated string starting at: line 95 column 11 (char 2947)
Skipping scene_119_frame_12935.jpg due to invalid OpenAI response.




Saved analysis for scene_122_frame_13240.jpg as scene_122_frame_13240_analysis.json




Saved analysis for scene_123_frame_13397.jpg as scene_123_frame_13397_analysis.json




Saved analysis for scene_124_frame_13535.jpg as scene_124_frame_13535_analysis.json




Saved analysis for scene_125_frame_13612.jpg as scene_125_frame_13612_analysis.json




Saved analysis for scene_127_frame_13747.jpg as scene_127_frame_13747_analysis.json




Saved analysis for scene_126_frame_13665.jpg as scene_126_frame_13665_analysis.json




Saved analysis for scene_128_frame_13906.jpg as scene_128_frame_13906_analysis.json




Saved analysis for scene_130_frame_14121.jpg as scene_130_frame_14121_analysis.json




Saved analysis for scene_129_frame_14053.jpg as scene_129_frame_14053_analysis.json




Saved analysis for scene_131_frame_14180.jpg as scene_131_frame_14180_analysis.json




Saved analysis for scene_133_frame_14473.jpg as scene_133_frame_14473_analysis.json




Saved analysis for scene_135_frame_14653.jpg as scene_135_frame_14653_analysis.json




Error decoding JSON from OpenAI response for /Users/santiagowon/Dropbox/Santiago/01. Maestria/Tesis/11_Project_Analysed_DB/Bananas_in_pyjamas/scenes_output/scene_132_frame_14314.jpg: Unterminated string starting at: line 113 column 26 (char 3059)
Skipping scene_132_frame_14314.jpg due to invalid OpenAI response.




Saved analysis for scene_134_frame_14551.jpg as scene_134_frame_14551_analysis.json




Saved analysis for scene_136_frame_14733.jpg as scene_136_frame_14733_analysis.json




Saved analysis for scene_139_frame_15009.jpg as scene_139_frame_15009_analysis.json




Saved analysis for scene_137_frame_14834.jpg as scene_137_frame_14834_analysis.json




Saved analysis for scene_138_frame_14947.jpg as scene_138_frame_14947_analysis.json




Saved analysis for scene_140_frame_15062.jpg as scene_140_frame_15062_analysis.json




Saved analysis for scene_141_frame_15104.jpg as scene_141_frame_15104_analysis.json




Saved analysis for scene_143_frame_15317.jpg as scene_143_frame_15317_analysis.json




Saved analysis for scene_142_frame_15187.jpg as scene_142_frame_15187_analysis.json




Saved analysis for scene_144_frame_15473.jpg as scene_144_frame_15473_analysis.json




Saved analysis for scene_145_frame_15663.jpg as scene_145_frame_15663_analysis.json




Saved analysis for scene_146_frame_15808.jpg as scene_146_frame_15808_analysis.json




Saved analysis for scene_151_frame_16243.jpg as scene_151_frame_16243_analysis.json




Error decoding JSON from OpenAI response for /Users/santiagowon/Dropbox/Santiago/01. Maestria/Tesis/11_Project_Analysed_DB/Bananas_in_pyjamas/scenes_output/scene_147_frame_15955.jpg: Unterminated string starting at: line 88 column 53 (char 2879)
Skipping scene_147_frame_15955.jpg due to invalid OpenAI response.
Saved analysis for scene_149_frame_16133.jpg as scene_149_frame_16133_analysis.json




Saved analysis for scene_150_frame_16170.jpg as scene_150_frame_16170_analysis.json




Saved analysis for scene_148_frame_16085.jpg as scene_148_frame_16085_analysis.json




Saved analysis for scene_153_frame_16357.jpg as scene_153_frame_16357_analysis.json




Saved analysis for scene_156_frame_16616.jpg as scene_156_frame_16616_analysis.json




Error decoding JSON from OpenAI response for /Users/santiagowon/Dropbox/Santiago/01. Maestria/Tesis/11_Project_Analysed_DB/Bananas_in_pyjamas/scenes_output/scene_154_frame_16390.jpg: Unterminated string starting at: line 113 column 26 (char 3079)
Skipping scene_154_frame_16390.jpg due to invalid OpenAI response.




Saved analysis for scene_155_frame_16482.jpg as scene_155_frame_16482_analysis.json
Saved analysis for scene_152_frame_16324.jpg as scene_152_frame_16324_analysis.json




Saved analysis for scene_157_frame_16697.jpg as scene_157_frame_16697_analysis.json




Saved analysis for scene_159_frame_17004.jpg as scene_159_frame_17004_analysis.json


Processing Scenes: 100%|██████████| 159/159 [06:44<00:00,  2.54s/scene]


Saved analysis for scene_158_frame_16860.jpg as scene_158_frame_16860_analysis.json





Initial Characters:
Banana character: Found in frames [2622, 3342, 3342, 2868, 13240, 13240, 7186, 15473, 15473, 5460, 5460, 3465, 3465, 12998, 12998, 773, 773, 4844]
Bear Boy: Found in frames [6862]
Bear Girl: Found in frames [6862, 6862]
Banana 1: Found in frames [12768, 4430, 3780, 14834, 4559, 5101, 12382, 1587]
Banana 2: Found in frames [12768, 4430, 3780, 14834, 4559, 5101, 12382, 1587]
Mouse: Found in frames [12768, 2747, 11811, 9529, 9391, 11458, 9155, 12113, 16357, 14834, 9634, 5101, 9886, 1411, 4503, 1907]
Banana Man: Found in frames [8745, 8745]
Cartoon Mouse Character: Found in frames [4220, 5777, 13535]
Bear Character 1: Found in frames [13665, 4652, 15187, 14947]
Bear Character 2: Found in frames [13665, 4652, 15187, 14947]
Bear Character 3: Found in frames [13665, 14947]
Banana Character 1: Found in frames [16860, 6271, 4707, 16482, 14551, 6365, 8136, 10438, 5687, 13397, 839, 16616, 346, 8497, 6014, 10350, 16133, 3548, 13612, 6606, 7715, 10772, 9013, 1907]
Banana Charac

Overall Progress:  25%|██▌       | 1/4 [00:00<00:00, 54.42step/s]

Error processing video Bananas_in_pyjamas.mp4: cannot access local variable 'merged_entities_log' where it is not associated with a value



Processing Videos:   0%|          | 0/1 [07:17<?, ?video/s]

FINISHED PROCESSING ALL VIDEOS.



