In [None]:
# Run this code to analyse the whole database of videos and process it

### 1) Imports and setup

In [1]:
import os
import base64
import aiohttp
import asyncio
import json
import imageio
import re
import time
from PIL import Image
import numpy as np
import colorsys
import aiofiles
import nest_asyncio
from tqdm.asyncio import tqdm
from dotenv import load_dotenv
from scenedetect import VideoManager, SceneManager
from scenedetect.detectors import ContentDetector
import logging
from datetime import datetime
import csv
from rapidfuzz import process, fuzz


# Configure logging: INFO level so the scene-detection summaries logged by analyze_video are shown
logging.basicConfig(level=logging.INFO)

# Load environment variables from a .env file, then read the OpenAI API key from the environment.
# api_key is None if OPENAI_API_KEY is not set; it is used for the Authorization header of API requests.
load_dotenv()
api_key = os.getenv("OPENAI_API_KEY")

# Apply nest_asyncio to handle the running event loop
# (presumably so asyncio code can be driven from inside the notebook's own loop — TODO confirm)
nest_asyncio.apply()

# Concurrency limit: process_single_scene acquires this semaphore, so at most 5 scenes are processed at once
semaphore = asyncio.Semaphore(5)

# A dictionary to store characters across frames
# NOTE(review): not referenced by any of the functions visible in this part of the notebook — confirm it is still needed
character_frames = {}


In [2]:
# Track API usage

# Running totals for OpenAI API usage; send_image_to_openai adds to these after every HTTP 200 response.
api_usage = {
    "total_api_calls": 0,
    "total_tokens_used": 0,
    "model_used": "gpt-4"  # Placeholder only: overwritten with the model name reported by each API response
}


### 2) Video Analysis Functions

In [3]:
def analyze_video(video_path, threshold=27.0):
    """Detect scenes in a video with a content detector and return the scene list.

    Args:
        video_path: Path to the video file.
        threshold: Threshold passed to ``ContentDetector``.

    Returns:
        The list returned by ``SceneManager.get_scene_list()``; each item is a
        ``(start, end)`` pair whose elements expose ``get_timecode()`` and
        ``get_frames()``.

    Raises:
        FileNotFoundError: If ``video_path`` does not exist.
    """
    if not os.path.exists(video_path):
        raise FileNotFoundError(f"The video file {video_path} does not exist.")

    video_manager = VideoManager([video_path])
    scene_manager = SceneManager()
    scene_manager.add_detector(ContentDetector(threshold=threshold))

    try:
        video_manager.set_downscale_factor()
        video_manager.start()

        scene_manager.detect_scenes(frame_source=video_manager)
        scene_list = scene_manager.get_scene_list()
    finally:
        # Release the video even when detection fails, so the capture is not leaked.
        video_manager.release()

    logging.info(f'Detected {len(scene_list)} scenes:')
    for i, scene in enumerate(scene_list):
        logging.info(f'Scene {i + 1}: Start {scene[0].get_timecode()} / Frame {scene[0].get_frames()}, '
              f'End {scene[1].get_timecode()} / Frame {scene[1].get_frames()}')

    return scene_list

def get_video_length(video_path):
    """Return the length of a video in seconds (frame count divided by FPS).

    Args:
        video_path: Path to the video file, opened with ``cv2.VideoCapture``.

    Returns:
        ``frame_count / fps`` as a float, or ``0.0`` when the reported FPS is
        not positive (instead of raising ``ZeroDivisionError``).
    """
    import cv2
    cap = cv2.VideoCapture(video_path)
    try:
        fps = cap.get(cv2.CAP_PROP_FPS)
        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    finally:
        # Always release the capture, even if reading a property fails.
        cap.release()

    # Without a positive FPS the duration cannot be computed; avoid dividing by zero.
    if not fps or fps <= 0:
        return 0.0
    return frame_count / fps

### 3) Frame Extraction Function

In [4]:
def extract_frames_imageio(video_path, scenes, output_dir):
    """Save the middle frame of every scene as a JPEG in ``output_dir``.

    Args:
        video_path: Path to the video, opened with ``imageio.get_reader``.
        scenes: Iterable of ``(start, end)`` pairs; both values are converted
            with ``int()`` to obtain frame numbers.
        output_dir: Directory for the extracted frames; created if missing.

    Files are named ``scene_<n>_frame_<middle_frame>.jpg`` with ``n`` starting at 1.
    """
    os.makedirs(output_dir, exist_ok=True)

    reader = imageio.get_reader(video_path)
    try:
        for i, scene in enumerate(scenes):
            start_frame, end_frame = scene

            # Convert FrameTimecode to integer frame numbers
            start_frame_num = int(start_frame)
            end_frame_num = int(end_frame)

            # Calculate the middle frame of the scene
            middle_frame = (start_frame_num + end_frame_num) // 2

            # Set the reader to the middle frame and extract it
            reader.set_image_index(middle_frame)
            frame = reader.get_next_data()

            # Save the frame as an image with frame number in the filename
            output_path = os.path.join(output_dir, f'scene_{i + 1}_frame_{middle_frame}.jpg')
            imageio.imwrite(output_path, frame)
            print(f"Extracted and saved middle frame of scene {i + 1} as {output_path}", flush=True)
    finally:
        # Close the reader even if a frame cannot be read or written.
        reader.close()


### 4) Image Processing Function

In [5]:
async def encode_image(image_path):
    """Read ``image_path`` asynchronously and return its contents as a base64 string."""
    async with aiofiles.open(image_path, "rb") as handle:
        raw_bytes = await handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode('utf-8')

def get_color_category(color):
    """Classify an RGB colour into a coarse colour category.

    Args:
        color: Iterable of three 0-255 channel values ``(r, g, b)``.

    Returns:
        One of ``"red"``, ``"yellow"``, ``"green"``, ``"blue"``, ``"white"``,
        ``"black"`` or ``"non-primary"``.
    """
    r, g, b = [x / 255.0 for x in color]
    h, l, s = colorsys.rgb_to_hls(r, g, b)

    # Achromatic pixels must be handled before looking at the hue: colorsys
    # reports hue 0.0 when saturation is 0, so white, black and grey would
    # otherwise all fall into the "red" hue range.
    if s <= 0.1:
        if l >= 0.9:
            return "white"
        if l <= 0.1:
            return "black"
        return "non-primary"

    # Hue ranges on the 0-1 scale returned by colorsys; checked in this order.
    # NOTE(review): reds whose hue wraps around towards 1.0 are not covered by
    # the "red" range — confirm whether that is intended.
    primary_hues = {
        "red": (0.0, 0.1),
        "yellow": (0.1, 0.18),
        "green": (0.25, 0.4),
        "blue": (0.55, 0.75),
    }

    for color_name, hue_range in primary_hues.items():
        if hue_range[0] <= h <= hue_range[1]:
            return color_name

    return "non-primary"

def analyze_image_colors(image_path):
    """Count the pixels of an image per colour category and report the dominant group.

    Args:
        image_path: Path of the image to analyse.

    Returns:
        ``{"Color Analysis": {"Colors Found": {...}, "Dominance": ...}}`` where
        ``"Colors Found"`` maps each category name to its ``"Pixel Count"`` and
        a ``"Percentage"`` string with two decimals, and ``"Dominance"`` is
        ``"Primary colors"`` or ``"Non-primary colors"``.
    """
    # Use the image as a context manager so the underlying file is closed
    # as soon as the pixel data has been copied into the array.
    with Image.open(image_path) as image:
        data = np.array(image.convert('RGB'))

    # Classify each distinct colour once and weight it by how often it occurs.
    unique_colors, counts = np.unique(data.reshape(-1, data.shape[2]), axis=0, return_counts=True)
    total_pixels = int(counts.sum())

    color_counts = {
        "Red": 0,
        "Yellow": 0,
        "Green": 0,
        "Blue": 0,
        "White": 0,
        "Black": 0,
        "Non-primary": 0
    }

    for color, count in zip(unique_colors, counts):
        category = get_color_category(tuple(color))
        color_counts[category.capitalize()] += int(count)

    color_percentages = {color: (count / total_pixels) * 100 for color, count in color_counts.items()}
    # NOTE(review): green is counted on its own and is not part of the primary total — confirm this is intended.
    primary_total = color_counts["Red"] + color_counts["Yellow"] + color_counts["Blue"]
    color_dominance = "Primary colors" if primary_total > color_counts["Non-primary"] else "Non-primary colors"

    return {
        "Color Analysis": {
            "Colors Found": {
                color: {
                    "Pixel Count": count,
                    "Percentage": f"{color_percentages[color]:.2f}%"
                } for color, count in color_counts.items()
            },
            "Dominance": color_dominance
        }
    }


### 5) OpenAI API Interaction

In [6]:
async def send_image_to_openai(image_path, base64_image, retries=3):
    """Send one base64-encoded frame to the OpenAI chat completions API and parse the JSON reply.

    Args:
        image_path: Path of the image; used for log messages and to pick the
            MIME type of the data URL.
        base64_image: Base64 text of the image contents.
        retries: Maximum number of attempts.

    Returns:
        The parsed JSON content of the model's reply, or ``None`` when the
        request fails, the reply is not valid JSON, or all attempts are used up.

    HTTP 429 responses and raised exceptions are retried after sleeping
    ``2 ** attempt`` seconds; any other non-200 status returns ``None`` at once.
    Successful calls are added to the module-level ``api_usage`` counters.
    """
    import mimetypes

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }

    # Declare the real image type in the data URL. The extracted frames are
    # .jpg files, so hard-coding "image/png" mislabelled them.
    guessed_type = mimetypes.guess_type(image_path)[0]
    mime_type = guessed_type if guessed_type and guessed_type.startswith("image/") else "image/jpeg"

    payload = {
        "model": "gpt-4o-mini",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": """
                        Analyze the following image and provide a detailed description in the format of JSON only. Ensure the output is strictly in JSON format without any additional text or code block formatting. The JSON should include the following standardized labels:

                        1. **Image Analysis**: The root dictionary containing all analysis data.
                        
                        2. **Suitability**:
                            - "Partial Nudity": Boolean indicating the presence of nudity (e.g., bare torso or non explicits).
                            - "Full Nudity": Boolean indicating the presence of explicit nudity of private body parts.
                            - "Obscene Gestures": Boolean indicating the presence of obscene gestures.
                            - "Alcohol": Boolean indicating the presence of alcohol.
                            - "Drugs": Boolean indicating the presence of drugs.
                            - "Addictions": Boolean indicating the presence of addictions.

                        3. **Objects**:
                            - "Total Objects Identified": Integer representing the total number of objects identified.
                            - "Average Features Per Object": Float representing the average number of features per object.
                            - "Objects Details": Dictionary containing details of each object, where each object is labeled as "Object_1", "Object_2", etc., with the following structure:
                                - "Name": The name of the object - as simplest and descriptive possible.
                                - "Portion Boolean": 0-1 output indicating if the object is a portion of a larger object (1) or a complete object (0). For example, a leg is a portion of a human. However, if the object is just cropped but clearly identifiable as a complete object, it should be considered a complete object.
                                - "Color": The color of the object.
                                - "Features": List of features of the object.
                                - "Total Features": Integer representing the number of features for the object.

                        4. **Place**:
                            - "Name": The name of the place - as simplest and descriptive (dont use generic such as cartoon, unknown, setting).
                            - "Certainty Boolean": 0-1 output indicating if the place is clearly identifiable (1) or not (0).
                            - "Fantasy/Adventurous Place": Boolean (0-1) indicating whether the place is classified as a fantasy/adventurous place or not.
                            - "Explanation": Detailed explanation of why the place is classified as fantasy/adventurous or not. Fantasy places are those that do not exist in reality, and adventurous places are defined as those involving clear statements of traveling to space or another country.

                        5. **Characters**:
                            - "Total Characters Identified": Integer representing the total number of characters identified.
                            - "Average Features Per Character": Float representing the average number of features per character.
                            - "Character Details": Dictionary containing details of each character, where each character is labeled as "Character_1", "Character_2", etc., with the following structure:
                                - "Name": The name of the character - as simplest and descriptive as possible.
                                - "Portion Boolean": 0-1 output indicating if the character is a portion of a larger character (1) or a complete character (0). For example, a leg is a portion of a human. However, if the character is just cropped but clearly identifiable as a complete character, it should be considered a complete character.
                                - "Human or Non-Human": 0-1 output indicating if the character appears human (1) or non-human (0). Anthropomorphized characters or any other combination not fully human are considered non-human.
                                - "Physical Features": List of physical features of the character.
                                - "Explanation": Explanation for why the character is classified as human or non-human, and why these physical features are inferred.
                                - "Age": Expected age range of the character (a single number).
                            **Note**: If the "character" consists of only a part of a body (such as a hand, leg, or face without enough distinguishing features to identify it as a complete character), do not count it as a "character."

                        Ensure that the structure of the JSON output strictly adheres to these standardized labels.
                        """
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:{mime_type};base64,{base64_image}"
                        }
                    }
                ]
            }
        ],
        # NOTE(review): 750 tokens may truncate the JSON for frames with many objects — confirm.
        "max_tokens": 750
    }

    for attempt in range(retries):
        try:
            async with aiohttp.ClientSession() as session:
                async with session.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload) as response:
                    status = response.status

                    if status == 429:
                        print("Rate limit exceeded, retrying...")
                        await asyncio.sleep(2 ** attempt)
                        continue
                    elif status == 200:
                        content = await response.json()

                        # Track API usage
                        api_usage['total_api_calls'] += 1
                        api_usage['total_tokens_used'] += content.get('usage', {}).get('total_tokens', 0)
                        api_usage['model_used'] = content.get('model', 'gpt-4o-mini')

                        # Handle and return JSON content
                        if 'choices' in content:
                            message_content = (content['choices'][0].get('message', {}).get('content') or '').strip()
                            # The model sometimes wraps its answer in a Markdown code fence
                            # despite the prompt; strip the fence so the JSON still parses.
                            if message_content.startswith("```"):
                                message_content = re.sub(r"^```[A-Za-z]*\s*|\s*```$", "", message_content).strip()
                            try:
                                return json.loads(message_content)
                            except json.JSONDecodeError as e:
                                print(f"Error decoding JSON from OpenAI response for {image_path}: {e}")
                                return None
                        else:
                            print(f"Unexpected response format from OpenAI API for {image_path}.")
                            return None
                    else:
                        print(f"Request failed with status code {status} for {image_path}.")
                        return None
        except aiohttp.ClientError as e:
            print(f"Request failed due to a client error: {e}")
            await asyncio.sleep(2 ** attempt)
        except Exception as e:
            print(f"Unexpected error occurred: {e}")
            await asyncio.sleep(2 ** attempt)
    return None


### 6) Scene Processing Functions

In [7]:
async def process_scenes_output(output_dir, json_output_dir):
    """Analyse every ``.jpg`` frame in ``output_dir`` concurrently.

    Creates ``json_output_dir`` if needed, orders the frames by the number
    returned by ``extract_scene_number`` and runs ``process_single_scene`` for
    each of them under a shared progress bar.
    """
    os.makedirs(json_output_dir, exist_ok=True)

    frame_files = [name for name in os.listdir(output_dir) if name.endswith('.jpg')]
    frame_files.sort(key=extract_scene_number)

    with tqdm(total=len(frame_files), desc="Processing Scenes", unit="scene") as pbar:
        pending = []
        for index, frame_file in enumerate(frame_files):
            pending.append(process_single_scene(index, frame_file, output_dir, json_output_dir, pbar))
        await asyncio.gather(*pending)


async def process_single_scene(i, scene, output_dir, json_output_dir, pbar):
    """Analyse one extracted frame and write the combined result as JSON.

    Args:
        i: Index of the frame in the sorted frame list (not used in the body).
        scene: File name of the frame inside ``output_dir``.
        output_dir: Directory holding the extracted frames.
        json_output_dir: Directory the ``*_analysis.json`` file is written to.
        pbar: Progress bar; advanced by one when this frame is finished,
            whether it was saved, skipped, or failed with an exception.

    The colour analysis and the OpenAI analysis are merged under
    ``"Image Analysis"``; keys from the OpenAI reply win on a name clash.
    """
    async with semaphore:  # Limit concurrent execution
        try:
            scene_path = os.path.join(output_dir, scene)

            # Encode image in base64
            base64_image = await encode_image(scene_path)

            # Perform color analysis
            color_analysis_result = analyze_image_colors(scene_path)

            # Send image to OpenAI for further analysis
            openai_response = await send_image_to_openai(scene_path, base64_image)

            # A usable reply is a non-empty JSON object; valid JSON of another
            # shape (e.g. a list) would otherwise crash on .get() below.
            if not openai_response or not isinstance(openai_response, dict):
                print(f"Skipping {scene} due to invalid OpenAI response.")
                return

            # Combine both results, and include the reference to the image file
            final_output = {
                "Image File": scene,
                "Image Analysis": {
                    **color_analysis_result["Color Analysis"],
                    **openai_response.get("Image Analysis", {})
                }
            }

            # The filename already includes the scene number and frame number
            output_filename = os.path.splitext(scene)[0] + '_analysis.json'
            output_path = os.path.join(json_output_dir, output_filename)

            try:
                async with aiofiles.open(output_path, 'w') as json_file:
                    await json_file.write(json.dumps(final_output, indent=4))
                    print(f"Saved analysis for {scene} as {output_filename}")
            except Exception as e:
                print(f"Failed to save analysis for {scene}: {e}")
        finally:
            # Advance the bar on every exit path so it cannot stall below the total.
            pbar.update(1)


def extract_scene_number(filename):
    """Return the first run of digits in ``filename`` as an int, or -1 if it has none."""
    first_digits = re.search(r'\d+', filename)
    if first_digits is None:
        return -1
    return int(first_digits.group())

def extract_frame_number(filename):
    """Return the number following ``_frame_`` in ``filename`` as an int, or -1 if absent."""
    frame_match = re.search(r'_frame_(\d+)', filename)
    if frame_match is None:
        return -1
    return int(frame_match.group(1))



### 7) Luminance and Edge Density

In [8]:
import os
import cv2
import numpy as np

# Helper function to calculate the average luminance of a frame
def calculate_luminance(image):
    """Return the mean pixel value of ``image`` after cv2's BGR-to-grayscale conversion."""
    return np.mean(cv2.cvtColor(image, cv2.COLOR_BGR2GRAY))

# Function to save the first and last frames of a scene side by side, with a label if strong luminance is detected
def save_frame_pair(luminance_output_dir, scene_index, last_frame, first_frame, strong_luminance=False):
    """Write the two frames around a scene cut side by side as one JPEG.

    The file is named ``scene_<a>_to_<b>.jpg`` with ``a = scene_index + 1`` and
    ``b = scene_index + 2``; ``_STRONG_LUMINANCE`` is appended to the name when
    ``strong_luminance`` is true.
    """
    # Left: last frame of the current scene; right: first frame of the next one.
    side_by_side = np.concatenate((last_frame, first_frame), axis=1)

    label = ""
    if strong_luminance:
        label = "_STRONG_LUMINANCE"

    file_name = f"scene_{scene_index + 1}_to_{scene_index + 2}{label}.jpg"
    output_image_path = os.path.join(luminance_output_dir, file_name)
    cv2.imwrite(output_image_path, side_by_side)
    print(f"Saved frame comparison image to {output_image_path}")

    

# Analyze scene transitions for luminance changes and detect short scenes, saving the images
def analyze_scenes_for_flicker_and_short_scenes(video_path, scenes, video_length, luminance_output_dir, short_scene_threshold=1.0, luminance_threshold=25):
    """Measure luminance jumps at scene cuts and count short scenes.

    Args:
        video_path: Path of the video, opened with ``cv2.VideoCapture``.
        scenes: List of ``(start, end)`` pairs whose elements expose ``get_frames()``.
        video_length: Not used by this function; kept so existing callers keep working.
        luminance_output_dir: Directory for the side-by-side comparison images;
            created if missing.
        short_scene_threshold: A scene shorter than this many seconds counts as short.
        luminance_threshold: A cut whose absolute mean-luminance difference exceeds
            this value counts as a strong transition.

    Returns:
        Dict with ``percentage_strong_luminance_transitions``,
        ``percentage_short_scenes``, ``number_of_strong_luminance_transitions``
        and ``number_of_short_scenes``.
    """
    # cv2.imwrite does not create missing directories, so make sure the target exists.
    os.makedirs(luminance_output_dir, exist_ok=True)

    cap = cv2.VideoCapture(video_path)
    total_scenes = len(scenes)
    strong_luminance_changes = 0
    short_scenes_count = 0

    print(f"Analyzing scenes for flicker and short scene detection...", flush=True)

    try:
        fps = cap.get(cv2.CAP_PROP_FPS)

        # Count short scenes over ALL scenes (the transition loop below skips the
        # last scene and any cut whose frames cannot be read).
        if fps and fps > 0:
            for start_timecode, end_timecode in scenes:
                scene_frames = int(end_timecode.get_frames()) - int(start_timecode.get_frames())
                if scene_frames / fps < short_scene_threshold:
                    short_scenes_count += 1
        else:
            print("Warning: video reports no valid FPS; short scenes cannot be measured.")

        for scene_index in range(total_scenes - 1):
            start_timecode, end_timecode = scenes[scene_index]
            next_start_timecode, next_end_timecode = scenes[scene_index + 1]

            # Convert timecodes to frame numbers
            end_frame = int(end_timecode.get_frames())
            next_start_frame = int(next_start_timecode.get_frames())

            # Get the last frame of the current scene
            cap.set(cv2.CAP_PROP_POS_FRAMES, end_frame - 1)
            ret1, last_frame = cap.read()

            # Get the first frame of the next scene
            cap.set(cv2.CAP_PROP_POS_FRAMES, next_start_frame)
            ret2, first_frame = cap.read()

            if not ret1 or not ret2:
                print(f"Warning: Failed to read frames for scene {scene_index + 1} to {scene_index + 2}. Skipping this scene.")
                continue

            # Calculate luminance for the last frame of the current scene and the first frame of the next scene
            last_luminance = calculate_luminance(last_frame)
            first_luminance = calculate_luminance(first_frame)

            # Check for strong luminance changes
            strong_luminance = abs(first_luminance - last_luminance) > luminance_threshold
            if strong_luminance:
                strong_luminance_changes += 1

            # Save the frame comparison image
            save_frame_pair(luminance_output_dir, scene_index, last_frame, first_frame, strong_luminance)
    finally:
        # Release the capture even if reading or saving a frame fails.
        cap.release()

    # Calculate percentages
    percentage_strong_transitions = (strong_luminance_changes / (total_scenes - 1)) * 100 if total_scenes > 1 else 0
    percentage_short_scenes = (short_scenes_count / total_scenes) * 100 if total_scenes > 0 else 0

    return {
        "percentage_strong_luminance_transitions": percentage_strong_transitions,
        "percentage_short_scenes": percentage_short_scenes,
        "number_of_strong_luminance_transitions": strong_luminance_changes,
        "number_of_short_scenes": short_scenes_count
    }



# Function to calculate edge density in an image and save it in the correct folder
def calculate_edge_density(image_path, edges_output_dir):
    """Compute the percentage of Canny edge pixels in an image and save the edge map.

    Args:
        image_path: Path of the image, read in grayscale.
        edges_output_dir: Directory the edge image is written to; created if missing.
            The file keeps the source name with ``.jpg`` replaced by ``_edges.jpg``.

    Returns:
        Edge pixels as a percentage of all pixels, or ``None`` if anything fails
        (the error is printed, not raised).
    """
    try:
        # Read the image in grayscale
        image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
        if image is None:
            raise ValueError(f"Failed to load image: {image_path}")

        # Apply Canny edge detection
        edges = cv2.Canny(image, 100, 200)

        # Calculate the percentage of edge pixels
        edge_pixels = np.sum(edges > 0)
        total_pixels = image.size
        edge_density = (edge_pixels / total_pixels) * 100

        # cv2.imwrite returns False instead of raising when the target directory
        # is missing, so create it first and only report success when it succeeds.
        os.makedirs(edges_output_dir, exist_ok=True)
        image_filename = os.path.basename(image_path)  # Get the image filename
        edge_image_path = os.path.join(edges_output_dir, f"{image_filename.replace('.jpg', '_edges.jpg')}")
        if cv2.imwrite(edge_image_path, edges):
            print(f"Saved edge detection image to {edge_image_path}")
        else:
            print(f"Warning: could not write edge detection image to {edge_image_path}")

        return edge_density

    except Exception as e:
        print(f"Error processing {image_path}: {e}")
        return None


def analyze_edge_density_for_scenes(scenes_output_dir, edges_output_dir):
    """Average the edge density of every extracted frame in ``scenes_output_dir``.

    Args:
        scenes_output_dir: Directory with the ``.jpg`` frames; files whose name
            contains ``_edges`` are ignored.
        edges_output_dir: Directory passed to ``calculate_edge_density`` for the
            edge images.

    Returns:
        ``{"average_edge_density": value}``; the value is 0 when no frame
        produced a density.
    """
    edge_densities = []

    # Process each frame in the scenes_output_dir (sorted for a stable order)
    for image_file in sorted(os.listdir(scenes_output_dir)):
        if image_file.endswith(".jpg") and "_edges" not in image_file:
            image_path = os.path.join(scenes_output_dir, image_file)

            # calculate_edge_density already writes the edge image. The density is a
            # number (or None), so it must not be handed to cv2.imwrite as an image.
            edge_density = calculate_edge_density(image_path, edges_output_dir)

            # Only add to the list if edge_density is valid
            if edge_density is not None:
                edge_densities.append(edge_density)

    # Calculate the average edge density
    avg_edge_density = sum(edge_densities) / len(edge_densities) if edge_densities else 0

    print(f"Average edge density for all scenes: {avg_edge_density}%")

    # Return the average edge density
    return {"average_edge_density": avg_edge_density}



# Edge detection function
def save_edge_detection_images(scenes_output_dir, edges_output_dir):
    """Run ``calculate_edge_density`` on every frame and print each frame's density.

    ``edges_output_dir`` is created first. Only ``.jpg`` files whose name does
    not contain ``_edges`` are processed.
    """
    os.makedirs(edges_output_dir, exist_ok=True)

    for image_file in os.listdir(scenes_output_dir):
        is_source_frame = image_file.endswith(".jpg") and "_edges" not in image_file
        if not is_source_frame:
            continue

        # The helper both computes the density and writes the edge image.
        frame_path = os.path.join(scenes_output_dir, image_file)
        edge_density = calculate_edge_density(frame_path, edges_output_dir)
        print(f"Edge density for {image_file}: {edge_density}%")



# Example to run the full process with your existing functions
def process_video_with_flicker_analysis(video_path, output_dir, short_scene_threshold=3.0, luminance_threshold=25):
    """Detect the scenes of a video and return its flicker / short-scene statistics.

    Returns the dict produced by ``analyze_scenes_for_flicker_and_short_scenes``,
    or ``None`` when ``analyze_video`` finds no scenes.
    """
    video_length = get_video_length(video_path)
    scenes = analyze_video(video_path)

    if scenes:
        print(f"Total scenes detected: {len(scenes)}")
        # Comparison images are written into output_dir by the analysis step.
        return analyze_scenes_for_flicker_and_short_scenes(
            video_path, scenes, video_length, output_dir, short_scene_threshold, luminance_threshold
        )

    print("No scenes detected.")
    return None

# # Call the function with your video path and output directory
# video_path = "/Users/santiagowon/Dropbox/Santiago/01. Maestria/Tesis/02_Video_DB/Bananas_in_pyjamas copy.mp4"
# output_dir = "/Users/santiagowon/Dropbox/Santiago/01. Maestria/Tesis/11_Project_Analysed_DB/Bananas_in_pyjamas copy"
# process_video_with_flicker_analysis(video_path, output_dir)


### 7) Run whole analysis of each json output

Image Path Construction: get_image_path generates the correct path to the image file based on the JSON filename.

Entity Extraction: `extract_entities_from_json` pulls characters, objects, and places from the JSON data.

Image-to-Image Comparison: `perform_image_to_image_comparison` compares partial objects with full objects using the OpenAI API.

Entity Comparison: `compare_entities` handles both name-based and image-based comparisons to decide whether two entities should be consolidated.

Consolidation: Entities across frames are consolidated into a single summary file that tracks where each entity was found.

Main Execution: The script runs through all JSON files, processes the entities, and saves the consolidated results to a summary JSON file.

Key Features of This Implementation:
Text-Based Comparison: The code first attempts to merge entities based on exact name matches. If no match is found, it uses the OpenAI API to determine if two entities with different names should be merged.

Image-to-Image Comparison: If one of the entities is flagged as a portion, or if names don't match but the entities might still be the same, the code performs an image-to-image comparison using the OpenAI API.

Efficient Processing: The code processes each frame sequentially and logs all merges into merged_entities_log, ensuring you have a record of what entities were merged, including their original names and frames.

No Overwritten Functionality: The original image analysis functionality is preserved and integrated smoothly with the text-based comparisons.

Number of Unique Characters, Objects, and Places: This can be done by counting the keys in the consolidated_data dictionary.
Average Characters per Frame: This can be calculated by summing up all instances of characters found across frames and dividing by the total number of frames where characters appear.
Average Features per Character/Object: Calculate this by summing the features of all characters/objects and dividing by the total number of characters/objects.
Overall Color Analysis: Aggregate the color data from all JSON files.
Filter Compliance: Check for any instances where the filters (e.g., nudity, drugs) are not compliant and log the frame numbers.

In [9]:
import os
import json
import aiohttp
import asyncio
import nest_asyncio
import openai

# Apply nest_asyncio to handle the running event loop
# (already applied in the setup cell; repeated here)
nest_asyncio.apply()

# Initialize OpenAI API: re-read the key from the environment and fail fast if it is missing.
# NOTE(review): if the key only lives in the .env file, this relies on load_dotenv() having run in the setup cell.
api_key = os.getenv("OPENAI_API_KEY")
if not api_key:
    raise ValueError("OpenAI API key is not set.")
# Also hand the key to the openai package
openai.api_key = api_key

#Initial consolidation of entities
def initial_consolidation(json_files):
    """Load every analysis file and consolidate its entities by name.

    Args:
        json_files: Paths of the per-frame analysis JSON files.

    Returns:
        Dict with the keys ``"characters"``, ``"objects"`` and ``"places"``,
        filled in by ``consolidate_from_json``. The merge-tracking dict built
        alongside it is not returned.
    """
    entity_kinds = ("characters", "objects", "places")
    consolidated_data = {kind: {} for kind in entity_kinds}
    merge_tracking = {kind: {} for kind in entity_kinds}

    for json_path in json_files:
        with open(json_path, 'r') as handle:
            frame_analysis = json.load(handle)
        consolidate_from_json(frame_analysis, consolidated_data, merge_tracking, os.path.basename(json_path))

    return consolidated_data


# Function to save entities to JSON
def save_entities_to_json(entities, path):
    """Write ``entities`` to ``path`` as JSON indented by four spaces."""
    with open(path, 'w') as out_file:
        json.dump(entities, fp=out_file, indent=4)

# Helpers and entry point for consolidating the entities of one analysis file
def _as_dict(value):
    """Return ``value`` if it is a dict, otherwise an empty dict."""
    return value if isinstance(value, dict) else {}


def _frame_number_from_name(json_file_name):
    """Return the frame number embedded in an analysis file name, as a string.

    Prefers the digits after ``_frame_``; falls back to the fourth
    underscore-separated part, and finally to the whole name, so an unexpected
    file name no longer raises ``IndexError``.
    """
    import re

    match = re.search(r'_frame_(\d+)', json_file_name)
    if match:
        return match.group(1)
    parts = json_file_name.split('_')
    return parts[3] if len(parts) > 3 else json_file_name


def _record_entity(bucket, tracking, name, details, frame_number):
    """Register ``name`` in ``bucket``/``tracking`` and note the frame it was seen in."""
    if not name:
        return
    if name not in bucket:
        # First sighting: keep these details and start an empty frame list.
        bucket[name] = details
        bucket[name]["merged_from"] = []
        tracking[name] = {"merged_from": []}
    tracking.setdefault(name, {"merged_from": []})
    if frame_number not in bucket[name]["merged_from"]:
        bucket[name]["merged_from"].append(frame_number)
        tracking[name]["merged_from"].append(frame_number)


def consolidate_from_json(json_data, consolidated_data, merge_tracking, json_file_name):
    """Merge the characters, objects and place of one frame analysis into the running totals.

    Args:
        json_data: Parsed analysis of one frame; entities are read from its
            ``"Image Analysis"`` entry.
        consolidated_data: Dict with ``"characters"``, ``"objects"`` and
            ``"places"`` sub-dicts keyed by entity name; updated in place.
        merge_tracking: Dict of the same shape recording, per name, the frames
            it was seen in; updated in place.
        json_file_name: Base name of the analysis file; the frame number is
            taken from it.

    The first details seen for a name are kept; later sightings only add their
    frame number to ``"merged_from"``. Sections or entries that are not dicts
    are skipped instead of raising.
    """
    frame_number = _frame_number_from_name(json_file_name)

    if "Image Analysis" not in json_data:
        return
    analysis = _as_dict(json_data["Image Analysis"])

    characters = _as_dict(_as_dict(analysis.get("Characters")).get("Character Details"))
    for key, details in characters.items():
        if isinstance(details, dict):
            _record_entity(consolidated_data["characters"], merge_tracking["characters"],
                           details.get("Name"), details, frame_number)

    objects = _as_dict(_as_dict(analysis.get("Objects")).get("Objects Details"))
    for key, details in objects.items():
        if isinstance(details, dict):
            _record_entity(consolidated_data["objects"], merge_tracking["objects"],
                           details.get("Name"), details, frame_number)

    place = _as_dict(analysis.get("Place"))
    _record_entity(consolidated_data["places"], merge_tracking["places"],
                   place.get("Name"), place, frame_number)


# API usage counters, updated by cluster_entities below after each response.
# NOTE(review): this rebinds the api_usage name defined in the earlier usage-tracking cell, so running this cell resets the totals collected so far — confirm that is intended.
api_usage = {
    "total_api_calls": 0,
    "total_tokens_used": 0,
    "model_used": "gpt-4",  # Initial value; replaced by the model name reported in each API response
}

# Function to cluster entities using OpenAI API and track token usage
async def cluster_entities(api_key, entities):
    """Ask the OpenAI API to group entity names that refer to the same thing.

    Args:
        api_key: OpenAI API key used for the Authorization header.
        entities: Dict with ``"characters"``, ``"objects"`` and ``"places"``
            sub-dicts; only their keys (the entity names) are sent.

    Returns:
        The parsed JSON reply (expected keys: ``"Characters Clusters"``,
        ``"Objects Clusters"``, ``"Places Clusters"``), ``{}`` when the request
        or the parsing fails, or the string
        ``"No entities available to cluster."`` when there is nothing to send.
        NOTE(review): the string return differs in type from the other
        results — confirm callers handle it.
    """
    import re

    # Generate lists for characters, objects, and places from entities
    character_list = ', '.join(entities['characters'].keys())
    object_list = ', '.join(entities['objects'].keys())
    place_list = ', '.join(entities['places'].keys())

    if not character_list and not object_list and not place_list:
        return "No entities available to cluster."

    # Adjusted OpenAI prompt to return structured output (dictionaries)
    prompt = f"""
    You are tasked with clustering and naming entities from a TV show. Below are lists of characters, objects, and places extracted from different scenes. These lists sometimes contain multiple labels for the same entity.

    **Instructions:**

    1. Group the characters, objects, and places that refer to the same entity and suggest a single **final name** for each group ( 1. be smart what could be the same individual/object in the show one and what couldn't. 2.same for places - if it is another house environment or not the same in general outdoor place).
    2. Return the result as a dictionary where each cluster (key) contains the entities (values) that belong to that cluster.
    3. Use this format:

    {{
      "Characters Clusters": {{
        "Final Name 1": ["Character 1", "Character 2", ...],
        "Final Name 2": ["Character 3", "Character 4", ...]
      }},
      "Objects Clusters": {{
        "Final Name 1": ["Object 1", "Object 2", ...],
        "Final Name 2": ["Object 3", "Object 4", ...]
      }},
      "Places Clusters": {{
        "Final Name 1": ["Place 1", "Place 2", ...],
        "Final Name 2": ["Place 3", "Place 4", ...]
      }}
    }}

    **Characters:**
    {character_list}

    **Objects:**
    {object_list}

    **Places:**
    {place_list}
    """

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }

    payload = {
        "model": "gpt-4",  # Ensure you set the correct model here
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": 2000
    }

    async with aiohttp.ClientSession() as session:
        try:
            async with session.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload) as response:
                response_json = await response.json()

                # Track API usage
                api_usage['total_api_calls'] += 1
                api_usage['total_tokens_used'] += response_json.get('usage', {}).get('total_tokens', 0)
                api_usage['model_used'] = response_json.get('model', 'gpt-4')

                # Report HTTP failures explicitly instead of as a JSON parsing error.
                if response.status != 200:
                    print(f"Clustering request failed with status code {response.status}.")
                    return {}

                choices = response_json.get('choices') or [{}]
                clusters = (choices[0].get('message', {}).get('content') or '').strip()

                # The prompt does not forbid Markdown, so the reply may be wrapped
                # in a code fence; strip it before parsing.
                if clusters.startswith("```"):
                    clusters = re.sub(r"^```[A-Za-z]*\s*|\s*```$", "", clusters).strip()

                return json.loads(clusters)  # Convert the response to JSON
        except Exception as e:
            print(f"Error accessing API response: {e}")
            return {}


# Function to merge clusters with final_final_entities.json, keeping names and frame numbers
def merge_clusters_with_entities(final_entities, clusters):
    """Collapse the consolidated entities into the clusters proposed by the model.

    Args:
        final_entities (dict): Consolidated entities keyed by category
            ("characters", "objects", "places"). Each category maps an entity
            name to its data dict, which may carry a "merged_from" list.
            A missing category is treated as empty.
        clusters (dict): Cluster proposal with the keys "Characters Clusters",
            "Objects Clusters" and "Places Clusters"; each maps a final name to
            the list of entity names grouped under it. Missing keys are
            treated as empty.

    Returns:
        dict: {"characters": {...}, "objects": {...}, "places": {...}} where
        every final name maps to a dict holding:
          * "Name": the final name,
          * "merged_from": the de-duplicated union of the members'
            "merged_from" lists, in first-seen order,
          * "merged_names": the member names that were found in
            final_entities, in first-seen order,
          * every other field copied from the LAST member that was found
            (fields of earlier members are replaced, not combined).
        A cluster with no member present in final_entities keeps only the
        empty "merged_from" / "merged_names" lists.
    """
    category_to_cluster_key = (
        ("characters", "Characters Clusters"),
        ("objects", "Objects Clusters"),
        ("places", "Places Clusters"),
    )
    merged_entities = {category: {} for category, _ in category_to_cluster_key}

    for category, cluster_key in category_to_cluster_key:
        known_entities = final_entities.get(category) or {}
        category_clusters = clusters.get(cluster_key) or {}

        for final_name, cluster_items in category_clusters.items():
            merged = {"merged_from": [], "merged_names": []}
            for item in cluster_items or []:
                if item not in known_entities:
                    continue  # the model named an entity we never consolidated
                entity_data = known_entities[item]
                # An entity without "merged_from" contributes no sources instead
                # of aborting the whole merge with a KeyError.
                entity_sources = list(entity_data.get("merged_from") or [])
                merged = {
                    **entity_data,
                    "Name": final_name,
                    "merged_from": _unique_in_order(merged["merged_from"] + entity_sources),
                    "merged_names": _unique_in_order(merged["merged_names"] + [item]),
                }
            merged_entities[category][final_name] = merged

    return merged_entities


def _unique_in_order(values):
    """Return `values` without duplicates, keeping the first occurrence of each."""
    return list(dict.fromkeys(values))

# Main async function for clustering and merging entities
async def cluster_and_merge_entities(api_key, json_output_dir, video_folder_path):
    """Consolidate the per-scene JSON files, cluster their entities and save the merge.

    Args:
        api_key: Key forwarded to `cluster_entities`.
        json_output_dir: Directory whose `.json` files are consolidated.
        video_folder_path: Directory that receives `final_summary.json`.

    Returns:
        The merged entities, or None when the directory holds no `.json` file
        or when `cluster_entities` returns an empty result.
    """
    json_files = []
    for entry in os.listdir(json_output_dir):
        if entry.endswith('.json'):
            json_files.append(os.path.join(json_output_dir, entry))

    if len(json_files) == 0:
        print("No JSON files found for consolidation.")
        return None

    # Step 1: consolidate the entities found in all JSON files
    final_entities = initial_consolidation(json_files)

    # Step 2: ask the OpenAI API to group the consolidated entities
    clusters = await cluster_entities(api_key, final_entities)
    if not clusters:
        print("No clusters returned from OpenAI API.")
        return None

    # Step 3: fold the clusters back into the consolidated entities and persist them
    merged_final_entities = merge_clusters_with_entities(final_entities, clusters)
    merged_final_entities_path = os.path.join(video_folder_path, 'final_summary.json')
    save_entities_to_json(merged_final_entities, merged_final_entities_path)
    print(f"Merged entities saved to {merged_final_entities_path}")

    return merged_final_entities



# # Example usage
# if __name__ == "__main__":
#     # Define paths
#     json_output_dir = "path/to/json_output"
#     video_folder_path = "path/to/video_folder"
#
#     # Run the async function
#     asyncio.run(cluster_and_merge_entities(api_key, json_output_dir, video_folder_path))


In [10]:
def compute_summary_statistics(entities):
    """Count the merged entities and average their features and appearances.

    Args:
        entities (dict): May hold "characters", "objects" and "places"
            mappings of entity name -> details dict.

    Returns:
        dict: Entity counts plus, for characters and objects, the average
        number of features and of "merged_from" entries per entity. Character
        features are counted as the length of "Physical Features"; object
        features are read from the numeric "Total Features" field. Averages
        stay 0 when the corresponding category is empty.
    """
    characters = entities.get("characters", {})
    objects = entities.get("objects", {})

    character_count = len(characters)
    object_count = len(objects)
    place_count = len(entities.get("places", {}))

    # Characters: features are a list, appearances are the merged source list
    character_feature_total = sum(
        len(info.get("Physical Features", [])) for info in characters.values()
    )
    character_appearance_total = sum(
        len(info.get("merged_from", [])) for info in characters.values()
    )
    avg_character_features = 0
    avg_character_appearances = 0
    if character_count > 0:
        avg_character_features = character_feature_total / character_count
        avg_character_appearances = character_appearance_total / character_count

    # Objects: the feature count is stored directly under "Total Features"
    object_feature_total = sum(info.get("Total Features", 0) for info in objects.values())
    object_appearance_total = sum(
        len(info.get("merged_from", [])) for info in objects.values()
    )
    avg_object_features = 0
    avg_object_appearances = 0
    if object_count > 0:
        avg_object_features = object_feature_total / object_count
        avg_object_appearances = object_appearance_total / object_count

    return {
        "number_of_characters": character_count,
        "number_of_objects": object_count,
        "number_of_places": place_count,
        "avg_features_per_character": avg_character_features,
        "avg_features_per_object": avg_object_features,
        "avg_appearances_per_character": avg_character_appearances,
        "avg_appearances_per_object": avg_object_appearances,
    }

def modify_summary_json_with_statistics(entities, path):
    """Rewrite the JSON file at `path` with fresh summary statistics as its first key.

    Args:
        entities (dict): Entities passed to `compute_summary_statistics`.
        path (str): JSON file that is read, extended and overwritten in place.

    The statistics are stored under "summary_statistics". If the file already
    contains that key (for example because this function ran before), the old
    block is replaced by the newly computed one; all other keys are kept in
    their original order after it.
    """
    # Compute statistics
    statistics = compute_summary_statistics(entities)

    # Load the original JSON data
    with open(path, 'r') as f:
        data = json.load(f)

    # Statistics go first. A plain `{"summary_statistics": ..., **data}` would let
    # a stale "summary_statistics" entry from the file overwrite the fresh one,
    # so that key is skipped while copying the existing data.
    modified_data = {"summary_statistics": statistics}
    modified_data.update(
        (key, value) for key, value in data.items() if key != "summary_statistics"
    )

    # Save the modified data back to the JSON
    with open(path, 'w') as f:
        json.dump(modified_data, f, indent=4)

    print(f"Summary statistics added and saved to {path}")


In [11]:
### Save additional Stats

In [12]:
def save_additional_stats(video_path, scenes_output_dir, start_time, end_time, video_output_dir, json_output_dir, summary_statistics, scenes, edges_output_dir, flicker_and_short_scene_stats, edge_density_stats, percentage_place_changes=None):
    """Collect per-video statistics and write them to `<video title>_stats.json`.

    The report is built step by step; each of the first five fields is
    computed inside its own try/except so that a failure only leaves that
    field as None and prints an error. `scenes_output_dir` and
    `edges_output_dir` are accepted but not read by this function.

    Args:
        video_path: Path of the analysed video (title and size are derived from it).
        start_time, end_time: Timestamps whose difference is the processing time.
        video_output_dir: Directory that receives the stats JSON file.
        json_output_dir: Directory handed to `analyze_json_files`.
        summary_statistics: Stored under "summary_statistics".
        scenes: Sized collection; its length is the number of scenes.
        flicker_and_short_scene_stats: Stored under "luminance_analysis".
        edge_density_stats: Mapping merged into the top level of the report.
        percentage_place_changes: Optional value stored under
            "place_discontinuity_percentage" when not None.
    """
    additional_info = dict.fromkeys((
        "video_title",
        "video_size_bytes",
        "video_length_seconds",
        "number_of_scenes",
        "processing_time_seconds",
    ))

    # --- Title: file name without its extension
    try:
        title = os.path.splitext(os.path.basename(video_path))[0]
        additional_info["video_title"] = title
        print(f"Video title: {title}", flush=True)
    except Exception as e:
        print(f"Error extracting video title: {e}", flush=True)

    # --- Size on disk
    try:
        size_in_bytes = os.path.getsize(video_path)
        additional_info["video_size_bytes"] = size_in_bytes
        print(f"Video size: {size_in_bytes} bytes", flush=True)
    except Exception as e:
        print(f"Error getting video size: {e}", flush=True)

    # --- Duration
    try:
        length_in_seconds = get_video_length(video_path)
        additional_info["video_length_seconds"] = length_in_seconds
        print(f"Video length: {length_in_seconds} seconds", flush=True)
    except Exception as e:
        print(f"Error calculating video length: {e}", flush=True)

    # --- Scene count
    try:
        scene_count = len(scenes)
        additional_info["number_of_scenes"] = scene_count
        print(f"Number of scenes: {scene_count}", flush=True)
    except Exception as e:
        print(f"Error counting scenes: {e}", flush=True)

    # --- Processing time
    try:
        elapsed = end_time - start_time
        additional_info["processing_time_seconds"] = elapsed
        print(f"Processing time: {elapsed} seconds", flush=True)
    except Exception as e:
        print(f"Error calculating processing time: {e}", flush=True)

    # --- Luminance analysis results are stored as received
    additional_info["luminance_analysis"] = flicker_and_short_scene_stats

    # --- Aggregated stats over every JSON file in json_output_dir
    try:
        additional_info.update(analyze_json_files(json_output_dir))
    except Exception as e:
        print(f"Error analyzing JSON files: {e}", flush=True)

    # --- Snapshot of the module-level API usage counters
    additional_info["api_usage"] = {
        counter: api_usage[counter]
        for counter in ("total_api_calls", "total_tokens_used", "model_used")
    }

    # --- Entity summary and edge density values computed by the caller
    additional_info["summary_statistics"] = summary_statistics
    additional_info.update(edge_density_stats)

    # --- Optional place change percentage
    if percentage_place_changes is not None:
        additional_info["place_discontinuity_percentage"] = percentage_place_changes
        print(f"Place continuity percentage: {percentage_place_changes}%")

    # --- Persist the report inside the video folder
    stats_file_name = f'{additional_info["video_title"]}_stats.json'
    stats_output_path = os.path.join(video_output_dir, stats_file_name)
    try:
        with open(stats_output_path, 'w') as f:
            json.dump(additional_info, f, indent=4)
        print(f"Additional stats saved to {stats_output_path}", flush=True)
    except Exception as e:
        print(f"Error saving stats to JSON: {e}", flush=True)


In [13]:
def analyze_json_files(json_output_dir):
    """Aggregate colour, suitability and place statistics over a folder of JSON files.

    Every `*.json` file in `json_output_dir` is read (in sorted name order so
    the result is reproducible). For each file the "Image Analysis" section
    is inspected:

      * "Colors Found": the percentage of each tracked colour is summed and
        later averaged over the number of files processed. A percentage may
        be a string such as "12.5%" or a plain number; a colour entry may be
        a dict with a "Percentage" field or the bare value itself. Values
        that cannot be converted are reported and skipped.
      * "Suitability": every feature with a truthy value marks the file as
        non-compliant.
      * "Place": places with "Certainty Boolean" == 1 are counted, and among
        them those with "Fantasy/Adventurous Place" == 1.

    Files that fail to parse or process are reported with a printed message
    and skipped.

    Args:
        json_output_dir (str): Directory containing the JSON files.

    Returns:
        dict: "Overall Color Distribution" (colour -> average percentage),
        "Non-Compliant Frames" (list of {"frame", "features"}) and
        "Percentage of Fantasy/Adventurous Places".
    """
    color_names = ["Red", "Yellow", "Green", "Blue", "White", "Black", "Non-primary"]
    color_distribution = {color: 0 for color in color_names}
    total_frames = 0
    non_compliant_frames = []
    total_fantasy_places = 0
    total_places = 0

    # Iterate over all JSON files in the json_output folder
    for json_file in sorted(os.listdir(json_output_dir)):
        if not json_file.endswith('.json'):
            continue

        try:
            with open(os.path.join(json_output_dir, json_file), 'r') as f:
                data = json.load(f)

            # Get color data
            image_analysis = data.get("Image Analysis", {})
            colors_found = image_analysis.get("Colors Found", {})

            # Collect this file's colours first and commit them together with
            # the frame counter, so a bad entry can never leave a frame that is
            # half added to the totals but missing from the divisor.
            frame_colors = {}
            for color in color_names:
                color_info = colors_found.get(color, {})
                if isinstance(color_info, dict):
                    raw_percentage = color_info.get("Percentage", "0%")
                else:
                    raw_percentage = color_info
                try:
                    frame_colors[color] = _parse_percentage(raw_percentage)
                except (ValueError, TypeError):
                    print(f"Invalid color percentage in file {json_file} for color {color}")
                    continue

            for color, color_percentage in frame_colors.items():
                color_distribution[color] += color_percentage
            total_frames += 1

            # Check for compliance issues (Suitability)
            suitability = image_analysis.get("Suitability", {})
            if suitability:  # Make sure suitability exists and is not empty
                non_compliant_features = [feature for feature, value in suitability.items() if value]
                if non_compliant_features:
                    non_compliant_frames.append({
                        "frame": json_file,
                        "features": non_compliant_features
                    })

            # Check for fantasy/adventurous places
            place = image_analysis.get("Place", {})
            if place.get("Certainty Boolean") == 1:
                total_places += 1
                if place.get("Fantasy/Adventurous Place") == 1:
                    total_fantasy_places += 1

        except json.JSONDecodeError:
            print(f"Error decoding JSON in file {json_file}")
        except Exception as e:
            print(f"Error processing file {json_file}: {e}")

    # Calculate final stats
    overall_color_distribution = {color: (value / total_frames) if total_frames > 0 else 0
                                  for color, value in color_distribution.items()}
    percentage_fantasy_places = (total_fantasy_places / total_places * 100) if total_places > 0 else 0

    return {
        "Overall Color Distribution": overall_color_distribution,
        "Non-Compliant Frames": non_compliant_frames,
        "Percentage of Fantasy/Adventurous Places": percentage_fantasy_places
    }


def _parse_percentage(value):
    """Convert "12.5%", "12.5" or a real number to float.

    Raises:
        TypeError: for None, booleans and other non-numeric objects.
        ValueError: for strings that do not hold a number.
    """
    if isinstance(value, bool):
        raise TypeError("boolean is not a percentage")
    if isinstance(value, str):
        value = value.replace('%', '')
    return float(value)


In [14]:
### Situational Analysis (place changes between consecutive scenes)

In [15]:
import os
import json

def load_json_data(json_file_path):
    """Open `json_file_path` for reading and return its parsed JSON content."""
    with open(json_file_path, 'r') as source:
        parsed = json.load(source)
    return parsed

def compare_place_clusters(scene_1_place, scene_2_place, clusters):
    """
    Compare two place names and check if they belong to the same cluster.

    Two layouts of `clusters` are understood:

      * the raw clustering result, {"Places Clusters": {final_name: [names]}};
      * the merged summary written to final_summary.json,
        {"places": {final_name: {"merged_names": [names], ...}}}.
        This is the layout `process_video` loads and passes on, and it has no
        "Places Clusters" key, so without this branch no pair would match.

    Return True if both names appear in the member list of one cluster,
    False otherwise (including when `clusters` holds neither layout).
    """
    for members in _place_cluster_members(clusters):
        if scene_1_place in members and scene_2_place in members:
            return True
    return False


def _place_cluster_members(clusters):
    """Yield the non-empty member collection of every place cluster in `clusters`."""
    for section_key in ("Places Clusters", "places"):
        section = clusters.get(section_key) or {}
        if not isinstance(section, dict):
            continue
        for cluster in section.values():
            # Merged-summary entries are dicts carrying the names under
            # "merged_names"; raw clusters are the name list itself.
            members = cluster.get("merged_names") if isinstance(cluster, dict) else cluster
            if members:
                yield members

def process_scene_comparisons(json_output_dir, clusters, output_registry_path):
    """
    Compare places between consecutive scenes and log the results.

    The `*_analysis.json` files in `json_output_dir` are ordered by the
    numbers embedded in their names (so scene_2 comes before scene_10) and
    each file is compared with its successor. Pairs where either scene has
    no place name are skipped. The registry is written to
    `output_registry_path` as JSON and two summary lines are printed.

    Args:
        json_output_dir (str): Directory with the per-scene analysis files.
        clusters (dict): Cluster data forwarded to `compare_place_clusters`.
        output_registry_path (str): Destination JSON file; its parent
            directory is created when the path has one.

    Returns:
        dict: The saved registry with the keys "comparisons",
        "total_comparisons", "place_changes" and "percentage_place_changes".
    """
    def scene_order(file_name):
        # re.split with a capturing group alternates text / digit runs, so the
        # odd positions are always numbers and can be compared numerically.
        pieces = re.split(r'(\d+)', file_name)
        return [int(piece) if position % 2 else piece for position, piece in enumerate(pieces)]

    def place_name(scene_data):
        place = (scene_data.get("Image Analysis") or {}).get("Place") or {}
        return place.get("Name", None)

    scene_files = sorted(
        (f for f in os.listdir(json_output_dir) if f.endswith('_analysis.json')),
        key=scene_order,
    )
    registry = []
    total_comparisons = 0
    place_changes = 0

    # Loop through consecutive scene files; the second file of one pair is the
    # first file of the next, so its parsed content is carried over.
    carried_data = None
    for i in range(len(scene_files) - 1):
        scene_1_file = scene_files[i]
        scene_2_file = scene_files[i + 1]

        if carried_data is not None:
            scene_1_data = carried_data
        else:
            scene_1_data = load_json_data(os.path.join(json_output_dir, scene_1_file))
        scene_2_data = load_json_data(os.path.join(json_output_dir, scene_2_file))
        carried_data = scene_2_data

        # Extract place names
        scene_1_place = place_name(scene_1_data)
        scene_2_place = place_name(scene_2_data)

        # Skip scenes without a place name
        if not scene_1_place or not scene_2_place:
            continue

        total_comparisons += 1

        # Compare place clusters
        same_cluster = compare_place_clusters(scene_1_place, scene_2_place, clusters)

        # Log the comparison result
        registry.append({
            "scene_start": scene_1_file,
            "scene_end": scene_2_file,
            "scene_start_place": scene_1_place,
            "scene_end_place": scene_2_place,
            "same_cluster": same_cluster
        })

        # Count place changes
        if not same_cluster:
            place_changes += 1

    # Calculate the percentage of scene-to-scene place changes
    percentage_place_changes = (place_changes / total_comparisons) * 100 if total_comparisons > 0 else 0

    output_data = {
        "comparisons": registry,
        "total_comparisons": total_comparisons,
        "place_changes": place_changes,
        "percentage_place_changes": percentage_place_changes
    }

    # Ensure the output directory exists; a bare file name has no directory
    # part and os.makedirs("") would raise.
    output_registry_dir = os.path.dirname(output_registry_path)
    if output_registry_dir:
        os.makedirs(output_registry_dir, exist_ok=True)

    # Save the results
    with open(output_registry_path, 'w') as f:
        json.dump(output_data, f, indent=4)

    print(f"Registry saved to {output_registry_path}")
    print(f"Percentage of scene-to-scene place changes: {percentage_place_changes}%")

    return output_data


In [23]:
# Continuity of characters and objects between scenes

In [29]:
def analyze_character_object_continuity(json_output_dir, clusters, entity_type="characters"):
    """
    Analyze continuity for characters or objects between consecutive JSON files and print the results.

    The `.json` files are ordered by the numbers embedded in their names (so
    scene_2 precedes scene_10) and each file is compared with its successor.
    A pair counts as a "change" when no entity of the first scene shares a
    cluster (according to `compare_clusters`) with any entity of the second.

    Args:
        json_output_dir (str): The directory where the JSON scene analysis files are stored.
        clusters (dict): The clusters from the final summary JSON.
        entity_type (str): Either 'characters' or 'objects' to indicate which entities to analyze.

    Returns:
        None: Only prints the continuity analysis.
    """
    def scene_order(file_name):
        # re.split with a capturing group alternates text / digit runs, so the
        # odd positions are always numbers and can be compared numerically.
        pieces = re.split(r'(\d+)', file_name)
        return [int(piece) if position % 2 else piece for position, piece in enumerate(pieces)]

    # Section / details keys inside "Image Analysis" for the requested entity type
    if entity_type == "characters":
        section_key, details_key = "Characters", "Character Details"
    else:  # for objects
        section_key, details_key = "Objects", "Objects Details"

    def scene_entities(scene_data):
        section = scene_data.get("Image Analysis", {}).get(section_key, {})
        return section.get(details_key, {}) or {}

    json_files = sorted(
        (f for f in os.listdir(json_output_dir) if f.endswith('.json')),
        key=scene_order,
    )

    total_comparisons = 0
    changes = 0

    # Loop through consecutive scenes; the second file of one pair is the first
    # file of the next, so its parsed content is carried over.
    carried_data = None
    for i in range(len(json_files) - 1):
        scene_1_file = json_files[i]
        scene_2_file = json_files[i + 1]

        if carried_data is not None:
            scene_1_data = carried_data
        else:
            scene_1_data = load_json_data(os.path.join(json_output_dir, scene_1_file))
        scene_2_data = load_json_data(os.path.join(json_output_dir, scene_2_file))
        carried_data = scene_2_data

        scene_1_entities = scene_entities(scene_1_data)
        scene_2_entities = scene_entities(scene_2_data)

        total_comparisons += 1
        match_found = False

        # Compare entities between two scenes
        for entity_1_name, entity_1 in scene_1_entities.items():
            for entity_2_name, entity_2 in scene_2_entities.items():
                cluster_name = compare_clusters(entity_1["Name"], entity_2["Name"], clusters, entity_type)
                if cluster_name:
                    match_found = True
                    print(f"Matched {entity_type[:-1]} '{entity_1['Name']}' in scene '{scene_1_file}' with '{entity_2['Name']}' in scene '{scene_2_file}' (Cluster: {cluster_name})")

        if not match_found:
            changes += 1
            print(f"No {entity_type} match found between scene '{scene_1_file}' and scene '{scene_2_file}'")

    percentage_changes = (changes / total_comparisons) * 100 if total_comparisons > 0 else 0

    # Print summary of the analysis
    print(f"\nSummary for {entity_type}:")
    print(f"Total comparisons: {total_comparisons}")
    print(f"{entity_type[:-1].capitalize()} changes: {changes}")
    print(f"Percentage of {entity_type[:-1]} changes: {percentage_changes:.2f}%")


### 8) Main Function Execution

In [33]:
import os
import time
import asyncio
from tqdm import tqdm

async def process_video(video_file, directory_path, output_base_dir, api_key):
    """Run the full analysis pipeline for one video and save its statistics.

    Steps, in order: scene detection and middle-frame extraction, per-scene
    analysis (awaited), luminance/flicker analysis, entity clustering and
    merging, edge detection and edge-density analysis, summary statistics,
    place / character / object continuity analysis, and finally
    `save_additional_stats`.

    All outputs are written below `<output_base_dir>/<video name>/`, in the
    sub-folders `scenes_output`, `json_output`, `luminance_output`,
    `edges_output` and `places_continuity`.

    Args:
        video_file: File name of the video inside `directory_path`.
        directory_path: Directory that contains the video.
        output_base_dir: Root directory for the per-video output folders.
        api_key: Key forwarded to `cluster_and_merge_entities`.

    Returns:
        None. Any exception raised along the way is caught, printed and
        swallowed. When the entity merge returns None the function stops
        early and no stats file is written for this video.
    """
    try:
        # Track the start time
        start_time = time.time()

        video_path = os.path.join(directory_path, video_file)
        video_name = os.path.splitext(video_file)[0]
        # NOTE(review): video_size is not read again in this function; the call
        # still raises (and aborts processing) if the file is missing.
        video_size = os.path.getsize(video_path)

        video_output_dir = os.path.join(output_base_dir, video_name)
        scenes_output_dir = os.path.join(video_output_dir, 'scenes_output')
        json_output_dir = os.path.join(video_output_dir, 'json_output')
        luminance_output_dir = os.path.join(video_output_dir, 'luminance_output')

        os.makedirs(scenes_output_dir, exist_ok=True)
        os.makedirs(json_output_dir, exist_ok=True)
        os.makedirs(luminance_output_dir, exist_ok=True)

        print(f"Processing video: {video_file}", flush=True)

        # Analyze video for scenes
        print("Starting scene analysis...", flush=True)
        scenes = analyze_video(video_path)
        extract_frames_imageio(video_path, scenes, scenes_output_dir)
        await process_scenes_output(scenes_output_dir, json_output_dir)  # Ensure this finishes
        print("Scene analysis complete.", flush=True)

        # Perform luminance analysis and get the results
        print("Starting luminance analysis...", flush=True)
        flicker_and_short_scene_stats = process_video_with_flicker_analysis(video_path, luminance_output_dir)
        print("Luminance analysis complete.", flush=True)

        # Merge entities
        # NOTE(review): final_json_path is not read again in this function.
        final_json_path = os.path.join(json_output_dir, 'final_entities.json')
        print("Merging entities...", flush=True)
        merged_final_entities = await cluster_and_merge_entities(api_key, json_output_dir, video_output_dir)

        # Without merged entities the remaining steps are skipped for this video
        if merged_final_entities is None:
            print(f"Error: Failed to merge entities for video {video_file}")
            return
        print("Entity merge complete.", flush=True)

        # Create the edges_output directory and process each scene for edge detection
        edges_output_dir = os.path.join(video_output_dir, 'edges_output')
        os.makedirs(edges_output_dir, exist_ok=True)
        print("Starting edge detection...", flush=True)
        save_edge_detection_images(scenes_output_dir, edges_output_dir)
        print("Edge detection complete.", flush=True)

        # Compute edge density statistics
        print("Analyzing edge density...", flush=True)
        edge_density_stats = analyze_edge_density_for_scenes(scenes_output_dir, edges_output_dir)
        print("Edge density analysis complete.", flush=True)

        # Compute summary statistics
        print("Computing summary statistics...", flush=True)
        summary_statistics = compute_summary_statistics(merged_final_entities)
        print("Summary statistics computation complete.", flush=True)

        # Track the end time
        # (taken before the continuity analysis below, so the processing time
        # handed to save_additional_stats does not include that step)
        end_time = time.time()

        # Scene comparison step (new addition)
        clusters_file = os.path.join(video_output_dir, 'final_summary.json')

        # Ensure final_summary.json exists
        if os.path.exists(clusters_file):
            clusters = load_json_data(clusters_file)

            # Analyze place continuity
            # The registry is written to disk and read back to obtain the percentage.
            print("Starting place continuity analysis...", flush=True)
            process_scene_comparisons(json_output_dir, clusters, os.path.join(video_output_dir, 'places_continuity', 'continuity_registry.json'))
            continuity_data = load_json_data(os.path.join(video_output_dir, 'places_continuity', 'continuity_registry.json'))
            percentage_place_changes = continuity_data.get('percentage_place_changes', 0)
            print("Place continuity analysis complete.", flush=True)

            # Analyze character continuity (prints results)
            print("Starting character continuity analysis...", flush=True)
            analyze_character_object_continuity(json_output_dir, clusters, entity_type="characters")

            # Analyze object continuity (prints results)
            print("Starting object continuity analysis...", flush=True)
            analyze_character_object_continuity(json_output_dir, clusters, entity_type="objects")

        else:
            print(f"Error: {clusters_file} does not exist. Skipping scene comparison.")
            percentage_place_changes = None

        # Ensure save_additional_stats gets the expected data
        print("Saving additional stats...", flush=True)
        save_additional_stats(
            video_path, scenes_output_dir, start_time, end_time, video_output_dir, json_output_dir,
            summary_statistics, scenes, edges_output_dir, flicker_and_short_scene_stats, edge_density_stats,
            percentage_place_changes  # Make sure place changes are passed
        )
        print(f"Finished processing video: {video_file}", flush=True)

    except Exception as e:
        # Best effort: report the failure and let the caller continue with the next video
        print(f"Error processing video {video_file}: {e}", flush=True)

async def process_videos_in_directory(directory_path, output_base_dir, api_key):
    """Process every video found in `directory_path`, one after the other.

    Files are selected by extension (.mp4, .avi, .mkv; matched without regard
    to case, so "CLIP.MP4" is included) and processed in sorted name order so
    repeated runs visit the videos in the same sequence. Progress is shown
    with a tqdm bar that advances once per video.

    Args:
        directory_path: Directory to scan for video files.
        output_base_dir: Root output directory forwarded to `process_video`.
        api_key: Key forwarded to `process_video`.

    Returns:
        None. Prints a message and returns immediately when no video is found.
    """
    video_extensions = ('.mp4', '.avi', '.mkv')
    video_files = sorted(
        f for f in os.listdir(directory_path) if f.lower().endswith(video_extensions)
    )

    if not video_files:
        print("No video files found in the directory.", flush=True)
        return

    with tqdm(total=len(video_files), desc="Processing Videos", unit="video") as pbar:
        for video_file in video_files:
            # Sequential on purpose: each video is finished before the next starts
            await process_video(video_file, directory_path, output_base_dir, api_key)
            pbar.update(1)

# Input folder with the videos to analyse and root folder for the per-video results.
# NOTE(review): both are absolute paths on one machine; adjust them before running elsewhere.
video_directory = '/Users/santiagowon/Dropbox/Santiago/01. Maestria/Tesis/02_Video_DB'
output_base_directory = '/Users/santiagowon/Dropbox/Santiago/01. Maestria/Tesis/11_Project_Analysed_DB'

# Run the async function (sequential processing).
# api_key comes from the setup cell; calling asyncio.run inside the notebook
# relies on nest_asyncio.apply() having been executed there as well.
asyncio.run(process_videos_in_directory(video_directory, output_base_directory, api_key))
print("FINISHED PROCESSING ALL VIDEOS.", flush=True)


Processing Videos:   0%|          | 0/2 [00:00<?, ?video/s]

Processing video: Bananas_in_pyjamas copy.mp4
Starting scene analysis...


ERROR:pyscenedetect:VideoManager is deprecated and will be removed.
INFO:pyscenedetect:Loaded 1 video, framerate: 24.969 FPS, resolution: 640 x 360
INFO:pyscenedetect:Downscale factor set to 2, effective resolution: 320 x 180
INFO:pyscenedetect:Detecting scenes...
INFO:root:Detected 3 scenes:
INFO:root:Scene 1: Start 00:00:00.000 / Frame 0, End 00:00:08.370 / Frame 209
INFO:root:Scene 2: Start 00:00:08.370 / Frame 209, End 00:00:19.344 / Frame 483
INFO:root:Scene 3: Start 00:00:19.344 / Frame 483, End 00:00:22.147 / Frame 553


Extracted and saved middle frame of scene 1 as /Users/santiagowon/Dropbox/Santiago/01. Maestria/Tesis/11_Project_Analysed_DB/Bananas_in_pyjamas copy/scenes_output/scene_1_frame_104.jpg
Extracted and saved middle frame of scene 2 as /Users/santiagowon/Dropbox/Santiago/01. Maestria/Tesis/11_Project_Analysed_DB/Bananas_in_pyjamas copy/scenes_output/scene_2_frame_346.jpg
Extracted and saved middle frame of scene 3 as /Users/santiagowon/Dropbox/Santiago/01. Maestria/Tesis/11_Project_Analysed_DB/Bananas_in_pyjamas copy/scenes_output/scene_3_frame_518.jpg




Saved analysis for scene_1_frame_104.jpg as scene_1_frame_104_analysis.json




Saved analysis for scene_3_frame_518.jpg as scene_3_frame_518_analysis.json


Processing Scenes: 100%|██████████| 3/3 [00:13<00:00,  4.53s/scene]

Saved analysis for scene_2_frame_346.jpg as scene_2_frame_346_analysis.json
Scene analysis complete.
Starting luminance analysis...



ERROR:pyscenedetect:VideoManager is deprecated and will be removed.
INFO:pyscenedetect:Loaded 1 video, framerate: 24.969 FPS, resolution: 640 x 360
INFO:pyscenedetect:Downscale factor set to 2, effective resolution: 320 x 180
INFO:pyscenedetect:Detecting scenes...
INFO:root:Detected 3 scenes:
INFO:root:Scene 1: Start 00:00:00.000 / Frame 0, End 00:00:08.370 / Frame 209
INFO:root:Scene 2: Start 00:00:08.370 / Frame 209, End 00:00:19.344 / Frame 483
INFO:root:Scene 3: Start 00:00:19.344 / Frame 483, End 00:00:22.147 / Frame 553


Total scenes detected: 3
Analyzing scenes for flicker and short scene detection...
Saved frame comparison image to /Users/santiagowon/Dropbox/Santiago/01. Maestria/Tesis/11_Project_Analysed_DB/Bananas_in_pyjamas copy/luminance_output/scene_1_to_2.jpg
Saved frame comparison image to /Users/santiagowon/Dropbox/Santiago/01. Maestria/Tesis/11_Project_Analysed_DB/Bananas_in_pyjamas copy/luminance_output/scene_2_to_3_STRONG_LUMINANCE.jpg
Luminance analysis complete.
Merging entities...
Merged entities saved to /Users/santiagowon/Dropbox/Santiago/01. Maestria/Tesis/11_Project_Analysed_DB/Bananas_in_pyjamas copy/final_summary.json
Entity merge complete.
Starting edge detection...
Saved edge detection image to /Users/santiagowon/Dropbox/Santiago/01. Maestria/Tesis/11_Project_Analysed_DB/Bananas_in_pyjamas copy/edges_output/scene_2_frame_346_edges.jpg
Edge density for scene_2_frame_346.jpg: 2.2061631944444446%
Saved edge detection image to /Users/santiagowon/Dropbox/Santiago/01. Maestria/Tesis/1

Processing Videos:  50%|█████     | 1/2 [00:23<00:23, 23.34s/video]

Processing video: shinchantrim.mp4
Starting scene analysis...


ERROR:pyscenedetect:VideoManager is deprecated and will be removed.
INFO:pyscenedetect:Loaded 1 video, framerate: 29.979 FPS, resolution: 442 x 360
INFO:pyscenedetect:Detecting scenes...
INFO:root:Detected 5 scenes:
INFO:root:Scene 1: Start 00:00:00.000 / Frame 0, End 00:00:01.101 / Frame 33
INFO:root:Scene 2: Start 00:00:01.101 / Frame 33, End 00:00:02.635 / Frame 79
INFO:root:Scene 3: Start 00:00:02.635 / Frame 79, End 00:00:04.803 / Frame 144
INFO:root:Scene 4: Start 00:00:04.803 / Frame 144, End 00:00:07.405 / Frame 222
INFO:root:Scene 5: Start 00:00:07.405 / Frame 222, End 00:00:07.605 / Frame 228


Extracted and saved middle frame of scene 1 as /Users/santiagowon/Dropbox/Santiago/01. Maestria/Tesis/11_Project_Analysed_DB/shinchantrim/scenes_output/scene_1_frame_16.jpg
Extracted and saved middle frame of scene 2 as /Users/santiagowon/Dropbox/Santiago/01. Maestria/Tesis/11_Project_Analysed_DB/shinchantrim/scenes_output/scene_2_frame_56.jpg
Extracted and saved middle frame of scene 3 as /Users/santiagowon/Dropbox/Santiago/01. Maestria/Tesis/11_Project_Analysed_DB/shinchantrim/scenes_output/scene_3_frame_111.jpg
Extracted and saved middle frame of scene 4 as /Users/santiagowon/Dropbox/Santiago/01. Maestria/Tesis/11_Project_Analysed_DB/shinchantrim/scenes_output/scene_4_frame_183.jpg
Extracted and saved middle frame of scene 5 as /Users/santiagowon/Dropbox/Santiago/01. Maestria/Tesis/11_Project_Analysed_DB/shinchantrim/scenes_output/scene_5_frame_225.jpg




Saved analysis for scene_4_frame_183.jpg as scene_4_frame_183_analysis.json




Saved analysis for scene_3_frame_111.jpg as scene_3_frame_111_analysis.json




Saved analysis for scene_1_frame_16.jpg as scene_1_frame_16_analysis.json




Saved analysis for scene_2_frame_56.jpg as scene_2_frame_56_analysis.json


Processing Scenes: 100%|██████████| 5/5 [00:09<00:00,  1.97s/scene]

Saved analysis for scene_5_frame_225.jpg as scene_5_frame_225_analysis.json
Scene analysis complete.
Starting luminance analysis...



ERROR:pyscenedetect:VideoManager is deprecated and will be removed.
INFO:pyscenedetect:Loaded 1 video, framerate: 29.979 FPS, resolution: 442 x 360
INFO:pyscenedetect:Detecting scenes...
INFO:root:Detected 5 scenes:
INFO:root:Scene 1: Start 00:00:00.000 / Frame 0, End 00:00:01.101 / Frame 33
INFO:root:Scene 2: Start 00:00:01.101 / Frame 33, End 00:00:02.635 / Frame 79
INFO:root:Scene 3: Start 00:00:02.635 / Frame 79, End 00:00:04.803 / Frame 144
INFO:root:Scene 4: Start 00:00:04.803 / Frame 144, End 00:00:07.405 / Frame 222
INFO:root:Scene 5: Start 00:00:07.405 / Frame 222, End 00:00:07.605 / Frame 228


Total scenes detected: 5
Analyzing scenes for flicker and short scene detection...
Saved frame comparison image to /Users/santiagowon/Dropbox/Santiago/01. Maestria/Tesis/11_Project_Analysed_DB/shinchantrim/luminance_output/scene_1_to_2.jpg
Saved frame comparison image to /Users/santiagowon/Dropbox/Santiago/01. Maestria/Tesis/11_Project_Analysed_DB/shinchantrim/luminance_output/scene_2_to_3_STRONG_LUMINANCE.jpg
Saved frame comparison image to /Users/santiagowon/Dropbox/Santiago/01. Maestria/Tesis/11_Project_Analysed_DB/shinchantrim/luminance_output/scene_3_to_4.jpg
Saved frame comparison image to /Users/santiagowon/Dropbox/Santiago/01. Maestria/Tesis/11_Project_Analysed_DB/shinchantrim/luminance_output/scene_4_to_5.jpg
Luminance analysis complete.
Merging entities...
Merged entities saved to /Users/santiagowon/Dropbox/Santiago/01. Maestria/Tesis/11_Project_Analysed_DB/shinchantrim/final_summary.json
Entity merge complete.
Starting edge detection...
Saved edge detection image to /Users/s

Processing Videos: 100%|██████████| 2/2 [00:43<00:00, 21.56s/video]

FINISHED PROCESSING ALL VIDEOS.





In [None]:
## Test for Chars and Objs