In [1]:
!pip install torch torchvision opencv-python-headless
!pip install openai
!pip install opencv-python pillow requests
!pip install matplotlib
!pip install matplotlib opencv-python-headless
!pip install pillow
!pip install cucim[plugins]



In [2]:
# Install the CUDA 12 specific version of cuCIM
!pip install cucim-cu12

# Install CuPy for CUDA 12
!pip install cupy-cuda12x

# Install the required image format plugins
!pip install pylibcucim-cu12



In [3]:
from IPython.display import display, Image, Audio
import openai
import cv2  # We're using OpenCV to read video, to install !pip install opencv-python
import base64
import time
from openai import OpenAI
import os
import requests
import torch

In [4]:
# In a Python cell
!lspci | grep -i nvidia
!nvidia-smi
!nvcc --version
!which python
!python --version

0000:17:00.0 VGA compatible controller: NVIDIA Corporation GA102GL [RTX A6000] (rev a1)
0000:17:00.1 Audio device: NVIDIA Corporation GA102 High Definition Audio Controller (rev a1)
0000:65:00.0 VGA compatible controller: NVIDIA Corporation GA102GL [RTX A6000] (rev a1)
0000:65:00.1 Audio device: NVIDIA Corporation GA102 High Definition Audio Controller (rev a1)
Wed Jun 11 13:25:36 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 570.133.07             Driver Version: 570.133.07     CUDA Version: 12.8     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA RTX A6000               Off |   000000

In [5]:
!nvidia-smi

Wed Jun 11 13:25:37 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 570.133.07             Driver Version: 570.133.07     CUDA Version: 12.8     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA RTX A6000               Off |   00000000:17:00.0 Off |                  Off |
| 30%   37C    P5             21W /  300W |      15MiB /  49140MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
|   1  NVIDIA RTX A6000               Off |   00

In [6]:
import torch

print(torch.cuda.is_available())  # Should return True
print(torch.cuda.current_device())  # Should return 0
print(torch.cuda.get_device_name(0))  # Should return "NVIDIA RTX A6000" (not A100)
!nvidia-smi

True
0
NVIDIA RTX A6000
Wed Jun 11 13:25:38 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 570.133.07             Driver Version: 570.133.07     CUDA Version: 12.8     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA RTX A6000               Off |   00000000:17:00.0 Off |                  Off |
| 30%   38C    P3             43W /  300W |      18MiB /  49140MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
|   1  NVIDIA RTX A6000 

In [7]:
f = open("/home/opade7/Documents/gpu-test/API-KEYS/chatgpt.txt", "r")
mykey = f.read()

#ZER0-SHOT
* Clear, direct instructions
* No examples or demonstrations
* No training or fine-tuning
* Each task stands alone

Implementation:
* Like being given a task with no examples or practice
* Direct analysis without prior context
* Each question stands alone
* No dependencies between questions
* Like taking a test without studying examples first

In [8]:
import os
import json
import base64
import requests
import time
from datetime import datetime
from concurrent.futures import ThreadPoolExecutor
from collections import defaultdict

# Add Google Drive mounting for Colab
try:
    from google.colab import drive
    drive.mount('/content/drive')
    print("Google Drive mounted successfully!")
except ImportError:
    print("Not running in Colab or drive module not available")

# Configuration
DATA_DIR = "/home/opade7/Documents/gpu-test/crime-data"
SAVE_DIR = "/home/opade7/Documents/gpu-test/RESULT-PROMPT/GPT/ZERO"
FRAME_INTERVAL = 1  # Process ALL frames (set to higher number for sampling)

class ZeroShotGPTAnalyzer:
    def __init__(self, api_key):
        self.api_key = api_key
        self.headers = {
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json"
        }
        self.save_dir = SAVE_DIR
        self.max_frames_per_request = 20
        self.chunk_size = 10
        os.makedirs(self.save_dir, exist_ok=True)

    def process_frames_in_chunks(self, prompt, frame_data):
        """Process frames in chunks using GPT API"""
        url = "https://api.openai.com/v1/chat/completions"
        all_responses = []

        # Process frames in chunks
        for i in range(0, len(frame_data), self.chunk_size):
            chunk = frame_data[i:i + self.chunk_size]

            # Pure zero-shot - no system message or context
            messages = [
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": prompt
                        }
                    ]
                }
            ]

            # Add each frame to the user message content
            for frame in chunk:
                # Detect if it's a PNG or JPEG based on the frame_info
                mime_type = "image/png"  # Default to PNG since the files are PNGs

                messages[0]["content"].append({
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:{mime_type};base64,{frame}",
                        "detail": "high"
                    }
                })

            # Make API request
            payload = {
                "model": "gpt-4o",
                "messages": messages,
                "max_tokens": 4096,
                "temperature": 0.1
            }

            try:
                print(f"Sending chunk {i//self.chunk_size + 1}/{(len(frame_data) + self.chunk_size - 1)//self.chunk_size} to GPT-4o...")
                response = requests.post(url, headers=self.headers, json=payload)

                # Check for errors
                if response.status_code != 200:
                    error_detail = response.json() if response.text else response.text
                    print(f"API Error {response.status_code}: {error_detail}")
                    all_responses.append(f"Error {response.status_code}: {error_detail}")
                    continue

                result = response.json()
                if "choices" in result and result["choices"]:
                    response_content = result["choices"][0]["message"]["content"]
                    print(f"Received response for chunk {i//self.chunk_size + 1}")
                    all_responses.append(response_content)
                else:
                    all_responses.append(f"Error: No response from API for chunk {i//self.chunk_size + 1}")

            except Exception as e:
                print(f"Error for chunk {i//self.chunk_size + 1}: {str(e)}")
                all_responses.append(f"Error: {str(e)}")

            # Rate limiting
            print(f"Waiting 3 seconds before next request...")
            time.sleep(3)

        # Combine all chunk responses
        combined_response = "\n\n=== NEXT CHUNK ===\n\n".join(all_responses)
        return combined_response

    def save_results(self, results, filename):
        """Save results to a file"""
        filepath = os.path.join(self.save_dir, filename)
        with open(filepath, 'w') as f:
            json.dump(results, f, indent=2)
        print(f"Results saved to: {filepath}")

    def analyze_frames(self, frames_data, video_id, crime_type):
        """Analyze frames with pure zero-shot approach"""
        try:
            frame_names = list(frames_data.keys())

            # Improved sorting function for frame numbers
            def extract_frame_number(filename):
                try:
                    # Handle different naming patterns
                    if '_frame_' in filename:
                        parts = filename.split('_frame_')
                        if len(parts) > 1:
                            number_part = parts[1].split('.')[0]
                            return int(number_part)
                    elif 'frame' in filename.lower():
                        # Alternative pattern matching
                        import re
                        numbers = re.findall(r'\d+', filename)
                        if numbers:
                            return int(numbers[-1])  # Use the last number found
                except Exception as e:
                    print(f"Error extracting frame number from {filename}: {str(e)}")
                    return 0

            sorted_frames = sorted(frame_names, key=extract_frame_number)

            print(f"\n=== ANALYZING VIDEO: {video_id} ({crime_type}) ===")
            print(f"Total frames loaded: {len(frames_data)}")
            print(f"Frame names sample: {sorted_frames[:5]}{'...' if len(sorted_frames) > 5 else ''}")

            results = {}
            timestamp = time.strftime("%Y%m%d_%H%M%S")

            # True zero-shot prompt - no formatting instructions or examples
            prompt = "What's happening in these frames? Describe the scene and any notable actions or events."

            try:
                frame_data = [frames_data[frame_name] for frame_name in sorted_frames
                             if frame_name in frames_data and frames_data[frame_name]]

                if not frame_data:
                    answer = "No valid frames were available for analysis."
                    print("WARNING: No valid frames were available for analysis.")
                else:
                    print(f"Processing {len(frame_data)} valid frames...")
                    answer = self.process_frames_in_chunks(prompt, frame_data)

                results["Analysis"] = {
                    "crime_type": crime_type,
                    "prompt": prompt,
                    "answer": answer,
                    "frames_used": len(sorted_frames),
                    "valid_frames": len(frame_data)
                }

                # Save results
                self.save_results(results, f"{crime_type}_{video_id}_zero_shot_analysis_{timestamp}.json")
                print(f"Analysis for {video_id} ({crime_type}) completed and saved.")

            except Exception as e:
                print(f"Error processing analysis: {str(e)}")
                results["Analysis"] = {
                    "crime_type": crime_type,
                    "prompt": prompt,
                    "error": str(e),
                    "frames_used": len(sorted_frames)
                }

            return results

        except Exception as e:
            print(f"Error in analyze_frames: {str(e)}")
            raise

def discover_all_videos_and_frames(data_dir):
    """Discover all crime types, videos, and their frames"""
    print(f"\n=== DISCOVERING ALL VIDEOS AND FRAMES ===")
    print(f"Scanning directory: {data_dir}")

    all_videos = {}

    try:
        # Get all subdirectories (crime types)
        crime_types = [d for d in os.listdir(data_dir)
                      if os.path.isdir(os.path.join(data_dir, d))]

        print(f"Found {len(crime_types)} crime type directories: {crime_types}")

        for crime_type in crime_types:
            crime_dir = os.path.join(data_dir, crime_type)
            print(f"\nScanning {crime_type} directory...")

            try:
                all_files = os.listdir(crime_dir)
                print(f"  Found {len(all_files)} files")

                # Group files by video ID
                video_groups = defaultdict(list)

                for filename in all_files:
                    # Skip non-image files
                    if not any(filename.lower().endswith(ext) for ext in ['.png', '.jpg', '.jpeg', '.bmp']):
                        continue

                    # Extract video ID from filename
                    video_id = extract_video_id_from_filename(filename)
                    if video_id:
                        video_groups[video_id].append(filename)

                print(f"  Identified {len(video_groups)} unique videos:")
                for video_id, frames in video_groups.items():
                    print(f"    {video_id}: {len(frames)} frames")
                    all_videos[f"{crime_type}_{video_id}"] = {
                        'crime_type': crime_type,
                        'video_id': video_id,
                        'frames': frames,
                        'crime_dir': crime_dir
                    }

            except Exception as e:
                print(f"  Error scanning {crime_type}: {str(e)}")

    except Exception as e:
        print(f"Error accessing main directory: {str(e)}")

    print(f"\nTotal videos discovered: {len(all_videos)}")
    return all_videos

def extract_video_id_from_filename(filename):
    """Extract video ID from filename using various patterns"""
    import re

    # Remove file extension
    name_without_ext = os.path.splitext(filename)[0]

    # Pattern 1: VideoName_something_frame_number
    if '_frame_' in name_without_ext:
        parts = name_without_ext.split('_frame_')
        return parts[0]

    # Pattern 2: VideoName_number (assuming last part is frame number)
    parts = name_without_ext.split('_')
    if len(parts) >= 2:
        # Check if last part is a number
        try:
            int(parts[-1])
            return '_'.join(parts[:-1])
        except ValueError:
            pass

    # Pattern 3: Just use the filename without numbers at the end
    video_id = re.sub(r'_?\d+$', '', name_without_ext)
    if video_id and video_id != name_without_ext:
        return video_id

    # Pattern 4: If all else fails, use the whole name (assuming single frame)
    return name_without_ext

def load_frames_for_video(video_info, frame_interval=1):
    """Load every Nth frame for a specific video (default: ALL frames)"""
    frames_data = {}
    crime_dir = video_info['crime_dir']
    frame_files = video_info['frames']
    video_id = video_info['video_id']

    print(f"\nLoading {'ALL frames' if frame_interval == 1 else f'every {frame_interval}th frame'} for {video_id}...")

    # Sort frames by frame number
    def extract_frame_number(filename):
        try:
            import re
            numbers = re.findall(r'\d+', filename)
            if numbers:
                return int(numbers[-1])  # Use the last number found
        except Exception:
            pass
        return 0

    frame_files.sort(key=extract_frame_number)

    # Select every Nth frame
    selected_frames = frame_files[::frame_interval]
    if frame_interval == 1:
        print(f"  Processing ALL {len(selected_frames)} frames")
    else:
        print(f"  Selected {len(selected_frames)} frames from {len(frame_files)} total frames")

    # Load selected frames
    for idx, frame_file in enumerate(selected_frames):
        frame_path = os.path.join(crime_dir, frame_file)
        try:
            with open(frame_path, 'rb') as f:
                frame_data = base64.b64encode(f.read()).decode('utf-8')
                frames_data[frame_file] = frame_data

            # Show progress
            if idx < 3 or idx % 10 == 0 or idx == len(selected_frames) - 1:
                original_idx = frame_files.index(frame_file)
                print(f"  Loaded: {frame_file} (frame #{original_idx + 1}, {os.path.getsize(frame_path)/1024:.1f} KB)")

        except Exception as e:
            print(f"  Error loading frame {frame_file}: {str(e)}")

    if frame_interval == 1:
        print(f"Successfully loaded ALL {len(frames_data)} frames for {video_id}")
    else:
        print(f"Successfully loaded {len(frames_data)} frames for {video_id} (every {frame_interval}th frame)")
    return frames_data

def process_all_crime_folders(api_key):
    """Process all crime folders with zero-shot analysis"""
    # Initialize analyzer
    analyzer = ZeroShotGPTAnalyzer(api_key)

    # Discover all videos and frames
    all_videos = discover_all_videos_and_frames(DATA_DIR)

    if not all_videos:
        print("No videos found to process!")
        return {}

    all_results = {}
    skipped_videos = []

    print(f"\nProcessing {len(all_videos)} videos with zero-shot analysis...")
    print(f"Frame processing: {'ALL frames' if FRAME_INTERVAL == 1 else f'Every {FRAME_INTERVAL}th frame'}")

    # Process each video
    for video_key, video_info in all_videos.items():
        print(f"\nProcessing video: {video_key}")

        try:
            # Load frames for this video (every Nth frame as configured)
            frames_data = load_frames_for_video(video_info, frame_interval=FRAME_INTERVAL)

            if frames_data:
                # Analyze frames with zero-shot approach
                results = analyzer.analyze_frames(
                    frames_data,
                    video_info['video_id'],
                    video_info['crime_type']
                )
                all_results[video_key] = results
            else:
                print(f"No frames loaded for video {video_key} - skipping")
                skipped_videos.append(f"{video_key} (no frames loaded)")

        except Exception as e:
            print(f"Error processing video {video_key}: {str(e)}")
            skipped_videos.append(f"{video_key} (error: {str(e)})")

    # Save summary results
    summary_file = os.path.join(SAVE_DIR, f"zero_shot_summary_{time.strftime('%Y%m%d_%H%M%S')}.json")
    with open(summary_file, 'w') as f:
        json.dump(all_results, f, indent=2)

    # Log skipped videos
    if skipped_videos:
        skipped_file = os.path.join(SAVE_DIR, f"skipped_videos_{time.strftime('%Y%m%d_%H%M%S')}.txt")
        with open(skipped_file, 'w') as f:
            f.write("Videos that could not be processed:\n")
            for video in skipped_videos:
                f.write(f"{video}\n")
        print(f"\nSkipped {len(skipped_videos)} videos. List saved to: {skipped_file}")

    print(f"\nComplete analysis saved to: {summary_file}")
    print(f"Successfully processed {len(all_results)} videos")

    return all_results

def check_authentication():
    """Placeholder function to check authentication"""
    return True

def run():
    """Main execution function"""
    print("Zero-Shot Crime Video Analysis - ALL Frames")
    print("="*50)

    # Test directory access first
    print("Testing directory access...")
    for path in [DATA_DIR, SAVE_DIR]:
        print(f"Path: {path}")
        print(f"  Exists: {os.path.exists(path)}")
        if os.path.exists(path):
            try:
                contents = os.listdir(path)
                print(f"  Contains {len(contents)} items")
                if contents:
                    print(f"  First few items: {contents[:3]}")
            except Exception as e:
                print(f"  Error accessing contents: {str(e)}")

    # Get API key
    try:
        api_key_path = "/home/opade7/Documents/gpu-test/API-KEYS/chatgpt.txt"
        print(f"Trying to load API key from: {api_key_path}")
        print(f"File exists: {os.path.exists(api_key_path)}")

        f = open(api_key_path, "r")
        api_key = f.read().strip()
        f.close()

        if not api_key:
            print("✗ Failed to load GPT API key: File is empty")
            return

        print("✓ Successfully loaded GPT API key")
        print(f"API key starts with: {api_key[:5]}...")

    except Exception as e:
        print(f"✗ Failed to load GPT API key: {str(e)}")
        return

    # Check authentication
    if not check_authentication():
        print("✗ Authentication not completed.")
        return

    # Verify directories exist
    print("\nVerifying directories:")
    print(f"Data directory exists: {os.path.exists(DATA_DIR)}")
    print(f"Save directory exists: {os.path.exists(SAVE_DIR)}")

    if not os.path.exists(DATA_DIR):
        print(f"✗ Data directory not found: {DATA_DIR}")
        return

    # Create save directory if it doesn't exist
    os.makedirs(SAVE_DIR, exist_ok=True)

    # Process all crime folders
    results = process_all_crime_folders(api_key)

    # Print summary
    total_frames_processed = 0
    total_videos_processed = len(results)

    for video_id, video_results in results.items():
        if video_results and 'Analysis' in video_results:
            total_frames_processed += video_results['Analysis'].get('valid_frames', 0)

    print("\n" + "="*50)
    print(f"PROCESSING COMPLETE!")
    print(f"Videos processed: {total_videos_processed}")
    print(f"Total frames analyzed: {total_frames_processed}")
    print("="*50)

if __name__ == "__main__":
    run()

Not running in Colab or drive module not available
Zero-Shot Crime Video Analysis - ALL Frames
Testing directory access...
Path: /home/opade7/Documents/gpu-test/crime-data
  Exists: True
  Contains 11 items
  First few items: ['Burglary', 'Stealing', 'Fighting']
Path: /home/opade7/Documents/gpu-test/RESULT-PROMPT/GPT/ZERO
  Exists: False
Trying to load API key from: /home/opade7/Documents/gpu-test/API-KEYS/chatgpt.txt
File exists: True
✓ Successfully loaded GPT API key
API key starts with: sk-pr...

Verifying directories:
Data directory exists: True
Save directory exists: False

=== DISCOVERING ALL VIDEOS AND FRAMES ===
Scanning directory: /home/opade7/Documents/gpu-test/crime-data
Found 11 crime type directories: ['Burglary', 'Stealing', 'Fighting', 'Robbery', 'Shoplifting', 'Arson', 'Vandalism', 'Shooting', 'Abuse', 'Explosion', 'Assault']

Scanning Burglary directory...
  Found 398 files
  Identified 2 unique videos:
    Burglary001_x264: 266 frames
    Burglary008_x264: 132 frames


#Sequential Prompting:
In sequential prompting, we build upon previous answers, using them to inform subsequent prompts in a sequence. This creates a chain of prompts where each builds on what came before.

1. Sequential Prompting Implementation
Key Features:

Processes all frames in chunks of 10 frames each
Uses a progressive sequence of 5 prompts that build on each previous response:

- High-level scene description
- People identification and actions
- Potential criminal activities
- Objects/items involved in the incident
- Chronological timeline of events


Includes a final synthesis step that consolidates findings across all chunks

Implementation Highlights:

- Each step processes all chunks independently
- Previous step responses are provided as context for each new step
- Results from all chunks are combined before proceeding to the next step
- Saves intermediate results after each step for monitoring progress



In [9]:
import os
import json
import base64
import requests
import time
from datetime import datetime
from collections import defaultdict

# Add Google Drive mounting for Colab
try:
    from google.colab import drive
    drive.mount('/content/drive')
    print("Google Drive mounted successfully!")
except ImportError:
    print("Not running in Colab or drive module not available")

# Configuration
DATA_DIR = "/home/opade7/Documents/gpu-test/crime-data"
SAVE_DIR = "/home/opade7/Documents/gpu-test/RESULT-PROMPT/GPT/SEQUENTIAL"
FRAME_INTERVAL = 1  # Process ALL frames (set to higher number for sampling)

class SequentialPromptingAnalyzer:
    def __init__(self, api_key):
        self.api_key = api_key
        self.headers = {
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json"
        }
        self.save_dir = SAVE_DIR
        self.chunk_size = 10
        os.makedirs(self.save_dir, exist_ok=True)

        # Sequential prompting flow - each prompt builds on previous responses
        self.prompt_sequence = [
            "What's happening in these frames? Describe the scene at a high level.",
            "Based on what you observed in your previous response, who are the main people in the scene and what are they doing?",
            "Looking at the actions you described, do you observe any potential criminal activities? If so, describe them in detail.",
            "Based on your crime analysis, what objects or items are involved in the incident?",
            "Considering all your observations, create a chronological timeline of events shown in these frames."
        ]

    def process_frames_with_sequential_prompts(self, frames_data, video_id):
        """Process frames with sequential prompting approach"""
        url = "https://api.openai.com/v1/chat/completions"

        # Extract frame data from the dictionary
        frame_names = list(frames_data.keys())

        # Improved sorting function for frame numbers
        def extract_frame_number(filename):
            try:
                # Handle different naming patterns
                if '_frame_' in filename:
                    parts = filename.split('_frame_')
                    if len(parts) > 1:
                        number_part = parts[1].split('.')[0]
                        return int(number_part)
                elif 'frame' in filename.lower():
                    # Alternative pattern matching
                    import re
                    numbers = re.findall(r'\d+', filename)
                    if numbers:
                        return int(numbers[-1])  # Use the last number found
            except Exception as e:
                print(f"Error extracting frame number from {filename}: {str(e)}")
                return 0

        sorted_frames = sorted(frame_names, key=extract_frame_number)
        frame_data = [frames_data[frame_name] for frame_name in sorted_frames if frame_name in frames_data and frames_data[frame_name]]

        if not frame_data:
            return {"error": "No valid frames available for analysis"}

        total_frames = len(frame_data)
        print(f"Processing all {total_frames} frames for sequential analysis")

        # Process all frames by dividing them into chunks
        chunk_size = 10
        frame_chunks = [frame_data[i:i+chunk_size] for i in range(0, total_frames, chunk_size)]
        print(f"Split into {len(frame_chunks)} chunks of approximately {chunk_size} frames each")

        # Initialize conversation history
        conversation = [
            {
                "role": "system",
                "content": "You are analyzing video frames showing a potential crime scene. Provide detailed observations based on what you see."
            }
        ]

        sequence_results = {}
        all_chunk_responses = {}

        # Process each prompt in sequence
        for step, prompt in enumerate(self.prompt_sequence, 1):
            print(f"Processing sequential prompt {step}/{len(self.prompt_sequence)}")

            # For each step, collect responses from all chunks
            step_responses = []

            # Process each chunk of frames for this step
            for chunk_idx, chunk in enumerate(frame_chunks):
                print(f"  Processing chunk {chunk_idx+1}/{len(frame_chunks)} for step {step}...")

                # Create a clean conversation state for each chunk that includes previous steps
                # but starts fresh for this chunk
                if step == 1:
                    # First step only needs system message
                    chunk_conversation = [conversation[0]]
                else:
                    # Later steps include previous step responses in conversation
                    chunk_conversation = [conversation[0]]
                    for prev_step in range(1, step):
                        # Include previous prompt
                        chunk_conversation.append({
                            "role": "user",
                            "content": self.prompt_sequence[prev_step-1]
                        })
                        # Include aggregated response from previous step
                        chunk_conversation.append({
                            "role": "assistant",
                            "content": all_chunk_responses[f"Step {prev_step}"]
                        })

                # Create the user message with prompt and frames
                user_message = {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": f"{prompt} (Analyzing frames {chunk_idx*chunk_size+1}-{min((chunk_idx+1)*chunk_size, total_frames)} of {total_frames})"
                        }
                    ]
                }

                # Include frames for this chunk
                for frame in chunk:
                    # Default to PNG since the files are PNGs
                    mime_type = "image/png"

                    user_message["content"].append({
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:{mime_type};base64,{frame}",
                            "detail": "high"
                        }
                    })

                # Add user message to chunk conversation
                chunk_conversation.append(user_message)

                # Make API request for this chunk
                payload = {
                    "model": "gpt-4o",
                    "messages": chunk_conversation,
                    "max_tokens": 4096,
                    "temperature": 0.1
                }

                try:
                    print(f"    Sending request to GPT-4o for chunk {chunk_idx+1}...")
                    response = requests.post(url, headers=self.headers, json=payload)

                    if response.status_code != 200:
                        error_detail = response.json() if response.text else response.text
                        print(f"    API Error {response.status_code}: {error_detail}")
                        step_responses.append(f"Error processing chunk {chunk_idx+1}: {error_detail}")
                        continue

                    result = response.json()
                    if "choices" in result and result["choices"]:
                        assistant_response = result["choices"][0]["message"]["content"]
                        print(f"    Received response for chunk {chunk_idx+1}")
                        step_responses.append(assistant_response)
                    else:
                        step_responses.append(f"No response from API for chunk {chunk_idx+1}")

                except Exception as e:
                    print(f"    Error in chunk {chunk_idx+1} for step {step}: {str(e)}")
                    step_responses.append(f"Error processing chunk {chunk_idx+1}: {str(e)}")

                # Rate limiting between chunks
                print(f"    Waiting 3 seconds before next request...")
                time.sleep(3)

            # Combine all chunk responses for this step
            combined_response = "\n\n=== NEXT CHUNK ===\n\n".join(step_responses)

            # Save the combined response for this step
            sequence_results[f"Step {step}"] = {
                "prompt": prompt,
                "response": combined_response
            }

            # Store for use in next steps
            all_chunk_responses[f"Step {step}"] = combined_response

            # Add to main conversation for later reference
            conversation.append({
                "role": "user",
                "content": prompt
            })
            conversation.append({
                "role": "assistant",
                "content": combined_response
            })

            # Save intermediate results after each step
            timestamp = time.strftime("%Y%m%d_%H%M%S")
            step_result = {
                f"Step {step}": sequence_results[f"Step {step}"]
            }
            self.save_results(step_result, f"{video_id}_sequential_step{step}_{timestamp}.json")
            print(f"  Step {step} results saved.")

            # Rate limiting between steps
            print(f"  Waiting 5 seconds before next step...")
            time.sleep(5)

        # Add a final synthesis step to bring everything together
        synthesis_prompt = "Based on all your previous analyses of ALL frame chunks, provide a comprehensive final assessment of the entire video. Summarize what crime appears to be taking place, who is involved, and how events unfolded across all the frames."

        user_message = {
            "role": "user",
            "content": synthesis_prompt
        }

        conversation.append(user_message)

        # Make API request for final synthesis
        payload = {
            "model": "gpt-4o",
            "messages": conversation,
            "max_tokens": 4096,
            "temperature": 0.1
        }

        try:
            print("Performing final synthesis of all analyses...")
            response = requests.post(url, headers=self.headers, json=payload)

            if response.status_code == 200:
                result = response.json()
                if "choices" in result and result["choices"]:
                    assistant_response = result["choices"][0]["message"]["content"]
                    print("Synthesis complete!")

                    # Save synthesis results
                    sequence_results["Final Synthesis"] = {
                        "prompt": synthesis_prompt,
                        "response": assistant_response
                    }

                    timestamp = time.strftime("%Y%m%d_%H%M%S")
                    synthesis_result = {
                        "Final Synthesis": sequence_results["Final Synthesis"]
                    }
                    self.save_results(synthesis_result, f"{video_id}_sequential_synthesis_{timestamp}.json")
                    print("Synthesis results saved.")
            else:
                error_detail = response.json() if response.text else response.text
                print(f"API Error in synthesis: {response.status_code}: {error_detail}")

        except Exception as e:
            print(f"Error in final synthesis: {str(e)}")

        return {
            "sequential_results": sequence_results,
            "frames_used": total_frames,
            "chunks_processed": len(frame_chunks),
            "frames_per_chunk": chunk_size
        }

    def save_results(self, results, filename):
        """Save results to a file"""
        filepath = os.path.join(self.save_dir, filename)
        with open(filepath, 'w') as f:
            json.dump(results, f, indent=2)
        print(f"Results saved to: {filepath}")

    def analyze_frames(self, frames_data, video_id, crime_type):
        """Analyze frames with sequential prompting"""
        try:
            print(f"\n=== ANALYZING VIDEO: {video_id} ({crime_type}) WITH SEQUENTIAL PROMPTING ===")
            print(f"Total frames loaded: {len(frames_data)}")

            timestamp = time.strftime("%Y%m%d_%H%M%S")
            results = self.process_frames_with_sequential_prompts(frames_data, video_id)

            # Add crime type to results
            if "sequential_results" in results:
                results["crime_type"] = crime_type

            # Save complete results
            self.save_results(results, f"{crime_type}_{video_id}_sequential_complete_{timestamp}.json")
            print(f"Complete sequential analysis for {video_id} ({crime_type}) saved.")

            return results

        except Exception as e:
            print(f"Error in sequential analysis: {str(e)}")
            return {"error": str(e)}

def discover_all_videos_and_frames(data_dir):
    """Discover all crime types, videos, and their frames"""
    print(f"\n=== DISCOVERING ALL VIDEOS AND FRAMES ===")
    print(f"Scanning directory: {data_dir}")

    all_videos = {}

    try:
        # Get all subdirectories (crime types)
        crime_types = [d for d in os.listdir(data_dir)
                      if os.path.isdir(os.path.join(data_dir, d))]

        print(f"Found {len(crime_types)} crime type directories: {crime_types}")

        for crime_type in crime_types:
            crime_dir = os.path.join(data_dir, crime_type)
            print(f"\nScanning {crime_type} directory...")

            try:
                all_files = os.listdir(crime_dir)
                print(f"  Found {len(all_files)} files")

                # Group files by video ID
                video_groups = defaultdict(list)

                for filename in all_files:
                    # Skip non-image files
                    if not any(filename.lower().endswith(ext) for ext in ['.png', '.jpg', '.jpeg', '.bmp']):
                        continue

                    # Extract video ID from filename
                    video_id = extract_video_id_from_filename(filename)
                    if video_id:
                        video_groups[video_id].append(filename)

                print(f"  Identified {len(video_groups)} unique videos:")
                for video_id, frames in video_groups.items():
                    print(f"    {video_id}: {len(frames)} frames")
                    all_videos[f"{crime_type}_{video_id}"] = {
                        'crime_type': crime_type,
                        'video_id': video_id,
                        'frames': frames,
                        'crime_dir': crime_dir
                    }

            except Exception as e:
                print(f"  Error scanning {crime_type}: {str(e)}")

    except Exception as e:
        print(f"Error accessing main directory: {str(e)}")

    print(f"\nTotal videos discovered: {len(all_videos)}")
    return all_videos

def extract_video_id_from_filename(filename):
    """Extract video ID from filename using various patterns"""
    import re

    # Remove file extension
    name_without_ext = os.path.splitext(filename)[0]

    # Pattern 1: VideoName_something_frame_number
    if '_frame_' in name_without_ext:
        parts = name_without_ext.split('_frame_')
        return parts[0]

    # Pattern 2: VideoName_number (assuming last part is frame number)
    parts = name_without_ext.split('_')
    if len(parts) >= 2:
        # Check if last part is a number
        try:
            int(parts[-1])
            return '_'.join(parts[:-1])
        except ValueError:
            pass

    # Pattern 3: Just use the filename without numbers at the end
    video_id = re.sub(r'_?\d+$', '', name_without_ext)
    if video_id and video_id != name_without_ext:
        return video_id

    # Pattern 4: If all else fails, use the whole name (assuming single frame)
    return name_without_ext

def load_frames_for_video(video_info, frame_interval=1):
    """Load every Nth frame for a specific video (default: ALL frames)"""
    frames_data = {}
    crime_dir = video_info['crime_dir']
    frame_files = video_info['frames']
    video_id = video_info['video_id']

    print(f"\nLoading {'ALL frames' if frame_interval == 1 else f'every {frame_interval}th frame'} for {video_id}...")

    # Sort frames by frame number
    def extract_frame_number(filename):
        try:
            import re
            numbers = re.findall(r'\d+', filename)
            if numbers:
                return int(numbers[-1])  # Use the last number found
        except Exception:
            pass
        return 0

    frame_files.sort(key=extract_frame_number)

    # Select every Nth frame
    selected_frames = frame_files[::frame_interval]
    if frame_interval == 1:
        print(f"  Processing ALL {len(selected_frames)} frames")
    else:
        print(f"  Selected {len(selected_frames)} frames from {len(frame_files)} total frames")

    # Load selected frames
    for idx, frame_file in enumerate(selected_frames):
        frame_path = os.path.join(crime_dir, frame_file)
        try:
            with open(frame_path, 'rb') as f:
                frame_data = base64.b64encode(f.read()).decode('utf-8')
                frames_data[frame_file] = frame_data

            # Show progress
            if idx < 3 or idx % 20 == 0 or idx == len(selected_frames) - 1:
                original_idx = frame_files.index(frame_file) if frame_file in frame_files else idx
                print(f"  Loaded: {frame_file} (frame #{original_idx + 1}, {os.path.getsize(frame_path)/1024:.1f} KB)")

        except Exception as e:
            print(f"  Error loading frame {frame_file}: {str(e)}")

    if frame_interval == 1:
        print(f"Successfully loaded ALL {len(frames_data)} frames for {video_id}")
    else:
        print(f"Successfully loaded {len(frames_data)} frames for {video_id} (every {frame_interval}th frame)")
    return frames_data

def process_all_crime_folders(api_key):
    """Process all crime folders with sequential prompting"""
    # Initialize analyzer
    analyzer = SequentialPromptingAnalyzer(api_key)

    # Discover all videos and frames
    all_videos = discover_all_videos_and_frames(DATA_DIR)

    if not all_videos:
        print("No videos found to process!")
        return {}

    all_results = {}
    skipped_videos = []

    print(f"\nProcessing {len(all_videos)} videos with sequential prompting...")
    print(f"Frame processing: {'ALL frames' if FRAME_INTERVAL == 1 else f'Every {FRAME_INTERVAL}th frame'}")

    # Process each video
    for video_key, video_info in all_videos.items():
        print(f"\nProcessing video: {video_key}")

        try:
            # Load frames for this video (every Nth frame as configured)
            frames_data = load_frames_for_video(video_info, frame_interval=FRAME_INTERVAL)

            if frames_data:
                # Analyze frames with sequential prompting
                results = analyzer.analyze_frames(
                    frames_data,
                    video_info['video_id'],
                    video_info['crime_type']
                )
                all_results[video_key] = results

                # Save results after each video for resilience
                timestamp = time.strftime("%Y%m%d_%H%M%S")
                current_progress = {video_key: results}
                analyzer.save_results(current_progress, f"progress_{video_info['crime_type']}_{video_info['video_id']}_{timestamp}.json")
            else:
                print(f"No frames loaded for video {video_key} - skipping")
                skipped_videos.append(f"{video_key} (no frames loaded)")

        except Exception as e:
            print(f"Error processing video {video_key}: {str(e)}")
            skipped_videos.append(f"{video_key} (error: {str(e)})")

    # Save summary results
    summary_file = os.path.join(SAVE_DIR, f"sequential_prompting_summary_{time.strftime('%Y%m%d_%H%M%S')}.json")
    with open(summary_file, 'w') as f:
        json.dump(all_results, f, indent=2)

    # Log skipped videos
    if skipped_videos:
        skipped_file = os.path.join(SAVE_DIR, f"skipped_videos_{time.strftime('%Y%m%d_%H%M%S')}.txt")
        with open(skipped_file, 'w') as f:
            f.write("Videos that could not be processed:\n")
            for video in skipped_videos:
                f.write(f"{video}\n")
        print(f"\nSkipped {len(skipped_videos)} videos. List saved to: {skipped_file}")

    print(f"\nComplete sequential analysis saved to: {summary_file}")
    print(f"Successfully processed {len(all_results)} videos")

    return all_results

def run():
    """Main execution function"""
    print("Sequential Prompting Crime Video Analysis - ALL Frames")
    print("="*50)

    # Test directory access first
    print("Testing directory access...")
    for path in [DATA_DIR, SAVE_DIR]:
        print(f"Path: {path}")
        print(f"  Exists: {os.path.exists(path)}")
        if os.path.exists(path):
            try:
                contents = os.listdir(path)
                print(f"  Contains {len(contents)} items")
                if contents:
                    print(f"  First few items: {contents[:3]}")
            except Exception as e:
                print(f"  Error accessing contents: {str(e)}")

    # Get API key
    try:
        api_key_path = "/home/opade7/Documents/gpu-test/API-KEYS/chatgpt.txt"
        print(f"Trying to load API key from: {api_key_path}")
        print(f"File exists: {os.path.exists(api_key_path)}")

        f = open(api_key_path, "r")
        api_key = f.read().strip()
        f.close()

        if not api_key:
            print("✗ Failed to load GPT API key: File is empty")
            return

        print("✓ Successfully loaded GPT API key")
        print(f"API key starts with: {api_key[:5]}...")

    except Exception as e:
        print(f"✗ Failed to load GPT API key: {str(e)}")
        return

    # Verify directories exist
    print("\nVerifying directories:")
    print(f"Data directory exists: {os.path.exists(DATA_DIR)}")
    print(f"Save directory exists: {os.path.exists(SAVE_DIR)}")

    if not os.path.exists(DATA_DIR):
        print(f"✗ Data directory not found: {DATA_DIR}")
        return

    # Create save directory if it doesn't exist
    os.makedirs(SAVE_DIR, exist_ok=True)

    # Process all crime folders
    results = process_all_crime_folders(api_key)

    # Print summary
    total_videos_processed = len(results)

    print("\n" + "="*50)
    print(f"SEQUENTIAL PROMPTING COMPLETE!")
    print(f"Videos processed: {total_videos_processed}")
    print("="*50)

if __name__ == "__main__":
    run()

Not running in Colab or drive module not available
Sequential Prompting Crime Video Analysis - ALL Frames
Testing directory access...
Path: /home/opade7/Documents/gpu-test/crime-data
  Exists: True
  Contains 11 items
  First few items: ['Burglary', 'Stealing', 'Fighting']
Path: /home/opade7/Documents/gpu-test/RESULT-PROMPT/GPT/SEQUENTIAL
  Exists: False
Trying to load API key from: /home/opade7/Documents/gpu-test/API-KEYS/chatgpt.txt
File exists: True
✓ Successfully loaded GPT API key
API key starts with: sk-pr...

Verifying directories:
Data directory exists: True
Save directory exists: False

=== DISCOVERING ALL VIDEOS AND FRAMES ===
Scanning directory: /home/opade7/Documents/gpu-test/crime-data
Found 11 crime type directories: ['Burglary', 'Stealing', 'Fighting', 'Robbery', 'Shoplifting', 'Arson', 'Vandalism', 'Shooting', 'Abuse', 'Explosion', 'Assault']

Scanning Burglary directory...
  Found 398 files
  Identified 2 unique videos:
    Burglary001_x264: 266 frames
    Burglary008_

#Least-To-Most Prompting
2. Least-To-Most Prompting Implementation
Key Features:

Processes all frames in chunks of 10 frames each
Uses a sequence of 8 prompts with increasing complexity:

- Simple object identification (low complexity)
- People identification (low complexity)
- Location/setting description (low complexity)
- Basic action identification (low complexity)
- Interaction analysis (medium complexity)
- Unusual behavior detection (medium complexity)
- Criminal activity analysis (high complexity)
- Comprehensive timeline (high complexity)


Includes a final comprehensive assessment that integrates all analyses

Implementation Highlights:

- Each step clearly labeled with its complexity level (Low, Medium, High)
- Builds from simple identification tasks to complex analysis
- Each chunk gets analyzed separately, with results combined before the next step
- Previous observation steps provide context for later analytical steps

In [10]:
import os
import json
import base64
import requests
import time
from datetime import datetime
from collections import defaultdict

# Add Google Drive mounting for Colab
try:
    from google.colab import drive
    drive.mount('/content/drive')
    print("Google Drive mounted successfully!")
except ImportError:
    print("Not running in Colab or drive module not available")

# Configuration
DATA_DIR = "/home/opade7/Documents/gpu-test/crime-data"
SAVE_DIR = "/home/opade7/Documents/gpu-test/RESULT-PROMPT/GPT/LEAST-TO-MOST"
FRAME_INTERVAL = 1  # Process ALL frames (set to higher number for sampling)

class LeastToMostAnalyzer:
    def __init__(self, api_key):
        self.api_key = api_key
        self.headers = {
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json"
        }
        self.save_dir = SAVE_DIR
        self.chunk_size = 10
        os.makedirs(self.save_dir, exist_ok=True)

        # Least-to-Most prompting sequence - starts with simpler tasks and gradually increases complexity
        self.prompt_sequence = [
            # Simple object and scene identification
            "List all visible objects in these frames. Just identify what you can see - furniture, tools, vehicles, etc.",

            # Simple people identification
            "Identify the people visible in these frames. How many are there? Describe each person's basic appearance (clothing, general features).",

            # Simple spatial relationships
            "Describe the location and setting shown in these frames. What kind of place is this? Describe the spatial layout.",

            # Basic actions (still simple)
            "What actions are the people performing in these frames? List simple actions you can observe.",

            # Interactions (medium complexity)
            "How are the people interacting with each other and with objects in the scene? Describe specific interactions.",

            # Unusual behaviors (medium complexity)
            "Do you notice any unusual, concerning, or potentially suspicious behaviors in these frames? If so, what specifically seems unusual?",

            # Criminal analysis (higher complexity)
            "Based on your previous observations, analyze whether any potential criminal activities might be occurring. What specific elements suggest criminal behavior?",

            # Comprehensive timeline (highest complexity)
            "Using all your previous observations, construct a detailed chronological timeline of events shown in these frames. Include who did what, when, and potential motives."
        ]

    def process_frames_with_least_to_most(self, frames_data, video_id):
        """Process frames with least-to-most prompting approach"""
        url = "https://api.openai.com/v1/chat/completions"

        # Extract frame data from the dictionary
        frame_names = list(frames_data.keys())

        # Improved sorting function for frame numbers
        def extract_frame_number(filename):
            try:
                # Handle different naming patterns
                if '_frame_' in filename:
                    parts = filename.split('_frame_')
                    if len(parts) > 1:
                        number_part = parts[1].split('.')[0]
                        return int(number_part)
                elif 'frame' in filename.lower():
                    # Alternative pattern matching
                    import re
                    numbers = re.findall(r'\d+', filename)
                    if numbers:
                        return int(numbers[-1])  # Use the last number found
            except Exception as e:
                print(f"Error extracting frame number from {filename}: {str(e)}")
                return 0

        sorted_frames = sorted(frame_names, key=extract_frame_number)
        frame_data = [frames_data[frame_name] for frame_name in sorted_frames if frame_name in frames_data and frames_data[frame_name]]

        if not frame_data:
            return {"error": "No valid frames available for analysis"}

        total_frames = len(frame_data)
        print(f"Processing all {total_frames} frames for least-to-most analysis")

        # Process all frames by dividing them into chunks
        chunk_size = 10
        frame_chunks = [frame_data[i:i+chunk_size] for i in range(0, total_frames, chunk_size)]
        print(f"Split into {len(frame_chunks)} chunks of approximately {chunk_size} frames each")

        # Initialize conversation history with system message
        conversation = [
            {
                "role": "system",
                "content": "You are analyzing video frames from a potential crime scene. Start with basic observations and gradually build to more complex analysis as directed by the prompts."
            }
        ]

        ltm_results = {}
        all_chunk_responses = {}

        # Process each prompt in sequence, building complexity
        for step, prompt in enumerate(self.prompt_sequence, 1):
            print(f"Processing least-to-most prompt {step}/{len(self.prompt_sequence)}: {prompt[:30]}...")

            # For each step, collect responses from all chunks
            step_responses = []

            # Process each chunk of frames for this step
            for chunk_idx, chunk in enumerate(frame_chunks):
                print(f"  Processing chunk {chunk_idx+1}/{len(frame_chunks)} for step {step}...")

                # Create a clean conversation state for each chunk that includes previous steps
                # but starts fresh for this chunk
                if step == 1:
                    # First step only needs system message
                    chunk_conversation = [conversation[0]]
                else:
                    # Later steps include previous step responses in conversation
                    chunk_conversation = [conversation[0]]
                    for prev_step in range(1, step):
                        # Include previous prompt
                        chunk_conversation.append({
                            "role": "user",
                            "content": self.prompt_sequence[prev_step-1]
                        })
                        # Include aggregated response from previous step
                        chunk_conversation.append({
                            "role": "assistant",
                            "content": all_chunk_responses[f"Step {prev_step}"]
                        })

                # Create the user message with prompt and frames
                user_message = {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": f"{prompt} (Analyzing frames {chunk_idx*chunk_size+1}-{min((chunk_idx+1)*chunk_size, total_frames)} of {total_frames})"
                        }
                    ]
                }

                # Include frames for this chunk
                for frame in chunk:
                    # Default to PNG since the files are PNGs
                    mime_type = "image/png"

                    user_message["content"].append({
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:{mime_type};base64,{frame}",
                            "detail": "high"
                        }
                    })

                # Add user message to chunk conversation
                chunk_conversation.append(user_message)

                # Make API request for this chunk
                payload = {
                    "model": "gpt-4o",
                    "messages": chunk_conversation,
                    "max_tokens": 4096,
                    "temperature": 0.1
                }

                try:
                    print(f"    Sending request to GPT-4o for chunk {chunk_idx+1}...")
                    response = requests.post(url, headers=self.headers, json=payload)

                    if response.status_code != 200:
                        error_detail = response.json() if response.text else response.text
                        print(f"    API Error {response.status_code}: {error_detail}")
                        step_responses.append(f"Error processing chunk {chunk_idx+1}: {error_detail}")
                        continue

                    result = response.json()
                    if "choices" in result and result["choices"]:
                        assistant_response = result["choices"][0]["message"]["content"]
                        print(f"    Received response for chunk {chunk_idx+1}")
                        step_responses.append(assistant_response)
                    else:
                        step_responses.append(f"No response from API for chunk {chunk_idx+1}")

                except Exception as e:
                    print(f"    Error in chunk {chunk_idx+1} for step {step}: {str(e)}")
                    step_responses.append(f"Error processing chunk {chunk_idx+1}: {str(e)}")

                # Rate limiting between chunks
                print(f"    Waiting 3 seconds before next request...")
                time.sleep(3)

            # Combine all chunk responses for this step
            combined_response = "\n\n=== NEXT CHUNK ===\n\n".join(step_responses)

            # Save the combined response for this step
            ltm_results[f"Step {step}"] = {
                "prompt": prompt,
                "complexity_level": "Low" if step <= 3 else "Medium" if step <= 6 else "High",
                "response": combined_response
            }

            # Store for use in next steps
            all_chunk_responses[f"Step {step}"] = combined_response

            # Add to main conversation for the final synthesis
            conversation.append({
                "role": "user",
                "content": prompt
            })
            conversation.append({
                "role": "assistant",
                "content": combined_response
            })

            # Save intermediate results after each step
            timestamp = time.strftime("%Y%m%d_%H%M%S")
            step_result = {
                f"Step {step}": ltm_results[f"Step {step}"]
            }
            self.save_results(step_result, f"{video_id}_ltm_step{step}_{timestamp}.json")
            print(f"  Step {step} results saved.")

            # Rate limiting between steps
            print(f"  Waiting 5 seconds before next step...")
            time.sleep(5)

        # Add a final synthesis prompt to integrate all previous analyses
        if len(ltm_results) == len(self.prompt_sequence):
            print("Performing final synthesis of all analyses...")
            synthesis_prompt = "Based on all your previous analyses of ALL frame chunks, provide a comprehensive assessment of what crime appears to be taking place, who is involved, how it was executed, and what evidence is visible in the frames."

            # Create final synthesis message
            user_message = {
                "role": "user",
                "content": synthesis_prompt
            }

            conversation.append(user_message)

            # Make API request for final synthesis
            payload = {
                "model": "gpt-4o",
                "messages": conversation,
                "max_tokens": 4096,
                "temperature": 0.1
            }

            try:
                response = requests.post(url, headers=self.headers, json=payload)

                if response.status_code == 200:
                    result = response.json()
                    if "choices" in result and result["choices"]:
                        assistant_response = result["choices"][0]["message"]["content"]
                        print("Synthesis complete!")

                        # Save synthesis results
                        ltm_results["Final Synthesis"] = {
                            "prompt": synthesis_prompt,
                            "complexity_level": "Highest",
                            "response": assistant_response
                        }

                        timestamp = time.strftime("%Y%m%d_%H%M%S")
                        synthesis_result = {
                            "Final Synthesis": ltm_results["Final Synthesis"]
                        }
                        self.save_results(synthesis_result, f"{video_id}_ltm_synthesis_{timestamp}.json")
                        print("Synthesis results saved.")
                else:
                    error_detail = response.json() if response.text else response.text
                    print(f"API Error in synthesis: {response.status_code}: {error_detail}")
            except Exception as e:
                print(f"Error in final synthesis: {str(e)}")

        return {
            "least_to_most_results": ltm_results,
            "frames_used": total_frames,
            "chunks_processed": len(frame_chunks),
            "frames_per_chunk": chunk_size
        }

    def save_results(self, results, filename):
        """Save results to a file"""
        filepath = os.path.join(self.save_dir, filename)
        with open(filepath, 'w') as f:
            json.dump(results, f, indent=2)
        print(f"Results saved to: {filepath}")

    def analyze_frames(self, frames_data, video_id, crime_type):
        """Analyze frames with least-to-most prompting"""
        try:
            print(f"\n=== ANALYZING VIDEO: {video_id} ({crime_type}) WITH LEAST-TO-MOST PROMPTING ===")
            print(f"Total frames loaded: {len(frames_data)}")

            timestamp = time.strftime("%Y%m%d_%H%M%S")
            results = self.process_frames_with_least_to_most(frames_data, video_id)

            # Add crime type to results
            if "least_to_most_results" in results:
                results["crime_type"] = crime_type

            # Save complete results
            self.save_results(results, f"{crime_type}_{video_id}_ltm_complete_{timestamp}.json")
            print(f"Complete least-to-most analysis for {video_id} ({crime_type}) saved.")

            return results

        except Exception as e:
            print(f"Error in least-to-most analysis: {str(e)}")
            return {"error": str(e)}

def discover_all_videos_and_frames(data_dir):
    """Discover all crime types, videos, and their frames"""
    print(f"\n=== DISCOVERING ALL VIDEOS AND FRAMES ===")
    print(f"Scanning directory: {data_dir}")

    all_videos = {}

    try:
        # Get all subdirectories (crime types)
        crime_types = [d for d in os.listdir(data_dir)
                      if os.path.isdir(os.path.join(data_dir, d))]

        print(f"Found {len(crime_types)} crime type directories: {crime_types}")

        for crime_type in crime_types:
            crime_dir = os.path.join(data_dir, crime_type)
            print(f"\nScanning {crime_type} directory...")

            try:
                all_files = os.listdir(crime_dir)
                print(f"  Found {len(all_files)} files")

                # Group files by video ID
                video_groups = defaultdict(list)

                for filename in all_files:
                    # Skip non-image files
                    if not any(filename.lower().endswith(ext) for ext in ['.png', '.jpg', '.jpeg', '.bmp']):
                        continue

                    # Extract video ID from filename
                    video_id = extract_video_id_from_filename(filename)
                    if video_id:
                        video_groups[video_id].append(filename)

                print(f"  Identified {len(video_groups)} unique videos:")
                for video_id, frames in video_groups.items():
                    print(f"    {video_id}: {len(frames)} frames")
                    all_videos[f"{crime_type}_{video_id}"] = {
                        'crime_type': crime_type,
                        'video_id': video_id,
                        'frames': frames,
                        'crime_dir': crime_dir
                    }

            except Exception as e:
                print(f"  Error scanning {crime_type}: {str(e)}")

    except Exception as e:
        print(f"Error accessing main directory: {str(e)}")

    print(f"\nTotal videos discovered: {len(all_videos)}")
    return all_videos

def extract_video_id_from_filename(filename):
    """Extract video ID from filename using various patterns"""
    import re

    # Remove file extension
    name_without_ext = os.path.splitext(filename)[0]

    # Pattern 1: VideoName_something_frame_number
    if '_frame_' in name_without_ext:
        parts = name_without_ext.split('_frame_')
        return parts[0]

    # Pattern 2: VideoName_number (assuming last part is frame number)
    parts = name_without_ext.split('_')
    if len(parts) >= 2:
        # Check if last part is a number
        try:
            int(parts[-1])
            return '_'.join(parts[:-1])
        except ValueError:
            pass

    # Pattern 3: Just use the filename without numbers at the end
    video_id = re.sub(r'_?\d+$', '', name_without_ext)
    if video_id and video_id != name_without_ext:
        return video_id

    # Pattern 4: If all else fails, use the whole name (assuming single frame)
    return name_without_ext

def load_frames_for_video(video_info, frame_interval=1):
    """Load every Nth frame for a specific video (default: ALL frames)"""
    frames_data = {}
    crime_dir = video_info['crime_dir']
    frame_files = video_info['frames']
    video_id = video_info['video_id']

    print(f"\nLoading {'ALL frames' if frame_interval == 1 else f'every {frame_interval}th frame'} for {video_id}...")

    # Sort frames by frame number
    def extract_frame_number(filename):
        try:
            import re
            numbers = re.findall(r'\d+', filename)
            if numbers:
                return int(numbers[-1])  # Use the last number found
        except Exception:
            pass
        return 0

    frame_files.sort(key=extract_frame_number)

    # Select every Nth frame
    selected_frames = frame_files[::frame_interval]
    if frame_interval == 1:
        print(f"  Processing ALL {len(selected_frames)} frames")
    else:
        print(f"  Selected {len(selected_frames)} frames from {len(frame_files)} total frames")

    # Load selected frames
    for idx, frame_file in enumerate(selected_frames):
        frame_path = os.path.join(crime_dir, frame_file)
        try:
            with open(frame_path, 'rb') as f:
                frame_data = base64.b64encode(f.read()).decode('utf-8')
                frames_data[frame_file] = frame_data

            # Show progress
            if idx < 3 or idx % 20 == 0 or idx == len(selected_frames) - 1:
                original_idx = frame_files.index(frame_file) if frame_file in frame_files else idx
                print(f"  Loaded: {frame_file} (frame #{original_idx + 1}, {os.path.getsize(frame_path)/1024:.1f} KB)")

        except Exception as e:
            print(f"  Error loading frame {frame_file}: {str(e)}")

    if frame_interval == 1:
        print(f"Successfully loaded ALL {len(frames_data)} frames for {video_id}")
    else:
        print(f"Successfully loaded {len(frames_data)} frames for {video_id} (every {frame_interval}th frame)")
    return frames_data

def process_all_crime_folders(api_key):
    """Process all crime folders with least-to-most prompting"""
    # Initialize analyzer
    analyzer = LeastToMostAnalyzer(api_key)

    # Discover all videos and frames
    all_videos = discover_all_videos_and_frames(DATA_DIR)

    if not all_videos:
        print("No videos found to process!")
        return {}

    all_results = {}
    skipped_videos = []

    print(f"\nProcessing {len(all_videos)} videos with least-to-most prompting...")
    print(f"Frame processing: {'ALL frames' if FRAME_INTERVAL == 1 else f'Every {FRAME_INTERVAL}th frame'}")

    # Process each video
    for video_key, video_info in all_videos.items():
        print(f"\nProcessing video: {video_key}")

        try:
            # Load frames for this video (every Nth frame as configured)
            frames_data = load_frames_for_video(video_info, frame_interval=FRAME_INTERVAL)

            if frames_data:
                # Analyze frames with least-to-most prompting
                results = analyzer.analyze_frames(
                    frames_data,
                    video_info['video_id'],
                    video_info['crime_type']
                )
                all_results[video_key] = results

                # Save results after each video for resilience
                timestamp = time.strftime("%Y%m%d_%H%M%S")
                current_progress = {video_key: results}
                analyzer.save_results(current_progress, f"progress_{video_info['crime_type']}_{video_info['video_id']}_{timestamp}.json")
            else:
                print(f"No frames loaded for video {video_key} - skipping")
                skipped_videos.append(f"{video_key} (no frames loaded)")

        except Exception as e:
            print(f"Error processing video {video_key}: {str(e)}")
            skipped_videos.append(f"{video_key} (error: {str(e)})")

    # Save summary results
    summary_file = os.path.join(SAVE_DIR, f"least_to_most_summary_{time.strftime('%Y%m%d_%H%M%S')}.json")
    with open(summary_file, 'w') as f:
        json.dump(all_results, f, indent=2)

    # Log skipped videos
    if skipped_videos:
        skipped_file = os.path.join(SAVE_DIR, f"skipped_videos_{time.strftime('%Y%m%d_%H%M%S')}.txt")
        with open(skipped_file, 'w') as f:
            f.write("Videos that could not be processed:\n")
            for video in skipped_videos:
                f.write(f"{video}\n")
        print(f"\nSkipped {len(skipped_videos)} videos. List saved to: {skipped_file}")

    print(f"\nComplete least-to-most analysis saved to: {summary_file}")
    print(f"Successfully processed {len(all_results)} videos")

    return all_results

def run():
    """Main execution function"""
    print("Least-To-Most Prompting Crime Video Analysis - ALL Frames")
    print("="*50)

    # Test directory access first
    print("Testing directory access...")
    for path in [DATA_DIR, SAVE_DIR]:
        print(f"Path: {path}")
        print(f"  Exists: {os.path.exists(path)}")
        if os.path.exists(path):
            try:
                contents = os.listdir(path)
                print(f"  Contains {len(contents)} items")
                if contents:
                    print(f"  First few items: {contents[:3]}")
            except Exception as e:
                print(f"  Error accessing contents: {str(e)}")

    # Get API key
    try:
        api_key_path = "/home/opade7/Documents/gpu-test/API-KEYS/chatgpt.txt"
        print(f"Trying to load API key from: {api_key_path}")
        print(f"File exists: {os.path.exists(api_key_path)}")

        f = open(api_key_path, "r")
        api_key = f.read().strip()
        f.close()

        if not api_key:
            print("✗ Failed to load GPT API key: File is empty")
            return

        print("✓ Successfully loaded GPT API key")
        print(f"API key starts with: {api_key[:5]}...")

    except Exception as e:
        print(f"✗ Failed to load GPT API key: {str(e)}")
        return

    # Verify directories exist
    print("\nVerifying directories:")
    print(f"Data directory exists: {os.path.exists(DATA_DIR)}")
    print(f"Save directory exists: {os.path.exists(SAVE_DIR)}")

    if not os.path.exists(DATA_DIR):
        print(f"✗ Data directory not found: {DATA_DIR}")
        return

    # Create save directory if it doesn't exist
    os.makedirs(SAVE_DIR, exist_ok=True)

    # Process all crime folders
    results = process_all_crime_folders(api_key)

    print("\n" + "="*50)
    print(f"LEAST-TO-MOST PROMPTING COMPLETE!")
    print(f"Videos processed: {len(results)}")
    print("="*50)

if __name__ == "__main__":
    run()

Not running in Colab or drive module not available
Least-To-Most Prompting Crime Video Analysis - ALL Frames
Testing directory access...
Path: /home/opade7/Documents/gpu-test/crime-data
  Exists: True
  Contains 11 items
  First few items: ['Burglary', 'Stealing', 'Fighting']
Path: /home/opade7/Documents/gpu-test/RESULT-PROMPT/GPT/LEAST-TO-MOST
  Exists: False
Trying to load API key from: /home/opade7/Documents/gpu-test/API-KEYS/chatgpt.txt
File exists: True
✓ Successfully loaded GPT API key
API key starts with: sk-pr...

Verifying directories:
Data directory exists: True
Save directory exists: False

=== DISCOVERING ALL VIDEOS AND FRAMES ===
Scanning directory: /home/opade7/Documents/gpu-test/crime-data
Found 11 crime type directories: ['Burglary', 'Stealing', 'Fighting', 'Robbery', 'Shoplifting', 'Arson', 'Vandalism', 'Shooting', 'Abuse', 'Explosion', 'Assault']

Scanning Burglary directory...
  Found 398 files
  Identified 2 unique videos:
    Burglary001_x264: 266 frames
    Burgla

#ReAct
ReAct Prompting Approach
The ReAct technique follows a structured thinking cycle:

- Thought: Reasoning about what's being observed and its potential meaning
- Action: Deciding what specific aspect to focus on analyzing next
- Observation: Making detailed observations about that specific aspect
- Decision: Drawing a conclusion based on accumulated observations

Implementation Highlights

Explicit ReAct Framework:

- The prompt explicitly guides the model through the four-step cycle (Thought → Action → Observation → Decision)
- Applies this cycle to multiple aspects: people, actions, objects, spatial relationships, etc.


Complete Frame Processing:

- Processes all frames in chunks of 10 frames each
- Each chunk gets a full ReAct analysis independently


Evidence Chain Analysis:

- After all chunks are processed, a second pass synthesizes all analysis chunks
- Creates a coherent evidence chain that resolves any contradictions between chunks
- Explicitly establishes what crime occurred, who was involved, and the sequence of events
- Assesses confidence level in the final determination


Distinct Prompting Strategy:

- Unlike sequential or least-to-most approaches, ReAct emphasizes explicit reasoning Forces the model to justify conclusions with evidence
Creates a clear chain of reasoning that mimics human investigative processes



Benefits of ReAct for Crime Analysis
The ReAct approach is particularly well-suited for crime analysis because:

- Transparency: The reasoning and decision process is explicit and traceable
- Evidence-Based: Conclusions are directly linked to specific observations
- Methodical: Forces a structured investigation approach rather than jumping to conclusions
- Forensic-Style: Resembles how human investigators approach crime scenes

The final step, which combines all chunk analyses into a cohesive evidence chain, ensures that the entire video is considered holistically despite being processed in chunks. This produces a comprehensive analysis that identifies consistent evidence across the entire video sequence.
This implementation provides a robust framework for analyzing crime videos that explicitly shows the reasoning process behind each conclusion, making it particularly valuable for scenarios where explaining the rationale is as important as the conclusions themselves.

In [11]:
import os
import json
import base64
import requests
import time
from datetime import datetime
from collections import defaultdict

# Add Google Drive mounting for Colab
try:
    from google.colab import drive
    drive.mount('/content/drive')
    print("Google Drive mounted successfully!")
except ImportError:
    print("Not running in Colab or drive module not available")

# Configuration
DATA_DIR = "/home/opade7/Documents/gpu-test/crime-data"
SAVE_DIR = "/home/opade7/Documents/gpu-test/RESULT-PROMPT/GPT/REACT"
FRAME_INTERVAL = 1  # Process ALL frames (set to higher number for sampling)

class ReActAnalyzer:
    def __init__(self, api_key):
        self.api_key = api_key
        self.headers = {
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json"
        }
        self.save_dir = SAVE_DIR
        self.chunk_size = 10
        os.makedirs(self.save_dir, exist_ok=True)

        # Base ReAct prompt that guides the model through a reasoning and acting cycle
        self.react_prompt_template = """
Analyze these frames using the ReAct approach (Reasoning and Acting). For each important element you observe:

1. Thought: Reason about what you're seeing and what it might mean
2. Action: Describe what specific aspect you'll focus on analyzing next
3. Observation: Make detailed observations about that aspect
4. Decision: Draw a conclusion based on your observations

Specifically, follow this cycle for:
- People and their appearances
- Actions and behaviors
- Objects and items
- Spatial relationships
- Temporal sequence of events
- Potential criminal activity

After going through these cycles, provide your final analysis of what crime appears to be occurring, who is involved, and what evidence supports this conclusion.

You are now analyzing frames {frame_range} of {total_frames}.
"""

    def process_frames_with_react(self, frames_data, video_id):
        """Process frames with ReAct prompting approach"""
        url = "https://api.openai.com/v1/chat/completions"

        # Extract frame data from the dictionary
        frame_names = list(frames_data.keys())

        # Improved sorting function for frame numbers
        def extract_frame_number(filename):
            try:
                # Handle different naming patterns
                if '_frame_' in filename:
                    parts = filename.split('_frame_')
                    if len(parts) > 1:
                        number_part = parts[1].split('.')[0]
                        return int(number_part)
                elif 'frame' in filename.lower():
                    # Alternative pattern matching
                    import re
                    numbers = re.findall(r'\d+', filename)
                    if numbers:
                        return int(numbers[-1])  # Use the last number found
            except Exception as e:
                print(f"Error extracting frame number from {filename}: {str(e)}")
                return 0

        sorted_frames = sorted(frame_names, key=extract_frame_number)
        frame_data = [frames_data[frame_name] for frame_name in sorted_frames if frame_name in frames_data and frames_data[frame_name]]

        if not frame_data:
            return {"error": "No valid frames available for analysis"}

        total_frames = len(frame_data)
        print(f"Processing all {total_frames} frames with ReAct approach")

        # Process all frames by dividing them into chunks
        chunk_size = 10
        frame_chunks = [frame_data[i:i+chunk_size] for i in range(0, total_frames, chunk_size)]
        print(f"Split into {len(frame_chunks)} chunks of approximately {chunk_size} frames each")

        # Initialize results and conversation
        react_results = {}
        all_chunk_analysis = []

        # System message that sets up the ReAct approach
        system_message = {
            "role": "system",
            "content": "You are an expert crime scene analyst using the ReAct approach to analyze video frames. First reason about what you see (Thought), then decide what to focus on (Action), then make detailed observations (Observation), and finally draw conclusions (Decision). Be methodical and evidence-based."
        }

        # Process each chunk of frames
        for chunk_idx, chunk in enumerate(frame_chunks):
            frame_start = chunk_idx * chunk_size + 1
            frame_end = min((chunk_idx + 1) * chunk_size, total_frames)
            frame_range = f"{frame_start}-{frame_end}"

            print(f"Processing chunk {chunk_idx+1}/{len(frame_chunks)} (frames {frame_range})...")

            # Create the prompt for this chunk
            react_prompt = self.react_prompt_template.format(frame_range=frame_range, total_frames=total_frames)

            # Reset conversation for each chunk
            chunk_conversation = [system_message]

            # Create user message with prompt and frames
            user_message = {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": react_prompt
                    }
                ]
            }

            # Add frames to the message
            for frame in chunk:
                # Default to PNG since the files are PNGs
                mime_type = "image/png"

                user_message["content"].append({
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:{mime_type};base64,{frame}",
                        "detail": "high"
                    }
                })

            # Add user message to conversation
            chunk_conversation.append(user_message)

            # Make API request for initial ReAct analysis
            payload = {
                "model": "gpt-4o",
                "messages": chunk_conversation,
                "max_tokens": 4096,
                "temperature": 0.1
            }

            try:
                print(f"  Sending request to GPT-4o for chunk {chunk_idx+1}...")
                response = requests.post(url, headers=self.headers, json=payload)

                if response.status_code != 200:
                    error_detail = response.json() if response.text else response.text
                    print(f"  API Error {response.status_code}: {error_detail}")
                    chunk_analysis = f"Error processing chunk {chunk_idx+1}: {error_detail}"
                else:
                    result = response.json()
                    if "choices" in result and result["choices"]:
                        chunk_analysis = result["choices"][0]["message"]["content"]
                        print(f"  Received response for chunk {chunk_idx+1}")
                    else:
                        chunk_analysis = f"No response from API for chunk {chunk_idx+1}"

                # Save analysis for this chunk
                react_results[f"Chunk {chunk_idx+1}"] = {
                    "frame_range": frame_range,
                    "react_analysis": chunk_analysis
                }

                # Append to collection of all analyses
                all_chunk_analysis.append(chunk_analysis)

                # Save intermediate results for this chunk
                timestamp = time.strftime("%Y%m%d_%H%M%S")
                chunk_result = {
                    f"Chunk {chunk_idx+1}": react_results[f"Chunk {chunk_idx+1}"]
                }
                self.save_results(chunk_result, f"{video_id}_react_chunk{chunk_idx+1}_{timestamp}.json")
                print(f"  Chunk {chunk_idx+1} results saved.")

            except Exception as e:
                print(f"  Error in chunk {chunk_idx+1}: {str(e)}")
                react_results[f"Chunk {chunk_idx+1}"] = {
                    "frame_range": frame_range,
                    "error": str(e)
                }

            # Rate limiting between chunks
            print(f"  Waiting 3 seconds before next request...")
            time.sleep(3)

        # After processing all chunks, perform a comprehensive analysis
        if all_chunk_analysis:
            print("Performing comprehensive evidence chain analysis across all chunks...")

            # Create a combined conversation for the evidence chain analysis
            # Create chunk analysis text separately to avoid f-string backslash issues
            chunk_analyses_text = ""
            separator = "-" * 40

            for i, analysis in enumerate(all_chunk_analysis):
                start_frame = i * chunk_size + 1
                end_frame = min((i + 1) * chunk_size, total_frames)
                chunk_text = f"CHUNK {i+1} (Frames {start_frame}-{end_frame}):\n{analysis}\n\n{separator}\n"
                chunk_analyses_text += chunk_text

            evidence_chain_conversation = [
                {
                    "role": "system",
                    "content": "You are an expert forensic analyst reviewing evidence from a crime scene. Your task is to construct a coherent evidence chain from multiple analyses of video frames, focusing on establishing what crime occurred, who was involved, and the sequence of events."
                },
                {
                    "role": "user",
                    "content": f"""
I have analyzed a crime scene video in {len(frame_chunks)} chunks using the ReAct approach. Below are the analyses for each chunk of frames.

Your task:
1. Review all chunk analyses
2. Identify consistent evidence across chunks
3. Resolve any contradictions between chunks
4. Construct a single coherent narrative of the crime
5. List the evidence that supports your conclusions
6. Assess confidence in your final determination

Here are the chunk analyses:

{separator}
{chunk_analyses_text}

Based on all this evidence, provide your final analysis of what crime occurred, who was involved, and how it happened.
"""
                }
            ]

            # Make API request for evidence chain analysis
            payload = {
                "model": "gpt-4o",
                "messages": evidence_chain_conversation,
                "max_tokens": 4096,
                "temperature": 0.1
            }

            try:
                print("Sending request for evidence chain analysis...")
                response = requests.post(url, headers=self.headers, json=payload)

                if response.status_code == 200:
                    result = response.json()
                    if "choices" in result and result["choices"]:
                        evidence_chain_analysis = result["choices"][0]["message"]["content"]
                        print("Evidence chain analysis complete!")

                        # Save evidence chain analysis
                        react_results["Evidence Chain Analysis"] = {
                            "analysis": evidence_chain_analysis
                        }

                        # Save evidence chain separately
                        timestamp = time.strftime("%Y%m%d_%H%M%S")
                        evidence_chain_result = {
                            "Evidence Chain Analysis": react_results["Evidence Chain Analysis"]
                        }
                        self.save_results(evidence_chain_result, f"{video_id}_react_evidence_chain_{timestamp}.json")
                        print("Evidence chain analysis saved.")
                else:
                    error_detail = response.json() if response.text else response.text
                    print(f"API Error in evidence chain analysis: {response.status_code}: {error_detail}")
            except Exception as e:
                print(f"Error in evidence chain analysis: {str(e)}")
                react_results["Evidence Chain Analysis"] = {
                    "error": str(e)
                }

        return {
            "react_results": react_results,
            "frames_used": total_frames,
            "chunks_processed": len(frame_chunks),
            "frames_per_chunk": chunk_size
        }

    def save_results(self, results, filename):
        """Save results to a file"""
        filepath = os.path.join(self.save_dir, filename)
        with open(filepath, 'w') as f:
            json.dump(results, f, indent=2)
        print(f"Results saved to: {filepath}")

    def analyze_frames(self, frames_data, video_id, crime_type):
        """Analyze frames with ReAct prompting"""
        try:
            print(f"\n=== ANALYZING VIDEO: {video_id} ({crime_type}) WITH REACT PROMPTING ===")
            print(f"Total frames loaded: {len(frames_data)}")

            timestamp = time.strftime("%Y%m%d_%H%M%S")
            results = self.process_frames_with_react(frames_data, video_id)

            # Add crime type to results
            if "react_results" in results:
                results["crime_type"] = crime_type

            # Save complete results
            self.save_results(results, f"{crime_type}_{video_id}_react_complete_{timestamp}.json")
            print(f"Complete ReAct analysis for {video_id} ({crime_type}) saved.")

            return results

        except Exception as e:
            print(f"Error in ReAct analysis: {str(e)}")
            return {"error": str(e)}

def discover_all_videos_and_frames(data_dir):
    """Discover all crime types, videos, and their frames"""
    print(f"\n=== DISCOVERING ALL VIDEOS AND FRAMES ===")
    print(f"Scanning directory: {data_dir}")

    all_videos = {}

    try:
        # Get all subdirectories (crime types)
        crime_types = [d for d in os.listdir(data_dir)
                      if os.path.isdir(os.path.join(data_dir, d))]

        print(f"Found {len(crime_types)} crime type directories: {crime_types}")

        for crime_type in crime_types:
            crime_dir = os.path.join(data_dir, crime_type)
            print(f"\nScanning {crime_type} directory...")

            try:
                all_files = os.listdir(crime_dir)
                print(f"  Found {len(all_files)} files")

                # Group files by video ID
                video_groups = defaultdict(list)

                for filename in all_files:
                    # Skip non-image files
                    if not any(filename.lower().endswith(ext) for ext in ['.png', '.jpg', '.jpeg', '.bmp']):
                        continue

                    # Extract video ID from filename
                    video_id = extract_video_id_from_filename(filename)
                    if video_id:
                        video_groups[video_id].append(filename)

                print(f"  Identified {len(video_groups)} unique videos:")
                for video_id, frames in video_groups.items():
                    print(f"    {video_id}: {len(frames)} frames")
                    all_videos[f"{crime_type}_{video_id}"] = {
                        'crime_type': crime_type,
                        'video_id': video_id,
                        'frames': frames,
                        'crime_dir': crime_dir
                    }

            except Exception as e:
                print(f"  Error scanning {crime_type}: {str(e)}")

    except Exception as e:
        print(f"Error accessing main directory: {str(e)}")

    print(f"\nTotal videos discovered: {len(all_videos)}")
    return all_videos

def extract_video_id_from_filename(filename):
    """Extract video ID from filename using various patterns"""
    import re

    # Remove file extension
    name_without_ext = os.path.splitext(filename)[0]

    # Pattern 1: VideoName_something_frame_number
    if '_frame_' in name_without_ext:
        parts = name_without_ext.split('_frame_')
        return parts[0]

    # Pattern 2: VideoName_number (assuming last part is frame number)
    parts = name_without_ext.split('_')
    if len(parts) >= 2:
        # Check if last part is a number
        try:
            int(parts[-1])
            return '_'.join(parts[:-1])
        except ValueError:
            pass

    # Pattern 3: Just use the filename without numbers at the end
    video_id = re.sub(r'_?\d+$', '', name_without_ext)
    if video_id and video_id != name_without_ext:
        return video_id

    # Pattern 4: If all else fails, use the whole name (assuming single frame)
    return name_without_ext

def load_frames_for_video(video_info, frame_interval=1):
    """Load every Nth frame for a specific video (default: ALL frames)"""
    frames_data = {}
    crime_dir = video_info['crime_dir']
    frame_files = video_info['frames']
    video_id = video_info['video_id']

    print(f"\nLoading {'ALL frames' if frame_interval == 1 else f'every {frame_interval}th frame'} for {video_id}...")

    # Sort frames by frame number
    def extract_frame_number(filename):
        try:
            import re
            numbers = re.findall(r'\d+', filename)
            if numbers:
                return int(numbers[-1])  # Use the last number found
        except Exception:
            pass
        return 0

    frame_files.sort(key=extract_frame_number)

    # Select every Nth frame
    selected_frames = frame_files[::frame_interval]
    if frame_interval == 1:
        print(f"  Processing ALL {len(selected_frames)} frames")
    else:
        print(f"  Selected {len(selected_frames)} frames from {len(frame_files)} total frames")

    # Load selected frames
    for idx, frame_file in enumerate(selected_frames):
        frame_path = os.path.join(crime_dir, frame_file)
        try:
            with open(frame_path, 'rb') as f:
                frame_data = base64.b64encode(f.read()).decode('utf-8')
                frames_data[frame_file] = frame_data

            # Show progress
            if idx < 3 or idx % 20 == 0 or idx == len(selected_frames) - 1:
                original_idx = frame_files.index(frame_file) if frame_file in frame_files else idx
                print(f"  Loaded: {frame_file} (frame #{original_idx + 1}, {os.path.getsize(frame_path)/1024:.1f} KB)")

        except Exception as e:
            print(f"  Error loading frame {frame_file}: {str(e)}")

    if frame_interval == 1:
        print(f"Successfully loaded ALL {len(frames_data)} frames for {video_id}")
    else:
        print(f"Successfully loaded {len(frames_data)} frames for {video_id} (every {frame_interval}th frame)")
    return frames_data

def process_all_crime_folders(api_key):
    """Process all crime folders with ReAct prompting"""
    # Initialize analyzer
    analyzer = ReActAnalyzer(api_key)

    # Discover all videos and frames
    all_videos = discover_all_videos_and_frames(DATA_DIR)

    if not all_videos:
        print("No videos found to process!")
        return {}

    all_results = {}
    skipped_videos = []

    print(f"\nProcessing {len(all_videos)} videos with ReAct prompting...")
    print(f"Frame processing: {'ALL frames' if FRAME_INTERVAL == 1 else f'Every {FRAME_INTERVAL}th frame'}")

    # Process each video
    for video_key, video_info in all_videos.items():
        print(f"\nProcessing video: {video_key}")

        try:
            # Load frames for this video (every Nth frame as configured)
            frames_data = load_frames_for_video(video_info, frame_interval=FRAME_INTERVAL)

            if frames_data:
                # Analyze frames with ReAct prompting
                results = analyzer.analyze_frames(
                    frames_data,
                    video_info['video_id'],
                    video_info['crime_type']
                )
                all_results[video_key] = results

                # Save results after each video for resilience
                timestamp = time.strftime("%Y%m%d_%H%M%S")
                current_progress = {video_key: results}
                analyzer.save_results(current_progress, f"progress_{video_info['crime_type']}_{video_info['video_id']}_{timestamp}.json")
            else:
                print(f"No frames loaded for video {video_key} - skipping")
                skipped_videos.append(f"{video_key} (no frames loaded)")

        except Exception as e:
            print(f"Error processing video {video_key}: {str(e)}")
            skipped_videos.append(f"{video_key} (error: {str(e)})")

    # Save summary results
    summary_file = os.path.join(SAVE_DIR, f"react_prompting_summary_{time.strftime('%Y%m%d_%H%M%S')}.json")
    with open(summary_file, 'w') as f:
        json.dump(all_results, f, indent=2)

    # Log skipped videos
    if skipped_videos:
        skipped_file = os.path.join(SAVE_DIR, f"skipped_videos_{time.strftime('%Y%m%d_%H%M%S')}.txt")
        with open(skipped_file, 'w') as f:
            f.write("Videos that could not be processed:\n")
            for video in skipped_videos:
                f.write(f"{video}\n")
        print(f"\nSkipped {len(skipped_videos)} videos. List saved to: {skipped_file}")

    print(f"\nComplete ReAct analysis saved to: {summary_file}")
    print(f"Successfully processed {len(all_results)} videos")

    return all_results

def run():
    """Main execution function"""
    print("ReAct Prompting Crime Video Analysis - ALL Frames")
    print("="*50)

    # Test directory access first
    print("Testing directory access...")
    for path in [DATA_DIR, SAVE_DIR]:
        print(f"Path: {path}")
        print(f"  Exists: {os.path.exists(path)}")
        if os.path.exists(path):
            try:
                contents = os.listdir(path)
                print(f"  Contains {len(contents)} items")
                if contents:
                    print(f"  First few items: {contents[:3]}")
            except Exception as e:
                print(f"  Error accessing contents: {str(e)}")

    # Get API key
    try:
        api_key_path = "/home/opade7/Documents/gpu-test/API-KEYS/chatgpt.txt"
        print(f"Trying to load API key from: {api_key_path}")
        print(f"File exists: {os.path.exists(api_key_path)}")

        f = open(api_key_path, "r")
        api_key = f.read().strip()
        f.close()

        if not api_key:
            print("✗ Failed to load GPT API key: File is empty")
            return

        print("✓ Successfully loaded GPT API key")
        print(f"API key starts with: {api_key[:5]}...")

    except Exception as e:
        print(f"✗ Failed to load GPT API key: {str(e)}")
        return

    # Verify directories exist
    print("\nVerifying directories:")
    print(f"Data directory exists: {os.path.exists(DATA_DIR)}")
    print(f"Save directory exists: {os.path.exists(SAVE_DIR)}")

    if not os.path.exists(DATA_DIR):
        print(f"✗ Data directory not found: {DATA_DIR}")
        return

    # Create save directory if it doesn't exist
    os.makedirs(SAVE_DIR, exist_ok=True)

    # Process all crime folders
    results = process_all_crime_folders(api_key)

    print("\n" + "="*50)
    print(f"REACT PROMPTING COMPLETE!")
    print(f"Videos processed: {len(results)}")
    print("="*50)

if __name__ == "__main__":
    run()

Not running in Colab or drive module not available
ReAct Prompting Crime Video Analysis - ALL Frames
Testing directory access...
Path: /home/opade7/Documents/gpu-test/crime-data
  Exists: True
  Contains 11 items
  First few items: ['Burglary', 'Stealing', 'Fighting']
Path: /home/opade7/Documents/gpu-test/RESULT-PROMPT/GPT/REACT
  Exists: False
Trying to load API key from: /home/opade7/Documents/gpu-test/API-KEYS/chatgpt.txt
File exists: True
✓ Successfully loaded GPT API key
API key starts with: sk-pr...

Verifying directories:
Data directory exists: True
Save directory exists: False

=== DISCOVERING ALL VIDEOS AND FRAMES ===
Scanning directory: /home/opade7/Documents/gpu-test/crime-data
Found 11 crime type directories: ['Burglary', 'Stealing', 'Fighting', 'Robbery', 'Shoplifting', 'Arson', 'Vandalism', 'Shooting', 'Abuse', 'Explosion', 'Assault']

Scanning Burglary directory...
  Found 398 files
  Identified 2 unique videos:
    Burglary001_x264: 266 frames
    Burglary008_x264: 132 

#Iterative prompting
Iterative Prompting Approach
The Iterative Prompting technique follows a structured refinement process:

- Initial Analysis: The model provides a first-pass analysis of what appears to be happening in the frames
- Guided Iterations: Through a series of targeted follow-up prompts, the model refines specific aspects of its analysis
- Progressive Improvement: Each round builds on previous insights while addressing potential weaknesses or gaps
- Final Synthesis: After multiple refinement rounds, the model creates a final, comprehensive assessment

Implementation Highlights

Multi-Round Refinement:

Starts with an initial general analysis prompt
Follows with 4 specialized refinement rounds:

- People and relationships focus
- Actions and intent focus
- Criminal elements and evidence focus
- Critical examination (missing elements, alternative interpretations)


Concludes with a final synthesis prompt for each chunk


Complete Frame Processing:

- Processes all frames in chunks of 10 frames each
- Each chunk undergoes the full iterative process independently


Conversation Continuity:

- Maintains the complete conversation history throughout all rounds
- Each refinement builds on the accumulated context from previous rounds
- Creates a progressive improvement cycle where later responses incorporate earlier insights


Holistic Synthesis:

- After all chunks are iteratively analyzed, performs a final cross-chunk synthesis
- Creates a coherent narrative of the entire incident
- Addresses any discrepancies between chunk analyses

In [12]:
import os
import json
import base64
import requests
import time
from datetime import datetime
from collections import defaultdict

# Add Google Drive mounting for Colab
try:
    from google.colab import drive
    drive.mount('/content/drive')
    print("Google Drive mounted successfully!")
except ImportError:
    print("Not running in Colab or drive module not available")

# Configuration
DATA_DIR = "/home/opade7/Documents/gpu-test/crime-data"
SAVE_DIR = "/home/opade7/Documents/gpu-test/RESULT-PROMPT/GPT/TRUE-ITERATIVE"
FRAME_INTERVAL = 1  # Process ALL frames (set to higher number for sampling)

class TrueIterativeGPTAnalyzer:
    def __init__(self, api_key):
        self.api_key = api_key
        self.headers = {
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json"
        }
        self.save_dir = SAVE_DIR
        self.chunk_size = 10
        self.max_iterations = 8  # Maximum iterations before stopping
        self.convergence_threshold = 0.7  # Similarity threshold for convergence
        os.makedirs(self.save_dir, exist_ok=True)

        # Core iterative question that gets refined each time
        self.core_question = "Analyze these video frames for criminal activity. What crime is occurring, who is involved, what evidence supports your conclusion, and how confident are you in this assessment?"

    def calculate_similarity(self, text1, text2):
        """Simple similarity calculation based on word overlap"""
        if not text1 or not text2:
            return 0.0

        # Convert to lowercase and split into words
        words1 = set(text1.lower().split())
        words2 = set(text2.lower().split())

        # Calculate Jaccard similarity
        intersection = words1.intersection(words2)
        union = words1.union(words2)

        if len(union) == 0:
            return 0.0

        return len(intersection) / len(union)

    def has_converged(self, current_response, previous_response):
        """Check if the analysis has converged (responses are very similar)"""
        if not previous_response:
            return False

        similarity = self.calculate_similarity(current_response, previous_response)
        print(f"    Similarity to previous: {similarity:.3f} (threshold: {self.convergence_threshold})")

        return similarity >= self.convergence_threshold

    def extract_confidence_score(self, response):
        """Extract confidence score from response if mentioned"""
        confidence_keywords = ["confidence", "confident", "certainty", "sure", "probability"]
        response_lower = response.lower()

        # Look for percentage mentions
        import re
        percentages = re.findall(r'(\d+)%', response)
        if percentages:
            return max([int(p) for p in percentages]) / 100.0

        # Look for confidence keywords with qualifiers
        if any(keyword in response_lower for keyword in ["very confident", "highly confident", "extremely confident"]):
            return 0.9
        elif any(keyword in response_lower for keyword in ["confident", "fairly confident"]):
            return 0.7
        elif any(keyword in response_lower for keyword in ["somewhat confident", "moderately confident"]):
            return 0.5
        elif any(keyword in response_lower for keyword in ["low confidence", "uncertain", "unsure"]):
            return 0.3

        return 0.5  # Default moderate confidence

    def process_frames_truly_iteratively(self, frames_data, video_id):
        """Process frames using TRUE iterative prompting - same question refined repeatedly"""
        url = "https://api.openai.com/v1/chat/completions"

        # Extract frame data from the dictionary
        frame_names = list(frames_data.keys())

        # Improved sorting function for frame numbers
        def extract_frame_number(filename):
            try:
                # Handle different naming patterns
                if '_frame_' in filename:
                    parts = filename.split('_frame_')
                    if len(parts) > 1:
                        number_part = parts[1].split('.')[0]
                        return int(number_part)
                elif 'frame' in filename.lower():
                    # Alternative pattern matching
                    import re
                    numbers = re.findall(r'\d+', filename)
                    if numbers:
                        return int(numbers[-1])  # Use the last number found
            except Exception as e:
                print(f"Error extracting frame number from {filename}: {str(e)}")
                return 0

        sorted_frames = sorted(frame_names, key=extract_frame_number)
        frame_data = [frames_data[frame_name] for frame_name in sorted_frames if frame_name in frames_data and frames_data[frame_name]]

        if not frame_data:
            return {"error": "No valid frames available for analysis"}

        total_frames = len(frame_data)
        print(f"Processing all {total_frames} frames with TRUE iterative prompting")

        # Process all frames by dividing them into chunks
        frame_chunks = [frame_data[i:i+self.chunk_size] for i in range(0, total_frames, self.chunk_size)]
        print(f"Split into {len(frame_chunks)} chunks of approximately {self.chunk_size} frames each")

        all_iterations = {}
        previous_response = None
        converged = False

        print(f"Starting TRUE iterative analysis with max {self.max_iterations} iterations...")
        print(f"Core question: {self.core_question}")
        print(f"Convergence threshold: {self.convergence_threshold}")

        # TRUE ITERATIVE LOOP - Same question, progressively refined
        for iteration_num in range(1, self.max_iterations + 1):
            print(f"\n=== TRUE ITERATION {iteration_num}/{self.max_iterations} ===")

            iteration_responses = []

            # Process each chunk of frames for this iteration
            for chunk_idx, chunk in enumerate(frame_chunks):
                frame_start = chunk_idx * self.chunk_size + 1
                frame_end = min((chunk_idx + 1) * self.chunk_size, total_frames)
                frame_range = f"{frame_start}-{frame_end}"

                print(f"  Processing chunk {chunk_idx+1}/{len(frame_chunks)} (frames {frame_range}) for iteration {iteration_num}...")

                # Build TRUE iterative prompt
                if iteration_num == 1:
                    # First iteration - ask the core question
                    iterative_prompt = f"""ITERATION {iteration_num} - Initial Analysis

{self.core_question}

Be thorough and specific in your analysis. Include your confidence level in your assessment."""

                else:
                    # Subsequent iterations - refine based on previous response
                    iterative_prompt = f"""ITERATION {iteration_num} - Refining Previous Analysis

PREVIOUS ANALYSIS FROM ITERATION {iteration_num-1}:
{previous_response[:800]}...

Now, analyze these SAME frames again with the SAME core question, but refine your analysis:

{self.core_question}

REFINEMENT INSTRUCTIONS:
- Review your previous analysis carefully
- Look for details you may have missed
- Reconsider your conclusions with fresh perspective
- Identify any errors or oversights in your previous assessment
- Improve the accuracy and depth of your analysis
- If you're more confident now, explain why
- If you're less confident, explain what creates uncertainty
- What new insights do you have upon re-examination?

Provide your REFINED analysis of the same core question."""

                # Initialize conversation for this chunk and iteration
                conversation = [
                    {
                        "role": "system",
                        "content": "You are an expert forensic analyst examining video frames from a potential crime scene. Your analysis will be iteratively refined through repeated examination of the same core question to improve accuracy and completeness."
                    }
                ]

                # Create user message with prompt and frames
                user_message = {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": iterative_prompt
                        }
                    ]
                }

                # Add frames to the message
                for frame in chunk:
                    user_message["content"].append({
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/png;base64,{frame}",
                            "detail": "high"
                        }
                    })

                # Add user message to conversation
                conversation.append(user_message)

                # Make API request
                payload = {
                    "model": "gpt-4o",
                    "messages": conversation,
                    "max_tokens": 4096,
                    "temperature": 0.1
                }

                try:
                    response = requests.post(url, headers=self.headers, json=payload)

                    if response.status_code != 200:
                        error_detail = response.json() if response.text else response.text
                        print(f"    API Error {response.status_code}: {error_detail}")
                        continue

                    result = response.json()
                    if "choices" in result and result["choices"]:
                        chunk_analysis = result["choices"][0]["message"]["content"]
                        iteration_responses.append(chunk_analysis)
                        print(f"    Received analysis for chunk {chunk_idx+1}")

                except Exception as e:
                    print(f"    Error in iteration {iteration_num}, chunk {chunk_idx+1}: {str(e)}")

                # Rate limiting
                print(f"    Waiting 2 seconds before next chunk...")
                time.sleep(2)

            # Combine responses for this iteration
            if len(iteration_responses) == 1:
                current_response = iteration_responses[0]
            else:
                current_response = "\n\n=== NEXT CHUNK ===\n\n".join(iteration_responses)

            # Extract confidence for this iteration
            confidence = self.extract_confidence_score(current_response)

            # Check for convergence
            if previous_response:
                converged = self.has_converged(current_response, previous_response)

            # Store this iteration's data
            iteration_data = {
                "iteration": iteration_num,
                "type": "true_iterative_refinement",
                "core_question": self.core_question,
                "prompt_used": iterative_prompt,
                "response": current_response,
                "confidence_extracted": confidence,
                "converged": converged,
                "similarity_to_previous": self.calculate_similarity(current_response, previous_response) if previous_response else 0.0
            }

            all_iterations[f"iteration_{iteration_num}"] = iteration_data

            print(f"  Confidence level: {confidence:.2f}")
            print(f"  Response preview: {current_response[:200]}...")

            # Check for convergence
            if converged:
                print(f"  *** CONVERGENCE ACHIEVED at iteration {iteration_num} ***")
                break
            elif iteration_num < self.max_iterations:
                print(f"  Continuing to iteration {iteration_num + 1} (not yet converged)")

            # Update previous response for next iteration
            previous_response = current_response

            # Rate limiting between iterations
            print(f"  Waiting 3 seconds before next iteration...")
            time.sleep(3)

        # Create convergence summary
        convergence_summary = {
            "total_iterations_run": iteration_num,
            "max_iterations_allowed": self.max_iterations,
            "converged": converged,
            "convergence_threshold": self.convergence_threshold,
            "final_confidence": confidence,
            "methodology": "True iterative refinement - same question refined repeatedly"
        }

        if converged:
            convergence_summary["convergence_iteration"] = iteration_num
            convergence_summary["convergence_reason"] = f"Response similarity reached {self.convergence_threshold} threshold"
        else:
            convergence_summary["convergence_reason"] = f"Maximum iterations ({self.max_iterations}) reached without convergence"

        return {
            "true_iterative_results": all_iterations,
            "convergence_summary": convergence_summary,
            "frames_used": total_frames,
            "chunks_processed": len(frame_chunks),
            "frames_per_chunk": self.chunk_size
        }

    def save_results(self, results, filename):
        """Save results to a file"""
        filepath = os.path.join(self.save_dir, filename)
        with open(filepath, 'w') as f:
            json.dump(results, f, indent=2)
        print(f"Results saved to: {filepath}")

    def analyze_frames(self, frames_data, video_id, crime_type):
        """Analyze frames with TRUE iterative prompting"""
        try:
            print(f"\n=== ANALYZING VIDEO: {video_id} ({crime_type}) WITH TRUE ITERATIVE PROMPTING ===")
            print(f"Total frames loaded: {len(frames_data)}")

            timestamp = time.strftime("%Y%m%d_%H%M%S")
            results = self.process_frames_truly_iteratively(frames_data, video_id)

            # Add crime type to results
            if "true_iterative_results" in results:
                results["crime_type"] = crime_type

            # Save complete results
            self.save_results(results, f"{crime_type}_{video_id}_true_iterative_complete_{timestamp}.json")
            print(f"Complete TRUE iterative analysis for {video_id} ({crime_type}) saved.")

            return results

        except Exception as e:
            print(f"Error in TRUE iterative analysis: {str(e)}")
            return {"error": str(e)}

def discover_all_videos_and_frames(data_dir):
    """Discover all crime types, videos, and their frames"""
    print(f"\n=== DISCOVERING ALL VIDEOS AND FRAMES ===")
    print(f"Scanning directory: {data_dir}")

    all_videos = {}

    try:
        # Get all subdirectories (crime types)
        crime_types = [d for d in os.listdir(data_dir)
                      if os.path.isdir(os.path.join(data_dir, d))]

        print(f"Found {len(crime_types)} crime type directories: {crime_types}")

        for crime_type in crime_types:
            crime_dir = os.path.join(data_dir, crime_type)
            print(f"\nScanning {crime_type} directory...")

            try:
                all_files = os.listdir(crime_dir)
                print(f"  Found {len(all_files)} files")

                # Group files by video ID
                video_groups = defaultdict(list)

                for filename in all_files:
                    # Skip non-image files
                    if not any(filename.lower().endswith(ext) for ext in ['.png', '.jpg', '.jpeg', '.bmp']):
                        continue

                    # Extract video ID from filename
                    video_id = extract_video_id_from_filename(filename)
                    if video_id:
                        video_groups[video_id].append(filename)

                print(f"  Identified {len(video_groups)} unique videos:")
                for video_id, frames in video_groups.items():
                    print(f"    {video_id}: {len(frames)} frames")
                    all_videos[f"{crime_type}_{video_id}"] = {
                        'crime_type': crime_type,
                        'video_id': video_id,
                        'frames': frames,
                        'crime_dir': crime_dir
                    }

            except Exception as e:
                print(f"  Error scanning {crime_type}: {str(e)}")

    except Exception as e:
        print(f"Error accessing main directory: {str(e)}")

    print(f"\nTotal videos discovered: {len(all_videos)}")
    return all_videos

def extract_video_id_from_filename(filename):
    """Extract video ID from filename using various patterns"""
    import re

    # Remove file extension
    name_without_ext = os.path.splitext(filename)[0]

    # Pattern 1: VideoName_something_frame_number
    if '_frame_' in name_without_ext:
        parts = name_without_ext.split('_frame_')
        return parts[0]

    # Pattern 2: VideoName_number (assuming last part is frame number)
    parts = name_without_ext.split('_')
    if len(parts) >= 2:
        # Check if last part is a number
        try:
            int(parts[-1])
            return '_'.join(parts[:-1])
        except ValueError:
            pass

    # Pattern 3: Just use the filename without numbers at the end
    video_id = re.sub(r'_?\d+$', '', name_without_ext)
    if video_id and video_id != name_without_ext:
        return video_id

    # Pattern 4: If all else fails, use the whole name (assuming single frame)
    return name_without_ext

def load_frames_for_video(video_info, frame_interval=1):
    """Load every Nth frame for a specific video (default: ALL frames)"""
    frames_data = {}
    crime_dir = video_info['crime_dir']
    frame_files = video_info['frames']
    video_id = video_info['video_id']

    print(f"\nLoading {'ALL frames' if frame_interval == 1 else f'every {frame_interval}th frame'} for {video_id}...")

    # Sort frames by frame number
    def extract_frame_number(filename):
        try:
            import re
            numbers = re.findall(r'\d+', filename)
            if numbers:
                return int(numbers[-1])  # Use the last number found
        except Exception:
            pass
        return 0

    frame_files.sort(key=extract_frame_number)

    # Select every Nth frame
    selected_frames = frame_files[::frame_interval]
    if frame_interval == 1:
        print(f"  Processing ALL {len(selected_frames)} frames")
    else:
        print(f"  Selected {len(selected_frames)} frames from {len(frame_files)} total frames")

    # Load selected frames
    for idx, frame_file in enumerate(selected_frames):
        frame_path = os.path.join(crime_dir, frame_file)
        try:
            with open(frame_path, 'rb') as f:
                frame_data = base64.b64encode(f.read()).decode('utf-8')
                frames_data[frame_file] = frame_data

            # Show progress
            if idx < 3 or idx % 20 == 0 or idx == len(selected_frames) - 1:
                original_idx = frame_files.index(frame_file) if frame_file in frame_files else idx
                print(f"  Loaded: {frame_file} (frame #{original_idx + 1}, {os.path.getsize(frame_path)/1024:.1f} KB)")

        except Exception as e:
            print(f"  Error loading frame {frame_file}: {str(e)}")

    if frame_interval == 1:
        print(f"Successfully loaded ALL {len(frames_data)} frames for {video_id}")
    else:
        print(f"Successfully loaded {len(frames_data)} frames for {video_id} (every {frame_interval}th frame)")
    return frames_data

def process_all_crime_folders(api_key):
    """Process all crime folders with TRUE iterative prompting"""
    # Initialize analyzer
    analyzer = TrueIterativeGPTAnalyzer(api_key)

    # Discover all videos and frames
    all_videos = discover_all_videos_and_frames(DATA_DIR)

    if not all_videos:
        print("No videos found to process!")
        return {}

    all_results = {}
    skipped_videos = []

    print(f"\nProcessing {len(all_videos)} videos with TRUE iterative prompting...")
    print(f"Frame processing: {'ALL frames' if FRAME_INTERVAL == 1 else f'Every {FRAME_INTERVAL}th frame'}")

    # Process each video
    for video_key, video_info in all_videos.items():
        print(f"\nProcessing video: {video_key}")

        try:
            # Load frames for this video (every Nth frame as configured)
            frames_data = load_frames_for_video(video_info, frame_interval=FRAME_INTERVAL)

            if frames_data:
                # Analyze frames with TRUE iterative prompting
                results = analyzer.analyze_frames(
                    frames_data,
                    video_info['video_id'],
                    video_info['crime_type']
                )
                all_results[video_key] = results

                # Save results after each video for resilience
                timestamp = time.strftime("%Y%m%d_%H%M%S")
                current_progress = {video_key: results}
                analyzer.save_results(current_progress, f"progress_{video_info['crime_type']}_{video_info['video_id']}_{timestamp}.json")
            else:
                print(f"No frames loaded for video {video_key} - skipping")
                skipped_videos.append(f"{video_key} (no frames loaded)")

        except Exception as e:
            print(f"Error processing video {video_key}: {str(e)}")
            skipped_videos.append(f"{video_key} (error: {str(e)})")

    # Save summary results
    summary_file = os.path.join(SAVE_DIR, f"true_iterative_gpt_summary_{time.strftime('%Y%m%d_%H%M%S')}.json")
    with open(summary_file, 'w') as f:
        json.dump(all_results, f, indent=2)

    # Log skipped videos
    if skipped_videos:
        skipped_file = os.path.join(SAVE_DIR, f"skipped_videos_{time.strftime('%Y%m%d_%H%M%S')}.txt")
        with open(skipped_file, 'w') as f:
            f.write("Videos that could not be processed:\n")
            for video in skipped_videos:
                f.write(f"{video}\n")
        print(f"\nSkipped {len(skipped_videos)} videos. List saved to: {skipped_file}")

    print(f"\nComplete TRUE iterative analysis saved to: {summary_file}")
    print(f"Successfully processed {len(all_results)} videos")

    return all_results

def run():
    """Main execution function"""
    print("TRUE Iterative Prompting Crime Video Analysis with GPT-4o - ALL Frames")
    print("="*70)
    print("TRUE ITERATIVE = Same question refined repeatedly until convergence")
    print("="*70)

    # Test directory access first
    print("Testing directory access...")
    for path in [DATA_DIR, SAVE_DIR]:
        print(f"Path: {path}")
        print(f"  Exists: {os.path.exists(path)}")
        if os.path.exists(path):
            try:
                contents = os.listdir(path)
                print(f"  Contains {len(contents)} items")
                if contents:
                    print(f"  First few items: {contents[:3]}")
            except Exception as e:
                print(f"  Error accessing contents: {str(e)}")

    # Get API key
    try:
        api_key_path = "/home/opade7/Documents/gpu-test/API-KEYS/chatgpt.txt"
        print(f"Trying to load API key from: {api_key_path}")
        print(f"File exists: {os.path.exists(api_key_path)}")

        f = open(api_key_path, "r")
        api_key = f.read().strip()
        f.close()

        if not api_key:
            print("✗ Failed to load GPT API key: File is empty")
            return

        print("✓ Successfully loaded GPT API key")
        print(f"API key starts with: {api_key[:5]}...")

    except Exception as e:
        print(f"✗ Failed to load GPT API key: {str(e)}")
        return

    # Verify directories exist
    print("\nVerifying directories:")
    print(f"Data directory exists: {os.path.exists(DATA_DIR)}")
    print(f"Save directory exists: {os.path.exists(SAVE_DIR)}")

    if not os.path.exists(DATA_DIR):
        print(f"✗ Data directory not found: {DATA_DIR}")
        return

    # Create save directory if it doesn't exist
    os.makedirs(SAVE_DIR, exist_ok=True)

    # Process all crime folders
    results = process_all_crime_folders(api_key)

    # Print summary
    total_frames_processed = 0
    total_videos_processed = len(results)
    total_iterations_run = 0
    convergence_achieved = 0

    for video_id, video_results in results.items():
        if video_results and 'convergence_summary' in video_results:
            summary = video_results['convergence_summary']
            total_frames_processed += video_results.get('frames_used', 0)
            total_iterations_run += summary.get('total_iterations_run', 0)
            if summary.get('converged', False):
                convergence_achieved += 1

    print("\n" + "="*70)
    print(f"TRUE ITERATIVE PROMPTING COMPLETE!")
    print(f"Videos processed: {total_videos_processed}")
    print(f"Total frames analyzed: {total_frames_processed}")
    print(f"Total iterations run: {total_iterations_run}")
    print(f"Convergence achieved: {convergence_achieved}/{total_videos_processed} videos")
    print(f"Model used: GPT-4o")
    print(f"Method: Same core question refined repeatedly")
    print("="*70)

if __name__ == "__main__":
    run()

Not running in Colab or drive module not available
TRUE Iterative Prompting Crime Video Analysis with GPT-4o - ALL Frames
TRUE ITERATIVE = Same question refined repeatedly until convergence
Testing directory access...
Path: /home/opade7/Documents/gpu-test/crime-data
  Exists: True
  Contains 11 items
  First few items: ['Burglary', 'Stealing', 'Fighting']
Path: /home/opade7/Documents/gpu-test/RESULT-PROMPT/GPT/TRUE-ITERATIVE
  Exists: False
Trying to load API key from: /home/opade7/Documents/gpu-test/API-KEYS/chatgpt.txt
File exists: True
✓ Successfully loaded GPT API key
API key starts with: sk-pr...

Verifying directories:
Data directory exists: True
Save directory exists: False

=== DISCOVERING ALL VIDEOS AND FRAMES ===
Scanning directory: /home/opade7/Documents/gpu-test/crime-data
Found 11 crime type directories: ['Burglary', 'Stealing', 'Fighting', 'Robbery', 'Shoplifting', 'Arson', 'Vandalism', 'Shooting', 'Abuse', 'Explosion', 'Assault']

Scanning Burglary directory...
  Found 3

#Self-Consistency
This approach generates multiple independent analyses and determines the most reliable interpretation through consensus.
Self-Consistency Prompting Approach
The Self-Consistency technique follows a unique multi-analysis process:

Multiple Independent Analyses: The system generates several different analyses of the same frames
Diverse Perspectives: Each analysis uses a different prompt template to encourage varied viewpoints
Consensus Determination: The system identifies areas of agreement and disagreement across analyses
Confidence Assessment: For each key element, the level of consensus is explicitly evaluated

Implementation Highlights

Multi-Perspective Analysis:

Generates 5 independent analyses for each chunk of frames
Uses 5 distinct prompt templates to encourage diversity:

Standard analytical perspective
Forensic analyst perspective
Detective/law enforcement perspective
Security expert perspective
Witness testimony perspective


Higher temperature settings (0.5) for greater response diversity


Complete Frame Processing:

Processes all frames in chunks of 10 frames each
Each chunk undergoes the full multi-analysis process


Two-Level Consensus Building:

Chunk-Level Consensus: After generating multiple analyses for each chunk, determines consensus on:

Crime type
Perpetrator description
Victim description
Key actions
Evidence
Timeline


Cross-Chunk Consensus: After processing all chunks, synthesizes a final consensus across the entire video

In [13]:
import os
import json
import base64
import requests
import time
from datetime import datetime
from collections import defaultdict

# Add Google Drive mounting for Colab
try:
    from google.colab import drive
    drive.mount('/content/drive')
    print("Google Drive mounted successfully!")
except ImportError:
    print("Not running in Colab or drive module not available")

# Configuration
DATA_DIR = "/home/opade7/Documents/gpu-test/crime-data"
SAVE_DIR = "/home/opade7/Documents/gpu-test/RESULT-PROMPT/GPT/SELF-CONSISTENCY"
FRAME_INTERVAL = 1  # Process ALL frames (set to higher number for sampling)

class SelfConsistencyGPTAnalyzer:
    def __init__(self, api_key):
        self.api_key = api_key
        self.headers = {
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json"
        }
        self.save_dir = SAVE_DIR
        self.chunk_size = 10
        self.num_independent_runs = 5  # Number of independent analyses for consistency checking
        os.makedirs(self.save_dir, exist_ok=True)

    def make_gpt_request(self, messages, temperature=0.3):
        """Make a single request to GPT API with specified temperature"""
        url = "https://api.openai.com/v1/chat/completions"

        payload = {
            "model": "gpt-4o",
            "messages": messages,
            "max_tokens": 4096,
            "temperature": temperature  # Variable temperature for diversity
        }

        try:
            response = requests.post(url, headers=self.headers, json=payload)

            if response.status_code != 200:
                error_detail = response.json() if response.text else response.text
                print(f"API Error {response.status_code}: {error_detail}")
                return f"Error {response.status_code}: {error_detail}"

            result = response.json()
            if "choices" in result and result["choices"]:
                return result["choices"][0]["message"]["content"]
            else:
                return "Error: No response from API"

        except Exception as e:
            print(f"API request error: {str(e)}")
            return f"Error: {str(e)}"

    def generate_independent_analysis(self, frames_data, frame_data, run_number):
        """Generate a single independent analysis of the frames"""
        print(f"  Generating independent analysis #{run_number}...")

        # Comprehensive analysis prompt (same for all runs to ensure consistency testing)
        comprehensive_prompt = """Analyze these video frames thoroughly and provide a comprehensive assessment. Please address the following aspects:

1. SCENE DESCRIPTION: Describe the environment, setting, and overall context.

2. PEOPLE IDENTIFICATION: Identify all individuals present, their apparent ages, genders, and relationships.

3. ACTIONS AND BEHAVIORS: Detail what each person is doing and their specific behaviors.

4. INTERACTIONS: Analyze how the people are interacting with each other.

5. BODY LANGUAGE: Examine postures, facial expressions, and non-verbal communication.

6. POWER DYNAMICS: Assess any signs of dominance, submission, control, or vulnerability.

7. CONCERNING ELEMENTS: Identify any red flags, inappropriate behaviors, or signs of potential harm.

8. OVERALL ASSESSMENT: Provide your comprehensive conclusion about what is happening and level of concern.

Be thorough, specific, and evidence-based in your analysis."""

        run_responses = []

        # Use slightly different temperature for each run to encourage diversity
        temperatures = [0.1, 0.2, 0.3, 0.4, 0.5]
        temperature = temperatures[run_number % len(temperatures)]

        # Process frames in chunks for this independent run
        for i in range(0, len(frame_data), self.chunk_size):
            chunk = frame_data[i:i + self.chunk_size]

            # Create conversation with system and user messages
            conversation = [
                {
                    "role": "system",
                    "content": "You are analyzing video footage evidence. Provide detailed, objective analysis of what you observe."
                },
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": comprehensive_prompt
                        }
                    ]
                }
            ]

            # Add frames to the user message
            for frame in chunk:
                conversation[1]["content"].append({
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/png;base64,{frame}",
                        "detail": "high"
                    }
                })

            # Make API request for this chunk
            response = self.make_gpt_request(conversation, temperature)
            run_responses.append(response)

            # Rate limiting
            time.sleep(2)

        # Combine responses for this run
        if len(run_responses) == 1:
            combined_response = run_responses[0]
        else:
            combined_response = "\n\n=== NEXT CHUNK ===\n\n".join(run_responses)

        return combined_response

    def analyze_consistency(self, independent_analyses):
        """Analyze consistency across independent runs"""
        print("Analyzing consistency across independent analyses...")

        # Create consistency analysis prompt
        consistency_prompt = f"""You have {len(independent_analyses)} independent analyses of the same video frames. Please analyze the consistency across these analyses and provide:

1. CONSISTENT FINDINGS: What observations appear consistently across most or all analyses?

2. INCONSISTENT FINDINGS: What observations vary significantly between analyses?

3. CONFIDENCE LEVELS: Which findings have high confidence (consistent) vs low confidence (inconsistent)?

4. DISCREPANCY ANALYSIS: Where analyses disagree, what might explain the differences?

5. CONSENSUS ASSESSMENT: Based on the most consistent findings, what is the most reliable assessment?

6. RELIABILITY SCORE: Rate the overall reliability of the consensus (1-10 scale).

Here are the independent analyses:

"""

        # Add each independent analysis
        for i, analysis in enumerate(independent_analyses, 1):
            consistency_prompt += f"\n=== INDEPENDENT ANALYSIS #{i} ===\n{analysis}\n"

        consistency_prompt += "\n\nNow provide your consistency analysis:"

        # Make request for consistency analysis
        conversation = [
            {
                "role": "system",
                "content": "You are synthesizing multiple analyses of the same scene to find commonalities and generate a consensus view."
            },
            {
                "role": "user",
                "content": consistency_prompt
            }
        ]

        consistency_analysis = self.make_gpt_request(conversation, temperature=0.1)
        return consistency_analysis

    def process_frames_self_consistency(self, frames_data, video_id):
        """Process frames using self-consistency prompting strategy"""
        # Extract frame data from the dictionary
        frame_names = list(frames_data.keys())

        # Improved sorting function for frame numbers
        def extract_frame_number(filename):
            try:
                # Handle different naming patterns
                if '_frame_' in filename:
                    parts = filename.split('_frame_')
                    if len(parts) > 1:
                        number_part = parts[1].split('.')[0]
                        return int(number_part)
                elif 'frame' in filename.lower():
                    # Alternative pattern matching
                    import re
                    numbers = re.findall(r'\d+', filename)
                    if numbers:
                        return int(numbers[-1])  # Use the last number found
            except Exception as e:
                print(f"Error extracting frame number from {filename}: {str(e)}")
                return 0

        sorted_frames = sorted(frame_names, key=extract_frame_number)
        frame_data = [frames_data[frame_name] for frame_name in sorted_frames if frame_name in frames_data and frames_data[frame_name]]

        if not frame_data:
            return {"error": "No valid frames available for analysis"}

        total_frames = len(frame_data)
        print(f"Processing all {total_frames} frames with self-consistency approach")
        print(f"Starting self-consistency analysis with {self.num_independent_runs} independent runs...")

        # Generate multiple independent analyses
        independent_analyses = []
        for run_num in range(1, self.num_independent_runs + 1):
            print(f"\n--- Independent Run {run_num}/{self.num_independent_runs} ---")
            analysis = self.generate_independent_analysis(frames_data, frame_data, run_num)
            independent_analyses.append(analysis)
            print(f"  Completed run {run_num}")

            # Longer delay between independent runs
            if run_num < self.num_independent_runs:
                print(f"  Waiting 5 seconds before next independent run...")
                time.sleep(5)

        # Analyze consistency across runs
        print(f"\n--- Consistency Analysis ---")
        consistency_analysis = self.analyze_consistency(independent_analyses)

        # Compile results
        results = {
            "independent_analyses": {},
            "consistency_analysis": consistency_analysis,
            "methodology": {
                "num_runs": self.num_independent_runs,
                "approach": "Multiple independent analyses with consistency checking",
                "temperatures_used": [0.1, 0.2, 0.3, 0.4, 0.5]
            }
        }

        # Store each independent analysis
        for i, analysis in enumerate(independent_analyses, 1):
            results["independent_analyses"][f"run_{i}"] = {
                "run_number": i,
                "analysis": analysis,
                "temperature": [0.1, 0.2, 0.3, 0.4, 0.5][(i-1) % 5]
            }

        return {
            "self_consistency_results": results,
            "frames_used": total_frames,
            "analyses_performed": self.num_independent_runs
        }

    def save_results(self, results, filename):
        """Save results to a file"""
        filepath = os.path.join(self.save_dir, filename)
        with open(filepath, 'w') as f:
            json.dump(results, f, indent=2)
        print(f"Results saved to: {filepath}")

    def analyze_frames(self, frames_data, video_id, crime_type):
        """Analyze frames with self-consistency prompting approach"""
        try:
            print(f"\n=== ANALYZING VIDEO: {video_id} ({crime_type}) ===")
            print(f"Total frames loaded: {len(frames_data)}")

            timestamp = time.strftime("%Y%m%d_%H%M%S")
            results = self.process_frames_self_consistency(frames_data, video_id)

            # Wrap results in the expected format
            final_results = {
                "Self_Consistency_Analysis": {
                    "method": "self_consistency_prompting",
                    "description": "Multiple independent analyses with consistency verification",
                    "crime_type": crime_type,
                    "consistency_results": results["self_consistency_results"],
                    "frames_used": results["frames_used"],
                    "valid_frames": results["frames_used"],
                    "analysis_timestamp": timestamp
                }
            }

            # Save results
            self.save_results(final_results, f"{crime_type}_{video_id}_self_consistency_analysis_{timestamp}.json")
            print(f"Self-consistency analysis for {video_id} ({crime_type}) completed and saved.")

            return final_results

        except Exception as e:
            print(f"Error in self-consistency analysis: {str(e)}")
            return {"error": str(e)}

def discover_all_videos_and_frames(data_dir):
    """Discover all crime types, videos, and their frames"""
    print(f"\n=== DISCOVERING ALL VIDEOS AND FRAMES ===")
    print(f"Scanning directory: {data_dir}")

    all_videos = {}

    try:
        # Get all subdirectories (crime types)
        crime_types = [d for d in os.listdir(data_dir)
                      if os.path.isdir(os.path.join(data_dir, d))]

        print(f"Found {len(crime_types)} crime type directories: {crime_types}")

        for crime_type in crime_types:
            crime_dir = os.path.join(data_dir, crime_type)
            print(f"\nScanning {crime_type} directory...")

            try:
                all_files = os.listdir(crime_dir)
                print(f"  Found {len(all_files)} files")

                # Group files by video ID
                video_groups = defaultdict(list)

                for filename in all_files:
                    # Skip non-image files
                    if not any(filename.lower().endswith(ext) for ext in ['.png', '.jpg', '.jpeg', '.bmp']):
                        continue

                    # Extract video ID from filename
                    video_id = extract_video_id_from_filename(filename)
                    if video_id:
                        video_groups[video_id].append(filename)

                print(f"  Identified {len(video_groups)} unique videos:")
                for video_id, frames in video_groups.items():
                    print(f"    {video_id}: {len(frames)} frames")
                    all_videos[f"{crime_type}_{video_id}"] = {
                        'crime_type': crime_type,
                        'video_id': video_id,
                        'frames': frames,
                        'crime_dir': crime_dir
                    }

            except Exception as e:
                print(f"  Error scanning {crime_type}: {str(e)}")

    except Exception as e:
        print(f"Error accessing main directory: {str(e)}")

    print(f"\nTotal videos discovered: {len(all_videos)}")
    return all_videos

def extract_video_id_from_filename(filename):
    """Extract video ID from filename using various patterns"""
    import re

    # Remove file extension
    name_without_ext = os.path.splitext(filename)[0]

    # Pattern 1: VideoName_something_frame_number
    if '_frame_' in name_without_ext:
        parts = name_without_ext.split('_frame_')
        return parts[0]

    # Pattern 2: VideoName_number (assuming last part is frame number)
    parts = name_without_ext.split('_')
    if len(parts) >= 2:
        # Check if last part is a number
        try:
            int(parts[-1])
            return '_'.join(parts[:-1])
        except ValueError:
            pass

    # Pattern 3: Just use the filename without numbers at the end
    video_id = re.sub(r'_?\d+$', '', name_without_ext)
    if video_id and video_id != name_without_ext:
        return video_id

    # Pattern 4: If all else fails, use the whole name (assuming single frame)
    return name_without_ext

def load_frames_for_video(video_info, frame_interval=1):
    """Load every Nth frame for a specific video (default: ALL frames)"""
    frames_data = {}
    crime_dir = video_info['crime_dir']
    frame_files = video_info['frames']
    video_id = video_info['video_id']

    print(f"\nLoading {'ALL frames' if frame_interval == 1 else f'every {frame_interval}th frame'} for {video_id}...")

    # Sort frames by frame number
    def extract_frame_number(filename):
        try:
            import re
            numbers = re.findall(r'\d+', filename)
            if numbers:
                return int(numbers[-1])  # Use the last number found
        except Exception:
            pass
        return 0

    frame_files.sort(key=extract_frame_number)

    # Select every Nth frame
    selected_frames = frame_files[::frame_interval]
    if frame_interval == 1:
        print(f"  Processing ALL {len(selected_frames)} frames")
    else:
        print(f"  Selected {len(selected_frames)} frames from {len(frame_files)} total frames")

    # Load selected frames
    for idx, frame_file in enumerate(selected_frames):
        frame_path = os.path.join(crime_dir, frame_file)
        try:
            with open(frame_path, 'rb') as f:
                frame_data = base64.b64encode(f.read()).decode('utf-8')
                frames_data[frame_file] = frame_data

            # Show progress
            if idx < 3 or idx % 20 == 0 or idx == len(selected_frames) - 1:
                original_idx = frame_files.index(frame_file) if frame_file in frame_files else idx
                print(f"  Loaded: {frame_file} (frame #{original_idx + 1}, {os.path.getsize(frame_path)/1024:.1f} KB)")

        except Exception as e:
            print(f"  Error loading frame {frame_file}: {str(e)}")

    if frame_interval == 1:
        print(f"Successfully loaded ALL {len(frames_data)} frames for {video_id}")
    else:
        print(f"Successfully loaded {len(frames_data)} frames for {video_id} (every {frame_interval}th frame)")
    return frames_data

def process_all_crime_folders(api_key):
    """Process all crime folders with self-consistency analysis"""
    # Initialize analyzer
    analyzer = SelfConsistencyGPTAnalyzer(api_key)

    # Discover all videos and frames
    all_videos = discover_all_videos_and_frames(DATA_DIR)

    if not all_videos:
        print("No videos found to process!")
        return {}

    all_results = {}
    skipped_videos = []

    print(f"\nProcessing {len(all_videos)} videos with self-consistency prompting analysis...")
    print(f"Frame processing: {'ALL frames' if FRAME_INTERVAL == 1 else f'Every {FRAME_INTERVAL}th frame'}")

    # Process each video
    for video_key, video_info in all_videos.items():
        print(f"\nProcessing video: {video_key}")

        try:
            # Load frames for this video (every Nth frame as configured)
            frames_data = load_frames_for_video(video_info, frame_interval=FRAME_INTERVAL)

            if frames_data:
                # Analyze frames with self-consistency approach
                results = analyzer.analyze_frames(
                    frames_data,
                    video_info['video_id'],
                    video_info['crime_type']
                )
                all_results[video_key] = results
            else:
                print(f"No frames loaded for video {video_key} - skipping")
                skipped_videos.append(f"{video_key} (no frames loaded)")

        except Exception as e:
            print(f"Error processing video {video_key}: {str(e)}")
            skipped_videos.append(f"{video_key} (error: {str(e)})")

    # Save summary results
    summary_file = os.path.join(SAVE_DIR, f"self_consistency_summary_{time.strftime('%Y%m%d_%H%M%S')}.json")
    with open(summary_file, 'w') as f:
        json.dump(all_results, f, indent=2)

    # Log skipped videos
    if skipped_videos:
        skipped_file = os.path.join(SAVE_DIR, f"skipped_videos_{time.strftime('%Y%m%d_%H%M%S')}.txt")
        with open(skipped_file, 'w') as f:
            f.write("Videos that could not be processed:\n")
            for video in skipped_videos:
                f.write(f"{video}\n")
        print(f"\nSkipped {len(skipped_videos)} videos. List saved to: {skipped_file}")

    print(f"\nComplete self-consistency analysis saved to: {summary_file}")
    print(f"Successfully processed {len(all_results)} videos")

    return all_results

def test_gpt_api(api_key):
    """Test GPT API connection"""
    print("Testing GPT-4o API connection...")

    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json"
    }

    payload = {
        "model": "gpt-4o",
        "messages": [
            {
                "role": "user",
                "content": "Hello, can you respond with 'API connection successful'?"
            }
        ],
        "max_tokens": 50
    }

    try:
        response = requests.post("https://api.openai.com/v1/chat/completions",
                               headers=headers, json=payload)

        if response.status_code == 200:
            result = response.json()
            if "choices" in result:
                print("✓ GPT-4o API connection successful!")
                print(f"Response: {result['choices'][0]['message']['content']}")
                return True
        else:
            print(f"✗ API Error {response.status_code}: {response.text}")
            return False

    except Exception as e:
        print(f"✗ Connection error: {str(e)}")
        return False

def check_authentication():
    """Placeholder function to check authentication"""
    return True

def run():
    """Main execution function"""
    print("Self-Consistency Prompting Crime Video Analysis with GPT-4o - ALL Frames")
    print("="*75)
    print("Self-Consistency = Multiple independent analyses with consistency verification")
    print("="*75)

    # Test directory access first
    print("Testing directory access...")
    for path in [DATA_DIR, SAVE_DIR]:
        print(f"Path: {path}")
        print(f"  Exists: {os.path.exists(path)}")
        if os.path.exists(path):
            try:
                contents = os.listdir(path)
                print(f"  Contains {len(contents)} items")
                if contents:
                    print(f"  First few items: {contents[:3]}")
            except Exception as e:
                print(f"  Error accessing contents: {str(e)}")

    # Get API key
    try:
        api_key_path = "/home/opade7/Documents/gpu-test/API-KEYS/chatgpt.txt"
        print(f"Trying to load API key from: {api_key_path}")
        print(f"File exists: {os.path.exists(api_key_path)}")

        f = open(api_key_path, "r")
        api_key = f.read().strip()
        f.close()

        if not api_key:
            print("✗ Failed to load GPT API key: File is empty")
            return

        print("✓ Successfully loaded GPT API key")
        print(f"API key starts with: {api_key[:5]}...")

    except Exception as e:
        print(f"✗ Failed to load GPT API key: {str(e)}")
        return

    # Test GPT API connection
    if not test_gpt_api(api_key):
        print("✗ GPT-4o API test failed. Please check your API key and connection.")
        return

    # Check authentication
    if not check_authentication():
        print("✗ Authentication not completed.")
        return

    # Verify directories exist
    print("\nVerifying directories:")
    print(f"Data directory exists: {os.path.exists(DATA_DIR)}")
    print(f"Save directory exists: {os.path.exists(SAVE_DIR)}")

    if not os.path.exists(DATA_DIR):
        print(f"✗ Data directory not found: {DATA_DIR}")
        return

    # Create save directory if it doesn't exist
    os.makedirs(SAVE_DIR, exist_ok=True)

    # Process all crime folders
    results = process_all_crime_folders(api_key)

    # Print summary
    total_frames_processed = 0
    total_videos_processed = len(results)
    total_independent_runs = 0

    for video_id, video_results in results.items():
        if video_results and 'Self_Consistency_Analysis' in video_results:
            analysis = video_results['Self_Consistency_Analysis']
            total_frames_processed += analysis.get('valid_frames', 0)
            if 'consistency_results' in analysis and 'methodology' in analysis['consistency_results']:
                total_independent_runs += analysis['consistency_results']['methodology'].get('num_runs', 0)

    print("\n" + "="*75)
    print(f"SELF-CONSISTENCY PROMPTING ANALYSIS COMPLETE!")
    print(f"Videos processed: {total_videos_processed}")
    print(f"Total frames analyzed: {total_frames_processed}")
    print(f"Total independent runs: {total_independent_runs}")
    print(f"Model used: GPT-4o")
    print(f"Analysis pattern: Multiple Independent → Consistency Check → Consensus")
    print("="*75)

if __name__ == "__main__":
    run()

Not running in Colab or drive module not available
Self-Consistency Prompting Crime Video Analysis with GPT-4o - ALL Frames
Self-Consistency = Multiple independent analyses with consistency verification
Testing directory access...
Path: /home/opade7/Documents/gpu-test/crime-data
  Exists: True
  Contains 11 items
  First few items: ['Burglary', 'Stealing', 'Fighting']
Path: /home/opade7/Documents/gpu-test/RESULT-PROMPT/GPT/SELF-CONSISTENCY
  Exists: False
Trying to load API key from: /home/opade7/Documents/gpu-test/API-KEYS/chatgpt.txt
File exists: True
✓ Successfully loaded GPT API key
API key starts with: sk-pr...
Testing GPT-4o API connection...
✓ GPT-4o API connection successful!
Response: API connection successful.

Verifying directories:
Data directory exists: True
Save directory exists: False

=== DISCOVERING ALL VIDEOS AND FRAMES ===
Scanning directory: /home/opade7/Documents/gpu-test/crime-data
Found 11 crime type directories: ['Burglary', 'Stealing', 'Fighting', 'Robbery', 'S

#Meta-Prompting
- Meta-Prompting that processes all frames from crime videos. This technique is unique because it uses the AI to generate its own specialized prompts for analysis.

Meta-Prompting Approach
The Meta-Prompting technique follows this innovative process:

- Prompt Generation: Instead of using predefined prompts, the system asks the AI to create specialized prompts for analyzing video frames
- Prompt Application: These AI-generated prompts are then used to analyze the actual frames
- Meta-Synthesis: The system also generates a specialized synthesis prompt to combine all chunk analyses

Implementation Highlights

Two-Stage Meta-Prompting:

- First Stage: For each chunk of frames, generate a specialized analysis prompt
- Second Stage: For final synthesis, generate a specialized synthesis prompt
- Both stages use the AI to create task-specific prompts rather than using predefined ones


Complete Frame Processing:

- Processes all frames in chunks of 10 frames each
- Each chunk undergoes the full meta-prompting process independently


Specialized Prompt Design Process: Guides the AI to create prompts that focus on:

Step-by-step observation:
- Objective description before interpretation
- Attention to easily missed details
- Organizing observations into a coherent narrative
- Avoids including example responses in the generated prompts


Fallback Safety:

- If meta-prompting fails, falls back to a simple seed prompt
Ensures analysis can continue even if prompt generation has issues

In [14]:
import os
import json
import base64
import requests
import time
from datetime import datetime
from collections import defaultdict

# Add Google Drive mounting for Colab
try:
    from google.colab import drive
    drive.mount('/content/drive')
    print("Google Drive mounted successfully!")
except ImportError:
    print("Not running in Colab or drive module not available")

# Configuration
DATA_DIR = "/home/opade7/Documents/gpu-test/crime-data"
SAVE_DIR = "/home/opade7/Documents/gpu-test/RESULT-PROMPT/GPT/META-PROMPTING"
FRAME_INTERVAL = 1  # Process ALL frames (set to higher number for sampling)

class MetaPromptingAnalyzer:
    def __init__(self, api_key):
        self.api_key = api_key
        self.headers = {
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json"
        }
        self.save_dir = SAVE_DIR
        self.chunk_size = 10
        os.makedirs(self.save_dir, exist_ok=True)

        # Initial seed prompt - this will be used to generate better prompts
        self.seed_prompt = """
Analyze these video frames and describe what appears to be happening.
"""

        # Meta-prompt template to generate specialized analysis prompts
        self.meta_prompt_template = """
You are an expert at designing effective prompts for analyzing surveillance footage and crime scene videos.

I want you to create a specialized prompt for analyzing a sequence of frames from a surveillance video. The prompt should help analyze what's happening in the frames without making assumptions or jumping to conclusions. The focus should be on detailed observation of:

1. People in the scene
2. Actions and behaviors
3. Objects and their usage
4. Spatial relationships
5. Sequence of events

Create a detailed, structured prompt (300-500 words) that will guide the analysis of video frames {frame_range} of {total_frames}. The prompt should:
- Guide step-by-step observation
- Focus on objective description before interpretation
- Encourage attention to details that might be easily missed
- Help organize observations into a coherent narrative

Do not include any example responses in your prompt. The prompt should only contain instructions for analyzing the frames.
"""

    def _format_chunk_analyses(self, all_chunk_analyses):
        """Helper method to format chunk analyses for synthesis"""
        newline = '\n'
        separator = '-' * 40

        formatted_chunks = []
        for analysis in all_chunk_analyses:
            chunk_text = f"SEGMENT {analysis['chunk']} (Frames {analysis['frame_range']}):{newline}{analysis['analysis']}{newline}{newline}{separator}{newline}"
            formatted_chunks.append(chunk_text)

        return ''.join(formatted_chunks)

    def evaluate_prompt_effectiveness(self, meta_prompting_results):
        """Evaluate the effectiveness of the generated prompts - IDENTICAL TO GEMINI/CLAUDE"""
        print("=== EVALUATION PHASE: Assessing Prompt Effectiveness ===")

        # Prepare analysis results for evaluation
        analysis_results = {}

        # Extract chunk results for evaluation
        for chunk_key, chunk_data in meta_prompting_results.items():
            if chunk_key.startswith("Chunk ") and isinstance(chunk_data, dict):
                if "meta_prompt_process" in chunk_data and "frame_analysis" in chunk_data:
                    analysis_results[chunk_key] = {
                        "generated_prompt": chunk_data["meta_prompt_process"].get("specialized_prompt", ""),
                        "analysis_result": chunk_data.get("frame_analysis", "")
                    }

        evaluation_prompt = f"""Evaluate the effectiveness of the meta-generated prompts based on the analysis results below.

For each analysis type, assess:
1. PROMPT QUALITY: How well did the generated prompt elicit thorough analysis?
2. COMPLETENESS: Did the analysis address all intended aspects?
3. SPECIFICITY: How specific and detailed were the results?
4. RELEVANCE: How relevant were the findings to crime video analysis?
5. ACTIONABILITY: How useful are the insights for decision-making?

Rate each aspect 1-10 and provide improvement suggestions.

Analysis Results:
{json.dumps(analysis_results, indent=2)}

Provide your evaluation in a structured format."""

        # Create conversation for evaluation
        evaluation_conversation = [
            {
                "role": "system",
                "content": "You are an expert at evaluating prompt effectiveness and analysis quality."
            },
            {
                "role": "user",
                "content": evaluation_prompt
            }
        ]

        # Make API request for evaluation
        url = "https://api.openai.com/v1/chat/completions"
        payload = {
            "model": "gpt-4o",
            "messages": evaluation_conversation,
            "max_tokens": 4096,
            "temperature": 0.1
        }

        try:
            print("Sending request for prompt effectiveness evaluation...")
            response = requests.post(url, headers=self.headers, json=payload)

            if response.status_code == 200:
                result = response.json()
                if "choices" in result and result["choices"]:
                    evaluation_response = result["choices"][0]["message"]["content"]
                    print("Prompt effectiveness evaluation complete!")
                    return evaluation_response
            else:
                error_detail = response.json() if response.text else response.text
                print(f"API Error in evaluation: {response.status_code}: {error_detail}")
                return f"Error in evaluation: {response.status_code}: {error_detail}"
        except Exception as e:
            print(f"Error in prompt effectiveness evaluation: {str(e)}")
            return f"Error: {str(e)}"

    def process_frames_with_meta_prompting(self, frames_data, video_id):
        """Process frames with meta-prompting approach"""
        url = "https://api.openai.com/v1/chat/completions"

        # Extract frame data from the dictionary
        frame_names = list(frames_data.keys())

        # Improved sorting function for frame numbers
        def extract_frame_number(filename):
            try:
                # Handle different naming patterns
                if '_frame_' in filename:
                    parts = filename.split('_frame_')
                    if len(parts) > 1:
                        number_part = parts[1].split('.')[0]
                        return int(number_part)
                elif 'frame' in filename.lower():
                    # Alternative pattern matching
                    import re
                    numbers = re.findall(r'\d+', filename)
                    if numbers:
                        return int(numbers[-1])  # Use the last number found
            except Exception as e:
                print(f"Error extracting frame number from {filename}: {str(e)}")
                return 0

        sorted_frames = sorted(frame_names, key=extract_frame_number)
        frame_data = [frames_data[frame_name] for frame_name in sorted_frames if frame_name in frames_data and frames_data[frame_name]]

        if not frame_data:
            return {"error": "No valid frames available for analysis"}

        total_frames = len(frame_data)
        print(f"Processing all {total_frames} frames with meta-prompting")

        # Process all frames by dividing them into chunks
        chunk_size = 10
        frame_chunks = [frame_data[i:i+chunk_size] for i in range(0, total_frames, chunk_size)]
        print(f"Split into {len(frame_chunks)} chunks of approximately {chunk_size} frames each")

        # Initialize results
        meta_prompting_results = {}
        all_chunk_analyses = []

        # Process each chunk of frames
        for chunk_idx, chunk in enumerate(frame_chunks):
            frame_start = chunk_idx * chunk_size + 1
            frame_end = min((chunk_idx + 1) * chunk_size, total_frames)
            frame_range = f"{frame_start}-{frame_end}"

            print(f"Processing chunk {chunk_idx+1}/{len(frame_chunks)} (frames {frame_range})...")

            # Initialize chunk results
            chunk_results = {
                "frame_range": frame_range,
                "meta_prompt_process": {}
            }

            # Step 1: Generate specialized prompt for this chunk using meta-prompting
            formatted_meta_prompt = self.meta_prompt_template.format(frame_range=frame_range, total_frames=total_frames)

            print(f"  Generating specialized prompt for chunk {chunk_idx+1}...")

            # Create conversation for meta-prompting
            meta_conversation = [
                {
                    "role": "system",
                    "content": "You are an expert at designing effective prompts for visual analysis tasks."
                },
                {
                    "role": "user",
                    "content": formatted_meta_prompt
                }
            ]

            # Make API request for meta-prompting
            payload = {
                "model": "gpt-4o",
                "messages": meta_conversation,
                "max_tokens": 1024,
                "temperature": 0.7  # Higher temperature for creative prompt generation
            }

            try:
                print("  Sending request to generate specialized prompt...")
                response = requests.post(url, headers=self.headers, json=payload)

                if response.status_code != 200:
                    error_detail = response.json() if response.text else response.text
                    print(f"  API Error {response.status_code}: {error_detail}")
                    chunk_results["meta_prompt_process"]["error"] = f"Error {response.status_code}: {error_detail}"
                    # Fall back to seed prompt if meta-prompting fails
                    specialized_prompt = self.seed_prompt
                    print("  Falling back to seed prompt due to error")
                else:
                    result = response.json()
                    if "choices" in result and result["choices"]:
                        specialized_prompt = result["choices"][0]["message"]["content"]
                        print("  Successfully generated specialized prompt")

                        # Save the generated specialized prompt
                        chunk_results["meta_prompt_process"]["specialized_prompt"] = specialized_prompt
                    else:
                        # Fall back to seed prompt if response is empty
                        specialized_prompt = self.seed_prompt
                        chunk_results["meta_prompt_process"]["error"] = "Empty response from API"
                        print("  Falling back to seed prompt due to empty response")

            except Exception as e:
                print(f"  Error in meta-prompting for chunk {chunk_idx+1}: {str(e)}")
                chunk_results["meta_prompt_process"]["error"] = str(e)
                # Fall back to seed prompt if meta-prompting fails
                specialized_prompt = self.seed_prompt
                print("  Falling back to seed prompt due to exception")

            # Rate limiting between API calls
            print("  Waiting 2 seconds before next request...")
            time.sleep(2)

            # Step 2: Use the specialized prompt to analyze the frames
            print(f"  Analyzing frames using specialized prompt for chunk {chunk_idx+1}...")

            # Create conversation for frame analysis
            analysis_conversation = [
                {
                    "role": "system",
                    "content": "You are an expert at analyzing visual content and describing what you observe."
                },
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": f"{specialized_prompt}\n\nThese are frames {frame_range} of {total_frames}."
                        }
                    ]
                }
            ]

            # Add frames to the user message
            for frame in chunk:
                # Use PNG mime type since frames are PNGs
                mime_type = "image/png"

                analysis_conversation[1]["content"].append({
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:{mime_type};base64,{frame}",
                        "detail": "high"
                    }
                })

            # Make API request for frame analysis
            payload = {
                "model": "gpt-4o",
                "messages": analysis_conversation,
                "max_tokens": 4096,
                "temperature": 0.1  # Lower temperature for consistent analysis
            }

            try:
                print("  Sending request for frame analysis...")
                response = requests.post(url, headers=self.headers, json=payload)

                if response.status_code != 200:
                    error_detail = response.json() if response.text else response.text
                    print(f"  API Error {response.status_code}: {error_detail}")
                    chunk_results["analysis_error"] = f"Error {response.status_code}: {error_detail}"
                else:
                    result = response.json()
                    if "choices" in result and result["choices"]:
                        frame_analysis = result["choices"][0]["message"]["content"]
                        print("  Successfully received frame analysis")

                        # Save the frame analysis
                        chunk_results["frame_analysis"] = frame_analysis

                        # Add to collection of all chunk analyses
                        all_chunk_analyses.append({
                            "chunk": chunk_idx + 1,
                            "frame_range": frame_range,
                            "analysis": frame_analysis
                        })

            except Exception as e:
                print(f"  Error in frame analysis for chunk {chunk_idx+1}: {str(e)}")
                chunk_results["analysis_error"] = str(e)

            # Save results for this chunk
            meta_prompting_results[f"Chunk {chunk_idx+1}"] = chunk_results

            # Save intermediate results for this chunk
            timestamp = time.strftime("%Y%m%d_%H%M%S")
            chunk_result = {
                f"Chunk {chunk_idx+1}": chunk_results
            }
            self.save_results(chunk_result, f"{video_id}_meta_prompting_chunk{chunk_idx+1}_{timestamp}.json")
            print(f"  Results for chunk {chunk_idx+1} saved")

            # Rate limiting between chunks
            print("  Waiting 3 seconds before processing next chunk...")
            time.sleep(3)

        # After processing all chunks, generate a synthesis of all analyses
        if all_chunk_analyses:
            print("Generating synthesis of all chunk analyses...")

            # Create a meta-prompt for synthesis generation
            synthesis_meta_prompt = """
Create a specialized prompt for synthesizing multiple analyses of different segments of the same video. The prompt should guide the creation of a comprehensive narrative that captures the entire sequence of events.

The prompt should:
1. Guide the integration of information from all segments
2. Help identify the overall narrative arc
3. Focus on creating a coherent timeline of events
4. Emphasize objective description over interpretation

The prompt should be detailed and structured (300-500 words) and designed to create a comprehensive synthesis of all video segments.
"""

            # Create conversation for synthesis meta-prompting
            synthesis_meta_conversation = [
                {
                    "role": "system",
                    "content": "You are an expert at designing effective prompts for synthesizing information from multiple sources."
                },
                {
                    "role": "user",
                    "content": synthesis_meta_prompt
                }
            ]

            # Make API request for synthesis meta-prompting
            payload = {
                "model": "gpt-4o",
                "messages": synthesis_meta_conversation,
                "max_tokens": 1024,
                "temperature": 0.7  # Higher temperature for creative prompt generation
            }

            try:
                print("Sending request for synthesis meta-prompt generation...")
                response = requests.post(url, headers=self.headers, json=payload)

                if response.status_code == 200:
                    result = response.json()
                    if "choices" in result and result["choices"]:
                        synthesis_prompt = result["choices"][0]["message"]["content"]
                        print("Successfully generated synthesis meta-prompt")

                        # Save the synthesis meta-prompt
                        meta_prompting_results["Synthesis Meta-Prompt"] = synthesis_prompt

                        # Use this prompt to generate the final synthesis
                        synthesis_conversation = [
                            {
                                "role": "system",
                                "content": "You are an expert at synthesizing information from multiple analyses to create a comprehensive understanding."
                            },
                            {
                                "role": "user",
                                "content": f"""
{synthesis_prompt}

Here are the analyses for each segment of the video:

{'-' * 40}
{self._format_chunk_analyses(all_chunk_analyses)}

Based on these analyses, create a comprehensive synthesis of the entire video.
"""
                            }
                        ]

                        # Make API request for final synthesis
                        payload = {
                            "model": "gpt-4o",
                            "messages": synthesis_conversation,
                            "max_tokens": 4096,
                            "temperature": 0.1  # Lower temperature for consistent synthesis
                        }

                        try:
                            print("Sending request for final synthesis...")
                            response = requests.post(url, headers=self.headers, json=payload)

                            if response.status_code == 200:
                                result = response.json()
                                if "choices" in result and result["choices"]:
                                    final_synthesis = result["choices"][0]["message"]["content"]
                                    print("Successfully generated final synthesis")

                                    # Save final synthesis
                                    meta_prompting_results["Final Synthesis"] = {
                                        "synthesis": final_synthesis
                                    }

                                    # Save final synthesis separately
                                    timestamp = time.strftime("%Y%m%d_%H%M%S")
                                    synthesis_result = {
                                        "Final Synthesis": meta_prompting_results["Final Synthesis"]
                                    }
                                    self.save_results(synthesis_result, f"{video_id}_meta_prompting_synthesis_{timestamp}.json")
                                    print("Final synthesis saved")
                            else:
                                error_detail = response.json() if response.text else response.text
                                print(f"API Error in final synthesis: {response.status_code}: {error_detail}")
                        except Exception as e:
                            print(f"Error in final synthesis: {str(e)}")
                            meta_prompting_results["Final Synthesis"] = {
                                "error": str(e)
                            }
                else:
                    error_detail = response.json() if response.text else response.text
                    print(f"API Error in synthesis meta-prompting: {response.status_code}: {error_detail}")
            except Exception as e:
                print(f"Error in synthesis meta-prompting: {str(e)}")
                meta_prompting_results["Synthesis Meta-Prompt Error"] = str(e)

        # NEW: Add prompt effectiveness evaluation phase
        print("=== EVALUATION PHASE: Assessing Meta-Prompting Effectiveness ===")
        evaluation_results = self.evaluate_prompt_effectiveness(meta_prompting_results)
        meta_prompting_results["Prompt_Effectiveness_Evaluation"] = evaluation_results

        return {
            "meta_prompting_results": meta_prompting_results,
            "frames_used": total_frames,
            "chunks_processed": len(frame_chunks),
            "frames_per_chunk": chunk_size,
            "methodology": {
                "approach": "Chunk-based meta-prompting with effectiveness evaluation",
                "phases": [
                    "Chunk-level meta-prompt generation",
                    "Specialized prompt application",
                    "Synthesis meta-prompting",
                    "Prompt effectiveness evaluation"
                ]
            }
        }

    def save_results(self, results, filename):
        """Save results to a file"""
        filepath = os.path.join(self.save_dir, filename)
        with open(filepath, 'w') as f:
            json.dump(results, f, indent=2)
        print(f"Results saved to: {filepath}")

    def analyze_frames(self, frames_data, video_id, crime_type):
        """Analyze frames with meta-prompting"""
        try:
            print(f"\n=== ANALYZING VIDEO: {video_id} ({crime_type}) WITH META-PROMPTING ===")
            print(f"Total frames loaded: {len(frames_data)}")

            timestamp = time.strftime("%Y%m%d_%H%M%S")
            results = self.process_frames_with_meta_prompting(frames_data, video_id)

            # Wrap results in expected format
            final_results = {
                "Meta_Prompting_Analysis": {
                    "method": "meta_prompting",
                    "description": "Chunk-based meta-prompting with effectiveness evaluation",
                    "crime_type": crime_type,
                    "meta_prompting_results": results,
                    "frames_used": results["frames_used"],
                    "valid_frames": results["frames_used"],
                    "analysis_timestamp": timestamp
                }
            }

            # Save complete results
            self.save_results(final_results, f"{crime_type}_{video_id}_meta_prompting_complete_{timestamp}.json")
            print(f"Complete meta-prompting analysis for {video_id} ({crime_type}) saved.")

            return final_results

        except Exception as e:
            print(f"Error in meta-prompting analysis: {str(e)}")
            return {"error": str(e)}

def discover_all_videos_and_frames(data_dir):
    """Discover all crime types, videos, and their frames"""
    print(f"\n=== DISCOVERING ALL VIDEOS AND FRAMES ===")
    print(f"Scanning directory: {data_dir}")

    all_videos = {}

    try:
        # Get all subdirectories (crime types)
        crime_types = [d for d in os.listdir(data_dir)
                      if os.path.isdir(os.path.join(data_dir, d))]

        print(f"Found {len(crime_types)} crime type directories: {crime_types}")

        for crime_type in crime_types:
            crime_dir = os.path.join(data_dir, crime_type)
            print(f"\nScanning {crime_type} directory...")

            try:
                all_files = os.listdir(crime_dir)
                print(f"  Found {len(all_files)} files")

                # Group files by video ID
                video_groups = defaultdict(list)

                for filename in all_files:
                    # Skip non-image files
                    if not any(filename.lower().endswith(ext) for ext in ['.png', '.jpg', '.jpeg', '.bmp']):
                        continue

                    # Extract video ID from filename
                    video_id = extract_video_id_from_filename(filename)
                    if video_id:
                        video_groups[video_id].append(filename)

                print(f"  Identified {len(video_groups)} unique videos:")
                for video_id, frames in video_groups.items():
                    print(f"    {video_id}: {len(frames)} frames")
                    all_videos[f"{crime_type}_{video_id}"] = {
                        'crime_type': crime_type,
                        'video_id': video_id,
                        'frames': frames,
                        'crime_dir': crime_dir
                    }

            except Exception as e:
                print(f"  Error scanning {crime_type}: {str(e)}")

    except Exception as e:
        print(f"Error accessing main directory: {str(e)}")

    print(f"\nTotal videos discovered: {len(all_videos)}")
    return all_videos

def extract_video_id_from_filename(filename):
    """Extract video ID from filename using various patterns"""
    import re

    # Remove file extension
    name_without_ext = os.path.splitext(filename)[0]

    # Pattern 1: VideoName_something_frame_number
    if '_frame_' in name_without_ext:
        parts = name_without_ext.split('_frame_')
        return parts[0]

    # Pattern 2: VideoName_number (assuming last part is frame number)
    parts = name_without_ext.split('_')
    if len(parts) >= 2:
        # Check if last part is a number
        try:
            int(parts[-1])
            return '_'.join(parts[:-1])
        except ValueError:
            pass

    # Pattern 3: Just use the filename without numbers at the end
    video_id = re.sub(r'_?\d+$', '', name_without_ext)
    if video_id and video_id != name_without_ext:
        return video_id

    # Pattern 4: If all else fails, use the whole name (assuming single frame)
    return name_without_ext

def load_frames_for_video(video_info, frame_interval=1):
    """Load every Nth frame for a specific video (default: ALL frames)"""
    frames_data = {}
    crime_dir = video_info['crime_dir']
    frame_files = video_info['frames']
    video_id = video_info['video_id']

    print(f"\nLoading {'ALL frames' if frame_interval == 1 else f'every {frame_interval}th frame'} for {video_id}...")

    # Sort frames by frame number
    def extract_frame_number(filename):
        try:
            import re
            numbers = re.findall(r'\d+', filename)
            if numbers:
                return int(numbers[-1])  # Use the last number found
        except Exception:
            pass
        return 0

    frame_files.sort(key=extract_frame_number)

    # Select every Nth frame
    selected_frames = frame_files[::frame_interval]
    if frame_interval == 1:
        print(f"  Processing ALL {len(selected_frames)} frames")
    else:
        print(f"  Selected {len(selected_frames)} frames from {len(frame_files)} total frames")

    # Load selected frames
    for idx, frame_file in enumerate(selected_frames):
        frame_path = os.path.join(crime_dir, frame_file)
        try:
            with open(frame_path, 'rb') as f:
                frame_data = base64.b64encode(f.read()).decode('utf-8')
                frames_data[frame_file] = frame_data

            # Show progress
            if idx < 3 or idx % 20 == 0 or idx == len(selected_frames) - 1:
                original_idx = frame_files.index(frame_file) if frame_file in frame_files else idx
                print(f"  Loaded: {frame_file} (frame #{original_idx + 1}, {os.path.getsize(frame_path)/1024:.1f} KB)")

        except Exception as e:
            print(f"  Error loading frame {frame_file}: {str(e)}")

    if frame_interval == 1:
        print(f"Successfully loaded ALL {len(frames_data)} frames for {video_id}")
    else:
        print(f"Successfully loaded {len(frames_data)} frames for {video_id} (every {frame_interval}th frame)")
    return frames_data

def process_all_crime_folders(api_key):
    """Process all crime folders with meta-prompting"""
    # Initialize analyzer
    analyzer = MetaPromptingAnalyzer(api_key)

    # Discover all videos and frames
    all_videos = discover_all_videos_and_frames(DATA_DIR)

    if not all_videos:
        print("No videos found to process!")
        return {}

    all_results = {}
    skipped_videos = []

    print(f"\nProcessing {len(all_videos)} videos with meta-prompting...")
    print(f"Frame processing: {'ALL frames' if FRAME_INTERVAL == 1 else f'Every {FRAME_INTERVAL}th frame'}")

    # Process each video
    for video_key, video_info in all_videos.items():
        print(f"\nProcessing video: {video_key}")

        try:
            # Load frames for this video (every Nth frame as configured)
            frames_data = load_frames_for_video(video_info, frame_interval=FRAME_INTERVAL)

            if frames_data:
                # Analyze frames with meta-prompting
                results = analyzer.analyze_frames(
                    frames_data,
                    video_info['video_id'],
                    video_info['crime_type']
                )
                all_results[video_key] = results

                # Save results after each video for resilience
                timestamp = time.strftime("%Y%m%d_%H%M%S")
                current_progress = {video_key: results}
                analyzer.save_results(current_progress, f"progress_{video_info['crime_type']}_{video_info['video_id']}_{timestamp}.json")
            else:
                print(f"No frames loaded for video {video_key} - skipping")
                skipped_videos.append(f"{video_key} (no frames loaded)")

        except Exception as e:
            print(f"Error processing video {video_key}: {str(e)}")
            skipped_videos.append(f"{video_key} (error: {str(e)})")

    # Save summary results
    summary_file = os.path.join(SAVE_DIR, f"meta_prompting_summary_{time.strftime('%Y%m%d_%H%M%S')}.json")
    with open(summary_file, 'w') as f:
        json.dump(all_results, f, indent=2)

    # Log skipped videos
    if skipped_videos:
        skipped_file = os.path.join(SAVE_DIR, f"skipped_videos_{time.strftime('%Y%m%d_%H%M%S')}.txt")
        with open(skipped_file, 'w') as f:
            f.write("Videos that could not be processed:\n")
            for video in skipped_videos:
                f.write(f"{video}\n")
        print(f"\nSkipped {len(skipped_videos)} videos. List saved to: {skipped_file}")

    print(f"\nComplete meta-prompting analysis saved to: {summary_file}")
    print(f"Successfully processed {len(all_results)} videos")

    return all_results

def test_gpt_api(api_key):
    """Test GPT API connection"""
    print("Testing GPT-4o API connection...")

    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json"
    }

    payload = {
        "model": "gpt-4o",
        "messages": [
            {
                "role": "user",
                "content": "Hello, can you respond with 'API connection successful'?"
            }
        ],
        "max_tokens": 50
    }

    try:
        response = requests.post("https://api.openai.com/v1/chat/completions",
                               headers=headers, json=payload)

        if response.status_code == 200:
            result = response.json()
            if "choices" in result:
                print("✓ GPT-4o API connection successful!")
                print(f"Response: {result['choices'][0]['message']['content']}")
                return True
        else:
            print(f"✗ API Error {response.status_code}: {response.text}")
            return False

    except Exception as e:
        print(f"✗ Connection error: {str(e)}")
        return False

def run():
    """Main execution function"""
    print("Meta-Prompting Crime Video Analysis with GPT-4o - ALL Frames")
    print("="*65)
    print("Meta-Prompting = Chunk-based meta-prompting with effectiveness evaluation")
    print("="*65)

    # Test directory access first
    print("Testing directory access...")
    for path in [DATA_DIR, SAVE_DIR]:
        print(f"Path: {path}")
        print(f"  Exists: {os.path.exists(path)}")
        if os.path.exists(path):
            try:
                contents = os.listdir(path)
                print(f"  Contains {len(contents)} items")
                if contents:
                    print(f"  First few items: {contents[:3]}")
            except Exception as e:
                print(f"  Error accessing contents: {str(e)}")

    # Get API key
    try:
        api_key_path = "/home/opade7/Documents/gpu-test/API-KEYS/chatgpt.txt"
        print(f"Trying to load API key from: {api_key_path}")
        print(f"File exists: {os.path.exists(api_key_path)}")

        f = open(api_key_path, "r")
        api_key = f.read().strip()
        f.close()

        if not api_key:
            print("✗ Failed to load GPT API key: File is empty")
            return

        print("✓ Successfully loaded GPT API key")
        print(f"API key starts with: {api_key[:5]}...")

    except Exception as e:
        print(f"✗ Failed to load GPT API key: {str(e)}")
        return

    # Test GPT API connection
    if not test_gpt_api(api_key):
        print("✗ GPT-4o API test failed. Please check your API key and connection.")
        return

    # Verify directories exist
    print("\nVerifying directories:")
    print(f"Data directory exists: {os.path.exists(DATA_DIR)}")
    print(f"Save directory exists: {os.path.exists(SAVE_DIR)}")

    if not os.path.exists(DATA_DIR):
        print(f"✗ Data directory not found: {DATA_DIR}")
        return

    # Create save directory if it doesn't exist
    os.makedirs(SAVE_DIR, exist_ok=True)

    # Process all crime folders
    results = process_all_crime_folders(api_key)

    # Print summary
    total_frames_processed = 0
    total_videos_processed = len(results)
    total_phases_completed = 0

    for video_id, video_results in results.items():
        if video_results and 'Meta_Prompting_Analysis' in video_results:
            analysis = video_results['Meta_Prompting_Analysis']
            total_frames_processed += analysis.get('valid_frames', 0)
            if 'meta_prompting_results' in analysis and 'methodology' in analysis['meta_prompting_results']:
                total_phases_completed += len(analysis['meta_prompting_results']['methodology'].get('phases', []))

    print("\n" + "="*65)
    print(f"META-PROMPTING ANALYSIS COMPLETE!")
    print(f"Videos processed: {total_videos_processed}")
    print(f"Total frames analyzed: {total_frames_processed}")
    print(f"Total meta-phases completed: {total_phases_completed}")
    print(f"Model used: GPT-4o")
    print(f"Analysis pattern: Generate → Apply → Synthesize → Evaluate")
    print("="*65)

if __name__ == "__main__":
    run()

Not running in Colab or drive module not available
Meta-Prompting Crime Video Analysis with GPT-4o - ALL Frames
Meta-Prompting = Chunk-based meta-prompting with effectiveness evaluation
Testing directory access...
Path: /home/opade7/Documents/gpu-test/crime-data
  Exists: True
  Contains 11 items
  First few items: ['Burglary', 'Stealing', 'Fighting']
Path: /home/opade7/Documents/gpu-test/RESULT-PROMPT/GPT/META-PROMPTING
  Exists: False
Trying to load API key from: /home/opade7/Documents/gpu-test/API-KEYS/chatgpt.txt
File exists: True
✓ Successfully loaded GPT API key
API key starts with: sk-pr...
Testing GPT-4o API connection...
✓ GPT-4o API connection successful!
Response: API connection successful.

Verifying directories:
Data directory exists: True
Save directory exists: False

=== DISCOVERING ALL VIDEOS AND FRAMES ===
Scanning directory: /home/opade7/Documents/gpu-test/crime-data
Found 11 crime type directories: ['Burglary', 'Stealing', 'Fighting', 'Robbery', 'Shoplifting', 'Arson

#Chain-Of-Thought Prompting
Chain of Thought (CoT) prompting approach that processes all frames from crime videos. This technique explicitly encourages the model to show its

step-by-step reasoning process.
- Chain of Thought Prompting Approach: The Chain of Thought technique follows this explicit reasoning process:

Step-by-Step Reasoning: The approach explicitly asks the model to "think step by step" through its analysis
- Transparent Reasoning: Each reasoning step is clearly articulated in the response
- Structured Progression: The analysis follows a logical progression from observation to conclusion
- Reasoning Synthesis: The final synthesis also uses step-by-step reasoning to connect all segments

Implementation Highlights

Structured Reasoning Steps:

The prompt breaks down the analysis into 6 clear steps:

- Objective observation without interpretation
- Identification of key actors
- Chronological sequence of events
- Important objects and their usage
- Context and setting analysis
- Integration of observations into a coherent description


Each step builds on the previous one in a logical progression


Complete Frame Processing:

- Processes all frames in chunks of 10 frames each
- Each chunk undergoes the full chain of thought process independently


Reasoning-Based Synthesis: The synthesis prompt also follows a chain of thought structure:

- Extraction of key information from each segment
- Timeline construction across all segments
- Tracking people across multiple segments
- Tracking objects across segments
- Contextual integration of segments
- Construction of a comprehensive description


This ensures the synthesis uses the same reasoning approach as individual chunks


Explicit Prompting for Reasoning:

- Both the analysis and synthesis prompts specifically ask to "think step by step"
- System messages reinforce the importance of step-by-step reasoning
The model is explicitly asked to show its thinking process at each step

In [16]:
import os
import json
import base64
import requests
import time
from datetime import datetime
from collections import defaultdict

# Add Google Drive mounting for Colab
try:
    from google.colab import drive
    drive.mount('/content/drive')
    print("Google Drive mounted successfully!")
except ImportError:
    print("Not running in Colab or drive module not available")

# Configuration
DATA_DIR = "/home/opade7/Documents/gpu-test/crime-data"
SAVE_DIR = "/home/opade7/Documents/gpu-test/RESULT-PROMPT/GPT/CHAIN-OF-THOUGHT"
FRAME_INTERVAL = 1  # Process ALL frames (set to higher number for sampling)

class ChainOfThoughtAnalyzer:
    def __init__(self, api_key):
        self.api_key = api_key
        self.headers = {
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json"
        }
        self.save_dir = SAVE_DIR
        self.chunk_size = 10
        os.makedirs(self.save_dir, exist_ok=True)

        # Chain of Thought prompt template
        self.cot_prompt_template = """
Analyze these video frames using a chain of thought reasoning process. Think step by step as you examine what's happening:

Step 1: First, carefully observe and list what you can actually see in the frames. Note people, objects, settings, and actions without interpretation.

Step 2: Identify the key actors in the scene. Describe each person's appearance and what they are doing. Track individuals across multiple frames.

Step 3: Describe the sequence of events chronologically. What happens first, next, and after that?

Step 4: Note any important objects or items in the scene and how they're being used.

Step 5: Consider the context and setting. Where is this taking place? What kind of environment is shown?

Step 6: Based on all the above observations, describe what appears to be happening in these frames.

Make sure to clearly show your thinking process for each step. These are frames {frame_range} of {total_frames}.
"""

        # Chain of Thought synthesis prompt
        self.cot_synthesis_prompt = """
You are going to synthesize multiple analyses of different segments of the same video into a coherent understanding of the entire sequence. Use chain of thought reasoning to connect all segments into a complete narrative.

Think through the following steps:

Step 1: Review each segment analysis and extract the key information about people, objects, and actions from each one.

Step 2: Create a timeline by arranging events across all segments in chronological order.

Step 3: Identify which people appear across multiple segments and track their actions throughout.

Step 4: Note how objects or items are used or moved across the entire sequence.

Step 5: Consider the overall context and how different segments relate to each other.

Step 6: Based on all the above reasoning, construct a comprehensive description of what happens throughout the entire video.

Show your thinking at each step as you build your understanding of the complete video sequence.
"""

    def _format_chunk_analyses(self, all_chunk_analyses):
        """Helper method to format chunk analyses for synthesis"""
        newline = '\n'
        separator = '-' * 40

        formatted_chunks = []
        for analysis in all_chunk_analyses:
            chunk_text = f"SEGMENT {analysis['chunk']} (Frames {analysis['frame_range']}):{newline}{analysis['analysis']}{newline}{newline}{separator}{newline}"
            formatted_chunks.append(chunk_text)

        return ''.join(formatted_chunks)

    def process_frames_with_cot(self, frames_data, video_id):
        """Process frames with chain of thought prompting approach"""
        url = "https://api.openai.com/v1/chat/completions"

        # Extract frame data from the dictionary
        frame_names = list(frames_data.keys())

        # Improved sorting function for frame numbers
        def extract_frame_number(filename):
            try:
                # Handle different naming patterns
                if '_frame_' in filename:
                    parts = filename.split('_frame_')
                    if len(parts) > 1:
                        number_part = parts[1].split('.')[0]
                        return int(number_part)
                elif 'frame' in filename.lower():
                    # Alternative pattern matching
                    import re
                    numbers = re.findall(r'\d+', filename)
                    if numbers:
                        return int(numbers[-1])  # Use the last number found
            except Exception as e:
                print(f"Error extracting frame number from {filename}: {str(e)}")
                return 0

        sorted_frames = sorted(frame_names, key=extract_frame_number)
        frame_data = [frames_data[frame_name] for frame_name in sorted_frames if frame_name in frames_data and frames_data[frame_name]]

        if not frame_data:
            return {"error": "No valid frames available for analysis"}

        total_frames = len(frame_data)
        print(f"Processing all {total_frames} frames with chain of thought approach")

        # Process all frames by dividing them into chunks
        chunk_size = 10
        frame_chunks = [frame_data[i:i+chunk_size] for i in range(0, total_frames, chunk_size)]
        print(f"Split into {len(frame_chunks)} chunks of approximately {chunk_size} frames each")

        # Initialize results
        cot_results = {}
        all_chunk_analyses = []

        # Process each chunk of frames
        for chunk_idx, chunk in enumerate(frame_chunks):
            frame_start = chunk_idx * chunk_size + 1
            frame_end = min((chunk_idx + 1) * chunk_size, total_frames)
            frame_range = f"{frame_start}-{frame_end}"

            print(f"Processing chunk {chunk_idx+1}/{len(frame_chunks)} (frames {frame_range})...")

            # Initialize chunk results
            chunk_results = {
                "frame_range": frame_range
            }

            # Format the CoT prompt for this chunk
            formatted_cot_prompt = self.cot_prompt_template.format(frame_range=frame_range, total_frames=total_frames)

            # Create conversation for CoT analysis
            cot_conversation = [
                {
                    "role": "system",
                    "content": "You are analyzing frames from a video. Use step-by-step reasoning to carefully analyze what you observe."
                },
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": formatted_cot_prompt
                        }
                    ]
                }
            ]

            # Add frames to the user message
            for frame in chunk:
                # Use PNG mime type since frames are PNGs
                mime_type = "image/png"

                cot_conversation[1]["content"].append({
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:{mime_type};base64,{frame}",
                        "detail": "high"
                    }
                })

            # Make API request for CoT analysis
            payload = {
                "model": "gpt-4o",
                "messages": cot_conversation,
                "max_tokens": 4096,
                "temperature": 0.1  # Lower temperature for consistent reasoning
            }

            try:
                print(f"  Sending request to GPT-4o for chunk {chunk_idx+1}...")
                response = requests.post(url, headers=self.headers, json=payload)

                if response.status_code != 200:
                    error_detail = response.json() if response.text else response.text
                    print(f"  API Error {response.status_code}: {error_detail}")
                    chunk_results["error"] = f"Error {response.status_code}: {error_detail}"
                else:
                    result = response.json()
                    if "choices" in result and result["choices"]:
                        cot_analysis = result["choices"][0]["message"]["content"]
                        print(f"  Received response for chunk {chunk_idx+1}")

                        # Save the CoT analysis
                        chunk_results["cot_analysis"] = cot_analysis

                        # Add to collection of all chunk analyses
                        all_chunk_analyses.append({
                            "chunk": chunk_idx + 1,
                            "frame_range": frame_range,
                            "analysis": cot_analysis
                        })

            except Exception as e:
                print(f"  Error in CoT analysis for chunk {chunk_idx+1}: {str(e)}")
                chunk_results["error"] = str(e)

            # Save results for this chunk
            cot_results[f"Chunk {chunk_idx+1}"] = chunk_results

            # Save intermediate results for this chunk
            timestamp = time.strftime("%Y%m%d_%H%M%S")
            chunk_result = {
                f"Chunk {chunk_idx+1}": chunk_results
            }
            self.save_results(chunk_result, f"{video_id}_cot_chunk{chunk_idx+1}_{timestamp}.json")
            print(f"  Results for chunk {chunk_idx+1} saved")

            # Rate limiting between chunks
            print(f"  Waiting 3 seconds before next chunk...")
            time.sleep(3)

        # After processing all chunks, generate a synthesis using CoT
        if all_chunk_analyses:
            print("Generating chain of thought synthesis across all chunks...")

            # Create conversation for CoT synthesis
            synthesis_conversation = [
                {
                    "role": "system",
                    "content": "You are synthesizing multiple analyses into a coherent narrative. Use step-by-step reasoning to connect all parts."
                },
                {
                    "role": "user",
                    "content": f"""
{self.cot_synthesis_prompt}

Here are the analyses for each segment of the video:

{'-' * 40}
{self._format_chunk_analyses(all_chunk_analyses)}

Think step by step to synthesize these segments into a complete understanding of the video.
"""
                }
            ]

            # Make API request for CoT synthesis
            payload = {
                "model": "gpt-4o",
                "messages": synthesis_conversation,
                "max_tokens": 4096,
                "temperature": 0.1  # Lower temperature for consistent reasoning
            }

            try:
                print("Sending request for final synthesis...")
                response = requests.post(url, headers=self.headers, json=payload)

                if response.status_code == 200:
                    result = response.json()
                    if "choices" in result and result["choices"]:
                        cot_synthesis = result["choices"][0]["message"]["content"]
                        print("Synthesis complete!")

                        # Save CoT synthesis
                        cot_results["Chain of Thought Synthesis"] = {
                            "synthesis": cot_synthesis
                        }

                        # Save synthesis separately
                        timestamp = time.strftime("%Y%m%d_%H%M%S")
                        synthesis_result = {
                            "Chain of Thought Synthesis": cot_results["Chain of Thought Synthesis"]
                        }
                        self.save_results(synthesis_result, f"{video_id}_cot_synthesis_{timestamp}.json")
                        print("Synthesis results saved")
                else:
                    error_detail = response.json() if response.text else response.text
                    print(f"API Error in synthesis: {response.status_code}: {error_detail}")
            except Exception as e:
                print(f"Error in CoT synthesis: {str(e)}")
                cot_results["Chain of Thought Synthesis"] = {
                    "error": str(e)
                }

        return {
            "cot_results": cot_results,
            "frames_used": total_frames,
            "chunks_processed": len(frame_chunks),
            "frames_per_chunk": chunk_size
        }

    def save_results(self, results, filename):
        """Save results to a file"""
        filepath = os.path.join(self.save_dir, filename)
        with open(filepath, 'w') as f:
            json.dump(results, f, indent=2)
        print(f"Results saved to: {filepath}")

    def analyze_frames(self, frames_data, video_id, crime_type):
        """Analyze frames with chain of thought prompting"""
        try:
            print(f"\n=== ANALYZING VIDEO: {video_id} ({crime_type}) WITH CHAIN OF THOUGHT PROMPTING ===")
            print(f"Total frames loaded: {len(frames_data)}")

            timestamp = time.strftime("%Y%m%d_%H%M%S")
            results = self.process_frames_with_cot(frames_data, video_id)

            # Add crime type to results
            if "cot_results" in results:
                results["crime_type"] = crime_type

            # Save complete results
            self.save_results(results, f"{crime_type}_{video_id}_cot_complete_{timestamp}.json")
            print(f"Complete chain of thought analysis for {video_id} ({crime_type}) saved.")

            return results

        except Exception as e:
            print(f"Error in chain of thought analysis: {str(e)}")
            return {"error": str(e)}

def discover_all_videos_and_frames(data_dir):
    """Discover all crime types, videos, and their frames"""
    print(f"\n=== DISCOVERING ALL VIDEOS AND FRAMES ===")
    print(f"Scanning directory: {data_dir}")

    all_videos = {}

    try:
        # Get all subdirectories (crime types)
        crime_types = [d for d in os.listdir(data_dir)
                      if os.path.isdir(os.path.join(data_dir, d))]

        print(f"Found {len(crime_types)} crime type directories: {crime_types}")

        for crime_type in crime_types:
            crime_dir = os.path.join(data_dir, crime_type)
            print(f"\nScanning {crime_type} directory...")

            try:
                all_files = os.listdir(crime_dir)
                print(f"  Found {len(all_files)} files")

                # Group files by video ID
                video_groups = defaultdict(list)

                for filename in all_files:
                    # Skip non-image files
                    if not any(filename.lower().endswith(ext) for ext in ['.png', '.jpg', '.jpeg', '.bmp']):
                        continue

                    # Extract video ID from filename
                    video_id = extract_video_id_from_filename(filename)
                    if video_id:
                        video_groups[video_id].append(filename)

                print(f"  Identified {len(video_groups)} unique videos:")
                for video_id, frames in video_groups.items():
                    print(f"    {video_id}: {len(frames)} frames")
                    all_videos[f"{crime_type}_{video_id}"] = {
                        'crime_type': crime_type,
                        'video_id': video_id,
                        'frames': frames,
                        'crime_dir': crime_dir
                    }

            except Exception as e:
                print(f"  Error scanning {crime_type}: {str(e)}")

    except Exception as e:
        print(f"Error accessing main directory: {str(e)}")

    print(f"\nTotal videos discovered: {len(all_videos)}")
    return all_videos

def extract_video_id_from_filename(filename):
    """Extract video ID from filename using various patterns"""
    import re

    # Remove file extension
    name_without_ext = os.path.splitext(filename)[0]

    # Pattern 1: VideoName_something_frame_number
    if '_frame_' in name_without_ext:
        parts = name_without_ext.split('_frame_')
        return parts[0]

    # Pattern 2: VideoName_number (assuming last part is frame number)
    parts = name_without_ext.split('_')
    if len(parts) >= 2:
        # Check if last part is a number
        try:
            int(parts[-1])
            return '_'.join(parts[:-1])
        except ValueError:
            pass

    # Pattern 3: Just use the filename without numbers at the end
    video_id = re.sub(r'_?\d+$', '', name_without_ext)
    if video_id and video_id != name_without_ext:
        return video_id

    # Pattern 4: If all else fails, use the whole name (assuming single frame)
    return name_without_ext

def load_frames_for_video(video_info, frame_interval=1):
    """Load every Nth frame for a specific video (default: ALL frames)"""
    frames_data = {}
    crime_dir = video_info['crime_dir']
    frame_files = video_info['frames']
    video_id = video_info['video_id']

    print(f"\nLoading {'ALL frames' if frame_interval == 1 else f'every {frame_interval}th frame'} for {video_id}...")

    # Sort frames by frame number
    def extract_frame_number(filename):
        try:
            import re
            numbers = re.findall(r'\d+', filename)
            if numbers:
                return int(numbers[-1])  # Use the last number found
        except Exception:
            pass
        return 0

    frame_files.sort(key=extract_frame_number)

    # Select every Nth frame
    selected_frames = frame_files[::frame_interval]
    if frame_interval == 1:
        print(f"  Processing ALL {len(selected_frames)} frames")
    else:
        print(f"  Selected {len(selected_frames)} frames from {len(frame_files)} total frames")

    # Load selected frames
    for idx, frame_file in enumerate(selected_frames):
        frame_path = os.path.join(crime_dir, frame_file)
        try:
            with open(frame_path, 'rb') as f:
                frame_data = base64.b64encode(f.read()).decode('utf-8')
                frames_data[frame_file] = frame_data

            # Show progress
            if idx < 3 or idx % 20 == 0 or idx == len(selected_frames) - 1:
                original_idx = frame_files.index(frame_file) if frame_file in frame_files else idx
                print(f"  Loaded: {frame_file} (frame #{original_idx + 1}, {os.path.getsize(frame_path)/1024:.1f} KB)")

        except Exception as e:
            print(f"  Error loading frame {frame_file}: {str(e)}")

    if frame_interval == 1:
        print(f"Successfully loaded ALL {len(frames_data)} frames for {video_id}")
    else:
        print(f"Successfully loaded {len(frames_data)} frames for {video_id} (every {frame_interval}th frame)")
    return frames_data

def process_all_crime_folders(api_key):
    """Process all crime folders with chain of thought prompting"""
    # Initialize analyzer
    analyzer = ChainOfThoughtAnalyzer(api_key)

    # Discover all videos and frames
    all_videos = discover_all_videos_and_frames(DATA_DIR)

    if not all_videos:
        print("No videos found to process!")
        return {}

    all_results = {}
    skipped_videos = []

    print(f"\nProcessing {len(all_videos)} videos with chain of thought prompting...")
    print(f"Frame processing: {'ALL frames' if FRAME_INTERVAL == 1 else f'Every {FRAME_INTERVAL}th frame'}")

    # Process each video
    for video_key, video_info in all_videos.items():
        print(f"\nProcessing video: {video_key}")

        try:
            # Load frames for this video (every Nth frame as configured)
            frames_data = load_frames_for_video(video_info, frame_interval=FRAME_INTERVAL)

            if frames_data:
                # Analyze frames with chain of thought prompting
                results = analyzer.analyze_frames(
                    frames_data,
                    video_info['video_id'],
                    video_info['crime_type']
                )
                all_results[video_key] = results

                # Save results after each video for resilience
                timestamp = time.strftime("%Y%m%d_%H%M%S")
                current_progress = {video_key: results}
                analyzer.save_results(current_progress, f"progress_{video_info['crime_type']}_{video_info['video_id']}_{timestamp}.json")
            else:
                print(f"No frames loaded for video {video_key} - skipping")
                skipped_videos.append(f"{video_key} (no frames loaded)")

        except Exception as e:
            print(f"Error processing video {video_key}: {str(e)}")
            skipped_videos.append(f"{video_key} (error: {str(e)})")

    # Save summary results
    summary_file = os.path.join(SAVE_DIR, f"cot_summary_{time.strftime('%Y%m%d_%H%M%S')}.json")
    with open(summary_file, 'w') as f:
        json.dump(all_results, f, indent=2)

    # Log skipped videos
    if skipped_videos:
        skipped_file = os.path.join(SAVE_DIR, f"skipped_videos_{time.strftime('%Y%m%d_%H%M%S')}.txt")
        with open(skipped_file, 'w') as f:
            f.write("Videos that could not be processed:\n")
            for video in skipped_videos:
                f.write(f"{video}\n")
        print(f"\nSkipped {len(skipped_videos)} videos. List saved to: {skipped_file}")

    print(f"\nComplete chain of thought analysis saved to: {summary_file}")
    print(f"Successfully processed {len(all_results)} videos")

    return all_results

def run():
    """Main execution function"""
    print("Chain of Thought Prompting Crime Video Analysis - ALL Frames")
    print("="*50)

    # Test directory access first
    print("Testing directory access...")
    for path in [DATA_DIR, SAVE_DIR]:
        print(f"Path: {path}")
        print(f"  Exists: {os.path.exists(path)}")
        if os.path.exists(path):
            try:
                contents = os.listdir(path)
                print(f"  Contains {len(contents)} items")
                if contents:
                    print(f"  First few items: {contents[:3]}")
            except Exception as e:
                print(f"  Error accessing contents: {str(e)}")

    # Get API key
    try:
        api_key_path = "/home/opade7/Documents/gpu-test/API-KEYS/chatgpt.txt"
        print(f"Trying to load API key from: {api_key_path}")
        print(f"File exists: {os.path.exists(api_key_path)}")

        f = open(api_key_path, "r")
        api_key = f.read().strip()
        f.close()

        if not api_key:
            print("✗ Failed to load GPT API key: File is empty")
            return

        print("✓ Successfully loaded GPT API key")
        print(f"API key starts with: {api_key[:5]}...")

    except Exception as e:
        print(f"✗ Failed to load GPT API key: {str(e)}")
        return

    # Verify directories exist
    print("\nVerifying directories:")
    print(f"Data directory exists: {os.path.exists(DATA_DIR)}")
    print(f"Save directory exists: {os.path.exists(SAVE_DIR)}")

    if not os.path.exists(DATA_DIR):
        print(f"✗ Data directory not found: {DATA_DIR}")
        return

    # Create save directory if it doesn't exist
    os.makedirs(SAVE_DIR, exist_ok=True)

    # Process all crime folders
    results = process_all_crime_folders(api_key)

    print("\n" + "="*50)
    print(f"CHAIN OF THOUGHT PROMPTING COMPLETE!")
    print(f"Videos processed: {len(results)}")
    print("="*50)

if __name__ == "__main__":
    run()

Not running in Colab or drive module not available
Chain of Thought Prompting Crime Video Analysis - ALL Frames
Testing directory access...
Path: /home/opade7/Documents/gpu-test/crime-data
  Exists: True
  Contains 11 items
  First few items: ['Burglary', 'Stealing', 'Fighting']
Path: /home/opade7/Documents/gpu-test/RESULT-PROMPT/GPT/CHAIN-OF-THOUGHT
  Exists: False
Trying to load API key from: /home/opade7/Documents/gpu-test/API-KEYS/chatgpt.txt
File exists: True
✓ Successfully loaded GPT API key
API key starts with: sk-pr...

Verifying directories:
Data directory exists: True
Save directory exists: False

=== DISCOVERING ALL VIDEOS AND FRAMES ===
Scanning directory: /home/opade7/Documents/gpu-test/crime-data
Found 11 crime type directories: ['Burglary', 'Stealing', 'Fighting', 'Robbery', 'Shoplifting', 'Arson', 'Vandalism', 'Shooting', 'Abuse', 'Explosion', 'Assault']

Scanning Burglary directory...
  Found 398 files
  Identified 2 unique videos:
    Burglary001_x264: 266 frames
    