In [1]:
!pip install torch torchvision opencv-python-headless
!pip install openai
!pip install opencv-python pillow requests
!pip install matplotlib
!pip install matplotlib opencv-python-headless
!pip install pillow
!pip install cucim[plugins]
!pip install anthropic



In [2]:
# Install the CUDA 12 specific version of cuCIM
!pip install cucim-cu12

# Install CuPy for CUDA 12
!pip install cupy-cuda12x

# Install the required image format plugins
!pip install pylibcucim-cu12



In [3]:
from IPython.display import display, Image, Audio
import openai
import cv2  # We're using OpenCV to read video, to install !pip install opencv-python
import base64
import time
from openai import OpenAI
import os
import requests
import torch

In [4]:
# In a Python cell
!lspci | grep -i nvidia
!nvidia-smi
!nvcc --version
!which python
!python --version

0000:17:00.0 VGA compatible controller: NVIDIA Corporation GA102GL [RTX A6000] (rev a1)
0000:17:00.1 Audio device: NVIDIA Corporation GA102 High Definition Audio Controller (rev a1)
0000:65:00.0 VGA compatible controller: NVIDIA Corporation GA102GL [RTX A6000] (rev a1)
0000:65:00.1 Audio device: NVIDIA Corporation GA102 High Definition Audio Controller (rev a1)
Tue Jun 17 11:17:22 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 570.133.07             Driver Version: 570.133.07     CUDA Version: 12.8     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA RTX A6000               Off |   000000

In [5]:
!nvidia-smi

Tue Jun 17 11:17:23 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 570.133.07             Driver Version: 570.133.07     CUDA Version: 12.8     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA RTX A6000               Off |   00000000:17:00.0 Off |                  Off |
| 30%   30C    P5             20W /  300W |      18MiB /  49140MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
|   1  NVIDIA RTX A6000               Off |   00

In [6]:
import torch

print(torch.cuda.is_available())  # Should return True
print(torch.cuda.current_device())  # Should return 0
print(torch.cuda.get_device_name(0))  # Should return "NVIDIA RTX A6000" (not A100)
!nvidia-smi

True
0
NVIDIA RTX A6000
Tue Jun 17 11:17:24 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 570.133.07             Driver Version: 570.133.07     CUDA Version: 12.8     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA RTX A6000               Off |   00000000:17:00.0 Off |                  Off |
| 30%   30C    P5             27W /  300W |      18MiB /  49140MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
|   1  NVIDIA RTX A6000 

In [7]:
f = open("/home/opade7/Documents/gpu-test/API-KEYS/claude.txt", "r")
mykey = f.read()

#Iterative prompting
Iterative Prompting Approach
The Iterative Prompting technique follows a structured refinement process:

- Initial Analysis: The model provides a first-pass analysis of what appears to be happening in the frames
- Guided Iterations: Through a series of targeted follow-up prompts, the model refines specific aspects of its analysis
- Progressive Improvement: Each round builds on previous insights while addressing potential weaknesses or gaps
- Final Synthesis: After multiple refinement rounds, the model creates a final, comprehensive assessment

Implementation Highlights

Multi-Round Refinement:

Starts with an initial general analysis prompt
Follows with 4 specialized refinement rounds:

- People and relationships focus
- Actions and intent focus
- Criminal elements and evidence focus
- Critical examination (missing elements, alternative interpretations)


Concludes with a final synthesis prompt for each chunk


Complete Frame Processing:

- Processes all frames in chunks of 10 frames each
- Each chunk undergoes the full iterative process independently


Conversation Continuity:

- Maintains the complete conversation history throughout all rounds
- Each refinement builds on the accumulated context from previous rounds
- Creates a progressive improvement cycle where later responses incorporate earlier insights


Holistic Synthesis:

- After all chunks are iteratively analyzed, performs a final cross-chunk synthesis
- Creates a coherent narrative of the entire incident
- Addresses any discrepancies between chunk analyses

In [8]:
import os
import json
import base64
import time
from datetime import datetime
from collections import defaultdict
import anthropic

# Add Google Drive mounting for Colab
try:
    from google.colab import drive
    drive.mount('/content/drive')
    print("Google Drive mounted successfully!")
except ImportError:
    print("Not running in Colab or drive module not available")

# Configuration
DATA_DIR = "/home/opade7/Documents/gpu-test/crime-data"
SAVE_DIR = "/home/opade7/Documents/gpu-test/RESULT-PROMPT/CLAUDE/TRUE-ITERATIVE"
FRAME_INTERVAL = 1  # Process ALL frames (set to higher number for sampling)

class TrueIterativeClaudeAnalyzer:
    def __init__(self, api_key):
        self.api_key = api_key
        self.model_name = "claude-sonnet-4-20250514"  # Claude Sonnet 4
        self.client = anthropic.Anthropic(api_key=api_key)
        self.save_dir = SAVE_DIR
        self.chunk_size = 10
        self.max_iterations = 8  # Maximum iterations before stopping
        self.convergence_threshold = 0.7  # Similarity threshold for convergence
        os.makedirs(self.save_dir, exist_ok=True)

        # Core iterative question that gets refined each time
        self.core_question = "Analyze these video frames for criminal activity. What crime is occurring, who is involved, what evidence supports your conclusion, and how confident are you in this assessment?"

    def make_claude_request(self, messages):
        """Make a single request to Claude API using Anthropic SDK"""
        try:
            response = self.client.messages.create(
                model=self.model_name,
                max_tokens=4096,
                temperature=0.1,
                messages=messages
            )
            return response.content[0].text

        except anthropic.APIError as e:
            print(f"API Error: {str(e)}")
            return f"API Error: {str(e)}"
        except anthropic.RateLimitError as e:
            print(f"Rate limit error: {str(e)}")
            print("Waiting 60 seconds before retrying...")
            time.sleep(60)
            # Retry once
            try:
                response = self.client.messages.create(
                    model=self.model_name,
                    max_tokens=4096,
                    temperature=0.1,
                    messages=messages
                )
                return response.content[0].text
            except Exception as retry_e:
                print(f"Retry failed: {str(retry_e)}")
                return f"Error after retry: {str(retry_e)}"
        except anthropic.AuthenticationError as e:
            print(f"Authentication error: {str(e)}")
            return f"Authentication Error: {str(e)}"
        except Exception as e:
            print(f"API request error: {str(e)}")
            return f"Error: {str(e)}"

    def calculate_similarity(self, text1, text2):
        """Simple similarity calculation based on word overlap"""
        if not text1 or not text2:
            return 0.0

        # Convert to lowercase and split into words
        words1 = set(text1.lower().split())
        words2 = set(text2.lower().split())

        # Calculate Jaccard similarity
        intersection = words1.intersection(words2)
        union = words1.union(words2)

        if len(union) == 0:
            return 0.0

        return len(intersection) / len(union)

    def has_converged(self, current_response, previous_response):
        """Check if the analysis has converged (responses are very similar)"""
        if not previous_response:
            return False

        similarity = self.calculate_similarity(current_response, previous_response)
        print(f"    Similarity to previous: {similarity:.3f} (threshold: {self.convergence_threshold})")

        return similarity >= self.convergence_threshold

    def extract_confidence_score(self, response):
        """Extract confidence score from response if mentioned"""
        confidence_keywords = ["confidence", "confident", "certainty", "sure", "probability"]
        response_lower = response.lower()

        # Look for percentage mentions
        import re
        percentages = re.findall(r'(\d+)%', response)
        if percentages:
            return max([int(p) for p in percentages]) / 100.0

        # Look for confidence keywords with qualifiers
        if any(keyword in response_lower for keyword in ["very confident", "highly confident", "extremely confident"]):
            return 0.9
        elif any(keyword in response_lower for keyword in ["confident", "fairly confident"]):
            return 0.7
        elif any(keyword in response_lower for keyword in ["somewhat confident", "moderately confident"]):
            return 0.5
        elif any(keyword in response_lower for keyword in ["low confidence", "uncertain", "unsure"]):
            return 0.3

        return 0.5  # Default moderate confidence

    def process_frames_truly_iteratively(self, frame_data, video_id, crime_type):
        """Process frames using TRUE iterative prompting - same question refined repeatedly"""
        all_iterations = {}
        previous_response = None
        converged = False

        print(f"Starting TRUE iterative analysis with max {self.max_iterations} iterations...")
        print(f"Core question: {self.core_question}")
        print(f"Convergence threshold: {self.convergence_threshold}")
        print(f"Using model: {self.model_name}")

        # TRUE ITERATIVE LOOP - Same question, progressively refined
        for iteration_num in range(1, self.max_iterations + 1):
            print(f"\n=== TRUE ITERATION {iteration_num}/{self.max_iterations} ===")

            iteration_responses = []

            # Process frames in chunks for this iteration
            for i in range(0, len(frame_data), self.chunk_size):
                chunk = frame_data[i:i + self.chunk_size]

                # Build TRUE iterative prompt
                if iteration_num == 1:
                    # First iteration - ask the core question
                    iterative_prompt = f"""ITERATION {iteration_num} - Initial Analysis

Analyzing {crime_type} video {video_id}:

{self.core_question}

Be thorough and specific in your analysis. Include your confidence level in your assessment."""

                else:
                    # Subsequent iterations - refine based on previous response
                    iterative_prompt = f"""ITERATION {iteration_num} - Refining Previous Analysis

Analyzing {crime_type} video {video_id}:

PREVIOUS ANALYSIS FROM ITERATION {iteration_num-1}:
{previous_response[:800]}...

Now, analyze these SAME frames again with the SAME core question, but refine your analysis:

{self.core_question}

REFINEMENT INSTRUCTIONS:
- Review your previous analysis carefully
- Look for details you may have missed
- Reconsider your conclusions with fresh perspective
- Identify any errors or oversights in your previous assessment
- Improve the accuracy and depth of your analysis
- If you're more confident now, explain why
- If you're less confident, explain what creates uncertainty
- What new insights do you have upon re-examination?

Provide your REFINED analysis of the same core question."""

                # Prepare content with images
                content = [{"type": "text", "text": iterative_prompt}]

                # Add images to content
                for frame in chunk:
                    content.append({
                        "type": "image",
                        "source": {
                            "type": "base64",
                            "media_type": "image/png",
                            "data": frame
                        }
                    })

                messages = [{
                    "role": "user",
                    "content": content
                }]

                print(f"  Processing chunk {i//self.chunk_size + 1}/{(len(frame_data) + self.chunk_size - 1)//self.chunk_size}...")

                # Make API request for this chunk
                response = self.make_claude_request(messages)
                iteration_responses.append(response)

                # Rate limiting
                print(f"  Waiting 3 seconds before next request...")
                time.sleep(3)

            # Combine responses for this iteration
            if len(iteration_responses) == 1:
                current_response = iteration_responses[0]
            else:
                current_response = "\n\n=== NEXT CHUNK ===\n\n".join(iteration_responses)

            # Extract confidence for this iteration
            confidence = self.extract_confidence_score(current_response)

            # Check for convergence
            if previous_response:
                converged = self.has_converged(current_response, previous_response)

            # Store this iteration's data
            iteration_data = {
                "iteration": iteration_num,
                "type": "true_iterative_refinement",
                "core_question": self.core_question,
                "prompt_used": iterative_prompt,
                "response": current_response,
                "confidence_extracted": confidence,
                "converged": converged,
                "similarity_to_previous": self.calculate_similarity(current_response, previous_response) if previous_response else 0.0,
                "model": self.model_name
            }

            all_iterations[f"iteration_{iteration_num}"] = iteration_data

            print(f"  Confidence level: {confidence:.2f}")
            print(f"  Response preview: {current_response[:200]}...")

            # Check for convergence
            if converged:
                print(f"  *** CONVERGENCE ACHIEVED at iteration {iteration_num} ***")
                break
            elif iteration_num < self.max_iterations:
                print(f"  Continuing to iteration {iteration_num + 1} (not yet converged)")

            # Update previous response for next iteration
            previous_response = current_response

        # Create convergence summary
        convergence_summary = {
            "total_iterations_run": iteration_num,
            "max_iterations_allowed": self.max_iterations,
            "converged": converged,
            "convergence_threshold": self.convergence_threshold,
            "final_confidence": confidence,
            "methodology": "True iterative refinement - same question refined repeatedly",
            "model": self.model_name
        }

        if converged:
            convergence_summary["convergence_iteration"] = iteration_num
            convergence_summary["convergence_reason"] = f"Response similarity reached {self.convergence_threshold} threshold"
        else:
            convergence_summary["convergence_reason"] = f"Maximum iterations ({self.max_iterations}) reached without convergence"

        return all_iterations, convergence_summary

    def save_results(self, results, filename):
        """Save results to a file"""
        filepath = os.path.join(self.save_dir, filename)
        with open(filepath, 'w') as f:
            json.dump(results, f, indent=2)
        print(f"Results saved to: {filepath}")

    def analyze_frames(self, frames_data, video_id, crime_type):
        """Analyze frames with TRUE iterative prompting approach"""
        try:
            frame_names = list(frames_data.keys())

            # Improved sorting function for frame numbers
            def extract_frame_number(filename):
                try:
                    # Handle different naming patterns
                    if '_frame_' in filename:
                        parts = filename.split('_frame_')
                        if len(parts) > 1:
                            number_part = parts[1].split('.')[0]
                            return int(number_part)
                    elif 'frame' in filename.lower():
                        # Alternative pattern matching
                        import re
                        numbers = re.findall(r'\d+', filename)
                        if numbers:
                            return int(numbers[-1])  # Use the last number found
                except Exception as e:
                    print(f"Error extracting frame number from {filename}: {str(e)}")
                    return 0

            sorted_frames = sorted(frame_names, key=extract_frame_number)

            print(f"\n=== ANALYZING VIDEO: {video_id} ({crime_type}) ===")
            print(f"Total frames loaded: {len(frames_data)}")
            print(f"Frame names sample: {sorted_frames[:5]}{'...' if len(sorted_frames) > 5 else ''}")
            print(f"Using model: {self.model_name}")

            results = {}
            timestamp = time.strftime("%Y%m%d_%H%M%S")

            try:
                frame_data = [frames_data[frame_name] for frame_name in sorted_frames
                             if frame_name in frames_data and frames_data[frame_name]]

                if not frame_data:
                    results["True_Iterative_Analysis"] = {
                        "error": "No valid frames were available for analysis.",
                        "frames_used": len(sorted_frames),
                        "valid_frames": 0,
                        "crime_type": crime_type,
                        "model": self.model_name
                    }
                    print("WARNING: No valid frames were available for analysis.")
                else:
                    print(f"Processing {len(frame_data)} valid frames with TRUE iterative prompting...")
                    iterative_responses, convergence_summary = self.process_frames_truly_iteratively(frame_data, video_id, crime_type)

                    results["True_Iterative_Analysis"] = {
                        "method": "true_iterative_prompting",
                        "description": "Same question refined repeatedly until convergence",
                        "core_question": self.core_question,
                        "crime_type": crime_type,
                        "convergence_summary": convergence_summary,
                        "all_iterations": iterative_responses,
                        "frames_used": len(sorted_frames),
                        "valid_frames": len(frame_data),
                        "analysis_timestamp": timestamp,
                        "model": self.model_name
                    }

                # Save results
                self.save_results(results, f"{crime_type}_{video_id}_true_iterative_analysis_{timestamp}.json")
                print(f"True iterative analysis for {video_id} ({crime_type}) completed and saved.")

            except Exception as e:
                print(f"Error processing true iterative analysis: {str(e)}")
                results["True_Iterative_Analysis"] = {
                    "method": "true_iterative_prompting",
                    "error": str(e),
                    "frames_used": len(sorted_frames) if 'sorted_frames' in locals() else 0,
                    "crime_type": crime_type,
                    "model": self.model_name
                }

            return results

        except Exception as e:
            print(f"Error in analyze_frames: {str(e)}")
            raise

def discover_all_videos_and_frames(data_dir):
    """Discover all crime types, videos, and their frames"""
    print(f"\n=== DISCOVERING ALL VIDEOS AND FRAMES ===")
    print(f"Scanning directory: {data_dir}")

    all_videos = {}

    try:
        # Get all subdirectories (crime types)
        crime_types = [d for d in os.listdir(data_dir)
                      if os.path.isdir(os.path.join(data_dir, d))]

        print(f"Found {len(crime_types)} crime type directories: {crime_types}")

        for crime_type in crime_types:
            crime_dir = os.path.join(data_dir, crime_type)
            print(f"\nScanning {crime_type} directory...")

            try:
                all_files = os.listdir(crime_dir)
                print(f"  Found {len(all_files)} files")

                # Group files by video ID
                video_groups = defaultdict(list)

                for filename in all_files:
                    # Skip non-image files
                    if not any(filename.lower().endswith(ext) for ext in ['.png', '.jpg', '.jpeg', '.bmp']):
                        continue

                    # Extract video ID from filename
                    video_id = extract_video_id_from_filename(filename)
                    if video_id:
                        video_groups[video_id].append(filename)

                print(f"  Identified {len(video_groups)} unique videos:")
                for video_id, frames in video_groups.items():
                    print(f"    {video_id}: {len(frames)} frames")
                    all_videos[f"{crime_type}_{video_id}"] = {
                        'crime_type': crime_type,
                        'video_id': video_id,
                        'frames': frames,
                        'crime_dir': crime_dir
                    }

            except Exception as e:
                print(f"  Error scanning {crime_type}: {str(e)}")

    except Exception as e:
        print(f"Error accessing main directory: {str(e)}")

    print(f"\nTotal videos discovered: {len(all_videos)}")
    return all_videos

def extract_video_id_from_filename(filename):
    """Extract video ID from filename using various patterns"""
    import re

    # Remove file extension
    name_without_ext = os.path.splitext(filename)[0]

    # Pattern 1: VideoName_something_frame_number
    if '_frame_' in name_without_ext:
        parts = name_without_ext.split('_frame_')
        return parts[0]

    # Pattern 2: VideoName_number (assuming last part is frame number)
    parts = name_without_ext.split('_')
    if len(parts) >= 2:
        # Check if last part is a number
        try:
            int(parts[-1])
            return '_'.join(parts[:-1])
        except ValueError:
            pass

    # Pattern 3: Just use the filename without numbers at the end
    video_id = re.sub(r'_?\d+$', '', name_without_ext)
    if video_id and video_id != name_without_ext:
        return video_id

    # Pattern 4: If all else fails, use the whole name (assuming single frame)
    return name_without_ext

def load_frames_for_video(video_info, frame_interval=1):
    """Load every Nth frame for a specific video (default: ALL frames)"""
    frames_data = {}
    crime_dir = video_info['crime_dir']
    frame_files = video_info['frames']
    video_id = video_info['video_id']

    print(f"\nLoading {'ALL frames' if frame_interval == 1 else f'every {frame_interval}th frame'} for {video_id}...")

    # Sort frames by frame number
    def extract_frame_number(filename):
        try:
            import re
            numbers = re.findall(r'\d+', filename)
            if numbers:
                return int(numbers[-1])  # Use the last number found
        except Exception:
            pass
        return 0

    frame_files.sort(key=extract_frame_number)

    # Select every Nth frame
    selected_frames = frame_files[::frame_interval]
    if frame_interval == 1:
        print(f"  Processing ALL {len(selected_frames)} frames")
    else:
        print(f"  Selected {len(selected_frames)} frames from {len(frame_files)} total frames")

    # Load selected frames
    for idx, frame_file in enumerate(selected_frames):
        frame_path = os.path.join(crime_dir, frame_file)
        try:
            with open(frame_path, 'rb') as f:
                frame_data = base64.b64encode(f.read()).decode('utf-8')
                frames_data[frame_file] = frame_data

            # Show progress
            if idx < 3 or idx % 10 == 0 or idx == len(selected_frames) - 1:
                original_idx = frame_files.index(frame_file)
                print(f"  Loaded: {frame_file} (frame #{original_idx + 1}, {os.path.getsize(frame_path)/1024:.1f} KB)")

        except Exception as e:
            print(f"  Error loading frame {frame_file}: {str(e)}")

    if frame_interval == 1:
        print(f"Successfully loaded ALL {len(frames_data)} frames for {video_id}")
    else:
        print(f"Successfully loaded {len(frames_data)} frames for {video_id} (every {frame_interval}th frame)")
    return frames_data

def process_all_crime_folders(api_key):
    """Process all crime folders with true iterative analysis"""
    # Initialize analyzer
    analyzer = TrueIterativeClaudeAnalyzer(api_key)

    # Discover all videos and frames
    all_videos = discover_all_videos_and_frames(DATA_DIR)

    if not all_videos:
        print("No videos found to process!")
        return {}

    all_results = {}
    skipped_videos = []

    print(f"\nProcessing {len(all_videos)} videos with TRUE iterative prompting analysis using {analyzer.model_name}...")
    print(f"Frame processing: {'ALL frames' if FRAME_INTERVAL == 1 else f'Every {FRAME_INTERVAL}th frame'}")

    # Process each video
    for video_key, video_info in all_videos.items():
        print(f"\nProcessing video: {video_key}")

        try:
            # Load frames for this video (every Nth frame as configured)
            frames_data = load_frames_for_video(video_info, frame_interval=FRAME_INTERVAL)

            if frames_data:
                # Analyze frames with true iterative approach
                results = analyzer.analyze_frames(
                    frames_data,
                    video_info['video_id'],
                    video_info['crime_type']
                )
                all_results[video_key] = results
            else:
                print(f"No frames found for video {video_key} - skipping")
                skipped_videos.append(f"{video_key} (no frames)")

        except Exception as e:
            print(f"Error processing video {video_key}: {str(e)}")
            skipped_videos.append(f"{video_key} (error: {str(e)})")

    # Save summary results
    summary_file = os.path.join(SAVE_DIR, f"true_iterative_claude_summary_{time.strftime('%Y%m%d_%H%M%S')}.json")
    with open(summary_file, 'w') as f:
        json.dump(all_results, f, indent=2)

    # Log skipped videos
    if skipped_videos:
        skipped_file = os.path.join(SAVE_DIR, f"skipped_videos_{time.strftime('%Y%m%d_%H%M%S')}.txt")
        with open(skipped_file, 'w') as f:
            f.write("Videos that could not be processed:\n")
            for video in skipped_videos:
                f.write(f"{video}\n")
        print(f"\nSkipped {len(skipped_videos)} videos. List saved to: {skipped_file}")

    print(f"\nComplete TRUE iterative analysis saved to: {summary_file}")
    print(f"Successfully processed {len(all_results)} videos")

    return all_results

def test_claude_api(api_key):
    """Test Claude API connection"""
    print("Testing Claude API connection...")

    try:
        client = anthropic.Anthropic(api_key=api_key)

        response = client.messages.create(
            model="claude-sonnet-4-20250514",
            max_tokens=100,
            messages=[
                {
                    "role": "user",
                    "content": "Hello, can you respond with 'API connection successful'?"
                }
            ]
        )

        response_text = response.content[0].text
        print("✓ Claude API connection successful!")
        print(f"Response: {response_text}")
        return True

    except anthropic.APIError as e:
        print(f"✗ API Error: {str(e)}")
        return False
    except anthropic.AuthenticationError as e:
        print(f"✗ Authentication Error: {str(e)}")
        print("Please check your API key is valid and has sufficient credits.")
        return False
    except anthropic.RateLimitError as e:
        print(f"✗ Rate Limit Error: {str(e)}")
        return False
    except Exception as e:
        print(f"✗ Connection error: {str(e)}")
        return False

def check_authentication():
    """Placeholder function to check authentication"""
    return True

def run():
    """Main execution function"""
    print("TRUE Iterative Prompting Crime Video Analysis with Claude Sonnet 4 - ALL Frames")
    print("="*85)
    print("TRUE ITERATIVE = Same question refined repeatedly until convergence")
    print("="*85)

    # Test directory access first
    print("Testing directory access...")
    for path in [DATA_DIR, SAVE_DIR]:
        print(f"Path: {path}")
        print(f"  Exists: {os.path.exists(path)}")
        if os.path.exists(path):
            try:
                contents = os.listdir(path)
                print(f"  Contains {len(contents)} items")
                if contents:
                    print(f"  First few items: {contents[:3]}")
            except Exception as e:
                print(f"  Error accessing contents: {str(e)}")

    # Get API key
    try:
        api_key_path = "/home/opade7/Documents/gpu-test/API-KEYS/claude.txt"
        print(f"Trying to load API key from: {api_key_path}")
        print(f"File exists: {os.path.exists(api_key_path)}")

        with open(api_key_path, "r") as f:
            api_key = f.read().strip()

        if not api_key:
            print("✗ Failed to load Claude API key: File is empty")
            return

        print("✓ Successfully loaded Claude API key")
        print(f"API key starts with: {api_key[:10]}...")

    except Exception as e:
        print(f"✗ Failed to load Claude API key: {str(e)}")
        return

    # Test Claude API connection
    if not test_claude_api(api_key):
        print("✗ Claude API test failed. Please check your API key and connection.")
        return

    # Check authentication
    if not check_authentication():
        print("✗ Authentication not completed.")
        return

    # Verify directories exist
    print("\nVerifying directories:")
    print(f"Data directory exists: {os.path.exists(DATA_DIR)}")
    print(f"Save directory exists: {os.path.exists(SAVE_DIR)}")

    if not os.path.exists(DATA_DIR):
        print(f"✗ Data directory not found: {DATA_DIR}")
        return

    # Create save directory if it doesn't exist
    os.makedirs(SAVE_DIR, exist_ok=True)

    # Process all crime folders
    results = process_all_crime_folders(api_key)

    # Print summary
    total_frames_processed = 0
    total_videos_processed = len(results)
    total_iterations_run = 0
    convergence_achieved = 0

    for video_id, video_results in results.items():
        if video_results and 'True_Iterative_Analysis' in video_results:
            analysis = video_results['True_Iterative_Analysis']
            total_frames_processed += analysis.get('valid_frames', 0)
            if 'convergence_summary' in analysis:
                total_iterations_run += analysis['convergence_summary'].get('total_iterations_run', 0)
                if analysis['convergence_summary'].get('converged', False):
                    convergence_achieved += 1

    print("\n" + "="*85)
    print(f"TRUE ITERATIVE PROMPTING ANALYSIS COMPLETE!")
    print(f"Videos processed: {total_videos_processed}")
    print(f"Total frames analyzed: {total_frames_processed}")
    print(f"Total iterations run: {total_iterations_run}")
    print(f"Convergence achieved: {convergence_achieved}/{total_videos_processed} videos")
    print(f"Model used: claude-sonnet-4-20250514")
    print(f"Method: Same core question refined repeatedly")
    print(f"Frame processing: {'ALL frames' if FRAME_INTERVAL == 1 else f'Every {FRAME_INTERVAL}th frame'}")
    print("="*85)

if __name__ == "__main__":
    run()

Not running in Colab or drive module not available
TRUE Iterative Prompting Crime Video Analysis with Claude Sonnet 4 - ALL Frames
TRUE ITERATIVE = Same question refined repeatedly until convergence
Testing directory access...
Path: /home/opade7/Documents/gpu-test/crime-data
  Exists: True
  Contains 11 items
  First few items: ['Burglary', 'Stealing', 'Fighting']
Path: /home/opade7/Documents/gpu-test/RESULT-PROMPT/CLAUDE/TRUE-ITERATIVE
  Exists: False
Trying to load API key from: /home/opade7/Documents/gpu-test/API-KEYS/claude.txt
File exists: True
✓ Successfully loaded Claude API key
API key starts with: sk-ant-api...
Testing Claude API connection...
✓ Claude API connection successful!
Response: API connection successful

Verifying directories:
Data directory exists: True
Save directory exists: False

=== DISCOVERING ALL VIDEOS AND FRAMES ===
Scanning directory: /home/opade7/Documents/gpu-test/crime-data
Found 11 crime type directories: ['Burglary', 'Stealing', 'Fighting', 'Robbery',

#Self-Consistency
This approach generates multiple independent analyses and determines the most reliable interpretation through consensus.
Self-Consistency Prompting Approach
The Self-Consistency technique follows a unique multi-analysis process:

Multiple Independent Analyses: The system generates several different analyses of the same frames
Diverse Perspectives: Each analysis uses a different prompt template to encourage varied viewpoints
Consensus Determination: The system identifies areas of agreement and disagreement across analyses
Confidence Assessment: For each key element, the level of consensus is explicitly evaluated

Implementation Highlights

Multi-Perspective Analysis:

Generates 5 independent analyses for each chunk of frames
Uses 5 distinct prompt templates to encourage diversity:

Standard analytical perspective
Forensic analyst perspective
Detective/law enforcement perspective
Security expert perspective
Witness testimony perspective


Higher temperature settings (0.5) for greater response diversity


Complete Frame Processing:

Processes all frames in chunks of 10 frames each
Each chunk undergoes the full multi-analysis process


Two-Level Consensus Building:

Chunk-Level Consensus: After generating multiple analyses for each chunk, determines consensus on:

Crime type
Perpetrator description
Victim description
Key actions
Evidence
Timeline


Cross-Chunk Consensus: After processing all chunks, synthesizes a final consensus across the entire video

In [9]:
import os
import json
import base64
import time
from datetime import datetime
from collections import defaultdict
from concurrent.futures import ThreadPoolExecutor
import anthropic

# Add Google Drive mounting for Colab
try:
    from google.colab import drive
    drive.mount('/content/drive')
    print("Google Drive mounted successfully!")
except ImportError:
    print("Not running in Colab or drive module not available")

# Configuration
DATA_DIR = "/home/opade7/Documents/gpu-test/crime-data"
SAVE_DIR = "/home/opade7/Documents/gpu-test/RESULT-PROMPT/CLAUDE/SELF-CONSISTENCY"
FRAME_INTERVAL = 1  # Process ALL frames (set to higher number for sampling)

class SelfConsistencyClaudeAnalyzer:
    def __init__(self, api_key):
        self.api_key = api_key
        self.model_name = "claude-sonnet-4-20250514"  # Claude Sonnet 4
        self.client = anthropic.Anthropic(api_key=api_key)
        self.save_dir = SAVE_DIR
        self.max_frames_per_request = 20
        self.chunk_size = 10
        self.num_independent_runs = 5  # Number of independent analyses for consistency checking
        os.makedirs(self.save_dir, exist_ok=True)

    def make_claude_request(self, messages, temperature=0.3):
        """Make a single request to Claude API with specified temperature using Anthropic SDK"""
        try:
            response = self.client.messages.create(
                model=self.model_name,
                max_tokens=4096,
                temperature=temperature,  # Variable temperature for diversity
                messages=messages
            )
            return response.content[0].text

        except anthropic.APIError as e:
            print(f"API Error: {str(e)}")
            return f"API Error: {str(e)}"
        except anthropic.RateLimitError as e:
            print(f"Rate limit error: {str(e)}")
            print("Waiting 60 seconds before retrying...")
            time.sleep(60)
            # Retry once
            try:
                response = self.client.messages.create(
                    model=self.model_name,
                    max_tokens=4096,
                    temperature=temperature,
                    messages=messages
                )
                return response.content[0].text
            except Exception as retry_e:
                print(f"Retry failed: {str(retry_e)}")
                return f"Error after retry: {str(retry_e)}"
        except anthropic.AuthenticationError as e:
            print(f"Authentication error: {str(e)}")
            return f"Authentication Error: {str(e)}"
        except Exception as e:
            print(f"API request error: {str(e)}")
            return f"Error: {str(e)}"

    def generate_independent_analysis(self, frame_data, run_number, video_id, crime_type):
        """Generate a single independent analysis of the frames"""
        print(f"  Generating independent analysis #{run_number}...")

        # Comprehensive analysis prompt (same for all runs to ensure consistency testing)
        comprehensive_prompt = f"""Analyze these video frames from {crime_type} video {video_id} thoroughly and provide a comprehensive assessment. Please address the following aspects:

1. SCENE DESCRIPTION: Describe the environment, setting, and overall context.

2. PEOPLE IDENTIFICATION: Identify all individuals present, their apparent ages, genders, and relationships.

3. ACTIONS AND BEHAVIORS: Detail what each person is doing and their specific behaviors.

4. INTERACTIONS: Analyze how the people are interacting with each other.

5. BODY LANGUAGE: Examine postures, facial expressions, and non-verbal communication.

6. POWER DYNAMICS: Assess any signs of dominance, submission, control, or vulnerability.

7. CONCERNING ELEMENTS: Identify any red flags, inappropriate behaviors, or signs of potential harm.

8. OVERALL ASSESSMENT: Provide your comprehensive conclusion about what is happening and level of concern.

Be thorough, specific, and evidence-based in your analysis."""

        run_responses = []

        # Use slightly different temperature for each run to encourage diversity
        temperatures = [0.1, 0.2, 0.3, 0.4, 0.5]
        temperature = temperatures[run_number % len(temperatures)]

        # Process frames in chunks for this independent run
        for i in range(0, len(frame_data), self.chunk_size):
            chunk = frame_data[i:i + self.chunk_size]

            # Prepare content with images
            content = [{"type": "text", "text": comprehensive_prompt}]

            # Add images to content
            for frame in chunk:
                content.append({
                    "type": "image",
                    "source": {
                        "type": "base64",
                        "media_type": "image/png",
                        "data": frame
                    }
                })

            messages = [{
                "role": "user",
                "content": content
            }]

            # Make API request for this chunk
            response = self.make_claude_request(messages, temperature)
            run_responses.append(response)

            # Rate limiting
            time.sleep(2)

        # Combine responses for this run
        if len(run_responses) == 1:
            combined_response = run_responses[0]
        else:
            combined_response = "\n\n=== NEXT CHUNK ===\n\n".join(run_responses)

        return combined_response

    def analyze_consistency(self, independent_analyses, video_id, crime_type):
        """Analyze consistency across independent runs"""
        print("Analyzing consistency across independent analyses...")

        # Create consistency analysis prompt
        consistency_prompt = f"""You have {len(independent_analyses)} independent analyses of the same video frames from {crime_type} video {video_id}. Please analyze the consistency across these analyses and provide:

1. CONSISTENT FINDINGS: What observations appear consistently across most or all analyses?

2. INCONSISTENT FINDINGS: What observations vary significantly between analyses?

3. CONFIDENCE LEVELS: Which findings have high confidence (consistent) vs low confidence (inconsistent)?

4. DISCREPANCY ANALYSIS: Where analyses disagree, what might explain the differences?

5. CONSENSUS ASSESSMENT: Based on the most consistent findings, what is the most reliable assessment?

6. RELIABILITY SCORE: Rate the overall reliability of the consensus (1-10 scale).

Here are the independent analyses:

"""

        # Add each independent analysis
        for i, analysis in enumerate(independent_analyses, 1):
            consistency_prompt += f"\n=== INDEPENDENT ANALYSIS #{i} ===\n{analysis}\n"

        consistency_prompt += "\n\nNow provide your consistency analysis:"

        # Make request for consistency analysis
        messages = [{
            "role": "user",
            "content": consistency_prompt
        }]

        consistency_analysis = self.make_claude_request(messages, temperature=0.1)
        return consistency_analysis

    def process_frames_self_consistency(self, frame_data, video_id, crime_type):
        """Process frames using self-consistency prompting strategy"""
        print(f"Starting self-consistency analysis with {self.num_independent_runs} independent runs...")
        print(f"Using model: {self.model_name}")

        # Generate multiple independent analyses
        independent_analyses = []
        for run_num in range(1, self.num_independent_runs + 1):
            print(f"\n--- Independent Run {run_num}/{self.num_independent_runs} ---")
            analysis = self.generate_independent_analysis(frame_data, run_num, video_id, crime_type)
            independent_analyses.append(analysis)
            print(f"  Completed run {run_num}")

            # Longer delay between independent runs
            if run_num < self.num_independent_runs:
                print(f"  Waiting 5 seconds before next independent run...")
                time.sleep(5)

        # Analyze consistency across runs
        print(f"\n--- Consistency Analysis ---")
        consistency_analysis = self.analyze_consistency(independent_analyses, video_id, crime_type)

        # Compile results
        results = {
            "independent_analyses": {},
            "consistency_analysis": consistency_analysis,
            "methodology": {
                "num_runs": self.num_independent_runs,
                "approach": "Multiple independent analyses with consistency checking",
                "temperatures_used": [0.1, 0.2, 0.3, 0.4, 0.5],
                "model": self.model_name
            }
        }

        # Store each independent analysis
        for i, analysis in enumerate(independent_analyses, 1):
            results["independent_analyses"][f"run_{i}"] = {
                "run_number": i,
                "analysis": analysis,
                "temperature": [0.1, 0.2, 0.3, 0.4, 0.5][(i-1) % 5],
                "model": self.model_name
            }

        return results

    def save_results(self, results, filename):
        """Save results to a file"""
        filepath = os.path.join(self.save_dir, filename)
        with open(filepath, 'w') as f:
            json.dump(results, f, indent=2)
        print(f"Results saved to: {filepath}")

    def analyze_frames(self, frames_data, video_id, crime_type):
        """Analyze frames with self-consistency prompting approach"""
        try:
            frame_names = list(frames_data.keys())

            # Improved sorting function for frame numbers
            def extract_frame_number(filename):
                try:
                    # Handle different naming patterns
                    if '_frame_' in filename:
                        parts = filename.split('_frame_')
                        if len(parts) > 1:
                            number_part = parts[1].split('.')[0]
                            return int(number_part)
                    elif 'frame' in filename.lower():
                        # Alternative pattern matching
                        import re
                        numbers = re.findall(r'\d+', filename)
                        if numbers:
                            return int(numbers[-1])  # Use the last number found
                except Exception as e:
                    print(f"Error extracting frame number from {filename}: {str(e)}")
                    return 0

            sorted_frames = sorted(frame_names, key=extract_frame_number)

            print(f"\n=== ANALYZING VIDEO: {video_id} ({crime_type}) ===")
            print(f"Total frames loaded: {len(frames_data)}")
            print(f"Frame names sample: {sorted_frames[:5]}{'...' if len(sorted_frames) > 5 else ''}")
            print(f"Using model: {self.model_name}")

            results = {}
            timestamp = time.strftime("%Y%m%d_%H%M%S")

            try:
                frame_data = [frames_data[frame_name] for frame_name in sorted_frames
                             if frame_name in frames_data and frames_data[frame_name]]

                if not frame_data:
                    results["Self_Consistency_Analysis"] = {
                        "error": "No valid frames were available for analysis.",
                        "frames_used": len(sorted_frames),
                        "valid_frames": 0,
                        "crime_type": crime_type,
                        "model": self.model_name
                    }
                    print("WARNING: No valid frames were available for analysis.")
                else:
                    print(f"Processing {len(frame_data)} valid frames with self-consistency prompting...")
                    consistency_results = self.process_frames_self_consistency(frame_data, video_id, crime_type)

                    results["Self_Consistency_Analysis"] = {
                        "method": "self_consistency_prompting",
                        "description": "Multiple independent analyses with consistency verification",
                        "crime_type": crime_type,
                        "consistency_results": consistency_results,
                        "frames_used": len(sorted_frames),
                        "valid_frames": len(frame_data),
                        "analysis_timestamp": timestamp,
                        "model": self.model_name
                    }

                # Save results
                self.save_results(results, f"{crime_type}_{video_id}_self_consistency_analysis_{timestamp}.json")
                print(f"Self-consistency analysis for {video_id} ({crime_type}) completed and saved.")

            except Exception as e:
                print(f"Error processing self-consistency analysis: {str(e)}")
                results["Self_Consistency_Analysis"] = {
                    "method": "self_consistency_prompting",
                    "error": str(e),
                    "frames_used": len(sorted_frames) if 'sorted_frames' in locals() else 0,
                    "crime_type": crime_type,
                    "model": self.model_name
                }

            return results

        except Exception as e:
            print(f"Error in analyze_frames: {str(e)}")
            raise

def discover_all_videos_and_frames(data_dir):
    """Discover all crime types, videos, and their frames"""
    print(f"\n=== DISCOVERING ALL VIDEOS AND FRAMES ===")
    print(f"Scanning directory: {data_dir}")

    all_videos = {}

    try:
        # Get all subdirectories (crime types)
        crime_types = [d for d in os.listdir(data_dir)
                      if os.path.isdir(os.path.join(data_dir, d))]

        print(f"Found {len(crime_types)} crime type directories: {crime_types}")

        for crime_type in crime_types:
            crime_dir = os.path.join(data_dir, crime_type)
            print(f"\nScanning {crime_type} directory...")

            try:
                all_files = os.listdir(crime_dir)
                print(f"  Found {len(all_files)} files")

                # Group files by video ID
                video_groups = defaultdict(list)

                for filename in all_files:
                    # Skip non-image files
                    if not any(filename.lower().endswith(ext) for ext in ['.png', '.jpg', '.jpeg', '.bmp']):
                        continue

                    # Extract video ID from filename
                    video_id = extract_video_id_from_filename(filename)
                    if video_id:
                        video_groups[video_id].append(filename)

                print(f"  Identified {len(video_groups)} unique videos:")
                for video_id, frames in video_groups.items():
                    print(f"    {video_id}: {len(frames)} frames")
                    all_videos[f"{crime_type}_{video_id}"] = {
                        'crime_type': crime_type,
                        'video_id': video_id,
                        'frames': frames,
                        'crime_dir': crime_dir
                    }

            except Exception as e:
                print(f"  Error scanning {crime_type}: {str(e)}")

    except Exception as e:
        print(f"Error accessing main directory: {str(e)}")

    print(f"\nTotal videos discovered: {len(all_videos)}")
    return all_videos

def extract_video_id_from_filename(filename):
    """Extract video ID from filename using various patterns"""
    import re

    # Remove file extension
    name_without_ext = os.path.splitext(filename)[0]

    # Pattern 1: VideoName_something_frame_number
    if '_frame_' in name_without_ext:
        parts = name_without_ext.split('_frame_')
        return parts[0]

    # Pattern 2: VideoName_number (assuming last part is frame number)
    parts = name_without_ext.split('_')
    if len(parts) >= 2:
        # Check if last part is a number
        try:
            int(parts[-1])
            return '_'.join(parts[:-1])
        except ValueError:
            pass

    # Pattern 3: Just use the filename without numbers at the end
    video_id = re.sub(r'_?\d+$', '', name_without_ext)
    if video_id and video_id != name_without_ext:
        return video_id

    # Pattern 4: If all else fails, use the whole name (assuming single frame)
    return name_without_ext

def load_frames_for_video(video_info, frame_interval=1):
    """Load every Nth frame for a specific video (default: ALL frames)"""
    frames_data = {}
    crime_dir = video_info['crime_dir']
    frame_files = video_info['frames']
    video_id = video_info['video_id']

    print(f"\nLoading {'ALL frames' if frame_interval == 1 else f'every {frame_interval}th frame'} for {video_id}...")

    # Sort frames by frame number
    def extract_frame_number(filename):
        try:
            import re
            numbers = re.findall(r'\d+', filename)
            if numbers:
                return int(numbers[-1])  # Use the last number found
        except Exception:
            pass
        return 0

    frame_files.sort(key=extract_frame_number)

    # Select every Nth frame
    selected_frames = frame_files[::frame_interval]
    if frame_interval == 1:
        print(f"  Processing ALL {len(selected_frames)} frames")
    else:
        print(f"  Selected {len(selected_frames)} frames from {len(frame_files)} total frames")

    # Load selected frames
    for idx, frame_file in enumerate(selected_frames):
        frame_path = os.path.join(crime_dir, frame_file)
        try:
            with open(frame_path, 'rb') as f:
                frame_data = base64.b64encode(f.read()).decode('utf-8')
                frames_data[frame_file] = frame_data

            # Show progress
            if idx < 3 or idx % 10 == 0 or idx == len(selected_frames) - 1:
                original_idx = frame_files.index(frame_file)
                print(f"  Loaded: {frame_file} (frame #{original_idx + 1}, {os.path.getsize(frame_path)/1024:.1f} KB)")

        except Exception as e:
            print(f"  Error loading frame {frame_file}: {str(e)}")

    if frame_interval == 1:
        print(f"Successfully loaded ALL {len(frames_data)} frames for {video_id}")
    else:
        print(f"Successfully loaded {len(frames_data)} frames for {video_id} (every {frame_interval}th frame)")
    return frames_data

def process_all_crime_folders(api_key):
    """Process all crime folders with self-consistency analysis"""
    # Initialize analyzer
    analyzer = SelfConsistencyClaudeAnalyzer(api_key)

    # Discover all videos and frames
    all_videos = discover_all_videos_and_frames(DATA_DIR)

    if not all_videos:
        print("No videos found to process!")
        return {}

    all_results = {}
    skipped_videos = []

    print(f"\nProcessing {len(all_videos)} videos with self-consistency prompting analysis using {analyzer.model_name}...")
    print(f"Frame processing: {'ALL frames' if FRAME_INTERVAL == 1 else f'Every {FRAME_INTERVAL}th frame'}")

    # Process each video
    for video_key, video_info in all_videos.items():
        print(f"\nProcessing video: {video_key}")

        try:
            # Load frames for this video (every Nth frame as configured)
            frames_data = load_frames_for_video(video_info, frame_interval=FRAME_INTERVAL)

            if frames_data:
                # Analyze frames with self-consistency approach
                results = analyzer.analyze_frames(
                    frames_data,
                    video_info['video_id'],
                    video_info['crime_type']
                )
                all_results[video_key] = results
            else:
                print(f"No frames found for video {video_key} - skipping")
                skipped_videos.append(f"{video_key} (no frames)")

        except Exception as e:
            print(f"Error processing video {video_key}: {str(e)}")
            skipped_videos.append(f"{video_key} (error: {str(e)})")

    # Save summary results
    summary_file = os.path.join(SAVE_DIR, f"self_consistency_summary_{time.strftime('%Y%m%d_%H%M%S')}.json")
    with open(summary_file, 'w') as f:
        json.dump(all_results, f, indent=2)

    # Log skipped videos
    if skipped_videos:
        skipped_file = os.path.join(SAVE_DIR, f"skipped_videos_{time.strftime('%Y%m%d_%H%M%S')}.txt")
        with open(skipped_file, 'w') as f:
            f.write("Videos that could not be processed:\n")
            for video in skipped_videos:
                f.write(f"{video}\n")
        print(f"\nSkipped {len(skipped_videos)} videos. List saved to: {skipped_file}")

    print(f"\nComplete self-consistency analysis saved to: {summary_file}")
    print(f"Successfully processed {len(all_results)} videos")

    return all_results

def test_claude_api(api_key):
    """Test Claude API connection"""
    print("Testing Claude API connection...")

    try:
        client = anthropic.Anthropic(api_key=api_key)

        response = client.messages.create(
            model="claude-sonnet-4-20250514",
            max_tokens=100,
            messages=[
                {
                    "role": "user",
                    "content": "Hello, can you respond with 'API connection successful'?"
                }
            ]
        )

        response_text = response.content[0].text
        print("✓ Claude API connection successful!")
        print(f"Response: {response_text}")
        return True

    except anthropic.APIError as e:
        print(f"✗ API Error: {str(e)}")
        return False
    except anthropic.AuthenticationError as e:
        print(f"✗ Authentication Error: {str(e)}")
        print("Please check your API key is valid and has sufficient credits.")
        return False
    except anthropic.RateLimitError as e:
        print(f"✗ Rate Limit Error: {str(e)}")
        return False
    except Exception as e:
        print(f"✗ Connection error: {str(e)}")
        return False

def check_authentication():
    """Placeholder function to check authentication"""
    return True

def run():
    """Main execution function"""
    print("Self-Consistency Prompting Crime Video Analysis with Claude Sonnet 4 - ALL Frames")
    print("="*85)
    print("Self-Consistency = Multiple independent analyses with consistency verification")
    print("="*85)

    # Test directory access first
    print("Testing directory access...")
    for path in [DATA_DIR, SAVE_DIR]:
        print(f"Path: {path}")
        print(f"  Exists: {os.path.exists(path)}")
        if os.path.exists(path):
            try:
                contents = os.listdir(path)
                print(f"  Contains {len(contents)} items")
                if contents:
                    print(f"  First few items: {contents[:3]}")
            except Exception as e:
                print(f"  Error accessing contents: {str(e)}")

    # Get API key
    try:
        api_key_path = "/home/opade7/Documents/gpu-test/API-KEYS/claude.txt"
        print(f"Trying to load API key from: {api_key_path}")
        print(f"File exists: {os.path.exists(api_key_path)}")

        with open(api_key_path, "r") as f:
            api_key = f.read().strip()

        if not api_key:
            print("✗ Failed to load Claude API key: File is empty")
            return

        print("✓ Successfully loaded Claude API key")
        print(f"API key starts with: {api_key[:10]}...")

    except Exception as e:
        print(f"✗ Failed to load Claude API key: {str(e)}")
        return

    # Test Claude API connection
    if not test_claude_api(api_key):
        print("✗ Claude API test failed. Please check your API key and connection.")
        return

    # Check authentication
    if not check_authentication():
        print("✗ Authentication not completed.")
        return

    # Verify directories exist
    print("\nVerifying directories:")
    print(f"Data directory exists: {os.path.exists(DATA_DIR)}")
    print(f"Save directory exists: {os.path.exists(SAVE_DIR)}")

    if not os.path.exists(DATA_DIR):
        print(f"✗ Data directory not found: {DATA_DIR}")
        return

    # Create save directory if it doesn't exist
    os.makedirs(SAVE_DIR, exist_ok=True)

    # Process all crime folders
    results = process_all_crime_folders(api_key)

    # Print summary
    total_frames_processed = 0
    total_videos_processed = len(results)
    total_independent_runs = 0

    for video_id, video_results in results.items():
        if video_results and 'Self_Consistency_Analysis' in video_results:
            analysis = video_results['Self_Consistency_Analysis']
            total_frames_processed += analysis.get('valid_frames', 0)
            if 'consistency_results' in analysis and 'methodology' in analysis['consistency_results']:
                total_independent_runs += analysis['consistency_results']['methodology'].get('num_runs', 0)

    print("\n" + "="*85)
    print(f"SELF-CONSISTENCY PROMPTING ANALYSIS COMPLETE!")
    print(f"Videos processed: {total_videos_processed}")
    print(f"Total frames analyzed: {total_frames_processed}")
    print(f"Total independent runs: {total_independent_runs}")
    print(f"Model used: claude-sonnet-4-20250514")
    print(f"Analysis pattern: Multiple Independent → Consistency Check → Consensus")
    print(f"Frame processing: {'ALL frames' if FRAME_INTERVAL == 1 else f'Every {FRAME_INTERVAL}th frame'}")
    print("="*85)

if __name__ == "__main__":
    run()

Not running in Colab or drive module not available
Self-Consistency Prompting Crime Video Analysis with Claude Sonnet 4 - ALL Frames
Self-Consistency = Multiple independent analyses with consistency verification
Testing directory access...
Path: /home/opade7/Documents/gpu-test/crime-data
  Exists: True
  Contains 11 items
  First few items: ['Burglary', 'Stealing', 'Fighting']
Path: /home/opade7/Documents/gpu-test/RESULT-PROMPT/CLAUDE/SELF-CONSISTENCY
  Exists: False
Trying to load API key from: /home/opade7/Documents/gpu-test/API-KEYS/claude.txt
File exists: True
✓ Successfully loaded Claude API key
API key starts with: sk-ant-api...
Testing Claude API connection...
✓ Claude API connection successful!
Response: API connection successful

Verifying directories:
Data directory exists: True
Save directory exists: False

=== DISCOVERING ALL VIDEOS AND FRAMES ===
Scanning directory: /home/opade7/Documents/gpu-test/crime-data
Found 11 crime type directories: ['Burglary', 'Stealing', 'Fighti

#Meta-Prompting
- Meta-Prompting that processes all frames from crime videos. This technique is unique because it uses the AI to generate its own specialized prompts for analysis.

Meta-Prompting Approach
The Meta-Prompting technique follows this innovative process:

- Prompt Generation: Instead of using predefined prompts, the system asks the AI to create specialized prompts for analyzing video frames
- Prompt Application: These AI-generated prompts are then used to analyze the actual frames
- Meta-Synthesis: The system also generates a specialized synthesis prompt to combine all chunk analyses

Implementation Highlights

Two-Stage Meta-Prompting:

- First Stage: For each chunk of frames, generate a specialized analysis prompt
- Second Stage: For final synthesis, generate a specialized synthesis prompt
- Both stages use the AI to create task-specific prompts rather than using predefined ones


Complete Frame Processing:

- Processes all frames in chunks of 10 frames each
- Each chunk undergoes the full meta-prompting process independently


Specialized Prompt Design Process: Guides the AI to create prompts that focus on:

Step-by-step observation:
- Objective description before interpretation
- Attention to easily missed details
- Organizing observations into a coherent narrative
- Avoids including example responses in the generated prompts


Fallback Safety:

- If meta-prompting fails, falls back to a simple seed prompt
Ensures analysis can continue even if prompt generation has issues

In [10]:
import os
import json
import base64
import time
from datetime import datetime
from collections import defaultdict
from concurrent.futures import ThreadPoolExecutor
import anthropic

# Add Google Drive mounting for Colab
try:
    from google.colab import drive
    drive.mount('/content/drive')
    print("Google Drive mounted successfully!")
except ImportError:
    print("Not running in Colab or drive module not available")

# Configuration
DATA_DIR = "/home/opade7/Documents/gpu-test/crime-data"
SAVE_DIR = "/home/opade7/Documents/gpu-test/RESULT-PROMPT/CLAUDE/META-PROMPTING"
FRAME_INTERVAL = 1  # Process ALL frames (set to higher number for sampling)

class MetaPromptingClaudeAnalyzer:
    def __init__(self, api_key):
        self.api_key = api_key
        self.model_name = "claude-sonnet-4-20250514"  # Claude Sonnet 4
        self.client = anthropic.Anthropic(api_key=api_key)
        self.save_dir = SAVE_DIR
        self.max_frames_per_request = 20
        self.chunk_size = 10
        os.makedirs(self.save_dir, exist_ok=True)

    def make_claude_request(self, messages, temperature=0.1):
        """Make a single request to Claude API using Anthropic SDK"""
        try:
            response = self.client.messages.create(
                model=self.model_name,
                max_tokens=4096,
                temperature=temperature,
                messages=messages
            )
            return response.content[0].text

        except anthropic.APIError as e:
            print(f"API Error: {str(e)}")
            return f"API Error: {str(e)}"
        except anthropic.RateLimitError as e:
            print(f"Rate limit error: {str(e)}")
            print("Waiting 60 seconds before retrying...")
            time.sleep(60)
            # Retry once
            try:
                response = self.client.messages.create(
                    model=self.model_name,
                    max_tokens=4096,
                    temperature=temperature,
                    messages=messages
                )
                return response.content[0].text
            except Exception as retry_e:
                print(f"Retry failed: {str(retry_e)}")
                return f"Error after retry: {str(retry_e)}"
        except anthropic.AuthenticationError as e:
            print(f"Authentication error: {str(e)}")
            return f"Authentication Error: {str(e)}"
        except Exception as e:
            print(f"API request error: {str(e)}")
            return f"Error: {str(e)}"

    def generate_meta_prompts(self, crime_type, video_id):
        """Meta-prompting phase: Generate optimized prompts for analysis"""
        print("=== META-PROMPTING PHASE: Generating Optimized Analysis Prompts ===")
        print(f"Using model: {self.model_name}")

        meta_prompt = f"""You are an expert in prompt engineering and crime video analysis. Your task is to design the most effective prompts for analyzing potentially concerning video content from a {crime_type} case (video {video_id}).

Given that you will be analyzing video frames that may contain inappropriate or harmful behavior related to {crime_type}, design specialized prompts for different aspects of the analysis that would maximize accuracy, thoroughness, and reliability.

Create optimized prompts for these 6 key analysis areas:

1. SCENE_ANALYSIS: For understanding the environment and context
2. PERSON_IDENTIFICATION: For identifying individuals and their characteristics
3. BEHAVIOR_ANALYSIS: For analyzing actions and behaviors
4. INTERACTION_DYNAMICS: For understanding relationships and interactions
5. RISK_ASSESSMENT: For identifying concerning elements and red flags
6. COMPREHENSIVE_SYNTHESIS: For creating final assessments

For each area, provide:
- An optimized prompt that would elicit the most accurate and thorough analysis
- Key questions that should be addressed
- Important considerations and potential pitfalls to avoid
- Specific instructions for evidence-based reasoning

Format your response clearly with headers for each analysis area. Make the prompts sophisticated, specific, and designed to maximize analytical quality for {crime_type} video analysis."""

        messages = [{
            "role": "user",
            "content": meta_prompt
        }]

        print("Generating specialized analysis prompts...")
        meta_response = self.make_claude_request(messages, temperature=0.3)

        return meta_response

    def extract_generated_prompts(self, meta_response):
        """Extract and structure the generated prompts"""
        print("Extracting and structuring generated prompts...")

        extraction_prompt = f"""From the following meta-prompting response, extract the specific prompts for each analysis area and format them as a structured JSON object.

Meta-prompting response:
{meta_response}

Extract and format as JSON with this structure:
{{
    "scene_analysis": {{
        "prompt": "extracted prompt text",
        "key_questions": ["question1", "question2", ...],
        "considerations": ["consideration1", "consideration2", ...]
    }},
    "person_identification": {{
        "prompt": "extracted prompt text",
        "key_questions": ["question1", "question2", ...],
        "considerations": ["consideration1", "consideration2", ...]
    }},
    "behavior_analysis": {{
        "prompt": "extracted prompt text",
        "key_questions": ["question1", "question2", ...],
        "considerations": ["consideration1", "consideration2", ...]
    }},
    "interaction_dynamics": {{
        "prompt": "extracted prompt text",
        "key_questions": ["question1", "question2", ...],
        "considerations": ["consideration1", "consideration2", ...]
    }},
    "risk_assessment": {{
        "prompt": "extracted prompt text",
        "key_questions": ["question1", "question2", ...],
        "considerations": ["consideration1", "consideration2", ...]
    }},
    "comprehensive_synthesis": {{
        "prompt": "extracted prompt text",
        "key_questions": ["question1", "question2", ...],
        "considerations": ["consideration1", "consideration2", ...]
    }}
}}

Provide only the JSON structure with the extracted content."""

        messages = [{
            "role": "user",
            "content": extraction_prompt
        }]

        extraction_response = self.make_claude_request(messages, temperature=0.1)

        try:
            # Attempt to parse as JSON, with fallback handling
            import json
            if extraction_response.strip().startswith('{'):
                generated_prompts = json.loads(extraction_response)
            else:
                # If not proper JSON, create a structured fallback
                generated_prompts = self.create_fallback_prompts(meta_response)
        except:
            generated_prompts = self.create_fallback_prompts(meta_response)

        return generated_prompts

    def create_fallback_prompts(self, meta_response):
        """Create fallback prompts if extraction fails"""
        return {
            "scene_analysis": {
                "prompt": "Analyze the scene environment, setting, location, and overall context visible in these video frames. Consider lighting, objects, spatial layout, and any environmental factors that might be relevant.",
                "key_questions": ["Where is this taking place?", "What type of environment is this?", "What objects and features are visible?"],
                "considerations": ["Note environmental factors that might affect behavior", "Consider privacy vs public settings"]
            },
            "person_identification": {
                "prompt": "Identify all individuals present in the frames. Describe their apparent ages, genders, physical characteristics, clothing, and positioning. Consider their relationships and roles.",
                "key_questions": ["How many people are present?", "What are their apparent ages and characteristics?", "What relationships do they appear to have?"],
                "considerations": ["Be objective about descriptions", "Note power imbalances based on age/size"]
            },
            "behavior_analysis": {
                "prompt": "Analyze the specific behaviors, actions, and movements of each person. Focus on what they are doing, how they are moving, and their physical interactions.",
                "key_questions": ["What specific actions is each person performing?", "How are they moving or positioned?", "What physical interactions are occurring?"],
                "considerations": ["Distinguish between voluntary and involuntary behaviors", "Note any signs of distress or discomfort"]
            },
            "interaction_dynamics": {
                "prompt": "Examine the interpersonal dynamics and relationships between individuals. Analyze communication patterns, body language, and social dynamics.",
                "key_questions": ["How are people interacting with each other?", "What does body language suggest?", "Who appears to be leading interactions?"],
                "considerations": ["Look for signs of consent vs coercion", "Note power dynamics and control patterns"]
            },
            "risk_assessment": {
                "prompt": "Identify any concerning elements, red flags, or indicators of inappropriate or harmful behavior. Focus on signs that warrant concern or further attention.",
                "key_questions": ["What concerning elements are present?", "Are there signs of inappropriate behavior?", "What risks or harms might be indicated?"],
                "considerations": ["Be specific about concerning observations", "Consider context and alternative explanations"]
            },
            "comprehensive_synthesis": {
                "prompt": "Synthesize all previous analyses into a comprehensive assessment. Integrate findings from scene, people, behaviors, interactions, and risks into a coherent conclusion.",
                "key_questions": ["What is the overall picture?", "How do all observations fit together?", "What is the level of concern?"],
                "considerations": ["Base conclusions on evidence", "Acknowledge limitations and uncertainties"]
            }
        }

    def apply_generated_prompts(self, frame_data, generated_prompts, video_id, crime_type):
        """Apply the meta-generated prompts to analyze the frames"""
        print("=== APPLICATION PHASE: Using Generated Prompts for Analysis ===")

        analysis_results = {}

        # Define the analysis sequence
        analysis_sequence = [
            "scene_analysis",
            "person_identification",
            "behavior_analysis",
            "interaction_dynamics",
            "risk_assessment",
            "comprehensive_synthesis"
        ]

        # Apply each generated prompt
        for analysis_type in analysis_sequence:
            if analysis_type in generated_prompts:
                print(f"\nApplying {analysis_type.replace('_', ' ').title()} prompt...")

                prompt_data = generated_prompts[analysis_type]
                optimized_prompt = prompt_data.get("prompt", "")
                key_questions = prompt_data.get("key_questions", [])
                considerations = prompt_data.get("considerations", [])

                # Enhance prompt with key questions and considerations
                full_prompt = f"Analyzing {crime_type} video {video_id}:\n\n{optimized_prompt}\n\nKey questions to address:\n"
                for q in key_questions:
                    full_prompt += f"- {q}\n"

                full_prompt += f"\nImportant considerations:\n"
                for c in considerations:
                    full_prompt += f"- {c}\n"

                full_prompt += f"\nProvide a thorough analysis addressing these points."

                # Apply to frame chunks
                type_responses = []
                for i in range(0, len(frame_data), self.chunk_size):
                    chunk = frame_data[i:i + self.chunk_size]

                    # Prepare content with images
                    content = [{"type": "text", "text": full_prompt}]

                    # Add images to content
                    for frame in chunk:
                        content.append({
                            "type": "image",
                            "source": {
                                "type": "base64",
                                "media_type": "image/png",
                                "data": frame
                            }
                        })

                    messages = [{
                        "role": "user",
                        "content": content
                    }]

                    response = self.make_claude_request(messages)
                    type_responses.append(response)

                    time.sleep(3)

                # Combine responses for this analysis type
                if len(type_responses) == 1:
                    combined_response = type_responses[0]
                else:
                    combined_response = "\n\n=== NEXT CHUNK ===\n\n".join(type_responses)

                analysis_results[analysis_type] = {
                    "generated_prompt": optimized_prompt,
                    "key_questions": key_questions,
                    "considerations": considerations,
                    "analysis_result": combined_response,
                    "model": self.model_name
                }

                print(f"Completed {analysis_type.replace('_', ' ')} analysis")

        return analysis_results

    def evaluate_prompt_effectiveness(self, analysis_results, video_id, crime_type):
        """Evaluate the effectiveness of the generated prompts"""
        print("=== EVALUATION PHASE: Assessing Prompt Effectiveness ===")

        evaluation_prompt = f"""Evaluate the effectiveness of the meta-generated prompts based on the analysis results below for {crime_type} video {video_id}.

For each analysis type, assess:
1. PROMPT QUALITY: How well did the generated prompt elicit thorough analysis?
2. COMPLETENESS: Did the analysis address all intended aspects?
3. SPECIFICITY: How specific and detailed were the results?
4. RELEVANCE: How relevant were the findings to crime video analysis?
5. ACTIONABILITY: How useful are the insights for decision-making?

Rate each aspect 1-10 and provide improvement suggestions.

Analysis Results:
{json.dumps(analysis_results, indent=2)}

Provide your evaluation in a structured format."""

        messages = [{
            "role": "user",
            "content": evaluation_prompt
        }]

        evaluation_response = self.make_claude_request(messages)
        return evaluation_response

    def process_frames_meta_prompting(self, frame_data, video_id, crime_type):
        """Process frames using meta-prompting strategy"""
        print("Starting meta-prompting analysis...")
        print(f"Using model: {self.model_name}")

        # Phase 1: Generate optimized prompts
        meta_response = self.generate_meta_prompts(crime_type, video_id)

        # Phase 2: Extract and structure prompts
        generated_prompts = self.extract_generated_prompts(meta_response)

        # Phase 3: Apply generated prompts
        analysis_results = self.apply_generated_prompts(frame_data, generated_prompts, video_id, crime_type)

        # Phase 4: Evaluate prompt effectiveness
        evaluation = self.evaluate_prompt_effectiveness(analysis_results, video_id, crime_type)

        # Compile complete meta-prompting results
        meta_prompting_results = {
            "meta_prompting_process": {
                "phase_1_meta_generation": meta_response,
                "phase_2_prompt_extraction": generated_prompts,
                "phase_3_analysis_application": analysis_results,
                "phase_4_effectiveness_evaluation": evaluation
            },
            "methodology": {
                "approach": "Meta-prompting with self-generated optimized prompts",
                "model": self.model_name,
                "phases": [
                    "Meta-prompt generation",
                    "Prompt extraction and structuring",
                    "Application of generated prompts",
                    "Effectiveness evaluation"
                ]
            }
        }

        return meta_prompting_results

    def save_results(self, results, filename):
        """Save results to a file"""
        filepath = os.path.join(self.save_dir, filename)
        with open(filepath, 'w') as f:
            json.dump(results, f, indent=2)
        print(f"Results saved to: {filepath}")

    def analyze_frames(self, frames_data, video_id, crime_type):
        """Analyze frames with meta-prompting approach"""
        try:
            frame_names = list(frames_data.keys())

            # Improved sorting function for frame numbers
            def extract_frame_number(filename):
                try:
                    # Handle different naming patterns
                    if '_frame_' in filename:
                        parts = filename.split('_frame_')
                        if len(parts) > 1:
                            number_part = parts[1].split('.')[0]
                            return int(number_part)
                    elif 'frame' in filename.lower():
                        # Alternative pattern matching
                        import re
                        numbers = re.findall(r'\d+', filename)
                        if numbers:
                            return int(numbers[-1])  # Use the last number found
                except Exception as e:
                    print(f"Error extracting frame number from {filename}: {str(e)}")
                    return 0

            sorted_frames = sorted(frame_names, key=extract_frame_number)

            print(f"\n=== ANALYZING VIDEO: {video_id} ({crime_type}) ===")
            print(f"Total frames loaded: {len(frames_data)}")
            print(f"Frame names sample: {sorted_frames[:5]}{'...' if len(sorted_frames) > 5 else ''}")
            print(f"Using model: {self.model_name}")

            results = {}
            timestamp = time.strftime("%Y%m%d_%H%M%S")

            try:
                frame_data = [frames_data[frame_name] for frame_name in sorted_frames
                             if frame_name in frames_data and frames_data[frame_name]]

                if not frame_data:
                    results["Meta_Prompting_Analysis"] = {
                        "error": "No valid frames were available for analysis.",
                        "frames_used": len(sorted_frames),
                        "valid_frames": 0,
                        "crime_type": crime_type,
                        "model": self.model_name
                    }
                    print("WARNING: No valid frames were available for analysis.")
                else:
                    print(f"Processing {len(frame_data)} valid frames with meta-prompting...")
                    meta_results = self.process_frames_meta_prompting(frame_data, video_id, crime_type)

                    results["Meta_Prompting_Analysis"] = {
                        "method": "meta_prompting",
                        "description": "Self-generated optimized prompts for enhanced analysis",
                        "crime_type": crime_type,
                        "meta_prompting_results": meta_results,
                        "frames_used": len(sorted_frames),
                        "valid_frames": len(frame_data),
                        "analysis_timestamp": timestamp,
                        "model": self.model_name
                    }

                # Save results
                self.save_results(results, f"{crime_type}_{video_id}_meta_prompting_analysis_{timestamp}.json")
                print(f"Meta-prompting analysis for {video_id} ({crime_type}) completed and saved.")

            except Exception as e:
                print(f"Error processing meta-prompting analysis: {str(e)}")
                results["Meta_Prompting_Analysis"] = {
                    "method": "meta_prompting",
                    "error": str(e),
                    "frames_used": len(sorted_frames) if 'sorted_frames' in locals() else 0,
                    "crime_type": crime_type,
                    "model": self.model_name
                }

            return results

        except Exception as e:
            print(f"Error in analyze_frames: {str(e)}")
            raise

def discover_all_videos_and_frames(data_dir):
    """Discover all crime types, videos, and their frames"""
    print(f"\n=== DISCOVERING ALL VIDEOS AND FRAMES ===")
    print(f"Scanning directory: {data_dir}")

    all_videos = {}

    try:
        # Get all subdirectories (crime types)
        crime_types = [d for d in os.listdir(data_dir)
                      if os.path.isdir(os.path.join(data_dir, d))]

        print(f"Found {len(crime_types)} crime type directories: {crime_types}")

        for crime_type in crime_types:
            crime_dir = os.path.join(data_dir, crime_type)
            print(f"\nScanning {crime_type} directory...")

            try:
                all_files = os.listdir(crime_dir)
                print(f"  Found {len(all_files)} files")

                # Group files by video ID
                video_groups = defaultdict(list)

                for filename in all_files:
                    # Skip non-image files
                    if not any(filename.lower().endswith(ext) for ext in ['.png', '.jpg', '.jpeg', '.bmp']):
                        continue

                    # Extract video ID from filename
                    video_id = extract_video_id_from_filename(filename)
                    if video_id:
                        video_groups[video_id].append(filename)

                print(f"  Identified {len(video_groups)} unique videos:")
                for video_id, frames in video_groups.items():
                    print(f"    {video_id}: {len(frames)} frames")
                    all_videos[f"{crime_type}_{video_id}"] = {
                        'crime_type': crime_type,
                        'video_id': video_id,
                        'frames': frames,
                        'crime_dir': crime_dir
                    }

            except Exception as e:
                print(f"  Error scanning {crime_type}: {str(e)}")

    except Exception as e:
        print(f"Error accessing main directory: {str(e)}")

    print(f"\nTotal videos discovered: {len(all_videos)}")
    return all_videos

def extract_video_id_from_filename(filename):
    """Extract video ID from filename using various patterns"""
    import re

    # Remove file extension
    name_without_ext = os.path.splitext(filename)[0]

    # Pattern 1: VideoName_something_frame_number
    if '_frame_' in name_without_ext:
        parts = name_without_ext.split('_frame_')
        return parts[0]

    # Pattern 2: VideoName_number (assuming last part is frame number)
    parts = name_without_ext.split('_')
    if len(parts) >= 2:
        # Check if last part is a number
        try:
            int(parts[-1])
            return '_'.join(parts[:-1])
        except ValueError:
            pass

    # Pattern 3: Just use the filename without numbers at the end
    video_id = re.sub(r'_?\d+$', '', name_without_ext)
    if video_id and video_id != name_without_ext:
        return video_id

    # Pattern 4: If all else fails, use the whole name (assuming single frame)
    return name_without_ext

def load_frames_for_video(video_info, frame_interval=1):
    """Load every Nth frame for a specific video (default: ALL frames)"""
    frames_data = {}
    crime_dir = video_info['crime_dir']
    frame_files = video_info['frames']
    video_id = video_info['video_id']

    print(f"\nLoading {'ALL frames' if frame_interval == 1 else f'every {frame_interval}th frame'} for {video_id}...")

    # Sort frames by frame number
    def extract_frame_number(filename):
        try:
            import re
            numbers = re.findall(r'\d+', filename)
            if numbers:
                return int(numbers[-1])  # Use the last number found
        except Exception:
            pass
        return 0

    frame_files.sort(key=extract_frame_number)

    # Select every Nth frame
    selected_frames = frame_files[::frame_interval]
    if frame_interval == 1:
        print(f"  Processing ALL {len(selected_frames)} frames")
    else:
        print(f"  Selected {len(selected_frames)} frames from {len(frame_files)} total frames")

    # Load selected frames
    for idx, frame_file in enumerate(selected_frames):
        frame_path = os.path.join(crime_dir, frame_file)
        try:
            with open(frame_path, 'rb') as f:
                frame_data = base64.b64encode(f.read()).decode('utf-8')
                frames_data[frame_file] = frame_data

            # Show progress
            if idx < 3 or idx % 10 == 0 or idx == len(selected_frames) - 1:
                original_idx = frame_files.index(frame_file)
                print(f"  Loaded: {frame_file} (frame #{original_idx + 1}, {os.path.getsize(frame_path)/1024:.1f} KB)")

        except Exception as e:
            print(f"  Error loading frame {frame_file}: {str(e)}")

    if frame_interval == 1:
        print(f"Successfully loaded ALL {len(frames_data)} frames for {video_id}")
    else:
        print(f"Successfully loaded {len(frames_data)} frames for {video_id} (every {frame_interval}th frame)")
    return frames_data

def process_all_crime_folders(api_key):
    """Process all crime folders with meta-prompting analysis"""
    # Initialize analyzer
    analyzer = MetaPromptingClaudeAnalyzer(api_key)

    # Discover all videos and frames
    all_videos = discover_all_videos_and_frames(DATA_DIR)

    if not all_videos:
        print("No videos found to process!")
        return {}

    all_results = {}
    skipped_videos = []

    print(f"\nProcessing {len(all_videos)} videos with meta-prompting analysis using {analyzer.model_name}...")
    print(f"Frame processing: {'ALL frames' if FRAME_INTERVAL == 1 else f'Every {FRAME_INTERVAL}th frame'}")

    # Process each video
    for video_key, video_info in all_videos.items():
        print(f"\nProcessing video: {video_key}")

        try:
            # Load frames for this video (every Nth frame as configured)
            frames_data = load_frames_for_video(video_info, frame_interval=FRAME_INTERVAL)

            if frames_data:
                # Analyze frames with meta-prompting approach
                results = analyzer.analyze_frames(
                    frames_data,
                    video_info['video_id'],
                    video_info['crime_type']
                )
                all_results[video_key] = results
            else:
                print(f"No frames found for video {video_key} - skipping")
                skipped_videos.append(f"{video_key} (no frames)")

        except Exception as e:
            print(f"Error processing video {video_key}: {str(e)}")
            skipped_videos.append(f"{video_key} (error: {str(e)})")

    # Save summary results
    summary_file = os.path.join(SAVE_DIR, f"meta_prompting_summary_{time.strftime('%Y%m%d_%H%M%S')}.json")
    with open(summary_file, 'w') as f:
        json.dump(all_results, f, indent=2)

    # Log skipped videos
    if skipped_videos:
        skipped_file = os.path.join(SAVE_DIR, f"skipped_videos_{time.strftime('%Y%m%d_%H%M%S')}.txt")
        with open(skipped_file, 'w') as f:
            f.write("Videos that could not be processed:\n")
            for video in skipped_videos:
                f.write(f"{video}\n")
        print(f"\nSkipped {len(skipped_videos)} videos. List saved to: {skipped_file}")

    print(f"\nComplete meta-prompting analysis saved to: {summary_file}")
    print(f"Successfully processed {len(all_results)} videos")

    return all_results

def test_claude_api(api_key):
    """Test Claude API connection"""
    print("Testing Claude API connection...")

    try:
        client = anthropic.Anthropic(api_key=api_key)

        response = client.messages.create(
            model="claude-sonnet-4-20250514",
            max_tokens=100,
            messages=[
                {
                    "role": "user",
                    "content": "Hello, can you respond with 'API connection successful'?"
                }
            ]
        )

        response_text = response.content[0].text
        print("✓ Claude API connection successful!")
        print(f"Response: {response_text}")
        return True

    except anthropic.APIError as e:
        print(f"✗ API Error: {str(e)}")
        return False
    except anthropic.AuthenticationError as e:
        print(f"✗ Authentication Error: {str(e)}")
        print("Please check your API key is valid and has sufficient credits.")
        return False
    except anthropic.RateLimitError as e:
        print(f"✗ Rate Limit Error: {str(e)}")
        return False
    except Exception as e:
        print(f"✗ Connection error: {str(e)}")
        return False

def check_authentication():
    """Placeholder function to check authentication"""
    return True

def run():
    """Main execution function"""
    print("Meta-Prompting Crime Video Analysis with Claude Sonnet 4 - ALL Frames")
    print("="*80)
    print("Meta-Prompting = Self-generated optimized prompts for enhanced analysis")
    print("="*80)

    # Test directory access first
    print("Testing directory access...")
    for path in [DATA_DIR, SAVE_DIR]:
        print(f"Path: {path}")
        print(f"  Exists: {os.path.exists(path)}")
        if os.path.exists(path):
            try:
                contents = os.listdir(path)
                print(f"  Contains {len(contents)} items")
                if contents:
                    print(f"  First few items: {contents[:3]}")
            except Exception as e:
                print(f"  Error accessing contents: {str(e)}")

    # Get API key
    try:
        api_key_path = "/home/opade7/Documents/gpu-test/API-KEYS/claude.txt"
        print(f"Trying to load API key from: {api_key_path}")
        print(f"File exists: {os.path.exists(api_key_path)}")

        with open(api_key_path, "r") as f:
            api_key = f.read().strip()

        if not api_key:
            print("✗ Failed to load Claude API key: File is empty")
            return

        print("✓ Successfully loaded Claude API key")
        print(f"API key starts with: {api_key[:10]}...")

    except Exception as e:
        print(f"✗ Failed to load Claude API key: {str(e)}")
        return

    # Test Claude API connection
    if not test_claude_api(api_key):
        print("✗ Claude API test failed. Please check your API key and connection.")
        return

    # Check authentication
    if not check_authentication():
        print("✗ Authentication not completed.")
        return

    # Verify directories exist
    print("\nVerifying directories:")
    print(f"Data directory exists: {os.path.exists(DATA_DIR)}")
    print(f"Save directory exists: {os.path.exists(SAVE_DIR)}")

    if not os.path.exists(DATA_DIR):
        print(f"✗ Data directory not found: {DATA_DIR}")
        return

    # Create save directory if it doesn't exist
    os.makedirs(SAVE_DIR, exist_ok=True)

    # Process all crime folders
    results = process_all_crime_folders(api_key)

    # Print summary
    total_frames_processed = 0
    total_videos_processed = len(results)
    total_phases_completed = 0

    for video_id, video_results in results.items():
        if video_results and 'Meta_Prompting_Analysis' in video_results:
            analysis = video_results['Meta_Prompting_Analysis']
            total_frames_processed += analysis.get('valid_frames', 0)
            if 'meta_prompting_results' in analysis and 'methodology' in analysis['meta_prompting_results']:
                total_phases_completed += len(analysis['meta_prompting_results']['methodology'].get('phases', []))

    print("\n" + "="*80)
    print(f"META-PROMPTING ANALYSIS COMPLETE!")
    print(f"Videos processed: {total_videos_processed}")
    print(f"Total frames analyzed: {total_frames_processed}")
    print(f"Total meta-phases completed: {total_phases_completed}")
    print(f"Model used: claude-sonnet-4-20250514")
    print(f"Analysis pattern: Generate → Extract → Apply → Evaluate")
    print(f"Frame processing: {'ALL frames' if FRAME_INTERVAL == 1 else f'Every {FRAME_INTERVAL}th frame'}")
    print("="*80)

if __name__ == "__main__":
    run()

Not running in Colab or drive module not available
Meta-Prompting Crime Video Analysis with Claude Sonnet 4 - ALL Frames
Meta-Prompting = Self-generated optimized prompts for enhanced analysis
Testing directory access...
Path: /home/opade7/Documents/gpu-test/crime-data
  Exists: True
  Contains 11 items
  First few items: ['Burglary', 'Stealing', 'Fighting']
Path: /home/opade7/Documents/gpu-test/RESULT-PROMPT/CLAUDE/META-PROMPTING
  Exists: False
Trying to load API key from: /home/opade7/Documents/gpu-test/API-KEYS/claude.txt
File exists: True
✓ Successfully loaded Claude API key
API key starts with: sk-ant-api...
Testing Claude API connection...
✓ Claude API connection successful!
Response: API connection successful

Verifying directories:
Data directory exists: True
Save directory exists: False

=== DISCOVERING ALL VIDEOS AND FRAMES ===
Scanning directory: /home/opade7/Documents/gpu-test/crime-data
Found 11 crime type directories: ['Burglary', 'Stealing', 'Fighting', 'Robbery', 'Shop

#Chain-Of-Thought Prompting
Chain of Thought (CoT) prompting approach that processes all frames from crime videos. This technique explicitly encourages the model to show its

step-by-step reasoning process.
- Chain of Thought Prompting Approach: The Chain of Thought technique follows this explicit reasoning process:

Step-by-Step Reasoning: The approach explicitly asks the model to "think step by step" through its analysis
- Transparent Reasoning: Each reasoning step is clearly articulated in the response
- Structured Progression: The analysis follows a logical progression from observation to conclusion
- Reasoning Synthesis: The final synthesis also uses step-by-step reasoning to connect all segments

Implementation Highlights

Structured Reasoning Steps:

The prompt breaks down the analysis into 6 clear steps:

- Objective observation without interpretation
- Identification of key actors
- Chronological sequence of events
- Important objects and their usage
- Context and setting analysis
- Integration of observations into a coherent description


Each step builds on the previous one in a logical progression


Complete Frame Processing:

- Processes all frames in chunks of 10 frames each
- Each chunk undergoes the full chain of thought process independently


Reasoning-Based Synthesis: The synthesis prompt also follows a chain of thought structure:

- Extraction of key information from each segment
- Timeline construction across all segments
- Tracking people across multiple segments
- Tracking objects across segments
- Contextual integration of segments
- Construction of a comprehensive description


This ensures the synthesis uses the same reasoning approach as individual chunks


Explicit Prompting for Reasoning:

- Both the analysis and synthesis prompts specifically ask to "think step by step"
- System messages reinforce the importance of step-by-step reasoning
The model is explicitly asked to show its thinking process at each step

In [11]:
import os
import json
import base64
import time
from datetime import datetime
from collections import defaultdict
import anthropic

# Add Google Drive mounting for Colab
try:
    from google.colab import drive
    drive.mount('/content/drive')
    print("Google Drive mounted successfully!")
except ImportError:
    print("Not running in Colab or drive module not available")

# Configuration
DATA_DIR = "/home/opade7/Documents/gpu-test/crime-data"
SAVE_DIR = "/home/opade7/Documents/gpu-test/RESULT-PROMPT/CLAUDE/CHAIN-OF-THOUGHT"
FRAME_INTERVAL = 1  # Process ALL frames (set to higher number for sampling)

class ChainOfThoughtClaudeAnalyzer:
    def __init__(self, api_key):
        self.api_key = api_key
        self.model_name = "claude-sonnet-4-20250514"  # Claude Sonnet 4
        self.client = anthropic.Anthropic(api_key=api_key)
        self.save_dir = SAVE_DIR
        self.chunk_size = 10
        os.makedirs(self.save_dir, exist_ok=True)

        # Chain of Thought prompt template
        self.cot_prompt_template = """
Analyze these video frames from {crime_type} video {video_id} using a chain of thought reasoning process. Think step by step as you examine what's happening:

Step 1: First, carefully observe and list what you can actually see in the frames. Note people, objects, settings, and actions without interpretation.

Step 2: Identify the key actors in the scene. Describe each person's appearance and what they are doing. Track individuals across multiple frames.

Step 3: Describe the sequence of events chronologically. What happens first, next, and after that?

Step 4: Note any important objects or items in the scene and how they're being used.

Step 5: Consider the context and setting. Where is this taking place? What kind of environment is shown?

Step 6: Based on all the above observations, describe what appears to be happening in these frames.

Make sure to clearly show your thinking process for each step. These are frames {frame_range} of {total_frames}.
"""

        # Chain of Thought synthesis prompt
        self.cot_synthesis_prompt = """
You are going to synthesize multiple analyses of different segments of the same {crime_type} video ({video_id}) into a coherent understanding of the entire sequence. Use chain of thought reasoning to connect all segments into a complete narrative.

Think through the following steps:

Step 1: Review each segment analysis and extract the key information about people, objects, and actions from each one.

Step 2: Create a timeline by arranging events across all segments in chronological order.

Step 3: Identify which people appear across multiple segments and track their actions throughout.

Step 4: Note how objects or items are used or moved across the entire sequence.

Step 5: Consider the overall context and how different segments relate to each other.

Step 6: Based on all the above reasoning, construct a comprehensive description of what happens throughout the entire video.

Show your thinking at each step as you build your understanding of the complete video sequence.
"""

    def make_claude_request(self, messages, temperature=0.1):
        """Make a single request to Claude API using Anthropic SDK"""
        try:
            response = self.client.messages.create(
                model=self.model_name,
                max_tokens=4096,
                temperature=temperature,
                messages=messages
            )
            return response.content[0].text

        except anthropic.APIError as e:
            print(f"API Error: {str(e)}")
            return f"API Error: {str(e)}"
        except anthropic.RateLimitError as e:
            print(f"Rate limit error: {str(e)}")
            print("Waiting 60 seconds before retrying...")
            time.sleep(60)
            # Retry once
            try:
                response = self.client.messages.create(
                    model=self.model_name,
                    max_tokens=4096,
                    temperature=temperature,
                    messages=messages
                )
                return response.content[0].text
            except Exception as retry_e:
                print(f"Retry failed: {str(retry_e)}")
                return f"Error after retry: {str(retry_e)}"
        except anthropic.AuthenticationError as e:
            print(f"Authentication error: {str(e)}")
            return f"Authentication Error: {str(e)}"
        except Exception as e:
            print(f"API request error: {str(e)}")
            return f"Error: {str(e)}"

    def _format_chunk_analyses(self, all_chunk_analyses):
        """Helper method to format chunk analyses for synthesis"""
        newline = '\n'
        separator = '-' * 40

        formatted_chunks = []
        for analysis in all_chunk_analyses:
            chunk_text = f"SEGMENT {analysis['chunk']} (Frames {analysis['frame_range']}):{newline}{analysis['analysis']}{newline}{newline}{separator}{newline}"
            formatted_chunks.append(chunk_text)

        return ''.join(formatted_chunks)

    def process_frames_with_cot(self, frames_data, video_id, crime_type):
        """Process frames with chain of thought prompting approach"""
        # Extract frame data from the dictionary
        frame_names = list(frames_data.keys())

        # Improved sorting function for frame numbers
        def extract_frame_number(filename):
            try:
                # Handle different naming patterns
                if '_frame_' in filename:
                    parts = filename.split('_frame_')
                    if len(parts) > 1:
                        number_part = parts[1].split('.')[0]
                        return int(number_part)
                elif 'frame' in filename.lower():
                    # Alternative pattern matching
                    import re
                    numbers = re.findall(r'\d+', filename)
                    if numbers:
                        return int(numbers[-1])  # Use the last number found
            except Exception:
                pass
            return 0

        sorted_frames = sorted(frame_names, key=extract_frame_number)
        frame_data = [frames_data[frame_name] for frame_name in sorted_frames if frame_name in frames_data and frames_data[frame_name]]

        if not frame_data:
            return {"error": "No valid frames available for analysis"}

        total_frames = len(frame_data)
        print(f"Processing all {total_frames} frames with chain of thought approach")
        print(f"Using model: {self.model_name}")

        # Process all frames by dividing them into chunks
        chunk_size = 10
        frame_chunks = [frame_data[i:i+chunk_size] for i in range(0, total_frames, chunk_size)]
        print(f"Split into {len(frame_chunks)} chunks of approximately {chunk_size} frames each")

        # Initialize results
        cot_results = {}
        all_chunk_analyses = []

        # Process each chunk of frames
        for chunk_idx, chunk in enumerate(frame_chunks):
            frame_start = chunk_idx * chunk_size + 1
            frame_end = min((chunk_idx + 1) * chunk_size, total_frames)
            frame_range = f"{frame_start}-{frame_end}"

            print(f"Processing chunk {chunk_idx+1}/{len(frame_chunks)} (frames {frame_range})...")

            # Initialize chunk results
            chunk_results = {
                "frame_range": frame_range,
                "model": self.model_name
            }

            # Format the CoT prompt for this chunk
            formatted_cot_prompt = self.cot_prompt_template.format(
                crime_type=crime_type,
                video_id=video_id,
                frame_range=frame_range,
                total_frames=total_frames
            )

            # Prepare content with text and images for Claude
            content = [
                {
                    "type": "text",
                    "text": formatted_cot_prompt
                }
            ]

            # Add frames to content
            for frame in chunk:
                content.append({
                    "type": "image",
                    "source": {
                        "type": "base64",
                        "media_type": "image/png",
                        "data": frame
                    }
                })

            # Create messages for Claude
            messages = [
                {
                    "role": "user",
                    "content": content
                }
            ]

            try:
                print(f"  Sending request to Claude for chunk {chunk_idx+1}...")
                cot_analysis = self.make_claude_request(messages, temperature=0.1)

                if not cot_analysis.startswith("Error"):
                    print(f"  Received response for chunk {chunk_idx+1}")

                    # Save the CoT analysis
                    chunk_results["cot_analysis"] = cot_analysis

                    # Add to collection of all chunk analyses
                    all_chunk_analyses.append({
                        "chunk": chunk_idx + 1,
                        "frame_range": frame_range,
                        "analysis": cot_analysis
                    })
                else:
                    print(f"  Error in CoT analysis for chunk {chunk_idx+1}: {cot_analysis}")
                    chunk_results["error"] = cot_analysis

            except Exception as e:
                print(f"  Error in CoT analysis for chunk {chunk_idx+1}: {str(e)}")
                chunk_results["error"] = str(e)

            # Save results for this chunk
            cot_results[f"Chunk {chunk_idx+1}"] = chunk_results

            # Save intermediate results for this chunk
            timestamp = time.strftime("%Y%m%d_%H%M%S")
            chunk_result = {
                f"Chunk {chunk_idx+1}": chunk_results
            }
            self.save_results(chunk_result, f"{crime_type}_{video_id}_cot_chunk{chunk_idx+1}_{timestamp}.json")
            print(f"  Results for chunk {chunk_idx+1} saved")

            # Rate limiting between chunks
            print(f"  Waiting 3 seconds before next chunk...")
            time.sleep(3)

        # After processing all chunks, generate a synthesis using CoT
        if all_chunk_analyses:
            print("Generating chain of thought synthesis across all chunks...")

            # Create synthesis prompt with all chunk analyses
            synthesis_text = f"""
{self.cot_synthesis_prompt.format(crime_type=crime_type, video_id=video_id)}

Here are the analyses for each segment of the video:

{'-' * 40}
{self._format_chunk_analyses(all_chunk_analyses)}

Think step by step to synthesize these segments into a complete understanding of the video.
"""

            # Create messages for synthesis
            synthesis_messages = [
                {
                    "role": "user",
                    "content": synthesis_text
                }
            ]

            try:
                print("Sending request for final synthesis...")
                cot_synthesis = self.make_claude_request(synthesis_messages, temperature=0.1)

                if not cot_synthesis.startswith("Error"):
                    print("Synthesis complete!")

                    # Save CoT synthesis
                    cot_results["Chain of Thought Synthesis"] = {
                        "synthesis": cot_synthesis,
                        "model": self.model_name
                    }

                    # Save synthesis separately
                    timestamp = time.strftime("%Y%m%d_%H%M%S")
                    synthesis_result = {
                        "Chain of Thought Synthesis": cot_results["Chain of Thought Synthesis"]
                    }
                    self.save_results(synthesis_result, f"{crime_type}_{video_id}_cot_synthesis_{timestamp}.json")
                    print("Synthesis results saved")
                else:
                    print(f"Error in CoT synthesis: {cot_synthesis}")
                    cot_results["Chain of Thought Synthesis"] = {
                        "error": cot_synthesis,
                        "model": self.model_name
                    }
            except Exception as e:
                print(f"Error in CoT synthesis: {str(e)}")
                cot_results["Chain of Thought Synthesis"] = {
                    "error": str(e),
                    "model": self.model_name
                }

        return {
            "cot_results": cot_results,
            "crime_type": crime_type,
            "frames_used": total_frames,
            "chunks_processed": len(frame_chunks),
            "frames_per_chunk": chunk_size,
            "model": self.model_name
        }

    def save_results(self, results, filename):
        """Save results to a file"""
        filepath = os.path.join(self.save_dir, filename)
        with open(filepath, 'w') as f:
            json.dump(results, f, indent=2)
        print(f"Results saved to: {filepath}")

    def analyze_frames(self, frames_data, video_id, crime_type):
        """Analyze frames with chain of thought prompting"""
        try:
            print(f"\n=== ANALYZING VIDEO: {video_id} ({crime_type}) WITH CHAIN OF THOUGHT PROMPTING ===")
            print(f"Total frames loaded: {len(frames_data)}")
            print(f"Using model: {self.model_name}")

            timestamp = time.strftime("%Y%m%d_%H%M%S")
            results = self.process_frames_with_cot(frames_data, video_id, crime_type)

            # Save complete results
            self.save_results(results, f"{crime_type}_{video_id}_cot_complete_{timestamp}.json")
            print(f"Complete chain of thought analysis for {video_id} ({crime_type}) saved.")

            return results

        except Exception as e:
            print(f"Error in chain of thought analysis: {str(e)}")
            return {"error": str(e), "model": self.model_name}

def discover_all_videos_and_frames(data_dir):
    """Discover all crime types, videos, and their frames"""
    print(f"\n=== DISCOVERING ALL VIDEOS AND FRAMES ===")
    print(f"Scanning directory: {data_dir}")

    all_videos = {}

    try:
        # Get all subdirectories (crime types)
        crime_types = [d for d in os.listdir(data_dir)
                      if os.path.isdir(os.path.join(data_dir, d))]

        print(f"Found {len(crime_types)} crime type directories: {crime_types}")

        for crime_type in crime_types:
            crime_dir = os.path.join(data_dir, crime_type)
            print(f"\nScanning {crime_type} directory...")

            try:
                all_files = os.listdir(crime_dir)
                print(f"  Found {len(all_files)} files")

                # Group files by video ID
                video_groups = defaultdict(list)

                for filename in all_files:
                    # Skip non-image files
                    if not any(filename.lower().endswith(ext) for ext in ['.png', '.jpg', '.jpeg', '.bmp']):
                        continue

                    # Extract video ID from filename
                    video_id = extract_video_id_from_filename(filename)
                    if video_id:
                        video_groups[video_id].append(filename)

                print(f"  Identified {len(video_groups)} unique videos:")
                for video_id, frames in video_groups.items():
                    print(f"    {video_id}: {len(frames)} frames")
                    all_videos[f"{crime_type}_{video_id}"] = {
                        'crime_type': crime_type,
                        'video_id': video_id,
                        'frames': frames,
                        'crime_dir': crime_dir
                    }

            except Exception as e:
                print(f"  Error scanning {crime_type}: {str(e)}")

    except Exception as e:
        print(f"Error accessing main directory: {str(e)}")

    print(f"\nTotal videos discovered: {len(all_videos)}")
    return all_videos

def extract_video_id_from_filename(filename):
    """Extract video ID from filename using various patterns"""
    import re

    # Remove file extension
    name_without_ext = os.path.splitext(filename)[0]

    # Pattern 1: VideoName_something_frame_number
    if '_frame_' in name_without_ext:
        parts = name_without_ext.split('_frame_')
        return parts[0]

    # Pattern 2: VideoName_number (assuming last part is frame number)
    parts = name_without_ext.split('_')
    if len(parts) >= 2:
        # Check if last part is a number
        try:
            int(parts[-1])
            return '_'.join(parts[:-1])
        except ValueError:
            pass

    # Pattern 3: Just use the filename without numbers at the end
    video_id = re.sub(r'_?\d+$', '', name_without_ext)
    if video_id and video_id != name_without_ext:
        return video_id

    # Pattern 4: If all else fails, use the whole name (assuming single frame)
    return name_without_ext

def load_frames_for_video(video_info, frame_interval=1):
    """Load every Nth frame for a specific video (default: ALL frames)"""
    frames_data = {}
    crime_dir = video_info['crime_dir']
    frame_files = video_info['frames']
    video_id = video_info['video_id']

    print(f"\nLoading {'ALL frames' if frame_interval == 1 else f'every {frame_interval}th frame'} for {video_id}...")

    # Sort frames by frame number
    def extract_frame_number(filename):
        try:
            import re
            numbers = re.findall(r'\d+', filename)
            if numbers:
                return int(numbers[-1])  # Use the last number found
        except Exception:
            pass
        return 0

    frame_files.sort(key=extract_frame_number)

    # Select every Nth frame
    selected_frames = frame_files[::frame_interval]
    if frame_interval == 1:
        print(f"  Processing ALL {len(selected_frames)} frames")
    else:
        print(f"  Selected {len(selected_frames)} frames from {len(frame_files)} total frames")

    # Load selected frames
    for idx, frame_file in enumerate(selected_frames):
        frame_path = os.path.join(crime_dir, frame_file)
        try:
            with open(frame_path, 'rb') as f:
                frame_data = base64.b64encode(f.read()).decode('utf-8')
                frames_data[frame_file] = frame_data

            # Show progress
            if idx < 3 or idx % 10 == 0 or idx == len(selected_frames) - 1:
                original_idx = frame_files.index(frame_file)
                print(f"  Loaded: {frame_file} (frame #{original_idx + 1}, {os.path.getsize(frame_path)/1024:.1f} KB)")

        except Exception as e:
            print(f"  Error loading frame {frame_file}: {str(e)}")

    if frame_interval == 1:
        print(f"Successfully loaded ALL {len(frames_data)} frames for {video_id}")
    else:
        print(f"Successfully loaded {len(frames_data)} frames for {video_id} (every {frame_interval}th frame)")
    return frames_data

def process_all_crime_folders(api_key):
    """Process all crime folders with chain of thought prompting"""
    # Initialize analyzer
    analyzer = ChainOfThoughtClaudeAnalyzer(api_key)

    # Discover all videos and frames
    all_videos = discover_all_videos_and_frames(DATA_DIR)

    if not all_videos:
        print("No videos found to process!")
        return {}

    all_results = {}
    skipped_videos = []

    print(f"\nProcessing {len(all_videos)} videos with chain of thought prompting using {analyzer.model_name}...")
    print(f"Frame processing: {'ALL frames' if FRAME_INTERVAL == 1 else f'Every {FRAME_INTERVAL}th frame'}")

    # Process each video
    for video_key, video_info in all_videos.items():
        print(f"\nProcessing video: {video_key}")

        try:
            # Load frames for this video (every Nth frame as configured)
            frames_data = load_frames_for_video(video_info, frame_interval=FRAME_INTERVAL)

            if frames_data:
                # Analyze frames with chain of thought prompting
                results = analyzer.analyze_frames(
                    frames_data,
                    video_info['video_id'],
                    video_info['crime_type']
                )
                all_results[video_key] = results

                # Save results after each video for resilience
                timestamp = time.strftime("%Y%m%d_%H%M%S")
                current_progress = {video_key: results}
                analyzer.save_results(current_progress, f"progress_{video_key}_{timestamp}.json")
            else:
                print(f"No frames found for video {video_key} - skipping")
                skipped_videos.append(f"{video_key} (no frames)")

        except Exception as e:
            print(f"Error processing video {video_key}: {str(e)}")
            skipped_videos.append(f"{video_key} (error: {str(e)})")

    # Save summary results
    summary_file = os.path.join(SAVE_DIR, f"cot_summary_{time.strftime('%Y%m%d_%H%M%S')}.json")
    with open(summary_file, 'w') as f:
        json.dump(all_results, f, indent=2)

    # Log skipped videos
    if skipped_videos:
        skipped_file = os.path.join(SAVE_DIR, f"skipped_videos_{time.strftime('%Y%m%d_%H%M%S')}.txt")
        with open(skipped_file, 'w') as f:
            f.write("Videos that could not be processed:\n")
            for video in skipped_videos:
                f.write(f"{video}\n")
        print(f"\nSkipped {len(skipped_videos)} videos. List saved to: {skipped_file}")

    print(f"\nComplete chain of thought analysis saved to: {summary_file}")
    print(f"Successfully processed {len(all_results)} videos")

    return all_results

def test_claude_api(api_key):
    """Test Claude API connection"""
    print("Testing Claude API connection...")

    try:
        client = anthropic.Anthropic(api_key=api_key)

        response = client.messages.create(
            model="claude-sonnet-4-20250514",
            max_tokens=100,
            messages=[
                {
                    "role": "user",
                    "content": "Hello, can you respond with 'API connection successful'?"
                }
            ]
        )

        response_text = response.content[0].text
        print("✓ Claude API connection successful!")
        print(f"Response: {response_text}")
        return True

    except anthropic.APIError as e:
        print(f"✗ API Error: {str(e)}")
        return False
    except anthropic.AuthenticationError as e:
        print(f"✗ Authentication Error: {str(e)}")
        print("Please check your API key is valid and has sufficient credits.")
        return False
    except anthropic.RateLimitError as e:
        print(f"✗ Rate Limit Error: {str(e)}")
        return False
    except Exception as e:
        print(f"✗ Connection error: {str(e)}")
        return False

def run():
    """Main execution function"""
    print("Chain of Thought Prompting Crime Video Analysis with Claude Sonnet 4 - ALL Frames")
    print("="*80)
    print("Chain of Thought = Step-by-step reasoning with explicit thinking process")
    print("="*80)

    # Test directory access first
    print("Testing directory access...")
    for path in [DATA_DIR, SAVE_DIR]:
        print(f"Path: {path}")
        print(f"  Exists: {os.path.exists(path)}")
        if os.path.exists(path):
            try:
                contents = os.listdir(path)
                print(f"  Contains {len(contents)} items")
                if contents:
                    print(f"  First few items: {contents[:3]}")
            except Exception as e:
                print(f"  Error accessing contents: {str(e)}")

    # Get API key
    try:
        api_key_path = "/home/opade7/Documents/gpu-test/API-KEYS/claude.txt"
        print(f"Trying to load API key from: {api_key_path}")
        print(f"File exists: {os.path.exists(api_key_path)}")

        with open(api_key_path, "r") as f:
            api_key = f.read().strip()

        if not api_key:
            print("✗ Failed to load Claude API key: File is empty")
            return

        print("✓ Successfully loaded Claude API key")
        print(f"API key starts with: {api_key[:10]}...")

    except Exception as e:
        print(f"✗ Failed to load Claude API key: {str(e)}")
        return

    # Test Claude API connection
    if not test_claude_api(api_key):
        print("✗ Claude API test failed. Please check your API key and connection.")
        return

    # Verify directories exist
    print("\nVerifying directories:")
    print(f"Data directory exists: {os.path.exists(DATA_DIR)}")
    print(f"Save directory exists: {os.path.exists(SAVE_DIR)}")

    if not os.path.exists(DATA_DIR):
        print(f"✗ Data directory not found: {DATA_DIR}")
        return

    # Create save directory if it doesn't exist
    os.makedirs(SAVE_DIR, exist_ok=True)

    # Process all crime folders
    results = process_all_crime_folders(api_key)

    # Print summary
    total_frames_processed = 0
    total_videos_processed = len(results)
    total_chunks_processed = 0

    for video_id, video_results in results.items():
        if video_results and 'cot_results' in video_results:
            total_frames_processed += video_results.get('frames_used', 0)
            total_chunks_processed += video_results.get('chunks_processed', 0)

    print("\n" + "="*80)
    print(f"CHAIN OF THOUGHT PROMPTING COMPLETE!")
    print(f"Videos processed: {total_videos_processed}")
    print(f"Total frames analyzed: {total_frames_processed}")
    print(f"Total chunks processed: {total_chunks_processed}")
    print(f"Model used: claude-sonnet-4-20250514")
    print(f"Analysis pattern: Step 1 → Step 2 → ... → Step 6 → Synthesis")
    print(f"Frame processing: {'ALL frames' if FRAME_INTERVAL == 1 else f'Every {FRAME_INTERVAL}th frame'}")
    print("="*80)

if __name__ == "__main__":
    run()

Not running in Colab or drive module not available
Chain of Thought Prompting Crime Video Analysis with Claude Sonnet 4 - ALL Frames
Chain of Thought = Step-by-step reasoning with explicit thinking process
Testing directory access...
Path: /home/opade7/Documents/gpu-test/crime-data
  Exists: True
  Contains 11 items
  First few items: ['Burglary', 'Stealing', 'Fighting']
Path: /home/opade7/Documents/gpu-test/RESULT-PROMPT/CLAUDE/CHAIN-OF-THOUGHT
  Exists: False
Trying to load API key from: /home/opade7/Documents/gpu-test/API-KEYS/claude.txt
File exists: True
✓ Successfully loaded Claude API key
API key starts with: sk-ant-api...
Testing Claude API connection...
✓ Claude API connection successful!
Response: API connection successful

Verifying directories:
Data directory exists: True
Save directory exists: False

=== DISCOVERING ALL VIDEOS AND FRAMES ===
Scanning directory: /home/opade7/Documents/gpu-test/crime-data
Found 11 crime type directories: ['Burglary', 'Stealing', 'Fighting', '