In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!pip install -q anthropic

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/357.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m357.5/357.5 kB[0m [31m18.5 MB/s[0m eta [36m0:00:00[0m
[?25h

#This script tests ALL 8 prompting techniques from your original study:

In [4]:
#!/usr/bin/env python3
"""
COMPREHENSIVE Sensitivity Analysis for ALL Prompting Techniques
================================================================================
This script tests ALL 8 prompting techniques from your original study:
1. Zero-Shot
2. Sequential Prompting
3. Least-to-Most
4. ReAct (Reasoning and Acting)
5. Chain of Thought (CoT)
6. True Iterative
7. Self-Consistency
8. Meta-Prompting

For EACH technique, tests:
- Chunk sizes: 5, 10, 15, 20 frames
- Max tokens: 2048, 4096, 8192
- Temperatures: 0.1, 0.5, 1.0
- Frame extraction: Every 90th frame
- All 3 models: GPT-4o, Gemini Pro 1.5, Claude Sonnet 4.5
================================================================================
"""

import os
import json
import time
import base64
import requests
from datetime import datetime
import anthropic
import google.generativeai as genai
from openai import OpenAI
from PIL import Image
import io

# ========================== CONFIGURATION ==========================
DATA_DIR = "/content/drive/Shareddrives/DR KOFI RESEARCH/RESEARCH/COMPLETED/PROMPTS/crime-data"
RESULT_BASE_DIR = "/content/drive/Shareddrives/DR KOFI RESEARCH/RESEARCH/COMPLETED/PROMPTS/SENSITIVITY_ANALYSIS_COMPLETE"

# API Key Paths
GPT_KEY_PATH = "/content/drive/Shareddrives/DR KOFI RESEARCH/RESEARCH/COMPLETED/PROMPTS/SAVE/FINAL-COMPLETED/API-KEYS/chatgpt.txt"
GEMINI_KEY_PATH = "/content/drive/Shareddrives/DR KOFI RESEARCH/RESEARCH/COMPLETED/PROMPTS/SAVE/FINAL-COMPLETED/API-KEYS/Gemini.txt"
CLAUDE_KEY_PATH = "/content/drive/Shareddrives/DR KOFI RESEARCH/RESEARCH/COMPLETED/PROMPTS/SAVE/FINAL-COMPLETED/API-KEYS/claude.txt"

# Frame extraction
FRAME_INTERVAL = 90  # Every 90th frame

# Sensitivity parameters
CHUNK_SIZES = [5, 10, 15, 20]
MAX_TOKENS_OPTIONS = [2048, 4096, 8192]
TEMPERATURE_OPTIONS = [0.1, 0.5, 1.0]

# Baseline (from original study)
BASELINE_CHUNK_SIZE = 10
BASELINE_MAX_TOKENS = 4096
BASELINE_TEMPERATURE = 0.1

# ========================== ALL PROMPTING TECHNIQUES ==========================

# 1. ZERO-SHOT PROMPTING
ZERO_SHOT_PROMPT = "What's happening in these frames? Describe the scene and any notable actions or events."

# 2. SEQUENTIAL PROMPTING (5 steps)
SEQUENTIAL_PROMPTS = [
    "What's happening in these frames? Describe the scene at a high level.",
    "Based on what you observed in your previous response, who are the main people in the scene and what are they doing?",
    "Looking at the actions you described, do you observe any potential criminal activities? If so, describe them in detail.",
    "Based on your crime analysis, what objects or items are involved in the incident?",
    "Considering all your observations, create a chronological timeline of events shown in these frames."
]

# 3. LEAST-TO-MOST PROMPTING (8 steps - simple to complex)
LEAST_TO_MOST_PROMPTS = [
    "List all visible objects in these frames. Just identify what you can see - furniture, tools, vehicles, etc.",
    "Identify the people visible in these frames. How many are there? Describe each person's basic appearance (clothing, general features).",
    "Describe the location and setting shown in these frames. What kind of place is this? Describe the spatial layout.",
    "What actions are the people performing in these frames? List simple actions you can observe.",
    "How are the people interacting with each other and with objects in the scene? Describe specific interactions.",
    "Do you notice any unusual, concerning, or potentially suspicious behaviors in these frames? If so, what specifically seems unusual?",
    "Based on your previous observations, analyze whether any potential criminal activities might be occurring. What specific elements suggest criminal behavior?",
    "Using all your previous observations, construct a detailed chronological timeline of events shown in these frames. Include who did what, when, and potential motives."
]

# 4. REACT PROMPTING (Reasoning and Acting)
REACT_PROMPT_TEMPLATE = """Analyze these frames using the ReAct approach (Reasoning and Acting). For each important element you observe:

1. Thought: Reason about what you're seeing and what it might mean
2. Action: Describe what specific aspect you'll focus on analyzing next
3. Observation: Make detailed observations about that aspect
4. Decision: Draw a conclusion based on your observations

Specifically, follow this cycle for:
- People and their appearances
- Actions and behaviors
- Objects and items
- Spatial relationships
- Temporal sequence of events
- Potential criminal activity

After going through these cycles, provide your final analysis of what crime appears to be occurring, who is involved, and what evidence supports this conclusion.

You are now analyzing frames {frame_range} of {total_frames}."""

# 5. CHAIN OF THOUGHT PROMPTING
COT_PROMPT_TEMPLATE = """Analyze these video frames using a chain of thought reasoning process. Think step by step as you examine what's happening:

Step 1: First, carefully observe and list what you can actually see in the frames. Note people, objects, settings, and actions without interpretation.

Step 2: Identify the key actors in the scene. Describe each person's appearance and what they are doing. Track individuals across multiple frames.

Step 3: Describe the sequence of events chronologically. What happens first, next, and after that?

Step 4: Note any important objects or items in the scene and how they're being used.

Step 5: Consider the context and setting. Where is this taking place? What kind of environment is shown?

Step 6: Based on all the above observations, describe what appears to be happening in these frames.

Make sure to clearly show your thinking process for each step. These are frames {frame_range} of {total_frames}."""

# 6. TRUE ITERATIVE PROMPTING
TRUE_ITERATIVE_BASE_PROMPT = "Analyze these frames and describe what you observe about the scene, people, and activities."
TRUE_ITERATIVE_FOLLOWUPS = [
    "Based on your previous observation, what specific details about the people stand out? Describe their clothing, positioning, and body language.",
    "Now focusing on the actions: what exactly are these people doing? Be very specific about their movements and interactions.",
    "Looking at the environment and objects: what items or environmental features are significant? How do they relate to the actions?",
    "Considering everything you've observed: what appears to be the nature of this incident? What type of activity or crime might be occurring?",
    "Final synthesis: Provide a comprehensive analysis of what happened in these frames, including timeline, participants, actions, and conclusions."
]

# 7. SELF-CONSISTENCY PROMPTING
SELF_CONSISTENCY_PROMPT = """Analyze these frames and provide THREE INDEPENDENT analyses from different perspectives:

Analysis 1 - Focus on Physical Evidence:
Look at the frames focusing purely on physical evidence - objects, locations, visible actions. What concrete evidence do you see?

Analysis 2 - Focus on Behavioral Patterns:
Look at the frames focusing on human behavior - body language, interactions, movements. What behavioral patterns emerge?

Analysis 3 - Focus on Temporal Sequence:
Look at the frames focusing on the sequence of events - what happens first, next, last. What is the timeline?

After providing all three analyses, synthesize them into a final conclusion about what crime or incident is occurring. Note any agreements or disagreements between the three perspectives."""

# 8. META-PROMPTING
META_PROMPT_TEMPLATE = """You are analyzing crime video frames. Before analyzing the actual frames, first think about:

1. Strategy Planning: What aspects should you focus on to effectively analyze a potential crime scene?
2. Information Priorities: What information is most critical vs. supplementary?
3. Analysis Framework: What systematic approach will you use?

Now, using the strategy you just developed, analyze these {frame_count} frames:
- Apply your planned analysis strategy
- Prioritize the information you identified as critical
- Follow your systematic framework

Frames {frame_range} of {total_frames}.

Provide both your strategic thinking AND your frame analysis."""

# ========================== UTILITY FUNCTIONS ==========================

def load_api_key(filepath):
    """Load API key from file"""
    try:
        with open(filepath, 'r') as f:
            key = f.read().strip()
        print(f"✓ Loaded API key from {os.path.basename(filepath)}")
        return key
    except Exception as e:
        print(f"✗ Failed to load API key from {filepath}: {str(e)}")
        return None

def encode_image_base64(image_path):
    """Encode image to base64"""
    try:
        with open(image_path, "rb") as image_file:
            return base64.b64encode(image_file.read()).decode('utf-8')
    except Exception as e:
        print(f"Error encoding image {image_path}: {str(e)}")
        return None

def save_results(results, save_dir, filename):
    """Save results to JSON file"""
    os.makedirs(save_dir, exist_ok=True)
    filepath = os.path.join(save_dir, filename)

    with open(filepath, 'w') as f:
        json.dump(results, f, indent=2)

    print(f"  ✓ Saved: {filename}")

def discover_videos_and_frames(data_dir, frame_interval=90):
    """Discover all videos and extract every Nth frame"""
    print(f"\n{'='*80}")
    print(f"DISCOVERING VIDEOS - EXTRACTING EVERY {frame_interval}TH FRAME")
    print(f"{'='*80}")

    all_videos = {}
    crime_types = [d for d in os.listdir(data_dir)
                   if os.path.isdir(os.path.join(data_dir, d))]

    print(f"Found {len(crime_types)} crime types")

    for crime_type in crime_types:
        crime_dir = os.path.join(data_dir, crime_type)
        all_files = os.listdir(crime_dir)

        video_frames = {}
        for filename in all_files:
            if filename.endswith('.png'):
                parts = filename.split('_frame_')
                if len(parts) == 2:
                    video_name = parts[0]
                    frame_num = int(parts[1].replace('.png', ''))

                    if video_name not in video_frames:
                        video_frames[video_name] = []
                    video_frames[video_name].append((frame_num, filename))

        for video_name, frames in video_frames.items():
            frames.sort(key=lambda x: x[0])
            selected_frames = {}

            for i in range(0, len(frames), frame_interval):
                frame_num, frame_file = frames[i]
                frame_path = os.path.join(crime_dir, frame_file)

                encoded = encode_image_base64(frame_path)
                if encoded:
                    selected_frames[frame_file] = encoded

            video_key = f"{crime_type}_{video_name}"
            all_videos[video_key] = {
                'crime_type': crime_type,
                'video_name': video_name,
                'frames': selected_frames,
                'total_original_frames': len(frames),
                'frames_extracted': len(selected_frames),
                'extraction_interval': frame_interval
            }

            print(f"  {video_key}: {len(frames)} → {len(selected_frames)} frames")

    print(f"\nTotal videos: {len(all_videos)}")
    return all_videos

# ========================== MODEL PROCESSORS ==========================

class GPTProcessor:
    """Process frames using GPT-4o"""
    def __init__(self, api_key):
        self.api_key = api_key
        self.headers = {
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json"
        }

    def process_chunk(self, prompt, frames, chunk_size, max_tokens, temperature):
        """Process a chunk of frames"""
        url = "https://api.openai.com/v1/chat/completions"

        content = [{"type": "text", "text": prompt}]
        for frame_base64 in frames:
            content.append({
                "type": "image_url",
                "image_url": {
                    "url": f"data:image/png;base64,{frame_base64}",
                    "detail": "high"
                }
            })

        messages = [{"role": "user", "content": content}]

        payload = {
            "model": "gpt-4o",
            "messages": messages,
            "max_tokens": max_tokens,
            "temperature": temperature
        }

        try:
            response = requests.post(url, headers=self.headers, json=payload)

            if response.status_code != 200:
                return {"error": f"API Error {response.status_code}"}

            result = response.json()
            if "choices" in result and result["choices"]:
                return {
                    "response": result["choices"][0]["message"]["content"],
                    "usage": result.get("usage", {}),
                    "model": "gpt-4o"
                }
            return {"error": "No response from API"}

        except Exception as e:
            return {"error": str(e)}

class GeminiProcessor:
    """Process frames using Gemini"""
    def __init__(self, api_key):
        genai.configure(api_key=api_key)
        self.model = genai.GenerativeModel('gemini-1.5-pro')

    def process_chunk(self, prompt, frames, chunk_size, max_tokens, temperature):
        """Process a chunk of frames"""
        try:
            content = [prompt]

            for frame_base64 in frames:
                frame_bytes = base64.b64decode(frame_base64)
                img = Image.open(io.BytesIO(frame_bytes))
                content.append(img)

            generation_config = {
                'max_output_tokens': max_tokens,
                'temperature': temperature,
            }

            response = self.model.generate_content(
                content,
                generation_config=generation_config
            )

            return {
                "response": response.text,
                "model": "gemini-1.5-pro"
            }

        except Exception as e:
            return {"error": str(e)}

class ClaudeProcessor:
    """Process frames using Claude"""
    def __init__(self, api_key):
        self.client = anthropic.Anthropic(api_key=api_key)

    def process_chunk(self, prompt, frames, chunk_size, max_tokens, temperature):
        """Process a chunk of frames"""
        try:
            content = [{"type": "text", "text": prompt}]

            for frame_base64 in frames:
                content.append({
                    "type": "image",
                    "source": {
                        "type": "base64",
                        "media_type": "image/png",
                        "data": frame_base64
                    }
                })

            message = self.client.messages.create(
                model="claude-sonnet-4-5-20250929",
                max_tokens=max_tokens,
                temperature=temperature,
                messages=[
                    {"role": "user", "content": content}
                ]
            )

            return {
                "response": message.content[0].text,
                "usage": {
                    "input_tokens": message.usage.input_tokens,
                    "output_tokens": message.usage.output_tokens
                },
                "model": "claude-sonnet-4.5"
            }

        except Exception as e:
            return {"error": str(e)}

# ========================== PROMPTING TECHNIQUE PROCESSORS ==========================

def get_prompt_for_technique(technique, chunk_idx, total_chunks, frame_range, total_frames, step=None):
    """Get the appropriate prompt for each technique"""

    if technique == "zero_shot":
        return ZERO_SHOT_PROMPT

    elif technique == "sequential":
        if step is None:
            step = 0
        return SEQUENTIAL_PROMPTS[min(step, len(SEQUENTIAL_PROMPTS)-1)]

    elif technique == "least_to_most":
        if step is None:
            step = 0
        return LEAST_TO_MOST_PROMPTS[min(step, len(LEAST_TO_MOST_PROMPTS)-1)]

    elif technique == "react":
        return REACT_PROMPT_TEMPLATE.format(
            frame_range=frame_range,
            total_frames=total_frames
        )

    elif technique == "cot":
        return COT_PROMPT_TEMPLATE.format(
            frame_range=frame_range,
            total_frames=total_frames
        )

    elif technique == "true_iterative":
        if step == 0:
            return TRUE_ITERATIVE_BASE_PROMPT
        else:
            return TRUE_ITERATIVE_FOLLOWUPS[min(step-1, len(TRUE_ITERATIVE_FOLLOWUPS)-1)]

    elif technique == "self_consistency":
        return SELF_CONSISTENCY_PROMPT

    elif technique == "meta_prompting":
        frame_count = (chunk_idx + 1) * 10  # Approximate
        return META_PROMPT_TEMPLATE.format(
            frame_count=frame_count,
            frame_range=frame_range,
            total_frames=total_frames
        )

    else:
        return ZERO_SHOT_PROMPT

# ========================== SENSITIVITY TESTING ==========================

def run_technique_sensitivity_test(processor, video_data, technique, test_config):
    """
    Run sensitivity test for a specific prompting technique
    """
    frames_dict = video_data['frames']
    frame_names = sorted(frames_dict.keys(),
                        key=lambda x: int(x.split('_frame_')[1].replace('.png', '')))
    frame_data = [frames_dict[name] for name in frame_names]

    chunk_size = test_config['chunk_size']
    max_tokens = test_config['max_tokens']
    temperature = test_config['temperature']

    results = {
        'config': test_config,
        'technique': technique,
        'video_info': {
            'crime_type': video_data['crime_type'],
            'video_name': video_data['video_name'],
            'frames_used': len(frame_data),
            'extraction_interval': video_data['extraction_interval']
        },
        'chunks': [],
        'errors': []
    }

    num_chunks = (len(frame_data) + chunk_size - 1) // chunk_size
    total_frames = len(frame_data)

    # For multi-step techniques
    if technique in ["sequential", "least_to_most", "true_iterative"]:
        # Process each step across all chunks
        if technique == "sequential":
            steps = SEQUENTIAL_PROMPTS
        elif technique == "least_to_most":
            steps = LEAST_TO_MOST_PROMPTS
        else:  # true_iterative
            steps = [TRUE_ITERATIVE_BASE_PROMPT] + TRUE_ITERATIVE_FOLLOWUPS

        conversation_history = []

        for step_idx, step_prompt in enumerate(steps):
            print(f"      Step {step_idx+1}/{len(steps)}")

            for i in range(0, len(frame_data), chunk_size):
                chunk = frame_data[i:i+chunk_size]
                chunk_num = i // chunk_size + 1
                frame_start = i + 1
                frame_end = min(i + chunk_size, total_frames)
                frame_range = f"{frame_start}-{frame_end}"

                prompt = get_prompt_for_technique(
                    technique, chunk_num-1, num_chunks,
                    frame_range, total_frames, step_idx
                )

                start_time = time.time()
                result = processor.process_chunk(prompt, chunk, chunk_size, max_tokens, temperature)
                processing_time = time.time() - start_time

                chunk_result = {
                    'step': step_idx + 1,
                    'chunk_number': chunk_num,
                    'frames_in_chunk': len(chunk),
                    'processing_time': processing_time,
                    'prompt': prompt
                }

                if 'error' in result:
                    chunk_result['error'] = result['error']
                    results['errors'].append(f"Step {step_idx+1} Chunk {chunk_num}: {result['error']}")
                else:
                    chunk_result['response'] = result['response']
                    if 'usage' in result:
                        chunk_result['usage'] = result['usage']
                    conversation_history.append(result['response'])

                results['chunks'].append(chunk_result)
                time.sleep(3)

    else:
        # Single-pass techniques
        for i in range(0, len(frame_data), chunk_size):
            chunk = frame_data[i:i+chunk_size]
            chunk_num = i // chunk_size + 1
            frame_start = i + 1
            frame_end = min(i + chunk_size, total_frames)
            frame_range = f"{frame_start}-{frame_end}"

            print(f"      Chunk {chunk_num}/{num_chunks}")

            prompt = get_prompt_for_technique(
                technique, chunk_num-1, num_chunks,
                frame_range, total_frames
            )

            start_time = time.time()
            result = processor.process_chunk(prompt, chunk, chunk_size, max_tokens, temperature)
            processing_time = time.time() - start_time

            chunk_result = {
                'chunk_number': chunk_num,
                'frames_in_chunk': len(chunk),
                'processing_time': processing_time,
                'prompt': prompt
            }

            if 'error' in result:
                chunk_result['error'] = result['error']
                results['errors'].append(f"Chunk {chunk_num}: {result['error']}")
            else:
                chunk_result['response'] = result['response']
                if 'usage' in result:
                    chunk_result['usage'] = result['usage']

            results['chunks'].append(chunk_result)
            time.sleep(3)

    return results

def run_full_sensitivity_for_model(model_name, processor, videos, save_dir, techniques_to_test=None):
    """
    Run complete sensitivity analysis for a model across ALL techniques
    """
    if techniques_to_test is None:
        techniques_to_test = [
            "zero_shot", "sequential", "least_to_most", "react",
            "cot", "true_iterative", "self_consistency", "meta_prompting"
        ]

    print(f"\n{'='*80}")
    print(f"COMPREHENSIVE SENSITIVITY ANALYSIS FOR {model_name.upper()}")
    print(f"Testing {len(techniques_to_test)} prompting techniques")
    print(f"{'='*80}")

    all_results = {
        'model': model_name,
        'analysis_date': datetime.now().isoformat(),
        'frame_interval': FRAME_INTERVAL,
        'techniques_tested': techniques_to_test,
        'tests': {}
    }

    test_video_key = list(videos.keys())[0]
    test_video = videos[test_video_key]

    print(f"\nTest video: {test_video_key}")
    print(f"Frames: {test_video['frames_extracted']} (from {test_video['total_original_frames']})")

    # For each prompting technique
    for technique in techniques_to_test:
        print(f"\n{'-'*80}")
        print(f"TESTING TECHNIQUE: {technique.upper()}")
        print(f"{'-'*80}")

        all_results['tests'][technique] = {}

        # Test 1: Baseline
        print(f"\n  Baseline Configuration:")
        print(f"    Chunk: {BASELINE_CHUNK_SIZE}, Tokens: {BASELINE_MAX_TOKENS}, Temp: {BASELINE_TEMPERATURE}")

        baseline_config = {
            'chunk_size': BASELINE_CHUNK_SIZE,
            'max_tokens': BASELINE_MAX_TOKENS,
            'temperature': BASELINE_TEMPERATURE,
            'test_type': 'baseline'
        }

        all_results['tests'][technique]['baseline'] = run_technique_sensitivity_test(
            processor, test_video, technique, baseline_config
        )

        # Test 2: Chunk Size Sensitivity
        print(f"\n  Chunk Size Sensitivity: {CHUNK_SIZES}")
        all_results['tests'][technique]['chunk_size_sensitivity'] = []

        for chunk_size in CHUNK_SIZES:
            print(f"    Testing chunk_size={chunk_size}")
            config = {
                'chunk_size': chunk_size,
                'max_tokens': BASELINE_MAX_TOKENS,
                'temperature': BASELINE_TEMPERATURE,
                'test_type': 'chunk_size_sensitivity',
                'variable': 'chunk_size'
            }

            result = run_technique_sensitivity_test(processor, test_video, technique, config)
            all_results['tests'][technique]['chunk_size_sensitivity'].append(result)

        # Test 3: Max Tokens Sensitivity
        print(f"\n  Max Tokens Sensitivity: {MAX_TOKENS_OPTIONS}")
        all_results['tests'][technique]['max_tokens_sensitivity'] = []

        for max_tokens in MAX_TOKENS_OPTIONS:
            print(f"    Testing max_tokens={max_tokens}")
            config = {
                'chunk_size': BASELINE_CHUNK_SIZE,
                'max_tokens': max_tokens,
                'temperature': BASELINE_TEMPERATURE,
                'test_type': 'max_tokens_sensitivity',
                'variable': 'max_tokens'
            }

            result = run_technique_sensitivity_test(processor, test_video, technique, config)
            all_results['tests'][technique]['max_tokens_sensitivity'].append(result)

        # Test 4: Temperature Sensitivity
        print(f"\n  Temperature Sensitivity: {TEMPERATURE_OPTIONS}")
        all_results['tests'][technique]['temperature_sensitivity'] = []

        for temperature in TEMPERATURE_OPTIONS:
            print(f"    Testing temperature={temperature}")
            config = {
                'chunk_size': BASELINE_CHUNK_SIZE,
                'max_tokens': BASELINE_MAX_TOKENS,
                'temperature': temperature,
                'test_type': 'temperature_sensitivity',
                'variable': 'temperature'
            }

            result = run_technique_sensitivity_test(processor, test_video, technique, config)
            all_results['tests'][technique]['temperature_sensitivity'].append(result)

        # Save after each technique
        save_results(all_results, save_dir, f"{model_name}_sensitivity_progress.json")

    return all_results

# ========================== MAIN EXECUTION ==========================

def main():
    """Main execution function"""

    print("\n" + "="*80)
    print("COMPREHENSIVE SENSITIVITY ANALYSIS - ALL 8 PROMPTING TECHNIQUES")
    print("="*80)
    print("\nTesting ALL techniques from your study:")
    print("  1. Zero-Shot")
    print("  2. Sequential Prompting")
    print("  3. Least-to-Most")
    print("  4. ReAct")
    print("  5. Chain of Thought")
    print("  6. True Iterative")
    print("  7. Self-Consistency")
    print("  8. Meta-Prompting")
    print("\nFor EACH technique, testing:")
    print(f"  - Chunk sizes: {CHUNK_SIZES}")
    print(f"  - Max tokens: {MAX_TOKENS_OPTIONS}")
    print(f"  - Temperatures: {TEMPERATURE_OPTIONS}")
    print(f"  - Frame extraction: Every {FRAME_INTERVAL}th frame")
    print("="*80)

    # Create results directory
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    save_dir = os.path.join(RESULT_BASE_DIR, f"complete_analysis_{timestamp}")
    os.makedirs(save_dir, exist_ok=True)

    print(f"\nResults directory: {save_dir}")

    # Load API keys
    print(f"\n{'='*80}")
    print("LOADING API KEYS")
    print(f"{'='*80}")

    gpt_key = load_api_key(GPT_KEY_PATH)
    gemini_key = load_api_key(GEMINI_KEY_PATH)
    claude_key = load_api_key(CLAUDE_KEY_PATH)

    if not all([gpt_key, gemini_key, claude_key]):
        print("\n✗ Failed to load API keys")
        return

    # Discover videos
    videos = discover_videos_and_frames(DATA_DIR, FRAME_INTERVAL)

    if not videos:
        print("\n✗ No videos discovered")
        return

    # Initialize processors
    print(f"\n{'='*80}")
    print("INITIALIZING MODEL PROCESSORS")
    print(f"{'='*80}")

    processors = {
        'gpt': GPTProcessor(gpt_key),
        'gemini': GeminiProcessor(gemini_key),
        'claude': ClaudeProcessor(claude_key)
    }

    print("✓ All processors initialized")

    # Run sensitivity analysis for each model
    all_model_results = {}

    # You can select which techniques to test (comment out to test all 8)
    techniques_to_test = [
        "zero_shot",
        "sequential",
        "least_to_most",
        "react",
        "cot"
        # "true_iterative",
        # "self_consistency",
        # "meta_prompting"
    ]

    print(f"\nTesting {len(techniques_to_test)} techniques per model")

    for model_name, processor in processors.items():
        try:
            print(f"\n{'='*80}")
            print(f"STARTING: {model_name.upper()}")
            print(f"{'='*80}")

            model_results = run_full_sensitivity_for_model(
                model_name, processor, videos, save_dir, techniques_to_test
            )
            all_model_results[model_name] = model_results

            # Save individual model results
            save_results(
                model_results,
                save_dir,
                f"{model_name}_complete_all_techniques.json"
            )

            print(f"\n✓ Completed {model_name.upper()}")

        except Exception as e:
            print(f"\n✗ Error in {model_name}: {str(e)}")
            continue

    # Generate final report
    if all_model_results:
        print(f"\n{'='*80}")
        print("GENERATING FINAL REPORT")
        print(f"{'='*80}")

        final_report = {
            'analysis_summary': {
                'date': datetime.now().isoformat(),
                'frame_extraction': f"Every {FRAME_INTERVAL}th frame",
                'baseline_config': {
                    'chunk_size': BASELINE_CHUNK_SIZE,
                    'max_tokens': BASELINE_MAX_TOKENS,
                    'temperature': BASELINE_TEMPERATURE
                },
                'tested_parameters': {
                    'chunk_sizes': CHUNK_SIZES,
                    'max_tokens': MAX_TOKENS_OPTIONS,
                    'temperatures': TEMPERATURE_OPTIONS
                },
                'techniques_tested': techniques_to_test
            },
            'models_tested': list(all_model_results.keys()),
            'model_results': all_model_results
        }

        save_results(final_report, save_dir, "COMPREHENSIVE_FINAL_REPORT.json")

        print(f"\n{'='*80}")
        print("ANALYSIS COMPLETE!")
        print(f"{'='*80}")
        print(f"\nAll results saved to: {save_dir}")
        print("\nGenerated files:")
        print("  - COMPREHENSIVE_FINAL_REPORT.json")
        for model in all_model_results.keys():
            print(f"  - {model}_complete_all_techniques.json")

    else:
        print("\n✗ No results generated")

if __name__ == "__main__":
    main()


COMPREHENSIVE SENSITIVITY ANALYSIS - ALL 8 PROMPTING TECHNIQUES

Testing ALL techniques from your study:
  1. Zero-Shot
  2. Sequential Prompting
  3. Least-to-Most
  4. ReAct
  5. Chain of Thought
  6. True Iterative
  7. Self-Consistency
  8. Meta-Prompting

For EACH technique, testing:
  - Chunk sizes: [5, 10, 15, 20]
  - Max tokens: [2048, 4096, 8192]
  - Temperatures: [0.1, 0.5, 1.0]
  - Frame extraction: Every 90th frame

Results directory: /content/drive/Shareddrives/DR KOFI RESEARCH/RESEARCH/COMPLETED/PROMPTS/SENSITIVITY_ANALYSIS_COMPLETE/complete_analysis_20251103_222831

LOADING API KEYS
✓ Loaded API key from chatgpt.txt
✓ Loaded API key from Gemini.txt
✓ Loaded API key from claude.txt

DISCOVERING VIDEOS - EXTRACTING EVERY 90TH FRAME
Found 11 crime types
  Shoplifting_Shoplifting003_x264: 902 → 11 frames
  Shoplifting_Shoplifting004_x264: 667 → 8 frames
  Fighting_Fighting003_x264: 256 → 3 frames
  Fighting_Fighting016_x264: 228 → 3 frames
  Shooting_Shooting005_x264: 127 →



      Chunk 2/2





  Chunk Size Sensitivity: [5, 10, 15, 20]
    Testing chunk_size=5
      Chunk 1/3




      Chunk 2/3




      Chunk 3/3




    Testing chunk_size=10
      Chunk 1/2




      Chunk 2/2




    Testing chunk_size=15
      Chunk 1/1




    Testing chunk_size=20
      Chunk 1/1





  Max Tokens Sensitivity: [2048, 4096, 8192]
    Testing max_tokens=2048
      Chunk 1/2




      Chunk 2/2




    Testing max_tokens=4096
      Chunk 1/2




      Chunk 2/2




    Testing max_tokens=8192
      Chunk 1/2




      Chunk 2/2





  Temperature Sensitivity: [0.1, 0.5, 1.0]
    Testing temperature=0.1
      Chunk 1/2




      Chunk 2/2




    Testing temperature=0.5
      Chunk 1/2




      Chunk 2/2




    Testing temperature=1.0
      Chunk 1/2




      Chunk 2/2




  ✓ Saved: gemini_sensitivity_progress.json

--------------------------------------------------------------------------------
TESTING TECHNIQUE: SEQUENTIAL
--------------------------------------------------------------------------------

  Baseline Configuration:
    Chunk: 10, Tokens: 4096, Temp: 0.1
      Step 1/5




      Step 2/5




      Step 3/5




      Step 4/5




      Step 5/5





  Chunk Size Sensitivity: [5, 10, 15, 20]
    Testing chunk_size=5
      Step 1/5




      Step 2/5




      Step 3/5




      Step 4/5




      Step 5/5




    Testing chunk_size=10
      Step 1/5




      Step 2/5




      Step 3/5




      Step 4/5




      Step 5/5




    Testing chunk_size=15
      Step 1/5




      Step 2/5




      Step 3/5




      Step 4/5




      Step 5/5




    Testing chunk_size=20
      Step 1/5




      Step 2/5




      Step 3/5




      Step 4/5




      Step 5/5





  Max Tokens Sensitivity: [2048, 4096, 8192]
    Testing max_tokens=2048
      Step 1/5




      Step 2/5




      Step 3/5




      Step 4/5




      Step 5/5




    Testing max_tokens=4096
      Step 1/5




      Step 2/5




      Step 3/5




      Step 4/5




      Step 5/5




    Testing max_tokens=8192
      Step 1/5




      Step 2/5




      Step 3/5




      Step 4/5




      Step 5/5





  Temperature Sensitivity: [0.1, 0.5, 1.0]
    Testing temperature=0.1
      Step 1/5




      Step 2/5




      Step 3/5




      Step 4/5




      Step 5/5




    Testing temperature=0.5
      Step 1/5




      Step 2/5




      Step 3/5




      Step 4/5




      Step 5/5




    Testing temperature=1.0
      Step 1/5




      Step 2/5




      Step 3/5




      Step 4/5




      Step 5/5




  ✓ Saved: gemini_sensitivity_progress.json

--------------------------------------------------------------------------------
TESTING TECHNIQUE: LEAST_TO_MOST
--------------------------------------------------------------------------------

  Baseline Configuration:
    Chunk: 10, Tokens: 4096, Temp: 0.1
      Step 1/8




      Step 2/8




      Step 3/8




      Step 4/8




      Step 5/8




      Step 6/8




      Step 7/8




      Step 8/8





  Chunk Size Sensitivity: [5, 10, 15, 20]
    Testing chunk_size=5
      Step 1/8




      Step 2/8




      Step 3/8




      Step 4/8




      Step 5/8




      Step 6/8




      Step 7/8




      Step 8/8




    Testing chunk_size=10
      Step 1/8




      Step 2/8




      Step 3/8




      Step 4/8




      Step 5/8




      Step 6/8




      Step 7/8




      Step 8/8




    Testing chunk_size=15
      Step 1/8




      Step 2/8




      Step 3/8




      Step 4/8




      Step 5/8




      Step 6/8




      Step 7/8




      Step 8/8




    Testing chunk_size=20
      Step 1/8




      Step 2/8




      Step 3/8




      Step 4/8




      Step 5/8




      Step 6/8




      Step 7/8




      Step 8/8





  Max Tokens Sensitivity: [2048, 4096, 8192]
    Testing max_tokens=2048
      Step 1/8




      Step 2/8




      Step 3/8




      Step 4/8




      Step 5/8




      Step 6/8




      Step 7/8




      Step 8/8




    Testing max_tokens=4096
      Step 1/8




      Step 2/8




      Step 3/8




      Step 4/8




      Step 5/8




      Step 6/8




      Step 7/8




      Step 8/8




    Testing max_tokens=8192
      Step 1/8




      Step 2/8




      Step 3/8




      Step 4/8




      Step 5/8




      Step 6/8




      Step 7/8




      Step 8/8





  Temperature Sensitivity: [0.1, 0.5, 1.0]
    Testing temperature=0.1
      Step 1/8




      Step 2/8




      Step 3/8




      Step 4/8




      Step 5/8




      Step 6/8




      Step 7/8




      Step 8/8




    Testing temperature=0.5
      Step 1/8




      Step 2/8




      Step 3/8




      Step 4/8




      Step 5/8




      Step 6/8




      Step 7/8




      Step 8/8




    Testing temperature=1.0
      Step 1/8




      Step 2/8




      Step 3/8




      Step 4/8




      Step 5/8




      Step 6/8




      Step 7/8




      Step 8/8




  ✓ Saved: gemini_sensitivity_progress.json

--------------------------------------------------------------------------------
TESTING TECHNIQUE: REACT
--------------------------------------------------------------------------------

  Baseline Configuration:
    Chunk: 10, Tokens: 4096, Temp: 0.1
      Chunk 1/2




      Chunk 2/2





  Chunk Size Sensitivity: [5, 10, 15, 20]
    Testing chunk_size=5
      Chunk 1/3




      Chunk 2/3




      Chunk 3/3




    Testing chunk_size=10
      Chunk 1/2




      Chunk 2/2




    Testing chunk_size=15
      Chunk 1/1




    Testing chunk_size=20
      Chunk 1/1





  Max Tokens Sensitivity: [2048, 4096, 8192]
    Testing max_tokens=2048
      Chunk 1/2




      Chunk 2/2




    Testing max_tokens=4096
      Chunk 1/2




      Chunk 2/2




    Testing max_tokens=8192
      Chunk 1/2




      Chunk 2/2





  Temperature Sensitivity: [0.1, 0.5, 1.0]
    Testing temperature=0.1
      Chunk 1/2




      Chunk 2/2




    Testing temperature=0.5
      Chunk 1/2




      Chunk 2/2




    Testing temperature=1.0
      Chunk 1/2




      Chunk 2/2




  ✓ Saved: gemini_sensitivity_progress.json

--------------------------------------------------------------------------------
TESTING TECHNIQUE: COT
--------------------------------------------------------------------------------

  Baseline Configuration:
    Chunk: 10, Tokens: 4096, Temp: 0.1
      Chunk 1/2




      Chunk 2/2





  Chunk Size Sensitivity: [5, 10, 15, 20]
    Testing chunk_size=5
      Chunk 1/3




      Chunk 2/3




      Chunk 3/3




    Testing chunk_size=10
      Chunk 1/2




      Chunk 2/2




    Testing chunk_size=15
      Chunk 1/1




    Testing chunk_size=20
      Chunk 1/1





  Max Tokens Sensitivity: [2048, 4096, 8192]
    Testing max_tokens=2048
      Chunk 1/2




      Chunk 2/2




    Testing max_tokens=4096
      Chunk 1/2




      Chunk 2/2




    Testing max_tokens=8192
      Chunk 1/2




      Chunk 2/2





  Temperature Sensitivity: [0.1, 0.5, 1.0]
    Testing temperature=0.1
      Chunk 1/2




      Chunk 2/2




    Testing temperature=0.5
      Chunk 1/2




      Chunk 2/2




    Testing temperature=1.0
      Chunk 1/2




      Chunk 2/2




  ✓ Saved: gemini_sensitivity_progress.json
  ✓ Saved: gemini_complete_all_techniques.json

✓ Completed GEMINI

STARTING: CLAUDE

COMPREHENSIVE SENSITIVITY ANALYSIS FOR CLAUDE
Testing 5 prompting techniques

Test video: Shoplifting_Shoplifting003_x264
Frames: 11 (from 902)

--------------------------------------------------------------------------------
TESTING TECHNIQUE: ZERO_SHOT
--------------------------------------------------------------------------------

  Baseline Configuration:
    Chunk: 10, Tokens: 4096, Temp: 0.1
      Chunk 1/2
      Chunk 2/2

  Chunk Size Sensitivity: [5, 10, 15, 20]
    Testing chunk_size=5
      Chunk 1/3
      Chunk 2/3
      Chunk 3/3
    Testing chunk_size=10
      Chunk 1/2
      Chunk 2/2
    Testing chunk_size=15
      Chunk 1/1
    Testing chunk_size=20
      Chunk 1/1

  Max Tokens Sensitivity: [2048, 4096, 8192]
    Testing max_tokens=2048
      Chunk 1/2
      Chunk 2/2
    Testing max_tokens=4096
      Chunk 1/2
      Chunk 2/2
    Testing 