In [None]:
# Colab-only cell: mount Google Drive at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install -q anthropic

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/357.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m357.5/357.5 kB[0m [31m20.3 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
"""
Diagnostic Claude Iterative Prompting - Trace Failure Modes
Addresses reviewer concern: "Claude's iterative method drops to near zero.
Please add one example trace to illustrate the failure mode and what error
compounds across turns."

This script:
1. Runs Claude's iterative method on crime videos (every 90th frame)
2. Captures detailed traces showing how errors compound
3. Identifies specific failure modes
4. Generates publication-ready examples
"""

import os
import json
import base64
import time
from datetime import datetime
from collections import defaultdict
import anthropic
import re
from difflib import SequenceMatcher

# Mount Google Drive (best-effort; only meaningful inside Google Colab)
try:
    from google.colab import drive
    if not os.path.exists('/content/drive'):
        drive.mount('/content/drive')
        print("✓ Google Drive mounted")
except Exception:
    # Outside Colab the import fails and the script simply carries on.
    # Catching Exception instead of using a bare ``except`` keeps
    # KeyboardInterrupt / SystemExit from being swallowed here.
    print("Drive mount skipped")

# Configuration
API_KEY_PATH = "/content/drive/Shareddrives/DR KOFI RESEARCH/RESEARCH/COMPLETED/PROMPTS/SAVE/FINAL-COMPLETED/API-KEYS/claude.txt"
DATA_DIR = "/content/drive/Shareddrives/DR KOFI RESEARCH/RESEARCH/COMPLETED/PROMPTS/crime-data"
SAVE_DIR = "/content/drive/MyDrive/claude_iterative_diagnostic"
FRAME_SKIP = 90  # Every 90th frame
CHUNK_SIZE = 10  # Maximum number of sampled frames sent with each request
MAX_ITERATIONS = 6  # Number of prompting rounds per video


class ClaudeIterativeDiagnostic:
    """Diagnostic analyzer for Claude's iterative failure modes"""

    def __init__(self, api_key):
        """Build the Anthropic client, ensure the output folder exists and
        store the base question that every prompt embeds.

        Args:
            api_key: Anthropic API key handed to ``anthropic.Anthropic``.
        """
        self.api_key = api_key
        self.model_name = "claude-sonnet-4-20250514"
        self.client = anthropic.Anthropic(api_key=api_key)

        # All result files go under the module-level SAVE_DIR.
        self.save_dir = SAVE_DIR
        os.makedirs(self.save_dir, exist_ok=True)

        # Core question, assembled line by line
        question_lines = (
            "Analyze these video frames for criminal activity:",
            "1. What crime is occurring?",
            "2. Who are the individuals involved (describe appearances)?",
            "3. What specific evidence supports your conclusion?",
            "4. What is your confidence level (HIGH/MEDIUM/LOW)?",
        )
        self.core_question = "\n".join(question_lines)

    def make_request(self, messages):
        """Make Claude API request with error handling"""
        try:
            response = self.client.messages.create(
                model=self.model_name,
                max_tokens=4096,
                temperature=0.1,
                messages=messages
            )
            return response.content[0].text, None
        except Exception as e:
            return None, str(e)

    def extract_key_claims(self, text):
        """Extract specific factual claims from response"""
        claims = {
            'crime_type': None,
            'num_people': None,
            'actions': [],
            'objects': [],
            'location': None,
            'confidence': None
        }

        text_lower = text.lower()

        # Extract crime type
        crime_keywords = ['theft', 'robbery', 'assault', 'vandalism', 'shoplifting',
                         'burglary', 'fighting', 'shooting', 'arson', 'explosion']
        for crime in crime_keywords:
            if crime in text_lower:
                claims['crime_type'] = crime
                break

        # Extract number of people
        people_patterns = [
            r'(\d+)\s+(?:people|individuals|persons|suspects)',
            r'(\w+)\s+(?:people|individuals|persons|suspects)',  # "two people", etc.
        ]
        for pattern in people_patterns:
            match = re.search(pattern, text_lower)
            if match:
                claims['num_people'] = match.group(1)
                break

        # Extract actions
        action_keywords = ['walking', 'running', 'standing', 'picking', 'grabbing',
                          'throwing', 'hitting', 'taking', 'leaving', 'entering']
        for action in action_keywords:
            if action in text_lower:
                claims['actions'].append(action)

        # Extract objects
        object_keywords = ['bag', 'item', 'merchandise', 'product', 'weapon',
                          'car', 'door', 'counter', 'shelf', 'cash']
        for obj in object_keywords:
            if obj in text_lower:
                claims['objects'].append(obj)

        # Extract confidence
        if 'high confidence' in text_lower or 'very confident' in text_lower:
            claims['confidence'] = 'HIGH'
        elif 'low confidence' in text_lower or 'uncertain' in text_lower:
            claims['confidence'] = 'LOW'
        else:
            claims['confidence'] = 'MEDIUM'

        return claims

    def compare_claims(self, claims1, claims2):
        """Compare two sets of claims to identify changes/contradictions"""
        changes = {
            'crime_changed': claims1['crime_type'] != claims2['crime_type'],
            'people_changed': claims1['num_people'] != claims2['num_people'],
            'actions_added': set(claims2['actions']) - set(claims1['actions']),
            'actions_removed': set(claims1['actions']) - set(claims2['actions']),
            'objects_added': set(claims2['objects']) - set(claims1['objects']),
            'objects_removed': set(claims1['objects']) - set(claims2['objects']),
            'confidence_changed': claims1['confidence'] != claims2['confidence']
        }

        # Identify contradictions (not just additions)
        changes['has_contradiction'] = (
            changes['crime_changed'] or
            changes['people_changed'] or
            len(changes['actions_removed']) > 0 or
            len(changes['objects_removed']) > 0
        )

        return changes

    def calculate_text_similarity(self, text1, text2):
        """Calculate similarity between two texts"""
        return SequenceMatcher(None, text1.lower(), text2.lower()).ratio()

    def analyze_error_propagation(self, iterations_data):
        """Analyze how errors compound across iterations"""
        error_analysis = {
            'iterations': [],
            'error_patterns': [],
            'failure_mode': None,
            'compounding_mechanism': None
        }

        previous_claims = None
        previous_text = None
        contradiction_count = 0
        confidence_trajectory = []

        for i, (iter_key, iter_data) in enumerate(sorted(iterations_data.items())):
            iteration_num = i + 1
            response = iter_data['response']

            # Extract claims
            current_claims = self.extract_key_claims(response)
            confidence_trajectory.append(current_claims['confidence'])

            iteration_analysis = {
                'iteration': iteration_num,
                'response_length': len(response),
                'claims': current_claims,
                'similarity_to_previous': 0.0,
                'contradictions': None,
                'error_indicators': []
            }

            if previous_claims and previous_text:
                # Calculate similarity
                similarity = self.calculate_text_similarity(previous_text, response)
                iteration_analysis['similarity_to_previous'] = similarity

                # Identify contradictions
                changes = self.compare_claims(previous_claims, current_claims)
                iteration_analysis['contradictions'] = changes

                if changes['has_contradiction']:
                    contradiction_count += 1
                    error_analysis['error_patterns'].append({
                        'iteration': iteration_num,
                        'type': 'contradiction',
                        'details': changes
                    })

                # Detect error indicators
                if changes['crime_changed']:
                    iteration_analysis['error_indicators'].append(
                        f"Crime type changed: {previous_claims['crime_type']} → {current_claims['crime_type']}"
                    )

                if changes['people_changed']:
                    iteration_analysis['error_indicators'].append(
                        f"Number of people changed: {previous_claims['num_people']} → {current_claims['num_people']}"
                    )

                if changes['confidence_changed']:
                    iteration_analysis['error_indicators'].append(
                        f"Confidence changed: {previous_claims['confidence']} → {current_claims['confidence']}"
                    )

                # Check for increasing uncertainty
                if (previous_claims['confidence'] in ['HIGH', 'MEDIUM'] and
                    current_claims['confidence'] == 'LOW'):
                    iteration_analysis['error_indicators'].append(
                        "Confidence decreased (potential confusion)"
                    )

                # Check for hallucination (too many new details with low similarity)
                if similarity < 0.3 and (len(changes['actions_added']) > 3 or
                                        len(changes['objects_added']) > 3):
                    iteration_analysis['error_indicators'].append(
                        "Potential hallucination: Many new details with low similarity to previous"
                    )

            error_analysis['iterations'].append(iteration_analysis)
            previous_claims = current_claims
            previous_text = response

        # Identify failure mode
        if contradiction_count >= 2:
            error_analysis['failure_mode'] = "Contradiction Accumulation"
            error_analysis['compounding_mechanism'] = (
                "Model changes its interpretation across iterations, "
                "contradicting previous claims. Each iteration builds on "
                "flawed premises from the previous iteration, compounding errors."
            )
        elif len(set(confidence_trajectory)) == 1 and confidence_trajectory[0] == 'LOW':
            error_analysis['failure_mode'] = "Persistent Uncertainty"
            error_analysis['compounding_mechanism'] = (
                "Model remains uncertain throughout iterations, unable to "
                "refine its understanding. Repeated exposure doesn't help."
            )
        elif any('hallucination' in str(iter_data.get('error_indicators', []))
                for iter_data in error_analysis['iterations']):
            error_analysis['failure_mode'] = "Hallucination Cascade"
            error_analysis['compounding_mechanism'] = (
                "Model generates increasingly detailed but unsupported claims "
                "across iterations, building elaborate narratives not grounded "
                "in the visual evidence."
            )
        else:
            error_analysis['failure_mode'] = "Gradual Degradation"
            error_analysis['compounding_mechanism'] = (
                "Model's responses become less coherent or accurate over "
                "iterations without a clear single cause."
            )

        return error_analysis

    def generate_trace_example(self, iterations_data, error_analysis, video_id, crime_type):
        """Generate publication-ready trace example"""
        trace = {
            'video_id': video_id,
            'crime_type': crime_type,
            'model': self.model_name,
            'failure_mode': error_analysis['failure_mode'],
            'compounding_mechanism': error_analysis['compounding_mechanism'],
            'trace_by_iteration': []
        }

        for iter_analysis in error_analysis['iterations']:
            iteration_num = iter_analysis['iteration']
            iter_key = f"iteration_{iteration_num}"

            if iter_key in iterations_data:
                iter_data = iterations_data[iter_key]

                trace_entry = {
                    'iteration': iteration_num,
                    'prompt_type': 'initial' if iteration_num == 1 else 'refinement',
                    'response_preview': iter_data['response'][:300] + '...',
                    'key_claims': iter_analysis['claims'],
                    'similarity_to_previous': iter_analysis['similarity_to_previous'],
                    'error_indicators': iter_analysis['error_indicators'],
                    'contradictions': iter_analysis['contradictions']
                }

                trace['trace_by_iteration'].append(trace_entry)

        return trace

    def process_video_with_diagnostics(self, frames_data, video_id, crime_type):
        """Process video with full diagnostic tracking"""
        print(f"\n{'='*80}")
        print(f"DIAGNOSTIC ANALYSIS: {video_id} ({crime_type})")
        print(f"{'='*80}")

        iterations = {}
        previous_response = None

        for iteration_num in range(1, MAX_ITERATIONS + 1):
            print(f"\n--- Iteration {iteration_num}/{MAX_ITERATIONS} ---")

            # Prepare frames (take subset for each iteration)
            frames_subset = frames_data[:CHUNK_SIZE]

            # Build prompt
            if iteration_num == 1:
                prompt = f"""ITERATION {iteration_num} - Initial Analysis

Analyzing {crime_type} video:

{self.core_question}

Be specific and detailed. State your confidence level."""
            else:
                prompt = f"""ITERATION {iteration_num} - Refinement

Previous analysis from iteration {iteration_num - 1}:
{previous_response[:500]}...

Now analyze the SAME frames again with these instructions:
1. Review your previous analysis
2. Look for any errors or oversights
3. Refine your conclusions
4. Update your confidence level

{self.core_question}"""

            # Prepare message with images
            content = [{"type": "text", "text": prompt}]
            for frame in frames_subset:
                content.append({
                    "type": "image",
                    "source": {
                        "type": "base64",
                        "media_type": "image/png",
                        "data": frame
                    }
                })

            messages = [{"role": "user", "content": content}]

            # Make request
            response, error = self.make_request(messages)

            if error:
                print(f"  ✗ Error: {error}")
                iterations[f"iteration_{iteration_num}"] = {
                    "iteration": iteration_num,
                    "error": error,
                    "response": None
                }
                break

            print(f"  ✓ Response received ({len(response)} chars)")
            print(f"  Preview: {response[:150]}...")

            # Store iteration data
            iterations[f"iteration_{iteration_num}"] = {
                "iteration": iteration_num,
                "prompt": prompt,
                "response": response,
                "response_length": len(response)
            }

            previous_response = response

            # Rate limiting
            if iteration_num < MAX_ITERATIONS:
                time.sleep(3)

        # Analyze error propagation
        print(f"\n{'='*80}")
        print("ANALYZING ERROR PROPAGATION...")
        print(f"{'='*80}")

        error_analysis = self.analyze_error_propagation(iterations)

        print(f"\nFailure Mode: {error_analysis['failure_mode']}")
        print(f"Compounding Mechanism: {error_analysis['compounding_mechanism']}")

        # Generate trace example
        trace_example = self.generate_trace_example(
            iterations, error_analysis, video_id, crime_type
        )

        # Save results
        timestamp = time.strftime("%Y%m%d_%H%M%S")

        # Save detailed results
        detailed_file = os.path.join(
            self.save_dir,
            f"diagnostic_{crime_type}_{video_id}_{timestamp}.json"
        )
        with open(detailed_file, 'w') as f:
            json.dump({
                'video_id': video_id,
                'crime_type': crime_type,
                'iterations': iterations,
                'error_analysis': error_analysis,
                'trace_example': trace_example
            }, f, indent=2)

        print(f"\n✓ Detailed results saved: {detailed_file}")

        # Generate publication-ready trace
        self.generate_publication_trace(trace_example, video_id, crime_type)

        return trace_example, error_analysis

    def generate_publication_trace(self, trace, video_id, crime_type):
        """Generate publication-ready trace text file"""
        timestamp = time.strftime("%Y%m%d_%H%M%S")
        trace_file = os.path.join(
            self.save_dir,
            f"trace_example_{crime_type}_{video_id}_{timestamp}.txt"
        )

        with open(trace_file, 'w') as f:
            f.write("="*80 + "\n")
            f.write(f"CLAUDE ITERATIVE FAILURE MODE TRACE EXAMPLE\n")
            f.write(f"Video: {video_id} ({crime_type})\n")
            f.write(f"Model: {trace['model']}\n")
            f.write("="*80 + "\n\n")

            f.write(f"IDENTIFIED FAILURE MODE: {trace['failure_mode']}\n\n")

            f.write("COMPOUNDING MECHANISM:\n")
            f.write(f"{trace['compounding_mechanism']}\n\n")

            f.write("="*80 + "\n")
            f.write("ITERATION-BY-ITERATION TRACE\n")
            f.write("="*80 + "\n\n")

            for trace_entry in trace['trace_by_iteration']:
                f.write(f"\n{'─'*80}\n")
                f.write(f"ITERATION {trace_entry['iteration']}\n")
                f.write(f"{'─'*80}\n\n")

                f.write(f"Response Preview:\n{trace_entry['response_preview']}\n\n")

                f.write("Key Claims:\n")
                claims = trace_entry['key_claims']
                f.write(f"  • Crime Type: {claims['crime_type']}\n")
                f.write(f"  • Number of People: {claims['num_people']}\n")
                f.write(f"  • Actions: {', '.join(claims['actions']) if claims['actions'] else 'None'}\n")
                f.write(f"  • Objects: {', '.join(claims['objects']) if claims['objects'] else 'None'}\n")
                f.write(f"  • Confidence: {claims['confidence']}\n\n")

                if trace_entry['iteration'] > 1:
                    f.write(f"Similarity to Previous: {trace_entry['similarity_to_previous']:.3f}\n\n")

                    if trace_entry['error_indicators']:
                        f.write("⚠ ERROR INDICATORS:\n")
                        for indicator in trace_entry['error_indicators']:
                            f.write(f"  • {indicator}\n")
                        f.write("\n")

                    if trace_entry['contradictions']:
                        contras = trace_entry['contradictions']
                        if contras['has_contradiction']:
                            f.write("❌ CONTRADICTIONS DETECTED:\n")
                            if contras['crime_changed']:
                                f.write("  • Crime type changed\n")
                            if contras['people_changed']:
                                f.write("  • Number of people changed\n")
                            if contras['actions_removed']:
                                f.write(f"  • Actions removed: {', '.join(contras['actions_removed'])}\n")
                            if contras['objects_removed']:
                                f.write(f"  • Objects removed: {', '.join(contras['objects_removed'])}\n")
                            f.write("\n")

            f.write("\n" + "="*80 + "\n")
            f.write("ANALYSIS SUMMARY\n")
            f.write("="*80 + "\n\n")

            f.write("This trace illustrates how errors compound in Claude's iterative approach:\n\n")

            if trace['failure_mode'] == "Contradiction Accumulation":
                f.write("1. Initial response makes claims about the video\n")
                f.write("2. Subsequent iterations change these claims\n")
                f.write("3. Contradictions accumulate, confusing the model\n")
                f.write("4. Later iterations build on flawed premises\n")
                f.write("5. Performance degrades as errors compound\n\n")
                f.write("HOW TO AVOID:\n")
                f.write("• Use fresh context for each iteration (don't feed back previous responses)\n")
                f.write("• Implement consistency checks across iterations\n")
                f.write("• Stop iteration when contradictions are detected\n")

            elif trace['failure_mode'] == "Hallucination Cascade":
                f.write("1. Initial response makes reasonable observations\n")
                f.write("2. Subsequent iterations add unsupported details\n")
                f.write("3. Model generates increasingly elaborate narratives\n")
                f.write("4. Later claims become detached from visual evidence\n")
                f.write("5. Confidence remains high despite hallucinations\n\n")
                f.write("HOW TO AVOID:\n")
                f.write("• Ground each iteration in visual evidence\n")
                f.write("• Penalize addition of unsupported details\n")
                f.write("• Require explicit evidence for each claim\n")

            elif trace['failure_mode'] == "Persistent Uncertainty":
                f.write("1. Initial response expresses uncertainty\n")
                f.write("2. Subsequent iterations fail to resolve uncertainty\n")
                f.write("3. Model cannot refine understanding through iteration\n")
                f.write("4. Repeated exposure doesn't help\n")
                f.write("5. Resources wasted on unproductive iterations\n\n")
                f.write("HOW TO AVOID:\n")
                f.write("• Detect early uncertainty and skip iteration\n")
                f.write("• Use different prompting strategy for ambiguous cases\n")
                f.write("• Provide more context or different frame samples\n")

            else:
                f.write("Multiple error patterns detected. See detailed trace above.\n\n")
                f.write("HOW TO AVOID:\n")
                f.write("• Monitor response quality across iterations\n")
                f.write("• Implement early stopping when degradation detected\n")
                f.write("• Use ensemble approaches instead of iteration\n")

        print(f"✓ Publication trace saved: {trace_file}")


class VideoLoader:
    """Discover frame images on disk, group them into videos and load samples.

    The data directory is expected to contain one sub-directory per crime
    type, each holding frame image files whose names encode a video id and a
    frame number (for example ``<video>_frame_<n>.png`` or ``<video>_<n>.jpg``).
    """

    # File extensions (lower-case) that are treated as frame images.
    IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg', '.bmp')

    def __init__(self, data_dir, frame_skip=90):
        """Remember the dataset root and the sampling stride.

        Args:
            data_dir: Root directory with one sub-directory per crime type.
            frame_skip: Keep every ``frame_skip``-th frame when loading.
        """
        self.data_dir = data_dir
        self.frame_skip = frame_skip

    def discover_videos(self):
        """Scan the data directory and group frame files into videos.

        Returns:
            dict mapping ``"<crime_type>_<video_id>"`` to a dict with the keys
            ``crime_type``, ``video_id``, ``frames`` (file names sorted by
            frame number) and ``crime_dir``. If the directory cannot be read,
            the error is printed and whatever was collected so far is
            returned.
        """
        print("\n=== DISCOVERING VIDEOS ===")
        print(f"Scanning: {self.data_dir}")

        all_videos = {}

        try:
            # NOTE: os.listdir returns entries in arbitrary order, so the
            # order of the discovered videos is not guaranteed.
            crime_types = [d for d in os.listdir(self.data_dir)
                           if os.path.isdir(os.path.join(self.data_dir, d))]

            print(f"Found {len(crime_types)} crime types: {crime_types}")

            for crime_type in crime_types:
                crime_dir = os.path.join(self.data_dir, crime_type)

                video_groups = defaultdict(list)
                for filename in os.listdir(crime_dir):
                    if not filename.lower().endswith(self.IMAGE_EXTENSIONS):
                        continue

                    video_id = self._extract_video_id(filename)
                    if video_id:
                        video_groups[video_id].append(filename)

                print(f"  {crime_type}: {len(video_groups)} videos")

                for video_id, frames in video_groups.items():
                    all_videos[f"{crime_type}_{video_id}"] = {
                        'crime_type': crime_type,
                        'video_id': video_id,
                        'frames': sorted(frames, key=self._extract_frame_number),
                        'crime_dir': crime_dir
                    }

        except OSError as e:
            # Unreadable or missing directories are reported, not fatal.
            print(f"Error: {str(e)}")

        print(f"Total videos: {len(all_videos)}")
        return all_videos

    def _extract_video_id(self, filename):
        """Derive the video id from a frame file name.

        Rules, tried in order: text before ``_frame_``; everything before a
        trailing ``_<integer>`` part; the name with trailing digits stripped;
        otherwise the name without its extension.
        """
        name_without_ext = os.path.splitext(filename)[0]

        if '_frame_' in name_without_ext:
            return name_without_ext.split('_frame_')[0]

        parts = name_without_ext.split('_')
        if len(parts) >= 2:
            try:
                int(parts[-1])
            except ValueError:
                pass
            else:
                return '_'.join(parts[:-1])

        video_id = re.sub(r'_?\d+$', '', name_without_ext)
        if video_id and video_id != name_without_ext:
            return video_id

        return name_without_ext

    def _extract_frame_number(self, filename):
        """Return the frame number encoded in a file name, or 0 if none parses.

        The number after ``_frame_`` is preferred; otherwise the last run of
        digits in the name is used.
        """
        try:
            if '_frame_' in filename:
                parts = filename.split('_frame_')
                if len(parts) > 1:
                    return int(parts[1].split('.')[0])
            numbers = re.findall(r'\d+', filename)
            if numbers:
                return int(numbers[-1])
        except ValueError:
            # Non-numeric text where a frame number was expected.
            pass
        return 0

    def load_video_frames(self, video_info):
        """Load every ``frame_skip``-th frame of a video as base64 text.

        Args:
            video_info: One value of the dict returned by ``discover_videos``.

        Returns:
            list of base64-encoded strings, one per frame that could be read.
            Frames that fail to load are reported and skipped.
        """
        crime_dir = video_info['crime_dir']
        all_frames = video_info['frames']

        selected_frames = all_frames[::self.frame_skip]

        frames_data = []
        for frame_file in selected_frames:
            frame_path = os.path.join(crime_dir, frame_file)
            try:
                with open(frame_path, 'rb') as f:
                    frames_data.append(base64.b64encode(f.read()).decode('utf-8'))
            except Exception as e:
                print(f"  Error loading {frame_file}: {str(e)}")

        print(f"  Loaded {len(frames_data)} frames (every {self.frame_skip}th)")
        return frames_data


def main():
    """Run the diagnostic end to end.

    Steps: load the API key, verify the API with a small request, discover
    the videos, run the iterative diagnostic on up to three of them and write
    a summary report next to the per-video outputs. Every failure is reported
    on stdout; setup failures end the run early.
    """
    print("\n" + "╔" + "="*78 + "╗")
    print("║" + " "*15 + "CLAUDE ITERATIVE DIAGNOSTIC ANALYSIS" + " "*27 + "║")
    print("║" + " "*10 + "Tracing Failure Modes & Error Compounding" + " "*27 + "║")
    print("╚" + "="*78 + "╝\n")

    # Load API key
    print("Loading API key...")
    try:
        with open(API_KEY_PATH, 'r') as f:
            api_key = f.read().strip()
        if not api_key:
            print("✗ API key file is empty")
            return
        print("✓ API key loaded")
    except Exception as e:
        print(f"✗ Error loading API key: {str(e)}")
        return

    # Test API (the reply itself is not needed, only that the call succeeds)
    print("\nTesting Claude API...")
    try:
        anthropic.Anthropic(api_key=api_key).messages.create(
            model="claude-sonnet-4-20250514",
            max_tokens=50,
            messages=[{"role": "user", "content": "Hello"}]
        )
        print("✓ API connection successful")
    except Exception as e:
        print(f"✗ API test failed: {str(e)}")
        return

    # Initialize components
    print("\nInitializing components...")
    diagnostic = ClaudeIterativeDiagnostic(api_key)
    loader = VideoLoader(DATA_DIR, frame_skip=FRAME_SKIP)

    # Discover videos
    all_videos = loader.discover_videos()

    if not all_videos:
        print("✗ No videos found")
        return

    # Process videos (limit to 3 for demonstration)
    print("\nProcessing up to 3 videos for diagnostic analysis...")

    results = []
    for video_key, video_info in list(all_videos.items())[:3]:
        print(f"\n{'='*80}")
        print(f"VIDEO: {video_key}")
        print(f"{'='*80}")

        try:
            # Load frames
            frames_data = loader.load_video_frames(video_info)

            if not frames_data:
                print("  ✗ No frames loaded")
                continue

            # Run diagnostic analysis
            trace, error_analysis = diagnostic.process_video_with_diagnostics(
                frames_data,
                video_info['video_id'],
                video_info['crime_type']
            )

            results.append({
                'video_key': video_key,
                'trace': trace,
                'error_analysis': error_analysis
            })

        except Exception as e:
            # One failing video must not stop the remaining ones.
            print(f"  ✗ Error processing {video_key}: {str(e)}")

    # Generate summary report
    print("\n" + "="*80)
    print("GENERATING SUMMARY REPORT")
    print("="*80 + "\n")

    summary_file = os.path.join(SAVE_DIR, f"diagnostic_summary_{time.strftime('%Y%m%d_%H%M%S')}.txt")

    # The report uses "•" bullets, so the encoding is fixed to UTF-8 rather
    # than left to the platform default.
    with open(summary_file, 'w', encoding='utf-8') as f:
        f.write("="*80 + "\n")
        f.write("CLAUDE ITERATIVE PROMPTING - DIAGNOSTIC SUMMARY\n")
        f.write(f"Analysis Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
        f.write("="*80 + "\n\n")

        f.write(f"Videos Analyzed: {len(results)}\n")
        f.write(f"Frame Sampling: Every {FRAME_SKIP}th frame\n")
        f.write(f"Max Iterations: {MAX_ITERATIONS}\n")
        # Report the model the analyzer actually used, not a second literal.
        f.write(f"Model: {diagnostic.model_name}\n\n")

        f.write("FAILURE MODES IDENTIFIED:\n\n")

        failure_mode_counts = defaultdict(int)
        for result in results:
            failure_mode = result['trace']['failure_mode']
            failure_mode_counts[failure_mode] += 1

            f.write(f"Video: {result['video_key']}\n")
            f.write(f"  Failure Mode: {failure_mode}\n")
            f.write(f"  Mechanism: {result['trace']['compounding_mechanism']}\n\n")

        f.write("\n" + "-"*80 + "\n")
        f.write("FAILURE MODE DISTRIBUTION:\n")
        f.write("-"*80 + "\n\n")

        # failure_mode_counts is empty when results is empty, so the division
        # below never sees a zero denominator.
        for mode, count in failure_mode_counts.items():
            pct = (count / len(results)) * 100
            f.write(f"{mode}: {count} ({pct:.1f}%)\n")

        f.write("\n\n" + "="*80 + "\n")
        f.write("RECOMMENDATIONS FOR AVOIDING THESE FAILURES\n")
        f.write("="*80 + "\n\n")

        f.write("1. DETECT EARLY FAILURE SIGNALS\n")
        f.write("   • Monitor for contradictions between iterations\n")
        f.write("   • Track confidence trajectory (decreasing = warning sign)\n")
        f.write("   • Check similarity scores (too low = drift, too high = stuck)\n\n")

        f.write("2. IMPLEMENT SAFEGUARDS\n")
        f.write("   • Stop iteration when contradictions detected\n")
        f.write("   • Limit iterations to 3-4 maximum\n")
        f.write("   • Require evidence grounding for new claims\n\n")

        f.write("3. ALTERNATIVE APPROACHES\n")
        f.write("   • Use ensemble methods instead of iteration\n")
        f.write("   • Try different frame samples rather than iterating\n")
        f.write("   • Consider multi-turn conversation instead of refinement\n\n")

        f.write("4. FOR YOUR PAPER\n")
        f.write("   • Include one trace example (see individual trace files)\n")
        f.write("   • Explain the specific failure mode observed\n")
        f.write("   • Show how errors compound across iterations\n")
        f.write("   • Provide concrete recommendations for avoidance\n")

    print(f"✓ Summary report saved: {summary_file}")

    # Final summary
    print("\n" + "="*80)
    print("DIAGNOSTIC ANALYSIS COMPLETE")
    print("="*80)
    print("\nGenerated files:")
    print(f"  • Detailed diagnostics: {len(results)} files")
    print(f"  • Trace examples: {len(results)} files")
    print("  • Summary report: 1 file")
    print(f"\nAll files saved to: {SAVE_DIR}/")
    print("\n" + "="*80)

    # Print failure modes summary
    print("\nFAILURE MODES FOUND:")
    for mode, count in failure_mode_counts.items():
        pct = (count / len(results)) * 100
        print(f"  • {mode}: {count} ({pct:.1f}%)")

    print("\n✓ Use the trace examples in your paper to address reviewer concern!")
    print("  Look for files: trace_example_*.txt")


# Run the diagnostic only when this file is executed directly, not on import.
if __name__ == "__main__":
    main()


║               CLAUDE ITERATIVE DIAGNOSTIC ANALYSIS                           ║
║          Tracing Failure Modes & Error Compounding                           ║

Loading API key...
✓ API key loaded

Testing Claude API...
✓ API connection successful

Initializing components...

=== DISCOVERING VIDEOS ===
Scanning: /content/drive/Shareddrives/DR KOFI RESEARCH/RESEARCH/COMPLETED/PROMPTS/crime-data
Found 11 crime types: ['Shoplifting', 'Fighting', 'Shooting', 'Stealing', 'Explosion', 'Arson', 'Vandalism', 'Abuse', 'Robbery', 'Burglary', 'Assault']
  Shoplifting: 2 videos
  Fighting: 2 videos
  Shooting: 2 videos
  Stealing: 2 videos
  Explosion: 2 videos
  Arson: 2 videos
  Vandalism: 2 videos
  Abuse: 2 videos
  Robbery: 2 videos
  Burglary: 2 videos
  Assault: 2 videos
Total videos: 22

Processing up to 3 videos for diagnostic analysis...

VIDEO: Shoplifting_Shoplifting003_x264
  Loaded 11 frames (every 90th)

DIAGNOSTIC ANALYSIS: Shoplifting003_x264 (Shoplifting)

--- Iteration 1/6 --