In [12]:
import os
import json
from openai import OpenAI

# === Config ===
# Credentials are read from the environment so that no secret has to be stored
# in the notebook itself: set OPENAI_API_KEY (and, for a proxy or compatible
# server, OPENAI_BASE_URL) before running this cell.
API_KEY = os.environ.get("OPENAI_API_KEY", "")
BASE_URL = os.environ.get("OPENAI_BASE_URL", "")
# An empty base URL is passed as None so the client falls back to its default
# endpoint instead of trying to talk to "".
client = OpenAI(api_key=API_KEY, base_url=BASE_URL or None)

# === Helper: Extract task description from evaluator.py ===
def extract_task_description(evaluator_path: str):
    """
    Attempt to extract a task description from an evaluator.py file.

    Three patterns are tried in order and the first one that matches wins:
      1. A ``TASK_DESCRIPTION = "..."`` assignment (single- or triple-quoted).
      2. The first triple-quoted string that starts a line.
      3. A triple-quoted string that directly follows a ``class`` header.

    In every pattern the closing delimiter must be identical to the opening
    one, so quotes of the other kind inside the text (e.g. an apostrophe in a
    double-quoted string) do not end the match early.

    Args:
        evaluator_path: Path of the evaluator source file to scan.

    Returns:
        The matched description with surrounding whitespace stripped, or
        None when none of the patterns match.

    Raises:
        OSError: If the file cannot be opened.
    """
    import re

    with open(evaluator_path, "r", encoding="utf-8") as f:
        content = f.read()

    # Pattern 1: TASK_DESCRIPTION = "..."
    # Triple quotes are listed first so that """text""" is not read as an
    # empty single-quoted string; \1 forces the same delimiter to close it.
    pattern1 = r'TASK_DESCRIPTION\s*=\s*("""|\'\'\'|"|\')(.+?)\1'
    match = re.search(pattern1, content, re.DOTALL)
    if match:
        return match.group(2).strip()

    # Pattern 2: Module docstring (first triple-quoted string at a line start)
    pattern2 = r'^\s*("""|\'\'\')(.+?)\1'
    match = re.search(pattern2, content, re.DOTALL | re.MULTILINE)
    if match:
        return match.group(2).strip()

    # Pattern 3: Look for class docstring
    pattern3 = r'class\s+\w+.*?:\s*("""|\'\'\')(.+?)\1'
    match = re.search(pattern3, content, re.DOTALL)
    if match:
        return match.group(2).strip()

    return None


# === One-hop: Generate generalizable personalization dimensions ===
def generate_generalizable_dimensions(task_description: str, evaluator_code: str, existing_dimensions: list):
    """
    Ask the model to propose personalization dimensions for one task.

    The prompt contains the task description, the first 3000 characters of
    the evaluator code and the already-known dimensions (so the model can
    avoid duplicates). The raw model output is printed for inspection.

    Args:
        task_description: Task text; when empty/None the prompt tells the
            model to infer the task from the evaluator code.
        evaluator_code: Source of the task's evaluator; only its first 3000
            characters are sent.
        existing_dimensions: Dimensions already collected; must be
            JSON-serializable.

    Returns:
        A list of dimension dicts, each with a string ``"name"``. An empty
        list is returned when the output is not valid JSON or is not a list;
        entries without a string name are dropped because callers index
        ``d["name"]``.

    Raises:
        Whatever ``client.chat.completions.create`` raises; API errors are
        not caught here.
    """
    prompt = f"""
You are analyzing a benchmark task for AI agents. The goal is to find *organizational personalization dimensions*‚Äîaspects of the task that can vary naturally between organizations or users, and that affect *how* the task is executed, not what the correct answer is.

For example:
- Communication tone or style (formal vs. casual)
- Update frequency or detail level
- Explanation style (succinct vs. detailed)
- Adherence to process or flexibility
- Documentation expectations
- Approval hierarchy or escalation norms
- Any other task-specific personalization dimension

The agent should still pass all rule-based evaluators if personalization is missing, but might fail hidden or high-level behavior tests.

Given the following task description and evaluator code, propose **3‚Äì4 creative, generalizable personalization dimensions**.
Each should have:
- A concise `"name"`
- A clear `"description"`
- A `"value_schema"` (how values look or are structured)
- Example `"possible_values"` (2‚Äì4 realistic examples)
- A `"generality"` tag: "high", "medium", or "low" (whether it applies to many tasks)
- A `"task_specific_relevance"` score: 1‚Äì5 (how important this dimension is for this specific task)

Avoid duplicates of these existing known dimensions:
{json.dumps(existing_dimensions, indent=2)}

---

TASK DESCRIPTION:
{task_description if task_description else "No explicit description found ‚Äî infer from evaluator code."}

---

EVALUATOR CODE:
{evaluator_code[:3000]}

---

Return ONLY valid JSON list, no commentary. Example:
[
  {{
    "name": "Communication tone",
    "description": "Defines how formal or casual the agent's messages to coworkers should be.",
    "value_schema": "categorical",
    "possible_values": ["Formal", "Neutral", "Friendly"],
    "generality": "high",
    "task_specific_relevance": 4
  }},
  ...
]
"""
    response = client.chat.completions.create(
        model="gpt-5",
        messages=[{"role": "user", "content": prompt}],
        temperature=0.6,
    )

    raw_output = response.choices[0].message.content
    print("MODEL OUTPUT ‚Üì‚Üì‚Üì\n", raw_output, "\n\n")

    try:
        # TypeError covers a None message content, ValueError covers bad JSON.
        parsed = json.loads(raw_output)
    except (TypeError, ValueError) as e:
        print(f"‚ö†Ô∏è Failed to parse JSON: {e}")
        return []

    if not isinstance(parsed, list):
        print(f"‚ö†Ô∏è Failed to parse JSON: expected a list, got {type(parsed).__name__}")
        return []

    # Keep only well-formed entries so callers can rely on d["name"].
    return [d for d in parsed if isinstance(d, dict) and isinstance(d.get("name"), str)]


# === Main pipeline ===
def process_all_tasks(tasks_file: str, base_dir: str, global_dim_path: str = "global_dimensions.json"):
    """
    Generate personalization dimensions for every task listed in a tasks file.

    The tasks file holds one ``task_id: instruction`` pair per line; blank
    lines and lines without a colon are ignored. For each task whose
    ``<base_dir>/<task_id>/evaluator.py`` exists, the model is asked for
    dimensions. New dimensions (compared by case-insensitive name) are merged
    into the global list, which is also passed to later model calls.

    Args:
        tasks_file: Path to the tasks file.
        base_dir: Directory that contains one sub-directory per task ID.
        global_dim_path: JSON file holding the global dimension list; it is
            loaded if present and rewritten at the end.

    Side effects:
        Writes ``personalization_dimensions_per_task.json`` in the current
        directory and rewrites ``global_dim_path``. Both files are written
        only after all tasks have been processed.
    """
    if os.path.exists(global_dim_path):
        with open(global_dim_path, "r") as f:
            existing_dims = json.load(f)
    else:
        existing_dims = []

    # Lower-cased names already in the global list; a set keeps the duplicate
    # check O(1) instead of rebuilding a list for every candidate dimension.
    known_names = {
        str(x["name"]).lower()
        for x in existing_dims
        if isinstance(x, dict) and "name" in x
    }

    results = {}
    tasks = {}

    with open(tasks_file, "r") as f:
        for line in f:
            line = line.strip()
            if not line or ":" not in line:
                continue
            # Split on the first colon only: instructions may contain colons.
            task_id, task_instruction = line.split(":", 1)
            tasks[task_id.strip()] = task_instruction.strip()

    print(f"Found {len(tasks)} tasks to process.\n")

    for task_id, task_instruction in tasks.items():
        print(f"üîπ Processing task: {task_id}")

        evaluator_path = os.path.join(base_dir, task_id, "evaluator.py")
        if not os.path.exists(evaluator_path):
            print(f"‚ö†Ô∏è Skipping {task_id}: evaluator.py not found.")
            continue

        with open(evaluator_path, "r") as f:
            evaluator_code = f.read()

        # The instruction from the tasks file is the description of record;
        # the evaluator is only mined for one when the instruction is empty.
        task_desc = task_instruction or extract_task_description(evaluator_path)

        dims = generate_generalizable_dimensions(task_desc, evaluator_code, existing_dims)
        if not dims:
            print(f"‚ùå No dimensions generated for {task_id}")
            continue

        # Merge into global list, avoiding duplicates by name
        for d in dims:
            name = d.get("name") if isinstance(d, dict) else None
            if not isinstance(name, str):
                # Malformed model output: nothing to deduplicate on.
                continue
            key = name.lower()
            if key not in known_names:
                known_names.add(key)
                existing_dims.append(d)

        results[task_id] = {
            "task_instruction": task_instruction,
            "dimensions": dims
        }

    with open("personalization_dimensions_per_task.json", "w") as f:
        json.dump(results, f, indent=2)

    with open(global_dim_path, "w") as f:
        json.dump(existing_dims, f, indent=2)

    print(f"\n‚úÖ Saved task-specific dimensions to personalization_dimensions_per_task.json")
    print(f"‚úÖ Updated global dimension ontology at {global_dim_path}")


# === Run ===
if __name__ == "__main__":
    # Default locations: the task list sits next to the notebook and the
    # evaluators live inside the TheAgentCompany checkout.
    base_dir = "TheAgentCompany/workspaces/tasks"
    tasks_file = "TAC_tasks.txt"
    process_all_tasks(tasks_file=tasks_file, base_dir=base_dir)


Found 175 tasks to process.

🔹 Processing task: admin-arrange-meeting-rooms
MODEL OUTPUT ↓↓↓
 [
  {
    "name": "Notification channel and visibility",
    "description": "Specifies where and how to deliver results in RocketChat, including audience scope and mention style.",
    "value_schema": "object: {channel_type: 'dm'|'channel', recipients: string[], channel_name?: string, mention_style: 'none'|'direct'|'team', visibility: 'private'|'team'|'org'}",
    "possible_values": [
      {"channel_type": "dm", "recipients": ["Chen Xinyi"], "mention_style": "none", "visibility": "private"},
      {"channel_type": "channel", "channel_name": "ops-scheduling", "recipients": ["Chen Xinyi"], "mention_style": "direct", "visibility": "team"},
      {"channel_type": "dm", "recipients": ["Chen Xinyi", "Team Lead"], "mention_style": "direct", "visibility": "private"},
      {"channel_type": "channel", "channel_name": "general", "recipients": [], "mention_style": "none", "visibility": "org"}
 

In [21]:
import os
import json
from openai import OpenAI

# === Config ===
# Credentials are read from the environment so that no secret has to be stored
# in the notebook itself: set OPENAI_API_KEY (and, for a proxy or compatible
# server, OPENAI_BASE_URL) before running this cell.
API_KEY = os.environ.get("OPENAI_API_KEY", "")
BASE_URL = os.environ.get("OPENAI_BASE_URL", "")
# An empty base URL is passed as None so the client falls back to its default
# endpoint instead of trying to talk to "".
client = OpenAI(api_key=API_KEY, base_url=BASE_URL or None)

BATCH_SIZE = 5  # Process 5 tasks per API call


def prune_task_batch(tasks_batch: dict, base_dir: str):
    """
    Prunes dimensions for a batch of tasks in one API call.

    Each task is sent together with the complete text of its evaluator.py
    (an empty string when the file does not exist). The raw model response
    is printed for inspection.

    Args:
        tasks_batch: Dict of {task_id: {task_instruction, dimensions}}
        base_dir: Path to evaluator files

    Returns:
        Dict of {task_id: {kept_dimensions, removed_dimensions}} as produced
        by the model, restricted to entries that are JSON objects, or None
        when the API call fails, the response is not valid JSON, or the
        response is not a JSON object.
    """

    # Enrich every task with its evaluator source (sent in full, not truncated)
    enriched_batch = {}
    for task_id, task_data in tasks_batch.items():
        evaluator_path = os.path.join(base_dir, task_id, "evaluator.py")
        evaluator_code = ""
        if os.path.exists(evaluator_path):
            with open(evaluator_path, "r") as f:
                evaluator_code = f.read()

        enriched_batch[task_id] = {
            'task_instruction': task_data['task_instruction'],
            'dimensions': task_data['dimensions'],
            'evaluator_snippet': evaluator_code
        }

    # Get actual task IDs for the example
    task_id_list = list(enriched_batch.keys())
    example_structure = {tid: {"kept_dimensions": [], "removed_dimensions": []} for tid in task_id_list}

    prompt = f"""
You are reviewing personalization dimensions for multiple AI agent benchmark tasks.

For EACH task, prune dimensions that:
- Are too specific/artificial (not real organizational preferences)
- Change evaluation criteria (modify what's "correct")
- Are unrealistic for organizations to care about
- Modify task requirements rather than execution style

Keep dimensions that:
- Represent realistic organizational norms/preferences
- Affect HOW tasks are done, not WHAT the correct answer is
- Are generalizable across similar task instances

Dimensions can be task-specific but must be a realistic organizational preference for the task.
---

TASKS TO REVIEW:
{json.dumps(enriched_batch, indent=2)}

---

CRITICAL: Use the EXACT task IDs from the input. Return ONLY valid JSON with pruning decisions for ALL tasks.

Expected structure (use actual task IDs from input):
{json.dumps(example_structure, indent=2)}

Each task should have:
{{
  "kept_dimensions": [
    {{
      "name": "Communication tone",
      "description": "...",
      "value_schema": "categorical",
      "possible_values": ["Formal", "Casual"],
      "generality": "high",
      "task_specific_relevance": 4,
      "reasoning": "Realistic org preference"
    }}
  ],
  "removed_dimensions": [
    {{
      "name": "Approval threshold",
      "reasoning": "Changes evaluation criteria"
    }}
  ]
}}
"""
    try:
        response = client.chat.completions.create(
            model="gpt-5",
            messages=[{"role": "user", "content": prompt}],
            temperature=0.3,
            max_tokens=16000,
        )
        print("=" * 40)
        print("PRUNE BATCH RESPONSE")
        print("=" * 40)
        print(response.choices[0].message.content)
        print("=" * 40)

        parsed = json.loads(response.choices[0].message.content)
    except Exception as e:
        # Best-effort boundary: both API errors and malformed JSON end up
        # here and are reported to the caller as a failed batch (None).
        print(f"‚ö†Ô∏è Failed to parse batch pruning: {e}")
        return None

    # The caller looks results up by task ID and calls .get() on each entry,
    # so anything that is not an object of objects cannot be used.
    if not isinstance(parsed, dict):
        print(f"Unexpected pruning response: expected a JSON object, got {type(parsed).__name__}")
        return None
    return {tid: entry for tid, entry in parsed.items() if isinstance(entry, dict)}


def build_global_hierarchy(all_pruned_dimensions: list):
    """
    Build canonical hierarchy from all pruned dimensions in ONE call.

    The raw model response is printed for inspection.

    Args:
        all_pruned_dimensions: JSON-serializable list of dimension dicts
            collected from every task.

    Returns:
        A dict that always has list values under both
        ``"organizational_preferences"`` and ``"task_specific_preferences"``
        (a list missing from the response is filled in as empty), or None
        when the API call fails, the response is not valid JSON, is not a
        JSON object, or contains neither list.
    """

    prompt = f"""
You are building a unified taxonomy of personalization dimensions.

**Build a two-level hierarchy:**

1. **Organizational-level** (generality ‚â• 7/10): Cross-task preferences
   - Communication style, update frequency, documentation norms, approval processes
   - Apply broadly across many task types

2. **Task-specific** (generality 3-6/10): Preferences for particular task types
   - Chart types, email formatting, code style, report structure
   - Only relevant to specific task categories

**Merge similar dimensions:**
- "Email tone", "Slack style", "Report formality" ‚Üí "Communication tone"
- "Status update detail", "Progress verbosity" ‚Üí "Update detail level"
- Keep truly unique dimensions that don't fit categories

---

ALL PRUNED DIMENSIONS (from all tasks):
{json.dumps(all_pruned_dimensions, indent=2)}

---

Return ONLY valid JSON:

{{
  "organizational_preferences": [
    {{
      "canonical_name": "Communication tone",
      "description": "How formal or casual communications should be",
      "value_schema": "categorical",
      "example_values": ["Formal", "Professional", "Casual", "Friendly"],
      "applies_to_task_types": ["email", "messaging", "reporting"],
      "merged_from_dimensions": ["Email tone", "Message formality"],
      "merged_from_tasks": ["task_001", "task_005"],
      "generality_score": 9
    }}
  ],
  "task_specific_preferences": [
    {{
      "canonical_name": "Chart visualization type",
      "description": "Preferred chart types for data visualization",
      "value_schema": "categorical",
      "example_values": ["Bar", "Line", "Pie"],
      "applies_to_task_types": ["data_visualization"],
      "merged_from_dimensions": ["Visualization preference"],
      "merged_from_tasks": ["task_010"],
      "generality_score": 4
    }}
  ]
}}
"""
    try:
        response = client.chat.completions.create(
            model="gpt-5",
            messages=[{"role": "user", "content": prompt}],
            temperature=0.4,
            max_tokens=16000,
        )
        print("=" * 40)
        print("HIERARCHY RESPONSE")
        print("=" * 40)
        print(response.choices[0].message.content)
        print("=" * 40)

        parsed = json.loads(response.choices[0].message.content)
    except Exception as e:
        # Best-effort boundary: API errors and malformed JSON both mean
        # "no hierarchy" for the caller.
        print(f"‚ö†Ô∏è Failed to parse hierarchy: {e}")
        return None

    if not isinstance(parsed, dict):
        print(f"Unexpected hierarchy response: expected a JSON object, got {type(parsed).__name__}")
        return None

    level_keys = ("organizational_preferences", "task_specific_preferences")
    if not any(isinstance(parsed.get(key), list) for key in level_keys):
        print("Unexpected hierarchy response: neither preference list is present")
        return None

    # Callers index both keys unconditionally, so guarantee they are lists.
    for key in level_keys:
        if not isinstance(parsed.get(key), list):
            parsed[key] = []
    return parsed


def map_task_batch(tasks_batch: dict, hierarchy: dict):
    """
    Maps a batch of tasks to the canonical hierarchy in one API call.

    The raw model response is printed for inspection.

    Args:
        tasks_batch: Dict of {task_id: {task_instruction, dimensions}}
        hierarchy: The canonical hierarchy

    Returns:
        Dict of {task_id: {organizational_preferences, task_specific_preferences, unmapped_dimensions}}
        as produced by the model, restricted to entries that are JSON
        objects, or None when the API call fails, the response is not valid
        JSON, or the response is not a JSON object.
    """

    # Get actual task IDs for the example
    task_id_list = list(tasks_batch.keys())
    example_structure = {
        tid: {
            "organizational_preferences": [],
            "task_specific_preferences": [],
            "unmapped_dimensions": []
        } for tid in task_id_list
    }

    prompt = f"""
Map multiple tasks' dimensions to the canonical hierarchy.

For each task and each dimension:
1. Find the matching canonical preference (organizational or task-specific)
2. Create a task-specific instantiation
3. Mark any that don't fit as unmapped

---

TASKS TO MAP:
{json.dumps(tasks_batch, indent=2)}

CANONICAL HIERARCHY:
{json.dumps(hierarchy, indent=2)}

---

CRITICAL: Use the EXACT task IDs from the input. Return ONLY valid JSON with mappings for ALL tasks.

Expected structure (use actual task IDs from input):
{json.dumps(example_structure, indent=2)}

Each task should have:
{{
  "organizational_preferences": [
    {{
      "canonical_name": "Communication tone",
      "task_instantiation": {{
        "description": "Tone for email responses",
        "possible_values": ["Formal", "Friendly"],
        "default": "Professional",
        "original_dimension_name": "Email tone"
      }}
    }}
  ],
  "task_specific_preferences": [...],
  "unmapped_dimensions": [...]
}}
"""
    try:
        response = client.chat.completions.create(
            model="gpt-5",
            messages=[{"role": "user", "content": prompt}],
            temperature=0.3,
            max_tokens=16000,
        )
        print("=" * 40)
        print("MAP BATCH RESPONSE")
        print("=" * 40)
        print(response.choices[0].message.content)
        print("=" * 40)

        parsed = json.loads(response.choices[0].message.content)
    except Exception as e:
        # Best-effort boundary: API errors and malformed JSON both mean a
        # failed batch (None) for the caller.
        print(f"‚ö†Ô∏è Failed to parse batch mapping: {e}")
        return None

    # The caller spreads each per-task entry with ** into its own record,
    # which only works for objects; drop everything else.
    if not isinstance(parsed, dict):
        print(f"Unexpected mapping response: expected a JSON object, got {type(parsed).__name__}")
        return None
    return {tid: entry for tid, entry in parsed.items() if isinstance(entry, dict)}


def process_dimensions(input_file: str = None,
                      output_file: str = "hierarchical_dimensions.json",
                      base_dir: str = "TheAgentCompany/workspaces/tasks",
                      batch_size: int = BATCH_SIZE,
                      resume: bool = True):
    """
    Complete pipeline with batching and resume support:
    PHASE 1 (batched): Prune dimensions for all tasks
    PHASE 2 (single): Build canonical hierarchy
    PHASE 3 (batched): Map all tasks to hierarchy

    Progress is written after every batch. With ``resume=True`` tasks that
    were pruned successfully in an earlier run are skipped, while tasks whose
    earlier pruning attempt was recorded with an error are retried. The
    hierarchy (phase 2) is rebuilt on every run.

    Args:
        input_file: Path to dimensions file; when None,
            ``personalization_dimensions_per_task.json`` is used if present
        output_file: Where to save results
        base_dir: Path to task directory
        batch_size: Number of tasks per API call
        resume: If True, skip already processed tasks

    Side effects:
        Writes ``pruned_dimensions_per_task.json``, ``pruning_decisions.json``,
        ``output_file``, ``processing_statistics.json``,
        ``hierarchy_summary.txt`` and ``pruning_summary.txt``.
    """

    # Auto-detect input file
    if input_file is None:
        if os.path.exists("personalization_dimensions_per_task.json"):
            input_file = "personalization_dimensions_per_task.json"
            print("üìÅ Using generated dimensions file")
        else:
            print("‚ùå No dimensions file found!")
            print("   Expected: 'personalization_dimensions_per_task.json'")
            return

    print(f"üìÇ Loading from: {input_file}")
    print(f"üî¢ Batch size: {batch_size} tasks per API call")
    print(f"üîÑ Resume mode: {'ON' if resume else 'OFF'}\n")

    with open(input_file, "r") as f:
        data = json.load(f)

    task_ids = list(data.keys())
    print(f"üìä Loaded {len(task_ids)} tasks\n")

    # === PHASE 1: Prune in batches (with resume) ===
    print("="*80)
    print("PHASE 1: PRUNING DIMENSIONS (batched)")
    print("="*80 + "\n")

    # Load existing progress if resuming
    pruned_data = {}
    pruning_decisions = {}
    if resume and os.path.exists("pruned_dimensions_per_task.json"):
        with open("pruned_dimensions_per_task.json", "r") as f:
            pruned_data = json.load(f)
        print(f"‚úÖ Found existing pruned data for {len(pruned_data)} tasks")

        if os.path.exists("pruning_decisions.json"):
            with open("pruning_decisions.json", "r") as f:
                pruning_decisions = json.load(f)

    # Determine which tasks need pruning. Failed batches are stored with
    # their original dimensions plus an 'error' marker, so such tasks must be
    # selected again or a rerun would never retry them.
    tasks_to_prune = [
        tid for tid in task_ids
        if tid not in pruned_data or pruning_decisions.get(tid, {}).get('error')
    ]

    if tasks_to_prune:
        print(f"üîÑ Need to prune: {len(tasks_to_prune)} tasks\n")
    else:
        print(f"‚úÖ All tasks already pruned!\n")

    # Prune remaining tasks in batches
    total_batches = (len(tasks_to_prune) + batch_size - 1) // batch_size
    for i in range(0, len(tasks_to_prune), batch_size):
        batch_ids = tasks_to_prune[i:i+batch_size]
        batch_data = {tid: data[tid] for tid in batch_ids}

        batch_num = i // batch_size + 1

        print(f"üîπ Pruning batch {batch_num}/{total_batches} ({len(batch_ids)} tasks)...")
        print(f"   Tasks: {', '.join(batch_ids)}")

        batch_results = prune_task_batch(batch_data, base_dir)

        if not batch_results:
            print(f"   ‚ùå Batch failed, keeping all dimensions")
            for tid in batch_ids:
                pruned_data[tid] = data[tid]
                pruning_decisions[tid] = {
                    'kept': data[tid]['dimensions'],
                    'removed': [],
                    'error': 'API call failed'
                }
        else:
            # Process batch results
            batch_kept = 0
            batch_removed = 0
            for task_id in batch_ids:
                if task_id not in batch_results:
                    print(f"   ‚ö†Ô∏è {task_id} missing from results, keeping original")
                    pruned_data[task_id] = data[task_id]
                    pruning_decisions[task_id] = {
                        'kept': data[task_id]['dimensions'],
                        'removed': [],
                        'error': 'Missing from batch response'
                    }
                    continue

                result = batch_results[task_id]
                kept = result.get('kept_dimensions', [])
                removed = result.get('removed_dimensions', [])

                pruned_data[task_id] = {
                    'task_instruction': data[task_id]['task_instruction'],
                    'dimensions': kept
                }

                # A fresh record without 'error' marks the task as done.
                pruning_decisions[task_id] = {
                    'kept': kept,
                    'removed': removed
                }

                batch_kept += len(kept)
                batch_removed += len(removed)

            print(f"   ‚úÖ Batch complete: {batch_kept} kept, {batch_removed} removed")

        # Save progress after each batch
        with open("pruned_dimensions_per_task.json", "w") as f:
            json.dump(pruned_data, f, indent=2)
        with open("pruning_decisions.json", "w") as f:
            json.dump(pruning_decisions, f, indent=2)
        print(f"   üíæ Progress saved\n")

    # Calculate totals
    total_kept = sum(len(pruned_data[tid]['dimensions']) for tid in task_ids if tid in pruned_data)
    total_removed = sum(len(pruning_decisions.get(tid, {}).get('removed', [])) for tid in task_ids)

    print(f"‚úÖ Phase 1 complete: {total_kept} kept, {total_removed} removed")

    # Report errors
    error_tasks = [tid for tid in task_ids if pruning_decisions.get(tid, {}).get('error')]
    if error_tasks:
        print(f"\n‚ö†Ô∏è Warning: {len(error_tasks)} tasks had errors:")
        for tid in error_tasks[:5]:  # Show first 5
            print(f"   - {tid}: {pruning_decisions[tid]['error']}")
        if len(error_tasks) > 5:
            print(f"   ... and {len(error_tasks) - 5} more")
        print(f"   Rerun with resume=True to retry failed tasks.\n")
    else:
        print()

    # === PHASE 2: Build global hierarchy ===
    print("="*80)
    print("PHASE 2: BUILDING CANONICAL HIERARCHY (single call)")
    print("="*80 + "\n")

    # Collect all pruned dimensions
    all_pruned_dimensions = []
    for task_id in task_ids:
        if task_id in pruned_data:
            for dim in pruned_data[task_id]['dimensions']:
                all_pruned_dimensions.append({
                    **dim,
                    'source_task_id': task_id
                })

    hierarchy = build_global_hierarchy(all_pruned_dimensions)

    if not hierarchy or not isinstance(hierarchy, dict):
        print("‚ùå Failed to build hierarchy")
        return

    # The hierarchy comes from model output, so do not assume both levels exist.
    org_prefs = hierarchy.get('organizational_preferences') or []
    task_prefs = hierarchy.get('task_specific_preferences') or []

    print(f"‚úÖ Built hierarchy:")
    print(f"   - Organizational preferences: {len(org_prefs)}")
    print(f"   - Task-specific preferences: {len(task_prefs)}\n")

    # === PHASE 3: Map in batches (with resume) ===
    print("="*80)
    print("PHASE 3: MAPPING TASKS TO HIERARCHY (batched)")
    print("="*80 + "\n")

    # Load existing mappings if resuming
    task_mappings = {}
    if resume and os.path.exists(output_file):
        try:
            with open(output_file, "r") as f:
                existing_result = json.load(f)
        except (OSError, ValueError) as e:
            # An unreadable or corrupt output file only costs the shortcut:
            # every task is simply mapped again.
            print(f"   Could not reuse mappings from {output_file}: {e}")
        else:
            saved = existing_result.get('task_mappings', {}) if isinstance(existing_result, dict) else {}
            if isinstance(saved, dict):
                task_mappings = saved
            print(f"‚úÖ Found existing mappings for {len(task_mappings)} tasks")

    # Tasks pruned in this run have new dimensions, so a mapping saved by an
    # earlier run no longer describes them.
    for tid in tasks_to_prune:
        task_mappings.pop(tid, None)

    # Determine which tasks need mapping
    tasks_to_map = [tid for tid in task_ids if tid not in task_mappings and tid in pruned_data]

    if tasks_to_map:
        print(f"üîÑ Need to map: {len(tasks_to_map)} tasks\n")
    else:
        print(f"‚úÖ All tasks already mapped!\n")

    # Map remaining tasks in batches
    total_batches = (len(tasks_to_map) + batch_size - 1) // batch_size
    for i in range(0, len(tasks_to_map), batch_size):
        batch_ids = tasks_to_map[i:i+batch_size]
        batch_data = {tid: pruned_data[tid] for tid in batch_ids}

        batch_num = i // batch_size + 1

        print(f"üîó Mapping batch {batch_num}/{total_batches} ({len(batch_ids)} tasks)...")
        print(f"   Tasks: {', '.join(batch_ids)}")

        batch_mappings = map_task_batch(batch_data, hierarchy)

        if not batch_mappings:
            print(f"   ‚ùå Batch mapping failed")
            continue

        # Process batch results
        for task_id in batch_ids:
            if task_id not in batch_mappings:
                print(f"   ‚ö†Ô∏è {task_id} missing from mapping")
                continue

            mapping = batch_mappings[task_id]
            task_mappings[task_id] = {
                'task_instruction': pruned_data[task_id]['task_instruction'],
                'dimensions_kept': len(pruned_data[task_id]['dimensions']),
                'dimensions_removed': len(pruning_decisions.get(task_id, {}).get('removed', [])),
                **mapping
            }

        print(f"   ‚úÖ Batch mapped")

        # Save progress after each batch
        partial_result = {
            'canonical_hierarchy': hierarchy,
            'task_mappings': task_mappings,
            'pruning_decisions': pruning_decisions,
            'summary': {'status': 'in_progress'}
        }
        with open(output_file, "w") as f:
            json.dump(partial_result, f, indent=2)
        print(f"   üíæ Progress saved\n")

    # === FINAL OUTPUT ===
    total_canonical = len(org_prefs) + len(task_prefs)
    result = {
        'canonical_hierarchy': hierarchy,
        'task_mappings': task_mappings,
        'pruning_decisions': pruning_decisions,
        'summary': {
            'total_tasks': len(data),
            'total_original_dimensions': sum(len(t['dimensions']) for t in data.values()),
            'dimensions_after_pruning': total_kept,
            'dimensions_removed': total_removed,
            'canonical_organizational_prefs': len(org_prefs),
            'canonical_task_specific_prefs': len(task_prefs),
            'total_canonical_prefs': total_canonical,
            'reduction_ratio': total_canonical / total_kept if total_kept > 0 else 0
        }
    }

    # Save everything
    with open(output_file, "w") as f:
        json.dump(result, f, indent=2)

    with open("processing_statistics.json", "w") as f:
        json.dump(result['summary'], f, indent=2)

    print(f"{'='*80}")
    print("PROCESSING COMPLETE")
    print(f"{'='*80}")
    print(f"Original dimensions: {result['summary']['total_original_dimensions']}")
    print(f"After pruning: {result['summary']['dimensions_after_pruning']}")
    print(f"Removed: {result['summary']['dimensions_removed']}")
    print(f"")
    print(f"Canonical organizational preferences: {result['summary']['canonical_organizational_prefs']}")
    print(f"Canonical task-specific preferences: {result['summary']['canonical_task_specific_prefs']}")
    print(f"Total canonical preferences: {result['summary']['total_canonical_prefs']}")
    print(f"Reduction from pruned: {result['summary']['reduction_ratio']:.2%}")
    print(f"\n‚úÖ Saved complete structure to {output_file}")
    print(f"‚úÖ Saved statistics to processing_statistics.json")

    # Create readable summaries
    create_readable_summary(result, "hierarchy_summary.txt")
    create_pruning_summary(result, "pruning_summary.txt")


def create_readable_summary(result: dict, output_file: str):
    """
    Write a human-readable hierarchy summary.

    The report has four parts: summary statistics, organizational-level
    preferences, task-specific preferences, and the per-task mappings.
    Preference and mapping entries originate from model output, so missing
    fields are rendered as empty text and non-string list items are
    converted with ``str()`` instead of aborting the report.

    Args:
        result: Dict with the keys ``'summary'``, ``'canonical_hierarchy'``
            and ``'task_mappings'``.
        output_file: Path of the text file to write (UTF-8).
    """

    def joined(values):
        # Accept lists of anything (numbers, objects, ...) as well as a
        # missing value or a bare scalar.
        if values is None:
            return ""
        if isinstance(values, (list, tuple)):
            return ', '.join(str(v) for v in values)
        return str(values)

    def write_preferences(f, prefs, marker):
        # One block of lines per canonical preference; `marker` is the
        # leading symbol that distinguishes the two hierarchy levels.
        for pref in prefs or []:
            if not isinstance(pref, dict):
                continue
            f.write(f"{marker} {pref.get('canonical_name', '')}\n")
            f.write(f"   {pref.get('description', '')}\n")
            f.write(f"   Values: {joined(pref.get('example_values'))}\n")
            f.write(f"   Applies to: {joined(pref.get('applies_to_task_types'))}\n")
            f.write(f"   Merged from: {joined(pref.get('merged_from_dimensions'))}\n")
            f.write(f"   Tasks: {joined(pref.get('merged_from_tasks'))}\n")
            f.write(f"   Generality: {pref.get('generality_score', '?')}/10\n\n")

    def write_instantiations(f, entries):
        # One bullet per mapped preference of a task.
        for p in entries:
            if not isinstance(p, dict):
                continue
            inst = p.get('task_instantiation')
            text = inst.get('description', '') if isinstance(inst, dict) else ''
            f.write(f"    ‚Ä¢ {p.get('canonical_name', '')}: {text}\n")

    # Explicit UTF-8: the report contains non-ASCII symbols that a
    # locale-dependent default encoding may not be able to represent.
    with open(output_file, "w", encoding="utf-8") as f:
        f.write("="*80 + "\n")
        f.write("PERSONALIZATION DIMENSION HIERARCHY\n")
        f.write("="*80 + "\n\n")

        # Summary stats
        s = result['summary']
        f.write("SUMMARY\n")
        f.write("-"*80 + "\n")
        f.write(f"Tasks: {s['total_tasks']}\n")
        f.write(f"Original dimensions: {s['total_original_dimensions']}\n")
        f.write(f"After pruning: {s['dimensions_after_pruning']} ({s['dimensions_removed']} removed)\n")
        f.write(f"Final canonical: {s['total_canonical_prefs']} ({s['canonical_organizational_prefs']} org + {s['canonical_task_specific_prefs']} task-specific)\n\n")

        h = result['canonical_hierarchy']

        # Organizational preferences
        f.write("="*80 + "\n")
        f.write("ORGANIZATIONAL-LEVEL PREFERENCES\n")
        f.write("-"*80 + "\n\n")
        write_preferences(f, h.get('organizational_preferences'), "üìã")

        # Task-specific preferences
        f.write("="*80 + "\n")
        f.write("TASK-SPECIFIC PREFERENCES\n")
        f.write("-"*80 + "\n\n")
        write_preferences(f, h.get('task_specific_preferences'), "üîß")

        # Per-task mappings
        f.write("="*80 + "\n")
        f.write("TASK MAPPINGS\n")
        f.write("-"*80 + "\n\n")

        for task_id, tm in result['task_mappings'].items():
            f.write(f"\n{task_id}\n")
            f.write(f"{tm.get('task_instruction', '')}\n")
            f.write(f"Kept: {tm.get('dimensions_kept', 0)}, Removed: {tm.get('dimensions_removed', 0)}\n\n")

            if tm.get('organizational_preferences'):
                f.write("  Organizational:\n")
                write_instantiations(f, tm['organizational_preferences'])

            if tm.get('task_specific_preferences'):
                f.write("  Task-specific:\n")
                write_instantiations(f, tm['task_specific_preferences'])

            f.write("\n")

    print(f"‚úÖ Saved hierarchy summary to {output_file}")


def create_pruning_summary(result: dict, output_file: str):
    """
    Write the human-readable pruning decisions.

    For every task the report lists an error (if one was recorded), the kept
    dimensions and the removed dimensions, each with its reasoning when one
    is available. Entries originate from model output, so a missing name or
    reasoning is tolerated instead of aborting the report.

    Args:
        result: Dict whose ``'pruning_decisions'`` maps task IDs to dicts
            with optional ``'kept'``, ``'removed'`` and ``'error'`` keys.
        output_file: Path of the text file to write (UTF-8).
    """

    def write_entries(f, entries):
        # Name on one line, reasoning (when present) indented below it.
        for d in entries:
            if not isinstance(d, dict):
                f.write(f"   ‚Ä¢ {d}\n")
                continue
            f.write(f"   ‚Ä¢ {d.get('name', '<unnamed>')}\n")
            if 'reasoning' in d:
                f.write(f"     {d['reasoning']}\n")

    # Explicit UTF-8: the report contains non-ASCII symbols that a
    # locale-dependent default encoding may not be able to represent.
    with open(output_file, "w", encoding="utf-8") as f:
        f.write("="*80 + "\n")
        f.write("PRUNING DECISIONS\n")
        f.write("="*80 + "\n\n")

        for task_id, decisions in result['pruning_decisions'].items():
            f.write(f"\n{task_id}\n")
            f.write("-"*80 + "\n")

            if 'error' in decisions:
                f.write(f"‚ùå ERROR: {decisions['error']}\n\n")

            kept = decisions.get('kept', [])
            removed = decisions.get('removed', [])

            f.write(f"‚úÖ KEPT ({len(kept)}):\n")
            write_entries(f, kept)

            f.write(f"\n‚ùå REMOVED ({len(removed)}):\n")
            write_entries(f, removed)

            f.write("\n")

    print(f"‚úÖ Saved pruning summary to {output_file}")


if __name__ == "__main__":
    import sys

    # Fixed settings for this run; command-line arguments are not parsed.
    resume = True
    batch_size = BATCH_SIZE
    base_dir = "TheAgentCompany/workspaces/tasks"
    input_file = "personalization_dimensions_per_task.json"

    process_dimensions(
        input_file=input_file,
        base_dir=base_dir,
        batch_size=batch_size,
        resume=resume,
    )

📂 Loading from: personalization_dimensions_per_task.json
🔢 Batch size: 5 tasks per API call
🔄 Resume mode: ON

📊 Loaded 175 tasks

PHASE 1: PRUNING DIMENSIONS (batched)

✅ Found existing pruned data for 20 tasks
🔄 Need to prune: 155 tasks

🔹 Pruning batch 1/31 (5 tasks)...
   Tasks: ds-find-meeting-spreadsheet, ds-fix-table-values-and-missing-answers, ds-format-excel-sheets, ds-janusgraph-exercise, ds-merge-multiple-sheets
PRUNE BATCH RESPONSE
{
  "ds-find-meeting-spreadsheet": {
    "kept_dimensions": [
      {
        "name": "Target artifact name matching and normalization",
        "description": "Defines how to match the intended file by name, handling case, punctuation, pluralization, and required keywords or geographic qualifiers.",
        "value_schema": "object: {normalize_case: 'lower'|'as_is', strip_punctuation: boolean, singular_plural_equivalence: boolean, keyword_set: string[], require_geography?: string, match_mode: 'exact'|'contains_all'|'fuzzy', ti