# Extended Thinking for All Claude Models

## Table of contents
- [Setup](#setup)
- [Model Capabilities](#model-capabilities)
- [Basic Examples](#basic-examples)
- [Streaming with Extended Thinking](#streaming-with-extended-thinking)
- [Token Management](#token-management)
- [Advanced Patterns](#advanced-patterns)
- [Integration with AI Agents](#integration-with-ai-agents)
- [Error Handling](#error-handling)

This notebook demonstrates how to use extended thinking with the Claude models that support it, how to fall back to standard mode for models that do not, and how to integrate both paths into your AI agent system.

Extended thinking gives Claude enhanced reasoning capabilities for complex tasks, while providing transparency into its step-by-step thought process. When enabled, Claude creates `thinking` content blocks where it outputs internal reasoning before crafting a final response.

For more information, see the [official documentation](https://docs.claude.com/en/docs/build-with-claude/extended-thinking).

## Setup

Install required packages and configure the environment for all Claude models.

In [None]:
%pip install anthropic typing_extensions

In [None]:
import anthropic
import os
import json
from typing import Dict, Any, Optional, List, Union
from enum import Enum

# Set your API key (uncomment for a quick local test; prefer exporting
# ANTHROPIC_API_KEY in your shell so the key never lands in the notebook).
# os.environ["ANTHROPIC_API_KEY"] = "your-api-key-here"

# Shared client used by every example below. No key is passed explicitly, so
# it is presumably picked up from the ANTHROPIC_API_KEY environment variable.
client = anthropic.Anthropic()

class ClaudeModel(Enum):
    """Claude models covered by this notebook.

    Each member's value is the model identifier sent to the API. Capability
    data lives in ``MODEL_CAPABILITIES`` (defined right after this class); the
    properties below read from that table so the two can never disagree.
    """
    SONNET_3_5 = "claude-3-5-sonnet-20241022"
    SONNET_4_5 = "claude-sonnet-4-5"  # Extended thinking supported
    OPUS_3 = "claude-3-opus-20240229"
    HAIKU_3 = "claude-3-haiku-20240307"

    @property
    def supports_thinking(self) -> bool:
        """Return True when the capability table marks this model as thinking-capable."""
        return MODEL_CAPABILITIES[self]["extended_thinking"]

    @property
    def default_thinking_budget(self) -> int:
        """Return the recommended thinking budget in tokens (0 when unsupported)."""
        return MODEL_CAPABILITIES[self]["recommended_budget"]


# Model capabilities matrix: the single source of truth for what each model
# can do. Every ClaudeModel member must have an entry.
MODEL_CAPABILITIES = {
    ClaudeModel.SONNET_4_5: {
        "extended_thinking": True,
        # NOTE(review): budget_tokens must stay below the request's max_tokens,
        # which is itself bounded by the model's output limit - confirm this
        # ceiling against the current model documentation.
        "max_thinking_budget": 65536,
        "recommended_budget": 8000,
        "context_window": 200000
    },
    # Claude 3.5 Sonnet predates extended thinking; a request that carries a
    # `thinking` parameter for it is rejected, so it is treated like the other
    # Claude 3 models and served in standard mode.
    ClaudeModel.SONNET_3_5: {
        "extended_thinking": False,
        "max_thinking_budget": 0,
        "recommended_budget": 0,
        "context_window": 200000
    },
    ClaudeModel.OPUS_3: {
        "extended_thinking": False,
        "max_thinking_budget": 0,
        "recommended_budget": 0,
        "context_window": 200000
    },
    ClaudeModel.HAIKU_3: {
        "extended_thinking": False,
        "max_thinking_budget": 0,
        "recommended_budget": 0,
        "context_window": 200000
    }
}

# Print a summary of the table so the notebook shows what will be used.
print("Model Capabilities:")
for model in ClaudeModel:
    caps = MODEL_CAPABILITIES[model]
    print(f"\n{model.name}:")
    print(f"  - Extended Thinking: {caps['extended_thinking']}")
    print(f"  - Max Budget: {caps['max_thinking_budget']:,} tokens")
    print(f"  - Context Window: {caps['context_window']:,} tokens")

## Model Capabilities

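Different Claude models have varying support for extended thinking. The manager below enables thinking only for models that `MODEL_CAPABILITIES` marks as supporting it and uses standard mode for the rest.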

In [None]:
class ExtendedThinkingManager:
    """Build Messages API requests, enabling thinking only where supported.

    Capability data comes from the module-level ``MODEL_CAPABILITIES`` table.
    Models without extended thinking are served in standard mode.
    """

    # Lowest budget this class sends; smaller requests are raised to it.
    MIN_THINKING_BUDGET = 1024
    # Tokens of max_tokens held back for the visible answer whenever the
    # budget has to be shrunk to fit.
    ANSWER_RESERVE = 1024

    def __init__(self, client: anthropic.Anthropic):
        """Store the API client used for every request."""
        self.client = client

    def create_message(
        self,
        model: ClaudeModel,
        messages: List[Dict[str, str]],
        thinking_budget: Optional[int] = None,
        max_tokens: int = 4000,
        **kwargs
    ) -> Any:
        """Create a message with model-appropriate thinking configuration.

        Args:
            model: Model to call; its entry in MODEL_CAPABILITIES decides
                whether a ``thinking`` parameter is sent.
            messages: Conversation in Messages API format.
            thinking_budget: Requested thinking budget in tokens. ``None`` (or
                0) selects the model's recommended budget. Ignored for models
                without extended thinking.
            max_tokens: Upper bound on generated tokens. Thinking tokens count
                against it, so the budget is reduced when it would not fit.
            **kwargs: Extra request parameters; applied last, so they override
                anything built here.

        Returns:
            Whatever ``client.messages.create`` returns.
        """
        caps = MODEL_CAPABILITIES[model]

        params = {
            "model": model.value,
            "messages": messages,
            "max_tokens": max_tokens
        }

        if caps["extended_thinking"]:
            budget = thinking_budget or caps["recommended_budget"]
            # Keep the budget inside the model's range.
            budget = min(budget, caps["max_thinking_budget"])
            budget = max(budget, self.MIN_THINKING_BUDGET)

            # budget_tokens must be strictly below max_tokens or the request
            # is rejected. Shrink the budget to fit and leave ANSWER_RESERVE
            # tokens for the answer itself.
            room = max_tokens - self.ANSWER_RESERVE
            if room >= self.MIN_THINKING_BUDGET:
                budget = min(budget, room)
                params["thinking"] = {
                    "type": "enabled",
                    "budget_tokens": budget
                }
                print(f"✓ Extended thinking enabled with {budget:,} token budget")
            else:
                # Not even the minimum budget fits; send a plain request
                # rather than one that is known to be invalid.
                print(f"ℹ max_tokens={max_tokens:,} is too small for extended thinking, using standard mode")
        else:
            print(f"ℹ {model.name} doesn't support extended thinking, using standard mode")

        # Caller-supplied parameters win over the defaults assembled above.
        params.update(kwargs)

        return self.client.messages.create(**params)

    def process_response(self, response) -> Dict[str, Any]:
        """Split a response into thinking blocks, redacted blocks and answer text.

        Args:
            response: Object whose ``content`` is an iterable of blocks with a
                ``type`` attribute.

        Returns:
            Dict with ``thinking_blocks`` (content + signature per block),
            ``redacted_blocks`` (opaque data per block), ``final_answer`` (all
            text blocks concatenated) and ``has_thinking`` (True when any
            thinking or redacted-thinking block was seen).
        """
        result = {
            "thinking_blocks": [],
            "redacted_blocks": [],
            "final_answer": "",
            "has_thinking": False
        }

        for block in response.content:
            if block.type == "thinking":
                result["thinking_blocks"].append({
                    "content": block.thinking,
                    "signature": getattr(block, "signature", None)
                })
                result["has_thinking"] = True
            elif block.type == "redacted_thinking":
                result["redacted_blocks"].append({
                    "data": getattr(block, "data", None)
                })
                result["has_thinking"] = True
            elif block.type == "text":
                result["final_answer"] += block.text

        return result

# Module-level manager shared by the examples that follow; it wraps the
# `client` created in the setup cell.
thinking_manager = ExtendedThinkingManager(client)

print("Extended Thinking Manager initialized!")

## Basic Examples

Examples showing extended thinking across different models.

In [None]:
def test_model_thinking(model: ClaudeModel, prompt: str):
    """Send one prompt to ``model`` and print a preview of what came back.

    Prints a banner, then a preview of the first thinking block (when any
    thinking was returned) and of the final answer. Any exception is caught
    and reported on one line instead of propagating.
    """
    def clip(text, limit):
        # Shorten long text for display, marking the cut with an ellipsis.
        return text[:limit] + "..." if len(text) > limit else text

    rule = "=" * 60
    print(f"\n{rule}")
    print(f"Testing {model.name}")
    print(f"{rule}")

    try:
        # The fixed 2000-token budget is ignored for non-supporting models.
        reply = thinking_manager.create_message(
            model=model,
            messages=[{"role": "user", "content": prompt}],
            thinking_budget=2000,
        )
        parsed = thinking_manager.process_response(reply)

        if parsed["has_thinking"]:
            print(f"\n🧠 THINKING DETECTED:")
            # Only the first thinking block is shown.
            for number, entry in enumerate(parsed["thinking_blocks"][:1], start=1):
                print(f"  Block {number}: {clip(entry['content'], 300)}")

        print(f"\n✅ FINAL ANSWER:")
        print(clip(parsed["final_answer"], 500))

    except Exception as e:
        print(f"❌ Error: {e}")

# Sample word problem used by the commented-out calls below.
test_prompt = """Solve this step by step: 
If a train leaves Chicago at 2 PM traveling east at 60 mph, 
and another train leaves New York at 3 PM traveling west at 80 mph, 
and the distance between the cities is 790 miles, 
when and where do they meet?"""

# Uncomment any of the calls below to test that model. They are disabled by
# default because each one makes a real API request.
# test_model_thinking(ClaudeModel.SONNET_4_5, test_prompt)
# test_model_thinking(ClaudeModel.SONNET_3_5, test_prompt)
# test_model_thinking(ClaudeModel.OPUS_3, test_prompt)

print("\nTo run tests, uncomment the test_model_thinking() calls above")

## Streaming with Extended Thinking

Handle streaming responses with thinking blocks.

In [None]:
class StreamingThinkingHandler:
    """Accumulate streamed thinking and answer text, one event at a time.

    Attributes:
        current_block_type: Type of the content block being streamed, or None
            when no block is open.
        current_content: Text collected for the most recently started block.
        thinking_content: One string per completed thinking block.
        final_answer: Concatenation of every text delta seen so far.
    """

    def __init__(self):
        self.current_block_type = None
        self.current_content = ""
        self.thinking_content = []
        self.final_answer = ""

    def handle_event(self, event) -> Optional[str]:
        """Update state from one stream event and return text to display.

        Args:
            event: Stream event with a ``type`` attribute. Block-start events
                carry ``content_block.type``; delta events carry ``delta``.

        Returns:
            A progress marker, a chunk of answer text, or None when the event
            produces nothing to show.
        """
        if event.type == "content_block_start":
            self.current_block_type = event.content_block.type
            self.current_content = ""

            if self.current_block_type == "thinking":
                return "\n🧠 [Thinking...]"
            elif self.current_block_type == "text":
                return "\n✅ [Generating answer...]\n"

        elif event.type == "content_block_delta":
            if event.delta.type == "thinking_delta":
                self.current_content += event.delta.thinking
                # Return dots to show progress without cluttering output
                return "."
            elif event.delta.type == "text_delta":
                self.current_content += event.delta.text
                self.final_answer += event.delta.text
                return event.delta.text  # Stream the actual text

        elif event.type == "content_block_stop":
            # Close the block before any return so the handler never keeps a
            # stale block type. A repeated stop then cannot record the same
            # thinking block twice.
            finished_type = self.current_block_type
            self.current_block_type = None
            if finished_type == "thinking":
                self.thinking_content.append(self.current_content)
                return f" [{len(self.current_content)} chars]\n"

        elif event.type == "message_stop":
            return "\n\n[Complete]"

        return None

def stream_with_thinking(model: ClaudeModel, prompt: str) -> Optional["StreamingThinkingHandler"]:
    """Stream a response with thinking enabled, printing progress as it arrives.

    Args:
        model: Model to call. It must be marked as supporting extended
            thinking in MODEL_CAPABILITIES.
        prompt: User prompt sent as the only message.

    Returns:
        The StreamingThinkingHandler holding the collected thinking text and
        final answer, or None (after printing a warning) when the model does
        not support extended thinking.
    """
    caps = MODEL_CAPABILITIES[model]
    if not caps["extended_thinking"]:
        print(f"⚠️ {model.name} doesn't support extended thinking")
        return None

    handler = StreamingThinkingHandler()

    budget = caps["recommended_budget"]
    # Thinking tokens count against max_tokens and budget_tokens must be
    # strictly below it. Size the limit as budget plus room for the answer
    # instead of a fixed number that can be smaller than the budget.
    answer_tokens = 4000
    params = {
        "model": model.value,
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": budget + answer_tokens,
        "thinking": {
            "type": "enabled",
            "budget_tokens": budget
        }
    }

    with client.messages.stream(**params) as stream:
        for event in stream:
            output = handler.handle_event(event)
            if output:
                print(output, end="", flush=True)

    return handler

# Example usage (uncomment to run; this makes a real streaming API request).
# The returned handler keeps the collected thinking text and final answer.
# handler = stream_with_thinking(
#     ClaudeModel.SONNET_4_5,
#     "What are the key considerations when designing a distributed cache?"
# )

print("Streaming handler configured. Uncomment example to test.")

## Token Management

Optimize token usage with extended thinking.

In [None]:
class ThinkingBudgetOptimizer:
    """Pick a thinking budget from a rough, keyword-based complexity estimate."""

    # Budget in tokens for each complexity label.
    TASK_COMPLEXITY_BUDGETS = {
        "simple": 1024,      # Basic questions, simple logic
        "moderate": 2048,    # Multi-step problems, moderate analysis
        "complex": 4096,     # Complex reasoning, detailed analysis
        "deep": 8192,        # Deep analysis, research tasks
        "extensive": 16384,  # Extensive reasoning, complex proofs
        "maximum": 32768     # Maximum depth analysis
    }

    @staticmethod
    def estimate_complexity(prompt: str) -> str:
        """Classify ``prompt`` as simple, moderate, complex, deep or extensive.

        The lower-cased prompt is scanned for keyword substrings, most
        demanding tier first; the first tier with a hit wins. When no keyword
        matches, the prompt length decides.
        """
        # Ordered from most to least demanding: an earlier tier takes
        # precedence when several tiers match.
        tiers = (
            ("extensive", ("exhaustive", "complete proof", "all possibilities", "thoroughly")),
            ("deep", ("research", "investigate", "comprehensive", "detailed analysis")),
            ("complex", ("step by step", "prove", "derive", "implement")),
            ("moderate", ("how does", "compare", "analyze", "describe")),
            ("simple", ("what is", "define", "explain briefly", "summarize")),
        )

        lowered = prompt.lower()
        for label, keywords in tiers:
            for keyword in keywords:
                if keyword in lowered:
                    return label

        # No keyword hit: fall back to the raw prompt length.
        size = len(prompt)
        if size > 500:
            return "complex"
        if size > 200:
            return "moderate"
        return "simple"

    @classmethod
    def get_optimal_budget(
        cls,
        prompt: str,
        model: ClaudeModel,
        override_complexity: Optional[str] = None
    ) -> int:
        """Return the thinking budget in tokens for ``prompt`` on ``model``.

        Returns 0 for models without extended thinking. Otherwise the budget
        for the complexity label is used (``override_complexity`` when given,
        else the estimate; unknown labels map to 2048), capped at the model's
        maximum and never below 1024.
        """
        capabilities = MODEL_CAPABILITIES[model]
        if not capabilities["extended_thinking"]:
            return 0

        level = override_complexity if override_complexity else cls.estimate_complexity(prompt)
        base = cls.TASK_COMPLEXITY_BUDGETS.get(level, 2048)

        # Cap at the model ceiling first, then apply the 1024-token floor.
        capped = min(base, capabilities["max_thinking_budget"])
        return max(capped, 1024)

# Test complexity estimation. No API calls are made here: both methods only
# inspect the prompt text and the capability table.
test_prompts = [
    "What is Python?",
    "Explain how neural networks work",
    "Implement a red-black tree with full balancing",
    "Provide a comprehensive analysis of quantum computing applications",
    "Exhaustively prove the correctness of this algorithm"
]

optimizer = ThinkingBudgetOptimizer()

print("Complexity Analysis:\n")
for prompt in test_prompts:
    complexity = optimizer.estimate_complexity(prompt)
    budget = optimizer.get_optimal_budget(prompt, ClaudeModel.SONNET_4_5)
    # Only the first 50 characters of each prompt are echoed.
    print(f"Prompt: {prompt[:50]}...")
    print(f"  Complexity: {complexity}")
    print(f"  Recommended budget: {budget:,} tokens\n")

## Advanced Patterns

Advanced patterns for using extended thinking in production systems.

In [None]:
class AgentThinkingInterface:
    """Interface for AI agents to run tasks with extended thinking."""

    # Lowest thinking budget this class sends ("minimal" mode uses it).
    MIN_THINKING_BUDGET = 1024
    # Tokens of max_tokens held back for the visible answer when the budget
    # has to be shrunk to fit.
    ANSWER_RESERVE = 1024

    def __init__(self, client: anthropic.Anthropic):
        """Store the API client and create the budget optimizer."""
        self.client = client
        self.optimizer = ThinkingBudgetOptimizer()
        # Placeholder for caching; nothing in this class reads or writes it.
        self.thinking_cache = {}

    def _resolve_budget(
        self,
        task: str,
        model: ClaudeModel,
        thinking_mode: str,
        max_tokens: int
    ) -> int:
        """Return the thinking budget to send, or 0 when thinking must be off."""
        caps = MODEL_CAPABILITIES[model]
        if not caps["extended_thinking"]:
            return 0

        if thinking_mode == "minimal":
            requested = self.MIN_THINKING_BUDGET
        elif thinking_mode == "maximal":
            requested = caps["max_thinking_budget"]
        else:  # auto (also used for any unrecognized mode)
            requested = self.optimizer.get_optimal_budget(task, model)

        # budget_tokens must be strictly below max_tokens or the request is
        # rejected. Fit the budget under it, keeping ANSWER_RESERVE tokens
        # for the answer.
        room = max_tokens - self.ANSWER_RESERVE
        if requested <= 0 or room < self.MIN_THINKING_BUDGET:
            return 0
        return min(requested, room)

    def think_and_execute(
        self,
        task: str,
        context: Optional[Dict[str, Any]] = None,
        model: ClaudeModel = ClaudeModel.SONNET_4_5,
        thinking_mode: str = "auto",  # auto, minimal, maximal
        return_thinking: bool = False,
        max_tokens: int = 8000
    ) -> Dict[str, Any]:
        """Execute a task with a thinking budget chosen for it.

        Args:
            task: Instruction sent as the final user message.
            context: Optional JSON-serializable dict; when given it is sent as
                a leading user message prefixed with "Context:".
            model: Model to call.
            thinking_mode: "minimal" (smallest budget), "maximal" (model
                maximum) or "auto" (estimated from the task text).
            return_thinking: Include thinking block text in the result.
            max_tokens: Upper bound on generated tokens. Thinking counts
                against it, so the chosen budget is reduced to fit; pass a
                larger value to allow deeper thinking.

        Returns:
            Dict with ``success``, ``answer``, ``thinking`` (empty unless
            ``return_thinking``), ``model``, ``thinking_budget_used`` (the
            budget actually sent, 0 when thinking was not enabled) and
            ``metadata`` (estimated complexity and the requested mode).
        """
        messages = []

        if context:
            context_text = f"Context:\n{json.dumps(context, indent=2)}"
            messages.append({"role": "user", "content": context_text})

        messages.append({"role": "user", "content": task})

        budget = self._resolve_budget(task, model, thinking_mode, max_tokens)

        params = {
            "model": model.value,
            "messages": messages,
            "max_tokens": max_tokens
        }

        if budget > 0:
            params["thinking"] = {
                "type": "enabled",
                "budget_tokens": budget
            }

        response = self.client.messages.create(**params)

        result = {
            "success": True,
            "answer": "",
            "thinking": [],
            "model": model.name,
            "thinking_budget_used": budget,
            "metadata": {
                "complexity": self.optimizer.estimate_complexity(task),
                "thinking_mode": thinking_mode
            }
        }

        for block in response.content:
            if block.type == "thinking" and return_thinking:
                result["thinking"].append(block.thinking)
            elif block.type == "text":
                result["answer"] += block.text

        return result

    def batch_think(
        self,
        tasks: List[str],
        model: ClaudeModel = ClaudeModel.SONNET_4_5,
        parallel: bool = False
    ) -> List[Dict[str, Any]]:
        """Run several tasks one after another and collect their results.

        Args:
            tasks: Task strings, processed in order.
            model: Model used for every task.
            parallel: Tasks are always processed sequentially; setting this
                to True only removes the one-second pause between requests.

        Returns:
            One ``think_and_execute`` result per task, in input order.
        """
        import time

        results = []
        last_index = len(tasks) - 1

        for index, task in enumerate(tasks):
            results.append(self.think_and_execute(task, model=model))

            # Pause between requests to avoid rate limits. There is nothing
            # left to wait for after the final task.
            if not parallel and index < last_index:
                time.sleep(1)

        return results

# Initialize agent interface (reuses the shared `client`)
agent_interface = AgentThinkingInterface(client)

# Example usage for agents: `task` is the instruction and `context` is the
# dict that think_and_execute() serializes into a leading "Context:" message.
# The example is only printed here; no request is sent.
example_agent_task = {
    "task": "Analyze this code for potential bugs and suggest improvements",
    "context": {
        "code": "def calculate_average(numbers):\n  return sum(numbers) / len(numbers)",
        "language": "python",
        "purpose": "Calculate mean of number list"
    }
}

print("Agent Interface Example:")
print(json.dumps(example_agent_task, indent=2))
print("\n[Ready for agent integration]")

## Integration with AI Agents

Example of how to integrate extended thinking into your AI agent system.

In [None]:
# TypeScript interface equivalent (for reference)
# NOTE: this is reference text held in a Python string; nothing in the code
# visible here parses, executes, or otherwise reads it.
typescript_interface = """
// TypeScript interface for extended thinking integration

interface ExtendedThinkingConfig {
  enabled: boolean;
  budgetTokens: number;
  mode: 'auto' | 'minimal' | 'moderate' | 'maximal';
  returnThinking: boolean;
}

interface ThinkingResponse {
  success: boolean;
  answer: string;
  thinking?: string[];
  metadata: {
    model: string;
    thinkingBudgetUsed: number;
    complexity: string;
  };
}

class ExtendedThinkingService {
  async executeWithThinking(
    prompt: string,
    config?: Partial<ExtendedThinkingConfig>
  ): Promise<ThinkingResponse> {
    // Implementation would call the Python service or API
  }
}
"""

# Agent integration example
class SmartAgent:
    """Named agent that delegates problem solving to a thinking interface."""

    def __init__(self, name: str, thinking_interface: AgentThinkingInterface):
        self.name = name
        self.thinking = thinking_interface

    def solve_problem(self, problem: str, use_deep_thinking: bool = True) -> str:
        """Solve ``problem`` and return the answer text.

        With ``use_deep_thinking`` the mode is "maximal" when the problem text
        contains the word "complex" (case-insensitive) and "auto" otherwise;
        without it the mode is "minimal". Thinking is always requested back so
        that a short usage summary can be printed.
        """
        if not use_deep_thinking:
            mode = "minimal"
        elif "complex" in problem.lower():
            mode = "maximal"
        else:
            mode = "auto"

        outcome = self.thinking.think_and_execute(
            task=problem,
            thinking_mode=mode,
            return_thinking=True,  # needed for the summary below
        )

        # Report how much thinking came back (useful for debugging).
        thoughts = outcome["thinking"]
        if thoughts:
            print(f"[{self.name}] Used {len(thoughts)} thinking blocks")
            print(f"[{self.name}] Complexity: {outcome['metadata']['complexity']}")

        return outcome["answer"]

# Create example agents. All three share the same AgentThinkingInterface
# instance; constructing them only stores the name and interface, so no API
# request is made here.
code_reviewer = SmartAgent("CodeReviewer", agent_interface)
test_generator = SmartAgent("TestGenerator", agent_interface)
bug_hunter = SmartAgent("BugHunter", agent_interface)

print("Smart Agents Created:")
print(f"  - {code_reviewer.name}")
print(f"  - {test_generator.name}")
print(f"  - {bug_hunter.name}")
print("\n[Agents ready with extended thinking capabilities]")

## Error Handling

Robust error handling for production systems.

In [None]:
class ExtendedThinkingError(Exception):
    """Root of the extended-thinking exception hierarchy.

    Catch this type to handle every error defined in this section.
    """


class ThinkingBudgetError(ExtendedThinkingError):
    """Signals that a thinking budget is invalid."""


class ModelCapabilityError(ExtendedThinkingError):
    """Signals that a model does not support a requested feature."""

def safe_thinking_request(
    client: anthropic.Anthropic,
    model: ClaudeModel,
    messages: List[Dict[str, str]],
    thinking_budget: int = 2000,
    max_retries: int = 3
) -> Optional[Any]:
    """Make a thinking request with validation, budget fitting and retries.

    Args:
        client: API client used for the request.
        model: Model to call. Models without extended thinking get a single
            standard request with no retry handling.
        messages: Conversation in Messages API format.
        thinking_budget: Requested thinking budget in tokens.
        max_retries: Maximum number of attempts for the thinking request.

    Returns:
        The API response, or None when every attempt was rate limited (or
        ``max_retries`` is not positive).

    Raises:
        ThinkingBudgetError: ``thinking_budget`` is below 1024.
        anthropic.APIError: The final attempt failed with an API error other
            than a rate limit.
        Exception: The final attempt failed with any other error.
    """
    import time

    max_tokens = 4000
    # Tokens of max_tokens kept free for the visible answer.
    answer_reserve = 1024

    caps = MODEL_CAPABILITIES[model]

    # Validate model capabilities
    if not caps.get("extended_thinking", False):
        print(f"⚠️ {model.name} doesn't support extended thinking, falling back to standard mode")
        return client.messages.create(
            model=model.value,
            messages=messages,
            max_tokens=max_tokens
        )

    # Validate thinking budget
    if thinking_budget < 1024:
        raise ThinkingBudgetError(f"Budget {thinking_budget} is below minimum (1024)")

    max_budget = caps["max_thinking_budget"]
    if thinking_budget > max_budget:
        print(f"⚠️ Budget {thinking_budget} exceeds max ({max_budget}), using max")
        thinking_budget = max_budget

    # budget_tokens must be strictly below max_tokens or the request is
    # rejected outright. Shrink the budget to what fits, leaving room for
    # the answer.
    fitting_budget = max_tokens - answer_reserve
    if thinking_budget > fitting_budget:
        print(
            f"⚠️ Budget {thinking_budget} does not fit within max_tokens "
            f"({max_tokens}), using {fitting_budget}"
        )
        thinking_budget = fitting_budget

    # Attempt request with retries
    for attempt in range(max_retries):
        is_last_attempt = attempt == max_retries - 1
        try:
            return client.messages.create(
                model=model.value,
                messages=messages,
                max_tokens=max_tokens,
                thinking={
                    "type": "enabled",
                    "budget_tokens": thinking_budget
                }
            )

        # RateLimitError is listed before the broader APIError on purpose so
        # that rate limits get backoff instead of the generic handling.
        except anthropic.RateLimitError:
            if is_last_attempt:
                # No attempt follows, so waiting would only delay the caller.
                print("Rate limit hit, no retries left")
                break
            delay = 2 ** attempt  # exponential backoff: 1s, 2s, 4s, ...
            print(f"Rate limit hit, waiting {delay} seconds...")
            time.sleep(delay)

        except anthropic.APIError as e:
            print(f"API Error on attempt {attempt + 1}: {e}")
            if is_last_attempt:
                raise

        except Exception as e:
            print(f"Unexpected error: {e}")
            if is_last_attempt:
                raise

    return None

# Test error handling
print("Error Handling Examples:\n")

# Example 1: Budget too small. The budget check runs before any request is
# sent, so this raises ThinkingBudgetError without calling the API.
try:
    safe_thinking_request(
        client,
        ClaudeModel.SONNET_4_5,
        [{"role": "user", "content": "test"}],
        thinking_budget=500
    )
except ThinkingBudgetError as e:
    print(f"✓ Caught budget error: {e}")

# Example 2: Model without thinking support. Only the heading is printed;
# the call stays commented out because the fallback path sends a real
# standard-mode request.
print("\n✓ Testing fallback for non-thinking model:")
# This would fall back to standard mode
# safe_thinking_request(
#     client,
#     ClaudeModel.HAIKU_3,
#     [{"role": "user", "content": "test"}]
# )

print("\n[Error handling configured]")