### Setup - Install Libraries, Import, and Initialize
**What this cell does:**

- Installs necessary libraries
- Imports everything we'll need for the entire notebook
- Loads the Anthropic API key from a `.env` file and initializes the API client
- Creates basic configuration

In [0]:
# Install required libraries
%pip install anthropic python-dotenv pandas

# Import standard libraries
import json
import os
import re
import uuid
from datetime import datetime
from typing import Any, Dict, List, Optional

# Import third-party libraries
import pandas as pd
from IPython.display import display, Markdown
import anthropic
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

# Initialize Anthropic client.
# `client` is bound on every path (None when no key is configured) so that
# later cells can test for it instead of hitting a NameError on a fresh kernel.
api_key = os.getenv('ANTHROPIC_API_KEY')
client = anthropic.Anthropic(api_key=api_key) if api_key else None
if client is not None:
    print("Anthropic client initialized successfully")
else:
    print("No ANTHROPIC_API_KEY found in .env file")
    print("Please create a .env file with: ANTHROPIC_API_KEY=your-key-here")

# Set display options for better DataFrame viewing
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', 50)
pd.set_option('display.width', None)

print("All imports completed")
print(f"Notebook started at: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

#### Initialize and Create Basic Decision Event Structure
**What this cell does:**

- Define a function to create a technical decision event
- Include: timestamp, decision_type, title, context, rationale
- Test by creating a sample decision about choosing DynamoDB
- Display as a pandas DataFrame with one row

In [0]:
def create_decision_event(
    title: str,
    decision_type: str,
    context: str,
    rationale: str,
    alternatives_considered: Optional[List[str]] = None,
    tags: Optional[List[str]] = None
) -> Dict[str, Any]:
    """Create a technical decision event with all necessary metadata.

    Args:
        title: Short name of the decision.
        decision_type: Category label (e.g. "database", "api").
        context: Why the decision had to be made.
        rationale: Why this option was chosen.
        alternatives_considered: Options that were rejected; defaults to none.
        tags: Free-form labels; defaults to none.

    Returns:
        A dict with a generated ``id`` ("decision_" + 8 hex chars), an ISO
        ``timestamp`` taken at call time, ``event_type`` fixed to
        ``'technical_decision'``, and the supplied fields.
    """
    return {
        'id': f"decision_{uuid.uuid4().hex[:8]}",
        'timestamp': datetime.now().isoformat(),
        'event_type': 'technical_decision',
        'decision_type': decision_type,
        'title': title,
        'context': context,
        'rationale': rationale,
        # Copy the lists so later changes to the caller's objects cannot
        # silently rewrite an event that is already stored in history.
        'alternatives_considered': list(alternatives_considered or []),
        'tags': list(tags or [])
    }

# Exercise create_decision_event with one example decision
sample_decision = create_decision_event(
    "Use DynamoDB for session storage",
    "database",
    "HAS needs fast, scalable session storage for passenger flow tracking",
    "DynamoDB provides millisecond latency and automatic scaling",
    alternatives_considered=["Redis", "RDS PostgreSQL", "MongoDB"],
    tags=["database", "aws", "performance", "scalability"],
)

# One-row DataFrame view of the event
df = pd.DataFrame([sample_decision])
display(df)

# Raw JSON dump of the same event, under its heading
print(
    "\nRaw event structure:",
    json.dumps(sample_decision, indent=2),
    sep="\n",
)

#### Create History Storage and Display Function
**What this cell does:**

- Initialize empty history list Y
- Create function to add events to history
- Create function to display history as a nice pandas DataFrame
- Test by adding our first decision and displaying it

In [0]:
# Y is the running event history: a plain list that later cells keep appending to
Y = []

def add_to_history(event: Dict[str, Any], history: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Append `event` to `history` in place and hand the same list object back."""
    history.extend((event,))
    return history

def display_history(history: List[Dict[str, Any]], last_n: Optional[int] = None) -> pd.DataFrame:
    """Return history as a DataFrame trimmed to the columns worth showing.

    Args:
        history: Event dicts (decisions and/or consequences).
        last_n: When truthy, keep only the last `last_n` events.

    Returns:
        A DataFrame with whichever of ``timestamp``, ``title``,
        ``decision_type``, ``context`` and ``outcome`` are present in the
        data, in that order, with ``timestamp`` formatted as
        ``YYYY-MM-DD HH:MM``. An empty DataFrame when `history` is empty.
    """
    if not history:
        print("History is empty")
        return pd.DataFrame()

    df = pd.DataFrame(history)

    # Show only last_n records if specified
    if last_n:
        # .copy() gives an independent frame, so the timestamp assignment
        # below never writes into a slice of the full history frame.
        df = df.tail(last_n).copy()

    # Keep only the columns that exist: a history made solely of consequence
    # events has no title/decision_type/context, and selecting them
    # unconditionally would raise KeyError.
    wanted = ['timestamp', 'title', 'decision_type', 'context', 'outcome']
    display_columns = [column for column in wanted if column in df.columns]

    # Format timestamp for better readability
    if 'timestamp' in df.columns:
        df['timestamp'] = pd.to_datetime(df['timestamp']).dt.strftime('%Y-%m-%d %H:%M')

    return df[display_columns]

# Seed the history with the sample decision, then render it
Y = add_to_history(sample_decision, Y)
df_history = display_history(Y)
display(df_history)

# Running count of stored events
print(f"\nTotal events in history: {len(Y)}")

#### Create More Decision Events
**What this cell does:**

- Create four more technical decision events
- Mix of database choices, API designs, architecture patterns
- Add them all to history
- Display the growing history DataFrame to see our journal

In [0]:
# Keyword arguments for four further decisions; each dict is expanded into
# create_decision_event below, in this order.
_decision_specs = [
    {
        'title': "Implement event-driven architecture for real-time updates",
        'decision_type': "architecture",
        'context': "HAS needs real-time passenger flow updates across multiple terminals",
        'rationale': "Event-driven pattern allows loose coupling and scalability",
        'alternatives_considered': ["Polling-based updates", "Direct database queries", "WebSocket connections"],
        'tags': ["architecture", "real-time", "scalability", "events"],
    },
    {
        'title': "Use GraphQL for mobile API",
        'decision_type': "api",
        'context': "Mobile app needs flexible data fetching to minimize bandwidth usage",
        'rationale': "GraphQL allows clients to request exactly what they need, reducing over-fetching",
        'alternatives_considered': ["REST API", "gRPC", "JSON-RPC"],
        'tags': ["api", "mobile", "performance", "graphql"],
    },
    {
        'title': "Choose Kubernetes for container orchestration",
        'decision_type': "infrastructure",
        'context': "Need to manage 50+ microservices across multiple environments",
        'rationale': "Kubernetes provides robust orchestration, auto-scaling, and self-healing",
        'alternatives_considered': ["Docker Swarm", "ECS", "Manual Docker deployment"],
        'tags': ["infrastructure", "containers", "kubernetes", "devops"],
    },
    {
        'title': "Use Redis for caching layer",
        'decision_type': "database",
        'context': "API response times need improvement for frequently accessed data",
        'rationale': "Redis provides sub-millisecond latency and built-in cache expiration",
        'alternatives_considered': ["Memcached", "DynamoDB DAX", "In-memory application cache"],
        'tags': ["database", "caching", "performance", "redis"],
    },
]

decision_2, decision_3, decision_4, decision_5 = [
    create_decision_event(**spec) for spec in _decision_specs
]

# Append the new decisions to the shared history, oldest first
for decision in (decision_2, decision_3, decision_4, decision_5):
    Y = add_to_history(decision, Y)

# Render the journal as it now stands
df_history = display_history(Y)
display(df_history)

print(f"\nTotal events in history: {len(Y)}")
print(f"Decision types: {df_history['decision_type'].value_counts().to_dict()}")

#### Query History Function - Find Related Decisions
**What this cell does:**

- Create function to search history for related decisions
- Input: keywords or topic (e.g., "database", "API")
- Output: filtered DataFrame of relevant past decisions
- Test with "database" to find all DB-related decisions

In [0]:
def query_history(history: List[Dict[str, Any]], search_term: str) -> pd.DataFrame:
    """Return the events whose text fields contain `search_term` (case-insensitive).

    The title, context, rationale, decision type, tags and considered
    alternatives of each event are searched as one lowercase string. Matches
    are returned through display_history; when nothing matches, a notice is
    printed and an empty DataFrame is returned.
    """
    needle = search_term.lower()

    def _haystack(event):
        # All searchable fields flattened into one lowercase string
        parts = (
            event.get('title', ''),
            event.get('context', ''),
            event.get('rationale', ''),
            event.get('decision_type', ''),
            ' '.join(event.get('tags', [])),
            ' '.join(event.get('alternatives_considered', [])),
        )
        return ' '.join(parts).lower()

    matches = [event for event in history if needle in _haystack(event)]

    if matches:
        return display_history(matches)

    print(f"No decisions found related to '{search_term}'")
    return pd.DataFrame()

# Run the same search-and-show routine for three different terms
def _run_search(label, term, lead=""):
    """Print a banner for `label`, query the history for `term`, show and return the result."""
    print(f"{lead}=== Searching for '{label}' related decisions ===")
    found = query_history(Y, term)
    display(found)
    return found

db_decisions = _run_search("database", "database")
perf_decisions = _run_search("performance", "performance", lead="\n")
api_decisions = _run_search("API", "api", lead="\n")

#### Simple Pattern Detection Function
**What this cell does:**

- Create function to identify decision patterns without LLM
- Example: count decisions by type, identify frequent topics
- Return summary statistics as DataFrame
- Shows what we have before adding intelligence

In [0]:
def analyze_patterns(history: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Summarise the history with plain counting (no LLM involved).

    Returns a dict holding: counts per decision type, the five most frequent
    tags, the five most frequently considered alternatives, the number of
    events, and event counts per hour of day and per weekday name. An empty
    history yields ``{"error": "No history to analyze"}``.
    """
    if not history:
        return {"error": "No history to analyze"}

    def _tally(values):
        # Occurrence count per value, keyed in first-seen order
        counts = {}
        for value in values:
            counts[value] = counts.get(value, 0) + 1
        return counts

    def _top_five(counts):
        # Highest counts first; the sort is stable, so ties keep first-seen order
        ranked = sorted(counts.items(), key=lambda pair: pair[1], reverse=True)
        return dict(ranked[:5])

    type_counts = _tally(event.get('decision_type', 'unknown') for event in history)
    tag_counts = _tally(tag for event in history for tag in event.get('tags', []))
    alternative_counts = _tally(
        alt for event in history for alt in event.get('alternatives_considered', [])
    )

    # Time-based breakdown from the parsed timestamps
    stamps = pd.to_datetime(pd.DataFrame(history)['timestamp'])
    by_hour = stamps.dt.hour.value_counts().to_dict()
    by_day = stamps.dt.day_name().value_counts().to_dict()

    return {
        'decision_types': type_counts,
        'top_tags': _top_five(tag_counts),
        'alternatives_considered': _top_five(alternative_counts),
        'total_decisions': len(history),
        'decisions_by_hour': by_hour,
        'decisions_by_day': by_day
    }

# Compute the non-LLM statistics for the current history
patterns = analyze_patterns(Y)

print("=== Decision Pattern Analysis ===\n")

# (heading, key in `patterns`, column labels) for each table to show
_pattern_sections = (
    ("Decision Types:", 'decision_types', ['Type', 'Count']),
    ("\nTop Tags:", 'top_tags', ['Tag', 'Count']),
    ("\nMost Considered Alternatives:", 'alternatives_considered', ['Alternative', 'Times Considered']),
)
for _heading, _key, _columns in _pattern_sections:
    print(_heading)
    display(pd.DataFrame(list(patterns[_key].items()), columns=_columns))

print(f"\nTotal Decisions: {patterns['total_decisions']}")

#### Add Consequence Events - Track Decision Outcomes
**What this cell does:**

- Create new event type for consequences/outcomes of decisions
- Link consequences back to original decisions via `original_decision_id`
- Add several consequences to our existing decisions
- Display enhanced history showing decisions with their outcomes

In [0]:

def create_consequence_event(
    original_decision_id: str,
    outcome: str,
    impact: str,
    lessons_learned: str,
    tags: Optional[List[str]] = None
) -> Dict[str, Any]:
    """Create a consequence event linked to an original decision.

    Args:
        original_decision_id: ``id`` of the decision this outcome belongs to.
        outcome: What actually happened.
        impact: Effect the outcome had.
        lessons_learned: Takeaway for future decisions.
        tags: Free-form labels; defaults to none.

    Returns:
        A dict with a generated ``id`` ("consequence_" + 8 hex chars), an ISO
        ``timestamp`` taken at call time, ``event_type`` fixed to
        ``'decision_consequence'``, and the supplied fields.
    """
    return {
        'id': f"consequence_{uuid.uuid4().hex[:8]}",
        'timestamp': datetime.now().isoformat(),
        'event_type': 'decision_consequence',
        'original_decision_id': original_decision_id,
        'outcome': outcome,
        'impact': impact,
        'lessons_learned': lessons_learned,
        # Copy so later changes to the caller's list cannot alter the stored event.
        'tags': list(tags or [])
    }

# (index into Y of the decision, consequence fields) — one entry per outcome.
# The indices rely on the order in which the decisions were added above.
_consequence_specs = [
    (0, {  # DynamoDB decision
        'outcome': "DynamoDB costs exceeded budget by 40%",
        'impact': "Had to implement aggressive TTL policies and optimize read/write capacity",
        'lessons_learned': "Always model costs with realistic traffic projections",
        'tags': ["cost-overrun", "database", "aws"],
    }),
    (1, {  # Event-driven architecture
        'outcome': "Event processing achieved sub-100ms latency",
        'impact': "Successfully handled 10x traffic spike during holiday travel",
        'lessons_learned': "Event-driven architecture provides excellent scalability",
        'tags': ["success", "performance", "architecture"],
    }),
    (2, {  # GraphQL API
        'outcome': "Mobile app data usage reduced by 60%",
        'impact': "Improved user experience and reduced AWS data transfer costs",
        'lessons_learned': "GraphQL complexity worth it for mobile optimization",
        'tags': ["success", "mobile", "cost-savings"],
    }),
    (3, {  # Kubernetes
        'outcome': "Kubernetes cluster management required dedicated DevOps hire",
        'impact': "Increased operational costs but improved deployment reliability",
        'lessons_learned': "Consider managed services (EKS) vs self-managed for small teams",
        'tags': ["operational-cost", "team-growth", "infrastructure"],
    }),
]

consequence_1, consequence_2, consequence_3, consequence_4 = [
    create_consequence_event(original_decision_id=Y[index]['id'], **fields)
    for index, fields in _consequence_specs
]

# Append the consequences to the shared history in creation order
for consequence in (consequence_1, consequence_2, consequence_3, consequence_4):
    Y = add_to_history(consequence, Y)

# Relationship view: each decision printed together with its recorded outcomes
def display_decisions_with_consequences(history: List[Dict[str, Any]]) -> None:
    """Print every decision in `history`, followed by the consequences linked to it.

    Consequence events are matched to a decision through their
    ``original_decision_id``. Decisions without consequences are printed
    without a CONSEQUENCES section.
    """
    # Group consequence events under the id of the decision they refer to
    linked_by_decision = {}
    for event in history:
        if event.get('event_type') == 'decision_consequence':
            linked_by_decision.setdefault(event.get('original_decision_id'), []).append(event)

    for event in history:
        if event.get('event_type') != 'technical_decision':
            continue

        print(f"\n{'='*80}")
        print(f"DECISION: {event['title']}")
        print(f"Type: {event['decision_type']} | Date: {event['timestamp'][:10]}")
        print(f"Context: {event['context']}")

        linked = linked_by_decision.get(event['id'], [])
        if not linked:
            continue

        print(f"\nCONSEQUENCES ({len(linked)}):")
        for number, cons in enumerate(linked, 1):
            print(f"  {number}. Outcome: {cons['outcome']}")
            print(f"     Impact: {cons['impact']}")
            print(f"     Lesson: {cons['lessons_learned']}")

# Print every decision together with its recorded consequences
display_decisions_with_consequences(Y)

# Headline counts per event type
_decision_total = sum(1 for e in Y if e.get('event_type') == 'technical_decision')
_consequence_total = sum(1 for e in Y if e.get('event_type') == 'decision_consequence')

print(f"\n{'='*80}")
print("SUMMARY STATISTICS:")
print(f"Total Events: {len(Y)}")
print(f"Decisions: {_decision_total}")
print(f"Consequences: {_consequence_total}")

# Tabular view of the five most recent events
print("\nRecent History (last 5 events):")
df_recent = display_history(Y, last_n=5)
display(df_recent)

#### Create Context Builder for LLM
**What this cell does:**

- Function to build relevant context from history for a new decision
- Input: new decision topic
- Output: formatted string of relevant past decisions and outcomes
- Test with a new decision topic like "caching strategy"

In [0]:
def build_context_for_decision(topic: str, history: List[Dict[str, Any]], max_items: int = 5) -> str:
    """Build relevant context from history for a new decision topic.

    An event is related when the topic, or any single whitespace-separated
    word of it, occurs (case-insensitively, as a substring) in the event's
    descriptive text. Related events are ranked by how many topic words they
    contain and the top `max_items` are rendered.

    Args:
        topic: Free-text description of the decision being considered.
        history: Decision and consequence event dicts.
        max_items: Maximum number of related events to include.

    Returns:
        A multi-line string: a header naming the topic, a
        "Found N related items from history:" line, then one numbered entry
        per related event.
    """
    topic_lower = topic.lower()
    topic_words = topic_lower.split()

    def _searchable_text(event: Dict[str, Any]) -> str:
        # Descriptive fields only. Ids, timestamps and event_type are left out
        # on purpose: they would otherwise score hits for words such as
        # "decision" or "consequence" on every event of that kind.
        return ' '.join([
            event.get('title', ''),
            event.get('context', ''),
            event.get('rationale', ''),
            event.get('decision_type', ''),
            event.get('outcome', ''),
            event.get('impact', ''),
            event.get('lessons_learned', ''),
            ' '.join(event.get('tags', [])),
            ' '.join(event.get('alternatives_considered', []))
        ]).lower()

    # Filter and score against the SAME text, computed once per event
    scored = []
    for event in history:
        text = _searchable_text(event)
        if topic_lower in text or any(word in text for word in topic_words):
            score = sum(1 for word in topic_words if word in text)
            scored.append((score, event))

    # Stable sort: events with equal scores stay in history order
    scored.sort(key=lambda pair: pair[0], reverse=True)
    related_items = [event for _, event in scored[:max_items]]

    # Build context string
    context_parts = [f"Context for decision about: {topic}\n"]
    context_parts.append(f"Found {len(related_items)} related items from history:\n")

    for i, item in enumerate(related_items, 1):
        context_parts.append(f"\n{i}. {item.get('event_type', 'unknown').replace('_', ' ').title()}")

        if item.get('event_type') == 'technical_decision':
            context_parts.append(f"   Title: {item['title']}")
            context_parts.append(f"   Context: {item['context']}")
            context_parts.append(f"   Rationale: {item['rationale']}")
            context_parts.append(f"   Alternatives: {', '.join(item.get('alternatives_considered', []))}")

        elif item.get('event_type') == 'decision_consequence':
            context_parts.append(f"   Outcome: {item['outcome']}")
            context_parts.append(f"   Impact: {item['impact']}")
            context_parts.append(f"   Lesson: {item['lessons_learned']}")

        context_parts.append(f"   Tags: {', '.join(item.get('tags', []))}")

    return '\n'.join(context_parts)

# Build and print the context for three sample topics
def _show_context(topic):
    """Print a banner for `topic`, then the context built for it; return that context."""
    print(f"=== Context for '{topic}' decision ===")
    text = build_context_for_decision(topic, Y)
    print(text)
    return text

_divider = "\n" + "="*80 + "\n"

caching_context = _show_context("caching strategy")

print(_divider)
cost_context = _show_context("cost optimization")

print(_divider)
monitoring_context = _show_context("monitoring infrastructure")

# Structured counterpart of the text context: the same kind of lookup, returned as data
def get_structured_context(topic: str, history: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Collect the events related to `topic`, split by event type.

    An event is related when the lowercased topic, or any single word of it,
    occurs in the lowercased string form of the event's values. At most three
    decisions and three consequences are returned, while ``total_related``
    counts every match of those two types.
    """
    needle = topic.lower()
    words = needle.split()

    # One bucket per event type we report on; other types are ignored
    buckets = {'technical_decision': [], 'decision_consequence': []}

    for event in history:
        blob = ' '.join(str(value) for value in event.values()).lower()
        if needle not in blob and not any(word in blob for word in words):
            continue
        kind = event.get('event_type')
        if kind in buckets:
            buckets[kind].append(event)

    decisions = buckets['technical_decision']
    outcomes = buckets['decision_consequence']

    return {
        'topic': topic,
        'related_decisions': decisions[:3],
        'related_consequences': outcomes[:3],
        'total_related': len(decisions) + len(outcomes)
    }

# Try the structured lookup on one topic and report the counts
struct_context = get_structured_context("database", Y)
print(
    f"\nStructured context for 'database':",
    f"- Related decisions: {len(struct_context['related_decisions'])}",
    f"- Related consequences: {len(struct_context['related_consequences'])}",
    f"- Total related items: {struct_context['total_related']}",
    sep="\n",
)

#### LLM Integration - Analyze Decision Consistency
**What this cell does:**

- Create function that uses LLM to check if new decision aligns with past decisions
- Input: new decision + relevant history context
- Output: analysis of consistency, potential conflicts
- Display as formatted markdown or DataFrame

In [0]:
def analyze_decision_consistency(
    new_decision: Dict[str, Any],
    history: List[Dict[str, Any]],
    model: str = "claude-3-5-sonnet-20241022",
    max_tokens: int = 1000
) -> Dict[str, Any]:
    """Use LLM to analyze if a new decision aligns with past decisions.

    Args:
        new_decision: Decision event to check; must carry ``title``,
            ``decision_type``, ``context`` and ``rationale``.
        history: Past decision and consequence events.
        model: Model identifier passed to the Messages API.
        max_tokens: Response token limit passed to the Messages API.

    Returns:
        On success: ``new_decision`` (the title), ``analysis`` (the parsed
        JSON object, or a fallback dict holding the raw ``analysis_text`` when
        the reply contains no parseable JSON) and
        ``historical_items_reviewed`` (number of history items placed in the
        prompt). When the API call itself fails: ``new_decision``, ``error``
        and ``analysis`` set to None.
    """
    # Build context from history
    decision_topic = f"{new_decision['decision_type']} {new_decision['title']}"
    historical_context = build_context_for_decision(decision_topic, history, max_items=10)

    # build_context_for_decision states how many items it included in its
    # "Found N related items" header line; report that number rather than
    # re-deriving a count with different matching rules.
    found = re.search(r'Found (\d+) related items', historical_context)
    items_reviewed = int(found.group(1)) if found else 0

    # Create prompt for LLM
    prompt = f"""Analyze the consistency of this new technical decision with our past decisions and their outcomes.

NEW DECISION:
Title: {new_decision['title']}
Type: {new_decision['decision_type']}
Context: {new_decision['context']}
Rationale: {new_decision['rationale']}
Alternatives Considered: {', '.join(new_decision.get('alternatives_considered', []))}

HISTORICAL CONTEXT:
{historical_context}

Please analyze:
1. CONSISTENCY: Does this decision align with our past decisions and learned lessons?
2. CONFLICTS: Are there any contradictions with previous choices or their outcomes?
3. PATTERNS: What patterns from our history support or challenge this decision?
4. RISKS: Based on past consequences, what risks should we consider?
5. RECOMMENDATIONS: Specific suggestions based on our history

Format your response as JSON with these exact keys: consistency_score (0-100), conflicts, supporting_patterns, risks, recommendations."""

    try:
        response = client.messages.create(
            model=model,
            max_tokens=max_tokens,
            messages=[{"role": "user", "content": prompt}]
        )
        response_text = response.content[0].text
    except Exception as e:
        # Any failure while calling the API (missing client, network, auth, ...)
        return {
            "new_decision": new_decision['title'],
            "error": str(e),
            "analysis": None
        }

    # Try to extract JSON from response: take the outermost {...} span
    analysis = None
    json_match = re.search(r'\{.*\}', response_text, re.DOTALL)
    if json_match:
        try:
            analysis = json.loads(json_match.group())
        except json.JSONDecodeError:
            # Braces were present but the span is not valid JSON; use the
            # text fallback below instead of discarding the model's answer.
            analysis = None

    if analysis is None:
        # Fallback if JSON parsing fails
        analysis = {
            "consistency_score": 0,
            "analysis_text": response_text,
            "error": "Could not parse structured response"
        }

    return {
        "new_decision": new_decision['title'],
        "analysis": analysis,
        "historical_items_reviewed": items_reviewed
    }

def print_consistency_report(result: Dict[str, Any]) -> None:
    """Print the dict returned by analyze_decision_consistency in readable form.

    Shows the structured fields when the analysis carries a
    ``consistency_score``, the raw analysis text otherwise, and the error
    message when no analysis is available.
    """
    analysis = result.get('analysis')
    if not analysis:
        print(f"Error: {result.get('error')}")
        return

    print(f"\nDecision: {result['new_decision']}")
    print(f"Historical items reviewed: {result['historical_items_reviewed']}")

    if 'consistency_score' in analysis:
        print(f"\nConsistency Score: {analysis.get('consistency_score', 'N/A')}/100")
        print(f"\nConflicts: {analysis.get('conflicts', 'None identified')}")
        print(f"\nSupporting Patterns: {analysis.get('supporting_patterns', 'None identified')}")
        print(f"\nRisks: {analysis.get('risks', 'None identified')}")
        print(f"\nRecommendations: {analysis.get('recommendations', 'None provided')}")
    else:
        print("\nAnalysis Text:")
        print(analysis.get('analysis_text', 'No analysis available'))

# Test with a new decision that should align well
aligned_decision = create_decision_event(
    title="Implement distributed caching with Redis Cluster",
    decision_type="infrastructure",
    context="Need to scale caching solution across multiple availability zones",
    rationale="Redis Cluster provides horizontal scaling and high availability",
    alternatives_considered=["Hazelcast", "Apache Ignite", "ElastiCache"],
    tags=["caching", "redis", "scalability", "infrastructure"]
)

print("=== Testing Aligned Decision ===")
consistency_analysis = analyze_decision_consistency(aligned_decision, Y)
print_consistency_report(consistency_analysis)

print("\n" + "="*80 + "\n")

# Test with a potentially conflicting decision
conflicting_decision = create_decision_event(
    title="Replace all caching with direct database queries",
    decision_type="architecture",
    context="Simplify architecture by removing caching layer",
    rationale="Reduce complexity and avoid cache invalidation issues",
    alternatives_considered=["Keep Redis", "Use CDN only", "In-memory caching"],
    tags=["architecture", "simplification", "database"]
)

print("=== Testing Potentially Conflicting Decision ===")
conflict_analysis = analyze_decision_consistency(conflicting_decision, Y)
print_consistency_report(conflict_analysis)

# Create a summary DataFrame of analyses.
# On the error path analyze_decision_consistency returns 'analysis': None, so
# `.get('analysis', {})` would hand back None and the chained `.get` would
# raise AttributeError; `or {}` covers both a missing and a None analysis.
analyses_summary = pd.DataFrame([
    {
        'Decision': 'Distributed Redis Caching',
        'Type': 'infrastructure',
        'Consistency': (consistency_analysis.get('analysis') or {}).get('consistency_score', 'N/A')
    },
    {
        'Decision': 'Remove Caching Layer',
        'Type': 'architecture', 
        'Consistency': (conflict_analysis.get('analysis') or {}).get('consistency_score', 'N/A')
    }
])

print("\n=== Analysis Summary ===")
display(analyses_summary)

#### LLM Pattern Recognition
**What this cell does:**

- Function for LLM to identify patterns across all decisions
- What types of decisions lead to what outcomes?
- What decisions tend to cluster together?
- Display insights as structured data

In [0]:
def recognize_decision_patterns(
    history: List[Dict[str, Any]],
    model: str = "claude-3-5-sonnet-20241022",
    max_tokens: int = 1500
) -> Dict[str, Any]:
    """Use LLM to identify patterns across all technical decisions.

    Args:
        history: Decision and consequence events.
        model: Model identifier passed to the Messages API.
        max_tokens: Response token limit passed to the Messages API.

    Returns:
        A dict that always holds ``status``, ``decisions_analyzed`` and
        ``consequences_analyzed``. With ``status == "success"`` it also holds
        ``patterns`` (the parsed JSON object). With ``status == "error"`` it
        holds ``error`` and, when the model did reply but the reply could not
        be parsed, ``raw_response``.
    """
    # Prepare history summary for analysis
    decisions = [e for e in history if e.get('event_type') == 'technical_decision']
    consequences = [e for e in history if e.get('event_type') == 'decision_consequence']
    counts = {
        "decisions_analyzed": len(decisions),
        "consequences_analyzed": len(consequences)
    }

    # Group consequences per decision; a decision may have several, and
    # keeping only the last one would silently drop outcomes from the prompt.
    consequence_map = {}
    for cons in consequences:
        consequence_map.setdefault(cons.get('original_decision_id'), []).append(cons)

    history_summary = []
    for dec in decisions:
        summary_item = {
            'decision': dec['title'],
            'type': dec['decision_type'],
            'tags': dec.get('tags', []),
            'alternatives': dec.get('alternatives_considered', [])
        }
        linked = consequence_map.get(dec['id'], [])
        if len(linked) == 1:
            # Single consequence: plain strings, as before
            summary_item['outcome'] = linked[0]['outcome']
            summary_item['lesson'] = linked[0]['lessons_learned']
        elif linked:
            # Several consequences: list every outcome and lesson, in history order
            summary_item['outcome'] = [cons['outcome'] for cons in linked]
            summary_item['lesson'] = [cons['lessons_learned'] for cons in linked]
        history_summary.append(summary_item)

    prompt = f"""Analyze this technical decision history to identify patterns, trends, and insights.

DECISION HISTORY:
{json.dumps(history_summary, indent=2)}

Please identify:
1. DECISION PATTERNS: What types of decisions tend to lead to what types of outcomes?
2. CLUSTERING: Which decisions tend to be made together or in sequence?
3. SUCCESS FACTORS: What characteristics are common in successful decisions?
4. FAILURE PATTERNS: What warning signs appear in decisions that led to problems?
5. TECHNOLOGY PREFERENCES: What consistent technology choices emerge?
6. LEARNING CURVE: How have decision-making patterns evolved over time?

Format your response as JSON with these keys: decision_patterns, decision_clusters, success_factors, failure_patterns, technology_preferences, evolution_insights."""

    try:
        response = client.messages.create(
            model=model,
            max_tokens=max_tokens,
            messages=[{"role": "user", "content": prompt}]
        )
        response_text = response.content[0].text
    except Exception as e:
        # Any failure while calling the API (missing client, network, auth, ...)
        return {
            "status": "error",
            "error": str(e),
            **counts
        }

    # Extract JSON: take the outermost {...} span of the reply
    json_match = re.search(r'\{.*\}', response_text, re.DOTALL)
    if json_match:
        try:
            patterns = json.loads(json_match.group())
        except json.JSONDecodeError:
            patterns = None
        if patterns is not None:
            return {
                "status": "success",
                "patterns": patterns,
                **counts
            }

    # No JSON, or invalid JSON: keep the raw reply so the caller can show it
    return {
        "status": "error",
        "error": "Could not parse structured response",
        "raw_response": response_text,
        **counts
    }

# Ask the LLM for cross-decision patterns and print whatever comes back
print("=== Analyzing Decision Patterns Across History ===\n")
pattern_analysis = recognize_decision_patterns(Y)

if pattern_analysis['status'] != 'success':
    # Failure path: show the error, plus the raw reply when one was captured
    print(f"Error analyzing patterns: {pattern_analysis.get('error', 'Unknown error')}")
    if 'raw_response' in pattern_analysis:
        print("\nRaw response:")
        print(pattern_analysis['raw_response'])
else:
    print(f"Analyzed {pattern_analysis['decisions_analyzed']} decisions and {pattern_analysis['consequences_analyzed']} consequences\n")

    patterns = pattern_analysis['patterns']

    # One banner-framed section per category returned by the model
    for key, value in patterns.items():
        print(f"\n{'='*60}")
        print(f"{key.replace('_', ' ').upper()}:")
        print('='*60)

        if isinstance(value, list):
            # Lists become numbered entries
            for i, item in enumerate(value, 1):
                print(f"{i}. {item}")
        elif isinstance(value, dict):
            # Dicts become "- key: value" bullets
            for k, v in value.items():
                print(f"- {k}: {v}")
        else:
            print(value)

# Create a visual summary of key insights
def create_pattern_summary(patterns: Dict[str, Any]) -> pd.DataFrame:
    """Create a summary DataFrame of key pattern insights.

    Args:
        patterns: Parsed pattern-analysis JSON. Only the keys
            ``success_factors``, ``failure_patterns`` and
            ``technology_preferences`` are summarized; other keys are ignored.

    Returns:
        A DataFrame with one row per summarized category and the columns
        ``Category``, ``Count`` and ``Key Insight``. It is empty when
        ``patterns`` is falsy or contains none of the recognized keys.
    """
    if not patterns:
        return pd.DataFrame()

    def _list_or_scalar_row(category: str, value: Any) -> Dict[str, Any]:
        """Build the summary row for a value that is a list or a single item."""
        if isinstance(value, list):
            # The value comes from LLM output and may be an empty list;
            # indexing [0] unconditionally would raise IndexError.
            insight = value[0] if value else 'N/A'
            return {'Category': category, 'Count': len(value), 'Key Insight': insight}
        # Non-list values count as one insight, truncated to 100 characters
        return {'Category': category, 'Count': 1, 'Key Insight': str(value)[:100]}

    summary_data = []

    # Both list-style categories share the same row-building logic
    for key, category in (
        ('success_factors', 'Success Factors'),
        ('failure_patterns', 'Failure Patterns'),
    ):
        if key in patterns:
            summary_data.append(_list_or_scalar_row(category, patterns[key]))

    # Technology preferences are only summarized when given as a mapping
    tech_prefs = patterns.get('technology_preferences')
    if isinstance(tech_prefs, dict):
        top_names = ', '.join(str(name) for name in list(tech_prefs.keys())[:3])
        summary_data.append({
            'Category': 'Technology Preferences',
            'Count': len(tech_prefs),
            'Key Insight': f"Preferred: {top_names}"
        })

    return pd.DataFrame(summary_data)

# Show the condensed summary table only when the analysis produced patterns
if pattern_analysis['status'] == 'success' and 'patterns' in pattern_analysis:
    print("\n\n=== Pattern Summary ===")
    summary_df = create_pattern_summary(pattern_analysis['patterns'])
    if not summary_df.empty:
        display(summary_df)

# Also analyze decision frequency over time
print("\n=== Decision Frequency Analysis ===")
df_decisions = pd.DataFrame([e for e in Y if e.get('event_type') == 'technical_decision'])

if df_decisions.empty:
    # With no technical decisions the frame has no columns, so selecting
    # 'timestamp' below would raise KeyError. Keep `decision_freq` defined.
    decision_freq = pd.DataFrame(columns=['decision_type', 'count'])
    print("\nNo technical decisions found in history.")
else:
    df_decisions['timestamp'] = pd.to_datetime(df_decisions['timestamp'])
    df_decisions['date'] = df_decisions['timestamp'].dt.date

    # One row per decision type with the number of decisions of that type
    decision_freq = df_decisions.groupby('decision_type').size().reset_index(name='count')
    print("\nDecisions by Type:")
    display(decision_freq)

#### Decision Impact Prediction
**What this cell does:**

- Defines a function in which the LLM predicts the potential consequences of a new decision
- Based on patterns from past decisions and their outcomes
- Input: proposed decision
- Output: predicted impacts, similar past situations

In [0]:
def predict_decision_impact(proposed_decision: Dict[str, Any], history: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Use LLM to predict potential consequences of a proposed decision.

    Related past decisions are looked up with ``get_structured_context`` and
    paired with any recorded consequence before being embedded in the prompt.

    Args:
        proposed_decision: Decision event. Must provide ``title``,
            ``decision_type``, ``context`` and ``rationale``;
            ``alternatives_considered`` is optional and may be ``None``.
        history: Event history holding decisions and their consequences.

    Returns:
        A dict that always has ``status`` and ``decision``. On success it also
        has ``predictions`` (the parsed JSON) and ``historical_context_size``.
        On failure ``status`` is ``"error"`` with an ``error`` message, plus
        ``raw_response`` when the model replied but no usable JSON was found.
    """
    import re  # local import: `re` is not part of the notebook's import cell

    # Get structured context
    struct_context = get_structured_context(
        f"{proposed_decision['decision_type']} {proposed_decision['title']}",
        history
    )

    # Index consequences by the decision they belong to; if several exist for
    # one decision, the last one in `history` wins.
    consequences_by_id = {
        c['original_decision_id']: c
        for c in history
        if c.get('event_type') == 'decision_consequence'
    }

    # Prepare decision-outcome pairs for analysis
    decisions_with_outcomes = []
    for decision in struct_context['related_decisions']:
        item = {
            'decision': decision['title'],
            'type': decision['decision_type'],
            'context': decision['context'],
            'rationale': decision['rationale']
        }
        consequence = consequences_by_id.get(decision['id'])
        if consequence is not None:
            item['actual_outcome'] = consequence['outcome']
            item['actual_impact'] = consequence['impact']
            item['lessons'] = consequence['lessons_learned']
        decisions_with_outcomes.append(item)

    # `.get(key, [])` only falls back when the key is missing; a key stored
    # with the value None would make ', '.join(...) raise TypeError.
    alternatives = proposed_decision.get('alternatives_considered') or []
    alternatives_text = ', '.join(str(option) for option in alternatives)

    prompt = f"""Based on our historical decisions and their outcomes, predict the potential consequences of this proposed decision.

PROPOSED DECISION:
Title: {proposed_decision['title']}
Type: {proposed_decision['decision_type']}
Context: {proposed_decision['context']}
Rationale: {proposed_decision['rationale']}
Alternatives: {alternatives_text}

SIMILAR PAST DECISIONS AND OUTCOMES:
{json.dumps(decisions_with_outcomes, indent=2)}

Please predict:
1. LIKELY_OUTCOMES: Most probable consequences (both positive and negative)
2. IMPACT_TIMELINE: When these impacts will likely manifest
3. RISK_FACTORS: Specific risks based on similar past decisions
4. SUCCESS_CONDITIONS: What needs to be true for this decision to succeed
5. SIMILAR_SITUATIONS: Past decisions most relevant to this one
6. CONFIDENCE_LEVEL: How confident are you in these predictions (0-100)

Format as JSON with these exact keys: likely_outcomes, impact_timeline, risk_factors, success_conditions, similar_situations, confidence_level."""

    try:
        response = client.messages.create(
            model="claude-3-5-sonnet-20241022",
            max_tokens=1500,
            messages=[{"role": "user", "content": prompt}]
        )
        response_text = response.content[0].text
    except Exception as e:
        # API or response-shape failure: report it instead of raising so the
        # notebook keeps running.
        return {
            "status": "error",
            "decision": proposed_decision['title'],
            "error": str(e)
        }

    # Extract JSON: greedy match from the first '{' to the last '}' so any
    # prose the model adds around the object is ignored.
    json_match = re.search(r'\{.*\}', response_text, re.DOTALL)
    if json_match is None:
        return {
            "status": "error",
            "decision": proposed_decision['title'],
            "error": "No JSON object found in model response",
            "raw_response": response_text
        }

    try:
        predictions = json.loads(json_match.group())
    except json.JSONDecodeError as e:
        # Keep the raw text so a malformed reply can still be inspected
        return {
            "status": "error",
            "decision": proposed_decision['title'],
            "error": f"Could not parse JSON from model response: {e}",
            "raw_response": response_text
        }

    return {
        "status": "success",
        "decision": proposed_decision['title'],
        "predictions": predictions,
        "historical_context_size": len(decisions_with_outcomes)
    }

# Build a sample monitoring proposal and run it through the impact predictor
monitoring_decision = create_decision_event(
    title="Implement DataDog for comprehensive monitoring",
    decision_type="infrastructure",
    context="Need unified monitoring across all services and infrastructure",
    rationale="DataDog provides integrated APM, logs, and infrastructure monitoring",
    alternatives_considered=["Prometheus + Grafana", "AWS CloudWatch", "New Relic"],
    tags=["monitoring", "observability", "infrastructure"]
)

print("=== Predicting Impact of Monitoring Decision ===\n")
impact_prediction = predict_decision_impact(monitoring_decision, Y)

if impact_prediction['status'] != 'success':
    print(f"Error: {impact_prediction.get('error', 'Unknown error')}")
else:
    print(f"Decision: {impact_prediction['decision']}")
    print(f"Historical context: {impact_prediction['historical_context_size']} similar decisions analyzed\n")

    predictions = impact_prediction['predictions']

    print(f"Confidence Level: {predictions.get('confidence_level', 'N/A')}%\n")

    print("LIKELY OUTCOMES:")
    for entry in predictions.get('likely_outcomes', []):
        print(f"- {entry}")

    # The timeline may be a mapping of period -> impact or a single value
    print("\nIMPACT TIMELINE:")
    timeline = predictions.get('impact_timeline', {})
    if not isinstance(timeline, dict):
        print(f"- {timeline}")
    else:
        for period, impact in timeline.items():
            print(f"- {period}: {impact}")

    # The remaining sections are plain bullet lists with the same layout
    for heading, field in (
        ("\nRISK FACTORS:", 'risk_factors'),
        ("\nSUCCESS CONDITIONS:", 'success_conditions'),
        ("\nSIMILAR SITUATIONS:", 'similar_situations'),
    ):
        print(heading)
        for entry in predictions.get(field, []):
            print(f"- {entry}")

# Test with another decision - microservices migration
microservices_decision = create_decision_event(
    title="Migrate monolithic application to microservices",
    decision_type="architecture",
    context="Current monolith becoming difficult to scale and deploy",
    rationale="Microservices will enable independent scaling and deployment",
    alternatives_considered=["Modular monolith", "Serverless functions", "Keep monolith but optimize"],
    tags=["architecture", "microservices", "scalability"]
)

print("\n" + "="*80 + "\n")
print("=== Predicting Impact of Microservices Migration ===\n")
micro_prediction = predict_decision_impact(microservices_decision, Y)

if micro_prediction['status'] == 'success':
    print(f"Decision: {micro_prediction['decision']}")
    print(f"Confidence Level: {micro_prediction['predictions'].get('confidence_level', 'N/A')}%\n")

    # Create a comparison DataFrame with one row per prediction that succeeded
    comparison_data = []

    for decision_name, prediction in [("DataDog Monitoring", impact_prediction), ("Microservices Migration", micro_prediction)]:
        if prediction['status'] != 'success':
            continue
        pred = prediction['predictions']
        comparison_data.append({
            'Decision': decision_name,
            'Confidence': f"{pred.get('confidence_level', 0)}%",
            # `or []` also covers a key that is present with a JSON null,
            # which would otherwise make len() raise TypeError.
            'Risk Count': len(pred.get('risk_factors') or []),
            'Success Conditions': len(pred.get('success_conditions') or []),
            # Heuristic: any mention of "immediate" in the timeline counts as short-term
            'Timeline': 'Short-term' if 'immediate' in str(pred.get('impact_timeline', '')).lower() else 'Long-term'
        })

    if comparison_data:
        print("\n=== Decision Impact Comparison ===")
        comparison_df = pd.DataFrame(comparison_data)
        display(comparison_df)
else:
    # Report the failure instead of silently skipping it, matching how the
    # monitoring prediction reports its errors.
    print(f"Error: {micro_prediction.get('error', 'Unknown error')}")

# Add a helper function to summarize predictions
def summarize_prediction(prediction_result: Dict[str, Any]) -> str:
    """Create a concise summary of impact predictions.

    Args:
        prediction_result: Result dict with a ``status`` key and, on success,
            a ``predictions`` dict. ``confidence_level`` may be a number or a
            numeric string such as ``"85"`` or ``"85%"``; ``risk_factors`` may
            be a list, a dict, a single value or missing.

    Returns:
        ``"Prediction failed"`` when ``status`` is not ``"success"``, otherwise
        a string such as ``"High confidence (85%), High risk (3 factors)"``.
    """
    if prediction_result['status'] != 'success':
        return "Prediction failed"

    pred = prediction_result['predictions']

    # The values come from LLM-generated JSON, so the confidence is not
    # guaranteed to be a number; comparing a str or None with >= would raise.
    confidence = pred.get('confidence_level', 0)
    if isinstance(confidence, str):
        try:
            parsed = float(confidence.strip().rstrip('%').strip())
        except ValueError:
            parsed = 0.0
        # Show whole numbers without a trailing ".0"
        confidence = int(parsed) if parsed.is_integer() else parsed
    elif not isinstance(confidence, (int, float)):
        confidence = 0

    # Count risk entries; a lone scalar (e.g. one string) is a single risk
    # rather than len(string) characters, and None/empty means no risks.
    risk_factors = pred.get('risk_factors') or []
    if isinstance(risk_factors, (list, tuple, dict)):
        risks = len(risk_factors)
    else:
        risks = 1

    if confidence >= 80:
        confidence_text = "High confidence"
    elif confidence >= 60:
        confidence_text = "Moderate confidence"
    else:
        confidence_text = "Low confidence"

    risk_level = "High" if risks >= 3 else "Medium" if risks >= 2 else "Low"

    return f"{confidence_text} ({confidence}%), {risk_level} risk ({risks} factors)"

# Print the one-line confidence/risk summary for both predictions made above.
# summarize_prediction returns "Prediction failed" for a non-success result,
# so these lines also work when either prediction errored.
print(f"\nMonitoring Decision Summary: {summarize_prediction(impact_prediction)}")
print(f"Microservices Decision Summary: {summarize_prediction(micro_prediction)}")

#### Generate Decision Record
**What this cell does:**

- Function to create a formal ADR (Architectural Decision Record)
- LLM helps write: context, options considered, consequences
- Uses history to enrich the ADR with learned insights
- Output: markdown-formatted ADR

In [0]:
def generate_adr(decision: Dict[str, Any], history: List[Dict[str, Any]], predictions: Dict[str, Any] = None) -> str:
    """Generate a formal Architectural Decision Record using decision history and predictions.

    Args:
        decision: Decision event; must provide ``decision_type`` and ``title``
            and be JSON-serializable, since it is embedded in the prompt.
        history: Event history used to build context and the consistency analysis.
        predictions: Optional result of ``predict_decision_impact``. Only its
            ``predictions`` entry is embedded in the prompt.

    Returns:
        The ADR as Markdown text produced by the model, or a string starting
        with ``"Error generating ADR: "`` when the API call fails.
    """
    # Get historical context
    context = build_context_for_decision(
        f"{decision['decision_type']} {decision['title']}",
        history,
        max_items=10
    )

    # Get consistency analysis
    consistency = analyze_decision_consistency(decision, history)
    consistency_analysis = consistency.get('analysis')
    consistency_section = (
        json.dumps(consistency_analysis, indent=2) if consistency_analysis else 'Not available'
    )

    # A failed prediction result is a non-empty dict without a 'predictions'
    # entry. Treat it like a missing one so the prompt says "Not available"
    # rather than an empty "{}", as the consistency section already does.
    predicted_impacts = predictions.get('predictions') if predictions else None
    predictions_section = (
        json.dumps(predicted_impacts, indent=2) if predicted_impacts else 'Not available'
    )

    prompt = f"""Generate a formal Architectural Decision Record (ADR) for this technical decision.

DECISION DETAILS:
{json.dumps(decision, indent=2)}

HISTORICAL CONTEXT:
{context}

CONSISTENCY ANALYSIS:
{consistency_section}

IMPACT PREDICTIONS:
{predictions_section}

Please create a comprehensive ADR following this structure:

# ADR-[NUMBER]: [TITLE]

## Status
[Proposed/Accepted/Deprecated/Superseded]

## Context
[Detailed explanation of the problem and why a decision is needed]

## Decision
[The change that we're proposing or have agreed to implement]

## Consequences
[What becomes easier or more difficult as a result of this decision]

### Positive Consequences
[List positive outcomes]

### Negative Consequences
[List negative outcomes or trade-offs]

## Options Considered
[Detailed analysis of each alternative with pros/cons]

## Related Decisions
[Links to related ADRs based on historical context]

## Lessons from History
[Key insights from similar past decisions]

## Implementation Notes
[Specific guidance for implementing this decision]

## Review Triggers
[Conditions that would prompt revisiting this decision]

Format as proper Markdown. Be specific and detailed based on the provided context."""

    try:
        response = client.messages.create(
            model="claude-3-5-sonnet-20241022",
            max_tokens=2000,
            messages=[{"role": "user", "content": prompt}]
        )

        return response.content[0].text

    except Exception as e:
        # Errors are returned as text so the calling cell can simply print them
        return f"Error generating ADR: {str(e)}"

# Generate ADR for the monitoring decision, passing the earlier impact
# prediction so it is included in the prompt.
# generate_adr returns an "Error generating ADR: ..." string instead of raising
# when the API call fails, so the print below shows either the ADR or the error.
print("=== Generating ADR for DataDog Monitoring Decision ===\n")
adr_content = generate_adr(monitoring_decision, Y, impact_prediction)
print(adr_content)

# Save ADR to a variable for potential file export
monitoring_adr = adr_content

# Visual separator between the two generated ADRs
print("\n" + "="*80 + "\n")

# Generate ADR for a decision that's already in our history (to show how consequences get included)
# No predictions argument is passed here, so generate_adr uses its default of None.
# NOTE(review): Y[4] is a positional lookup - presumably the fifth event is the
# Redis decision; confirm against the cell that builds Y, since inserting or
# reordering events would silently select a different event.
print("=== Generating ADR for Historical Redis Decision ===\n")
redis_decision = Y[4]  # The Redis caching decision
redis_adr = generate_adr(redis_decision, Y)
print(redis_adr)

# Create a function to generate a summary ADR index
def generate_adr_index(history: List[Dict[str, Any]]) -> pd.DataFrame:
    """Build a table with one ADR row for every technical decision in ``history``.

    Rows are numbered ``ADR-001``, ``ADR-002``, ... in history order. A decision
    with a recorded consequence is marked ``Implemented`` and its outcome text
    is classified by keyword as ``Positive``, ``Mixed`` or ``Negative``;
    otherwise it is ``Proposed`` with outcome ``TBD``.
    """
    decision_events = [
        event for event in history
        if event.get('event_type') == 'technical_decision'
    ]

    # Map decision id -> consequence event (a later consequence replaces an earlier one)
    consequence_by_decision = {}
    for event in history:
        if event.get('event_type') == 'decision_consequence':
            consequence_by_decision[event['original_decision_id']] = event

    rows = []
    for number, decision in enumerate(decision_events, start=1):
        row = {
            'ADR_Number': f"ADR-{number:03d}",
            'Title': decision['title'],
            'Type': decision['decision_type'],
            'Date': decision['timestamp'][:10],
        }

        has_consequence = decision['id'] in consequence_by_decision
        row['Status'] = 'Implemented' if has_consequence else 'Proposed'
        row['Has_Consequences'] = 'Yes' if has_consequence else 'No'

        if not has_consequence:
            row['Outcome'] = 'TBD'
        else:
            # Case-insensitive keyword check; the positive keywords take precedence
            outcome_text = consequence_by_decision[decision['id']]['outcome'].lower()
            if 'success' in outcome_text or 'achieved' in outcome_text:
                row['Outcome'] = 'Positive'
            elif 'exceeded budget' in outcome_text or 'required' in outcome_text:
                row['Outcome'] = 'Mixed'
            else:
                row['Outcome'] = 'Negative'

        rows.append(row)

    return pd.DataFrame(rows)

# Build the ADR index from the full event history and render it as a table.
# The DataFrame is kept in `adr_index_df` so it stays available after display.
print("=== ADR Index ===")
adr_index_df = generate_adr_index(Y)
display(adr_index_df)

# Create a template function for quick ADR generation
def create_adr_template(decision_type: str) -> str:
    """Return a fill-in-the-blanks ADR skeleton in Markdown for ``decision_type``.

    The "Key Considerations" and "Risk Factors" sections are pre-filled with
    bullets for ``database``, ``architecture``, ``infrastructure`` or ``api``.
    Any other decision type gets the ``architecture`` bullets.
    """
    # decision type -> (considerations, risks)
    guidance_by_type = {
        'database': (
            ['Performance requirements', 'Scalability needs', 'Consistency model', 'Cost projections'],
            ['Vendor lock-in', 'Migration complexity', 'Operational overhead'],
        ),
        'architecture': (
            ['System complexity', 'Team expertise', 'Maintenance burden', 'Evolution path'],
            ['Over-engineering', 'Under-engineering', 'Technical debt'],
        ),
        'infrastructure': (
            ['Operational complexity', 'Cost model', 'Security requirements', 'Compliance needs'],
            ['Cost overruns', 'Skill gaps', 'Vendor dependencies'],
        ),
        'api': (
            ['Client needs', 'Performance requirements', 'Version strategy', 'Security model'],
            ['Breaking changes', 'Performance degradation', 'Security vulnerabilities'],
        ),
    }

    considerations, risks = guidance_by_type.get(decision_type, guidance_by_type['architecture'])
    considerations_md = "\n".join(f"- {entry}" for entry in considerations)
    risks_md = "\n".join(f"- {entry}" for entry in risks)

    # One list element per output line; the final empty string yields the
    # trailing newline. The two spaces after "Negative Consequences" are part
    # of the template text and are kept as-is.
    template_lines = [
        "# ADR-XXX: [Decision Title]",
        "",
        "## Status",
        "Proposed",
        "",
        "## Context",
        "[Describe the issue motivating this decision]",
        "",
        "## Decision",
        "[Describe the proposed solution]",
        "",
        "## Consequences",
        "",
        "### Positive Consequences",
        "- [Benefit 1]",
        "- [Benefit 2]",
        "",
        "### Negative Consequences  ",
        "- [Trade-off 1]",
        "- [Trade-off 2]",
        "",
        "## Options Considered",
        "",
        "### Option 1: [Name]",
        "**Pros:**",
        "- [Pro 1]",
        "",
        "**Cons:**",
        "- [Con 1]",
        "",
        "## Key Considerations",
        considerations_md,
        "",
        "## Risk Factors",
        risks_md,
        "",
        "## Implementation Notes",
        "[Specific guidance]",
        "",
        "## Review Triggers",
        "- [Condition 1]",
        "- [Condition 2]",
        "",
    ]
    return "\n".join(template_lines)

# Show the pre-filled template for infrastructure decisions under its heading
print(
    "\n=== Quick ADR Template for Infrastructure Decisions ===",
    create_adr_template('infrastructure'),
    sep="\n",
)

#### Decision Timeline Visualization
**What this cell does:**

- Creates a simple timeline showing decisions chronologically
- Color-codes decisions by decision type
- Shows the pattern of decision-making over time

In [0]:
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from datetime import datetime

# Prepare timeline data: one record per technical decision in the history
timeline_data = [
    {
        'timestamp': pd.to_datetime(event['timestamp']),
        'title': event['title'],
        'type': event['decision_type']
    }
    for event in Y
    if event.get('event_type') == 'technical_decision'
]

# Naming the columns keeps them present even when there are no decisions
df_timeline = pd.DataFrame(timeline_data, columns=['timestamp', 'title', 'type'])

# Color mapping for decision types (unknown types fall back to grey when plotted)
colors = {
    'database': '#E74C3C',      # Red
    'architecture': '#3498DB',   # Blue
    'infrastructure': '#2ECC71', # Green
    'api': '#F39C12'            # Orange
}

if df_timeline.empty:
    # Nothing to plot; skip the figure instead of failing on an empty frame
    print("No technical decisions found in history - skipping timeline plot.")
else:
    # Create the visualization
    fig, ax = plt.subplots(figsize=(12, 6))

    # Plot each decision as a point on the timeline, one series per type
    for decision_type in df_timeline['type'].unique():
        type_data = df_timeline[df_timeline['type'] == decision_type]
        ax.scatter(type_data['timestamp'],
                   [decision_type] * len(type_data),
                   color=colors.get(decision_type, '#999999'),
                   s=200,
                   alpha=0.8,
                   label=decision_type)

    # Add decision titles as annotations, truncated to 25 characters
    for _, row in df_timeline.iterrows():
        label = row['title'][:25] + '...' if len(row['title']) > 25 else row['title']
        ax.annotate(label,
                    xy=(row['timestamp'], row['type']),
                    xytext=(5, 5),
                    textcoords='offset points',
                    fontsize=8,
                    alpha=0.7)

    # Format the plot
    ax.set_xlabel('Date', fontsize=12)
    ax.set_ylabel('Decision Type', fontsize=12)
    ax.set_title('Technical Decision Timeline', fontsize=16, pad=20)
    ax.grid(True, alpha=0.3, axis='x')
    ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))

    # Format dates on x-axis. A fixed 5-minute locator only suits histories
    # that span minutes and produces an unusable number of ticks for longer
    # ones, so let matplotlib choose the tick spacing and label format.
    date_locator = mdates.AutoDateLocator()
    ax.xaxis.set_major_locator(date_locator)
    ax.xaxis.set_major_formatter(mdates.ConciseDateFormatter(date_locator))
    plt.xticks(rotation=45)

    plt.tight_layout()
    plt.show()

# Display summary statistics
print("\n=== Timeline Summary ===")
print(f"Total decisions tracked: {len(df_timeline)}")
print("\nDecisions by type:")
for decision_type, count in df_timeline['type'].value_counts().items():
    print(f"  {decision_type}: {count}")

if not df_timeline.empty:
    time_span = df_timeline['timestamp'].max() - df_timeline['timestamp'].min()
    span_seconds = time_span.total_seconds()
    print(f"\nTime span: {span_seconds / 60:.1f} minutes")
    if span_seconds > 0:
        print(f"Decision frequency: {len(df_timeline) / (span_seconds / 3600):.1f} per hour")
    else:
        # A single decision or identical timestamps give a zero span;
        # dividing by it would raise ZeroDivisionError.
        print("Decision frequency: n/a (all decisions share the same timestamp)")