# Chapter 7: Introduction to AI and Large Language Models
**From: Zero to AI Agent**

## Overview
In this chapter, you'll learn about:
- What is artificial intelligence?
- Understanding Large Language Models (LLMs)
- How LLMs work (conceptual overview)
- Introduction to prompts and completions
- Common LLM providers (OpenAI, Anthropic, Google, open-source)
- API keys and authentication
- Costs and rate limiting considerations


In [None]:
!pip install -q -r requirements.txt

# Load variables from a local .env file (if present) into the process
# environment so later cells can read API keys with os.getenv().
from dotenv import load_dotenv
load_dotenv()

---
## Section 7.1: What is artificial intelligence?

In [None]:
# From: ai_vs_traditional.py

# From: Zero to AI Agent, Chapter 7, Section 7.1
# File: ai_vs_traditional.py

"""
Demonstrates the difference between traditional programming and AI approaches
to spam detection.
"""

# Traditional Programming: You define the rules
def is_spam_traditional(email):
    """
    Rule-based spam check: flag an email if it contains a known trigger phrase.

    The match is a case-insensitive substring test, so "Freedom" also
    matches the trigger "free" - one of the weaknesses of hand-written rules.

    Args:
        email: The email text to inspect.

    Returns:
        True if any trigger phrase occurs in the text, otherwise False.
    """
    # Lowercase once so every trigger is compared against the same text
    normalized = email.lower()
    triggers = ('winner', 'free', 'click here', 'urgent')
    return any(trigger in normalized for trigger in triggers)


# AI Approach: The system learns the patterns
# (This is a conceptual example - we'll build real AI models in later chapters!)
def is_spam_ai(email, ai_model=None):
    """
    Conceptual AI spam check that delegates the decision to a trained model.

    Args:
        email: The email text to classify.
        ai_model: Object exposing ``predict(email)`` that returns a spam
            probability between 0.0 and 1.0. Optional.

    Returns:
        With a model: True when the predicted probability is above 0.5,
        otherwise False.
        Without a model: the neutral probability 0.5 (a float, not a bool),
        as a placeholder until a real model is available.
    """
    if ai_model is not None:
        # A real model would have learned from thousands of labelled emails
        spam_probability = ai_model.predict(email)
        return spam_probability > 0.5

    # No model yet - report "can't tell" as a neutral probability
    return 0.5


# Example usage
if __name__ == "__main__":
    # Sample inbox: two obvious spam messages and two ordinary ones
    test_emails = [
        "Congratulations! You're a WINNER! Click here for your FREE prize!",
        "Meeting rescheduled to 3 PM tomorrow",
        "URGENT: Your account needs verification",
        "Can you review the attached proposal?",
    ]
    divider = "-" * 40

    # Run every sample through the hand-written rules
    print("Traditional Programming Approach:")
    print(divider)
    for email in test_emails:
        result = is_spam_traditional(email)
        print(f"Email: {email[:50]}...")
        print(f"Spam: {result}\n")

    # The AI side is described rather than executed - no trained model yet
    print("\nAI Approach (conceptual):")
    print(divider)
    print("\n".join([
        "With a trained AI model, the system would analyze:",
        "- Writing style patterns",
        "- Sender reputation",
        "- Context and semantics",
        "- Thousands of subtle features humans might miss",
        "\nResult: Much more accurate spam detection!",
    ]))


In [None]:
# From: narrow_ai_example.py

# From: Zero to AI Agent, Chapter 7, Section 7.1  
# File: narrow_ai_example.py

"""
Demonstrates Narrow AI - systems that excel at specific tasks
but can't generalize to other domains.
"""

class ChessAI:
    """
    Toy model of a Narrow AI: strong at chess, unable to do anything else.

    Every non-chess method returns an error string instead of doing the
    task, illustrating that today's AI systems are specialized tools.
    """

    def __init__(self):
        self.name, self.specialty = "DeepChess", "Chess"

    def find_best_move(self, board):
        """
        Return the move to play for ``board``.

        Simplified for the demo: the board is ignored and a fixed classic
        opening is returned. A real engine would search positions with
        minimax, alpha-beta pruning, neural networks, etc.
        """
        opening_move = "e2-e4"
        return opening_move

    def write_poetry(self):
        """Poetry is outside this AI's domain - always returns an error string."""
        return "Error: I only know chess. Poetry is outside my domain."

    def translate_language(self, text):
        """Translation is outside this AI's domain - always returns an error string."""
        return "Error: I only know chess. Translation is outside my domain."

    def diagnose_illness(self, symptoms):
        """Medical diagnosis is outside this AI's domain - always returns an error string."""
        return "Error: I only know chess. Medical diagnosis is outside my domain."

    def demonstrate_narrow_ai(self):
        """Print what this AI can do, followed by what it cannot."""
        print(f"Hi, I'm {self.name}, a Narrow AI specialized in {self.specialty}")

        print("\nWhat I CAN do:")
        for strength in (
            f"✓ Chess move: {self.find_best_move('starting_position')}",
            "✓ Analyze millions of chess positions per second",
            "✓ Beat world chess champions",
        ):
            print(strength)

        print("\nWhat I CAN'T do:")
        for limitation in (
            f"✗ Write poetry: {self.write_poetry()}",
            f"✗ Translate: {self.translate_language('Hello')}",
            f"✗ Medical diagnosis: {self.diagnose_illness('headache')}",
        ):
            print(limitation)

        print("\nThis is Narrow AI: Superhuman at one thing, helpless at everything else!")


# Example of how different narrow AIs specialize
class TranslationAI:
    """A second narrow AI: it can translate a few phrases but cannot play chess."""

    def translate(self, text, target_language):
        """
        Look up ``text`` in a tiny built-in phrasebook.

        Returns the translation for ``target_language`` ('spanish' or
        'french'), or "Unknown" when the phrase or language is not listed.
        Real translation systems use neural networks, not lookup tables.
        """
        phrasebook = {
            'Hello': {'spanish': 'Hola', 'french': 'Bonjour'},
            'Thank you': {'spanish': 'Gracias', 'french': 'Merci'}
        }
        try:
            return phrasebook[text][target_language]
        except KeyError:
            # Either the phrase or the target language is missing
            return "Unknown"

    def play_chess(self):
        """Chess is outside this AI's domain - always returns an error string."""
        return "Error: I only know translation. Chess is outside my domain."


if __name__ == "__main__":
    # Let the chess AI show its one strength and its many limits
    chess_ai = ChessAI()
    chess_ai.demonstrate_narrow_ai()

    banner = "=" * 50
    print(f"\n{banner}")
    print("Key Concept: Every AI today is Narrow AI")
    print(banner)
    for takeaway in (
        "• Excel at specific tasks",
        "• Can't generalize to other domains",
        "• Multiple narrow AIs needed for different tasks",
        "• AGI (Artificial General Intelligence) doesn't exist yet",
    ):
        print(takeaway)


In [None]:
# From: rule_based_vs_ml.py

# From: Zero to AI Agent, Chapter 7, Section 7.1
# File: rule_based_vs_ml.py

"""
Compares rule-based systems (traditional AI) with machine learning approaches
using a plant problem diagnosis example.
"""

# Rule-Based System (Old School AI)
def diagnose_plant_problem_rules(symptoms):
    """
    Diagnose a plant problem with a hand-written decision tree.

    The primary symptom is chosen in a fixed priority order
    (yellow_leaves, brown_spots, wilting, holes_in_leaves); a secondary
    symptom then refines the diagnosis. Matching is case-insensitive.

    Weaknesses of this rule-based style:
    - Every rule has to be written by hand
    - Combinations nobody programmed fall through to "Unknown"
    - It never improves with experience

    Args:
        symptoms: Iterable of symptom names such as "yellow_leaves".

    Returns:
        A diagnosis string including the recommended action.
    """
    observed = {symptom.lower() for symptom in symptoms}

    def has(symptom):
        return symptom in observed

    if has("yellow_leaves"):
        if has("brown_tips"):
            return "Overwatering - reduce watering frequency"
        if has("pale_green"):
            return "Iron deficiency - add iron supplement"
        return "Nitrogen deficiency - add fertilizer"

    if has("brown_spots"):
        if has("fuzzy_growth"):
            return "Fungal infection - apply fungicide"
        return "Bacterial infection - remove affected leaves"

    if has("wilting"):
        if has("dry_soil"):
            return "Underwatering - increase water"
        return "Root rot - check drainage"

    if has("holes_in_leaves"):
        return "Pest damage - inspect for insects"

    return "Unknown problem - consult expert"


# Machine Learning Approach (Modern AI) - Conceptual
class PlantDiagnosisML:
    """
    Conceptual stand-in for a machine-learning plant diagnosis system.

    Nothing is actually learned here: training only counts examples and
    diagnosis returns a canned answer. A real system would use computer
    vision and neural networks trained on plant disease databases.
    """

    def __init__(self):
        # Untrained until train() has been called at least once
        self.training_examples = 0
        self.model_trained = False

    def train(self, examples):
        """
        Simulate training by counting the supplied examples.

        Repeated calls accumulate the example count. A real model would
        instead learn patterns (e.g. with CNNs or random forests).

        Args:
            examples: Sized collection of training examples.
        """
        self.training_examples = self.training_examples + len(examples)
        self.model_trained = True
        print(f"Model trained on {self.training_examples} plant examples")

    def diagnose(self, plant_image_or_symptoms):
        """
        Return a simulated ML diagnosis.

        Args:
            plant_image_or_symptoms: Ignored in this simplified demo; a real
                model would extract features from it.

        Returns:
            The string "Model needs training first" before any training,
            otherwise a dict with 'diagnosis', 'alternative',
            'recommendation' and 'learned_from' entries.
        """
        if self.model_trained:
            # Canned result - stands in for a real model's prediction
            case_count = self.training_examples
            return {
                'diagnosis': 'Likely fungal infection (confidence: 87%)',
                'alternative': 'Possible overwatering (confidence: 12%)',
                'recommendation': 'Reduce humidity, improve air circulation',
                'learned_from': f'{case_count} similar cases'
            }

        return "Model needs training first"


def demonstrate_approaches():
    """
    Print a side-by-side comparison of the rule-based and ML diagnoses.

    The same symptom list is passed to the rule-based function and to a
    freshly "trained" PlantDiagnosisML instance.
    """
    heavy_rule = "=" * 60
    light_rule = "-" * 30

    print(heavy_rule)
    print("RULE-BASED vs MACHINE LEARNING PLANT DIAGNOSIS")
    print(heavy_rule)

    # One shared test case for both approaches
    observed = ["yellow_leaves", "brown_tips", "drooping"]

    # --- Rule-based ---
    print("\n1. RULE-BASED APPROACH:")
    print(light_rule)
    rule_verdict = diagnose_plant_problem_rules(observed)
    print(f"Symptoms: {observed}")
    print(f"Diagnosis: {rule_verdict}")
    print("\nLimitations:")
    for drawback in (
        "• Only handles pre-programmed combinations",
        "• Can't improve with experience",
        "• Might miss subtle patterns",
    ):
        print(drawback)

    # --- Machine learning ---
    print("\n2. MACHINE LEARNING APPROACH:")
    print(light_rule)
    learner = PlantDiagnosisML()

    # Two stand-in examples; a real dataset would have thousands
    learner.train([
        {'image': 'plant1.jpg', 'diagnosis': 'overwatering'},
        {'image': 'plant2.jpg', 'diagnosis': 'fungal'},
    ])

    ml_verdict = learner.diagnose(observed)
    print(f"Symptoms: {observed}")
    print(f"ML Diagnosis: {ml_verdict}")

    print("\nAdvantages:")
    for benefit in (
        "• Learns from experience",
        "• Finds patterns humans might miss",
        "• Handles novel combinations",
        "• Improves with more data",
    ):
        print(benefit)


if __name__ == "__main__":
    demonstrate_approaches()

    # Closing summary of the comparison
    banner = "=" * 60
    print(f"\n{banner}")
    print("KEY TAKEAWAY")
    print(banner)
    print("\n".join([
        "Rule-Based: You program every decision",
        "Machine Learning: System learns patterns from data",
        "\nModern AI uses ML because the world is too complex for rules!",
    ]))


In [None]:
# From: recommendation_system.py

# From: Zero to AI Agent, Chapter 7, Section 7.1
# File: recommendation_system.py

"""
A simplified recommendation system demonstrating how services like
Netflix, YouTube, and Spotify recommend content using AI patterns.
"""

import random
from collections import Counter

class SimpleRecommender:
    """
    Simulates how streaming services recommend content.

    This is a simplified version - real systems use:
    - Collaborative filtering (users who liked X also liked Y)
    - Content-based filtering (similar genres, actors, themes)
    - Deep learning for complex pattern recognition
    - Matrix factorization for finding hidden preferences

    Attributes:
        user_history: List of watch events, each a dict with 'movie',
            'genre' and 'rating' keys, in the order they were recorded.
        all_movies: Catalogue mapping genre -> list of movie titles.
        genre_relationships: Mapping genre -> related genres. Related
            genres with no catalogue entry are skipped when recommending.
    """

    def __init__(self):
        self.user_history = []
        self.all_movies = {
            'action': ['Die Hard', 'Mad Max', 'John Wick', 'The Matrix', 'Mission Impossible'],
            'comedy': ['Airplane', 'Ghostbusters', 'The Hangover', 'Bridesmaids', 'Superbad'],
            'sci-fi': ['Interstellar', 'Arrival', 'Blade Runner', 'Dune', 'Ex Machina'],
            'drama': ['The Shawshank Redemption', 'Forrest Gump', 'The Godfather', 'Moonlight'],
            'horror': ['The Shining', 'Get Out', 'Hereditary', 'A Quiet Place', 'The Witch']
        }

        # In real systems, this would be learned from millions of users
        self.genre_relationships = {
            'action': ['sci-fi', 'thriller'],
            'sci-fi': ['action', 'thriller'],
            'comedy': ['romance', 'drama'],
            'drama': ['romance', 'thriller'],
            'horror': ['thriller', 'sci-fi']
        }

    def watch_movie(self, movie, genre, rating=None):
        """
        Record a watch event and print a confirmation line.

        Real systems track much more: time of day, how much was watched,
        device used, and whether the title was searched for or recommended.

        Args:
            movie: Title that was watched.
            genre: Genre the title belongs to.
            rating: Rating out of 5. When omitted (None), a random rating
                from 3 to 5 is simulated.
        """
        # Compare against None explicitly so an explicit rating of 0 is
        # stored as given instead of being replaced by a random one.
        if rating is None:
            rating = random.randint(3, 5)

        watch_data = {
            'movie': movie,
            'genre': genre,
            'rating': rating
        }
        self.user_history.append(watch_data)
        print(f"✅ You watched: {movie} ({genre}) - Rating: {watch_data['rating']}/5")

    def get_user_preferences(self):
        """
        Score each watched genre from the viewing history.

        A genre's score is (number of watches) x (average rating), so both
        frequency and enjoyment raise it.

        Returns:
            Dict mapping genre -> score, or None when there is no history.
        """
        if not self.user_history:
            return None

        # Accumulate watch count and rating total per genre
        genre_scores = {}
        for watch in self.user_history:
            scores = genre_scores.setdefault(watch['genre'], {'count': 0, 'total_rating': 0})
            scores['count'] += 1
            scores['total_rating'] += watch['rating']

        # Weight by both frequency and rating
        preferences = {}
        for genre, scores in genre_scores.items():
            avg_rating = scores['total_rating'] / scores['count']
            preferences[genre] = scores['count'] * avg_rating

        return preferences

    def get_recommendations(self, num_recommendations=5):
        """
        Generate personalized recommendations.

        Real recommendation systems use:
        - Collaborative filtering: "Users like you also watched..."
        - Content similarity: "Because you liked The Matrix, try Inception"
        - Trending adjustments: Boost popular/new content
        - Diversity injection: Don't recommend all from same genre
        - Business rules: Promote originals, new releases

        Args:
            num_recommendations: Maximum number of items to return.

        Returns:
            With viewing history: a list of dicts with 'movie', 'reason'
            and 'confidence' keys, highest confidence first.
            Without history (cold start): a list of plain movie titles,
            one random pick per genre. Note the different element type.
        """
        if not self.user_history:
            # Cold start problem - new users with no history
            print("🎬 New user detected! Here are popular picks:")
            popular_picks = [random.choice(movies) for movies in self.all_movies.values()]
            return popular_picks[:num_recommendations]

        preferences = self.get_user_preferences()
        if not preferences:
            return []

        # Highest-scoring genre; on ties the first one watched wins
        favorite_genre = max(preferences, key=preferences.get)

        # Set gives O(1) "already seen?" checks
        watched = {watch['movie'] for watch in self.user_history}
        recommendations = []

        # Primary recommendations from favorite genre
        for movie in self.all_movies.get(favorite_genre, []):
            if movie not in watched:
                recommendations.append({
                    'movie': movie,
                    'reason': f"Because you love {favorite_genre} movies",
                    'confidence': 0.9
                })

        # Add related genre recommendations for diversity, capping the
        # candidate pool at twice the requested size
        for related_genre in self.genre_relationships.get(favorite_genre, []):
            for movie in self.all_movies.get(related_genre, []):
                if movie not in watched and len(recommendations) < num_recommendations * 2:
                    recommendations.append({
                        'movie': movie,
                        'reason': f"You might like {related_genre} (similar to {favorite_genre})",
                        'confidence': 0.7
                    })

        # Stable sort: favorite-genre picks stay ahead of related-genre picks
        recommendations.sort(key=lambda rec: rec['confidence'], reverse=True)
        return recommendations[:num_recommendations]

    def explain_recommendations(self):
        """
        Print the viewing profile and the steps used to recommend.

        Transparency in AI systems helps build trust. Prints a short
        notice and returns early when there is no viewing history.
        """
        preferences = self.get_user_preferences()

        if not preferences:
            print("No viewing history yet!")
            return

        print("\n📊 YOUR VIEWING PROFILE:")
        print("-" * 40)

        total_watched = len(self.user_history)
        print(f"Movies watched: {total_watched}")

        # Show genre breakdown, most-watched genre first
        genre_counts = Counter(watch['genre'] for watch in self.user_history)
        print("\nGenre preferences:")
        for genre, count in genre_counts.most_common():
            percentage = (count / total_watched) * 100
            avg_rating = sum(w['rating'] for w in self.user_history if w['genre'] == genre) / count
            print(f"  • {genre}: {count} movies ({percentage:.0f}%) - Avg rating: {avg_rating:.1f}")

        print("\n🤖 HOW WE RECOMMEND:")
        print("-" * 40)
        print("1. Analyze your viewing history")
        print("2. Find your favorite genres")
        print("3. Consider your ratings")
        print("4. Look at related genres you might enjoy")
        print("5. Filter out what you've already seen")
        print("6. Rank by predicted enjoyment")


def demonstrate_recommendation_system():
    """
    Run a scripted demo of SimpleRecommender.

    Records a fixed viewing history (mostly action, plus one comedy and one
    sci-fi title), prints the viewing profile, then prints the resulting
    recommendations and a note on how real systems differ.
    """
    banner = "=" * 60
    divider = "-" * 40

    print(banner)
    print("AI RECOMMENDATION SYSTEM DEMO")
    print("Like Netflix, YouTube, or Spotify")
    print(banner)

    netflix_ai = SimpleRecommender()

    print("\n📺 SIMULATING YOUR VIEWING HISTORY:")
    print(divider)

    # (title, genre, rating): an action fan who tried one comedy and
    # loved one sci-fi movie
    viewing_log = (
        ('Die Hard', 'action', 5),
        ('Mad Max', 'action', 4),
        ('John Wick', 'action', 5),
        ('Airplane', 'comedy', 3),
        ('Interstellar', 'sci-fi', 5),
    )
    for title, genre, stars in viewing_log:
        netflix_ai.watch_movie(title, genre, stars)

    # Explain the AI's understanding
    netflix_ai.explain_recommendations()

    print("\n🎬 PERSONALIZED RECOMMENDATIONS FOR YOU:")
    print(divider)
    position = 0
    for pick in netflix_ai.get_recommendations():
        position += 1
        print(f"{position}. {pick['movie']}")
        print(f"   Why: {pick['reason']}")
        print(f"   Confidence: {pick['confidence']*100:.0f}%")

    print("\n💡 REAL SYSTEMS ARE MORE COMPLEX:")
    print(divider)
    for fact in (
        "• Track millions of users' behaviors",
        "• Use deep neural networks",
        "• Consider time of day, device, mood",
        "• A/B test different algorithms",
        "• Balance personalization with discovery",
        "• Update in real-time as you watch",
    ):
        print(fact)


if __name__ == "__main__":
    demonstrate_recommendation_system()

    # Closing note tying the demo back to everyday apps
    banner = "=" * 60
    print(f"\n{banner}")
    print("This is AI in your daily life!")
    print("Every 'For You' page uses similar patterns.")
    print(banner)


In [None]:
# From: ai_api_preview.py

# From: Zero to AI Agent, Chapter 7, Section 7.1
# File: ai_api_preview.py

"""
Preview of what you'll build soon - integrating with AI APIs.
This demonstrates how your Python skills directly apply to AI development.
"""

import json
import requests
from typing import Dict, Optional

def ask_ai(question: str, api_key: str = "YOUR_KEY", max_tokens: int = 100) -> str:
    """
    Example of calling an AI API - you already know every part of this code!

    Your existing Python skills:
    - JSON for data format ✓ (Chapter 6)
    - Requests for API calls ✓ (Chapter 6)
    - Error handling ✓ (Chapter 6)
    - Functions for organization ✓ (Chapter 5)
    - Type hints ✓ (Throughout)

    Args:
        question: The prompt to send to the AI
        api_key: Your API key (you'll get this in Chapter 8)
        max_tokens: Maximum length of response

    Returns:
        The AI's response text. This function never raises: on failure it
        returns a message starting with "Network error:", "Invalid response
        format:", "Unexpected response structure:" or "Unexpected error:".

    Note: This is a template. The endpoint is a placeholder; in Chapter 8
    you'll make this work with real AI services like OpenAI, Anthropic, etc.
    """
    # Prepare the API request (Chapter 6 skills!)
    api_data = {
        "prompt": question,
        "max_tokens": max_tokens,
        "temperature": 0.7,  # Creativity level (0=focused, 1=creative)
        "model": "gpt-3.5-turbo"  # Which AI model to use
    }

    try:
        # Make the API call (You did this in Chapter 6!)
        response = requests.post(
            "https://api.ai-service.com/chat",  # You'll use real endpoints soon
            json=api_data,
            headers={
                "Authorization": f"Bearer {api_key}",
                "Content-Type": "application/json"
            },
            timeout=30  # Good practice: always set timeouts
        )

        # Turn HTTP error statuses (4xx/5xx) into exceptions
        response.raise_for_status()

        # Parse the JSON response (Chapter 6 again!)
        # Handled right here because recent versions of requests raise a
        # decode error that is ALSO a RequestException; left to the outer
        # handlers it would be misreported as a network error. Every JSON
        # decode error (requests, json, simplejson) is a ValueError.
        try:
            result = response.json()
        except ValueError as e:
            return f"Invalid response format: {str(e)}"

        # Valid JSON is not necessarily an object (it could be a list, a
        # string, ...), and only an object has fields to read.
        if not isinstance(result, dict):
            return (
                "Unexpected response structure: expected a JSON object, "
                f"got {type(result).__name__}"
            )

        # Extract the AI's response
        # Different APIs structure this differently
        # OpenAI (Chat Completions): result["choices"][0]["message"]["content"]
        # Anthropic (Messages API): result["content"][0]["text"]
        # You'll learn the specifics for each provider
        return result.get("response", "No response received")

    except requests.exceptions.RequestException as e:
        # Connection problems, timeouts, and HTTP error statuses
        return f"Network error: {str(e)}"
    except Exception as e:
        # Catch-all so this teaching example always returns a string
        return f"Unexpected error: {str(e)}"


def demonstrate_conversation_flow():
    """
    Print a simulated multi-turn chat and the history it builds up.

    No AI is called: each "response" just echoes the question. The point is
    the message-list pattern (system / user / assistant roles) that is sent
    to a provider as context.
    """
    banner = "=" * 60

    # Running transcript, oldest message first (like a chat app)
    history = []

    def add_message(role: str, content: str):
        """Append one message; role is "user", "assistant", or "system"."""
        history.append(dict(
            role=role,
            content=content,
            timestamp="2024-01-01 12:00:00",  # Fixed placeholder timestamp
        ))

    def get_ai_response(user_input: str) -> str:
        """Record the user's turn plus a placeholder reply, and return the reply."""
        add_message("user", user_input)

        # A real implementation would send the whole history for context:
        # response = ask_ai_with_context(conversation)
        reply = f"AI would respond to: '{user_input}'"

        add_message("assistant", reply)
        return reply

    print(banner)
    print("CONVERSATION FLOW EXAMPLE")
    print(banner)

    # System message sets the AI's behavior
    add_message("system", "You are a helpful Python tutor.")

    # Scripted user turns
    for question in (
        "What is a list in Python?",
        "Can you show me an example?",
        "How is it different from a tuple?",
    ):
        print(f"\n👤 User: {question}")
        answer = get_ai_response(question)
        print(f"🤖 AI: {answer}")

    # Dump the transcript, showing at most 50 characters per message
    print(f"\n{banner}")
    print("CONVERSATION HISTORY (What we send to AI):")
    print(banner)
    for entry in history:
        speaker = entry['role'].upper()
        preview = entry['content'][:50]
        print(f"{speaker}: {preview}...")


def show_different_ai_tasks():
    """
    Print example prompts and the output you would expect for six task types.

    The outputs are hard-coded illustrations; no AI is called.
    """
    banner = "=" * 60

    print(banner)
    print("WHAT YOU'LL BUILD WITH AI APIs")
    print(banner)

    # (task name, prompt, expected output)
    showcase = [
        ("Translation",
         "Translate 'Hello, how are you?' to Spanish",
         "Hola, ¿cómo estás?"),
        ("Summarization",
         "Summarize this text in one sentence: [long article]",
         "One sentence summary of the article"),
        ("Code Generation",
         "Write a Python function to reverse a string",
         "def reverse_string(s): return s[::-1]"),
        ("Question Answering",
         "What is the capital of France?",
         "Paris"),
        ("Creative Writing",
         "Write a haiku about programming",
         "Code flows like water / Bugs hide in the silent depths / Debug brings the light"),
        ("Data Extraction",
         "Extract the date from: 'Meeting on January 15th at 3pm'",
         "January 15th"),
    ]

    for task_name, prompt, expected in showcase:
        print(f"\n📌 {task_name}:")
        print(f"   Prompt: {prompt}")
        print(f"   AI Output: {expected}")

    print(f"\n{banner}")
    for line in (
        "🎯 Each task uses the same simple pattern:",
        "1. Prepare your prompt",
        "2. Call the AI API",
        "3. Process the response",
        "That's it! You already know how to do all three!",
    ):
        print(line)


if __name__ == "__main__":
    banner = "=" * 60

    # Show what's coming
    print(banner)
    print("🚀 PREVIEW: AI API INTEGRATION")
    print(banner)
    print("\nYou already have ALL the Python skills needed!")

    print("\nWhat you know:")
    for skill in (
        "✓ Making API calls (requests library)",
        "✓ Working with JSON data",
        "✓ Error handling",
        "✓ Functions and organization",
    ):
        print(skill)

    print("\nWhat you'll learn:")
    for topic in (
        "• How to get API keys",
        "• Specific endpoints for each AI service",
        "• How to structure prompts effectively",
        "• Managing conversation context",
        "• Cost optimization strategies",
    ):
        print(topic)

    # Demonstrate the patterns, with a blank gap before each demo
    for demo in (demonstrate_conversation_flow, show_different_ai_tasks):
        print("\n")
        demo()

    print(f"\n{banner}")
    print("💡 Remember: AI APIs are just web APIs!")
    print("You've already done this in Chapter 6!")
    print(banner)


---
### Section 7.1 Exercises

### Exercise 7.1.1: AI or Not AI?

Consider each system below. Is it using AI or traditional programming? Think about whether the system follows fixed rules or learns from patterns.

**Systems to evaluate:**
1. A calculator app that adds numbers
2. Google Photos finding all pictures of your dog
3. A website login that checks if password matches
4. Spotify creating your 'Discover Weekly' playlist
5. An alarm clock that rings at 7 AM
6. Your phone's face unlock
7. A thermostat that turns on at 70°F
8. Gmail's spam filter
9. A video game where enemies always patrol the same path
10. YouTube's recommendation algorithm

In [None]:
# Your code here


### Exercise 7.1.2: Categorizing AI Types

Match each AI application to its learning type. Remember:
- **Supervised Learning**: Learns from labeled examples (like a teacher showing correct answers)
- **Unsupervised Learning**: Finds patterns without being told what to look for
- **Reinforcement Learning**: Learns through trial and error with rewards/penalties

**Applications to categorize:**

A. An email filter trained on examples of spam and not-spam emails
B. A system that groups customers by shopping behavior without predefined categories
C. A robot learning to walk by trying different movements and getting points for distance traveled
D. A model that predicts house prices from past sales data with known prices
E. An AI finding hidden patterns in genetic data without knowing what diseases to look for
F. A game-playing AI that improves by winning/losing thousands of games
G. A photo app that learned to identify faces after seeing millions of labeled face images

In [None]:
# Your code here


### Exercise 7.1.3: Design Your Own AI Application

Think of a problem in your daily life that AI could solve. This is a thought exercise – no coding required!

Fill in these details for your AI idea:

📌 **Problem it solves:** 
(What daily annoyance or challenge does it address?)

**Type of AI:** 
(Supervised / Unsupervised / Reinforcement – and why?)

📁 **Data it would need:** 
(What information would it need to learn from?)

➡️ **Inputs:** 
(What information does the user provide?)

⬅️ **Outputs:** 
(What does the AI produce or recommend?)

- **Why AI instead of traditional programming?** 
(What makes this problem suitable for learning rather than rules?)

In [None]:
# Your code here


---
## Section 7.2: Understanding Large Language Models (LLMs)

In [None]:
# Section 7.2 content
# No source files found for this section

---
### Section 7.2 Exercises

### Exercise 7.2.1: Token Estimation

Estimate how many tokens each text would use (rule of thumb: one token is roughly 4 characters, or about ¾ of a word):

1. "Hello, world!"
2. "The quick brown fox jumps over the lazy dog."
3. A typical email (200 words)
4. This entire section you're reading
5. "def calculate_sum(a, b): return a + b"

*Estimates below – try it yourself first!*

In [None]:
# Your code here


### Exercise 7.2.2: Choosing the Right Parameters

For each scenario, what temperature would you choose and why?

A. Writing legal contract language
B. Generating creative story ideas
C. Translating technical documentation
D. Writing varied product descriptions
E. Solving coding problems
F. Brainstorming business names

*Think about the tradeoff between consistency and creativity.*

In [None]:
# Your code here


### Exercise 7.2.3: Identifying Good vs Bad LLM Tasks

Categorize each task as "Great for LLMs," "Okay with Caveats," or "Bad Idea":

1. Writing a first draft of a blog post
2. Calculating compound interest over 30 years
3. Checking if an email sounds professional
4. Getting today's stock prices
5. Explaining a complex concept simply
6. Generating test data for your application
7. Making medical diagnoses
8. Summarizing a long document
9. Checking if a password is secure
10. Writing poetry in Shakespeare's style

*Consider: Does it need real-time data? Precise calculations? Creative language?*

In [None]:
# Your code here


---
## Section 7.3: How LLMs work (conceptual overview)

In [None]:
# Section 7.3 content
# No source files found for this section

---
### Section 7.3 Exercises

### Exercise 7.3.1: Trace the Flow

Walk through what happens with this prompt: "The weather today is"

Consider:
1. How does it become tokens?
2. What patterns might activate?
3. What completions are likely?
4. What information is missing?

*Think through each step before checking the solution.*

In [None]:
# Your code here


### Exercise 7.3.2: Context Window Planning

You have a 4,000 token context window. Design an approach for:

A. Having a 10,000 token conversation
B. Analyzing a 50,000 token document
C. Maintaining chat history over multiple sessions

*Consider: What to keep, what to summarize, what to drop?*

In [None]:
# Your code here


### Exercise 7.3.3: Understanding Failures

For each scenario, explain why the LLM fails using what you learned:

1. Can't do exact arithmetic on large numbers
2. Makes up fake citations
3. Contradicts itself in long conversations
4. Can't learn your preferences permanently
5. Sometimes says factually wrong things confidently

*Hint: Think about the mechanism – pattern matching, no memory, statistical training.*

In [None]:
# Your code here


---
## Section 7.4: Introduction to prompts and completions

In [None]:
# Section 7.4 content
# No source files found for this section

---
### Section 7.4 Exercises

### Exercise 7.4.1: Prompt Improvement Challenge

Take these weak prompts and improve them using the techniques you've learned:

1. "Write about space"
2. "Fix this: def func(x): return x/0"
3. "Translate: Hello"
4. "Make a list"
5. "Explain AI"

In [None]:
# Your code here


### Exercise 7.4.2: Few-Shot Template Creation

Create a few-shot prompt template for:
- Extracting dates from text
- Classifying customer support tickets
- Converting informal text to formal business language

In [None]:
# Your code here


### Exercise 7.4.3: Role-Based Prompting

Write system prompts for these AI assistants:
- A Socratic tutor who guides through questions
- A code reviewer focusing on security
- A creative writing partner for brainstorming

In [None]:
# Your code here


### Exercise 7.4.4: Completion Control Experiment

Using the same base prompt, experiment with:
- Different temperatures (0, 0.5, 1.0, 1.5)
- Different max_tokens (50, 200, 500)
- Different stop sequences

Document how each parameter changes the output.

In [None]:
# Your code here


---
## Section 7.5: Common LLM providers (OpenAI, Anthropic, Google, open-source)

In [None]:
# Section 7.5 content
# No source files found for this section

---
### Section 7.5 Exercises

### Exercise 7.5.1: Cost Calculator

Create a function that calculates monthly costs for different providers based on usage patterns. Consider a chatbot that handles varying message volumes.

In [None]:
# Your code here


### Exercise 7.5.2: Provider Comparison Matrix

Build a comparison matrix for your specific use case. Include factors like cost, performance, features, and limitations.

In [None]:
# Your code here


### Exercise 7.5.3: Migration Planning

Design a migration plan from OpenAI to an open-source model. What challenges would you face? How would you handle them?

In [None]:
# Your code here


### Exercise 7.5.4: API Abstraction

Write a simple wrapper class that can work with both OpenAI and Anthropic APIs, allowing easy switching between providers.

In [None]:
# Your code here


---
## Section 7.6: API keys and authentication

In [None]:
# From: api_key_storage.py

# From: Zero to AI Agent, Chapter 7, Section 7.6
# File: api_key_storage.py

"""
Demonstrates the right and wrong ways to store API keys in your code.
Critical for security and preventing expensive mistakes.
"""

import os
from dotenv import load_dotenv


# ============================================================================
# ❌ NEVER DO THIS - EXAMPLES OF WHAT NOT TO DO
# ============================================================================

def bad_example_hardcoded():
    """
    Anti-pattern demo: an API key written straight into the source code.

    No key is actually stored here; the function only prints a two-line
    warning that describes the mistake.
    """
    # ❌ The mistake would look like this (left as a comment on purpose):
    # api_key = "sk-proj-KJ5hLvP9Ym8NwZ3bT7BlbkFJ4Xq2RnD8sFgH6kLpMc1A"
    #
    # A private repository does not make this safe. Keys still leak when:
    # - the repo is accidentally made public
    # - code snippets are shared
    # - the code appears in screenshots or demos
    # - the key lingers in Git history (even after deletion)
    warning_lines = (
        "❌ Example of what NOT to do - hardcoding keys",
        "This would expose your key to anyone who sees the code",
    )
    for text in warning_lines:
        print(text)


def bad_example_in_comments():
    """Anti-pattern demo: a real key pasted into a comment is still exposed."""

    # ❌ The mistake would look like the next two lines:
    # My API key: sk-proj-KJ5hLvP9Ym8NwZ3bT7BlbkFJ4Xq2RnD8sFgH6kLpMc1A
    # TODO: Remove this before committing

    reminder = "❌ Don't put keys in comments either - they often get forgotten"
    print(reminder)


# ============================================================================
# ✅ ALWAYS DO THIS - SECURE API KEY HANDLING
# ============================================================================

def good_example_environment_variable():
    """
    ✅ CORRECT: read the API key from the OPENAI_API_KEY environment variable.

    Returns:
        The key string when the variable is set to a non-empty value,
        otherwise None (after printing instructions for setting it).
    """
    api_key = os.environ.get("OPENAI_API_KEY")

    if api_key:
        # Only a short prefix is echoed; the full key is never printed.
        prefix = api_key[:7]
        print(f"✅ API key loaded successfully (starts with {prefix}...)")
        return api_key

    setup_hints = (
        "No API key found!",
        "Please set it using:",
        "  Mac/Linux: export OPENAI_API_KEY='your-key-here'",
        "  Windows: set OPENAI_API_KEY=your-key-here",
    )
    for hint in setup_hints:
        print(hint)
    return None


def good_example_dotenv():
    """
    ✅ CORRECT: populate the environment via python-dotenv, then read the key.

    Convenient during development, when keys live in a local .env file.

    Returns:
        The OPENAI_API_KEY value, or None (after printing setup help) when
        it is missing or empty.
    """
    # load_dotenv() runs first so the lookup below can see values from .env.
    load_dotenv()
    api_key = os.environ.get("OPENAI_API_KEY")

    if api_key:
        print("✅ API key loaded from .env file")
        return api_key

    for hint in (
        "No API key found in .env file!",
        "Create a .env file with:",
        "  OPENAI_API_KEY=your-key-here",
    ):
        print(hint)
    return None


def good_example_with_validation():
    """
    ✅ BEST PRACTICE: load the OpenAI key, then sanity-check it before use.

    The process environment is consulted first; load_dotenv() is only
    called as a fallback when nothing is found there.

    Returns:
        The validated API key string.

    Raises:
        ValueError: if no key is found, if the key contains a space, or if
            it is shorter than 20 characters.
    """
    env_name = "OPENAI_API_KEY"

    api_key = os.getenv(env_name)
    if not api_key:
        # Fallback source: a local .env file.
        load_dotenv()
        api_key = os.getenv(env_name)
        if not api_key:
            raise ValueError(
                "API key not found!\n"
                "Please set OPENAI_API_KEY environment variable or add to .env file"
            )

    # An unexpected prefix is only a warning, not an error.
    looks_like_openai_key = api_key.startswith("sk-")
    if not looks_like_openai_key:
        print("⚠️ Warning: API key format might be incorrect")

    # Typical copy/paste accidents are rejected outright.
    if " " in api_key:
        raise ValueError("API key contains spaces - probably a copy/paste error")
    if len(api_key) < 20:
        raise ValueError("API key seems too short - might be truncated")

    print("✅ API key validated and ready to use")
    return api_key


# ============================================================================
# SETTING UP YOUR PROJECT CORRECTLY
# ============================================================================

def setup_project_security():
    """
    Set up a new project with proper security from the start.

    Works in the current working directory and:
      1. creates a placeholder ``.env`` file, only if none exists yet;
      2. (re)writes ``.env.example``, a template that is safe to commit;
      3. creates ``.gitignore``, or, when one already exists, appends only
         the protective entries that are missing. Existing rules are never
         removed or overwritten.

    Running the function more than once is safe: the second run adds nothing
    new to ``.gitignore`` and leaves ``.env`` untouched.
    """
    from pathlib import Path

    print("Setting up secure API key management...")

    # 1. Create .env file if it doesn't exist (never clobber real keys)
    env_path = Path(".env")
    if not env_path.exists():
        with open(env_path, "w", encoding="utf-8") as f:
            f.write("# API Keys - NEVER commit this file!\n")
            f.write("OPENAI_API_KEY=your-key-here\n")
            f.write("ANTHROPIC_API_KEY=your-key-here\n")
            f.write("GOOGLE_API_KEY=your-key-here\n")
        print("✅ Created .env file")

    # 2. Create .env.example for others
    with open(".env.example", "w", encoding="utf-8") as f:
        f.write("# Copy this to .env and add your actual keys\n")
        f.write("OPENAI_API_KEY=ADD_YOUR_KEY_HERE\n")
        f.write("ANTHROPIC_API_KEY=ADD_YOUR_KEY_HERE\n")
        f.write("GOOGLE_API_KEY=ADD_YOUR_KEY_HERE\n")
    print("✅ Created .env.example (safe to commit)")

    # 3. Create/update .gitignore
    gitignore_lines = [
        "# Environment variables",
        ".env",
        ".env.local",
        ".env.*.local",
        "",
        "# API keys and secrets",
        "config.json",
        "secrets.json",
        "*.key",
        "",
        "# Python",
        "__pycache__/",
        "*.py[cod]",
        "venv/",
        "env/",
    ]

    gitignore_path = Path(".gitignore")
    if gitignore_path.exists():
        # Merge instead of overwrite: a project's existing ignore rules must
        # survive, so only patterns that are not already listed are appended.
        existing_text = gitignore_path.read_text(encoding="utf-8")
        existing_entries = {line.strip() for line in existing_text.splitlines()}
        missing = [
            line for line in gitignore_lines
            if line and not line.startswith("#") and line not in existing_entries
        ]
        if missing:
            with open(gitignore_path, "a", encoding="utf-8") as f:
                # Avoid gluing the first new entry onto an unterminated line.
                if existing_text and not existing_text.endswith("\n"):
                    f.write("\n")
                f.write("\n# Added by setup_project_security\n")
                f.write("\n".join(missing) + "\n")
        print("✅ Updated .gitignore to protect secrets")
    else:
        with open(gitignore_path, "w", encoding="utf-8") as f:
            f.write("\n".join(gitignore_lines) + "\n")
        print("✅ Created .gitignore to protect secrets")

    print("\n✅ Project security setup complete!")
    print("Next steps:")
    print("1. Edit .env and add your actual API keys")
    print("2. Run: pip install python-dotenv")
    print("3. Use load_dotenv() in your code")


if __name__ == "__main__":
    # Script entry point: walk through the bad-practice demo, then try the
    # validated loader and report whether a usable key was found.
    banner = "=" * 60
    divider = "-" * 40

    print(banner)
    print("API KEY SECURITY DEMONSTRATION")
    print(banner)

    # What NOT to do
    print("\n⚠️ EXAMPLES OF BAD PRACTICES:")
    print(divider)
    bad_example_hardcoded()

    # What to do instead
    print("\n✅ EXAMPLES OF GOOD PRACTICES:")
    print(divider)

    try:
        key = good_example_with_validation()
        if key:
            # Show only a short prefix of the key.
            print(f"Success! Key starts with: {key[:10]}...")
    except Exception as e:
        # A missing or malformed key is reported as a setup hint.
        print(f"Setup needed: {e}")

    print("\n💡 TIP: Run setup_project_security() to set up your project correctly!")


In [None]:
# From: api_config_manager.py

# From: Zero to AI Agent, Chapter 7, Section 7.6
# File: api_config_manager.py

"""
Centralized API configuration manager for working with multiple AI providers.
Handles loading, validation, and organization of API keys.
"""

import os
from dataclasses import dataclass
from typing import Optional, List, Dict
from dotenv import load_dotenv
import json


@dataclass
class APIConfig:
    """
    Centralized API configuration for multiple providers.

    Each field holds the raw key string for one provider, or None when no
    key was supplied. A key counts as "configured" only when it is non-empty
    and is not one of the template placeholder values written by the
    project's example files.
    """
    openai_key: Optional[str] = None
    anthropic_key: Optional[str] = None
    google_key: Optional[str] = None
    replicate_key: Optional[str] = None

    # Deliberately un-annotated so @dataclass treats it as a plain class
    # attribute, not a field. Both placeholder spellings used by the generated
    # template files (.env.example / config.example.json and .env) are listed
    # so an unedited template is never mistaken for a real key.
    _PLACEHOLDER_VALUES = ("ADD_YOUR_KEY_HERE", "your-key-here")

    @classmethod
    def _is_configured(cls, key: Optional[str]) -> bool:
        """Return True when *key* is non-blank and not a template placeholder."""
        if not key:
            return False
        cleaned = key.strip()
        return bool(cleaned) and cleaned not in cls._PLACEHOLDER_VALUES

    @classmethod
    def from_env(cls):
        """
        Load all API keys from environment variables.

        load_dotenv() is called first so values from a local .env file are
        visible. For Google, GEMINI_API_KEY is used when GOOGLE_API_KEY is
        unset or empty.
        """
        load_dotenv()

        return cls(
            openai_key=os.getenv("OPENAI_API_KEY"),
            anthropic_key=os.getenv("ANTHROPIC_API_KEY"),
            google_key=os.getenv("GOOGLE_API_KEY") or os.getenv("GEMINI_API_KEY"),
            replicate_key=os.getenv("REPLICATE_API_TOKEN")
        )

    @classmethod
    def from_json(cls, filepath: str = "config.json"):
        """
        Load API keys from a JSON configuration file.

        Args:
            filepath: Path to a JSON object that may contain the keys
                "openai_key", "anthropic_key", "google_key" and
                "replicate_key". Absent keys become None.

        Raises:
            FileNotFoundError: if *filepath* does not exist.
        """
        if not os.path.exists(filepath):
            raise FileNotFoundError(
                f"{filepath} not found! Copy config.example.json and add your keys"
            )

        with open(filepath, encoding="utf-8") as f:
            config_data = json.load(f)

        return cls(
            openai_key=config_data.get("openai_key"),
            anthropic_key=config_data.get("anthropic_key"),
            google_key=config_data.get("google_key"),
            replicate_key=config_data.get("replicate_key")
        )

    def _keys_by_provider(self) -> Dict[str, Optional[str]]:
        """Map provider id -> raw key, in the fixed priority order."""
        return {
            "openai": self.openai_key,
            "anthropic": self.anthropic_key,
            "google": self.google_key,
            "replicate": self.replicate_key,
        }

    def get_available_providers(self) -> List[str]:
        """
        Return the provider ids that have a configured key.

        The order is always openai, anthropic, google, replicate.
        """
        return [
            name for name, key in self._keys_by_provider().items()
            if self._is_configured(key)
        ]

    def validate(self) -> bool:
        """
        Check that at least one key is configured.

        Prints either the list of configured providers or instructions for
        setting one up, and returns True/False accordingly.
        """
        available = self.get_available_providers()

        if not available:
            print("❌ No valid API keys found!")
            print("\nPlease configure at least one:")
            print("  - OPENAI_API_KEY")
            print("  - ANTHROPIC_API_KEY")
            print("  - GOOGLE_API_KEY")
            print("  - REPLICATE_API_TOKEN")
            return False

        print(f"✅ API keys loaded for: {', '.join(available)}")
        return True

    def get_primary_provider(self) -> Optional[str]:
        """Return the first configured provider, or None when there is none."""
        providers = self.get_available_providers()
        return providers[0] if providers else None

    def mask_key(self, key: Optional[str]) -> str:
        """
        Return a display-safe form of *key*.

        Empty/None -> "Not configured"; fewer than 10 characters ->
        "Invalid key"; otherwise the first 7 and last 4 characters joined
        by "...".
        """
        if not key:
            return "Not configured"
        if len(key) < 10:
            return "Invalid key"
        return f"{key[:7]}...{key[-4:]}"

    def display_status(self):
        """Print a per-provider status table followed by a summary."""
        print("=" * 60)
        print("API KEY CONFIGURATION STATUS")
        print("=" * 60)

        providers = {
            "OpenAI": self.openai_key,
            "Anthropic": self.anthropic_key,
            "Google": self.google_key,
            "Replicate": self.replicate_key
        }

        for name, key in providers.items():
            if self._is_configured(key):
                print(f"✅ {name:12} : {self.mask_key(key)}")
            else:
                print(f"❌ {name:12} : Not configured")

        available = self.get_available_providers()
        print("-" * 60)
        print(f"Total configured: {len(available)}/{len(providers)}")

        if available:
            print(f"Primary provider: {self.get_primary_provider()}")


class MultiProviderClient:
    """
    Hold one SDK client per configured AI provider.

    Clients are created eagerly in the constructor for OpenAI, Anthropic and
    Google, but only for providers that the config reports as available, so
    template placeholder keys never produce a client. When no provider is
    named, ``get_client()`` falls back to the first client that was created.
    """
    def __init__(self, config: "APIConfig"):
        """
        Args:
            config: The APIConfig whose keys decide which clients are built.
        """
        self.config = config
        self.clients = {}
        self._initialize_clients()

    def _initialize_clients(self):
        """
        Create a client for every available provider whose SDK is installed.

        A missing SDK package is reported with an install hint and skipped;
        it does not abort initialization of the other providers.
        """
        # Ask the config which keys are real, so this class agrees with
        # APIConfig.validate()/display_status() about what "configured" means.
        usable = set(self.config.get_available_providers())

        # OpenAI
        if "openai" in usable:
            try:
                from openai import OpenAI
                self.clients["openai"] = OpenAI(api_key=self.config.openai_key)
                print("✅ OpenAI client initialized")
            except ImportError:
                print("⚠️ OpenAI package not installed. Run: pip install openai")

        # Anthropic
        if "anthropic" in usable:
            try:
                from anthropic import Anthropic
                self.clients["anthropic"] = Anthropic(api_key=self.config.anthropic_key)
                print("✅ Anthropic client initialized")
            except ImportError:
                print("⚠️ Anthropic package not installed. Run: pip install anthropic")

        # Google: the configured module itself is stored as the "client"
        if "google" in usable:
            try:
                import google.generativeai as genai
                genai.configure(api_key=self.config.google_key)
                self.clients["google"] = genai
                print("✅ Google Gemini client initialized")
            except ImportError:
                print("⚠️ Google package not installed. Run: pip install google-generativeai")

    def get_client(self, provider: str = None):
        """
        Get a specific client or the first available one.

        Args:
            provider: Provider id ("openai", "anthropic", "google"). When
                omitted, the first client created during initialization is
                returned.

        Returns:
            The client object, or None when the requested provider (or any
            provider at all) has no client.
        """
        if provider:
            return self.clients.get(provider)

        # Dicts keep insertion order, so this is the first initialized client.
        return next(iter(self.clients.values()), None)

    def list_available_models(self) -> Dict[str, List[str]]:
        """
        List known model names for each provider that has a client.

        The names are a fixed list kept in this method; no API is queried.
        """
        models = {}

        if "openai" in self.clients:
            models["openai"] = [
                "gpt-3.5-turbo",
                "gpt-4",
                "gpt-4-turbo-preview"
            ]

        if "anthropic" in self.clients:
            models["anthropic"] = [
                "claude-3-opus-20240229",
                "claude-3-sonnet-20240229",
                "claude-3-haiku-20240307"
            ]

        if "google" in self.clients:
            models["google"] = [
                "gemini-pro",
                "gemini-pro-vision"
            ]

        return models


def create_example_config_files():
    """
    Write the two template files users copy and fill in.

    Produces ``config.example.json`` and ``.env.example`` in the current
    working directory, replacing any existing copies. Every key slot holds
    the placeholder ``ADD_YOUR_KEY_HERE``.
    """
    placeholder = "ADD_YOUR_KEY_HERE"

    # --- config.example.json: key slots first, then default settings ---
    json_template = {
        slot: placeholder
        for slot in ("openai_key", "anthropic_key", "google_key", "replicate_key")
    }
    json_template.update(default_temperature=0.7, max_tokens=500, timeout=30)

    with open("config.example.json", "w") as handle:
        handle.write(json.dumps(json_template, indent=2))

    print("✅ Created config.example.json")

    # --- .env.example: one commented section per provider ---
    env_sections = (
        ("OpenAI API Key (https://platform.openai.com/api-keys)", "OPENAI_API_KEY"),
        ("Anthropic API Key (https://console.anthropic.com/)", "ANTHROPIC_API_KEY"),
        ("Google AI Studio Key (https://aistudio.google.com/)", "GOOGLE_API_KEY"),
        ("Replicate API Token (https://replicate.com/account/api-tokens)", "REPLICATE_API_TOKEN"),
    )
    env_text = (
        "# API Keys Configuration\n"
        "# Copy this file to .env and add your actual keys\n"
    )
    env_text += "".join(
        f"\n# {label}\n{variable}={placeholder}\n"
        for label, variable in env_sections
    )

    with open(".env.example", "w") as handle:
        handle.write(env_text)

    print("✅ Created .env.example")


if __name__ == "__main__":
    # Script entry point: load keys from the environment, show their status,
    # then either list the usable models or explain how to get set up.
    print("API Configuration Manager Demo")
    print("=" * 60)

    config = APIConfig.from_env()
    config.display_status()

    print("\n" + "=" * 60)
    if not config.validate():
        # No usable key: print onboarding steps.
        for step in (
            "\n📝 To get started:",
            "1. Copy .env.example to .env",
            "2. Add your API keys",
            "3. Run this script again",
        ):
            print(step)

        # Generate the template files only when they are not there yet.
        if not os.path.exists(".env.example"):
            print("\nCreating example configuration files...")
            create_example_config_files()
    else:
        print("\n🎉 Ready to use AI APIs!")

        # Build one client per configured provider.
        client_manager = MultiProviderClient(config)

        # Report the models reachable through those clients.
        models = client_manager.list_available_models()
        if models:
            print("\nAvailable models:")
            for provider, model_list in models.items():
                print(f"\n{provider}:")
                for model in model_list:
                    print(f"  - {model}")


In [None]:
# From: rate_limit_handler.py

# From: Zero to AI Agent, Chapter 7, Section 7.6
# File: rate_limit_handler.py

"""
Robust rate limit handling with exponential backoff for AI API calls.
Prevents hitting rate limits and handles them gracefully when they occur.
"""

import time
import random
from typing import Callable, Any, Optional, Dict
from datetime import datetime, timedelta
from collections import deque
import functools


def retry_with_exponential_backoff(
    func: Callable = None,
    initial_delay: float = 1,
    exponential_base: float = 2,
    jitter: bool = True,
    max_retries: int = 5,
    max_delay: float = 60
):
    """
    Decorator to retry a function with exponential backoff.
    Perfect for handling rate limits and transient failures.

    Only exceptions whose message looks like a rate-limit response (contains
    "rate_limit", "rate limit", "429" or "too many requests", matched
    case-insensitively) are retried; any other exception propagates
    immediately and unchanged.

    Args:
        func: Function to retry (set automatically when the decorator is
            applied without parentheses)
        initial_delay: Starting delay in seconds
        exponential_base: Multiplier applied to the delay after each retry
        jitter: Add up to one extra second of randomness to each wait to
            prevent thundering herd
        max_retries: Total number of attempts, including the first call.
            Values below 1 are treated as 1, so the wrapped function is
            always called at least once.
        max_delay: Cap on the backoff delay in seconds (jitter is added on
            top of the capped value)

    Raises:
        Whatever the wrapped function raised on its final attempt, or on the
        first non-rate-limit failure.

    Usage:
        @retry_with_exponential_backoff(max_retries=3)
        def make_api_call():
            # Your API call here
            pass
    """
    # Message fragments that identify a rate-limit failure.
    rate_limit_markers = ("rate_limit", "rate limit", "429", "too many requests")

    # Guarantee one real call even for max_retries <= 0; otherwise the wrapped
    # function would never run and only a generic error would be raised.
    total_attempts = max(1, max_retries)

    def decorator(f):
        @functools.wraps(f)
        def wrapper(*args, **kwargs):
            delay = initial_delay

            for attempt in range(1, total_attempts + 1):
                try:
                    return f(*args, **kwargs)

                except Exception as e:
                    error_str = str(e).lower()

                    # Not a rate limit error: re-raise with the original traceback
                    if not any(marker in error_str for marker in rate_limit_markers):
                        raise

                    # Out of attempts: surface the last rate-limit error
                    if attempt >= total_attempts:
                        print(f"❌ Max retries ({max_retries}) exceeded")
                        raise

                    # Calculate delay with exponential backoff
                    actual_delay = min(delay, max_delay)

                    # Add jitter if requested
                    if jitter:
                        actual_delay += random.uniform(0, 1)

                    print(f"⏳ Rate limited. Waiting {actual_delay:.1f} seconds...")
                    print(f"   Retry {attempt}/{max_retries}")

                    time.sleep(actual_delay)

                    # Increase delay for next retry
                    delay *= exponential_base

        return wrapper

    # Handle both @retry_with_exponential_backoff and @retry_with_exponential_backoff()
    if func is None:
        return decorator
    else:
        return decorator(func)


class RateLimitTracker:
    """
    Track API calls and implement client-side rate limiting
    to prevent hitting server rate limits.

    Timestamps of recent requests are kept in a deque; entries older than
    one minute are discarded whenever the tracker is queried.
    """

    def __init__(self, max_requests_per_minute: int = 50):
        """
        Initialize rate limit tracker

        Args:
            max_requests_per_minute: Maximum requests allowed per minute.
                Must be a positive number.

        Raises:
            ValueError: if max_requests_per_minute is zero or negative. Such
                a limit could never be satisfied and would otherwise fail
                later with an IndexError or ZeroDivisionError.
        """
        if max_requests_per_minute <= 0:
            raise ValueError(
                "max_requests_per_minute must be positive, "
                f"got {max_requests_per_minute}"
            )
        self.max_rpm = max_requests_per_minute
        self.request_times = deque()  # datetimes of requests, oldest first
        self.total_requests = 0       # lifetime count, never pruned
        self.total_throttled = 0      # how many times wait_if_needed() slept

    def can_make_request(self) -> bool:
        """Check if we can make a request without exceeding limits"""
        self._clean_old_requests()
        return len(self.request_times) < self.max_rpm

    def wait_if_needed(self) -> float:
        """
        Wait if necessary to avoid rate limits.

        When the one-minute window is full, sleeps until the oldest recorded
        request is a minute old (plus a 0.1 s safety buffer).

        Returns:
            The computed wait in seconds (excluding the 0.1 s buffer), or 0
            when no sleep was needed.
        """
        self._clean_old_requests()

        if len(self.request_times) >= self.max_rpm:
            # The window frees a slot once the oldest request ages out
            oldest_request = self.request_times[0]
            wait_until = oldest_request + timedelta(minutes=1)
            wait_seconds = (wait_until - datetime.now()).total_seconds()

            if wait_seconds > 0:
                print(f"⏳ Approaching rate limit ({self.max_rpm} req/min)")
                print(f"   Waiting {wait_seconds:.1f} seconds...")
                time.sleep(wait_seconds + 0.1)  # Add small buffer
                self.total_throttled += 1
                return wait_seconds

        return 0

    def record_request(self):
        """Record that a request was made"""
        self.request_times.append(datetime.now())
        self.total_requests += 1

    def _clean_old_requests(self):
        """Remove requests older than 1 minute from tracking"""
        one_minute_ago = datetime.now() - timedelta(minutes=1)

        # Entries are appended in time order, so expired ones are at the left
        while self.request_times and self.request_times[0] < one_minute_ago:
            self.request_times.popleft()

    def get_stats(self) -> Dict[str, Any]:
        """
        Get current rate limit statistics.

        Returns:
            A dict with the keys "requests_in_last_minute",
            "max_requests_per_minute", "available_requests",
            "total_requests", "times_throttled" and
            "current_usage_percent".
        """
        self._clean_old_requests()
        in_window = len(self.request_times)

        return {
            "requests_in_last_minute": in_window,
            "max_requests_per_minute": self.max_rpm,
            "available_requests": self.max_rpm - in_window,
            "total_requests": self.total_requests,
            "times_throttled": self.total_throttled,
            "current_usage_percent": (in_window / self.max_rpm) * 100
        }

    def reset(self):
        """Reset all tracking"""
        self.request_times.clear()
        self.total_requests = 0
        self.total_throttled = 0


class APIRateLimiter:
    """
    Per-provider (and optionally per-model) rate limiting for API calls.

    Keeps one RateLimitTracker per "provider" or "provider:model" key,
    throttles before each call, and remembers the most recent error seen
    for each provider.
    """

    # Requests-per-minute defaults, looked up by provider and then by model.
    DEFAULT_LIMITS = {
        "openai": {
            "gpt-3.5-turbo": 60,
            "gpt-4": 20,
            "default": 50
        },
        "anthropic": {
            "default": 50
        },
        "google": {
            "free": 60,
            "paid": 360,
            "default": 60
        }
    }

    def __init__(self):
        """Start with no trackers and no recorded errors."""
        self.trackers = {}
        self.last_errors = {}

    def get_tracker(self, provider: str, model: str = None) -> RateLimitTracker:
        """
        Return the tracker for a provider/model pair, creating it on first use.

        The limit for a new tracker is the model-specific entry in
        DEFAULT_LIMITS when one exists, else the provider's "default",
        else 50.
        """
        tracker_key = provider if not model else f"{provider}:{model}"

        try:
            return self.trackers[tracker_key]
        except KeyError:
            pass

        limits_for_provider = self.DEFAULT_LIMITS.get(provider, {"default": 50})
        has_model_limit = bool(model) and model in limits_for_provider
        if has_model_limit:
            rpm = limits_for_provider[model]
        else:
            rpm = limits_for_provider.get("default", 50)

        new_tracker = RateLimitTracker(rpm)
        self.trackers[tracker_key] = new_tracker
        return new_tracker

    @retry_with_exponential_backoff(max_retries=3)
    def make_api_call(self, provider: str, api_function: Callable, 
                      model: str = None, **kwargs) -> Any:
        """
        Invoke an API function under rate limiting and retry logic.

        Args:
            provider: Name of the API provider
            api_function: Callable performing the real API request
            model: Optional model name used to pick a model-specific limit
            **kwargs: Forwarded unchanged to api_function

        Returns:
            Whatever api_function returns

        Raises:
            Any exception from api_function, after it has been recorded in
            last_errors for the provider.
        """
        tracker = self.get_tracker(provider, model)

        # Throttle first, then count this attempt against the window.
        tracker.wait_if_needed()
        tracker.record_request()

        try:
            outcome = api_function(**kwargs)
        except Exception as e:
            # Remember the failure so status reports can surface it.
            self.last_errors[provider] = {
                "error": str(e),
                "time": datetime.now(),
                "model": model
            }
            raise

        # A successful call wipes the provider's previous error, if any.
        self.last_errors.pop(provider, None)
        return outcome

    def get_all_stats(self) -> Dict[str, Dict]:
        """
        Collect statistics from every tracker.

        Each entry is the tracker's get_stats() dict. When the provider has
        a recorded error, a "last_error" entry is added containing the
        message (truncated to 100 characters) and its age in seconds.
        """
        report = {}

        for tracker_key, tracker in self.trackers.items():
            entry = tracker.get_stats()
            report[tracker_key] = entry

            # Errors are stored per provider, i.e. the part before ":".
            provider_name = tracker_key.split(":")[0]
            if provider_name not in self.last_errors:
                continue

            recorded = self.last_errors[provider_name]
            age_seconds = (datetime.now() - recorded["time"]).total_seconds()
            entry["last_error"] = {
                "message": recorded["error"][:100],
                "seconds_ago": round(age_seconds, 1)
            }

        return report

    def display_status(self):
        """Print a usage summary for every tracked provider/model."""
        rule = "=" * 60
        print(rule)
        print("RATE LIMIT STATUS")
        print(rule)

        all_stats = self.get_all_stats()
        if not all_stats:
            print("No API calls tracked yet")
            return

        for tracker_key, stat in all_stats.items():
            heading = tracker_key.replace(":", " - ")
            percent = stat["current_usage_percent"]
            usage_bar = self._create_usage_bar(percent)
            used = stat["requests_in_last_minute"]
            allowed = stat["max_requests_per_minute"]
            remaining = stat["available_requests"]

            print(f"\n{heading}:")
            print(f"  Usage: {usage_bar} {percent:.0f}%")
            print(f"  Requests: {used}/{allowed}")
            print(f"  Available: {remaining}")

            if "last_error" in stat:
                seconds_ago = stat["last_error"]["seconds_ago"]
                print(f"  ⚠️ Last error: {seconds_ago}s ago")

    def _create_usage_bar(self, percent: float, width: int = 20) -> str:
        """
        Render a text progress bar such as ``[█████░░░░░]``.

        Args:
            percent: Usage percentage to visualise
            width: Number of cells in the bar
        """
        filled_cells = int((percent / 100) * width)
        cells = "█" * filled_cells + "░" * (width - filled_cells)

        # Colour coding (green < 50, yellow < 80, red otherwise) would need
        # terminal colours; every band currently renders identically.
        return f"[{cells}]"


# Example usage functions
def demo_rate_limiting():
    """
    Run a small, self-contained demonstration of APIRateLimiter.

    Five simulated API calls are pushed through the limiter, each with a
    10% chance of raising a fake rate-limit error, with half a second
    between calls. The limiter's status report is printed at the end.
    """
    print("Rate Limiting Demo")
    print("=" * 60)

    limiter = APIRateLimiter()

    def mock_api_call(message: str):
        """Stand-in for a real API request; fails at random."""
        should_fail = random.random() < 0.1
        if should_fail:
            raise Exception("Rate limit exceeded (429)")
        return f"Response to: {message}"

    total_calls = 5
    for call_number in range(1, total_calls + 1):
        try:
            limiter.make_api_call(
                provider="openai",
                api_function=mock_api_call,
                model="gpt-3.5-turbo",
                message=f"Test message {call_number}"
            )
        except Exception as e:
            print(f"❌ Call {call_number}: {e}")
        else:
            print(f"✅ Call {call_number}: Success")

        # Brief pause so the calls are spread out.
        time.sleep(0.5)

    # Final report
    print("\n")
    limiter.display_status()


if __name__ == "__main__":
    # Script entry point: run the demo, then print a best-practice checklist.
    demo_rate_limiting()

    print("\n" + "=" * 60)
    print("💡 Rate Limiting Best Practices:")
    print("=" * 60)
    for tip in (
        "1. Always implement client-side rate limiting",
        "2. Use exponential backoff for retries",
        "3. Add jitter to prevent thundering herd",
        "4. Track statistics to optimize usage",
        "5. Set conservative limits to avoid surprises",
        "6. Monitor and alert on repeated failures",
    ):
        print(tip)


In [None]:
# From: security_audit.py

# From: Zero to AI Agent, Chapter 7, Section 7.6
# File: security_audit.py

"""
Security audit tool to scan projects for exposed API keys and security issues.
Essential for preventing expensive mistakes and security breaches.
"""

import os
import re
from pathlib import Path
from typing import List, Dict, Tuple
import json
import subprocess
from datetime import datetime


class APIKeyAuditor:
    """
    Comprehensive security auditor for API keys in your project.

    Walks a project directory, scans text-like files line by line for
    strings that look like provider API keys, and collects:

    - ``violations``: one dict per suspected key (file, line, type,
      masked preview, severity)
    - ``warnings``: human-readable notes (unreadable files, .gitignore
      gaps, suspicious git history, loose file permissions)
    - ``safe_files``: relative paths of scanned files with no violations
    """
    
    # Patterns that might indicate API keys
    KEY_PATTERNS = {
        "OpenAI": r'sk-[a-zA-Z0-9]{48}',
        "Anthropic": r'sk-ant-[a-zA-Z0-9-]+',
        "Google": r'AIza[a-zA-Z0-9_-]{35}',
        "Replicate": r'[a-f0-9]{40}',  # Less specific, more false positives
        "Generic Secret": r'(api[_-]?key|secret|token|password)[\s]*=[\s]*["\'][^"\']{20,}["\']'
    }
    
    # Files/folders to skip
    SKIP_PATHS = {
        ".git", ".env", "venv", "env", "__pycache__", 
        "node_modules", ".pytest_cache", "dist", "build"
    }
    
    # File extensions to check
    CHECK_EXTENSIONS = {
        ".py", ".js", ".jsx", ".ts", ".tsx", ".json", ".yaml", 
        ".yml", ".md", ".txt", ".sh", ".bash", ".config"
    }
    
    def __init__(self, project_dir: str = "."):
        """Initialize the auditor with a project directory"""
        self.project_dir = Path(project_dir)
        self.violations = []
        self.warnings = []
        self.safe_files = []
    
    def audit_file(self, filepath: Path) -> List[Dict]:
        """
        Audit a single file for exposed keys.

        Args:
            filepath: File to scan. Violations report its path relative to
                ``project_dir``.

        Returns:
            List of violation dicts with the keys ``file``, ``line``,
            ``type``, ``preview`` and ``severity``. If anything goes wrong
            while processing the file, a warning is appended to
            ``self.warnings`` and the violations collected so far are
            returned.
        """
        violations = []
        
        try:
            with open(filepath, 'r', encoding='utf-8', errors='ignore') as f:
                content = f.read()
            
            # Check each line
            for line_num, line in enumerate(content.splitlines(), 1):
                # Skip comments and docstrings for false positives
                if line.strip().startswith(('#', '//', '"""', "'''")):
                    continue
                
                # Check for each pattern
                for key_type, pattern in self.KEY_PATTERNS.items():
                    for match in re.finditer(pattern, line):
                        # Only report matches that do not look like placeholders
                        if self._is_likely_real_key(match.group(), key_type):
                            violations.append({
                                "file": str(filepath.relative_to(self.project_dir)),
                                "line": line_num,
                                "type": key_type,
                                "preview": self._mask_sensitive_data(line.strip()),
                                "severity": "HIGH"
                            })
        
        except Exception as e:
            # Best effort: one unreadable file must not abort the whole audit,
            # but the failure is surfaced in the report as a warning.
            self.warnings.append(f"Could not scan {filepath}: {e}")
        
        return violations
    
    def _is_likely_real_key(self, text: str, key_type: str) -> bool:
        """
        Check if a match is likely a real API key.

        Args:
            text: The matched text
            key_type: Name of the pattern that matched (currently unused)

        Returns:
            False if the text contains a known placeholder marker or has
            very few distinct characters; True otherwise
        """
        # Common false positive indicators
        false_positive_indicators = [
            "example", "your-key", "add_your", "placeholder",
            "xxx", "...", "abc", "123", "test", "demo"
        ]
        
        text_lower = text.lower()
        
        # Check for obvious placeholders
        if any(indicator in text_lower for indicator in false_positive_indicators):
            return False
        
        # Check for repeated characters (likely fake): fewer distinct
        # characters than a third of the length
        if len(set(text)) < len(text) / 3:
            return False
        
        return True
    
    def _mask_sensitive_data(self, text: str) -> str:
        """
        Mask potential sensitive data in preview.

        Every match of every KEY_PATTERNS entry is cut down to its first
        10 characters followed by ``***MASKED***``; the result is then
        truncated to 100 characters plus ``...`` if it is longer.
        """
        for pattern in self.KEY_PATTERNS.values():
            text = re.sub(pattern, lambda m: m.group()[:10] + "***MASKED***", text)
        return text[:100] + "..." if len(text) > 100 else text
    
    def audit_project(self) -> Dict:
        """
        Audit the entire project for security issues.

        Scans every eligible file, then runs the .gitignore, git history
        and environment-file checks. Findings are accumulated on
        ``self.violations``, ``self.warnings`` and ``self.safe_files``.

        Returns:
            Dict with scan_time, project_dir, files_scanned, violations,
            warnings, safe_files_count and summary
        """
        print(f"🔍 Auditing project: {self.project_dir.absolute()}")
        
        files_scanned = 0
        
        # Walk through project files
        for file_path in self._get_files_to_scan():
            violations = self.audit_file(file_path)
            
            if violations:
                self.violations.extend(violations)
            else:
                self.safe_files.append(str(file_path.relative_to(self.project_dir)))
            
            files_scanned += 1
        
        # Check additional security issues
        self._check_gitignore()
        self._check_git_history()
        self._check_environment_files()
        
        # Compile results
        return {
            "scan_time": datetime.now().isoformat(),
            "project_dir": str(self.project_dir.absolute()),
            "files_scanned": files_scanned,
            "violations": self.violations,
            "warnings": self.warnings,
            "safe_files_count": len(self.safe_files),
            "summary": self._generate_summary()
        }
    
    def _get_files_to_scan(self) -> List[Path]:
        """
        Get list of files to scan, respecting skip patterns.

        Skip names are matched against the path *relative to the project
        root*. Matching against the full path would exclude every file
        whenever the project itself lives below a directory that happens
        to be called e.g. ``build`` or ``env``.
        """
        files_to_scan = []
        
        for file_path in self.project_dir.rglob("*"):
            # Skip directories
            if file_path.is_dir():
                continue
            
            # Skip excluded paths (only components inside the project count)
            relative_parts = file_path.relative_to(self.project_dir).parts
            if any(part in self.SKIP_PATHS for part in relative_parts):
                continue
            
            # Only check relevant extensions
            if file_path.suffix not in self.CHECK_EXTENSIONS:
                continue
            
            files_to_scan.append(file_path)
        
        return files_to_scan
    
    def _check_gitignore(self):
        """
        Check if .gitignore properly excludes sensitive files.

        Adds a warning when the file is missing, or when any of the
        expected entries does not appear anywhere in its text (plain
        substring check).
        """
        gitignore_path = self.project_dir / ".gitignore"
        
        if not gitignore_path.exists():
            self.warnings.append("⚠️ No .gitignore file found!")
            return
        
        # Same tolerant decoding as audit_file, so an oddly encoded
        # .gitignore cannot abort the audit
        with open(gitignore_path, encoding='utf-8', errors='ignore') as f:
            gitignore_content = f.read()
        
        # Check for important exclusions
        important_exclusions = [".env", "config.json", "secrets", "*.key"]
        missing_exclusions = [
            exclusion for exclusion in important_exclusions
            if exclusion not in gitignore_content
        ]
        
        if missing_exclusions:
            self.warnings.append(
                f"⚠️ .gitignore missing important exclusions: {', '.join(missing_exclusions)}"
            )
    
    def _check_git_history(self):
        """
        Check git history for accidentally committed keys.

        Uses git's ``-G`` option, which inspects the added/removed lines of
        each commit's patch. (``--grep`` would only search commit
        *messages*, which is not where leaked keys end up.) Does nothing
        when the project is not a git repository or git cannot be run.
        """
        # Only run if in a git repository
        if not (self.project_dir / ".git").exists():
            return
        
        # "sk-" followed by a long run of key characters
        history_pattern = r'sk-[A-Za-z0-9_-]{20,}'
        
        try:
            result = subprocess.run(
                # -1: a single matching commit is enough to raise the warning
                ["git", "log", "-1", "--oneline", f"-G{history_pattern}"],
                capture_output=True,
                text=True,
                cwd=self.project_dir,
                timeout=60
            )
        except subprocess.TimeoutExpired:
            self.warnings.append(
                "⚠️ Git history scan timed out. Review commit history manually!"
            )
            return
        except (OSError, subprocess.SubprocessError):
            # Git might not be available
            return
        
        if result.stdout:
            self.warnings.append(
                "⚠️ Git history might contain API keys. Review commit history!"
            )
    
    def _check_environment_files(self):
        """
        Check for improperly secured environment files.

        For each well-known secrets file present in the project root, adds
        a warning unless the last three octal digits of its mode are 600.
        """
        env_files = [".env", ".env.local", "config.json", "secrets.json"]
        
        for env_file in env_files:
            file_path = self.project_dir / env_file
            if not file_path.exists():
                continue
            
            # Check permissions (Unix-like systems)
            try:
                mode = oct(file_path.stat().st_mode)[-3:]
            except OSError as e:
                self.warnings.append(
                    f"Could not check permissions for {env_file}: {e}"
                )
                continue
            
            if mode != "600":  # Should be readable only by owner
                self.warnings.append(
                    f"⚠️ {env_file} has loose permissions: {mode}"
                )
    
    def _generate_summary(self) -> Dict:
        """
        Generate a summary of the audit results.

        Returns:
            Dict with total_violations, total_warnings, severity_breakdown
            and status
        """
        severity_counts = {"HIGH": 0, "MEDIUM": 0, "LOW": 0}
        
        for violation in self.violations:
            severity = violation.get("severity", "MEDIUM")
            # .get() so an unexpected severity label is counted, not a KeyError
            severity_counts[severity] = severity_counts.get(severity, 0) + 1
        
        return {
            "total_violations": len(self.violations),
            "total_warnings": len(self.warnings),
            "severity_breakdown": severity_counts,
            "status": self._get_status()
        }
    
    def _get_status(self) -> str:
        """Determine overall security status from violations and warnings"""
        if self.violations:
            return "❌ VULNERABLE"
        if self.warnings:
            return "⚠️ WARNINGS"
        return "✅ SECURE"
    
    def generate_report(self, results: Dict, output_format: str = "console"):
        """
        Generate a security report in various formats.

        Args:
            results: Dict as returned by audit_project()
            output_format: "console", "json" or "markdown"

        Returns:
            The report text for "json" and "markdown". "console" prints the
            report and returns None; any other value does nothing and
            returns None.
        """
        if output_format == "console":
            self._print_console_report(results)
        elif output_format == "json":
            return json.dumps(results, indent=2)
        elif output_format == "markdown":
            return self._generate_markdown_report(results)
    
    def _print_console_report(self, results: Dict):
        """Print a formatted console report"""
        print("\n" + "=" * 60)
        print("🔒 SECURITY AUDIT REPORT")
        print("=" * 60)
        
        # Status
        print(f"\nStatus: {results['summary']['status']}")
        print(f"Files Scanned: {results['files_scanned']}")
        
        # Violations
        if results['violations']:
            print(f"\n❌ VIOLATIONS FOUND: {len(results['violations'])}")
            print("-" * 40)
            
            for violation in results['violations'][:5]:  # Show first 5
                print(f"\n📁 {violation['file']} (line {violation['line']})")
                print(f"   Type: {violation['type']}")
                print(f"   Preview: {violation['preview']}")
            
            if len(results['violations']) > 5:
                print(f"\n... and {len(results['violations']) - 5} more violations")
        
        # Warnings
        if results['warnings']:
            print(f"\n⚠️ WARNINGS: {len(results['warnings'])}")
            print("-" * 40)
            for warning in results['warnings']:
                print(f"  • {warning}")
        
        # Recommendations
        print("\n" + "=" * 60)
        print("📋 RECOMMENDATIONS")
        print("=" * 60)
        
        if results['violations']:
            print("1. ⚠️ IMMEDIATELY remove exposed keys from code")
            print("2. 🔄 Rotate any exposed API keys")
            print("3. 📝 Move keys to environment variables or .env file")
            print("4. 🚫 Add .env to .gitignore")
            print("5. 🧹 Clean git history if keys were committed")
        else:
            print("✅ No critical issues found!")
            print("Continue following security best practices:")
            print("  • Never hardcode API keys")
            print("  • Use environment variables")
            print("  • Keep .gitignore updated")
            print("  • Rotate keys periodically")
    
    def _generate_markdown_report(self, results: Dict) -> str:
        """Generate a markdown-formatted report and return it as a string"""
        header = f"""# Security Audit Report

**Date:** {results['scan_time']}
**Project:** {results['project_dir']}
**Status:** {results['summary']['status']}

## Summary
- Files Scanned: {results['files_scanned']}
- Violations: {results['summary']['total_violations']}
- Warnings: {results['summary']['total_warnings']}

## Violations
"""
        
        sections = [header]
        
        if results['violations']:
            for v in results['violations']:
                sections.append(f"\n### {v['file']} (line {v['line']})\n")
                sections.append(f"- **Type:** {v['type']}\n")
                sections.append(f"- **Severity:** {v['severity']}\n")
                sections.append(f"- **Preview:** `{v['preview']}`\n")
        else:
            sections.append("\nNo violations found ✅\n")
        
        return "".join(sections)


def quick_security_check():
    """
    Audit the current directory and print the console report.

    Returns:
        True only when the audit status is "✅ SECURE" (no violations and
        no warnings), False otherwise
    """
    checker = APIKeyAuditor()
    audit = checker.audit_project()
    checker.generate_report(audit)

    status = audit['summary']['status']
    return status == "✅ SECURE"


if __name__ == "__main__":
    print("Starting Security Audit...")
    print("-" * 60)

    report_path = "security_audit_report.json"

    # Audit the current directory and show the findings on the console
    auditor = APIKeyAuditor(".")
    results = auditor.audit_project()
    auditor.generate_report(results, "console")

    # Persist the full results for later inspection
    with open(report_path, "w") as report_file:
        json.dump(results, report_file, indent=2)

    print(f"\n📄 Detailed report saved to: {report_path}")


In [None]:
# From: first_api_call.py

# From: Zero to AI Agent, Chapter 7, Section 7.6
# File: first_api_call.py

"""
Your first authenticated API calls to real AI services!
Tests connections to OpenAI, Anthropic, and Google Gemini.
"""

import os
from dotenv import load_dotenv
import sys
from typing import Optional, Dict, Any


# Load environment variables from .env file
load_dotenv()


def test_openai() -> bool:
    """
    Check that the OpenAI API can be reached with GPT-3.5-Turbo.

    Reads OPENAI_API_KEY from the environment, sends one tiny chat
    completion request and prints the reply together with token usage.

    Returns:
        True if the request succeeded; False if the key is missing or
        still the placeholder, the package is not installed, or the
        call failed
    """
    try:
        from openai import OpenAI

        # A missing, empty, or placeholder key cannot be used
        key = os.getenv("OPENAI_API_KEY")
        if key in (None, "", "ADD_YOUR_KEY_HERE"):
            print("❌ OpenAI: No valid API key found")
            print("   Set OPENAI_API_KEY in your .env file")
            return False

        client = OpenAI(api_key=key)

        print("🔄 Testing OpenAI connection...")
        conversation = [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "Respond with exactly: 'Hello, API world!'"},
        ]
        response = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=conversation,
            max_tokens=20,
            temperature=0,  # Make response deterministic
        )

        reply = response.choices[0].message.content
        print(f"🎉 OpenAI Response: {reply}")

        # Usage details are printed only when the response carries them
        if hasattr(response, 'usage'):
            usage = response.usage
            print(f"   Tokens used: {usage.total_tokens}")
            print(f"   Model: {response.model}")

        return True

    except ImportError:
        print("❌ OpenAI: Package not installed")
        print("   Run: pip install openai")
        return False

    except Exception as error:
        # Keep the message short; provider errors can be very long
        print(f"❌ OpenAI Error: {str(error)[:100]}")
        return False


def test_anthropic() -> bool:
    """
    Check that the Anthropic API can be reached with Claude.

    Reads ANTHROPIC_API_KEY from the environment, sends one tiny message
    request and prints the reply together with token usage and model.

    Returns:
        True if the request succeeded; False if the key is missing or
        still the placeholder, the package is not installed, or the
        call failed
    """
    try:
        from anthropic import Anthropic

        # A missing, empty, or placeholder key cannot be used
        key = os.getenv("ANTHROPIC_API_KEY")
        if key in (None, "", "ADD_YOUR_KEY_HERE"):
            print("❌ Anthropic: No valid API key found")
            print("   Set ANTHROPIC_API_KEY in your .env file")
            return False

        client = Anthropic(api_key=key)

        print("🔄 Testing Anthropic connection...")
        prompt = {"role": "user", "content": "Respond with exactly: 'Hello, API world!'"}
        message = client.messages.create(
            model="claude-3-haiku-20240307",  # Cheapest Claude model
            max_tokens=20,
            messages=[prompt],
            temperature=0,
        )

        reply = message.content[0].text
        print(f"🎉 Anthropic Response: {reply}")

        # Token usage is printed only when present; the model always is
        if hasattr(message, 'usage'):
            usage = message.usage
            print(f"   Tokens used: {usage.input_tokens + usage.output_tokens}")
        print(f"   Model: {message.model}")

        return True

    except ImportError:
        print("❌ Anthropic: Package not installed")
        print("   Run: pip install anthropic")
        return False

    except Exception as error:
        # Keep the message short; provider errors can be very long
        print(f"❌ Anthropic Error: {str(error)[:100]}")
        return False


def test_google() -> bool:
    """
    Check that the Google Gemini API can be reached.

    Reads GOOGLE_API_KEY (falling back to GEMINI_API_KEY) from the
    environment, sends one tiny generation request and prints the reply.

    Returns:
        True if the request succeeded; False if the key is missing or
        still the placeholder, the package is not installed, or the
        call failed
    """
    try:
        import google.generativeai as genai

        # GOOGLE_API_KEY wins; GEMINI_API_KEY is used when it is unset/empty
        key = os.getenv("GOOGLE_API_KEY") or os.getenv("GEMINI_API_KEY")
        if key in (None, "", "ADD_YOUR_KEY_HERE"):
            print("❌ Google: No valid API key found")
            print("   Set GOOGLE_API_KEY in your .env file")
            return False

        genai.configure(api_key=key)
        model = genai.GenerativeModel('gemini-pro')

        print("🔄 Testing Google Gemini connection...")
        settings = genai.types.GenerationConfig(
            temperature=0,
            max_output_tokens=20,
        )
        response = model.generate_content(
            "Respond with exactly: 'Hello, API world!'",
            generation_config=settings,
        )

        print(f"🎉 Google Response: {response.text}")
        print("   Model: gemini-pro")

        return True

    except ImportError:
        print("❌ Google: Package not installed")
        print("   Run: pip install google-generativeai")
        return False

    except Exception as error:
        # Keep the message short; provider errors can be very long
        print(f"❌ Google Error: {str(error)[:100]}")
        return False


def test_all_providers() -> Dict[str, bool]:
    """
    Run the connection test for every provider that has a key configured.

    A provider counts as configured when its environment variable is set
    to a non-empty value (Google accepts GOOGLE_API_KEY or GEMINI_API_KEY).

    Returns:
        Mapping of provider name to test outcome; empty when no provider
        is configured
    """
    outcomes = {}

    banner = "=" * 60
    print(banner)
    print("🚀 Testing AI API Connections")
    print(banner)

    # Collect (name, test function) pairs for the configured providers
    configured = []

    if os.getenv("OPENAI_API_KEY"):
        configured.append(("OpenAI", test_openai))

    if os.getenv("ANTHROPIC_API_KEY"):
        configured.append(("Anthropic", test_anthropic))

    if os.getenv("GOOGLE_API_KEY") or os.getenv("GEMINI_API_KEY"):
        configured.append(("Google", test_google))

    if configured:
        for label, run_check in configured:
            print(f"\nTesting {label}...")
            print("-" * 40)
            outcomes[label] = run_check()
            print()
        return outcomes

    # Nothing configured: explain how to get started
    setup_help = (
        "\n⚠️ No API keys configured!",
        "\nTo get started:",
        "1. Create a .env file in your project directory",
        "2. Add your API keys:",
        "   OPENAI_API_KEY=your-key-here",
        "   ANTHROPIC_API_KEY=your-key-here",
        "   GOOGLE_API_KEY=your-key-here",
    )
    for line in setup_help:
        print(line)
    return outcomes


def display_summary(results: Dict[str, bool]):
    """
    Print an overview of the provider test results.

    Args:
        results: Mapping of provider name to whether its test succeeded
    """
    rule = "=" * 60
    print(rule)
    print("📊 Test Summary")
    print(rule)

    if not results:
        print("No providers tested (no API keys configured)")
        return

    total = len(results)
    working = len([name for name, ok in results.items() if ok])

    print(f"\nProviders tested: {total}")
    print(f"Working: {working}")
    print(f"Failed: {total - working}")

    print("\nDetails:")
    for provider, ok in results.items():
        if ok:
            status = "✅ Working"
        else:
            status = "❌ Failed"
        print(f"  {provider}: {status}")

    # results is non-empty here, so "all working" and "none working"
    # can never both be true
    if working == total:
        print("\n🎉 All API connections working perfectly!")
        print("You're ready to build AI applications!")
    elif working == 0:
        print("\n⚠️ No working API connections!")
        print("\nTroubleshooting steps:")
        for step in (
            "1. Check that API keys are correctly set in .env",
            "2. Verify you have credits/billing set up",
            "3. Install required packages:",
            "   pip install openai anthropic google-generativeai",
            "4. Check your internet connection",
        ):
            print(step)
    else:
        print(f"\n✅ {working} provider(s) working - enough to get started!")


def check_prerequisites():
    """
    Verify the setup needed before any provider can be tested.

    If there is no .env file in the current directory, a template is
    written and False is returned so the user can fill in their keys.
    Otherwise the result reflects whether python-dotenv can be imported.

    Returns:
        True when a .env file exists and python-dotenv is installed,
        False otherwise
    """
    print("Checking prerequisites...")
    print("-" * 40)

    if os.path.exists(".env"):
        # .env is in place; make sure the loader package is available
        try:
            import dotenv
        except ImportError:
            print("❌ python-dotenv not installed")
            print("   Run: pip install python-dotenv")
            return False

        print("✅ python-dotenv installed")
        return True

    # No .env yet: write a starter template and stop here
    print("📝 No .env file found")
    print("Creating template .env file...")

    template = """# AI API Keys
# Get your keys from:
# OpenAI: https://platform.openai.com/api-keys
# Anthropic: https://console.anthropic.com/
# Google: https://aistudio.google.com/

OPENAI_API_KEY=ADD_YOUR_KEY_HERE
ANTHROPIC_API_KEY=ADD_YOUR_KEY_HERE
GOOGLE_API_KEY=ADD_YOUR_KEY_HERE
"""

    with open(".env", "w") as env_file:
        env_file.write(template)

    print("✅ Created .env file - please add your API keys")
    return False


if __name__ == "__main__":
    print("🎯 First API Call - Testing Your AI Connections")
    print("=" * 60)

    # Stop early when the environment is not set up yet
    prerequisites_ok = check_prerequisites()
    if not prerequisites_ok:
        print("\n⚠️ Please complete setup before testing APIs")
        sys.exit(1)

    # Test every configured provider and summarize the outcome
    results = test_all_providers()
    display_summary(results)

    # Exit status: 0 when at least one provider works, 1 otherwise
    any_working = bool(results) and any(results.values())
    sys.exit(0 if any_working else 1)


---
### Section 7.6 Exercises

### Exercise 7.6.1: Secure Key Storage

Create a complete key management system that:
- Loads keys from multiple sources (.env, environment, config file)
- Validates key format
- Provides fallback options
- Never exposes keys in logs or errors

In [None]:
# Your code here


### Exercise 7.6.2: Multi-Provider Authentication

Build a class that can authenticate with multiple providers and automatically failover if one is unavailable.

In [None]:
# Your code here


### Exercise 7.6.3: Rate Limit Handler

Implement a robust rate limit handler that:
- Tracks requests per minute
- Automatically throttles when approaching limits
- Provides helpful feedback about wait times
- Works with any API provider

In [None]:
# Your code here


### Exercise 7.6.4: API Key Audit Tool

Create a tool that:
- Scans a project for exposed API keys
- Checks Git history for accidentally committed keys
- Validates that all keys in use are properly secured
- Generates a security report

In [None]:
# Your code here


---
## Section 7.7: Costs and rate limiting considerations

In [None]:
# From: token_cost_calculator.py

# From: Zero to AI Agent, Chapter 7, Section 7.7
# File: token_cost_calculator.py

"""
Understanding token costs and calculating API expenses.
Essential for building cost-effective AI applications.
"""

from typing import Dict, Optional
from datetime import datetime


def understand_token_pricing():
    """
    Print rules of thumb for estimating tokens, with sample texts.

    Returns:
        Dictionary mapping each sample text to its approximate token count
    """
    # Sample texts paired with their approximate token counts
    examples = {
        "Hello": 1,                  # a single token
        "Hello, world!": 4,          # Hello | , | world | !
        "The quick brown fox": 4,    # common words: one token each
        "Anthropomorphization": 3,   # uncommon words are split up
        "👍": 1,                     # emojis are usually 1-2 tokens
        "import numpy as np": 5,     # code
        "def calculate_cost():": 6,  # a function definition
        "https://example.com": 4,    # URLs are split into parts
    }

    rules_of_thumb = (
        "• 1 token ≈ 4 characters in English",
        "• 1 token ≈ ¾ words",
        "• 100 tokens ≈ 75 words",
        "• 1 page of text ≈ 500 tokens",
        "• 1 conversation turn ≈ 50-200 tokens",
    )

    print("Token Estimation Rules:")
    for rule in rules_of_thumb:
        print(rule)

    print("\nExamples:")
    for sample, token_count in examples.items():
        print(f"  '{sample}' = ~{token_count} tokens")

    return examples


def calculate_cost(input_tokens: int, output_tokens: int, model: str = "gpt-3.5-turbo") -> Optional[Dict]:
    """
    Work out what a request costs on a given model.

    Args:
        input_tokens: Number of input/prompt tokens
        output_tokens: Number of output/completion tokens
        model: Model name; must be a key of the built-in price table

    Returns:
        Dictionary with token counts, input/output/total cost, the
        blended cost per 1K tokens and a printable breakdown, or None
        (after printing a warning) when the model is not in the table
    """

    # USD per 1K tokens (as of 2024 - check provider docs for current prices)
    pricing = {
        # OpenAI Models
        "gpt-3.5-turbo": {"input": 0.0005, "output": 0.0015},
        "gpt-3.5-turbo-16k": {"input": 0.003, "output": 0.004},
        "gpt-4": {"input": 0.03, "output": 0.06},
        "gpt-4-turbo": {"input": 0.01, "output": 0.03},
        "gpt-4-32k": {"input": 0.06, "output": 0.12},

        # Anthropic Models
        "claude-3-haiku": {"input": 0.00025, "output": 0.00125},
        "claude-3-sonnet": {"input": 0.003, "output": 0.015},
        "claude-3-opus": {"input": 0.015, "output": 0.075},
        "claude-2.1": {"input": 0.008, "output": 0.024},

        # Google Models
        "gemini-pro": {"input": 0.000125, "output": 0.000375},
        "gemini-pro-vision": {"input": 0.000125, "output": 0.000375},

        # Other Models
        "llama-2-70b": {"input": 0.001, "output": 0.001},  # Via Replicate
        "mixtral-8x7b": {"input": 0.0005, "output": 0.0005},  # Via Replicate
    }

    rates = pricing.get(model)
    if rates is None:
        print(f"Warning: Model '{model}' not in pricing database")
        return None

    # Each side is billed at its own per-1K rate
    input_cost = (input_tokens / 1000) * rates["input"]
    output_cost = (output_tokens / 1000) * rates["output"]
    total_cost = input_cost + output_cost
    total_tokens = input_tokens + output_tokens

    # Blended rate across input and output; 0 when no tokens were used
    if total_tokens > 0:
        cost_per_1k = total_cost / (total_tokens / 1000)
    else:
        cost_per_1k = 0

    return {
        "model": model,
        "input_tokens": input_tokens,
        "output_tokens": output_tokens,
        "total_tokens": total_tokens,
        "input_cost": input_cost,
        "output_cost": output_cost,
        "total_cost": total_cost,
        "cost_per_1k_tokens": cost_per_1k,
        "breakdown": f"${input_cost:.6f} (input) + ${output_cost:.6f} (output)"
    }


def estimate_tokens(text: str) -> int:
    """
    Roughly estimate how many tokens a piece of text uses.

    Averages two rules of thumb: one token per 4 characters, and
    4/3 tokens per whitespace-separated word.

    Args:
        text: Text to estimate

    Returns:
        Estimated token count, truncated to an integer
    """
    word_count = len(text.split())
    return int((len(text) / 4 + word_count * 4 / 3) / 2)


def calculate_real_world_costs():
    """
    Print daily, monthly and yearly cost estimates for sample workloads.

    Each scenario describes a daily traffic pattern and a model; the
    monthly figure assumes 30 days and the yearly figure 12 such months.
    """

    scenarios = {
        "Customer Service Bot": {
            "daily_conversations": 100,
            "messages_per_conversation": 10,
            "avg_input_tokens": 50,
            "avg_output_tokens": 100,
            "model": "gpt-3.5-turbo"
        },
        "Code Assistant": {
            "daily_conversations": 50,
            "messages_per_conversation": 1,
            "avg_input_tokens": 200,  # Code context
            "avg_output_tokens": 300,  # Generated code
            "model": "gpt-4"
        },
        "Content Generator": {
            "daily_conversations": 10,
            "messages_per_conversation": 1,
            "avg_input_tokens": 100,  # Prompt
            "avg_output_tokens": 800,  # Article
            "model": "claude-3-sonnet"
        },
        "Study Assistant": {
            "daily_conversations": 20,
            "messages_per_conversation": 15,
            "avg_input_tokens": 75,
            "avg_output_tokens": 150,
            "model": "gemini-pro"
        }
    }

    rule = "="*60
    print(rule)
    print("REAL-WORLD COST SCENARIOS (Monthly Estimates)")
    print(rule)

    for name, scenario in scenarios.items():
        model_name = scenario["model"]

        # Daily volume: messages first, then tokens in each direction
        daily_messages = scenario["daily_conversations"] * scenario["messages_per_conversation"]
        tokens_in = daily_messages * scenario["avg_input_tokens"]
        tokens_out = daily_messages * scenario["avg_output_tokens"]

        cost_data = calculate_cost(tokens_in, tokens_out, model_name)
        if not cost_data:
            # Nothing to report without a usable cost breakdown
            continue

        daily_cost = cost_data["total_cost"]
        monthly_cost = daily_cost * 30
        yearly_cost = monthly_cost * 12

        print(f"\n📊 {name}:")
        print(f"  Model: {model_name}")
        print(f"  Usage: {daily_messages} messages/day")
        print(f"  Tokens: {tokens_in + tokens_out:,} tokens/day")
        print(f"  Daily: ${daily_cost:.2f}")
        print(f"  Monthly: ${monthly_cost:.2f}")
        print(f"  Yearly: ${yearly_cost:.2f}")

        # Flag the extremes of the monthly bill
        if monthly_cost > 100:
            print("  ⚠️ High cost! Consider optimization strategies")
        elif monthly_cost < 10:
            print("  ✅ Very affordable for this use case")


def compare_model_costs(prompt: str, expected_response_length: int = 200):
    """
    Compare costs across different models for the same task.

    Prints the per-call cost (plus the cost of 1,000 and 10,000 calls) for
    a fixed list of models, followed by the cheapest and most expensive
    option and the ratio between them.

    Args:
        prompt: The input prompt; its token count is estimated with
            estimate_tokens()
        expected_response_length: Expected response in tokens
    """
    
    input_tokens = estimate_tokens(prompt)
    output_tokens = expected_response_length
    
    models = [
        "gemini-pro",      # Cheapest
        "claude-3-haiku",  # Fast & cheap
        "gpt-3.5-turbo",   # Popular choice
        "claude-3-sonnet", # Good balance
        "gpt-4",          # High quality
        "claude-3-opus",   # Top tier
    ]
    
    print("\n" + "="*60)
    print("MODEL COST COMPARISON")
    print(f"Prompt: {len(prompt)} chars (~{input_tokens} tokens)")
    print(f"Expected response: ~{output_tokens} tokens")
    print("="*60)
    
    results = []
    
    for model in models:
        cost_data = calculate_cost(input_tokens, output_tokens, model)
        if cost_data:
            results.append((model, cost_data["total_cost"]))
            print(f"\n{model}:")
            print(f"  Cost per call: ${cost_data['total_cost']:.6f}")
            print(f"  1,000 calls: ${cost_data['total_cost'] * 1000:.2f}")
            print(f"  10,000 calls: ${cost_data['total_cost'] * 10000:.2f}")
    
    # Show cheapest vs most expensive
    if results:
        results.sort(key=lambda x: x[1])
        cheapest = results[0]
        most_expensive = results[-1]
        
        print("\n" + "-"*60)
        print(f"💰 Cheapest: {cheapest[0]} (${cheapest[1]:.6f}/call)")
        print(f"💎 Most expensive: {most_expensive[0]} (${most_expensive[1]:.6f}/call)")
        
        # With an empty prompt and a zero-length response every cost is 0,
        # so the ratio is undefined; report that instead of dividing by zero.
        if cheapest[1] > 0:
            print(f"📊 Price difference: {most_expensive[1]/cheapest[1]:.1f}x more expensive")
        else:
            print("📊 Price difference: n/a (cheapest option costs $0)")


if __name__ == "__main__":
    # Part 1: how tokens are counted
    print("Understanding Tokens")
    print("-" * 60)
    understand_token_pricing()

    # Part 2: what typical workloads cost
    print("\n")
    calculate_real_world_costs()

    # Part 3: the same task priced on different models
    sample_prompt = "Explain the concept of recursion in programming with an example"
    compare_model_costs(sample_prompt, expected_response_length=300)

    # Part 4: one concrete calculation
    section_rule = "="*60
    print("\n" + section_rule)
    print("Specific Cost Example")
    print(section_rule)

    result = calculate_cost(500, 1500, "gpt-3.5-turbo")
    if result:
        for line in (
            f"Model: {result['model']}",
            f"Total tokens: {result['total_tokens']}",
            f"Total cost: ${result['total_cost']:.4f}",
            f"Breakdown: {result['breakdown']}",
        ):
            print(line)


In [None]:
# From: context_management.py

# From: Zero to AI Agent, Chapter 7, Section 7.7
# File: context_management.py

"""
Managing conversation context to avoid the hidden cost trap.
Shows the difference between bad and good context management.
"""

from typing import List, Dict
import time


def bad_context_accumulation():
    """
    ❌ BAD: resend the whole, ever-growing history with every request.

    Simulates 20 user/assistant exchanges in which nothing is ever removed
    from the history, so each request is larger than the previous one and
    the cumulative token count grows quadratically.

    Returns:
        Total number of (demo-estimated) tokens sent across all requests.
    """
    rule = "=" * 60
    print(rule)
    print("❌ BAD EXAMPLE: Uncontrolled Context Growth")
    print(rule)

    history = []
    history_chars = 0  # running character count of everything in `history`
    tokens_sent = 0
    exchanges = 20  # kept small for the demo; real chats can run 100+ turns

    for turn in range(exchanges):
        # The user speaks...
        user_text = f"Message {turn}"
        history.append({"role": "user", "content": user_text})
        history_chars += len(user_text)

        # ...and the request carries EVERY message accumulated so far.
        request_tokens = history_chars * 5  # Rough token estimate
        tokens_sent += request_tokens

        print(f"Message {turn+1}: Sending {len(history)} messages ({request_tokens} tokens)")

        # The simulated reply joins the history too.
        reply_text = f"Response {turn}"
        history.append({"role": "assistant", "content": reply_text})
        history_chars += len(reply_text)

    print(f"\nTotal tokens sent: {tokens_sent}")
    print(f"Average tokens per request: {tokens_sent/exchanges:.0f}")
    print("⚠️ Notice how token count grows with each message!")

    # Approximate cost at a GPT-3.5-turbo average price per 1K tokens
    price_per_1k = 0.002
    estimated_cost = (tokens_sent / 1000) * price_per_1k
    print(f"Estimated cost: ${estimated_cost:.4f}")

    return tokens_sent


def good_context_management():
    """
    ✅ GOOD: Manage context size to keep costs under control

    Simulates the same 20 exchanges as the bad example, but trims the
    history with manage_context() before every request so the amount of
    text that is re-sent stops growing.

    Returns:
        Total number of (demo-estimated) tokens sent across all requests.
    """
    print("\n" + "="*60)
    print("✅ GOOD EXAMPLE: Managed Context")
    print("="*60)

    messages = []
    total_tokens = 0

    # This demo counts 5 tokens per character (same inflated factor as the
    # bad example) so that the short messages produce visible numbers.
    demo_tokens_per_char = 5

    # Budget per request, in demo tokens. The untrimmed history peaks just
    # under 2,000 demo tokens after 20 exchanges, so a 2,000 limit would
    # never trigger any trimming; 500 makes the effect visible.
    max_context_tokens = 500

    # manage_context() estimates 1 token ≈ 4 characters, so the same
    # character budget has to be expressed in ITS units before calling it.
    trim_limit = max_context_tokens / demo_tokens_per_char / 4

    for i in range(20):
        # Add user message
        messages.append({"role": "user", "content": f"Message {i}"})

        # Manage context size BEFORE making API call
        messages = manage_context(messages, max_tokens=trim_limit)

        tokens_in_request = sum(len(m["content"]) for m in messages) * demo_tokens_per_char
        total_tokens += tokens_in_request

        print(f"Message {i+1}: Sending {len(messages)} messages ({tokens_in_request} tokens)")

        # Add assistant response
        messages.append({"role": "assistant", "content": f"Response {i}"})

    print(f"\nTotal tokens sent: {total_tokens}")
    print(f"Average tokens per request: {total_tokens/20:.0f}")
    print("✅ Token count stays controlled!")

    # Calculate cost
    cost_per_1k = 0.002
    total_cost = (total_tokens / 1000) * cost_per_1k
    print(f"Estimated cost: ${total_cost:.4f}")

    return total_tokens


def manage_context(messages: List[Dict], max_tokens: int = 2000) -> List[Dict]:
    """
    Keep context under control by removing old messages

    Drops the oldest user message (and the assistant reply that directly
    follows it, if any) until the estimated size fits within max_tokens or
    only two non-system messages remain. A leading system message is
    always kept. The input list is NOT modified; a new list is returned.

    Args:
        messages: List of message dictionaries
        max_tokens: Maximum tokens to keep in context

    Returns:
        Trimmed message list (a new list object)
    """
    # Always keep system message if present
    system_count = 1 if messages and messages[0].get("role") == "system" else 0

    # Rough estimate: 1 token ≈ 4 characters. Track characters as a running
    # total instead of re-scanning the whole list after every removal.
    total_chars = sum(len(m.get("content", "")) for m in messages)

    first_kept = system_count   # index of the oldest message still kept
    remaining = len(messages)   # how many messages would be returned now

    while total_chars / 4 > max_tokens and remaining > system_count + 2:
        # Drop the oldest message after the system prompt...
        total_chars -= len(messages[first_kept].get("content", ""))
        first_kept += 1
        remaining -= 1

        # ...together with the assistant reply that belongs to it.
        if first_kept < len(messages) and messages[first_kept].get("role") == "assistant":
            total_chars -= len(messages[first_kept].get("content", ""))
            first_kept += 1
            remaining -= 1

    return messages[:system_count] + messages[first_kept:]


def smart_context_strategies():
    """
    Print an overview of four context-management strategies.

    For each strategy (sliding window, summarization, importance scoring,
    token budget) the description, pros, cons and an illustrative code
    snippet are written to stdout. Nothing is returned.
    """
    print("\n" + "="*60)
    print("SMART CONTEXT MANAGEMENT STRATEGIES")
    print("="*60)

    # Every snippet ends with a newline followed by 12 spaces.
    snippet_tail = "\n" + " " * 12

    # (name, description, pros, cons, code snippet) in display order
    catalog = [
        (
            "Sliding Window",
            "Keep only last N messages",
            "Simple, predictable cost",
            "Loses older context",
            "\nmessages = messages[-10:]  # Keep last 10 messages" + snippet_tail,
        ),
        (
            "Summarization",
            "Periodically summarize old messages",
            "Preserves key information",
            "Requires extra API call for summary",
            "\nif len(messages) > 20:"
            "\n    summary = summarize_messages(messages[:-10])"
            '\n    messages = [{"role": "system", "content": summary}] + messages[-10:]'
            + snippet_tail,
        ),
        (
            "Importance Scoring",
            "Keep only important messages",
            "Retains critical context",
            "Complex to implement",
            "\nmessages = [m for m in messages if m.get('importance', 0) > threshold]"
            + snippet_tail,
        ),
        (
            "Token Budget",
            "Allocate token budget per conversation turn",
            "Precise cost control",
            "May cut off mid-conversation",
            "\nwhile calculate_tokens(messages) > budget:"
            "\n    messages.pop(1)  # Remove oldest after system"
            + snippet_tail,
        ),
    ]

    for title, summary, upside, downside, snippet in catalog:
        print(f"\n📋 {title}:")
        print(f"  Description: {summary}")
        print(f"  ✅ Pros: {upside}")
        print(f"  ❌ Cons: {downside}")
        print(f"  Code snippet:{snippet}")


def safe_retry_pattern(base_delay: float = 1):
    """
    Safe retry pattern that avoids infinite cost loops

    Prints an anti-pattern (unbounded retry loop), then runs a small
    simulation of the recommended pattern: a bounded number of attempts
    with exponential backoff between them.

    Args:
        base_delay: Seconds to wait after the first failed attempt; the wait
            doubles after every further failure. The default of 1 gives
            waits of 1s and 2s. Pass 0 to run the demo without sleeping.
    """
    print("\n" + "="*60)
    print("SAFE RETRY PATTERNS")
    print("="*60)

    # ❌ BAD: Infinite retries
    print("\n❌ BAD: Infinite retry loop")
    print("-" * 40)
    print("""
while True:
    try:
        response = expensive_api_call()
        break
    except:
        continue  # This could run forever!
""")

    # ✅ GOOD: Limited retries with backoff
    print("\n✅ GOOD: Limited retries with exponential backoff")
    print("-" * 40)

    def safe_api_call(max_retries=3):
        """Safe API call with limited retries and backoff"""
        for attempt in range(max_retries):
            try:
                # Simulate API call: the first two attempts always fail
                if attempt < 2:
                    raise Exception("API Error")
                return {"success": True, "attempt": attempt + 1}
            except Exception:
                # Out of attempts: surface the error instead of looping on
                if attempt == max_retries - 1:
                    print(f"  Failed after {max_retries} attempts")
                    raise

                wait_time = base_delay * 2 ** attempt  # Exponential backoff
                print(f"  Attempt {attempt + 1} failed, waiting {wait_time}s...")
                time.sleep(wait_time)

        # Only reached when max_retries is 0 (no attempt was made)
        return None

    # Demonstrate
    print("\nDemonstration:")
    try:
        result = safe_api_call()
        print(f"  Success on attempt {result['attempt']}")
    except Exception:
        # Catch Exception rather than everything, so Ctrl+C / SystemExit
        # still stop the program instead of being reported as an API failure.
        print("  Final failure - stopping to prevent cost overrun")


class ContextWindowManager:
    """
    Professional context window management

    Builds the message list for the next API call and, when the estimated
    size exceeds the usable budget (max_tokens - reserve_tokens), keeps the
    system message plus as many of the most recent messages as fit.
    Also keeps simple counters about how often trimming was necessary.
    """

    def __init__(self, max_tokens: int = 4000, reserve_tokens: int = 500):
        """
        Initialize context manager

        Args:
            max_tokens: Maximum context window size
            reserve_tokens: Tokens to reserve for response
        """
        self.max_tokens = max_tokens
        self.reserve_tokens = reserve_tokens
        # Budget actually available for the prompt
        self.effective_max = max_tokens - reserve_tokens
        # Sum of the overflow (tokens above effective_max) seen before each trim
        self.total_trimmed = 0
        # Number of prepare_context() calls that had to trim
        self.trim_count = 0

    def prepare_context(self, messages: List[Dict], new_message: str) -> List[Dict]:
        """
        Prepare context for API call

        Args:
            messages: Current conversation history (not modified)
            new_message: New message to add

        Returns:
            Optimized message list within token budget; always a new list
            that ends with the new user message when that message fits.
        """
        # Add new message temporarily
        temp_messages = messages + [{"role": "user", "content": new_message}]

        # Calculate current size
        current_tokens = self._estimate_tokens(temp_messages)

        if current_tokens <= self.effective_max:
            return temp_messages

        # Need to trim: record how far over budget this request was
        self.trim_count += 1
        tokens_to_trim = current_tokens - self.effective_max
        self.total_trimmed += tokens_to_trim

        # Trim strategy: Remove oldest messages but keep system
        trimmed = self._trim_messages(temp_messages, self.effective_max)

        print(f"🔄 Trimmed context: {current_tokens} → {self._estimate_tokens(trimmed)} tokens")

        return trimmed

    def _estimate_tokens(self, messages: List[Dict]) -> int:
        """Estimate token count (1 token ≈ 4 characters, floored per message)"""
        return sum(len(m.get("content", "")) // 4 for m in messages)

    def _trim_messages(self, messages: List[Dict], target_tokens: int) -> List[Dict]:
        """
        Trim messages to fit within token budget

        Keeps a leading system message unconditionally, then walks the
        remaining messages from newest to oldest and keeps each one while
        it still fits. The result is returned in chronological order.
        """
        # Keep system message if present
        system_part: List[Dict] = []
        if messages and messages[0].get("role") == "system":
            system_part = [messages[0]]
            messages = messages[1:]

        # Whatever the system message does not use is available for history
        budget = target_tokens - self._estimate_tokens(system_part)

        # Collect newest-first, stopping at the first message that no longer fits
        recent: List[Dict] = []
        for msg in reversed(messages):
            cost = self._estimate_tokens([msg])
            if cost > budget:
                break
            budget -= cost
            recent.append(msg)

        # Restore chronological order (oldest kept message first)
        recent.reverse()
        return system_part + recent

    def get_stats(self) -> Dict:
        """Get context management statistics"""
        return {
            "max_tokens": self.max_tokens,
            "reserve_tokens": self.reserve_tokens,
            "effective_max": self.effective_max,
            "total_trimmed": self.total_trimmed,
            "trim_count": self.trim_count,
            # max(1, ...) avoids dividing by zero before the first trim
            "avg_trimmed": self.total_trimmed / max(1, self.trim_count)
        }


def demonstrate_context_costs():
    """
    Show the real cost impact of context management

    Runs the unmanaged and the managed 20-message simulation back to back
    and prints how many tokens (and how much money at $0.002 per 1K tokens)
    the managed variant saved, plus a projection for 1000 conversations.
    """
    print("\n" + "="*60)
    print("COST IMPACT COMPARISON")
    print("="*60)

    bad_tokens = bad_context_accumulation()
    good_tokens = good_context_management()

    savings = bad_tokens - good_tokens
    # Guard the ratio so an empty baseline cannot raise ZeroDivisionError
    savings_percent = (savings / bad_tokens) * 100 if bad_tokens else 0.0

    print("\n" + "="*60)
    print("RESULTS")
    print("="*60)
    print(f"Bad approach: {bad_tokens:,} tokens")
    print(f"Good approach: {good_tokens:,} tokens")
    print(f"Tokens saved: {savings:,} ({savings_percent:.1f}%)")

    # Cost calculation
    cost_per_1k = 0.002
    money_saved = (savings / 1000) * cost_per_1k
    print(f"Money saved on 20 messages: ${money_saved:.4f}")

    # money_saved is the saving for ONE simulated conversation (20 messages),
    # so projecting to 1000 conversations means multiplying by 1000.
    conversations_per_month = 1000
    print(f"Projected monthly savings (1000 conversations): ${money_saved * conversations_per_month:.2f}")


if __name__ == "__main__":
    # Demonstrate the cost difference
    demonstrate_context_costs()

    # Show strategies
    smart_context_strategies()

    # Show safe retry pattern
    safe_retry_pattern()

    # Professional context manager demo
    print("\n" + "="*60)
    print("PROFESSIONAL CONTEXT MANAGER DEMO")
    print("="*60)

    manager = ContextWindowManager(max_tokens=2000)
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
    ]

    # Simulate conversation
    for i in range(10):
        new_msg = f"This is message {i} with some content to simulate real conversation length."
        # Pass the complete history. prepare_context() appends the new user
        # message itself, so nothing needs to be sliced off here; slicing
        # with [:-1] would silently drop the previous assistant reply.
        messages = manager.prepare_context(messages, new_msg)
        messages.append({"role": "assistant", "content": f"Response to message {i}"})

    stats = manager.get_stats()
    print("\nContext Manager Stats:")
    for key, value in stats.items():
        print(f"  {key}: {value}")


In [None]:
# From: smart_rate_limiter.py

# From: Zero to AI Agent, Chapter 7, Section 7.7
# File: smart_rate_limiter.py

"""
Intelligent rate limiting that maximizes throughput while respecting limits.
Includes user feedback and statistics tracking.
"""

import time
from collections import deque
from datetime import datetime, timedelta
from typing import Dict, Tuple, Optional


class RateLimitTypes:
    """Catalogue of the rate-limit kinds and example provider limits."""

    def __init__(self):
        # Limit kind -> one-line explanation
        self.limits = dict(
            requests_per_minute="How many API calls you can make",
            tokens_per_minute="How much text you can process",
            requests_per_day="Daily quota (free tiers)",
            concurrent_requests="Parallel calls allowed",
            tokens_per_request="Max size of single request",
        )

    def show_provider_limits(self) -> Dict:
        """
        Return example limits for major providers (as of 2024).

        Returns:
            A freshly built mapping of provider -> tier -> limit values,
            keyed by "rpm", "tpm", "rpd" and/or "max_tokens".
        """
        gpt_35 = {
            "tier_1": {"rpm": 3, "tpm": 40000, "max_tokens": 4096},
            "tier_2": {"rpm": 60, "tpm": 60000, "max_tokens": 4096},
            "tier_3": {"rpm": 500, "tpm": 160000, "max_tokens": 4096},
        }
        gpt_4 = {
            "tier_1": {"rpm": 3, "tpm": 10000, "max_tokens": 8192},
            "tier_2": {"rpm": 20, "tpm": 40000, "max_tokens": 8192},
            "tier_3": {"rpm": 120, "tpm": 300000, "max_tokens": 8192},
        }
        claude = {
            "default": {"rpm": 50, "tpm": 100000, "max_tokens": 200000},
            "scale": {"rpm": 1000, "tpm": 2000000, "max_tokens": 200000},
        }
        gemini = {
            "free": {"rpm": 60, "rpd": 1500, "tpm": 1000000},
            "paid": {"rpm": 360, "rpd": 30000, "tpm": 4000000},
        }

        return {
            "OpenAI GPT-3.5": gpt_35,
            "OpenAI GPT-4": gpt_4,
            "Anthropic Claude": claude,
            "Google Gemini": gemini,
        }

    def print_limits(self):
        """Print every provider's tiers and limits in a readable layout."""
        # Limit key -> (label, number format); keys not listed are not printed
        display = {
            "rpm": ("Requests/min", "{}"),
            "tpm": ("Tokens/min", "{:,}"),
            "rpd": ("Requests/day", "{:,}"),
            "max_tokens": ("Max tokens", "{:,}"),
        }

        rule = "=" * 60
        print(rule)
        print("PROVIDER RATE LIMITS")
        print(rule)

        for provider, tiers in self.show_provider_limits().items():
            print(f"\n📊 {provider}:")
            for tier_name, tier_values in tiers.items():
                print(f"  {tier_name}:")
                for key, value in tier_values.items():
                    if key not in display:
                        continue
                    label, number_format = display[key]
                    print(f"    {label}: {number_format.format(value)}")


class SmartRateLimiter:
    """
    Intelligent rate limiting that maximizes throughput

    Keeps a sliding one-minute window of the requests reported through
    record_request(). wait_if_needed() blocks until a new request fits
    under both the requests-per-minute and tokens-per-minute limits.
    """

    def __init__(self, requests_per_minute: int = 60, tokens_per_minute: int = 40000):
        """
        Initialize rate limiter

        Args:
            requests_per_minute: RPM limit
            tokens_per_minute: TPM limit
        """
        self.rpm_limit = requests_per_minute
        self.tpm_limit = tokens_per_minute

        # Track request times and token counts (parallel deques, oldest first)
        self.request_times = deque()
        self.token_counts = deque()

        # Statistics
        self.total_wait_time = 0
        self.total_requests = 0
        self.total_tokens = 0
        self.rate_limit_hits = 0

    def estimate_tokens(self, text: str) -> int:
        """
        Estimate token count for text

        Args:
            text: Text to estimate

        Returns:
            Estimated token count
        """
        # Rough estimate: 1 token ≈ 4 characters
        return len(text) // 4

    def _prune_window(self, now: datetime) -> None:
        """Drop recorded requests that are older than one minute at `now`."""
        minute_ago = now - timedelta(minutes=1)
        while self.request_times and self.request_times[0] < minute_ago:
            self.request_times.popleft()
            if self.token_counts:
                self.token_counts.popleft()

    def can_proceed(self, estimated_tokens: int) -> Tuple[bool, Optional[str]]:
        """
        Check if we can make a request now

        Args:
            estimated_tokens: Estimated tokens for the request

        Returns:
            Tuple of (can_proceed, reason_if_not)
        """
        # Clean old entries
        self._prune_window(datetime.now())

        # Check request limit
        if len(self.request_times) >= self.rpm_limit:
            return False, f"Request limit reached ({self.rpm_limit} RPM)"

        # Check token limit
        current_tokens = sum(self.token_counts)
        if current_tokens + estimated_tokens > self.tpm_limit:
            return False, f"Token limit reached ({self.tpm_limit} TPM)"

        return True, None

    def wait_if_needed(self, estimated_tokens: int) -> float:
        """
        Smart waiting with user feedback

        Args:
            estimated_tokens: Estimated tokens for the request

        Returns:
            Number of seconds waited

        Raises:
            ValueError: If the request can never be admitted, i.e. the RPM
                limit is not positive or the request alone needs more
                tokens than the TPM limit allows. Without this check the
                method would wait forever.
        """
        if self.rpm_limit <= 0:
            raise ValueError(
                f"Cannot schedule requests with a limit of {self.rpm_limit} RPM"
            )
        if estimated_tokens > self.tpm_limit:
            raise ValueError(
                f"Request needs {estimated_tokens} tokens but the limit is "
                f"{self.tpm_limit} TPM; it can never be admitted"
            )

        total_waited = 0.0

        while True:
            can_proceed, reason = self.can_proceed(estimated_tokens)

            if can_proceed:
                break

            self.rate_limit_hits += 1

            # Calculate optimal wait time
            if self.request_times:
                # Capacity frees up when the oldest request leaves the window
                oldest_request = self.request_times[0]
                wait_until = oldest_request + timedelta(minutes=1)
                wait_seconds = (wait_until - datetime.now()).total_seconds()

                if wait_seconds > 0:
                    # Provide helpful feedback
                    current_rpm = len(self.request_times)
                    current_tpm = sum(self.token_counts)

                    print(f"\n⏳ Rate limit: {reason}")
                    print(f"   Current usage: {current_rpm}/{self.rpm_limit} RPM, {current_tpm:,}/{self.tpm_limit:,} TPM")
                    print(f"   Waiting {wait_seconds:.1f} seconds...")
                    print(f"   (Request #{self.total_requests + 1})")

                    # Show progress bar for long waits
                    if wait_seconds > 5:
                        self._show_wait_progress(wait_seconds)
                    else:
                        time.sleep(wait_seconds)

                    total_waited += wait_seconds
                    self.total_wait_time += wait_seconds
            else:
                # Should not happen, but safety check
                time.sleep(1)
                total_waited += 1

        return total_waited

    def _show_wait_progress(self, wait_seconds: float):
        """Show progress bar while waiting"""
        intervals = 20
        interval_time = wait_seconds / intervals

        for i in range(intervals):
            progress = (i + 1) / intervals
            bar_length = 30
            filled = int(bar_length * progress)
            bar = "█" * filled + "░" * (bar_length - filled)
            remaining = wait_seconds - (i * interval_time)
            # \r rewrites the same console line on every tick
            print(f"\r   [{bar}] {remaining:.1f}s remaining", end="")
            time.sleep(interval_time)
        print("\r   ✅ Ready to proceed!                        ")

    def record_request(self, actual_tokens: int):
        """
        Record that a request was made

        Args:
            actual_tokens: Actual token count used
        """
        self.request_times.append(datetime.now())
        self.token_counts.append(actual_tokens)
        self.total_requests += 1
        self.total_tokens += actual_tokens

    @staticmethod
    def _usage_percent(used: float, limit: float) -> str:
        """Format used/limit as a percentage; "n/a" when the limit is not positive."""
        if limit <= 0:
            return "n/a"
        return f"{(used/limit)*100:.1f}%"

    def get_stats(self) -> Dict:
        """Get rate limiting statistics"""
        # Forget expired requests first so the "current" figures are current
        self._prune_window(datetime.now())
        current_rpm = len(self.request_times)
        current_tpm = sum(self.token_counts)

        # Calculate efficiency
        if self.total_requests > 0:
            avg_wait = self.total_wait_time / self.total_requests
            efficiency = (self.total_requests / (self.total_requests + self.total_wait_time)) * 100
        else:
            avg_wait = 0
            efficiency = 100

        return {
            "total_requests": self.total_requests,
            "total_tokens": self.total_tokens,
            "total_wait_time": f"{self.total_wait_time:.1f}s",
            "avg_wait_per_request": f"{avg_wait:.2f}s",
            "current_rpm": current_rpm,
            "current_tpm": current_tpm,
            "rpm_usage": self._usage_percent(current_rpm, self.rpm_limit),
            "tpm_usage": self._usage_percent(current_tpm, self.tpm_limit),
            "rate_limit_hits": self.rate_limit_hits,
            "efficiency": f"{efficiency:.1f}%"
        }

    def reset(self):
        """Reset all statistics"""
        self.request_times.clear()
        self.token_counts.clear()
        self.total_wait_time = 0
        self.total_requests = 0
        self.total_tokens = 0
        self.rate_limit_hits = 0


class AdaptiveRateLimiter:
    """
    Advanced rate limiter that adapts to actual API responses

    Holds one SmartRateLimiter per model, lowers its limits after a failed
    request, adopts the request limit advertised in response headers, and
    can raise the limits again once enough time has passed without errors.
    """

    # Upper bounds that optimize_limits() will not grow past
    _MAX_RPM = 500
    _MAX_TPM = 200000

    def __init__(self):
        """Initialize adaptive rate limiter"""
        self.limiters = {}  # One limiter per model
        self.performance_history = {}
        self.last_429_time = {}  # Track when we last hit 429 errors

    def get_limiter(self, model: str) -> "SmartRateLimiter":
        """Get or create limiter for a specific model"""
        if model not in self.limiters:
            # Set conservative defaults, will adapt based on responses
            self.limiters[model] = SmartRateLimiter(
                requests_per_minute=30,  # Start conservative
                tokens_per_minute=30000
            )
        return self.limiters[model]

    def handle_response(self, model: str, success: bool, headers: Optional[Dict] = None):
        """
        Adapt limits based on API response

        Args:
            model: Model name
            success: Whether request succeeded
            headers: Response headers (may contain rate limit info)
        """
        if not success:
            # Hit rate limit, back off
            if model in self.limiters:
                limiter = self.limiters[model]
                # Reduce limits by 20%, but never below 1: a limit of 0
                # could not admit any request ever again.
                limiter.rpm_limit = max(1, int(limiter.rpm_limit * 0.8))
                limiter.tpm_limit = max(1, int(limiter.tpm_limit * 0.8))
                self.last_429_time[model] = datetime.now()
                print(f"⚠️ Rate limit hit for {model}, reducing to {limiter.rpm_limit} RPM")

        elif headers:
            # Some APIs provide rate limit info in headers
            self._parse_rate_limit_headers(model, headers)

    def _parse_rate_limit_headers(self, model: str, headers: Dict):
        """Parse rate limit information from response headers"""
        # Example headers (varies by provider):
        # X-RateLimit-Limit-Requests
        # X-RateLimit-Remaining-Requests
        # X-RateLimit-Reset-Requests
        # (the Remaining/Reset values could be used to optimize request
        # timing; only the Limit value is applied here)

        # HTTP header names are case-insensitive, so compare in lower case
        normalized = {str(name).lower(): value for name, value in headers.items()}

        raw_limit = normalized.get("x-ratelimit-limit-requests")
        if raw_limit is None or model not in self.limiters:
            return

        try:
            limit = int(raw_limit)
        except (TypeError, ValueError):
            # Best effort: an unparsable header must not break the request flow
            return

        # Ignore non-positive values; they would block every request
        if limit > 0:
            self.limiters[model].rpm_limit = limit

    def should_increase_limits(self, model: str) -> bool:
        """Check if we should try increasing limits"""
        if model not in self.last_429_time:
            return True

        # Wait at least 5 minutes after a 429 before increasing
        time_since_429 = datetime.now() - self.last_429_time[model]
        return time_since_429 > timedelta(minutes=5)

    @staticmethod
    def _grown(current: int, cap: int) -> int:
        """Return `current` raised by 10% (at least +1), capped, never lowered."""
        # int(current * 1.1) equals current for values below 10, so force a
        # minimum step of 1 to let small limits recover.
        target = max(current + 1, int(current * 1.1))
        # A limit already above the cap (e.g. taken from headers) stays as is.
        return max(current, min(target, cap))

    def optimize_limits(self, model: str):
        """Gradually increase limits if no errors"""
        if self.should_increase_limits(model) and model in self.limiters:
            limiter = self.limiters[model]
            # Increase by 10%
            limiter.rpm_limit = self._grown(limiter.rpm_limit, self._MAX_RPM)  # Cap at 500
            limiter.tpm_limit = self._grown(limiter.tpm_limit, self._MAX_TPM)  # Cap
            print(f"📈 Optimizing {model} limits to {limiter.rpm_limit} RPM")


def demonstrate_rate_limiting():
    """
    Walk through a SmartRateLimiter session with deliberately tiny limits.

    Six simulated prompts are sent through a limiter configured for 5
    requests and 1000 tokens per minute; afterwards the limiter's
    statistics are printed.
    """
    rule = "=" * 60
    print(rule)
    print("SMART RATE LIMITING DEMONSTRATION")
    print(rule)

    # Low limits so the limiter actually has to intervene during the demo
    limiter = SmartRateLimiter(requests_per_minute=5, tokens_per_minute=1000)

    print(f"\nLimits: {limiter.rpm_limit} RPM, {limiter.tpm_limit} TPM")
    print("-" * 40)

    # Prompts of varying length standing in for API calls
    prompts = [
        "Short prompt",
        "This is a medium length prompt with more tokens",
        "This is a much longer prompt that contains significantly more tokens and will use up more of our token budget",
        "Another short one",
        "Medium prompt here",
        "Final prompt"
    ]

    for number, text in enumerate(prompts, start=1):
        print(f"\n📝 Request {number}: '{text[:30]}...'")

        # Estimate is inflated tenfold so the token budget matters in the demo
        token_estimate = limiter.estimate_tokens(text) * 10
        print(f"   Estimated tokens: {token_estimate}")

        # Block here until the limiter admits the request
        seconds_waited = limiter.wait_if_needed(token_estimate)
        if seconds_waited == 0:
            print("   ✅ No wait needed")

        # Stand-in for the API round trip
        print("   Making API call...")
        time.sleep(0.5)

        # Report the (simulated) real usage back to the limiter
        tokens_used = token_estimate + 50
        limiter.record_request(tokens_used)
        print(f"   ✅ Complete! Used {tokens_used} tokens")

    # Final statistics
    print("\n" + rule)
    print("RATE LIMITING STATISTICS")
    print(rule)

    for label, figure in limiter.get_stats().items():
        print(f"{label}: {figure}")


def demonstrate_adaptive_limiting():
    """
    Show AdaptiveRateLimiter reacting to successes and a failure.

    For each of three models: create the limiter, report three successful
    responses, ask for a limit increase, then report one failed response.
    """
    rule = "=" * 60
    print("\n" + rule)
    print("ADAPTIVE RATE LIMITING DEMONSTRATION")
    print(rule)

    adapter = AdaptiveRateLimiter()

    for model_name in ("gpt-3.5-turbo", "gpt-4", "claude-3-haiku"):
        print(f"\n🤖 Testing {model_name}")
        model_limiter = adapter.get_limiter(model_name)
        print(f"   Initial limits: {model_limiter.rpm_limit} RPM")

        # Three requests that went through fine
        for _ in range(3):
            adapter.handle_response(model_name, success=True)

        # No recent errors, so try raising the limits
        adapter.optimize_limits(model_name)

        # Then a failed request, which makes the adapter back off
        adapter.handle_response(model_name, success=False)


if __name__ == "__main__":
    # 1) Reference table of provider limits
    RateLimitTypes().print_limits()

    # 2) Sliding-window limiter in action
    print("\n")
    demonstrate_rate_limiting()

    # 3) Limiter that adapts to API responses
    demonstrate_adaptive_limiting()


In [None]:
# From: cost_optimization.py

# From: Zero to AI Agent, Chapter 7, Section 7.7
# File: cost_optimization.py

"""
Battle-tested strategies to minimize API costs while maintaining quality.
Includes model selection, caching, and batch processing.
"""

import hashlib
import json
from datetime import datetime, timedelta
from typing import Dict, List, Optional, Any


def smart_model_selection(task_type: str, complexity: int, max_cost_per_request: float = 0.01) -> str:
    """
    Pick the cheapest model that is capable enough for a task.

    Args:
        task_type: One of "simple_qa", "code_generation", "creative_writing",
            "data_analysis", "translation", "summarization". Any other value
            makes every known model eligible.
        complexity: Required complexity score; values above 10 are treated as 10.
        max_cost_per_request: Maximum acceptable cost. Accepted for API
            compatibility; the selection below does not consult it.

    Returns:
        Name of the cheapest eligible model, or "gpt-3.5-turbo" when no
        model recommended for the task reaches the requested complexity.
    """

    # What each model costs and what it is suited for (prices as of 2024)
    catalog = {
        "gemini-pro": {
            "cost_per_1k": 0.0005,  # Very cheap!
            "good_for": ["simple_qa", "basic_chat", "translations"],
            "complexity_score": 6,
            "speed": "fast",
            "context_window": 32000
        },
        "claude-3-haiku": {
            "cost_per_1k": 0.0015,
            "good_for": ["quick_responses", "high_volume", "summaries"],
            "complexity_score": 6,
            "speed": "very_fast",
            "context_window": 200000
        },
        "gpt-3.5-turbo": {
            "cost_per_1k": 0.002,
            "good_for": ["simple_qa", "basic_chat", "summaries", "simple_code"],
            "complexity_score": 7,
            "speed": "fast",
            "context_window": 16000
        },
        "claude-3-sonnet": {
            "cost_per_1k": 0.018,
            "good_for": ["complex_qa", "analysis", "creative", "code"],
            "complexity_score": 8,
            "speed": "medium",
            "context_window": 200000
        },
        "gpt-4": {
            "cost_per_1k": 0.09,
            "good_for": ["complex_reasoning", "code_generation", "analysis", "math"],
            "complexity_score": 10,
            "speed": "slow",
            "context_window": 128000
        },
        "claude-3-opus": {
            "cost_per_1k": 0.09,
            "good_for": ["complex_reasoning", "research", "long_documents"],
            "complexity_score": 10,
            "speed": "slow",
            "context_window": 200000
        }
    }

    # Models worth considering per task type
    recommended = {
        "simple_qa": ["gemini-pro", "claude-3-haiku", "gpt-3.5-turbo"],
        "code_generation": ["gpt-3.5-turbo", "claude-3-sonnet", "gpt-4"],
        "creative_writing": ["claude-3-sonnet", "gpt-4"],
        "data_analysis": ["gpt-3.5-turbo", "claude-3-sonnet", "gpt-4"],
        "translation": ["gemini-pro", "gpt-3.5-turbo"],
        "summarization": ["claude-3-haiku", "gpt-3.5-turbo"],
    }

    # Unknown task types fall back to the whole catalogue
    shortlist = recommended.get(task_type, list(catalog.keys()))

    # Keep only models whose score reaches the (clamped) requirement
    required_score = min(complexity, 10)
    capable = [
        (name, catalog[name])
        for name in shortlist
        if name in catalog and catalog[name]["complexity_score"] >= required_score
    ]

    if not capable:
        # Nothing qualifies: default fallback
        return "gpt-3.5-turbo"

    # Cheapest first; the sort is stable, so equal prices keep shortlist order
    ranked = sorted(capable, key=lambda entry: entry[1]["cost_per_1k"])
    winner, winner_info = ranked[0]
    runners_up = [name for name, _ in ranked[1:3]]

    print("📊 Model Selection:")
    print(f"   Task: {task_type} (complexity: {complexity}/10)")
    print(f"   Selected: {winner}")
    print(f"   Cost: ${winner_info['cost_per_1k']:.4f}/1K tokens")
    print(f"   Alternatives considered: {runners_up}")
    return winner


class ResponseCache:
    """
    Cache responses to avoid repeated API calls
    Saves money by reusing previous responses

    Entries live in a dict keyed by an MD5 digest of the prompt, model and
    temperature. An entry is valid while it is younger than the configured
    cache duration. When the cache is at capacity, storing a *new* key first
    drops expired entries and then, only if still needed, evicts the least
    recently accessed entry. Overwriting an existing key never evicts.
    """

    def __init__(self, cache_duration_hours: int = 24, max_cache_size: int = 1000):
        """
        Initialize cache

        Args:
            cache_duration_hours: How long to keep cached responses
            max_cache_size: Maximum number of cached items
        """
        self.cache = {}
        self.cache_duration = timedelta(hours=cache_duration_hours)
        self.max_cache_size = max_cache_size

        # Statistics
        self.cache_hits = 0
        self.cache_misses = 0
        self.total_saved = 0.0  # Sum of the recorded cost of every cache hit
        self.bytes_saved = 0    # Sum of len(str(response)) over every cache hit

    def _get_cache_key(self, prompt: str, model: str = "default", temperature: float = 0.7) -> str:
        """Generate unique cache key"""
        content = f"{prompt}_{model}_{temperature}"
        # MD5 is used only to get a short fixed-size dict key, not for security.
        return hashlib.md5(content.encode()).hexdigest()

    def get(self, prompt: str, model: str = "gpt-3.5-turbo", temperature: float = 0.7) -> Optional[Any]:
        """
        Try to get cached response

        A hit updates the hit/savings statistics and the entry's
        last-accessed time; an expired entry is deleted and counted as a miss.

        Args:
            prompt: The prompt to look up
            model: Model name
            temperature: Temperature setting

        Returns:
            Cached response or None
        """
        key = self._get_cache_key(prompt, model, temperature)
        entry = self.cache.get(key)

        if entry is not None:
            now = datetime.now()
            # Check if still valid
            if now - entry["timestamp"] < self.cache_duration:
                self.cache_hits += 1
                self.total_saved += entry["cost"]
                self.bytes_saved += len(str(entry["response"]))

                # Update access time for LRU
                entry["last_accessed"] = now

                print(f"💰 Cache hit! Saved ${entry['cost']:.4f}")
                return entry["response"]

            # Expired, remove it
            del self.cache[key]

        self.cache_misses += 1
        return None

    def set(self, prompt: str, response: Any, cost: float, model: str = "gpt-3.5-turbo", temperature: float = 0.7):
        """
        Cache a response

        Args:
            prompt: The prompt
            response: The response to cache
            cost: Cost of this API call
            model: Model name
            temperature: Temperature setting
        """
        key = self._get_cache_key(prompt, model, temperature)

        # Only make room when a new key is added: refreshing an existing key
        # does not grow the cache, so evicting then would discard a live
        # entry for nothing.
        if key not in self.cache and len(self.cache) >= self.max_cache_size:
            # Expired entries are dead weight; drop them before sacrificing
            # an entry that could still produce a hit.
            self.clear_expired()
            if len(self.cache) >= self.max_cache_size:
                self._evict_oldest()

        now = datetime.now()
        self.cache[key] = {
            "response": response,
            "timestamp": now,
            "last_accessed": now,
            "cost": cost,
            "model": model
        }

    def _evict_oldest(self):
        """Remove least recently used item"""
        if not self.cache:
            return

        # Find least recently accessed
        oldest_key = min(self.cache, key=lambda k: self.cache[k]["last_accessed"])
        del self.cache[oldest_key]

    def get_stats(self) -> Dict:
        """Get cache statistics (counters plus pre-formatted display strings)"""
        total_requests = self.cache_hits + self.cache_misses
        # max(1, ...) avoids dividing by zero before any lookup has happened
        hit_rate = (self.cache_hits / max(1, total_requests)) * 100

        # Calculate cache value: total recorded cost of the entries currently held
        cache_value = sum(entry["cost"] for entry in self.cache.values())

        return {
            "cache_hits": self.cache_hits,
            "cache_misses": self.cache_misses,
            "hit_rate": f"{hit_rate:.1f}%",
            "total_saved": f"${self.total_saved:.2f}",
            "bytes_saved": f"{self.bytes_saved:,}",
            "cache_size": len(self.cache),
            "cache_value": f"${cache_value:.2f}"
        }

    def clear_expired(self):
        """Remove expired entries and return how many were removed"""
        now = datetime.now()
        # Collect keys first; deleting while iterating the dict would raise.
        expired_keys = [
            key for key, entry in self.cache.items()
            if now - entry["timestamp"] >= self.cache_duration
        ]
        for key in expired_keys:
            del self.cache[key]

        return len(expired_keys)


def batch_process_efficiently(items: List[str], batch_size: int = 10) -> Dict:
    """
    Process multiple items in single API calls when possible

    The API call itself is simulated: each item yields "Processed: <item>".
    Costs are rough estimates (about 4 characters per prompt token, 100
    response tokens per item, GPT-3.5 pricing) compared against an assumed
    150 tokens per item when every item is sent individually.

    Args:
        items: List of items to process
        batch_size: Items per API call (must be at least 1)

    Returns:
        Dict with "results", "total_cost", "api_calls" and "savings"

    Raises:
        ValueError: If batch_size is smaller than 1
    """
    # A non-positive batch size would either crash range() or silently
    # process nothing, so reject it up front.
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    total_cost = 0.0
    results = []
    api_calls = 0

    print(f"\n📦 Batch Processing {len(items)} items")
    print(f"   Batch size: {batch_size}")

    for i in range(0, len(items), batch_size):
        batch = items[i:i+batch_size]
        api_calls += 1

        # Combine into single prompt
        combined_prompt = "Process each item separately and number the responses:\n\n" + "".join(
            f"{j}. {item}\n" for j, item in enumerate(batch, 1)
        )

        # Estimate cost (simplified)
        estimated_tokens = len(combined_prompt) // 4 + (100 * len(batch))  # Assume 100 tokens per response
        batch_cost = (estimated_tokens / 1000) * 0.002  # GPT-3.5 pricing
        total_cost += batch_cost

        print(f"   Batch {api_calls}: {len(batch)} items, ~{estimated_tokens} tokens, ${batch_cost:.4f}")

        # Simulate processing
        results.extend(f"Processed: {item}" for item in batch)

    # Calculate savings
    individual_cost = len(items) * ((150 / 1000) * 0.002)  # If processed individually
    savings = individual_cost - total_cost
    # With no items there is no baseline cost to compare against.
    savings_percent = (savings / individual_cost) * 100 if individual_cost else 0.0

    print(f"\n💰 Batch Processing Results:")
    print(f"   Items processed: {len(items)}")
    print(f"   API calls made: {api_calls}")
    print(f"   Total cost: ${total_cost:.4f}")
    print(f"   Cost if individual: ${individual_cost:.4f}")
    print(f"   Saved: ${savings:.4f} ({savings_percent:.1f}%)")

    return {
        "results": results,
        "total_cost": total_cost,
        "api_calls": api_calls,
        "savings": savings
    }


class CostOptimizer:
    """
    Comprehensive cost optimization manager

    Combines a ResponseCache lookup, a keyword/length based complexity
    estimate and smart_model_selection() into per-request recommendations,
    and can render a text report of the accumulated statistics.
    """

    def __init__(self, daily_budget: float = 10.0):
        """
        Initialize cost optimizer

        Args:
            daily_budget: Maximum daily spending
        """
        self.daily_budget = daily_budget
        self.cache = ResponseCache()
        self.model_usage = {}
        self.optimization_stats = {
            "cache_savings": 0,     # Number of requests answered from the cache
            "model_downgrades": 0,
            "batch_savings": 0
        }

    def optimize_request(self, prompt: str, task_type: str = "simple_qa") -> Dict:
        """
        Optimize a request for cost

        Args:
            prompt: The prompt to process
            task_type: Type of task

        Returns:
            Optimization recommendations. On a cache hit the dict holds only
            "use_cache" and "cached_response"; otherwise it holds "model",
            "can_batch" and, for low-complexity prompts, "optimizations".
        """
        recommendations = {}

        # 1. Check cache first
        cached = self.cache.get(prompt)
        # The cache signals a miss with None, so compare against None: a
        # legitimately cached falsy response (e.g. "") is still a hit.
        if cached is not None:
            self.optimization_stats["cache_savings"] += 1
            recommendations["use_cache"] = True
            recommendations["cached_response"] = cached
            return recommendations

        # 2. Estimate complexity
        complexity = self._estimate_complexity(prompt)

        # 3. Select optimal model
        model = smart_model_selection(task_type, complexity)
        recommendations["model"] = model

        # 4. Check if we should batch
        recommendations["can_batch"] = len(prompt) < 500  # Short enough to batch

        # 5. Suggest optimizations
        if complexity < 5:
            recommendations["optimizations"] = [
                "Consider using cheaper model",
                "Enable aggressive caching",
                "Batch with similar requests"
            ]

        return recommendations

    def _estimate_complexity(self, prompt: str) -> int:
        """Estimate task complexity from prompt (heuristic score from 3 to 10)"""
        complexity = 3  # Base complexity

        # Increase for certain keywords (case-insensitive substring match)
        complex_keywords = ["analyze", "explain", "compare", "debug", "optimize", "create"]
        lowered = prompt.lower()  # Lower-case once instead of once per keyword
        complexity += 2 * sum(keyword in lowered for keyword in complex_keywords)

        # Increase for length
        if len(prompt) > 500:
            complexity += 2
        if len(prompt) > 1000:
            complexity += 2

        # Cap at 10
        return min(complexity, 10)

    def get_optimization_report(self) -> str:
        """Generate optimization report as a multi-line string"""
        cache_stats = self.cache.get_stats()

        report = []
        report.append("="*60)
        report.append("COST OPTIMIZATION REPORT")
        report.append("="*60)

        report.append("\n📊 Cache Performance:")
        for key, value in cache_stats.items():
            report.append(f"   {key}: {value}")

        report.append("\n💰 Savings:")
        report.append(f"   From caching: ${self.cache.total_saved:.2f}")
        report.append(f"   From batching: ${self.optimization_stats['batch_savings']:.2f}")
        report.append(f"   Model downgrades: {self.optimization_stats['model_downgrades']}")

        report.append("\n💡 Recommendations:")
        if cache_stats["cache_hits"] < cache_stats["cache_misses"]:
            report.append("   • Increase cache duration or size")
        report.append("   • Batch similar requests together")
        report.append("   • Use cheaper models for simple tasks")

        return "\n".join(report)


def demonstrate_optimization():
    """
    Demonstrate cost optimization techniques

    Walks through four printed sections: model selection, response caching,
    batch processing and the combined CostOptimizer. All API calls are
    simulated; nothing is sent over the network.
    """

    print("="*60)
    print("COST OPTIMIZATION DEMONSTRATION")
    print("="*60)

    # 1. Model Selection
    print("\n1️⃣ SMART MODEL SELECTION")
    print("-" * 40)

    tasks = [
        ("What is 2+2?", "simple_qa", 2),
        ("Write a Python function to sort a list", "code_generation", 6),
        ("Explain quantum computing in detail", "complex_qa", 9)
    ]

    for prompt, task_type, complexity in tasks:
        print(f"\nPrompt: '{prompt[:50]}...'")
        # Called for its printed explanation; the returned name is not needed here.
        smart_model_selection(task_type, complexity)

    # 2. Caching
    print("\n2️⃣ RESPONSE CACHING")
    print("-" * 40)

    cache = ResponseCache()

    # Simulate repeated requests
    prompts = [
        "What is Python?",
        "Explain AI",
        "What is Python?",  # Duplicate
        "How does ML work?",
        "What is Python?"   # Another duplicate
    ]

    for prompt in prompts:
        cached = cache.get(prompt)
        # The cache reports a miss as None
        if cached is None:
            # Simulate API call
            print(f"📤 API call for: '{prompt}'")
            cache.set(prompt, f"Response to: {prompt}", cost=0.002)
        else:
            print(f"💰 Using cached response for: '{prompt}'")

    print(f"\nCache stats: {cache.get_stats()}")

    # 3. Batch Processing
    print("\n3️⃣ BATCH PROCESSING")
    print("-" * 40)

    items = [f"Item {i}" for i in range(25)]
    batch_process_efficiently(items, batch_size=5)

    # 4. Complete Optimization
    print("\n4️⃣ COMPLETE OPTIMIZATION")
    print("-" * 40)

    optimizer = CostOptimizer(daily_budget=5.0)

    test_prompts = [
        ("Hello", "simple_qa"),
        ("Write a complex algorithm", "code_generation"),
        ("Hello", "simple_qa"),  # Duplicate - should cache
        ("Analyze this data", "data_analysis")
    ]

    for prompt, task in test_prompts:
        print(f"\n📝 Request: '{prompt}'")
        recommendations = optimizer.optimize_request(prompt, task)

        if recommendations.get("use_cache"):
            print("   ✅ Using cached response!")
        else:
            print(f"   📊 Recommended model: {recommendations.get('model')}")
            print(f"   📦 Can batch: {recommendations.get('can_batch')}")
            # optimize_request only reads the cache, so store the simulated
            # response here; otherwise the duplicate request above could
            # never be served from the cache.
            optimizer.cache.set(prompt, f"Response to: {prompt}", cost=0.002)

    print("\n" + optimizer.get_optimization_report())


# Run the cost optimization demo only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    demonstrate_optimization()


In [None]:
# From: cost_monitor.py

# From: Zero to AI Agent, Chapter 7, Section 7.7
# File: cost_monitor.py

"""
Monitor and control API spending with budgets and alerts.
Includes free alternatives for students and hobbyists.
"""

from datetime import datetime, date
from typing import Dict, List, Optional
import json
from pathlib import Path


class CostMonitor:
    """
    Monitor and control API spending with comprehensive tracking

    Costs are accumulated per calendar day and per model. Every tracked
    request is checked against the daily budget: crossing the alert threshold
    prints a warning, exceeding the budget raises an exception.
    """

    def __init__(self, daily_budget: float = 10.0, alert_threshold: float = 0.8):
        """
        Initialize cost monitor

        Args:
            daily_budget: Maximum daily spending allowed
            alert_threshold: Alert when this fraction of budget is used
        """
        self.daily_budget = daily_budget
        self.alert_threshold = alert_threshold

        # Track costs by date
        self.daily_costs = {}

        # Track by model
        self.model_costs = {}

        # Overall statistics
        self.total_spent = 0.0
        self.request_count = 0
        self.total_tokens = 0

        # Alerts
        self.alerts_triggered = []

    def _budget_fraction(self, spent: float) -> float:
        """Return *spent* as a fraction of the daily budget.

        A non-positive budget cannot be divided by: any spending against it
        counts as infinitely over budget, zero spending as 0.
        """
        if self.daily_budget > 0:
            return spent / self.daily_budget
        return float("inf") if spent > 0 else 0.0

    def track_request(self, cost: float, model: str, tokens: int) -> Dict:
        """
        Track a request and check budget

        The request is always recorded first, so a request that pushes the
        day over budget is still included in the totals when the exception
        is raised.

        Args:
            cost: Cost of this request
            model: Model used
            tokens: Tokens used

        Returns:
            Status dictionary with budget information

        Raises:
            Exception: If daily budget is exceeded
        """
        today = datetime.now().date()

        # Initialize today's tracking if needed
        if today not in self.daily_costs:
            self.daily_costs[today] = {
                "cost": 0,
                "requests": 0,
                "tokens": 0,
                "models": {}
            }
        day = self.daily_costs[today]

        # Update daily totals
        day["cost"] += cost
        day["requests"] += 1
        day["tokens"] += tokens

        # Track by model
        day_model = day["models"].setdefault(model, {"cost": 0, "requests": 0})
        day_model["cost"] += cost
        day_model["requests"] += 1

        # Update global tracking
        self.total_spent += cost
        self.request_count += 1
        self.total_tokens += tokens

        overall_model = self.model_costs.setdefault(
            model, {"cost": 0, "requests": 0, "tokens": 0}
        )
        overall_model["cost"] += cost
        overall_model["requests"] += 1
        overall_model["tokens"] += tokens

        # Check budget
        daily_spent = day["cost"]
        budget_percent = self._budget_fraction(daily_spent)

        status = {
            "daily_spent": daily_spent,
            "daily_budget": self.daily_budget,
            "remaining": self.daily_budget - daily_spent,
            "percent_used": budget_percent * 100,
            "status": "OK"
        }

        # Check if budget exceeded
        if daily_spent > self.daily_budget:
            status["status"] = "EXCEEDED"
            alert = f"❌ Daily budget exceeded! Spent ${daily_spent:.2f} / ${self.daily_budget:.2f}"
            self.alerts_triggered.append({"time": datetime.now(), "message": alert})
            raise Exception(alert)

        # Check if approaching limit
        elif budget_percent > self.alert_threshold:
            status["status"] = "WARNING"
            alert = f"⚠️ Warning: {budget_percent*100:.0f}% of daily budget used"
            print(alert)
            self.alerts_triggered.append({"time": datetime.now(), "message": alert})

        return status

    def get_daily_report(self, date: Optional[date] = None) -> str:
        """
        Get spending report for a specific day

        Args:
            date: Date to report on (None for today)

        Returns:
            Formatted report string
        """
        # NOTE: the parameter shadows the imported `date` class inside this
        # method; it is kept because renaming would break keyword callers.
        target_date = date or datetime.now().date()

        if target_date not in self.daily_costs:
            return f"No data for {target_date}"

        data = self.daily_costs[target_date]

        report = []
        report.append(f"Daily Report: {target_date}")
        report.append("=" * 50)
        report.append(f"Total Cost: ${data['cost']:.2f} / ${self.daily_budget:.2f}")
        report.append(f"Requests: {data['requests']}")
        report.append(f"Tokens: {data['tokens']:,}")

        if data['requests'] > 0:
            report.append(f"Avg cost/request: ${data['cost']/data['requests']:.4f}")
            report.append(f"Avg tokens/request: {data['tokens']/data['requests']:.0f}")

        if data["models"]:
            report.append("\nBy Model:")
            for model, stats in data["models"].items():
                report.append(f"  {model}:")
                report.append(f"    Cost: ${stats['cost']:.4f}")
                report.append(f"    Requests: {stats['requests']}")

        return "\n".join(report)

    def get_full_report(self) -> str:
        """Generate comprehensive spending report"""

        report = []
        report.append("="*60)
        report.append("API SPENDING REPORT")
        report.append("="*60)

        # Summary
        report.append(f"\n📊 SUMMARY")
        report.append(f"Total Spent: ${self.total_spent:.2f}")
        report.append(f"Total Requests: {self.request_count}")
        report.append(f"Total Tokens: {self.total_tokens:,}")

        if self.request_count > 0:
            report.append(f"Average Cost: ${self.total_spent/self.request_count:.4f}/request")
            report.append(f"Average Tokens: {self.total_tokens/self.request_count:.0f}/request")

        # Daily breakdown
        report.append(f"\n📅 DAILY BREAKDOWN")
        for day, data in sorted(self.daily_costs.items(), reverse=True)[:7]:  # Last 7 days
            fraction = self._budget_fraction(data['cost'])
            budget_percent = fraction * 100
            # Same rules as track_request, so the report agrees with the
            # alerts that were raised (and honours a custom alert_threshold).
            if data['cost'] > self.daily_budget:
                status = "❌"
            elif fraction > self.alert_threshold:
                status = "⚠️"
            else:
                status = "✅"
            report.append(f"\n{day}: {status}")
            report.append(f"  Cost: ${data['cost']:.2f} ({budget_percent:.0f}% of budget)")
            report.append(f"  Requests: {data['requests']}")

        # Model breakdown
        if self.model_costs:
            report.append(f"\n🤖 MODEL USAGE")
            sorted_models = sorted(self.model_costs.items(),
                                 key=lambda x: x[1]["cost"], reverse=True)

            for model, stats in sorted_models:
                # total_spent is 0 when only free requests were tracked;
                # report a 0% share instead of dividing by zero.
                if self.total_spent:
                    cost_percent = (stats["cost"] / self.total_spent) * 100
                else:
                    cost_percent = 0.0
                report.append(f"\n{model}:")
                report.append(f"  Cost: ${stats['cost']:.2f} ({cost_percent:.1f}% of total)")
                report.append(f"  Requests: {stats['requests']}")
                report.append(f"  Avg: ${stats['cost']/stats['requests']:.4f}/request")

        # Recent alerts
        if self.alerts_triggered:
            report.append(f"\n⚠️ RECENT ALERTS")
            for alert in self.alerts_triggered[-5:]:
                report.append(f"  {alert['time'].strftime('%Y-%m-%d %H:%M')}: {alert['message']}")

        # Recommendations
        report.append(f"\n💡 RECOMMENDATIONS")
        if self.total_spent > 0:
            # Find most expensive model
            if self.model_costs:
                most_expensive = max(self.model_costs.items(), key=lambda x: x[1]["cost"])
                if most_expensive[1]["cost"] / self.total_spent > 0.5:
                    report.append(f"  • Consider using cheaper models (50%+ spent on {most_expensive[0]})")

            # Check daily patterns (total_spent > 0 implies at least one tracked day)
            avg_daily = self.total_spent / len(self.daily_costs)
            if avg_daily > self.daily_budget * self.alert_threshold:
                report.append(f"  • Average daily spend (${avg_daily:.2f}) approaching budget")

        return "\n".join(report)

    def export_data(self, filename: str = "cost_data.json"):
        """Export cost data to JSON file"""

        data = {
            "export_time": datetime.now().isoformat(),
            "total_spent": self.total_spent,
            "request_count": self.request_count,
            "total_tokens": self.total_tokens,
            "daily_budget": self.daily_budget,
            # date objects are not valid JSON keys, so store them as strings
            "daily_costs": {
                str(day): info for day, info in self.daily_costs.items()
            },
            "model_costs": self.model_costs,
            "alerts": [
                {"time": alert["time"].isoformat(), "message": alert["message"]}
                for alert in self.alerts_triggered
            ]
        }

        # Explicit encoding keeps the output independent of the platform default.
        with open(filename, "w", encoding="utf-8") as f:
            json.dump(data, f, indent=2)

        print(f"📁 Cost data exported to {filename}")

    def predict_monthly_cost(self) -> float:
        """Predict monthly cost based on current usage"""

        if not self.daily_costs:
            return 0.0

        # Calculate average daily spend over the days that have any tracked request
        avg_daily = self.total_spent / len(self.daily_costs)

        # Project to 30 days
        return avg_daily * 30


def free_ai_options() -> Dict:
    """
    Ways to use AI without spending money
    Perfect for students and hobbyists!

    Prints an overview of each free option followed by recommendations per
    use case, then returns the full option catalogue (including the "setup"
    and "how_to" fields, which are not printed).
    """

    options = {
        "Google Gemini": {
            "free_tier": "60 requests/minute",
            "limits": "1,500 requests/day",
            "good_for": "Experimentation, learning, prototyping",
            "setup": "Just need Google account",
            "how_to": """
1. Go to aistudio.google.com
2. Sign in with Google account
3. Get API key (no credit card!)
4. Start building!
            """,
            "cost": "FREE",
            "quality": "⭐⭐⭐⭐"
        },

        "OpenAI Free Credits": {
            "free_tier": "$5 for new accounts (sometimes)",
            "limits": "Expires after 3 months",
            "good_for": "Initial testing, learning GPT",
            "setup": "New phone number required",
            "how_to": """
1. Sign up at platform.openai.com
2. Verify phone number
3. Check for free credits
4. Use wisely - it goes fast!
            """,
            "cost": "FREE (limited)",
            "quality": "⭐⭐⭐⭐⭐"
        },

        "Hugging Face Inference": {
            "free_tier": "Rate-limited access to many models",
            "limits": "Slow, queued requests",
            "good_for": "Testing open models, learning",
            "setup": "Free account",
            "how_to": """
1. Create account at huggingface.co
2. Get API token
3. Use Inference API
4. Expect delays during peak times
            """,
            "cost": "FREE",
            "quality": "⭐⭐⭐"
        },

        "Local Open Source (Ollama)": {
            "free_tier": "Unlimited (your hardware)",
            "limits": "Need decent GPU or lots of patience",
            "good_for": "Privacy, unlimited use, learning",
            "setup": "Install Ollama",
            "how_to": """
1. Download from ollama.ai
2. Run: ollama pull llama2
3. Run: ollama run llama2
4. Use via API or CLI
            """,
            "cost": "FREE (your electricity)",
            "quality": "⭐⭐⭐ (depends on model)"
        },

        "Colab + Open Models": {
            "free_tier": "Free GPU time (limited)",
            "limits": "Session limits, disconnections",
            "good_for": "Experiments, learning, notebooks",
            "setup": "Google account + notebooks",
            "how_to": """
1. Go to colab.research.google.com
2. Create new notebook
3. Use Transformers library
4. Load open models
            """,
            "cost": "FREE",
            "quality": "⭐⭐⭐⭐"
        },

        "Replicate Free Tier": {
            "free_tier": "Free predictions for public models",
            "limits": "Very limited, mostly for testing",
            "good_for": "Trying different models",
            "setup": "GitHub account",
            "how_to": """
1. Sign up at replicate.com
2. Get API token
3. Use public models
4. Watch usage carefully
            """,
            "cost": "FREE (very limited)",
            "quality": "⭐⭐⭐⭐"
        }
    }

    banner = "="*60

    print(banner)
    print("🎓 FREE AI OPTIONS FOR STUDENTS")
    print(banner)

    # (printed label, dictionary field) pairs in display order
    summary_fields = (
        ("Cost", "cost"),
        ("Quality", "quality"),
        ("Free Tier", "free_tier"),
        ("Good For", "good_for"),
        ("Limits", "limits"),
    )

    for option_name, details in options.items():
        print(f"\n📌 {option_name}")
        for label, field in summary_fields:
            print(f"   {label}: {details[field]}")

    print("\n" + banner)
    print("💡 RECOMMENDATIONS BY USE CASE")
    print(banner)

    recommendations = {
        "Just Learning": ["Google Gemini (best free option)", "Local Ollama"],
        "Building Projects": ["Google Gemini", "OpenAI free credits"],
        "Research/Experiments": ["Colab + Open Models", "Hugging Face"],
        "Production Apps": ["Start with Gemini free tier", "Then upgrade as needed"]
    }

    for use_case in recommendations:
        print(f"\n{use_case}:")
        for suggestion in recommendations[use_case]:
            print(f"  • {suggestion}")

    return options


def student_budget_strategies():
    """
    Budget-conscious strategies for students

    Prints each strategy with how to apply it and its potential savings,
    followed by a suggested all-free starter stack. Returns nothing.
    """

    divider = "="*60

    print("\n" + divider)
    print("💰 STUDENT BUDGET STRATEGIES")
    print(divider)

    strategies = [
        {
            "strategy": "Start Free, Upgrade Later",
            "how": "Use Google Gemini free tier until you hit limits",
            "savings": "$50-100/month"
        },
        {
            "strategy": "Cache Everything",
            "how": "Never make the same API call twice",
            "savings": "50-70% reduction"
        },
        {
            "strategy": "Use Cheap Models First",
            "how": "Try Gemini/Haiku before GPT-4",
            "savings": "10-100x cost difference"
        },
        {
            "strategy": "Batch Requests",
            "how": "Process multiple items per API call",
            "savings": "30-50% reduction"
        },
        {
            "strategy": "Local for Development",
            "how": "Use Ollama locally, API for production",
            "savings": "$20-50/month"
        },
        {
            "strategy": "Share API Keys (Carefully!)",
            "how": "Team projects can share costs",
            "savings": "Split costs 3-4 ways"
        },
        {
            "strategy": "Use University Resources",
            "how": "Many universities provide compute credits",
            "savings": "$100-500/month"
        }
    ]

    for entry in strategies:
        title = entry['strategy']
        method = entry['how']
        benefit = entry['savings']
        print(f"\n📋 {title}")
        print(f"   How: {method}")
        print(f"   Potential Savings: {benefit}")

    print("\n" + divider)
    print("📚 STUDENT STARTER STACK (All Free!)")
    print(divider)
    print("""
1. Development: Google Gemini (free tier)
2. Experiments: Colab notebooks
3. Local testing: Ollama with Llama 2
4. Version control: GitHub (free)
5. Deployment: Vercel/Netlify (free tier)

Total Cost: $0/month
Capabilities: Build full AI applications!
    """)


def demonstrate_cost_monitoring():
    """
    Demonstrate cost monitoring in action

    Feeds a fixed list of simulated API calls through a CostMonitor with a
    $5 daily budget, prints the daily and full reports plus a monthly
    projection, and exports the data to "demo_cost_data.json".
    """

    rule = "="*60
    print(rule)
    print("COST MONITORING DEMONSTRATION")
    print(rule)

    # Create monitor
    monitor = CostMonitor(daily_budget=5.0)

    # Simulate some API calls
    simulated_calls = [
        {"cost": 0.002, "model": "gpt-3.5-turbo", "tokens": 150},
        {"cost": 0.003, "model": "gpt-3.5-turbo", "tokens": 200},
        {"cost": 0.09, "model": "gpt-4", "tokens": 1000},
        {"cost": 0.001, "model": "gemini-pro", "tokens": 100},
        {"cost": 0.002, "model": "gpt-3.5-turbo", "tokens": 150},
        {"cost": 0.05, "model": "gpt-4", "tokens": 500},
        {"cost": 0.0005, "model": "gemini-pro", "tokens": 50},
        {"cost": 0.002, "model": "gpt-3.5-turbo", "tokens": 150},
    ]

    print("\n📊 Tracking API Calls:")
    call_number = 0
    for call in simulated_calls:
        call_number += 1
        price = call["cost"]
        model_name = call["model"]
        print(f"\nCall {call_number}: {model_name} (${price:.4f})")

        try:
            status = monitor.track_request(price, model_name, call["tokens"])
            print(f"  Budget: ${status['daily_spent']:.4f} / ${status['daily_budget']:.2f}")
            print(f"  Status: {status['status']}")
        except Exception as e:
            # track_request raises once the daily budget is exceeded;
            # stop issuing further simulated calls at that point.
            print(f"  {e}")
            break

    # Show reports
    daily_report = monitor.get_daily_report()
    print("\n" + daily_report)
    full_report = monitor.get_full_report()
    print("\n" + full_report)

    # Predict monthly cost
    print(f"\n📈 Projected Monthly Cost: ${monitor.predict_monthly_cost():.2f}")

    # Export data
    monitor.export_data("demo_cost_data.json")


# Script entry point: runs only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    # Show free options (prints the overview; the returned dict is ignored here)
    free_ai_options()
    
    # Student strategies
    student_budget_strategies()
    
    # Demonstrate monitoring
    print("\n")
    demonstrate_cost_monitoring()


In [None]:
# From: chapter7_challenge_project.py

# From: Zero to AI Agent, Chapter 7, Section 7.7
# File: chapter7_challenge_project.py

"""
Chapter 7 Challenge Project: Multi-Provider AI Assistant Hub
Build an AI assistant that intelligently switches between providers,
manages costs, and handles rate limits.
"""

import os
import json
import time
import hashlib
from datetime import datetime, timedelta
from typing import Dict, List, Optional, Any
from dataclasses import dataclass
from enum import Enum
from collections import deque


# =======================
# Configuration
# =======================

@dataclass
class ModelConfig:
    """Configuration for each model

    Plain data record describing one model's provider, pricing, rate limits
    and capability. Field order matters for positional construction.
    """
    provider: str  # Provider identifier, e.g. "openai", "google", "anthropic"
    name: str  # Model name; presumably the identifier sent to the provider's API - TODO confirm
    input_cost_per_1k: float  # presumably cost per 1,000 input tokens (currency not stated here)
    output_cost_per_1k: float  # presumably cost per 1,000 output tokens (currency not stated here)
    rpm_limit: int  # presumably requests-per-minute limit - TODO confirm
    tpm_limit: int  # presumably tokens-per-minute limit - TODO confirm
    complexity_score: int  # 1-10, how capable is this model
    context_window: int  # presumably maximum context size in tokens - TODO confirm


class Provider(Enum):
    """Available providers

    Each member's value is the lowercase provider identifier string.
    """
    OPENAI = "openai"
    ANTHROPIC = "anthropic"  
    GOOGLE = "google"
    LOCAL = "local"  # For Ollama or similar


class AssistantHub:
    """
    Your Multi-Provider AI Assistant Hub
    Implements everything from Chapter 7!

    For every prompt the hub picks the cheapest configured model that is
    capable enough, answers repeated prompts from an in-memory cache,
    enforces per-model request/token limits over the last minute, and keeps
    running cost and usage statistics. API calls are currently simulated
    (see ``_make_api_call``).
    """

    def __init__(self):
        """Initialize the assistant hub

        Builds the model table and the empty cache, cost, rate-limit and
        statistics state, then loads API keys from the environment and
        creates one rate limiter per configured model.
        """

        # Model configurations
        self.models = {
            "gpt-3.5-turbo": ModelConfig(
                provider="openai",
                name="gpt-3.5-turbo",
                input_cost_per_1k=0.0005,
                output_cost_per_1k=0.0015,
                rpm_limit=90,
                tpm_limit=90000,
                complexity_score=7,
                context_window=16000
            ),
            "gemini-pro": ModelConfig(
                provider="google",
                name="gemini-pro",
                input_cost_per_1k=0.000125,
                output_cost_per_1k=0.000375,
                rpm_limit=60,
                tpm_limit=1000000,
                complexity_score=6,
                context_window=32000
            ),
            "claude-3-haiku": ModelConfig(
                provider="anthropic",
                name="claude-3-haiku-20240307",
                input_cost_per_1k=0.00025,
                output_cost_per_1k=0.00125,
                rpm_limit=50,
                tpm_limit=100000,
                complexity_score=6,
                context_window=200000
            )
            # TODO: Add more models
        }

        # Initialize components
        self.cache = {}  # Simple cache implementation
        self.cache_duration = timedelta(hours=24)

        self.cost_tracker = {
            "total": 0.0,
            "by_model": {},
            "daily": {}
        }

        self.rate_limiters = {}  # One per model
        self.conversation_history = []
        self.available_providers = []

        # Statistics
        self.stats = {
            "total_requests": 0,
            "cache_hits": 0,
            "cache_misses": 0,
            "total_tokens": 0,
            "errors": 0
        }

        # Load API keys
        self._load_api_keys()

        # Initialize rate limiters
        self._init_rate_limiters()

    def _load_api_keys(self):
        """
        Load API keys from environment variables

        For every provider whose variable holds a non-empty value, the key
        is stored in ``self.api_keys`` and the provider name is appended to
        ``self.available_providers``. A hint is printed when none is set.

        TODO: Implement secure key loading from Chapter 7.6
        """
        self.api_keys = {}

        # (provider key, environment variable, display name)
        key_sources = (
            ("openai", "OPENAI_API_KEY", "OpenAI"),
            ("anthropic", "ANTHROPIC_API_KEY", "Anthropic"),
            ("google", "GOOGLE_API_KEY", "Google"),
        )

        for provider, env_var, label in key_sources:
            api_key = os.getenv(env_var)
            if api_key:
                self.api_keys[provider] = api_key
                self.available_providers.append(provider)
                print(f"✅ {label} API key loaded")

        if not self.available_providers:
            print("⚠️ No API keys found! Set environment variables:")
            print("  OPENAI_API_KEY, ANTHROPIC_API_KEY, or GOOGLE_API_KEY")

    def _init_rate_limiters(self):
        """Initialize rate limiters for each model"""
        for model_name in self.models:
            self.rate_limiters[model_name] = {
                "request_times": deque(),
                "token_counts": deque(),
                "last_request": None
            }

    def _estimate_complexity(self, prompt: str) -> int:
        """
        Estimate task complexity (1-10) based on prompt

        Starts from a base of 3, adds 2 when the prompt is longer than 500
        characters and another 2 when longer than 1000, plus 1 for every
        keyword found (case-insensitive). The result is capped at 10.

        TODO: Improve this with better heuristics
        """
        complexity = 3  # Base complexity

        # Length-based estimation
        if len(prompt) > 500:
            complexity += 2
        if len(prompt) > 1000:
            complexity += 2

        # Keyword-based estimation
        complex_keywords = [
            "analyze", "explain in detail", "compare",
            "write code", "debug", "optimize",
            "create", "design", "comprehensive"
        ]

        # Lowercase once instead of once per keyword
        lowered = prompt.lower()
        complexity += sum(1 for keyword in complex_keywords if keyword in lowered)

        # Cap at 10
        return min(complexity, 10)

    def _select_model(self, prompt: str, max_cost: float = 0.01) -> Optional[str]:
        """
        Select the best model for the task

        Considers only models whose provider has an API key and whose
        estimated cost fits ``max_cost``. Among those, returns the cheapest
        model whose ``complexity_score`` covers the estimated complexity.
        When no affordable model is rated that high, the cheapest of the
        most capable affordable models is used instead of giving up.

        Returns:
            The model name, or None when no available model fits the budget.

        TODO: Implement smart selection logic
        """
        complexity = self._estimate_complexity(prompt)

        # Same estimate for every model: input + output estimate
        estimated_tokens = len(prompt) // 4 + 200

        # (model name, estimated cost, capability score) for usable models
        affordable = []
        for model_name, config in self.models.items():
            if config.provider not in self.available_providers:
                continue
            estimated_cost = (estimated_tokens / 1000) * (
                config.input_cost_per_1k + config.output_cost_per_1k
            )
            if estimated_cost <= max_cost:
                affordable.append((model_name, estimated_cost, config.complexity_score))

        if not affordable:
            print("⚠️ No suitable model found within budget")
            return None

        candidates = [entry for entry in affordable if entry[2] >= complexity]
        if not candidates:
            # No model is rated for this complexity: degrade gracefully to
            # the most capable affordable model(s) rather than refusing.
            best_score = max(entry[2] for entry in affordable)
            candidates = [entry for entry in affordable if entry[2] == best_score]
            print(
                f"⚠️ No model rated for complexity {complexity}/10; "
                "using the most capable available model"
            )

        # Cheapest candidate wins; ties keep configuration order
        selected = min(candidates, key=lambda entry: entry[1])[0]

        print(f"📊 Selected model: {selected} (complexity: {complexity}/10)")
        return selected

    def _check_rate_limits(self, model: str, estimated_tokens: int) -> bool:
        """
        Check if we can make a request without hitting rate limits

        Drops limiter entries older than one minute, then compares the
        remaining request count against the model's RPM limit and the
        remaining token total plus ``estimated_tokens`` against its TPM
        limit. Models without a limiter are always allowed.
        """
        if model not in self.rate_limiters:
            return True

        config = self.models[model]
        limiter = self.rate_limiters[model]

        now = datetime.now()
        minute_ago = now - timedelta(minutes=1)

        # Clean old entries (request times and token counts are appended
        # together, so they are popped together)
        while limiter["request_times"] and limiter["request_times"][0] < minute_ago:
            limiter["request_times"].popleft()
            if limiter["token_counts"]:
                limiter["token_counts"].popleft()

        # Check limits
        if len(limiter["request_times"]) >= config.rpm_limit:
            print(f"⏳ Rate limit: {model} at {config.rpm_limit} RPM")
            return False

        current_tokens = sum(limiter["token_counts"])
        if current_tokens + estimated_tokens > config.tpm_limit:
            print(f"⏳ Token limit: {model} at {config.tpm_limit} TPM")
            return False

        return True

    def _find_fallback_model(self, exclude: str, estimated_tokens: int) -> Optional[str]:
        """Return the first other model that is usable right now, or None.

        A fallback must belong to a provider with a loaded API key and must
        pass the rate-limit check; ``exclude`` (the rate-limited model) is
        skipped. Models are tried in configuration order.
        """
        for model_name, config in self.models.items():
            if model_name == exclude:
                continue
            if config.provider not in self.available_providers:
                # No API key for this provider, so the call could not succeed
                continue
            if self._check_rate_limits(model_name, estimated_tokens):
                return model_name
        return None

    def _get_cache_key(self, prompt: str, model: str) -> str:
        """Generate cache key"""
        content = f"{prompt}_{model}"
        # MD5 is used only as a compact lookup key, not for security
        return hashlib.md5(content.encode()).hexdigest()

    def _check_cache(self, prompt: str, model: str) -> Optional[Any]:
        """Check if response is cached

        Returns the cached response when an entry exists and is younger than
        ``self.cache_duration``; otherwise records a miss and returns None.
        Expired entries are removed so the cache does not grow without bound.
        """
        key = self._get_cache_key(prompt, model)

        entry = self.cache.get(key)
        if entry is not None:
            # Check if still valid
            if datetime.now() - entry["timestamp"] < self.cache_duration:
                self.stats["cache_hits"] += 1
                print(f"💰 Cache hit! Saved ${entry['cost']:.4f}")
                return entry["response"]
            # Stale entry: evict it
            del self.cache[key]

        self.stats["cache_misses"] += 1
        return None

    def _cache_response(self, prompt: str, model: str, response: str, cost: float):
        """Cache a response"""
        key = self._get_cache_key(prompt, model)
        self.cache[key] = {
            "response": response,
            "timestamp": datetime.now(),
            "cost": cost,
            "model": model
        }

    def _make_api_call(self, prompt: str, model: str) -> Optional[Dict]:
        """
        Make actual API call to the selected model

        Currently simulated: waits half a second and returns a dict with
        ``text``, ``tokens_used`` and ``model`` keys.

        TODO: Implement actual API calls for each provider
        """
        # Looked up first so an unknown model name fails before any work is
        # done; the provider dispatch below will need it.
        config = self.models[model]

        # Simulate API call for demo
        print(f"🔄 Making API call to {model}...")
        time.sleep(0.5)  # Simulate latency

        # TODO: Replace with actual API calls
        # if config.provider == "openai":
        #     response = self._call_openai(prompt, model)
        # elif config.provider == "anthropic":
        #     response = self._call_anthropic(prompt, model)
        # elif config.provider == "google":
        #     response = self._call_google(prompt, model)

        # Simulated response
        response = {
            "text": f"[Simulated response from {model}] This is a response to: {prompt[:50]}...",
            "tokens_used": len(prompt) // 4 + 150,
            "model": model
        }

        return response

    def _track_cost(self, model: str, tokens: int) -> float:
        """Track costs

        Splits ``tokens`` 30/70 into input/output, prices them with the
        model's per-1k rates, adds the result to the total, per-model and
        per-day trackers, and returns the cost of this request.
        """
        config = self.models[model]

        # Estimate input/output split (rough)
        input_tokens = tokens * 0.3
        output_tokens = tokens * 0.7

        cost = (
            (input_tokens / 1000) * config.input_cost_per_1k +
            (output_tokens / 1000) * config.output_cost_per_1k
        )

        # Update tracking
        self.cost_tracker["total"] += cost

        by_model = self.cost_tracker["by_model"]
        by_model[model] = by_model.get(model, 0) + cost

        today = str(datetime.now().date())
        daily = self.cost_tracker["daily"]
        daily[today] = daily.get(today, 0) + cost

        return cost

    def _update_rate_limiter(self, model: str, tokens: int):
        """Update rate limiter after request"""
        if model in self.rate_limiters:
            limiter = self.rate_limiters[model]
            now = datetime.now()
            limiter["request_times"].append(now)
            limiter["token_counts"].append(tokens)
            limiter["last_request"] = now

    def chat(self, user_input: str) -> str:
        """
        Main chat interface

        This is where everything comes together!

        Selects a model, serves the answer from cache when possible, falls
        back to another available model when the chosen one is rate limited,
        makes the API call, and records cost, rate-limit usage, cache entry,
        statistics and conversation history.

        Returns:
            The response text, or a human-readable message when no model is
            usable or the call fails.
        """
        self.stats["total_requests"] += 1

        # Step 1: Select best model
        model = self._select_model(user_input)
        if not model:
            return "Sorry, no suitable model available within budget constraints."

        # Step 2: Check cache
        cached = self._check_cache(user_input, model)
        # Compare with None: an empty cached response is still a valid hit
        if cached is not None:
            return cached

        # Step 3: Check rate limits
        estimated_tokens = len(user_input) // 4 + 200

        if not self._check_rate_limits(model, estimated_tokens):
            # Try fallback model (only providers we hold a key for)
            print("🔄 Trying fallback model...")
            fallback_model = self._find_fallback_model(model, estimated_tokens)
            if fallback_model is None:
                return "Rate limits exceeded on all models. Please wait a moment."
            model = fallback_model
            print(f"✅ Using fallback: {model}")

        # Step 4: Make API call
        try:
            response = self._make_api_call(user_input, model)
            if not response:
                self.stats["errors"] += 1
                return "Failed to get response from API."

            response_text = response["text"]
            tokens_used = response["tokens_used"]

        except Exception as e:
            # Boundary of the chat loop: report the failure instead of crashing
            self.stats["errors"] += 1
            print(f"❌ API call failed: {e}")
            return f"Error: {str(e)}"

        # Step 5: Track costs
        cost = self._track_cost(model, tokens_used)
        print(f"💰 Cost: ${cost:.4f}")

        # Step 6: Update rate limiter
        self._update_rate_limiter(model, tokens_used)

        # Step 7: Cache response
        self._cache_response(user_input, model, response_text, cost)

        # Step 8: Update statistics
        self.stats["total_tokens"] += tokens_used

        # Step 9: Update conversation history (limit size)
        self.conversation_history.append({
            "timestamp": datetime.now(),
            "user": user_input,
            "assistant": response_text,
            "model": model,
            "cost": cost
        })

        # Keep only last 20 exchanges
        if len(self.conversation_history) > 20:
            self.conversation_history = self.conversation_history[-20:]

        return response_text

    def get_stats(self) -> Dict:
        """Generate statistics report

        Returns a dict of display-ready values (costs formatted as ``$x``
        strings, hit rate as a percentage string). ``today_cost`` is only
        present when a cost has been recorded today.
        """

        lookups = self.stats["cache_hits"] + self.stats["cache_misses"]
        cache_hit_rate = 0
        if lookups > 0:
            cache_hit_rate = (self.stats["cache_hits"] / lookups) * 100

        stats = {
            "total_cost": f"${self.cost_tracker['total']:.4f}",
            "total_requests": self.stats["total_requests"],
            "total_tokens": self.stats["total_tokens"],
            "cache_hit_rate": f"{cache_hit_rate:.1f}%",
            "cache_hits": self.stats["cache_hits"],
            "cache_misses": self.stats["cache_misses"],
            "errors": self.stats["errors"],
            "cost_by_model": {
                model: f"${cost:.4f}"
                for model, cost in self.cost_tracker["by_model"].items()
            },
            # Copy so callers cannot mutate the hub's own provider list
            "available_providers": list(self.available_providers),
            "conversation_length": len(self.conversation_history)
        }

        # Add today's cost
        today = str(datetime.now().date())
        if today in self.cost_tracker["daily"]:
            stats["today_cost"] = f"${self.cost_tracker['daily'][today]:.4f}"

        return stats

    def export_conversation(self, filename: str):
        """Export conversation history

        Writes the current statistics, the retained conversation entries
        (timestamps as ISO strings) and the total cost to ``filename`` as
        indented JSON.
        """

        export_data = {
            "export_time": datetime.now().isoformat(),
            "statistics": self.get_stats(),
            "conversation": [
                {
                    "timestamp": conv["timestamp"].isoformat(),
                    "user": conv["user"],
                    "assistant": conv["assistant"],
                    "model": conv["model"],
                    "cost": conv["cost"]
                }
                for conv in self.conversation_history
            ],
            "total_cost": self.cost_tracker["total"]
        }

        # Explicit encoding so the file does not depend on the platform default
        with open(filename, "w", encoding="utf-8") as f:
            json.dump(export_data, f, indent=2)

        print(f"📁 Conversation exported to {filename}")


# ==================
# Challenge Functions
# ==================

def challenge_basic():
    """Basic Challenge: run a short scripted chat against the hub.

    Sends three prompts (the first one is repeated to exercise the cache),
    prints the first 100 characters of each reply and then the hub
    statistics. Returns early when no API key is configured.
    """
    banner = "=" * 60
    print("\n" + banner)
    print("BASIC CHALLENGE: Single Provider")
    print(banner)

    assistant = AssistantHub()

    # Nothing to exercise without at least one provider
    if not assistant.available_providers:
        print("❌ No API keys configured. Please set environment variables.")
        return

    # The last prompt repeats the first one to exercise the cache
    questions = ("What is Python?", "Explain recursion briefly", "What is Python?")
    for question in questions:
        print(f"\n📝 User: {question}")
        reply = assistant.chat(question)
        print(f"🤖 Assistant: {reply[:100]}...")

    # Dump the collected statistics, one entry per line
    print("\n📊 Statistics:")
    for label, value in assistant.get_stats().items():
        print(f"  {label}: {value}")


def challenge_intermediate():
    """Intermediate Challenge: scripted chat across prompts of varying difficulty.

    Warns (but keeps going) when fewer than two providers are configured,
    sends four labelled prompts with the first one repeated at the end to
    exercise the cache, exports the conversation to
    ``intermediate_challenge.json`` and prints the final statistics.
    """
    banner = "=" * 60
    print("\n" + banner)
    print("INTERMEDIATE CHALLENGE: Multi-Provider with Caching")
    print(banner)

    assistant = AssistantHub()

    provider_count = len(assistant.available_providers)
    if provider_count < 2:
        print("⚠️ This challenge requires at least 2 API providers configured.")
        print("  Currently available:", assistant.available_providers)

    # (prompt, difficulty label shown next to it)
    scripted_turns = (
        ("Hello!", "simple"),
        ("Explain machine learning", "medium"),
        ("Write a Python sorting algorithm", "complex"),
        ("Hello!", "simple"),  # repeated to exercise the cache
    )

    for question, label in scripted_turns:
        print(f"\n📝 User: {question} [{label}]")
        reply = assistant.chat(question)
        print(f"🤖 Response: {reply[:100]}...")

    # Persist the exchange before reporting
    assistant.export_conversation("intermediate_challenge.json")

    print("\n📊 Final Statistics:")
    for name, value in assistant.get_stats().items():
        print(f"  {name}: {value}")


def challenge_advanced():
    """Advanced Challenge: Complete implementation

    Runs a six-message scripted conversation through a fresh hub, printing
    the first 150 characters of every reply and the running total cost,
    then exports the conversation to ``advanced_challenge.json`` and prints
    the final statistics (nested dicts are expanded one entry per line).
    """
    print("\n" + "="*60)
    print("ADVANCED CHALLENGE: Full Feature Implementation")
    print("="*60)

    hub = AssistantHub()

    # Simulate a full conversation
    prompts = [
        "Hello!",  # Simple - should use cheap model
        "Explain quantum computing",  # Medium
        "Write Python code for binary search",  # Complex
        "What did I ask about first?",  # Tests memory
        "Hello!",  # Test cache again
        "Summarize our conversation",  # Tests context
    ]

    for i, prompt in enumerate(prompts, 1):
        print(f"\n{'='*40}")
        print(f"Message {i}/{len(prompts)}")
        print(f"User: {prompt}")

        response = hub.chat(prompt)
        print(f"Assistant: {response[:150]}...")

        # Show the running total (not the cost of this single message)
        current_cost = hub.cost_tracker["total"]
        print(f"Total cost so far: ${current_cost:.4f}")

    # Final report
    print("\n" + "="*60)
    print("FINAL REPORT")
    print("="*60)

    hub.export_conversation("advanced_challenge.json")

    final_stats = hub.get_stats()
    for key, value in final_stats.items():
        if isinstance(value, dict):
            # Nested report (e.g. cost per model): one indented line per entry
            print(f"\n{key}:")
            for k, v in value.items():
                print(f"  {k}: {v}")
        else:
            print(f"{key}: {value}")


def interactive_mode():
    """Interactive chat mode

    Reads prompts from standard input until the user types ``quit`` (or
    input ends / Ctrl-C is pressed). ``stats`` prints the current hub
    statistics and ``export`` writes ``interactive_export.json``; any other
    non-empty line is sent to the hub. The session is always exported to
    ``interactive_session.json`` on the way out. Returns immediately when
    no API key is configured.
    """
    print("\n" + "="*60)
    print("INTERACTIVE MODE")
    print("="*60)
    print("Commands: 'quit', 'stats', 'export'")
    print("-"*60)

    hub = AssistantHub()

    if not hub.available_providers:
        print("❌ No API keys configured.")
        return

    while True:
        try:
            user_input = input("\n👤 You: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or Ctrl-C: leave the loop like 'quit' so the
            # session is still exported instead of dying with a traceback.
            print()
            break

        if not user_input:
            continue

        command = user_input.lower()
        if command == 'quit':
            break
        if command == 'stats':
            print("\n📊 Current Statistics:")
            for key, value in hub.get_stats().items():
                print(f"  {key}: {value}")
            continue
        if command == 'export':
            hub.export_conversation("interactive_export.json")
            continue

        response = hub.chat(user_input)
        print(f"🤖 Assistant: {response}")

    # Final export
    hub.export_conversation("interactive_session.json")
    print("\n👋 Goodbye! Session exported to interactive_session.json")


# =======================
# Main Entry Point
# =======================

if __name__ == "__main__":
    # Banner
    rule = "=" * 60
    print(rule)
    print("CHAPTER 7 CHALLENGE PROJECT")
    print("Multi-Provider AI Assistant Hub")
    print(rule)

    # Menu
    print("\nChoose your challenge level:")
    print("1. Basic - Single provider implementation")
    print("2. Intermediate - Multi-provider with caching")
    print("3. Advanced - Complete feature implementation")
    print("4. Interactive - Chat mode")

    # Menu choice -> challenge runner
    runners = {
        "1": challenge_basic,
        "2": challenge_intermediate,
        "3": challenge_advanced,
        "4": interactive_mode,
    }

    selection = input("\nEnter choice (1-4): ").strip()
    runner = runners.get(selection)

    if runner is None:
        # Anything unrecognised falls back to the basic challenge
        print("Invalid choice. Running basic challenge...")
        runner = challenge_basic

    runner()


---
## Next Steps

- Check your answers in **chapter_07_intro_ai_llm_solutions.ipynb**
- Proceed to **Chapter 8**