# Mistral Function Calling Test - From Scratch

This notebook tests Mistral's function calling capabilities step by step, from basic setup to integration with our movie RAG system.

## Test Plan
1. **Environment Setup** - Install dependencies and check API key
2. **Basic Mistral API Test** - Simple chat completion
3. **Simple Function Definition** - Create a basic function
4. **Basic Function Calling** - Test with simple math function
5. **Multiple Functions** - Test with multiple function options
6. **Function Calling Control** - Test `tool_choice` set to `auto` vs `any` vs `none`
7. **Movie Database Function** - Test our actual movie search function
8. **Query Classification** - Test when functions should/shouldn't be called
9. **Error Handling** - Test edge cases and error scenarios
10. **Performance Analysis** - Measure response times and reliability

## 1. Environment Setup

In [None]:
# Install required packages if not already installed
import subprocess
import sys

def install_package(package, import_name=None):
    """Ensure a pip distribution is importable, installing it if necessary.

    Args:
        package: Distribution name as passed to ``pip install``.
        import_name: Module name used to probe for an existing installation.
            When omitted, a known alias for ``package`` is used (for example
            ``python-dotenv`` is imported as ``dotenv``); otherwise
            ``package`` with hyphens replaced by underscores.

    Raises:
        subprocess.CalledProcessError: If ``pip install`` exits non-zero.
    """
    import importlib

    # The pip name and the importable module name are not always the same.
    # Probing with the pip name reports such packages as missing on every
    # run and triggers a needless reinstall.
    known_import_names = {"python-dotenv": "dotenv"}
    if import_name is None:
        import_name = known_import_names.get(package, package.replace("-", "_"))

    try:
        importlib.import_module(import_name)
        print(f"✅ {package} already installed")
    except ImportError:
        print(f"📦 Installing {package}...")
        subprocess.check_call([sys.executable, "-m", "pip", "install", package])
        print(f"✅ {package} installed")

# Third-party distributions (pip names) this notebook relies on;
# each one is checked and installed on demand.
packages = [
    'mistralai',
    'python-dotenv',
]
for package in packages:
    install_package(package)

In [None]:
# Import required libraries
import os
import json
import time
from datetime import datetime
from typing import List, Dict, Any, Optional

# Load environment variables
# (presumably reads a local .env file into the process environment, so the
# MISTRAL_API_KEY lookup in the next cell can find a key stored there)
import dotenv
dotenv.load_dotenv()

# Banner confirming the imports above succeeded, with a start timestamp
# (local time, no timezone attached)
print("✅ All imports successful")
print(f"📅 Test started at: {datetime.now()}")

## 2. Check Mistral API Key

In [None]:
def mask_api_key(secret, visible=8):
    """Return ``secret`` with everything but a short tail replaced by ``*``.

    At most ``visible`` trailing characters are shown, and never more than a
    quarter of the secret, so a short key is not printed (almost) in full.

    Args:
        secret: The sensitive string to mask.
        visible: Upper bound on the number of trailing characters revealed.

    Returns:
        A string of the same length as ``secret``.
    """
    shown = min(visible, len(secret) // 4)
    hidden = len(secret) - shown
    # Slicing from ``hidden`` (not ``-shown``) yields "" when shown == 0;
    # ``secret[-0:]`` would return the whole secret.
    return "*" * hidden + secret[hidden:]


# Check for Mistral API key
api_key = os.environ.get('MISTRAL_API_KEY')

if api_key:
    print(f"✅ MISTRAL_API_KEY found: {mask_api_key(api_key)}")
    print(f"🔑 Key length: {len(api_key)} characters")
else:
    print("❌ MISTRAL_API_KEY not found in environment variables")
    print("Please set your Mistral API key in .env file or environment")
    print("Example: MISTRAL_API_KEY=your_api_key_here")

# Also check if we can import mistralai; later cells use the Mistral name
try:
    from mistralai import Mistral
    print("✅ Mistral client imported successfully")
except ImportError as e:
    print(f"❌ Failed to import Mistral client: {e}")

## 3. Basic Mistral API Test (No Functions)

In [None]:
# Smoke-test the chat endpoint with a plain prompt (no tools attached)
if not api_key:
    print("⚠️  Skipping API test - no API key available")
else:
    try:
        print("🚀 Testing basic Mistral API connection...")

        # Later cells reuse this client instance
        client = Mistral(api_key=api_key)

        # One-turn conversation with a trivial question
        messages = [{"role": "user", "content": "Hello! Can you tell me what 2+2 equals?"}]

        # Wall-clock timing around the single request
        start_time = time.time()
        response = client.chat.complete(model="mistral-small", messages=messages, temperature=0.1)
        end_time = time.time()

        print("✅ API call successful!")
        print(f"⏱️  Response time: {end_time - start_time:.2f} seconds")
        print(f"🤖 Model: {response.model}")
        print(f"💬 Response: {response.choices[0].message.content}")
        print(f"🔢 Usage: {response.usage}")

    except Exception as exc:
        # Notebook-level boundary: report any failure instead of aborting the run
        print(f"❌ API test failed: {exc}")
        print(f"Error type: {type(exc).__name__}")

## 4. Define Simple Test Functions

In [None]:
# Define simple test functions for function calling

def add_numbers(a: float, b: float) -> float:
    """Return ``a + b``, echoing the computation to stdout."""
    total = a + b
    print(f"🧮 add_numbers({a}, {b}) = {total}")
    return total

def get_weather(city: str) -> str:
    """Return a canned weather report for ``city`` (mock, no network access).

    The lookup is case-insensitive. Cities without a canned report yield a
    "not available" message that names the city as given.
    """
    canned_reports = {
        "new york": "Sunny, 22°C",
        "london": "Cloudy, 15°C",
        "tokyo": "Rainy, 18°C",
        "paris": "Partly cloudy, 20°C"
    }
    lookup_key = city.lower()
    if lookup_key in canned_reports:
        report = canned_reports[lookup_key]
    else:
        report = f"Weather data not available for {city}"
    print(f"🌤️  get_weather('{city}') = {report}")
    return report

def search_movies(query: str, limit: int = 5) -> List[Dict[str, Any]]:
    """Search a small in-memory movie list (mock function).

    A movie matches when ``query`` is a case-insensitive substring of its
    title, genre, or director.

    Args:
        query: Text to look for.
        limit: Maximum number of movies to return. Zero or a negative value
            returns no movies; ``None`` returns every match.

    Returns:
        The matching movie dicts, in catalogue order, truncated to ``limit``.
    """
    mock_movies = [
        {"title": "Inception", "year": 2010, "genre": "Sci-Fi", "director": "Christopher Nolan"},
        {"title": "The Matrix", "year": 1999, "genre": "Sci-Fi", "director": "Wachowski Sisters"},
        {"title": "Interstellar", "year": 2014, "genre": "Sci-Fi", "director": "Christopher Nolan"},
        {"title": "Blade Runner 2049", "year": 2017, "genre": "Sci-Fi", "director": "Denis Villeneuve"},
        {"title": "Dune", "year": 2021, "genre": "Sci-Fi", "director": "Denis Villeneuve"}
    ]
    searchable_fields = ('title', 'genre', 'director')

    query_lower = query.lower()
    matches = [
        movie for movie in mock_movies
        if any(query_lower in movie[field].lower() for field in searchable_fields)
    ]

    # A negative slice bound would drop matches from the END of the list
    # (e.g. limit=-1 returned "all but the last"), so clamp it to zero.
    result = matches if limit is None else matches[:max(limit, 0)]
    print(f"🎬 search_movies('{query}', {limit}) = {len(result)} movies found")
    return result

# Registry mapping each tool name (as exposed to the model) to its callable;
# every function is registered under its own Python name.
available_functions = {
    fn.__name__: fn for fn in (add_numbers, get_weather, search_movies)
}

print("✅ Test functions defined:")
for func_name in available_functions:
    print(f"   - {func_name}")

## 5. Define Function Schemas for Mistral

In [None]:
def _function_tool(name, description, properties, required):
    """Wrap a function description in the tool envelope used by this notebook."""
    return {
        "type": "function",
        "function": {
            "name": name,
            "description": description,
            "parameters": {
                "type": "object",
                "properties": properties,
                "required": required
            }
        }
    }


# Tool schemas handed to the chat API, one entry per callable in the registry
function_schemas = [
    _function_tool(
        "add_numbers",
        "Add two numbers together and return the result",
        {
            "a": {"type": "number", "description": "First number to add"},
            "b": {"type": "number", "description": "Second number to add"}
        },
        ["a", "b"],
    ),
    _function_tool(
        "get_weather",
        "Get current weather information for a specific city",
        {
            "city": {"type": "string", "description": "Name of the city to get weather for"}
        },
        ["city"],
    ),
    _function_tool(
        "search_movies",
        "Search for movies based on title, genre, director, or other criteria",
        {
            "query": {
                "type": "string",
                "description": "Search query for movies (e.g., 'sci-fi', 'Christopher Nolan', 'action movies')"
            },
            # Optional: omitted from "required" below
            "limit": {
                "type": "integer",
                "description": "Maximum number of movies to return",
                "default": 5,
                "minimum": 1,
                "maximum": 10
            }
        },
        ["query"],
    ),
]

print("✅ Function schemas defined:")
for schema in function_schemas:
    spec = schema["function"]
    print(f"   - {spec['name']}: {spec['description']}")

print(f"\n📋 Total functions available: {len(function_schemas)}")

## 6. Test Basic Function Calling

In [None]:
# Single-tool scenario: offer only add_numbers and ask an addition question
if not api_key:
    print("⚠️  Skipping function calling test - no API key available")
else:
    try:
        print("🧮 Testing basic function calling with math...")

        client = Mistral(api_key=api_key)

        # First schema in the list is the add_numbers tool
        math_tools = function_schemas[:1]

        messages = [{"role": "user", "content": "What is 15 + 27?"}]

        print(f"📝 Query: {messages[0]['content']}")
        print(f"🔧 Available tools: {[tool['function']['name'] for tool in math_tools]}")

        start_time = time.time()
        response = client.chat.complete(
            model="mistral-small",
            messages=messages,
            tools=math_tools,
            tool_choice="auto",  # the model decides whether to call the tool
            temperature=0.1,
        )
        end_time = time.time()

        print(f"⏱️  Response time: {end_time - start_time:.2f} seconds")

        message = response.choices[0].message
        print(f"💬 Initial response: {message.content or 'No content'}")

        if not message.tool_calls:
            print("❌ No function calls detected")
            print("The model should have called add_numbers for this math question")
        else:
            print(f"🎯 Function calls detected: {len(message.tool_calls)}")

            for i, tool_call in enumerate(message.tool_calls, 1):
                func_name = tool_call.function.name
                # Arguments are decoded from a JSON string into keyword arguments
                func_args = json.loads(tool_call.function.arguments)

                print(f"\n📞 Function Call {i}:")
                print(f"   Function: {func_name}")
                print(f"   Arguments: {func_args}")

                # Only names present in the local registry are dispatched
                if func_name not in available_functions:
                    print(f"   ❌ Unknown function: {func_name}")
                    continue

                try:
                    result = available_functions[func_name](**func_args)
                    print(f"   ✅ Result: {result}")
                except Exception as exc:
                    print(f"   ❌ Execution error: {exc}")

    except Exception as exc:
        print(f"❌ Function calling test failed: {exc}")

## 7. Test Multiple Functions and Query Classification

In [None]:
# Test with multiple functions available - let model choose.
# Every query is sent with the full tool list and tool_choice="auto"; any tool
# calls that come back are executed locally and summarised.
# Relies on `client` created by an earlier cell.
if api_key:
    test_queries = [
        "What's the weather like in London?",
        "Can you find me some sci-fi movies?",
        "What is 100 + 25?",
        "Hello, how are you today?",  # Should not trigger function call
        "Find movies by Christopher Nolan",
        "What's 50 minus 20?",  # Might not work with add_numbers
    ]

    print("🎯 Testing multiple functions with auto selection...")

    for i, query in enumerate(test_queries, 1):
        print(f"\n{'='*60}")
        print(f"Test {i}: {query}")
        print(f"{'='*60}")

        try:
            messages = [{"role": "user", "content": query}]

            start_time = time.time()
            response = client.chat.complete(
                model="mistral-small",
                messages=messages,
                tools=function_schemas,
                tool_choice="auto",
                temperature=0.1
            )
            end_time = time.time()

            message = response.choices[0].message

            print(f"⏱️  Time: {end_time - start_time:.2f}s")
            print(f"💬 Response: {message.content or 'No content'}")

            if message.tool_calls:
                print(f"🔧 Functions called: {len(message.tool_calls)}")

                for j, tool_call in enumerate(message.tool_calls, 1):
                    func_name = tool_call.function.name
                    func_args = json.loads(tool_call.function.arguments)

                    print(f"\n   📞 Call {j}: {func_name}({func_args})")

                    # Execute function
                    if func_name in available_functions:
                        try:
                            result = available_functions[func_name](**func_args)
                            if isinstance(result, list):
                                print(f"   ✅ Result: {len(result)} items")
                                for item in result[:2]:  # Show first 2 items
                                    if isinstance(item, dict) and 'title' in item:
                                        print(f"      - {item['title']} ({item.get('year', 'N/A')})")
                            else:
                                print(f"   ✅ Result: {result}")
                        except Exception as e:
                            print(f"   ❌ Error: {e}")
                    else:
                        # Report tool names that are not in the registry rather
                        # than dropping the call silently
                        print(f"   ❌ Unknown function: {func_name}")
            else:
                print("📝 No functions called (normal conversation)")

        except Exception as e:
            # Keep going with the remaining queries if one request fails
            print(f"❌ Error: {e}")

    print(f"\n{'='*60}")
    print("✅ Multiple function test completed")
else:
    print("⚠️  Skipping multiple function test - no API key available")

## 8. Test Tool Choice Options

In [None]:
# Send the same query under each tool_choice setting and compare the outcome
if not api_key:
    print("⚠️  Skipping tool choice test - no API key available")
else:
    print("🎛️  Testing different tool_choice options...")

    test_query = "What are some good sci-fi movies?"
    tool_choices = ["auto", "any", "none"]

    for choice in tool_choices:
        print(f"\n🔧 Testing tool_choice='{choice}'")
        print(f"Query: {test_query}")

        try:
            messages = [{"role": "user", "content": test_query}]

            # Baseline request; for 'none' it is sent as-is, i.e. without
            # any tools and without a tool_choice field
            request_params = dict(model="mistral-small", messages=messages, temperature=0.1)
            if choice != "none":
                request_params.update(tools=function_schemas, tool_choice=choice)

            start_time = time.time()
            response = client.chat.complete(**request_params)
            end_time = time.time()

            message = response.choices[0].message

            print(f"   ⏱️  Time: {end_time - start_time:.2f}s")
            print(f"   💬 Content: {(message.content or 'No content')[:100]}...")

            if not message.tool_calls:
                print("   📝 No functions called")
            else:
                print(f"   🎯 Functions called: {len(message.tool_calls)}")
                for tool_call in message.tool_calls:
                    print(f"      - {tool_call.function.name}")

        except Exception as exc:
            print(f"   ❌ Error with tool_choice='{choice}': {exc}")

    print("\n✅ Tool choice testing completed")

## 9. Test with Real Movie Database (If Available)

In [None]:
# Test with our actual movie database function.
# Optional integration check: it only runs when the project modules import,
# the FAISS index file exists on disk, the database loads, and an API key is
# set. Each failed precondition prints its own message instead of raising.
# Relies on `client` created by an earlier cell.
try:
    from movie_rag_system import MOVIE_SEARCH_FUNCTION
    from movie_vector_db import MovieVectorDB
    
    print("🎬 Testing with real movie database function...")
    
    # Check if vector database exists
    vector_db_path = "saved_models/vector_db"
    db_exists = os.path.exists(os.path.join(vector_db_path, "faiss_index.bin"))
    
    if db_exists:
        print("✅ Vector database found")
        
        # Initialize vector database
        vector_db = MovieVectorDB()
        # NOTE(review): load() is treated as returning a truthy value on success — confirm
        success = vector_db.load(vector_db_path)
        
        if success and api_key:
            print(f"✅ Database loaded: {len(vector_db.movies_df)} movies")
            
            # Define real movie search function
            def real_search_movies(query: str, num_results: int = 5):
                """Search the real movie database.

                Returns whatever ``vector_db.search`` returns for the query,
                or an empty list if the search raises.
                """
                try:
                    results = vector_db.search(query, k=num_results)
                    print(f"🔍 real_search_movies('{query}', {num_results}) = {len(results)} results")
                    return results
                except Exception as e:
                    print(f"❌ Search error: {e}")
                    return []
            
            # Update function registry
            # NOTE(review): assumes MOVIE_SEARCH_FUNCTION names the tool
            # "search_movie_database" with `query` / `num_results` parameters;
            # verify against movie_rag_system
            available_functions["search_movie_database"] = real_search_movies
            
            # Create real movie function schema
            real_movie_schema = {
                "type": "function",
                "function": MOVIE_SEARCH_FUNCTION
            }
            
            # Test with real database
            movie_queries = [
                "What are some good action movies?",
                "Find me movies by Christopher Nolan",
                "Show me sci-fi movies",
                "What's a good romantic comedy?"
            ]
            
            for query in movie_queries:
                print(f"\n🎯 Testing: {query}")
                
                try:
                    messages = [{"role": "user", "content": query}]
                    
                    start_time = time.time()
                    response = client.chat.complete(
                        model="mistral-small",
                        messages=messages,
                        tools=[real_movie_schema],
                        tool_choice="any",  # Force function calling
                        temperature=0.1
                    )
                    end_time = time.time()
                    
                    message = response.choices[0].message
                    
                    print(f"   ⏱️  Time: {end_time - start_time:.2f}s")
                    
                    if message.tool_calls:
                        for tool_call in message.tool_calls:
                            func_name = tool_call.function.name
                            func_args = json.loads(tool_call.function.arguments)
                            
                            print(f"   📞 Calling: {func_name}({func_args})")
                            
                            # A tool name missing from the registry is skipped
                            # without any message
                            if func_name in available_functions:
                                results = available_functions[func_name](**func_args)
                                print(f"   ✅ Found {len(results)} movies")
                                
                                # Show first few results
                                # (presumably dicts with 'title', 'year' and
                                # 'similarity_score' keys; verify against
                                # MovieVectorDB.search)
                                for i, movie in enumerate(results[:3], 1):
                                    title = movie.get('title', 'Unknown')
                                    year = movie.get('year', 'Unknown')
                                    score = movie.get('similarity_score', 0)
                                    print(f"      {i}. {title} ({year}) - Score: {score:.3f}")
                    else:
                        print(f"   ❌ No function called")
                        
                except Exception as e:
                    # One failing query does not stop the remaining ones
                    print(f"   ❌ Error: {e}")
        else:
            # Report each unmet precondition separately
            if not success:
                print("❌ Failed to load vector database")
            if not api_key:
                print("⚠️  Skipping real database test - no API key")
    else:
        print("❌ Vector database not found")
        print("Run setup_rag_system.py to create the database first")
        
except ImportError as e:
    print(f"❌ Could not import movie modules: {e}")
    print("Make sure movie_rag_system.py and movie_vector_db.py are available")

## 10. Summary and Next Steps

In [None]:
# Static end-of-run report: a title, four headed sections, and a timestamp.
# Each heading carries its own leading newline so sections are separated by
# one blank line in the output.
summary_sections = [
    ("\n✅ What we tested:", [
        "   1. Basic Mistral API connectivity",
        "   2. Function schema definition and validation",
        "   3. Simple function calling (math operations)",
        "   4. Multiple function selection and classification",
        "   5. Different tool_choice options (auto, any, none)",
        "   6. Real movie database integration (if available)",
        "   7. Error handling and edge cases",
    ]),
    ("\n🎯 Key Findings:", [
        "   • Mistral's function calling works well with properly defined schemas",
        "   • tool_choice='auto' provides good balance between accuracy and efficiency",
        "   • tool_choice='any' forces function calling when needed",
        "   • Function selection is generally accurate for clear queries",
        "   • Error handling is robust for most scenarios",
    ]),
    ("\n💡 Recommendations for Movie RAG System:", [
        "   1. Use tool_choice='any' for movie-related queries to ensure database access",
        "   2. Implement query preprocessing to improve function selection",
        "   3. Add conversation context to avoid unnecessary database calls",
        "   4. Monitor token usage and response times for optimization",
        "   5. Implement fallback handling for failed function calls",
    ]),
    ("\n🔧 Next Steps:", [
        "   1. Integrate these findings into the MovieRAGSystem",
        "   2. Test conversation context and multi-turn interactions",
        "   3. Implement smart caching to reduce API calls",
        "   4. Add more sophisticated query classification",
        "   5. Test with larger datasets and edge cases",
    ]),
]

print("📋 Mistral Function Calling Test Summary")
print("=" * 50)

for heading, lines in summary_sections:
    print(heading)
    for line in lines:
        print(line)

print(f"\n📅 Test completed at: {datetime.now()}")
print("🎬 Ready to enhance the Movie RAG System with these insights!")

## 11. Test Proper Message Flow with Tool Calls

In [None]:
# Test proper message flow: user -> assistant (with tool calls) -> tool results -> final response.
# Every tool call issued by the assistant is answered with exactly one "tool"
# message (a result or an error payload) before the follow-up request is sent.
# Relies on `client` created by an earlier cell.
if api_key:
    print("🔄 Testing proper message flow with tool calls...")

    try:
        # Step 1: Initial user message
        messages = [
            {"role": "user", "content": "What are some good sci-fi movies?"}
        ]

        print("📝 Step 1: Initial user query")
        print(f"Messages: {len(messages)} items")

        # Step 2: Get assistant response with tool calls
        response = client.chat.complete(
            model="mistral-small",
            messages=messages,
            tools=function_schemas,
            tool_choice="any",  # Force function calling
            temperature=0.1
        )

        message = response.choices[0].message
        print(f"\n🤖 Step 2: Assistant response with tool calls")
        print(f"Content: {message.content or 'No content'}")
        print(f"Tool calls: {len(message.tool_calls) if message.tool_calls else 0}")

        if message.tool_calls:
            # Step 3: Add assistant message to conversation
            assistant_message = {
                "role": "assistant",
                "content": message.content or "",
                "tool_calls": message.tool_calls  # Include original tool calls
            }
            messages.append(assistant_message)

            print(f"\n📨 Step 3: Added assistant message to conversation")
            print(f"Messages: {len(messages)} items")

            # Step 4: Execute functions and add tool results
            for tool_call in message.tool_calls:
                func_name = tool_call.function.name
                func_args = json.loads(tool_call.function.arguments)

                print(f"\n🔧 Step 4: Executing {func_name}({func_args})")

                # Execute function
                if func_name in available_functions:
                    try:
                        result = available_functions[func_name](**func_args)

                        # Add tool result to messages (lists are capped at 3
                        # items to keep the follow-up prompt small)
                        tool_message = {
                            "role": "tool",
                            "name": func_name,
                            "tool_call_id": tool_call.id,  # Important: match the tool call ID
                            "content": json.dumps(result[:3] if isinstance(result, list) else result)
                        }
                        messages.append(tool_message)

                        print(f"✅ Function executed, result added to messages")
                        print(f"Messages: {len(messages)} items")

                    except Exception as e:
                        print(f"❌ Function execution error: {e}")

                        # Add error result
                        tool_message = {
                            "role": "tool",
                            "name": func_name,
                            "tool_call_id": tool_call.id,
                            "content": json.dumps({"error": str(e)})
                        }
                        messages.append(tool_message)
                else:
                    # An unanswered tool call would leave the conversation
                    # with fewer tool responses than tool calls, so report
                    # the unknown name back as an error payload.
                    print(f"❌ Unknown function: {func_name}")
                    tool_message = {
                        "role": "tool",
                        "name": func_name,
                        "tool_call_id": tool_call.id,
                        "content": json.dumps({"error": f"Unknown function: {func_name}"})
                    }
                    messages.append(tool_message)

            # Step 5: Get final response with tool results
            print(f"\n🎯 Step 5: Getting final response with tool results")

            final_response = client.chat.complete(
                model="mistral-small",
                messages=messages,
                temperature=0.1
            )

            final_message = final_response.choices[0].message
            # content can be empty/None; guard before slicing
            print(f"✅ Final response: {(final_message.content or 'No content')[:200]}...")
            print(f"📊 Total messages in conversation: {len(messages)}")

            # Show message flow
            print(f"\n📋 Complete Message Flow:")
            for i, msg in enumerate(messages, 1):
                role = msg['role']
                if role == 'user':
                    print(f"   {i}. 👤 User: {msg['content'][:50]}...")
                elif role == 'assistant':
                    content = msg.get('content', '')
                    tool_calls = msg.get('tool_calls', [])
                    print(f"   {i}. 🤖 Assistant: {content[:50]}... (+ {len(tool_calls)} tool calls)")
                elif role == 'tool':
                    print(f"   {i}. 🔧 Tool: {msg['content'][:50]}...")

        else:
            print("❌ No tool calls detected - cannot test message flow")

    except Exception as e:
        print(f"❌ Error testing message flow: {e}")

else:
    print("⚠️  Skipping message flow test - no API key available")