# SQrL ChatBot with Conversations DB support

1. This notebook provides backend endpoints that fetch LLM responses from Inference API endpoints.
2. The Inference API can be switched to any other endpoint, but ensure that the underlying model supports tool calling.
3. We have added 2 tools: google_search and hybrid_search_cloud_function (registered in the code as `retrieve_singlestore_knowledge`).
4. All queries related to SingleStore are routed to hybrid_search_cloud_function to retrieve relevant SingleStore information, which is then sent as context to the LLM. In case the Hybrid Search endpoint returns an error, we fall back to Google Search results as context. All non-SingleStore queries are served with Google Search results. No safeguards have been implemented, so please query responsibly.
5. All queries, tool results, and stitched LLM responses are stored automatically in the `conversations` and `messages` tables in the database.
6. Tool calls are completely configurable. No LLM frameworks have been used, so the underlying logic can be extended to production use cases.
7. An endpoint to fetch streaming responses is provided.
8. Common endpoints for conversation management are available for the frontend to use. Ensure that the `conversations` and `messages` tables are created and ready for use.
9. E2E tests are provided at the end to easily test the Cloud function endpoints without having to deploy them first.

## Install required libraries

In [6]:
!pip install -q openai

In [7]:
import asyncio
import json
import openai
import re
import requests
import time
import logging
import uuid

import singlestoredb as s2
import singlestoredb.apps as apps

from contextlib import asynccontextmanager, contextmanager
from fastapi import FastAPI, HTTPException, Request
from fastapi.responses import StreamingResponse
from pydantic import BaseModel, Field
from requests.exceptions import HTTPError, Timeout, RequestException
from singlestoredb.management import get_secret
from typing import List, Dict, Optional, Any, AsyncGenerator

In [8]:
# Configure logging: set the root logger to INFO and create the module-level
# logger used by the database helpers and API endpoints below.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

In [9]:
# Fetch all environment values
# SerpAPI key read by google_search; that tool returns an explanatory message when the key is empty.
SERPAPI_API_KEY = get_secret('SERPAPI_API_KEY')

# Bearer token sent to the Hybrid Search endpoint by retrieve_singlestore_knowledge.
# Created using a GraphQL mutation; "expiresAt": "2025-04-06T00:07:35.48479758Z"
HYBRID_SEARCH_AUTH = get_secret('HYBRID_SEARCH_AUTH')
# API endpoint URL
HYBRID_SEARCH_URL = "https://apps.aws-virginia-nb2.svc.singlestore.com:8000/functions/6fc46137-1974-4fb0-817f-dbfe782fe3f3/v2/search"

# Generation settings passed to the chat-completions calls (temperature and max_tokens).
MODEL_RESPONSE_TEMPERATURE = 0
MODEL_RESPONSE_MAX_TOKENS = 4096
# NOTE(review): the two settings below are not referenced in the visible part of this file — confirm they are used elsewhere.
MODEL_RESPONSE_TIMEOUT = None
MODEL_RESPONSE_RETRIES = 2

In [10]:
# This inference API is present in the S2DBPM RAG Project Org : https://portal.singlestore.com/organizations/8bd790ef-6703-4100-85c0-bb19744acc16/inference-apis/
# Base URL handed to the OpenAI client as base_url.
INFERENCE_API_ENDPOINT = "https://apps.aws-virginia-nb2.svc.singlestore.com:8000/modelasaservice/8345ca8d-70f6-40d7-a0c1-b2e54c23babc/v1"
# Model identifier passed as `model` on every chat-completions request.
INFERENCE_MODEL_NAME = "unsloth/Meta-Llama-3.1-8B-Instruct"
# Token handed to the OpenAI client as api_key.
# Generated from a GraphQL mutation; valid until 2025-04-05T19:01:42.50444973Z
INFERENCE_AUTH_TOKEN = get_secret('INFERENCE_AUTH_TOKEN')

In [11]:
# Database interface facing functions
@contextmanager
def get_db_connection():
    """Yield a connection to the ``knowlagent`` database and close it on exit.

    The connection is closed when the ``with`` block ends, whether the block
    finishes normally or raises.
    """
    connection = None
    try:
        connection = s2.connect(database="knowlagent")
        yield connection
    finally:
        # connection is still None when s2.connect itself raised
        if connection:
            connection.close()

def test_db_connection():
    """Run ``SELECT 1`` against the database and report the outcome.

    Returns:
        bool: True when the query succeeds, False when any exception occurs.
        The outcome is also printed.
    """
    try:
        with get_db_connection() as connection:
            probe = connection.cursor()
            probe.execute("SELECT 1")
            first_row = probe.fetchone()
            print(f"Database connection successful: {first_row}")
            return True
    except Exception as exc:
        print(f"Database connection failed: {str(exc)}")
        return False

def init_database():
    """Placeholder initialisation hook: logs that the tables already exist.

    Returns:
        bool: Always True.
    """
    skip_notice = "Database tables were created separately, skipping initialization"
    logger.info(skip_notice)
    return True

# Database operations that mirror current in-memory operations
def db_create_conversation(system_message):
    """Insert a new conversation and its system message.

    Args:
        system_message: Text stored as the conversation's first message
            (role ``system``, ``sequence_order`` 0).

    Returns:
        str: The generated conversation id (a UUID4 string).

    Raises:
        Exception: Any database error is logged and re-raised.
    """
    new_conversation_id = str(uuid.uuid4())

    try:
        with get_db_connection() as connection:
            db_cursor = connection.cursor()

            # Register the conversation row first
            db_cursor.execute(
                "INSERT INTO conversations (conversation_id) VALUES (%s)",
                (new_conversation_id,)
            )

            # Then store the system prompt as message number 0
            system_row = (str(uuid.uuid4()), new_conversation_id, "system", system_message, 0)
            db_cursor.execute(
                """INSERT INTO messages 
                   (message_id, conversation_id, role, content, sequence_order) 
                   VALUES (%s, %s, %s, %s, %s)""",
                system_row
            )

            # Persist both inserts together
            connection.commit()
    except Exception as exc:
        logger.error(f"Database error in db_create_conversation: {str(exc)}")
        raise
    else:
        return new_conversation_id

def db_get_conversation(conversation_id):
    """Return every message of a conversation, ordered by ``sequence_order``.

    Args:
        conversation_id: Id of the conversation whose messages are loaded.

    Returns:
        list[dict]: One dict per message with ``role`` and ``content``.
        Assistant messages additionally carry ``tool_calls`` (decoded from the
        stored JSON) and tool messages carry ``tool_call_id`` when those
        columns are populated. An unknown id yields an empty list.
    """
    with get_db_connection() as conn:
        # Use dictionary cursor if available
        try:
            cursor = conn.cursor(dictionary=True)
        except TypeError:
            # If dictionary cursor is not supported
            cursor = conn.cursor()

        cursor.execute(
            """SELECT role, content, tool_calls, tool_call_id 
               FROM messages 
               WHERE conversation_id = %s 
               ORDER BY sequence_order""",
            (conversation_id,)
        )

        messages = []
        for row in cursor.fetchall():
            # Rows are either mapping-like or positional, depending on the cursor
            if hasattr(row, 'keys'):
                role = row["role"]
                content = row["content"]
                tool_calls = row["tool_calls"]
                tool_call_id = row["tool_call_id"]
            else:
                role, content, tool_calls, tool_call_id = row[0], row[1], row[2], row[3]

            message = {"role": role, "content": content}

            # Decode stored tool calls; a bad value is skipped (best effort)
            # but logged instead of being silently swallowed.
            if tool_calls and role == "assistant":
                try:
                    message["tool_calls"] = json.loads(tool_calls)
                except (TypeError, ValueError) as exc:
                    logger.warning(
                        "Ignoring undecodable tool_calls in conversation %s: %s",
                        conversation_id,
                        exc,
                    )

            # Tool responses keep the id of the call they answer
            if role == "tool" and tool_call_id:
                message["tool_call_id"] = tool_call_id

            messages.append(message)

        return messages

def db_add_message(conversation_id, message):
    """Append a message to a conversation and bump its ``last_updated`` stamp.

    Args:
        conversation_id: Id of the conversation receiving the message.
        message: Dict with ``role`` and ``content``; may also carry
            ``tool_calls`` (stored as JSON text) and ``tool_call_id``.
            A missing or ``None`` content is stored as an empty string.

    Raises:
        KeyError: If ``message`` has no ``role``.
    """
    # Stay below the 16MB MEDIUMTEXT limit (adjust if using a different type)
    MAX_SAFE_LENGTH = 15 * 1024 * 1024     # 15MB to be safe to prevent memory issues

    # Prepare every field before opening a connection, so a malformed message
    # fails without touching the database.
    role = message["role"]

    # content can be None (get_llm_response may return message.content as None);
    # len(None) used to raise a TypeError here.
    content = message.get("content")
    if content is None:
        content = ""

    # Truncate content if too long
    if len(content) > MAX_SAFE_LENGTH:
        content = content[:MAX_SAFE_LENGTH] + "\n\n[Content truncated due to size limitations]"

    # Serialise tool calls; oversized payloads are replaced by a small marker
    tool_calls = None
    if message.get("tool_calls"):
        tool_calls = json.dumps(message["tool_calls"])
        if len(tool_calls) > MAX_SAFE_LENGTH:
            tool_calls = json.dumps({
                "truncated": True, 
                "message": "Tool calls data was too large to store"
            })

    # Get tool_call_id - no truncation needed with TEXT column
    tool_call_id = message.get("tool_call_id")

    with get_db_connection() as conn:
        cursor = conn.cursor()

        # Update last_updated timestamp
        cursor.execute(
            "UPDATE conversations SET last_updated = CURRENT_TIMESTAMP WHERE conversation_id = %s",
            (conversation_id,)
        )

        # Get next sequence number.
        # NOTE(review): this runs as a separate statement from the INSERT below,
        # so two concurrent writers to the same conversation could read the
        # same value.
        cursor.execute(
            """SELECT COALESCE(MAX(sequence_order), -1) + 1 
               FROM messages 
               WHERE conversation_id = %s""",
            (conversation_id,)
        )
        sequence_order = cursor.fetchone()[0]

        # Insert the message
        message_id = str(uuid.uuid4())
        cursor.execute(
            """INSERT INTO messages 
               (message_id, conversation_id, role, content, tool_calls, tool_call_id, sequence_order) 
               VALUES (%s, %s, %s, %s, %s, %s, %s)""",
            (message_id, conversation_id, role, content, tool_calls, tool_call_id, sequence_order)
        )

        # Make sure changes are committed
        conn.commit()

def db_conversation_exists(conversation_id):
    """Tell whether a row with this id is present in ``conversations``.

    Returns:
        bool: True when a matching row is found, False otherwise.
    """
    with get_db_connection() as connection:
        lookup = connection.cursor()
        lookup.execute(
            "SELECT 1 FROM conversations WHERE conversation_id = %s", 
            (conversation_id,)
        )
        matching_row = lookup.fetchone()
        return matching_row is not None


def db_get_recent_conversations(limit: int = 10):
    """List the most recently updated conversations, newest first.

    Args:
        limit: Maximum number of conversations to return.

    Returns:
        list[dict]: Entries with ``conversation_id``, ``created_at`` and
        ``last_updated``; timestamps are ISO-8601 strings, or None when the
        stored value is empty.
    """
    def iso_or_none(stamp):
        return stamp.isoformat() if stamp else None

    with get_db_connection() as connection:
        db_cursor = connection.cursor()
        db_cursor.execute(
            """SELECT conversation_id, created_at, last_updated 
               FROM conversations 
               ORDER BY last_updated DESC 
               LIMIT %s""",
            (limit,)
        )

        return [
            {
                "conversation_id": row[0],
                "created_at": iso_or_none(row[1]),
                "last_updated": iso_or_none(row[2])
            }
            for row in db_cursor.fetchall()
        ]

def db_get_conversation_preview(conversation_id: str, message_limit: int = 3):
    """Summarise a conversation with its latest few messages.

    Args:
        conversation_id: Id of the conversation to preview.
        message_limit: How many of the most recent messages to include.

    Returns:
        dict | None: None when the conversation does not exist. Otherwise a
        dict with the conversation timestamps (ISO-8601 strings or None), the
        total message count, and ``recent_messages`` in chronological order,
        each with its content shortened to 150 characters plus ``...``.
    """
    def iso_or_none(stamp):
        return stamp.isoformat() if stamp else None

    def shorten(text):
        if len(text) > 150:
            return text[:150] + "..."  # Truncate long content
        return text

    with get_db_connection() as connection:
        db_cursor = connection.cursor()

        # Bail out early for unknown conversations
        db_cursor.execute(
            "SELECT created_at, last_updated FROM conversations WHERE conversation_id = %s",
            (conversation_id,)
        )
        conversation_row = db_cursor.fetchone()
        if not conversation_row:
            return None

        # Overall size of the conversation
        db_cursor.execute(
            "SELECT COUNT(*) FROM messages WHERE conversation_id = %s",
            (conversation_id,)
        )
        message_count = db_cursor.fetchone()[0]

        # Newest messages first from the database...
        db_cursor.execute(
            """SELECT role, content, timestamp 
               FROM messages 
               WHERE conversation_id = %s 
               ORDER BY sequence_order DESC
               LIMIT %s""",
            (conversation_id, message_limit)
        )
        newest_first = [
            {
                "role": row[0],
                "content": shorten(row[1]),
                "timestamp": iso_or_none(row[2])
            }
            for row in db_cursor.fetchall()
        ]

        # ...then flipped so the newest message comes last
        chronological = newest_first[::-1]

        return {
            "conversation_id": conversation_id,
            "created_at": iso_or_none(conversation_row[0]),
            "last_updated": iso_or_none(conversation_row[1]),
            "total_messages": message_count,
            "recent_messages": chronological
        }

def db_delete_conversation(conversation_id: str) -> bool:
    """Delete a conversation and all its messages.

    Args:
        conversation_id: Id of the conversation to remove.

    Returns:
        bool: True when a row was removed from ``conversations``, False when
        no conversation with that id existed.
    """
    with get_db_connection() as conn:
        cursor = conn.cursor()

        # Since we don't have CASCADE DELETE due to no foreign keys,
        # we need to delete messages first, then conversation
        cursor.execute(
            "DELETE FROM messages WHERE conversation_id = %s",
            (conversation_id,)
        )

        cursor.execute(
            "DELETE FROM conversations WHERE conversation_id = %s",
            (conversation_id,)
        )

        # rowcount reflects the last statement, i.e. the conversations delete;
        # read it before committing.
        deleted = cursor.rowcount > 0

        # Commit explicitly, like the other write helpers in this file, so the
        # deletes do not depend on the connection's autocommit setting.
        conn.commit()

        return deleted

## Authenticate with Aura and Check LLM status

In [12]:
# Setup the OpenAI client to connect to our custom LLM endpoint
# OpenAI-compatible client pointed at the custom inference endpoint configured above
client = openai.OpenAI(base_url=INFERENCE_API_ENDPOINT, api_key=INFERENCE_AUTH_TOKEN)

# Tool and function Calling

In [13]:
# Helper functions and tools 
def google_search(parameters):
    """Search Google through SerpAPI and return snippets with their sources.

    Args:
        parameters: Dict of tool arguments; ``query`` holds the search text
            (defaults to an empty string).

    Returns:
        dict: ``content`` is the text handed to the LLM and ``sources`` is a
        list of ``{"title", "url"}`` dicts. Failures (missing API key, HTTP
        error, timeout) are reported inside ``content`` with no sources; this
        function does not raise for them.
    """
    api_key = SERPAPI_API_KEY
    if not api_key:
        return {"content": "SERPAPI_API_KEY not set. Please set your search API key.", "sources": []}

    # Without a timeout, requests waits forever on an unresponsive server
    request_timeout = 10  # seconds

    query = parameters.get("query", "")
    params = {
        "engine": "google",
        "q": query,
        "api_key": api_key,
        "num": "5",  # Request top 5 results for more comprehensive sources
    }

    try:
        response = requests.get(
            "https://serpapi.com/search",
            params=params,
            timeout=request_timeout,
        )
        response.raise_for_status()
        data = response.json()

        sources = []
        content_parts = []

        # A missing, empty, or null result list all mean "nothing found"
        if "organic_results" in data and data["organic_results"]:
            for result in data["organic_results"][:5]:  # Limit to top 5 results
                snippet = result.get("snippet", "")
                link = result.get("link", "")
                title = result.get("title", "")

                # Only keep hits that have both text and a link to cite
                if snippet and link:
                    content_parts.append(f"{snippet}")
                    sources.append({"title": title, "url": link})

            content = "Google Results:\n\n" + "\n\n".join(content_parts)
        else:
            content = "No results found."

        return {"content": content, "sources": sources}
    except Exception as e:
        # Best-effort tool: report the failure to the caller instead of raising
        return {"content": f"Google search failed: {e}", "sources": []}

In [14]:
def retrieve_singlestore_knowledge(parameters):
    """Query the Hybrid Search endpoint for SingleStore knowledge chunks.

    Args:
        parameters: Dict of tool arguments; ``query`` holds the search text.

    Returns:
        dict: ``content`` (numbered chunks, or an explanatory message) and
        ``sources`` (``{"title", "url"}`` dicts). When every attempt fails,
        the dict also carries ``"fallback_to": "google_search"``.
    """
    query = parameters.get("query", "")
    if not query:
        return {"content": "No query provided for SingleStore knowledge retrieval.", "sources": []}

    max_retries = 3
    retry_delay = 2  # seconds
    timeout = 5

    for attempt in range(max_retries):
        can_retry = attempt < max_retries - 1
        try:
            response = requests.post(
                HYBRID_SEARCH_URL,
                headers={
                    "accept": "application/json",
                    "Content-Type": "application/json",
                    "Authorization": f"Bearer {HYBRID_SEARCH_AUTH}"
                },
                json={
                    "query_text": query,
                    "top_k": 5,
                    "vector_weight": 0.7,
                    "text_weight": 0.3,
                    "model_name": "all-MiniLM-L6-v2"
                },
                timeout=timeout,
            )
            response.raise_for_status()
            data = response.json()

            if not ("results" in data and data["results"]):
                return {
                    "content": "No relevant information found in the SingleStore knowledge base.",
                    "sources": []
                }

            sources = []
            content_parts = ["Here are the relevant SingleStore knowledge chunks:"]

            for position, hit in enumerate(data["results"], 1):
                # Only array-format hits are understood:
                # [doc_id, url, chunk_index, chunk_text, score]
                if not (isinstance(hit, list) and len(hit) >= 5):
                    continue

                url = hit[1]
                text_content = hit[3]

                # Derive a readable title from the last path segment of the URL
                relative_path = url.replace("https://docs.singlestore.com/", "")
                segments = [segment for segment in relative_path.split("/") if segment]
                if segments:
                    title = segments[-1].replace("-", " ").title()
                else:
                    title = f"SingleStore Documentation ({position})"

                content_parts.append(f"[{position}] {text_content}")
                sources.append({"title": title, "url": url})

            return {"content": "\n\n".join(content_parts), "sources": sources}

        except requests.exceptions.Timeout:
            # Timeouts get their own message once the retries are used up
            if can_retry:
                time.sleep(retry_delay)
                continue
            return {
                "content": "The SingleStore knowledge base search timed out. I'll try to search using Google instead.",
                "sources": [],
                "fallback_to": "google_search" 
            }
        except Exception as e:
            if can_retry:
                time.sleep(retry_delay)
                continue
            return {
                "content": f"Error retrieving information: {str(e)}. I'll try to search using Google instead.",
                "sources": [],
                "fallback_to": "google_search"
            }

In [15]:
def process_tool_result(tool_result_obj, tool_used, tool_call_id, conversation_id):
    """Store a tool result as a ``tool`` message and return its readable form.

    Args:
        tool_result_obj: Value returned by the tool. Dicts with a ``content``
            key supply the readable text and ``sources`` directly; anything
            else is stringified and its sources are obtained from
            ``extract_sources_from_tool_result``.
        tool_used: Name of the tool, used to label the default source.
        tool_call_id: Id of the tool call this result answers.
        conversation_id: Conversation the tool message is appended to.

    Returns:
        tuple: ``(readable_result, sources)``. On any failure the first item
        is an error description and the second an empty list; the error is
        logged rather than raised.
    """
    # For very large results, truncate content only
    MAX_SAFE_LENGTH = 15 * 1024 * 1024  # 15MB

    try:
        # Split the result into readable text and its sources
        if isinstance(tool_result_obj, dict) and "content" in tool_result_obj:
            readable_result = tool_result_obj["content"]
            sources = tool_result_obj.get("sources", [])
        else:
            readable_result = str(tool_result_obj)
            sources = extract_sources_from_tool_result(
                tool_result_obj,
                default_source={"title": f"{tool_used} result", "url": ""}
            )

        # Truncate readable result if needed
        if len(readable_result) > MAX_SAFE_LENGTH:
            readable_result = readable_result[:MAX_SAFE_LENGTH] + "\n\n[Content truncated due to size limitations]"

        # Prepare content to store - for very large objects, store a simplified version
        if isinstance(tool_result_obj, dict):
            try:
                # Serialise once and reuse the text when it fits
                content_to_store = json.dumps(tool_result_obj)
                if len(content_to_store) > MAX_SAFE_LENGTH:
                    content_to_store = json.dumps({
                        "content": readable_result[:MAX_SAFE_LENGTH],
                        "sources": tool_result_obj.get("sources", []),
                        "truncated": True
                    })
            except Exception:
                # If JSON serialization fails, use the string representation.
                # Exception (not a bare except) lets KeyboardInterrupt and
                # SystemExit propagate.
                content_to_store = str(tool_result_obj)[:MAX_SAFE_LENGTH]
        else:
            content_to_store = str(tool_result_obj)[:MAX_SAFE_LENGTH]

        # Create tool response message - no truncation for tool_call_id
        tool_message = {
            "role": "tool",
            "tool_call_id": tool_call_id,  # No need to truncate with TEXT column
            "content": content_to_store
        }

        # Store tool response in database
        db_add_message(conversation_id, tool_message)

        return readable_result, sources
    except Exception as e:
        logger.error(f"Error processing tool result: {str(e)}")
        return f"Error processing tool result: {str(e)}", []

In [16]:
# Register all the tools: maps the tool name the model asks for to the Python callable
tool_registry = {}
tool_registry["google_search"] = google_search
tool_registry["retrieve_singlestore_knowledge"] = retrieve_singlestore_knowledge

# Define the tool specifications for OpenAI function calling.
# Both tools take a single required string argument named "query", so the
# specs are generated from (name, description, query description) triples.
tool_specs = [
    {
        "type": "function",
        "function": {
            "name": spec_name,
            "description": spec_description,
            "parameters": {
                "type": "object",
                "properties": {
                    "query": {
                        "type": "string",
                        "description": query_description
                    }
                },
                "required": ["query"]
            }
        }
    }
    for spec_name, spec_description, query_description in (
        (
            "google_search",
            "Search the web for general knowledge information",
            "The search query",
        ),
        (
            "retrieve_singlestore_knowledge",
            "Search the SingleStore knowledge base for information about SingleStore, databases, vector search, or related technology",
            "The search query about SingleStore or database technology",
        ),
    )
]

## Chat Bot building

In [17]:
# System prompt stored as the first message of every new conversation
# (see create_conversation). It tells the model which tool to call for which
# kind of query and how to format answers: Markdown, inline numbered
# citations, and a closing "## Sources" list.
SYSTEM_MESSAGE = """
You are a helpful assistant. When a user's query requires up-to-date or external information, 
use the appropriate tool:

1. For general knowledge queries, use the 'google_search' function.
2. For queries related to SingleStore, databases, vector search, or any SingleStore-specific technology, 
   use the 'retrieve_singlestore_knowledge' function.

After receiving tool results, carefully analyze the information and use it as the primary source for your answer. 
This information is the most up-to-date and should be prioritized over your pre-existing knowledge.

IMPORTANT: Format your response using these guidelines:
1. Write your response in Markdown format.
2. Include inline citations that combine the source number and direct link, like this: 
   [[1]](https://example.com). For example: SingleStore supports vectorization[[1]](https://docs.singlestore.com).
3. At the end of your response, include a '## Sources' section with a numbered list of all the sources you cited.
4. Format each source in the list as a Markdown link with the title as the link text.
5. Ensure every significant claim or piece of information has a citation.

Example format:
SingleStore supports vectorization through its built-in vector functions[[1]](https://docs.singlestore.com/managed-service/en/reference/vector-functions.html). This capability enables efficient 
similarity search operations for machine learning applications[[2]](https://www.singlestore.com/blog/vector-search-for-ai/).

## Sources
[1] [SingleStore Vector Search Documentation](https://docs.singlestore.com/managed-service/en/reference/vector-functions.html)
[2] [Building AI Applications with SingleStore](https://www.singlestore.com/blog/vector-search-for-ai/)
"""

In [18]:
def get_llm_response(history):
    """
    Request a single (non-streaming) chat completion for the conversation.

    Args:
        history (list): List of message dictionaries with role and content keys

    Returns:
        str: The assistant's text, or, when the model asks for a tool, a JSON
        string of the form {"function_call": {"name": ..., "parameters": ...}}
        built from the first tool call. Failures are returned as a string
        starting with "Error: " instead of being raised.
    """
    try:
        completion = client.chat.completions.create(
            model=INFERENCE_MODEL_NAME,
            messages=history,
            temperature=MODEL_RESPONSE_TEMPERATURE,
            max_tokens=MODEL_RESPONSE_MAX_TOKENS,
            tools=tool_specs
        )

        if not completion.choices:
            return "No response generated."

        reply = completion.choices[0].message

        # Plain answer: hand the text straight back
        if not reply.tool_calls:
            return reply.content

        # Tool request: only the first call is forwarded, wrapped as a JSON
        # string for our existing parser
        requested = reply.tool_calls[0]
        function_call = {
            "name": requested.function.name,
            "parameters": json.loads(requested.function.arguments)
        }
        return json.dumps({"function_call": function_call})
    except Exception as exc:
        print(f"Error getting LLM response: {exc}")
        return f"Error: {str(exc)}"

In [19]:
# Pydantic models for API
# A single chat message as returned by the conversation endpoints.
class Message(BaseModel):
    role: str  # message author role as stored in the database, e.g. "system", "assistant", "tool"
    content: str  # message text

# Response body of the create/get conversation endpoints.
class ConversationResponse(BaseModel):
    conversation_id: str  # id of the conversation the messages belong to
    messages: List[Message]  # messages in the order returned by db_get_conversation

# Request body carrying a user message.
# NOTE(review): the endpoint consuming this model is not visible in this part of the file.
class MessageRequest(BaseModel):
    conversation_id: Optional[str] = None  # optional; presumably a new conversation is started when omitted — confirm against the endpoint
    message: str  # the user's message text

# Response body for a (non-streaming) message exchange.
# NOTE(review): the endpoint producing this model is not visible in this part of the file.
class MessageResponse(BaseModel):
    conversation_id: str  # conversation the exchange belongs to
    response: str  # response text
    tool_used: Optional[str] = None  # presumably the name of the tool that was called, if any — confirm
    tool_result: Optional[str] = None  # presumably the readable tool output, if any — confirm

# Request body for the streaming variant; same fields as MessageRequest.
# NOTE(review): the endpoint consuming this model is not visible in this part of the file.
class MessageStreamRequest(BaseModel):
    conversation_id: Optional[str] = None  # optional conversation id
    message: str  # the user's message text

@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan hook: runs init_database at startup and prints progress.

    An initialisation failure is printed and otherwise ignored, so the app
    still starts. Nothing is cleaned up at shutdown beyond printing a message.
    """
    # --- startup ---
    print("Initializing database...")
    try:
        init_database()
        print("Database initialized successfully")
    except Exception as startup_error:
        print(f"Database initialization error: {startup_error}")

    # The application serves requests while suspended here
    yield

    # --- shutdown ---
    print("Shutting down...")

In [20]:
# Add streaming versions of get_llm_response
async def get_llm_response_stream(history):
    """
    Stream a chat completion for the conversation as newline-delimited JSON.

    The OpenAI client used here is synchronous, so both the request and every
    read of the next chunk run on a worker thread; the event loop stays free
    to serve other requests while waiting on the model.

    Args:
        history (list): List of message dictionaries from database

    Yields:
        str: JSON documents, each terminated by a newline, with a "type" of:
            - "content": a piece of assistant text
            - "tool_call_start": the model started calling the named tool
            - "error": a failure description
            - "final_result": always the last item; carries the full text and,
              when is_tool_call is true, the function name and its parsed
              parameters
    """
    try:
        # Start the streaming response
        stream = await asyncio.to_thread(
            lambda: client.chat.completions.create(
                model=INFERENCE_MODEL_NAME,
                messages=history,
                temperature=MODEL_RESPONSE_TEMPERATURE,
                max_tokens=MODEL_RESPONSE_MAX_TOKENS,
                tools=tool_specs,
                stream=True # Enable streaming
            )
        )

        # Track the full content and the pieces of a (possibly split) tool call
        full_content = ""
        function_name = ""
        argument_fragments = []
        is_tool_call = False

        # next() blocks on network I/O, so it is executed off the event loop.
        # A sentinel default is used because StopIteration cannot be
        # propagated out of a worker thread through a future.
        chunk_iterator = iter(stream)
        end_of_stream = object()

        while True:
            chunk = await asyncio.to_thread(next, chunk_iterator, end_of_stream)
            if chunk is end_of_stream:
                break

            if not chunk.choices:
                continue

            delta = chunk.choices[0].delta

            if delta.tool_calls:
                is_tool_call = True
                # Only the first tool call of a chunk is considered
                tool_call = delta.tool_calls[0]

                # Name and arguments may be split across chunks, so collect them
                name_piece = getattr(tool_call.function, "name", None)
                if name_piece:
                    # The first name seen is the one reported in the final result
                    if not function_name:
                        function_name = name_piece
                    yield json.dumps({"type": "tool_call_start", "name": name_piece}) + "\n"

                arguments_piece = getattr(tool_call.function, "arguments", None)
                if arguments_piece:
                    argument_fragments.append(arguments_piece)
            elif delta.content:
                # Regular content chunk
                full_content += delta.content
                yield json.dumps({"type": "content", "content": delta.content}) + "\n"

        if is_tool_call:
            # Reassemble the argument JSON from its fragments
            arguments = "".join(argument_fragments)

            try:
                args_obj = json.loads(arguments)
            except json.JSONDecodeError:
                yield json.dumps({"type": "error", "content": "Failed to parse tool call arguments"}) + "\n"
                yield json.dumps({
                    "type": "final_result",
                    "is_tool_call": False,
                    "content": full_content
                }) + "\n"
            else:
                # Yield the complete tool call as a final result
                yield json.dumps({
                    "type": "final_result",
                    "is_tool_call": True,
                    "function": {
                        "name": function_name,
                        "parameters": args_obj
                    },
                    "content": full_content
                }) + "\n"
        else:
            # Regular response (no tool call)
            yield json.dumps({
                "type": "final_result",
                "is_tool_call": False,
                "content": full_content
            }) + "\n"

    except Exception as e:
        error_msg = f"Error getting streaming LLM response: {str(e)}"
        print(error_msg)
        yield json.dumps({"type": "error", "content": error_msg}) + "\n"
        yield json.dumps({
            "type": "final_result",
            "is_tool_call": False,
            "content": error_msg
        }) + "\n"


In [21]:
# Create FastAPI app; the lifespan hook runs the database initialisation at startup
app = FastAPI(lifespan=lifespan, title="SingleStore Chatbot API")

## API endpoints definitions

In [22]:
@app.get("/")
async def root():
    """Liveness probe: reports that the API process is up."""
    liveness = {"status": "ok", "message": "SingleStore Chatbot API is running"}
    return liveness

from datetime import datetime

@app.get("/health")
async def dependencies_health_check():
    """Report the health of the service, the database and the LLM endpoint.

    Returns a dict with ``status`` ("healthy" or "degraded"), one entry per
    dependency ("healthy" or "unhealthy: <reason>") and a ``timestamp``.
    """
    report = {
        "status": "healthy",
        "database": "healthy",
        "llm_service": "healthy",
        "timestamp": datetime.now().isoformat()
    }

    def degrade(component, detail):
        # One failing dependency downgrades the overall status
        report[component] = detail
        report["status"] = "degraded"

    # Database: a trivial round trip is enough
    try:
        with get_db_connection() as connection:
            probe = connection.cursor()
            probe.execute("SELECT 1")
            probe.fetchone()
    except Exception as db_error:
        degrade("database", f"unhealthy: {str(db_error)}")

    # LLM service: listing models proves the endpoint answers
    try:
        available_models = client.models.list()
        if not available_models:
            degrade("llm_service", "unhealthy: no models available")
    except Exception as llm_error:
        degrade("llm_service", f"unhealthy: {str(llm_error)}")

    return report

@app.post("/conversation", response_model=ConversationResponse)
async def create_conversation():
    """Start a conversation seeded with the system message and return it.

    Any failure is logged with its traceback and reported as HTTP 500.
    """
    try:
        logger.info("Creating new conversation with system message")
        new_id = db_create_conversation(SYSTEM_MESSAGE)

        # Read back what was stored so the response reflects the database
        logger.info(f"Getting messages for conversation {new_id}")
        stored_messages = db_get_conversation(new_id)

        api_messages = []
        for stored in stored_messages:
            api_messages.append(Message(role=stored["role"], content=stored["content"]))

        return ConversationResponse(conversation_id=new_id, messages=api_messages)
    except Exception as exc:
        logger.error(f"Error creating conversation: {str(exc)}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Failed to create conversation: {str(exc)}")


@app.get("/conversation/{conversation_id}", response_model=ConversationResponse)
async def get_conversation(conversation_id: str):
    """Return the stored message history for one conversation.

    Raises:
        HTTPException: 404 when the conversation ID is not in the database.
    """
    if db_conversation_exists(conversation_id):
        stored_rows = db_get_conversation(conversation_id)
        history = []
        for row in stored_rows:
            history.append(Message(role=row["role"], content=row["content"]))
        return ConversationResponse(conversation_id=conversation_id, messages=history)

    raise HTTPException(status_code=404, detail="Conversation not found in db")

@app.delete("/conversation/{conversation_id}")
async def delete_conversation(conversation_id: str):
    """Remove a conversation via ``db_delete_conversation``.

    Raises:
        HTTPException: 404 when the database helper reports nothing was deleted.
    """
    deleted = db_delete_conversation(conversation_id)
    if deleted:
        return {"status": "success", "message": "Conversation deleted"}
    raise HTTPException(status_code=404, detail="Conversation not found")

@app.get("/conversations/recent")
async def get_recent_conversations(limit: int = 10):
    """Return up to ``limit`` recent conversations as provided by the database helper."""
    recent = db_get_recent_conversations(limit)
    return recent

@app.get("/conversation/{conversation_id}/preview")
async def get_conversation_preview(conversation_id: str, message_limit: int = 3):
    """Return a conversation preview limited to ``message_limit`` messages.

    Raises:
        HTTPException: 404 when the database helper returns a falsy preview.
    """
    summary = db_get_conversation_preview(conversation_id, message_limit)
    if summary:
        return summary
    raise HTTPException(status_code=404, detail="Conversation not found")

In [23]:
@app.post("/message", response_model=MessageResponse)
async def send_message(request: MessageRequest):
    """Handle one non-streaming chat turn, executing at most one tool call.

    Flow: resolve (or create) the conversation, persist the user message, ask
    the LLM for a reply with tools enabled, and, if the LLM requests a tool,
    run it (honouring a ``fallback_to`` hint in the tool result), feed the
    result back to the LLM and return the final answer with citations.

    Args:
        request: carries ``message`` and an optional ``conversation_id``; a
            missing or unknown ID starts a new conversation.

    Returns:
        MessageResponse: the conversation ID, the assistant text, and the tool
        name / readable tool result when a tool was used. Failures are returned
        as ``"Error: ..."`` text in ``response`` rather than raised.
    """
    # Initialize or retrieve conversation
    conversation_id = request.conversation_id
    if not conversation_id or not db_conversation_exists(conversation_id):
        conversation_id = db_create_conversation(SYSTEM_MESSAGE)

    # Get current messages from database
    history = db_get_conversation(conversation_id)

    # Persist the user message and mirror it in the in-memory history
    user_message = {"role": "user", "content": request.message}
    db_add_message(conversation_id, user_message)
    history.append(user_message)

    # Tool tracking; also reported back if an error happens mid-way
    tool_used = None
    tool_result = None

    try:
        # The OpenAI client call is blocking; run it in a worker thread so the
        # event loop stays responsive (same approach as the streaming endpoint).
        response = await asyncio.to_thread(
            client.chat.completions.create,
            model=INFERENCE_MODEL_NAME,
            messages=history,
            temperature=MODEL_RESPONSE_TEMPERATURE,
            max_tokens=MODEL_RESPONSE_MAX_TOKENS,
            tools=tool_specs
        )

        assistant_message = response.choices[0].message

        # No tool call: store and return the plain answer.
        if not assistant_message.tool_calls:
            # The API may return None content; normalise to an empty string.
            content = assistant_message.content or ""
            db_add_message(conversation_id, {"role": "assistant", "content": content})

            return MessageResponse(
                conversation_id=conversation_id,
                response=content,
                tool_used=None,
                tool_result=None
            )

        # Only the first requested tool call is honoured.
        tool_call = assistant_message.tool_calls[0]
        function_name = tool_call.function.name

        # Validate the tool BEFORE persisting the tool-call request, so an
        # unknown tool never leaves an unanswered `tool_calls` entry in the
        # stored history.
        if function_name not in tool_registry:
            error_msg = f"Unknown tool: {function_name}"
            db_add_message(conversation_id, {"role": "assistant", "content": error_msg})
            return MessageResponse(conversation_id=conversation_id, response=error_msg)

        function_args = json.loads(tool_call.function.arguments)
        tool_used = function_name

        # Assistant message carrying the tool call, with the full (untruncated) ID
        assistant_with_tool = {
            "role": "assistant",
            "content": assistant_message.content or "",
            "tool_calls": [{
                "id": tool_call.id,
                "type": "function",
                "function": {
                    "name": function_name,
                    "arguments": tool_call.function.arguments
                }
            }]
        }

        # Execute the tool off the event loop
        tool_result_obj = await asyncio.to_thread(tool_registry[function_name], function_args)

        # A tool may ask to be replaced by another one (e.g. on backend failure)
        if isinstance(tool_result_obj, dict) and "fallback_to" in tool_result_obj:
            fallback_tool = tool_result_obj.get("fallback_to")
            if fallback_tool in tool_registry:
                tool_result_obj = await asyncio.to_thread(tool_registry[fallback_tool], function_args)
                tool_used = fallback_tool

        # Persist the tool-call request only now that a tool result exists, so a
        # tool that raises does not leave a dangling request behind.
        # NOTE(review): presumably process_tool_result stores the matching
        # `tool` message (it receives the call ID and conversation ID) — confirm.
        db_add_message(conversation_id, assistant_with_tool)

        # Process tool result and handle large content
        readable_result, sources = process_tool_result(
            tool_result_obj, tool_used, tool_call.id, conversation_id
        )
        tool_result = readable_result

        # Extend the in-memory history with the request/response pair
        history.append(assistant_with_tool)
        history.append({
            "role": "tool",
            "tool_call_id": tool_call.id,
            "content": json.dumps(tool_result_obj) if isinstance(tool_result_obj, dict) else str(tool_result_obj)
        })

        # Second LLM pass: turn the tool output into the final answer
        final_response = await asyncio.to_thread(
            client.chat.completions.create,
            model=INFERENCE_MODEL_NAME,
            messages=history,
            temperature=MODEL_RESPONSE_TEMPERATURE,
            max_tokens=MODEL_RESPONSE_MAX_TOKENS
        )

        final_content = final_response.choices[0].message.content or ""

        # Add citation links
        if sources:
            final_content = add_citation_numbers(final_content, sources)

        # Store final response in database
        db_add_message(conversation_id, {"role": "assistant", "content": final_content})

        return MessageResponse(
            conversation_id=conversation_id,
            response=final_content,
            tool_used=tool_used,
            tool_result=tool_result
        )
    except Exception as e:
        # Keep the original contract (error text in a normal response) but make
        # the failure visible in the logs instead of swallowing it silently.
        logger.error(f"Error handling message for conversation {conversation_id}: {str(e)}", exc_info=True)
        error_msg = f"Error: {str(e)}"
        return MessageResponse(
            conversation_id=conversation_id,
            response=error_msg,
            tool_used=tool_used,
            tool_result=tool_result
        )

In [24]:
def add_citation_numbers(text, sources):
    """
    Enhances text with inline citations that include both numbers and links.

    If the text already contains a "## Sources" heading, every bare ``[n]``
    marker whose source has a URL is rewritten to ``[[n]](url)``. Markers that
    are already wrapped (``[[n]]``) or already a markdown link (``[n](...)``)
    are left alone, so the function is safe to apply twice. Otherwise a
    "## Sources" section listing all sources is appended.

    Args:
        text (str): The response text; ``None`` is treated as an empty string
        sources (list): List of source dictionaries (``title`` / ``url`` keys)

    Returns:
        str: Text with properly formatted citations (unchanged if no sources)
    """
    if not sources:
        return text

    # The LLM may return no content at all; avoid failing on None.
    text = text or ""

    # If it already has a sources section, only upgrade the inline markers
    if re.search(r'##\s+Sources', text, re.IGNORECASE):
        def _linkify(match):
            num = int(match.group(1))
            # Leave markers that do not correspond to a known source untouched
            if not 1 <= num <= len(sources):
                return match.group(0)
            url = sources[num - 1].get('url') or ''
            return f"[[{num}]]({url})" if url else match.group(0)

        # Lookarounds skip "[[n]]" (already wrapped) and "[n](" (already a
        # link). A single re.sub pass links EVERY occurrence of a marker, not
        # just one of them.
        return re.sub(r'(?<!\[)\[(\d+)\](?![\](])', _linkify, text)

    # If no sources section, add one with formatted citations
    entries = []
    for i, source in enumerate(sources, 1):
        title = source.get('title') or f'Source {i}'
        url = source.get('url') or ''
        # Without a URL emit plain text rather than a broken "[title]()" link
        entries.append(f"[{i}] [{title}]({url})" if url else f"[{i}] {title}")

    return text + "\n\n## Sources\n" + "\n".join(entries) + "\n"



def extract_sources_from_tool_result(tool_result, default_source=None):
    """
    Extracts sources from various tool result formats.

    Args:
        tool_result: The result object from a tool call. A dict with a
            "sources" key is returned as-is; a string is scanned for URLs.
        default_source: Optional default source if none found

    Returns:
        list: List of source dictionaries
    """
    # Handle dictionary format with explicit sources key
    if isinstance(tool_result, dict) and "sources" in tool_result:
        return tool_result["sources"]

    sources = []

    # Handle string results that might contain URLs
    if isinstance(tool_result, str):
        # Capture the whole URL (path, query, fragment), not just scheme+host.
        # Whitespace, quotes, angle brackets, parentheses and square brackets
        # end a URL so that markdown such as "[t](https://x/y)" is cut correctly.
        candidates = re.findall(r'https?://[^\s<>"\'()\[\]]+', tool_result)

        # Drop sentence punctuation that directly follows a URL in prose
        cleaned = (candidate.rstrip('.,;:!?') for candidate in candidates)

        # De-duplicate while preserving first-seen order
        unique_urls = dict.fromkeys(
            url for url in cleaned if url and not url.endswith('://')
        )

        for i, url in enumerate(unique_urls, 1):
            sources.append({
                "title": f"Source {i} from tool result",
                "url": url
            })

    # Add default source if provided and no sources found
    if not sources and default_source:
        sources.append(default_source)

    return sources

In [25]:
@app.post("/message/stream")
async def send_message_stream(request: MessageStreamRequest):
    """Stream one chat turn to the client as newline-delimited JSON events.

    The user message is persisted up front. The generator then forwards the
    LLM stream and, if the LLM requests a tool, runs it (honouring a
    ``fallback_to`` hint), stores the tool result, streams a second LLM pass
    and finally stores the assistant answer.

    Events emitted by this function (besides chunks forwarded verbatim from
    ``get_llm_response_stream``): ``info``, ``tool_calling``,
    ``tool_execution_start``, ``tool_fallback``, ``tool_result``,
    ``generating_final_response``, ``enhanced_content``, ``error`` and a
    terminal ``done``.

    Args:
        request: carries ``message`` and an optional ``conversation_id``; a
            missing or unknown ID starts a new conversation.

    Returns:
        StreamingResponse: one JSON object per line.
    """
    # Initialize or retrieve conversation
    conversation_id = request.conversation_id
    if not conversation_id or not db_conversation_exists(conversation_id):
        conversation_id = db_create_conversation(SYSTEM_MESSAGE)

    # Add user message to database
    user_message = {"role": "user", "content": request.message}
    db_add_message(conversation_id, user_message)

    # Get current conversation history from database (includes the user message)
    history = db_get_conversation(conversation_id)

    async def stream_generator():
        yield json.dumps({"type": "info", "content": "Processing your request..."}) + "\n"

        try:
            # Initialize tracking variables
            tool_used = None
            tool_result = None
            sources = []

            # Stream the initial response and watch for the terminal chunk
            final_chunk = None

            async for chunk in get_llm_response_stream(history):
                # Always forward chunks to client
                yield chunk

                try:
                    chunk_data = json.loads(chunk.strip())
                except json.JSONDecodeError:
                    continue
                if chunk_data.get("type") == "final_result":
                    final_chunk = chunk_data
                    break

            # Check if there's a tool call
            if final_chunk and final_chunk.get("is_tool_call", False):
                function_info = final_chunk.get("function", {})
                function_name = function_info.get("name")
                parameters = function_info.get("parameters", {})

                if function_name and function_name in tool_registry:
                    # Track the tool call
                    tool_used = function_name

                    # The streamed chunk carries no call ID, so generate one
                    tool_call_id = str(uuid.uuid4())

                    # Store assistant's request for a tool to database
                    assistant_message = {
                        "role": "assistant",
                        "content": final_chunk.get("content", ""),
                        "tool_calls": [
                            {
                                "id": tool_call_id,
                                "type": "function",
                                "function": {
                                    "name": function_name,
                                    "arguments": json.dumps(parameters)
                                }
                            }
                        ]
                    }
                    db_add_message(conversation_id, assistant_message)

                    # Set once the matching `tool` message is stored; used by
                    # the error path to keep the stored history well-formed.
                    tool_message_saved = False

                    # Notify client about tool execution
                    yield json.dumps({
                        "type": "tool_calling",
                        "tool": function_name,
                        "parameters": parameters
                    }) + "\n"

                    try:
                        # Execute the tool in a worker thread (tools are blocking)
                        yield json.dumps({"type": "tool_execution_start"}) + "\n"
                        tool_result_obj = await asyncio.to_thread(tool_registry[function_name], parameters)

                        # Check for fallback
                        if isinstance(tool_result_obj, dict) and "fallback_to" in tool_result_obj:
                            fallback_tool = tool_result_obj.get("fallback_to")
                            if fallback_tool in tool_registry:
                                # Notify client about fallback
                                yield json.dumps({
                                    "type": "tool_fallback",
                                    "from": function_name,
                                    "to": fallback_tool,
                                    "reason": tool_result_obj.get("content", "")
                                }) + "\n"

                                # Execute fallback tool
                                tool_used = fallback_tool
                                tool_result_obj = await asyncio.to_thread(tool_registry[fallback_tool], parameters)

                        # Process the tool result
                        if isinstance(tool_result_obj, dict) and "content" in tool_result_obj:
                            tool_result = tool_result_obj["content"]
                            sources = tool_result_obj.get("sources", [])
                        else:
                            tool_result = str(tool_result_obj)
                            sources = extract_sources_from_tool_result(
                                tool_result_obj,
                                default_source={"title": f"{tool_used} result", "url": ""}
                            )

                        # Add tool result to database
                        tool_message = {
                            "role": "tool",
                            "tool_call_id": tool_call_id,
                            "content": json.dumps(tool_result_obj) if isinstance(tool_result_obj, dict) else str(tool_result_obj)
                        }
                        db_add_message(conversation_id, tool_message)
                        tool_message_saved = True

                        # Send tool result to client
                        yield json.dumps({
                            "type": "tool_result",
                            "result": tool_result,
                            "sources": sources
                        }) + "\n"

                        # Get the updated history including the tool result
                        updated_history = db_get_conversation(conversation_id)

                        # Get final response with the tool result
                        yield json.dumps({"type": "generating_final_response"}) + "\n"

                        # Stream the final response, accumulating its text
                        final_content = ""
                        async for answer_chunk in get_llm_response_stream(updated_history):
                            # Forward all chunks to client
                            yield answer_chunk

                            try:
                                answer_data = json.loads(answer_chunk.strip())
                            except json.JSONDecodeError:
                                continue
                            if answer_data.get("type") == "content":
                                final_content += answer_data.get("content", "")
                            elif answer_data.get("type") == "final_result":
                                # Fall back to the terminal chunk only when no
                                # incremental content was seen.
                                if not final_content:
                                    final_content = answer_data.get("content", "")

                        # Add citation links to the content
                        if sources:
                            enhanced_content = add_citation_numbers(final_content, sources)
                            if enhanced_content != final_content:
                                # Send enhanced content with citations
                                yield json.dumps({
                                    "type": "enhanced_content",
                                    "content": enhanced_content
                                }) + "\n"
                                final_content = enhanced_content

                        # Save to database
                        db_add_message(conversation_id, {
                            "role": "assistant",
                            "content": final_content
                        })

                        # Complete the stream
                        yield json.dumps({
                            "type": "done",
                            "tool_used": tool_used,
                            "sources": sources
                        }) + "\n"

                    except Exception as e:
                        error_msg = f"Error executing tool {function_name}: {str(e)}"
                        logger.error(error_msg, exc_info=True)

                        # The tool-call request is already stored. Without a
                        # matching `tool` message the history would hold an
                        # unanswered tool call, which chat-completion APIs
                        # reject on the next turn, so record the failure as
                        # the tool's reply.
                        if not tool_message_saved:
                            try:
                                db_add_message(conversation_id, {
                                    "role": "tool",
                                    "tool_call_id": tool_call_id,
                                    "content": error_msg
                                })
                            except Exception:
                                logger.error("Failed to persist tool error message", exc_info=True)

                        yield json.dumps({"type": "error", "content": error_msg}) + "\n"
                        yield json.dumps({"type": "done", "error": True}) + "\n"
                else:
                    error_msg = f"Unknown tool: {function_name}"
                    yield json.dumps({"type": "error", "content": error_msg}) + "\n"
                    yield json.dumps({"type": "done", "error": True}) + "\n"
            elif final_chunk:
                # No tool call, just a regular response
                final_content = final_chunk.get("content", "")

                # Save to database
                db_add_message(conversation_id, {"role": "assistant", "content": final_content})

                # Complete the stream
                yield json.dumps({
                    "type": "done",
                    "tool_used": None,
                    "sources": []
                }) + "\n"
            else:
                # The LLM stream ended without a terminal chunk; still close the
                # protocol so clients waiting for "done" are not left hanging.
                error_msg = "LLM stream ended without a final result"
                yield json.dumps({"type": "error", "content": error_msg}) + "\n"
                yield json.dumps({"type": "done", "error": True}) + "\n"

        except Exception as e:
            error_msg = f"Error in stream processing: {str(e)}"
            logger.error(error_msg, exc_info=True)
            yield json.dumps({"type": "error", "content": error_msg}) + "\n"
            yield json.dumps({"type": "done", "error": True}) + "\n"

    # NOTE(review): the payload is newline-delimited JSON rather than SSE
    # "data:" frames; the media type is kept unchanged for existing clients.
    return StreamingResponse(stream_generator(), media_type="text/event-stream")

In [26]:
# Add a global exception handler
@app.exception_handler(Exception)
async def global_exception_handler(request: Request, exc: Exception):
    """Catch-all handler: log the unhandled exception and answer with HTTP 500.

    Args:
        request: the request being processed when the exception escaped.
        exc: the unhandled exception.

    Returns:
        JSONResponse: status 500 with a ``message`` describing the error.
    """
    # Imported here because the notebook's import cell only brings in
    # StreamingResponse from fastapi.responses; without this the handler itself
    # would fail with NameError while handling an error.
    from fastapi.responses import JSONResponse

    logger.error(f"Unhandled exception: {str(exc)}", exc_info=True)
    return JSONResponse(
        status_code=500,
        content={"message": f"Internal Server Error: {str(exc)}"}
    )

# Run the Cloud function: hand `app` to the SingleStore apps helper and keep the
# value it returns. Top-level `await` is used, so this line is meant to be
# executed as a notebook cell rather than as a plain script.
connection_info = await apps.run_function_app(app)

INFO:__main__:Database tables were created separately, skipping initialization


Initializing database...
Database initialized successfully
Cloud function available at https://apps.aws-virginia-nb2.svc.singlestore.com:8000/notebooks/InteractiveNotebook/62c2d0f3-1e14-45dc-9b1b-67ad645b5d3f/app/docs?authToken=eyJhbGciOiJFUzUxMiIsImtpZCI6IjhhNmVjNWFmLThlNWEtNDQxOS04NmM4LWRkMDkxN2U1YWNlMSIsInR5cCI6IkpXVCJ9.eyJzdWIiOiI1YjQ1OTgxYy04YjA5LTRlYWQtYmVjMC0wOTU0N2Q3YjlhOTciLCJhdWQiOlsibm92YXB1YmxpYyJdLCJleHAiOjE3NDE0ODY5NzIsIm5iZiI6MTc0MTQ4NjY3MiwiaWF0IjoxNzQxNDg2NjcyLCJqdGkiOiI1YWQ1MTk0Yi0xNzM3LTRlMmMtYmE5MS1kYTQ3NTcyZmFjMzAiLCJjb250YWluZXJJRCI6IjYyYzJkMGYzLTFlMTQtNDVkYy05YjFiLTY3YWQ2NDViNWQzZiJ9.ABiy3e5kdfIkRUi4KYwfhYwaoUdqTAwjLg4OicCPUF8RPdPZf9cw2iAUbCgAWp2DdUdRuRzGtBtjLlAZBu3eB4tOAWjItmaJxjDDtz3tQBticCSwondVVCOFnx2tv8ZgLXiPEJoKyFP244RVggTWIRNQQp66xx_9bsWQ9scpj2WF3d8b


# TESTS for all endpoints 

In [27]:
# Test the root endpoint
async def test_root_endpoint():
    """Call the root handler directly and print its JSON payload (or the error)."""
    print("\n===== Testing ROOT ENDPOINT =====")
    try:
        payload = await root()
        rendered = json.dumps(payload, indent=2)
        print(f"Response: {rendered}")
    except Exception as err:
        print(f"Error: {str(err)}")

# Test the create conversation endpoint
async def test_create_conversation():
    """Create a conversation through the handler and print its initial messages."""
    print("\n===== Testing CREATE CONVERSATION ENDPOINT =====")
    try:
        created = await create_conversation()
        print(f"Conversation ID: {created.conversation_id}")
        print(f"Initial messages: {len(created.messages)}")
        for entry in created.messages:
            # Only the first 50 characters are shown to keep the output short
            role, snippet = entry.role, entry.content[:50]
            print(f"  - {role}: {snippet}...")
    except Exception as err:
        import traceback
        print(f"Error: {str(err)}")
        traceback.print_exc()

# Test get conversation endpoint with valid and invalid IDs
async def test_get_conversation():
    """Fetch a conversation by a valid ID, then by a random (unknown) ID."""
    print("\n===== Testing GET CONVERSATION ENDPOINT =====")

    try:
        # A freshly created conversation supplies the valid ID
        seed = await create_conversation()
        valid_id = seed.conversation_id
        print(f"Created test conversation with ID: {valid_id}")

        print("\nTesting with valid conversation ID:")
        fetched = await get_conversation(valid_id)
        print(f"Retrieved conversation: {fetched.conversation_id}")
        print(f"Messages: {len(fetched.messages)}")

        # A random UUID is expected to trigger the handler's HTTPException
        print("\nTesting with invalid conversation ID:")
        try:
            unknown_id = str(uuid.uuid4())
            fetched = await get_conversation(unknown_id)
            print(f"Unexpected success with invalid ID: {fetched}")
        except HTTPException as http_err:
            print(f"Expected error: {http_err.status_code} - {http_err.detail}")
    except Exception as err:
        print(f"Error: {str(err)}")

# Test the send message endpoint
async def test_send_message():
    """Send one message into a new conversation and a follow-up into the same one."""
    print("\n===== Testing SEND MESSAGE ENDPOINT =====")

    def show(reply):
        # Shared summary printer for both exchanges
        print(f"Conversation ID: {reply.conversation_id}")
        print(f"Response: {reply.response[:150]}...")
        if reply.tool_used:
            print(f"Tool used: {reply.tool_used}")
            print(f"Tool result preview: {reply.tool_result[:150] if reply.tool_result else 'None'}...")

    try:
        # First exchange: no conversation_id, so a new conversation is started
        print("\nTesting message with new conversation:")
        first_request = MessageRequest(message="Tell me about workspaces in SingleStore")
        first_reply = await send_message(first_request)
        show(first_reply)

        # Second exchange: reuse the conversation returned by the first one
        print("\nTesting message with existing conversation:")
        follow_up = MessageRequest(
            conversation_id=first_reply.conversation_id,
            message="Please provide more details about IVF_PQFS indexes"
        )
        second_reply = await send_message(follow_up)
        show(second_reply)
    except Exception as err:
        import traceback
        print(f"Error: {str(err)}")
        traceback.print_exc()

# Test recent conversations endpoint
async def test_recent_conversations():
    """Create three conversations, then list and print up to five recent ones."""
    print("\n===== Testing RECENT CONVERSATIONS ENDPOINT =====")
    try:
        # Seed some data so the listing is not empty
        for _ in range(3):
            await create_conversation()

        listing = await get_recent_conversations(limit=5)
        print(f"Retrieved {len(listing)} recent conversations")

        for position, item in enumerate(listing, start=1):
            print(f"\nConversation {position}:")
            print(f"  ID: {item['conversation_id']}")
            print(f"  Created: {item['created_at']}")
            print(f"  Updated: {item['last_updated']}")
    except Exception as err:
        import traceback
        print(f"Error: {str(err)}")
        traceback.print_exc()

# Test conversation preview endpoint
async def test_conversation_preview():
    """Preview a conversation holding two exchanges, then preview an unknown ID."""
    print("\n===== Testing CONVERSATION PREVIEW ENDPOINT =====")
    try:
        seed = await create_conversation()
        conv_id = seed.conversation_id
        print(f"Created conversation with ID: {conv_id}")

        # Populate the conversation with two user turns, sent one after another
        for prompt in ("Tell me about SingleStore", "How does vector search work?"):
            turn = MessageRequest(conversation_id=conv_id, message=prompt)
            await send_message(turn)

        print("\nTesting preview with valid conversation ID:")
        summary = await get_conversation_preview(conv_id, message_limit=2)
        print(f"Conversation ID: {summary['conversation_id']}")
        print(f"Total messages: {summary['total_messages']}")
        print(f"Recent messages: {len(summary['recent_messages'])}")

        for position, entry in enumerate(summary['recent_messages'], start=1):
            print(f"\nMessage {position}:")
            print(f"  Role: {entry['role']}")
            print(f"  Content: {entry['content'][:50]}...")
            print(f"  Timestamp: {entry['timestamp']}")

        # A random UUID is expected to trigger the handler's HTTPException
        print("\nTesting preview with invalid conversation ID:")
        try:
            unknown_id = str(uuid.uuid4())
            await get_conversation_preview(unknown_id)
            print("Unexpected success with invalid ID")
        except HTTPException as http_err:
            print(f"Expected error: {http_err.status_code} - {http_err.detail}")
    except Exception as err:
        import traceback
        print(f"Error: {str(err)}")
        traceback.print_exc()

# Test delete conversation endpoint
async def test_delete_conversation():
    """Delete a fresh conversation, confirm it is gone, then delete an unknown ID."""
    print("\n===== Testing DELETE CONVERSATION ENDPOINT =====")
    try:
        seed = await create_conversation()
        conv_id = seed.conversation_id
        print(f"Created conversation with ID: {conv_id}")

        print("\nDeleting the conversation:")
        outcome = await delete_conversation(conv_id)
        print(f"Result: {outcome}")

        # Reading the deleted conversation is expected to raise HTTPException
        print("\nTrying to access the deleted conversation:")
        try:
            await get_conversation(conv_id)
            print("Unexpected success accessing deleted conversation")
        except HTTPException as http_err:
            print(f"Expected error: {http_err.status_code} - {http_err.detail}")

        # Deleting a random UUID is expected to raise HTTPException as well
        print("\nTesting delete with invalid conversation ID:")
        try:
            unknown_id = str(uuid.uuid4())
            await delete_conversation(unknown_id)
            print("Unexpected success with invalid ID")
        except HTTPException as http_err:
            print(f"Expected error: {http_err.status_code} - {http_err.detail}")
    except Exception as err:
        import traceback
        print(f"Error: {str(err)}")
        traceback.print_exc()

# Test health check endpoint
async def test_health_endpoint():
    """Call the health-check handler directly and print each reported field."""
    print("\n===== Testing HEALTH CHECK ENDPOINT =====")
    try:
        # The /health handler is named dependencies_health_check; the previous
        # name used here (health_dep_check) does not exist, so this test always
        # ended in a NameError instead of exercising the endpoint.
        result = await dependencies_health_check()
        print(f"Health status: {result['status']}")
        print(f"Database: {result['database']}")
        print(f"LLM service: {result['llm_service']}")
        print(f"Timestamp: {result['timestamp']}")
    except Exception as e:
        print(f"Error: {str(e)}")
        import traceback
        traceback.print_exc()


# Test function for stream debugging in notebooks
async def test_stream_endpoint():
    """Drive the streaming endpoint in-process and print every chunk it emits.

    Intended for debugging in a notebook: the handler is called directly and
    its ``body_iterator`` is consumed here. When a ``tool_result`` event
    arrives, the tool name and the number of sources are printed.
    """
    test_request = MessageStreamRequest(
        conversation_id=None,
        message="How to create a vector index on singlestore table"
    )

    response = await send_message_stream(test_request)

    print("Stream response chunks:")
    print("-" * 50)

    # `tool_result` events carry no tool name, so remember it from the
    # preceding `tool_calling` / `tool_fallback` events instead.
    last_tool = "unknown"

    async for chunk in response.body_iterator:
        # Handle both bytes and string chunks
        if isinstance(chunk, bytes):
            chunk_str = chunk.decode('utf-8').strip()
        else:
            chunk_str = str(chunk).strip()

        if not chunk_str:
            continue

        print(f"CHUNK: {chunk_str}")
        print("-" * 50)

        # Only malformed JSON is tolerated; other errors should surface
        try:
            data = json.loads(chunk_str)
        except json.JSONDecodeError:
            continue
        if not isinstance(data, dict):
            continue

        event_type = data.get("type")
        if event_type == "tool_calling":
            last_tool = data.get("tool", last_tool)
        elif event_type == "tool_fallback":
            last_tool = data.get("to", last_tool)
        elif event_type == "tool_result":
            print("\nTOOL RESULT DETECTED:")
            print(f"Tool Used: {data.get('tool', last_tool)}")
            print(f"Sources: {len(data.get('sources') or [])}")

    print("Stream complete")

# Run all tests sequentially
async def run_all_tests():
    """Run every endpoint test once, sequentially, in a fixed order."""
    ordered_tests = (
        test_root_endpoint,
        test_create_conversation,
        test_get_conversation,
        test_send_message,
        test_stream_endpoint,
        test_recent_conversations,
        test_conversation_preview,
        test_delete_conversation,
        test_health_endpoint,
    )
    for run_test in ordered_tests:
        await run_test()

# Execute in the notebook
# await run_all_tests()