In [None]:
!pip install -r requirements.txt

In [None]:
!pip install azure-ai-projects azure-search-documents requests python-dotenv

In [None]:
# Example function to generate document embedding
def generate_embedding(text: str):
    """Return the embedding vector for ``text``.

    The text is sent to ``client.embeddings.create`` using the deployment named
    by ``AZURE_OPENAI_EMBEDDING_DEPLOYED_MODEL_NAME``; the ``embedding`` of the
    first item in the response's ``data`` is returned.
    """
    # NOTE(review): `client` and AZURE_OPENAI_EMBEDDING_DEPLOYED_MODEL_NAME are
    # not defined in any cell above this one — confirm where they come from.
    create_embeddings = client.embeddings.create
    response = create_embeddings(
        model=AZURE_OPENAI_EMBEDDING_DEPLOYED_MODEL_NAME,
        input=text,
    )
    first_item = response.data[0]
    return first_item.embedding
# Generate an embedding for the provided text
generate_embedding("I love Azure AI Search!")

In [None]:
import os
import uuid
import requests
from datetime import datetime
from dotenv import load_dotenv

from azure.identity import AzureCliCredential
from azure.ai.projects.models import AzureAISearchTool
from azure.search.documents.indexes import SearchIndexClient
from azure.search.documents.indexes.models import (
    SearchIndex,
    SearchField,
    SimpleField,
    SearchFieldDataType,
    SearchableField,
    VectorSearch,
    VectorSearchProfile,
    VectorSearchAlgorithmConfiguration,
    ExhaustiveKnnAlgorithmConfiguration,
    ExhaustiveKnnParameters,
    HnswAlgorithmConfiguration,
    HnswParameters,
    VectorSearchAlgorithmKind,
    AzureOpenAIVectorizer,
    AzureOpenAIVectorizerParameters,
    AzureOpenAIModelName,
)
from azure.search.documents import SearchClient
from azure.core.credentials import AzureKeyCredential

In [None]:
# Load variables from a .env file (if one is found) into the process environment
load_dotenv()

# Connection settings; os.getenv returns None for any variable that is unset
AZURE_CONNECTION_STRING = os.getenv("AZURE_CONNECTION_STRING")
AZURE_SEARCH_ENDPOINT = os.getenv("AZURE_SEARCH_SERVICE_ENDPOINT")
AZURE_SEARCH_API_KEY = os.getenv("AZURE_SEARCH_ADMIN_KEY")
OPENAI_EMBEDDINGS_ENDPOINT = os.getenv("AZURE_INFERENCE_ENDPOINT")  # e.g. "https://<hostname>.openai.azure.com/openai/deployments/<deployment>"
OPENAI_EMBEDDINGS_KEY = os.getenv("AZURE_OPENAI_API_KEY")  # if key-based auth
# Vector length used for the embedding field. 1536 is common (e.g. text-embedding-ada-002);
# adjust it so it matches the output size of the embedding model you actually deploy.
EMBEDDING_DIM = 1536

# Name of the Azure AI Search index that stores the memory records
MEMORY_INDEX_NAME = "memory-index"
VECTOR_FIELD = "embedding"  # name of the field storing vector data

In [None]:
from azure.ai.inference import EmbeddingsClient

# We can do key-based authentication by default
from azure.core.credentials import AzureKeyCredential

# Build the embeddings client once, up front. Any exception raised while
# constructing it is printed instead of propagated, and `embeddings_client`
# is left as None in that case.
try:
    embeddings_client = EmbeddingsClient(
        endpoint=OPENAI_EMBEDDINGS_ENDPOINT,
        credential=AzureKeyCredential(OPENAI_EMBEDDINGS_KEY),
        api_version="2024-06-01"  # or whichever is valid for your Azure OpenAI resource
    )
    print("EmbeddingsClient successfully created.")
except Exception as e:
    print("Failed to create EmbeddingsClient. Check your endpoint/key. Error:", e)
    # Later cells receive None rather than a half-initialised client.
    embeddings_client = None

In [51]:
def create_or_update_memory_index(resource_url=None, deployment_name="text-embedding-3-small"):
    """
    Create the memory index in Azure AI Search, or update it if it exists.

    Schema:
      - id (key)
      - threadId (filterable)
      - role (filterable)
      - content (searchable, standard.lucene analyzer)
      - embedding (vector with EMBEDDING_DIM dimensions)

    Vector search uses one profile backed by exhaustive KNN (EKNN) and an
    Azure OpenAI vectorizer authenticated with OPENAI_EMBEDDINGS_KEY.

    Args:
        resource_url: Base URL of the Azure OpenAI resource the vectorizer
            should call, e.g. "https://<hostname>.openai.azure.com". When
            omitted, it is derived from the scheme and host of
            OPENAI_EMBEDDINGS_ENDPOINT so that the URL and the API key refer
            to the same resource instead of a hard-coded one.
        deployment_name: Name of the embedding deployment on that resource.
            The vectorizer's model name stays text-embedding-3-small, so the
            deployment is expected to serve that model.

    Returns:
        None. The outcome is printed; errors raised by the create/update call
        are printed rather than propagated.
    """
    # Local import keeps the function usable without touching the import cell.
    from urllib.parse import urlsplit

    if resource_url is None:
        parts = urlsplit(OPENAI_EMBEDDINGS_ENDPOINT or "")
        if not (parts.scheme and parts.netloc):
            print(
                f"Error creating/updating index '{MEMORY_INDEX_NAME}': "
                "no Azure OpenAI resource URL available "
                "(pass resource_url or set AZURE_INFERENCE_ENDPOINT)."
            )
            return
        # NOTE(review): assumes the embeddings endpoint is hosted on the Azure
        # OpenAI resource the vectorizer should use — confirm for your setup.
        resource_url = f"{parts.scheme}://{parts.netloc}"

    # Names that tie the vector field, profile, algorithm and vectorizer together.
    profile_name = "my-vector-profile"
    algorithm_name = "my-eknn"
    vectorizer_name = "my-vectorizer"

    index_client = SearchIndexClient(
        endpoint=AZURE_SEARCH_ENDPOINT,
        credential=AzureKeyCredential(AZURE_SEARCH_API_KEY),
    )

    fields = [
        SimpleField(name="id", type=SearchFieldDataType.String, key=True),
        SimpleField(name="threadId", type=SearchFieldDataType.String, filterable=True),
        SimpleField(name="role", type=SearchFieldDataType.String, filterable=True),
        SearchableField(
            name="content",
            type=SearchFieldDataType.String,
            analyzer_name="standard.lucene",
        ),
        SearchField(
            name=VECTOR_FIELD,
            type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
            searchable=True,
            vector_search_dimensions=EMBEDDING_DIM,
            vector_search_profile_name=profile_name,
        ),
    ]

    vector_search = VectorSearch(
        profiles=[
            VectorSearchProfile(
                name=profile_name,
                algorithm_configuration_name=algorithm_name,
                vectorizer_name=vectorizer_name,
            )
        ],
        vectorizers=[
            AzureOpenAIVectorizer(
                vectorizer_name=vectorizer_name,
                parameters=AzureOpenAIVectorizerParameters(
                    model_name=AzureOpenAIModelName.TEXT_EMBEDDING3_SMALL,
                    deployment_name=deployment_name,
                    resource_url=resource_url,
                    api_key=OPENAI_EMBEDDINGS_KEY,
                ),
            )
        ],
        algorithms=[
            ExhaustiveKnnAlgorithmConfiguration(name=algorithm_name),
        ],
    )

    new_index = SearchIndex(
        name=MEMORY_INDEX_NAME,
        fields=fields,
        vector_search=vector_search,
    )
    try:
        index_client.create_or_update_index(new_index)
        print(f"Index '{MEMORY_INDEX_NAME}' created or updated.")
    except Exception as ex:
        print(f"Error creating/updating index '{MEMORY_INDEX_NAME}': {ex}")

In [None]:
class MemoryTool:
    """Store and query conversation memories in an Azure AI Search index.

    Each memory is one search document with the fields ``id``, ``threadId``,
    ``role``, ``content`` and the vector field named by ``VECTOR_FIELD``.
    Embeddings are produced by the embeddings client passed to the constructor.
    """

    # Upper bound (seconds) for the REST search call so a stalled request
    # cannot block the notebook indefinitely.
    REQUEST_TIMEOUT_SECONDS = 30

    def __init__(self, search_endpoint, search_api_key, index_name, embeddings_client: "EmbeddingsClient"):
        """
        Args:
            search_endpoint: Base URL of the Azure AI Search service.
            search_api_key: API key used for both the SDK client and REST calls.
            index_name: Name of the index holding the memory documents.
            embeddings_client: Client whose ``embed(input=[...])`` returns embeddings.
                May be None, in which case any method that needs an embedding raises.
        """
        self.search_endpoint = search_endpoint
        self.search_api_key = search_api_key
        self.index_name = index_name
        self.embeddings_client = embeddings_client

        self.search_client = SearchClient(
            endpoint=self.search_endpoint,
            index_name=self.index_name,
            credential=AzureKeyCredential(self.search_api_key)
        )

    def _embed_text(self, text: str):
        """
        Embed a single text string and return its embedding vector.

        Raises:
            RuntimeError: if no embeddings client was supplied.
        """
        if self.embeddings_client is None:
            raise RuntimeError(
                "MemoryTool has no embeddings client; check the embeddings endpoint/key configuration."
            )
        # The client takes a list of inputs; we send one and read back the first result.
        resp = self.embeddings_client.embed(input=[text])
        return resp.data[0].embedding

    def store_memory(self, thread_id: str, role: str, content: str):
        """
        Store (mergeOrUpload) a memory record in Azure AI Search.

        Returns:
            The generated document id. It is returned even when the upload did
            not succeed; failures are only printed.
        """
        doc_id = str(uuid.uuid4())
        embedding_vector = self._embed_text(content)

        doc = {
            "id": doc_id,
            "threadId": thread_id,
            "role": role,
            "content": content,
            VECTOR_FIELD: embedding_vector
        }

        try:
            upload_result = self.search_client.merge_or_upload_documents(documents=[doc])
            if upload_result and upload_result[0].succeeded:
                print(f"[MemoryTool] Stored memory doc with id={doc_id}, threadId={thread_id}")
            else:
                print(f"[MemoryTool] Failed to store memory doc. Partial result: {upload_result}")
        except Exception as e:
            print("store_memory failed:", e)

        return doc_id

    def retrieve_memories(self, thread_id: str, user_query: str, k: int = 3):
        """
        Vector search for 'user_query', filtering on threadId eq <thread_id>.
        The query is sent as a direct REST call to the search endpoint.

        Returns:
            The list under the response's "value" key, or [] when the request fails.
        """
        query_vector = self._embed_text(user_query)

        # OData string literals escape a single quote by doubling it.
        safe_thread_id = str(thread_id).replace("'", "''")

        body = {
            "vectorQueries": [
                {
                    "kind": "vector",
                    "fields": VECTOR_FIELD,
                    # The vectorQueries shape carries the raw embedding under "vector".
                    "vector": query_vector,
                    "k": k
                }
            ],
            "filter": f"threadId eq '{safe_thread_id}'"
        }

        url = f"{self.search_endpoint}/indexes('{self.index_name}')/docs/search.post.search?api-version=2024-11-01-preview"
        headers = {
            "Content-Type": "application/json",
            "api-key": self.search_api_key
        }
        try:
            resp = requests.post(
                url, headers=headers, json=body, timeout=self.REQUEST_TIMEOUT_SECONDS
            )
            resp.raise_for_status()
            results_json = resp.json()
            return results_json.get("value", [])
        except Exception as e:
            print("retrieve_memories failed:", e)
            return []

    def update_memory(self, doc_id: str, new_content: str):
        """
        Re-embed the new content, then merge_or_upload again to update it.
        Only ``id``, ``content`` and the vector field are sent.
        """
        new_embedding = self._embed_text(new_content)
        doc = {
            "id": doc_id,
            "content": new_content,
            VECTOR_FIELD: new_embedding
        }
        try:
            update_result = self.search_client.merge_or_upload_documents(documents=[doc])
            if update_result and update_result[0].succeeded:
                print(f"[MemoryTool] Updated memory doc_id={doc_id}")
            else:
                print(f"[MemoryTool] Partial update failure: {update_result}")
        except Exception as e:
            print("update_memory failed:", e)

    def delete_memory(self, doc_id: str):
        """
        Delete doc by doc_id. The outcome is printed; nothing is returned.
        """
        try:
            delete_result = self.search_client.delete_documents(documents=[{"id": doc_id}])
            if delete_result and delete_result[0].succeeded:
                print(f"[MemoryTool] Deleted memory doc_id={doc_id}")
            else:
                print(f"[MemoryTool] Partial delete failure: {delete_result}")
        except Exception as e:
            print("delete_memory failed:", e)

In [44]:
# Run the index setup defined above; the function prints whether it succeeded.
create_or_update_memory_index()

Index 'memory-index' created or updated.


In [None]:
# AIProjectClient is used below but was not imported by any earlier cell,
# so it is imported here to keep Restart & Run All working.
from azure.ai.projects import AIProjectClient
from azure.identity import DefaultAzureCredential

# You can do AzureCliCredential or DefaultAzureCredential, etc.
credential = DefaultAzureCredential()

# 1) AIProjectClient, built from the project connection string
client = AIProjectClient.from_connection_string(
    credential=credential,
    conn_str=AZURE_CONNECTION_STRING
)
print("AIProjectClient created.")

# 2) MemoryTool instance wired to the search index and the embeddings client
#    (embeddings_client is None if its creation failed in the earlier cell)
my_memory_tool = MemoryTool(
    search_endpoint=AZURE_SEARCH_ENDPOINT,
    search_api_key=AZURE_SEARCH_API_KEY,
    index_name=MEMORY_INDEX_NAME,
    embeddings_client=embeddings_client
)

AIProjectClient created.


In [None]:
# Create a demo agent and a conversation thread; `thread_id` is what the
# memory cells below use to scope stored records.
try:
    # We'll not strictly need a search tool here, but let's do a minimal example
    agent = client.agents.create_agent(
        model="gpt-4o",  # or any valid deployment name
        name="memory-demo-assistant",
        instructions="You are a helpful memory-enabled agent.",
        # tools=[],
        # tool_resources=[]
    )
    print("Created agent:", agent)

    # create a new Thread
    thread = client.agents.create_thread()
    thread_id = thread.id
    print("Created thread:", thread_id)
except Exception as e:
    print("Agent/Thread creation error:", e)
    # Fall back to a random id so the memory demo can still run; note that
    # `agent` and `thread` may be left undefined on this path.
    thread_id = str(uuid.uuid4())  # fallback

Created agent: {'id': 'asst_QqdGgzGVoMVpEQxc2Ar3AG3p', 'object': 'assistant', 'created_at': 1741013165, 'name': 'memory-demo-assistant', 'description': None, 'model': 'gpt-4o', 'instructions': 'You are a helpful memory-enabled agent.', 'tools': [], 'top_p': 1.0, 'temperature': 1.0, 'tool_resources': {}, 'metadata': {}, 'response_format': 'auto'}
Created thread: thread_Sr0vSDsUhQlUk9R6oeA0CvdC


In [None]:
# Store one user message and one assistant reply as memories of the current thread.
# NOTE(review): the saved output of this cell is a DeploymentNotFound error —
# presumably raised while embedding; confirm AZURE_INFERENCE_ENDPOINT points at
# an existing embedding deployment.
user_content = "Hello! I'm planning a summer trip to Spain. I love outdoor hiking!"
user_doc_id = my_memory_tool.store_memory(thread_id, "user", user_content)

assistant_content = "Great! Spain has wonderful places to hike, like the Pyrenees."
assistant_doc_id = my_memory_tool.store_memory(thread_id, "assistant", assistant_content)

ResourceNotFoundError: (DeploymentNotFound) The API deployment for this resource does not exist. If you created the deployment within the last 5 minutes, please wait a moment and try again.
Code: DeploymentNotFound
Message: The API deployment for this resource does not exist. If you created the deployment within the last 5 minutes, please wait a moment and try again.

In [1]:
"""
DESCRIPTION:
    This sample demonstrates how to use Azure AI Search as a 'MemoryTool' with 
    Azure Agent Service, leveraging the Tool Function Calling lifecycle with
    structured output via Pydantic models.

USAGE:
    python sample_agents_memory_search.py

    Before running the sample:

    pip install azure-ai-projects azure-identity azure-search-documents pydantic

    Set these environment variables with your own values:
    1) AZURE_CONNECTION_STRING - The project connection string
    2) AZURE_SEARCH_SERVICE_ENDPOINT - The endpoint of your Azure Search service
    3) AZURE_SEARCH_ADMIN_KEY - The admin key for your Azure Search service
"""

import os
import json
import time
import uuid
import re
from datetime import datetime, UTC
from typing import List, Dict, Any, Optional, Union, Set, Callable
from enum import Enum
from pydantic import BaseModel, Field

from azure.identity import DefaultAzureCredential
from azure.ai.projects import AIProjectClient
from azure.ai.projects.models import (
    FunctionTool,
    ToolSet,
    RequiredFunctionToolCall,
    SubmitToolOutputsAction,
    ToolOutput,
    RunStatus,
    ResponseFormatJsonSchema,
    ResponseFormatJsonSchemaType,
)
from azure.search.documents import SearchClient
from azure.search.documents.indexes import SearchIndexClient
from azure.search.documents.indexes.models import (
    SearchIndex,
    SearchField,
    SearchFieldDataType,
    SimpleField,
    SearchableField,
)
from azure.core.credentials import AzureKeyCredential

# Configuration
# Connection settings come from the environment; os.getenv returns None when a variable is unset.
AZURE_CONNECTION_STRING = os.getenv("AZURE_CONNECTION_STRING")
AZURE_SEARCH_ENDPOINT = os.getenv("AZURE_SEARCH_SERVICE_ENDPOINT")
AZURE_SEARCH_ADMIN_KEY = os.getenv("AZURE_SEARCH_ADMIN_KEY")
MEMORY_INDEX_NAME = "fact-memory-index"
DEBUG_MODE = True  # Set to False to disable debug output
MEMORY_LIMIT = 3  # Default limit for memory retrieval
CORRECT_THREAD_ID = None  # Will be set when conversation starts

# Initialize clients
# Both clients are created as soon as this cell runs.
# NOTE(review): presumably this fails when the variables above are unset — confirm.
search_client = SearchClient(
    endpoint=AZURE_SEARCH_ENDPOINT,
    index_name=MEMORY_INDEX_NAME,
    credential=AzureKeyCredential(AZURE_SEARCH_ADMIN_KEY),
)

projects_client = AIProjectClient.from_connection_string(
    credential=DefaultAzureCredential(),
    conn_str=AZURE_CONNECTION_STRING,
)

# Define Pydantic models for structured outputs
# Categories a stored fact can be tagged with. Because the enum also
# subclasses str, each member compares equal to its plain string value.
class FactType(str, Enum):
    PERSONAL = "personal"
    PREFERENCE = "preference"
    PLAN = "plan"
    CONTACT = "contact"
    WORK = "work"
    OTHER = "other"

# One extracted fact: its text, its category and a confidence score.
# All three fields are required (`...`); confidence is bounded to [0.0, 1.0] via ge/le.
class Fact(BaseModel):
    content: str = Field(..., description="The fact content")
    fact_type: FactType = Field(..., description="The type of fact")
    confidence: float = Field(
        ..., description="Confidence score for this fact (0.0-1.0)", ge=0.0, le=1.0
    )

# Container model: a required list of Fact items.
class FactExtraction(BaseModel):
    facts: List[Fact] = Field(..., description="List of extracted facts")

# Helper function to properly extract text from messages
def extract_text(content):
    """
    Extract clean text from various message formats.

    Handled shapes:
      - None or an empty list        -> "" (there is no text to extract)
      - str                          -> returned unchanged
      - {"type": "text", "text": {"value": ...}} -> the value ("" if missing)
      - {"text": {"value": ...}}     -> the value
      - {"text": <anything else>}    -> str() of that object
      - {"value": ...}               -> str() of the value
      - non-empty list               -> text of the FIRST item only (recursive)
      - anything else                -> str(content)

    Returns:
        The extracted text. For the nested ``{"text": {"value": ...}}`` shapes
        the value is returned as stored, without str() coercion.
    """
    # Nothing to extract: avoid leaking the reprs "None" / "[]" as message text.
    if content is None:
        return ""

    if isinstance(content, str):
        return content

    if isinstance(content, dict):
        if "text" in content:
            inner = content["text"]
            if isinstance(inner, dict):
                # A typed text part tolerates a missing value and yields "".
                if content.get("type") == "text":
                    return inner.get("value", "")
                if "value" in inner:
                    return inner["value"]
            return str(inner)
        if "value" in content:
            return str(content["value"])
        return str(content)

    if isinstance(content, list):
        # Only the first item is used; later items are ignored.
        return extract_text(content[0]) if content else ""

    # Default fallback
    return str(content)

# Ensure memory index exists
def ensure_memory_index_exists():
    """Make sure the fact-memory index exists, creating it when it is missing.

    The index has the fields id (key), content (searchable), thread_id,
    fact_type, confidence (all filterable) and timestamp (filterable, sortable).
    When DEBUG_MODE is on, a one-document search is also run through the
    module-level ``search_client`` as a connection check; a failure there is
    printed but does not change the return value.

    Returns:
        bool: True when the index already existed or was created; False when
        any other step raised (the error is printed only when DEBUG_MODE is on).
    """
    # Local import: only this function needs the exception type.
    from azure.core.exceptions import ResourceNotFoundError

    try:
        # Initialize the search index client
        index_client = SearchIndexClient(
            endpoint=AZURE_SEARCH_ENDPOINT,
            credential=AzureKeyCredential(AZURE_SEARCH_ADMIN_KEY),
        )

        # Only "not found" means the index is missing. Authentication or
        # network errors reach the outer handler instead of being mistaken
        # for a missing index.
        try:
            index_client.get_index(name=MEMORY_INDEX_NAME)
            index_exists = True
        except ResourceNotFoundError:
            index_exists = False

        if index_exists:
            if DEBUG_MODE:
                print(f"✅ Memory index '{MEMORY_INDEX_NAME}' already exists")
        else:
            if DEBUG_MODE:
                print(f"🔧 Creating memory index '{MEMORY_INDEX_NAME}'...")

            # Define the index fields
            fields = [
                SimpleField(name="id", type=SearchFieldDataType.String, key=True),
                SearchableField(name="content", type=SearchFieldDataType.String),
                SimpleField(
                    name="thread_id", type=SearchFieldDataType.String, filterable=True
                ),
                SimpleField(
                    name="fact_type", type=SearchFieldDataType.String, filterable=True
                ),
                SimpleField(
                    name="confidence", type=SearchFieldDataType.Double, filterable=True
                ),
                SimpleField(
                    name="timestamp",
                    type=SearchFieldDataType.DateTimeOffset,
                    filterable=True,
                    sortable=True,
                ),
            ]

            # Create the index
            index_client.create_or_update_index(
                SearchIndex(name=MEMORY_INDEX_NAME, fields=fields)
            )

            if DEBUG_MODE:
                print(f"✅ Created memory index '{MEMORY_INDEX_NAME}'")

        # Test search client connection
        if DEBUG_MODE:
            print("✅ Testing search client connection...")
            try:
                # Results are lazy; list() forces the request to be sent.
                list(search_client.search(search_text="*", top=1))
                print("✅ Search client connection verified")
            except Exception as e:
                print(f"❌ ERROR with search client: {e}")

        return True
    except Exception as e:
        if DEBUG_MODE:
            print(f"❌ ERROR creating memory index: {e}")
        return False

# Define memory management functions for agent
def create_memory_functions():
    """Define memory management functions"""

    # CREATE memory function
    def store_memory_func(thread_id, content, fact_type="other", confidence=1.0):
        """Store a new fact in memory"""
        global CORRECT_THREAD_ID

        try:
            if DEBUG_MODE:
                print(f"🛠️ Tool called: store_memory_func")
                print(f"  - Thread ID: {thread_id}")
                print(
                    f"  - Content: {content[:50]}..."
                    if len(content) > 50
                    else f"  - Content: {content}"
                )
                print(f"  - Fact type: {fact_type}")
                print(f"  - Confidence: {confidence}")

            # Always use the correct thread_id
            if CORRECT_THREAD_ID and thread_id != CORRECT_THREAD_ID:
                if DEBUG_MODE:
                    print(
                        f"⚠️ Thread ID mismatch! Using {CORRECT_THREAD_ID} instead of {thread_id}"
                    )
                thread_id = CORRECT_THREAD_ID

            doc_id = str(uuid.uuid4())
            doc = {
                "id": doc_id,
                "thread_id": thread_id,
                "content": content,
                "fact_type": fact_type,
                "confidence": confidence,
                "timestamp": datetime.now(UTC),
            }

            result = search_client.upload_documents([doc])

            if DEBUG_MODE:
                print(
                    f"💾 STORED: [fact] {content[:50]}..."
                    if len(content) > 50
                    else f"💾 STORED: [fact] {content}"
                )

            return json.dumps({"id": doc_id, "status": "success"})
        except Exception as e:
            if DEBUG_MODE:
                print(f"❌ ERROR storing fact: {e}")
            return json.dumps({"error": str(e)})

    # READ memory function
    def retrieve_memories_func(
        thread_id, query="", limit=MEMORY_LIMIT, min_confidence=0.0
    ):
        """Retrieve relevant facts from memory"""
        global CORRECT_THREAD_ID

        try:
            if DEBUG_MODE:
                print(f"🛠️ Tool called: retrieve_memories_func")
                print(f"  - Thread ID: {thread_id}")
                print(f"  - Query: '{query}'")
                print(f"  - Limit: {limit}")
                print(f"  - Min confidence: {min_confidence}")

            # Always use the correct thread_id
            if CORRECT_THREAD_ID and thread_id != CORRECT_THREAD_ID:
                if DEBUG_MODE:
                    print(
                        f"⚠️ Thread ID mismatch! Using {CORRECT_THREAD_ID} instead of {thread_id}"
                    )
                thread_id = CORRECT_THREAD_ID

            filter_expr = f"thread_id eq '{thread_id}' and confidence ge {min_confidence}"

            if query:
                # Semantic search with filter
                results = search_client.search(
                    search_text=query,
                    filter=filter_expr,
                    top=limit,
                    select="id,content,fact_type,confidence,timestamp",
                )
                if DEBUG_MODE:
                    print(f"🔍 Retrieving memories for query: '{query}'")
            else:
                # Get most recent facts
                results = search_client.search(
                    search_text="*",
                    filter=filter_expr,
                    top=limit,
                    order_by="timestamp desc",
                    select="id,content,fact_type,confidence,timestamp",
                )
                if DEBUG_MODE:
                    print(f"🔍 Retrieving {limit} most recent memories")

            memories = []
            for r in results:
                memories.append(
                    {
                        "id": r.get("id", ""),
                        "content": r.get("content", ""),
                        "fact_type": r.get("fact_type", "other"),
                        "confidence": r.get("confidence", 1.0),
                    }
                )

            if DEBUG_MODE and memories:
                print(f"🔍 Retrieved {len(memories)} memories")

            return json.dumps({"memories": memories, "count": len(memories)})
        except Exception as e:
            if DEBUG_MODE:
                print(f"❌ ERROR retrieving memories: {e}")
            return json.dumps({"error": str(e)})

    # UPDATE memory function
    def update_memory_func(
        thread_id, memory_id, new_content, fact_type=None, confidence=None
    ):
        """Update an existing fact in memory"""
        global CORRECT_THREAD_ID

        try:
            if DEBUG_MODE:
                print(f"🛠️ Tool called: update_memory_func")
                print(f"  - Thread ID: {thread_id}")
                print(f"  - Memory ID: {memory_id}")
                print(
                    f"  - New content: {new_content[:50]}..."
                    if len(new_content) > 50
                    else f"  - New content: {new_content}"
                )

            # Always use the correct thread_id
            if CORRECT_THREAD_ID and thread_id != CORRECT_THREAD_ID:
                if DEBUG_MODE:
                    print(
                        f"⚠️ Thread ID mismatch! Using {CORRECT_THREAD_ID} instead of {thread_id}"
                    )
                thread_id = CORRECT_THREAD_ID

            # First retrieve the existing document
            try:
                existing_doc = search_client.get_document(key=memory_id)
            except Exception as e:
                if DEBUG_MODE:
                    print(f"❌ ERROR retrieving document for update: {e}")
                return json.dumps({"error": f"Document with ID {memory_id} not found"})

            # Prepare the updated document
            doc = {
                "id": memory_id,
                "thread_id": thread_id,
                "content": new_content,
                "fact_type": fact_type or existing_doc.get("fact_type", "other"),
                "confidence": confidence or existing_doc.get("confidence", 1.0),
                "timestamp": datetime.now(UTC),
            }

            # Update the document
            result = search_client.merge_documents([doc])

            if DEBUG_MODE:
                print(
                    f"✏️ UPDATED: [fact] {new_content[:50]}..."
                    if len(new_content) > 50
                    else f"✏️ UPDATED: [fact] {new_content}"
                )

            return json.dumps({"id": memory_id, "status": "updated"})
        except Exception as e:
            if DEBUG_MODE:
                print(f"❌ ERROR updating memory: {e}")
            return json.dumps({"error": str(e)})

    # DELETE memory function
    def delete_memory_func(memory_id):
        """Delete a fact from memory"""
        try:
            if DEBUG_MODE:
                print(f"🛠️ Tool called: delete_memory_func")
                print(f"  - Memory ID: {memory_id}")

            # Delete the document
            search_client.delete_documents([{"id": memory_id}])

            if DEBUG_MODE:
                print(f"🗑️ DELETED: [fact] {memory_id}")

            return json.dumps({"id": memory_id, "status": "deleted"})
        except Exception as e:
            if DEBUG_MODE:
                print(f"❌ ERROR deleting memory: {e}")
            return json.dumps({"error": str(e)})

    return {
        store_memory_func,
        retrieve_memories_func,
        update_memory_func,
        delete_memory_func,
    }

# Log memory operations (always visible regardless of DEBUG_MODE)
def log_memory_operation(operation, details=None):
    """Print a one-line banner for a memory operation, then a separator.

    The banner depends on ``operation``: "store", "retrieve", "update" and
    "delete" each have their own headline, and any other value gets a generic
    one that includes the operation name. For "retrieve", an integer
    ``details`` is shown as a count of facts. Output is always printed,
    independent of DEBUG_MODE.
    """
    if operation == "store":
        headline = f"📝 MEMORY STORED: {details}"
    elif operation == "retrieve":
        # An int is a number of facts; anything else is printed as given.
        unit = " fact(s)" if isinstance(details, int) else ""
        headline = f"🔍 MEMORIES RETRIEVED: {details}{unit}"
    elif operation == "update":
        headline = f"✏️ MEMORY UPDATED: {details}"
    elif operation == "delete":
        headline = f"🗑️ MEMORY DELETED: ID {details}"
    else:
        headline = f"📊 MEMORY OPERATION [{operation}]: {details}"

    # Blank line, headline, then a 40-dash rule so the entry stands out.
    print(f"\n{headline}", "-" * 40, sep="\n")

# Process function calls from the agent
def _describe_memory_call(function_name, arguments):
    """Log one memory tool call and return its summary label (None for other tools)."""

    def _clip(text, limit=100):
        return text[:limit] + ("..." if len(text) > limit else "")

    if function_name == "store_memory_func":
        args = json.loads(arguments)
        content = args.get("content", "")
        fact_type = args.get("fact_type", "other")
        log_memory_operation("store", f"[{fact_type}] {_clip(content)}")
        return "stored new fact"

    if function_name == "retrieve_memories_func":
        args = json.loads(arguments)
        query = args.get("query", "")
        log_memory_operation("retrieve", f"Query: '{query}'" if query else "Recent facts")
        return "retrieved facts"

    if function_name == "update_memory_func":
        args = json.loads(arguments)
        memory_id = args.get("memory_id", "")
        new_content = args.get("new_content", "")
        log_memory_operation("update", f"ID {memory_id[:8]}... - {_clip(new_content)}")
        return "updated fact"

    if function_name == "delete_memory_func":
        args = json.loads(arguments)
        log_memory_operation("delete", args.get("memory_id", ""))
        return "deleted fact"

    return None


def _report_retrieval(output):
    """Best-effort display of what retrieve_memories_func returned; never raises."""
    try:
        result = json.loads(output)
        if "memories" in result and "count" in result:
            count = result["count"]
            log_memory_operation("retrieve", f"{count} fact(s) returned")

            # If facts were retrieved, show them (debug mode already prints them)
            if count > 0 and not DEBUG_MODE:
                print("📋 Retrieved facts:")
                for i, memory in enumerate(result["memories"]):
                    print(f"  {i+1}. [{memory.get('fact_type', 'other')}] {memory.get('content', '')}")
    except Exception as e:
        # Reporting is cosmetic: a malformed payload must not affect the tool output.
        if DEBUG_MODE:
            print(f"⚠️ Could not summarize retrieved memories: {e}")


# Process function calls from the agent
def process_function_calls(thread_id, run_id, tool_calls):
    """Process function calls from the agent and submit tool outputs

    Every ``RequiredFunctionToolCall`` in ``tool_calls`` is executed through a
    ``FunctionTool`` built from ``create_memory_functions()``. Other kinds of
    tool call are ignored. A call that raises still gets an output — a JSON
    ``{"error": ...}`` payload — so the submission covers every function call
    and the agent can react to the failure.

    Args:
        thread_id: Thread the run belongs to.
        run_id: Run waiting for tool outputs.
        tool_calls: Iterable of tool-call objects from the run's required action.

    Returns:
        int: Number of tool outputs submitted (failed calls included), or 0 if
        processing or submission raised.
    """
    try:
        function_tool = FunctionTool(create_memory_functions())

        tool_outputs = []
        memory_ops_summary = []

        for tool_call in tool_calls:
            if not isinstance(tool_call, RequiredFunctionToolCall):
                continue

            try:
                function_name = tool_call.function.name
                arguments = tool_call.function.arguments

                # Always show when memory functions are called (regardless of DEBUG_MODE)
                label = _describe_memory_call(function_name, arguments)
                if label:
                    memory_ops_summary.append(label)

                if DEBUG_MODE:
                    print(f"🛠️ Processing tool call: {function_name}")
                    print(f"  - Arguments: {arguments}")

                # Execute the function
                output = function_tool.execute(tool_call)

                # For retrieve_memories_func, show how many memories were retrieved
                if function_name == "retrieve_memories_func":
                    _report_retrieval(output)
            except Exception as e:
                print(f"❌ Error executing tool call {tool_call.id}: {e}")
                # Report the failure back instead of dropping the call, so no
                # function call is left without an output.
                output = json.dumps({"error": str(e)})

            tool_outputs.append(ToolOutput(tool_call_id=tool_call.id, output=output))

        if tool_outputs:
            if DEBUG_MODE:
                print(f"📤 Submitting {len(tool_outputs)} tool outputs")

            projects_client.agents.submit_tool_outputs_to_run(
                thread_id=thread_id, run_id=run_id, tool_outputs=tool_outputs
            )

            if memory_ops_summary:
                print(f"📊 MEMORY OPERATIONS SUMMARY: {', '.join(memory_ops_summary)}")

        return len(tool_outputs)
    except Exception as e:
        print(f"❌ Error processing function calls: {e}")
        return 0

# Create memory agent with function calling
def create_memory_agent():
    """Register a new agent that manages user facts through function calls.

    The callables returned by ``create_memory_functions()`` are wrapped in a
    ``FunctionTool`` and its definitions are passed to
    ``projects_client.agents.create_agent`` together with the memory-management
    prompt below. The agent is created on the "gpt-4o" model under a name with a
    six-character random hex suffix.

    Returns:
        The agent object returned by ``projects_client.agents.create_agent``.
    """
    # System prompt: describes the four memory functions and how to use them
    agent_instructions = """You are an assistant with memory management capabilities.

IMPORTANT: At the start of each conversation, ALWAYS use retrieve_memories_func to check what you know about the user.

You have access to four memory management functions:
1. store_memory_func(thread_id, content, fact_type, confidence) - Store a new fact about the user
2. retrieve_memories_func(thread_id, query, limit, min_confidence) - Retrieve relevant facts
3. update_memory_func(thread_id, memory_id, new_content, fact_type, confidence) - Update an existing fact
4. delete_memory_func(memory_id) - Delete a fact that is no longer relevant

When interacting with users:
1. ALWAYS begin by retrieving and reviewing relevant memories using retrieve_memories_func
2. When users share important information about themselves, IMMEDIATELY store it using store_memory_func
3. If information needs updating, use update_memory_func to keep facts current
4. Use the fact_type parameter to categorize facts (personal, preference, plan, contact, work, other)
5. Use the confidence parameter (0.0-1.0) to indicate how certain you are about a fact

CRITICAL: The thread_id must ALWAYS be the EXACT thread_id that was provided to you. 
Never modify, shorten, or create your own thread_id. Always use the full thread_id exactly as given.

Examples of facts to store:
- Personal facts: "User's name is Alex", "User is 32 years old"
- Preferences: "User prefers vegetarian food", "User enjoys hiking"
- Plans: "User is planning a trip to Japan in June", "User has a meeting tomorrow"
- Work: "User works as a software engineer", "User is working on a data analysis project"

Be proactive in memory management - don't wait for explicit instructions to store facts.
NEVER mention the memory system to users - just naturally incorporate what you know."""

    # Expose the memory functions to the agent as one function tool
    memory_tool = FunctionTool(create_memory_functions())

    # Random hex suffix appended to the agent name
    agent_name = f"memory-agent-{uuid.uuid4().hex[:6]}"

    memory_agent = projects_client.agents.create_agent(
        model="gpt-4o",
        name=agent_name,
        instructions=agent_instructions,
        tools=memory_tool.definitions,
    )

    if DEBUG_MODE:
        print(f"🤖 Memory agent created with ID: {memory_agent.id}")

    return memory_agent

# Start conversation with memory agent
def start_conversation(existing_thread_id=None):
    """Run an interactive console conversation with the memory agent.

    Verifies the memory index, creates a fresh memory agent, then reads user
    input in a loop until the user types 'exit'. Each non-command input is
    posted to the thread, a run is created and polled once per second, and any
    tool calls the run requests are handed to ``process_function_calls``.
    After the assistant's reply, a per-turn summary of the memory functions
    requested during that run is printed (unless hidden with 'memory off').

    Console commands: 'exit', 'facts' / 'list facts' / 'show facts',
    'memory on', 'memory off'.

    Args:
        existing_thread_id: ID of a thread to continue. When falsy, a new
            thread is created.

    Returns:
        The thread ID used for the conversation, or None when the memory index
        could not be created or verified.
    """
    # Declared once, up front: CORRECT_THREAD_ID is rebound here (and read by
    # list_all_facts); DEBUG_MODE is toggled by the 'memory on/off' commands.
    global CORRECT_THREAD_ID, DEBUG_MODE

    # Ensure memory index exists before proceeding
    if not ensure_memory_index_exists():
        print("❌ Failed to create or verify memory index. Exiting.")
        return

    # Create memory agent
    memory_agent = create_memory_agent()

    # Create or continue thread and set global thread_id
    if existing_thread_id:
        thread_id = existing_thread_id
        CORRECT_THREAD_ID = thread_id
        print(f"🔄 Continuing conversation with Thread ID: {thread_id}")
    else:
        thread = projects_client.agents.create_thread()
        thread_id = thread.id
        CORRECT_THREAD_ID = thread_id
        print(f"🆕 Started new conversation (Thread ID: {thread_id})")

    print("=" * 50)
    print("🧠 MEMORY-ENHANCED AGENT CONVERSATION")
    print("=" * 50)
    print("Commands:")
    print("  'exit' - End conversation")
    print("  'facts' - List all stored facts")
    print("  'memory on/off' - Toggle memory operation visibility")
    print("=" * 50)

    # Controls whether the per-turn memory usage summary is printed
    show_memory_ops = True

    # Main conversation loop
    while True:
        # Get user input
        user_input = input("\n😀 You: ").strip()
        if not user_input:
            print("⚠️ Input cannot be empty. Please try again.")
            continue

        # Handle special commands
        command = user_input.lower()

        if command == "exit":
            break

        if command in ["facts", "list facts", "show facts"]:
            list_all_facts(thread_id)
            continue

        if command == "memory on":
            show_memory_ops = True
            DEBUG_MODE = True
            print("🔔 Memory operations will now be shown")
            continue

        if command == "memory off":
            show_memory_ops = False
            DEBUG_MODE = False
            print("🔕 Memory operations will be hidden")
            continue

        try:
            # Add message to thread
            projects_client.agents.create_message(
                thread_id=thread_id, role="user", content=user_input
            )

            if DEBUG_MODE:
                print("⏳ Processing...")

            # Create a run
            run = projects_client.agents.create_run(
                thread_id=thread_id, assistant_id=memory_agent.id
            )

            # Memory function calls requested during this turn. They are
            # counted while the run is being polled because the finished run no
            # longer carries a required_action to inspect afterwards.
            memory_ops = {"retrieve": 0, "store": 0, "update": 0, "delete": 0}

            # Process the run and handle any function calls
            while run.status in ["queued", "in_progress", "requires_action"]:
                time.sleep(1)
                run = projects_client.agents.get_run(thread_id=thread_id, run_id=run.id)

                if DEBUG_MODE:
                    print(f"🔄 Run status: {run.status}")

                if run.status == "requires_action" and isinstance(run.required_action, SubmitToolOutputsAction):
                    tool_calls = run.required_action.submit_tool_outputs.tool_calls
                    if tool_calls:
                        process_function_calls(thread_id, run.id, tool_calls)
                        _tally_memory_ops(tool_calls, memory_ops)

            if run.status != RunStatus.COMPLETED:
                if DEBUG_MODE:
                    print(f"❌ Run completed with non-success status: {run.status}")

            # Get assistant response: newest assistant message on the thread
            messages = projects_client.agents.list_messages(thread_id=thread_id)
            latest = None

            for msg in messages.data:
                if msg.role == "assistant" and (
                    not latest or msg.created_at > latest.created_at
                ):
                    latest = msg

            if latest:
                # Extract and display clean text
                response_text = extract_text(latest.content)
                print(f"\n🤖 Assistant: {response_text}")

                # Show summary of memory usage for this turn
                if show_memory_ops:
                    _print_memory_usage(memory_ops)
            else:
                print("❌ No response received.")

        except Exception as e:
            print(f"❌ Error: {e}")
            import traceback
            traceback.print_exc()

    # Clean up and return thread_id
    if DEBUG_MODE:
        print(f"\n👋 Conversation ended. Thread ID: {thread_id}")

    return thread_id


def _tally_memory_ops(tool_calls, memory_ops):
    """Increment the counters in ``memory_ops`` for each memory function call in ``tool_calls``."""
    op_by_function = {
        "retrieve_memories_func": "retrieve",
        "store_memory_func": "store",
        "update_memory_func": "update",
        "delete_memory_func": "delete",
    }
    for tool_call in tool_calls:
        if isinstance(tool_call, RequiredFunctionToolCall):
            op = op_by_function.get(tool_call.function.name)
            if op is not None:
                memory_ops[op] += 1


def _print_memory_usage(memory_ops):
    """Print the per-turn memory usage summary; prints nothing when every counter is zero."""
    if sum(memory_ops.values()) == 0:
        return

    print("\n📊 MEMORY USAGE THIS TURN:")
    labels = (
        ("retrieve", "Retrieved facts"),
        ("store", "Stored new facts"),
        ("update", "Updated facts"),
        ("delete", "Deleted facts"),
    )
    for op, label in labels:
        if memory_ops[op] > 0:
            print(f"  • {label}: {memory_ops[op]} time(s)")

# List all facts in memory
def list_all_facts(thread_id):
    """Print and return every fact stored for a thread.

    Queries ``search_client`` for up to 100 documents whose ``thread_id``
    matches, newest first, and prints one numbered line per fact in the form
    ``N. [fact_type - confidence] content``.

    Args:
        thread_id: Thread whose facts should be listed. When the module-level
            ``CORRECT_THREAD_ID`` is set, it takes precedence over this value.

    Returns:
        The list of matching search results, or an empty list when the lookup
        raises (the error is printed rather than propagated).
    """
    try:
        # Ensure we use the correct thread_id (read-only, so no `global` needed)
        if CORRECT_THREAD_ID:
            thread_id = CORRECT_THREAD_ID

        # OData string literals escape an embedded single quote by doubling it;
        # without this a quote in the ID would break out of the filter literal.
        escaped_thread_id = str(thread_id).replace("'", "''")

        # Get all facts for this thread
        results = search_client.search(
            search_text="*",
            filter=f"thread_id eq '{escaped_thread_id}'",
            top=100,
            order_by="timestamp desc",
            select="id,content,fact_type,confidence,timestamp",
        )

        facts = list(results)

        print("\n📋 ALL STORED FACTS:")
        if not facts:
            print(f"No facts found for thread ID: {thread_id}")
            return facts

        for position, fact in enumerate(facts, start=1):
            # Fields may be absent or null; fall back to the same defaults
            # for both cases so one incomplete fact cannot abort the listing.
            fact_type = fact.get("fact_type") or "other"
            content = fact.get("content") or ""
            confidence = fact.get("confidence")
            if confidence is None:
                confidence = 1.0
            try:
                confidence_text = f"{float(confidence):.2f}"
            except (TypeError, ValueError):
                # Non-numeric confidence: show it as stored instead of failing
                confidence_text = str(confidence)

            print(f"{position}. [{fact_type} - {confidence_text}] {content}")

        return facts
    except Exception as e:
        print(f"❌ ERROR listing facts: {e}")
        return []

# Main function
if __name__ == "__main__":
    # Demo entry point: run one interactive conversation, then optionally list
    # the facts stored for its thread.
    try:
        print("🧠 Starting Memory Agent Demo")
        thread_id = start_conversation()

        # start_conversation() returns None (after printing the reason) when
        # the memory index could not be verified. In that case there is no
        # thread to list facts for and nothing completed successfully.
        if thread_id is not None:
            # List all stored facts at the end
            print("\n🔍 Would you like to see all stored facts? (y/n)")
            if input().strip().lower() == 'y':
                list_all_facts(thread_id)

            print("\n✨ Memory Agent Demo completed successfully!")
    except Exception as e:
        print(f"❌ An error occurred: {e}")
        import traceback
        traceback.print_exc()

🧠 Starting Memory Agent Demo
✅ Memory index 'fact-memory-index' already exists
✅ Testing search client connection...
✅ Search client connection verified
🤖 Memory agent created with ID: asst_BTmmgYfiVtab7EK4XnvCbCS1
🆕 Started new conversation (Thread ID: thread_1aMyqdwxEjyQsVyHr37iXHnb)
🧠 MEMORY-ENHANCED AGENT CONVERSATION
Commands:
  'exit' - End conversation
  'facts' - List all stored facts
  'memory on/off' - Toggle memory operation visibility
⚠️ Input cannot be empty. Please try again.
⏳ Processing...
🔄 Run status: RunStatus.REQUIRES_ACTION

📝 MEMORY STORED: [personal] User's name is Farzad
----------------------------------------
🛠️ Processing tool call: store_memory_func
  - Arguments: {"thread_id":"89a4e85a-267f-4136-8733-7f5a6940b719","content":"User's name is Farzad","fact_type":"personal","confidence":"1.0"}
🛠️ Tool called: store_memory_func
  - Thread ID: 89a4e85a-267f-4136-8733-7f5a6940b719
  - Content: User's name is Farzad
  - Fact type: personal
  - Confidence: 1.0
⚠️ Th