# In [4]:
from langgraph.graph import  END, START, MessagesState, StateGraph
from langchain_core.runnables import RunnableLambda
from fastapi import FastAPI, Request
from pydantic import BaseModel
from typing import List
from langchain_core.messages import get_buffer_string
from IPython.display import Image, display
from langgraph.prebuilt import ToolNode
from langchain_core.runnables import RunnableConfig
from langchain_core.prompts import ChatPromptTemplate
from langchain_huggingface.embeddings import HuggingFaceEmbeddings
from langchain_qdrant import QdrantVectorStore
from qdrant_client import QdrantClient

# Import necessary tools
from tools.memory import save_memories, search_memories
from tools.rag import vectorstore_collection_init, vectorstore_add_documents
from tools.llm import llm_chat_tool
from tools.search import search_tool

import os
from dotenv import load_dotenv
# Load settings from a local .env file (if present) before any lookups below.
load_dotenv()

# Name of the LangSmith project used for this bot.
os.environ["LANGSMITH_PROJECT"] = 'tg-bot'

# Qdrant endpoint taken from the environment; None when the variable is unset.
QDRANT_URL = os.environ.get("QDRANT_URL")

# Single Qdrant client shared by every collection created in this file.
client_qd = QdrantClient(
    url=QDRANT_URL,
)



class State(MessagesState):
    """Graph state: the chat history plus the memories recalled for it.

    ``messages`` is inherited from ``MessagesState`` and is deliberately not
    redeclared here. Re-annotating it as ``List[str]`` would replace the
    inherited message-list field (and the reducer attached to it), while
    ``route_tools`` reads ``.tool_calls`` from the last entry and
    ``get_buffer_string`` only accepts message objects.
    """

    # Memory snippets retrieved for the current conversation context; they
    # feed the ``{recall_memories}`` slot of the system prompt.
    recall_memories: List[str]

# In [7]:
# Location of the embedding model checkpoint.
# NOTE(review): looks like a path relative to the working directory - confirm.
emb_model_name = '../../../models/multilingual-e5-large-instruct'

# One embedding function, shared by every vector-store call in this file.
embeddings = HuggingFaceEmbeddings(
    model_name=emb_model_name,
)

# In [None]:
# Initialise both memory collections through the project helper. They share
# the client, the embedding function and the distance metric; only the
# collection name differs, so the common arguments are written once.
# Evaluation order is preserved: 'recall_memories' first, then
# 'long_term_memory'.
recall_memories, long_term_memory = (
    vectorstore_collection_init(
        client_qd=client_qd,
        collection_name=name,
        embeddings=embeddings,
        distance="Cosine",
    )
    for name in ('recall_memories', 'long_term_memory')
)


# In [None]:
# Chat prompt for the agent node.
#
# The first entry must be a ``(role, template)`` tuple: a bare string is
# treated by ``from_messages`` as a *human* message, which would also leak the
# literal text 'system' into the prompt. The template expects two variables:
# ``recall_memories`` (text block of retrieved memories) and ``messages``
# (the conversation so far).
#
# NOTE(review): the guidelines mention tools named save_core_memory and
# save_recall_memory, while this file imports save_memories / search_memories;
# confirm the names match the tools actually bound to the model.
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """
            You are a helpful assistant with advanced long-term memory capabilities. 
            Powered by a stateless LLM, you must rely on external tools and memory systems 
            to store information between conversations. You can also perform Retrieval-Augmented 
            Generation (RAG) to access relevant knowledge in real-time.

         
            ## MEMORY USAGE GUIDELINES
            
            1. Actively use memory tools (save_core_memory, save_recall_memory) to build a 
            comprehensive understanding of the user.
            2. Make informed suppositions and extrapolations based on stored memories.
            3. Regularly reflect on past interactions to identify patterns and preferences.
            4. Update your mental model of the user with each new piece of information.
            5. Cross-reference new information with existing memories for consistency.
            6. Store emotional context and personal values alongside factual information.
            7. Use memory to anticipate needs and tailor responses to the user’s style.
            8. Recognize and acknowledge changes in the user's situation or perspective.
            9. Leverage memories to provide personalized examples and analogies.
            10. Recall past challenges or successes to inform current problem-solving.

            
            ## RAG USAGE GUIDELINES
            
            - Use RAG every time you do internet search to get some external information.
            - Use RAG when you need up-to-date, domain-specific, or context-specific information
            - Use RAG to retrieve relevant documents or data that can enhance the conversation.
            - Use RAG to provide accurate and timely responses to user queries.
            - Use RAG to access a wide range of user conversation history.

            
            ## RECALL MEMORIES
            
            Recall memories are contextually retrieved based on the current conversation:
            {recall_memories}

            ## INSTRUCTIONS
           
            Engage with the user naturally, as a trusted colleague or friend. 
            Do not explicitly mention your memory or retrieval capabilities. 
            Instead, seamlessly integrate them into your responses. 
            Be attentive to subtle cues and underlying emotions. 
            Adapt your communication style to match the user's preferences and current emotional state. 
            If you use tools, call them internally and respond only after the tool operation 
            completes successfully.
        """,
        ),
        ("placeholder", "{messages}"),
    ]
)


# In [None]:




def load_memories(state: State, config: RunnableConfig) -> State:
    """Load memories relevant to the current conversation.

    The last three messages are flattened to text with ``get_buffer_string``
    and passed as the query to the ``search_memories`` tool.

    Args:
        state (State): The current state of the conversation. Its
            ``messages`` entries must be message objects that
            ``get_buffer_string`` can render.
        config (RunnableConfig): The runtime configuration for the agent,
            forwarded unchanged to ``search_memories``.

    Returns:
        State: A partial state update holding the search result under
        ``recall_memories``. The chat history in ``messages`` is not
        touched, so the conversation is never replaced by memory text.
    """
    # Only the tail of the conversation is used as the search query.
    # NOTE: no token-level truncation is applied to the query text.
    conv_str = get_buffer_string(state["messages"][-3:])

    # Local name differs from the module-level ``recall_memories`` object on
    # purpose, to avoid shadowing it inside this function.
    memories = search_memories.invoke(conv_str, config)
    return {
        "recall_memories": memories,
    }


def agent(state: State) -> State:
    """Process the current state and generate a response using the LLM.

    The prompt is filled with the conversation so far and a
    ``<recall_memory>`` block built from the recalled memories, then sent to
    ``llm_with_tools``. A non-empty textual reply is also written to the
    vector-store collection named by the module-level ``collection_name``.

    Args:
        state (State): The current state of the conversation.

    Returns:
        State: A partial state update whose ``messages`` list contains the
        model's reply, so that routing and tool execution can inspect it.
    """
    # Function-scope import: ``Document`` is not imported at module level.
    from langchain_core.documents import Document

    bound = prompt | llm_with_tools

    # ``.get`` keeps the node usable when no memories were loaded this turn.
    memories = state.get("recall_memories") or []
    recall_str = (
        "<recall_memory>\n"
        + "\n".join(str(memory) for memory in memories)
        + "\n</recall_memory>"
    )
    prediction = bound.invoke(
        {
            "messages": state["messages"],
            "recall_memories": recall_str,
        }
    )

    # Store only real text: a reply that consists solely of tool calls has
    # empty content, and embedding an empty document adds nothing.
    content = prediction.content
    if isinstance(content, str) and content.strip():
        vectorstore_add_documents(
            client_qd=client_qd,
            collection_name=collection_name,
            documents=[Document(page_content=content)],
            embeddings=embeddings,
        )

    # Hand the reply back to the graph; without this the response is lost
    # and ``route_tools`` would look at the wrong message.
    return {
        "messages": [prediction],
    }


def route_tools(state: State):
    """Choose the next graph step by inspecting the most recent message.

    Args:
        state (State): The current state of the conversation.

    Returns:
        ``"tools"`` when the last message carries tool calls, otherwise
        ``END``.
    """
    last_message = state["messages"][-1]
    return "tools" if last_message.tool_calls else END

# In [None]:
def build_agent():
    """Assemble and compile the memory-augmented agent graph.

    Graph layout::

        START -> load_memories -> agent -> END
                                   |  ^
                                   v  |
                                  tools

    ``route_tools`` decides after every ``agent`` step whether to run the
    tool node or to finish.

    Returns:
        The compiled graph with an in-memory checkpointer attached. Because
        a checkpointer is used, callers must pass a thread id when invoking,
        e.g. ``graph.invoke(state, {"configurable": {"thread_id": "..."}})``.
    """
    # Function-scope import: ``InMemorySaver`` is not imported at module level.
    from langgraph.checkpoint.memory import InMemorySaver

    builder = StateGraph(State)

    builder.add_node("load_memories", load_memories)
    builder.add_node("agent", agent)
    # NOTE(review): ``tools`` is not defined in the visible part of this file;
    # it must be the same tool list that is bound to ``llm_with_tools``.
    builder.add_node("tools", ToolNode(tools))

    builder.add_edge(START, "load_memories")
    builder.add_edge("load_memories", "agent")
    builder.add_conditional_edges("agent", route_tools, ["tools", END])
    builder.add_edge("tools", "agent")

    # Compile once and return that graph, so the checkpointer is really used
    # instead of being thrown away by a second, plain ``compile()``.
    return builder.compile(checkpointer=InMemorySaver())

# In [None]:
# Collection that ``agent`` writes the model replies to (read there as a
# module-level name).
collection_name = "recall_memory"

# NOTE(review): ``rag_tool`` is not imported in the visible part of this
# file; confirm where it comes from before running this cell.
vectorstore_recall = rag_tool(
    client_qd=client_qd,
    collection_name=collection_name,
    embeddings=embeddings,
)

# In [None]:
from fastapi import FastAPI, Request
from pydantic import BaseModel
# from langgraph_agent import build_agent

app = FastAPI()

# The compiled graph gets its own name so that the module-level ``agent``
# node function is not overwritten (a second ``build_agent()`` call would
# otherwise register the compiled graph as the "agent" node).
agent_graph = build_agent()


class UserInput(BaseModel):
    """Request body of ``POST /chat``."""

    # Text typed by the user.
    message: str
    # Conversation key for multi-user memory; optional, so clients that do
    # not track sessions share the "default" conversation.
    session_id: str = "default"


@app.post("/chat")
def chat(user_input: UserInput):
    """Run one conversation turn through the agent graph.

    Args:
        user_input (UserInput): The user's message and conversation key.

    Returns:
        dict: ``{"response": <text>}`` with the content of the last message
        in the resulting state, or a fallback apology when there is none.
    """
    # Function-scope import: ``HumanMessage`` is not imported at module level.
    from langchain_core.messages import HumanMessage

    # The graph state is keyed by "messages"; the session id selects the
    # conversation thread (required when the graph has a checkpointer,
    # harmless otherwise).
    state = {"messages": [HumanMessage(content=user_input.message)]}
    config = {"configurable": {"thread_id": user_input.session_id}}
    result = agent_graph.invoke(state, config)

    messages = result.get("messages") or []
    reply = getattr(messages[-1], "content", None) if messages else None
    return {"response": reply or "Sorry, something went wrong."}