In [None]:
import sys
import os
from pathlib import Path

# Make the project root importable so that `from src...` imports resolve.
# Start from the directory the kernel is currently running in.
cwd = Path().resolve()

# When the kernel runs inside notebooks/, the root is one directory up;
# in every other case the working directory itself is taken as the root.
project_root = cwd.parent if cwd.name == "notebooks" else cwd

# Prepend the root only when it is missing, so re-running this cell
# does not add a duplicate sys.path entry.
if sys.path.count(str(project_root)) == 0:
	sys.path.insert(0, str(project_root))


In [None]:
from langchain.agents import create_agent
from langchain.tools import tool
from src.rag.retrieval.index import retrieve_context
from src.rag.retrieval.utils import prepare_prompt_and_invoke_llm
from langgraph.graph import MessagesState
from typing import Any, List, Dict
from typing_extensions import Annotated
from langgraph.types import Command
from langchain_core.tools.base import InjectedToolCallId
from langchain_core.messages import ToolMessage

In [10]:
# Custom agent state: the MessagesState fields plus a list of citations
def _merge_citations(existing: List[Dict[str, Any]], incoming: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
	"""Combine two citation lists by concatenating ``incoming`` onto ``existing``."""
	return existing + incoming


class CustomAgentState(MessagesState):
	"""MessagesState extended with a ``citations`` field."""
	# _merge_citations is attached as Annotated metadata for this field, so that
	# citations accumulate across tool calls instead of replacing each other
	citations: Annotated[List[Dict[str, Any]], _merge_citations] = []

In [None]:
# Factory function to create RAG tool with project_id bound
def create_rag_tool(project_id: str):
	"""Create a RAG search tool bound to a specific project.

	Args:
		project_id: Identifier passed to ``retrieve_context`` on every search,
			so the returned tool only needs the query from the caller.

	Returns:
		The ``rag_search`` tool. Each call returns a ``Command`` whose update
		carries one ``ToolMessage`` and, on a successful search, the
		``citations`` produced by ``retrieve_context``.
	"""

	def _reply(text, tool_call_id, citations=None):
		"""Wrap ``text`` in a ToolMessage state update, optionally adding citations."""
		update = {
			"messages": [ToolMessage(content=text, tool_call_id=tool_call_id)]
		}
		# The "citations" key is only written when a search actually produced some
		# context; the no-result and error replies leave the stored citations alone.
		if citations is not None:
			update["citations"] = citations
		return Command(update=update)

	@tool
	def rag_search( 
		query: str,
		tool_call_id: Annotated[str, InjectedToolCallId],
	) -> Command:
		"""
		Search through project documents using RAG (Retrieval-Augmented Generation).
		This tool retrieves relevant context from the current project's documents based on the query.
		
		Args:
			query: The search query or question to find relevant information
			
		Returns:
			A formatted string containing the retrieved context and answer based on the documents
		"""
		try:
			# Retrieve context using the existing RAG pipeline
			texts, images, tables, citations = retrieve_context(project_id, query)

			# NOTE(review): only `texts` is checked here, so a retrieval that yields
			# just tables/images is reported as "nothing found" - confirm intended.
			if not texts:
				return _reply(
					"No relevant information found in the project documents for this query.",
					tool_call_id,
				)

			# Prepare the response using the existing LLM preparation function
			response = prepare_prompt_and_invoke_llm(
				user_query=query,
				texts=texts,
				images=images,
				tables=tables
			)

			# Normalise citations to a list while still inside the try block: the
			# state combines this value with the stored list via `+`, and a None
			# or tuple would raise there, where this except clause cannot catch it.
			return _reply(response, tool_call_id, citations=list(citations or []))
		except Exception as e:
			# Best-effort: surface the failure to the agent as a tool message
			# instead of raising out of the tool call.
			return _reply(f"Error retrieving information: {e}", tool_call_id)

	return rag_search

In [12]:
# Create agent with project-specific RAG tool

# System prompt given to every agent built by create_rag_agent. It instructs the
# model to call `rag_search` before answering. Apostrophes are plain ASCII so the
# text sent to the model cannot be corrupted by an encoding round-trip.
RAG_AGENT_SYSTEM_PROMPT = """You are a helpful AI assistant with access to a RAG (Retrieval-Augmented Generation) tool that searches project-specific documents.

For every user question:

1. Do not assume any question is purely conceptual or general.  
2. Use the `rag_search` tool immediately with a clear and relevant query derived from the user's question.  
3. Carefully review the retrieved documents and base your entire answer on the RAG results.  
4. If the retrieved information fully answers the user's question, respond clearly and completely using that information.  
5. If the retrieved information is insufficient or incomplete, explicitly state that and provide helpful suggestions or guidance based on what you found.  
6. Always present answers in a clear, well-structured, and conversational manner.

**Never answer without first querying the RAG tool. This ensures every response is grounded in project-specific context and documentation.**
"""


def create_rag_agent(project_id: str, model: str = "gpt-4o"):
	"""Create an agent with a RAG tool for a specific project.

	Args:
		project_id: Project identifier handed to ``create_rag_tool``; the
			resulting tool is the agent's only tool.
		model: Model identifier forwarded unchanged to ``create_agent``.

	Returns:
		The object returned by ``create_agent``, configured with the RAG
		system prompt and ``CustomAgentState`` as its state schema.
	"""
	# Create tools list with project-specific RAG tool
	tools = [create_rag_tool(project_id)]

	# Create the agent graph
	return create_agent(
		model=model,
		tools=tools,
		system_prompt=RAG_AGENT_SYSTEM_PROMPT,
		state_schema=CustomAgentState,
	)

In [16]:
# Project whose documents the agent's RAG tool will search
project_id = "6d090d75-7c7c-428c-bba8-258cf3f45d2d"

# Build the agent for that project
rag_agent = create_rag_agent(project_id, model="gpt-4o")

In [21]:
# Single-turn input: one user message carrying the question
question = "What are the two types of sleep?"
inputs = {"messages": [{"role": "user", "content": question}]}

# Run the agent and keep what it returns for inspection
result = rag_agent.invoke(inputs)


Vector search resulted in: 3 chunks
🤖 Invoking LLM with 2 messages (3 texts, 0 tables, 0 images)...


In [25]:
# Show the last entry of the returned message list
result["messages"][-1]

AIMessage(content='There are two main types of sleep: non-REM (NREM) sleep and REM (rapid eye movement) sleep.\n\n1. **Non-REM (NREM) Sleep**: This type of sleep consists of three stages, which progress from light to deeper sleep. The stages are:\n   - **Stage 1**: Light sleep, where you drift in and out of sleep and can be awakened easily.\n   - **Stage 2**: Eye movement stops, and brain waves become slower with occasional bursts of rapid brain waves.\n   - **Stage 3**: Known as slow-wave sleep or deep sleep, it is the deepest stage of NREM sleep, crucial for physical recovery and growth.\n\n2. **REM Sleep**: This stage is characterized by rapid eye movements, vivid dreams, temporary muscle paralysis, and brain activity that is similar to being awake. REM sleep typically occurs after progressing through the three stages of NREM sleep and is crucial for cognitive functions like memory consolidation.', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_t

In [26]:
# Show the complete object returned by rag_agent.invoke
result   

{'messages': [HumanMessage(content='What are the two types of sleep?', additional_kwargs={}, response_metadata={}, id='a6e77487-25c8-47f3-8767-e754802251b6'),
  AIMessage(content='', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 17, 'prompt_tokens': 286, 'total_tokens': 303, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_provider': 'openai', 'model_name': 'gpt-4o-2024-08-06', 'system_fingerprint': 'fp_ed643dde95', 'id': 'chatcmpl-Cf2UR2R7EETEzYfkLPTx3SXOPbBQ0', 'service_tier': 'default', 'finish_reason': 'tool_calls', 'logprobs': None}, id='lc_run--98893d50-1e9e-4b08-aa79-e0c411cb9386-0', tool_calls=[{'name': 'rag_search', 'args': {'query': 'types of sleep'}, 'id': 'call_UuySklAqPZo85ck5yldkJRN0', 'type': 'tool_call'}], usage_metadata={'input_tokens': 286, 'output_tokens': 17,