# Create Index

In [1]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
from sentence_transformers import SentenceTransformer
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from dotenv import load_dotenv

# Read variables from a local .env file into the process environment.
# NOTE(review): presumably this supplies the API keys for the Groq LLM and the
# Tavily search tool used further down — confirm against the .env file.
load_dotenv()


  from .autonotebook import tqdm as notebook_tqdm
USER_AGENT environment variable not set, consider setting it to identify your requests.


True

In [3]:
from langchain_community.embeddings import HuggingFaceEmbeddings

# Single embedding model instance; it is handed to Chroma when the index is built.
embd = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)

  embd = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')


In [4]:
# Docs to index: the web pages that are loaded, split and embedded below.
urls = [
    "https://lilianweng.github.io/posts/2023-06-23-agent/",
    "https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/",
    "https://lilianweng.github.io/posts/2023-10-25-adv-attack-llm/",
]
# Bare expression so the notebook echoes the list as the cell output.
urls

['https://lilianweng.github.io/posts/2023-06-23-agent/',
 'https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/',
 'https://lilianweng.github.io/posts/2023-10-25-adv-attack-llm/']

In [12]:
# Load: one loader call per URL, so `docs` is a list of per-page document lists.
docs = [WebBaseLoader(page_url).load() for page_url in urls]
# Flatten into a single list, keeping the URL order.
docs_list = [page_doc for page_docs in docs for page_doc in page_docs]

# Split: 500-unit chunks with no overlap, using the tiktoken-based splitter.
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=500,
    chunk_overlap=0,
)
doc_splits = text_splitter.split_documents(docs_list)

# Add to vectorstore: embed every chunk with `embd` into the "rag-chroma" collection.
# NOTE(review): re-running this cell in the same kernel may add the same chunks
# to the collection again — confirm and reset the collection if needed.
vectorstore = Chroma.from_documents(
    documents=doc_splits,
    embedding=embd,
    collection_name="rag-chroma",
)
retriever = vectorstore.as_retriever()

## Router for Query Analysis

In [13]:
# ! pip install langchain-groq pydantic -q

In [14]:
# Router
from typing import Literal
from langchain_core.prompts import ChatPromptTemplate
from langchain_groq import ChatGroq
from pydantic import BaseModel, Field

In [16]:
# Data Model
class RouteQuery(BaseModel):
    # Structured-output schema for the router: `datasource` must be one of the
    # two literals below. Documented with comments rather than a docstring so
    # the schema handed to the LLM stays exactly as before.
    datasource: Literal["vectorstore", "web_search"] = Field(
        ...,
        description="Given a user question choose to route it to web search or a vectorstore.",
    )
    
# LLM: Groq-hosted chat model, wrapped so its reply is parsed into a RouteQuery.
llm = ChatGroq(model="qwen/qwen3-32b")
structured_llm_router = llm.with_structured_output(RouteQuery)

# Prompt: tells the model which topics the vectorstore covers.
system = """You are an expert at routing a user question to a vectorstore or web search.
The vectorstore contains documents related to agents, prompt engineering, and adversarial attacks.
Use the vectorstore for questions on these topics. Otherwise, use web-search."""

route_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "{question}"),
    ]
)

# Router chain: fill the prompt, then let the structured LLM pick a datasource.
question_router = route_prompt | structured_llm_router

# Smoke test: one off-topic question and one on-topic question.
print(question_router.invoke({"question": "Who will the Bears draft first in the NFL draft?"}))
print(question_router.invoke({"question": "What are the types of agent memory?"}))

datasource='web_search'
datasource='vectorstore'


## Retrieval Grader

In [17]:
class GradeDocuments(BaseModel):
    """Binary score for relevance check on retrieved documents."""

    # Declared as a plain str: the 'yes'/'no' convention lives only in the
    # description text and is not validated by the model.
    binary_score: str = Field(
        description="Documents are relevant to the question, 'yes' or 'no'"
    )
    
# Wrap the shared LLM so its reply is parsed into a GradeDocuments object.
structured_llm_grader = llm.with_structured_output(GradeDocuments)

# Prompt
# The "\n" escapes sit next to real line breaks inside the triple-quoted string,
# so the rendered template contains both (visible in the recorded output).
system = """You are a grader assessing relevance of a retrieved document to a user question. \n 
    If the document contains keyword(s) or semantic meaning related to the user question, grade it as relevant. \n
    It does not need to be a stringent test. The goal is to filter out erroneous retrievals. \n
    Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question."""
    
grade_prompt = ChatPromptTemplate.from_messages([("system", system),
                                                 ("human", "Retrieved Document: \n\n {document} \n\n User Question: {question}"),])
print("1. GradePrompt:- ", grade_prompt, '\n\n')

# Grader chain: fill the prompt, then parse the LLM reply into GradeDocuments.
retrieval_grader = grade_prompt | structured_llm_grader
# NOTE(review): this prints the bound `invoke` method object, not a grading
# result — looks like leftover debugging output.
print("2 retrieval_grader:- ", retrieval_grader.invoke, '\n\n')

question = "agent memory"
print("3. question:- ", question, '\n\n')

# Rebinds `docs`: from here on it holds the retriever hits, not the raw loader
# output assigned in the indexing cell.
docs = retriever.invoke(question)
# NOTE(review): dumps every retrieved document in full; consider trimming.
print("4. doc:- ", docs, '\n\n')

# Grades the second hit only; raises IndexError if fewer than two documents
# come back from the retriever.
doc_text = docs[1].page_content
print("5. doc_text:- ", doc_text, '\n\n')

print(retrieval_grader.invoke({"question": question,
                               "document": doc_text}))


1. GradePrompt:-  input_variables=['document', 'question'] input_types={} partial_variables={} messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], input_types={}, partial_variables={}, template="You are a grader assessing relevance of a retrieved document to a user question. \n \n    If the document contains keyword(s) or semantic meaning related to the user question, grade it as relevant. \n\n    It does not need to be a stringent test. The goal is to filter out erroneous retrievals. \n\n    Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question."), additional_kwargs={}), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['document', 'question'], input_types={}, partial_variables={}, template='Retrieved Document: \n\n {document} \n\n User Question: {question}'), additional_kwargs={})] 


2 retrieval_grader:-  <bound method RunnableSequence.invoke of ChatPromptTemplate(input_variables=['document', 

## Generate

In [18]:
# Generate
from langchain_core.output_parsers import StrOutputParser

# Prompt
# RAG answer template with two placeholders: {context} and {question}.
# NOTE(review): the leading indentation on the continuation lines is inside the
# string literal, so it is part of the text sent to the model.
prompt = ChatPromptTemplate.from_template("""You are a helpful AI assistant.
                                            Use the provided context to answer the user question clearly and concisely.
                                            If the context does not contain the answer, say you don't have enough information.

                                            Context:
                                            {context}

                                            Question:
                                            {question}
                                            """)

# Post-processing
def format_docs(docs):
    """Return the ``page_content`` of every document, joined by a blank line.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        str: The contents in input order, separated by two newlines; an empty
        string when ``docs`` is empty.
    """
    contents = [doc.page_content for doc in docs]
    return "\n\n".join(contents)

# Chain: fill the prompt, call the LLM, and reduce the reply to a plain string.
rag_chain = prompt | llm | StrOutputParser()

# Run
# Relies on `docs` and `question` left in the kernel by the Retrieval Grader cell.
docs_txt = format_docs(docs)
generation = rag_chain.invoke({"context": docs_txt, "question": question})
# NOTE(review): the recorded output starts with a <think> section, so the
# string stored in `generation` includes the model's reasoning text as well.
print(generation)

<think>
Okay, the user asked about "agent memory" based on the provided context. Let me start by recalling what the context says.

The context is from a blog post discussing LLM-powered autonomous agents, specifically their components like Planning, Memory, and Tool Use. Under the Memory section, it mentions two types: short-term and long-term. Short-term memory is related to in-context learning, like using prompt engineering. Long-term memory uses external vector stores for retaining information over time. There's also something about Maximum Inner Product Search (MIPS) under the Memory section. 

Additionally, in the Generative Agents Simulation part, there's a detailed explanation of memory components: Memory Stream (long-term), Retrieval model considering recency, importance, and relevance, and a Reflection mechanism that creates higher-level summaries from past events. The Retrieval model uses relevance, recency, and importance to determine which memories to surface. Reflection sy

## Hallucination Grader

In [19]:
# Data model
class GradeHallucinations(BaseModel):
    """Binary score for hallucination present in generation answer."""

    # Plain str field: the 'yes'/'no' convention is stated only in the description.
    binary_score: str = Field(description="Answer is grounded in the facts, 'yes' or 'no'")


# Structured grader: the LLM reply is parsed into a GradeHallucinations object.
# This rebinds `structured_llm_grader`, previously bound to the document grader.
structured_llm_grader = llm.with_structured_output(GradeHallucinations)

# Prompt
system = """You are a grader assessing whether an LLM generation is grounded in / supported by a set of retrieved facts. \n 
     Give a binary score 'yes' or 'no'. 'Yes' means that the answer is grounded in / supported by the set of facts."""
hallucination_prompt = ChatPromptTemplate.from_messages([
    ("system", system),
    ("human", "Set of facts: \n\n {documents} \n\n LLM generation: {generation}"),
])

# Grader chain, tried out on the documents and answer from the cells above.
# `docs` is passed as the retrieved list itself, not as formatted text.
hallucination_grader = hallucination_prompt | structured_llm_grader
hallucination_grader.invoke({"documents": docs, "generation": generation})

GradeHallucinations(binary_score='yes')

## Answer Grader


In [20]:
# Data model
class GradeAnswer(BaseModel):
    """Binary score to assess answer addresses question."""

    # Plain str field: the 'yes'/'no' convention is stated only in the description.
    binary_score: str = Field(description="Answer addresses the question, 'yes' or 'no'")


# Structured grader: the LLM reply is parsed into a GradeAnswer object.
# This rebinds `structured_llm_grader`, previously bound to the hallucination grader.
structured_llm_grader = llm.with_structured_output(GradeAnswer)

# Prompt
# The quoting around 'Yes' is balanced here so the instruction reads the same
# way as the one in the hallucination grader prompt.
system = """You are a grader assessing whether an answer addresses / resolves a question \n 
     Give a binary score 'yes' or 'no'. 'Yes' means that the answer resolves the question."""
answer_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "User question: \n\n {question} \n\n LLM generation: {generation}"),
    ]
)

# Grader chain, tried out on the question and answer from the cells above.
answer_grader = answer_prompt | structured_llm_grader
answer_grader.invoke({"question": question, "generation": generation})

GradeAnswer(binary_score='yes')

## Question Rewriting
So far, the user's original question has been passed to retrieval unchanged.  
However, a user's question is not always phrased in a form that suits RAG.  
To improve retrieval, we rephrase the question so that it aligns better with vector similarity search.  

In [21]:
# Prompt for rewriting a question into a form better suited to vector search.
# The opening sentence previously lacked its verb ("You a question re-writer").
system = """You are a question re-writer that converts an input question to a better version that is optimized \n 
     for vectorstore retrieval. Look at the input and try to reason about the underlying semantic intent / meaning."""
re_write_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        (
            "human",
            "Here is the initial question: \n\n {question} \n Formulate an improved question.",
        ),
    ]
)

# Rewriter chain: fill the prompt, call the LLM, and return the reply as a string.
# NOTE(review): the recorded output begins with a <think> section, so the returned
# string contains the model's reasoning in addition to the rewritten question —
# strip it before using the result as a retrieval query.
question_rewriter = re_write_prompt | llm | StrOutputParser()
question_rewriter.invoke({"question": question})

'<think>\nOkay, the user\'s original question is "agent memory". That\'s pretty vague. Let me think about what they might be asking. Agent memory could refer to different things depending on the context. In AI, it might be about how an agent retains and uses information. In psychology, it could relate to memory processes. The user probably wants to know about the role of memory in agents, maybe in machine learning or robotics.\n\nThey might be looking for an explanation of how agents store and retrieve information, or perhaps the different types of memory used in agent systems. Since the original question is just two words, I need to add context to make it clearer. Maybe they want to know about the purpose of agent memory, its implementation, or applications. I should consider common topics in AI where memory is a key component, like reinforcement learning, where memory helps agents learn from past experiences.\n\nThe improved question should be more specific. Instead of "agent memory"

# Web Search Tool

In [22]:
from langchain_community.tools.tavily_search import TavilySearchResults

# Limit the web search to three results. The tool's result-count field is
# `max_results`; `k` is not a field of TavilySearchResults, so passing `k=3`
# left the tool at its default result count.
web_search_tool = TavilySearchResults(max_results=3)

  web_search_tool = TavilySearchResults(k=3)


# Construct the Graph
## Graph State

In [23]:
from typing import List
from typing_extensions import TypedDict

class GraphState(TypedDict):
    """Shared state of the graph, read and updated by the node functions.

    Keys:
        question: the question being processed
        generation: the answer text produced by the LLM
        documents: the documents collected for the question
    """

    question: str
    generation: str
    # NOTE(review): annotated as List[str], but the retrieve node stores whatever
    # the retriever returns — confirm the element type.
    documents: List[str]

## Graph Flow

In [28]:
from pprint import pprint
from langchain_core.documents import Document

def retrieve(state: GraphState):
    """
    Look up documents for the question held in the graph state.

    Args:
        state (dict): The current graph state; its "question" entry is the query.

    Returns:
        dict: The retrieved "documents" together with the unchanged "question".
    """
    print(":---RETRIEVE---:")
    query = state["question"]

    # Query the module-level retriever built on the Chroma vector store.
    hits = retriever.invoke(query)
    return {"documents": hits, "question": query}
    
def generate(state: GraphState):
    """
    Generate an answer from the documents currently held in the graph state.

    Args:
        state (dict): The current graph state; reads "question" and "documents".

    Returns:
        dict: The unchanged "documents" and "question" plus the new
        "generation" produced by the RAG chain.
    """
    print(":---GENERATE---:")
    question = state["question"]
    documents = state["documents"]

    # RAG generation: build the context from the documents in the state, not
    # from the notebook-level `docs_txt` left over from an earlier cell, and
    # pass it under the "context" key that the prompt template declares.
    doc_text = format_docs(documents)
    generation = rag_chain.invoke({"context": doc_text, "question": question})

    # Return the generation so later graph steps can read it.
    return {"documents": documents, "question": question, "generation": generation}
    
    