In [6]:
import os
from dotenv import load_dotenv
load_dotenv()

# Confirm the API keys (normally loaded from .env by load_dotenv above) are
# present in this process's environment.
# os.environ only accepts str values, so assigning os.getenv(...) directly
# raises "TypeError: str expected, not NoneType" when a key is missing.
# Check each key first and name the missing one instead.
import warnings

for _key_name in ("OPENAI_API_KEY", "TAVILY_API_KEY"):
    _key_value = os.getenv(_key_name)
    if not _key_value:
        warnings.warn(
            f"{_key_name} is not set; add it to your .env file or environment "
            "before running the cells that need it."
        )
        continue
    os.environ[_key_name] = _key_value

In [7]:
###Build Index
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings

# Embedding model: created once and shared by everything that needs embeddings.
embd = OpenAIEmbeddings()

# Pages to index (blog posts on agents, prompt engineering and adversarial attacks).
urls = [
    "https://lilianweng.github.io/posts/2023-06-23-agent/",
    "https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/",
    "https://lilianweng.github.io/posts/2023-10-25-adv-attack-llm/",
]

# Load: one loader call per URL. `docs` is therefore a list of per-URL results,
# which is flattened into a single list of documents in `docs_list`.
docs = [WebBaseLoader(url).load() for url in urls]
docs_list = [item for sublist in docs for item in sublist]

# Split: chunk sizes are measured in tiktoken tokens (500 per chunk, 50 overlap).
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=500, chunk_overlap=50
)

docs_split = text_splitter.split_documents(docs_list)

# Add to vectorstore: reuse `embd` rather than constructing a second, identical
# OpenAIEmbeddings client (the original left `embd` unused).
vectorstore = FAISS.from_documents(
    documents=docs_split,
    embedding=embd,
)

# Retriever with the vector store's default search settings.
retriever = vectorstore.as_retriever()

In [8]:
##Router
from typing import Literal

from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from pydantic import BaseModel, Field

#Data Model
class RouteQuery(BaseModel):
    """Route a user query to the most relevant datasource"""

    # This model is handed to `llm.with_structured_output(...)`, so the docstring
    # and the field description are presumably what the LLM reads as instructions;
    # treat their wording as part of the prompt.
    datasource: Literal["vectorstore", "web_search"] = Field(
        ...,
        description="Given a user question, choose to route it to web search or a vectorstore",
    )

# System instructions describing what each datasource is for
system = """You are an expert at routing a user question to a vectorstore or web search.
        The vectorstore contains documents related to agents, prompt engineering, and adverserial attacks.
        Use the vectorstore for questions on these topics. Otherwise use web-search."""

# Prompt: the routing instructions followed by the user's question
route_prompt = ChatPromptTemplate.from_messages(
    [("system", system), ("human", "{question}")]
)

# LLM (temperature 0) whose reply is parsed into a RouteQuery
llm = ChatOpenAI(temperature=0, model="gpt-4o-mini")
structured_llm_router = llm.with_structured_output(RouteQuery)

# Router chain: fill the prompt, then let the structured LLM pick a datasource
question_router = route_prompt | structured_llm_router

# Smoke test with a question outside the indexed topics
print(
    question_router.invoke(
        {"question": "Who won the Cricket world cup 2024"}
    )
)

datasource='web_search'


In [9]:
# Smoke test with a question about one of the indexed topics (agent memory)
print(
    question_router.invoke(
        {"question": "What are the typpes of agent memory?"}
    )
)

datasource='vectorstore'


In [10]:
##Retrieval Grader

#Data Model
class GradeDocument(BaseModel):
    """Binary score for relevance check on retrieved document"""

    # Restricted to the two allowed answers, the same way RouteQuery.datasource
    # restricts its choices. A free-form `str` would let any other text pass
    # validation and then fail a later == "yes" / == "no" comparison silently.
    binary_score: Literal["yes", "no"] = Field(
        description="Documents are relevant to the question, 'yes' or 'no'"
    )


# System instructions for the relevance grader
system = """You are a grader assessing relevance of a retrieved document to a user question. \n
If the document contains keyword(s) or semantic meaning related to the user question, grade it as relevant. \n
It does not need to be a stringent test. The goal is to filter out erroneous retrievals. \n
Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question."""

# Prompt: grading instructions, then the document and the question to compare
grade_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        (
            "human",
            "Retrieved document: \n\n {document} \n\n user question: {question}",
        ),
    ]
)

# LLM (temperature 0) whose reply is parsed into a GradeDocument
llm = ChatOpenAI(temperature=0, model="gpt-4o-mini")
structured_llm_grader = llm.with_structured_output(GradeDocument)

# Grader chain: fill the prompt, then ask the structured LLM for a verdict
retriever_grader = grade_prompt | structured_llm_grader
# Sample query used to exercise the grader (and reused by later cells)
question = "agent memory"

# Context: retrieve documents for the query, then grade the text of the
# second hit (index 1) against the question
docs = retriever.invoke(question)
doc_txt = docs[1].page_content
grader_inputs = {"document": doc_txt, "question": question}
print(retriever_grader.invoke(grader_inputs))

binary_score='yes'


In [11]:
##Generate

from langchain import hub
from langchain_core.output_parsers import StrOutputParser

# Prompt: the "rlm/rag-prompt" template pulled from the LangChain hub
prompt = hub.pull("rlm/rag-prompt")

# LLM: chat model with temperature 0 used to write the answer
llm = ChatOpenAI(
    temperature=0,
    model="gpt-4o-mini",
)

#Post-processing
def format_docs(docs):
    """Join the ``page_content`` of every document, separated by a blank line.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        One string with the page contents in input order, joined by two
        newlines; an empty string when ``docs`` is empty.
    """
    page_texts = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(page_texts)

# Chain: prompt -> chat model -> plain-string output
rag_chain = prompt | llm | StrOutputParser()

# Run: pass the retrieved documents through format_docs so the prompt receives
# their text joined by blank lines. The original passed the raw `docs` list,
# which leaves format_docs unused and puts the list's string form (object reprs
# with metadata) into the context slot.
generation = rag_chain.invoke(
    {"context": format_docs(docs), "question": question}
)
print(generation)

Agent memory in LLM-powered autonomous agents consists of short-term and long-term memory. Short-term memory utilizes in-context learning, while long-term memory allows agents to retain and recall information over extended periods, often using an external vector store for fast retrieval. This dual memory system enhances the agent's ability to learn from past experiences and improve future performance.


In [12]:
#Hallucination Grader

#Data model
class GradeHallucinations(BaseModel):
    """Binary score for hallucination present in generation answer"""

    # Restricted to the two allowed answers, the same way RouteQuery.datasource
    # restricts its choices. A free-form `str` would let any other text pass
    # validation and then fail a later == "yes" / == "no" comparison silently.
    binary_score: Literal["yes", "no"] = Field(
        description="Answer is grounded in the facts, 'yes' or 'no'"
    )

#LLM with function call
llm = ChatOpenAI(temperature=0, model="gpt-4o-mini")
structured_llm_grader = llm.with_structured_output(GradeHallucinations)

# System instructions for the groundedness grader
system = """You are a grader assessing whether an LLM generation is grounded in / supported by a set of retrieved facts. \n
Give a binary score 'yes' or 'no'. 'Yes' means that the answer is ground in /supported by the set of facts."""

# Prompt: grading instructions, then the facts and the generated answer to check
hallucinationPrompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        (
            "human",
            "Set of facts: \n\n {documents} \n\n LLM generation: {generation}",
        ),
    ]
)

# Grader chain, run on the documents and answer produced by the earlier cells
hallucination_grader = hallucinationPrompt | structured_llm_grader
hallucination_grader.invoke({"generation": generation, "documents": docs})

GradeHallucinations(binary_score='yes')