In [2]:
import os
from dotenv import load_dotenv
# Load variables from a local .env file (if present) into os.environ.
load_dotenv()

# Fail fast with a readable message when a required key is absent or empty.
# The earlier `os.environ[k] = os.getenv(k)` form raised an opaque
# "TypeError: str expected, not NoneType" when a key was missing, and was a
# no-op otherwise.
_REQUIRED_ENV_VARS = ("OPENAI_API_KEY", "TAVILY_API_KEY")
_missing_env_vars = [name for name in _REQUIRED_ENV_VARS if not os.getenv(name)]
if _missing_env_vars:
    raise EnvironmentError(
        "Missing required environment variable(s): "
        + ", ".join(_missing_env_vars)
        + ". Define them in your shell or in a .env file."
    )

In [3]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings

# Embedding model used to index the document chunks.
embd = OpenAIEmbeddings()

# Blog posts to index. The second and third entries previously carried wrong
# slugs (a copy of the first post's date, and a stray "/" inside the path);
# they now point at the published prompt-engineering and adversarial-attack posts.
urls = [
    "https://lilianweng.github.io/posts/2023-06-23-agent/",
    "https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/",
    "https://lilianweng.github.io/posts/2023-10-25-adv-attack-llm/",
]

# One loader call per URL; each call returns a list of documents, so flatten.
docs = [WebBaseLoader(url).load() for url in urls]
doc_list = [item for sublist in docs for item in sublist]

# Split with the tiktoken-based length function: 500-unit chunks, 50-unit overlap.
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=500, chunk_overlap=50
)

doc_splits = text_splitter.split_documents(doc_list)

# Embed the chunks into a FAISS index and expose it as a retriever.
vectorstore = FAISS.from_documents(documents=doc_splits, embedding=embd)

retriever = vectorstore.as_retriever()

  from .autonotebook import tqdm as notebook_tqdm
USER_AGENT environment variable not set, consider setting it to identify your requests.


In [5]:
# Router
from typing import Literal
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from pydantic import BaseModel, Field

# Data Model
class RouteQuery(BaseModel):
    """Route a user query to the most relevant datasource."""

    # The Literal restricts the structured output to exactly these two routes.
    # The description is part of the schema given to the model, so the
    # "vectostore" typo is corrected here.
    datasource: Literal["vectorstore", "web_search"] = Field(
        ...,
        description="Given a user question choose to route it to web search or a vectorstore.",
    )

 # LLM with function calling
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)
structured_llm_router = llm.with_structured_output(RouteQuery)

# Prompt. The model cannot inspect the index, so the system message must state
# which topics the vectorstore covers; otherwise "related to a vectorstore"
# gives it nothing to decide on.
system = """You are an expert at routing a user question to a vectorstore or a web search.
The vectorstore contains documents related to LLM-powered agents, prompt engineering, and adversarial attacks on LLMs.
Use the vectorstore for questions on these topics. Otherwise, use web search."""

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "{question}"),
    ]
)

# Chain: prompt -> LLM constrained to the RouteQuery schema.
question_router = prompt | structured_llm_router

# Sanity check with a question outside the indexed topics.
print(question_router.invoke({"question": "who won the cricket world cup 2023?"}))

datasource='web_search'


In [6]:
# Routing sanity check with a question about agent memory.
print(question_router.invoke(dict(question="what are the types of agent memory")))

datasource='vectorstore'


In [14]:
# Retrieval Grader
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from pydantic import BaseModel, Field

#Data Model
class GradeDocuments(BaseModel):
    """Binary score for relevance check on retrieved documents."""

    # Literal instead of a free-form str: variants such as "Yes" or "relevant"
    # are rejected when the structured output is parsed, rather than silently
    # failing a later comparison against "yes". Mirrors RouteQuery's Literal field.
    binary_score: Literal["yes", "no"] = Field(
        description="Documents are relevant to the question, 'yes' or 'no'"
    )
    
# LLM with function calling (`model=` keyword, matching the other cells)
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)
structured_llm_grader = llm.with_structured_output(GradeDocuments)

# Prompt
system = """You are a grader assessing relevance of a retrieved document to a user question. 
  If the document contains keywords related to the user question, grade it as relevant. 
  Give a binary score 'yes' or 'no' to indicate whether the document is relevant to the question."""

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "Question: {question}"),
        ("human", "Retrieved document: {document}"),
    ]
)

# Chain: prompt -> LLM constrained to the GradeDocuments schema.
retrieval_grader = prompt | structured_llm_grader
# Alias kept so any code still using the original misspelled name keeps working.
retrival_gender = retrieval_grader

# Try the grader on one retrieved chunk.
question = "agent memory"
docs = retriever.invoke(question)
doc_text = docs[1].page_content  # second retrieved document
print(retrieval_grader.invoke({"question": question, "document": doc_text}))



binary_score='yes'


In [15]:
#Generate
from langchain_classic import hub
from langchain_core.output_parsers import StrOutputParser

# Pull the "rlm/rag-prompt" prompt through the hub module and create the
# chat model that will write the answer.
prompt = hub.pull("rlm/rag-prompt")
llm = ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo")

def format_docs(docs):
    """Concatenate the ``page_content`` of every document in ``docs``.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        A single string with the contents joined by a blank line
        (empty string when ``docs`` is empty).
    """
    separator = "\n\n"
    contents = [entry.page_content for entry in docs]
    return separator.join(contents)

# Chain: prompt -> chat model -> plain-string output
rag_chain = prompt | llm | StrOutputParser()

# Run it on the `question` and `docs` left in scope by the retrieval-grader cell
rag_inputs = {"context": format_docs(docs), "question": question}
generation = rag_chain.invoke(rag_inputs)
print(generation)

The use of LLMs for evaluating performance in domains requiring deep expertise may lead to inaccuracies due to the lack of expertise in the model. Boiko et al. (2023) explored LLM-empowered agents for scientific discovery, enabling them to handle complex scientific experiments autonomously. Generative Agents combine LLM with memory, planning, and reflection mechanisms to create believable human behavior simulacra.


In [16]:
### Hallucination Grader

class GradeHallucinations(BaseModel):
    """Binary score for hallucinations present in the generated answer."""

    # Literal instead of a free-form str so that only the two documented values
    # can be parsed. The description previously read "'yes' and 'no'", which
    # does not describe a binary choice.
    binary_score: Literal["yes", "no"] = Field(
        description="Answer is grounded in the facts, 'yes' or 'no'"
    )
    
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

structured_llm_generator = llm.with_structured_output(GradeHallucinations)

# Prompt (typos fixed: "whetheran", "retrived facts/", "GIve")
system = """You are a grader assessing whether an LLM generation is grounded in / supported by a set of retrieved facts.
Give a binary score 'yes' or 'no'. 'yes' means that the answer is grounded in / supported by the set of facts."""

hallucination_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "Retrieved document: {document}"),
        ("human", "LLM generation: {generation}"),
    ]
)

# Chain: prompt -> LLM constrained to the GradeHallucinations schema.
hallucination_grader = hallucination_prompt | structured_llm_generator

# Aliases kept so any code still using the original misspelled names keeps working.
halluciation_prompt = hallucination_prompt
halluciation_grader = hallucination_grader

# Pass the documents as the same joined text the generator received, instead of
# the Python repr of a list of Document objects.
hallucination_grader.invoke({"document": format_docs(docs), "generation": generation})

GradeHallucinations(binary_score='yes')

In [17]:
# Data model
class GradeAnswer(BaseModel):
    """Binary score for whether the answer is useful."""

    # Literal instead of a free-form str: anything other than the two documented
    # values is rejected when the structured output is parsed.
    binary_score: Literal["yes", "no"] = Field(
        description="Answer is useful, 'yes' or 'no'"
    )
    
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
structured_llm_generator = llm.with_structured_output(GradeAnswer)

# Prompt. The second sentence used to ask about groundedness (copied from the
# hallucination grader); this grader only sees the question and the answer, so
# it must judge whether the answer resolves the question.
system = """You are a grader assessing whether an LLM answer addresses / resolves a question.
Give a binary score 'yes' or 'no'. 'yes' means that the answer resolves the question."""

answer_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "Question: {question}"),
        ("human", "LLM answer: {generation}"),
    ]
)

# Chain: prompt -> LLM constrained to the GradeAnswer schema.
answer_grader = answer_prompt | structured_llm_generator
answer_grader.invoke({"question": question, "generation": generation})

GradeAnswer(binary_score='yes')

In [18]:
# Question re-write

llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

# Prompt. The earlier version never asked for a rewritten question and embedded
# {question} in both the system and the human message. The system message now
# states the task, and the question appears once, in the human turn.
system = """You are a question re-writer that converts an input question into a better version that is optimized for vectorstore retrieval.
Look at the input and try to reason about the underlying semantic intent / meaning."""

question_rewrite_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        (
            "human",
            "Here is the initial question:\n\n{question}\n\nFormulate an improved question.",
        ),
    ]
)

# Chain: prompt -> chat model -> plain-string output
question_rewriter = question_rewrite_prompt | llm | StrOutputParser()
rewritten_question = question_rewriter.invoke({"question": question})

In [19]:
# web search

### Search

from langchain_community.tools.tavily_search import TavilySearchResults

# Cap the number of hits with `max_results`. NOTE(review): `k` does not appear
# to be a declared field of TavilySearchResults, so the original `k=3`
# presumably had no effect -- confirm against the API reference.
web_search_tool = TavilySearchResults(max_results=3)
# Alias kept so any code still using the original misspelled name keeps working.
web_seatch_tool = web_search_tool

  web_seatch_tool = TavilySearchResults(k=3)


In [20]:
### Creating state graph
from typing import TypedDict,List

class GraphState(TypedDict):
    """
    State dictionary passed between the nodes of the graph.

    Keys:
        question: the question text
        generation: the LLM generation
        documents: list of documents
    """

    # Question text.
    question: str
    # Text generated by the LLM.
    generation: str
    # Documents, annotated as a list of strings.
    documents: List[str]