Enhances the Simple RAG by adding validation and refinement to ensure the accuracy and relevance of retrieved information.
Checks the retrieved documents for relevance and highlights the segments of the documents used for answering.

In [2]:
import os
import sys
from dotenv import load_dotenv
load_dotenv()
from langchain_groq import ChatGroq
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain.vectorstores import Chroma

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [3]:
# Indexing: the articles that make up the knowledge base.
urls = [
    "https://www.deeplearning.ai/the-batch/how-agents-can-improve-llm-performance/?ref=dl-staging-website.ghost.io",
    "https://www.deeplearning.ai/the-batch/agentic-design-patterns-part-2-reflection/?ref=dl-staging-website.ghost.io",
    "https://www.deeplearning.ai/the-batch/agentic-design-patterns-part-3-tool-use/?ref=dl-staging-website.ghost.io",
    "https://www.deeplearning.ai/the-batch/agentic-design-patterns-part-4-planning/?ref=dl-staging-website.ghost.io",
    "https://www.deeplearning.ai/the-batch/agentic-design-patterns-part-5-multi-agent-collaboration/?ref=dl-staging-website.ghost.io",
]

# Load every page listed above; `docs` is what the splitter consumes next.
loader = WebBaseLoader(web_paths=urls)
docs = loader.load()


In [7]:
# Text splitting: cut the loaded pages into overlapping chunks.
# Sizes are measured with `len`: 1000 per chunk, 200 shared between neighbours.
splitter = RecursiveCharacterTextSplitter(
    length_function=len,
    chunk_overlap=200,
    chunk_size=1000,
)
docs_split = splitter.split_documents(docs)

In [9]:
# Embeddings: model used to vectorise each chunk.
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Vector store: index the chunks in a Chroma collection.
# NOTE(review): re-running this cell in the same kernel may add the same
# chunks to "my_collection" a second time — confirm before re-executing.
vector_store = Chroma.from_documents(
    collection_name="my_collection",
    embedding=embeddings,
    documents=docs_split,
)

  from .autonotebook import tqdm as notebook_tqdm


In [10]:
# Retriever: similarity search over the vector store with k=3.
retriever = vector_store.as_retriever(
    search_kwargs={"k": 3},
    search_type="similarity",
)

Questions

In [11]:
# Sample query reused by the grading, generation and hallucination cells.
# NOTE(review): "differnt" is misspelled in the query text; it is left as-is
# because editing it would change what gets embedded and retrieved.
question = "what are the differnt kind of agentic design patterns?"
# Fetch the candidate chunks for the sample query.
retrieved_docs=retriever.invoke(question)

In [14]:
# Peek at the text of the first retrieved chunk.
retrieved_docs[0].page_content

'I’ll describe the Planning and Multi-agent collaboration design patterns. They allow AI agents to do much more but are less mature, less predictable — albeit very exciting — technologies.\xa0Keep learning!AndrewRead "Agentic Design Patterns Part 1: Four AI agent strategies that improve GPT-4 and GPT-3.5 performance"Read "Agentic Design Patterns Part 2: Reflection"Read "Agentic Design Patterns Part 4: Planning"Read "Agentic Design Patterns Part 5: Multi-Agent Collaboration"ShareSubscribe to The BatchStay updated with weekly AI News and Insights delivered to your inboxCoursesThe BatchCommunityCareersAbout'

Check document relevancy

In [16]:
from langchain_core.prompts import ChatPromptTemplate
from pydantic import Field, BaseModel

# Output schema: the structure the relevance grader is asked to return
# (it is passed to `llm.with_structured_output` further down this cell).
# NOTE(review): the class docstring and the Field description are presumably
# forwarded to the model as part of the structured-output schema, so their
# wording is left untouched — confirm before rephrasing either of them.
class GradeDocuments(BaseModel):
    """ 
    Binary score of the retrieved documents for relevance check
    """
    # Typed as a plain str: the annotation itself does not restrict the value
    # to 'yes'/'no'; only the description asks the model for one of the two.
    binary_score:str = Field(description="Document relevant to the question, 'yes' or 'no'")

# Chat model behind the relevance grader; the API key is read from the environment.
groq_api_key = os.getenv("GROQ_API_KEY")
llm = ChatGroq(model_name="Llama3-8b-8192", groq_api_key=groq_api_key)

# Ask the model to answer in the shape of the GradeDocuments schema.
structured_llm = llm.with_structured_output(GradeDocuments)

# Grader instructions (prompt text kept verbatim).
system_prompt = """ 
You are a grader assessing relevance of retrieved documents for the user question.
If the document contains keyword(s) or semantic meaning related to the user question, grade it as relevant.
It does not need to be a stringent test. The goal is to filter out erroneous retrievals.
Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question.

"""

# System instructions followed by the document/question pair to grade.
grade_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("user", "Retrieved documents : {documents} \n User question : {question}"),
    ]
)

# Prompt -> structured grader; expects the keys 'documents' and 'question'.
grade_chain = grade_prompt | structured_llm

In [18]:
# Grade every retrieved chunk and keep only the ones the grader marks relevant.
docs_to_use = []
for doc in retrieved_docs:
    print(doc.page_content, '\n', '-'*50)
    score = grade_chain.invoke({'documents': doc.page_content, 'question': question})
    print(score, '\n')
    # binary_score is declared as a free-form str, so the model may answer
    # "Yes" or "yes " instead of exactly "yes"; normalise before comparing so
    # a relevant chunk is not silently dropped.
    # NOTE(review): a missing result (None) is treated as "not relevant" —
    # confirm whether the structured-output parser can actually return None.
    verdict = (score.binary_score if score is not None else '').strip().lower()
    if verdict == 'yes':
        docs_to_use.append(doc)

I’ll describe the Planning and Multi-agent collaboration design patterns. They allow AI agents to do much more but are less mature, less predictable — albeit very exciting — technologies. Keep learning!AndrewRead "Agentic Design Patterns Part 1: Four AI agent strategies that improve GPT-4 and GPT-3.5 performance"Read "Agentic Design Patterns Part 2: Reflection"Read "Agentic Design Patterns Part 4: Planning"Read "Agentic Design Patterns Part 5: Multi-Agent Collaboration"ShareSubscribe to The BatchStay updated with weekly AI News and Insights delivered to your inboxCoursesThe BatchCommunityCareersAbout 
 --------------------------------------------------
binary_score='yes' 

Agentic Design Patterns Part 2: Reflection✨ New course! Enroll in Building AI Voice Agents for ProductionExplore CoursesAI NewsletterThe BatchAndrew's LetterData PointsML ResearchBlogCommunityForumEventsAmbassadorsAmbassador SpotlightResourcesCompanyAboutCareersContactStart LearningWeekly IssuesAndrew's LettersData P

Generate Answer

In [19]:
from langchain_core.prompts import PromptTemplate
# Answer-generation prompt; expects 'context' and 'question'.
# The template text is runtime content and is kept verbatim.
prompt = PromptTemplate(
    input_variables=['context', 'question'],
    template=""" 
    You are assistant for question answering tasks.
    Use the following piece of retreived context to answer
    the question.If you don't know the answer, say that you don't know.
    keep the answer concise.
    {context}
    Question:{question}
    """,
)

In [20]:
#Building chain
from langchain_core.runnables import RunnableParallel, RunnablePassthrough, RunnableLambda
from langchain_core.output_parsers import StrOutputParser

def format_docs(retrieved_docs):
    """Join the `page_content` of each document into one newline-separated string.

    Args:
        retrieved_docs: iterable of objects exposing a `page_content` attribute.

    Returns:
        A single string with one document's text per line group; an empty
        string when no documents are given.
    """
    page_texts = [document.page_content for document in retrieved_docs]
    return "\n".join(page_texts)

def _retrieve_relevant_context(query):
    """Retrieve chunks for `query` and keep only those the relevance grader accepts.

    The original chain piped the retriever straight into `format_docs`, so the
    relevance grader (`grade_chain`) was never applied to the context used for
    generation. This helper runs the grader on each retrieved chunk and
    formats only the chunks graded 'yes'.

    Args:
        query: the user question passed to the chain.

    Returns:
        The newline-joined text of the relevant chunks (an empty string when
        none are graded relevant).
    """
    relevant_docs = []
    for candidate in retriever.invoke(query):
        score = grade_chain.invoke({'documents': candidate.page_content, 'question': query})
        # binary_score is a free-form str; normalise case/whitespace, and treat
        # a missing result as "not relevant".
        # NOTE(review): confirm whether the grader can actually return None.
        verdict = (score.binary_score if score is not None else '').strip().lower()
        if verdict == 'yes':
            relevant_docs.append(candidate)
    return format_docs(relevant_docs)


# Build the prompt inputs: graded context plus the untouched question.
parllel_chain=RunnableParallel({
    'context': RunnableLambda(_retrieve_relevant_context),
    'question': RunnablePassthrough()
})

# Turn the chat model's message into a plain string.
parser = StrOutputParser()

# Inputs -> answer prompt -> chat model -> string answer.
rag_chain = parllel_chain | prompt | llm | parser

In [21]:
# Run the RAG chain on the sample question; the result is reused by the
# hallucination check below.
answer=rag_chain.invoke(question)

In [22]:
# Show the generated answer.
answer

'According to the provided context, there are four agentic design patterns mentioned:\n\n1. Reflection\n2. Tool Use\n3. Planning\n4. Multi-Agent Collaboration'

Check for Hallucination

In [23]:
from langchain_core.prompts import ChatPromptTemplate
from pydantic import Field, BaseModel

# Output schema: the structure the hallucination grader is asked to return
# (it is passed to `llm.with_structured_output` further down this cell).
# NOTE(review): the class docstring and the Field description are presumably
# forwarded to the model as part of the structured-output schema, so their
# wording is left untouched — confirm before rephrasing either of them.
class GradeHallucination(BaseModel):
    """ 
    Binary score of the hallucination present in answer
    """
    # Typed as a plain str: the annotation itself does not restrict the value
    # to 'yes'/'no'; only the description asks the model for one of the two.
    binary_score:str = Field(description="Answer is grounded in the facts, 'yes' or 'no'")

# Chat model behind the hallucination grader; the API key is read from the environment.
groq_api_key = os.getenv("GROQ_API_KEY")
llm = ChatGroq(model_name="Llama3-8b-8192", groq_api_key=groq_api_key)

# Ask the model to answer in the shape of the GradeHallucination schema.
structured_llm = llm.with_structured_output(GradeHallucination)

# Grader instructions (prompt text kept verbatim).
system_prompt = """ 
You are a grader assessing whether llm generation answer is grounded in / supported by set of retrieved facts.
Give a binary score 'yes' or 'no'. 'yes' means answer is grounded in / supported by set of retrieved facts.

"""

# System instructions followed by the facts/generation pair to grade.
hallucination_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("user", "Retrieved facts : {documents} \n llm generation : {generation}"),
    ]
)

# Prompt -> structured grader; expects the keys 'documents' and 'generation'.
hallucination_chain = hallucination_prompt | structured_llm

In [24]:
# Grade the generated answer against the text of the retrieved chunks.
hallucination_score = hallucination_chain.invoke(
    {
        'generation': answer,
        'documents': format_docs(retrieved_docs),
    }
)
print(hallucination_score)

binary_score='yes'
