Building an Agentic Corrective RAG System with LangGraph


In [None]:
!pip install langchain
!pip install langgraph
!pip install langchain-community
!pip install langchain-google-genai
!pip install langchain-chroma



In [None]:
from google.colab import userdata
import os

# Environment variable name -> name of the Colab secret that provides its value.
# GOOGLE_API_KEY is populated from the Colab secret named GEMINI_API_KEY.
_SECRET_NAMES = {
    'GROQ_API_KEY': 'GROQ_API_KEY',
    'WEATHER_API_KEY': 'WEATHER_API_KEY',
    'GOOGLE_API_KEY': 'GEMINI_API_KEY',
    'TAVILY_API_KEY': 'TAVILY_API_KEY',
    'HUGGINGFACE_API_KEY': 'HUGGINGFACE_API_KEY',
}

# Export each secret exactly once so the client libraries can read it from the environment.
for _env_name, _secret_name in _SECRET_NAMES.items():
    os.environ[_env_name] = userdata.get(_secret_name)

In [None]:
!pip install --upgrade --quiet  langchain sentence_transformers

In [None]:
# from langchain_community.embeddings import HuggingFaceEmbeddings
# embeddings = HuggingFaceEmbeddings()

In [None]:
# Create the embedding model used to build the Wikipedia search index
from langchain_google_genai import GoogleGenerativeAIEmbeddings

embedding_model = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

In [None]:
!pip install langchain_groq

In [None]:
from langchain_groq import ChatGroq
chatgpt = ChatGroq(
    model = "llama3-8b-8192",
    temperature = 0
)

In [None]:
import gzip
import json
from langchain.docstore.document import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
wikipidea_filepath = '/content/drive/MyDrive/simplewiki-2020-11-01.jsonl.gz'

# Only articles whose leading text contains this exact lowercase token are kept.
keyword = 'india'

# Stream the gzipped JSON-lines dump and keep matching articles only, so the
# whole dump never has to be held in memory before it is filtered.
docs = []
with gzip.open(wikipidea_filepath, 'rt', encoding='utf-8') as f:
    for line in f:
        data = json.loads(line.strip())
        # Restrict each article to its first three paragraphs; an article
        # without a 'paragraphs' field yields empty text and is filtered out.
        text = ' '.join((data.get('paragraphs') or [])[0:3])
        if keyword in text.lower().split():
            docs.append(
                Document(
                    page_content=text,
                    metadata={
                        'title': data.get('title'),
                        'article_id': data.get('id'),
                    },
                )
            )

# Split the selected articles into overlapping chunks for embedding.
splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=300)
chunked_docs = splitter.split_documents(docs)
print("chunks:", len(chunked_docs))

chunks: 1322


In [None]:
chunked_docs[0:3]

In [None]:
from langchain_chroma import Chroma
# Embed the chunks with `embedding_model` and store them in a Chroma collection
# that uses cosine distance and is persisted under ./wikipedia_db.
# NOTE(review): re-running this cell presumably adds the same chunks to the
# persisted collection again — confirm and guard against duplicates if so.
chroma_db = Chroma.from_documents(documents=chunked_docs,
                                  collection_name= 'rag_wkipedia_db',
                                  embedding=embedding_model,
                                  collection_metadata={"hnsw:space": "cosine"},
                                  persist_directory="./wikipedia_db")

In [None]:
similarity_threshold_retriever = chroma_db.as_retriever(search_type="similarity_score_threshold",
                                                       search_kwargs={"k": 3,
                                                                    "score_threshold": 0.3})

In [None]:
# Sanity-check the retriever with a sample question.
query = "What is the capital of India?"
# Use `invoke`, the same entry point the rest of this notebook calls on the
# retriever; `get_relevant_documents` is deprecated.
top3docs = similarity_threshold_retriever.invoke(query)
top3docs

In [None]:
#Create a Query Retrieval Grader
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.pydantic_v1 import BaseModel ,Field
from langchain_google_genai import ChatGoogleGenerativeAI

# Data model for the grader's structured output: a single 'yes'/'no' field.
# NOTE(review): the class docstring and the Field description are presumably
# forwarded to the model as part of the output schema, so treat both as prompt
# text — confirm before rewording them.
class GradeDocuments(BaseModel):
    """Binary score for relevance check on retrieved document"""
    binary_score: str = Field(
        description="Documents are relevant to the question, 'yes' or 'no'"
    )

llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro",temperature=0)
structured_llm_grader = llm.with_structured_output(GradeDocuments)

# prompt template for grading
SYS_PROMPT = """You are an expert grader assessing relevance of a retrieved document to a user question
                Follow these instructions for grading:
                -If the document contains keywords(s) or semantic meaning related to question ,grade it as relevant.
                -Your grade should be either 'yes' or 'no' to indicate whether the document is relevant to the question or not
            """
grade_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", SYS_PROMPT),
        ("human", """Retrieved document:
                    {document}
                    User Question:
                    {question}
                    """),
    ]
)
#Build grader chain

doc_grader =(grade_prompt | structured_llm_grader)

In [None]:
# Retrieve documents for a sample question and print the grader's verdict for each.
query = "What is the capital of pakistan?"
# Use `invoke`, the same entry point the rest of this notebook calls on the
# retriever; `get_relevant_documents` is deprecated.
top3docs = similarity_threshold_retriever.invoke(query)
for doc in top3docs:
    print(doc.page_content)
    print('GRADE:',doc_grader.invoke({"question": query,"document": doc.page_content,}))
    print()

In [None]:
query = "who won the champions league in 2024"
top3docs = similarity_threshold_retriever.invoke(query)
for doc in top3docs:
    print(doc.page_content)
    print(doc_grader.invoke({"document": doc.page_content, "question": query}))
    print()

The Age of Discovery or Age of Exploration was a period from the early 15th century that continued into the early 17th century, during which European ships traveled around the world to search for new trading routes and partners. They were in search of trading goods such as gold, silver and spices. In the process, Europeans met people and mapped lands previously unknown to them. Among the most famous explorers of the period were Christopher Columbus, Vasco da Gama, Pedro Álvares Cabral, John Cabot, , Juan Ponce de León, Juan Sebastian Elcano, Bartholomeu Dias, Ferdinand Magellan, Willem Barentsz, Abel Tasman, Jean Alfonse, Jacques Cartier, Samuel de Champlain, Willem Blaeu and Captain James Cook. Henry the Navigator started by paying Portuguese sailors to explore the west coast of Africa. In 1419 Joao Goncalves Zarco discovered the Madeira Islands. Later in the 15th century, Vasco da Gama reached the southwestern tip of Africa and established the city of Cape Town, a Portuguese colony. 

In [None]:
#Build QA rag chain
from langchain_core.prompts import ChatPromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.runnables import RunnablePassthrough, RunnableLambda
from langchain_core.output_parsers import StrOutputParser
from operator import itemgetter

prompt = """You are an assistant for question-answering task
            Use the following pieces of retrieved context to answer the question.
            If no context is present or if you don't know the answer, just say that you don't know the answer.
            Do not make up the answer unless it is there in the provided context.
            Give a detaield answer and to the point answer with regard to the question
            Question:
            {question}
            Context:
            {context}
            Answer:
        """
prompt_template = ChatPromptTemplate.from_template(prompt)
chat_gpt = ChatGoogleGenerativeAI(model="gemini-1.5-pro",temperature=0)

def format_docs(docs):
    """Return the ``page_content`` of every item in ``docs`` as one string,
    with consecutive documents separated by a blank line."""
    pieces = []
    for document in docs:
        pieces.append(document.page_content)
    return "\n\n".join(pieces)

# RAG answer chain. Input is a dict with "context" (a list of documents) and
# "question"; the documents are rendered to text by format_docs, the prompt
# template is filled, the model is called and the reply is parsed to a string.
# NOTE(review): the model in this chain is `chatgpt` (the ChatGroq instance
# created earlier), not the `chat_gpt` Gemini model defined right before this
# chain, which appears unused in this notebook — confirm which was intended.
qa_rag_chain =(
    {
        "context": (itemgetter('context')
                        |
                    RunnableLambda(format_docs)),
        "question": itemgetter('question')
    }
     |
    prompt_template
     |
    chatgpt
     |
    StrOutputParser()
)

In [None]:
query = "what is capital of pakistan"
top3docs = similarity_threshold_retriever.invoke(query)
result =qa_rag_chain.invoke({"context": top3docs,"question": query})
print(result)
type(result)

According to the provided context, the capital of Pakistan is Islamabad.


str

In [None]:
#Create a Query Rephraser
llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro",temperature=0)
#prompt template

SYS_PROMPT = """Act as a question re-writer and perform the following task:
                -convert the following input question to a better version that is optimized for web search
                -when re-writing the question, look at the input question and try to reason about the underlying semantic meaning
                -generate only one question in string format
            """
re_write_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", SYS_PROMPT),
        ("human", """Here is initial question:
                    {question}
                    formulate an improved question
                """)
    ]
)
question_rewriter = (re_write_prompt | llm | StrOutputParser())

In [None]:
query = "who won the champions league in 2024"
question_rewriter.invoke({"question": query})

'"Who is the predicted winner of the 2024 Champions League?" \n'

In [None]:
#!pip install --upgrade --quiet  duckduckgo-search

In [None]:
from langchain_community.tools.tavily_search import TavilySearchResults
tv_search = TavilySearchResults(max_results=3, search_depth= 'advanced',
                                    max_tokens =10000)

In [None]:
!pip install duckduckgo-search



In [None]:
# load web search tool

from langchain_community.tools import DuckDuckGoSearchResults
search = DuckDuckGoSearchResults(max_results=3, search_depth='advanced',
                                max_tokens=1000)

In [None]:
docs =search.invoke("who won the champions league in 2024")
docs

"[snippet: Real Madrid toiled for the first hour of the 2024 UEFA Champions League final against Borussia Dortmund before goals from Dani Carvajal and Vinicius Junior sealed victory., title: Who won Champions League final 2024? How Carvajal, Vinicius Jr. sealed ..., link: https://www.sportingnews.com/us/soccer/news/who-won-champions-league-final-2024-real-madrid-borussia-dortmund/c2aa2d0b8666ccb0125474be], [snippet: Real Madrid came through a torrid first half against Borussia Dortmund to win the UEFA Champions League final 2-0 at Wembley Stadium. Edin Terzic's underdogs made the running during a first half ..., title: Who won Champions League final 2024? How Carvajal ... - Sporting News, link: https://www.sportingnews.com/uk/football/news/who-won-champions-league-final-2024-real-madrid-borussia-dortmund/c2aa2d0b8666ccb0125474be], [snippet: Borussia Dortmund were first to confirm their place in the final with a 2-0 aggregate victory over Paris Saint-Germain. They will take on 14-time c

In [None]:
# The DuckDuckGo tool call above returned one formatted string rather than a
# list of dicts, so indexing every item with ["snippet"] raises TypeError.
# Use the string as-is and only join snippets when result dicts are returned.
if isinstance(docs, str):
    web_result = docs
else:
    web_result = "\n\n".join([d["snippet"] for d in docs])
web_result

TypeError: string indices must be integers

Graph State

In [None]:
from typing import List
from typing_extensions import TypedDict

class GraphState(TypedDict):
    """
    Represents the state of our graph

    Attributes:
        question: the user question (replaced by the re-written question after the rewrite step)
        generation: LLM response generation
        web_search_needed: "Yes" or "No" flag telling whether to add web search
        documents: list of context documents
    """
    question: str
    generation: str
    web_search_needed: str
    # NOTE(review): annotated as List[str], but the nodes in this notebook store
    # Document objects here (they read `.page_content`) — confirm and adjust.
    documents: List[str]


In [None]:
# Retrieve function: fetches documents from the vector database
def retrieve(state):
    """
    Look up context documents for the current question.

    Args:
        state(dict): The current graph state; its "question" entry is the search query
    Returns:
        state(dict): "documents" holding whatever the retriever returned, together
        with the unchanged "question"
    """
    print("--RETRIEVAL FROM VECTOR DETABASE --")
    user_question = state["question"]
    return {
        "documents": similarity_threshold_retriever.invoke(user_question),
        "question": user_question,
    }


In [None]:
#grade documents
def grade_documents(state):
    """
    Grade every retrieved document for relevance to the question with the LLM
    grader and keep only the relevant ones.

    Web search is requested when no documents were retrieved or when at least
    one document is graded as not relevant; it is skipped only when every
    document is relevant.

    Args:
        state(dict): The current graph state; reads "question" and "documents"
    Returns:
        state(dict): "documents" reduced to the relevant documents, the unchanged
        "question", and "web_search_needed" set to "Yes" or "No"
    """
    print("--CHECK DOCUMENT RELEVANCE TO QUESTION")
    question = state["question"]
    documents = state["documents"]

    if not documents:
        print("--NO DOCUMENTS RETRIEVED--")
        return {"documents": [], "question": question, "web_search_needed": "Yes"}

    # Score each document
    filtered_docs = []
    web_search_needed = "No"
    for d in documents:
        score = doc_grader.invoke(
            {"question": question, "document": d.page_content})
        # The grade comes from an LLM: accept variants such as "Yes" or " yes\n",
        # and treat a missing or empty grade as not relevant instead of crashing.
        grade = str(getattr(score, "binary_score", "") or "").strip().lower()
        if grade == "yes":
            print("--GRADE: DOCUMENT RELEVANT--")
            filtered_docs.append(d)
        else:
            print("--GRADE: DOCUMENT NOT RELEVANT")
            web_search_needed = "Yes"
    return {"documents": filtered_docs, "question": question, "web_search_needed": web_search_needed}

In [None]:
#Rewrite query
def rewrite_query(state):
    """
    Rewrite the query to produce a better question for web search.

    Args:
        state(dict): The current graph state; reads "question" and "documents"
    Returns:
        state(dict): The unchanged "documents" and "question" replaced by the
        re-written question
    """
    print("--REWRITE QUERY--")
    question = state["question"]
    documents = state["documents"]

    # The rewriter's raw reply can carry surrounding whitespace or a trailing
    # newline; trim it so the stored question and the search query are clean.
    better_question = question_rewriter.invoke({"question": question}).strip()
    # Log the re-written question before returning so it shows up in the trace.
    print(better_question)
    return {"documents": documents, "question": better_question}


In [None]:
#Web Search
from langchain.schema import Document
def web_search(state):
    """
    Web search based on the re-written question.

    Args:
        state(dict): The current graph state; reads "question" and "documents"
    Returns:
        state(dict): "documents" as a new list made of the existing documents plus,
        when the search produced usable text, one Document holding the joined
        web results; "question" is returned unchanged
    """
    print("--WEB SEARCH--")
    question = state["question"]
    # Work on a copy so the list already stored in the graph state is not mutated.
    documents = list(state["documents"])

    results = tv_search.invoke(question)
    # Only a list of dicts carrying a "content" entry can be used. Anything else
    # (for example a plain string, as the DuckDuckGo tool returned earlier in
    # this notebook) is skipped instead of raising TypeError.
    snippets = []
    if isinstance(results, list):
        snippets = [r["content"] for r in results
                    if isinstance(r, dict) and r.get("content")]

    if snippets:
        documents.append(Document(page_content="\n\n".join(snippets)))
    else:
        print("--WEB SEARCH RETURNED NO USABLE RESULTS--")
    return {"documents": documents, "question": question}


In [None]:
#Generate Answer
def generate_answer(state):
    """
    Produce the final answer by running the QA RAG chain over the context documents.

    Args:
        state(dict): The current graph state; reads "question" and "documents"
    Returns:
        state(dict): The unchanged "documents" and "question" plus a new
        "generation" entry holding the chain's response
    """
    print("--GENERATE ANSWER--")
    user_question, context_docs = state["question"], state["documents"]
    answer = qa_rag_chain.invoke(
        {"context": context_docs, "question": user_question}
    )
    return {
        "documents": context_docs,
        "question": user_question,
        "generation": answer,
    }


In [None]:
#decide to  generate

def decide_to_generate(state):
    """
    Pick the next node from the "web_search_needed" flag in the graph state.

    Arg:
        state(dict): The current graph state

    Returns:
        str: "rewrite_query" when the flag is "Yes", otherwise "generate_answer"
    """
    print("---ASSESS GRADED DOCUMENTS--")

    if state["web_search_needed"] != "Yes":
        # No web search was requested, so answer from the documents at hand.
        print("--DECISION: GENERATE RESPONSE--")
        return "generate_answer"

    # Web search was requested: re-write the query first.
    print("---DECISION: SOME OR ALL DOCUMENTS ARE NOT RELEVANT TO QUESTION, REWRITE QUERY--")
    return "rewrite_query"


In [None]:
# Build the agent graph
from langgraph.graph import END, StateGraph
# Assemble the workflow on a builder object; `agentic_rag` is bound only to the
# compiled graph at the end.
graph_builder = StateGraph(GraphState)

# Register each node function under the name the edges refer to.
for node_name, node_fn in (
    ("retrieve", retrieve),
    ("grade_documents", grade_documents),
    ("rewrite_query", rewrite_query),
    ("web_search", web_search),
    ("generate_answer", generate_answer),
):
    graph_builder.add_node(node_name, node_fn)

# Wiring: retrieve -> grade_documents -> (rewrite_query -> web_search ->)
# generate_answer -> END. decide_to_generate picks the branch after grading.
graph_builder.set_entry_point("retrieve")
graph_builder.add_edge("retrieve", "grade_documents")
graph_builder.add_conditional_edges(
    "grade_documents",
    decide_to_generate,
    {
        "rewrite_query": "rewrite_query",
        "generate_answer": "generate_answer",
    },
)
graph_builder.add_edge("rewrite_query", "web_search")
graph_builder.add_edge("web_search", "generate_answer")
graph_builder.add_edge("generate_answer", END)

agentic_rag = graph_builder.compile()


In [None]:
from IPython.display import Image, display, Markdown

display(Image(agentic_rag.get_graph().draw_mermaid_png()))

In [None]:
query = "what is langgraph"
response = agentic_rag.invoke({"question":query})

--RETRIEVAL FROM VECTOR DETABASE --
--CHECK DOCUMENT RELEVANCE TO QUESTION
--GRADE: DOCUMENT NOT RELEVANT
--GRADE: DOCUMENT NOT RELEVANT
--GRADE: DOCUMENT NOT RELEVANT
---ASSESS GRADED DOCUMENTS--
---DECISION: SOME OR ALL DOCUMENTS ARE NOT RELEVANT TO QUESTION, REWRITE QUERY--
--REWRITE QUERY--
--WEB SEARCH--
--GENERATE ANSWER--


In [None]:
display(Markdown(response['generation']))

Based on the provided context, LangChain is a unified benchmark that helps AI researchers build models that can leverage real-world knowledge to accomplish a broad range of tasks.

In [None]:
query = "who won icc the world cup in 2024?"
response = agentic_rag.invoke({"question": query})

--RETRIEVAL FROM VECTOR DETABASE --
--CHECK DOCUMENT RELEVANCE TO QUESTION
--GRADE: DOCUMENT NOT RELEVANT
--GRADE: DOCUMENT NOT RELEVANT
--GRADE: DOCUMENT NOT RELEVANT
---ASSESS GRADED DOCUMENTS--
---DECISION: SOME OR ALL DOCUMENTS ARE NOT RELEVANT TO QUESTION, REWRITE QUERY--
--REWRITE QUERY--
--WEB SEARCH--
--GENERATE ANSWER--


In [None]:
response

In [None]:
from IPython.display import Image, display, Markdown
display(Markdown(response['generation']))

In [None]:
query = "who won world cup in 2011"
response = agentic_rag.invoke({"question":query})

--RETRIEVAL FROM VECTOR DETABASE --
--CHECK DOCUMENT RELEVANCE TO QUESTION
--GRADE: DOCUMENT RELEVANT--
--GRADE: DOCUMENT RELEVANT--
--GRADE: DOCUMENT NOT RELEVANT
---ASSESS GRADED DOCUMENTS--
---DECISION: SOME OR ALL DOCUMENTS ARE NOT RELEVANT TO QUESTION, REWRITE QUERY--
--REWRITE QUERY--
--WEB SEARCH--
--GENERATE ANSWER--


In [None]:
response

{'question': '"Which country won the 2011 Cricket World Cup?" \n',
 'generation': 'According to the provided context, the country that won the 2011 Cricket World Cup is India.',
 'web_search_needed': 'Yes',
 'documents': [Document(metadata={'article_id': '345753', 'title': '2011 Cricket World Cup'}, page_content="The 2011 ICC Cricket World Cup was the tenth Cricket World Cup. It was played in India, Sri Lanka, and Bangladesh. It was Bangladesh's first time co-hosting a World Cup. All matches in the World Cup were given One Day International status. All matches played had over 50 overs. Fourteen national cricket teams were in the tournament, including ten full members and four associate members. The World Cup was between 19 February and 2 April 2011. The first match played on 19 February 2011 was with co-hosts India and Bangladesh at the Sher-e-Bangla National Stadium in Dhaka. India won the tournament defeating Sri Lanka by 6 wickets in the final. India was the first nation to win the 

In [None]:
display(Markdown(response['generation']))

According to the provided context, the answer to the question "Which country won the 2011 Cricket World Cup?" is:

A India

The context states: "India won the tournament defeating Sri Lanka by 6 wickets in the final. India was the first nation to win the Cricket World Cup final on home soil."