# Evaluate a RAG application

In [17]:
import giskard
import os
from giskard.llm.client.openai import OpenAIClient
from dotenv import load_dotenv
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Pull variables from a local .env file (if any) into the process environment.
load_dotenv()

# Assigning None into os.environ raises an opaque TypeError, so check for the
# key explicitly and fail with a message that says what is missing.
if os.getenv('OPENAI_API_KEY') is None:
    raise RuntimeError(
        "OPENAI_API_KEY is not set; define it in the environment or in a .env file."
    )

# Route Giskard's LLM calls through OpenAI, using gpt-3.5-turbo as the default client.
giskard.llm.set_llm_api("openai")
oc = OpenAIClient(model="gpt-3.5-turbo")
giskard.llm.set_default_client(oc)

## Load Paths

In [18]:
def extract_pdf_paths(directory):
    """
    Recursively collect the paths of all PDF files below a directory.

    Args:
        directory (str): Root directory to walk.

    Returns:
        list: One path per file whose name ends in ".pdf" (compared
        case-insensitively), built by joining the containing folder
        with the file name.
    """
    return [
        os.path.join(folder, name)
        for folder, _subdirs, names in os.walk(directory)
        for name in names
        if name.lower().endswith('.pdf')
    ]


In [22]:
# Raw string: "\p" and "\d" are not valid escape sequences in a normal literal
# (newer Python versions warn about them); the resulting path is unchanged.
paths = extract_pdf_paths(r"\policy_crew\data")

## Load docs

In [25]:
def load_pdfs(paths):
    """
    Load every PDF in `paths` and return the results as one flat list.

    Args:
        paths (list): File paths, each handed to its own PyPDFLoader.

    Returns:
        list: Everything returned by each loader's load(), concatenated
        in the order the paths were given.
    """
    return [
        document
        for pdf_path in paths
        for document in PyPDFLoader(pdf_path).load()
    ]

In [26]:
# Load every PDF path collected above into a single list of documents.
documents=load_pdfs(paths)

## Split the text

In [27]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Splitter configured for chunks of at most 2500 with an overlap of 200 between neighbours.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=200,
    chunk_size=2500,
)

In [28]:
# Split the loaded documents into chunks.
# NOTE(review): `documents` is rebound to the split result, so running this cell a
# second time splits the already-split chunks again; re-run the load cell first.
documents=text_splitter.split_documents(documents)

In [29]:
# Number of chunks produced by the splitter.
len(documents)

245

In [30]:
import pandas as pd

# One-column frame ("text") holding the page content of every chunk.
df = pd.DataFrame({"text": [chunk.page_content for chunk in documents]})
df.shape

(245, 1)

## Create the knowledge base with Giskard

In [31]:
from giskard.rag import KnowledgeBase

# Wrap the chunk DataFrame in a Giskard KnowledgeBase; it is passed to
# generate_testset and evaluate further down.
knowledge_base = KnowledgeBase(df)

## Generate the Test Set

In [None]:
from giskard.rag import generate_testset

# Generate 60 questions from the knowledge base. The recorded run below took
# about 3.5 minutes, so avoid re-running this cell unnecessarily.
testset = generate_testset(
    knowledge_base,
    num_questions=60,
    agent_description="A chatbot answering questions about different policies related to projects",
)

2024-07-16 18:28:45,674 pid:23644 MainThread giskard.rag  INFO     Finding topics in the knowledge base.
2024-07-16 18:29:23,198 pid:23644 MainThread giskard.rag  INFO     Found 9 topics in the knowledge base.


Generating questions: 100%|██████████| 60/60 [03:23<00:00,  3.39s/it]


In [None]:
test_set_df = testset.to_pandas()

# Preview the first three generated samples: question, reference answer and context.
for number, (_label, sample) in enumerate(test_set_df.head(3).iterrows(), start=1):
    print(f"Question {number}: {sample['question']}")
    print(f"Reference answer: {sample['reference_answer']}")
    print("Reference context:")
    print(sample['reference_context'])
    print("******************", end="\n\n")

In [None]:
# Persist the generated test set; the Graph RAG section reloads it from this file.
testset.save("test-set.jsonl")

## Evaluating Simple RAG

In [74]:
from dotenv import load_dotenv
from qdrant_client import QdrantClient
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores.qdrant import Qdrant
from langchain_openai import ChatOpenAI
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain.retrievers import ContextualCompressionRetriever
from langchain_community.document_compressors import JinaRerank
# PromptTemplate lives in langchain_core.prompts; the langchain_core package root does not export it.
from langchain_core.prompts import PromptTemplate

In [77]:
class RAGTool:
    """
    A class to handle the Retrieval-Augmented Generation (RAG) process.

    Attributes:
        query (str): The query to process using the RAG system.
    """

    def __init__(self, query: str):
        """
        Initialize the RAGTool with the given query.

        Args:
            query (str): The query to process.
        """
        self.query = query

    @staticmethod
    def _format_docs(docs) -> str:
        """
        Render retrieved documents as a single context string.

        Each document becomes its page content followed by a "Metadata:" line
        listing its metadata as "key: value" pairs; documents are separated by
        a blank line.

        Args:
            docs (list): Documents exposing `page_content` and `metadata`.

        Returns:
            str: The formatted documents (empty string when `docs` is empty).
        """
        formatted_docs = []
        for doc in docs:
            metadata_str = ', '.join(f"{key}: {value}" for key, value in doc.metadata.items())
            formatted_docs.append(f"{doc.page_content}\nMetadata: {metadata_str}")
        return "\n\n".join(formatted_docs)

    def qa_from_RAG(self) -> str:
        """
        Process the query using the RAG system and return the result.

        Builds the whole pipeline on every call: a Qdrant retriever over the
        "policy-agent" collection, a Jina reranker on top of it, the prompt,
        and the chat model; then invokes the chain with `self.query`.

        Returns:
            str: The result of processing the query.

        Raises:
            Exception: If any step fails. The message is prefixed with
                "Error processing the query: " and the original error is
                attached as the cause.
        """
        try:
            # Connection settings and API keys are read from the environment.
            qdrant_url = os.getenv('QDRANT_URL')
            qdrant_api_key = os.getenv('QDRANT_API_KEY')
            openai_api_key = os.getenv('OPENAI_API_KEY')
            jina_api_key = os.getenv('JINA_API_KEY')

            embeddings_model = OpenAIEmbeddings(model='text-embedding-ada-002', openai_api_key=openai_api_key)
            qdrant_client = QdrantClient(url=qdrant_url, api_key=qdrant_api_key)
            qdrant = Qdrant(client=qdrant_client, collection_name="policy-agent", embeddings=embeddings_model)
            # The base retriever asks for k=20 candidates; the reranker below is configured with top_n=5.
            retriever = qdrant.as_retriever(search_kwargs={"k": 20})
            prompt_template = PromptTemplate(
                template="""
            # Your role
            You are a brilliant expert at understanding the intent of the questioner and the crux of the question, and providing the most optimal answer  from the docs to the questioner's needs from the documents you are given.
            # Instruction
            Your task is to answer the question  using the following pieces of retrieved context delimited by XML tags.
            <retrieved context>
            Retrieved Context:
            {context}
            </retrieved context>
            # Constraint
            1. Think deeply and multiple times about the user's question\nUser's question:\n{question}\nYou must understand the intent of their question and provide the most appropriate answer.
            - Ask yourself why to understand the context of the question and why the questioner asked it, reflect on it, and provide an appropriate response based on what you understand.
            2. Choose the most relevant content(the key content that directly relates to the question) from the retrieved context and use it to generate an answer.
            3. Generate a concise, logical answer. When generating the answer, Do Not just list your selections, But rearrange them in context so that they become paragraphs with a natural flow.
            4. When you don't have retrieved context for the question or If you have a retrieved documents, but their content is irrelevant to the question, you should answer 'I can't find the answer to that question in the material I have'.
            5. If required break the answer into proper paragraphs.
            6. Mention Name of all the documents and page number you used in generating the response from the context provided . e.g 1. Doc name : RSCA/etienne.pdf, Page number: 1 /n 2. Doc name : RSCA/rubric.pdf, Page number: 10. Remeber to include all of the Document names and pages. Dont missout
            # Question:
            {question}""",
                input_variables=["context", "question"],
            )
            llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0.2, openai_api_key=openai_api_key)
            compressor = JinaRerank(jina_api_key=jina_api_key, top_n=5)
            compression_retriever = ContextualCompressionRetriever(
                base_compressor=compressor, base_retriever=retriever
            )

            # The raw query feeds both the retriever (for context) and the prompt's question slot.
            rag_chain = (
                {"context": compression_retriever | self._format_docs, "question": RunnablePassthrough()}
                | prompt_template
                | llm
                | StrOutputParser()
            )

            return rag_chain.invoke(self.query)
        except Exception as e:
            # Chain explicitly so the root cause stays attached to the wrapper.
            raise Exception(f"Error processing the query: {e}") from e

In [80]:
def answer_fn(question, history=None):
    """
    Answer a single question with the simple RAG pipeline.

    Args:
        question (str): Question handed to RAGTool.
        history: Accepted but not used by this function.

    Returns:
        Whatever RAGTool.qa_from_RAG() returns for the question.
    """
    return RAGTool(question).qa_from_RAG()

In [81]:
from giskard.rag import evaluate

# Run every test-set question through answer_fn and score the answers.
# The recorded run below took roughly 12 minutes of agent calls plus the correctness pass.
report = evaluate(answer_fn, testset=testset, knowledge_base=knowledge_base)

Asking questions to the agent: 100%|██████████| 60/60 [12:07<00:00, 12.13s/it]
CorrectnessMetric evaluation: 100%|██████████| 60/60 [01:37<00:00,  1.63s/it]


In [88]:
# Show the simple-RAG evaluation report inline.
display(report)

In [83]:
# Export the report to report.html.
report.to_html("report.html")

In [84]:
# Correctness score per question type (see the table below).
report.correctness_by_question_type()

Unnamed: 0_level_0,correctness
question_type,Unnamed: 1_level_1
complex,0.4
conversational,0.0
distracting element,0.2
double,0.1
simple,0.4
situational,0.7


In [None]:
# Inspect the test-set samples reported as failures.
report.get_failures()

## Evaluating Graph RAG

In [34]:
from llama_index.core import PropertyGraphIndex
from llama_index.embeddings.openai import OpenAIEmbedding as LlamaindexOpenAIEmbeddings
from llama_index.llms.openai import OpenAI as LlamaindexOpenAI
from llama_index.graph_stores.neo4j import Neo4jPropertyGraphStore
import nest_asyncio
from llama_index.core.indices.property_graph import (
    LLMSynonymRetriever,
    VectorContextRetriever,
)

In [35]:
class GraphRagTool:
    """
    Answer a query against an existing Neo4j property graph (Graph RAG).

    Attributes:
        query (str): The query to process.
        openai_api_key: Value of the OPENAI_API_KEY environment variable.
        neo4j_url: Value of the NEO4J_URL environment variable.
        neo4j_password: Value of the NEO4J_PASSWORD environment variable.
        embed_model: OpenAI embedding model handed to the vector retriever.
        llm: OpenAI LLM handed to the index and the synonym retriever.
        graph_store: Neo4j property graph store built from the settings above.
    """

    def __init__(self, query):
        """
        Initialize the GraphRAG with the given query.

        Reads credentials from the environment and constructs the embedding
        model, the LLM and the Neo4j graph store.

        Args:
            query (str): The query to process.
        """
        self.query = query
        self.openai_api_key = os.getenv('OPENAI_API_KEY')
        self.neo4j_url = os.getenv('NEO4J_URL')
        self.neo4j_password = os.getenv('NEO4J_PASSWORD')
        self.embed_model = LlamaindexOpenAIEmbeddings(model_name="text-embedding-3-small", api_key=self.openai_api_key)
        self.llm = LlamaindexOpenAI(model="gpt-3.5-turbo", temperature=0.0, api_key=self.openai_api_key)
        self.graph_store = Neo4jPropertyGraphStore(
            username="neo4j",
            password=self.neo4j_password,
            url=self.neo4j_url
        )

    def load_neo4j_graph(self):
        """
        Load from existing graph/vector store and process the query.

        Returns:
            str: The result of the query.

        Raises:
            Exception: If any step fails. The message is prefixed with
                "Error processing the query: " and the original error is
                attached as the cause.
        """
        try:
            nest_asyncio.apply()
            # Load from existing graph/vector store
            index = PropertyGraphIndex.from_existing(
                property_graph_store=self.graph_store,
                embed_kg_nodes=True,
                llm=self.llm,
            )

            # Two sub-retrievers over the same graph store: LLM-generated
            # synonyms and embedding-based vector context.
            llm_synonym = LLMSynonymRetriever(
                index.property_graph_store,
                llm=self.llm,
                include_text=True,
            )
            vector_context = VectorContextRetriever(
                index.property_graph_store,
                embed_model=self.embed_model,
                include_text=True,
            )
            # NOTE(review): no llm is passed here, so answer synthesis presumably
            # falls back to llama-index's global default LLM rather than self.llm — confirm.
            query_engine = index.as_query_engine(
                sub_retrievers=[llm_synonym, vector_context],
                include_text=True
            )

            response = query_engine.query(self.query)
            return response.response
        except Exception as e:
            # Chain explicitly so the root cause stays attached to the wrapper.
            raise Exception(f"Error processing the query: {e}") from e



In [92]:
# Try the Graph RAG tool on a single question.
a=GraphRagTool('What is LIHTC?')



In [93]:
# Run the query against the existing Neo4j graph and show the answer.
a.load_neo4j_graph()

"LIHTC is the Low Income Housing Tax Credit program, which was established to encourage the private sector to invest in the construction and rehabilitation of housing for low and moderate-income individuals and families. Owners/investors can claim LIHTC on their federal income tax return each year for a period of 10 full years, and projects generally must meet certain requirements for low-income use for a minimum of 30 years per federal requirements. LIHTC is related to the Qualified Allocation Plan and the 2023 program. It has a 40-year lifespan, equity contributions are not subject to a limit, and projects must meet the District's basic eligibility requirements. DHCD is involved in calculating, issuing, and administering LIHTC."

In [36]:
def graph_rag_answer_fn(question, history=None):
    """
    Answer a single question with the Graph RAG pipeline.

    Args:
        question (str): Question handed to GraphRagTool.
        history: Accepted but not used by this function.

    Returns:
        Whatever GraphRagTool.load_neo4j_graph() returns for the question.
    """
    return GraphRagTool(question).load_neo4j_graph()

In [37]:
from giskard.rag import QATestset

# Reload the test set saved earlier to test-set.jsonl instead of regenerating it.
testset = QATestset.load("test-set.jsonl")

In [38]:
from giskard.rag import evaluate

# Evaluate the Graph RAG agent on the same test set and knowledge base as the simple RAG.
graphreport = evaluate(graph_rag_answer_fn, testset=testset, knowledge_base=knowledge_base)

CorrectnessMetric evaluation: 100%|██████████| 60/60 [01:29<00:00,  1.49s/it]


In [39]:
# Show the Graph RAG evaluation report inline.
display(graphreport)

2024-07-17 00:05:40,504 pid:38156 MainThread giskard.rag  INFO     Finding topics in the knowledge base.
2024-07-17 00:06:13,502 pid:38156 MainThread giskard.rag  INFO     Found 3 topics in the knowledge base.


In [40]:
# Inspect the test-set samples the Graph RAG agent failed.
graphreport.get_failures()

Unnamed: 0_level_0,question,reference_answer,reference_context,conversation_history,metadata,agent_answer,correctness,correctness_reason
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1438291b-cfbe-400c-acfe-ce0ff7ebebbb,What documents are required for evidence of LI...,A copy of the Project's Federal applicable per...,Document 163: • a copy of the Project market s...,[],"{'question_type': 'simple', 'seed_document_id'...",Sponsor/Developers are required to provide an ...,False,The agent's answer provides a general overview...
1031d442-e73a-4802-bb37-816bf53a5492,What enforces the required Federal minimum aff...,The required Federal minimum affordability per...,Document 95: assistance through this RFP. \n ...,[],"{'question_type': 'simple', 'seed_document_id'...",The applicant's affordability commitment at ap...,False,The agent's answer is not close to the ground ...
922078bf-d4c9-4b40-b856-20beec1451f8,What amenities are considered for the Communit...,The amenities considered for the Community-Ori...,Document 84: 2023 DHCD Consolidated Request fo...,[],"{'question_type': 'simple', 'seed_document_id'...","High speed internet in-unit, child-focused ame...",False,The agent's answer is missing the requirement ...
15409bbb-bb45-4562-9072-2578879f1123,What are the criteria for receiving maximum po...,Maximum points will be given to Projects that ...,Document 140: 2023 DHCD Consolidated Request f...,[],"{'question_type': 'simple', 'seed_document_id'...",Projects that are within a certain distance of...,False,The agent's answer is not close to the ground ...
719e211e-be36-47bb-91e9-5705a10df738,What legal/compliance issues must development ...,"Within the past five (5) years, no member of t...",Document 48: 2023 DHCD Consolidated Request f...,[],"{'question_type': 'simple', 'seed_document_id'...",Development team members must comply with issu...,False,The agent's answer does not address the specif...
6d98ef67-a3cd-43b5-a3a1-0a3d0af3f532,What are the maximum construction cost guideli...,"For buildings with less than five stories, the...",Document 66: 2023 DHCD Consolidated Request fo...,[],"{'question_type': 'simple', 'seed_document_id'...",$385 for new construction and $323 for substan...,False,The agent's answer is missing the context of b...
66e4c095-cced-4386-99b6-0dd739c3d54f,What are the requirements regarding the comple...,Applicants must include a completed Phase I En...,Document 53: Applicant s must include a comple...,[],"{'question_type': 'simple', 'seed_document_id'...",Applicants must ensure that the completed Phas...,False,The agent's answer is very close to the ground...
0926747d-492c-4c14-b290-7d0bebbc9a6a,What are the minimum affordable household occu...,Sponsor/Developers must elect to have at least...,Document 217: DHCD 2023 Qualified Allocation ...,[],"{'question_type': 'simple', 'seed_document_id'...",LIHTC projects must meet affordable household ...,False,The agent's answer provides a general explanat...
fbe77c18-59d7-4dc0-a860-17bf5600eb73,What are the repayment requirements for homebu...,Depending on the period of affordability propo...,Document 24: 2023 DHCD Consolidated Request fo...,[],"{'question_type': 'simple', 'seed_document_id'...",Repayment for homebuyers in the 2023 DHCD Cons...,False,The agent's answer provides detailed informati...
dd564271-746d-4ae6-a2ea-f614e9211a84,What is the requirement for Permanent Supporti...,For new construction rental projects utilizing...,Document 34: 2023 DHCD Consolidated Request fo...,[],"{'question_type': 'simple', 'seed_document_id'...",At least 20% of units in new construction rent...,False,The agent's answer is not close to the ground ...


In [41]:
# Score per RAG component (see the table below).
graphreport.component_scores()

Unnamed: 0_level_0,score
RAG Components,Unnamed: 1_level_1
GENERATOR,0.02
RETRIEVER,0.0
REWRITER,0.0
ROUTING,1.0
KNOWLEDGE_BASE,0.923077
