# RAGEval

In [34]:
import pandas as pd
from pprint import pprint
from langchain import hub
from langchain.schema import Document
from langgraph.graph import END, StateGraph
from typing_extensions import TypedDict
from typing import List
%run setup.py
%run vectorstore.py
%run llm.py
%run prompts.py

In [35]:
### Documents DB
files_boe = [
    "data/BOE-151_Constitucion_Espanola.pdf",
    "data/BOE-334_Codigo_Procesal_Penal.pdf",
    "data/BOE-391_Ambitos_de_la_Seguridad_Nacional_Terrorismo.pdf",
]

# add_files(files_boe, collection_name="boe")
vectorstore = get_vectorstore("boe")
retriever = vectorstore.as_retriever()

In [36]:
model = "llama3-8b-8192" # llama3-70b-8192; llama3 local_llm
llm = ChatLLM(model, local=False, json=False, temperature=0)
json_llm = ChatLLM(model, local=False, json=True, temperature=0)

In [37]:
### Router
question_router = router_prompt | json_llm | JsonOutputParser()

### Rewriter
rewriter = rewrite_prompt | json_llm | JsonOutputParser()
num_queries = 3

### Retrieval Grader
retrieval_grader = retrieval_grader_prompt | json_llm | JsonOutputParser()

### Generate
# Post-processing
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

# Chain
rag_chain = gen_prompt | llm | StrOutputParser()

gen_raw_chain = gen_raw_prompt | llm | StrOutputParser()

### Hallucination Grader
hallucination_grader = hallucination_prompt | json_llm | JsonOutputParser()

### Answer Grader
answer_eval = answer_eval_prompt | json_llm | JsonOutputParser()

### Search
from langchain_community.tools.tavily_search import TavilySearchResults
web_search_tool = TavilySearchResults(k=3)

We'll implement these as a control flow in LangGraph.

In [38]:
### State

class GraphState(TypedDict):
    """
    Represents the state of our graph.

    Attributes:
        question: question
        generation: LLM generation
        web_search: whether to add search
        documents: list of documents 
    """
    question : str
    new_questions : str
    ground_answer: str
    vectorstore_topics: str
    generation : str
    web_search : str
    documents : List[str]
    grade: int

### Nodes

def rewrite(state):
    """
    This function takes a state dictionary as input and retrieves a question from the state. 
    It then invokes a rewriter to generate new questions based on the original question.
    
    Args:
        state (dict): The state dictionary containing the question.
    
    Returns:
        dict: A dictionary with the original question and a list of new questions generated.
    """
    question = state["question"]

    # Rewriting
    new_questions = rewriter.invoke({"num_queries": 2, "question": question})
    new_questions = new_questions["queries"]
    
    return {"question": question, "new_questions": new_questions}

def retrieve(state):
    """
    Retrieve documents from vectorstore

    Args:
        state (dict): The current graph state

    Returns:
        state (dict): New key added to state, documents, that contains retrieved documents
    """
    question = state["question"]
    new_questions = state["new_questions"]

    # Retrieval
    documents = retriever.invoke(question)
    
    if new_questions is not None:
        for new_question in new_questions:
            new_docs = retriever.invoke(new_question)
            for doc in new_docs:
                documents.append(doc)
    
    return {"documents": documents, "question": question}

def generate(state):
    """
    Generate answer using RAG on retrieved documents

    Args:
        state (dict): The current graph state

    Returns:
        state (dict): New key added to state, generation, that contains LLM generation
    """
    question = state["question"]
    documents = state["documents"]
    
    if len(documents) == 0:
        documents = [Document(page_content="No se han podido recuperar documentos relevantes para responder la pregunta.")]
    
    # RAG generation
    generation = rag_chain.invoke({"context": documents, "question": question})
    return {"documents": documents, "question": question, "generation": generation}

def generate_raw(state):
    """
    Generate answer using RAG on retrieved documents

    Args:
        state (dict): The current graph state

    Returns:
        state (dict): New key added to state, generation, that contains LLM generation
    """
    question = state["question"]
    
    generation = gen_raw_chain.invoke({"question": question})
    return {"question": question, "generation": generation}



def grade_documents(state):
    """
    Determines whether the retrieved documents are relevant to the question
    If any document is not relevant, we will set a flag to run web search

    Args:
        state (dict): The current graph state

    Returns:
        state (dict): Filtered out irrelevant documents and updated web_search state
    """
    question = state["question"]
    documents = state["documents"]
    
    # Score each doc
    filtered_docs = []
    web_search = "No"
    for d in documents:
        score = retrieval_grader.invoke({"question": question, "document": d.page_content})
        grade = score['score']
        # Document relevant
        if grade.lower() == "yes":
            filtered_docs.append(d)
        else:
            web_search="Yes"
            continue
            
    return {"documents": filtered_docs, "question": question, "web_search": web_search}

def decide_to_generate(state):
    """
    Determines whether to generate an answer, or add web search

    Args:
        state (dict): The current graph state

    Returns:
        str: Binary decision for next node to call
    """
    question = state["question"]
    web_search = state["web_search"]
    filtered_documents = state["documents"]

    if web_search == "Yes":
        # All documents have been filtered check_relevance
        # We will re-generate a new query
        return "websearch"
    else:
        # We have relevant documents, so generate answer
        return "generate"

def websearch(state):
    """
    Web search based based on the question

    Args:
        state (dict): The current graph state

    Returns:
        state (dict): Appended web results to documents
    """
    question = state["question"]
    documents = state["documents"]

    # Web search
    docs = web_search_tool.invoke({"query": question})
    web_results = "\n".join([d["content"] for d in docs])
    web_results = Document(page_content=web_results, metadata={"source": "web"})
    if documents is not None:
        documents.append(web_results)
    else:
        documents = [web_results]
    return {"documents": documents, "question": question}

def grade_generation(state):
    """
    Determines whether the generation is grounded in the document and answers question.

    Args:
        state (dict): The current graph state

    Returns:
        str: Decision for next node to call
    """
    question = state["question"]
    ground_answer = state["ground_answer"]
    documents = state["documents"]
    generation = state["generation"]
    
    score = answer_eval.invoke({"question": question, "generation": generation, "ground_answer": ground_answer})
    grade = int(score['score'])
    return {"documents": documents, "generation": generation, "grade": grade}

### Graph Build

In [39]:
workflows = []
## BASE LLAMA3
workflow = StateGraph(GraphState)

# Define the nodes
workflow.add_node("generate_raw", generate_raw) # generate
workflow.add_node("grade_generation_raw", grade_generation) # grade_generation

# Build graph
workflow.set_entry_point("generate_raw")
workflow.add_edge("generate_raw", "grade_generation_raw")
workflow.add_edge("grade_generation_raw", END)

workflows.append(workflow)

In [40]:
## BASE RAG
workflow = StateGraph(GraphState)

# Define the nodes
workflow.add_node("retrieve", retrieve) # retrieve
workflow.add_node("generate", generate) # generate
workflow.add_node("grade_generation", grade_generation) # grade_generation

# Build graph
workflow.set_entry_point("retrieve")
workflow.add_edge("retrieve", "generate")
workflow.add_edge("generate", "grade_generation")
workflow.add_edge("grade_generation", END)

workflows.append(workflow)

In [41]:
## FILTER RAG
workflow = StateGraph(GraphState)

# Define the nodes
workflow.add_node("retrieve", retrieve) # retrieve
workflow.add_node("grade_documents", grade_documents) # grade documents
workflow.add_node("generate", generate) # generate
workflow.add_node("grade_generation", grade_generation) # grade_generation

# Build graph
workflow.set_entry_point("retrieve")
workflow.add_edge("retrieve", "grade_documents")
workflow.add_edge("grade_documents", "generate")
workflow.add_edge("generate", "grade_generation")
workflow.add_edge("grade_generation", END)

workflows.append(workflow)

In [42]:
## FILTER + WEB RAG 
workflow = StateGraph(GraphState)

# Define the nodes
workflow.add_node("retrieve", retrieve) # retrieve
workflow.add_node("grade_documents", grade_documents) # grade documents
workflow.add_node("generate", generate) # generate
workflow.add_node("websearch", websearch) # web search
workflow.add_node("grade_generation", grade_generation) # grade_generation

# Build graph
workflow.set_entry_point("retrieve")
workflow.add_edge("retrieve", "grade_documents")
workflow.add_conditional_edges(
    "grade_documents",
    decide_to_generate,
    {
        "websearch": "websearch",
        "generate": "generate"
    }
)
workflow.add_edge("websearch", "generate")
workflow.add_edge("generate", "grade_generation")
workflow.add_edge("grade_generation", END)

workflows.append(workflow)

In [43]:
## FILTER + REWRITE RAG 
workflow = StateGraph(GraphState)

# Define the nodes
workflow.add_node("rewrite", rewrite) # retrieve
workflow.add_node("retrieve", retrieve) # retrieve
workflow.add_node("grade_documents", grade_documents) # grade documents
workflow.add_node("generate", generate) # generate
workflow.add_node("grade_generation", grade_generation) # grade_generation

# Build graph
workflow.set_entry_point("rewrite")
workflow.add_edge("rewrite", "retrieve")
workflow.add_edge("retrieve", "grade_documents")
workflow.add_edge("grade_documents", "generate")
workflow.add_edge("generate", "grade_generation")
workflow.add_edge("grade_generation", END)

workflows.append(workflow)

In [44]:
## REWRITE RAG 
workflow = StateGraph(GraphState)

# Define the nodes
workflow.add_node("rewrite", rewrite) # retrieve
workflow.add_node("retrieve", retrieve) # retrieve
workflow.add_node("generate", generate) # generate
workflow.add_node("grade_generation", grade_generation) # grade_generation

# Build graph
workflow.set_entry_point("rewrite")
workflow.add_edge("rewrite", "retrieve")
workflow.add_edge("retrieve", "generate")
workflow.add_edge("generate", "grade_generation")
workflow.add_edge("grade_generation", END)

workflows.append(workflow)

In [45]:
def get_result_df(ID, workflow_name, question, answer, result):
    generation = result["generation"]
    grade = result["grade"]

    result_df = pd.DataFrame({
        'ID': [ID],
        'RAG_type': [workflow_name],
        'question': [question],
        'generation': [generation],
        'true_answer': [answer],
        'grade': [grade],
        'n_retrieved_docs': len(result["documents"])
    })
    return result_df

def get_doc_df(ID, result):
    retrieved_docs = [doc.page_content for doc in result["documents"]]
    docs_source = [True if "source" in doc.metadata else False for doc in result["documents"]]

    doc_df = pd.DataFrame({
        'ID': [ID] * len(retrieved_docs),
        'document': retrieved_docs,
        'source': docs_source
    })
    return doc_df

In [46]:
%run questions.py

In [51]:
workflows_names = ["LLAMA3", "BASE", "FILTER", "FILTER + WEB", "REWRITE", "FILTER + REWRITE"]

results_df = pd.DataFrame()
docs_df = pd.DataFrame()
ID = 23

for question, answer in zip([questions[5]], [answers[5]]):
    inputs = {"question": question, "ground_answer": answer}
    
    for workflow, workflow_name in zip(workflows, workflows_names):
        app = workflow.compile()
        
        if workflow_name == "LLAMA3":
            inputs["documents"] = []
        
        for output in app.stream(inputs):
            for key, result in output.items():
                pass
        
        result_df = get_result_df(ID, workflow_name, question, answer, result)
        results_df = pd.concat([results_df, result_df])
        
        doc_df = get_doc_df(ID, result)
        docs_df = pd.concat([docs_df, doc_df])
        print(ID)
        ID += 1
        if int(result["grade"]) >= 9:
            break
        

23
24
25
26
27
28


In [53]:
results_df.to_csv("./results/results_8B.csv", encoding="utf-8-sig", index=False, sep=";")
docs_df.to_csv("./results/docs_8B.csv", encoding="utf-8-sig", index=False, sep=";")

In [52]:
results_df

Unnamed: 0,ID,RAG_type,question,generation,true_answer,grade,n_retrieved_docs
0,0,LLAMA3,¿Cuáles son las causas por las cuales cesarán ...,"Según el Código de Procedimiento Penal, los Pr...","Los Presidentes de la Audiencia Nacional, los ...",8,0
0,1,BASE,¿Cuáles son las causas por las cuales cesarán ...,"Según el contexto, los Presidentes de la Audie...","Los Presidentes de la Audiencia Nacional, los ...",8,4
0,2,FILTER,¿Cuáles son las causas por las cuales cesarán ...,"Según el artículo 338, los Presidentes de la A...","Los Presidentes de la Audiencia Nacional, los ...",9,2
0,3,LLAMA3,¿Cómo funciona el proceso de investidura del P...,"Según la Constitución Española, el proceso de ...",Artículo 99\n1. Después de cada renovación del...,8,0
0,4,BASE,¿Cómo funciona el proceso de investidura del P...,"Según la Constitución Española, el proceso de ...",Artículo 99\n1. Después de cada renovación del...,9,4
0,5,LLAMA3,¿Cómo podría hacer una tortilla de patatas?,¡Claro! Para hacer una deliciosa tortilla de p...,Con la información y el contexto proporcionado...,2,0
0,6,BASE,¿Cómo podría hacer una tortilla de patatas?,"Lo siento, pero no tengo información sobre cóm...",Con la información y el contexto proporcionado...,8,4
0,7,FILTER,¿Cómo podría hacer una tortilla de patatas?,"Lo siento, pero no tengo suficiente informació...",Con la información y el contexto proporcionado...,8,1
0,8,FILTER + WEB,¿Cómo podría hacer una tortilla de patatas?,"Según el extracto de contexto, para hacer una ...",Con la información y el contexto proporcionado...,2,1
0,9,REWRITE,¿Cómo podría hacer una tortilla de patatas?,"Lo siento, pero no tengo información disponibl...",Con la información y el contexto proporcionado...,8,1


In [54]:
results_df.groupby("RAG_type").mean()

Unnamed: 0_level_0,ID,grade,n_retrieved_docs
RAG_type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
BASE,10.833333,7.5,4.0
FILTER,13.2,8.2,1.8
FILTER + REWRITE,19.0,8.0,12.0
FILTER + WEB,17.0,6.0,2.5
LLAMA3,9.833333,6.666667,0.0
REWRITE,18.0,8.0,2.25
