# Evaluating LLM, RAG, Agents


In [16]:
from dotenv import load_dotenv, find_dotenv

# Locate the .env file first, then load the variables it defines into the environment.
dotenv_path = find_dotenv()
load_dotenv(dotenv_path)


True

## Loading Documents


In [4]:
from llama_index.core import SimpleDirectoryReader

# Build a reader over the sample directory, load its contents as documents,
# and show how many documents were produced.
reader = SimpleDirectoryReader("../data/samples/")
documents = reader.load_data()
len(documents)

12

## Initializing the Testset Generator


In [2]:
from ragas.testset import TestsetGenerator

from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding

# Both models come from OpenAI: text-embedding-3-large for embeddings and
# gpt-4o as the generator LLM.
embeddings = OpenAIEmbedding(model="text-embedding-3-large")
generator_llm = OpenAI(model="gpt-4o")

# Wire the LlamaIndex model objects into a Ragas test-set generator.
generator = TestsetGenerator.from_llama_index(
    embedding_model=embeddings,
    llm=generator_llm,
)

In [7]:
# Generate a synthetic test set from the loaded LlamaIndex documents.
# NOTE(review): testset_size=5 is requested, but the recorded run produced
# 6 samples, so the size appears to be approximate.
testset = generator.generate_with_llamaindex_docs(documents, testset_size=5)

Applying HeadlinesExtractor:   0%|          | 0/8 [00:00<?, ?it/s]

Applying HeadlineSplitter:   0%|          | 0/12 [00:00<?, ?it/s]

unable to apply transformation: 'headlines' property not found in this node
unable to apply transformation: 'headlines' property not found in this node
unable to apply transformation: 'headlines' property not found in this node
unable to apply transformation: 'headlines' property not found in this node


Applying SummaryExtractor:   0%|          | 0/11 [00:00<?, ?it/s]

Property 'summary' already exists in node '6fba97'. Skipping!
Property 'summary' already exists in node '1d0067'. Skipping!
Property 'summary' already exists in node '1337b4'. Skipping!


Applying CustomNodeFilter:   0%|          | 0/14 [00:00<?, ?it/s]

Applying [EmbeddingExtractor, ThemesExtractor, NERExtractor]:   0%|          | 0/39 [00:00<?, ?it/s]

Property 'summary_embedding' already exists in node '1337b4'. Skipping!
Property 'summary_embedding' already exists in node '1d0067'. Skipping!
Property 'summary_embedding' already exists in node '6fba97'. Skipping!


Applying [CosineSimilarityBuilder, OverlapScoreBuilder]:   0%|          | 0/2 [00:00<?, ?it/s]

Generating personas:   0%|          | 0/3 [00:00<?, ?it/s]

Generating Scenarios:   0%|          | 0/2 [00:00<?, ?it/s]

Generating Samples:   0%|          | 0/6 [00:00<?, ?it/s]

In [8]:
# Turn the generated test set into a DataFrame and preview its first five rows.
df = testset.to_pandas()
df.head(n=5)

Unnamed: 0,user_input,reference_contexts,reference,synthesizer_name
0,Wht are sum ways to b an efective ally in supo...,[Skills and Behaviors of allies To be an effec...,To be an effective ally in support of Black Li...,single_hop_specifc_query_synthesizer
1,Cud yu pleese explane how GitLab is involvd in...,[Concepts & Terms 1. Privilege: an unearned ad...,GitLab promotes allyship and inclusivity throu...,single_hop_specifc_query_synthesizer
2,Howw can effectivee listeningg enhancee allysh...,[What it means to be an ally - Take on the str...,Effective listening is one of the essential sk...,single_hop_specifc_query_synthesizer
3,"How does the Diversity, Inclusions and Belongi...",[<1-hop>\n\nThe Roundtable ### Set the ground ...,"The Diversity, Inclusions and Belonging Team f...",multi_hop_specific_query_synthesizer
4,"How does the Diversity, Inclusion & Belonging ...",[<1-hop>\n\n--- title: Diversity Inclusion & B...,"The Diversity, Inclusion & Belonging (DIB) Tea...",multi_hop_specific_query_synthesizer


## Build a `QueryEngine`


In [5]:
from llama_index.core import VectorStoreIndex

# Build a vector index over the loaded documents.
vector_index = VectorStoreIndex.from_documents(documents)

# Expose the index as a query engine; this is the object evaluated below.
query_engine = vector_index.as_query_engine()

In [9]:
# Convert the test set to a DataFrame and show the first generated question.
df = testset.to_pandas()
df.loc[0, "user_input"]

'Wht are sum ways to b an efective ally in suport of Black Lives Matter?'

In [10]:
# Send the first generated question to the query engine and print its answer.
response_vector = query_engine.query(df.loc[0, "user_input"])
print(response_vector)

Identifying your power and privilege, following and supporting those from marginalized groups, spending time educating yourself, using knowledge to help, understanding Perception vs. Reality, not stopping with your power but also leveraging others' powers of authority are some ways to be an effective ally in support of Black Lives Matter.


## Evaluating the `QueryEngine`


In [11]:
# import metrics
from ragas.metrics import (
    Faithfulness,
    AnswerRelevancy,
    ContextPrecision,
    ContextRecall,
)

# init metrics with evaluator LLM
from ragas.llms import LlamaIndexLLMWrapper

# Wrap the LlamaIndex OpenAI model so the Ragas metrics can use it as evaluator.
evaluator_llm = LlamaIndexLLMWrapper(OpenAI(model="gpt-4o"))

# Instantiate every metric with the same evaluator LLM, in a fixed order.
metric_classes = (Faithfulness, AnswerRelevancy, ContextPrecision, ContextRecall)
metrics = [metric_cls(llm=evaluator_llm) for metric_cls in metric_classes]

In [12]:
# Convert the generated test set into a Ragas EvaluationDataset and display its
# summary (feature names and number of samples).
ragas_dataset = testset.to_evaluation_dataset()
ragas_dataset

EvaluationDataset(features=['user_input', 'reference_contexts', 'reference'], len=6)

In [13]:
from ragas.integrations.llama_index import evaluate

# Run each dataset question through the query engine, then score the
# responses with the configured metrics.
result = evaluate(query_engine=query_engine, dataset=ragas_dataset, metrics=metrics)

Running Query Engine:   0%|          | 0/6 [00:00<?, ?it/s]

Evaluating:   0%|          | 0/24 [00:00<?, ?it/s]

In [14]:
# Final scores: one value per metric, averaged over all evaluated samples.
print(result)

{'faithfulness': 0.9035, 'answer_relevancy': 0.9691, 'context_precision': 0.6667, 'context_recall': 0.5556}


In [15]:
# Per-sample breakdown: one row per question with its retrieved contexts,
# response, reference, and individual metric scores.
result.to_pandas()

Unnamed: 0,user_input,retrieved_contexts,reference_contexts,response,reference,faithfulness,answer_relevancy,context_precision,context_recall
0,Wht are sum ways to b an efective ally in supo...,[--- One of the mistakes that often happens he...,[Skills and Behaviors of allies To be an effec...,Some ways to be an effective ally in support o...,To be an effective ally in support of Black Li...,0.833333,0.974082,1.0,1.0
1,Cud yu pleese explane how GitLab is involvd in...,"[---\ntitle: ""Being Inclusive""\ndescription: ""...",[Concepts & Terms 1. Privilege: an unearned ad...,GitLab promotes allyship and inclusivity withi...,GitLab promotes allyship and inclusivity throu...,1.0,0.918888,0.0,0.0
2,Howw can effectivee listeningg enhancee allysh...,[## Allyship & Empathy\n\n### Being an Ally Re...,[What it means to be an ally - Take on the str...,Effective listening can enhance allyship skill...,Effective listening is one of the essential sk...,1.0,0.978787,1.0,1.0
3,"How does the Diversity, Inclusions and Belongi...",[A DIB Team Member will set up a time to discu...,[<1-hop>\n\nThe Roundtable ### Set the ground ...,"The Diversity, Inclusions and Belonging Team f...","The Diversity, Inclusions and Belonging Team f...",0.818182,0.999999,1.0,0.666667
4,"How does the Diversity, Inclusion & Belonging ...","[Teach people how to disagree, set the expecta...",[<1-hop>\n\n--- title: Diversity Inclusion & B...,"The Diversity, Inclusion & Belonging Team at G...","The Diversity, Inclusion & Belonging (DIB) Tea...",0.769231,1.0,0.0,0.0
5,"How does GitLab's Diversity, Inclusion & Belon...",[---\ntitle: Diversity Inclusion & Belonging C...,[<1-hop>\n\nEvaluating the company's current D...,The DIB team at GitLab enhances engagement and...,"GitLab's Diversity, Inclusion & Belonging (DIB...",1.0,0.942668,1.0,0.666667


## KnowledgeGraph


In [33]:
from ragas.testset.graph import KnowledgeGraph

# Create a knowledge graph with no arguments; document nodes are appended to it in the next cell.
kg = KnowledgeGraph()

In [34]:
from ragas.testset.graph import Node, NodeType

# Add one DOCUMENT node per loaded document to the knowledge graph.
for doc in documents:
    # The document text is stored under both "page_content" and "text".
    node_properties = {
        "page_content": doc.text,
        "text": doc.text,
        "document_metadata": doc.metadata,
    }
    document_node = Node(type=NodeType.DOCUMENT, properties=node_properties)
    kg.nodes.append(document_node)

In [35]:
# Display the graph summary (node and relationship counts).
kg

KnowledgeGraph(nodes: 12, relationships: 0)

In [39]:
# List the individual nodes with their ids, types, and property names.
kg.nodes


[Node(id: 50891e, type: NodeType.DOCUMENT, properties: ['page_content', 'text', 'document_metadata']),
 Node(id: 3d9556, type: NodeType.DOCUMENT, properties: ['page_content', 'text', 'document_metadata']),
 Node(id: 164b54, type: NodeType.DOCUMENT, properties: ['page_content', 'text', 'document_metadata']),
 Node(id: aa0e6d, type: NodeType.DOCUMENT, properties: ['page_content', 'text', 'document_metadata']),
 Node(id: 9330f9, type: NodeType.DOCUMENT, properties: ['page_content', 'text', 'document_metadata']),
 Node(id: 5c63ee, type: NodeType.DOCUMENT, properties: ['page_content', 'text', 'document_metadata']),
 Node(id: 1aa22c, type: NodeType.DOCUMENT, properties: ['page_content', 'text', 'document_metadata']),
 Node(id: 425149, type: NodeType.DOCUMENT, properties: ['page_content', 'text', 'document_metadata']),
 Node(id: 0c4204, type: NodeType.DOCUMENT, properties: ['page_content', 'text', 'document_metadata']),
 Node(id: a3c4d0, type: NodeType.DOCUMENT, properties: ['page_content', '