In [1]:
import requests
import json
import pandas as pd
from ragas import evaluate
from ragas.metrics import faithfulness, answer_relevancy
from datasets import Dataset

# Render DataFrame cells in full: a max_colwidth of None disables truncation,
# so long answers and retrieved chunks stay readable when inspected.
pd.options.display.max_colwidth = None

ModuleNotFoundError: No module named 'ragas'

In [None]:
# (question, reference answer) pairs used to probe the RAG API. Several
# reference answers keep their original quiz wording ("All of the above (...)").
_QA_PAIRS = (
    ("How does Caesar first enter the play?",
     "In a triumphal procession; he has defeated the sons of his deceased rival, Pompey"),
    ("What does the Soothsayer say to Caesar?",
     "Beware the Ides of March"),
    ("What does Cassius first ask Brutus?",
     "Why he has been so distant and contemplative lately"),
    ("What does Brutus admit to Cassius?",
     "That he fears the people want Caesar to be king"),
    ("What does Antony offer Caesar in the marketplace?",
     "The crown"),
    ("That night, which of the following omens are seen?",
     "All of the above (Dead men walking, Lions strolling in the marketplace, Lightning)"),
    ("What finally convinces Brutus to join the conspirators?",
     "Forged letters planted by Cassius"),
    ("Why does Calpurnia urge Caesar to stay home rather than appear at the Senate?",
     "She has had nightmares about his death"),
    ("Why does Caesar ignore Calpurnia's warnings?",
     "Decius convinces him that Calpurnia has interpreted the dream and the omens incorrectly"),
    ("What does Artemidorus offer Caesar in the street?",
     "A letter warning him about the conspiracy"),
    ("What do the conspirators do at the Senate?",
     "All of the above (Kneel around Caesar, Stab him to death, Proclaim Tyranny is dead!)"),
    ("What does Antony do when he arrives at Caesar's body?",
     "All of the above (He weeps over Caesar's body, He shakes hands with the conspirators, and he swears allegiance to Brutus for the moment)"),
    ("After the assassination of Caesar, which of the conspirators addresses the plebeians first?",
     "Brutus"),
    ("What is Brutus's explanation for killing Caesar?",
     "Caesar was ambitious"),
    ("What does Antony tell the crowd?",
     "All of the above (That Brutus is an honorable man, That Caesar brought riches to Rome and turned down the crown, That Caesar bequeathed all of the citizens a large sum of money)"),
    ("What is the crowd's response to Antony's speech?",
     "Rage; they chase the conspirators from the city"),
    ("Who is Octavius?",
     "Caesar's adopted son and appointed heir"),
    ("Octavius and Antony join together with whom?",
     "Lepidus"),
    ("Why do Brutus and Cassius argue?",
     "Brutus asked for money and Cassius withheld it"),
    ("What news do Brutus and Cassius receive from Rome?",
     "All of the above (Portia is dead, Many senators are dead, The armies of Antony and Octavius are marching toward Philippi)"),
    ("What appears at Brutus's bedside in camp?",
     "Caesar's ghost"),
    ("What does Cassius think has happened to his and Brutus's armies?",
     "He believes that they have been defeated by Antony and Octavius"),
    ("What is Cassius's response to this situation?",
     "He has his servant stab him"),
    ("What does Brutus do when he sees the battle is lost?",
     "He kills himself"),
    ("What does Antony call Brutus at the end?",
     "The noblest Roman of them all"),
)

# Expand the pairs into the record shape the later cells read:
# item['question'] is sent to the API, item['ideal_answer'] becomes the ground truth.
evaluation_testbed = [
    {"question": prompt, "ideal_answer": reference}
    for prompt, reference in _QA_PAIRS
]

In [None]:
# Endpoint of the running RAG service; change host/port to match your deployment.
api_url = "http://127.0.0.1:8003/query"
# Per-request timeout in seconds, so a stalled server cannot hang the notebook.
REQUEST_TIMEOUT_S = 120
results_list = []
failed_count = 0

print(f"Starting evaluation... calling API at {api_url}")

# A single Session reuses the underlying connection for every question.
with requests.Session() as session:
    for item in evaluation_testbed:
        question = item['question']

        try:
            # Call the live API with the question as the query payload.
            response = session.post(
                api_url,
                json={"query": question},
                timeout=REQUEST_TIMEOUT_S,
            )
            # Turn HTTP 4xx/5xx into an explicit error instead of letting a
            # missing 'answer' key surface later as an unrelated KeyError.
            response.raise_for_status()
            response_data = response.json()

            # The response is read as {'answer': ..., 'sources': [{'chunk': ...}, ...]}.
            answer = response_data['answer']
            contexts = [source['chunk'] for source in response_data['sources']]

        except (requests.RequestException, KeyError, TypeError, ValueError) as e:
            # Best effort: keep a placeholder row so every question stays
            # represented. Connection/HTTP failures, non-JSON bodies and
            # unexpected response shapes all land here.
            print(f"Error on question: {question} - {e}")
            failed_count += 1
            answer = f"API Error: {e}"
            contexts = []

        results_list.append({
            "question": question,
            "answer": answer,
            "contexts": contexts,
            "ground_truth": item['ideal_answer']  # RAGAs uses 'ground_truth'
        })

print(f"Evaluation queries complete. {len(results_list)} results gathered.")
if failed_count:
    print(f"{failed_count} of {len(results_list)} queries failed; see the errors above.")

# Convert to a pandas DataFrame to inspect the first few rows
results_df = pd.DataFrame(results_list)
display(results_df.head())

In [None]:
# Rows recorded for failed API calls carry an "API Error: ..." placeholder
# answer and an empty contexts list. Scoring them would distort the metrics,
# so only rows with a real response are evaluated.
scorable_results = [
    row for row in results_list
    if not (str(row["answer"]).startswith("API Error:") and not row["contexts"])
]

skipped_count = len(results_list) - len(scorable_results)
if skipped_count:
    print(f"Skipping {skipped_count} result(s) whose API call failed.")
if not scorable_results:
    # Fail with a clear message rather than an obscure error on an empty dataset.
    raise RuntimeError(
        "No successful API responses to evaluate. Is the RAG API running?"
    )

# Convert the scorable rows to a Hugging Face Dataset
results_dataset = Dataset.from_list(scorable_results)

# Define the metrics (as required by the assignment)
metrics = [
    faithfulness,    # How factual is the answer based on context?
    answer_relevancy # How relevant is the answer to the question?
]

print("Running RAGAs evaluation...")

# Run the evaluation.
# NOTE(review): no `llm=` / `embeddings=` is passed, so ragas uses its own
# default judge model. Per the ragas docs that default is an OpenAI model
# (needs OPENAI_API_KEY), not Ollama - confirm for the installed version, and
# pass an Ollama-backed llm/embeddings explicitly if a local judge is intended.
score = evaluate(
    results_dataset,
    metrics=metrics
)

print("RAGAs evaluation complete.")

# Show the aggregate scores
print(score)