### Prepare Questions Data

In [1]:
import pandas as pd
import numpy as np

In [None]:
# Read the HotpotQA sample questions; lines=True parses the file as one JSON record per line
samples_path = "hotpotqa_1000_samples.json"
df = pd.read_json(samples_path, lines=True)

In [None]:
# Draw the 100-question test set; the fixed random_state makes the draw repeatable
TEST_SET_SIZE = 100
SAMPLE_SEED = 42
test_questions = df.sample(n=TEST_SET_SIZE, random_state=SAMPLE_SEED)

In [None]:
# Keep only the question/answer pair, then expose the reference answer under
# the column name RAGAS expects ("ground_truth")
qa_columns = test_questions[['question', 'answer']]
test_df = qa_columns.rename(columns={'answer': 'ground_truth'})

In [None]:
# Inspect the prepared test set (columns: question, ground_truth)
test_df
# Next step: run the RAG pipeline for each question

Unnamed: 0,question,ground_truth
521,Mohamed Naguib Hamed is an athlete from what t...,Egypt
737,What bill president Clinton sign at the school...,Goals 2000
740,Salvador Duran Sanchez raced in which series w...,A1 Grand Prix
660,"Along with Barbara Bush, what other member of ...",Jenna Bush
411,When was the first series premiered in the UK ...,15 January 2012
...,...,...
436,What is the nationality of the company that pr...,French
764,Bacon is a native of a city whose nickname is ...,Queen City of the Ozarks
88,"In 2014, Clemson Tigers football team were inv...",Bob Stoops
63,Uyirodu Uyiraga was a film that starred the In...,British Formula 3 season


### Import The RAG Method

In [None]:
import os
import sys

# Put the parent of the current working directory on sys.path (once) so the
# project import that follows can be resolved from there
parent_path = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
if sys.path.count(parent_path) == 0:
    sys.path.append(parent_path)

from rag_app.utils.rag_workflows.rag_workflows_for_evaluation import rag_workflow01

### Generate Responses

In [None]:
# Pre-create the "contexts" and "answer" columns as all-null *object* columns so
# that a later cell-wise assignment can store a Python list (contexts) or a
# string (answer) in a single cell.
# The dtype is given explicitly: a bare pd.Series() relies on a default that
# differs between pandas versions (float64 with a deprecation warning on older
# releases, object on newer ones), and a float64 column can reject list values.
test_df["contexts"] = pd.Series(index=test_df.index, dtype=object)
test_df["answer"] = pd.Series(index=test_df.index, dtype=object)

test_df
# At this point every "contexts" and "answer" value is null and still needs to be filled.

Unnamed: 0,question,ground_truth,contexts,answer
521,Mohamed Naguib Hamed is an athlete from what t...,Egypt,,
737,What bill president Clinton sign at the school...,Goals 2000,,
740,Salvador Duran Sanchez raced in which series w...,A1 Grand Prix,,
660,"Along with Barbara Bush, what other member of ...",Jenna Bush,,
411,When was the first series premiered in the UK ...,15 January 2012,,
...,...,...,...,...
436,What is the nationality of the company that pr...,French,,
764,Bacon is a native of a city whose nickname is ...,Queen City of the Ozarks,,
88,"In 2014, Clemson Tigers football team were inv...",Bob Stoops,,
63,Uyirodu Uyiraga was a film that starred the In...,British Formula 3 season,,


In [None]:
# Run RAG workflow 01 on every test question and write its outputs back onto
# the same row of test_df
for index, question_to_ask in test_df['question'].items():
    response, referred_chunks = rag_workflow01(question_to_ask)

    # Keep only the "content" field of each referred chunk; when the workflow
    # returns nothing truthy for the chunks, store an empty list instead
    if referred_chunks:
        chunk_texts = [chunk["content"] for chunk in referred_chunks]
    else:
        chunk_texts = []

    test_df.at[index, 'contexts'] = chunk_texts
    test_df.at[index, 'answer'] = response

In [None]:
# Persist the generated responses to CSV, keeping the DataFrame index (index=True)
# NOTE(review): list-valued "contexts" cells are written to CSV as text - confirm
# that whatever reads this file parses them back into lists.
responses_path = "test_responses.csv"
test_df.to_csv(responses_path, index=True)