In [None]:
import nest_asyncio

# NOTE(review): presumably needed so async code can run inside the notebook's
# already-running event loop (the index below is built with use_async=True) —
# confirm against the nest_asyncio documentation.
nest_asyncio.apply()

In [None]:
import os
from llama_index.llms.openai import OpenAI
import pandas as pd

# Keep a key that is already exported in the environment: assigning a literal
# here would overwrite it and would put a secret into the saved notebook.
# Prompt (without echo) only when no key is set.
if not os.environ.get("OPENAI_API_KEY"):
    from getpass import getpass  # local import: only needed on this path

    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.core import Settings

# Load the source documents from the local "data" directory.
documents = SimpleDirectoryReader("data").load_data()

# Chunking parameters on the global Settings object, assigned before the index
# is built (presumably picked up by the index build below — confirm).
Settings.chunk_size, Settings.chunk_overlap = 512, 20

# Build the vector index first, then derive the query engine from it.
vector_index = VectorStoreIndex.from_documents(documents, use_async=True)
vector_query_engine = vector_index.as_query_engine()

In [None]:
# Load the evaluation questions; the CSV must provide a "Rewritten Query" column.
df = pd.read_csv("questions/Lyft2021_rewritten_queries.csv")

# Blank CSV cells are read as NaN (a float). Drop those and whitespace-only
# rows so only real question strings reach the query engine; the text of the
# remaining queries is left untouched.
queries = [
    text
    for text in df["Rewritten Query"].dropna().astype(str)
    if text.strip()
]

In [None]:
from llama_index.core.evaluation import FaithfulnessEvaluator, RelevancyEvaluator

# One judge model (temperature 0.0) is shared by both evaluators.
llm = OpenAI(temperature=0.0, model="gpt-4o-mini")
f_evaluator, r_evaluator = (
    FaithfulnessEvaluator(llm=llm),
    RelevancyEvaluator(llm=llm),
)

# Per-query scores, filled in by the evaluation loop.
f_scores, r_scores = [], []

for query in queries:
    # Answer the question with the vector query engine.
    response = vector_query_engine.query(query)

    # Faithfulness is judged from the response alone; the score is scaled by 100.
    faithfulness = f_evaluator.evaluate_response(response=response)
    faithfulness_pct = faithfulness.score * 100

    # Relevancy is judged from the query together with the response.
    relevancy = r_evaluator.evaluate_response(query=query, response=response)
    relevancy_pct = relevancy.score * 100

    f_scores.append(faithfulness_pct)
    r_scores.append(relevancy_pct)

# Mean of the per-query scores. With no queries the lists are empty and a plain
# sum/len would raise ZeroDivisionError, so fall back to NaN ("no data").
avg_f = sum(f_scores) / len(f_scores) if f_scores else float("nan")
avg_r = sum(r_scores) / len(r_scores) if r_scores else float("nan")

print("Average Faithfulness Score:", avg_f)
print("Average Relevancy Score:", avg_r)