In [8]:
%pip install python-dotenv datasets ragas

Collecting datasets
  Downloading datasets-4.0.0-py3-none-any.whl.metadata (19 kB)
Collecting ragas
  Downloading ragas-0.2.15-py3-none-any.whl.metadata (9.0 kB)
Collecting filelock (from datasets)
  Downloading filelock-3.18.0-py3-none-any.whl.metadata (2.9 kB)
Collecting numpy>=1.17 (from datasets)
  Downloading numpy-2.3.1-cp313-cp313-macosx_14_0_arm64.whl.metadata (62 kB)
Collecting pyarrow>=15.0.0 (from datasets)
  Downloading pyarrow-20.0.0-cp313-cp313-macosx_12_0_arm64.whl.metadata (3.3 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Using cached dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting pandas (from datasets)
  Downloading pandas-2.3.1-cp313-cp313-macosx_11_0_arm64.whl.metadata (91 kB)
Collecting requests>=2.32.2 (from datasets)
  Downloading requests-2.32.4-py3-none-any.whl.metadata (4.9 kB)
Collecting tqdm>=4.66.3 (from datasets)
  Using cached tqdm-4.67.1-py3-none-any.whl.metadata (57 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp313-cp

In [9]:
from dotenv import load_dotenv
import os

# Let python-dotenv populate the process environment first, then look the key up.
load_dotenv()
openai_key = os.environ.get("OPENAI_API_KEY")

# Fail fast: stop here when the variable is unset or set to an empty string.
if openai_key is None or openai_key == "":
    raise ValueError("OPENAI_API_KEY not found in .env file")

In [10]:
# ------------------------------
#  inputs: one hand-written example made of a question, a supporting
#  passage, a candidate answer, and a reference answer
# ------------------------------
query = "Why do we have seasons on Earth?"

# Supporting passage for the question.
context = (
    "Earth’s axis is tilted relative to its orbit, "
    "causing sunlight to hit different parts at different angles over the year."
)

# Candidate answer to be scored.
response = (
    "We have seasons because Earth's tilt changes "
    "how sunlight reaches us throughout the year."
)

# Reference answer the candidate is compared against.
ground_truth = (
    "Seasons happen because Earth is tilted on its axis relative to its orbit, "
    "changing sunlight over the year."
)


In [None]:
from datasets import Dataset
from ragas.metrics import faithfulness, context_precision, context_recall, answer_relevancy
from ragas import evaluate


# Build a one-row dataset: every column is a list holding a single entry.
# "contexts" and "ground_truths" wrap their string in an inner list, while
# "reference" carries the same ground-truth text as a plain string.
# NOTE(review): "ground_truths" and "reference" hold the same text; which of the
# two the selected metrics actually read is not visible here — confirm against
# the ragas docs before dropping either.
sample_columns = {
    "question": [query],
    "contexts": [[context]],
    "answer": [response],
    "ground_truths": [[ground_truth]],
    "reference": [ground_truth],
}
ragas_data = Dataset.from_dict(sample_columns)

# The four metrics to compute, in the order they are passed to evaluate().
selected_metrics = [faithfulness, context_precision, context_recall, answer_relevancy]

# Maps every dataset column name to itself (same keys, same order as the columns).
# NOTE(review): an identity mapping looks like a no-op — verify and remove if so.
identity_column_map = {column: column for column in sample_columns}

result = evaluate(
    ragas_data,
    metrics=selected_metrics,
    column_map=identity_column_map,
)

# Plain-text dump of the scores.
print(result)


Evaluating: 100%|██████████| 4/4 [00:04<00:00,  1.15s/it]


{'faithfulness': 1.0000, 'context_precision': 1.0000, 'context_recall': 1.0000, 'answer_relevancy': 1.0000}
