In [1]:
import os
from dotenv import load_dotenv

# Load settings from a local .env file; the `True` echoed below is this call's return value.
# NOTE(review): presumably this supplies the API keys used by the clients created later — confirm.
load_dotenv()

True

In [2]:
from r2r import R2RAsyncClient

# Async R2R client, constructed with no arguments so connection details are left to the library.
# NOTE(review): presumably targets a local or environment-configured R2R server — confirm.
client = R2RAsyncClient()

In [3]:
# Evaluation set, written as (question, reference answer) pairs so each question
# sits next to the ground truth it is scored against.
qa_pairs = [
    (
        "Who are the major players in the large language model space?",
        "The major players include OpenAI (GPT Series), Anthropic (Claude Series), Google DeepMind (Gemini Models), Meta AI (LLaMA Series), Microsoft Azure AI (integrating GPT Models), Amazon AWS (Bedrock with Claude and Jurassic), Cohere (business-focused models), and AI21 Labs (Jurassic Series).",
    ),
    (
        "What is Microsoft’s Azure AI platform known for?",
        "Microsoft’s Azure AI platform is known for integrating OpenAI’s GPT models, enabling businesses to use these models in a scalable and secure cloud environment.",
    ),
    (
        "What kind of models does Cohere provide?",
        "Cohere provides language models tailored for business use, excelling in tasks like search, summarization, and customer support.",
    ),
]

# Split back into the two index-aligned lists the rest of the notebook consumes.
questions = [question for question, _ in qa_pairs]
references = [reference for _, reference in qa_pairs]

In [4]:
search_settings = {
    "limit": 2,
    "graph_settings": {"enabled": False, "limit": 2},
}

r2r_responses = []

for que in questions:
    response = await client.retrieval.rag(query=que, search_settings=search_settings)
    r2r_responses.append(response)

In [5]:
# Prefer the converter that ships with Ragas so this cell runs on a clean
# install; fall back to the local `r2r_new` module the notebook originally
# relied on, which is not an installable package.
try:
    from ragas.integrations.r2r import transform_to_ragas_dataset
except ImportError:
    from r2r_new import transform_to_ragas_dataset

# Combine the questions, the R2R responses and the reference answers into a
# single Ragas evaluation dataset (the three inputs are index-aligned).
ragas_eval_dataset = transform_to_ragas_dataset(
    user_inputs=questions, r2r_responses=r2r_responses, references=references
)

In [6]:
# Display the dataset's repr to check its features and row count before evaluating.
ragas_eval_dataset

EvaluationDataset(features=['user_input', 'retrieved_contexts', 'response', 'reference'], len=3)

In [7]:
from ragas.metrics import AnswerRelevancy, ContextPrecision, Faithfulness
from ragas import evaluate
from langchain_openai import ChatOpenAI
from ragas.llms import LangchainLLMWrapper

# Judge model used for scoring, wrapped so Ragas metrics can call it.
llm = ChatOpenAI(model="gpt-4o-mini")
evaluator_llm = LangchainLLMWrapper(llm)

# All three metrics are instantiated with the same wrapped evaluator LLM.
ragas_metrics = [
    metric_cls(llm=evaluator_llm)
    for metric_cls in (AnswerRelevancy, ContextPrecision, Faithfulness)
]

# Score every row of the dataset against each metric.
result = evaluate(dataset=ragas_eval_dataset, metrics=ragas_metrics)

Evaluating:   0%|          | 0/9 [00:00<?, ?it/s]

In [8]:
# Show the scores as a DataFrame: one row per question, one column per metric.
result.to_pandas()

Unnamed: 0,user_input,retrieved_contexts,response,reference,answer_relevancy,context_precision,faithfulness
0,Who are the major players in the large languag...,[In the rapidly advancing field of artificial ...,The major players in the large language model ...,"The major players include OpenAI (GPT Series),...",1.0,1.0,1.0
1,What is Microsoft’s Azure AI platform known for?,[Microsoft’s Azure AI platform is famous for i...,Microsoft’s Azure AI platform is known for int...,Microsoft’s Azure AI platform is known for int...,1.0,1.0,1.0
2,What kind of models does Cohere provide?,[Cohere is well-known for its language models ...,Cohere provides language models tailored for b...,Cohere provides language models tailored for b...,0.903765,1.0,1.0


In [9]:
# Upload the evaluation results; the call prints a confirmation and returns the dashboard URL echoed below.
# NOTE(review): presumably requires Ragas app credentials in the environment — confirm.
result.upload()

Evaluation results uploaded! View at https://app.ragas.io/dashboard/alignment/evaluation/665ca1f2-03e1-478f-a0b0-8d23f6049b30


'https://app.ragas.io/dashboard/alignment/evaluation/665ca1f2-03e1-478f-a0b0-8d23f6049b30'