In [2]:
import json
import os
from dotenv import load_dotenv

In [3]:
load_dotenv()

True

In [4]:
os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")

In [7]:
from ragas.llms import LangchainLLMWrapper
from ragas.embeddings import LangchainEmbeddingsWrapper
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings

In [8]:
evaluator_llm = LangchainLLMWrapper(ChatOpenAI(model="gpt-4o-mini"))
evaluator_embeddings = LangchainEmbeddingsWrapper(OpenAIEmbeddings())

In [45]:
from ragas.metrics import SemanticSimilarity, BleuScore, RougeScore, SemanticSimilarity
from ragas.dataset_schema import SingleTurnSample
from ragas.embeddings import LangchainEmbeddingsWrapper
from ragas.metrics._factual_correctness import FactualCorrectness

In [28]:
from datasets import load_dataset, Dataset
from ragas import EvaluationDataset

In [14]:
with open('../dataset/eval_dset.json', 'r') as f:
  eval_dset = json.load(f)

In [15]:
eval_dset[0].keys()

dict_keys(['input', 'reference', 'prediction'])

In [None]:
key_map = {"prediction": "response", 
           "reference": "reference"}

ragas_dset = [
    {key_map[k]: v for k, v in item.items() if k in key_map.keys()}
    for item in eval_dset
]


In [34]:
dataset = Dataset.from_list(ragas_dset)

In [35]:
dataset

Dataset({
    features: ['reference', 'response'],
    num_rows: 4324
})

In [36]:
eval_dataset = EvaluationDataset.from_hf_dataset(dataset)

eval_dataset.features()

['response', 'reference']

In [39]:
eval_dataset[:500]

EvaluationDataset(features=['response', 'reference'], len=500)

In [40]:
from ragas import evaluate

In [63]:
metric1 = FactualCorrectness(llm = evaluator_llm)
metric2 = BleuScore()
metric3 = RougeScore()
metric4 = SemanticSimilarity()

In [67]:
results = evaluate(eval_dataset[:300], metrics = [metric1, metric2, metric3, metric4])

results.to_pandas().to_csv(path_or_buf=  "../results/data/zeroshot.csv", index = False)

Evaluating:   0%|          | 0/1200 [00:00<?, ?it/s]

In [66]:
results

{'factual_correctness(mode=f1)': 0.1255, 'bleu_score': 0.0332, 'rouge_score(mode=fmeasure)': 0.1020}

ERROR:ragas.executor:Exception raised in Job[63]: AssertionError(Error: 'semantic_similarity' requires embeddings to be set.)
ERROR:ragas.executor:Exception raised in Job[67]: AssertionError(Error: 'semantic_similarity' requires embeddings to be set.)
ERROR:ragas.executor:Exception raised in Job[71]: AssertionError(Error: 'semantic_similarity' requires embeddings to be set.)
ERROR:ragas.executor:Exception raised in Job[75]: AssertionError(Error: 'semantic_similarity' requires embeddings to be set.)
ERROR:ragas.executor:Exception raised in Job[79]: AssertionError(Error: 'semantic_similarity' requires embeddings to be set.)
ERROR:ragas.executor:Exception raised in Job[83]: AssertionError(Error: 'semantic_similarity' requires embeddings to be set.)
ERROR:ragas.executor:Exception raised in Job[87]: AssertionError(Error: 'semantic_similarity' requires embeddings to be set.)
ERROR:ragas.executor:Exception raised in Job[91]: AssertionError(Error: 'semantic_similarity' requires embeddings to b

In [70]:
results.to_pandas()

Unnamed: 0,response,reference,factual_correctness(mode=f1),bleu_score,rouge_score(mode=fmeasure),semantic_similarity
0,"They wishin' on me, yuh.",Drake has a long history of conflicts. Chris B...,0.00,0.078098,0.014493,0.731230
1,They wish on me.,"Although Drake squashed his major beef​, he’s ...",0.00,0.000385,0.014815,0.701291
2,"They wishin' on me, yuh",Drake doesn’t want to be a booty call in the m...,0.00,0.014457,0.019608,0.728761
3,I don't wanna die for them to miss me,Drake is referencing how the general public co...,0.00,0.038686,0.041096,0.764402
4,"They wishin' on me, yuh.",Drake may be referring to his late OVO affilia...,0.00,0.023894,0.017699,0.731211
...,...,...,...,...,...,...
295,The snippet is about the speaker's feelings of...,This line is straight from Popcaan’s 2014 song...,0.15,0.005223,0.162963,0.736112
296,You got somebody other than me\nDon't play the...,Drake knows this girl has another man she’s se...,0.46,0.019297,0.107692,0.830494
297,"The snippet ""And I wanna tell you my intention...",The things Drake wants from the relationship a...,0.22,0.016209,0.142857,0.787761
298,I put my hands around you\nGotta get a handle ...,Despite all the points Drizzy has made about h...,0.33,0.011205,0.166667,0.808144


In [71]:
results

{'factual_correctness(mode=f1)': 0.1182, 'bleu_score': 0.0332, 'rouge_score(mode=fmeasure)': 0.1020, 'semantic_similarity': 0.7965}