In [7]:
from dotenv import load_dotenv
# Read key/value pairs from a .env file into the process environment.
# The call is left as the last expression so its return value is shown as the cell output.
# NOTE(review): presumably this is what supplies the OpenAI API key to the
# ChatOpenAI / OpenAIEmbeddings clients created later — confirm.
load_dotenv()

True

In [8]:
from langchain_community.document_loaders import DirectoryLoader

# Collect every file ending in ".context" under ../../data (the "**/" part of
# the glob descends into subdirectories) and load them as documents.
loader = DirectoryLoader(
    "../../data",
    glob="**/*.context",
)
documents = loader.load()

In [9]:
# Copy each document's 'source' metadata value into a 'filename' key.
# NOTE(review): presumably the ragas test-set generator looks for 'filename' — confirm.
for document in documents:
    document.metadata.update(filename=document.metadata['source'])

In [10]:
from ragas.testset.generator import TestsetGenerator
from ragas.testset.evolutions import simple, reasoning, multi_context
# The Ollama imports below are only needed for the commented-out local setup further down.
#from langchain_community.llms import Ollama
#from langchain_community.embeddings import OllamaEmbeddings
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

# Active configuration: OpenAI-hosted models.
# generator_llm is the model intended for question generation and critic_llm
# the one intended for the critic role (going by their names);
# embeddings uses the OpenAIEmbeddings default model.
generator_llm = ChatOpenAI(model="gpt-3.5-turbo-16k")
critic_llm = ChatOpenAI(model="gpt-4")
embeddings = OpenAIEmbeddings()

# Alternative configuration (disabled): the same three objects backed by an
# Ollama server at http://localhost:7869. To use it, uncomment these lines and
# the Ollama imports above, and comment out the OpenAI assignments.
#generator_llm = Ollama( base_url="http://localhost:7869",
#                        model="tinyllama",
#                        verbose=True )

#critic_llm = Ollama( base_url="http://localhost:7869",
#    model="tinyllama",
#    verbose=True)

#embeddings = OllamaEmbeddings( base_url="http://localhost:7869", model="all-minilm:latest" )


In [11]:
# Build the ragas test-set generator from the LangChain models.
# from_langchain takes the generator LLM, then the critic LLM, then the
# embeddings. The critic slot must receive critic_llm: passing generator_llm
# twice left critic_llm unused and had the generator model critique its own output.
generator = TestsetGenerator.from_langchain(
    generator_llm,
    critic_llm,
    embeddings
)

# Generate a small synthetic test set from the loaded documents.
# `distributions` maps each evolution type to its share of the generated
# questions (the shares sum to 1.0).
testset = generator.generate_with_langchain_docs(
    documents,
    test_size=3,
    distributions={simple: 0.5, reasoning: 0.25, multi_context: 0.25},
)


Filename and doc_id are the same for all nodes.                 
Generating:  75%|███████▌  | 3/4 [00:10<00:03,  3.07s/it]max retries exceeded for MultiContextEvolution(generator_llm=LangchainLLMWrapper(run_config=RunConfig(timeout=180, max_retries=15, max_wait=90, max_workers=16, exception_types=<class 'openai.RateLimitError'>, log_tenacity=False, seed=42)), docstore=InMemoryDocumentStore(splitter=<langchain_text_splitters.base.TokenTextSplitter object at 0x7f13ed6736a0>, nodes=[Node(metadata={'source': '../../data/context_1.context', 'filename': '../../data/context_1.context'}, page_content='The Normans (Norman: Nourmands; French: Normands; Latin: Normanni) were the people who in the 10th and 11th centuries gave their name to Normandy, a region in France. They were descended from Norse ("Norman" comes from "Norseman") raiders and pirates from Denmark, Iceland and Norway who, under their leader Rollo, agreed to swear fealty to King Charles III of West Francia. Through generations of a

In [12]:
# Show the generated test set as a table for a quick visual check.
testset.to_pandas()

Unnamed: 0,question,contexts,ground_truth,evolution_type,metadata,episode_done
0,What was the significance of the North America...,[The French and Indian War (1754–1763) was the...,The North American theater of the French and I...,simple,"[{'source': '../../data/context_34.context', '...",True
1,How did the 1973 oil crisis impact the price o...,[The 1973 oil crisis began in October 1973 whe...,The 1973 oil crisis caused a significant impac...,simple,"[{'source': '../../data/context_9.context', 'f...",True
2,What were the sides and factors in the French ...,[The French and Indian War (1754–1763) was the...,The French and Indian War was fought between t...,reasoning,"[{'source': '../../data/context_34.context', '...",True


In [22]:
# Convert the generated test set with to_dataset() so an 'answer' column can be attached to it.
# NOTE(review): presumably this returns a Hugging Face datasets.Dataset — confirm.
test_ds = testset.to_dataset()

In [23]:
# Stub answers standing in for real pipeline output so the evaluation can run end to end.
placeholder_answers = ["Answer.", "Answers.", "Answer."]

# add_column needs exactly one value per row. The stubs are cycled to the
# dataset's actual length so this cell keeps working when generation returns a
# row count other than three; for three rows the column is unchanged.
answers = [
    placeholder_answers[row % len(placeholder_answers)]
    for row in range(len(test_ds))
]
ds = test_ds.add_column(name="answer", column=answers)

In [34]:
from ragas.metrics import (
    faithfulness,  # consistency b/w answer and context claims (0,1). NOTE(review): earlier note said "via HHEM-2.1-Open" — confirm; ragas appears to ship the HHEM-based check as a separate metric
    answer_relevancy,  # pertinence of answer to question via cosine similarity of LLM-generated questions (derived from the answer) vs. the actual question
    context_recall, # extent retrieved context aligns with ground truth (0,1).  Determines if context contains ground-truth claims.
    answer_similarity, # embedding similarity between answer and ground truth
    answer_correctness  # accuracy of generated answer vs. ground truth (0,1) combining semantic similarity and factual similarity.
)

In [35]:
from ragas import evaluate

# Metrics to score, in the order they should be reported.
selected_metrics = [
    faithfulness,
    answer_relevancy,
    context_recall,
    answer_similarity,
    answer_correctness,
]

# Run the ragas evaluation over the dataset that carries the 'answer' column.
result = evaluate(ds, metrics=selected_metrics)

Evaluating: 100%|██████████| 15/15 [00:08<00:00,  1.83it/s]


In [36]:
# Display the overall score per metric returned by evaluate().
result

{'faithfulness': 0.6667, 'answer_relevancy': 0.0000, 'context_recall': 0.8333, 'answer_similarity': 0.7280, 'answer_correctness': 0.3278}

In [32]:
# Per-question breakdown of the evaluation scores.
# NOTE(review): the saved output of this cell looks stale — its execution count
# (32) precedes the evaluate cell's (35), and its columns include
# context_precision but not answer_correctness. Restart the kernel and run all
# cells to refresh it.
result.to_pandas()

Unnamed: 0,question,contexts,answer,ground_truth,context_precision,faithfulness,answer_relevancy,context_recall,answer_similarity
0,What was the significance of the North America...,[The French and Indian War (1754–1763) was the...,Answer.,The North American theater of the French and I...,1.0,1.0,0.0,0.5,0.738587
1,How did the 1973 oil crisis impact the price o...,[The 1973 oil crisis began in October 1973 whe...,Answers.,The 1973 oil crisis caused a significant impac...,1.0,0.0,0.0,1.0,0.720756
2,What were the sides and factors in the French ...,[The French and Indian War (1754–1763) was the...,Answer.,The French and Indian War was fought between t...,1.0,0.0,0.893633,1.0,0.724616
