In [2]:
from ragas.llms import LangchainLLMWrapper
from ragas import EvaluationDataset, evaluate
from ragas.metrics import LLMContextRecall, Faithfulness, FactualCorrectness
from ragas.dataset_schema import EvaluationResult, EvaluationDataset

from langchain_ollama.llms import OllamaLLM
from typing import List
from llama_index.vector_stores.weaviate import WeaviateVectorStore
from llama_index.core import VectorStoreIndex
import weaviate
from llama_index.core import Settings
from llama_index.llms.ollama import Ollama
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core.schema import NodeWithScore
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

import os
from dotenv import load_dotenv

load_dotenv()

Settings.llm = Ollama(model="llama3:8b", request_timeout=60.0)
Settings.embed_model = HuggingFaceEmbedding(
    model_name="BAAI/bge-small-en-v1.5"
)
model = OllamaLLM(model="llama3:8b")
evaluator_llm = LangchainLLMWrapper(model)

client: weaviate.WeaviateClient = weaviate.connect_to_local()
vector_store = WeaviateVectorStore(
    weaviate_client=client, index_name="Pymupdf", text_key="content"
)
retriever = VectorStoreIndex.from_vector_store(vector_store).as_retriever(
    similarity_top_k=5
)
template = """Answer the question based only on the following context:
{context}

Question: {query}
"""
prompt = ChatPromptTemplate.from_template(template)

qa_chain = prompt | model | StrOutputParser()

def format_docs(nodes: List[NodeWithScore]) -> str:
    context: str = ""
    for n in nodes:
        context = context + f"""
        --------------------
        Exerpt from file with name: {n.node.metadata["properties"]["file_name"]}

        {n.text}
        --------------------
        """
    return context

In [3]:
sample_queries: List[str] = [
    "what was the make and model of the EV sophie was interested in purchasing?",
    "What two types of motors do Plug-in hybrids (or PHEVs) have?",
    "What percentage of historical average were Meridian’s January 2025 monthly total inflows?",
    "What is an inclusive framework which integrates flexible supports into day-to-day teaching and learning"
]

expected_responses: List[str] = [
    "Sophie was interested in purchasing a Nissan Leaf.",
    "Plug-in hybrids (or PHEVs) have two types of motors: electric motors and internal combustion engines.",
    "Meridian’s January 2025 monthly total inflows were 43% of historical average",
    "Inclusive frameworks like Te Tūāpapa o He Pikorua integrate flexible supports into day-to-day teaching and learning."
]


In [4]:



dataset = []

for query, reference in zip(sample_queries, expected_responses):
    relevant_docs: List[NodeWithScore] = retriever.retrieve(query)
    response = qa_chain.invoke({"context": format_docs(relevant_docs), "query": query})
    dataset.append(
        {
            "user_input": query,
            "retrieved_contexts": [rdoc.text for rdoc in relevant_docs],
            "response": response,
            "reference": reference,
        }
    )

evaluation_dataset: EvaluationDataset = EvaluationDataset.from_list(dataset)

/Users/timotewb/Documents/github_timotewb/stunning-disco/.venv/lib/python3.12/site-packages/langchain_core/load/serializable.py:289: PydanticDeprecatedSince211: Accessing this attribute on the instance is deprecated, and will be removed in Pydantic V3. Instead, you should access this attribute from the model class. Deprecated in Pydantic V2.11 to be removed in V3.0.
  field = inst.model_fields.get(key)
/Users/timotewb/Documents/github_timotewb/stunning-disco/.venv/lib/python3.12/site-packages/langchain_core/load/serializable.py:213: PydanticDeprecatedSince211: Accessing this attribute on the instance is deprecated, and will be removed in Pydantic V3. Instead, you should access this attribute from the model class. Deprecated in Pydantic V2.11 to be removed in V3.0.
  if k in self.model_fields and self.model_fields[k].exclude:
/Users/timotewb/Documents/github_timotewb/stunning-disco/.venv/lib/python3.12/site-packages/langchain_core/load/serializable.py:194: PydanticDeprecatedSince211: Ac

In [5]:
result: EvaluationResult = evaluate(
    dataset=evaluation_dataset,
    metrics=[LLMContextRecall(), Faithfulness(), FactualCorrectness()],
    llm=evaluator_llm,
)

result

  user_id = json.load(open(uuid_filepath))["userid"]


Evaluating:   0%|          | 0/12 [00:00<?, ?it/s]

Exception raised in Job[2]: TimeoutError()
Exception raised in Job[5]: TimeoutError()
Exception raised in Job[8]: TimeoutError()
Exception raised in Job[10]: TimeoutError()
Exception raised in Job[11]: TimeoutError()


{'context_recall': 0.8750, 'faithfulness': 0.7778, 'factual_correctness(mode=f1)': nan}

In [None]:
os.environ['RAGAS_APP_TOKEN'] = os.getenv('RAGAS_APP_TOKEN')
result.upload()

apt.4a97-91a67e7ef072-9ab9-8853-36536539-e233e


  return datetime.utcfromtimestamp(timestamp)  # UTC time conversion
[2025-03-28 08:32:31 - (2025-03-27 19:32:31 UTC)] [ERROR] [ragas.utils] [RagasID: a-a6fa1b6c26864664bddcab02c50390c7, App-Version: 0.2.14] [API_ERROR] Request failed. Status Code: 500, URL: https://api.ragas.io/api/v1/alignment/evaluation, Error Message: 
API Message: An internal server error occured


UploadException: Request failed: 
API Message: An internal server error occured