In [1]:
# This notebook is used to evaluate the performance of our RAG ingestion and query pipeline.
# Method:
#   - Generate test data using a sample GitHub repository (offline process, not in this notebook)
#     - https://docs.ragas.io/en/stable/getstarted/rag_testset_generation/
#   - Use our ingestion pipeline to parse the same repository and index it in our vector store
#   - Run RAGAS on the test dataset + the answers from our RAG and visualise the metrics.
import os
import sys
import pandas as pd

sys.path.insert(1, '/home/jovyan/work/code')
from opentelemetry import trace
from config import VectorDBConfig, EmbeddingConfig, ProcessingConfig, ChatConfig
from config_helper import ConfigHelper
from pipeline import DocumentPipeline
from localrag import LocalRAG
from TraceSetup import get_tracer, get_logger
# Logger and tracer are provided by the project's TraceSetup module.
logger = get_logger()
tracer = get_tracer()
# We are using Aspire, so the telemetry and logs show up in our dashboard.
# See config_helper.py for the not-too-tidy details.
# NOTE(review): the meaning of the positional False flag is not visible here - confirm in config_helper.py.
config_helper = ConfigHelper(False)




In [2]:
# Inspect the test data.
# The test data is generated using the method described at:
#    https://docs.ragas.io/en/stable/getstarted/rag_testset_generation/
# NOTE(review): unpickling can execute arbitrary code; only load pickle files
# that come from a trusted source.
file_name= "gpt-4o_ReducedAspireDocs_50.pkl" 
test_dataset =  pd.read_pickle(file_name)
# Show the first rows as a quick sanity check of the columns.
test_dataset.head()
# Kept for manual inspection of individual rows:
#for index, row in test_dataset.iterrows():
#    print(row["reference_contexts"])
#    reference= row["reference"]

Unnamed: 0,user_input,reference_contexts,reference,synthesizer_name
0,Cud yu pleese explane wut the NpgSqlHealthChec...,[title: .NET Aspire PostgreSQL integration des...,The NpgSqlHealthCheck verifies that commands c...,single_hop_specifc_query_synthesizer
1,"Could you elaborate on the logging, tracing, a...",[Logging The .NET Aspire PostgreSQL integratio...,The .NET Aspire PostgreSQL integration provide...,single_hop_specifc_query_synthesizer
2,How can I configure MicrosoftDataSqlClientSett...,[title: .NET Aspire SQL Server integration des...,To disable health checks in a .NET Aspire SQL ...,single_hop_specifc_query_synthesizer
3,how does .NET Aspire work with SQL Server for ...,[Logging The .NET Aspire SQL Server integratio...,The .NET Aspire SQL Server integration does no...,single_hop_specifc_query_synthesizer
4,How can a Cloud Native Application Developer i...,[title: .NET Aspire Oracle Entity Framework Co...,The .NET Aspire Oracle Entity Framework Core i...,single_hop_specifc_query_synthesizer


In [None]:
# Evaluation setup follows: https://docs.ragas.io/en/latest/getstarted/rag_eval/#basic-setup
# Accumulator for the evaluation records; one dict per test question is appended further down in this cell.
evaluation_data=[]

def query_using_rag(rag, question, k=10):
    """Ask the RAG a question and collect the answer plus the retrieved contexts.

    Args:
        rag: Object exposing ``get_relevant_chunks(question, k=...)`` and
            ``retrieve_and_answer(question, k=...)``.
        question: The question text to send to the RAG.
        k: Number of chunks requested in both calls. Defaults to 10, the
            value that used to be hard-coded.

    Returns:
        A ``(answer, references)`` tuple, where ``references`` is the list of
        ``page_content`` values of the chunks returned by
        ``get_relevant_chunks``.
    """
    with tracer.start_as_current_span("Getting answer and context."):
        print(f"Question: {question}")
        with tracer.start_as_current_span("rag get context"):
            chunks = rag.get_relevant_chunks(question, k=k)
            references = [chunk.page_content for chunk in chunks]
        # NOTE(review): retrieve_and_answer presumably performs its own
        # retrieval, so the contexts recorded above may not be exactly the
        # ones used to produce the answer - verify against LocalRAG.
        with tracer.start_as_current_span("Retrieve answers."):
            answer = rag.retrieve_and_answer(question, k=k)
            return (answer, references)

# Build the RAG client from the shared notebook configuration, wiring in the
# notebook's logger and tracer.
rag = LocalRAG(vector_db_config=config_helper.vector_db_config,
               embedding_config=config_helper.embedding_config,
               chat_config=config_helper.chat_config,
               logger=logger,
               tracer=tracer)

# Ask every test question and store one record per question in
# evaluation_data (question, retrieved contexts, answer, reference answer).
with tracer.start_as_current_span("Starting demo"):
    for index, row in test_dataset.iterrows():
        print(f"Question {index}:")
        question, reference = row["user_input"], row["reference"]
        answer, contexts = query_using_rag(rag, question)
        # Only a short preview of the answer is printed to keep the output small.
        print(answer[:150])
        record = {
            "user_input": question,
            "retrieved_contexts": contexts,
            "response": answer,
            "reference": reference,
        }
        evaluation_data.append(record)

  self.qdrant = QdrantClient(url=vector_db_config.url, api_key=vector_db_config.api_key)


Question 0:
Question: Cud yu pleese explane wut the NpgSqlHealthCheck duz in the .NET Aspire PostgreSQL integrashun?


In [None]:
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings
from ragas import evaluate
from ragas.llms import LangchainLLMWrapper
from ragas import EvaluationDataset
import matplotlib.pyplot as plt
import numpy as np
from ragas.metrics import (
    LLMContextRecall, 
    Faithfulness, 
    FactualCorrectness, 
    AnswerRelevancy #,
    #ContextRelevancy
)

# Metrics scored for every evaluation record. LLMContextRecall and
# ContextRelevancy are currently disabled.
metrics = [
    #LLMContextRecall(),
    Faithfulness(),
    FactualCorrectness(),
    AnswerRelevancy() #,
    #ContextRelevancy()
]

# Judge LLM and embedding model handed to ragas.
llm = ChatOpenAI(model="gpt-4o")
embeddings = OpenAIEmbeddings()
evaluation_dataset = EvaluationDataset.from_list(evaluation_data)
evaluator_llm = LangchainLLMWrapper(llm)

with tracer.start_as_current_span("Starting model evaluation"):
    # `embeddings` used to be created but never passed on, which left the
    # embedding-based AnswerRelevancy metric on ragas' implicit default.
    # Passing it makes the embedding model explicit.
    result = evaluate(
        dataset=evaluation_dataset,
        metrics=metrics,
        llm=evaluator_llm,
        embeddings=embeddings,
    )
    print(result)

    
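# Recorded result, presumably from an earlier run with a different metric set
# (it includes context_recall and has no answer_relevancy, unlike the metrics list above):
#{'context_recall': 0.5450, 'faithfulness': 0.5920, 'factual_correctness': 0.3941}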