In [6]:
# This notebook is used to evaluate the performance of our RAG ingestion and query pipeline.
# Method:
#   - Generate test data from a sample GitHub repository (offline process, not in this notebook)
#     - https://docs.ragas.io/en/stable/getstarted/rag_testset_generation/
#   - Use our ingestion pipeline to parse the same repository and index it in our vector store
#   - Run RAGAS on the test dataset plus the answers from our RAG, and visualise the metrics.
import os
import sys
import pandas as pd

sys.path.insert(1, '/home/jovyan/work/code')
from opentelemetry import trace
from config import VectorDBConfig, EmbeddingConfig, ProcessingConfig, ChatConfig
from config_helper import ConfigHelper
from pipeline import DocumentPipeline
from localrag import LocalRAG

# We are using Aspire, so telemetry and logs from this notebook show up in its dashboard.
# See config_helper.py for the (not too tidy) details of how that is wired up.
# Tracer used by the cells below to wrap each step in a named span.
tracer = trace.get_tracer(__name__)

# NOTE(review): the meaning of the positional `True` is defined in config_helper.py — confirm.
# The resulting object exposes vector_db_config, embedding_config and chat_config, which
# later cells pass to DocumentPipeline and LocalRAG.
config_helper = ConfigHelper(True)


In [7]:
# Load and preview the evaluation test set.
# The test set was generated offline with the method described at:
#    https://docs.ragas.io/en/stable/getstarted/rag_testset_generation/
# NOTE: pd.read_pickle unpickles the file, so only load pickles from a trusted source.
file_name = "test_data__aspire_15.pkl"
test_dataset = pd.read_pickle(file_name)
# Keep the preview as the last expression so the notebook renders it as a table.
test_dataset.head()
# Tip: loop over test_dataset.iterrows() and print row["reference_contexts"] or
# row["reference"] to inspect individual samples in full.

Unnamed: 0,user_input,reference_contexts,reference,synthesizer_name
0,Could you elaborate on how code changes can im...,[title: Compatibility description: Learn about...,Code changes can affect compatibility in .NET ...,single_hop_specifc_query_synthesizer
1,Is forward compatibility a goal of .NET Aspire?,[Design-time compatibility Backwards compatibi...,Maintaining forward compatibility is not a goa...,single_hop_specifc_query_synthesizer
2,How can a Cloud Application Developer utilize ...,[title: .NET Aspire PostgreSQL integration des...,To utilize the NpgsqlDataSource in .NET Aspire...,single_hop_specifc_query_synthesizer
3,what .NET Aspire GitHub repo do?,[Logging The .NET Aspire PostgreSQL integratio...,The .NET Aspire GitHub repo is mentioned in th...,single_hop_specifc_query_synthesizer
4,Howw doo I use mcr.microsoft.com/mssql/server ...,[title: .NET Aspire SQL Server integration des...,The .NET Aspire SQL Server integration allows ...,single_hop_specifc_query_synthesizer


In [8]:
# Build the ingestion pipeline from the shared vector-DB and embedding configuration.
pipeline = DocumentPipeline(
    vector_db_config=config_helper.vector_db_config,
    embedding_config=config_helper.embedding_config,
)

# One-off ingestion step: the evaluation queries in the following cells need the
# repository content to be indexed in the vector store. Uncomment to (re-)ingest.
# repository = "https://github.com/dotnet/docs-aspire"
# input_file_name = "merged_output.txt"
# with tracer.start_as_current_span(f"Starting ingesting file {input_file_name}"):
#     pipeline.process_single_file(input_file_name, repository)

# Further reading: https://learning.oreilly.com/library/view/learning-langchain/9781098167271/

  self.qdrant = QdrantClient(url=vector_db_config.url, api_key=vector_db_config.api_key)


In [None]:
# Evaluation setup follows https://docs.ragas.io/en/latest/getstarted/rag_eval/#basic-setup
# One dict per question is appended here by the evaluation loop further down in this cell.
evaluation_data = []

def query_using_rag(rag, question, k=5):
    """Ask ``rag`` a question and collect the context chunks retrieved for it.

    Args:
        rag: Object providing ``get_relevant_chunks(question, k=...)`` and
            ``retrieve_and_answer(question, k=...)``.
        question: The question text; it is also printed for progress feedback.
        k: Number of chunks requested from both calls (defaults to 5).

    Returns:
        A tuple ``(answer, references)`` where ``answer`` is whatever
        ``rag.retrieve_and_answer`` returned and ``references`` is the list of
        ``page_content`` values of the chunks from ``rag.get_relevant_chunks``.
    """
    with tracer.start_as_current_span("Getting answer and context."):
        print(f"Question: {question}")
        with tracer.start_as_current_span("rag get context"):
            chunks = rag.get_relevant_chunks(question, k=k)
            references = [chunk.page_content for chunk in chunks]
        with tracer.start_as_current_span("Retrieve answers."):
            # Same k as above so the recorded contexts match what the answer was built from.
            answer = rag.retrieve_and_answer(question, k=k)
    return (answer, references)

# RAG client under evaluation, built from the same shared configuration as the pipeline.
rag = LocalRAG(
    vector_db_config=config_helper.vector_db_config,
    embedding_config=config_helper.embedding_config,
    chat_config=config_helper.chat_config,
)

# Ask every test question and record the answer, the retrieved contexts and the
# reference answer as one evaluation sample.
with tracer.start_as_current_span("Starting demo"):
    for index, row in test_dataset.iterrows():
        print(f"Question {index}:")
        question, reference = row["user_input"], row["reference"]
        answer, contexts = query_using_rag(rag, question)
        # Short preview only; the full answer is stored in the sample below.
        print(answer[:50])
        evaluation_data.append(
            dict(
                user_input=question,
                retrieved_contexts=contexts,
                response=answer,
                reference=reference,
            )
        )

  self.qdrant = QdrantClient(url=vector_db_config.url, api_key=vector_db_config.api_key)


Question 0:
Question: Could you elaborate on how code changes can impact compatibility in .NET, particularly focusing on the different types of compatibility that might be affected?
In .NET, code changes can affect several aspects o
Question 1:
Question: Is forward compatibility a goal of .NET Aspire?


In [None]:
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings
from ragas import evaluate
from ragas.llms import LangchainLLMWrapper
from ragas import EvaluationDataset
import matplotlib.pyplot as plt
import numpy as np
from ragas.metrics import (
    LLMContextRecall, 
    Faithfulness, 
    FactualCorrectness, 
    AnswerRelevancy,
    ContextRelevancy
)

# Metrics to score; each class is instantiated with its defaults, in this order.
metrics = [
    metric_cls()
    for metric_cls in (
        LLMContextRecall,
        Faithfulness,
        FactualCorrectness,
        AnswerRelevancy,
        ContextRelevancy,
    )
]

# Judge LLM and embedding model used to score the collected samples.
llm = ChatOpenAI(model="gpt-4o")
embeddings = OpenAIEmbeddings()
evaluation_dataset = EvaluationDataset.from_list(evaluation_data)
evaluator_llm = LangchainLLMWrapper(llm)

with tracer.start_as_current_span("Starting model evaluation"):
    # Hand the embedding model to evaluate() so embedding-based metrics
    # (e.g. AnswerRelevancy) use the one configured here instead of an implicit default.
    result = evaluate(
        dataset=evaluation_dataset,
        metrics=metrics,
        llm=evaluator_llm,
        embeddings=embeddings,
    )
    print(result)

    
# Visualization
def visualize_rag_metrics(result):
    """Draw a bar chart with the mean score of every metric in ``metrics``.

    Args:
        result: Evaluation result indexable by metric name
            (e.g. ``result["faithfulness"]``).

    Reads the module-level ``metrics`` list to decide which bars to draw and
    displays the figure with ``plt.show()``; nothing is returned.
    """
    # Results are keyed by each metric's `name` ('context_recall', 'faithfulness', ...),
    # not by the Python class name, so prefer that attribute for the lookup.
    metric_names = [getattr(metric, "name", metric.__class__.__name__) for metric in metrics]
    # Indexing may yield per-sample scores rather than a single number (TODO confirm for
    # the installed RAGAS version); nanmean reduces either form to one value per metric.
    scores = [float(np.nanmean(np.asarray(result[name], dtype=float))) for name in metric_names]

    # Create bar plot
    fig, ax = plt.subplots(figsize=(10, 6))
    bars = ax.bar(metric_names, scores, color='skyblue', edgecolor='navy')
    ax.set_title('RAG Evaluation Metrics', fontsize=15)
    ax.set_xlabel('Metrics', fontsize=12)
    ax.set_ylabel('Scores', fontsize=12)
    ax.set_ylim(0, 1)

    # Add value labels on top of each bar
    for bar in bars:
        height = bar.get_height()
        ax.text(bar.get_x() + bar.get_width() / 2., height,
                f'{height:.2f}',
                ha='center', va='bottom', fontsize=10)

    # Add a horizontal line at 0.5 for reference
    ax.axhline(y=0.5, color='r', linestyle='--', alpha=0.7)

    fig.tight_layout()
    plt.show()

# Render the bar chart for the evaluation run above.
visualize_rag_metrics(result)

# Scores recorded from an earlier run, kept for comparison:
# {'context_recall': 0.5450, 'faithfulness': 0.5920, 'factual_correctness': 0.3941}