In [None]:
import os
import json
import requests
import uuid
from datetime import datetime
# Connection settings for the chat API, taken from the environment.
# Either value is None when its variable is not set.
chat_api_endpoint = os.getenv("CHAT_API_ENDPOINT")
chat_api_key = os.getenv("CHAT_API_KEY")

## Get an answer

In [None]:
def get_answer_api(question: str, conversationId: str = None, timeout: float = 120.0) -> tuple:
    """Send one question to the chat API and unpack its reply.

    Args:
        question: Text sent as the ``Message`` field of the request body.
        conversationId: Conversation to continue. When omitted, a fresh id of
            the form ``ragas-<YYYYMMDD>-<uuid4>`` is generated.
        timeout: Seconds to wait for the HTTP response before giving up, so a
            stalled request cannot hang the notebook indefinitely.

    Returns:
        A 4-tuple ``(answer, contexts, intent, conversationId)``:
        ``answer`` is the reply's ``Message`` field, ``contexts`` is a list of
        ``"<id>: <content>"`` strings built from the JSON-encoded ``Context``
        field, ``intent`` is the reply's ``Question`` field, and
        ``conversationId`` is the id that was sent with the request.

    Raises:
        RuntimeError: If ``chat_api_endpoint`` is not configured.
        requests.HTTPError: If the API answers with an error status code.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
        KeyError: If an expected field is missing from the reply.
    """
    if not chat_api_endpoint:
        # Without this guard the request would go to the literal URL "None/chat?...".
        raise RuntimeError("CHAT_API_ENDPOINT is not set")

    endpoint = f"{chat_api_endpoint}/chat?code={chat_api_key}"

    # Only mint a new id when the caller did not supply one.
    if conversationId is None:
        guid = str(uuid.uuid4())
        conversationId = f"ragas-{datetime.today().strftime('%Y%m%d')}-{guid}"

    data = {
        "ConversationId" : conversationId,
        "Message" : question,
        "UserId" : "RAGAS"
    }
    response = requests.post(endpoint, json=data, timeout=timeout)
    # Surface HTTP failures directly instead of as a confusing KeyError/JSON error below.
    response.raise_for_status()

    # Decode the body once and reuse it for every field.
    payload = response.json()
    answer = payload["Message"]
    intent = payload["Question"]

    # "Context" is itself a JSON document: a list of objects with "id" and "content".
    context = [
        str(c["id"]) + ": " + c["content"]
        for c in json.loads(payload["Context"])
    ]
    return answer, context, intent, conversationId

In [None]:
# Smoke test: send a single question and show each part of the reply on its own line.
question = "Wie is de burgemeester"
reply = get_answer_api(question)
answer, context, intent, conversationId = reply
print(conversationId, answer, context, intent, sep="\n")

## Get my dataset

In [None]:
import pandas as pd
import datasets
# Load the evaluation set: one row per question, with its reference answer.
data = pd.read_csv("ragas-data.csv")

# Fail fast on a malformed file. The generation loop reads these two columns
# and would otherwise only report one swallowed KeyError per row.
missing_columns = {"question", "ground_truth"} - set(data.columns)
if missing_columns:
    raise ValueError(
        f"ragas-data.csv is missing required column(s): {sorted(missing_columns)}"
    )
print("Questions to be evaluated: ", len(data))

In [None]:
# Display the loaded evaluation set for a visual check.
data

## Generate answers

In [None]:
# Ask the chat API every evaluation question and collect one record per
# successful call. Failed calls are reported and skipped so a single bad
# request does not abort the whole run.
ragas_columns = ["question", "ground_truth", "answer", "contexts", "intent"]
ragas_data = []
failed_indices = []
total = len(data)

# Iterate by position so this works whatever index `data` carries.
for i, row in enumerate(data.to_dict("records")):
    try:
        question = row["question"]
        ground_truth = row["ground_truth"]
        answer, context, intent, conversationId = get_answer_api(question)
        ragas_data.append({
            "question" : question,
            "ground_truth" : ground_truth,
            "answer" : answer,
            "contexts" : context,
            "intent" : intent
        })
        print(f"{i+1}/{total} - {question}")
    except Exception as e:
        failed_indices.append(i)
        print(f"Error at index {i} - {e}")

if failed_indices:
    # Make skipped questions visible without scrolling through the whole log.
    print(f"{len(failed_indices)}/{total} question(s) failed and were skipped: {failed_indices}")

# Passing the columns explicitly keeps the frame well-formed (all columns
# present) even when every call failed and ragas_data is empty.
ragas_data_df = pd.DataFrame(ragas_data, columns=ragas_columns)

## Evaluation

In [None]:
import ragas
import ragas.metrics as metrics
from langchain_openai.chat_models import AzureChatOpenAI
from langchain_openai.embeddings import AzureOpenAIEmbeddings

# Connection settings shared by both Azure OpenAI clients below.
azure_openai_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")
azure_openai_key = os.getenv("AZURE_OPENAI_KEY")
azure_openai_api_version = "2024-06-01"

# Chat model handed to ragas.evaluate through its `llm` argument.
azure_model = AzureChatOpenAI(
    openai_api_version=azure_openai_api_version,
    azure_endpoint=azure_openai_endpoint,
    azure_deployment="gpt-4o",
    model="gpt-4o",
    validate_base_url=False,
    api_key=azure_openai_key,
)

# Embedding model for the embedding-based metrics
# (answer_relevancy, answer_correctness and answer_similarity).
azure_embeddings = AzureOpenAIEmbeddings(
    openai_api_version=azure_openai_api_version,
    azure_endpoint=azure_openai_endpoint,
    azure_deployment="text-embedding-ada-002",
    model="text-embedding-ada-002",
    api_key=azure_openai_key,
)

In [None]:
# Column-oriented view of the generated answers. The "intent" column is left
# out of the dataset given to RAGAS.
ragas_dataset = {
    column: ragas_data_df[column].to_list()
    for column in ("question", "ground_truth", "answer", "contexts")
}
ds = datasets.Dataset.from_dict(ragas_dataset)

# Metrics to compute for every row of the dataset.
selected_metrics = [
    metrics.faithfulness,
    metrics.answer_correctness,
    metrics.context_recall,
    metrics.context_precision,
]

# raise_exceptions=False is passed so the run is not aborted by a metric
# failure -- NOTE(review): confirm how failed rows are reported in the scores.
result = ragas.evaluate(
    dataset=ds,
    metrics=selected_metrics,
    llm=azure_model,
    embeddings=azure_embeddings,
    raise_exceptions=False,
)

# Put the scores next to the questions/answers they belong to, matched on row index.
scores_df = pd.DataFrame(result.scores)
merged_df = ragas_data_df.merge(scores_df, left_index=True, right_index=True)

In [None]:
# Display the object returned by ragas.evaluate.
result

In [None]:
# Tabulate result.scores as a DataFrame for inspection.
pd.DataFrame(result.scores)