In [None]:
import os
import sys
import asyncio
from ragas.metrics import (
    faithfulness,
    answer_relevancy,
    context_recall,
    context_precision,
    ResponseRelevancy, 
)
import numpy as np 
import pandas as pd 
import json 
from core.config import settings
import requests
import tqdm
from datasets import Dataset
from ragas import evaluate
import os 
#from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings

#gemini_llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash",api_key=os.getenv("GOOGLE_API_KEY"))

In [40]:
# read the dataset 
dataset = pd.read_json(r"C:\Work\RAG-Agent-Project\app\system_evalution\eval_dataset.jsonl")
dataset['contexts'] = dataset['contexts'].apply(
        lambda x: [x] if isinstance(x, str) else (x if isinstance(x, list) else [])
    )

dataset

Unnamed: 0,question,ground_truth,answer,contexts
0,"According to the HR Bylaws, what is the maximu...","According to Article 110, Clause 3, the deduct...","I looked through the available documents, but ...","[I looked through the available documents, but..."
1,"What is the maximum lump sum financial reward,...",According to the 'Schedule of Lump Sum Bonuses...,"I looked through the available documents, but ...","[I looked through the available documents, but..."
2,For a non-citizen employee with 12 years of se...,"According to Article 137, Clause 3, the gratui...",For a non-citizen employee with 12 years of se...,[For a non-citizen employee with 12 years of s...
3,What is the daily delegate allowance for an em...,According to the 'Schedule of Delegate Allowan...,"Based on the information available, the daily ...","[Based on the information available, the daily..."
4,What is the maximum percentage of an employee'...,"According to Article 37, Clause 2, the deducti...",Hello!\n\nRegarding deductions from an employe...,[Hello!\n\nRegarding deductions from an employ...
5,How many working days of Hajj leave is a Musli...,"According to Article 95, a Muslim employee is ...",Muslim employees are entitled to a Hajj leave ...,[Muslim employees are entitled to a Hajj leave...
6,For a graduate trainee with a Bachelor's degre...,According to the 'Schedule of Rewards and Dura...,For a graduate trainee holding a Bachelor's de...,[For a graduate trainee holding a Bachelor's d...
7,If an employee is found guilty of 'Pretending ...,According to the 'Schedule of Violations and D...,"If an employee is found guilty of ""Pretending ...","[If an employee is found guilty of ""Pretending..."
8,What is the maximum number of days of paid pat...,"According to Article 97, Clause 1, an employee...","According to Article (97) of the HR Bylaws, an...","[According to Article (97) of the HR Bylaws, a..."
9,How many hours of daily leave is a breastfeedi...,"According to Article 90, a breastfeeding emplo...",Breastfeeding employees are entitled to two ho...,[Breastfeeding employees are entitled to two h...


In [11]:
def make_agent_answer(question):
    url = "http://localhost:8000/api/v1/chat/completions"
    # add the header
    headers = {
    "X-API-Key": f"testapikey12345"  
        }
    
    # the data that has the user question
    data = {
          "model": "string",
          "messages": [
            {
              "role": "user",
              "content": question
            }
          ]
        }


    request = requests.post(url,json=data,headers=headers).json()


    return request["choices"][0]["message"]["content"]

In [None]:
agent_result = []


for q in tqdm.tqdm(dataset.question):
    agent_answer = make_agent_answer(q)
    agent_result.append(agent_answer)
    

dataset["answer"] = agent_result
dataset["contexts"] = agent_result


100%|██████████| 10/10 [25:18<00:00, 151.82s/it]


In [41]:
dataset.to_json(r"C:\Work\RAG-Agent-Project\app\system_evalution\eval_dataset.jsonl",index=False)

In [42]:
dataset.to_dict(orient='list')

{'question': ["According to the HR Bylaws, what is the maximum number of days an employee's salary can be deducted in a single year as a disciplinary penalty?",
  "What is the maximum lump sum financial reward, in Dirhams, that a military retiree in the 'First' main grade can receive upon appointment?",
  'For a non-citizen employee with 12 years of service, how is their end-of-service gratuity calculated?',
  "What is the daily delegate allowance for an employee in the 'Third' grade on an official mission outside the country?",
  "What is the maximum percentage of an employee's total salary that can be deducted or withheld to pay a court-ruled debt?",
  'How many working days of Hajj leave is a Muslim employee entitled to once during their service?',
  "For a graduate trainee with a Bachelor's degree, what is the monthly bonus and the standard training period in months?",
  "If an employee is found guilty of 'Pretending to be sick' for the third time, what is the maximum salary deduct

In [43]:
eval_dataset = Dataset.from_dict(dataset.to_dict(orient='list'))

In [44]:
eval_dataset

Dataset({
    features: ['question', 'ground_truth', 'answer', 'contexts'],
    num_rows: 10
})

In [45]:
for i in eval_dataset:
    print(i)

{'question': "According to the HR Bylaws, what is the maximum number of days an employee's salary can be deducted in a single year as a disciplinary penalty?", 'ground_truth': 'According to Article 110, Clause 3, the deduction from the basic salary cannot exceed sixty days during one year.', 'answer': "I looked through the available documents, but I couldn't find specific information regarding the maximum number of days an employee's salary can be deducted in a single year as a disciplinary penalty.\n\nThe documents do detail salary deductions for specific disciplinary instances, such as a 3-day deduction for a second-time offense, a 15-day deduction for a third-time offense, or a 15-day deduction for a first-time offense for certain violations. However, an overall annual limit for these deductions is not specified in the provided materials.", 'contexts': ["I looked through the available documents, but I couldn't find specific information regarding the maximum number of days an employe

In [27]:
i

{'question': 'How many hours of daily leave is a breastfeeding employee entitled to, and for how long after the date of delivery?',
 'ground_truth': 'According to Article 90, a breastfeeding employee is entitled to two hours of daily leave for a period of one year from the date of delivery.',
 'answer': 'Breastfeeding employees are entitled to two hours of daily leave, if desired, after their maternity leave concludes. This entitlement is valid for one year from the date of delivery, allowing time to care for their newborn. These departure hours are considered part of the official working hours, even if official working hours are reduced during the month of Ramadan, as stated in Article 90 of the tmptd_htwneHR Bylaws.',
 'contexts': 'Breastfeeding employees are entitled to two hours of daily leave, if desired, after their maternity leave concludes. This entitlement is valid for one year from the date of delivery, allowing time to care for their newborn. These departure hours are consid

In [46]:

metrics = [
    faithfulness,       # How factually accurate is the answer based on the context?
    answer_relevancy,   # How relevant is the answer to the question?
    context_recall,     # Did the retriever find all the relevant context?
    context_precision,  # Was the retrieved context precise and not full of noise?
]

# Run the evaluation
result = evaluate(
    dataset=eval_dataset,
    metrics=metrics,
)

print("\n Evaluation Complete!")
print("-------------------------")
print(result)
print("-------------------------")

Evaluating:   0%|          | 0/40 [00:00<?, ?it/s]Exception raised in Job[0]: AttributeError('InstructorLLM' object has no attribute 'agenerate_prompt')
Exception raised in Job[1]: AttributeError('InstructorLLM' object has no attribute 'agenerate_prompt')
Exception raised in Job[2]: AttributeError('InstructorLLM' object has no attribute 'agenerate_prompt')
Exception raised in Job[3]: AttributeError('InstructorLLM' object has no attribute 'agenerate_prompt')
Exception raised in Job[4]: AttributeError('InstructorLLM' object has no attribute 'agenerate_prompt')
Exception raised in Job[5]: AttributeError('InstructorLLM' object has no attribute 'agenerate_prompt')
Exception raised in Job[6]: AttributeError('InstructorLLM' object has no attribute 'agenerate_prompt')
Exception raised in Job[7]: AttributeError('InstructorLLM' object has no attribute 'agenerate_prompt')
Exception raised in Job[8]: AttributeError('InstructorLLM' object has no attribute 'agenerate_prompt')
Exception raised in Job


 Evaluation Complete!
-------------------------
{'faithfulness': nan, 'answer_relevancy': nan, 'context_recall': nan, 'context_precision': nan}
-------------------------


In [39]:
dataset.to_dict(orient='records')

[{'question': "According to the HR Bylaws, what is the maximum number of days an employee's salary can be deducted in a single year as a disciplinary penalty?",
  'ground_truth': 'According to Article 110, Clause 3, the deduction from the basic salary cannot exceed sixty days during one year.',
  'answer': "I looked through the available documents, but I couldn't find specific information regarding the maximum number of days an employee's salary can be deducted in a single year as a disciplinary penalty.\n\nThe documents do detail salary deductions for specific disciplinary instances, such as a 3-day deduction for a second-time offense, a 15-day deduction for a third-time offense, or a 15-day deduction for a first-time offense for certain violations. However, an overall annual limit for these deductions is not specified in the provided materials.",
  'contexts': "I looked through the available documents, but I couldn't find specific information regarding the maximum number of days an e