In [1]:
import os
import pandas as pd
from ragas.metrics import answer_relevancy, answer_correctness
from ragas import evaluate
from ragas.llms import LangchainLLMWrapper
from ragas.embeddings import LangchainEmbeddingsWrapper
from langchain.chat_models import ChatOpenAI
from langchain_localai import LocalAIEmbeddings
from datasets import Dataset
from dotenv import load_dotenv
import json

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load settings via python-dotenv before the os.getenv(...) lookups in the next cell.
# NOTE(review): presumably reads a .env file near the notebook — confirm its location.
load_dotenv()

True

In [3]:
def _require_env(name):
    """Return the value of environment variable ``name``.

    ``os.getenv`` returns ``None`` for an unset variable; handing that ``None``
    to the client constructors below hides the real problem, so fail here with
    a message that names the missing setting.

    Raises:
        RuntimeError: if the variable is unset or empty.
    """
    value = os.getenv(name)
    if not value:
        raise RuntimeError(
            f"Environment variable {name} is not set; define it (e.g. in .env) "
            "before running this notebook."
        )
    return value


# Chat model handed to ragas as the evaluation LLM (wrapped for the ragas interface).
chat_model = ChatOpenAI(
    openai_api_base=_require_env("OPENAI_API_BASE"),
    openai_api_key=_require_env("OPENAI_API_KEY"),
    model_name=_require_env("LLM_MODEL_NAME"),
)
llm = LangchainLLMWrapper(chat_model)

# Embedding model handed to ragas (wrapped for the ragas interface).
embedding_model = LocalAIEmbeddings(
    openai_api_base=_require_env("EMBED_URL"),
    openai_api_key=_require_env("EMBED_TOKEN"),
    model=_require_env("EMBED_MODEL"),
)
embedding = LangchainEmbeddingsWrapper(embedding_model)

  llm = ChatOpenAI(


In [4]:
def start_evaluate(test_set_file, output_file):
    """Score a JSON test set with ragas and save the per-sample scores as CSV.

    Uses the notebook-level ``llm`` and ``embedding`` objects for the
    ``answer_correctness`` and ``answer_relevancy`` metrics.

    Args:
        test_set_file: Path to a UTF-8 JSON file whose parsed content is passed
            to ``Dataset.from_dict``.
        output_file: Destination CSV path. Missing parent directories are
            created.

    Returns:
        The object returned by ``ragas.evaluate``.
    """
    # Create the destination directory *before* the slow evaluation run, so a
    # missing folder cannot throw away several minutes of API calls at to_csv().
    output_dir = os.path.dirname(output_file)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    with open(test_set_file, 'r', encoding='utf-8') as file:
        data = json.load(file)
    # The parsed dict is all that is needed; build the dataset after the file is closed.
    dataset = Dataset.from_dict(data)

    results = evaluate(
        dataset=dataset,
        metrics=[answer_correctness, answer_relevancy],
        llm=llm,
        embeddings=embedding
    )
    df = results.to_pandas()
    df.to_csv(output_file, index=False)
    print(f"Done - saved in: {output_file}")

    # Jobs can fail during evaluation (rate limits, timeouts, unparsable judge
    # output). Rows left without a score are skipped by pandas' .mean(), so
    # report how many samples each downstream average will actually cover.
    for column in ('answer_correctness', 'answer_relevancy'):
        if column in df.columns:
            unscored = int(df[column].isna().sum())
            if unscored:
                print(
                    f"Warning: {unscored} of {len(df)} rows have no {column} score; "
                    "averages computed from this file exclude them."
                )
    return results


In [5]:
# Input test set and the CSV that start_evaluate will write for it.
test_set_file = './test_data/study_rules/test_set_method1_lightrag_hybrid.json'
output_file = './results/study_rules/method1_lightrag_hybrid.csv'
results = start_evaluate(test_set_file, output_file)

# Summarise from the CSV on disk, i.e. exactly what was saved.
df = pd.read_csv(output_file)
mean_correctness = df['answer_correctness'].mean()
mean_relevancy = df['answer_relevancy'].mean()

print(f"FILE: {output_file}")
print("Average answer correctness:", mean_correctness)
print("Average answer relevancy:", mean_relevancy)

Evaluating:  60%|█████▉    | 25/42 [02:24<01:53,  6.66s/it]Exception raised in Job[36]: OutputParserException(Invalid json output: {"TP": [
    {
      "statement": "Students must have obtained a weighted average grade not lower than4.5 through the entire period of studies.",
      "reason": "This is directly supported by the ground truth which states that one criterion is achieving a weighted average grade of at least4.5 throughout the entire study period."
    },
    {
      "statement": "Students must have demonstrated outstanding scientific achievements related to the field of study.",
      "reason": "The ground truth mentions demonstrating outstanding scientific achievements as one of the criteria."
    },
    {
      "statement": "Students must have obtained a \"very good\" (5.0) grade for the diploma thesis.",
      "reason": "Earning a \"very good\" (5.0) grade for the diploma thesis is mentioned in the ground truth as one of the criteria."
    },
    {
      "statement": "Stu

Done - saved in: ./results/study_rules/method1_lightrag_hybrid.csv
FILE: ./results/study_rules/method1_lightrag_hybrid.csv
Average answer correctness: 0.5076676342165034
Average answer relevancy: 0.69348517896976


In [7]:
# Input test set and the CSV that start_evaluate will write for it.
test_set_file = './test_data/study_rules/test_set_method1_lightrag_local.json'
output_file = './results/study_rules/method1_lightrag_local.csv'
results = start_evaluate(test_set_file, output_file)

# Summarise from the CSV on disk, i.e. exactly what was saved.
df = pd.read_csv(output_file)
mean_correctness = df['answer_correctness'].mean()
mean_relevancy = df['answer_relevancy'].mean()

print(f"FILE: {output_file}")
print("Average answer correctness:", mean_correctness)
print("Average answer relevancy:", mean_relevancy)

Evaluating:  67%|██████▋   | 28/42 [02:32<01:19,  5.67s/it]Exception raised in Job[2]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for model `meta-llama/llama-4-scout-17b-16e-instruct` in organization `org_01jss0mqvces98cxfmen4f1jpd` service tier `on_demand` on requests per minute (RPM): Limit 30, Used 30, Requested 1. Please try again in 1.939s. Need more tokens? Upgrade to Dev Tier today at https://console.groq.com/settings/billing', 'type': 'requests', 'code': 'rate_limit_exceeded'}})
Evaluating:  90%|█████████ | 38/42 [03:20<00:21,  5.27s/it]Exception raised in Job[22]: TimeoutError()
Evaluating:  93%|█████████▎| 39/42 [03:32<00:21,  7.08s/it]Exception raised in Job[25]: TimeoutError()
Evaluating:  98%|█████████▊| 41/42 [03:42<00:05,  5.71s/it]Exception raised in Job[38]: OutputParserException(Failed to parse StringIO from completion {"TP": [{"statement": "Exceptionally gifted secondary school students who attend university classes at Gda\u0144sk Univ

Done - saved in: ./results/study_rules/method1_lightrag_local.csv
FILE: ./results/study_rules/method1_lightrag_local.csv
Average answer correctness: 0.4070000947440743
Average answer relevancy: 0.6542954736798687


In [7]:
# Input test set and the CSV that start_evaluate will write for it.
test_set_file = './test_data/study_rules/test_set_method2_graphrag.json'
output_file = './results/study_rules/method2_graphrag.csv'

results = start_evaluate(test_set_file, output_file)

# Summarise from the CSV on disk, i.e. exactly what was saved.
df = pd.read_csv(output_file)
# Label the averages with their source file, as the other evaluation cells do.
print(f"FILE: {output_file}")
print("Average answer correctness:", df['answer_correctness'].mean())
print("Average answer relevancy:", df['answer_relevancy'].mean())
# Last expression: render the per-sample scores as the cell output.
df

Evaluating: 100%|██████████| 18/18 [01:26<00:00,  4.82s/it]


Done - saved in: ./results/study_rules/method2_graphrag.csv
Average answer correctness: 0.43315654246325025
Average answer relevancy: 0.8603584098661652


Unnamed: 0,user_input,response,reference,answer_correctness,answer_relevancy
0,What is the procedure and timeline for a stude...,# Procedure and Timeline for Student Transfer ...,A student may transfer from another university...,0.312727,0.957143
1,What types of training are students required t...,# Types of Training Required for Students at G...,Students are required to complete training in ...,0.558481,0.990234
2,How does Gdańsk University of Technology defin...,**Confirming Learning Outcomes for Admission a...,Confirmation of learning outcomes is conducted...,0.429553,0.934198
3,What are the responsibilities of the Faculty S...,The Faculty Student Council (FSC) has several ...,The Faculty Student Council is responsible for...,0.342331,0.880838
4,Under what circumstances can classes from the ...,"According to the provided data, classes from t...",Such classes may be conducted in a foreign lan...,0.54287,0.937306
5,What procedures must academic teachers follow ...,Publishing and updating subject cards in the ‘...,Academic teachers complete subject cards in th...,0.415907,0.843631
6,What are the responsibilities of academic teac...,**Responsibilities of Academic Teachers in Rel...,Academic teachers must register student attend...,0.467968,0.683712
7,Under what circumstances can a student be remo...,**Student Removal and Absence Classification**...,A student can be removed from classes if their...,0.175586,0.725469
8,"How is the weighted average grade calculated, ...",### Weighted Average Grade Calculation\n\nThe ...,The weighted average grade is calculated by di...,0.652985,0.790695
