In [1]:
import os
import pandas as pd
from ragas.metrics import answer_relevancy, answer_correctness
from ragas import evaluate
from ragas.llms import LangchainLLMWrapper
from ragas.embeddings import LangchainEmbeddingsWrapper
from langchain.chat_models import ChatOpenAI
from langchain_localai import LocalAIEmbeddings
from datasets import Dataset
from dotenv import load_dotenv
import json

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load settings from a local .env file (python-dotenv) before the cells below
# read the endpoint, key and model-name variables via os.getenv.
load_dotenv()

True

In [3]:
# Chat model handed to ragas `evaluate` as `llm`; the endpoint, API key and
# model name are taken from the environment.
llm = LangchainLLMWrapper(
    ChatOpenAI(
        openai_api_base=os.environ.get("OPENAI_API_BASE"),
        openai_api_key=os.environ.get("OPENAI_API_KEY"),
        model_name=os.environ.get("LLM_MODEL_NAME"),
    )
)

# Embedding model handed to ragas `evaluate` as `embeddings`; configured the
# same way from its own set of environment variables.
embedding = LangchainEmbeddingsWrapper(
    LocalAIEmbeddings(
        openai_api_base=os.environ.get("EMBED_URL"),
        openai_api_key=os.environ.get("EMBED_TOKEN"),
        model=os.environ.get("EMBED_MODEL"),
    )
)

  llm = ChatOpenAI(


In [4]:
def start_evaluate(test_set_file, output_file):
    """Evaluate a JSON test set with Ragas and save the per-sample scores as CSV.

    Args:
        test_set_file: Path to a UTF-8 encoded JSON file; its parsed content is
            passed to ``Dataset.from_dict``.
        output_file: Destination of the CSV with the per-sample results. When it
            is a filesystem path, missing parent directories are created.

    Returns:
        The result object returned by ``ragas.evaluate``.
    """
    with open(test_set_file, 'r', encoding='utf-8') as file:
        data = json.load(file)
    dataset = Dataset.from_dict(data)

    # Make sure the target directory exists *before* the evaluation runs:
    # `to_csv` raises on a missing directory, and discovering that only after
    # the slow LLM-backed evaluation would throw the scores away.
    if isinstance(output_file, (str, os.PathLike)):
        output_dir = os.path.dirname(output_file)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

    # `llm` and `embedding` are the notebook-level wrapped models.
    results = evaluate(
        dataset=dataset,
        metrics=[answer_correctness, answer_relevancy],
        llm=llm,
        embeddings=embedding
    )
    results.to_pandas().to_csv(output_file, index=False)
    print(f"Done - saved in: {output_file}")
    return results


In [6]:
# Method 1 (LightRAG): test set and the CSV holding its evaluation scores.
test_set_file = './test_data/study_rules/test_set_method1_lightrag.json'
output_file = './results/study_rules/method1_lightrag.csv'

# The evaluation is slow and calls the LLM, so reuse the saved CSV when it
# exists and only evaluate when no results have been written yet (instead of
# toggling the call by commenting it in and out).
if not os.path.exists(output_file):
    results = start_evaluate(test_set_file, output_file)

df = pd.read_csv(output_file)
print("Average answer correctness:", df['answer_correctness'].mean())
print("Average answer relevancy:", df['answer_relevancy'].mean())
df

Average answer correctness: 0.44772531055370113
Average answer relevancy: 0.7571920970519181


Unnamed: 0,user_input,response,reference,answer_correctness,answer_relevancy
0,What is the procedure and timeline for a stude...,**Procedure for Transfer from Another Universi...,A student may transfer from another university...,0.766353,0.928777
1,What types of training are students required t...,**Training Requirements for First Semester Stu...,Students are required to complete training in ...,0.739027,0.931348
2,How does Gdańsk University of Technology defin...,Confirming learning outcomes for admission at ...,Confirmation of learning outcomes is conducted...,0.721505,0.923147
3,What are the responsibilities of the Faculty S...,The Faculty Student Council is a representativ...,The Faculty Student Council is responsible for...,0.552259,0.772437
4,Under what circumstances can classes from the ...,"Based on the provided Knowledge Base, I was un...",Such classes may be conducted in a foreign lan...,0.141376,0.0
5,What procedures must academic teachers follow ...,"According to the Knowledge Base, academic teac...",Academic teachers complete subject cards in th...,0.423226,0.796308
6,What are the responsibilities of academic teac...,"According to the Knowledge Base, academic teac...",Academic teachers must register student attend...,0.236015,0.531554
7,Under what circumstances can a student be remo...,**Removal from Classes**\n\nAccording to the p...,A student can be removed from classes if their...,0.268342,0.935541
8,"How is the weighted average grade calculated, ...",**Calculation of Weighted Average Grade**\n\nA...,The weighted average grade is calculated by di...,0.181425,0.995617


In [7]:
# Method 2 (GraphRAG): run the evaluation, then report the scores it saved.
test_set_file = './test_data/study_rules/test_set_method2_graphrag.json'
output_file = './results/study_rules/method2_graphrag.csv'

results = start_evaluate(test_set_file=test_set_file, output_file=output_file)

# Read the scores back from the CSV that start_evaluate just wrote.
df = pd.read_csv(output_file)
print("Average answer correctness:", df.loc[:, 'answer_correctness'].mean())
print("Average answer relevancy:", df.loc[:, 'answer_relevancy'].mean())
df

Evaluating: 100%|██████████| 18/18 [01:26<00:00,  4.82s/it]


Done - saved in: ./results/study_rules/method2_graphrag.csv
Average answer correctness: 0.43315654246325025
Average answer relevancy: 0.8603584098661652


Unnamed: 0,user_input,response,reference,answer_correctness,answer_relevancy
0,What is the procedure and timeline for a stude...,# Procedure and Timeline for Student Transfer ...,A student may transfer from another university...,0.312727,0.957143
1,What types of training are students required t...,# Types of Training Required for Students at G...,Students are required to complete training in ...,0.558481,0.990234
2,How does Gdańsk University of Technology defin...,**Confirming Learning Outcomes for Admission a...,Confirmation of learning outcomes is conducted...,0.429553,0.934198
3,What are the responsibilities of the Faculty S...,The Faculty Student Council (FSC) has several ...,The Faculty Student Council is responsible for...,0.342331,0.880838
4,Under what circumstances can classes from the ...,"According to the provided data, classes from t...",Such classes may be conducted in a foreign lan...,0.54287,0.937306
5,What procedures must academic teachers follow ...,Publishing and updating subject cards in the ‘...,Academic teachers complete subject cards in th...,0.415907,0.843631
6,What are the responsibilities of academic teac...,**Responsibilities of Academic Teachers in Rel...,Academic teachers must register student attend...,0.467968,0.683712
7,Under what circumstances can a student be remo...,**Student Removal and Absence Classification**...,A student can be removed from classes if their...,0.175586,0.725469
8,"How is the weighted average grade calculated, ...",### Weighted Average Grade Calculation\n\nThe ...,The weighted average grade is calculated by di...,0.652985,0.790695
