Licensed under the MIT License.

Copyright (c) 2025-2035. All rights reserved by Hanhan Wu.

Permission is hereby granted to view this code for evaluation purposes only.
You may not reuse, copy, modify, merge, publish, distribute, sublicense,
or exploit this code without Hanhan Wu's EXPLICIT written permission.

# Auto Eval with Gemini

In [1]:
%load_ext autoreload
%autoreload 2

from datasets import load_dataset
import pandas as pd
import yaml
import nest_asyncio
nest_asyncio.apply()

from utils import *

import warnings
warnings.filterwarnings('ignore')


# Load the prompt templates used by the evaluation cells. The file is opened
# explicitly as UTF-8 so that reading it does not depend on the platform's
# default locale encoding.
with open('prompt_versions.yaml', 'r', encoding='utf-8') as file:
    prompt_versions = yaml.safe_load(file)

# Model identifier passed to the evaluation helpers in later cells.
llm_model_str = 'gemini-2.5-flash-lite'

In [2]:
# Fetch the 'baseline' entry of the "ragas_eval" configuration of the FiQA dataset.
fiqa_eval = load_dataset("explodinggradients/fiqa", "ragas_eval")['baseline']

# Rename each record's fields to the column names used by the rest of the
# notebook. Only the first ground-truth entry is kept as the referenced
# answer, and surrounding whitespace is stripped from the generated answer.
rag_lst = [
    {
        'query': record['question'],
        'retrieved_content': record['contexts'],
        'referenced_answer': record['ground_truths'][0],
        'answer': record['answer'].strip(),
    }
    for record in fiqa_eval
]

# Build the frame, report its dimensions, and preview the first rows.
rag_df = pd.DataFrame(rag_lst)
print(rag_df.shape)
rag_df.head()

(30, 4)


Unnamed: 0,query,retrieved_content,referenced_answer,answer
0,How to deposit a cheque issued to an associate...,[Just have the associate sign the back and the...,Have the check reissued to the proper payee.Ju...,The best way to deposit a cheque issued to an ...
1,Can I send a money order from USPS as a business?,[Sure you can. You can fill in whatever you w...,Sure you can. You can fill in whatever you wa...,"Yes, you can send a money order from USPS as a..."
2,1 EIN doing business under multiple business n...,[You're confusing a lot of things here. Compan...,You're confusing a lot of things here. Company...,"Yes, it is possible to have one EIN doing busi..."
3,Applying for and receiving business credit,[Set up a meeting with the bank that handles y...,"""I'm afraid the great myth of limited liabilit...",Applying for and receiving business credit can...
4,401k Transfer After Business Closure,[The time horizon for your 401K/IRA is essenti...,You should probably consult an attorney. Howev...,If your employer has closed and you need to tr...


In [3]:
# Imported explicitly: no other cell in this notebook imports asyncio, so the
# call below would otherwise only work if `from utils import *` leaks the name.
import asyncio

# Run the retrieval-relevancy helper over rag_df with the 'rr_prompt_template'
# prompt and wait for it to finish.
# NOTE(review): asyncio.run inside a notebook presumably relies on the earlier
# nest_asyncio.apply() call -- confirm before removing that call.
rr_df = asyncio.run(
    get_retrieval_relevancy_output_async(
        rag_df,
        llm_model_str,
        prompt_versions['rr_prompt_template'],
    )
)

In [4]:
# Report the frame's dimensions, then preview its first five rows.
print(rr_df.shape)
rr_df.head(5)

(30, 6)


Unnamed: 0,query,retrieved_content,referenced_answer,answer,rr_auto_score,rr_reasoning
0,How to deposit a cheque issued to an associate...,[Just have the associate sign the back and the...,Have the check reissued to the proper payee.Ju...,The best way to deposit a cheque issued to an ...,3,The retrieved content directly addresses the u...
1,Can I send a money order from USPS as a business?,[Sure you can. You can fill in whatever you w...,Sure you can. You can fill in whatever you wa...,"Yes, you can send a money order from USPS as a...",3,The retrieved content directly answers the use...
2,1 EIN doing business under multiple business n...,[You're confusing a lot of things here. Compan...,You're confusing a lot of things here. Company...,"Yes, it is possible to have one EIN doing busi...",2,The retrieved content mentions 'Doing Business...
3,Applying for and receiving business credit,[Set up a meeting with the bank that handles y...,"""I'm afraid the great myth of limited liabilit...",Applying for and receiving business credit can...,3,The retrieved content provides detailed steps ...
4,401k Transfer After Business Closure,[The time horizon for your 401K/IRA is essenti...,You should probably consult an attorney. Howev...,If your employer has closed and you need to tr...,3,The retrieved content directly addresses the u...


In [5]:
# Imported explicitly: no other cell in this notebook imports asyncio, so the
# call below would otherwise only work if `from utils import *` leaks the name.
import asyncio

# Run the answer-usefulness helper over rr_df with the 'au_prompt_template'
# prompt and wait for it to finish.
# NOTE(review): asyncio.run inside a notebook presumably relies on the earlier
# nest_asyncio.apply() call -- confirm before removing that call.
final_df = asyncio.run(
    get_answer_usefulness_output_async(
        rr_df,
        llm_model_str,
        prompt_versions['au_prompt_template'],
    )
)

In [6]:
# Report the frame's dimensions, then preview its first five rows.
print(final_df.shape)
final_df.head(5)

(30, 8)


Unnamed: 0,query,retrieved_content,referenced_answer,answer,rr_auto_score,rr_reasoning,answer_usefulness_score,au_reasoning
0,How to deposit a cheque issued to an associate...,[Just have the associate sign the back and the...,Have the check reissued to the proper payee.Ju...,The best way to deposit a cheque issued to an ...,3,The retrieved content directly addresses the u...,0.6,The AI's answer provides a generally correct m...
1,Can I send a money order from USPS as a business?,[Sure you can. You can fill in whatever you w...,Sure you can. You can fill in whatever you wa...,"Yes, you can send a money order from USPS as a...",3,The retrieved content directly answers the use...,1.0,"The AI's answer is exceptionally helpful, rele..."
2,1 EIN doing business under multiple business n...,[You're confusing a lot of things here. Compan...,You're confusing a lot of things here. Company...,"Yes, it is possible to have one EIN doing busi...",2,The retrieved content mentions 'Doing Business...,0.2,The AI's answer is factually incorrect and mis...
3,Applying for and receiving business credit,[Set up a meeting with the bank that handles y...,"""I'm afraid the great myth of limited liabilit...",Applying for and receiving business credit can...,3,The retrieved content provides detailed steps ...,0.8,The AI's answer is helpful and relevant to the...
4,401k Transfer After Business Closure,[The time horizon for your 401K/IRA is essenti...,You should probably consult an attorney. Howev...,If your employer has closed and you need to tr...,3,The retrieved content directly addresses the u...,0.4,The AI's answer provides general advice on tra...
