In [1]:
import pandas as pd
import os
from dotenv import load_dotenv
import ast
import sys
sys.path.insert(0, os.path.abspath(".."))
from LLM_Backend.LLM_Backend_Normal import ChatBackend
import warnings
warnings.filterwarnings("ignore")

In [2]:
df = pd.read_csv('../data/ms-marco-200-rows.csv')
df = df[['question', 'ground_truths']]
df.head()

Unnamed: 0,question,ground_truths
0,walgreens store sales average,"['Approximately $15,000 per year.']"
1,how much do bartenders make,"['$21,550 per year'\n 'The average hourly wage..."
2,what is a furuncle boil,"['A boil, also called a furuncle, is a deep fo..."
3,what can urinalysis detect,"['Detect and assess a wide range of disorders,..."
4,what is vitamin a used for,"['Shigellosis, diseases of the nervous system,..."


In [3]:
backend = ChatBackend(genai_model = "thebloke/mixtral-8x7b-instruct-v0-1-gptq")

In [6]:
%%time
df['hypothetical_answer'] = df['question'].apply(lambda x: backend.generate_hypothetical_answer(x).strip())
df.head()

CPU times: user 6.61 s, sys: 750 ms, total: 7.36 s
Wall time: 18min


Unnamed: 0,question,ground_truths,hypothetical_answer
0,walgreens store sales average,"['Approximately $15,000 per year.']","The average sales for Walgreens stores vary, a..."
1,how much do bartenders make,"['$21,550 per year'\n 'The average hourly wage...","On average, bartenders in the US make about $1..."
2,what is a furuncle boil,"['A boil, also called a furuncle, is a deep fo...","A furuncle, also known as a boil, is a skin in..."
3,what can urinalysis detect,"['Detect and assess a wide range of disorders,...",Urinalysis can detect various substances in ur...
4,what is vitamin a used for,"['Shigellosis, diseases of the nervous system,...",Vitamin A is essential for maintaining healthy...


In [7]:
df.to_csv("../output/hypothetical_answer.csv", index = False)

In [4]:
df = pd.read_csv('../output/hypothetical_answer.csv')
df.head()

Unnamed: 0,question,ground_truths,hypothetical_answer
0,walgreens store sales average,"['Approximately $15,000 per year.']",Walgreens reports an average quarterly sales g...
1,how much do bartenders make,"['$21,550 per year'\n 'The average hourly wage...","Bartenders' earnings vary widely, depending on..."
2,what is a furuncle boil,"['A boil, also called a furuncle, is a deep fo...","A furuncle, also known as a boil, is a bacteri..."
3,what can urinalysis detect,"['Detect and assess a wide range of disorders,...",Urinalysis can detect various substances like ...
4,what is vitamin a used for,"['Shigellosis, diseases of the nervous system,...",Vitamin A is essential for maintaining good vi...


In [8]:
%%time
df['reranked_contexts'] = df.apply(lambda row: backend.perform_searchD(query=row['question'], hypothetical_answer=row['hypothetical_answer'], classname = 'Parent_child_chunks'), axis=1)
df.head()

CPU times: user 1min 14s, sys: 23.5 s, total: 1min 38s
Wall time: 55 s


Unnamed: 0,question,ground_truths,hypothetical_answer,reranked_contexts
0,walgreens store sales average,"['Approximately $15,000 per year.']","The average sales for Walgreens stores vary, a...","[3 percent of sales, and returns on assets of ..."
1,how much do bartenders make,"['$21,550 per year'\n 'The average hourly wage...","On average, bartenders in the US make about $1...","[to earn somewhat less, averaging about $22,13..."
2,what is a furuncle boil,"['A boil, also called a furuncle, is a deep fo...","A furuncle, also known as a boil, is a skin in...","[called a furuncle, is a deep folliculitis, in..."
3,what can urinalysis detect,"['Detect and assess a wide range of disorders,...",Urinalysis can detect various substances in ur...,[specimen cup. Usually only small amounts (30-...
4,what is vitamin a used for,"['Shigellosis, diseases of the nervous system,...",Vitamin A is essential for maintaining healthy...,"[kidney stones, overactive thyroid, iron-poor ..."


In [9]:
df.to_csv('../output/SearchD_contexts.csv', index = False)

In [None]:
%%time
df['answer'] = df.apply(lambda row : backend.generate_hyde_response(query=row['question'], context=row['reranked_contexts']), axis=1)
df.head()

In [14]:
df.to_csv('../output/SEARCHD_output.csv', index=False)

### Evaluation using RAGAS

In [2]:
from datasets import Dataset
from evaluation.ragas_score import _get_ragas_score
import warnings
warnings.filterwarnings("ignore")

In [3]:
df=pd.read_csv('../output/SEARCHD_output.csv')
df=df[['question', 'ground_truths', 'context_reranked','answer']]
df.rename(columns={'context_reranked':'contexts'}, inplace=True)
df["ground_truths"] = [ast.literal_eval(gt) for gt in df["ground_truths"]]
df["contexts"] = [ast.literal_eval(ctx) for ctx in df["contexts"]]
df.head()

Unnamed: 0,question,ground_truths,contexts,answer
0,walgreens store sales average,"[Approximately $15,000 per year.]","[3 percent of sales, and returns on assets of ...","According to the context provided, the averag..."
1,how much do bartenders make,"[$21,550 per yearThe average hourly wage for a...","[to earn somewhat less, averaging about $22,13...","According to the context provided, the averag..."
2,what is a furuncle boil,"[A boil, also called a furuncle, is a deep fol...","[called a furuncle, is a deep folliculitis, in...","A furuncle, also known as a boil, is a deep i..."
3,what can urinalysis detect,"[Detect and assess a wide range of disorders, ...",[specimen cup. Usually only small amounts (30-...,A urinalysis is a test used to evaluate a sam...
4,what is vitamin a used for,"[Shigellosis, diseases of the nervous system, ...","[kidney stones, overactive thyroid, iron-poor ...",1. Vitamin A is used for treating vitamin A de...


In [4]:
eval_data= Dataset.from_pandas(df, preserve_index=False)

In [5]:
scores = _get_ragas_score(dataset=eval_data)

evaluating with [faithfulness]


100%|██████████| 14/14 [05:15<00:00, 22.56s/it]


evaluating with [context_precision]


100%|██████████| 14/14 [05:56<00:00, 25.47s/it]


evaluating with [context_relevancy]


100%|██████████| 14/14 [01:49<00:00,  7.81s/it]


In [8]:
scores

{'faithfulness': 1.0, 'context_precision': 0.9787, 'context_relevancy': 0.0998}
