In [None]:
import os

import tqdm
from datasets import load_dataset
from indexify import IndexifyClient
from openai import OpenAI


In [None]:
# Load the "d0rj/wikisum" dataset and keep its "train" split as `data`.
# The bare name on the last line makes the notebook display the split.
wiki_summaries = load_dataset("d0rj/wikisum")
data = wiki_summaries["train"]
data

In [None]:
# Client built with its default arguments.
indexify_client = IndexifyClient()

# Ingest the summaries one at a time: one add_documents call per summary.
for summary in data["summary"]:
    indexify_client.add_documents(summary)

# Register the "minilml6" extraction policy; the "minilml6.embedding" index
# name queried later in this notebook is built from this policy name.
indexify_client.add_extraction_policy(
    extractor="tensorlake/minilm-l6",
    name="minilml6",
    content_source="ingestion",
)

In [None]:
# Optional sanity check, left disabled. When uncommented it prints the length
# of whatever `indexify_client.get_content()` returns, then runs one sample
# search against the "minilml6.embedding" index (query "How to store oysters",
# third argument 2) and prints the result.
# print(len(indexify_client.get_content()))
# docs = indexify_client.search_index(
#     "minilml6.embedding",
#     "How to store oysters",
#     2)

# print(docs)

In [None]:
# Load the "fullwiki" configuration of HotpotQA and evaluate on the first 100
# rows of its validation split only.
hotpot = load_dataset("hotpot_qa", "fullwiki")
hotpot_subset = hotpot["validation"][:100]

# Parallel lists: questions[i] is answered by correct_answers[i].
questions, correct_answers = hotpot_subset["question"], hotpot_subset["answer"]

# Display the first question as a quick look at the data.
questions[0]

In [None]:
import time
class HotPotQARAG:
    """Answer questions with an OpenAI chat model, optionally with retrieved context, and score the answers.

    Retrieval goes through ``indexify_client.search_index`` against the
    ``"minilml6.embedding"`` index. Answer generation and LLM-based grading
    both go through ``openai_client.chat.completions.create``.
    """

    # Index queried by `query_index`.
    _INDEX_NAME = "minilml6.embedding"

    # System prompts, sent to the model exactly as written here.
    _ANSWER_PROMPT = "Answer the following question: Don't be verbose, or make full sentences. Just answer the question ."
    _CONTEXT_PROMPT = "Answer the following question: Don't be verbose, or make full sentences. Just answer the question. You can use the context provided if you find it helpful."
    _JUDGE_PROMPT = (
        "Check if the answer means the same as the correct answer. "
        # This prompt was written with a backslash line continuation, which
        # embedded the continuation line's 28 leading spaces in the text.
        # The run is kept so the prompt sent to the model is unchanged.
        + " " * 28
        + "They need not be exact matches. Return True if they mean the same, and False if they are different."
    )

    def __init__(self, indexify_client, openai_client, model="gpt-3.5-turbo", num_docs=3, request_delay=0.5):
        """Store the clients and tuning knobs.

        Args:
            indexify_client: object providing ``search_index(index, query, n)``.
            openai_client: object providing ``chat.completions.create(...)``.
            model: chat model name used for answering and for grading.
            num_docs: value passed as the third argument to ``search_index``.
            request_delay: seconds to sleep after each grading request.
        """
        self.openai_client = openai_client
        self.indexify_client = indexify_client
        self.model = model
        self.num_docs = num_docs
        self.request_delay = request_delay

    def _chat(self, system_prompt, user_content):
        """Send one system + user message pair and return the first choice's content."""
        response = self.openai_client.chat.completions.create(
            model=self.model,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_content},
            ],
        )
        return response.choices[0].message.content

    @staticmethod
    def _is_affirmative(verdict):
        """Return True when a grader reply starts with "true".

        Leading/trailing whitespace and letter case are ignored, so replies
        such as "True.", "true" or " True\\n" all count. A missing reply
        (``None``) counts as False.
        """
        return (verdict or "").strip().lower().startswith("true")

    def query_index(self, question):
        """Search the embedding index for ``question`` and return the client's result unchanged."""
        return self.indexify_client.search_index(
            self._INDEX_NAME,
            question,
            self.num_docs,
        )

    def get_answers(self, questions):
        """Answer each question from the model alone (no retrieved context).

        Returns:
            A list with one model reply per question, in input order.
        """
        return [
            self._chat(self._ANSWER_PROMPT, question)
            for question in tqdm.tqdm(questions)
        ]

    def get_answers_with_context(self, questions):
        """Answer each question with retrieved text appended to the user message.

        Each search hit is expected to support ``hit['text']``; the texts are
        joined with single spaces and appended after ``" #### Context "``.

        Returns:
            A list with one model reply per question, in input order.
        """
        answers = []
        for question in tqdm.tqdm(questions):
            docs = self.query_index(question)
            context = " ".join([doc['text'] for doc in docs])
            answers.append(
                self._chat(self._CONTEXT_PROMPT, f"{question} #### Context {context}")
            )
        return answers

    def check_match(self, answers, correct_answers):
        """Substring-match each answer against its reference after normalisation.

        Both sides are lower-cased, stripped of periods (via
        ``_process_answers``) and of surrounding whitespace. A pair matches
        when the normalised strings are equal, or when both are non-empty and
        one contains the other. The non-empty requirement matters because the
        empty string is a substring of every string, so a blank answer would
        otherwise be scored as correct.

        Returns:
            A list of booleans, one per (answer, reference) pair; pairs are
            formed with ``zip`` and therefore stop at the shorter input.
        """
        predicted = [text.strip() for text in self._process_answers(answers)]
        expected = [text.strip() for text in self._process_answers(correct_answers)]
        return [
            guess == truth or bool(guess and truth and (guess in truth or truth in guess))
            for guess, truth in zip(predicted, expected)
        ]

    def check_llm_match(self, answers, correct_answers):
        """Ask the chat model whether each answer means the same as its reference.

        Returns:
            A list of booleans, one per (answer, reference) pair, True when
            the grader's reply is affirmative (see ``_is_affirmative``).
        """
        match = []
        for answer, correct_answer in tqdm.tqdm(list(zip(answers, correct_answers))):
            verdict = self._chat(
                self._JUDGE_PROMPT,
                f'answer: {answer} correct answer: {correct_answer}',
            )
            match.append(self._is_affirmative(verdict))
            # Pause between grading requests (presumably to stay under API rate limits).
            time.sleep(self.request_delay)

        return match

    @staticmethod
    def _process_answers(answers):
        """Lower-case each answer and remove every period; ``None`` becomes ``""``."""
        return [(answer or "").lower().replace('.', '') for answer in answers]

    def evaluate_answers(self, answers, correct_answers):
        """Score answers with both the substring check and the LLM grader.

        Returns:
            ``(em, llm_match)``: the fraction of pairs accepted by
            ``check_match`` and by ``check_llm_match`` respectively. When
            either input is empty there is nothing to score and
            ``(0.0, 0.0)`` is returned instead of dividing by zero.
        """
        # Materialise once: both checks below iterate over the same inputs.
        answers = list(answers)
        correct_answers = list(correct_answers)
        if not answers or not correct_answers:
            return 0.0, 0.0

        exact_matches = self.check_match(answers, correct_answers)
        llm_matches = self.check_llm_match(answers, correct_answers)

        em = sum(exact_matches) / len(exact_matches)
        llm_match = sum(llm_matches) / len(llm_matches)

        return em, llm_match
                

In [None]:
# Read the key from the OPENAI_API_KEY environment variable so the secret is
# never saved in the notebook. If the variable is unset, this falls back to
# the literal placeholder "OPENAI_API_KEY", which is not a usable key.
openai_key = os.environ.get("OPENAI_API_KEY", "OPENAI_API_KEY")

In [None]:
# Build the OpenAI client with the key held in `openai_key`.
openai_client = OpenAI(api_key=openai_key)

In [None]:
# Hand the Indexify and OpenAI clients to the evaluation helper.
rag_system = HotPotQARAG(indexify_client, openai_client)

In [None]:
# Baseline run: one model reply per question, with no retrieved context.
answers = rag_system.get_answers(questions)

In [None]:
# Retrieval-augmented run: same questions, with text retrieved from the index
# appended to each user message.
answers_with_context = rag_system.get_answers_with_context(questions)

In [None]:
# Score the no-context answers. evaluate_answers returns a pair:
# results[0] is the substring-match rate, results[1] the LLM-judged match rate.
results = rag_system.evaluate_answers(answers, correct_answers)
print(f'Exact Match Evaluation: {results[0]}, LLM Match Evaluation: {results[1]}')

In [None]:
# Score the retrieval-augmented answers the same way, for comparison with the
# no-context scores printed by the previous cell.
results_context = rag_system.evaluate_answers(answers_with_context, correct_answers)
print(f'Exact Match Evaluation: {results_context[0]}, LLM Match Evaluation: {results_context[1]}')