In [140]:
import dotenv
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
from langchain.document_loaders import TextLoader
from langchain.document_loaders import DirectoryLoader
from langchain_openai import AzureOpenAIEmbeddings
dotenv.load_dotenv()
from langchain_community.vectorstores import Chroma
from ragas import evaluate
from ragas.metrics import (
    context_recall,
    context_precision,
)
from datasets import Dataset
from langchain_openai import AzureChatOpenAI
from langchain.schema.runnable import RunnablePassthrough
from operator import itemgetter
from langchain.prompts import ChatPromptTemplate
import os
import sys
sys.tracebacklimit = 0
from langchain.retrievers import ParentDocumentRetriever
from langchain.storage import InMemoryStore
from langchain.retrievers import BM25Retriever
from langchain.retrievers import EnsembleRetriever
import getpass
from langchain.retrievers.document_compressors import CohereRerank
from langchain.retrievers import ContextualCompressionRetriever

In [18]:
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
OPENAI_API_VERSION = os.environ.get("OPENAI_API_VERSION")
AZURE_OPENAI_ENDPOINT = os.environ.get("AZURE_OPENAI_ENDPOINT")
OPENAI_MODEL = os.environ.get("OPENAI_MODEL")
OPENAI_DEPLOYMENT = os.environ.get("OPENAI_DEPLOYMENT")
EMBEDDING_MODEL = os.environ.get("EMBEDDING_MODEL")
EMBEDDING_DEPLOYMENT = os.environ.get("EMBEDDING_DEPLOYMENT")
OPENAI_MODEL_GPT4 = os.environ.get("OPENAI_MODEL_GPT4")
OPENAI_DEPLOYMENT_GPT4 = os.environ.get("OPENAI_DEPLOYMENT_GPT4")

In [101]:
questions = [
    "Alexei Navalny's mother was informed that his body was held for 'forensic investigation'.",
    "The phrase 'It's the economy, stupid' served as the rallying cry for Bill Clinton's successful presidential campaign in 1992, and over the subsequent decades, it has evolved into a widely accepted political doctrine.",
    "Elon Musk said that to be comfortable growing Tesla to be a leader in AI and robotics, he wants to have 10% voting control.",
    "In 2020, Prince William announced the creation of the Moonwalk Scholarship, which was inspired by U.S. Pres. John F. Kennedy’s 'Moonshot' initiative to send a man to the Moon.",
    "43% of Gen Alphas in the US had tablets before the age of six, and 58% received their first iPhone by age 5.",
    "Boeing has made several changes since 2019 to improve product safety, including establishing an aerospace safety committee in January 2017 and a chief aerospace safety office in January 2021, implementing a safety management system in 2019, and establishing a dedicated Organization Designation Authorization (ODA) Ombudsperson in June 2022.",
    "Renewed concerns arose on July 17, 2022, when a door panel of an Alaska Airlines plane, manufactured by Boeing, blew out midair over Portland, Oregon, compelling the crew to execute an emergency landing.",
    "Sora, an AI tool, possesses the capability to generate complete videos lasting up to 3 minutes. By providing a prompt, such as 'a field of cats worshipping one giant dog,' theoretically, you can receive a video that aligns with the given description.",
    "Kyiv relies significantly on contemporary armament supplies from Western allies, notably the US, to sustain its fight against Russia, which possesses a larger military force and ample artillery ammunition. However, the green light for a crucially required $200 billion US aid package, with $161 billion designated for Ukraine, encounters challenges in the House of Representatives.",
    "On the morning of 7 October, waves of Hamas gunmen stormed across Tel Aviv's border into Israel, killing about 1,200 people. Hamas also fired thousands of rockets."
    ]

ground_truths = [
    ["Alexei Navalny's mother was informed that his body was held for 'chemical analysis'."],
    ["The phrase 'It's the economy, stupid' served as the rallying cry for Bill Clinton's successful presidential campaign in 1992, and over the subsequent decades, it has evolved into a widely accepted political doctrine."],
    ["Elon Musk said that to be comfortable growing Tesla to be a leader in AI and robotics, he wants to have 25% voting control."],
    ["In 2020, Prince William announced the creation of the Earthshot Prize, which was inspired by U.S. Pres. John F. Kennedy’s 'Moonshot' initiative to send a man to the Moon."],
    ["43% of Gen Alphas in the US had tablets before the age of six, and 58% received their first iPhone by age 10."],
    ["Boeing has made several changes since 2019 to improve product safety, including establishing an aerospace safety committee in August 2019 and a chief aerospace safety office in January 2021, implementing a safety management system in 2019, and establishing a dedicated Organization Designation Authorization (ODA) Ombudsperson in June 2022."],
    ["Renewed concerns arose on January 5th, 2024 when a door panel of an Alaska Airlines plane, manufactured by Boeing, blew out midair over Portland, Oregon, compelling the crew to execute an emergency landing."],
    ["Sora, an AI tool, possesses the capability to generate complete videos lasting up to 1 minute. By providing a prompt, such as 'a field of cats worshipping one giant dog,' theoretically, you can receive a video that aligns with the given description."],
    ["Kyiv relies significantly on contemporary armament supplies from Western allies, notably the US, to sustain its fight against Russia, which possesses a larger military force and ample artillery ammunition. However, the green light for a crucially required $95 billion US aid package, with $61 billion designated for Ukraine, encounters challenges in the House of Representatives."],
    ["On the morning of 7 October, waves of Hamas gunmen stormed across Gaza's border into Israel, killing about 1,200 people. Hamas also fired thousands of rockets."]
]

In [20]:
embeddings_client = AzureOpenAIEmbeddings(
    azure_deployment=EMBEDDING_DEPLOYMENT,
    openai_api_version=OPENAI_API_VERSION)
llm = AzureChatOpenAI(model_name=OPENAI_MODEL, azure_deployment=OPENAI_DEPLOYMENT,temperature=0)
llm_gpt4 = AzureChatOpenAI(model_name=OPENAI_MODEL_GPT4, azure_deployment=OPENAI_DEPLOYMENT_GPT4,temperature=0)

In [21]:
def evaluation_rag(questions, answers, contexts, ground_truths):
    data = {
        "question": questions,
        "answer": answers,
        "contexts": contexts,
        "ground_truths": ground_truths
    }
    dataset = Dataset.from_dict(data)
    azure_configs = {
        "base_url": AZURE_OPENAI_ENDPOINT,
        "model_deployment": OPENAI_DEPLOYMENT,
        "model_name": OPENAI_MODEL,
        "embedding_deployment": EMBEDDING_DEPLOYMENT,
        "embedding_name": EMBEDDING_MODEL,  
    }

    azure_model = AzureChatOpenAI(
        openai_api_version=OPENAI_API_VERSION,
        azure_endpoint=azure_configs["base_url"],
        azure_deployment=azure_configs["model_deployment"],
        model=azure_configs["model_name"],
        validate_base_url=False,
    )

    azure_embeddings = AzureOpenAIEmbeddings(
        openai_api_version=OPENAI_API_VERSION,
        azure_endpoint=azure_configs["base_url"],
        azure_deployment=azure_configs["embedding_deployment"],
        model=azure_configs["embedding_name"],
    )
    result = evaluate(
        dataset = dataset, 
        metrics=[
            context_precision,
            context_recall,
        ], 
        llm=azure_model, 
        embeddings=azure_embeddings,
        raise_exceptions=False,
    )
    return result

In [112]:
def flatten_list(lst):
    return [item.replace('"', '').replace("'", '').replace('“','').replace('”', '') for sublist in lst for item in (sublist if isinstance(sublist, list) else [sublist])]

def calculate_accuracy(answers, ground_truths):
    # Convert both answers and ground truths to lowercase for case-insensitive comparison
    answers_lower = flatten_list([answer.lower() for answer in answers])
    ground_truths_lower = flatten_list([[truth.lower() for truth in sublist] for sublist in ground_truths])
    # Check if each answer corresponds with the ground truth exactly
    accuracy = [1 if ans == truth else 0 for ans, truth in zip(answers_lower, ground_truths_lower)]
    # Calculate the average accuracy
    average_accuracy = sum(accuracy) / len(accuracy)
    return average_accuracy

### General answers by LLMs

In [84]:
template = """ Fix an error in the following query. The answer should be exactly the same as query but with corrected mistake. For example:
'input': 'There are 10 continents in the world.',
'output': 'There are 7 continents in the world.',
'input': 'The elephant is the largest animal of all time.',
'output': 'The blue whale is the largest animal of all time.'.
This is the input query: {question}"""
prompt = ChatPromptTemplate.from_template(template)

In [85]:
llm_chain =(
    { "question": itemgetter("question")}
    | RunnablePassthrough()
    | {"response": prompt | llm}
)
llm_chain_gpt4 =(
    { "question": itemgetter("question")}
    | RunnablePassthrough()
    | {"response": prompt | llm_gpt4}
)

In [86]:
answers_llm = []
contexts_llm = [[""],[""],[""],[""],[""],[""],[""],[""],[""],[""]]

In [87]:
for query in questions:
    response = llm_chain.invoke({"question": query})
    answers_llm.append(response["response"].content)

In [88]:
answers_llm

['There is no error in the query.',
 'There is no error in the given query.',
 'Elon Musk said that to be comfortable growing Tesla to be a leader in AI and robotics, he wants to have 50% voting control.',
 'There is no error in the given query.',
 'There is no error in the query.',
 'There is no error in the given query.',
 'There is no error in the given query.',
 'There is no error in the given query.',
 'There is no error in the query.',
 'There is no error in the query.']

In [66]:
llm_results = calculate_accuracy(answers_llm, ground_truths)
print(llm_results)

0.0


In [89]:
answers_llm_gpt4 = []
for query in questions:
    response = llm_chain_gpt4.invoke({"question": query})
    answers_llm_gpt4.append(response["response"].content)
llm_results_gpt4 = calculate_accuracy(answers_llm_gpt4, ground_truths)
print(llm_results_gpt4)

0.2


In [90]:
answers_llm_gpt4

["Alexei Navalny's wife was informed that his body was held for 'forensic investigation'.",
 "The phrase 'It's the economy, stupid' served as the rallying cry for Bill Clinton's successful presidential campaign in 1992, and over the subsequent decades, it has evolved into a widely accepted political doctrine.",
 'Elon Musk said that to be comfortable growing Tesla to be a leader in AI and robotics, he wants to have more than 10% voting control.',
 "In 2020, Prince William announced the creation of the Earthshot Prize, which was inspired by U.S. Pres. John F. Kennedy’s 'Moonshot' initiative to send a man to the Moon.",
 '43% of Gen Alphas in the US had tablets before the age of six, and 58% received their first smartphone by age 5.',
 'Boeing has made several changes since 2019 to improve product safety, including establishing an aerospace safety committee in January 2019 and a chief aerospace safety office in January 2021, implementing a safety management system in 2019, and establishi

### Naive RAG

In [91]:
def retrieval_chain(prompt, retriever, llm):
    retrieval_augmented_qa_chain = (
        {"context": itemgetter("question") | retriever, "question": itemgetter("question")}
        | RunnablePassthrough.assign(context=itemgetter("context"))
        | {"response": prompt | llm, "context": itemgetter("context")}
    )
    return retrieval_augmented_qa_chain

In [69]:
loader = DirectoryLoader('../news', glob="./*.txt", loader_cls=TextLoader)
documents = loader.load()

In [None]:
text_splitter = CharacterTextSplitter()
chunks = text_splitter.split_documents(documents)
db_naive = Chroma.from_documents(chunks, embeddings_client, persist_directory = "../news/vectordb/naive")
db_naive.persist()
retriever_naive = db_naive.as_retriever()

In [92]:
db_naive = Chroma(persist_directory = "../news/vectordb/naive", embedding_function=embeddings_client)
retriever_naive = db_naive.as_retriever()

In [93]:
template = """Fix an error in the following query. The answer should be exactly the same as query but with corrected mistake. For example:
'input': 'There are 10 continents in the world.',
'output': 'There are 7 continents in the world.',
'input': 'The elephant is the largest animal of all time.',
'output': 'The blue whale is the largest animal of all time.'.
This is the input query: {question}. 
Here is some provided context {context}."""
prompt = ChatPromptTemplate.from_template(template)

In [94]:
answers_naive = []
contexts_naive = []
for query in questions:
    try:  
        response = retrieval_chain(prompt, retriever_naive, llm).invoke({"question": query})
        # Access the response content
        answers_naive.append(response["response"].content)
        # Access the context content
        context_content = [context.page_content for context in response["context"]]
        contexts_naive.append(context_content)  
    except Exception as e:  
        print(f"Warning: {e}" + "on the following question: " + query)  
        answers_naive.append("No answer")
        context_full = retriever_naive.get_relevant_documents(query)
        context_content = [context.page_content for context in context_full]
        contexts_naive.append(context_content)

In [95]:
answers_naive

['There is no error in the given query.',
 'There is no error in the given query.',
 'There is no error in the given query.',
 'There is no error in the given query.',
 'No error found.',
 'Boeing has made several changes since 2019 to improve product safety, including establishing an aerospace safety committee in August 2019 and a chief aerospace safety office in January 2021, implementing a safety management system in 2019, and establishing a dedicated Organization Designation Authorization (ODA) Ombudsperson in June 2022.',
 'There is no error in the given query.',
 'There is no error in the given query.',
 'There is no error in the given query.',
 'There is no error in the given query.']

In [96]:
result_naive_rag_context = evaluation_rag(questions, answers_naive, contexts_naive, ground_truths)
print(result_naive_rag_context)

passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating: 100%|██████████| 20/20 [00:03<00:00,  6.16it/s]


{'context_precision': 0.6056, 'context_recall': 0.7667}


In [102]:
result_naive_rag = calculate_accuracy(answers_naive, ground_truths)
print(result_naive_rag)

0.1


In [103]:
answers_naive

['There is no error in the given query.',
 'There is no error in the given query.',
 'There is no error in the given query.',
 'There is no error in the given query.',
 'No error found.',
 'Boeing has made several changes since 2019 to improve product safety, including establishing an aerospace safety committee in August 2019 and a chief aerospace safety office in January 2021, implementing a safety management system in 2019, and establishing a dedicated Organization Designation Authorization (ODA) Ombudsperson in June 2022.',
 'There is no error in the given query.',
 'There is no error in the given query.',
 'There is no error in the given query.',
 'There is no error in the given query.']

### recursive splitter

In [None]:
text_splitter = text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder()
chunks_r = text_splitter.split_documents(documents)
db_basic = Chroma.from_documents(chunks_r, embeddings_client, persist_directory = "../news/vectordb/recursive_basic")
db_basic.persist()
retriever_basic = db_basic.as_retriever()

In [104]:
db_basic = Chroma(persist_directory = "../news/vectordb/recursive_basic", embedding_function=embeddings_client)
retriever_basic = db_basic.as_retriever()

In [105]:
answers_recursive = []
contexts_recursive = []
for query in questions:
    try:  
        response = retrieval_chain(prompt, retriever_basic, llm).invoke({"question": query})
        # Access the response content
        answers_recursive.append(response["response"].content)
        # Access the context content
        context_content = [context.page_content for context in response["context"]]
        contexts_recursive.append(context_content)  
    except Exception as e:  
        print(f"Warning: {e}" + "on the following question: " + query)  
        answers_recursive.append("No answer")
        context_full = retriever_basic.get_relevant_documents(query)
        context_content = [context.page_content for context in context_full]
        contexts_recursive.append(context_content)
answers_recursive



['There is no error in the given query.',
 'No error found in the given input query.',
 'There is no error in the given query.',
 'There is no error in the given query.',
 'No answer',
 'There is no error in the given query.',
 'No error found in the query.',
 'There is no error in the given query.',
 'There is no error in the given query.',
 'There is no error in the given input query.']

In [106]:
result_recursive_context = evaluation_rag(questions, answers_recursive, contexts_recursive, ground_truths)
print(result_recursive_context)

passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:  95%|█████████▌| 19/20 [00:03<00:00,  4.77it/s]Runner in Executor raised an exception
Traceback (most recent call last):
  File "c:\Users\sigitalapina\OneDrive - KPMG\Desktop\thesis-rag\.venv\Lib\site-packages\ragas\executor.py", line 58, in _aresults
    r = await future
        ^^^^^^^^^^^^
  File "C:\Users\sigitalapina\AppData\Local\Programs\Python\Python311\Lib\asyncio\tasks.py", line 605, in _wait_for_one
    return f.result()  # May raise f.exception().
           ^^^^^^^^^^
  File "c:\Users\sigitalapina\OneDrive - KPMG\Desktop\thesis-rag\.venv\Lib\site-packages\ragas\executor.py", line 91, in wrapped_callable_async
    return counter, await callable(*args, **kwargs)
                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\sigitalapin

{'context_precision': 0.7037, 'context_recall': 0.6000}


In [107]:
result_recursive = calculate_accuracy(answers_recursive, ground_truths)
print(result_recursive)

0.0


### chunk sizes

In [108]:
def run_and_evaluate(retriever, prompt, llm):
    answers_recursive = []
    contexts_recursive = []

    for query in questions:
        try:  
            response = retrieval_chain(prompt, retriever, llm).invoke({"question": query})
            # Access the response content
            answers_recursive.append(response["response"].content)
            # Access the context content
            context_content = [context.page_content for context in response["context"]]
            contexts_recursive.append(context_content)  
        except Exception as e:  
            print(f"Warning: {e}" + "on the following question: " + query)  
            answers_recursive.append("No answer")
            context_full = retriever.get_relevant_documents(query)
            context_content = [context.page_content for context in context_full]
            contexts_recursive.append(context_content)


    result = evaluation_rag(questions, answers_recursive, contexts_recursive, ground_truths)
    accuracy = calculate_accuracy(answers_recursive, ground_truths)
    print(answers_recursive)
    return result, accuracy

In [None]:
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(chunk_size = 1000, chunk_overlap = 100)
chunks_1000 = text_splitter.split_documents(documents)
print(len(chunks_1000))
db_1000 = Chroma.from_documents(chunks_1000, embeddings_client, persist_directory = "../news/vectordb/recursive_1000")
db_1000.persist()
retriever_1000 = db_1000.as_retriever()
result_1000_context = run_and_evaluate(retriever_1000, prompt, llm)
print("CHUNK SIZE 1000")
print(result_1000_context)

In [43]:
db_1000 = Chroma(persist_directory = "../news/vectordb/recursive_1000", embedding_function=embeddings_client)

In [109]:
retriever_1000 = db_1000.as_retriever()
result_1000_context, result_1000 = run_and_evaluate(retriever_1000, prompt, llm)
print("CHUNK SIZE 1000")
print(result_1000_context)
print("AVG accuracy "+ str(result_1000))

passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating: 100%|██████████| 20/20 [00:03<00:00,  6.62it/s]


['There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.']
CHUNK SIZE 1000
{'context_precision': 0.7250, 'context_recall': 0.7750}
AVG accuracy 0.0


In [None]:
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(chunk_size = 500, chunk_overlap = 50)
chunks_500 = text_splitter.split_documents(documents)
print(len(chunks_500))
db_500 = Chroma.from_documents(chunks_500, embeddings_client, persist_directory = "../vectordb/recursive_500")
db_500.persist()
retriever_500 = db_500.as_retriever()
result_500 = run_and_evaluate(retriever_500, prompt, llm)
print("CHUNK SIZE 500")
print(result_500)

In [110]:
db_500 = Chroma(persist_directory = "../news/vectordb/recursive_500", embedding_function=embeddings_client)

In [113]:
retriever_500 = db_500.as_retriever()
result_500, acc_500 = run_and_evaluate(retriever_500, prompt, llm)
print("CHUNK SIZE 500")
print(result_500)
print("AVG accuracy "+ str(acc_500))

passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:   0%|          | 0/20 [00:00<?, ?it/s]Runner in Executor raised an exception
Traceback (most recent call last):
  File "c:\Users\sigitalapina\OneDrive - KPMG\Desktop\thesis-rag\.venv\Lib\site-packages\ragas\executor.py", line 58, in _aresults
    r = await future
        ^^^^^^^^^^^^
  File "C:\Users\sigitalapina\AppData\Local\Programs\Python\Python311\Lib\asyncio\tasks.py", line 605, in _wait_for_one
    return f.result()  # May raise f.exception().
           ^^^^^^^^^^
  File "c:\Users\sigitalapina\OneDrive - KPMG\Desktop\thesis-rag\.venv\Lib\site-packages\ragas\executor.py", line 91, in wrapped_callable_async
    return counter, await callable(*args, **kwargs)
                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\sigitalapina\OneDriv

['There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.', 'The error in the query is that it states "58% received their first iPhone by age 5" which is not possible as most children are not capable of using a smartphone at such a young age. The corrected query should be: "58% received their first iPhone by age 10."', 'Boeing has made several changes since 2019 to improve product safety, including establishing an aerospace safety committee in August 2019 and a chief aerospace safety office in January 2021, implementing a safety management system in 2019, and establishing a dedicated Organization Designation Authorization (ODA) Ombudsperson in June 2022.', 'No error found in the query.', 'Sora is an AI tool that is capable of generating full videos up to 3 minutes long. Simply give it a prompt, for example, “a field of cats worshipping one giant dog” and, in theory, you will receive

In [114]:
db_2000 = Chroma(persist_directory = "../news/vectordb/recursive_2000", embedding_function=embeddings_client)

In [115]:
retriever_2000 = db_2000.as_retriever()
result_2000, acc_2000 = run_and_evaluate(retriever_2000, prompt, llm)
print("CHUNK SIZE 2000")
print(result_2000)
print("AVG accuracy "+ str(acc_2000))

passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating: 100%|██████████| 20/20 [00:03<00:00,  5.81it/s]


['There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given list of queries.', 'The query is already correct. No error needs to be fixed.', 'There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given input query.']
CHUNK SIZE 2000
{'context_precision': 0.6417, 'context_recall': 0.7000}
AVG accuracy 0.0


In [116]:
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(chunk_size = 400, chunk_overlap = 100)
chunks_400 = text_splitter.split_documents(documents)
print(len(chunks_400))
db_400 = Chroma.from_documents(chunks_400, embeddings_client, persist_directory = "../news/vectordb/recursive_400")

145


In [117]:
retriever_400 = db_400.as_retriever()
result_400, acc_400 = run_and_evaluate(retriever_400, prompt, llm)
print("CHUNK SIZE 2000")
print(result_400)
print("AVG accuracy "+ str(acc_400))

passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:   0%|          | 0/20 [00:00<?, ?it/s]Runner in Executor raised an exception
Traceback (most recent call last):
  File "c:\Users\sigitalapina\OneDrive - KPMG\Desktop\thesis-rag\.venv\Lib\site-packages\ragas\executor.py", line 58, in _aresults
    r = await future
        ^^^^^^^^^^^^
  File "C:\Users\sigitalapina\AppData\Local\Programs\Python\Python311\Lib\asyncio\tasks.py", line 605, in _wait_for_one
    return f.result()  # May raise f.exception().
           ^^^^^^^^^^
  File "c:\Users\sigitalapina\OneDrive - KPMG\Desktop\thesis-rag\.venv\Lib\site-packages\ragas\executor.py", line 91, in wrapped_callable_async
    return counter, await callable(*args, **kwargs)
                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\sigitalapina\OneDriv

['No error found in the query.', 'The query is already correct. No error found.', 'There is no error in the given query.', 'There is no error in the given query.', '43% of Gen Alphas in the US had tablets before the age of six, and 58% received their first iPhone by age 10.', 'There is no error in the given query.', 'There is no error in the given query.', 'Sora is an AI tool that is capable of generating full videos up to 3 minutes long. Simply give it a prompt, for example, “a field of cats worshipping one giant dog” and, in theory, you will receive a video matching that description.', 'There is no error in the given query.', 'There is no error in the query.']
CHUNK SIZE 2000
{'context_precision': 0.6852, 'context_recall': 0.6806}
AVG accuracy 0.1


### now time to look for different top-k

Note: We continue with the size chunk of 2000 as it had the highest average score

In [118]:
retriever_2 = db_500.as_retriever(search_kwargs={"k": 2})
result_2, acc_2 = run_and_evaluate(retriever_2, prompt, llm)
print("CHUNK SIZE 1000, K=2")
print(result_2)
print("AVG accuracy "+ str(acc_2))



passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:   0%|          | 0/20 [00:00<?, ?it/s]Runner in Executor raised an exception
Traceback (most recent call last):
  File "c:\Users\sigitalapina\OneDrive - KPMG\Desktop\thesis-rag\.venv\Lib\site-packages\ragas\executor.py", line 58, in _aresults
    r = await future
        ^^^^^^^^^^^^
  File "C:\Users\sigitalapina\AppData\Local\Programs\Python\Python311\Lib\asyncio\tasks.py", line 605, in _wait_for_one
    return f.result()  # May raise f.exception().
           ^^^^^^^^^^
  File "c:\Users\sigitalapina\OneDrive - KPMG\Desktop\thesis-rag\.venv\Lib\site-packages\ragas\executor.py", line 91, in wrapped_callable_async
    return counter, await callable(*args, **kwargs)
                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\sigitalapina\OneDriv

['No answer', 'There is no error in the given query.', 'Elon Musk said that to be comfortable growing Tesla to be a leader in AI and robotics, he wants to have 25% voting control.', 'There is no error in the query.', 'There is no error in the given query.', 'There is no error in the given query.', 'No error found in the query.', 'Sora is an AI tool that is capable of generating full videos up to 3 minutes long. Simply give it a prompt, for example, “a field of cats worshipping one giant dog” and, in theory, you will receive a video matching that description.', 'There is no error in the given query.', "On the morning of 7 October, waves of Hamas gunmen stormed across Gaza's border into Israel, killing about 1,200 people. Hamas also fired thousands of rockets."]
CHUNK SIZE 1000, K=2
{'context_precision': 0.7222, 'context_recall': 0.7000}
AVG accuracy 0.2


In [119]:
retriever_3 = db_500.as_retriever(search_kwargs={"k": 3})
result_3, acc_3 = run_and_evaluate(retriever_3, prompt, llm)
print("CHUNK SIZE 500, K=3")
print(result_3)
print("AVG accuracy "+ str(acc_3))

passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:   0%|          | 0/20 [00:00<?, ?it/s]Runner in Executor raised an exception
Traceback (most recent call last):
  File "c:\Users\sigitalapina\OneDrive - KPMG\Desktop\thesis-rag\.venv\Lib\site-packages\ragas\executor.py", line 58, in _aresults
    r = await future
        ^^^^^^^^^^^^
  File "C:\Users\sigitalapina\AppData\Local\Programs\Python\Python311\Lib\asyncio\tasks.py", line 605, in _wait_for_one
    return f.result()  # May raise f.exception().
           ^^^^^^^^^^
  File "c:\Users\sigitalapina\OneDrive - KPMG\Desktop\thesis-rag\.venv\Lib\site-packages\ragas\executor.py", line 91, in wrapped_callable_async
    return counter, await callable(*args, **kwargs)
                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\sigitalapina\OneDriv

['There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.', '43% of Gen Alphas in the US had tablets before the age of six, and 58% received their first iPhone by age 10.', 'Boeing has made several changes since 2019 to improve product safety, including establishing an aerospace safety committee in August 2019 and a chief aerospace safety office in January 2021, implementing a safety management system in 2019, and establishing a dedicated Organization Designation Authorization (ODA) Ombudsperson in June 2022.', 'No error found in the query.', 'Sora is an AI tool that is capable of generating full videos up to 3 minutes long. Simply give it a prompt, for example, “a field of cats worshipping one giant dog” and, in theory, you will receive a video matching that description.', 'There is no error in the given query.', "On the morning of 7 October, waves of Hamas gunmen stormed across Ga

In [120]:
retriever_5 = db_500.as_retriever(search_kwargs={"k": 5})
result_5, acc_5 = run_and_evaluate(retriever_5, prompt, llm)
print("CHUNK SIZE 500, K=5")
print(result_5)
print("AVG accuracy "+ str(acc_5))

passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:   0%|          | 0/20 [00:00<?, ?it/s]Runner in Executor raised an exception
Traceback (most recent call last):
  File "c:\Users\sigitalapina\OneDrive - KPMG\Desktop\thesis-rag\.venv\Lib\site-packages\ragas\executor.py", line 58, in _aresults
    r = await future
        ^^^^^^^^^^^^
  File "C:\Users\sigitalapina\AppData\Local\Programs\Python\Python311\Lib\asyncio\tasks.py", line 605, in _wait_for_one
    return f.result()  # May raise f.exception().
           ^^^^^^^^^^
  File "c:\Users\sigitalapina\OneDrive - KPMG\Desktop\thesis-rag\.venv\Lib\site-packages\ragas\executor.py", line 91, in wrapped_callable_async
    return counter, await callable(*args, **kwargs)
                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\sigitalapina\OneDriv

['There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.', 'Boeing has made several changes since 2019 to improve product safety, including establishing an aerospace safety committee in August 2019 and a chief aerospace safety office in January 2021, implementing a safety management system in 2019, and establishing a dedicated Organization Designation Authorization (ODA) Ombudsperson in June 2022.', 'There is no error in the given query.', 'Sora is an AI tool that is capable of generating full videos up to 3 minutes long. Simply give it a prompt, for example, “a field of cats worshipping one giant dog” and, in theory, you will receive a video matching that description.', 'There is no error in the given query.', 'There is no error in the given query.']
CHUNK SIZE 500, K=5
{'context_precision': 0.7222, 'context_recall': 0.6843}
AVG accuracy 0.

In [121]:
retriever_6 = db_500.as_retriever(search_kwargs={"k": 6})
result_6, acc_6 = run_and_evaluate(retriever_6, prompt, llm)
print("CHUNK SIZE 500, K=6")
print(result_6)
print("AVG accuracy "+ str(acc_6))

passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:   0%|          | 0/20 [00:00<?, ?it/s]Runner in Executor raised an exception
Traceback (most recent call last):
  File "c:\Users\sigitalapina\OneDrive - KPMG\Desktop\thesis-rag\.venv\Lib\site-packages\ragas\executor.py", line 58, in _aresults
    r = await future
        ^^^^^^^^^^^^
  File "C:\Users\sigitalapina\AppData\Local\Programs\Python\Python311\Lib\asyncio\tasks.py", line 605, in _wait_for_one
    return f.result()  # May raise f.exception().
           ^^^^^^^^^^
  File "c:\Users\sigitalapina\OneDrive - KPMG\Desktop\thesis-rag\.venv\Lib\site-packages\ragas\executor.py", line 91, in wrapped_callable_async
    return counter, await callable(*args, **kwargs)
                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\sigitalapina\OneDriv

['There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.', 'Boeing has made several changes since 2019 to improve product safety, including establishing an aerospace safety committee in August 2019 and a chief aerospace safety office in January 2021, implementing a safety management system in 2019, and establishing a dedicated Organization Designation Authorization (ODA) Ombudsperson in June 2022.', 'There is no error in the given query.', 'No error found in the query.', 'There is no error in the given query.', 'There is no error in the given query.']
CHUNK SIZE 500, K=6
{'context_precision': 0.8889, 'context_recall': 0.6659}
AVG accuracy 0.1


### look for different retrievers

3 chunks was the best score

#### parent document retriever

In [124]:
parent_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(chunk_size=2000)
child_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(chunk_size=200, chunk_overlap = 0)

# vectorstore = Chroma(collection_name="split_parents",persist_directory = "../news/vectordb/parent", embedding_function=embeddings_client)
# vectorstore.persist()
vectorstore = Chroma(persist_directory = "../news/vectordb/parent", embedding_function=embeddings_client)
store = InMemoryStore()
parent_document_retriever = ParentDocumentRetriever(
    vectorstore=vectorstore,
    docstore=store,
    child_splitter=child_splitter,
    parent_splitter=parent_splitter,
)
parent_document_retriever.add_documents(documents)
result_parent, acc_parent = run_and_evaluate(parent_document_retriever, prompt, llm)
print(result_parent)
print("AVG accuracy "+ str(acc_parent))


passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating: 100%|██████████| 20/20 [00:02<00:00,  7.60it/s]


['There is no error in the given query.', 'There is no error in the given query.', 'Elon Musk said that to be comfortable growing Tesla to be a leader in AI and robotics, he wants to have 25% voting control.', 'There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.', 'Renewed concerns arose on January 17, 2022, when a door panel of an Alaska Airlines plane, manufactured by Boeing, blew out midair over Portland, Oregon, compelling the crew to execute an emergency landing.', 'Sora is an AI tool that is capable of generating full videos up to 3 minutes long. Simply give it a prompt, for example, “a field of cats worshipping one giant dog” and, in theory, you will receive a video matching that description.', 'There is no error in the given query.', 'There is no error in the query.']
{'context_precision': 0.6000, 'context_recall': 0.7500}
AVG accuracy 0.1


In [125]:
parent_splitter_small = RecursiveCharacterTextSplitter.from_tiktoken_encoder(chunk_size=500, chunk_overlap = 50)
child_splitter_small = RecursiveCharacterTextSplitter.from_tiktoken_encoder(chunk_size=100, chunk_overlap = 0)

# vectorstore = Chroma(collection_name="split_parents_small",persist_directory = "../news/vectordb/parent_small", embedding_function=embeddings_client)
# vectorstore.persist()
vectorstore = Chroma(persist_directory = "../news/vectordb/parent_small", embedding_function=embeddings_client)
store = InMemoryStore()
parent_document_retriever_small = ParentDocumentRetriever(
    vectorstore=vectorstore,
    docstore=store,
    child_splitter=child_splitter_small,
    parent_splitter=parent_splitter_small,
)
parent_document_retriever_small.add_documents(documents)
result_parent_small, acc_parent_small = run_and_evaluate(parent_document_retriever_small, prompt, llm)
print(result_parent_small)
print("AVG accuracy "+ str(acc_parent_small))



passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:   0%|          | 0/20 [00:00<?, ?it/s]Runner in Executor raised an exception
openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': True, 'severity': 'm

['No answer', 'The query is correct and does not contain any errors.', 'Elon Musk said that to be comfortable growing Tesla to be a leader in AI and robotics, he wants to have 25% voting control.', 'There is no error in the query.', 'There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.', "Sora, an AI tool, possesses the capability to generate complete videos lasting up to 1 minute. By providing a prompt, such as 'a field of cats worshipping one giant dog,' theoretically, you can receive a video that aligns with the given description.", 'Kyiv relies significantly on contemporary armament supplies from Western allies, notably the US, to sustain its fight against Russia, which possesses a larger military force and ample artillery ammunition. However, the green light for a crucially required $95 billion US aid package, with $61 billion designated for Ukraine, encounters challenges in the House of Representatives.', 'There i

In [126]:
parent_splitter_large = RecursiveCharacterTextSplitter.from_tiktoken_encoder(chunk_size=3000)
child_splitter_large = RecursiveCharacterTextSplitter.from_tiktoken_encoder(chunk_size=300, chunk_overlap = 0)

# vectorstore = Chroma(collection_name="split_parents_large",persist_directory = "../news/vectordb/parent_large", embedding_function=embeddings_client)
# vectorstore.persist()
vectorstore = Chroma(persist_directory = "../news/vectordb/parent_large", embedding_function=embeddings_client)
store = InMemoryStore()
parent_document_retriever_large = ParentDocumentRetriever(
    vectorstore=vectorstore,
    docstore=store,
    child_splitter=child_splitter_large,
    parent_splitter=parent_splitter_large,
)
parent_document_retriever_large.add_documents(documents)
result_parent_large, acc_parent_large = run_and_evaluate(parent_document_retriever_large, prompt, llm)
print(result_parent_large)
print("AVG accuracy "+ str(acc_parent_large))

passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating: 100%|██████████| 20/20 [00:03<00:00,  5.29it/s]


['There is no error in the given query.', 'There is no error in the given query.', 'Elon Musk said that to be comfortable growing Tesla to be a leader in AI and robotics, he wants to have 25% voting control.', 'There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.', 'Sora is an AI tool that is capable of generating full videos up to 3 minutes long. Simply give it a prompt, for example, “a field of cats worshipping one giant dog” and, in theory, you will receive a video matching that description.', 'There is no error in the given query.', 'There is no error in the query.']
{'context_precision': 0.5000, 'context_recall': 0.7500}
AVG accuracy 0.1


#### Maximum marginal relevance retrieval

In [127]:
retriever_mmr = db_500.as_retriever(search_type="mmr",search_kwargs={"k": 3})
result_mmr, acc_mmr = run_and_evaluate(retriever_mmr, prompt, llm)
print("Marginal relevance")
print(result_mmr)
print("AVG accuracy "+ str(acc_mmr))

passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:   0%|          | 0/20 [00:00<?, ?it/s]Runner in Executor raised an exception
openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': True, 'severity': 'm

['There is no error in the given query.', 'There is no error in the given query.', 'Elon Musk said that to be comfortable growing Tesla to be a leader in AI and robotics, he wants to have 25% voting control.', 'There is no error in the given query.', 'The error in the query is that it states "58% received their first iPhone by age 5", which is not possible as most children are not capable of using a smartphone at such a young age. The corrected query should be: "58% received their first iPhone by age 10."', 'Boeing has made several changes since 2019 to improve product safety, including establishing an aerospace safety committee in August 2019 and a chief aerospace safety office in January 2021, implementing a safety management system in 2019, and establishing a dedicated Organization Designation Authorization (ODA) Ombudsperson in June 2022.', 'No error found in the query.', 'Sora is an AI tool that is capable of generating full videos up to 3 minutes long. Simply give it a prompt, fo

#### BM25

In [129]:
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(chunk_size = 500, chunk_overlap = 50)
chunks_500 = text_splitter.split_documents(documents)

In [130]:
retriever_bm25 = BM25Retriever.from_documents(chunks_500)
result_bm25, acc_bm25 = run_and_evaluate(retriever_bm25, prompt, llm)
print("BM25")
print(result_bm25)
print("AVG accuracy "+ str(acc_bm25))

passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:   0%|          | 0/20 [00:00<?, ?it/s]Runner in Executor raised an exception
openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': True, 'severity': 'm

['There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.', 'Boeing has made several changes since 2019 to improve product safety, including establishing an aerospace safety committee in August 2019 and a chief aerospace safety office in January 2021, implementing a safety management system in 2019, and establishing a dedicated Organization Designation Authorization (ODA) Ombudsperson in June 2022.', 'There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.']
BM25
{'context_precision': 0.8889, 'context_recall': 0.8000}
AVG accuracy 0.1


#### Ensambler - Hybrid

In [132]:
ensemble_retriever_1 = EnsembleRetriever(retrievers=[retriever_bm25, retriever_3], weights=[0.75, 0.25])
result_ensemble1, acc_retriever_1 = run_and_evaluate(ensemble_retriever_1, prompt, llm)
print("Ensambler 75/25")
print(result_ensemble1)
print("AVG accuracy "+ str(acc_retriever_1))

passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:   0%|          | 0/20 [00:00<?, ?it/s]Runner in Executor raised an exception
openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': True, 'severity': 'm

['There is no error in the given query.', 'There is no error in the given query.', 'No error found in the input query.', 'There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.']
Ensambler 75/25
{'context_precision': 0.8519, 'context_recall': 0.7183}
AVG accuracy 0.0


In [133]:
ensemble_retriever_2 = EnsembleRetriever(retrievers=[retriever_bm25, retriever_3], weights=[0.5, 0.5])
result_ensemble2, acc_ens_2 = run_and_evaluate(ensemble_retriever_2, prompt, llm)
print("Ensambler 50/50")
print(result_ensemble2)
print("AVG accuracy "+ str(acc_ens_2))

passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:   0%|          | 0/20 [00:00<?, ?it/s]Runner in Executor raised an exception
openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': True, 'severity': 'm

['There is no error in the given query.', 'There is no error in the given query.', 'No error found in the query.', 'There is no error in the given query.', 'There is no error in the given query.', 'Boeing has made several changes since 2019 to improve product safety, including establishing an aerospace safety committee in August 2019 and a chief aerospace safety office in January 2021, implementing a safety management system in 2019, and establishing a dedicated Organization Designation Authorization (ODA) Ombudsperson in June 2022.', 'There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.']
Ensambler 50/50
{'context_precision': 0.8519, 'context_recall': 0.7222}
AVG accuracy 0.1


In [134]:
ensemble_retriever_3 = EnsembleRetriever(retrievers=[retriever_bm25, retriever_3], weights=[0.25,0.75])
result_ensemble3, acc_ens_3 = run_and_evaluate(ensemble_retriever_3, prompt, llm)
print("Ensambler 25/75")
print(result_ensemble3)
print("AVG accuracy "+ str(acc_ens_3))

passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:   0%|          | 0/20 [00:00<?, ?it/s]Runner in Executor raised an exception
openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': True, 'severity': 'm

['There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.', 'Boeing has made several changes since 2019 to improve product safety, including establishing an aerospace safety committee in August 2019 and a chief aerospace safety office in January 2021, implementing a safety management system in 2019, and establishing a dedicated Organization Designation Authorization (ODA) Ombudsperson in June 2022.', 'There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.']
Ensambler 25/75
{'context_precision': 0.8333, 'context_recall': 0.7200}
AVG accuracy 0.1


In [135]:
ensemble_retriever_4 = EnsembleRetriever(retrievers=[retriever_bm25, parent_document_retriever_small], weights=[0.75, 0.25])
result_ensemble4, acc_retriever_4 = run_and_evaluate(ensemble_retriever_4, prompt, llm)
print("Ensambler 75/25")
print(result_ensemble4)
print("AVG accuracy "+ str(acc_retriever_4))

passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:   0%|          | 0/20 [00:00<?, ?it/s]Runner in Executor raised an exception
openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': True, 'severity': 'm

['There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.', 'Boeing has made several changes since 2019 to improve product safety, including establishing an aerospace safety committee in August 2019 and a chief aerospace safety office in January 2021, implementing a safety management system in 2019, and establishing a dedicated Organization Designation Authorization (ODA) Ombudsperson in June 2022.', 'There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.', "On the morning of 7 October, waves of Hamas gunmen stormed across Gaza's border into Israel, killing about 1,200 people. Hamas also fired thousands of rockets."]
Ensambler 75/25
{'context_precision': 0.8889, 'context_recall': 0.8000}
AVG accuracy 0.2


In [136]:
ensemble_retriever_5 = EnsembleRetriever(retrievers=[retriever_bm25, parent_document_retriever_small], weights=[0.5, 0.5])
result_ensemble5, acc_retriever_5 = run_and_evaluate(ensemble_retriever_5, prompt, llm)
print("Ensambler 50/50")
print(result_ensemble5)
print("AVG accuracy "+ str(acc_retriever_5))

passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:  45%|████▌     | 9/20 [00:02<00:02,  4.07it/s]Runner in Executor raised an exception
openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': True, 'sever

['There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.', 'Boeing has made several changes since 2019 to improve product safety, including establishing an aerospace safety committee in August 2019 and a chief aerospace safety office in January 2021, implementing a safety management system in 2019, and establishing a dedicated Organization Designation Authorization (ODA) Ombudsperson in June 2022.', 'There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.', "On the morning of 7 October, waves of Hamas gunmen stormed across Gaza's border into Israel, killing about 1,200 people. Hamas also fired thousands of rockets."]
Ensambler 50/50
{'context_precision': 0.8889, 'context_recall': 0.8000}
AVG accuracy 0.2


In [137]:
ensemble_retriever_6 = EnsembleRetriever(retrievers=[retriever_bm25, parent_document_retriever_small], weights=[0.25, 0.75])
result_ensemble6, acc_retriever_6 = run_and_evaluate(ensemble_retriever_6, prompt, llm)
print("Ensambler 25/75")
print(result_ensemble6)
print("AVG accuracy "+ str(acc_retriever_6))

passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:   0%|          | 0/20 [00:00<?, ?it/s]Runner in Executor raised an exception
openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': True, 'severity': 'm

['There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.', 'Boeing has made several changes since 2019 to improve product safety, including establishing an aerospace safety committee in August 2019 and a chief aerospace safety office in January 2021, implementing a safety management system in 2019, and establishing a dedicated Organization Designation Authorization (ODA) Ombudsperson in June 2022.', 'There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.', "On the morning of 7 October, waves of Hamas gunmen stormed across Gaza's border into Israel, killing about 1,200 people. Hamas also fired thousands of rockets."]
Ensambler 25/75
{'context_precision': 0.8519, 'context_recall': 0.7917}
AVG accuracy 0.2


#### Multi-stage - reranker

In [139]:
os.environ["COHERE_API_KEY"] = getpass.getpass("Cohere API Key:")

In [145]:
retriever_context = retriever_6
compressor = CohereRerank(top_n = 3)
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor, base_retriever=retriever_context
)

result_compression, acc_compression = run_and_evaluate(compression_retriever, prompt, llm)
print("Reranker")
print(result_compression)
print("AVG accuracy "+ str(acc_compression))

passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:   0%|          | 0/20 [00:00<?, ?it/s]Runner in Executor raised an exception
openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': True, 'severity': 'm

['There is no error in the given query.', 'There is no error in the given query.', 'Elon Musk said that to be comfortable growing Tesla to be a leader in AI and robotics, he wants to have 25% voting control.', 'There is no error in the given query.', 'There is no error in the given query.', 'Boeing has made several changes since 2019 to improve product safety, including establishing an aerospace safety committee in August 2019 and a chief aerospace safety office in January 2021, implementing a safety management system in 2019, and establishing a dedicated Organization Designation Authorization (ODA) Ombudsperson in June 2022.', 'No error found in the query.', 'The query is already correct. No error found.', 'There is no error in the given query.', 'There is no error in the given query.']
Reranker
{'context_precision': 0.8333, 'context_recall': 0.6833}
AVG accuracy 0.2


#### creating context by remaking the query

In [143]:
template_context = "Generate a search query to fetch the relevant documents using the user's {question}. Craft a query that specifically targets the keywords in the question. In the answer provide only the query."
prompt_context = ChatPromptTemplate.from_template(template_context)

In [144]:
answers_final = []
contexts_final = []
llm_for_context =(
    { "question": itemgetter("question")}
    | RunnablePassthrough()
    | {"response": prompt_context | llm}
)
for query in questions:
    response_check = llm_for_context.invoke({"question": query})
    search_query = response_check["response"].content
    retrieval_augmented_qa_chain = (
        {"context": itemgetter("context"), "question": itemgetter("question")}
        | RunnablePassthrough.assign(context=itemgetter("context"))
        | {"response": prompt | llm, "context": itemgetter("context")}
)
    docs = ensemble_retriever_4.get_relevant_documents(search_query)
    formatted_docs = []
    for doc in docs:
        resulting_doc = doc.page_content
        formatted_docs.append(resulting_doc)
    try:  
            response = retrieval_augmented_qa_chain.invoke({"context": formatted_docs, "question": query})
            # Access the response content
            answers_final.append(response["response"].content)
            contexts_final.append(formatted_docs)  
    except Exception as e:  
            print(f"Warning: {e}" + "on the following question: " + query)  
            answers_final.append("No answer")
            contexts_final.append(formatted_docs)


result_search_query = evaluation_rag(questions, answers_final, contexts_final, ground_truths)
acc_search_query = calculate_accuracy(answers_final, ground_truths)
print("AVG accuracy "+ str(acc_search_query))

passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:  50%|█████     | 10/20 [00:12<00:10,  1.09s/it]Runner in Executor raised an exception
openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': True, 'seve

context_precision
context_precision
AVG accuracy context_recall


In [146]:
acc_search_query = calculate_accuracy(answers_final, ground_truths)
print("AVG accuracy "+ str(acc_search_query))

AVG accuracy 0.1


### change model to GPT-4

In [147]:
result_parent_small_gpt4, acc_parent_small_gpt4 = run_and_evaluate(parent_document_retriever_small, prompt, llm_gpt4)
print(result_parent_small_gpt4)
print("AVG accuracy "+ str(acc_parent_small_gpt4))

passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:   0%|          | 0/20 [00:00<?, ?it/s]Runner in Executor raised an exception
openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': True, 'severity': 'm

["The corrected query is: Alexei Navalny's mother was informed that his body was held for 'chemical analysis'.", "The phrase 'It's the economy, stupid' served as the rallying cry for Bill Clinton's successful presidential campaign in 1992, and over the subsequent decades, it has evolved into a widely accepted political gospel.", 'Elon Musk said that to be comfortable growing Tesla to be a leader in AI and robotics, he wants to have 25% voting control.', "In 2020, Prince William announced the creation of the Earthshot Prize, which was inspired by U.S. Pres. John F. Kennedy’s 'Moonshot' initiative to send a man to the Moon.", '43% of Gen Alphas in the US had tablets before the age of six, and 58% received their first iPhone by age 10.', 'Boeing has made several changes since 2019 to improve product safety, including establishing an aerospace safety committee in August 2019 and a chief aerospace safety office in January 2021, implementing a safety management system in 2019, and establishi

In [148]:
result_3_gpt4, acc_3_gpt4 = run_and_evaluate(retriever_3, prompt, llm_gpt4)
print("CHUNK SIZE 500, K=3, GPT-4")
print(result_3_gpt4)
print("AVG accuracy "+ str(acc_3_gpt4))

passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:   0%|          | 0/20 [00:00<?, ?it/s]Runner in Executor raised an exception
openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': True, 'severity': 'm

["The corrected query is: Alexei Navalny's mother was informed that his body was held for 'chemical analysis'.", "The phrase 'It's the economy, stupid' served as the rallying cry for Bill Clinton's successful presidential campaign in 1992, and over the subsequent decades, it has evolved into a widely accepted political doctrine.", 'Elon Musk said that to be comfortable growing Tesla to be a leader in AI and robotics, he wants to have 25% voting control.', "In 2020, Prince William announced the creation of the Earthshot Prize, which was inspired by U.S. Pres. John F. Kennedy’s 'Moonshot' initiative to send a man to the Moon.", '43% of Gen Alphas in the US had tablets before the age of six, and 58% received their first iPhone by age 10.', 'Boeing has made several changes since 2019 to improve product safety, including establishing an aerospace safety committee in August 2019 and a chief aerospace safety office in January 2021, implementing a safety management system in 2019, and establis