In [2]:
!pip install pypdf



In [3]:
import dotenv
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
from langchain.document_loaders import TextLoader
from langchain.document_loaders import DirectoryLoader
from langchain_openai import AzureOpenAIEmbeddings
dotenv.load_dotenv()
from langchain_community.vectorstores import Chroma
from ragas import evaluate
from ragas.metrics import (
    faithfulness,
    answer_relevancy,
    context_recall,
    context_precision,
    answer_similarity,
    answer_correctness,
)
from datasets import Dataset
from langchain_openai import AzureChatOpenAI
from langchain.schema.runnable import RunnablePassthrough
from operator import itemgetter
from langchain.prompts import ChatPromptTemplate
import os
from langchain_community.document_loaders import PyPDFLoader

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
OPENAI_API_VERSION = os.environ.get("OPENAI_API_VERSION")
AZURE_OPENAI_ENDPOINT = os.environ.get("AZURE_OPENAI_ENDPOINT")
OPENAI_MODEL = os.environ.get("OPENAI_MODEL")
OPENAI_DEPLOYMENT = os.environ.get("OPENAI_DEPLOYMENT")
EMBEDDING_MODEL = os.environ.get("EMBEDDING_MODEL")
EMBEDDING_DEPLOYMENT = os.environ.get("EMBEDDING_DEPLOYMENT")
OPENAI_MODEL_GPT4 = os.environ.get("OPENAI_MODEL_GPT4")
OPENAI_DEPLOYMENT_GPT4 = os.environ.get("OPENAI_DEPLOYMENT_GPT4")

In [5]:
questions = [
    "Who wrote 'The Hanging Tree' song?",
    "Which District was the source of the Snow family's wealth before the war?",
    "Which district did the Plinth family call home before moving to the Capitol?",
    "What is Lucy Gray wearing when she first appears in 'The Ballad of Songbirds and Snakes'?",
    "Where are the tributes sent immediately after they arrive in the Capitol in 'The Ballad of Songbirds and Snakes'?",
    "What does Coriolanus do to keep Lucy Gray safe when he realizes Dr. Gaul is putting her hybrid snakes in the arena in 'The Ballad of Songbirds and Snakes'?",
    "What does Coriolanus do that causes Sejanus's execution in 'The Ballad of Songbirds and Snakes'?",
    "What is one of the main reasons Coriolanus is chosen to sing the national anthem at Arachne's funeral in 'The Ballad of Songbirds and Snakes'?",
    "Who arrives at the District 12 Peacekeeper base soon after Coriolanus in 'The Ballad of Songbirds and Snakes'?",
    "Where did the jabberjays originate in 'The Ballad of Songbirds and Snakes'?",
    "Why does Coriolanus feel he needs to kill Lucy Gray?",
    "What does Coriolanus do to keep Lucy Gray safe when he realizes Dr. Gaul is putting her hybrid snakes in the arena?",
    "Coriolanus regularly receives a box from Mrs. Plinth in 'The Ballad of Songbirds and Snakes'. What is in the box?",
    "What is Sejanus sprinkling on Marcus's body in the arena in 'The Ballad of Songbirds and Snakes'?",
    "What does Coriolanus wear in his lapel at the interview to remind everyone that Lucy Gray 'belongs to' him in 'The Ballad of Songbirds and Snakes'?"
]
ground_truths = [
    ["Lucy Gray Baird from District 12."],
    ["District 13."],
    ["District 2."],
    ["A ruffled dress in rainbow colors."],
    ["The monkey house in the zoo."],
    ["He drops a handkerchief with Lucy Gray's scent into the snake tank."],
    ["He uses a jabberjay to record Sejanus talking about his part in a rebel plan."],
    ["He knows all the words."],
    ["Sejanus."],
    ["In Dr. Gaul's lab."],
    ["She can tie him to Mayfair's murder."],
    ["Drops a handkerchief with Lucy Gray's scent into the snake tank."],
    ["Baked goods."],
    ["Breadcrumbs so he will have food to eat during his journey."],
    ["A rose that matches the one in her hair."]
]
answers_llm = []
contexts_llm = [[""],[""],[""],[""],[""],[""],[""],[""],[""],[""],[""],[""],[""],[""],[""]]

In [6]:
embeddings_client = AzureOpenAIEmbeddings(
    azure_deployment=EMBEDDING_DEPLOYMENT,
    openai_api_version=OPENAI_API_VERSION)
llm = AzureChatOpenAI(model_name=OPENAI_MODEL, azure_deployment=OPENAI_DEPLOYMENT,temperature=0)
llm_gpt4 = AzureChatOpenAI(model_name=OPENAI_MODEL_GPT4, azure_deployment=OPENAI_DEPLOYMENT_GPT4,temperature=0)

In [7]:
def evaluation_llm(questions, answers, contexts, ground_truths):
    data = {
        "question": questions,
        "answer": answers,
        "contexts": contexts,
        "ground_truths": ground_truths
    }
    dataset = Dataset.from_dict(data)
    azure_configs = {
        "base_url": AZURE_OPENAI_ENDPOINT,
        "model_deployment": OPENAI_DEPLOYMENT,
        "model_name": OPENAI_MODEL,
        "embedding_deployment": EMBEDDING_DEPLOYMENT,
        "embedding_name": EMBEDDING_MODEL,  
    }

    azure_model = AzureChatOpenAI(
        openai_api_version=OPENAI_API_VERSION,
        azure_endpoint=azure_configs["base_url"],
        azure_deployment=azure_configs["model_deployment"],
        model=azure_configs["model_name"],
        validate_base_url=False,
    )

    azure_embeddings = AzureOpenAIEmbeddings(
        openai_api_version=OPENAI_API_VERSION,
        azure_endpoint=azure_configs["base_url"],
        azure_deployment=azure_configs["embedding_deployment"],
        model=azure_configs["embedding_name"],
    )
    result = evaluate(
        dataset = dataset, 
        metrics=[
            faithfulness,
            answer_relevancy,
            answer_similarity,
            answer_correctness,
        ], 
        llm=azure_model, 
        embeddings=azure_embeddings,
    )
    return result

In [8]:
def evaluation_rag(questions, answers, contexts, ground_truths):
    data = {
        "question": questions,
        "answer": answers,
        "contexts": contexts,
        "ground_truths": ground_truths
    }
    dataset = Dataset.from_dict(data)
    azure_configs = {
        "base_url": AZURE_OPENAI_ENDPOINT,
        "model_deployment": OPENAI_DEPLOYMENT,
        "model_name": OPENAI_MODEL,
        "embedding_deployment": EMBEDDING_DEPLOYMENT,
        "embedding_name": EMBEDDING_MODEL,  # most likely
    }

    azure_model = AzureChatOpenAI(
        openai_api_version=OPENAI_API_VERSION,
        azure_endpoint=azure_configs["base_url"],
        azure_deployment=azure_configs["model_deployment"],
        model=azure_configs["model_name"],
        validate_base_url=False,
    )

    azure_embeddings = AzureOpenAIEmbeddings(
        openai_api_version=OPENAI_API_VERSION,
        azure_endpoint=azure_configs["base_url"],
        azure_deployment=azure_configs["embedding_deployment"],
        model=azure_configs["embedding_name"],
    )
    result = evaluate(
        dataset = dataset, 
        metrics=[
            faithfulness,
            answer_relevancy,
            context_precision,
            context_recall,
            answer_similarity,
            answer_correctness,
        ], 
        llm=azure_model, 
        embeddings=azure_embeddings,
        raise_exceptions=False,
    )
    return result

### General answers by LLM

In [19]:
template = """Write a concise answer to the following question: {question}"""
prompt = ChatPromptTemplate.from_template(template)

In [20]:
llm_chain =(
    { "question": itemgetter("question")}
    | RunnablePassthrough()
    | {"response": prompt | llm}
)
llm_chain_gpt4 =(
    { "question": itemgetter("question")}
    | RunnablePassthrough()
    | {"response": prompt | llm_gpt4}
)

In [26]:
for query in questions:
    response = llm_chain.invoke({"question": query})
    answers_llm.append(response["response"].content)

In [27]:
llm_results = evaluation_llm(questions, answers_llm, contexts_llm, ground_truths)
print(llm_results)

passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating: 100%|██████████| 60/60 [00:04<00:00, 12.92it/s]


{'faithfulness': 0.0833, 'answer_relevancy': 0.8096, 'answer_similarity': 0.7944, 'answer_correctness': 0.2569}


In [24]:
answers_llm_gpt4 = []
for query in questions:
    response = llm_chain_gpt4.invoke({"question": query})
    answers_llm_gpt4.append(response["response"].content)

In [25]:
llm_results_gpt4 = evaluation_llm(questions, answers_llm_gpt4, contexts_llm, ground_truths)
print(llm_results_gpt4)

passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating: 100%|██████████| 60/60 [00:07<00:00,  8.17it/s]


{'faithfulness': 0.0833, 'answer_relevancy': 0.8099, 'answer_similarity': 0.7931, 'answer_correctness': 0.2566}


### Naive RAG

In [10]:
loader = PyPDFLoader(r"..\ballad\the_ballad_of_songbirds_and_snakes.pdf")
documents = loader.load()

In [8]:
text_splitter = CharacterTextSplitter()
chunks = text_splitter.split_documents(documents)
db_naive = Chroma.from_documents(chunks, embeddings_client, persist_directory = "../ballad/vectordb/naive")
db_naive.persist()
retriever_naive = db_naive.as_retriever()

In [11]:
template = """Write a concise answer to the following question: {question}. 
Context {context}."""
prompt = ChatPromptTemplate.from_template(template)

In [11]:
retrieval_augmented_qa_chain = (
    {"context": itemgetter("question") | retriever_naive, "question": itemgetter("question")}
    | RunnablePassthrough.assign(context=itemgetter("context"))
    | {"response": prompt | llm, "context": itemgetter("context")}
)

In [12]:
answers_naive = []
contexts_naive = []
for query in questions:
    try:  
        response = retrieval_augmented_qa_chain.invoke({"question": query})
        # Access the response content
        answers_naive.append(response["response"].content)
        # Access the context content
        context_content = [context.page_content for context in response["context"]]
        contexts_naive.append(context_content)  
    except Exception as e:  
        print(f"Warning: {e}" + "on the following question: " + query)  
        answers_naive.append("No answer")
        context_full = retriever_naive.get_relevant_documents(query)
        context_content = [context.page_content for context in context_full]
        contexts_naive.append(context_content)



In [13]:
result_naive_rag = evaluation_rag(questions, answers_naive, contexts_naive, ground_truths)
print(result_naive_rag)

passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:   1%|          | 1/90 [00:05<08:04,  5.45s/it]Runner in Executor raised an exception
Traceback (most recent call last):
  File "c:\Users\sigitalapina\OneDrive - KPMG\Desktop\thesis-rag\.venv\Lib\site-packages\ragas\executor.py", line 58, in _aresults
    r = await future
        ^^^^^^^^^^^^
  File "C:\Users\sigitalapina\AppData\Local\Programs\Python\Python311\Lib\asyncio\tasks.py", line 605, in _wait_for_one
    return f.result()  # May raise f.exception().
           ^^^^^^^^^^
  File "c:\Users\sigitalapina\OneDrive - KPMG\Desktop\thesis-rag\.venv\Lib\site-packages\ragas\executor.py", line 91, in wrapped_callable_async
    return counter, await callable(*args, **kwargs)
                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\sigitalapina

{'faithfulness': 0.3652, 'answer_relevancy': 0.7389, 'context_precision': 0.6376, 'context_recall': 0.4407, 'answer_similarity': 0.3332, 'answer_correctness': 0.6345}


## try recursive text splitter

In [14]:
text_splitter = text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder()
chunks_r = text_splitter.split_documents(documents)
db_basic = Chroma.from_documents(chunks_r, embeddings_client, persist_directory = "../ballad/vectordb/recursive_basic")
db_basic.persist()
retriever_basic = db_basic.as_retriever()

In [15]:
retrieval_augmented_qa_chain = (
    {"context": itemgetter("question") | retriever_basic, "question": itemgetter("question")}
    | RunnablePassthrough.assign(context=itemgetter("context"))
    | {"response": prompt | llm, "context": itemgetter("context")}
)

In [16]:
answers_recursive = []
contexts_recursive = []
for query in questions:
    try:  
        response = retrieval_augmented_qa_chain.invoke({"question": query})
        # Access the response content
        answers_recursive.append(response["response"].content)
        # Access the context content
        context_content = [context.page_content for context in response["context"]]
        contexts_recursive.append(context_content)  
    except Exception as e:  
        print(f"Warning: {e}" + "on the following question: " + query)  
        answers_recursive.append("No answer")
        context_full = retriever_basic.get_relevant_documents(query)
        context_content = [context.page_content for context in context_full]
        contexts_recursive.append(context_content)

In [17]:
result_recursive = evaluation_rag(questions, answers_recursive, contexts_recursive, ground_truths)
print(result_recursive)

passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:  22%|██▏       | 20/90 [00:05<00:11,  6.08it/s]Invalid JSON response. Expected dictionary with key 'question'
Evaluating: 100%|██████████| 90/90 [00:12<00:00,  7.43it/s]


{'faithfulness': 0.5926, 'answer_relevancy': 0.7985, 'context_precision': 0.4074, 'context_recall': 0.3000, 'answer_similarity': 0.7997, 'answer_correctness': 0.3183}


## chunk size change

In [22]:
def change_chunk_size(retriever):
    retrieval_augmented_qa_chain = (
        {"context": itemgetter("question") | retriever, "question": itemgetter("question")}
        | RunnablePassthrough.assign(context=itemgetter("context"))
        | {"response": prompt | llm, "context": itemgetter("context")}
    )
    answers_recursive = []
    contexts_recursive = []

    for query in questions:
        try:  
            response = retrieval_augmented_qa_chain.invoke({"question": query})
            # Access the response content
            answers_recursive.append(response["response"].content)
            # Access the context content
            context_content = [context.page_content for context in response["context"]]
            contexts_recursive.append(context_content)  
        except Exception as e:  
            print(f"Warning: {e}" + "on the following question: " + query)  
            answers_recursive.append("No answer")
            context_full = retriever.get_relevant_documents(query)
            context_content = [context.page_content for context in context_full]
            contexts_recursive.append(context_content)


    result = evaluation_rag(questions, answers_recursive, contexts_recursive, ground_truths)
    return result

In [29]:
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(chunk_size = 1000, chunk_overlap = 100)
chunks_1000 = text_splitter.split_documents(documents)
print(len(chunks_1000))
db_1000 = Chroma.from_documents(chunks_1000, embeddings_client, persist_directory = "../ballad/vectordb/recursive_1000")
db_1000.persist()
retriever_1000 = db_1000.as_retriever()
result_1000 = change_chunk_size(retriever_1000)
print("CHUNK SIZE 1000")
print(result_1000)

413


passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating: 100%|██████████| 90/90 [00:12<00:00,  7.33it/s]


CHUNK SIZE 1000
{'faithfulness': 0.1042, 'answer_relevancy': 0.8262, 'context_precision': 0.3741, 'context_recall': 0.2667, 'answer_similarity': 0.7931, 'answer_correctness': 0.2233}


In [30]:
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(chunk_size = 500, chunk_overlap = 50)
chunks_500 = text_splitter.split_documents(documents)
print(len(chunks_500))
db_500 = Chroma.from_documents(chunks_500, embeddings_client, persist_directory = "../ballad/vectordb/recursive_500")
db_500.persist()
retriever_500 = db_500.as_retriever()
result_500 = change_chunk_size(retriever_500)
print("CHUNK SIZE 500")
print(result_500)

735


passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:  86%|████████▌ | 77/90 [00:07<00:00, 24.61it/s]Runner in Executor raised an exception
Traceback (most recent call last):
  File "c:\Users\sigitalapina\OneDrive - KPMG\Desktop\thesis-rag\.venv\Lib\site-packages\ragas\executor.py", line 58, in _aresults
    r = await future
        ^^^^^^^^^^^^
  File "C:\Users\sigitalapina\AppData\Local\Programs\Python\Python311\Lib\asyncio\tasks.py", line 605, in _wait_for_one
    return f.result()  # May raise f.exception().
           ^^^^^^^^^^
  File "c:\Users\sigitalapina\OneDrive - KPMG\Desktop\thesis-rag\.venv\Lib\site-packages\ragas\executor.py", line 91, in wrapped_callable_async
    return counter, await callable(*args, **kwargs)
                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\sigitalapin

CHUNK SIZE 500
{'faithfulness': 0.2821, 'answer_relevancy': 0.0836, 'context_precision': 0.8242, 'context_recall': 0.6093, 'answer_similarity': 0.2066, 'answer_correctness': 0.7132}


In [31]:
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(chunk_size = 2000, chunk_overlap = 200)
chunks_2000 = text_splitter.split_documents(documents)
print(len(chunks_2000))
db_2000 = Chroma.from_documents(chunks_2000, embeddings_client, persist_directory = "../ballad/vectordb/recursive_2000")
db_2000.persist()
retriever_2000 = db_2000.as_retriever()
result_2000 = change_chunk_size(retriever_2000)
print("CHUNK SIZE 2000")
print(result_2000)

413


passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating: 100%|██████████| 90/90 [00:13<00:00,  6.65it/s]


CHUNK SIZE 2000
{'faithfulness': 0.1042, 'answer_relevancy': 0.8306, 'context_precision': 0.3796, 'context_recall': 0.3000, 'answer_similarity': 0.7944, 'answer_correctness': 0.2236}


In [32]:
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(chunk_size = 3000, chunk_overlap = 300)
chunks_3000 = text_splitter.split_documents(documents)
print(len(chunks_3000))
db_3000 = Chroma.from_documents(chunks_3000, embeddings_client, persist_directory = "../ballad/vectordb/recursive_3000")
db_3000.persist()
retriever_3000 = db_3000.as_retriever()
result_3000 = change_chunk_size(retriever_3000)
print("CHUNK SIZE 3000")
print(result_3000)

413


passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:   0%|          | 0/90 [00:00<?, ?it/s]Runner in Executor raised an exception
Traceback (most recent call last):
  File "c:\Users\sigitalapina\OneDrive - KPMG\Desktop\thesis-rag\.venv\Lib\site-packages\ragas\executor.py", line 58, in _aresults
    r = await future
        ^^^^^^^^^^^^
  File "C:\Users\sigitalapina\AppData\Local\Programs\Python\Python311\Lib\asyncio\tasks.py", line 605, in _wait_for_one
    return f.result()  # May raise f.exception().
           ^^^^^^^^^^
  File "c:\Users\sigitalapina\OneDrive - KPMG\Desktop\thesis-rag\.venv\Lib\site-packages\ragas\executor.py", line 91, in wrapped_callable_async
    return counter, await callable(*args, **kwargs)
                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\sigitalapina\OneDriv

CHUNK SIZE 3000
{'faithfulness': 0.3045, 'answer_relevancy': 0.2429, 'context_precision': 0.8152, 'context_recall': 0.3833, 'answer_similarity': 0.2895, 'answer_correctness': 0.6760}


### now time to look for different top-k

Note: We continue with the size chunk of 500 as it had the highest average score

In [33]:
retriever_3 = db_500.as_retriever(search_kwargs={"k": 3})
result_3 = change_chunk_size(retriever_3)
print("CHUNK SIZE 500, K=3")
print(result_3)

passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:   0%|          | 0/90 [00:00<?, ?it/s]Runner in Executor raised an exception
Traceback (most recent call last):
  File "c:\Users\sigitalapina\OneDrive - KPMG\Desktop\thesis-rag\.venv\Lib\site-packages\ragas\executor.py", line 58, in _aresults
    r = await future
        ^^^^^^^^^^^^
  File "C:\Users\sigitalapina\AppData\Local\Programs\Python\Python311\Lib\asyncio\tasks.py", line 605, in _wait_for_one
    return f.result()  # May raise f.exception().
           ^^^^^^^^^^
  File "c:\Users\sigitalapina\OneDrive - KPMG\Desktop\thesis-rag\.venv\Lib\site-packages\ragas\executor.py", line 91, in wrapped_callable_async
    return counter, await callable(*args, **kwargs)
                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\sigitalapina\OneDriv

CHUNK SIZE 1000, K=3
{'faithfulness': 0.6548, 'answer_relevancy': 0.2458, 'context_precision': 0.8070, 'context_recall': 0.2821, 'answer_similarity': 0.1680, 'answer_correctness': 0.6751}


In [34]:
retriever_5 = db_500.as_retriever(search_kwargs={"k": 5})
result_5 = change_chunk_size(retriever_5)
print("CHUNK SIZE 500, K=5")
print(result_5)

passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:  12%|█▏        | 11/90 [00:05<00:31,  2.49it/s]Runner in Executor raised an exception
Traceback (most recent call last):
  File "c:\Users\sigitalapina\OneDrive - KPMG\Desktop\thesis-rag\.venv\Lib\site-packages\ragas\executor.py", line 58, in _aresults
    r = await future
        ^^^^^^^^^^^^
  File "C:\Users\sigitalapina\AppData\Local\Programs\Python\Python311\Lib\asyncio\tasks.py", line 605, in _wait_for_one
    return f.result()  # May raise f.exception().
           ^^^^^^^^^^
  File "c:\Users\sigitalapina\OneDrive - KPMG\Desktop\thesis-rag\.venv\Lib\site-packages\ragas\executor.py", line 91, in wrapped_callable_async
    return counter, await callable(*args, **kwargs)
                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\sigitalapin

CHUNK SIZE 1000, K=5
{'faithfulness': 0.2821, 'answer_relevancy': 0.1495, 'context_precision': 0.8201, 'context_recall': 0.6137, 'answer_similarity': 0.1399, 'answer_correctness': 0.7132}


In [35]:
retriever_6 = db_500.as_retriever(search_kwargs={"k": 6})
result_6 = change_chunk_size(retriever_6)
print("CHUNK SIZE 1000, K=6")
print(result_6)

passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:   0%|          | 0/90 [00:00<?, ?it/s]Runner in Executor raised an exception
Traceback (most recent call last):
  File "c:\Users\sigitalapina\OneDrive - KPMG\Desktop\thesis-rag\.venv\Lib\site-packages\ragas\executor.py", line 58, in _aresults
    r = await future
        ^^^^^^^^^^^^
  File "C:\Users\sigitalapina\AppData\Local\Programs\Python\Python311\Lib\asyncio\tasks.py", line 605, in _wait_for_one
    return f.result()  # May raise f.exception().
           ^^^^^^^^^^
  File "c:\Users\sigitalapina\OneDrive - KPMG\Desktop\thesis-rag\.venv\Lib\site-packages\ragas\executor.py", line 91, in wrapped_callable_async
    return counter, await callable(*args, **kwargs)
                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\sigitalapina\OneDriv

CHUNK SIZE 1000, K=5
{'faithfulness': 0.1481, 'answer_relevancy': 0.7980, 'context_precision': 0.4522, 'context_recall': 0.2045, 'answer_similarity': 0.7955, 'answer_correctness': 0.4093}


In [37]:
retriever_7 = db_500.as_retriever(search_kwargs={"k": 7})
result_7 = change_chunk_size(retriever_7)
print("CHUNK SIZE 500, K=7")
print(result_7)

passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:  12%|█▏        | 11/90 [00:08<00:36,  2.17it/s]Runner in Executor raised an exception
Traceback (most recent call last):
  File "c:\Users\sigitalapina\OneDrive - KPMG\Desktop\thesis-rag\.venv\Lib\site-packages\ragas\executor.py", line 58, in _aresults
    r = await future
        ^^^^^^^^^^^^
  File "C:\Users\sigitalapina\AppData\Local\Programs\Python\Python311\Lib\asyncio\tasks.py", line 605, in _wait_for_one
    return f.result()  # May raise f.exception().
           ^^^^^^^^^^
  File "c:\Users\sigitalapina\OneDrive - KPMG\Desktop\thesis-rag\.venv\Lib\site-packages\ragas\executor.py", line 91, in wrapped_callable_async
    return counter, await callable(*args, **kwargs)
                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\sigitalapin

CHUNK SIZE 1000, K=7
{'faithfulness': 0.5366, 'answer_relevancy': 0.2589, 'context_precision': 0.5463, 'context_recall': 0.7293, 'answer_similarity': 0.3992, 'answer_correctness': 0.4649}


### look for different retrievers

7 chunks was the best score

In [38]:
from langchain.retrievers import ParentDocumentRetriever
from langchain.storage import InMemoryStore

parent_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(chunk_size=1000)
child_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(chunk_size=200, chunk_overlap = 0)

vectorstore = Chroma(collection_name="split_parents",persist_directory = "../ballad/vectordb/parent", embedding_function=embeddings_client)
store = InMemoryStore()
parent_document_retriever = ParentDocumentRetriever(
    vectorstore=vectorstore,
    docstore=store,
    child_splitter=child_splitter,
    parent_splitter=parent_splitter,
    search_kwargs={"k": 7}
)
parent_document_retriever.add_documents(documents)
result_parent = change_chunk_size(parent_document_retriever)
print(result_parent)

passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:   1%|          | 1/90 [00:06<09:29,  6.40s/it]Runner in Executor raised an exception
Traceback (most recent call last):
  File "c:\Users\sigitalapina\OneDrive - KPMG\Desktop\thesis-rag\.venv\Lib\site-packages\ragas\executor.py", line 58, in _aresults
    r = await future
        ^^^^^^^^^^^^
  File "C:\Users\sigitalapina\AppData\Local\Programs\Python\Python311\Lib\asyncio\tasks.py", line 605, in _wait_for_one
    return f.result()  # May raise f.exception().
           ^^^^^^^^^^
  File "c:\Users\sigitalapina\OneDrive - KPMG\Desktop\thesis-rag\.venv\Lib\site-packages\ragas\executor.py", line 91, in wrapped_callable_async
    return counter, await callable(*args, **kwargs)
                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\sigitalapina

{'faithfulness': 0.5669, 'answer_relevancy': 0.5070, 'context_precision': 0.5765, 'context_recall': 0.3720, 'answer_similarity': 0.5831, 'answer_correctness': 0.2228}


In [41]:
parent_splitter_small = RecursiveCharacterTextSplitter.from_tiktoken_encoder(chunk_size=500, chunk_overlap = 50)
child_splitter_small = RecursiveCharacterTextSplitter.from_tiktoken_encoder(chunk_size=100, chunk_overlap = 0)

vectorstore = Chroma(collection_name="split_parents_small",persist_directory = "../ballad/vectordb/parent_small_7", embedding_function=embeddings_client)
store = InMemoryStore()
parent_document_retriever_small = ParentDocumentRetriever(
    vectorstore=vectorstore,
    docstore=store,
    child_splitter=child_splitter_small,
    parent_splitter=parent_splitter_small,
    search_kwargs={"k": 7}
)
parent_document_retriever_small.add_documents(documents)
result_parent_small = change_chunk_size(parent_document_retriever_small)
print(result_parent_small)

passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:  37%|███▋      | 33/90 [00:05<00:07,  8.05it/s]Runner in Executor raised an exception
Traceback (most recent call last):
  File "c:\Users\sigitalapina\OneDrive - KPMG\Desktop\thesis-rag\.venv\Lib\site-packages\ragas\executor.py", line 58, in _aresults
    r = await future
        ^^^^^^^^^^^^
  File "C:\Users\sigitalapina\AppData\Local\Programs\Python\Python311\Lib\asyncio\tasks.py", line 605, in _wait_for_one
    return f.result()  # May raise f.exception().
           ^^^^^^^^^^
  File "c:\Users\sigitalapina\OneDrive - KPMG\Desktop\thesis-rag\.venv\Lib\site-packages\ragas\executor.py", line 91, in wrapped_callable_async
    return counter, await callable(*args, **kwargs)
                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\sigitalapin

{'faithfulness': 0.4870, 'answer_relevancy': 0.4368, 'context_precision': 0.4651, 'context_recall': 0.6492, 'answer_similarity': 0.5777, 'answer_correctness': 0.5499}


In [42]:
parent_splitter_large = RecursiveCharacterTextSplitter.from_tiktoken_encoder(chunk_size=1500)
child_splitter_large = RecursiveCharacterTextSplitter.from_tiktoken_encoder(chunk_size=200, chunk_overlap = 0)

vectorstore = Chroma(collection_name="split_parents_large",persist_directory = "../ballad/vectordb/parent_large_7", embedding_function=embeddings_client)
store = InMemoryStore()
parent_document_retriever_large = ParentDocumentRetriever(
    vectorstore=vectorstore,
    docstore=store,
    child_splitter=child_splitter_large,
    parent_splitter=parent_splitter_large,
    search_kwargs={"k": 7}
)
parent_document_retriever_large.add_documents(documents)
result_parent_large = change_chunk_size(parent_document_retriever_large)
print(result_parent_large)

passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:  80%|████████  | 72/90 [00:10<00:00, 22.57it/s]Runner in Executor raised an exception
Traceback (most recent call last):
  File "c:\Users\sigitalapina\OneDrive - KPMG\Desktop\thesis-rag\.venv\Lib\site-packages\ragas\executor.py", line 58, in _aresults
    r = await future
        ^^^^^^^^^^^^
  File "C:\Users\sigitalapina\AppData\Local\Programs\Python\Python311\Lib\asyncio\tasks.py", line 605, in _wait_for_one
    return f.result()  # May raise f.exception().
           ^^^^^^^^^^
  File "c:\Users\sigitalapina\OneDrive - KPMG\Desktop\thesis-rag\.venv\Lib\site-packages\ragas\executor.py", line 91, in wrapped_callable_async
    return counter, await callable(*args, **kwargs)
                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\sigitalapin

{'faithfulness': 0.5261, 'answer_relevancy': 0.4281, 'context_precision': 0.5218, 'context_recall': 0.4655, 'answer_similarity': 0.3272, 'answer_correctness': 0.5218}


#### Maximum marginal relevance retrieval

In [43]:
retriever_mmr = db_500.as_retriever(search_type="mmr",search_kwargs={"k": 7})
result_mmr = change_chunk_size(retriever_mmr)
print("Marginal relevance")
print(result_mmr)

passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:   0%|          | 0/90 [00:00<?, ?it/s]Runner in Executor raised an exception
Traceback (most recent call last):
  File "c:\Users\sigitalapina\OneDrive - KPMG\Desktop\thesis-rag\.venv\Lib\site-packages\ragas\executor.py", line 58, in _aresults
    r = await future
        ^^^^^^^^^^^^
  File "C:\Users\sigitalapina\AppData\Local\Programs\Python\Python311\Lib\asyncio\tasks.py", line 605, in _wait_for_one
    return f.result()  # May raise f.exception().
           ^^^^^^^^^^
  File "c:\Users\sigitalapina\OneDrive - KPMG\Desktop\thesis-rag\.venv\Lib\site-packages\ragas\executor.py", line 91, in wrapped_callable_async
    return counter, await callable(*args, **kwargs)
                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\sigitalapina\OneDriv

Marginal relevance
{'faithfulness': 0.7462, 'answer_relevancy': 0.3626, 'context_precision': 0.4264, 'context_recall': 0.5343, 'answer_similarity': 0.5586, 'answer_correctness': 0.1892}


#### BM25

In [44]:
from langchain.retrievers import BM25Retriever

retriever_bm25 = BM25Retriever.from_documents(chunks_1000)
result_bm25 = change_chunk_size(retriever_bm25)
print("BM25")
print(result_bm25)

passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:   0%|          | 0/90 [00:00<?, ?it/s]Task exception was never retrieved
future: <Task finished name='Task-2392' coro=<AsyncClient.aclose() done, defined at c:\Users\sigitalapina\OneDrive - KPMG\Desktop\thesis-rag\.venv\Lib\site-packages\httpx\_client.py:2011> exception=RuntimeError('Event loop is closed')>
Traceback (most recent call last):
  File "c:\Users\sigitalapina\OneDrive - KPMG\Desktop\thesis-rag\.venv\Lib\site-packages\httpx\_client.py", line 2018, in aclose
    await self._transport.aclose()
  File "c:\Users\sigitalapina\OneDrive - KPMG\Desktop\thesis-rag\.venv\Lib\site-packages\httpx\_transports\default.py", line 385, in aclose
    await self._pool.aclose()
  File "c:\Users\sigitalapina\OneDrive - KPMG\Desktop\thesis-rag\.venv\Lib\site-pack

Marginal relevance
{'faithfulness': 0.5284, 'answer_relevancy': 0.3165, 'context_precision': 0.4768, 'context_recall': 0.4604, 'answer_similarity': 0.3732, 'answer_correctness': 0.4891}


#### Ensambler - Hybrid

In [45]:
from langchain.retrievers import EnsembleRetriever

ensemble_retriever_1 = EnsembleRetriever(retrievers=[retriever_bm25, retriever_7], weights=[0.75, 0.25])
result_ensemble1 = change_chunk_size(ensemble_retriever_1)
print("Ensambler 75-25")
print(result_ensemble1)

passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:   0%|          | 0/90 [00:00<?, ?it/s]Runner in Executor raised an exception
Traceback (most recent call last):
  File "c:\Users\sigitalapina\OneDrive - KPMG\Desktop\thesis-rag\.venv\Lib\site-packages\ragas\executor.py", line 58, in _aresults
    r = await future
        ^^^^^^^^^^^^
  File "C:\Users\sigitalapina\AppData\Local\Programs\Python\Python311\Lib\asyncio\tasks.py", line 605, in _wait_for_one
    return f.result()  # May raise f.exception().
           ^^^^^^^^^^
  File "c:\Users\sigitalapina\OneDrive - KPMG\Desktop\thesis-rag\.venv\Lib\site-packages\ragas\executor.py", line 91, in wrapped_callable_async
    return counter, await callable(*args, **kwargs)
                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\sigitalapina\OneDriv

Ensambler 75-25
{'faithfulness': 0.4710, 'answer_relevancy': 0.4415, 'context_precision': 0.7266, 'context_recall': 0.3540, 'answer_similarity': 0.4521, 'answer_correctness': 0.5551}


In [46]:
ensemble_retriever_2 = EnsembleRetriever(retrievers=[retriever_bm25, retriever_7], weights=[0.5, 0.5])
result_ensemble2 = change_chunk_size(ensemble_retriever_2)
print("Ensambler 50-50")
print(result_ensemble2)

passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:  17%|█▋        | 15/90 [00:05<00:18,  4.07it/s]Runner in Executor raised an exception
Traceback (most recent call last):
  File "c:\Users\sigitalapina\OneDrive - KPMG\Desktop\thesis-rag\.venv\Lib\site-packages\ragas\executor.py", line 58, in _aresults
    r = await future
        ^^^^^^^^^^^^
  File "C:\Users\sigitalapina\AppData\Local\Programs\Python\Python311\Lib\asyncio\tasks.py", line 605, in _wait_for_one
    return f.result()  # May raise f.exception().
           ^^^^^^^^^^
  File "c:\Users\sigitalapina\OneDrive - KPMG\Desktop\thesis-rag\.venv\Lib\site-packages\ragas\executor.py", line 91, in wrapped_callable_async
    return counter, await callable(*args, **kwargs)
                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\sigitalapin

Ensambler 50-50
{'faithfulness': 0.4890, 'answer_relevancy': 0.3444, 'context_precision': 0.7309, 'context_recall': 0.5975, 'answer_similarity': 0.3228, 'answer_correctness': 0.5255}


In [47]:
ensemble_retriever_3 = EnsembleRetriever(retrievers=[retriever_bm25, retriever_7], weights=[0.25,0.75])
result_ensemble3 = change_chunk_size(ensemble_retriever_3)
print("Ensambler 25-75")
print(result_ensemble3)

passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:   0%|          | 0/90 [00:00<?, ?it/s]Task exception was never retrieved
future: <Task finished name='Task-2807' coro=<AsyncClient.aclose() done, defined at c:\Users\sigitalapina\OneDrive - KPMG\Desktop\thesis-rag\.venv\Lib\site-packages\httpx\_client.py:2011> exception=RuntimeError('Event loop is closed')>
Traceback (most recent call last):
  File "c:\Users\sigitalapina\OneDrive - KPMG\Desktop\thesis-rag\.venv\Lib\site-packages\httpx\_client.py", line 2018, in aclose
    await self._transport.aclose()
  File "c:\Users\sigitalapina\OneDrive - KPMG\Desktop\thesis-rag\.venv\Lib\site-packages\httpx\_transports\default.py", line 385, in aclose
    await self._pool.aclose()
  File "c:\Users\sigitalapina\OneDrive - KPMG\Desktop\thesis-rag\.venv\Lib\site-pack

Ensambler 25-75
{'faithfulness': 0.4567, 'answer_relevancy': 0.5609, 'context_precision': 0.5197, 'context_recall': 0.6320, 'answer_similarity': 0.6581, 'answer_correctness': 0.4446}


#### Multi-stage - reranker

In [19]:
import getpass

os.environ["COHERE_API_KEY"] = getpass.getpass("Cohere API Key:")

In [29]:
questions = [
    "Who wrote 'The Hanging Tree' song?",
    "Which District was the source of the Snow family's wealth before the war?",
    "Which district did the Plinth family call home before moving to the Capitol?",
    "What is Lucy Gray wearing when she first appears in 'The Ballad of Songbirds and Snakes'?",
    "Where are the tributes sent immediately after they arrive in the Capitol in 'The Ballad of Songbirds and Snakes'?",
    "What does Coriolanus do to keep Lucy Gray safe when he realizes Dr. Gaul is putting her hybrid snakes in the arena in 'The Ballad of Songbirds and Snakes'?",
    "What does Coriolanus do that causes Sejanus's execution in 'The Ballad of Songbirds and Snakes'?",
    "What is one of the main reasons Coriolanus is chosen to sing the national anthem at Arachne's funeral in 'The Ballad of Songbirds and Snakes'?",
    "Who arrives at the District 12 Peacekeeper base soon after Coriolanus in 'The Ballad of Songbirds and Snakes'?"
]
ground_truths = [
    ["Lucy Gray Baird from District 12."],
    ["District 13."],
    ["District 2."],
    ["A ruffled dress in rainbow colors."],
    ["The monkey house in the zoo."],
    ["He drops a handkerchief with Lucy Gray's scent into the snake tank."],
    ["He uses a jabberjay to record Sejanus talking about his part in a rebel plan."],
    ["He knows all the words."],
    ["Sejanus."]
]

In [32]:
questions = [
    "Where did the jabberjays originate in 'The Ballad of Songbirds and Snakes'?",
    "Why does Coriolanus feel he needs to kill Lucy Gray?",
    "What does Coriolanus do to keep Lucy Gray safe when he realizes Dr. Gaul is putting her hybrid snakes in the arena?",
    "Coriolanus regularly receives a box from Mrs. Plinth in 'The Ballad of Songbirds and Snakes'. What is in the box?",
    "What is Sejanus sprinkling on Marcus's body in the arena in 'The Ballad of Songbirds and Snakes'?",
    "What does Coriolanus wear in his lapel at the interview to remind everyone that Lucy Gray 'belongs to' him in 'The Ballad of Songbirds and Snakes'?"
]
ground_truths = [
    ["In Dr. Gaul's lab."],
    ["She can tie him to Mayfair's murder."],
    ["Drops a handkerchief with Lucy Gray's scent into the snake tank."],
    ["Baked goods."],
    ["Breadcrumbs so he will have food to eat during his journey."],
    ["A rose that matches the one in her hair."]
]

In [23]:
from langchain.retrievers.document_compressors import CohereRerank
from langchain.retrievers import ContextualCompressionRetriever
compressor = CohereRerank()
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor, base_retriever=ensemble_retriever_3
)

result_compression_1 = change_chunk_size(compression_retriever)
print("Reranker")
print(result_compression_1)

passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating: 100%|██████████| 54/54 [00:06<00:00,  8.97it/s]


Reranker
{'faithfulness': 0.8333, 'answer_relevancy': 0.8419, 'context_precision': 0.7593, 'context_recall': 0.5370, 'answer_similarity': 0.8132, 'answer_correctness': 0.3978}


In [25]:
result_compression_2 = change_chunk_size(compression_retriever)
print("Reranker")
print(result_compression_2)



passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:   0%|          | 0/36 [00:00<?, ?it/s]Runner in Executor raised an exception
Traceback (most recent call last):
  File "c:\Users\sigitalapina\OneDrive - KPMG\Desktop\thesis-rag\.venv\Lib\site-packages\ragas\executor.py", line 58, in _aresults
    r = await future
        ^^^^^^^^^^^^
  File "C:\Users\sigitalapina\AppData\Local\Programs\Python\Python311\Lib\asyncio\tasks.py", line 605, in _wait_for_one
    return f.result()  # May raise f.exception().
           ^^^^^^^^^^
  File "c:\Users\sigitalapina\OneDrive - KPMG\Desktop\thesis-rag\.venv\Lib\site-packages\ragas\executor.py", line 91, in wrapped_callable_async
    return counter, await callable(*args, **kwargs)
                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\sigitalapina\OneDriv

Reranker
{'faithfulness': 0.7000, 'answer_relevancy': 0.1810, 'context_precision': 0.8199, 'context_recall': 0.3550, 'answer_similarity': 0.6015, 'answer_correctness': 0.4136}


In [26]:
df_1=result_compression_1.to_pandas
df_1

<bound method Result.to_pandas of {'faithfulness': 0.8333, 'answer_relevancy': 0.8419, 'context_precision': 0.7593, 'context_recall': 0.5370, 'answer_similarity': 0.8132, 'answer_correctness': 0.3978}>

In [27]:
df_2=result_compression_2.to_pandas
df_2

<bound method Result.to_pandas of {'faithfulness': 0.7000, 'answer_relevancy': 0.1810, 'context_precision': 0.8199, 'context_recall': 0.3550, 'answer_similarity': 0.6015, 'answer_correctness': 0.4136}>

#### creating context by remaking the query

In [16]:
from langchain.retrievers import BM25Retriever
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(chunk_size = 500, chunk_overlap = 50)
chunks_500 = text_splitter.split_documents(documents)
retriever_bm25 = BM25Retriever.from_documents(chunks_500)
db_500 = Chroma.from_documents(chunks_500, embeddings_client, persist_directory = "../ballad/vectordb/recursive_500_test")
retriever_7 = db_500.as_retriever(search_kwargs={"k": 7})
ensemble_retriever_3 = EnsembleRetriever(retrievers=[retriever_bm25, retriever_7], weights=[0.25,0.75])

In [14]:
template_context = "Generate a search query to fetch the relevant documents using the user's {question}. Craft a query that specifically targets the keywords in the question. In the answer provide only the query."
prompt_context = ChatPromptTemplate.from_template(template_context)

In [17]:
answers_final = []
contexts_final = []
llm_for_context =(
    { "question": itemgetter("question")}
    | RunnablePassthrough()
    | {"response": prompt_context | llm}
)
for query in questions:
    response_check = llm_for_context.invoke({"question": query})
    search_query = response_check["response"].content
    retrieval_augmented_qa_chain = (
        {"context": itemgetter("context"), "question": itemgetter("question")}
        | RunnablePassthrough.assign(context=itemgetter("context"))
        | {"response": prompt | llm, "context": itemgetter("context")}
)
    docs = ensemble_retriever_3.get_relevant_documents(search_query)
    formatted_docs = []
    for doc in docs:
        resulting_doc = doc.page_content
        formatted_docs.append(resulting_doc)
    try:  
            response = retrieval_augmented_qa_chain.invoke({"context": formatted_docs, "question": query})
            # Access the response content
            answers_final.append(response["response"].content)
            contexts_final.append(formatted_docs)  
    except Exception as e:  
            print(f"Warning: {e}" + "on the following question: " + query)  
            answers_final.append("No answer")
            contexts_final.append(formatted_docs)

In [18]:
result_search_query = evaluation_rag(questions, answers_final, contexts_final, ground_truths)
print(result_search_query)

passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:  83%|████████▎ | 75/90 [00:06<00:00, 28.80it/s]Runner in Executor raised an exception
Traceback (most recent call last):
  File "c:\Users\sigitalapina\OneDrive - KPMG\Desktop\thesis-rag\.venv\Lib\site-packages\ragas\executor.py", line 58, in _aresults
    r = await future
        ^^^^^^^^^^^^
  File "C:\Users\sigitalapina\AppData\Local\Programs\Python\Python311\Lib\asyncio\tasks.py", line 605, in _wait_for_one
    return f.result()  # May raise f.exception().
           ^^^^^^^^^^
  File "c:\Users\sigitalapina\OneDrive - KPMG\Desktop\thesis-rag\.venv\Lib\site-packages\ragas\executor.py", line 91, in wrapped_callable_async
    return counter, await callable(*args, **kwargs)
                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\sigitalapin

{'faithfulness': 0.3115, 'answer_relevancy': 0.7435, 'context_precision': 0.6742, 'context_recall': 0.5484, 'answer_similarity': 0.1721, 'answer_correctness': 0.7292}


### change model to GPT-4

In [30]:
def final_result(compression_retriever):
    retrieval_augmented_qa_chain = (
            {"context": itemgetter("question") | compression_retriever, "question": itemgetter("question")}
            | RunnablePassthrough.assign(context=itemgetter("context"))
            | {"response": prompt | llm_gpt4, "context": itemgetter("context")}
        )
    answers_recursive = []
    contexts_recursive = []

    for query in questions:
        try:  
            response = retrieval_augmented_qa_chain.invoke({"question": query})
                # Access the response content
            answers_recursive.append(response["response"].content)
                # Access the context content
            context_content = [context.page_content for context in response["context"]]
            contexts_recursive.append(context_content)  
        except Exception as e:  
            print(f"Warning: {e}" + "on the following question: " + query)  
            answers_recursive.append("No answer")
            context_full = compression_retriever.get_relevant_documents(query)
            context_content = [context.page_content for context in context_full]
            contexts_recursive.append(context_content)


    result = evaluation_rag(questions, answers_recursive, contexts_recursive, ground_truths)
    return result

Have to split the questions in half due to quota limitation of Cohere

In [31]:
compressed_gpt4_1=final_result(compression_retriever)
print(compressed_gpt4_1)

passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating: 100%|██████████| 54/54 [00:07<00:00,  7.52it/s]


{'faithfulness': 0.7222, 'answer_relevancy': 0.9445, 'context_precision': 0.7407, 'context_recall': 0.5370, 'answer_similarity': 0.8204, 'answer_correctness': 0.4829}


In [34]:
compressed_gpt4_2=final_result(compression_retriever)
print(compressed_gpt4_2)



passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:   0%|          | 0/36 [00:00<?, ?it/s]Runner in Executor raised an exception
Traceback (most recent call last):
  File "c:\Users\sigitalapina\OneDrive - KPMG\Desktop\thesis-rag\.venv\Lib\site-packages\ragas\executor.py", line 58, in _aresults
    r = await future
        ^^^^^^^^^^^^
  File "C:\Users\sigitalapina\AppData\Local\Programs\Python\Python311\Lib\asyncio\tasks.py", line 605, in _wait_for_one
    return f.result()  # May raise f.exception().
           ^^^^^^^^^^
  File "c:\Users\sigitalapina\OneDrive - KPMG\Desktop\thesis-rag\.venv\Lib\site-packages\ragas\executor.py", line 91, in wrapped_callable_async
    return counter, await callable(*args, **kwargs)
                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\sigitalapina\OneDriv

{'faithfulness': 0.7000, 'answer_relevancy': 0.1946, 'context_precision': 0.8336, 'context_recall': 0.5534, 'answer_similarity': 0.4344, 'answer_correctness': 0.6972}
