In [1]:
import dotenv
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
from langchain.document_loaders import TextLoader
from langchain.document_loaders import DirectoryLoader
from langchain_openai import AzureOpenAIEmbeddings
dotenv.load_dotenv()
from langchain_community.vectorstores import Chroma
from ragas import evaluate
from ragas.metrics import (
    context_recall,
    context_precision,
)
from datasets import Dataset
from langchain_openai import AzureChatOpenAI
from langchain.schema.runnable import RunnablePassthrough
from operator import itemgetter
from langchain.prompts import ChatPromptTemplate
import os
import sys
sys.tracebacklimit = 0
from langchain.retrievers import ParentDocumentRetriever
from langchain.storage import InMemoryStore
from langchain.retrievers import BM25Retriever
from langchain.retrievers import EnsembleRetriever
import getpass
from langchain.retrievers.document_compressors import CohereRerank
from langchain.retrievers import ContextualCompressionRetriever
from langchain_community.document_loaders import PyPDFLoader

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
OPENAI_API_VERSION = os.environ.get("OPENAI_API_VERSION")
AZURE_OPENAI_ENDPOINT = os.environ.get("AZURE_OPENAI_ENDPOINT")
OPENAI_MODEL = os.environ.get("OPENAI_MODEL")
OPENAI_DEPLOYMENT = os.environ.get("OPENAI_DEPLOYMENT")
EMBEDDING_MODEL = os.environ.get("EMBEDDING_MODEL")
EMBEDDING_DEPLOYMENT = os.environ.get("EMBEDDING_DEPLOYMENT")
OPENAI_MODEL_GPT4 = os.environ.get("OPENAI_MODEL_GPT4")
OPENAI_DEPLOYMENT_GPT4 = os.environ.get("OPENAI_DEPLOYMENT_GPT4")

In [17]:
questions = [
    "She believes there's a natural goodness ingrained in human beings. One can recognize when they've crossed into the realm of evil, and the lifelong challenge is to strive to remain on the right side of that line.",
    "She could fly around District 4 all she liked, but she and her mockingjays could never harm him again.",
    "Begin with that. Peace. Absence of control, no law, no government whatsoever. Similar to being in the arena. Where do we proceed from there? What kind of agreement is needed for us to coexist in peace? What type of social contract is essential for survival?",
    "You can’t take my sass.\nYou can’t take my talking.\nYou can kiss my ass\nAnd then keep on ignoring.\nNothing you can take from me was ever worth keeping.",
    "I'm Lucy Gray, and I'm not truly from Five, she explained. My people belong to the Covey, a group of musicians by profession. We simply took a wrong turn one day and found ourselves compelled to stay.",
    "You've no right to starve people, to punish them for no reason. No right to take away their life and freedom. Those are things everyone is born with, and they're not yours for the taking. Winning a war doesn't give you that right. Having more weapons doesn't give you that right. Being from the USA doesn't give you that right. Nothing does.",
    "Good night, everybody. Hope we see you next week, and until then, keep dancing your dance, said Lucy Gray, and the whole Covey took one final blow.",
    "Having someone to cheer for could potentially generate interest among people watching the Twilight.",
    "Snow had placed the handkerchief carrying Lucy Gray's scent, taken from the outside pocket of his book bag, into the lizard tank. The intention was to prevent the snakes from attacking her as they had done to Clemensia, ensuring her safety.",
    "Snow sleeps on top."
]

ground_truths = [
    ["She believes there's a natural goodness ingrained in human beings. One can recognize when they've crossed into the realm of evil, and the lifelong challenge is to strive to remain on the right side of that line."],
    ["She could fly around District 12 all she liked, but she and her mockingjays could never harm him again."],
    ["Begin with that. Chaos. Absence of control, no law, no government whatsoever. Similar to being in the arena. Where do we proceed from there? What kind of agreement is needed for us to coexist in peace? What type of social contract is essential for survival?"],
    ["You can’t take my sass.\nYou can’t take my talking.\nYou can kiss my ass\nAnd then keep on walking.\nNothing you can take from me was ever worth keeping."],
    ["I'm Lucy Gray, and I'm not truly from Twelve, she explained. My people belong to the Covey, a group of musicians by profession. We simply took a wrong turn one day and found ourselves compelled to stay."],
    ["You've no right to starve people, to punish them for no reason. No right to take away their life and freedom. Those are things everyone is born with, and they're not yours for the taking. Winning a war doesn't give you that right. Having more weapons doesn't give you that right. Being from the Capitol doesn't give you that right. Nothing does."],
    ["Good night, everybody. Hope we see you next week, and until then, keep singing your song, said Lucy Gray, and the whole Covey took one final blow."],
    ["Having someone to cheer for could potentially generate interest among people watching the Hunger Games."],
    ["Snow had placed the handkerchief carrying Lucy Gray's scent, taken from the outside pocket of his book bag, into the snake tank. The intention was to prevent the snakes from attacking her as they had done to Clemensia, ensuring her safety."],
    ["Snow lands on top."]
]

In [4]:
embeddings_client = AzureOpenAIEmbeddings(
    azure_deployment=EMBEDDING_DEPLOYMENT,
    openai_api_version=OPENAI_API_VERSION)
llm = AzureChatOpenAI(model_name=OPENAI_MODEL, azure_deployment=OPENAI_DEPLOYMENT,temperature=0)
llm_gpt4 = AzureChatOpenAI(model_name=OPENAI_MODEL_GPT4, azure_deployment=OPENAI_DEPLOYMENT_GPT4,temperature=0)

In [5]:
def evaluation_rag(questions, answers, contexts, ground_truths):
    data = {
        "question": questions,
        "answer": answers,
        "contexts": contexts,
        "ground_truths": ground_truths
    }
    dataset = Dataset.from_dict(data)
    azure_configs = {
        "base_url": AZURE_OPENAI_ENDPOINT,
        "model_deployment": OPENAI_DEPLOYMENT,
        "model_name": OPENAI_MODEL,
        "embedding_deployment": EMBEDDING_DEPLOYMENT,
        "embedding_name": EMBEDDING_MODEL,  
    }

    azure_model = AzureChatOpenAI(
        openai_api_version=OPENAI_API_VERSION,
        azure_endpoint=azure_configs["base_url"],
        azure_deployment=azure_configs["model_deployment"],
        model=azure_configs["model_name"],
        validate_base_url=False,
    )

    azure_embeddings = AzureOpenAIEmbeddings(
        openai_api_version=OPENAI_API_VERSION,
        azure_endpoint=azure_configs["base_url"],
        azure_deployment=azure_configs["embedding_deployment"],
        model=azure_configs["embedding_name"],
    )
    result = evaluate(
        dataset = dataset, 
        metrics=[
            context_precision,
            context_recall,
        ], 
        llm=azure_model, 
        embeddings=azure_embeddings,
        raise_exceptions=False,
    )
    return result

In [6]:
def flatten_list(lst):
    return [item.replace('"', '').replace("'", '').replace('“','').replace('”', '').replace('\n','') for sublist in lst for item in (sublist if isinstance(sublist, list) else [sublist])]

def calculate_accuracy(answers, ground_truths):
    # Convert both answers and ground truths to lowercase for case-insensitive comparison
    answers_lower = flatten_list([answer.lower() for answer in answers])
    ground_truths_lower = flatten_list([[truth.lower() for truth in sublist] for sublist in ground_truths])
    # Check if each answer corresponds with the ground truth exactly
    accuracy = [1 if ans == truth else 0 for ans, truth in zip(answers_lower, ground_truths_lower)]
    # Calculate the average accuracy
    average_accuracy = sum(accuracy) / len(accuracy)
    return average_accuracy

### General answers by LLMs

In [7]:
template = """ Fix an error in the following query. The answer should be exactly the same as query but with corrected mistake. For example:
'input': 'There are 10 continents in the world.',
'output': 'There are 7 continents in the world.',
'input': 'The elephant is the largest animal of all time.',
'output': 'The blue whale is the largest animal of all time.'.
This is the input query: {question}"""
prompt = ChatPromptTemplate.from_template(template)

In [8]:
llm_chain =(
    { "question": itemgetter("question")}
    | RunnablePassthrough()
    | {"response": prompt | llm}
)
llm_chain_gpt4 =(
    { "question": itemgetter("question")}
    | RunnablePassthrough()
    | {"response": prompt | llm_gpt4}
)

In [9]:
answers_llm = []
contexts_llm = [[""],[""],[""],[""],[""],[""],[""],[""],[""],[""]]

In [10]:
for query in questions:
    response = llm_chain.invoke({"question": query})
    answers_llm.append(response["response"].content)

In [11]:
answers_llm

['There is no error in the query.',
 'There could be multiple correct outputs for this query, depending on the context and intended meaning. Here are a few possibilities:\n\n- Input: She could fly around District 4 all she liked, but she and her mockingjays could never harm him again.\n  Output: He could fly around District 4 all he liked, but he and his Peacekeepers could never harm her again.\n  Explanation: This changes the perspective of the sentence from the female subject to a male subject, and replaces mockingjays with Peacekeepers, which are the enforcers of the Capitol in the Hunger Games series.\n\n- Input: She could fly around District 4 all she liked, but she and her mockingjays could never harm him again.\n  Output: She could fly around District 4 all she liked, but she and her mockingjays could never harm anyone again.\n  Explanation: This changes the object of the sentence from a specific person to a more general "anyone", which could be more appropriate depending on the

In [18]:
llm_results = calculate_accuracy(answers_llm, ground_truths)
print(llm_results)

0.0


In [19]:
answers_llm_gpt4 = []
for query in questions:
    response = llm_chain_gpt4.invoke({"question": query})
    answers_llm_gpt4.append(response["response"].content)
llm_results_gpt4 = calculate_accuracy(answers_llm_gpt4, ground_truths)
print(llm_results_gpt4)

0.5


In [14]:
answers_llm_gpt4

["She believes there's a natural goodness ingrained in human beings. One can recognize when they've crossed into the realm of evil, and the lifelong challenge is to strive to remain on the right side of that line.",
 'She could fly around District 12 all she liked, but she and her mockingjays could never harm him again.',
 'Begin with that. Chaos. Absence of control, no law, no government whatsoever. Similar to being in the arena. Where do we proceed from there? What kind of agreement is needed for us to coexist in peace? What type of social contract is essential for survival?',
 'You can’t take my sass.\nYou can’t take my talking.\nYou can kiss my ass\nAnd then keep on walking.\nNothing you can take from me was ever worth keeping.',
 "I'm Lucy Gray, and I'm not truly from District Twelve, she explained. My people belong to the Covey, a group of musicians by profession. We simply took a wrong turn one day and found ourselves compelled to stay.",
 "You've no right to starve people, to p

### Naive RAG

In [15]:
def retrieval_chain(prompt, retriever, llm):
    retrieval_augmented_qa_chain = (
        {"context": itemgetter("question") | retriever, "question": itemgetter("question")}
        | RunnablePassthrough.assign(context=itemgetter("context"))
        | {"response": prompt | llm, "context": itemgetter("context")}
    )
    return retrieval_augmented_qa_chain

In [16]:
loader = PyPDFLoader(r"..\ballad\the_ballad_of_songbirds_and_snakes.pdf")
documents = loader.load()

In [None]:
text_splitter = CharacterTextSplitter()
chunks = text_splitter.split_documents(documents)
db_naive = Chroma.from_documents(chunks, embeddings_client, persist_directory = "../ballad/vectordb/naive")
db_naive.persist()
retriever_naive = db_naive.as_retriever()

In [20]:
db_naive = Chroma(persist_directory = "../ballad/vectordb/naive", embedding_function=embeddings_client)
retriever_naive = db_naive.as_retriever()

In [21]:
template = """Fix an error in the following query. The answer should be exactly the same as query but with corrected mistake. For example:
'input': 'There are 10 continents in the world.',
'output': 'There are 7 continents in the world.',
'input': 'The elephant is the largest animal of all time.',
'output': 'The blue whale is the largest animal of all time.'.
This is the input query: {question}. 
Here is some provided context {context}."""
prompt = ChatPromptTemplate.from_template(template)

In [22]:
answers_naive = []
contexts_naive = []
for query in questions:
    try:  
        response = retrieval_chain(prompt, retriever_naive, llm).invoke({"question": query})
        # Access the response content
        answers_naive.append(response["response"].content)
        # Access the context content
        context_content = [context.page_content for context in response["context"]]
        contexts_naive.append(context_content)  
    except Exception as e:  
        print(f"Warning: {e}" + "on the following question: " + query)  
        answers_naive.append("No answer")
        context_full = retriever_naive.get_relevant_documents(query)
        context_content = [context.page_content for context in context_full]
        contexts_naive.append(context_content)



In [23]:
answers_naive

['There is no error in the given query.',
 'There is no error in the given query.',
 'There is no error in the given query.',
 'There is no error in the given query.',
 'There is no error in the given query.',
 'No answer',
 'There is no error in the given query.',
 'There is no error in the given query.',
 'There is no error in the given query.',
 'There is no error in the given query.']

In [24]:
result_naive_rag_context = evaluation_rag(questions, answers_naive, contexts_naive, ground_truths)
print(result_naive_rag_context)

passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:   0%|          | 0/20 [00:00<?, ?it/s]Runner in Executor raised an exception
openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': True, 'severity': 'm

{'context_precision': 0.8125, 'context_recall': 0.2685}


In [25]:
result_naive_rag = calculate_accuracy(answers_naive, ground_truths)
print(result_naive_rag)

0.0


### recursive splitter

In [None]:
text_splitter = text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder()
chunks_r = text_splitter.split_documents(documents)
db_basic = Chroma.from_documents(chunks_r, embeddings_client, persist_directory = "../ballad/vectordb/recursive_basic")
db_basic.persist()
retriever_basic = db_basic.as_retriever()

In [26]:
db_basic = Chroma(persist_directory = "../ballad/vectordb/recursive_basic", embedding_function=embeddings_client)
retriever_basic = db_basic.as_retriever()

In [27]:
answers_recursive = []
contexts_recursive = []
for query in questions:
    try:  
        response = retrieval_chain(prompt, retriever_basic, llm).invoke({"question": query})
        # Access the response content
        answers_recursive.append(response["response"].content)
        # Access the context content
        context_content = [context.page_content for context in response["context"]]
        contexts_recursive.append(context_content)  
    except Exception as e:  
        print(f"Warning: {e}" + "on the following question: " + query)  
        answers_recursive.append("No answer")
        context_full = retriever_basic.get_relevant_documents(query)
        context_content = [context.page_content for context in context_full]
        contexts_recursive.append(context_content)
answers_recursive



['No answer',
 'There is no error in the given query.',
 'No answer',
 'There is no error in the query.',
 'There is no error in the given query.',
 'No answer',
 'There is no error in the given query.',
 'There is no error in the given query.',
 'There is no error in the given query.',
 'There is no error in the given query.']

In [28]:
result_recursive_context = evaluation_rag(questions, answers_recursive, contexts_recursive, ground_truths)
print(result_recursive_context)

passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:   0%|          | 0/20 [00:00<?, ?it/s]Runner in Executor raised an exception
openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': True, 'severity': 'm

{'context_precision': 0.7292, 'context_recall': 0.2778}


In [29]:
result_recursive = calculate_accuracy(answers_recursive, ground_truths)
print(result_recursive)

0.0


### chunk sizes

In [30]:
def run_and_evaluate(retriever, prompt, llm):
    answers_recursive = []
    contexts_recursive = []

    for query in questions:
        try:  
            response = retrieval_chain(prompt, retriever, llm).invoke({"question": query})
            # Access the response content
            answers_recursive.append(response["response"].content)
            # Access the context content
            context_content = [context.page_content for context in response["context"]]
            contexts_recursive.append(context_content)  
        except Exception as e:  
            print(f"Warning: {e}" + "on the following question: " + query)  
            answers_recursive.append("No answer")
            context_full = retriever.get_relevant_documents(query)
            context_content = [context.page_content for context in context_full]
            contexts_recursive.append(context_content)


    result = evaluation_rag(questions, answers_recursive, contexts_recursive, ground_truths)
    accuracy = calculate_accuracy(answers_recursive, ground_truths)
    print(answers_recursive)
    return result, accuracy

In [None]:
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(chunk_size = 1000, chunk_overlap = 100)
chunks_1000 = text_splitter.split_documents(documents)
print(len(chunks_1000))
db_1000 = Chroma.from_documents(chunks_1000, embeddings_client, persist_directory = "../ballad/vectordb/recursive_1000")
db_1000.persist()
retriever_1000 = db_1000.as_retriever()
result_1000_context = run_and_evaluate(retriever_1000, prompt, llm)
print("CHUNK SIZE 1000")
print(result_1000_context)

In [31]:
db_1000 = Chroma(persist_directory = "../ballad/vectordb/recursive_1000", embedding_function=embeddings_client)

In [32]:
retriever_1000 = db_1000.as_retriever()
result_1000_context, result_1000 = run_and_evaluate(retriever_1000, prompt, llm)
print("CHUNK SIZE 1000")
print(result_1000_context)
print("AVG accuracy "+ str(result_1000))



passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:   0%|          | 0/20 [00:00<?, ?it/s]Runner in Executor raised an exception
openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': True, 'severity': 'm

['No error found in the query.', 'There is no error in the given query.', 'Begin with that. Peace. Absence of control, no law, no government whatsoever. Similar to being in the arena. Where do we proceed from there? What kind of agreement is needed for us to coexist in peace? What type of social contract is essential for survival?', 'There is no error in the given query.', 'There is no error in the given query.', 'No answer', 'There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.']
CHUNK SIZE 1000
{'context_precision': 0.7083, 'context_recall': 0.2500}
AVG accuracy 0.0


In [None]:
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(chunk_size = 500, chunk_overlap = 50)
chunks_500 = text_splitter.split_documents(documents)
print(len(chunks_500))
db_500 = Chroma.from_documents(chunks_500, embeddings_client, persist_directory = "../vectordb/recursive_500")
db_500.persist()
retriever_500 = db_500.as_retriever()
result_500 = run_and_evaluate(retriever_500, prompt, llm)
print("CHUNK SIZE 500")
print(result_500)

In [33]:
db_500 = Chroma(persist_directory = "../ballad/vectordb/recursive_500", embedding_function=embeddings_client)

In [34]:
retriever_500 = db_500.as_retriever()
result_500, acc_500 = run_and_evaluate(retriever_500, prompt, llm)
print("CHUNK SIZE 500")
print(result_500)
print("AVG accuracy "+ str(acc_500))



passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:   0%|          | 0/20 [00:00<?, ?it/s]Runner in Executor raised an exception
openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': True, 'severity': 'm

['There is no error in the given query.', 'There is no error in the query.', 'Begin with that. Peace. Absence of control, no law, no government whatsoever. Similar to being in the arena. Where do we proceed from there? What kind of agreement is needed for us to coexist in peace? What type of social contract is essential for survival?', 'There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.', 'No answer', 'There is no error in the given query.', 'No error found in the query.', 'There is no error in the given query.']
CHUNK SIZE 500
{'context_precision': 0.6984, 'context_recall': 0.7083}
AVG accuracy 0.0


In [35]:
db_2000 = Chroma(persist_directory = "../ballad/vectordb/recursive_2000", embedding_function=embeddings_client)

In [36]:
retriever_2000 = db_2000.as_retriever()
result_2000, acc_2000 = run_and_evaluate(retriever_2000, prompt, llm)
print("CHUNK SIZE 2000")
print(result_2000)
print("AVG accuracy "+ str(acc_2000))



passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:   0%|          | 0/20 [00:00<?, ?it/s]Runner in Executor raised an exception
openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': True, 'severity': 'm

['There is no error in the query.', 'There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.', 'No answer', 'There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.']
CHUNK SIZE 2000
{'context_precision': 0.5938, 'context_recall': 0.2500}
AVG accuracy 0.0


In [37]:
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(chunk_size = 400, chunk_overlap = 100)
chunks_400 = text_splitter.split_documents(documents)
print(len(chunks_400))
db_400 = Chroma.from_documents(chunks_400, embeddings_client, persist_directory = "../ballad/vectordb/recursive_400")

797


In [38]:
retriever_400 = db_400.as_retriever()
result_400, acc_400 = run_and_evaluate(retriever_400, prompt, llm)
print("CHUNK SIZE 400")
print(result_400)
print("AVG accuracy "+ str(acc_400))



passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:  40%|████      | 8/20 [00:01<00:02,  5.14it/s]Runner in Executor raised an exception
openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': True, 'severity': 'medium'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'sev

['There is no error in the given query.', 'No answer', 'Begin with that. Peace. Absence of control, no law, no government whatsoever. Similar to being in the arena. Where do we proceed from there? What kind of agreement is needed for us to coexist in peace? What type of social contract is essential for survival?', 'There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the query.', 'Good night, everybody. Hope we see you next week, and until then, keep dancing your dance, said Lucy Gray, and the whole Covey took one final bow.', 'There is no error in the given query.', "Snow had placed the handkerchief carrying Lucy Gray's scent, taken from the outside pocket of his book bag, into the snake tank. The intention was to prevent the snakes from attacking her as they had done to Clemensia, ensuring her safety.", 'There is no error in the query.']
CHUNK SIZE 400
{'context_precision': 0.3889, 'context_recall': 0.6667}
AVG accuracy 0.1


### now time to look for different top-k

Note: We continue with the size chunk of 2000 as it had the highest average score

In [39]:
retriever_2 = db_400.as_retriever(search_kwargs={"k": 2})
result_2, acc_2 = run_and_evaluate(retriever_2, prompt, llm)
print("CHUNK SIZE 1000, K=2")
print(result_2)
print("AVG accuracy "+ str(acc_2))

passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating: 100%|██████████| 20/20 [00:06<00:00,  3.22it/s]


['There is no error in the given query.', 'There is no error in the given query.', 'Begin with peace. Absence of control, no law, no government whatsoever. Similar to being in the arena. Where do we proceed from there? What kind of agreement is needed for us to coexist in peace? What type of social contract is essential for survival?', 'There is no error in the given query.', 'There is no error in the given query.', 'No error found in the query.', 'Good night, everybody. Hope we see you next week, and until then, keep singing your song, said Lucy Gray, and the whole Covey took one final bow.', 'There is no error in the query.', "Snow had placed the handkerchief carrying Lucy Gray's scent, taken from the outside pocket of his book bag, into the snake tank. The intention was to prevent the snakes from attacking her as they had done to Clemensia, ensuring her safety.", 'There is no error in the given query.']
CHUNK SIZE 1000, K=2
{'context_precision': 0.3500, 'context_recall': 0.6483}
AVG

In [40]:
retriever_3 = db_400.as_retriever(search_kwargs={"k": 3})
result_3, acc_3 = run_and_evaluate(retriever_3, prompt, llm)
print("CHUNK SIZE 500, K=3")
print(result_3)
print("AVG accuracy "+ str(acc_3))

passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating: 100%|██████████| 20/20 [00:09<00:00,  2.00it/s]


['No error found in the query.', 'There is no error in the given query.', 'There is no error in the query.', 'There is no error in the given query.', 'There is no error in the given query.', 'No error found in the query.', 'Good night, everybody. Hope we see you next week, and until then, keep singing your song, said Lucy Gray, and the whole Covey took one final bow.', 'There is no error in the given query.', "Snow had placed the handkerchief carrying Lucy Gray's scent, taken from the outside pocket of his book bag, into the snake tank. The intention was to prevent the snakes from attacking her as they had done to Clemensia, ensuring her safety.", 'There is no error in the given query.']
CHUNK SIZE 500, K=3
{'context_precision': 0.3500, 'context_recall': 0.6833}
AVG accuracy 0.1


In [41]:
retriever_5 = db_400.as_retriever(search_kwargs={"k": 5})
result_5, acc_5 = run_and_evaluate(retriever_5, prompt, llm)
print("CHUNK SIZE 500, K=5")
print(result_5)
print("AVG accuracy "+ str(acc_5))

passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:  45%|████▌     | 9/20 [00:02<00:02,  4.69it/s]Runner in Executor raised an exception
openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': True, 'severity': 'medium'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'sev

['There is no error in the given query.', 'There is no error in the given query.', 'Begin with that. Peace. Absence of control, no law, no government whatsoever. Similar to being in the arena. Where do we proceed from there? What kind of agreement is needed for us to coexist in peace? What type of social contract is essential for survival?', 'There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.', 'Good night, everybody. Hope we see you next week, and until then, keep singing your song, said Lucy Gray, and the whole Covey took one final bow.', 'There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the query.']
CHUNK SIZE 500, K=5
{'context_precision': 0.4944, 'context_recall': 0.4667}
AVG accuracy 0.0


In [42]:
retriever_6 = db_400.as_retriever(search_kwargs={"k": 6})
result_6, acc_6 = run_and_evaluate(retriever_6, prompt, llm)
print("CHUNK SIZE 500, K=6")
print(result_6)
print("AVG accuracy "+ str(acc_6))

passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:  40%|████      | 8/20 [00:02<00:02,  4.66it/s]Runner in Executor raised an exception
openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': True, 'severity': 'medium'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'sev

['There is no error in the given query.', 'There is no error in the given query.', 'Begin with that. Peace. Absence of control, no law, no government whatsoever. Similar to being in the arena. Where do we proceed from there? What kind of agreement is needed for us to coexist in peace? What type of social contract is essential for survival?', 'There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.', 'Good night, everybody. Hope we see you next week, and until then, keep dancing your dance, said Lucy Gray, and the whole Covey took one final bow.', 'There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the query.']
CHUNK SIZE 500, K=6
{'context_precision': 0.4919, 'context_recall': 0.6800}
AVG accuracy 0.0


### look for different retrievers

3 chunks was the best score

#### parent document retriever

In [43]:
parent_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(chunk_size=2000)
child_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(chunk_size=200, chunk_overlap = 0)

# vectorstore = Chroma(collection_name="split_parents", persist_directory = "../ballad/vectordb/parent_summary", embedding_function=embeddings_client)
# vectorstore.persist()
vectorstore = Chroma(persist_directory = "../ballad/vectordb/parent_summary", embedding_function=embeddings_client)
store = InMemoryStore()
parent_document_retriever = ParentDocumentRetriever(
    vectorstore=vectorstore,
    docstore=store,
    child_splitter=child_splitter,
    parent_splitter=parent_splitter,
)
parent_document_retriever.add_documents(documents)
result_parent, acc_parent = run_and_evaluate(parent_document_retriever, prompt, llm)
print(result_parent)
print("AVG accuracy "+ str(acc_parent))




passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:   0%|          | 0/20 [00:00<?, ?it/s]Runner in Executor raised an exception
openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': True, 'severity': 'm

['No answer', 'There is no error in the given query.', 'Begin with peace. Absence of control, no law, no government whatsoever. Similar to being in the arena. Where do we proceed from there? What kind of agreement is needed for us to coexist in peace? What type of social contract is essential for survival?', "You can't take my sass.\nYou can't take my talking.\nYou can't kiss my ass\nAnd then keep on ignoring.\nNothing you can take from me was ever worth keeping.", 'There is no error in the given query.', 'There is no error in the given query.', 'Good night, everybody. Hope we see you next week, and until then, keep dancing your dance, said Lucy Gray, and the whole Covey took one final bow.', 'There is no error in the given query.', 'There is no error in the given query.', 'No answer']
{'context_precision': 0.7778, 'context_recall': 0.2000}
AVG accuracy 0.0


In [47]:
parent_splitter_small = RecursiveCharacterTextSplitter.from_tiktoken_encoder(chunk_size=1000, chunk_overlap = 50)
child_splitter_small = RecursiveCharacterTextSplitter.from_tiktoken_encoder(chunk_size=100, chunk_overlap = 0)

vectorstore = Chroma(collection_name="split_parents_small",persist_directory = "../ballad/vectordb/parent_small_error", embedding_function=embeddings_client)
vectorstore.persist()
#vectorstore = Chroma(persist_directory = "../ballad/vectordb/parent_small_error", embedding_function=embeddings_client)
store = InMemoryStore()
parent_document_retriever_small = ParentDocumentRetriever(
    vectorstore=vectorstore,
    docstore=store,
    child_splitter=child_splitter_small,
    parent_splitter=parent_splitter_small,
)
parent_document_retriever_small.add_documents(documents)
result_parent_small, acc_parent_small = run_and_evaluate(parent_document_retriever_small, prompt, llm)
print(result_parent_small)
print("AVG accuracy "+ str(acc_parent_small))

passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating: 100%|██████████| 20/20 [00:08<00:00,  2.25it/s]


['There is no error in the given query.', 'There is no error in the given query.', 'Begin with that. Peace. Absence of control, no law, no government whatsoever. Similar to being in the arena. Where do we proceed from there? What kind of agreement is needed for us to coexist in peace? What type of social contract is essential for survival?', 'There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.', 'Good night, everybody. Hope we see you next week, and until then, keep dancing your dance, said Lucy Gray, and the whole Covey took one final bow.', 'There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.']
{'context_precision': 0.3000, 'context_recall': 0.8000}
AVG accuracy 0.0


In [46]:
parent_splitter_large = RecursiveCharacterTextSplitter.from_tiktoken_encoder(chunk_size=3000)
child_splitter_large = RecursiveCharacterTextSplitter.from_tiktoken_encoder(chunk_size=300, chunk_overlap = 0)

# vectorstore = Chroma(collection_name="split_parents_large", persist_directory = "../ballad/vectordb/parent_large_summary", embedding_function=embeddings_client)
# vectorstore.persist()
vectorstore = Chroma(persist_directory = "../ballad/vectordb/parent_large_summary", embedding_function=embeddings_client)
store = InMemoryStore()
parent_document_retriever_large = ParentDocumentRetriever(
    vectorstore=vectorstore,
    docstore=store,
    child_splitter=child_splitter_large,
    parent_splitter=parent_splitter_large,
)
parent_document_retriever_large.add_documents(documents)
result_parent_large, acc_parent_large = run_and_evaluate(parent_document_retriever_large, prompt, llm)
print(result_parent_large)
print("AVG accuracy "+ str(acc_parent_large))



passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:   5%|▌         | 1/20 [00:01<00:30,  1.59s/it]Runner in Executor raised an exception
openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'low'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': True, 'severi

['There is no error in the given query.', 'There is no error in the given query.', 'Begin with peace. Absence of control, no law, no government whatsoever. Similar to being in the arena. Where do we proceed from there? What kind of agreement is needed for us to coexist in peace? What type of social contract is essential for survival?', 'There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.', 'No answer', 'There is no error in the query.', 'No error found in the query.', 'There is no error in the given query.']
{'context_precision': 0.8125, 'context_recall': 0.3333}
AVG accuracy 0.0


#### Maximum marginal relevance retrieval

In [48]:
retriever_mmr = db_400.as_retriever(search_type="mmr")
result_mmr, acc_mmr = run_and_evaluate(retriever_mmr, prompt, llm)
print("Marginal relevance")
print(result_mmr)
print("AVG accuracy "+ str(acc_mmr))

You can’t take my talking.
You can kiss my ass
And then keep on ignoring.
Nothing you can take from me was ever worth keeping.


passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:  40%|████      | 8/20 [00:02<00:03,  3.94it/s]Runner in Executor raised an exception
openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': True, 'sever

['No error found in the query.', 'There is no error in the given query.', 'Begin with peace. Absence of control, no law, no government whatsoever. Similar to being in the arena. Where do we proceed from there? What kind of agreement is needed for us to coexist in peace? What type of social contract is essential for survival?', 'No answer', 'There is no error in the given query.', 'No answer', "'input': 'Good night, everybody. Hope we see you next week, and until then, keep dancing your dance, said Lucy Gray, and the whole Covey took one final blow.',\n'output': 'Good night, everybody. Hope we see you next week, and until then, keep singing your song, said Lucy Gray, and the whole Covey took one final bow.'", 'There is no error in the query.', "Snow had placed the handkerchief carrying Lucy Gray's scent, taken from the outside pocket of his book bag, into the snake tank. The intention was to prevent the snakes from attacking her as they had done to Clemensia, ensuring her safety.", 'The

#### BM25

In [49]:
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(chunk_size = 400, chunk_overlap = 50)
chunks_400 = text_splitter.split_documents(documents)

In [50]:
retriever_bm25 = BM25Retriever.from_documents(chunks_400)
result_bm25, acc_bm25 = run_and_evaluate(retriever_bm25, prompt, llm)
print("BM25")
print(result_bm25)
print("AVG accuracy "+ str(acc_bm25))

You can’t take my talking.
You can kiss my ass
And then keep on ignoring.
Nothing you can take from me was ever worth keeping.


passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:  65%|██████▌   | 13/20 [00:02<00:00,  7.54it/s]Runner in Executor raised an exception
openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': True, 'severity': 'medium'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'se

["She believes there's a natural goodness ingrained in human beings. One can recognize when they've crossed into the realm of evil, and the lifelong challenge is to strive to remain on the right side of that line.", 'There is no error in the given query.', 'Begin with chaos. Peace. Absence of control, no law, no government whatsoever. Similar to being in the arena. Where do we proceed from there? What kind of agreement is needed for us to coexist in peace? What type of social contract is essential for survival?', 'No answer', 'There is no error in the given query.', 'There is no error in the given query.', 'Good night, everybody. Hope we see you next week, and until then, keep singing your song, said Lucy Gray, and the whole Covey took one final bow.', 'There is no error in the given query.', "Snow had placed the handkerchief carrying Lucy Gray's scent, taken from the outside pocket of his book bag, into the snake tank. The intention was to prevent the snakes from attacking her as they

#### Ensambler - Hybrid

In [51]:
ensemble_retriever_1 = EnsembleRetriever(retrievers=[retriever_bm25, retriever_400], weights=[0.75, 0.25])
result_ensemble1, acc_retriever_1 = run_and_evaluate(ensemble_retriever_1, prompt, llm)
print("Ensambler 75/25")
print(result_ensemble1)
print("AVG accuracy "+ str(acc_retriever_1))



passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:  35%|███▌      | 7/20 [00:01<00:03,  3.92it/s]Runner in Executor raised an exception
openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': True, 'severity': 'medium'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'sev

['There is no error in the given query.', 'No answer', 'There is no error in the given query.', 'There is no error in the query.', 'There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.']
Ensambler 75/25
{'context_precision': 0.5053, 'context_recall': 0.7299}
AVG accuracy 0.0


In [52]:
ensemble_retriever_2 = EnsembleRetriever(retrievers=[retriever_bm25, retriever_400], weights=[0.5, 0.5])
result_ensemble2, acc_ens_2 = run_and_evaluate(ensemble_retriever_2, prompt, llm)
print("Ensambler 50/50")
print(result_ensemble2)
print("AVG accuracy "+ str(acc_ens_2))

passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:  35%|███▌      | 7/20 [00:02<00:03,  3.54it/s]Runner in Executor raised an exception
openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': True, 'severity': 'medium'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'sev

['There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.']
Ensambler 50/50
{'context_precision': 0.4444, 'context_recall': 0.8349}
AVG accuracy 0.0


In [53]:
ensemble_retriever_3 = EnsembleRetriever(retrievers=[retriever_bm25, retriever_400], weights=[0.25,0.75])
result_ensemble3, acc_ens_3 = run_and_evaluate(ensemble_retriever_3, prompt, llm)
print("Ensambler 25/75")
print(result_ensemble3)
print("AVG accuracy "+ str(acc_ens_3))

You can’t take my talking.
You can kiss my ass
And then keep on ignoring.
Nothing you can take from me was ever worth keeping.


passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:  40%|████      | 8/20 [00:02<00:03,  3.61it/s]Runner in Executor raised an exception
openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': True, 'severity': 'medium'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'sev

['There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.', 'No answer', 'There is no error in the given query.', 'There is no error in the given query.', 'Good night, everybody. Hope we see you next week, and until then, keep singing your song, said Lucy Gray, and the whole Covey took one final bow.', 'There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.']
Ensambler 25/75
{'context_precision': 0.5575, 'context_recall': 0.6389}
AVG accuracy 0.0


In [54]:
ensemble_retriever_4 = EnsembleRetriever(retrievers=[retriever_bm25, retriever_mmr], weights=[0.75, 0.25])
result_ensemble4, acc_retriever_4 = run_and_evaluate(ensemble_retriever_4, prompt, llm)
print("Ensambler 75/25")
print(result_ensemble4)
print("AVG accuracy "+ str(acc_retriever_4))

You can’t take my talking.
You can kiss my ass
And then keep on ignoring.
Nothing you can take from me was ever worth keeping.


passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:  30%|███       | 6/20 [00:01<00:02,  4.75it/s]Runner in Executor raised an exception
openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': True, 'severity': 'medium'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'sev

['There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.', 'No answer', 'There is no error in the given query.', 'No answer', 'There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.']
Ensambler 75/25
{'context_precision': 0.3765, 'context_recall': 1.0000}
AVG accuracy 0.0


In [55]:
ensemble_retriever_5 = EnsembleRetriever(retrievers=[retriever_bm25, retriever_mmr], weights=[0.5, 0.5])
result_ensemble5, acc_retriever_5 = run_and_evaluate(ensemble_retriever_5, prompt, llm)
print("Ensambler 50/50")
print(result_ensemble5)
print("AVG accuracy "+ str(acc_retriever_5))

You can’t take my talking.
You can kiss my ass
And then keep on ignoring.
Nothing you can take from me was ever worth keeping.


passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:  35%|███▌      | 7/20 [00:01<00:01,  6.93it/s]Runner in Executor raised an exception
openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': True, 'severity': 'medium'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'sev

['There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.', 'No answer', 'There is no error in the given query.', 'No answer', 'There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.']
Ensambler 50/50
{'context_precision': 0.4304, 'context_recall': 0.8750}
AVG accuracy 0.0


In [56]:
ensemble_retriever_6 = EnsembleRetriever(retrievers=[retriever_bm25, retriever_mmr], weights=[0.25, 0.75])
result_ensemble6, acc_retriever_6 = run_and_evaluate(ensemble_retriever_6, prompt, llm)
print("Ensambler 25/75")
print(result_ensemble6)
print("AVG accuracy "+ str(acc_retriever_6))

You can’t take my talking.
You can kiss my ass
And then keep on ignoring.
Nothing you can take from me was ever worth keeping.


passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:  30%|███       | 6/20 [00:01<00:03,  4.37it/s]Runner in Executor raised an exception
openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': True, 'severity': 'medium'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'sev

['There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.', 'No answer', 'There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.', 'There is no error in the given query.']
Ensambler 25/75
{'context_precision': 0.6637, 'context_recall': 0.7167}
AVG accuracy 0.0


#### Multi-stage - reranker

In [57]:
os.environ["COHERE_API_KEY"] = getpass.getpass("Cohere API Key:")

In [66]:
retriever_context = retriever_500
compressor = CohereRerank(top_n = 3)
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor, base_retriever=retriever_context
)

result_compression, acc_compression = run_and_evaluate(compression_retriever, prompt, llm)
print("Reranker")
print(result_compression)
print("AVG accuracy "+ str(acc_compression))



CohereAPIError: You are using a Trial key, which is limited to 10 API calls / minute. You can continue to use the Trial key for free or upgrade to a Production key with higher rate limits at 'https://dashboard.cohere.ai/api-keys'. Contact us on 'https://discord.gg/XW44jPfYJu' or email us at support@cohere.com with any questions

#### creating context by remaking the query

In [60]:
template_context = "Generate a search query to fetch the relevant documents using the user's {question}. Craft a query that specifically targets the keywords in the question. In the answer provide only the query."
prompt_context = ChatPromptTemplate.from_template(template_context)

In [61]:
answers_final = []
contexts_final = []
llm_for_context =(
    { "question": itemgetter("question")}
    | RunnablePassthrough()
    | {"response": prompt_context | llm}
)
for query in questions:
    response_check = llm_for_context.invoke({"question": query})
    search_query = response_check["response"].content
    retrieval_augmented_qa_chain = (
        {"context": itemgetter("context"), "question": itemgetter("question")}
        | RunnablePassthrough.assign(context=itemgetter("context"))
        | {"response": prompt | llm, "context": itemgetter("context")}
)
    docs = retriever_500.get_relevant_documents(search_query)
    formatted_docs = []
    for doc in docs:
        resulting_doc = doc.page_content
        formatted_docs.append(resulting_doc)
    try:  
            response = retrieval_augmented_qa_chain.invoke({"context": formatted_docs, "question": query})
            # Access the response content
            answers_final.append(response["response"].content)
            contexts_final.append(formatted_docs)  
    except Exception as e:  
            print(f"Warning: {e}" + "on the following question: " + query)  
            answers_final.append("No answer")
            contexts_final.append(formatted_docs)


result_search_query = evaluation_rag(questions, answers_final, contexts_final, ground_truths)
acc_search_query = calculate_accuracy(answers_final, ground_truths)
print("AVG accuracy "+ str(acc_search_query))



passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:   0%|          | 0/20 [00:00<?, ?it/s]Runner in Executor raised an exception
openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': True, 'severity': 'm

AVG accuracy 0.1


In [None]:
acc_search_query = calculate_accuracy(answers_final, ground_truths)
print("AVG accuracy "+ str(acc_search_query))

### change model to GPT-4

In [62]:
result_bm25_gpt4, acc_bm25_gpt4 = run_and_evaluate(retriever_bm25, prompt, llm_gpt4)
print(result_bm25_gpt4)
print("AVG accuracy "+ str(acc_bm25_gpt4))

You can’t take my talking.
You can kiss my ass
And then keep on ignoring.
Nothing you can take from me was ever worth keeping.


passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:  45%|████▌     | 9/20 [00:01<00:01,  5.81it/s]Runner in Executor raised an exception
openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': True, 'severity': 'medium'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'sev

["She believes there's a natural goodness built into human beings. One can recognize when they've stepped across the line into evil, and the lifelong challenge is to strive to remain on the right side of that line.", 'The corrected sentence is: She could fly around District 12 all she liked, but she and her mockingjays could never harm him again.', 'Begin with that. Chaos. Absence of control, no law, no government whatsoever. Similar to being in the arena. Where do we proceed from there? What kind of agreement is needed for us to coexist in peace? What type of social contract is essential for survival?.', 'No answer', "I'm Lucy Gray, and I'm not truly from Twelve, she explained. My people belong to the Covey, a group of musicians by profession. We simply took a wrong turn one day and found ourselves compelled to stay.", "You've no right to starve people, to punish them for no reason. No right to take away their life and freedom. Those are things everyone is born with, and they're not y

In [63]:
result_mmr_gpt4, acc_mmr_gpt4 = run_and_evaluate(retriever_mmr, prompt, llm_gpt4)
print("Marginal relevance")
print(result_mmr_gpt4)
print("AVG accuracy "+ str(acc_mmr_gpt4))

You can’t take my talking.
You can kiss my ass
And then keep on ignoring.
Nothing you can take from me was ever worth keeping.


passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:  40%|████      | 8/20 [00:02<00:03,  3.13it/s]Runner in Executor raised an exception
openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': True, 'sever

["She believes there's a natural evilness ingrained in human beings. One can recognize when they've crossed into the realm of good, and the lifelong challenge is to strive to remain on the right side of that line.", 'The corrected sentence is: She could fly around District 12 all she liked, but she and her mockingjays could never harm him again.', 'Begin with that. War. Absence of control, no law, no government whatsoever. Similar to being in the arena. Where do we proceed from there? What kind of agreement is needed for us to coexist in peace? What type of social contract is essential for survival?.', 'No answer', "I'm Lucy Gray, and I'm not truly from Twelve, she explained. My people belong to the Covey, a group of musicians by profession. We simply took a wrong turn one day and found ourselves compelled to stay.", 'No answer', 'Good night, everybody. Hope we see you next week, and until then, keep singing your song, said Lucy Gray, and the whole Covey took one final bow.', 'The corr

In [64]:
result_3_gpt4, acc_3_gpt4 = run_and_evaluate(retriever_3, prompt, llm_gpt4)
print("CHUNK SIZE 500, K=3, _gpt4")
print(result_3_gpt4)
print("AVG accuracy "+ str(acc_3_gpt4))

passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating: 100%|██████████| 20/20 [00:12<00:00,  1.62it/s]


["She believes there's a natural goodness built into human beings. One can recognize when they've crossed into the realm of evil, and the lifelong challenge is to strive to remain on the right side of that line.", 'She could fly around District 4 all she liked, but she and her mockingjays could never harm him again.', 'Begin with that. Chaos. Absence of control, no law, no government whatsoever. Similar to being in the arena. Where do we proceed from there? What kind of agreement is needed for us to coexist in peace? What type of social contract is essential for survival?.', 'You can’t take my sass.\nYou can’t take my talking.\nYou can kiss my ass\nAnd then keep on walking.\nNothing you can take from me was ever worth keeping.', "I'm Lucy Gray, and I'm not truly from Twelve, she explained. My people belong to the Covey, a group of musicians by profession. We simply took a wrong turn one day and found ourselves compelled to stay.", "You've no right to starve people, to punish them for n

In [65]:
result_400_gpt4, acc_400_gpt4 = run_and_evaluate(retriever_400, prompt, llm_gpt4)
print("CHUNK SIZE 400")
print(result_400_gpt4)
print("AVG accuracy "+ str(acc_400_gpt4))



passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:  25%|██▌       | 5/20 [00:01<00:02,  5.50it/s]Runner in Executor raised an exception
openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': True, 'severity': 'medium'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'sev

["She believes there's a natural goodness built into human beings. One can recognize when they've crossed into the realm of evil, and the lifelong challenge is to strive to remain on the right side of that line.", 'No answer', 'Begin with that. Chaos. Absence of control, no law, no government whatsoever. Similar to being in the arena. Where do we proceed from there? What kind of agreement is needed for us to coexist in peace? What type of social contract is essential for survival?.', 'You can’t take my sass.\nYou can’t take my talking.\nYou can kiss my ass\nAnd then keep on walking.\nNothing you can take from me was ever worth keeping.', "I'm Lucy Gray, and I'm not truly from Twelve, she explained. My people belong to the Covey, a group of musicians by profession. We simply took a wrong turn one day and found ourselves compelled to stay.", "You've no right to starve people, to punish them for no reason. No right to take away their life and freedom. Those are things everyone is born wit

In [67]:
result_ensemble4_gpt4, acc_retriever_4_gpt4 = run_and_evaluate(ensemble_retriever_4, prompt, llm_gpt4)
print("Ensambler 75/25")
print(result_ensemble4_gpt4)
print("AVG accuracy "+ str(acc_retriever_4_gpt4))

You can’t take my talking.
You can kiss my ass
And then keep on ignoring.
Nothing you can take from me was ever worth keeping.


passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:  35%|███▌      | 7/20 [00:01<00:03,  3.97it/s]Runner in Executor raised an exception
openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': True, 'severity': 'medium'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'sev

["She believes there's a natural goodness built into human beings. One can recognize when they've crossed into the realm of evil, and the lifelong challenge is to strive to remain on the right side of that line..", 'The corrected query is: She could fly around District 12 all she liked, but she and her mockingjays could never harm him again.', 'Begin with that. Chaos. Absence of control, no law, no government whatsoever. Similar to being in the arena. Where do we proceed from there? What kind of agreement is needed for us to coexist in peace? What type of social contract is essential for survival?.', 'No answer', "I'm Lucy Gray, and I'm not really from Twelve, she explained. My people belong to the Covey, a group of musicians by profession. We simply took a wrong turn one day and found ourselves obliged to stay.", 'No answer', 'Good night, everybody. Hope we see you next week, and until then, keep singing your song, said Lucy Gray, and the whole Covey took one final bow.', 'Having some

In [68]:
result_2_gpt4, acc_2_gpt4 = run_and_evaluate(retriever_2, prompt, llm_gpt4)
print("CHUNK SIZE 500, K=2, _gpt4")
print(result_2_gpt4)
print("AVG accuracy "+ str(acc_2_gpt4))

passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating: 100%|██████████| 20/20 [00:06<00:00,  2.99it/s]


["She believes there's a natural goodness built into human beings. One can recognize when they've crossed into the realm of evil, and the lifelong challenge is to strive to remain on the right side of that line.", 'She could fly around District 4 all she liked, but she and her mockingjays could never harm him again.', 'Begin with that. War. Absence of control, no law, no government whatsoever. Similar to being in the arena. Where do we proceed from there? What kind of agreement is needed for us to coexist in peace? What type of social contract is essential for survival?.', 'You can’t take my sass.\nYou can’t take my talking.\nYou can kiss my ass\nAnd then keep on walking.\nNothing you can take from me was ever worth keeping.', "I'm Lucy Gray, and I'm not truly from Twelve, she explained. My people belong to the Covey, a group of musicians by profession. We simply took a wrong turn one day and found ourselves compelled to stay.", "You've no right to starve people, to punish them for no 

In [69]:
result_500_gpt4, acc_500_gpt4 = run_and_evaluate(retriever_500, prompt, llm_gpt4)
print("CHUNK SIZE 500 gpt4")
print(result_500_gpt4)
print("AVG accuracy "+ str(acc_500_gpt4))



passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:   0%|          | 0/20 [00:00<?, ?it/s]Runner in Executor raised an exception
openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': True, 'severity': 'm

["She believes there's a natural goodness ingrained in human beings. One can recognize when they've crossed into the realm of evil, and the lifelong challenge is to strive to remain on the right side of that line.", 'She could fly around District 12 all she liked, but she and her mockingjays could never harm him again.', 'Begin with that. Chaos. Absence of control, no law, no government whatsoever. Similar to being in the arena. Where do we proceed from there? What kind of agreement is needed for us to coexist in peace? What type of social contract is essential for survival?.', 'You can’t take my sass.\nYou can’t take my talking.\nYou can kiss my ass\nAnd then keep on walking.\nNothing you can take from me was ever worth keeping..', "I'm Lucy Gray Baird, and I'm not truly from Twelve, she explained. My people belong to the Covey, a group of musicians by profession. We simply took a wrong turn one day and found ourselves compelled to stay.", "You've no right to starve people, to punish 