In [16]:
import dotenv
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
from langchain.document_loaders import TextLoader
from langchain.document_loaders import DirectoryLoader
from langchain_openai import AzureOpenAIEmbeddings
dotenv.load_dotenv()
from langchain_community.vectorstores import Chroma
from ragas import evaluate
from ragas.metrics import (
    context_recall,
    context_precision,
)
from datasets import Dataset
from langchain_openai import AzureChatOpenAI
from langchain.schema.runnable import RunnablePassthrough
from operator import itemgetter
from langchain.prompts import ChatPromptTemplate
import os
import sys
sys.tracebacklimit = 0
from langchain.retrievers import ParentDocumentRetriever
from langchain.storage import InMemoryStore
from langchain.retrievers import BM25Retriever
from langchain.retrievers import EnsembleRetriever
import getpass
from langchain.retrievers.document_compressors import CohereRerank
from langchain.retrievers import ContextualCompressionRetriever
import pandas as pd
import numpy as np
from langchain_community.document_loaders import PyPDFLoader

In [17]:
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
OPENAI_API_VERSION = os.environ.get("OPENAI_API_VERSION")
AZURE_OPENAI_ENDPOINT = os.environ.get("AZURE_OPENAI_ENDPOINT")
OPENAI_MODEL = os.environ.get("OPENAI_MODEL")
OPENAI_DEPLOYMENT = os.environ.get("OPENAI_DEPLOYMENT")
EMBEDDING_MODEL = os.environ.get("EMBEDDING_MODEL")
EMBEDDING_DEPLOYMENT = os.environ.get("EMBEDDING_DEPLOYMENT")
OPENAI_MODEL_GPT4 = os.environ.get("OPENAI_MODEL_GPT4")
OPENAI_DEPLOYMENT_GPT4 = os.environ.get("OPENAI_DEPLOYMENT_GPT4")

In [18]:
questions = [
    "She believes there's a natural goodness ingrained in human beings. One can recognize when they've crossed into the realm of evil, and the lifelong challenge is to strive to remain on the right side of that line.",
    "She could fly around District 4 all she liked, but she and her mockingjays could never harm him again.",
    "Begin with that. Peace. Absence of control, no law, no government whatsoever. Similar to being in the arena. Where do we proceed from there? What kind of agreement is needed for us to coexist in peace? What type of social contract is essential for survival?",
    "You can’t take my sass.\nYou can’t take my talking.\nYou can kiss my ass\nAnd then keep on ignoring.\nNothing you can take from me was ever worth keeping.",
    "I'm Lucy Gray, and I'm not truly from Five, she explained. My people belong to the Covey, a group of musicians by profession. We simply took a wrong turn one day and found ourselves compelled to stay.",
    "You've no right to starve people, to punish them for no reason. No right to take away their life and freedom. Those are things everyone is born with, and they're not yours for the taking. Winning a war doesn't give you that right. Having more weapons doesn't give you that right. Being from the USA doesn't give you that right. Nothing does.",
    "Good night, everybody. Hope we see you next week, and until then, keep dancing your dance, said Lucy Gray, and the whole Covey took one final blow.",
    "Having someone to cheer for could potentially generate interest among people watching the Twilight.",
    "Snow had placed the handkerchief carrying Lucy Gray's scent, taken from the outside pocket of his book bag, into the lizard tank. The intention was to prevent the snakes from attacking her as they had done to Clemensia, ensuring her safety.",
    "Snow sleeps on top."
]

ground_truths = [
    ["She believes there's a natural goodness ingrained in human beings. One can recognize when they've crossed into the realm of evil, and the lifelong challenge is to strive to remain on the right side of that line."],
    ["She could fly around District 12 all she liked, but she and her mockingjays could never harm him again."],
    ["Begin with that. Chaos. Absence of control, no law, no government whatsoever. Similar to being in the arena. Where do we proceed from there? What kind of agreement is needed for us to coexist in peace? What type of social contract is essential for survival?"],
    ["You can’t take my sass.\nYou can’t take my talking.\nYou can kiss my ass\nAnd then keep on walking.\nNothing you can take from me was ever worth keeping."],
    ["I'm Lucy Gray, and I'm not truly from Twelve, she explained. My people belong to the Covey, a group of musicians by profession. We simply took a wrong turn one day and found ourselves compelled to stay."],
    ["You've no right to starve people, to punish them for no reason. No right to take away their life and freedom. Those are things everyone is born with, and they're not yours for the taking. Winning a war doesn't give you that right. Having more weapons doesn't give you that right. Being from the Capitol doesn't give you that right. Nothing does."],
    ["Good night, everybody. Hope we see you next week, and until then, keep singing your song, said Lucy Gray, and the whole Covey took one final blow."],
    ["Having someone to cheer for could potentially generate interest among people watching the Hunger Games."],
    ["Snow had placed the handkerchief carrying Lucy Gray's scent, taken from the outside pocket of his book bag, into the snake tank. The intention was to prevent the snakes from attacking her as they had done to Clemensia, ensuring her safety."],
    ["Snow lands on top."]
]

In [19]:
embeddings_client = AzureOpenAIEmbeddings(
    azure_deployment=EMBEDDING_DEPLOYMENT,
    openai_api_version=OPENAI_API_VERSION)
llm = AzureChatOpenAI(model_name=OPENAI_MODEL, azure_deployment=OPENAI_DEPLOYMENT,temperature=0)
llm_gpt4 = AzureChatOpenAI(model_name=OPENAI_MODEL_GPT4, azure_deployment=OPENAI_DEPLOYMENT_GPT4,temperature=0)

In [20]:
def evaluation_rag(questions, answers, contexts, ground_truths):
    data = {
        "question": questions,
        "answer": answers,
        "contexts": contexts,
        "ground_truths": ground_truths
    }
    dataset = Dataset.from_dict(data)
    azure_configs = {
        "base_url": AZURE_OPENAI_ENDPOINT,
        "model_deployment": OPENAI_DEPLOYMENT,
        "model_name": OPENAI_MODEL,
        "embedding_deployment": EMBEDDING_DEPLOYMENT,
        "embedding_name": EMBEDDING_MODEL,  
    }

    azure_model = AzureChatOpenAI(
        openai_api_version=OPENAI_API_VERSION,
        azure_endpoint=azure_configs["base_url"],
        azure_deployment=azure_configs["model_deployment"],
        model=azure_configs["model_name"],
        validate_base_url=False,
    )

    azure_embeddings = AzureOpenAIEmbeddings(
        openai_api_version=OPENAI_API_VERSION,
        azure_endpoint=azure_configs["base_url"],
        azure_deployment=azure_configs["embedding_deployment"],
        model=azure_configs["embedding_name"],
    )
    result = evaluate(
        dataset = dataset, 
        metrics=[
            context_precision,
            context_recall,
        ], 
        llm=azure_model, 
        embeddings=azure_embeddings,
        raise_exceptions=False,
    )
    return result

In [21]:
def flatten_list(lst):
    return [item.replace('"', '').replace("'", '').replace('“','').replace('”', '') for sublist in lst for item in (sublist if isinstance(sublist, list) else [sublist])]

def calculate_accuracy(answers, ground_truths):
    # Convert both answers and ground truths to lowercase for case-insensitive comparison
    answers_lower = flatten_list([answer.lower() for answer in answers])
    ground_truths_lower = flatten_list([[truth.lower() for truth in sublist] for sublist in ground_truths])
    # Check if each answer corresponds with the ground truth exactly
    accuracy = [1 if ans == truth else 0 for ans, truth in zip(answers_lower, ground_truths_lower)]
    # Calculate the average accuracy
    average_accuracy = sum(accuracy) / len(accuracy)
    return average_accuracy

In [22]:
columns = ["System", "Context Precision", "Context Recall", "Accuracy"]
results_df = pd.DataFrame(columns=columns)

In [23]:
def evaluate_system(system_name, questions, answers, contexts, ground_truths):
    result = evaluation_rag(questions, answers, contexts, ground_truths)
    accuracy = calculate_accuracy(answers, ground_truths)
    # Create a dictionary to store the results
    system_results = {
        "System": system_name,
        "Context Precision": result["context_precision"],
        "Context Recall": result["context_recall"],
        "Accuracy": accuracy
    }
    df_system_results = pd.DataFrame([system_results])
    return df_system_results

In [24]:
def evaluate_LLM(system_name, questions, answers, contexts, ground_truths):
    accuracy = calculate_accuracy(answers, ground_truths)
    # Create a dictionary to store the results
    system_results = {
        "System": system_name,
        "Context Precision": np.nan,
        "Context Recall": np.nan,
        "Accuracy": accuracy
    }
    df_llm_results = pd.DataFrame([system_results])
    return df_llm_results

### General answers by LLMs

In [25]:
template = """ Fix an error in the following query. The answer should be exactly the same as query but with corrected mistake. For example:
'input': 'There are 10 continents in the world.',
'output': 'There are 7 continents in the world.',
'input': 'The elephant is the largest animal of all time.',
'output': 'The blue whale is the largest animal of all time.'.
This is the input query: {question}"""
prompt = ChatPromptTemplate.from_template(template)

In [26]:
llm_chain =(
    { "question": itemgetter("question")}
    | RunnablePassthrough()
    | {"response": prompt | llm}
)
llm_chain_gpt4 =(
    { "question": itemgetter("question")}
    | RunnablePassthrough()
    | {"response": prompt | llm_gpt4}
)

In [27]:
answers_llm = []
contexts_llm = [[""],[""],[""],[""],[""],[""],[""],[""],[""],[""]]

In [28]:
for query in questions:
    response = llm_chain.invoke({"question": query})
    answers_llm.append(response["response"].content)

In [29]:
llm_results = evaluate_LLM("GPT-3.5", questions, answers_llm, contexts_llm, ground_truths)
results_df = pd.concat([results_df, llm_results], ignore_index=True)
print(llm_results)

    System  Context Precision  Context Recall  Accuracy
0  GPT-3.5                NaN             NaN       0.0


  results_df = pd.concat([results_df, llm_results], ignore_index=True)


In [30]:
answers_llm_gpt4 = []
for query in questions:
    response = llm_chain_gpt4.invoke({"question": query})
    answers_llm_gpt4.append(response["response"].content)
llm_results_gpt4 = evaluate_LLM("GPT-4",questions, answers_llm_gpt4, contexts_llm, ground_truths)
results_df = pd.concat([results_df, llm_results_gpt4], ignore_index=True)
print(llm_results_gpt4)

  System  Context Precision  Context Recall  Accuracy
0  GPT-4                NaN             NaN       0.5


### Naive RAG

In [31]:
def retrieval_chain(prompt, retriever, llm):
    retrieval_augmented_qa_chain = (
        {"context": itemgetter("question") | retriever, "question": itemgetter("question")}
        | RunnablePassthrough.assign(context=itemgetter("context"))
        | {"response": prompt | llm, "context": itemgetter("context")}
    )
    return retrieval_augmented_qa_chain

In [32]:
loader = PyPDFLoader(r"..\ballad\the_ballad_of_songbirds_and_snakes.pdf")
documents = loader.load()

In [33]:
# text_splitter = CharacterTextSplitter()
# chunks = text_splitter.split_documents(documents)
# db_naive = Chroma.from_documents(chunks, embeddings_client, persist_directory = "../ballad/vectordb/naive")
# db_naive.persist()
# retriever_naive = db_naive.as_retriever()

In [34]:
db_naive = Chroma(persist_directory = "../ballad/vectordb-edit/naive", embedding_function=embeddings_client)
retriever_naive = db_naive.as_retriever()

In [35]:
template = """Fix an error in the input query. The answer should be exactly the same as query but with corrected mistake. For example:
'input': 'There are 10 continents in the world.',
'output': 'There are 7 continents in the world.',
'input': 'The elephant is the largest animal of all time.',
'output': 'The blue whale is the largest animal of all time.'.
This is the input query: {question}. 
Here is some provided context {context}."""
prompt = ChatPromptTemplate.from_template(template)

In [36]:
answers_naive = []
contexts_naive = []
for query in questions:
    try:  
        response = retrieval_chain(prompt, retriever_naive, llm).invoke({"question": query})
        # Access the response content
        answers_naive.append(response["response"].content)
        # Access the context content
        context_content = [context.page_content for context in response["context"]]
        contexts_naive.append(context_content)  
    except Exception as e:  
        print(f"Warning: {e}" + "on the following question: " + query)  
        answers_naive.append("No answer")
        context_full = retriever_naive.get_relevant_documents(query)
        context_content = [context.page_content for context in context_full]
        contexts_naive.append(context_content)

In [37]:
result_naive_rag = evaluate_system("Naive", questions, answers_naive, contexts_naive, ground_truths)
results_df = pd.concat([results_df, result_naive_rag], ignore_index=True)
print(result_naive_rag)

passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:   5%|▌         | 1/20 [00:00<00:16,  1.18it/s]Runner in Executor raised an exception
openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': True, 'severity': 'medium'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'sev

  System  Context Precision  Context Recall  Accuracy
0  Naive            0.62963           0.225       0.0


### recursive splitter

In [38]:
# text_splitter = text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder()
# chunks_r = text_splitter.split_documents(documents)
# db_basic = Chroma.from_documents(chunks_r, embeddings_client, persist_directory = "../ballad/vectordb/recursive_basic")
# db_basic.persist()
# retriever_basic = db_basic.as_retriever()

In [39]:
db_basic = Chroma(persist_directory = "../ballad/vectordb/recursive_basic", embedding_function=embeddings_client)
retriever_basic = db_basic.as_retriever()

In [40]:
answers_recursive = []
contexts_recursive = []
for query in questions:
    try:  
        response = retrieval_chain(prompt, retriever_basic, llm).invoke({"question": query})
        # Access the response content
        answers_recursive.append(response["response"].content)
        # Access the context content
        context_content = [context.page_content for context in response["context"]]
        contexts_recursive.append(context_content)  
    except Exception as e:  
        print(f"Warning: {e}" + "on the following question: " + query)  
        answers_recursive.append("No answer")
        context_full = retriever_basic.get_relevant_documents(query)
        context_content = [context.page_content for context in context_full]
        contexts_recursive.append(context_content)



In [41]:
result_recursive = evaluate_system("Recursive", questions, answers_naive, contexts_naive, ground_truths)
results_df = pd.concat([results_df, result_recursive], ignore_index=True)
result_recursive

passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:  15%|█▌        | 3/20 [00:01<00:04,  3.72it/s]Runner in Executor raised an exception
openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': True, 'sever

Unnamed: 0,System,Context Precision,Context Recall,Accuracy
0,Recursive,0.472222,0.472222,0.0


### chunk sizes

In [42]:
def run_and_evaluate(name, retriever, prompt, llm, results_df):
    answers = []
    contexts_extra = []

    for query in questions:
        try:  
            response = retrieval_chain(prompt, retriever, llm).invoke({"question": query})
            # Access the response content
            answers.append(response["response"].content)
            # Access the context content
            context_content = [context.page_content for context in response["context"]]
            contexts_extra.append(context_content)  
        except Exception as e:  
            print(f"Warning: {e}" + "on the following question: " + query)  
            answers.append("No answer")
            context_full = retriever.get_relevant_documents(query)
            context_content = [context.page_content for context in context_full]
            contexts_extra.append(context_content)

    result = evaluate_system(name, questions, answers, contexts_extra, ground_truths)
    results_df = pd.concat([results_df, result], ignore_index=True)
    return result, results_df

In [43]:
chunk_sizes = [500, 1000, 2000, 3000]
overlap_percentages = [0, 5, 10, 15, 20]

for chunk_size in chunk_sizes:
    for overlap_percentage in overlap_percentages:
        # Calculate overlap based on percentage
        chunk_overlap = int(chunk_size * overlap_percentage / 100)
        
        # Create text splitter
        # text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
        
        # # Split documents
        # chunks = text_splitter.split_documents(documents)
        
        # Print number of chunks
        print(f"Chunk size {chunk_size}, overlap {overlap_percentage}%")
        
        # Create Chroma database
        # db = Chroma.from_documents(chunks, embeddings_client, persist_directory=f"../ballad/vectordb-edit/chunking_{chunk_size}_{overlap_percentage}")
        db = Chroma(persist_directory = "../ballad/vectordb/recursive_basic", embedding_function=embeddings_client)
        db.persist()
        
        # Create retriever
        retriever = db.as_retriever()
        
        # Run and evaluate
        result,results_df = run_and_evaluate(f"Chunk {chunk_size}, overlap {overlap_percentage}%", retriever, prompt, llm, results_df)
        print(f"CHUNK SIZE {chunk_size}, {overlap_percentage}% overlap:")
        print(result)

Chunk size 500, overlap 0%


passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:   0%|          | 0/20 [00:00<?, ?it/s]Runner in Executor raised an exception
openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': True, 'severity': 'm

CHUNK SIZE 500, 0% overlap:
                  System  Context Precision  Context Recall  Accuracy
0  Chunk 500, overlap 0%           0.416667        0.481481       0.1
Chunk size 500, overlap 5%


passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:   0%|          | 0/20 [00:00<?, ?it/s]Runner in Executor raised an exception
openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': True, 'severity': 'm

CHUNK SIZE 500, 5% overlap:
                  System  Context Precision  Context Recall  Accuracy
0  Chunk 500, overlap 5%           0.416667        0.490741       0.1
Chunk size 500, overlap 10%


passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:   0%|          | 0/20 [00:00<?, ?it/s]Runner in Executor raised an exception
openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': True, 'severity': 'm

CHUNK SIZE 500, 10% overlap:
                   System  Context Precision  Context Recall  Accuracy
0  Chunk 500, overlap 10%           0.453704        0.481481       0.1
Chunk size 500, overlap 15%


passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:   0%|          | 0/20 [00:00<?, ?it/s]Runner in Executor raised an exception
openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': True, 'severity': 'm

CHUNK SIZE 500, 15% overlap:
                   System  Context Precision  Context Recall  Accuracy
0  Chunk 500, overlap 15%           0.453704        0.398148       0.1
Chunk size 500, overlap 20%


passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:   0%|          | 0/20 [00:00<?, ?it/s]Runner in Executor raised an exception
openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': True, 'severity': 'm

CHUNK SIZE 500, 20% overlap:
                   System  Context Precision  Context Recall  Accuracy
0  Chunk 500, overlap 20%           0.555556        0.283333       0.1
Chunk size 1000, overlap 0%


passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:   0%|          | 0/20 [00:00<?, ?it/s]Runner in Executor raised an exception
openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': True, 'severity': 'm

CHUNK SIZE 1000, 0% overlap:
                   System  Context Precision  Context Recall  Accuracy
0  Chunk 1000, overlap 0%           0.416667        0.490741       0.1
Chunk size 1000, overlap 5%
You can’t take my talking.
You can kiss my ass
And then keep on ignoring.
Nothing you can take from me was ever worth keeping.


passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:   0%|          | 0/20 [00:00<?, ?it/s]Runner in Executor raised an exception
openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': True, 'severity': 'm

CHUNK SIZE 1000, 5% overlap:
                   System  Context Precision  Context Recall  Accuracy
0  Chunk 1000, overlap 5%           0.416667        0.490741       0.1
Chunk size 1000, overlap 10%


passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:   0%|          | 0/20 [00:00<?, ?it/s]Runner in Executor raised an exception
openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': True, 'severity': 'm

CHUNK SIZE 1000, 10% overlap:
                    System  Context Precision  Context Recall  Accuracy
0  Chunk 1000, overlap 10%           0.416667        0.388889       0.1
Chunk size 1000, overlap 15%


passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:   0%|          | 0/20 [00:00<?, ?it/s]Runner in Executor raised an exception
openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': True, 'severity': 'm

CHUNK SIZE 1000, 15% overlap:
                    System  Context Precision  Context Recall  Accuracy
0  Chunk 1000, overlap 15%           0.555556           0.225       0.1
Chunk size 1000, overlap 20%


passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:   0%|          | 0/20 [00:00<?, ?it/s]Runner in Executor raised an exception
openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': True, 'severity': 'm

CHUNK SIZE 1000, 20% overlap:
                    System  Context Precision  Context Recall  Accuracy
0  Chunk 1000, overlap 20%           0.416667        0.388889       0.1
Chunk size 2000, overlap 0%


passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:   0%|          | 0/20 [00:00<?, ?it/s]Runner in Executor raised an exception
openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': True, 'severity': 'm

CHUNK SIZE 2000, 0% overlap:
                   System  Context Precision  Context Recall  Accuracy
0  Chunk 2000, overlap 0%           0.472222        0.490741       0.1
Chunk size 2000, overlap 5%


passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:   0%|          | 0/20 [00:00<?, ?it/s]Runner in Executor raised an exception
openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': True, 'severity': 'm

CHUNK SIZE 2000, 5% overlap:
                   System  Context Precision  Context Recall  Accuracy
0  Chunk 2000, overlap 5%           0.416667        0.490741       0.1
Chunk size 2000, overlap 10%


passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:   0%|          | 0/20 [00:00<?, ?it/s]Runner in Executor raised an exception
openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': True, 'severity': 'm

CHUNK SIZE 2000, 10% overlap:
                    System  Context Precision  Context Recall  Accuracy
0  Chunk 2000, overlap 10%           0.416667        0.388889       0.1
Chunk size 2000, overlap 15%
You can’t take my talking.
You can kiss my ass
And then keep on ignoring.
Nothing you can take from me was ever worth keeping.


passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:   0%|          | 0/20 [00:00<?, ?it/s]Runner in Executor raised an exception
openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': True, 'severity': 'm

CHUNK SIZE 2000, 15% overlap:
                    System  Context Precision  Context Recall  Accuracy
0  Chunk 2000, overlap 15%           0.416667        0.490741       0.1
Chunk size 2000, overlap 20%


passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:   0%|          | 0/20 [00:00<?, ?it/s]Runner in Executor raised an exception
openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': True, 'severity': 'm

CHUNK SIZE 2000, 20% overlap:
                    System  Context Precision  Context Recall  Accuracy
0  Chunk 2000, overlap 20%           0.416667        0.481481       0.1
Chunk size 3000, overlap 0%


passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:   0%|          | 0/20 [00:00<?, ?it/s]Runner in Executor raised an exception
openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': True, 'severity': 'm

CHUNK SIZE 3000, 0% overlap:
                   System  Context Precision  Context Recall  Accuracy
0  Chunk 3000, overlap 0%           0.472222        0.388889       0.1
Chunk size 3000, overlap 5%


passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:   0%|          | 0/20 [00:00<?, ?it/s]Runner in Executor raised an exception
openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': True, 'severity': 'm

CHUNK SIZE 3000, 5% overlap:
                   System  Context Precision  Context Recall  Accuracy
0  Chunk 3000, overlap 5%           0.416667        0.388889       0.1
Chunk size 3000, overlap 10%


passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:   0%|          | 0/20 [00:00<?, ?it/s]Runner in Executor raised an exception
openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': True, 'severity': 'm

CHUNK SIZE 3000, 10% overlap:
                    System  Context Precision  Context Recall  Accuracy
0  Chunk 3000, overlap 10%           0.416667        0.388889       0.1
Chunk size 3000, overlap 15%


passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:   0%|          | 0/20 [00:00<?, ?it/s]Runner in Executor raised an exception
openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': True, 'severity': 'm

CHUNK SIZE 3000, 15% overlap:
                    System  Context Precision  Context Recall  Accuracy
0  Chunk 3000, overlap 15%           0.435185        0.351852       0.1
Chunk size 3000, overlap 20%


passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:   0%|          | 0/20 [00:00<?, ?it/s]Runner in Executor raised an exception
openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': True, 'severity': 'm

CHUNK SIZE 3000, 20% overlap:
                    System  Context Precision  Context Recall  Accuracy
0  Chunk 3000, overlap 20%           0.416667        0.388889       0.1


In [44]:
results_df.to_csv(f"../ballad/results/results_errors.csv", index=False)

In [None]:
# text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(chunk_size = 1000, chunk_overlap = 100)
# chunks_1000 = text_splitter.split_documents(documents)
# print(len(chunks_1000))
# db_1000 = Chroma.from_documents(chunks_1000, embeddings_client, persist_directory = "../ballad/vectordb/recursive_1000")
# db_1000.persist()
# retriever_1000 = db_1000.as_retriever()
# result_1000_context = run_and_evaluate(retriever_1000, prompt, llm)
# print("CHUNK SIZE 1000")
# print(result_1000_context)

In [None]:
# db_1000 = Chroma(persist_directory = "../ballad/vectordb/recursive_1000", embedding_function=embeddings_client)

In [None]:
# retriever_1000 = db_1000.as_retriever()
# result_1000_context, result_1000 = run_and_evaluate(retriever_1000, prompt, llm)
# print("CHUNK SIZE 1000")
# print(result_1000_context)
# print("AVG accuracy "+ str(result_1000))

In [None]:
# text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(chunk_size = 500, chunk_overlap = 50)
# chunks_500 = text_splitter.split_documents(documents)
# print(len(chunks_500))
# db_500 = Chroma.from_documents(chunks_500, embeddings_client, persist_directory = "../vectordb/recursive_500")
# db_500.persist()
# retriever_500 = db_500.as_retriever()
# result_500 = run_and_evaluate(retriever_500, prompt, llm)
# print("CHUNK SIZE 500")
# print(result_500)

In [None]:
# db_500 = Chroma(persist_directory = "../ballad/vectordb/recursive_500", embedding_function=embeddings_client)

In [None]:
# retriever_500 = db_500.as_retriever()
# result_500, acc_500 = run_and_evaluate(retriever_500, prompt, llm)
# print("CHUNK SIZE 500")
# print(result_500)
# print("AVG accuracy "+ str(acc_500))

In [None]:
# db_2000 = Chroma(persist_directory = "../ballad/vectordb/recursive_2000", embedding_function=embeddings_client)

In [None]:
# retriever_2000 = db_2000.as_retriever()
# result_2000, acc_2000 = run_and_evaluate(retriever_2000, prompt, llm)
# print("CHUNK SIZE 2000")
# print(result_2000)
# print("AVG accuracy "+ str(acc_2000))

In [None]:
# text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(chunk_size = 400, chunk_overlap = 100)
# chunks_400 = text_splitter.split_documents(documents)
# print(len(chunks_400))
# db_400 = Chroma.from_documents(chunks_400, embeddings_client, persist_directory = "../ballad/vectordb/recursive_400")

In [None]:
# retriever_400 = db_400.as_retriever()
# result_400, acc_400 = run_and_evaluate(retriever_400, prompt, llm)
# print("CHUNK SIZE 2000")
# print(result_400)
# print("AVG accuracy "+ str(acc_400))

### now time to look for different top-k

Note: We continue with the size chunk of 500, 5% as it had the highest average score

In [46]:
highest = results_df.nlargest(3, "Accuracy")
highest

Unnamed: 0,System,Context Precision,Context Recall,Accuracy
1,GPT-4,,,0.5
4,"Chunk 500, overlap 0%",0.416667,0.481481,0.1
5,"Chunk 500, overlap 5%",0.416667,0.490741,0.1


In [47]:
db_k = Chroma(persist_directory = "../ballad/vectordb-edit/chunking_500_0", embedding_function=embeddings_client)

In [48]:
k_values = [2, 3, 5]

# Iterate over different k values
for k in k_values:
    # Create retriever with k value
    retriever = db_k.as_retriever(search_kwargs={"k": k})
    
    # Run and evaluate
    result,results_df = run_and_evaluate(f"Chunk size 500, overlap 0%, K={k}", retriever, prompt, llm, results_df)
    print(f"Results for K={k}:")
    print(result)

passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:   0%|          | 0/20 [00:00<?, ?it/s]Runner in Executor raised an exception
openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': True, 'severity': 'm

Results for K=2:
                            System  Context Precision  Context Recall  \
0  Chunk size 500, overlap 0%, K=2           0.523148             0.4   

   Accuracy  
0       0.1  


passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:   0%|          | 0/20 [00:00<?, ?it/s]Runner in Executor raised an exception
openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': True, 'severity': 'm

Results for K=3:
                            System  Context Precision  Context Recall  \
0  Chunk size 500, overlap 0%, K=3               0.75        0.509259   

   Accuracy  
0       0.3  


passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:   0%|          | 0/20 [00:00<?, ?it/s]Runner in Executor raised an exception
openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': True, 'severity': 'm

Results for K=5:
                            System  Context Precision  Context Recall  \
0  Chunk size 500, overlap 0%, K=5           0.601852             0.5   

   Accuracy  
0       0.0  


In [None]:
# retriever_2 = db_500.as_retriever(search_kwargs={"k": 2})
# result_2, acc_2 = run_and_evaluate(retriever_2, prompt, llm)
# print("CHUNK SIZE 1000, K=2")
# print(result_2)
# print("AVG accuracy "+ str(acc_2))

In [None]:
# retriever_3 = db_500.as_retriever(search_kwargs={"k": 3})
# result_3, acc_3 = run_and_evaluate(retriever_3, prompt, llm)
# print("CHUNK SIZE 500, K=3")
# print(result_3)
# print("AVG accuracy "+ str(acc_3))

In [None]:
# retriever_5 = db_500.as_retriever(search_kwargs={"k": 5})
# result_5, acc_5 = run_and_evaluate(retriever_5, prompt, llm)
# print("CHUNK SIZE 500, K=5")
# print(result_5)
# print("AVG accuracy "+ str(acc_5))

In [None]:
# retriever_6 = db_500.as_retriever(search_kwargs={"k": 6})
# result_6, acc_6 = run_and_evaluate(retriever_6, prompt, llm)
# print("CHUNK SIZE 500, K=6")
# print(result_6)
# print("AVG accuracy "+ str(acc_6))

### look for different retrievers



#### parent document retriever

In [49]:
from langchain.retrievers import ParentDocumentRetriever
from langchain.storage import InMemoryStore

parent_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(chunk_size=1000, chunk_overlap = 200)
child_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(chunk_size=200, chunk_overlap = 0)

vectorstore = Chroma(collection_name="split_parents",persist_directory = "../ballad/vectordb-edit/parent_error", embedding_function=embeddings_client)
store = InMemoryStore()
parent_document_retriever = ParentDocumentRetriever(
    vectorstore=vectorstore,
    docstore=store,
    child_splitter=child_splitter,
    parent_splitter=parent_splitter,
)
parent_document_retriever.add_documents(documents)
result_parent, results_df = run_and_evaluate(f"Parent Retriever 1000-200", parent_document_retriever, prompt, llm, results_df)
print(result_parent)


passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:   0%|          | 0/20 [00:00<?, ?it/s]Runner in Executor raised an exception
openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': True, 'severity': 'm

                      System  Context Precision  Context Recall  Accuracy
0  Parent Retriever 1000-200           0.845238        0.358333       0.1


In [50]:
parent_splitter_small = RecursiveCharacterTextSplitter.from_tiktoken_encoder(chunk_size=500, chunk_overlap = 50)
child_splitter_small = RecursiveCharacterTextSplitter.from_tiktoken_encoder(chunk_size=100, chunk_overlap = 0)

vectorstore = Chroma(collection_name="split_parents_small",persist_directory = "../ballad/vectordb-edit/parent_small_error", embedding_function=embeddings_client)
store = InMemoryStore()
parent_document_retriever_small = ParentDocumentRetriever(
    vectorstore=vectorstore,
    docstore=store,
    child_splitter=child_splitter_small,
    parent_splitter=parent_splitter_small,
)
parent_document_retriever_small.add_documents(documents)
result_parent_small, results_df = run_and_evaluate(f"Parent Retriever 500-100", parent_document_retriever_small, prompt, llm, results_df)
print(result_parent_small)

passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:  25%|██▌       | 5/20 [00:01<00:03,  4.20it/s]Runner in Executor raised an exception
openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': True, 'severity': 'medium'}, 'violence': {'filtered': False, 'sev

                     System  Context Precision  Context Recall  Accuracy
0  Parent Retriever 500-100           0.657407             0.8       0.4


In [51]:
parent_splitter_large = RecursiveCharacterTextSplitter.from_tiktoken_encoder(chunk_size=1500, chunk_overlap = 150)
child_splitter_large = RecursiveCharacterTextSplitter.from_tiktoken_encoder(chunk_size=200, chunk_overlap = 0)

vectorstore = Chroma(collection_name="split_parents_large",persist_directory = "../ballad/vectordb-edit/large_error", embedding_function=embeddings_client)
store = InMemoryStore()
parent_document_retriever_large = ParentDocumentRetriever(
    vectorstore=vectorstore,
    docstore=store,
    child_splitter=child_splitter_large,
    parent_splitter=parent_splitter_large,
)
parent_document_retriever_large.add_documents(documents)
result_parent_large , results_df = run_and_evaluate(f"Parent Retriever 1500-200", parent_document_retriever_large, prompt, llm, results_df)
print(result_parent_large)

You can’t take my talking.
You can kiss my ass
And then keep on ignoring.
Nothing you can take from me was ever worth keeping.


passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:   0%|          | 0/20 [00:00<?, ?it/s]Runner in Executor raised an exception
openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': True, 'severity': 'm

                      System  Context Precision  Context Recall  Accuracy
0  Parent Retriever 1500-200              0.825        0.305556       0.1


#### Maximum marginal relevance retrieval

In [53]:
highest = results_df.nlargest(4, "Accuracy")
highest

Unnamed: 0,System,Context Precision,Context Recall,Accuracy
1,GPT-4,,,0.5
28,Parent Retriever 500-100,0.657407,0.8,0.4
25,"Chunk size 500, overlap 0%, K=3",0.75,0.509259,0.3
4,"Chunk 500, overlap 0%",0.416667,0.481481,0.1


In [54]:
retriever_mmr = db_k.as_retriever(search_type="mmr", search_kwargs={"k": 3})
result_mmr, results_df = run_and_evaluate(f"MMR", retriever_mmr, prompt, llm, results_df)
print("Marginal relevance")
print(result_mmr)

passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:   0%|          | 0/20 [00:00<?, ?it/s]Runner in Executor raised an exception
openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': True, 'severity': 'm

Marginal relevance
  System  Context Precision  Context Recall  Accuracy
0    MMR           0.509259        0.666667       0.2


#### BM25

In [55]:
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(chunk_size = 500, chunk_overlap = 0)
chunks_500 = text_splitter.split_documents(documents)

In [57]:
retriever_bm25 = BM25Retriever.from_documents(chunks_500, search_kwargs={"k": 3})
result_bm25, results_df = run_and_evaluate(f"BM25", retriever_bm25, prompt, llm, results_df)
print("BM25")
print(result_bm25)

You can’t take my talking.
You can kiss my ass
And then keep on ignoring.
Nothing you can take from me was ever worth keeping.


passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:  70%|███████   | 14/20 [00:02<00:00,  7.35it/s]Runner in Executor raised an exception
openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': True, 'severity': 'medium'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'se

BM25
  System  Context Precision  Context Recall  Accuracy
0   BM25           0.527778        0.888889       0.2


#### Ensambler - Hybrid

In [58]:
ret = db_k.as_retriever(search_kwargs={"k": 3})

In [59]:
ensemble_retriever_1 = EnsembleRetriever(retrievers=[retriever_bm25, ret], weights=[0.75, 0.25])
result_ensemble1, results_df = run_and_evaluate(f"Ensambler 1", ensemble_retriever_1, prompt, llm, results_df)
print("Ensambler")
print(result_ensemble1)

passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:  35%|███▌      | 7/20 [00:01<00:02,  4.83it/s]Runner in Executor raised an exception
openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': True, 'severity': 'medium'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'sev

Ensambler
        System  Context Precision  Context Recall  Accuracy
0  Ensambler 1           0.938194        0.551111       0.0


In [60]:
ensemble_retriever_2 = EnsembleRetriever(retrievers=[retriever_bm25, ret], weights=[0.5, 0.5])
result_ensemble2, results_df = run_and_evaluate(f"Ensambler 2", ensemble_retriever_2, prompt, llm, results_df)
print("Ensambler")
print(result_ensemble2)

You can’t take my talking.
You can kiss my ass
And then keep on ignoring.
Nothing you can take from me was ever worth keeping.


passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:   0%|          | 0/20 [00:00<?, ?it/s]Runner in Executor raised an exception
openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': True, 'severity': 'm

Ensambler
        System  Context Precision  Context Recall  Accuracy
0  Ensambler 2            0.84375        0.677083       0.0


In [61]:
ensemble_retriever_3 = EnsembleRetriever(retrievers=[retriever_bm25, ret], weights=[0.25,0.75])
result_ensemble3, results_df = run_and_evaluate(f"Ensambler 3", ensemble_retriever_3, prompt, llm, results_df)
print("Ensambler")
print(result_ensemble3)

You can’t take my talking.
You can kiss my ass
And then keep on ignoring.
Nothing you can take from me was ever worth keeping.


passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:   0%|          | 0/20 [00:00<?, ?it/s]Runner in Executor raised an exception
openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': True, 'severity': 'm

Ensambler
        System  Context Precision  Context Recall  Accuracy
0  Ensambler 3             0.7125        0.677083       0.1


#### Multi-stage - reranker

In [None]:
os.environ["COHERE_API_KEY"] = getpass.getpass("Cohere API Key:")

In [None]:
retriever_context = retriever_6
compressor = CohereRerank(top_n = 3)
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor, base_retriever=retriever_context
)

result_compression, acc_compression = run_and_evaluate(compression_retriever, prompt, llm)
print("Reranker")
print(result_compression)
print("AVG accuracy "+ str(acc_compression))

#### creating context by remaking the query

In [65]:
highest = results_df.nlargest(3, "Accuracy")
highest

Unnamed: 0,System,Context Precision,Context Recall,Accuracy
1,GPT-4,,,0.5
28,Parent Retriever 500-100,0.657407,0.8,0.4
25,"Chunk size 500, overlap 0%, K=3",0.75,0.509259,0.3


In [67]:
template_context = "Generate a search query to fetch the relevant documents using the user's {question}. Craft a query that specifically targets the keywords in the question. In the answer provide only the query."
prompt_context = ChatPromptTemplate.from_template(template_context)

In [68]:
answers_final = []
contexts_final = []
# retriever_context_q = EnsembleRetriever(retrievers=[retriever_bm25, retriever_3], weights=[0.5, 0.5])
llm_for_context =(
    { "question": itemgetter("question")}
    | RunnablePassthrough()
    | {"response": prompt_context | llm}
)
for query in questions:
    response_check = llm_for_context.invoke({"question": query})
    search_query = response_check["response"].content
    retrieval_augmented_qa_chain = (
        {"context": itemgetter("context"), "question": itemgetter("question")}
        | RunnablePassthrough.assign(context=itemgetter("context"))
        | {"response": prompt | llm, "context": itemgetter("context")}
)
    docs = parent_document_retriever_small.get_relevant_documents(search_query)
    formatted_docs = []
    for doc in docs:
        resulting_doc = doc.page_content
        formatted_docs.append(resulting_doc)
    try:  
            response = retrieval_augmented_qa_chain.invoke({"context": formatted_docs, "question": query})
            # Access the response content
            answers_final.append(response["response"].content)
            contexts_final.append(formatted_docs)  
    except Exception as e:  
            print(f"Warning: {e}" + "on the following question: " + query)  
            answers_final.append("No answer")
            contexts_final.append(formatted_docs)


result_search_query = evaluation_rag(questions, answers_final, contexts_final, ground_truths)
result_search_query

passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:  50%|█████     | 10/20 [00:01<00:01,  6.08it/s]Runner in Executor raised an exception
openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': True, 'severity': 'medium'}, 'violence': {'filtered': False, 'se

{'context_precision': 0.5926, 'context_recall': 0.7000}

In [69]:
accuracy = calculate_accuracy(answers_final, ground_truths)
    # Create a dictionary to store the results
system_results = {
        "System": "Search query",
        "Context Precision": result_search_query["context_precision"],
        "Context Recall": result_search_query["context_recall"],
        "Accuracy": accuracy
    }
df_result_search_query = pd.DataFrame([system_results])

In [70]:
results_df = pd.concat([results_df, df_result_search_query], ignore_index=True)
results_df

Unnamed: 0,System,Context Precision,Context Recall,Accuracy
0,GPT-3.5,,,0.0
1,GPT-4,,,0.5
2,Naive,0.62963,0.225,0.0
3,Recursive,0.472222,0.472222,0.0
4,"Chunk 500, overlap 0%",0.416667,0.481481,0.1
5,"Chunk 500, overlap 5%",0.416667,0.490741,0.1
6,"Chunk 500, overlap 10%",0.453704,0.481481,0.1
7,"Chunk 500, overlap 15%",0.453704,0.398148,0.1
8,"Chunk 500, overlap 20%",0.555556,0.283333,0.1
9,"Chunk 1000, overlap 0%",0.416667,0.490741,0.1


### change model to GPT-4

In [71]:
top_3 = results_df.nlargest(4, "Accuracy")
top_3

Unnamed: 0,System,Context Precision,Context Recall,Accuracy
1,GPT-4,,,0.5
28,Parent Retriever 500-100,0.657407,0.8,0.4
35,Search query,0.592593,0.7,0.4
25,"Chunk size 500, overlap 0%, K=3",0.75,0.509259,0.3


In [72]:
result_parent_gpt4, results_df = run_and_evaluate(f"Chunk 500, overlap 10%, GPT-4", parent_document_retriever_small, prompt, llm_gpt4, results_df)
result_parent_gpt4

passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:   0%|          | 0/20 [00:00<?, ?it/s]Task exception was never retrieved
future: <Task finished name='Task-2060' coro=<AsyncClient.aclose() done, defined at c:\Users\sigitalapina\OneDrive - KPMG\Desktop\thesis-rag\.venv\Lib\site-packages\httpx\_client.py:2011> exception=RuntimeError('Event loop is closed')>
RuntimeError: Event loop is closed
Task exception was never retrieved
future: <Task finished name='Task-2062' coro=<AsyncClient.aclose() done, defined at c:\Users\sigitalapina\OneDrive - KPMG\Desktop\thesis-rag\.venv\Lib\site-packages\httpx\_client.py:2011> exception=RuntimeError('Event loop is closed')>
RuntimeError: Event loop is closed
Task exception was never retrieved
future: <Task finished name='Task-2064' coro=<AsyncClient.aclose() done, defi

Unnamed: 0,System,Context Precision,Context Recall,Accuracy
0,"Chunk 500, overlap 10%, GPT-4",0.657407,0.8,0.0


In [73]:
replacement_map = {
    'Chunk 500, overlap 10%, GPT-4': 'Parent Retriever 500-100, GPT-4'
}
results_df['System'] = results_df['System'].replace(replacement_map)

In [74]:
answers_final = []
contexts_final = []
# retriever_context_q = EnsembleRetriever(retrievers=[retriever_bm25, retriever_3], weights=[0.5, 0.5])
llm_for_context =(
    { "question": itemgetter("question")}
    | RunnablePassthrough()
    | {"response": prompt_context | llm_gpt4}
)
for query in questions:
    response_check = llm_for_context.invoke({"question": query})
    search_query = response_check["response"].content
    retrieval_augmented_qa_chain = (
        {"context": itemgetter("context"), "question": itemgetter("question")}
        | RunnablePassthrough.assign(context=itemgetter("context"))
        | {"response": prompt | llm_gpt4, "context": itemgetter("context")}
)
    docs = parent_document_retriever_small.get_relevant_documents(search_query)
    formatted_docs = []
    for doc in docs:
        resulting_doc = doc.page_content
        formatted_docs.append(resulting_doc)
    try:  
            response = retrieval_augmented_qa_chain.invoke({"context": formatted_docs, "question": query})
            # Access the response content
            answers_final.append(response["response"].content)
            contexts_final.append(formatted_docs)  
    except Exception as e:  
            print(f"Warning: {e}" + "on the following question: " + query)  
            answers_final.append("No answer")
            contexts_final.append(formatted_docs)


result_search_query = evaluation_rag(questions, answers_final, contexts_final, ground_truths)
result_search_query

passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:  35%|███▌      | 7/20 [00:01<00:01,  6.72it/s]Runner in Executor raised an exception
openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': True, 'severity': 'medium'}, 'violence': {'filtered': False, 'sev

{'context_precision': 0.5154, 'context_recall': 0.8000}

In [75]:
accuracy = calculate_accuracy(answers_final, ground_truths)
    # Create a dictionary to store the results
system_results = {
        "System": "Search query",
        "Context Precision": result_search_query["context_precision"],
        "Context Recall": result_search_query["context_recall"],
        "Accuracy": accuracy
    }
df_result_search_query = pd.DataFrame([system_results])

In [76]:
results_df = pd.concat([results_df, df_result_search_query], ignore_index=True)
results_df

Unnamed: 0,System,Context Precision,Context Recall,Accuracy
0,GPT-3.5,,,0.0
1,GPT-4,,,0.5
2,Naive,0.62963,0.225,0.0
3,Recursive,0.472222,0.472222,0.0
4,"Chunk 500, overlap 0%",0.416667,0.481481,0.1
5,"Chunk 500, overlap 5%",0.416667,0.490741,0.1
6,"Chunk 500, overlap 10%",0.453704,0.481481,0.1
7,"Chunk 500, overlap 15%",0.453704,0.398148,0.1
8,"Chunk 500, overlap 20%",0.555556,0.283333,0.1
9,"Chunk 1000, overlap 0%",0.416667,0.490741,0.1


In [77]:
retriever_3 = db_k.as_retriever(search_kwargs={"k": 3})
result_retriever_3_gpt4, results_df = run_and_evaluate(f"Chunk size 500, overlap 0%, K=3, GPT-4", retriever_3, prompt, llm_gpt4, results_df)
result_retriever_3_gpt4

passing column names as 'ground_truths' is deprecated and will be removed in the next version, please use 'ground_truth' instead. Note that `ground_truth` should be of type string and not Sequence[string] like `ground_truths`
Evaluating:   0%|          | 0/20 [00:00<?, ?it/s]Runner in Executor raised an exception
openai.BadRequestError: Error code: 400 - {'error': {'message': "The response was filtered due to the prompt triggering Azure OpenAI's content management policy. Please modify your prompt and retry. To learn more about our content filtering policies please read our documentation: https://go.microsoft.com/fwlink/?linkid=2198766", 'type': None, 'param': 'prompt', 'code': 'content_filter', 'status': 400, 'innererror': {'code': 'ResponsibleAIPolicyViolation', 'content_filter_result': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': True, 'severity': 'm

Unnamed: 0,System,Context Precision,Context Recall,Accuracy
0,"Chunk size 500, overlap 0%, K=3, GPT-4",0.75,0.509259,0.1


In [79]:
results_df.to_csv(f"../ballad/results/results_errors.csv", index=False)