# Contextual Retrieval - AI Makerspace Event

In [24]:
!pip install -qU langchain langchain-anthropic langchain-cohere langchain-openai cohere anthropic openai ragas rank_bm25 faiss-cpu

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m27.5/27.5 MB[0m [31m39.4 MB/s[0m eta [36m0:00:00[0m
[?25h

## Dependencies and API Keys

In [10]:
import os
import getpass

os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API Key:")

OpenAI API Key:··········


In [39]:
os.environ["ANTHROPIC_API_KEY"] = getpass.getpass("Anthropic API Key:")

Anthropic API Key:··········


## Data

In [5]:
!wget https://www.gutenberg.org/files/11/11-0.txt -O alice_in_wonderland.txt

--2024-10-16 15:23:35--  https://www.gutenberg.org/files/11/11-0.txt
Resolving www.gutenberg.org (www.gutenberg.org)... 152.19.134.47, 2610:28:3090:3000:0:bad:cafe:47
Connecting to www.gutenberg.org (www.gutenberg.org)|152.19.134.47|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 154638 (151K) [text/plain]
Saving to: ‘alice_in_wonderland.txt’


2024-10-16 15:23:35 (3.52 MB/s) - ‘alice_in_wonderland.txt’ saved [154638/154638]



## Naive Ensemble Retriever with BM25

In [6]:
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

loader = TextLoader('./alice_in_wonderland.txt')
documents = loader.load()

In [107]:
len(documents)

1

In [7]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=750, chunk_overlap=0)
split_documents = text_splitter.split_documents(documents)

In [8]:
len(split_documents)

242

In [13]:
from ragas.llms import LangchainLLMWrapper
from langchain_openai import ChatOpenAI
generator_llm = LangchainLLMWrapper(ChatOpenAI(model="gpt-4o-mini"))

In [15]:
from ragas.testset import TestsetGenerator

generator = TestsetGenerator(llm=generator_llm)
dataset = generator.generate_with_langchain_docs(split_documents, testset_size=30)

Applying [SummaryExtractor, HeadlinesExtractor]:   0%|          | 0/484 [00:00<?, ?it/s]

Applying EmbeddingExtractor:   0%|          | 0/242 [00:00<?, ?it/s]

Applying HeadlineSplitter:   0%|          | 0/242 [00:00<?, ?it/s]

Applying [EmbeddingExtractor, KeyphrasesExtractor, TitleExtractor]:   0%|          | 0/801 [00:00<?, ?it/s]

Applying CosineSimilarityBuilder:   0%|          | 0/1 [00:00<?, ?it/s]

Applying SummaryCosineSimilarityBuilder:   0%|          | 0/1 [00:00<?, ?it/s]

Generating Scenarios:   0%|          | 0/3 [00:00<?, ?it/s]

Generating common themes:   0%|          | 0/1 [00:00<?, ?it/s]

Generating common_concepts:   0%|          | 0/1 [00:00<?, ?it/s]

Generating Samples:   0%|          | 0/31 [00:00<?, ?it/s]

In [18]:
dataset_df = dataset.to_pandas()

In [19]:
dataset_df.to_csv("./csv_testset.csv")

In [143]:
from langchain.retrievers import EnsembleRetriever
from langchain_community.retrievers import BM25Retriever
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings

def create_ensemble_retriever_from_documents(documents, num_docs_retrieved = 5, weights = [0.5,0.5]):
  bm25_retriever = BM25Retriever.from_documents(
      split_documents
  )
  bm25_retriever.k = num_docs_retrieved
  embedding_model = OpenAIEmbeddings(model="text-embedding-3-small")
  faiss_vectorstore = FAISS.from_documents(
      split_documents, embedding_model
  )
  faiss_retriever = faiss_vectorstore.as_retriever(search_kwargs={"k": num_docs_retrieved})
  ensemble_retriever = EnsembleRetriever(
      retrievers=[bm25_retriever, faiss_retriever], weights=weights
  )
  return ensemble_retriever

In [144]:
ensemble_retriever = create_ensemble_retriever_from_documents(split_documents)

In [145]:
from langchain_core.prompts import PromptTemplate

rag_prompt_template = """\
You must answer the question using the provided context.

Context:
{context}

Question:
{question}
"""

rag_prompt = PromptTemplate.from_template(rag_prompt_template)

In [151]:
from langchain_anthropic import ChatAnthropic
from langchain_openai import ChatOpenAI

llm = ChatAnthropic(model='claude-3-haiku-20240307')
rag_llm = ChatOpenAI(model="gpt-4o-mini")

In [167]:
from operator import itemgetter

from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

retrieval_augmented_qa_chain = (
    {"context": itemgetter("question") | ensemble_retriever, "question": itemgetter("question")}
    | RunnablePassthrough.assign(context=itemgetter("context"))
    | {"response": rag_prompt | rag_llm, "context": itemgetter("context")}
)

In [153]:
retrieval_augmented_qa_chain.invoke({"question" : "Who is the Rabbit?"})["response"].content

'The Rabbit is a character in "Alice\'s Adventures in Wonderland" by Lewis Carroll, specifically known as the White Rabbit. He is depicted as a hurried and anxious creature, often seen wearing a waistcoat and carrying a pocket watch. His appearance and behavior spark Alice\'s curiosity, leading her to follow him down the rabbit hole and into Wonderland. In the story, he is portrayed as being concerned about being late and has interactions with Alice that lead to various adventures.'

In [154]:
retrieval_augmented_qa_chain.invoke({"question" : "What occurs during the trial?"})["response"].content

'During the trial, the King insists that it cannot proceed until all the jurymen are back in their proper places. Alice realizes that she had accidentally placed the Lizard in the jury box head down, causing it to wave its tail helplessly. She corrects this but thinks it doesn’t matter much how the Lizard is positioned. The jurors are busy writing on slates, and when Alice questions what they are doing, the Gryphon explains that they are putting down their names to avoid forgetting them by the end of the trial. Alice observes that the jurors are writing down "stupid things" on their slates, which she finds absurd. The atmosphere is serious, and Alice feels compelled to remain solemn, despite the ridiculousness of the situation. The King eventually calls for the reading of the accusation after some commotion involving Alice and the jurors.'

In [69]:
dataset_base = dataset.copy()

## Evaluating Base RAG

In [168]:
from tqdm import tqdm

base_dataset = []

for row in tqdm(dataset_base):
  chain_response = retrieval_augmented_qa_chain.invoke({"question" : row.eval_sample.user_input})
  response = chain_response["response"].content
  contexts = chain_response["context"]
  row.eval_sample.response = response
  row.eval_sample.retrieved_contexts = [context.page_content for context in contexts]
  response_dict = {
      "user_input" : row.eval_sample.user_input,
      "response" : response,
      "reference" : row.eval_sample.reference,
      "retrieved_contexts" : [context.page_content for context in contexts]
  }
  base_dataset.append(response_dict)

100%|██████████| 31/31 [05:01<00:00,  9.72s/it]


In [169]:
from ragas import EvaluationDataset

eval_dataset = EvaluationDataset.from_list(base_dataset)

In [170]:
from ragas.llms import LangchainLLMWrapper
from langchain_openai import ChatOpenAI
evaluator_llm = LangchainLLMWrapper(ChatOpenAI(model="gpt-4o"))

In [171]:
from ragas.metrics import LLMContextRecall, Faithfulness, FactualCorrectness, SemanticSimilarity
from ragas import evaluate

metrics = [LLMContextRecall(), FactualCorrectness(), Faithfulness()]
results = evaluate(dataset=eval_dataset, metrics=metrics, llm=evaluator_llm,)

Evaluating:   0%|          | 0/93 [00:00<?, ?it/s]

  recall = true_positives / (true_positives + false_negatives)
  recall = true_positives / (true_positives + false_negatives)


In [173]:
results

{'context_recall': 0.6160, 'factual_correctness': 0.3059, 'faithfulness': 0.5678}

## Contextual Retrieval

In [114]:
contextual_prompt = """\
<document>
{document}
</document>
Here is the chunk we want to situate within the whole document
<chunk>
{chunk}
</chunk>
Please give a short succinct context to situate this chunk within the overall document for the purposes of improving search retrieval of the chunk. Answer only with the succinct context and nothing else.
"""

contextual_prompt = PromptTemplate.from_template(contextual_prompt)

In [125]:
add_context_chain = contextual_prompt | llm | StrOutputParser()

### Adding Contextual Information to our Chunks

> NOTE: This cell will take a while to run!

In [None]:
for chunk in tqdm(split_documents):
  chunk.page_content += add_context_chain.invoke({"chunk" : chunk.page_content, "document" : documents[0].page_content})

In [127]:
split_documents[0]

Document(metadata={'source': './alice_in_wonderland.txt'}, page_content='\ufeff\ufeff*** START OF THE PROJECT GUTENBERG EBOOK ALICE\'S ADVENTURES IN\nWONDERLAND ***\n[Illustration]\n\n\n\n\nAlice’s Adventures in Wonderland\n\nby Lewis Carroll\n\nTHE MILLENNIUM FULCRUM EDITION 3.0\n\nContents\n\n CHAPTER I.     Down the Rabbit-Hole\n CHAPTER II.    The Pool of Tears\n CHAPTER III.   A Caucus-Race and a Long Tale\n CHAPTER IV.    The Rabbit Sends in a Little Bill\n CHAPTER V.     Advice from a Caterpillar\n CHAPTER VI.    Pig and Pepper\n CHAPTER VII.   A Mad Tea-Party\n CHAPTER VIII.  The Queen’s Croquet-Ground\n CHAPTER IX.    The Mock Turtle’s Story\n CHAPTER X.     The Lobster Quadrille\n CHAPTER XI.    Who Stole the Tarts?\n CHAPTER XII.   Alice’s Evidence\n\n\n\n\nCHAPTER I.\nDown the Rabbit-HoleThis chunk is the beginning of the book "Alice\'s Adventures in Wonderland" by Lewis Carroll, containing the table of contents and the first chapter "Down the Rabbit-Hole".This chunk is the

In [128]:
ensemble_retriever_contextual = create_ensemble_retriever_from_documents(split_documents)

In [157]:
contextual_retrieval_augmented_qa_chain = (
    {"context": itemgetter("question") | ensemble_retriever_contextual, "question": itemgetter("question")}
    | RunnablePassthrough.assign(context=itemgetter("context"))
    | {"response": rag_prompt | rag_llm, "context": itemgetter("context")}
)

In [158]:
contextual_retrieval_augmented_qa_chain.invoke({"question" : "Who is the Rabbit?"})["response"].content

'The Rabbit in "Alice\'s Adventures in Wonderland" is a character known as the White Rabbit. He is depicted as a hurried, anxious creature who is often seen wearing a waistcoat and carrying a pocket watch. He is the one who initially leads Alice down the rabbit hole, sparking her adventures in Wonderland. The Rabbit is characterized by his nervous demeanor and frequent exclamations about being late.'

In [159]:
contextual_retrieval_augmented_qa_chain.invoke({"question" : "What occurs during the trial?"})["response"].content

'During the trial, various characters, including the King, Queen, and jurors, are involved in a chaotic and nonsensical legal proceeding. The King, who acts as the judge, emphasizes the need for all jurymen to be in their proper places before the trial can proceed. The White Rabbit serves as the herald and reads accusations against the Knave of Hearts, who is on trial for stealing tarts.\n\nAlice, who is also called as a witness, finds the entire situation absurd and challenges the rules and actions of the court, questioning the legitimacy of the trial and the King\'s authority. The jurors are depicted as writing down nonsensical statements, and much of the testimony is comically illogical. The Hatter and other characters provide evidence that is often irrelevant or confusing, leading to further chaos.\n\nThe trial highlights the whimsical and nonsensical nature of Wonderland, with characters frequently interrupting one another and the King and Queen issuing arbitrary commands. Ultimat

In [160]:
contextual_dataset = []

for row in tqdm(dataset_base):
  chain_response = contextual_retrieval_augmented_qa_chain.invoke({"question" : row.eval_sample.user_input})
  response = chain_response["response"].content
  contexts = chain_response["context"]
  row.eval_sample.response = response
  row.eval_sample.retrieved_contexts = [context.page_content for context in contexts]
  response_dict = {
      "user_input" : row.eval_sample.user_input,
      "response" : response,
      "reference" : row.eval_sample.reference,
      "retrieved_contexts" : [context.page_content for context in contexts]
  }
  contextual_dataset.append(response_dict)

100%|██████████| 31/31 [05:42<00:00, 11.06s/it]


In [161]:
contextual_eval_dataset = EvaluationDataset.from_list(contextual_dataset)

In [162]:
metrics = [LLMContextRecall(), FactualCorrectness(), Faithfulness()]
results = evaluate(dataset=contextual_eval_dataset, metrics=metrics, llm=evaluator_llm,)

Evaluating:   0%|          | 0/93 [00:00<?, ?it/s]

  recall = true_positives / (true_positives + false_negatives)


In [163]:
results

{'context_recall': 0.5864, 'factual_correctness': 0.3580, 'faithfulness': 0.6629}

## Add Reranking

In [165]:
os.environ["CO_API_KEY"] = getpass.getpass("Cohere API Key:")

Cohere API Key:··········


In [166]:
from langchain.retrievers.contextual_compression import ContextualCompressionRetriever
from langchain_cohere import CohereRerank

ensemble_retriever_contextual_large = create_ensemble_retriever_from_documents(split_documents, num_docs_retrieved=25)

compressor = CohereRerank(model="rerank-english-v3.0")
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor, base_retriever=ensemble_retriever_contextual_large
)

contextual_retrieval_augmented_qa_chain_rerank = (
    {"context": itemgetter("question") | compression_retriever, "question": itemgetter("question")}
    | RunnablePassthrough.assign(context=itemgetter("context"))
    | {"response": rag_prompt | rag_llm, "context": itemgetter("context")}
)

In [174]:
contextual_dataset_rerank = []

for row in tqdm(dataset_base):
  chain_response = contextual_retrieval_augmented_qa_chain_rerank.invoke({"question" : row.eval_sample.user_input})
  response = chain_response["response"].content
  contexts = chain_response["context"]
  row.eval_sample.response = response
  row.eval_sample.retrieved_contexts = [context.page_content for context in contexts]
  response_dict = {
      "user_input" : row.eval_sample.user_input,
      "response" : response,
      "reference" : row.eval_sample.reference,
      "retrieved_contexts" : [context.page_content for context in contexts]
  }
  contextual_dataset_rerank.append(response_dict)

100%|██████████| 31/31 [04:53<00:00,  9.48s/it]


In [175]:
contextual_eval_dataset_rerank = EvaluationDataset.from_list(contextual_dataset_rerank)

In [176]:
metrics = [LLMContextRecall(), FactualCorrectness(), Faithfulness()]
results = evaluate(dataset=contextual_eval_dataset_rerank, metrics=metrics, llm=evaluator_llm,)

Evaluating:   0%|          | 0/93 [00:00<?, ?it/s]

  recall = true_positives / (true_positives + false_negatives)


In [177]:
results

{'context_recall': 0.5718, 'factual_correctness': 0.3860, 'faithfulness': 0.4733}