#### This notebook creates the data files for the assignment
#### It exists because I found no way to make the original notebook generate the data files once the first breakout section has been run.

In [55]:
# ! pip -q install langchain==0.2.16
# ! pip -q install langchain-cohere==0.3.0
# ! pip -q install langchain-community==0.2.17
# ! pip -q install langchain-core==0.2.41
# ! pip -q install langchain-experimental==0.3.2
# ! pip -q install langchain-huggingface==0.1.0
# ! pip -q install langchain-openai==0.1.25
# ! pip -q install langchain-qdrant==0.1.3
# ! pip -q install langchain-text-splitters==0.2.4
# ! pip -q install langgraph==0.2.16
# ! pip -q install langgraph-checkpoint==1.0.6
# ! pip -q install langsmith==0.1.129
# ! pip -q install ragas==0.1.20
# ! pip -q install rank_bm25

In [56]:
import os
# Credentials are read from the process environment; a missing variable yields None.
LANGCHAIN_API_KEY = os.environ.get("LANGCHAIN_API_KEY")
LANGCHAIN_TRACING_V2 = True
LANGCHAIN_PROJECT = "Advanced RAG"
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
COHERE_API_KEY = os.environ.get("COHERE_API_KEY")

# LangChain / LangSmith read their tracing settings from environment variables,
# not from Python globals, so the two settings above have no effect unless they
# are exported. Only do so when an API key is available, and never override a
# value that was already configured outside the notebook.
if LANGCHAIN_API_KEY:
    os.environ.setdefault("LANGCHAIN_TRACING_V2", str(LANGCHAIN_TRACING_V2).lower())
    os.environ.setdefault("LANGCHAIN_PROJECT", LANGCHAIN_PROJECT)

In [57]:
from langchain_community.document_loaders.csv_loader import CSVLoader
from datetime import datetime, timedelta

# Accumulates the review documents of all four John Wick films.
documents = []

for i in range(1, 5):
  # One CSV per film; the listed columns become metadata instead of page content.
  loader = CSVLoader(
      file_path=f"john_wick_{i}.csv",
      metadata_columns=["Review_Date", "Review_Title", "Review_Url", "Author", "Rating"]
  )
  movie_docs = loader.load()

  for doc in movie_docs:
    doc.metadata.update({
        # Which film the review belongs to (John Wick 1, 2, ...).
        "Movie_Title": f"John Wick {i}",
        # "Rating" becomes an `int`; an empty rating is treated as 0.
        "Rating": int(doc.metadata["Rating"]) if doc.metadata["Rating"] else 0,
        # Newer films get a more recent "last_accessed_at" timestamp.
        "last_accessed_at": datetime.now() - timedelta(days=4-i),
    })

  documents += movie_docs

In [58]:
# Show the first loaded document to sanity-check its metadata and page content.
documents[0]

Document(metadata={'source': 'john_wick_1.csv', 'row': 0, 'Review_Date': '6 May 2015', 'Review_Title': ' Kinetic, concise, and stylish; John Wick kicks ass.\n', 'Review_Url': '/review/rw3233896/?ref_=tt_urv', 'Author': 'lnvicta', 'Rating': 8, 'Movie_Title': 'John Wick 1', 'last_accessed_at': datetime.datetime(2024, 9, 26, 18, 8, 53, 337040)}, page_content=": 0\nReview: The best way I can describe John Wick is to picture Taken but instead of Liam Neeson it's Keanu Reeves and instead of his daughter it's his dog. That's essentially the plot of the movie. John Wick (Reeves) is out to seek revenge on the people who took something he loved from him. It's a beautifully simple premise for an action movie - when action movies get convoluted, they get bad i.e. A Good Day to Die Hard. John Wick gives the viewers what they want: Awesome action, stylish stunts, kinetic chaos, and a relatable hero to tie it all together. John Wick succeeds in its simplicity.")

In [59]:
from langchain_community.vectorstores  import Qdrant
from langchain_openai import OpenAIEmbeddings



# Embedding model used for the dense vector index.
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")


# Embed all loaded reviews into the "JohnWick" Qdrant collection.
# location=":memory:" is passed, so the index presumably lives only in this
# kernel's memory and is rebuilt on every run — TODO confirm.
vectorstore = Qdrant.from_documents(
    documents,
    embeddings,
    location=":memory:",
    collection_name="JohnWick"
)

In [60]:
# Baseline retriever straight off the vector store, with search kwarg k=10
# (presumably the number of documents returned per query — confirm).
naive_retriever = vectorstore.as_retriever(search_kwargs={"k" : 10})

In [61]:
from langchain_core.prompts import ChatPromptTemplate

# Prompt text used by the RAG chains in this notebook. It has two placeholders,
# {question} and {context}, which the chains fill in at invocation time.
RAG_TEMPLATE = """\
You are a helpful and kind assistant. Use the context provided below to answer the question.

If you do not know the answer, or are unsure, say you don't know.

Query:
{question}

Context:
{context}
"""

# Wrap the raw template string in a chat prompt object.
rag_prompt = ChatPromptTemplate.from_template(RAG_TEMPLATE)

In [62]:
from langchain_openai import ChatOpenAI

# Chat model used by the chains below. No model name is given, so the library's
# default model is used. seed=42 is presumably meant to make sampling more
# repeatable between runs — TODO confirm.
chat_model = ChatOpenAI(seed=42)

In [63]:
from langchain_core.runnables import RunnablePassthrough
from operator import itemgetter
from langchain_core.output_parsers import StrOutputParser

def build_rag_chain(retriever):
    """Assemble the question -> retrieve -> prompt -> LLM pipeline around ``retriever``.

    The chain is invoked with ``{"question": <str>}`` and returns a dict with:

    * ``"response"``: the output of ``rag_prompt | chat_model``
    * ``"context"``: whatever ``retriever`` returned for the question

    Args:
        retriever: any runnable that can be piped after ``itemgetter("question")``.

    Returns:
        The composed runnable.
    """
    return (
        # Step 1: look up context for the question and carry the question along.
        {"context": itemgetter("question") | retriever, "question": itemgetter("question")}
        # Step 2: re-assigns "context" from the existing "context" key; kept so the
        # chain has the same steps (and traces) as the original inline definition.
        | RunnablePassthrough.assign(context=itemgetter("context"))
        # Step 3: generate the answer and return it together with the context used.
        | {"response": rag_prompt | chat_model, "context": itemgetter("context")}
    )


# Baseline chain built on the plain vector-store retriever.
naive_retrieval_chain = build_rag_chain(naive_retriever)

### The following code generated the testset, which I saved to CSV.
### It is commented out because I do not want to run it again.

In [64]:
# from ragas.testset.generator import TestsetGenerator
# from ragas.testset.evolutions import simple, reasoning, multi_context
# from langchain_openai import ChatOpenAI, OpenAIEmbeddings

# generator_llm = ChatOpenAI(model="gpt-4o-mini")
# critic_llm = ChatOpenAI(model="gpt-4o")
# embeddings = OpenAIEmbeddings()

# generator = TestsetGenerator.from_langchain(
#     generator_llm,
#     critic_llm,
#     embeddings
# )

# distributions = {
#     simple: 0.5,
#     multi_context: 0.4,
#     reasoning: 0.1
# }

In [65]:
# testset = generator.generate_with_langchain_docs(documents, 20, distributions, with_debugging_logs=False)

In [66]:
# for data_row in testset.test_data:
#     question = data_row.question
#     contexts = data_row.contexts
#     ground_truth = data_row.ground_truth
#     evolution_type = data_row.evolution_type
#     metadata = data_row.metadata
    
#     # Process each element as needed
#     print(f"Question: {question}")
#     print(f"Contexts: {contexts}")
#     print(f"Ground Truth: {ground_truth}")
#     print(f"Evolution Type: {evolution_type}")
#     print(f"Metadata: {metadata}")
#     print("\n")  # For better readability

In [67]:
# testset.to_pandas()

In [68]:
from langsmith import Client

# LangSmith client; used by the commented-out dataset-creation code below.
client = Client()

## The following code is commented out because it has already been run once; running it again would mess up LangSmith.

# dataset_name = "John Wicks Dataset"

# dataset = client.create_dataset(
#     dataset_name=dataset_name,
#     description="Questions about John Wicks movie reviews"
# )

In [69]:
# for test in testset.to_pandas().iterrows():
#   client.create_example(
#       inputs={
#           "question": test[1]["question"]
#       },
#       outputs={
#           "answer": test[1]["ground_truth"]
#       },
#       metadata={
#           "context": test[0]
#       },
#       dataset_id=dataset.id
#   )

In [70]:
# testset.test_data[0]

In [71]:
# testset_df = testset.to_pandas()
# testset_df.to_csv("testset.csv")

In [72]:
import pandas as pd

# Load the previously generated test set (the commented-out
# `testset_df.to_csv("testset.csv")` cell above wrote this file).
test_df = pd.read_csv("testset.csv")

In [73]:
# Pull the question and ground-truth columns out as plain Python lists.
test_questions = test_df["question"].tolist()
test_groundtruths = test_df["ground_truth"].tolist()

In [74]:
# For every test question, keep the generated answer text and the page
# contents of the documents the naive chain retrieved for it.
answers, contexts = [], []

for question in test_questions:
  response = naive_retrieval_chain.invoke({"question": question})
  answers.append(response["response"].content)
  contexts.append([retrieved.page_content for retrieved in response["context"]])

In [75]:
from datasets import Dataset

# Column-wise dataset: one row per test question, with the chain's answer,
# the retrieved contexts and the reference answer.
response_dataset = Dataset.from_dict(
    dict(
        question=test_questions,
        answer=answers,
        contexts=contexts,
        ground_truth=test_groundtruths,
    )
)

In [76]:
# Show the first row of the assembled dataset.
response_dataset[0]

{'question': "What role does the Russian mafia play in John Wick's story?",
 'answer': "The Russian mafia plays a significant role in John Wick's story. In the first movie, John Wick is a retired assassin who seeks vengeance against the son of a Russian mobster who stole his car, killed his dog, and destroyed his peaceful life. This leads to a violent conflict between John Wick and the Russian mafia. Additionally, in the second movie, John Wick is forced back into the world of assassins when an Italian crime lord calls in a favor, illustrating the ongoing involvement of criminal organizations in John Wick's life.",
 'contexts': [": 20\nReview: After resolving his issues with the Russian mafia, John Wick (Keanu Reeves) returns home. But soon the mobster Santino D'Antonio (Riccardo Scamarcio) visits him to show Wick's marker and tells that he needs to help. John Wicks refuses since he is retired and Santino blows-up his house. John Wick meets the owner of the Continental hotel in New Yor

In [77]:
from ragas import evaluate
from ragas.metrics import (
    faithfulness,
    answer_relevancy,
    answer_correctness,
    context_recall,
    context_precision,
)

# The RAGAS metrics every retrieval chain is scored on; `run_evaluation`
# passes this list to `evaluate`.
metrics = [
    faithfulness,
    answer_relevancy,
    context_recall,
    context_precision,
    answer_correctness,
]

In [78]:
def run_evaluation(retrieval_chain, questions=None, ground_truths=None, eval_metrics=None):
    """Run ``retrieval_chain`` over a question set and score the results.

    Args:
        retrieval_chain: object with ``invoke({"question": str})`` returning a
            mapping with ``"response"`` (an object exposing ``.content``) and
            ``"context"`` (an iterable of objects exposing ``.page_content``).
        questions: questions to ask; defaults to the notebook-level
            ``test_questions``.
        ground_truths: reference answers aligned with ``questions``; defaults
            to the notebook-level ``test_groundtruths``.
        eval_metrics: metrics handed to ``evaluate``; defaults to the
            notebook-level ``metrics``.

    Returns:
        Whatever ``evaluate`` returns for the assembled dataset.

    Raises:
        ValueError: if ``questions`` and ``ground_truths`` differ in length.
    """
    questions = list(test_questions if questions is None else questions)
    ground_truths = list(test_groundtruths if ground_truths is None else ground_truths)
    eval_metrics = metrics if eval_metrics is None else eval_metrics

    # Fail before any (slow, billable) chain invocation rather than after all of them.
    if len(questions) != len(ground_truths):
        raise ValueError(
            f"questions ({len(questions)}) and ground_truths ({len(ground_truths)}) "
            "must have the same length"
        )

    answers = []
    contexts = []

    for question in questions:
        response = retrieval_chain.invoke({"question" : question})
        answers.append(response["response"].content)
        contexts.append([context.page_content for context in response["context"]])

    response_dataset = Dataset.from_dict({
        "question" : questions,
        "answer" : answers,
        "contexts" : contexts,
        "ground_truth" : ground_truths
    })
    return evaluate(response_dataset, eval_metrics)

In [79]:
# Score the naive retrieval chain on the test questions.
naive_results = run_evaluation(naive_retrieval_chain)

Evaluating: 100%|██████████| 100/100 [00:46<00:00,  2.17it/s]


In [80]:
from uuid import uuid4

# Set LANGCHAIN_PROJECT to a fresh name: fixed prefix plus the first 8 hex
# characters of a random UUID.
os.environ["LANGCHAIN_PROJECT"] = "Advanced Rag - " + uuid4().hex[:8]

In [81]:
#  !pip install rank_bm25

In [82]:
from langchain_community.retrievers import BM25Retriever

# BM25 retriever built from the same `documents` list.
bm25_retriever = BM25Retriever.from_documents(documents)

In [83]:
# RAG chain that retrieves with BM25.
bm25_retrieval_chain = (
    # Step 1: fetch context for the question and carry the question along.
    {
        "context": itemgetter("question") | bm25_retriever,
        "question": itemgetter("question"),
    }
    # Step 2: re-assigns "context" from the existing "context" key.
    | RunnablePassthrough.assign(context=itemgetter("context"))
    # Step 3: generate the answer and return it alongside the context used.
    | {
        "response": rag_prompt | chat_model,
        "context": itemgetter("context"),
    }
)

In [84]:
# Try the BM25 chain on one ad-hoc question and show only the answer text.
bm25_retrieval_chain.invoke({"question" : "Did people generally like John Wick?"})["response"].content

"Opinions on John Wick seem to be divided. Some people really enjoyed the movie, praising its action sequences, world-building, and Keanu Reeves' performance, while others found it to be lacking in plot and substance. Overall, it seems that there are both fans and critics of the John Wick series."

In [85]:
# Score the BM25 chain on the test questions.
bm25_results = run_evaluation(bm25_retrieval_chain)

Evaluating: 100%|██████████| 100/100 [00:34<00:00,  2.87it/s]


In [86]:
# Display the BM25 scores.
bm25_results

{'faithfulness': 0.6707, 'answer_relevancy': 0.8206, 'context_recall': 0.7958, 'context_precision': 0.6861, 'answer_correctness': 0.6426}

### Contextual Compression Retrieval
This section would NOT run in this notebook, even after setting the Cohere API key, so its code is commented out below.
It does run in the original notebook.

In [87]:
# import getpass
# os.environ["COHERE_API_KEY"] = getpass.getpass("Cohere API Key:")

In [88]:
# from langchain.retrievers.contextual_compression import ContextualCompressionRetriever
# from langchain_cohere import CohereRerank

# compressor = CohereRerank(model="rerank-english-v3.0")
# compression_retriever = ContextualCompressionRetriever(
#     base_compressor=compressor, base_retriever=naive_retriever
# )

# contextual_compression_retrieval_chain = (
#     {"context": itemgetter("question") | compression_retriever, "question": itemgetter("question")}
#     | RunnablePassthrough.assign(context=itemgetter("context"))
#     | {"response": rag_prompt | chat_model, "context": itemgetter("context")}
# )

In [89]:
from langchain.retrievers.multi_query import MultiQueryRetriever

# Multi-query retriever layered on `naive_retriever`, with `chat_model` as its LLM
# (presumably the LLM produces alternative phrasings of the question — confirm).
multi_query_retriever = MultiQueryRetriever.from_llm(
    retriever=naive_retriever, llm=chat_model
)

In [90]:
# RAG chain that retrieves through the multi-query retriever.
multi_query_retrieval_chain = (
    # Step 1: fetch context for the question and carry the question along.
    {
        "context": itemgetter("question") | multi_query_retriever,
        "question": itemgetter("question"),
    }
    # Step 2: re-assigns "context" from the existing "context" key.
    | RunnablePassthrough.assign(context=itemgetter("context"))
    # Step 3: generate the answer and return it alongside the context used.
    | {
        "response": rag_prompt | chat_model,
        "context": itemgetter("context"),
    }
)

In [91]:
# Score the multi-query chain on the test questions.
multi_query_results = run_evaluation(multi_query_retrieval_chain)

Evaluating: 100%|██████████| 100/100 [00:56<00:00,  1.76it/s]


In [92]:
from langchain.retrievers import ParentDocumentRetriever
from langchain.storage import InMemoryStore
from langchain_text_splitters import RecursiveCharacterTextSplitter
from qdrant_client import QdrantClient, models

# The full reviews act as parent documents (same list object as `documents`, not a copy).
parent_docs = documents
# Splitter for the child chunks, configured with chunk_size=200.
child_splitter = RecursiveCharacterTextSplitter(chunk_size=200)

In [93]:
# Dedicated in-memory Qdrant client for the parent-document retriever. It has
# its own name so it does not overwrite `client`, which already holds the
# LangSmith Client created earlier in this notebook.
parent_qdrant_client = QdrantClient(location=":memory:")

parent_qdrant_client.create_collection(
    collection_name="full_documents",
    # size must match the output dimension of the embedding model used below
    # (1536 for text-embedding-3-small by default — re-check if the model changes).
    vectors_config=models.VectorParams(size=1536, distance=models.Distance.COSINE)
)

# Vector store wrapper over the (initially empty) "full_documents" collection.
parent_document_vectorstore = Qdrant(
    collection_name="full_documents",
    embeddings=OpenAIEmbeddings(model="text-embedding-3-small"),
    client=parent_qdrant_client
)

In [94]:
# In-memory store passed to the retriever as its docstore.
store = InMemoryStore()

# Retriever wired to the "full_documents" vector store, the docstore above and
# the child splitter (presumably: child chunks are indexed, parents are returned — confirm).
parent_document_retriever = ParentDocumentRetriever(
    vectorstore = parent_document_vectorstore,
    docstore=store,
    child_splitter=child_splitter,
)

In [95]:
# Load the parent documents into the retriever. ids=None is passed explicitly
# (presumably so the retriever generates its own ids — confirm).
parent_document_retriever.add_documents(parent_docs, ids=None)

In [96]:
# RAG chain that retrieves through the parent-document retriever.
parent_document_retrieval_chain = (
    # Step 1: fetch context for the question and carry the question along.
    {
        "context": itemgetter("question") | parent_document_retriever,
        "question": itemgetter("question"),
    }
    # Step 2: re-assigns "context" from the existing "context" key.
    | RunnablePassthrough.assign(context=itemgetter("context"))
    # Step 3: generate the answer and return it alongside the context used.
    | {
        "response": rag_prompt | chat_model,
        "context": itemgetter("context"),
    }
)

In [97]:
# Score the parent-document chain on the test questions.
parent_results = run_evaluation(parent_document_retrieval_chain)

Evaluating: 100%|██████████| 100/100 [00:32<00:00,  3.06it/s]


In [98]:
from langchain.retrievers import EnsembleRetriever

# Retrievers combined by the ensemble, in this order.
retriever_list = [
    bm25_retriever,
    naive_retriever,
    parent_document_retriever,
    multi_query_retriever,
]
# Every retriever gets the same weight, 1 / (number of retrievers).
equal_weighting = [1 / len(retriever_list) for _ in retriever_list]

ensemble_retriever = EnsembleRetriever(
    retrievers=retriever_list,
    weights=equal_weighting,
)

In [99]:
# RAG chain that retrieves through the equally weighted ensemble retriever.
ensemble_retrieval_chain = (
    # Step 1: fetch context for the question and carry the question along.
    {
        "context": itemgetter("question") | ensemble_retriever,
        "question": itemgetter("question"),
    }
    # Step 2: re-assigns "context" from the existing "context" key.
    | RunnablePassthrough.assign(context=itemgetter("context"))
    # Step 3: generate the answer and return it alongside the context used.
    | {
        "response": rag_prompt | chat_model,
        "context": itemgetter("context"),
    }
)

In [100]:
# Score the ensemble chain on the test questions.
ensemble_results = run_evaluation(ensemble_retrieval_chain)

Evaluating:   1%|          | 1/100 [00:01<01:58,  1.20s/it]No statements were generated from the answer.
Evaluating: 100%|██████████| 100/100 [00:59<00:00,  1.67it/s]


In [101]:
from langchain_experimental.text_splitter import SemanticChunker

# Semantic chunker driven by the notebook-level `embeddings` object, with the
# "percentile" breakpoint threshold type.
semantic_chunker = SemanticChunker(
    embeddings,
    breakpoint_threshold_type="percentile"
)

In [102]:
# Re-split every loaded review with the semantic chunker.
semantic_documents = semantic_chunker.split_documents(documents)

In [103]:
# Separate Qdrant collection ("JohnWickSemantic") holding the semantic chunks,
# embedded with the same `embeddings` object.
semantic_vectorstore = Qdrant.from_documents(
    semantic_documents,
    embeddings,
    location=":memory:",
    collection_name="JohnWickSemantic"
)

In [104]:
# Retriever over the semantic chunks, with the same k=10 search kwarg as the naive retriever.
semantic_retriever = semantic_vectorstore.as_retriever(search_kwargs={"k" : 10})

In [105]:
# RAG chain that retrieves from the semantically chunked collection.
semantic_retrieval_chain = (
    # Step 1: fetch context for the question and carry the question along.
    {
        "context": itemgetter("question") | semantic_retriever,
        "question": itemgetter("question"),
    }
    # Step 2: re-assigns "context" from the existing "context" key.
    | RunnablePassthrough.assign(context=itemgetter("context"))
    # Step 3: generate the answer and return it alongside the context used.
    | {
        "response": rag_prompt | chat_model,
        "context": itemgetter("context"),
    }
)

In [106]:
# Score the semantic-chunking chain on the test questions.
semantic_results = run_evaluation(semantic_retrieval_chain)

Evaluating: 100%|██████████| 100/100 [00:50<00:00,  1.99it/s]


In [107]:
# Print the scores of every strategy, one per line, in the order:
# naive, BM25, contextual compression (not run in this notebook),
# multi-query, parent-document, ensemble, semantic.
print(naive_results)
print(bm25_results)
print("Contextual Compression Results Not Available")
print(multi_query_results)
print(parent_results)
print(ensemble_results)
print(semantic_results)

{'faithfulness': 0.8604, 'answer_relevancy': 0.9179, 'context_recall': 0.9750, 'context_precision': 0.7605, 'answer_correctness': 0.7486}
{'faithfulness': 0.6707, 'answer_relevancy': 0.8206, 'context_recall': 0.7958, 'context_precision': 0.6861, 'answer_correctness': 0.6426}
Contextual Compression Results Not Available
{'faithfulness': 0.7868, 'answer_relevancy': 0.8284, 'context_recall': 0.9750, 'context_precision': 0.6560, 'answer_correctness': 0.6776}
{'faithfulness': 0.8105, 'answer_relevancy': 0.9141, 'context_recall': 0.8750, 'context_precision': 0.8333, 'answer_correctness': 0.6585}
{'faithfulness': 0.8741, 'answer_relevancy': 0.8743, 'context_recall': 0.9750, 'context_precision': 0.7560, 'answer_correctness': 0.7285}
{'faithfulness': 0.8595, 'answer_relevancy': 0.7905, 'context_recall': 0.8750, 'context_precision': 0.7859, 'answer_correctness': 0.5975}


In [108]:
import pandas as pd
# One two-column frame per strategy: the metric name and that strategy's score.
df_naive = pd.DataFrame({'Metric': list(naive_results.keys()), 'Naive': list(naive_results.values())})
df_bm25 = pd.DataFrame({'Metric': list(bm25_results.keys()), 'BM25': list(bm25_results.values())})
df_multiQ = pd.DataFrame({'Metric': list(multi_query_results.keys()), 'MultiQ': list(multi_query_results.values())})
df_parent = pd.DataFrame({'Metric': list(parent_results.keys()), 'Parent': list(parent_results.values())})
df_ensemble = pd.DataFrame({'Metric': list(ensemble_results.keys()), 'Ensemble': list(ensemble_results.values())})
df_semantic = pd.DataFrame({'Metric': list(semantic_results.keys()), 'Semantic': list(semantic_results.values())})

# Join the frames on the metric name so each strategy becomes one column.
df_merged = (
    df_naive
    .merge(df_bm25, on='Metric')
    .merge(df_multiQ, on='Metric')
    .merge(df_parent, on='Metric')
    .merge(df_ensemble, on='Metric')
    .merge(df_semantic, on='Metric')
)
df_merged

Unnamed: 0,Metric,Naive,BM25,MultiQ,Parent,Ensemble,Semantic
0,faithfulness,0.860417,0.670694,0.786845,0.810471,0.874123,0.859524
1,answer_relevancy,0.917881,0.8206,0.828396,0.914089,0.874294,0.790533
2,context_recall,0.975,0.795833,0.975,0.875,0.975,0.875
3,context_precision,0.760535,0.686111,0.656045,0.833333,0.755983,0.785861
4,answer_correctness,0.748569,0.64264,0.677619,0.658513,0.728547,0.597534
