# Install dependencies

In [1]:
!pip install -qU langchain pypdf llama-cpp-python huggingface_hub
!pip install -qU sentence_transformers
!pip install -q qdrant-client
!pip install rank_bm25
!pip install langchain-openai
!pip install langchain-groq
!pip install -q ragas

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m49.4/49.4 MB[0m [31m17.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Installing backend dependencies ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.4/50.4 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m990.3/990.3 kB[0m [31m50.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m295.8/295.8 kB[0m [31m24.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m417.2/417.2 kB[0m [31m32.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m45.5/45.5 kB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

# Loading the blog Documents

In [2]:
from langchain_community.document_loaders import WebBaseLoader

# Blog posts used as the source material for this notebook.
blog_urls = [
    "https://blog.langchain.dev/langchain-v0-1-0/",
    "https://blog.langchain.dev/langgraph-cloud/",
]
loader = WebBaseLoader(blog_urls)

# `documents` holds whatever the loader returns for the URLs above.
documents = loader.load()



# Instantiating LLM models

In [3]:
from langchain_openai import OpenAI
import os
from google.colab import userdata
from langchain_groq import ChatGroq
# Copy each API key from Colab's user secrets into an environment variable
# of the same name.
for secret_name in ("OPENAI_API_KEY", "GROQ_API_KEY"):
    os.environ[secret_name] = userdata.get(secret_name)

# Groq-hosted chat model; temperature 0 is requested explicitly.
groq_llm = ChatGroq(model="llama-3.1-70b-versatile", temperature=0)


In [4]:
# Display the repr of the configured chat model as the cell output.
groq_llm

ChatGroq(client=<groq.resources.chat.completions.Completions object at 0x795a856cbdf0>, async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x795a856f49a0>, model_name='llama-3.1-70b-versatile', temperature=1e-08, groq_api_key=SecretStr('**********'))

# Instantiating Embedding Model

In [5]:
from langchain_openai import OpenAIEmbeddings

# Embedding model used for the vector store and the redundancy filter.
embedding_model_name = "text-embedding-ada-002"
embeddings = OpenAIEmbeddings(model=embedding_model_name)



# Splitting the documents

In [6]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Splitter settings: 1250-character chunks with a 100-character overlap,
# measured with the built-in len; separators are not treated as regexes.
splitter_options = {
    "chunk_size": 1250,
    "chunk_overlap": 100,
    "length_function": len,
    "is_separator_regex": False,
}
text_splitter = RecursiveCharacterTextSplitter(**splitter_options)

# Split the loaded blog posts and report how many chunks were produced.
split_docs = text_splitter.split_documents(documents)
print(len(split_docs))

27


# Instantiating local Qdrant vectorstore

In [7]:
import shutil

# Delete only the directory that holds the local Qdrant store. A wildcard
# delete of /content/drive/* would also erase any Google Drive content mounted
# there. ignore_errors=True keeps this silent when the directory does not exist.
shutil.rmtree("/content/drive/MyDrive/Vectorstore", ignore_errors=True)

In [8]:
from langchain_community.vectorstores import Qdrant
# Build the "full_documents" Qdrant collection from the split chunks, using
# the embedding model configured above and a local storage path.
vectorstore_dir = "/content/drive/MyDrive/Vectorstore"
vectorstore = Qdrant.from_documents(
    documents=split_docs,
    embedding=embeddings,
    path=vectorstore_dir,
    collection_name="full_documents",
)

# Instantiate the Keyword / BM25 Sparse embeddings model

In [9]:
from langchain.retrievers import BM25Retriever, EnsembleRetriever
from langchain.retrievers import ContextualCompressionRetriever
#
# Keyword (BM25) retriever over the same chunks, returning 10 hits per query.
bm25_retriever = BM25Retriever.from_documents(split_docs, k=10)

# Instantiate Reranker — Cross Encoders


In [10]:
from __future__ import annotations
from typing import Dict, Optional, Sequence
from langchain.schema import Document
from langchain.pydantic_v1 import Extra, root_validator

from langchain.callbacks.manager import Callbacks
from langchain.retrievers.document_compressors.base import BaseDocumentCompressor

from sentence_transformers import CrossEncoder
# from config import bge_reranker_large

class BgeRerank(BaseDocumentCompressor):
    """Document compressor that reranks documents with a cross-encoder.

    The cross-encoder is created from ``model_name`` the first time it is
    needed, so defining the class loads nothing and a ``model_name`` passed to
    the constructor is honoured. A ready-made ``CrossEncoder`` may also be
    supplied through ``model``.
    """

    # Name of the cross-encoder checkpoint used when ``model`` is not supplied.
    model_name: str = 'BAAI/bge-reranker-large'
    # Number of documents to return.
    top_n: int = 3
    # CrossEncoder instance used for reranking; built on first use when None.
    model: Optional[CrossEncoder] = None

    class Config:
        """Configuration for this pydantic object."""

        extra = Extra.forbid
        arbitrary_types_allowed = True

    def _get_model(self) -> CrossEncoder:
        """Return the cross-encoder, creating it from ``model_name`` once."""
        if self.model is None:
            self.model = CrossEncoder(self.model_name)
        return self.model

    def bge_rerank(self, query, docs):
        """Score every text against the query and keep the best ``top_n``.

        Args:
            query: The query string.
            docs: Texts to score against the query.

        Returns:
            A list of ``(index, score)`` pairs sorted by score, highest first,
            with at most ``top_n`` entries. ``index`` is the position of the
            text in ``docs``.
        """
        if len(docs) == 0:  # nothing to score; skip the model call entirely
            return []
        model_inputs = [[query, doc] for doc in docs]
        scores = self._get_model().predict(model_inputs)
        results = sorted(enumerate(scores), key=lambda x: x[1], reverse=True)
        return results[:self.top_n]

    def compress_documents(
        self,
        documents: Sequence[Document],
        query: str,
        callbacks: Optional[Callbacks] = None,
    ) -> Sequence[Document]:
        """
        Compress documents using BAAI/bge-reranker models.

        Args:
            documents: A sequence of documents to compress.
            query: The query to use for compressing the documents.
            callbacks: Callbacks to run during the compression process.
                Accepted for interface compatibility; not used here.

        Returns:
            New ``Document`` objects for the ``top_n`` best-scoring inputs,
            highest score first, each carrying a ``relevance_score`` entry in
            its metadata. The input documents are left unmodified.
        """
        if len(documents) == 0:  # to avoid an empty model call
            return []
        doc_list = list(documents)
        texts = [d.page_content for d in doc_list]
        final_results = []
        for index, score in self.bge_rerank(query, texts):
            source = doc_list[index]
            # Build a fresh Document instead of writing into source.metadata:
            # the same Document objects can be held by other components (for
            # example a retriever built from the same list), and a score from
            # one query must not leak into later ones.
            final_results.append(
                Document(
                    page_content=source.page_content,
                    # float() stores a plain Python number rather than
                    # whatever scalar type the model returned.
                    metadata={**source.metadata, "relevance_score": float(score)},
                )
            )
        return final_results

  from tqdm.autonotebook import tqdm, trange


config.json:   0%|          | 0.00/801 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.24G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/443 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.1M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/279 [00:00<?, ?B/s]

# Creating a Contextual Compression Pipeline

In [11]:
from langchain_community.document_transformers.embeddings_redundant_filter import EmbeddingsRedundantFilter
from langchain.retrievers.document_compressors import DocumentCompressorPipeline
from langchain.retrievers import ContextualCompressionRetriever
from langchain_community.document_transformers.long_context_reorder import LongContextReorder
from langchain.retrievers.multi_query import MultiQueryRetriever
#
# Dense retriever over the Qdrant store, returning 10 chunks per query.
vs_retriever = vectorstore.as_retriever(search_kwargs={"k": 10})

# Hybrid retrieval: combine the BM25 and dense result lists with equal weight.
# The keyword is `weights` (plural); a misspelt `weight=` never reaches the
# retriever's weighting logic.
ensemble_retriever = EnsembleRetriever(
    retrievers=[bm25_retriever, vs_retriever],
    weights=[0.5, 0.5],
)

# Post-retrieval transformers, applied in the order listed in the pipeline.
redundant_filter = EmbeddingsRedundantFilter(embeddings=embeddings)
reordering = LongContextReorder()
reranker = BgeRerank()

# NOTE(review): BgeRerank sorts its output by score, so the ordering produced
# by LongContextReorder is not preserved in the final result — consider
# whether the reordering step should run after the reranker instead.
pipeline_compressor = DocumentCompressorPipeline(
    transformers=[redundant_filter, reordering, reranker]
)

# Retriever that runs the hybrid search and then the compressor pipeline.
compression_pipeline = ContextualCompressionRetriever(
    base_compressor=pipeline_compressor,
    base_retriever=ensemble_retriever,
)

# Utility functions for displaying retrieved documents

In [12]:
def pretty_print_docs(docs):
    """Print each document's text under a numbered heading.

    Consecutive documents are separated by a line of 100 dashes.

    Args:
        docs: Iterable of objects exposing a ``page_content`` attribute.
    """
    separator = f"\n{'-' * 100}\n"
    # The heading is followed directly by the text; no literal " + " belongs
    # between them.
    print(
        separator.join(
            f"Document {i + 1}:\n\n{d.page_content}" for i, d in enumerate(docs)
        )
    )


In [13]:
# Query the compression pipeline through `.invoke()`; the older
# `get_relevant_documents()` entry point emits a deprecation warning.
docs = compression_pipeline.invoke("What are the major changes in v 0.1.0?")
pretty_print_docs(docs)

  warn_deprecated(


Document 1:

 + things that are top of mind for us are:Rewriting legacy chains in LCEL (with better streaming and debugging support)Adding new types of chainsAdding new types of agentsImproving our production ingestion capabilitiesRemoving old and unused functionalityImportantly, even though we are excited about removing some of the old and legacy code to make langchain slimmer and more focused, we also want to maintain support for people who are still using the old version. That is why we will maintain 0.1 as a stable branch (patching in critical bug fixes) for at least 3 months after 0.2 release. We plan to do this for every stable release from here on out.And if you've been wanting to get started contributing, there's never been a better time. We recently added a good getting started issue on GitHub if you're looking for a place to start.One More ThingA large part of LangChain v0.1.0 is stability and focus on the core areas outlined above. Now that we've identified the areas people 

# Naive Rag

In [14]:
from langchain.chains import RetrievalQA
#
# Baseline chain: answer from the 5 nearest chunks of the vector store, all
# passed to the LLM in a single "stuff" prompt.
qa = RetrievalQA.from_chain_type(
    llm=groq_llm,
    chain_type="stuff",
    retriever=vectorstore.as_retriever(search_kwargs={"k": 5}),
    return_source_documents=True,
)

# Call the chain through `.invoke()` with the "query" key, as the evaluation
# loop does; calling the chain object directly emits a deprecation warning.
naive_response = qa.invoke({"query": "What are the major changes in v 0.1.0?"})
naive_response['result']

  warn_deprecated(


'According to the text, the major changes in LangChain v0.1.0 include:\n\n1. A new versioning standard, where breaking changes will result in a minor version bump, and bug fixes or new features will result in a patch version bump.\n2. A re-architecting of the package, including separating out langchain-core and separating out partner packages.\n3. Improved focus through both functionality and documentation.\n4. Full backwards compatibility.\n5. Parity between the Python and JavaScript versions in core areas.\n\nAdditionally, the text mentions that the team has identified key areas to focus on, including:\n\n1. Rewriting legacy chains in LCEL (with better streaming and debugging support)\n2. Adding new types of chains\n3. Adding new types of agents\n4. Improving production ingestion capabilities\n5. Removing old and unused functionality'

# Advanced Rag with Reranking

In [15]:
from langchain.chains import RetrievalQA
#
# Same "stuff" chain as the baseline, but retrieving through the hybrid
# search + compression pipeline.
qa_advanced = RetrievalQA.from_chain_type(
    llm=groq_llm,
    chain_type="stuff",
    retriever=compression_pipeline,
    return_source_documents=True,
)

# Call the chain through `.invoke()` with the "query" key, as the evaluation
# loop does; calling the chain object directly is the deprecated form.
qa_adv_response = qa_advanced.invoke({"query": "What are the major changes in v 0.1.0?"})
qa_adv_response["result"]

'According to the text, the major changes in v0.1.0 include:\n\n1. Improved focus through both functionality and documentation.\n2. A new versioning policy, where:\n\t* Breaking changes to the public API will result in a minor version bump (the second digit).\n\t* Bug fixes or new features will result in a patch version bump (the third digit).\n3. A more stable and organized project architecture, with:\n\t* langchain-core containing the main abstractions, interfaces, and core functionality.\n\t* Partner packages separated out into langchain-community or standalone partner packages.\n\nAdditionally, the text mentions that the team is working on several areas, including rewriting legacy chains, adding new types of chains and agents, improving production ingestion capabilities, and removing old and unused functionality. However, these changes are not specifically mentioned as part of the v0.1.0 release.'

In [16]:
# Display the repr of the embeddings object as the cell output.
embeddings

OpenAIEmbeddings(client=<openai.resources.embeddings.Embeddings object at 0x795a860882e0>, async_client=<openai.resources.embeddings.AsyncEmbeddings object at 0x795a856f62c0>, model='text-embedding-ada-002', dimensions=None, deployment='text-embedding-ada-002', openai_api_version='', openai_api_base=None, openai_api_type='', openai_proxy='', embedding_ctx_length=8191, openai_api_key=SecretStr('**********'), openai_organization=None, allowed_special=None, disallowed_special=None, chunk_size=1000, max_retries=2, request_timeout=None, headers=None, tiktoken_enabled=True, tiktoken_model_name=None, show_progress_bar=False, model_kwargs={}, skip_empty=False, default_headers=None, default_query=None, retry_min_seconds=4, retry_max_seconds=20, http_client=None, http_async_client=None, check_embedding_ctx_length=True)

# Synthetic Test Set Generation using Ragas

In [17]:
from ragas.testset.generator import TestsetGenerator
from ragas.testset.evolutions import simple, reasoning, multi_context
#
# Reload the blog posts and split them with settings of their own (chunks of
# 1000 characters, overlap 200) instead of reusing the chunks indexed for
# retrieval.
documents = loader.load()
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
documents = text_splitter.split_documents(documents)
# A bare `len(documents)` in the middle of a cell is never displayed, so print
# the chunk count explicitly.
print(len(documents))


from langchain_openai import ChatOpenAI, OpenAIEmbeddings

# Generator with OpenAI models: one model writes the questions, a second
# instance of the same model acts as critic.
generator_llm = ChatOpenAI(model="gpt-3.5-turbo-16k")
critic_llm = ChatOpenAI(model="gpt-3.5-turbo-16k")
embeddings = OpenAIEmbeddings()

generator = TestsetGenerator.from_langchain(
    generator_llm,
    critic_llm,
    embeddings,
)

# Ask for 10 test questions: half simple, a quarter reasoning, a quarter
# multi-context.
testset = generator.generate_with_langchain_docs(
    documents,
    test_size=10,
    distributions={simple: 0.5, reasoning: 0.25, multi_context: 0.25},
)

embedding nodes:   0%|          | 0/76 [00:00<?, ?it/s]



Generating:   0%|          | 0/10 [00:00<?, ?it/s]



In [18]:
# Display the first generated test row as the cell output.
testset.test_data[0]

DataRow(question='What was the reason for separating out partner packages from langchain?', contexts=['we made two large architectural changes: separating out langchain-core and separating out partner packages (either into langchain-community or standalone partner packages) from langchain.\xa0As a reminder, langchain-core contains the main abstractions, interfaces, and core functionality. This code is stable and has been following a stricter versioning policy for a little over a month now.langchain itself, however, still remained on 0.0.x versions. Having all releases on minor version 0 created a few challenges:Users couldn‚Äôt be confident that updating would not have breaking changeslangchain became bloated and unstable as we took a ‚Äúmaintain everything‚Äù approach to reduce breaking changes and deprecation notificationsHowever, starting today with the release of langchain 0.1.0, all future releases will follow a new versioning standard. Specifically:Any breaking changes to the pub

# Generating Responses with RAG Pipeline
Now that we have obtained some quality control pairs and ground truths, it's time to assess our RAG pipeline using Ragas. Thanks to Ragas and LangChain, the process is quite simple once again! We'll begin by extracting our questions and ground truths from the test set we created. Converting our test dataset into a Pandas DataFrame is the first step.




In [19]:
# Convert the generated test set to a DataFrame and pull the question and
# ground-truth columns out as plain Python lists.
test_df = testset.to_pandas()
test_questions, test_groundtruths = (
    test_df[column].values.tolist() for column in ("question", "ground_truth")
)
test_df.head()

Unnamed: 0,question,contexts,ground_truth,evolution_type,metadata,episode_done
0,What was the reason for separating out partner...,[we made two large architectural changes: sepa...,To reduce breaking changes and deprecation not...,simple,[{'source': 'https://blog.langchain.dev/langch...,True
1,What are the programming languages supported b...,[Today we‚Äôre excited to announce the release...,The langchain 0.1.0 release supports Python an...,simple,[{'source': 'https://blog.langchain.dev/langch...,True
2,How does LangGraph allow users to design custo...,[agent control with agency Most agentic framew...,LangGraph allows users to design custom cognit...,simple,[{'source': 'https://blog.langchain.dev/langgr...,True
3,"What are the different ""Agent"" methods used in...","[systems, we are not overly opinionated on how...","The different ""Agent"" methods used in LangChai...",simple,[{'source': 'https://blog.langchain.dev/langch...,True
4,What is the significance of the public API in ...,[releases will follow a new versioning standar...,Any breaking changes to the public API will re...,simple,[{'source': 'https://blog.langchain.dev/langch...,True


# Generate responses using our Naive RAG pipeline using the Synthetic questions
# Wrap it in HF Dataset

In [20]:
# Run every synthetic question through the naive chain, then separate the
# generated answers from the text of the source documents behind each one.
naive_responses = [qa.invoke({"query": question}) for question in test_questions]

answers = [response["result"] for response in naive_responses]
contexts = [
    [source.page_content for source in response['source_documents']]
    for response in naive_responses
]

In [21]:
from datasets import Dataset

# Column-oriented records for the naive pipeline: one entry per test question.
naive_records = {
    "question": test_questions,
    "answer": answers,
    "contexts": contexts,
    "ground_truth": test_groundtruths,
}
response_dataset = Dataset.from_dict(naive_records)

# Show the first row as the cell output.
response_dataset[0]


{'question': 'What was the reason for separating out partner packages from langchain?',
 'answer': 'The reason for separating out partner packages from langchain was to improve the robustness, stability, scalability, and general developer experience around integrations. By separating out individual integrations into their own packages, LangChain can now better manage dependencies and versioning, making it easier to install specific versions and reflect breaking changes on an individual integration basis.',
 'contexts': ['to get started building on any stack. We have almost 700 integrations, ranging from LLMs to vector stores to tools for agents to use. \uf8ffüí°LangChain is often used as the ‚Äúglue‚Äù to join all the different pieces you need to build an LLM app together, and so prioritizing a robust integration ecosystem is a priority for us.About a month ago, we started making some changes we think will improve the robustness, stability, scalability, and general developer experience

# Evaluating the dataset

In [22]:
from ragas import evaluate
from ragas.metrics import (
    faithfulness,
    answer_relevancy,
    answer_correctness,
    context_recall,
    context_precision,
)

# Metrics passed to the evaluation runs in this notebook.
metrics = [faithfulness, answer_relevancy, context_recall, context_precision, answer_correctness]

# Score the naive pipeline's answers; raise_exceptions=True is passed so a
# failure surfaces as an exception.
naive_results = evaluate(
    dataset=response_dataset,
    metrics=metrics,
    raise_exceptions=True,
)
naive_results

Evaluating:   0%|          | 0/40 [00:00<?, ?it/s]

{'faithfulness': 0.6479, 'answer_relevancy': 0.8394, 'context_recall': 0.8750, 'context_precision': 0.7974, 'answer_correctness': 0.4906}

# Generate responses using our Advanced RAG pipeline using the questions we’ve generated.

In [23]:
# Run every synthetic question through the advanced chain, then separate the
# generated answers from the text of the source documents behind each one.
advanced_responses = [qa_advanced.invoke({"query": question}) for question in test_questions]

adv_answers = [response["result"] for response in advanced_responses]
adv_contexts = [
    [source.page_content for source in response['source_documents']]
    for response in advanced_responses
]

# Wrap the results in a Hugging Face dataset with the same columns as the
# naive run.
advanced_records = {
    "question": test_questions,
    "answer": adv_answers,
    "contexts": adv_contexts,
    "ground_truth": test_groundtruths,
}
response_dataset_advanced_retrieval = Dataset.from_dict(advanced_records)
response_dataset_advanced_retrieval[0]

{'question': 'What was the reason for separating out partner packages from langchain?',
 'answer': 'According to the text, the reason for separating out partner packages from langchain was to improve the robustness, stability, scalability, and general developer experience around integrations. This change also allowed for better dependency management and versioning, as individual integrations could now be managed in their own packages.',
 'contexts': ['Today we‚Äôre excited to announce the release of langchain 0.1.0, our first stable version. It is fully backwards compatible, comes in both Python and JavaScript, and comes with improved focus through both functionality and documentation. A stable version of LangChain helps us earn developer trust and gives us the ability to evolve the library systematically and safely.Python GitHub DiscussionPython v0.1.0 GuidesJS v0.1.0 GuidesYouTube WalkthroughIntroductionLangChain has been around for a little over a year and has changed a lot as it‚Äô

In [24]:
# Score the advanced pipeline's answers with the same metric list.
# NOTE(review): this run passes raise_exceptions=False while the naive run
# passes True — confirm the difference is intentional, since it may affect how
# failed rows influence the compared scores.
advanced_retrieval_results = evaluate(
    dataset=response_dataset_advanced_retrieval,
    metrics=metrics,
    raise_exceptions=False,
)
advanced_retrieval_results

Evaluating:   0%|          | 0/40 [00:00<?, ?it/s]

{'faithfulness': 0.8854, 'answer_relevancy': 0.8357, 'context_recall': 0.7500, 'context_precision': 0.8229, 'answer_correctness': 0.4975}

# Compare the Evaluation

In [25]:
import pandas as pd

def _scores_frame(results, column_name):
    """Return a two-column frame: 'Metric' plus the scores under ``column_name``."""
    return pd.DataFrame(list(results.items()), columns=['Metric', column_name])


df_original = _scores_frame(naive_results, 'Naive Rag')
df_comparison = _scores_frame(advanced_retrieval_results, 'Advance Rag')

# Line the two runs up by metric name and add the advanced-minus-naive difference.
df_merged = df_original.merge(df_comparison, on='Metric')
df_merged['Delta'] = df_merged['Advance Rag'] - df_merged['Naive Rag']

df_merged

Unnamed: 0,Metric,Naive Rag,Advance Rag,Delta
0,faithfulness,0.647917,0.885417,0.2375
1,answer_relevancy,0.839412,0.83569,-0.003722
2,context_recall,0.875,0.75,-0.125
3,context_precision,0.797396,0.822917,0.025521
4,answer_correctness,0.490615,0.497505,0.006889
