### Maximal Marginal Relevance
MMR (Maximal Marginal Relevance) is a powerful diversity-aware retrieval technique used in information retrieval and RAG pipelines to balance relevance and novelty when selecting documents.

In [1]:
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chat_models import init_chat_model
from langchain.prompts import PromptTemplate
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains.retrieval import create_retrieval_chain

from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
"""
STEP 3: Environment Setup
Load API credentials securely from environment variables.
"""

import os
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

# Get Google API key with error handling
GOOGLE_API_KEY = os.getenv('GOOGLE_API_KEY')
if not GOOGLE_API_KEY:
    raise ValueError("GOOGLE_API_KEY not found in environment variables")

print("✓ API keys loaded successfully")

✓ API keys loaded successfully


In [4]:
# Step 1: Load and chunk the document
loader = TextLoader("langchain-rag-dataset.txt")
raw_docs = loader.load()
splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=50)
chunks = splitter.split_documents(raw_docs)
chunks

[Document(metadata={'source': 'langchain-rag-dataset.txt'}, page_content='LangChain is an open-source framework designed to simplify the development of applications using large language models (LLMs).\nLangChain provides abstractions for working with prompts, chains, memory, and agents, making it easier to build complex LLM-based systems.'),
 Document(metadata={'source': 'langchain-rag-dataset.txt'}, page_content='The framework supports integration with various vector databases like FAISS and Chroma for semantic retrieval.\nLangChain enables Retrieval-Augmented Generation (RAG) by allowing developers to fetch relevant context before generating responses.'),
 Document(metadata={'source': 'langchain-rag-dataset.txt'}, page_content='Memory in LangChain helps models retain previous interactions, making multi-turn conversations more coherent.\nAgents in LangChain can use tools like calculators, search APIs, or custom functions based on the instructions they receive.'),
 Document(metadata={'

In [5]:
# Initialize Google embeddings model for vector representation
embedding_model = GoogleGenerativeAIEmbeddings(
    model="models/gemini-embedding-001", 
    google_api_key=GOOGLE_API_KEY
)


# Initialize Google's Gemini model for response generation
llm = ChatGoogleGenerativeAI(
    google_api_key=GOOGLE_API_KEY,
    model="gemini-2.0-flash",    # Latest fast Gemini model for quick reranking
    temperature=0,               # Deterministic output for consistency in ranking
    max_tokens=None,            # Use model default token limit
    timeout=None,               # No timeout limit for ranking requests
    max_retries=2,              # Retry failed requests twice
)

print("✓ Gemini model initialized for reranking")

✓ Gemini model initialized for reranking


In [7]:
vectorstore = FAISS.from_documents(chunks, embedding_model)


### Step 3: Create MMR Retirever
retriever=vectorstore.as_retriever(
    search_type="mmr",
    search_kwargs={"k":3}
)

In [8]:
### Step 3: Create MMR Retirever
retriever=vectorstore.as_retriever(
    search_type="mmr",
    search_kwargs={"k":3}
)

In [9]:
# Step 4: Prompt and LLM
prompt = PromptTemplate.from_template("""
Answer the question based on the context provided.

Context:
{context}

Question: {input}
""")


In [None]:
# Step 6: RAG Pipeline using LCEL (LangChain Expression Language)
"""
LCEL (LangChain Expression Language) Implementation:
This creates a modern, streamlined RAG pipeline using the pipe operator (|) for chaining components.
LCEL provides better readability, error handling, and supports async operations out of the box.
"""

# Import required LCEL components
from langchain_core.output_parsers import StrOutputParser  # Parses LLM output to clean string
from langchain_core.runnables import RunnablePassthrough    # Passes input through unchanged

def format_docs(docs):
    """
    Format retrieved documents into a single context string.
    
    Args:
        docs (list): List of retrieved Document objects from the vector store
        
    Returns:
        str: Concatenated document content separated by double newlines
        
    Purpose:
        - Combines multiple document chunks into a single context string
        - Uses double newlines to clearly separate different document chunks
        - Makes it easier for the LLM to understand document boundaries
    """
    return "\n\n".join([doc.page_content for doc in docs])

# Create LCEL pipeline using pipe operator (|)
"""
LCEL Pipeline Structure:
1. Input Processing: {"context": retriever | format_docs, "input": RunnablePassthrough()}
   - retriever: Uses MMR to find diverse, relevant documents
   - format_docs: Converts retrieved docs to formatted string
   - RunnablePassthrough(): Preserves original query unchanged
   
2. Prompt Formatting: | prompt
   - Injects context and input into the predefined prompt template
   
3. LLM Processing: | llm
   - Sends formatted prompt to Google's Gemini model for response generation
   
4. Output Parsing: | StrOutputParser()
   - Converts LLM response object to clean string output
"""
rag_chain_lcel = (
    # Step 1: Prepare inputs - retrieve docs and pass query through
    {"context": retriever | format_docs, "input": RunnablePassthrough()}
    # Step 2: Format the prompt with context and input
    | prompt
    # Step 3: Generate response using the language model
    | llm
    # Step 4: Parse output to clean string format
    | StrOutputParser()
)

print("✓ RAG pipeline created using LCEL with pipe operators")

✓ RAG pipeline created using LCEL with pipe operators


In [None]:
# Step 7: Query Execution and Testing
"""
Test the LCEL RAG pipeline with a sample query about LangChain's capabilities.
This demonstrates how the pipeline retrieves relevant documents using MMR,
formats them as context, and generates a comprehensive answer.
"""

# Define test query - asking about LangChain's advanced features
query = "How does LangChain support agents and memory?"

# Execute the LCEL pipeline
# The pipeline will:
# 1. Use MMR retriever to find diverse, relevant documents
# 2. Format retrieved docs as context string
# 3. Combine with query in the prompt template
# 4. Generate response using Gemini model
# 5. Parse output to clean string
response = rag_chain_lcel.invoke(query)

print("✅ Answer:\n", response)

✅ Answer:
 LangChain supports agents by allowing LLMs to act as agents that can decide which tool to call and in what order during a task. It supports memory through components like `ConversationBufferMemory` and `ConversationSummaryMemory`, which help models retain previous interactions and maintain coherence in multi-turn conversations.


In [13]:
response

'LangChain supports agents by allowing LLMs to act as agents that can decide which tool to call and in what order during a task. It supports memory through components like `ConversationBufferMemory` and `ConversationSummaryMemory`, which help models retain previous interactions and maintain coherence in multi-turn conversations.'