In [1]:
import sys
from pathlib import Path

# Notebook ka folder
NOTEBOOK_DIR = Path().resolve()

# Project root = parent folder
PROJECT_ROOT = NOTEBOOK_DIR.parent

# Add project root to import path
sys.path.append(str(PROJECT_ROOT))

print("Project root:", PROJECT_ROOT)

import warnings as w
w.filterwarnings("ignore")

Project root: D:\Langchain_LangGraph_03-12-2025\MyProject


In [2]:
"""
Production-Grade RAG System with:
1. Hybrid Search (Semantic + Keyword)
2. Reranking (Cohere)
3. LangGraph Integration
4. Evaluation Metrics
5. Complete Pipeline

Run in Jupyter Notebook
"""


'\nProduction-Grade RAG System with:\n1. Hybrid Search (Semantic + Keyword)\n2. Reranking (Cohere)\n3. LangGraph Integration\n4. Evaluation Metrics\n5. Complete Pipeline\n\nRun in Jupyter Notebook\n'

#### LangChain related Library

In [3]:
#now importing all the Module which is used to build the AI Model.
from langchain_core.prompts import PromptTemplate,ChatPromptTemplate
from exception import CustomException
from logger_config import logger
import os,sys

#using openai chat model and embedding models
from langchain_openai import ChatOpenAI,OpenAIEmbeddings

#using groq chat model 
from langchain_groq import ChatGroq

#using open source chat model from hugging Face
from langchain_huggingface import ChatHuggingFace,HuggingFaceEmbeddings,HuggingFaceEndpoint

from config import *

from langchain_core.runnables import RunnableBranch,RunnableLambda,RunnableParallel,RunnableSequence,RunnablePassthrough

[2025-12-13 22:52:38,282]-config_variable.py-INFO -Loading the environment Variable
[2025-12-13 22:52:38,284]-config_variable.py-INFO -Environment Variable successfully Loaded


In [4]:
%pwd

'd:\\Langchain_LangGraph_03-12-2025\\MyProject\\notebooks'

#### LanGraph related Library

In [5]:
#import Langgraph related Modules
import langgraph
from langgraph.graph import StateGraph,START,END
from dataclasses import dataclass
from typing import TypedDict
from typing import Literal,List,Annotated
from langchain_core.messages import AnyMessage,AIMessage,HumanMessage,ToolMessage

from pydantic import BaseModel #using this class we can perform validation to schema

from langgraph.prebuilt import tool_node,tools_condition #in this class we put all tools together
#tools_condition wrt to tool msg it will route the flow data to ttol node to perform execution

from langchain_core.tools import tool,Tool,StructuredTool

from langgraph.graph.message import BaseMessage #this is special class which hold every mesaage init.



## step:1) defining the models components

In [6]:
model1 = ChatOpenAI(
    model="gpt-3.5-turbo",
    temperature=0.2 #we call as creative parameter
)
model1

ChatOpenAI(client=<openai.resources.chat.completions.completions.Completions object at 0x000001BD7DFFE580>, async_client=<openai.resources.chat.completions.completions.AsyncCompletions object at 0x000001BD7E19E5E0>, root_client=<openai.OpenAI object at 0x000001BD7DFFEE50>, root_async_client=<openai.AsyncOpenAI object at 0x000001BD7E19E640>, temperature=0.2, model_kwargs={}, openai_api_key=SecretStr('**********'), stream_usage=True)

In [7]:
model2 = ChatGroq(
    model="llama-3.1-8b-instant",
    temperature=0.2 #we call as creative parameter
)
model2

ChatGroq(client=<groq.resources.chat.completions.Completions object at 0x000001BD7E930D60>, async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x000001BD7E9355B0>, model_name='llama-3.1-8b-instant', temperature=0.2, model_kwargs={}, groq_api_key=SecretStr('**********'))

In [8]:
llm = HuggingFaceEndpoint(  
repo_id="meta-llama/Llama-3.1-8B-Instruct",  
task="text-generation",  
max_new_tokens=512,  
do_sample=False,  
repetition_penalty=1.03,  
)  

model3 = ChatHuggingFace(llm=llm, verbose=True)
model3

ChatHuggingFace(llm=HuggingFaceEndpoint(repo_id='meta-llama/Llama-3.1-8B-Instruct', repetition_penalty=1.03, stop_sequences=[], server_kwargs={}, model_kwargs={}, model='meta-llama/Llama-3.1-8B-Instruct', client=<InferenceClient(model='meta-llama/Llama-3.1-8B-Instruct', timeout=120)>, async_client=<InferenceClient(model='meta-llama/Llama-3.1-8B-Instruct', timeout=120)>, task='text-generation'), model_id='meta-llama/Llama-3.1-8B-Instruct', model_kwargs={})

In [9]:
### Hugging face Embedding Models.
from langchain_huggingface import HuggingFaceEmbeddings,HuggingFaceEndpointEmbeddings
hug_emb_model = HuggingFaceEndpointEmbeddings(
    model="BAAI/bge-large-en-v1.5",
    task = "feature-extraction",
)
hug_emb_model

HuggingFaceEndpointEmbeddings(client=<InferenceClient(model='BAAI/bge-large-en-v1.5', timeout=None)>, async_client=<InferenceClient(model='BAAI/bge-large-en-v1.5', timeout=None)>, model='BAAI/bge-large-en-v1.5', provider=None, repo_id='BAAI/bge-large-en-v1.5', task='feature-extraction', model_kwargs=None, huggingfacehub_api_token=None)

In [10]:
from langchain_openai import OpenAIEmbeddings
emb_model = OpenAIEmbeddings(
    model="text-embedding-3-small"  
)
emb_model

OpenAIEmbeddings(client=<openai.resources.embeddings.Embeddings object at 0x000001BD5C8E5160>, async_client=<openai.resources.embeddings.AsyncEmbeddings object at 0x000001BD5C8DF790>, model='text-embedding-3-small', dimensions=None, deployment='text-embedding-ada-002', openai_api_version=None, openai_api_base=None, openai_api_type=None, openai_proxy=None, embedding_ctx_length=8191, openai_api_key=SecretStr('**********'), openai_organization=None, allowed_special=None, disallowed_special=None, chunk_size=1000, max_retries=2, request_timeout=None, headers=None, tiktoken_enabled=True, tiktoken_model_name=None, show_progress_bar=False, model_kwargs={}, skip_empty=False, default_headers=None, default_query=None, retry_min_seconds=4, retry_max_seconds=20, http_client=None, http_async_client=None, check_embedding_ctx_length=True)

### 1) building Simple RAG workflow using LangChain
#### as we know the components of Rag such as :-
#### 1) retireval 2)vector database 3)doc loaders 4)doc splitting

In [11]:
import boto3
from langchain_community.document_loaders import S3DirectoryLoader

### STEP 1: LOAD DOCUMENTS FROM S3

In [12]:
#creating an object for this class.
doc_loader = S3DirectoryLoader(bucket=S3_BUCKET_NAME,
                               region_name=AWS_REGION,
                               aws_access_key_id=AWS_ACCESS_KEY_ID,
                               aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
                               prefix=S3_PREFIX
)
doc_loader

<langchain_community.document_loaders.s3_directory.S3DirectoryLoader at 0x1bd7ffe6c70>

In [13]:
lst_document = doc_loader.load()
print(f"✅ Loaded {len(lst_document)} documents from S3")

# Display first document info
if lst_document:
    print(f"\nSample Document:")
    print(f"  - Content length: {len(lst_document[0].page_content)} characters")
    print(f"  - Metadata: {lst_document[0].metadata}")
    print(f"  - Preview: {lst_document[0].page_content[:200]}...")

[2025-12-13 22:52:46,322]-<frozen importlib._bootstrap>-INFO -pikepdf C++ to Python logger bridge initialized
✅ Loaded 1 documents from S3

Sample Document:
  - Content length: 501485 characters
  - Metadata: {'source': 's3://amazondirectorystoredocument/newfolderDoc/Molnar-interpretable-machine-learning.pdf'}
  - Preview: Interpretable Machine Learning

A Guide for Making Black Box Models Explainable

Christoph Molnar

This book is for sale at http://leanpub.com/interpretable-machine-learning

This version was publishe...


### STEP 2: CHUNK DOCUMENTS

In [14]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

#creating an object of this RecursiveCharacterTextSplitter
splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,      # Overlap between chunks (20% of chunk_size)
    length_function=len,    # Function to measure chunk length
    separators=[           # Try splitting by these in order:
        "\n\n",            # 1. Double newlines (paragraphs) - best
        "\n",              # 2. Single newlines (lines)
        ". ",              # 3. Sentences
        " ",               # 4. Words
        ""                 # 5. Characters (last resort)
    ]
)
splitter

<langchain_text_splitters.character.RecursiveCharacterTextSplitter at 0x1bd7ffed4c0>

In [15]:
#now using spliter method to split the documents into lst_chunks
lst_chunks = splitter.split_documents(documents=lst_document)
print(f"✅ Created {len(lst_chunks)} chunks from {len(lst_document)} documents")

✅ Created 701 chunks from 1 documents


In [16]:
lst_chunks[:2]

[Document(metadata={'source': 's3://amazondirectorystoredocument/newfolderDoc/Molnar-interpretable-machine-learning.pdf'}, page_content='Interpretable Machine Learning\n\nA Guide for Making Black Box Models Explainable\n\nChristoph Molnar\n\nThis book is for sale at http://leanpub.com/interpretable-machine-learning\n\nThis version was published on 2019-02-21\n\nThis is a Leanpub book. Leanpub empowers authors and publishers with the Lean Publishing process. Lean Publishing is the act of publishing an in-progress ebook using lightweight tools and many iterations to get reader feedback, pivot until you have the right book and build traction once you do.\n\nThis work is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License\n\nContents\n\nPreface . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .'),
 Document(metadata={'source': 's3://amazondirectorystoredocument/newfolderDoc/Molnar-interpre

#### this will see overall chunks of document ka aggregate statistics

In [17]:
# Analyze chunk statistics
import numpy as np
chunk_lengths = [len(chunk.page_content) for chunk in lst_chunks]
print(f"\nChunk Statistics:")
print(f"  - Average length: {np.mean(chunk_lengths):.0f} characters")
print(f"  - Min length: {min(chunk_lengths)} characters")
print(f"  - Max length: {max(chunk_lengths)} characters")
print(f"  - Median length: {np.median(chunk_lengths):.0f} characters")


Chunk Statistics:
  - Average length: 770 characters
  - Min length: 85 characters
  - Max length: 998 characters
  - Median length: 849 characters


### STEP 3: SETUP PINECONE INDEX

### now using Pinecone vector database to store the embedding vector generating through lst_chunks doc

In [18]:
#setting up the pincone.
from pinecone import Pinecone
pc = Pinecone(api_key=PINECONE_API_KEY)
pc

<pinecone.pinecone.Pinecone at 0x1bd7fb18fd0>

### below block of code we used to create index or folder or database in pincone

In [19]:
#Before initializing our vector store, let’s connect to a Pinecone index.
from pinecone import ServerlessSpec
import time
index_name = INDEX_NAME


if not pc.has_index(index_name):
    pc.create_index(
        name=index_name,
        dimension=1536,
        metric="dotproduct",  # ✅ CRITICAL: Must be dotproduct for hybrid search
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )
    
    print("✅ Index created successfully")
    print("⏳ Waiting for index to be ready...")
    time.sleep(10)  # Wait for index initialization
else:
    print(f"✅ Using existing index: {INDEX_NAME}")


✅ Index created successfully
⏳ Waiting for index to be ready...


#### connection to pinecone database index or folder or database_name describe the stats

In [20]:
# Connect to the index
index = pc.Index(INDEX_NAME)

# Verify index configuration
stats = index.describe_index_stats()
print(f"\nIndex Statistics:")
print(f"  - Dimension: {stats.dimension}")
print(f"  - Total vectors: {stats.total_vector_count}")
print(f"  - Index fullness: {stats.index_fullness}")


Index Statistics:
  - Dimension: 1536
  - Total vectors: 0
  - Index fullness: 0.0


### STEP 4: SETUP BM25 ENCODER FOR HYBRID SEARCH

#### this Hybrid search combines dense vector embeddings with sparse keyword-based scoring (BM25/TF-IDF)

In [21]:
# Extract text content from chunks_of_document for BM25 training
#corpus or document
texts = [chunk.page_content for chunk in lst_chunks]
texts[:5]

['Interpretable Machine Learning\n\nA Guide for Making Black Box Models Explainable\n\nChristoph Molnar\n\nThis book is for sale at http://leanpub.com/interpretable-machine-learning\n\nThis version was published on 2019-02-21\n\nThis is a Leanpub book. Leanpub empowers authors and publishers with the Lean Publishing process. Lean Publishing is the act of publishing an in-progress ebook using lightweight tools and many iterations to get reader feedback, pivot until you have the right book and build traction once you do.\n\nThis work is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License\n\nContents\n\nPreface . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .',
 'Contents\n\nPreface . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .\n\nIntroduction . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .

In [22]:
# Initialize BM25 encoder
# BM25 (Best Match 25) is a keyword-based ranking function
# It creates sparse vectors for exact term matching
from pinecone_text.sparse import BM25Encoder
bm25_encoder = BM25Encoder()
bm25_encoder

<pinecone_text.sparse.bm25_encoder.BM25Encoder at 0x1bd28b57dc0>

In [23]:
# Fit BM25 on your corpus
# This learns term frequencies and inverse document frequencies
print(f"Training BM25 on {len(texts)} documents...")
bm25_encoder.fit(texts)

Training BM25 on 701 documents...


  0%|          | 0/701 [00:00<?, ?it/s]

<pinecone_text.sparse.bm25_encoder.BM25Encoder at 0x1bd28b57dc0>

In [24]:
# Save the trained encoder for future use
bm25_encoder.dump("bm25_encoder.json")
print("✅ BM25 encoder trained and saved")

✅ BM25 encoder trained and saved


In [25]:
#Testing BM25Encoder object(is it doing Keyward searching or not).
test_sparse = bm25_encoder.encode_queries("machine learning")
print(f"  - Sample sparse vector indices: {test_sparse['indices'][:10]}")
print(f"  - Sample sparse vector values: {test_sparse['values'][:10]}")

  - Sample sparse vector indices: [3066577729, 2650797237]
  - Sample sparse vector values: [0.5352454116928278, 0.46475458830717237]


### STEP 5: CREATE HYBRID SEARCH RETRIEVER

In [26]:
from langchain_community.retrievers import PineconeHybridSearchRetriever
hybrid_retriever = PineconeHybridSearchRetriever(
    embeddings=emb_model,           # Dense embeddings (semantic searching)
    sparse_encoder=bm25_encoder,    # Sparse encoder (keyword searching)
    index=index,                    # Pinecone index name
    top_k=20,                       # Retrieve top 20 candidates (before reranking)
                                    # Higher value = more comprehensive but slower
    alpha=0.5                       # Balance between dense and sparse
                                    # 0.0 = pure keyword (BM25 only)
                                    # 0.5 = balanced (50% semantic, 50% keyword)
                                    # 1.0 = pure semantic (embeddings only)
                                    # Recommendation: 0.5 for general, 0.3 for technical
)

print(f"✅ Hybrid retriever created")
print(f"  - Top-k: 20 (candidates before reranking)")
print(f"  - Alpha: 0.5 (balanced semantic + keyword)")

✅ Hybrid retriever created
  - Top-k: 20 (candidates before reranking)
  - Alpha: 0.5 (balanced semantic + keyword)


## STEP 6: ADD DOCUMENTS(pushing chunks of doc to Pinecone DB) TO HYBRID RETRIEVER

In [27]:
from uuid import uuid4
# Generate unique IDs for each chunk
uuids = [str(uuid4()) for _ in range(len(lst_chunks))]
print(f"Generated {len(uuids)} unique IDs")

Generated 701 unique IDs


In [28]:
# Extract metadata for each chunk
metadatas = [chunk.metadata for chunk in lst_chunks]
metadatas[:10]

[{'source': 's3://amazondirectorystoredocument/newfolderDoc/Molnar-interpretable-machine-learning.pdf'},
 {'source': 's3://amazondirectorystoredocument/newfolderDoc/Molnar-interpretable-machine-learning.pdf'},
 {'source': 's3://amazondirectorystoredocument/newfolderDoc/Molnar-interpretable-machine-learning.pdf'},
 {'source': 's3://amazondirectorystoredocument/newfolderDoc/Molnar-interpretable-machine-learning.pdf'},
 {'source': 's3://amazondirectorystoredocument/newfolderDoc/Molnar-interpretable-machine-learning.pdf'},
 {'source': 's3://amazondirectorystoredocument/newfolderDoc/Molnar-interpretable-machine-learning.pdf'},
 {'source': 's3://amazondirectorystoredocument/newfolderDoc/Molnar-interpretable-machine-learning.pdf'},
 {'source': 's3://amazondirectorystoredocument/newfolderDoc/Molnar-interpretable-machine-learning.pdf'},
 {'source': 's3://amazondirectorystoredocument/newfolderDoc/Molnar-interpretable-machine-learning.pdf'},
 {'source': 's3://amazondirectorystoredocument/newfolde

In [29]:
# Add documents with both dense and sparse vectors
# This creates:
# - Dense vectors via OpenAI embeddings (semantic)
# - Sparse vectors via BM25 (keyword)
print(f"Adding {len(texts)} documents to hybrid index...")
print("(This will take a few minutes depending on document count)")

Adding 701 documents to hybrid index...
(This will take a few minutes depending on document count)


In [30]:
# Add in batches for better progress tracking
batch_size = 50
for i in range(0, len(texts), batch_size):
    batch_texts = texts[i:i+batch_size]
    batch_metadatas = metadatas[i:i+batch_size]
    batch_ids = uuids[i:i+batch_size]
    
    #YAHAN ACTUAL DATA ka vectors-->(sparse + dense vector) VECTOR DB ME JA raha hai!!!
    hybrid_retriever.add_texts(
        texts=batch_texts,
        metadatas=batch_metadatas,
        ids=batch_ids
    )
    
    progress = min(i + batch_size, len(texts))
    print(f"  ✓ Progress: {progress}/{len(texts)} documents")

print(f"✅ All {len(texts)} documents added to hybrid retriever")

  0%|          | 0/2 [00:00<?, ?it/s]

[2025-12-13 22:53:25,017]-_client.py-INFO -HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
[2025-12-13 22:53:28,407]-_client.py-INFO -HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  ✓ Progress: 50/701 documents


  0%|          | 0/2 [00:00<?, ?it/s]

[2025-12-13 22:53:30,178]-_client.py-INFO -HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
[2025-12-13 22:53:32,162]-_client.py-INFO -HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  ✓ Progress: 100/701 documents


  0%|          | 0/2 [00:00<?, ?it/s]

[2025-12-13 22:53:33,664]-_client.py-INFO -HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
[2025-12-13 22:53:35,523]-_client.py-INFO -HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  ✓ Progress: 150/701 documents


  0%|          | 0/2 [00:00<?, ?it/s]

[2025-12-13 22:53:37,571]-_client.py-INFO -HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
[2025-12-13 22:53:39,749]-_client.py-INFO -HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  ✓ Progress: 200/701 documents


  0%|          | 0/2 [00:00<?, ?it/s]

[2025-12-13 22:53:41,787]-_client.py-INFO -HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
[2025-12-13 22:53:43,907]-_client.py-INFO -HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  ✓ Progress: 250/701 documents


  0%|          | 0/2 [00:00<?, ?it/s]

[2025-12-13 22:53:45,373]-_client.py-INFO -HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
[2025-12-13 22:53:47,476]-_client.py-INFO -HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  ✓ Progress: 300/701 documents


  0%|          | 0/2 [00:00<?, ?it/s]

[2025-12-13 22:53:49,240]-_client.py-INFO -HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
[2025-12-13 22:53:51,628]-_client.py-INFO -HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  ✓ Progress: 350/701 documents


  0%|          | 0/2 [00:00<?, ?it/s]

[2025-12-13 22:53:53,862]-_client.py-INFO -HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
[2025-12-13 22:53:56,763]-_client.py-INFO -HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  ✓ Progress: 400/701 documents


  0%|          | 0/2 [00:00<?, ?it/s]

[2025-12-13 22:53:59,237]-_client.py-INFO -HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
[2025-12-13 22:54:02,842]-_client.py-INFO -HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  ✓ Progress: 450/701 documents


  0%|          | 0/2 [00:00<?, ?it/s]

[2025-12-13 22:54:05,577]-_client.py-INFO -HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
[2025-12-13 22:54:09,667]-_client.py-INFO -HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  ✓ Progress: 500/701 documents


  0%|          | 0/2 [00:00<?, ?it/s]

[2025-12-13 22:54:12,628]-_client.py-INFO -HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
[2025-12-13 22:54:18,047]-_client.py-INFO -HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  ✓ Progress: 550/701 documents


  0%|          | 0/2 [00:00<?, ?it/s]

[2025-12-13 22:54:21,470]-_client.py-INFO -HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
[2025-12-13 22:54:26,808]-_client.py-INFO -HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  ✓ Progress: 600/701 documents


  0%|          | 0/2 [00:00<?, ?it/s]

[2025-12-13 22:54:30,073]-_client.py-INFO -HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
[2025-12-13 22:54:34,056]-_client.py-INFO -HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  ✓ Progress: 650/701 documents


  0%|          | 0/2 [00:00<?, ?it/s]

[2025-12-13 22:54:36,596]-_client.py-INFO -HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
[2025-12-13 22:54:39,440]-_client.py-INFO -HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  ✓ Progress: 700/701 documents


  0%|          | 0/1 [00:00<?, ?it/s]

[2025-12-13 22:54:41,107]-_client.py-INFO -HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  ✓ Progress: 701/701 documents
✅ All 701 documents added to hybrid retriever


In [31]:
# Verify vectors were added
time.sleep(2)  # Wait for index to update
stats = index.describe_index_stats()
print(f"\nUpdated Index Stats:")
print(f"  - Total vectors: {stats.total_vector_count}")


Updated Index Stats:
  - Total vectors: 701


### STEP 7: ADD RERANKING LAYER

In [32]:

# Reranking imports
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import CohereRerank

# Initialize Cohere reranker
# Reranking improves results by re-scoring candidates with a cross-encoder
# Cross-encoders are more accurate but slower than bi-encoders
compressor = CohereRerank(
    model="rerank-english-v3.0",    # Cohere's reranking model
                                     # v3.0 is latest and most accurate
    top_n=5,                        # Final number of results after reranking
                                    # Flow: Retrieve 20 → Rerank → Return top 5
    cohere_api_key=COHERE_API_KEY
)
compressor

  compressor = CohereRerank(


CohereRerank(client=<cohere.client.Client object at 0x000001BD2AF94C10>, top_n=5, model='rerank-english-v3.0', cohere_api_key='GfCv0o5O6R6ezFnZcyZvR5qoN4ulHYAq3Exn6HCp', user_agent='langchain')

### based on user query want to show response

In [33]:
# Create contextual compression retriever
# This wraps the base retriever with reranking
retriever_with_rerank = ContextualCompressionRetriever(
    base_compressor=compressor,      # Reranking model
    base_retriever=hybrid_retriever  # Base hybrid retriever
)

print("✅ Reranking layer added")
print(f"  - Model: rerank-english-v3.0")
print(f"  - Pipeline: Hybrid Search (20) → Rerank → Top 5")

✅ Reranking layer added
  - Model: rerank-english-v3.0
  - Pipeline: Hybrid Search (20) → Rerank → Top 5


### Alternative: Create simple retriever without reranking for comparison

In [34]:
# Alternative: Create simple retriever without reranking for comparison
simple_retriever = hybrid_retriever
simple_retriever

PineconeHybridSearchRetriever(embeddings=OpenAIEmbeddings(client=<openai.resources.embeddings.Embeddings object at 0x000001BD5C8E5160>, async_client=<openai.resources.embeddings.AsyncEmbeddings object at 0x000001BD5C8DF790>, model='text-embedding-3-small', dimensions=None, deployment='text-embedding-ada-002', openai_api_version=None, openai_api_base=None, openai_api_type=None, openai_proxy=None, embedding_ctx_length=8191, openai_api_key=SecretStr('**********'), openai_organization=None, allowed_special=None, disallowed_special=None, chunk_size=1000, max_retries=2, request_timeout=None, headers=None, tiktoken_enabled=True, tiktoken_model_name=None, show_progress_bar=False, model_kwargs={}, skip_empty=False, default_headers=None, default_query=None, retry_min_seconds=4, retry_max_seconds=20, http_client=None, http_async_client=None, check_embedding_ctx_length=True), sparse_encoder=<pinecone_text.sparse.bm25_encoder.BM25Encoder object at 0x000001BD28B57DC0>, index=<pinecone.db_data.index.

## Testing

In [38]:
test_query = "Assumption of Linear Regression?"
print(f"Test Query: '{test_query}'")

Test Query: 'Assumption of Linear Regression?'


In [None]:
# Test without reranking
print("\n--- WITHOUT Reranking (Hybrid only) ---")
docs_no_rerank = simple_retriever.invoke(test_query)

#docs_no_rerank = simple_retriever.get_relevant_documents(test_query) yeh bhi chalegha get_relevant_documents() method
for i, doc in enumerate(docs_no_rerank[:3], 1):  # Show top 3
    print(f"\n{i}. {doc.page_content[:150]}...")
    print(f"   Source: {doc.metadata.get('source', 'unknown')}")


--- WITHOUT Reranking (Hybrid only) ---
[2025-12-13 22:57:41,872]-_client.py-INFO -HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"

1. 60

Interpretable Models

GLM, GAM and more

The biggest strength but also the biggest weakness of the linear regression model is that the prediction ...
   Source: s3://amazondirectorystoredocument/newfolderDoc/Molnar-interpretable-machine-learning.pdf

2. Linearity The linear regression model forces the prediction to be a linear combination of features, which is both its greatest strength and its greate...
   Source: s3://amazondirectorystoredocument/newfolderDoc/Molnar-interpretable-machine-learning.pdf

3. But a simple weighted sum is too restrictive for many real world prediction problems. In this chapter we will learn about three problems of the classi...
   Source: s3://amazondirectorystoredocument/newfolderDoc/Molnar-interpretable-machine-learning.pdf


In [40]:
# Test with reranking
print("\n--- WITH Reranking (Hybrid + Cohere) ---")

docs_with_rerank = retriever_with_rerank.invoke(test_query)
for i, doc in enumerate(docs_with_rerank, 1):  # Show all (top 5)
    print(f"\n{i}. {doc.page_content[:150]}...")
    print(f"   Source: {doc.metadata.get('source', 'unknown')}")


--- WITH Reranking (Hybrid + Cohere) ---
[2025-12-13 22:58:48,895]-_client.py-INFO -HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
[2025-12-13 22:58:50,628]-_client.py-INFO -HTTP Request: POST https://api.cohere.com/v1/rerank "HTTP/1.1 200 OK"

1. The linear regression model assumes that the outcome given the input features follows a Gaussian distribution. This assumption excludes many cases: Th...
   Source: s3://amazondirectorystoredocument/newfolderDoc/Molnar-interpretable-machine-learning.pdf

2. Let us remember the formula of a linear regression model:

y = (cid:12)0 + (cid:12)1x1 + ::: + (cid:12)pxp + ϵ

The linear regression model assumes th...
   Source: s3://amazondirectorystoredocument/newfolderDoc/Molnar-interpretable-machine-learning.pdf

3. 60

Interpretable Models

GLM, GAM and more

The biggest strength but also the biggest weakness of the linear regression model is that the prediction ...
   Source: s3://amazondirectorystoredocument/newfolder