## Hybrid Retriever: Combining Dense and Sparse Retrievers

In [45]:
import torch
from langchain.retrievers import EnsembleRetriever
from langchain.schema import Document
from langchain_community.retrievers import BM25Retriever, TFIDFRetriever
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings

In [46]:
from tqdm import tqdm
from langchain_community.vectorstores import FAISS
from langchain.docstore import InMemoryDocstore
from langchain.schema import Document
import faiss
import numpy as np
import uuid

def build_faiss_with_progress(docs, embedding_model, batch_size=32, normalize=True):
    """
    Build a FAISS vector store from documents, showing a progress bar while embedding.

    Args:
        docs: Iterable of objects exposing ``page_content``. Each object is stored
            whole in the docstore, so whatever metadata it carries is preserved.
        embedding_model: Object providing ``embed_documents(list_of_texts)``; it is
            also handed to the store as its ``embedding_function``.
        batch_size: Number of texts passed to each ``embed_documents`` call (>= 1).
        normalize: When True, vectors are L2-normalised before indexing and the
            store is created with ``normalize_L2=True`` so that query vectors are
            normalised the same way at search time. For unit vectors, ranking by
            L2 distance is equivalent to ranking by cosine similarity.

    Returns:
        A ``FAISS`` vector store backed by an ``IndexFlatL2`` index.

    Raises:
        ValueError: If ``docs`` is empty or ``batch_size`` is smaller than 1.
    """
    # Materialise once: docs is walked twice (texts, then docstore entries)
    docs = list(docs)
    if not docs:
        raise ValueError("docs must contain at least one document")
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    texts = [d.page_content for d in docs]
    embeddings = []

    # 1) Compute embeddings in batches so the progress bar can advance
    for i in tqdm(range(0, len(texts), batch_size), desc="Generando embeddings"):
        batch = texts[i:i + batch_size]
        embeddings.extend(embedding_model.embed_documents(batch))

    embeddings = np.array(embeddings, dtype="float32")

    # 2) Normalise in place when cosine-style ranking is wanted
    if normalize:
        faiss.normalize_L2(embeddings)

    # 3) Create the flat L2 index and load the vectors
    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(embeddings)

    # 4) Store the full Document objects (with metadata) under fresh UUIDs
    doc_ids = [str(uuid.uuid4()) for _ in docs]
    docstore = InMemoryDocstore(dict(zip(doc_ids, docs)))

    # 5) Assemble the store. The row -> docstore-id mapping must be a dict keyed by
    #    the vector's position in the index, and normalize_L2 must mirror what was
    #    done to the indexed vectors so queries live in the same space.
    return FAISS(
        embedding_function=embedding_model,
        index=index,
        docstore=docstore,
        index_to_docstore_id=dict(enumerate(doc_ids)),
        normalize_L2=normalize,
    )

In [None]:
# Disabled alternative kept for reference: builds the dense store directly with
# FAISS.from_documents. The code is wrapped in a string literal, so running this
# cell executes none of it.
"""dense_vectorstore = FAISS.from_documents(
    docs, 
    embedding_model,
    normalize_L2=True  # Normalize vectors for cosine similarity
)
dense_retriever = dense_vectorstore.as_retriever()
dense_retriever.search_kwargs['k'] = 2  # Retrieve top 2 documents"""

In [47]:
# Step 1: Sample documents
docs = [
    Document(page_content="LangChain helps build LLM applications.", metadata={"id": "doc1"}),
    Document(page_content="Pinecone is a vector database for semantic search.", metadata={"id": "doc2"}),
    Document(page_content="The Eiffel Tower is located in Paris.", metadata={"id": "doc3"}),
    Document(page_content="Langchain can be used to develop agentic ai application.", metadata={"id": "doc4"}),
    Document(page_content="Langchain has many types of retrievers.", metadata={"id": "doc5"}),
    Document(page_content="FAISS is a library for efficient similarity search and clustering of dense vectors.", metadata={"id": "doc6"}),
    Document(page_content="HuggingFace provides a wide range of pre-trained models for NLP tasks.", metadata={"id": "doc7"}),
    Document(page_content="BM25 is a ranking function used by search engines to estimate the relevance of documents to a given search query.", metadata={"id": "doc8"}),
    Document(page_content="TF-IDF stands for Term Frequency-Inverse Document Frequency.", metadata={"id": "doc9"}),
    Document(page_content="Ensemble methods combine multiple models to improve performance.", metadata={"id": "doc10"}),
]

# Step 2: Dense Retriever (FAISS + HuggingFace)
# Prefer the GPU, but fall back to CPU so the cell also runs on machines without CUDA
embedding_device = "cuda" if torch.cuda.is_available() else "cpu"
embedding_model = HuggingFaceEmbeddings(
    model_name="google/embeddinggemma-300m",
    model_kwargs={"device": embedding_device},
)

dense_vectorstore = build_faiss_with_progress(docs, embedding_model, normalize=True)
# Retrieve the top 2 documents per query
dense_retriever = dense_vectorstore.as_retriever(search_kwargs={"k": 2})


Generando embeddings: 100%|██████████| 1/1 [00:00<00:00,  1.13it/s]


In [48]:
# `get_relevant_documents` is deprecated; `invoke` is the supported entry point
# and matches how the hybrid retriever is queried later in this notebook.
dense_retriever.invoke("What is LangChain?")

  dense_retriever.get_relevant_documents("What is LangChain?")


[Document(metadata={'id': 'doc1'}, page_content='LangChain helps build LLM applications.'),
 Document(metadata={'id': 'doc9'}, page_content='TF-IDF stands for Term Frequency-Inverse Document Frequency.')]

In [49]:
# TF-IDF retriever over the same corpus, configured to return the top 2 documents
tf_idf_retriever = TFIDFRetriever.from_documents(docs, k=2)
print(tf_idf_retriever)

vectorizer=TfidfVectorizer() docs=[Document(metadata={'id': 'doc1'}, page_content='LangChain helps build LLM applications.'), Document(metadata={'id': 'doc2'}, page_content='Pinecone is a vector database for semantic search.'), Document(metadata={'id': 'doc3'}, page_content='The Eiffel Tower is located in Paris.'), Document(metadata={'id': 'doc4'}, page_content='Langchain can be used to develop agentic ai application.'), Document(metadata={'id': 'doc5'}, page_content='Langchain has many types of retrievers.'), Document(metadata={'id': 'doc6'}, page_content='FAISS is a library for efficient similarity search and clustering of dense vectors.'), Document(metadata={'id': 'doc7'}, page_content='HuggingFace provides a wide range of pre-trained models for NLP tasks.'), Document(metadata={'id': 'doc8'}, page_content='BM25 is a ranking function used by search engines to estimate the relevance of documents to a given search query.'), Document(metadata={'id': 'doc9'}, page_content='TF-IDF stands 

In [50]:
# Step 3: Sparse retriever (BM25), returning its top 4 documents
sparse_retriever = BM25Retriever.from_documents(docs, k=4)

# Step 4: Combine dense and sparse retrievers; the dense one carries the larger weight
hybrid_retriever = EnsembleRetriever(
    weights=[0.7, 0.3],
    retrievers=[dense_retriever, sparse_retriever],
)


In [51]:
# Display the ensemble retriever's repr to inspect its sub-retrievers and weights
hybrid_retriever

EnsembleRetriever(retrievers=[VectorStoreRetriever(tags=['FAISS', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x76cdcd1eaa10>, search_kwargs={'k': 2}), BM25Retriever(vectorizer=<rank_bm25.BM25Okapi object at 0x76cd32747f50>)], weights=[0.7, 0.3])

In [52]:
# Step 5: Run the question through the hybrid retriever
query = "How can I build an application using LLMs?"
results = hybrid_retriever.invoke(query)

# Step 6: Show each retrieved document, numbered from 1
for position, doc in enumerate(results, start=1):
    print(f"\n🔹 Document {position}:\n{doc.page_content}")


🔹 Document 1:
LangChain helps build LLM applications.

🔹 Document 2:
Langchain can be used to develop agentic ai application.

🔹 Document 3:
TF-IDF stands for Term Frequency-Inverse Document Frequency.

🔹 Document 4:
Ensemble methods combine multiple models to improve performance.


### RAG Pipeline with hybrid retriever

In [23]:
from langchain.chat_models import init_chat_model
from langchain.prompts import PromptTemplate
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains.retrieval import create_retrieval_chain

In [24]:
# Prompt template: asks the model to answer from the supplied context only
prompt = PromptTemplate.from_template(
    "\n"
    "Answer the question based on the context below.\n"
    "\n"
    "Context:\n"
    "{context}\n"
    "\n"
    "Question: {input}\n"
)

# Chat model that generates the answer
llm = init_chat_model("gpt-3.5-turbo", model_provider="openai", temperature=0.2)
llm

ChatOpenAI(client=<openai.resources.chat.completions.completions.Completions object at 0x76cd3e76ec90>, async_client=<openai.resources.chat.completions.completions.AsyncCompletions object at 0x76cd33e2bf90>, root_client=<openai.OpenAI object at 0x76cd3dc1e9d0>, root_async_client=<openai.AsyncOpenAI object at 0x76cd33e2b9d0>, temperature=0.2, model_kwargs={}, openai_api_key=SecretStr('**********'))

In [25]:
# Create the stuff-documents chain that feeds retrieved documents and the prompt to the LLM
document_chain = create_stuff_documents_chain(llm, prompt)

# Create the full RAG chain: hybrid retrieval followed by the document chain
rag_chain = create_retrieval_chain(hybrid_retriever, document_chain)
rag_chain


RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableBinding(bound=RunnableLambda(lambda x: x['input'])
           | EnsembleRetriever(retrievers=[VectorStoreRetriever(tags=['FAISS', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x76cdcd0a33d0>, search_kwargs={'k': 3}), BM25Retriever(vectorizer=<rank_bm25.BM25Okapi object at 0x76cd3daa3590>, k=3)], weights=[0.7, 0.3]), kwargs={}, config={'run_name': 'retrieve_documents'}, config_factories=[])
})
| RunnableAssign(mapper={
    answer: RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
              context: RunnableLambda(format_docs)
            }), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
            | PromptTemplate(input_variables=['context', 'input'], input_types={}, partial_variables={}, template='\nAnswer the question based on the context below.\n\nContext:\n{context}\n\nQuestion: {input}\n')
            | ChatOpenAI(client=<o

In [26]:
# Ask the RAG chain a question
query = dict(input="How can I build an app using LLMs?")
response = rag_chain.invoke(query)

# Print the generated answer, then the documents it was grounded on
print("✅ Answer:\n", response["answer"])
print("\n📄 Source Documents:")
for position, doc in enumerate(response["context"], start=1):
    print(f"\nDoc {position}: {doc.page_content}")

✅ Answer:
 You can build an app using LLMs by utilizing LangChain, which helps in developing LLM applications. LangChain offers various types of retrievers that can be used in building agentic AI applications. Additionally, Pinecone, a vector database for semantic search, can also be integrated into the app to enhance its functionality.

📄 Source Documents:

Doc 1: LangChain helps build LLM applications.

Doc 2: Langchain can be used to develop agentic ai application.

Doc 3: Pinecone is a vector database for semantic search.

Doc 4: Langchain has many types of retrievers.
