#### Hybrid Retriever - Combining Dense & Sparse Retriever

In [26]:
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.retrievers import BM25Retriever
from langchain.retrievers import EnsembleRetriever
from langchain.schema import Document

In [27]:
# Step 1: Sample documents
docs = [
    Document(page_content="LangChain helps build LLM applications."),
    Document(page_content="Pinecone is a vector database for semantic search."),
    Document(page_content="The Eiffel Tower is located in Paris."),
    Document(page_content="Langchain can be used to develop agentic ai application."),
    Document(page_content="Langchain has many types of retrievers.")
]

# STep2: Dense Retriever (FAISSS + HUggingFace)
embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
dense_vectorstore = FAISS.from_documents(docs, embedding_model)
dense_retriever=dense_vectorstore.as_retriever()

In [28]:
## Sparse Retriever (BM25)
sparse_retriever=BM25Retriever.from_documents(docs)
sparse_retriever.k=3 ##top-k documents to retriever

## Step 4: Combine with ensemble retriever
hybrid_retriever=EnsembleRetriever(
    retrievers=[dense_retriever, sparse_retriever],
    weight=[0.7, 0.3]
)

In [29]:
hybrid_retriever

EnsembleRetriever(retrievers=[VectorStoreRetriever(tags=['FAISS', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x000001E859917890>, search_kwargs={}), BM25Retriever(vectorizer=<rank_bm25.BM25Okapi object at 0x000001E8599179D0>, k=3)], weights=[0.5, 0.5])

In [30]:
# Step5: Query & get results
query="How can I build an application using LLM"
results=hybrid_retriever.invoke(query)

# Step6: Print results
for i,doc in enumerate(results):
    print(f"\n Docuement: {i+1}:\n{doc.page_content}")


 Docuement: 1:
LangChain helps build LLM applications.

 Docuement: 2:
Langchain can be used to develop agentic ai application.

 Docuement: 3:
Langchain has many types of retrievers.

 Docuement: 4:
Pinecone is a vector database for semantic search.


#### RAG PipleLine with Hybrid Retriever

In [31]:
from langchain.chat_models import init_chat_model
from langchain.prompts import PromptTemplate
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains.retrieval import create_retrieval_chain 

In [32]:
import os
from dotenv import load_dotenv
load_dotenv()

True

In [38]:
# STep[5]: PromptTemplate
prompt=PromptTemplate.from_template("""
Answer the question based on the context below.

Context:
{context}

Question: {input}
"""
)


## Step[6] - LLM
openAILLM = ("openai/gpt-oss-120b")
llm = init_chat_model(
    model=openAILLM,
    model_provider="openai",
    api_key=os.getenv('GROQ_API_KEY'),
    base_url="https://api.groq.com/openai/v1",
    temperature=0.6
)

llm

ChatOpenAI(client=<openai.resources.chat.completions.completions.Completions object at 0x000001E859AE9E00>, async_client=<openai.resources.chat.completions.completions.AsyncCompletions object at 0x000001E859AEA190>, root_client=<openai.OpenAI object at 0x000001E859AE9BA0>, root_async_client=<openai.AsyncOpenAI object at 0x000001E859AE9F30>, model_name='openai/gpt-oss-120b', temperature=0.6, model_kwargs={}, openai_api_key=SecretStr('**********'), openai_api_base='https://api.groq.com/openai/v1')

In [39]:
### Create stuff Document Chain
document_chain=create_stuff_documents_chain(
    llm=llm,
    prompt=prompt
)

## Create FULL RAG chain
rag_chain=create_retrieval_chain(
    retriever=hybrid_retriever,
    combine_docs_chain=document_chain
)

rag_chain

RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableBinding(bound=RunnableLambda(lambda x: x['input'])
           | EnsembleRetriever(retrievers=[VectorStoreRetriever(tags=['FAISS', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x000001E859917890>, search_kwargs={}), BM25Retriever(vectorizer=<rank_bm25.BM25Okapi object at 0x000001E8599179D0>, k=3)], weights=[0.5, 0.5]), kwargs={}, config={'run_name': 'retrieve_documents'}, config_factories=[])
})
| RunnableAssign(mapper={
    answer: RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
              context: RunnableLambda(format_docs)
            }), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
            | PromptTemplate(input_variables=['context', 'input'], input_types={}, partial_variables={}, template='\nAnswer the question based on the context below.\n\nContext:\n{context}\n\nQuestion: {input}\n')
            | ChatOpenAI(client=

In [43]:
## STep[9] - Ask a question
query={"input": "How can I build an app using an LLM?"}
response=rag_chain.invoke(query)

## Step 10: Output
print("Answer:\n", response["answer"])


print("\n Source Documents")
for i,doc in enumerate(response["context"]):
    print(f"\nDoc {i+1}: {doc.page_content}")

Answer:
 Below is a practical, end‚Äëto‚Äëend roadmap you can follow to turn a large language model (LLM) into a working application.  
I‚Äôll frame each step around the tools you mentioned‚Äî**LangChain**, its **retrievers**, and **Pinecone**‚Äîbut the same pattern works with other LLM providers, vector stores, or deployment platforms.

---

## 1Ô∏è‚É£ Define the problem & the user experience

| Question | Why it matters |
|----------|----------------|
| **What does the app do?** (e.g., answer FAQs, generate code, act as a personal assistant) | Determines the prompt design, required data, and whether you need an ‚Äúagentic‚Äù (tool‚Äëusing) workflow. |
| **Who are the users?** (internal staff, public customers, developers) | Drives UI choices (web UI, Slack bot, API, etc.) and security requirements. |
| **What latency / cost constraints do you have?** | Influences model selection (e.g., GPT‚Äë4 vs. a smaller open‚Äësource model) and how much you cache or pre‚Äëcompute. |

*Write a one