In [1]:
from langchain_community.vectorstores import FAISS
#from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.retrievers import BM25Retriever
from langchain.retrievers import EnsembleRetriever
from langchain.schema import Document

In [2]:
# Step 1: Sample documents
# Small in-memory corpus: each sentence below is wrapped in its own Document.
# Some sentences mention Langchain / LLM, the others are unrelated topics.
docs = [
    Document(page_content=sentence)
    for sentence in (
        'Langchain helps to build LLM applications.',
        "Lion is the king of wild animals.",
        "Pinecone is a vector database for semantic search.",
        "The Eiffel Tower is located in Paris.",
        "Langchain can be used to develop agentic ai application.",
        "LLM is a field in law in art which has nothing to do with TECH and IT.",
        "Langchain has different types of retrievers.",
    )
]

In [3]:
# Step 2: Dense retriever using HuggingFace embeddings and FAISS
# Sentence-transformers model used to embed the documents.
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Build a FAISS vector store from the sample documents.
dense_vectorstore = FAISS.from_documents(documents=docs, embedding=embedding_model)

# Expose the store as a retriever that returns the top 3 matches per query.
dense_retriever = dense_vectorstore.as_retriever(search_kwargs=dict(k=3))

  embedding_model = HuggingFaceEmbeddings(


In [4]:
# Step 3: Sparse retriever using BM25
# k (number of top documents to retrieve) is supplied at construction time
# rather than assigned afterwards; the resulting retriever state is the same.
# NOTE(review): the variable name is misspelled ("retiever"); it is kept as-is
# so that any other cell referring to it keeps working.
sparse_retiever = BM25Retriever.from_documents(docs, k=3)

# Step 4: Combine dense and sparse retrievers with an EnsembleRetriever
# Weights pair up positionally with the retrievers list:
# 0.7 for the dense (FAISS) retriever, 0.3 for the sparse (BM25) retriever.
hybrid_retriever = EnsembleRetriever(
    weights=[0.7, 0.3],
    retrievers=[dense_retriever, sparse_retiever],
)

# Bare last expression: display the configured ensemble retriever.
hybrid_retriever

EnsembleRetriever(retrievers=[VectorStoreRetriever(tags=['FAISS', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x000001E832BA5C10>, search_kwargs={'k': 3}), BM25Retriever(vectorizer=<rank_bm25.BM25Okapi object at 0x000001E83493C090>, k=3)], weights=[0.7, 0.3])

In [5]:
# Step 5: Query and output
query = 'How to design and build application using LLM?'

# Run the query through the hybrid (dense + sparse) retriever.
results = hybrid_retriever.invoke(query)

# Print every retrieved document with a 1-based position.
for position, doc in enumerate(results, start=1):
    print(f'\n Document {position}:\n{doc.page_content}')


 Document 1:
Langchain helps to build LLM applications.

 Document 2:
LLM is a field in law in art which has nothing to do with TECH and IT.

 Document 3:
Langchain can be used to develop agentic ai application.


RAG Pipeline with Hybrid Retriever

In [6]:
import os
from dotenv import load_dotenv
from langchain.chat_models import init_chat_model
from langchain.prompts import PromptTemplate
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains.retrieval import create_retrieval_chain

In [7]:
# Step 6: Prompt template

# Load variables from a .env file into the environment before the model is
# created (presumably this is where the OpenAI API key comes from -- confirm).
load_dotenv()

# The template exposes two variables: {context} and {input}.
prompt = PromptTemplate.from_template(
    template='''Answer the question based on the context below.
    
    Context: {context}
    
    Question: {input}
    '''
)

# Step 7: Chat model
# Provider and model name are given separately instead of the combined
# "provider:model" string; both forms select the same OpenAI chat model.
llm = init_chat_model(
    model='gpt-3.5-turbo-0125',
    model_provider='openai',
    max_retries=5,
    request_timeout=300,
    temperature=0.2,
)

# Bare last expression: display the configured chat model.
llm

ChatOpenAI(client=<openai.resources.chat.completions.completions.Completions object at 0x000001E836D65150>, async_client=<openai.resources.chat.completions.completions.AsyncCompletions object at 0x000001E838202990>, root_client=<openai.OpenAI object at 0x000001E8367CEED0>, root_async_client=<openai.AsyncOpenAI object at 0x000001E838202710>, model_name='gpt-3.5-turbo-0125', temperature=0.2, model_kwargs={}, openai_api_key=SecretStr('**********'), request_timeout=300.0, max_retries=5)

In [8]:
# Step 8: Assemble the RAG chain

# "Stuff" documents chain: feeds the documents into the prompt and calls the LLM.
document_chain = create_stuff_documents_chain(llm, prompt)

# Full RAG chain: retrieve with the hybrid retriever, then hand the retrieved
# documents to the stuff-documents chain.
rag_chain = create_retrieval_chain(hybrid_retriever, document_chain)

# Bare last expression: display the composed chain.
rag_chain

RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableBinding(bound=RunnableLambda(lambda x: x['input'])
           | EnsembleRetriever(retrievers=[VectorStoreRetriever(tags=['FAISS', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x000001E832BA5C10>, search_kwargs={'k': 3}), BM25Retriever(vectorizer=<rank_bm25.BM25Okapi object at 0x000001E83493C090>, k=3)], weights=[0.7, 0.3]), kwargs={}, config={'run_name': 'retrieve_documents'}, config_factories=[])
})
| RunnableAssign(mapper={
    answer: RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
              context: RunnableLambda(format_docs)
            }), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
            | PromptTemplate(input_variables=['context', 'input'], input_types={}, partial_variables={}, template='Answer the question based on the context below.\n\n    Context: {context}\n\n    Question: {input}\n    ')
            | Cha

In [9]:
# Step 9: Ask question

# The chain is invoked with a dict that carries the question under 'input'.
query = {
    'input': 'How to design and build application using LLM?'
}

response = rag_chain.invoke(query)

# The generated answer is read from the 'answer' key of the response.
print(f"Answer:\n{response['answer']}")

# Label spelling corrected ("Sourse" -> "Source"); re-run the cell to refresh
# the stored output.
print('\nSource Documents:')

# The documents the answer was based on are read from the 'context' key.
for i, doc in enumerate(response['context'], start=1):
    print(f'\nDoc {i}: {doc.page_content}')


Answer:
Langchain can be used to design and build applications using LLM by incorporating agentic AI technology.

Sourse Documents:

Doc 1: Langchain helps to build LLM applications.

Doc 2: LLM is a field in law in art which has nothing to do with TECH and IT.

Doc 3: Langchain can be used to develop agentic ai application.
