In [2]:
from langchain.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.retrievers import BM25Retriever
from langchain.retrievers import EnsembleRetriever
from langchain.schema import Document

In [3]:
# Step 1: Build a small in-memory corpus, one Document per sentence
docs = [
    Document(page_content=text)
    for text in (
        "LangChain helps build LLM applications.",
        "Pinecone is a vector database for semantic search.",
        "The Eiffel Tower is located in Paris.",
        "Langchain can be used to develop agentic ai application.",
        "Langchain has many types of retrievers.",
    )
]

In [6]:
# Dense side of the hybrid search: embed every document with the
# all-MiniLM-L6-v2 model and index the resulting vectors in FAISS.
embedding = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
dense_vectorstore = FAISS.from_documents(documents=docs, embedding=embedding)

# No search_kwargs are supplied, so the retriever keeps its default settings.
dense_retriever = dense_vectorstore.as_retriever()

In [9]:
# Sparse (keyword) side: BM25 over the same documents, capped at 3 hits.
sparse_retriever = BM25Retriever.from_documents(docs, k=3)

# Blend both retrievers: dense results are weighted 0.7, BM25 results 0.3.
hybrid_retriever = EnsembleRetriever(
    retrievers=[dense_retriever, sparse_retriever],
    weights=[0.7, 0.3],
)
hybrid_retriever

EnsembleRetriever(retrievers=[VectorStoreRetriever(tags=['FAISS', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x000002251FF59310>, search_kwargs={}), BM25Retriever(vectorizer=<rank_bm25.BM25Okapi object at 0x000002251FF5B9D0>, k=3)], weights=[0.7, 0.3])

In [11]:
# Step 5: Send a query through the hybrid retriever and list what came back
query = "How can I build an application using LLMs?"
results = hybrid_retriever.invoke(query)
for position, doc in enumerate(results, start=1):
    print(f"doc{position}:\n{doc.page_content}")

doc1:
LangChain helps build LLM applications.
doc2:
Langchain can be used to develop agentic ai application.
doc3:
Langchain has many types of retrievers.
doc4:
Pinecone is a vector database for semantic search.


# RAG Pipeline with Hybrid Retriever

In [13]:
from langchain.chat_models import init_chat_model
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains.retrieval import create_retrieval_chain
from langchain.prompts import PromptTemplate

In [14]:
## Create the prompt template
# {context} receives the retrieved documents and {input} the user's question.
template = """
        answer the user query  only on the given context{context}
        question:{input}
        """
prompt = PromptTemplate.from_template(template)

In [15]:
# Initialize the chat model. The identifier is "<provider>:<model>", i.e. the
# gpt-3.5-turbo model served through the OpenAI integration.
llm = init_chat_model(model="openai:gpt-3.5-turbo")
llm

ChatOpenAI(client=<openai.resources.chat.completions.completions.Completions object at 0x0000022523CB1550>, async_client=<openai.resources.chat.completions.completions.AsyncCompletions object at 0x0000022523CB1FD0>, root_client=<openai.OpenAI object at 0x0000022522A22BA0>, root_async_client=<openai.AsyncOpenAI object at 0x0000022523CB1D30>, model_kwargs={}, openai_api_key=SecretStr('**********'), stream_usage=True)

In [16]:
# Wire up the RAG pipeline:
#   1. document_chain stuffs the retrieved documents into the prompt and calls the LLM
#   2. rag_chain runs the hybrid retriever first, then hands its hits to document_chain
document_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)
rag_chain = create_retrieval_chain(
    retriever=hybrid_retriever,
    combine_docs_chain=document_chain,
)
rag_chain

RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableBinding(bound=RunnableLambda(lambda x: x['input'])
           | EnsembleRetriever(retrievers=[VectorStoreRetriever(tags=['FAISS', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x000002251FF59310>, search_kwargs={}), BM25Retriever(vectorizer=<rank_bm25.BM25Okapi object at 0x000002251FF5B9D0>, k=3)], weights=[0.7, 0.3]), kwargs={}, config={'run_name': 'retrieve_documents'}, config_factories=[])
})
| RunnableAssign(mapper={
    answer: RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
              context: RunnableLambda(format_docs)
            }), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
            | PromptTemplate(input_variables=['context', 'input'], input_types={}, partial_variables={}, template='\n        answer the user query  only on the given context{context}\n        question:{input}\n        ')
            | ChatOpenAI

In [18]:
# Ask the question through the full RAG chain. The returned dict holds the
# original "input", the retrieved "context" documents and the generated "answer".
results = rag_chain.invoke(
    {"input": "How can I build an application using LLMs?"}
)
results

{'input': 'How can I build an application using LLMs?',
 'context': [Document(id='0009ec1a-56fd-47f6-b3b7-7152c56b0557', metadata={}, page_content='LangChain helps build LLM applications.'),
  Document(id='df32a673-c5f5-4f7d-876a-955450d2d350', metadata={}, page_content='Langchain can be used to develop agentic ai application.'),
  Document(id='21475291-e59a-410f-9cc0-a39cfef438f6', metadata={}, page_content='Langchain has many types of retrievers.'),
  Document(id='fd694038-ccd7-48b6-9259-2d64f852a89e', metadata={}, page_content='Pinecone is a vector database for semantic search.')],
 'answer': "One way to build an application using LLMs is by utilizing a platform like LangChain, which is designed to assist in creating Language Model applications. By leveraging LangChain's tools and resources, you can develop agentic AI applications that leverage the power of LLMs for various use cases. Additionally, LangChain offers various types of retrievers that can enhance the functionality and p

In [19]:
# Print the generated answer, then every source document the chain retrieved.
# The replacement field uses single quotes: reusing the enclosing double quote
# inside an f-string is only legal from Python 3.12 onwards and is a
# SyntaxError on earlier interpreters.
print(f"\n answe: \n{results['answer']}")
print("\n Source documentrs\n")
for i, doc in enumerate(results['context']):
    # i is zero-based, so add 1 to get human-friendly numbering.
    print(f"\ndoc{i+1}:\n{doc.page_content}")


 answe: 
One way to build an application using LLMs is by utilizing a platform like LangChain, which is designed to assist in creating Language Model applications. By leveraging LangChain's tools and resources, you can develop agentic AI applications that leverage the power of LLMs for various use cases. Additionally, LangChain offers various types of retrievers that can enhance the functionality and performance of your LLM-based application.

 Source documentrs


doc1:
LangChain helps build LLM applications.

doc2:
Langchain can be used to develop agentic ai application.

doc3:
Langchain has many types of retrievers.

doc4:
Pinecone is a vector database for semantic search.


In [1]:
# ** is right-associative, so this is 3 ** (2 ** 0) == 3 ** 1, not (3 ** 2) ** 0.
print(3 ** (2 ** 0))

3
