# Reranking Hybrid Search Strategies

In [1]:
from langchain_community.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
from langchain.chat_models import init_chat_model

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
loader=TextLoader("langchain_intr.txt",encoding="utf-8")
raw_docs=loader.load()
# split documents into chunks
splitter=RecursiveCharacterTextSplitter(chunk_size=300,chunk_overlap=30)
chunks=splitter.split_documents(raw_docs)

# embedding model
embedding_model=OpenAIEmbeddings()
vectorstore=FAISS.from_documents(chunks,embedding_model)
# Retriever
retriever_openai=vectorstore.as_retriever(search_kwargs={"k":5})


### Reranking
- step-1 prompt
- step-2 llm

In [17]:
# prompt for reranking
from langchain.prompts import PromptTemplate
from langchain.chat_models import init_chat_model
from langchain_core.output_parsers import StrOutputParser

llm=init_chat_model("openai:gpt-3.5-turbo")
llm

ChatOpenAI(client=<openai.resources.chat.completions.completions.Completions object at 0x00000244330E2FD0>, async_client=<openai.resources.chat.completions.completions.AsyncCompletions object at 0x00000244330E2C40>, root_client=<openai.OpenAI object at 0x00000244330E0FC0>, root_async_client=<openai.AsyncOpenAI object at 0x00000244330E2EA0>, model_kwargs={}, openai_api_key=SecretStr('**********'), stream_usage=True)

In [18]:
rerank_prompt=PromptTemplate.from_template(
    """
    You are a helpful assistant. Your task is to rank the following documents from most to least relevant to the user's question.

User Question: "{question}"

Documents:
{documents}

Instructions:
- Think about the relevance of each document to the user's question.
- Return a list of document indices in ranked order, starting from the most relevant.

Output format: comma-separated document indices (e.g., 2,1,3,0,...)
    """
)

In [19]:
query="How can i use langchain to build an application with memory and tools?"
retrieved_docs=retriever_openai.invoke(query)
retrieved_docs

[Document(id='9994eadf-8e4b-4aeb-93ab-785c1145d32a', metadata={'source': 'langchain_intr.txt'}, page_content='Memory in LangChain enables context retention across multiple steps in a conversation or task, making the application more coherent and stateful.'),
 Document(id='9c2ee13d-7cda-4c3a-ad36-74098bd7be31', metadata={'source': 'langchain_intr.txt'}, page_content='LangChain is a flexible framework designed for developing applications powered by large language models (LLMs). It provides tools and abstractions to work with LLMs more effectively and includes components for prompt management, chains, memory, and agents.'),
 Document(id='5d61d68c-b408-4ca7-aa17-9eb6dc89d55b', metadata={'source': 'langchain_intr.txt'}, page_content='Agents in LangChain are chains that use LLMs to decide which tools to use and in what order. This makes them suitable for multi-step tasks like question answering with search and code execution.'),
 Document(id='93eabe8e-8381-44b9-ba4b-b2130c0abce9', metadata={

In [23]:
chain=rerank_prompt|llm|StrOutputParser()
chain

PromptTemplate(input_variables=['documents', 'question'], input_types={}, partial_variables={}, template='\n    You are a helpful assistant. Your task is to rank the following documents from most to least relevant to the user\'s question.\n\nUser Question: "{question}"\n\nDocuments:\n{documents}\n\nInstructions:\n- Think about the relevance of each document to the user\'s question.\n- Return a list of document indices in ranked order, starting from the most relevant.\n\nOutput format: comma-separated document indices (e.g., 2,1,3,0,...)\n    ')
| ChatOpenAI(client=<openai.resources.chat.completions.completions.Completions object at 0x00000244330E2FD0>, async_client=<openai.resources.chat.completions.completions.AsyncCompletions object at 0x00000244330E2C40>, root_client=<openai.OpenAI object at 0x00000244330E0FC0>, root_async_client=<openai.AsyncOpenAI object at 0x00000244330E2EA0>, model_kwargs={}, openai_api_key=SecretStr('**********'), stream_usage=True)
| StrOutputParser()

In [24]:
doc_lines=[f"{i+1}.{doc.page_content}" for i,doc in enumerate(retrieved_docs)]
formatted_docs="\n ".join(doc_lines)
formatted_docs

'1.Memory in LangChain enables context retention across multiple steps in a conversation or task, making the application more coherent and stateful.\n 2.LangChain is a flexible framework designed for developing applications powered by large language models (LLMs). It provides tools and abstractions to work with LLMs more effectively and includes components for prompt management, chains, memory, and agents.\n 3.Agents in LangChain are chains that use LLMs to decide which tools to use and in what order. This makes them suitable for multi-step tasks like question answering with search and code execution.\n 4.LangChain supports tool integration including web search, calculators, and APIs, allowing LLMs to interact with external systems and respond more accurately to dynamic queries.\n 5.LangChain integrates with many third-party services such as OpenAI, Hugging Face, and Cohere. This enables developers to experiment with different models and optimize performance for specific use cases like

In [26]:
query="How can i use langchain to build an application with memory and tools?"
rerank=chain.invoke({"question":query,"documents":formatted_docs})
rerank


'2,1,3,4,5'

In [33]:
indexes=rerank.split(",")
rerank_docs=[retrieved_docs[int(i)-1] for i in indexes]
rerank_docs

[Document(id='9c2ee13d-7cda-4c3a-ad36-74098bd7be31', metadata={'source': 'langchain_intr.txt'}, page_content='LangChain is a flexible framework designed for developing applications powered by large language models (LLMs). It provides tools and abstractions to work with LLMs more effectively and includes components for prompt management, chains, memory, and agents.'),
 Document(id='9994eadf-8e4b-4aeb-93ab-785c1145d32a', metadata={'source': 'langchain_intr.txt'}, page_content='Memory in LangChain enables context retention across multiple steps in a conversation or task, making the application more coherent and stateful.'),
 Document(id='5d61d68c-b408-4ca7-aa17-9eb6dc89d55b', metadata={'source': 'langchain_intr.txt'}, page_content='Agents in LangChain are chains that use LLMs to decide which tools to use and in what order. This makes them suitable for multi-step tasks like question answering with search and code execution.'),
 Document(id='93eabe8e-8381-44b9-ba4b-b2130c0abce9', metadata={

In [34]:
# Print result
for i,doc in enumerate(rerank_docs):
    print(f"\nRank{i+1}:\n{doc.page_content}")


Rank1:
LangChain is a flexible framework designed for developing applications powered by large language models (LLMs). It provides tools and abstractions to work with LLMs more effectively and includes components for prompt management, chains, memory, and agents.

Rank2:
Memory in LangChain enables context retention across multiple steps in a conversation or task, making the application more coherent and stateful.

Rank3:
Agents in LangChain are chains that use LLMs to decide which tools to use and in what order. This makes them suitable for multi-step tasks like question answering with search and code execution.

Rank4:
LangChain supports tool integration including web search, calculators, and APIs, allowing LLMs to interact with external systems and respond more accurately to dynamic queries.

Rank5:
LangChain integrates with many third-party services such as OpenAI, Hugging Face, and Cohere. This enables developers to experiment with different models and optimize performance for sp