<a href="https://colab.research.google.com/github/sugarforever/LangChain-Advanced/blob/main/Retrievers/01_MultiQuery_Retriever.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -q -U langchain openai chromadb tiktoken

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.8 MB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.3/1.8 MB[0m [31m9.6 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m25.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.0/77.0 kB[0m [31m8.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m437.8/437.8 kB[0m [31m32.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m66.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.4/2.4 MB[0m [31m65.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m66.3/66.3 kB[0m [31m6.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━

In [None]:
import os
from getpass import getpass

# Never paste a real key into the notebook source: it would be saved (and
# possibly committed) with the file. Reuse a key already present in the
# environment; otherwise ask for it interactively without echoing it.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

In [None]:
import logging

# Install the default root handler so log records are actually shown.
logging.basicConfig()

# Raise the multi-query retriever's logger to INFO; the "Generated queries"
# lines in the outputs further down come from this logger.
multi_query_logger = logging.getLogger("langchain.retrievers.multi_query")
multi_query_logger.setLevel(logging.INFO)

In [None]:
from langchain.vectorstores import Chroma
from langchain.document_loaders import WebBaseLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Step 1 - load: fetch the blog post and wrap it as LangChain documents.
loader = WebBaseLoader("https://blog.langchain.dev/langchain-prompt-hub/")
data = loader.load()

# Step 2 - split: cut the documents into chunks (chunk_size=500, no overlap).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=0,
)
splits = text_splitter.split_documents(data)

# Step 3 - index: embed every chunk with OpenAI embeddings and store the
# result in a Chroma vector store, which later cells query via `vectordb`.
embedding = OpenAIEmbeddings()
vectordb = Chroma.from_documents(
    documents=splits,
    embedding=embedding,
)

In [None]:
from langchain.chat_models import ChatOpenAI
from langchain.retrievers.multi_query import MultiQueryRetriever

# Deterministic chat model (temperature=0) used to rewrite the question.
llm = ChatOpenAI(temperature=0)

# The user question that every retrieval cell below reuses.
question = "What can we do with LangChain hub?"

# Wrap the vector store's plain retriever in a MultiQueryRetriever built with
# the library's default settings for the query-generation step.
retriever_from_llm = MultiQueryRetriever.from_llm(
    retriever=vectordb.as_retriever(),
    llm=llm,
)

In [None]:
# Run the default multi-query retriever on the question; the INFO log printed
# with the output lists the query variants the LLM generated.
unique_docs = retriever_from_llm.get_relevant_documents(question)
unique_docs

INFO:langchain.retrievers.multi_query:Generated queries: ['1. How can the LangChain hub be utilized?', '2. What are the possible applications of the LangChain hub?', '3. In what ways can the LangChain hub be used effectively?']


[Document(page_content="Today, we're excited to launch LangChain Hub‚Äìa home for uploading, browsing, pulling, and managing your prompts. (Soon, we'll be adding other artifacts like chains and agents).\uf8ffüí°Explore the Hub hereLangChain Hub is built into LangSmith (more on that below) so there are 2 ways to start exploring LangChain Hub.With LangSmith access: Full read and write permissions. You can explore all existing prompts and upload your own by logging in and navigate to the Hub from your admin panel.Without", metadata={'language': 'en', 'source': 'https://blog.langchain.dev/langchain-prompt-hub/', 'title': 'Announcing LangChain Hub'}),
 Document(page_content="way to facilitate this kind of collaboration.We're aiming to make LangChain Hub the best place for teams to write and manage prompts, together. The product isn't quite there today‚Äìthis first iteration only supports personal accounts‚Äìbut we're actively looking for organizations that are excited to explore an Alpha wi

In [None]:
from typing import List
from langchain.chains import LLMChain
from pydantic import BaseModel, Field
from langchain.prompts import PromptTemplate
from langchain.output_parsers import PydanticOutputParser
from langchain.retrievers.multi_query import LineListOutputParser


# Output parser: splits the raw LLM completion into a list of queries, one per
# line.
#
# `LineListOutputParser` is imported from `langchain.retrievers.multi_query`
# in the import block of this cell. For reference, the notebook originally
# sketched an equivalent hand-written parser; it is reproduced here as comments
# only and is never executed.
# NOTE(review): confirm the sketch still matches the installed library version.
#
#     class LineList(BaseModel):
#         # "lines" is the key (attribute name) of the parsed output
#         lines: List[str] = Field(description="Lines of text")
#
#
#     class LineListOutputParser(PydanticOutputParser):
#         def __init__(self) -> None:
#             super().__init__(pydantic_object=LineList)
#
#         def parse(self, text: str) -> LineList:
#             print(f"Input text: {text}")
#             lines = text.strip().split("\n")
#             return LineList(lines=lines)

output_parser = LineListOutputParser()

# Custom prompt asking for five rewrites of the question, one per line, so the
# line-based parser above can split the completion into individual queries.
QUERY_PROMPT = PromptTemplate(
    input_variables=["question"],
    template="""You are an AI language model assistant. Your task is to generate five
    different versions of the given user question to retrieve relevant documents from a vector
    database. By generating multiple perspectives on the user question, your goal is to help
    the user overcome some of the limitations of the distance-based similarity search.
    Provide these alternative questions separated by newlines.
    Original question: {question}""",
)

# Deterministic chat model (temperature=0) for the query-generation step.
llm = ChatOpenAI(temperature=0)

# Chain: prompt -> LLM -> line-list parser. The run cell below hands this chain
# to MultiQueryRetriever together with parser_key="lines".
llm_chain = LLMChain(llm=llm, prompt=QUERY_PROMPT, output_parser=output_parser)

In [None]:
# Build a MultiQueryRetriever around the custom chain. parser_key="lines" is
# the key (attribute name) under which the parsed output exposes the queries.
retriever = MultiQueryRetriever(
    retriever=vectordb.as_retriever(),
    llm_chain=llm_chain,
    parser_key="lines",
)

# Retrieve with the five-variant prompt and display the resulting documents.
unique_docs = retriever.get_relevant_documents(question)
unique_docs

INFO:langchain.retrievers.multi_query:Generated queries: ['1. How can the LangChain hub be utilized?', '2. What are the possible applications of the LangChain hub?', '3. In what ways can the LangChain hub be used?', '4. What functionalities does the LangChain hub offer?', '5. What are the potential uses of the LangChain hub?']


[Document(page_content="Today, we're excited to launch LangChain Hub‚Äìa home for uploading, browsing, pulling, and managing your prompts. (Soon, we'll be adding other artifacts like chains and agents).\uf8ffüí°Explore the Hub hereLangChain Hub is built into LangSmith (more on that below) so there are 2 ways to start exploring LangChain Hub.With LangSmith access: Full read and write permissions. You can explore all existing prompts and upload your own by logging in and navigate to the Hub from your admin panel.Without", metadata={'language': 'en', 'source': 'https://blog.langchain.dev/langchain-prompt-hub/', 'title': 'Announcing LangChain Hub'}),
 Document(page_content="way to facilitate this kind of collaboration.We're aiming to make LangChain Hub the best place for teams to write and manage prompts, together. The product isn't quite there today‚Äìthis first iteration only supports personal accounts‚Äìbut we're actively looking for organizations that are excited to explore an Alpha wi