In [17]:
%pip install -U langchain langchain-community

Collecting langchain
  Downloading langchain-0.1.8-py3-none-any.whl.metadata (13 kB)
Collecting langchain-community
  Downloading langchain_community-0.0.21-py3-none-any.whl.metadata (8.1 kB)
Collecting langchain-core<0.2,>=0.1.24 (from langchain)
  Downloading langchain_core-0.1.25-py3-none-any.whl.metadata (6.0 kB)
Collecting langsmith<0.2.0,>=0.1.0 (from langchain)
  Downloading langsmith-0.1.5-py3-none-any.whl.metadata (13 kB)
Downloading langchain-0.1.8-py3-none-any.whl (816 kB)
   ---------------------------------------- 0.0/816.1 kB ? eta -:--:--
   - ------------------------------------- 41.0/816.1 kB 991.0 kB/s eta 0:00:01
   ------------ --------------------------- 256.0/816.1 kB 3.2 MB/s eta 0:00:01
   -------------------------------- ------- 655.4/816.1 kB 5.2 MB/s eta 0:00:01
   ---------------------------------------- 816.1/816.1 kB 5.2 MB/s eta 0:00:00
Downloading langchain_community-0.0.21-py3-none-any.whl (1.7 MB)
   ---------------------------------------- 0.0/1.7 MB 

In [1]:
# Build a sample vectorDB
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings

# Load blog post
loader = WebBaseLoader("https://lilianweng.github.io/posts/2023-06-23-agent/")
data = loader.load()

# Split
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
splits = text_splitter.split_documents(data)

# VectorDB
embedding = OpenAIEmbeddings()
vectordb = Chroma.from_documents(documents=splits, embedding=embedding)

In [2]:
from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain_openai import ChatOpenAI

question = "What are the approaches to Task Decomposition?"

llm = ChatOpenAI(temperature=0)
retriever_from_llm = MultiQueryRetriever.from_llm(
    retriever=vectordb.as_retriever(), llm=llm
)

In [3]:
# Set logging for the queries
import logging

logging.basicConfig()
logging.getLogger("langchain.retrievers.multi_query").setLevel(logging.INFO)

In [4]:
unique_docs = retriever_from_llm.get_relevant_documents(query=question)
len(unique_docs)

INFO:langchain.retrievers.multi_query:Generated queries: ['1. How can Task Decomposition be achieved through different methods?', '2. What strategies are commonly used for breaking down tasks into smaller components?', '3. What are the various techniques employed for Task Decomposition in practice?']


6

### Supplying your own prompt

In [8]:
from typing import List

from langchain.chains import LLMChain
from langchain.output_parsers import PydanticOutputParser
from langchain.prompts import PromptTemplate
from pydantic import BaseModel, Field


# Output parser will split the LLM result into a list of queries
class LineList(BaseModel):
    # "lines" is the key (attribute name) of the parsed output
    lines: List[str] = Field(description="Lines of text")


class LineListOutputParser(PydanticOutputParser):
    def __init__(self) -> None:
        super().__init__(pydantic_object=LineList)

    def parse(self, text: str) -> LineList:
        lines = text.strip().split("\n")
        return LineList(lines=lines)


output_parser = LineListOutputParser()

QUERY_PROMPT = PromptTemplate(
    input_variables=["question"],
    template="""You are an AI language model assistant. Your task is to generate five 
    different versions of the given user question to retrieve relevant documents from a vector 
    database. By generating multiple perspectives on the user question, your goal is to help
    the user overcome some of the limitations of the distance-based similarity search. 
    Provide these alternative questions separated by newlines.
    Original question: {question}""",
)
llm = ChatOpenAI(temperature=0)

# Chain
llm_chain = LLMChain(llm=llm, prompt=QUERY_PROMPT, output_parser=output_parser)

# Other inputs
question = "What are the approaches to Task Decomposition?"

In [9]:
# Run
retriever = MultiQueryRetriever(
    retriever=vectordb.as_retriever(), llm_chain=llm_chain, parser_key="lines"
)  # "lines" is the key (attribute name) of the parsed output

# Results
unique_docs = retriever.get_relevant_documents(
    query="What does the course say about regression?"
)
len(unique_docs)

OutputParserException: Failed to parse LineList from completion 1. Got: 1 validation error for LineList
__root__
  LineList expected dict not int (type=type_error)