In [None]:

!pip install google-generativeai langchain-google-genai




In [None]:

! pip install langchain_community tiktoken langchainhub chromadb langchain datasets


Collecting langchain_community
  Downloading langchain_community-0.2.7-py3-none-any.whl (2.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 MB[0m [31m12.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting tiktoken
  Downloading tiktoken-0.7.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m20.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting langchainhub
  Downloading langchainhub-0.1.20-py3-none-any.whl (5.0 kB)
Collecting chromadb
  Downloading chromadb-0.5.4-py3-none-any.whl (581 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m581.4/581.4 kB[0m [31m21.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting langchain
  Downloading langchain-0.2.7-py3-none-any.whl (983 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m983.6/983.6 kB[0m [31m27.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting datasets
  Downloading datasets

In [None]:

import os
from getpass import getpass

# Never hard-code the key in the notebook. Reuse GOOGLE_API_KEY when it is
# already exported; otherwise prompt for it so it is not echoed into the output.
api = os.environ.get('GOOGLE_API_KEY') or getpass('Google API key: ')
os.environ['GOOGLE_API_KEY'] = api


In [None]:

import bs4
from langchain import hub
from langchain.embeddings import GooglePalmEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_google_genai import GoogleGenerativeAIEmbeddings




## Multi-Query with LangChain

We start by building the vector store in LangChain: we load a blog post, split it into chunks, and embed the chunks into a Chroma index.

In [None]:
# Load Documents
# Parsing is restricted to the elements carrying the listed CSS classes.
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    ),
)
docs = loader.load()

# Split
# Overlapping chunks keep some surrounding context on both sides of a boundary.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)

# Embed
# Use the Gemini embedding integration from langchain-google-genai (installed in
# the first cell) instead of the legacy PaLM embeddings. The model name can be
# swapped for another embedding model available to your API key.
vectorstore = Chroma.from_documents(
    documents=splits,
    embedding=GoogleGenerativeAIEmbeddings(model="models/embedding-001"),
)



In [None]:

from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
# LLM
# Chat model shared by the query-generation and answer-generation steps.
llm = ChatGoogleGenerativeAI(
    temperature=0,
    model="gemini-pro",
)


We initialize the `MultiQueryRetriever`:

In [None]:
from langchain.retrievers.multi_query import MultiQueryRetriever

# Wrap the plain vector-store retriever so the LLM can rewrite each question
# into several search queries before retrieval.
retriever = MultiQueryRetriever.from_llm(
    llm=llm,
    retriever=vectorstore.as_retriever(),
)

We set logging so that we can see the queries as they're generated by our LLM.

In [None]:
# Set logging for the queries
import logging

# basicConfig() sets up default handling on the root logger; the multi-query
# module's logger is then lowered to INFO so the generated queries are printed.
logging.basicConfig()
_multi_query_logger = logging.getLogger("langchain.retrievers.multi_query")
_multi_query_logger.setLevel(logging.INFO)

**To query with our multi-query retriever we call its retrieval method (`get_relevant_documents`, or `invoke` in newer LangChain releases) with the question.**

In [None]:
question = "What is Task Decomposition?"

# `invoke` is the standard Runnable entry point; `get_relevant_documents` is
# deprecated in LangChain in favour of it.
docs = retriever.invoke(question)
len(docs)

  warn_deprecated(
INFO:langchain.retrievers.multi_query:Generated queries: ['- What are the principles and techniques for breaking down complex tasks into smaller, manageable components?', '- How can task decomposition be applied to improve efficiency and effectiveness in various domains?', '- What are the benefits and challenges of using task decomposition in project management and software development?']


7

In [None]:
# Show the retrieved Document objects (metadata and page_content) as the cell output.
docs

[Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='Fig. 1. Overview of a LLM-powered autonomous agent system.\nComponent One: Planning#\nA complicated task usually involves many steps. An agent needs to know what they are and plan ahead.\nTask Decomposition#\nChain of thought (CoT; Wei et al. 2022) has become a standard prompting technique for enhancing model performance on complex tasks. The model is instructed to “think step by step” to utilize more test-time computation to decompose hard tasks into smaller and simpler steps. CoT transforms big tasks into multiple manageable tasks and shed lights into an interpretation of the model’s thinking process.'),
 Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='Tree of Thoughts (Yao et al. 2023) extends CoT by exploring multiple reasoning possibilities at each step. It first decomposes the problem into multiple thought steps and generates mul

## Adding the Generation in RAG

So far we've built a multi-query powered **R**etrieval **A**ugmentation chain. Now, we need to add **G**eneration.

In [None]:
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

# Prompt for the generation step: the model answers {query} using only the
# retrieved {contexts}, and is told to say "I don't know" otherwise.
QA_PROMPT = PromptTemplate(
    template="""You are a helpful assistant who answers user queries using the
    contexts provided. If the question cannot be answered using the information
    provided say "I don't know".

    Contexts:
    {contexts}

    Question: {query}""",
    input_variables=["query", "contexts"],
)

# Chain
# LLMChain returns its completion under the "text" key.
qa_chain = LLMChain(prompt=QA_PROMPT, llm=llm)

  warn_deprecated(


In [None]:
# Calling a chain object directly is deprecated; `invoke` takes the same input
# dict and returns the same result dict (inputs plus the "text" completion).
out = qa_chain.invoke(
    {
        "query": question,
        # Retrieved passages are separated by a "---" line so the model can tell them apart.
        "contexts": "\n---\n".join([d.page_content for d in docs])
    }
)
out["text"]

  warn_deprecated(


'Task decomposition is the process of breaking down large tasks into smaller, manageable subgoals, enabling efficient handling of complex tasks.'

## Chaining Everything with a SequentialChain

We can pull together the logic above into a function or set of methods, whatever is preferred — however if we'd like to use LangChain's approach to this we must "chain" together multiple chains. The first retrieval component is (1) not a chain per se, and (2) requires processing of the output. To do that, and fit with LangChain's "chaining chains" approach, we set up the _retrieval_ component within a `TransformChain`:

In [None]:
from langchain.chains import TransformChain

def retrieval_transform(inputs: dict) -> dict:
    """Retrieve documents for a question and pack them for the QA prompt.

    Args:
        inputs: Mapping with a "question" key holding the user question.

    Returns:
        A dict with "query" (the question, unchanged) and "contexts" (the
        page_content of every retrieved document, joined by a "---" line).
    """
    question = inputs["question"]
    # Reads the module-level `retriever`. `invoke` replaces the deprecated
    # `get_relevant_documents` call.
    retrieved = retriever.invoke(question)
    return {
        "query": question,
        "contexts": "\n---\n".join(doc.page_content for doc in retrieved),
    }

# Expose retrieval_transform as a chain step: it consumes "question" and emits
# the "query" and "contexts" keys that the QA prompt expects.
retrieval_chain = TransformChain(
    transform=retrieval_transform,
    input_variables=["question"],
    output_variables=["query", "contexts"],
)

Now we chain this with our generation step using the `SequentialChain`:

In [None]:
from langchain.chains import SequentialChain

# Retrieval runs first and its outputs feed the QA chain; the final result
# exposes the intermediate "query"/"contexts" alongside the generated "text".
rag_chain = SequentialChain(
    input_variables=["question"],  # named differently from the "query" output key
    chains=[retrieval_chain, qa_chain],
    output_variables=["query", "contexts", "text"],
)

Then we perform the full RAG pipeline:

In [None]:
# `invoke` replaces the deprecated direct call on the chain object; the result
# dict is the same.
out = rag_chain.invoke({"question": question})
out["text"]

INFO:langchain.retrievers.multi_query:Generated queries: ['- What are the principles and techniques for breaking down complex tasks into smaller, manageable components?', '- How can task decomposition be applied to improve efficiency and effectiveness in various domains?', '- What are the benefits and challenges of using task decomposition in project management and software development?']


'Task decomposition is the process of breaking down large tasks into smaller, manageable subgoals, enabling efficient handling of complex tasks.'

# Generation

In [None]:

from langchain.prompts import ChatPromptTemplate

# Prompt
# Built line by line; the resulting string has two placeholders, {context} and {question}.
template = (
    "Answer the question based only on the following context:\n"
    "{context}\n"
    "\n"
    "Question: {question}\n"
)

# Build a chat prompt from the template string; the placeholders in `template`
# become the prompt's input variables.
prompt = ChatPromptTemplate.from_template(template)
# Trailing expression: display the prompt object as the cell output.
prompt

ChatPromptTemplate(input_variables=['context', 'question'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], template='Answer the question based only on the following context:\n{context}\n\nQuestion: {question}\n'))])

In [None]:


# LLM
llm = ChatGoogleGenerativeAI(
    temperature=0,
    model="gemini-pro",
)

# Chain
# The formatted prompt is piped straight into the chat model.
chain = prompt | llm


In [None]:

# Run
# Pass the text of the retrieved passages as the context, formatted the same
# way as for the QA chain. Passing the Document list itself would put its
# repr, metadata included, into the prompt.
chain.invoke(
    {
        "context": "\n---\n".join(d.page_content for d in docs),
        "question": "What is Task Decomposition?",
    }
)


AIMessage(content='- Task Decomposition: A Comprehensive Overview for Machine Learning\n- Understanding Task Decomposition: A Key Concept in Artificial Intelligence\n- Task Decomposition in Large Language Models: Techniques and Applications', response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'safety_ratings': [{'category': 'HARM_CATEGORY_SEXUALLY_EXPLICIT', 'probability': 'NEGLIGIBLE', 'blocked': False}, {'category': 'HARM_CATEGORY_HATE_SPEECH', 'probability': 'NEGLIGIBLE', 'blocked': False}, {'category': 'HARM_CATEGORY_HARASSMENT', 'probability': 'NEGLIGIBLE', 'blocked': False}, {'category': 'HARM_CATEGORY_DANGEROUS_CONTENT', 'probability': 'NEGLIGIBLE', 'blocked': False}]}, id='run-f8652535-058b-4564-a711-f3eaf1d2ac4c-0', usage_metadata={'input_tokens': 89, 'output_tokens': 34, 'total_tokens': 123})

In [None]:
# Index
from langchain_community.vectorstores import Chroma

# Reuse the `vectorstore` built when the documents were first embedded. Calling
# Chroma.from_documents again would re-embed every chunk and, with the default
# in-memory collection, add a second copy of each chunk to the same store.
#
# NOTE: this rebinds the global `retriever`, replacing the MultiQueryRetriever
# for every later cell that reads that name (including retrieval_transform).
retriever = vectorstore.as_retriever(search_kwargs={"k": 1})

## Custom Multiquery

We'll try this with two prompts, both of which encourage more variety in the search queries.

**Prompt A**
```
Your task is to generate 3 different search queries that aim to
answer the user question from multiple perspectives.
Each query MUST tackle the question from a different viewpoint,
we want to get a variety of RELEVANT search results.
Provide these alternative questions separated by newlines.
Original question: {question}
```



In [None]:

from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough


# Prompt A: ask the LLM for three alternative search queries.
template = """
Your task is to generate 3 different search queries that aim to
answer the user question from multiple perspectives.
Each query MUST tackle the question from a different viewpoint,
we want to get a variety of RELEVANT search results.
Provide these alternative questions separated by newlines.
Original question: {question}
"""


prompt = ChatPromptTemplate.from_template(template)

# The template only has a {question} placeholder, so the chain maps the raw
# input string to "question". No retrieval step is included because its result
# would never reach the prompt.
rag_chain = (
    {"question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

# Returns the generated queries as one newline-separated string.
rag_chain.invoke("What is Task Decomposition?")


'- Task Decomposition: A Comprehensive Guide to Breaking Down Complex Tasks\n- Understanding Task Decomposition: A Step-by-Step Approach for Project Management\n- Task Decomposition in Software Engineering: Techniques and Best Practices'

---

**Prompt B**
```
Your task is to generate 3 different search queries that aim to
answer the user question from multiple perspectives. The user questions
are focused on Large Language Models, Machine Learning, and related
disciplines.
Each query MUST tackle the question from a different viewpoint, we
want to get a variety of RELEVANT search results.
Provide these alternative questions separated by newlines.
Original question: {question}
```

In [None]:


from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough


# Prompt B: same task as Prompt A, with the domain of the user questions
# (LLMs, machine learning) stated explicitly.
template = """
Your task is to generate 3 different search queries that aim to
answer the user question from multiple perspectives. The user questions
are focused on Large Language Models, Machine Learning, and related
disciplines.
Each query MUST tackle the question from a different viewpoint, we
want to get a variety of RELEVANT search results.
Provide these alternative questions separated by newlines.
Original question: {question}
"""


prompt = ChatPromptTemplate.from_template(template)

# The template only has a {question} placeholder, so the chain maps the raw
# input string to "question". No retrieval step is included because its result
# would never reach the prompt.
rag_chain = (
    {"question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

# Returns the generated queries as one newline-separated string.
rag_chain.invoke("What is Task Decomposition?")


'- Task Decomposition: A Comprehensive Overview\n- Task Decomposition in Machine Learning: Techniques and Applications\n- Task Decomposition in Large Language Models: Benefits and Challenges'

# Output after applying multi query

**SequentialChain** is used to execute dependent tasks in a specific order.

It ensures the proper flow of data between tasks.

It can be used to handle multi-step queries where each step depends on the previous one.

It is especially useful for complex workflows in natural language processing and machine learning tasks.

In [None]:
# The k=1 retriever created for the custom-prompt cells rebound the global
# `retriever`, which retrieval_transform reads at call time. Rebuild the
# multi-query retriever so this section really runs multi-query retrieval.
retriever = MultiQueryRetriever.from_llm(
    retriever=vectorstore.as_retriever(), llm=llm
)

# Retrieval step: consumes "question", emits "query" and "contexts".
retrieval_chain = TransformChain(
    input_variables=["question"],
    output_variables=["query", "contexts"],
    transform=retrieval_transform
)

# `rag_chain` was reused for the query-generation chains above, so rebuild the
# retrieval -> generation pipeline under that name.
rag_chain = SequentialChain(
    chains=[retrieval_chain, qa_chain],
    input_variables=["question"],  # we need to name differently to output "query"
    output_variables=["query", "contexts", "text"]
)

In [None]:

# `invoke` replaces the deprecated direct call on the chain object; the result
# dict is the same.
out = rag_chain.invoke({"question": question})
out["text"]


'Task Decomposition is a technique used to break down complex tasks into smaller, more manageable steps. This can be done using a variety of methods, such as CoT (Chain of Thought), which instructs the model to "think step by step" and decompose hard tasks into smaller and simpler steps.'

In [None]:
# Same question run a second time. `invoke` replaces the deprecated direct
# call on the chain object.
out = rag_chain.invoke({"question": question})
out["text"]

'Task Decomposition is a technique used to break down complex tasks into smaller, more manageable steps. This can be done using a variety of methods, such as CoT (Chain of Thought), which instructs the model to "think step by step" and decompose hard tasks into smaller and simpler steps.'

In [None]:

# Rebinds the global `question` for this and any later cell that reads it.
question = "Model selection"
# `invoke` replaces the deprecated direct call on the chain object.
out = rag_chain.invoke({"question": question})
out["text"]

'{\n "id": "I don\'t know",\n "reason": "The context does not mention any models or their capabilities, so I cannot select a suitable model."\n}'

In [None]:

# Rebinds the global `question` for this and any later cell that reads it.
question = "Why we need Task Decomposition"
# `invoke` replaces the deprecated direct call on the chain object.
out = rag_chain.invoke({"question": question})
out["text"]


'Task Decomposition is used to transform big tasks into multiple manageable tasks and shed lights into an interpretation of the model’s thinking process.'