In [1]:
!pip install dotenv



In [2]:
import dotenv
dotenv.load_dotenv()

True

# Part 5: Multi Query

In [3]:
import bs4
from langchain_community.document_loaders import WebBaseLoader

loader = WebBaseLoader(
    web_paths=('https://lilianweng.github.io/posts/2023-06-23-agent/',),
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    )
)
blog_docs = loader.load()

from langchain.text_splitter import RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=300,
    chunk_overlap=50
)

splits = text_splitter.split_documents(blog_docs)

from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_community.vectorstores import Chroma

vectorstore = Chroma.from_documents(documents=splits,
                                    embedding=GoogleGenerativeAIEmbeddings(model="models/embedding-001"))

retriever = vectorstore.as_retriever()

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [4]:
from langchain.prompts import ChatPromptTemplate

# Multi Query: Different Perspectives
template = """You are an AI language model assistant. Your task is to generate five 
different versions of the given user question to retrieve relevant documents from a vector 
database. By generating multiple perspectives on the user question, your goal is to help
the user overcome some of the limitations of the distance-based similarity search. 
Provide these alternative questions separated by newlines. Original question: {question}"""
prompt_perspectives = ChatPromptTemplate.from_template(template)

from langchain_core.output_parsers import StrOutputParser
from langchain_google_genai import ChatGoogleGenerativeAI
generate_queries = (
    prompt_perspectives 
    | ChatGoogleGenerativeAI(model="gemini-2.0-flash",temperature=0) 
    | StrOutputParser() 
    | (lambda x: x.split("\n"))
)

In [5]:
from langchain.load import dumps, loads

def get_unique_union(documents: list[list]):
    """ Unique union of retrieved docs"""
    flattened_docs = [dumps(doc) for sublist in documents for doc in sublist]
    unique_docs = list(set(flattened_docs))
    return [loads(doc) for doc in unique_docs]

question = "What is task decomposition for LLM agents?"
retrieval_chain = generate_queries | retriever.map() | get_unique_union
docs = retrieval_chain.invoke({"question": question})
len(docs)

  return [loads(doc) for doc in unique_docs]


6

In [6]:
from operator import itemgetter

template = """Answer the folowing question based on this context: 

{context}

Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)

llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", temperature=0)

final_rag_chain = (
    {"context": retrieval_chain,
     "question":itemgetter("question")}
     | prompt
     | llm
     | StrOutputParser()
)

final_rag_chain.invoke({"question":question})

'Task decomposition for LLM agents is the process of breaking down large, complex tasks into smaller, more manageable subgoals. This enables the agent to handle complex tasks more efficiently. It can be achieved through techniques like Chain of Thought (CoT), which instructs the model to "think step by step," or Tree of Thoughts, which explores multiple reasoning possibilities at each step. Task decomposition can be done by LLM with simple prompting, by using task-specific instructions, or with human inputs.'

# Part 6: Rag-Fusion

In [7]:
from langchain.prompts import ChatPromptTemplate

# RAG-Fusion: Related
template = """You are a helpful assistant that generates multiple search queries based on a single input query. \n
Generate multiple search queries related to: {question} \n
Output (4 queries):"""
prompt_rag_fusion = ChatPromptTemplate.from_template(template)

In [8]:
from langchain_core.output_parsers import StrOutputParser
from langchain_google_genai import ChatGoogleGenerativeAI
generate_queries = (
    prompt_rag_fusion
    | ChatGoogleGenerativeAI(model="gemini-2.0-flash", temperature=0)
    | StrOutputParser()
    | (lambda x: x.split('\n'))
)

In [9]:
from langchain.load import dumps, loads

def reciprocal_rank_fusion(results: list[list], k=60):
    fused_scores = {}
    for docs in results:
        for rank, doc in enumerate(docs):
            doc_str = dumps(doc)
            if doc_str not in fused_scores:
                fused_scores[doc_str] = 0
            previus_score = fused_scores[doc_str]
            fused_scores[doc_str]+= 1 / (rank+k)
    reranked_results = [
        (loads(doc), score)
        for doc, score in sorted(fused_scores.items(), key=lambda x: x[1], reverse=True)
    ]
    return reranked_results

retrieval_chain_rag_fusion = generate_queries | retriever.map() | reciprocal_rank_fusion

docs = retrieval_chain_rag_fusion.invoke({"question": question})
len(docs)
    

10

In [10]:
from langchain_core.runnables import RunnablePassthrough

final_rag_chain = (
    {'context': retrieval_chain_rag_fusion,
     'question': itemgetter('question')}
     | prompt
     | llm
     | StrOutputParser()
)

final_rag_chain.invoke({"question": question})

'Task decomposition for LLM agents involves breaking down large tasks into smaller, more manageable subgoals to handle complex tasks efficiently. This can be achieved through:\n\n*   Using LLMs with simple prompting techniques like "Steps for XYZ. 1." or "What are the subgoals for achieving XYZ?".\n*   Employing task-specific instructions (e.g., "Write a story outline" for writing a novel).\n*   Incorporating human inputs.\n\nTechniques like Chain of Thought (CoT) and Tree of Thoughts (ToT) are used to enhance model performance on complex tasks by decomposing them into smaller steps and exploring multiple reasoning possibilities. CoT instructs the model to "think step by step," while ToT extends CoT by generating multiple thoughts per step, creating a tree structure for search.'

# Part 7

In [11]:
from langchain.prompts import ChatPromptTemplate
template = """You are a helpful assistant that generates multiple sub-questions related to an input question. \n
The goal is to break down the input into a set of sub-problems / sub-questions that can be answers in isolation. \n
Generate multiple search queries related to: {question} \n
Output (3 queries):"""
prompt_decomposition = ChatPromptTemplate.from_template(template)

In [12]:
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.output_parsers import StrOutputParser

generate_queries_decomposition = (prompt_decomposition | llm | StrOutputParser() | (lambda x: x.split("\n")))

question = "What are the main components of LLM powered autonomus agent system"
questions = generate_queries_decomposition.invoke({"question": question})

In [13]:
questions

['Here are 3 search queries related to the question "What are the main components of an LLM-powered autonomous agent system?":',
 '',
 '1.  **"LLM autonomous agent architecture components"** (Focuses on the architectural breakdown)',
 '2.  **"Key modules LLM autonomous agents"** (Uses the term "modules" as a synonym for components)',
 '3.  **"LLM agent planning memory execution"** (Highlights the core functional aspects often associated with autonomous agents)']

In [14]:
# Prompt
template = """Here is the question you need to answer:

\n --- \n {question} \n --- \n

Here is any available background question + answer pairs:

\n --- \n {q_a_pairs} \n --- \n

Here is additional context relevant to the question: 

\n --- \n {context} \n --- \n

Use the above context and any background question + answer pairs to answer the question: \n {question}
"""

decomposition_prompt = ChatPromptTemplate.from_template(template)

In [15]:
from operator import itemgetter
def format_qa_pair(question, answer):
    """Format Q and A pair"""
    formatted_string = ""
    formatted_string += f"Question: {question}\nAnswer: {answer}\n\n"
    return formatted_string

# llm

q_a_pairs = ""

for q in questions[2:]:
    rag_chain = (
        {"context":itemgetter("question") | retriever,
         "question":itemgetter("question"),
         "q_a_pairs":itemgetter("q_a_pairs")}
        | decomposition_prompt
        | llm
        | StrOutputParser()
    )
    answer = rag_chain.invoke({'question':q, "q_a_pairs":q_a_pairs})
    q_a_pair = format_qa_pair(q, answer)
    q_a_pairs = q_a_pairs + "\n---\n"+ q_a_pair

In [16]:
print(q_a_pairs)


---
Question: 1.  **"LLM autonomous agent architecture components"** (Focuses on the architectural breakdown)
Answer: Based on the provided context, an LLM-powered autonomous agent system architecture includes the following key components:

1.  **LLM as the Brain:** The large language model (LLM) acts as the core controller, driving the agent's decision-making and problem-solving processes.

2.  **Planning:**
    *   **Subgoal and Decomposition:** The agent breaks down large, complex tasks into smaller, more manageable subgoals.
    *   **Reflection and Refinement:** The agent self-reflects on past actions, learns from mistakes, and refines its approach for future steps.

3.  **Memory:**
    *   **Short-term memory:** In-context learning leverages the model's short-term memory.
    *   **Long-term memory:** An external vector store provides the agent with the ability to retain and recall information over extended periods.

4.  **Tool Use:** The agent learns to call external APIs to ac

# Part 8 Step Back

In [17]:
# Few Shot Examples
from langchain_core.prompts import ChatPromptTemplate, FewShotChatMessagePromptTemplate
examples = [
    {
        "input": "Could the members of The Police perform lawful arrests?",
        "output": "what can the members of The Police do?",
    },
    {
        "input": "Jan Sindel’s was born in what country?",
        "output": "what is Jan Sindel’s personal history?",
    },
]
# We now transform these to example messages
example_prompt = ChatPromptTemplate.from_messages(
    [
        ("human", "{input}"),
        ("ai", "{output}"),
    ]
)
few_shot_prompt = FewShotChatMessagePromptTemplate(
    example_prompt=example_prompt,
    examples=examples,
)
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """You are an expert at world knowledge. Your task is to step back and paraphrase a question to a more generic step-back question, which is easier to answer. Here are a few examples:""",
        ),
        # Few shot examples
        few_shot_prompt,
        # New question
        ("user", "{question}"),
    ]
)

In [21]:
from langchain_groq import ChatGroq
dotenv.load_dotenv()
llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash")

In [22]:
generate_queries_step_back = prompt | llm | StrOutputParser()
question = "What is task decomposition for LLM agents?"
generate_queries_step_back.invoke({"question": question})

'What are the general strategies for problem-solving used by LLM agents?'

In [23]:

from langchain_core.runnables import RunnablePassthrough, RunnableLambda
# Response prompt 
response_prompt_template = """You are an expert of world knowledge. I am going to ask you a question. Your response should be comprehensive and not contradicted with the following context if they are relevant. Otherwise, ignore them if they are not relevant.

# {normal_context}
# {step_back_context}

# Original Question: {question}
# Answer:"""
response_prompt = ChatPromptTemplate.from_template(response_prompt_template)

chain = (
    {
        # Retrieve context using the normal question
        "normal_context": RunnableLambda(lambda x: x["question"]) | retriever,
        # Retrieve context using the step-back question
        "step_back_context": generate_queries_step_back | retriever,
        # Pass on the question
        "question": lambda x: x["question"],
    }
    | response_prompt
    | llm
    | StrOutputParser()
)

chain.invoke({"question": question})

'Task decomposition is a crucial component of LLM-powered autonomous agents, especially when dealing with complicated tasks that involve many steps. It refers to the process of breaking down a large task into smaller, more manageable subgoals. This enables the agent to handle complex tasks more efficiently.\n\nHere\'s a breakdown of task decomposition techniques:\n\n*   **Chain of Thought (CoT):** This is a standard prompting technique where the LLM is instructed to "think step by step." This decomposes hard tasks into smaller, simpler steps, utilizing more computation at test time. CoT transforms big tasks into multiple manageable tasks and sheds light on the model’s thinking process.\n*   **Tree of Thoughts (ToT):** This extends CoT by exploring multiple reasoning possibilities at each step. It decomposes the problem into multiple thought steps and generates multiple thoughts per step, creating a tree structure. The search process can be performed using Breadth-First Search (BFS) or 

# HyDE

In [25]:
template = """Please write a scientific paper passsage to answer the question
Question: {question}
Passage:"""
prompt_hyde = ChatPromptTemplate.from_template(template)
generate_docs_for_retrieval = (
    prompt_hyde | llm | StrOutputParser()
)
question = "What is task decomposition for LLM agents?"
generate_docs_for_retrieval.invoke({"question":question})

'Task decomposition for Large Language Model (LLM) agents refers to the process of breaking down a complex, high-level goal or objective into a series of smaller, more manageable sub-tasks. This decomposition is crucial for enabling LLM agents to effectively tackle tasks that require multi-step reasoning, planning, and execution. Instead of attempting to directly address the entire complex task in a single step, the LLM agent leverages its reasoning capabilities to identify the constituent sub-tasks, define their dependencies, and then sequentially address each sub-task. This modular approach not only simplifies the overall problem but also allows for error recovery, progress tracking, and the integration of external tools or knowledge sources at each sub-task level. Effective task decomposition often involves defining clear criteria for sub-task completion, specifying the information required for each sub-task, and determining the optimal order in which the sub-tasks should be execute

In [26]:
retrieval_chain = generate_docs_for_retrieval | retriever
retrieved_docs = retrieval_chain.invoke({"question": question})
retrieved_docs

[Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='Component One: Planning#\nA complicated task usually involves many steps. An agent needs to know what they are and plan ahead.\nTask Decomposition#\nChain of thought (CoT; Wei et al. 2022) has become a standard prompting technique for enhancing model performance on complex tasks. The model is instructed to “think step by step” to utilize more test-time computation to decompose hard tasks into smaller and simpler steps. CoT transforms big tasks into multiple manageable tasks and shed lights into an interpretation of the model’s thinking process.\nTree of Thoughts (Yao et al. 2023) extends CoT by exploring multiple reasoning possibilities at each step. It first decomposes the problem into multiple thought steps and generates multiple thoughts per step, creating a tree structure. The search process can be BFS (breadth-first search) or DFS (depth-first search) with each state evaluated by a

In [28]:
template = """Answer the folowing question based on this context:

{context}

Question: {question}"""

prompt=  ChatPromptTemplate.from_template(template)
final_rag_chain = (
    prompt
    | llm
    | StrOutputParser()
)
final_rag_chain.invoke({"context": retrieved_docs, "question":question})

'Task decomposition is the process of breaking down large, complicated tasks into smaller, more manageable subgoals. This enables LLM agents to handle complex tasks more efficiently. Task decomposition can be done by:\n\n1.  LLM with simple prompting (e.g., "Steps for XYZ. 1.", "What are the subgoals for achieving XYZ?")\n2.  Using task-specific instructions (e.g., "Write a story outline." for writing a novel)\n3.  With human inputs.\n\nTechniques like Chain of Thought (CoT) and Tree of Thoughts are examples of how task decomposition can be implemented. CoT instructs the model to "think step by step," while Tree of Thoughts explores multiple reasoning possibilities at each step, creating a tree structure of thoughts.'