In [1]:
import os
from dotenv import load_dotenv

# Load variables from a .env file (if one is present) before reading the environment.
load_dotenv()

# OpenAI-compatible endpoint and credential; each is None when the variable is unset.
OPENAI_API_BASE = os.getenv("OPENAI_API_BASE")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

# LangChain tracing settings. The endpoint is a fixed constant; the flag and the
# API key come from the environment.
LANGCHAIN_TRACING_V2 = os.getenv("LANGCHAIN_TRACING_V2")
LANGCHAIN_ENDPOINT = 'https://api.smith.langchain.com'
LANGCHAIN_API_KEY = os.getenv("LANGCHAIN_API_KEY")

In [2]:
from langchain_openai import ChatOpenAI
from langchain_community.embeddings import HuggingFaceBgeEmbeddings

# Chat model shared by the chains in this notebook.
llm = ChatOpenAI(model="qwen-max", temperature=0)

# Location of the local BGE embedding model. Set the BGE_MODEL_PATH environment
# variable (e.g. in .env) to point at your own copy; the previous hard-coded
# path is kept as the fallback so existing setups keep working.
model_name = os.environ.get(
    "BGE_MODEL_PATH",
    "C:\\Home\\Documents\\Projects\\models\\BAAI\\bge-large-en-v1.5",
)
model_kwargs = {"device": "cpu"}
encode_kwargs = {"normalize_embeddings": True}
embedding_model = HuggingFaceBgeEmbeddings(
    model_name=model_name, model_kwargs=model_kwargs, encode_kwargs=encode_kwargs
)

  from tqdm.autonotebook import tqdm, trange


In [5]:
# Vector Store

from langchain_qdrant import QdrantVectorStore
from qdrant_client import QdrantClient

# Client for a Qdrant server on localhost:6333.
client = QdrantClient(host="localhost", port=6333)

# Vector store over the "rag_from_scratch" collection, using embedding_model for embeddings.
# NOTE(review): presumably the collection was populated beforehand — nothing in this notebook indexes documents.
vector_store = QdrantVectorStore(
    client=client,
    collection_name="rag_from_scratch",
    embedding=embedding_model,
)

# NOTE(review): k=1 presumably limits every retrieval to a single document, which leaves the
# multi-query union and rank fusion below very little to merge — confirm this is intended.
retriever = vector_store.as_retriever(search_kwargs={"k": 1})

# Multi-Query

In [6]:
from langchain.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

# Multi Query: Different Perspectives
template = """You are an AI language model assistant. Your task is to generate five 
different versions of the given user question to retrieve relevant documents from a vector 
database. By generating multiple perspectives on the user question, your goal is to help
the user overcome some of the limitations of the distance-based similarity search. 
Provide these alternative questions separated by newlines. Original question: {question}"""
prompt_perspectives = ChatPromptTemplate.from_template(template)

# The model is asked for one query per line. Split on newlines and drop blank or
# whitespace-only lines so that no empty query is ever sent to the retriever.
generate_queries = (
    prompt_perspectives
    | llm
    | StrOutputParser()
    | (lambda x: [line.strip() for line in x.split("\n") if line.strip()])
)

In [8]:
question = "What is task decomposition for LLM agents?"

# Pass the input as a mapping keyed by the prompt variable, the same way the
# retrieval and RAG chains in this notebook are invoked.
generate_queries.invoke({"question": question})

['1. How can large language model agents break down complex tasks into simpler sub-tasks?',
 '2. What are the methods for dividing tasks into smaller, more manageable parts for LLMs?',
 '3. Can you explain the process of task decomposition in the context of LLM-based systems?',
 '4. In what ways do LLM agents use task decomposition to improve their performance and efficiency?',
 '5. What techniques are used by LLMs to decompose a large problem into smaller, more solvable components?']

In [11]:
def get_unique_union(documents: list[list]):
    """Return the distinct page contents found across several retrieval results.

    Args:
        documents: One list of retrieved documents per query. Every item must
            expose a ``page_content`` attribute.

    Returns:
        A list of the unique ``page_content`` values in first-seen order, so
        the result is reproducible from run to run.
    """
    # dict keys are unique and keep insertion order, whereas iterating a set of
    # strings can yield a different order on every interpreter run.
    unique_contents = dict.fromkeys(
        doc.page_content for sublist in documents for doc in sublist
    )
    return list(unique_contents)

# Retrieve
question = "What is task decomposition for LLM agents?"
# Generate the query variants, run the retriever over each of them (retriever.map()),
# then merge the per-query results with get_unique_union.
retrieval_chain = generate_queries | retriever.map() | get_unique_union
docs = retrieval_chain.invoke({"question":question})
# Number of entries returned by get_unique_union.
len(docs)

1

In [13]:
import pprint

# Show the first entry returned by the multi-query retrieval chain.
pprint.pprint(docs[0])

('Fig. 1. Overview of a LLM-powered autonomous agent system.\n'
 'Component One: Planning#\n'
 'A complicated task usually involves many steps. An agent needs to know what '
 'they are and plan ahead.\n'
 'Task Decomposition#\n'
 'Chain of thought (CoT; Wei et al. 2022) has become a standard prompting '
 'technique for enhancing model performance on complex tasks. The model is '
 'instructed to “think step by step” to utilize more test-time computation to '
 'decompose hard tasks into smaller and simpler steps. CoT transforms big '
 'tasks into multiple manageable tasks and shed lights into an interpretation '
 'of the model’s thinking process.\n'
 'Tree of Thoughts (Yao et al. 2023) extends CoT by exploring multiple '
 'reasoning possibilities at each step. It first decomposes the problem into '
 'multiple thought steps and generates multiple thoughts per step, creating a '
 'tree structure. The search process can be BFS (breadth-first search) or DFS '
 '(depth-first search) with each

In [14]:
from operator import itemgetter

# RAG: answer the question using the multi-query retrieval result as context.
template = """Answer the following question based on this context:

{context}

Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

# The dict step fills both prompt variables from the same input: "context" by running
# retrieval_chain on it, "question" by reading the input's "question" key.
final_rag_chain = (
    {"context": retrieval_chain, "question": itemgetter("question")} 
    | prompt 
    | llm
    | StrOutputParser()
)

result = final_rag_chain.invoke({"question":question})

pprint.pprint(result)

('Task decomposition for LLM (Large Language Model) agents is the process of '
 'breaking down a complex task into smaller, more manageable subtasks. This '
 'approach helps the model to handle and solve the problem more effectively by '
 'tackling it step by step. \n'
 '\n'
 'In the context provided, there are a few methods mentioned for task '
 'decomposition:\n'
 '\n'
 '1. **Chain of Thought (CoT):** This technique involves instructing the model '
 'to "think step by step," which allows it to utilize more computational '
 'resources at test time to decompose difficult tasks into simpler steps. CoT '
 "provides insight into the model's reasoning process.\n"
 '\n'
 '2. **Tree of Thoughts (ToT):** This method extends CoT by exploring multiple '
 'reasoning possibilities at each step. It decomposes the problem into '
 'multiple thought steps and generates multiple thoughts per step, creating a '
 'tree structure. The search through this tree can be done using breadth-first '
 'search (B

# RAG Fusion

In [16]:
from langchain.prompts import ChatPromptTemplate

# RAG-Fusion: Related
template = """You are a helpful assistant that generates multiple search queries based on a single input query. \n
Generate multiple search queries related to: {question} \n
Output (4 queries):"""

prompt_rag_fusion = ChatPromptTemplate.from_template(template)

# One query per output line. Blank or whitespace-only lines are dropped so that
# no empty query is sent to the retriever.
# NOTE: this rebinds `generate_queries`, previously the multi-query chain.
generate_queries = (
    prompt_rag_fusion
    | llm
    | StrOutputParser()
    | (lambda x: [line.strip() for line in x.split("\n") if line.strip()])
)

In [17]:
question = "What is task decomposition for LLM agents?"

# Pass the input as a mapping keyed by the prompt variable, the same way the
# retrieval and RAG chains in this notebook are invoked.
generate_queries.invoke({"question": question})

['1. How does task decomposition benefit Large Language Model (LLM) agents?',
 '2. Best practices for implementing task decomposition in LLMs.',
 '3. Examples of task decomposition in LLM agent workflows.',
 '4. The role of task decomposition in improving the efficiency and effectiveness of LLM agents.']

In [18]:
def reciprocal_rank_fusion(results: list[list], k=60):
    """Fuse several ranked document lists into one ranking (Reciprocal Rank Fusion).

    Every document contributes ``1 / (k + rank)`` for each list it appears in,
    where ``rank`` is its 1-based position in that list; the contributions are
    summed per document. Documents are identified by their ``page_content``.

    Args:
        results: One ranked list of documents per query, best match first.
            Every item must expose a ``page_content`` attribute.
        k: Constant added to the rank in the denominator. Larger values
            flatten the difference between top and lower positions.

    Returns:
        A list of ``(page_content, fused_score)`` tuples sorted by score,
        highest first. Documents with equal scores keep first-seen order.
    """
    fused_scores = {}

    for docs in results:
        # Ranks start at 1, as in the published RRF formula; this also keeps the
        # denominator non-zero when k == 0.
        for rank, doc in enumerate(docs, start=1):
            content = doc.page_content
            fused_scores[content] = fused_scores.get(content, 0) + 1 / (rank + k)

    # sorted() is stable, so ties stay in insertion (first-seen) order.
    return sorted(fused_scores.items(), key=lambda item: item[1], reverse=True)

# Generate the related queries, run the retriever over each of them (retriever.map()),
# then fuse the per-query rankings with reciprocal_rank_fusion.
retrieval_chain_rag_fusion = generate_queries | retriever.map() | reciprocal_rank_fusion
docs = retrieval_chain_rag_fusion.invoke({"question": question})
# Number of fused entries.
len(docs)

1

In [19]:
# docs[0] is the top fused entry: a (page_content, score) tuple as returned by reciprocal_rank_fusion.
pprint.pprint(docs[0])

('Fig. 1. Overview of a LLM-powered autonomous agent system.\n'
 'Component One: Planning#\n'
 'A complicated task usually involves many steps. An agent needs to know what '
 'they are and plan ahead.\n'
 'Task Decomposition#\n'
 'Chain of thought (CoT; Wei et al. 2022) has become a standard prompting '
 'technique for enhancing model performance on complex tasks. The model is '
 'instructed to “think step by step” to utilize more test-time computation to '
 'decompose hard tasks into smaller and simpler steps. CoT transforms big '
 'tasks into multiple manageable tasks and shed lights into an interpretation '
 'of the model’s thinking process.\n'
 'Tree of Thoughts (Yao et al. 2023) extends CoT by exploring multiple '
 'reasoning possibilities at each step. It first decomposes the problem into '
 'multiple thought steps and generates multiple thoughts per step, creating a '
 'tree structure. The search process can be BFS (breadth-first search) or DFS '
 '(depth-first search) with each

In [20]:
# RAG: answer the question using the RAG-fusion retrieval result as context.
template = """Answer the following question based on this context:

{context}

Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

# NOTE(review): "context" is the list of (page_content, score) tuples returned by
# reciprocal_rank_fusion, so the scores appear to be rendered into the prompt as well —
# confirm that is acceptable.
final_rag_chain = (
    {"context": retrieval_chain_rag_fusion, "question": itemgetter("question")} 
    | prompt
    | llm
    | StrOutputParser()
)

result = final_rag_chain.invoke({"question":question})

pprint.pprint(result)

('Task decomposition for LLM (Large Language Model) agents is the process of '
 'breaking down a complex task into smaller, more manageable subtasks. This '
 'technique helps the agent to plan and execute the task more effectively by '
 'addressing each subtask step-by-step. \n'
 '\n'
 'In the context provided, two specific methods for task decomposition are '
 'mentioned:\n'
 '\n'
 '1. **Chain of Thought (CoT)**: This method, introduced by Wei et al. in '
 '2022, involves instructing the model to "think step by step." By doing so, '
 'the model can use more computational resources at test time to decompose '
 'hard tasks into simpler, smaller steps. This not only makes the task more '
 "manageable but also provides insight into the model's thinking process.\n"
 '\n'
 '2. **Tree of Thoughts**: This is an extension of CoT, introduced by Yao et '
 'al. in 2023. It explores multiple reasoning possibilities at each step of '
 'the task. The problem is decomposed into multiple thought steps

# Decomposition

In [21]:
# Decomposition
template = """You are a helpful assistant that generates multiple sub-questions related to an input question. \n
The goal is to break down the input into a set of sub-problems / sub-questions that can be answers in isolation. \n
Generate multiple search queries related to: {question} \n
Output (3 queries):"""

prompt_decomposition = ChatPromptTemplate.from_template(template)

# Chain: one sub-question per output line. Blank or whitespace-only lines are
# dropped so downstream cells never retrieve or answer an empty question.
generate_queries_decomposition = (
    prompt_decomposition
    | llm
    | StrOutputParser()
    | (lambda x: [line.strip() for line in x.split("\n") if line.strip()])
)

# Run
question = "What are the main components of an LLM-powered autonomous agent system?"
questions = generate_queries_decomposition.invoke({"question":question})

questions

['1. What are the key architectural components of an LLM-powered autonomous agent?',
 '2. How does natural language processing (NLP) contribute to the functionality of an LLM-based autonomous system?',
 '3. What role do reinforcement learning and other machine learning techniques play in enhancing the decision-making capabilities of an LLM-driven autonomous agent?']

In [22]:
# The input is a dict mapping the prompt variable to its value. A comma between
# the two strings would build a set instead, and the set's repr would be sent
# to the model as the question.
generate_queries_decomposition.invoke({"question": "诺贝尔奖得主中，谁既是科学家又是文学家？"})

['1. 诺贝尔奖得主中有哪些人同时获得了科学类奖项和文学奖？',
 '2. 历史上是否有科学家也因为文学成就获得诺贝尔奖？',
 '3. 获得诺贝尔科学奖的得主里，哪些人在文学领域也有显著贡献？']

### Answer recursively

In [23]:
# Prompt for answering one sub-question. It takes three variables: the sub-question
# ({question}, used twice), the Q+A pairs answered so far ({q_a_pairs}) and the
# retrieved context ({context}).
template = """Here is the question you need to answer:

\n --- \n {question} \n --- \n

Here is any available background question + answer pairs:

\n --- \n {q_a_pairs} \n --- \n

Here is additional context relevant to the question: 

\n --- \n {context} \n --- \n

Use the above context and any background question + answer pairs to answer the question: \n {question}
"""

decomposition_prompt = ChatPromptTemplate.from_template(template)

In [24]:
def format_qa_pair(question, answer):
    """Render a single question and its answer as a two-line text block.

    The first line is "Question: <question>" and the second is
    "Answer: <answer>"; leading and trailing whitespace of the combined text is
    stripped before it is returned.
    """
    pair_text = f"Question: {question}\nAnswer: {answer}\n\n"
    return pair_text.strip()

# The chain does not depend on the loop variable, so build it once instead of
# on every iteration. Each call retrieves context for the sub-question and
# answers it with the Q+A pairs accumulated so far as background.
rag_chain = (
    {"context": itemgetter("question") | retriever,
     "question": itemgetter("question"),
     "q_a_pairs": itemgetter("q_a_pairs")}
    | decomposition_prompt
    | llm
    | StrOutputParser())

q_a_pairs = ""
# Defined up front so the final print works even when `questions` is empty.
answer = None
for q in questions:
    # Blank lines left over from splitting the model output are not questions.
    if not q.strip():
        continue
    answer = rag_chain.invoke({"question": q, "q_a_pairs": q_a_pairs})
    q_a_pair = format_qa_pair(q, answer)
    q_a_pairs = q_a_pairs + "\n---\n" + q_a_pair

# Answer to the last sub-question, which was produced with every earlier pair as background.
pprint.pprint(answer)

('Reinforcement learning (RL) and other machine learning (ML) techniques play '
 'a crucial role in enhancing the decision-making capabilities of an '
 'LLM-driven autonomous agent. Here’s how they contribute to the system:\n'
 '\n'
 '### 1. **Reinforcement Learning (RL):**\n'
 '   - **Learning from Interactions:**\n'
 '     - RL is a type of machine learning where an agent learns to make '
 'decisions by interacting with its environment. The agent receives rewards or '
 'penalties based on its actions, which it uses to learn optimal strategies '
 'over time. In the context of an LLM-driven autonomous agent, RL can help the '
 'agent learn to make better decisions by receiving feedback from the '
 'environment.\n'
 '   - **Policy Optimization:**\n'
 '     - RL algorithms, such as Q-learning, Deep Q-Networks (DQN), and '
 'Proximal Policy Optimization (PPO), can be used to optimize the policy that '
 'the LLM follows. This means the LLM can learn to take actions that maximize '
 'long-t

### Answer individually

In [25]:
# Answer each sub-question individually 

from langchain import hub

# RAG prompt: pull the "rlm/rag-prompt" prompt from the LangChain hub.
# NOTE(review): presumably needs network access at run time — confirm.
prompt_rag = hub.pull("rlm/rag-prompt")

def retrieve_and_rag(question, prompt_rag, sub_question_generator_chain):
    """Answer every generated sub-question with its own retrieve-then-generate pass.

    Uses the module-level ``retriever`` and ``llm``.

    Args:
        question: The original user question.
        prompt_rag: Prompt runnable that is given ``context`` and ``question``.
        sub_question_generator_chain: Runnable invoked with
            ``{"question": question}``; must return an iterable of
            sub-question strings.

    Returns:
        A tuple ``(rag_results, sub_questions)``: the generated answers and the
        non-blank sub-questions they belong to, aligned by index.
    """
    generated = sub_question_generator_chain.invoke({"question": question})
    # Blank lines left over from splitting the model output are not real
    # questions; drop them so they are neither retrieved for nor answered.
    sub_questions = [sub_question for sub_question in generated if sub_question.strip()]

    # The answering chain is the same for every sub-question: build it once.
    answer_chain = prompt_rag | llm | StrOutputParser()

    rag_results = []
    for sub_question in sub_questions:
        # Retrieve documents for this sub-question, then answer it from them.
        retrieved_docs = retriever.invoke(sub_question)
        answer = answer_chain.invoke({"context": retrieved_docs,
                                      "question": sub_question})
        rag_results.append(answer)

    return rag_results, sub_questions

# Run decomposition plus per-sub-question RAG for the current `question`.
# `questions` is rebound to the sub-questions returned by retrieve_and_rag.
answers, questions = retrieve_and_rag(question, prompt_rag, generate_queries_decomposition)

In [26]:
def format_qa_pairs(questions, answers):
    """Render numbered question/answer pairs as one text block.

    Questions and answers are paired by position (items beyond the shorter
    sequence are ignored) and numbered from 1. Pairs are separated by a blank
    line, and the combined text is stripped of surrounding whitespace.
    """
    numbered_pairs = enumerate(zip(questions, answers), start=1)
    blocks = [
        f"Question {i}: {question}\nAnswer {i}: {answer}\n\n"
        for i, (question, answer) in numbered_pairs
    ]
    return "".join(blocks).strip()

# Combine the sub-questions and their answers into one numbered Q+A text block.
context = format_qa_pairs(questions, answers)

# Prompt
template = """Here is a set of Q+A pairs:

{context}

Use these to synthesize an answer to the question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

# No retrieval step in this chain: the Q+A block built above is the only context.
final_rag_chain = (
    prompt
    | llm
    | StrOutputParser()
)

result = final_rag_chain.invoke({"context":context, "question":question})

pprint.pprint(result)

('The main components of an LLM-powered autonomous agent system include:\n'
 '\n'
 '1. **Planning and Problem-Solving**: This involves the ability to break down '
 'complex tasks into smaller, manageable subgoals. The planning module also '
 'includes reflection and refinement, which allows the agent to learn from its '
 'actions and improve over time. Language models play a critical role here by '
 'acting as the "brain" of the agent, enabling it to plan and solve problems '
 'effectively.\n'
 '\n'
 '2. **Memory**: Memory is essential for the agent to store and recall '
 'information about past experiences and learned strategies. This helps in '
 'refining future actions and making more informed decisions. The memory '
 'component supports the continuous learning and adaptation of the agent.\n'
 '\n'
 '3. **Data Processing and Analysis**: This component is crucial for handling '
 'and analyzing the data that the agent interacts with. It enables the agent '
 'to decompose tasks into su

# Step-back Question

In [27]:
# Few Shot Examples
from langchain_core.prompts import ChatPromptTemplate, FewShotChatMessagePromptTemplate
# Each example pairs a specific question ("input") with its more generic
# step-back rewrite ("output").
examples = [
    {
        "input": "Could the members of The Police perform lawful arrests?",
        "output": "what can the members of The Police do?",
    },
    {
        "input": "Jan Sindel’s was born in what country?",
        "output": "what is Jan Sindel’s personal history?",
    },
]
# We now transform these to example messages
example_prompt = ChatPromptTemplate.from_messages(
    [
        ("human", "{input}"),
        ("ai", "{output}"),
    ]
)
few_shot_prompt = FewShotChatMessagePromptTemplate(
    example_prompt=example_prompt,
    examples=examples,
)
# Full prompt: system instruction, then the few-shot examples, then the user's question.
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """You are an expert at world knowledge. 
            Your task is to step back and paraphrase a question to a more generic step-back question, 
            which is easier to answer. Here are a few examples:""",
        ),
        # Few shot examples
        few_shot_prompt,
        # New question
        ("user", "{question}"),
    ]
)

In [28]:
# Chain that turns a question into its step-back version; `prompt` is whatever
# the name is currently bound to (the few-shot step-back prompt when cells run in order).
generate_queries_step_back = prompt | llm | StrOutputParser()

question = "What is task decomposition for LLM agents?"

generate_queries_step_back.invoke({"question": question})

'What is task decomposition in the context of artificial intelligence?'

In [29]:
from langchain_core.runnables import RunnableLambda

# Response prompt
response_prompt_template = """You are an expert of world knowledge. I am going to ask you a question. 
Your response should be comprehensive and not contradicted with the following context if they are relevant. 
Otherwise, ignore them if they are not relevant.

# {normal_context}
# {step_back_context}

# Original Question: {question}
# Answer:"""
response_prompt = ChatPromptTemplate.from_template(response_prompt_template)

chain = (
    {
        # Retrieve context using the normal question
        "normal_context": RunnableLambda(lambda x: x["question"]) | retriever,
        # Retrieve context using the step-back question
        "step_back_context": generate_queries_step_back | retriever,
        # Pass on the question
        "question": lambda x: x["question"],
    }
    | response_prompt
    # Reuse the shared chat model (same model name and temperature) instead of
    # constructing a second, identical client, so the model is configured in one place.
    | llm
    | StrOutputParser()
)

result = chain.invoke({"question": question})

pprint.pprint(result)

('Task decomposition for LLM (Large Language Model) agents is a process where '
 'complex tasks are broken down into smaller, more manageable sub-tasks. This '
 'approach helps the model to better understand and execute the overall task '
 'by focusing on one step at a time. The concept of task decomposition is '
 'crucial in enhancing the performance of LLMs on complex tasks, as it allows '
 'the model to think step by step, making use of more test-time computation to '
 'simplify the problem.\n'
 '\n'
 '### Key Techniques in Task Decomposition:\n'
 '\n'
 '1. **Chain of Thought (CoT)**:\n'
 '   - **Definition**: CoT, introduced by Wei et al. in 2022, is a prompting '
 'technique that encourages the model to "think step by step." This method '
 'helps the model to break down a complex task into simpler, sequential '
 'steps.\n'
 '   - **Benefits**: By decomposing the task, CoT not only makes the task more '
 "manageable but also provides insight into the model's reasoning process, "
 '

# HyDE

In [31]:
# HyDE document generation: have the model write a hypothetical passage that answers the question.

template = """Please write a scientific paper passage to answer the question
Question: {question}
Passage:"""

prompt_hyde = ChatPromptTemplate.from_template(template)

generate_docs_for_retrieval = (prompt_hyde | llm | StrOutputParser())

# Run
question = "What is task decomposition for LLM agents?"

hypothetical_doc = generate_docs_for_retrieval.invoke({"question": question})

pprint.pprint(hypothetical_doc)

('**Task Decomposition for Large Language Model (LLM) Agents: An Overview**\n'
 '\n'
 'In the realm of artificial intelligence, large language models (LLMs) have '
 'emerged as powerful tools capable of generating human-like text and '
 'performing a wide array of complex tasks. However, the effectiveness of LLMs '
 'in handling intricate and multifaceted tasks can be significantly enhanced '
 'through a process known as task decomposition. Task decomposition involves '
 'breaking down a complex task into smaller, more manageable sub-tasks, each '
 'of which can be addressed by the LLM with greater precision and efficiency.\n'
 '\n'
 'The primary goal of task decomposition is to improve the overall performance '
 'and reliability of LLM agents by leveraging their strengths in handling '
 'specific, well-defined tasks. This approach is particularly useful when '
 'dealing with tasks that require a sequence of steps or involve multiple '
 'domains of knowledge. By decomposing a task, the

In [32]:
# Retrieve

# The generated hypothetical passage, not the raw question, is piped into the retriever.
# NOTE(review): this rebinds `retrieval_chain`, which an earlier cell bound to the multi-query chain.
retrieval_chain = generate_docs_for_retrieval | retriever 

# The spelling `retireved_docs` (sic) is kept because later cells read this exact name.
retireved_docs = retrieval_chain.invoke({"question": question})

retireved_docs

[Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/', '_id': 'ca53f36c-9331-4f43-bc8f-5b5e8f788d57', '_collection_name': 'rag_from_scratch'}, page_content='Fig. 1. Overview of a LLM-powered autonomous agent system.\nComponent One: Planning#\nA complicated task usually involves many steps. An agent needs to know what they are and plan ahead.\nTask Decomposition#\nChain of thought (CoT; Wei et al. 2022) has become a standard prompting technique for enhancing model performance on complex tasks. The model is instructed to “think step by step” to utilize more test-time computation to decompose hard tasks into smaller and simpler steps. CoT transforms big tasks into multiple manageable tasks and shed lights into an interpretation of the model’s thinking process.\nTree of Thoughts (Yao et al. 2023) extends CoT by exploring multiple reasoning possibilities at each step. It first decomposes the problem into multiple thought steps and generates multiple thoughts p

In [33]:
# RAG: answer the question from the documents retrieved via the hypothetical passage.

template = """Answer the following question based on this context:

{context}

Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

# No retrieval step in this chain: the documents are supplied directly at invoke time.
final_rag_chain = (
    prompt
    | llm
    | StrOutputParser()
)

result = final_rag_chain.invoke({"context": retireved_docs, "question": question})

pprint.pprint(result)

('Task decomposition for LLM (Large Language Model) agents is the process of '
 'breaking down a complex task into smaller, more manageable sub-tasks. This '
 'technique helps the agent to handle and plan for complicated tasks by '
 'simplifying them into a series of simpler steps. The document mentions '
 'several methods for achieving this:\n'
 '\n'
 '1. **Chain of Thought (CoT)**: This method involves instructing the model to '
 'think step-by-step, which allows it to use more computational resources at '
 'test time to break down hard tasks into smaller, simpler steps. CoT not only '
 "makes the task more manageable but also provides insight into the model's "
 'reasoning process.\n'
 '\n'
 '2. **Tree of Thoughts**: This is an extension of CoT that explores multiple '
 'reasoning possibilities at each step. It decomposes the problem into '
 'multiple thought steps and generates multiple thoughts per step, creating a '
 'tree-like structure. The search process can be conducted using

# Prompt

In [35]:
# Prompt asking the model to translate Chinese input into English.
# NOTE(review): the prompt does not say what to return when the input is not Chinese —
# confirm the intended behaviour for that case.
lang_detection_prompt_template = """
Please detect if the user's input is in Chinese. 
If it is, translate it into English and return only the translated text, without any additional explanation.
User question: {question}
"""

lang_detection_prompt = ChatPromptTemplate.from_template(lang_detection_prompt_template)

lang_detection_chain = (
    lang_detection_prompt 
    | llm 
    | StrOutputParser()
)

question = "LLM Agent 中的任务分解是什么？"

# The translation is only displayed as the cell output; it is not stored in a variable.
lang_detection_chain.invoke({"question": question})

'Task decomposition in the LLM Agent refers to what?'

In [36]:
# RAG: same pattern as the earlier RAG cells, but the prompt asks for the answer in Chinese.

template = """Answer the following question in Chinese based on this context:

{context}

Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

final_rag_chain = (
    prompt
    | llm
    | StrOutputParser()
)

# NOTE(review): `retireved_docs` still holds the documents retrieved in the HyDE cell for the
# English question, and the output of lang_detection_chain is not used here — confirm whether
# retrieval should be re-run with the translated question instead.
result = final_rag_chain.invoke({"context": retireved_docs, "question": question})

pprint.pprint(result)

('在LLM（大型语言模型）驱动的自主代理系统中，任务分解是指将一个复杂任务拆解为多个更小、更简单的步骤的过程。这一过程有助于提高模型处理复杂任务的能力。例如，通过链式思维（CoT; '
 'Wei等人，2022年提出）这种技术指导模型“一步一步地思考”，利用更多的测试时间计算能力来分解难题。此外，还有树形思维（Tree of '
 'Thoughts; '
 'Yao等人，2023年提出），它进一步扩展了CoT的方法，在每一步探索多种推理可能性，形成一种树状结构，并可以通过广度优先搜索(BFS)或深度优先搜索(DFS)来进行搜索，每个状态由分类器（通过提示）或多数投票来评估。任务分解可以通过简单的提示让LLM完成，比如给出类似“实现XYZ的步骤”这样的指令；也可以使用特定于任务的指示，如要求编写小说时提供“写故事大纲”的指令；还可以结合人类输入来进行。')
