### Initial Setup

In [None]:
import os
import bs4
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from dotenv import load_dotenv

load_dotenv() 

# Set your OpenAI API key
os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")

os.environ["LANGSMITH_TRACING_V2"] = "false"  # Disable LangSmith tracing
# Load Documents
loader = WebBaseLoader(web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",))
blog_docs = loader.load()

# Split into Chunks
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
chunk_size=300, chunk_overlap=50
)
splits = text_splitter.split_documents(blog_docs)

# Embed & Store
vectorstore = Chroma.from_documents(documents=splits,
embedding=OpenAIEmbeddings())

# Retriever
retriever = vectorstore.as_retriever()

USER_AGENT environment variable not set, consider setting it to identify your requests.


### Multi-Query Retriever

In [2]:
from langchain.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain.load import dumps, loads

# 1. Prompt for generating multiple queries

template = """You are an AI language model assistant. Your task is to generate five different versions of the given user question to retrieve relevant documents from a vector
database. By generating multiple perspectives on the user question, your goal is to help the user overcome some of the limitations of distance-based similarity search.
Provide these alternative questions separated by newlines. Original question: {question}"""

prompt_perspectives = ChatPromptTemplate.from_template(template)

# 2. Chain to generate queries

llm = ChatOpenAI(temperature=0)
generate_queries = (
    prompt_perspectives
    | llm
    | StrOutputParser()
    | (lambda x: x.split("\n"))
)

# 3. Function to get unique union of documents

def get_unique_union(documents: list[list]):
    """ Unique union of retrieved docs """
    flattened_docs = [dumps(doc) for sublist in documents for doc in sublist]
    unique_docs = list(set(flattened_docs))
    return [loads(doc) for doc in unique_docs]

# 4. Build the retrieval chain

retrieval_chain = generate_queries | retriever.map() | get_unique_union

# 5. Define the final RAG chain

from langchain_core.runnables import RunnablePassthrough
from operator import itemgetter

template = """
    Answer the following question based on this context:
    {context}
    Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)

final_rag_chain = (
    {"context": retrieval_chain, "question": itemgetter("question")}
    | prompt
    | llm
    | StrOutputParser()
)

# Run the chain
question = "What is task decomposition for LLM agents?"
final_rag_chain.invoke({"question": question})

  return [loads(doc) for doc in unique_docs]


'Task decomposition for LLM agents involves breaking down large tasks into smaller, manageable subgoals. This process enables efficient handling of complex tasks by transforming them into multiple manageable tasks, allowing the agent to tackle each step individually.'

### RAG-Fusion

In [3]:
# 1. Prompt for RAG-Fusion
template = """You are a helpful assistant that generates multiple search queries related to: {question} \n Output (4 queries):"""
prompt_rag_fusion = ChatPromptTemplate.from_template(template)

# 2. Chain to generate queries
generate_queries = (
    prompt_rag_fusion
    | ChatOpenAI(temperature=0)
    | StrOutputParser()
    | (lambda x: x.split("\n"))
)

# 3. Reciprocal Rank Fusion function

def reciprocal_rank_fusion(results: list[list], k=60):
    fused_scores = {}
    for docs in results:
        # Iterate through each document in the list, with its rank
        for rank, doc in enumerate(docs):
            doc_str = dumps(doc)
            if doc_str not in fused_scores:
                fused_scores[doc_str] = 0
            # Update the score using the RRF formula: 1 / (rank + k)
            fused_scores[doc_str] += 1 / (rank + k)

    reranked_results = [
        (loads(doc), score)
        for doc, score in sorted(fused_scores.items(), key=lambda x: x[1], reverse=True)
    ]
    return reranked_results

# 4. Build the retrieval chain for fusion
retrieval_chain_rag_fusion = generate_queries | retriever.map() | reciprocal_rank_fusion

question = "What is task decomposition for LLM agents?"
docs = retrieval_chain_rag_fusion.invoke({"question": question})
print("No. of Docs: ", len(docs))

# 5. Define the final RAG chain
from langchain_core.runnables import RunnablePassthrough

# RAG
template = """Answer the following question based on this context:
{context}
Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

final_rag_chain = (
    {"context": retrieval_chain_rag_fusion, 
     "question": itemgetter("question")} 
    | prompt
    | llm
    | StrOutputParser()
)

final_rag_chain.invoke({"question":question})

No. of Docs:  15


'Task decomposition for LLM agents involves breaking down large tasks into smaller, manageable subgoals, enabling efficient handling of complex tasks. This process allows the agent to tackle the overall task by dividing it into more achievable steps, leading to improved performance and results.'

### Decomposition

In [6]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough, RunnableLambda
from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI

# 1. Prompt to generate sub-questions
template = """You are a helpful assistant that generates multiple sub-questions from a complex question. The goal is to break down the complex question into a series of simpler questions that can be answered individually. Generate 3 sub-questions. Question: {question}"""
prompt_decomposition = ChatPromptTemplate.from_template(template)
generate_queries_decomposition = prompt_decomposition | llm | StrOutputParser() | (lambda x: x.split("\n"))

# 2. Prompt for recursive answering
template = """
Here is the question you need to answer: {question}
Here is any available background question + answer pairs: {q_a_pairs}
Here is additional context relevant to the question: {context}

Use the above context and any background question + answer pairs to answer the question.
"""
decomposition_prompt = ChatPromptTemplate.from_template(template)

# 3. Recursive answering loop
question = "What are the main components of an LLM-powered autonomous agent system?"
questions = generate_queries_decomposition.invoke({"question": question})

q_a_pairs = ""
for q in questions:
    rag_chain = (
        {"context": itemgetter("question") | retriever,
        "question": itemgetter("question"),
        "q_a_pairs": itemgetter("q_a_pairs")}
        | decomposition_prompt
        | llm
        | StrOutputParser()
    )
    answer = rag_chain.invoke({"question": q, "q_a_pairs": q_a_pairs})
    q_a_pair = f"Question: {q}\nAnswer: {answer}\n\n"
    q_a_pairs = q_a_pairs + q_a_pair

print("Answer (Recursive): \n", answer)

# 4. Answer each sub-question individually 

# RAG prompt
prompt_template = """
Answer the following question based only on the provided context:
    <context>
    {context}
    </context>
    Question: {question}
"""
prompt_rag = ChatPromptTemplate.from_template(prompt_template)

def retrieve_and_rag(question,prompt_rag,sub_question_generator_chain):
    """RAG on each sub-question"""
    
    # Use our decomposition / 
    sub_questions = sub_question_generator_chain.invoke({"question":question})
    
    # Initialize a list to hold RAG chain results
    rag_results = []
    
    for sub_question in sub_questions:
        
        # Retrieve documents for each sub-question
        retrieved_docs = retriever.get_relevant_documents(sub_question)
        
        # Use retrieved documents and sub-question in RAG chain
        answer = (prompt_rag | llm | StrOutputParser()).invoke({"context": retrieved_docs, 
                                                                "question": sub_question})
        rag_results.append(answer)
    
    return rag_results,sub_questions

# Wrap the retrieval and RAG process in a RunnableLambda for integration into a chain
answers, questions = retrieve_and_rag(question, prompt_rag, generate_queries_decomposition)
def format_qa_pairs(questions, answers):
    """Format Q and A pairs"""
    
    formatted_string = ""
    for i, (question, answer) in enumerate(zip(questions, answers), start=1):
        formatted_string += f"Question {i}: {question}\nAnswer {i}: {answer}\n\n"
    return formatted_string.strip()

context = format_qa_pairs(questions, answers)

# Prompt
template = """Here is a set of Q+A pairs:

{context}

Use these to synthesize an answer to the question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

final_rag_chain = (
    prompt
    | llm
    | StrOutputParser()
)

ind_answer = final_rag_chain.invoke({"context":context,"question":question})
print("Answer (Individual): \n", answer)

Answer (Recursive): 
 The main components of an LLM-powered autonomous agent system provide specific functionalities as follows:

1. Planning Component:
- Subgoal and decomposition: The agent breaks down large tasks into smaller, manageable subgoals for efficient handling of complex tasks.
- Reflection and refinement: The agent engages in self-criticism, self-reflection over past actions, learns from mistakes, and refines actions for future steps to improve the quality of final results.

2. Memory Component:
- Short-term memory: Utilized for in-context learning to adapt to current situations.
- Long-term memory: Enables the agent to retain and recall infinite information over extended periods, often leveraging an external vector store for fast retrieval.

3. Tool Use Component:
- The agent learns to call external APIs for extra information missing from the model weights.
- Accesses current information, code execution capabilities, proprietary information sources, and more to enhance de

### Step-back

In [8]:
from langchain_core.prompts import ChatPromptTemplate, FewShotChatMessagePromptTemplate
from langchain_core.runnables import RunnableLambda

# 1. Few-shot examples for generating the step-back question
examples = [
    {
        "input": "Could the members of The Police perform lawful arrests?",
        "output": "what can the members of The Police do?"
    },
    {
        "input": "Jan Sindel's was born in what country?",
        "output": "what is Jan Sindel's personal history?"
    },
]
example_prompt = ChatPromptTemplate.from_messages(
    [
        ("human", "{input}"),
        ("ai", "{output}"),
    ]
)
few_shot_prompt = FewShotChatMessagePromptTemplate(
example_prompt=example_prompt, examples=examples
)
prompt_step_back = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """You are an expert at world knowledge. Your task is to step back and paraphrase a question to a more generic step-back question, which is easier to answer. Here are a few examples:""",
        ),
        # Few shot examples
        few_shot_prompt,
        # New question
        ("user", "{question}"),
    ]
)

# 2. Chain to generate step-back question

generate_queries_step_back = prompt_step_back | llm | StrOutputParser()
question = "What is task decomposition for LLM agents?"
generate_queries_step_back.invoke({"question": question})

# 3. Response prompt using both normal and step-back context
response_prompt_template = """You are an expert of world knowledge. I am going to ask you a question.
Your response should be comprehensive and not contradict the following contexts.
Normal Context:
{normal_context}
Step-back Context:
{step_back_context}

Original Question: {question}
Answer:"""
response_prompt = ChatPromptTemplate.from_template(response_prompt_template)

# 4. Full chain with parallel retrieval
chain = (
    {
        "normal_context": RunnableLambda(lambda x: x["question"]) | retriever,
        "step_back_context": generate_queries_step_back | retriever,
        "question": lambda x: x["question"],
    }
    | response_prompt
    | llm
    | StrOutputParser()
)

answer = chain.invoke({"question": question})

print("Answer: ", answer)

Answer:  Task decomposition for LLM agents involves breaking down large tasks into smaller, more manageable subgoals. This process enables the agent to efficiently handle complex tasks by dividing them into smaller steps that are easier to tackle. By decomposing tasks, LLM agents can effectively plan and execute their actions, leading to improved problem-solving capabilities and higher-quality results.

One common technique used for task decomposition in LLM agents is the Chain of Thought (CoT) method, as described by Wei et al. (2022). CoT prompts the model to "think step by step," encouraging it to break down hard tasks into simpler steps. This approach helps the model utilize more computation during testing to decompose tasks effectively. Additionally, the Tree of Thoughts method, introduced by Yao et al. (2023), extends CoT by exploring multiple reasoning possibilities at each step, creating a tree structure of thought processes.

Task decomposition can be achieved in various ways,

### HyDE

In [None]:
# 1. Prompt to generate a hypothetical document
template = """
Please write a scientific paper passage to answer the question.
Question: {question}
Passage:
"""
prompt_hyde = ChatPromptTemplate.from_template(template)

# 2. Chain to generate the document
generate_docs_for_retrieval = (
    prompt_hyde
    | llm
    | StrOutputParser()
)

question = "What is task decomposition for LLM agents?"
generate_docs_for_retrieval.invoke({"question":question})

# 3. Retrieval chain using the hypothetical document
retrieval_chain = generate_docs_for_retrieval | retriever

# 4. Final RAG chain
template = """
    Answer the following question based on this context: {context}
    Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)

final_rag_chain = (
    {"context": retrieval_chain, "question": itemgetter("question")}
    | prompt
    | llm
    | StrOutputParser()
)

answer = final_rag_chain.invoke({"question": question})
print("Answer: \n", answer)

Answer: 
 Task decomposition for LLM agents involves breaking down large tasks into smaller, manageable subgoals. This enables efficient handling of complex tasks by dividing them into more manageable steps, allowing the agent to navigate through the problem more effectively.
