### 导包

In [36]:
import os
import bs4
from langchain_community.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_community.vectorstores import Chroma
from langchain.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain.load import dumps, loads
from typing import List
from operator import itemgetter
from langchain_core.runnables import RunnablePassthrough
from langchain import hub

In [None]:
os.environ['LANGCHAIN_TRACING_V2'] = 'true'
os.environ['LANGCHAIN_ENDPOINT'] = 'https://api.smith.langchain.com'
os.environ['LANGCHAIN_API_KEY'] = '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'
os.environ['DeepSeek_API_KEY'] = 'sk-2b3073febb6b486e94e62f9e9704f759'
os.environ['EMBEDDING_API_KEY'] = 'sk-zk23347d533c0c49e3d781430f5bb140c0a67d78ee90597e'

In [5]:
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs=dict(
    parse_only=bs4.SoupStrainer(
        class_=("post-content", "post-title", "post-header")
    )
    ),
)

blog_docs = loader.load()

In [7]:
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=300, 
    chunk_overlap=50
)

splits = text_splitter.split_documents(blog_docs)

In [11]:
vectorstore = Chroma.from_documents(splits, OpenAIEmbeddings(
                                                            model="text-embedding-ada-002",
                                                            api_key=os.environ.get('EMBEDDING_API_KEY'),
                                                            base_url="https://api.zhizengzeng.com/v1/",
                                                            encoding_format="float"
))

retriever = vectorstore.as_retriever()

In [13]:
template = """You are an AI language model assistant. Your task is to generate five 
different versions of the given user question to retrieve relevant documents from a vector 
database. By generating multiple perspectives on the user question, your goal is to help
the user overcome some of the limitations of the distance-based similarity search. 
Provide these alternative questions separated by newlines. Original question: {question}"""

prompt_perspectives = ChatPromptTemplate.from_template(template)

In [15]:
llm = ChatOpenAI(
    model='deepseek-chat',
    openai_api_key=os.environ.get('DeepSeek_API_KEY'),
    openai_api_base='https://api.deepseek.com',
    max_tokens=1024
)

In [17]:
generate_queries = (
    prompt_perspectives
    | llm
    | StrOutputParser()
    | (lambda x: x.split("\n"))
)

In [19]:
def get_unique_union(documents: list[List]):
    """
    Unique union of retrieved docs
    """
    flattened_docs = [dumps(doc) for sublist in documents for doc in sublist]
    unique_docs = list(set(flattened_docs))
    return [loads(doc) for doc in unique_docs]

In [20]:
question = "What is task decomposition for LLM agents"
retrievla_chain = generate_queries | retriever.map() | get_unique_union
docs = retrievla_chain.invoke({"question": question})
len(docs)


  return [loads(doc) for doc in unique_docs]


8

In [22]:
template = """Answer the following question based on this context:

{context}

Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

llm = ChatOpenAI(
    model='deepseek-chat',
    openai_api_key=os.environ.get('DeepSeek_API_KEY'),
    openai_api_base='https://api.deepseek.com',
    max_tokens=1024,
    temperature=0
)

final_rag_chain = (
    {"context": retrievla_chain,
     "question": itemgetter("question")}
    | prompt
    | llm
    | StrOutputParser()
)

final_rag_chain.invoke({"question": question})

'Task decomposition for LLM (Large Language Model) agents refers to the process of breaking down complex tasks into smaller, more manageable subgoals or steps. This is a crucial component of planning in LLM-powered autonomous agent systems, as it enables the agent to handle intricate tasks more efficiently.\n\nIn the context of LLM agents, task decomposition can be achieved through various methods:\n\n1. **Chain of Thought (CoT)**: This is a standard prompting technique where the model is instructed to "think step by step." By decomposing a complex task into smaller, simpler steps, the model can utilize more test-time computation to tackle the problem effectively. This approach also provides insight into the model\'s reasoning process.\n\n2. **Tree of Thoughts (ToT)**: This method extends CoT by exploring multiple reasoning possibilities at each step. It decomposes the problem into multiple thought steps and generates multiple thoughts per step, creating a tree structure. The search pr

In [24]:
template = """You are a helpful assistant that generates multiple search queries based on a single input query. \n
Generate multiple search queries related to: {question} \n
Output (4 queries):"""
prompt_rag_fusion = ChatPromptTemplate.from_template(template)

In [25]:
generate_queries = (
    prompt_rag_fusion
    | llm
    | StrOutputParser()
    | (lambda x: x.split("\n"))
)

In [26]:
def reciprocal_rank_fusion(results: list[List], k=60):
    """
    Reciprocal_rank_fusion that takes multiple lists of ranked documents 
    and an optional parameter k used in the RRF formula
    """
    # Initialize a dictionary to hold fused scores for each unique document
    fused_scores = {}

    # Iterate through each list of ranked documents
    for docs in results:
        for rank, doc in enumerate(docs):
            doc_str = dumps(doc)
            if doc_str not in fused_scores:
                fused_scores[doc_str] = 0
            
            previous_score = fused_scores[doc_str]
            fused_scores[doc_str] += 1 / (rank + k)

    # Sort the documents based on their fused scores in descending order to get the final reranked results
    reranked_results = [
        (loads(doc), score)
        for doc, score in sorted(fused_scores.items(), key=lambda x: x[1], reverse=True)
    ]
    
    return reranked_results

retrieval_chain_rag_fusion = generate_queries | retriever.map() | reciprocal_rank_fusion
docs = retrieval_chain_rag_fusion.invoke({"question": question})

len(docs)


7

In [27]:
template = """Answer the following question based on this context:

{context}

Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

final_rag_chain = (
    {"context": retrieval_chain_rag_fusion,
     "question": itemgetter("question")}
    | prompt
    | llm
    | StrOutputParser()
)

final_rag_chain.invoke({"question": question})

'Task decomposition for LLM (Large Language Model) agents refers to the process of breaking down complex tasks into smaller, more manageable subgoals or steps. This is a crucial component of planning in an LLM-powered autonomous agent system, as it enables the agent to handle intricate tasks efficiently.\n\nThere are several techniques and approaches to task decomposition:\n\n1. **Chain of Thought (CoT)**: This is a standard prompting technique where the model is instructed to "think step by step." By decomposing a hard task into smaller, simpler steps, the model can utilize more test-time computation and provide insights into its reasoning process.\n\n2. **Tree of Thoughts (ToT)**: This approach extends CoT by exploring multiple reasoning possibilities at each step. It decomposes the problem into multiple thought steps and generates multiple thoughts per step, creating a tree structure. The search process can be either breadth-first search (BFS) or depth-first search (DFS), with each 

In [28]:
template = """You are a helpful assistant that generates multiple sub-questions related to an input question. \n
The goal is to break down the input into a set of sub-problems / sub-questions that can be answers in isolation. \n
Generate multiple search queries related to: {question} \n
Output (3 queries):"""
prompt_decomposition = ChatPromptTemplate.from_template(template)

In [29]:
generate_queries_decomposition = (
    prompt_decomposition
    | llm
    | StrOutputParser()
    | (lambda x: x.split("\n"))
)

question = "What is task decomposition for LLM agents"
questions = generate_queries_decomposition.invoke({"question": question})

In [30]:
questions 

['1. What is the definition of task decomposition in the context of LLM (Large Language Model) agents?  ',
 '2. How do LLM agents use task decomposition to solve complex problems?  ',
 '3. What are the common techniques or algorithms used for task decomposition in LLM agents?']

In [31]:
template = """Here is the question you need to answer:

\n --- \n {question} \n --- \n

Here is any available background question + answer pairs:

\n --- \n {q_a_pairs} \n --- \n

Here is additional context relevant to the question: 

\n --- \n {context} \n --- \n

Use the above context and any background question + answer pairs to answer the question: \n {question}
"""

decomposition_prompt = ChatPromptTemplate.from_template(template)

In [34]:
def foramat_qa_pair(question, answer):
    formatted_string = ""
    formatted_string += f"Question: {question}\nAnswer: {answer}\n\n"
    return formatted_string.strip()

q_a_pairs = ""

for q in questions:
    rag_chain = (
        {"context": itemgetter("question") | retriever,
         "question": itemgetter("question"),
         "q_a_pairs": itemgetter("q_a_pairs")}
        | decomposition_prompt
        | llm
        | StrOutputParser()
    )

    answer = rag_chain.invoke({"question": q, "q_a_pairs": q_a_pairs})
    q_a_pairs = foramat_qa_pair(q, answer)
    q_a_pairs = q_a_pairs + "\n---\n" + q_a_pairs

In [35]:
answer 

'The common techniques or algorithms used for task decomposition in LLM agents include:\n\n1. **Chain of Thought (CoT)**: This technique encourages the LLM to "think step by step," breaking down complex tasks into smaller, more manageable steps. By decomposing the task into smaller parts, the agent can systematically address each step, enhancing its ability to plan and execute the task effectively.\n\n2. **Tree of Thoughts (ToT)**: This method extends CoT by exploring multiple reasoning possibilities at each step. The problem is first decomposed into multiple thought steps, and then multiple thoughts are generated per step, creating a tree-like structure. The agent can then use search algorithms like BFS (breadth-first search) or DFS (depth-first search) to navigate through these possibilities, with each state evaluated by a classifier or majority vote.\n\n3. **Prompting and Instructions**: Task decomposition can be facilitated through simple prompting (e.g., "Steps for XYZ.\\\\n1." or

In [37]:
prompt_rag = hub.pull("rlm/rag-prompt")


def retrieval_ang_rag(question, prompt_rag, sub_question_generator_chain):
    sub_questions = sub_question_generator_chain.invoke({"question": question})

    rag_results = []

    for sub_question in sub_questions:
        retrieved_docs = retriever.get_relevant_documents(sub_question)

        answer = (prompt_rag 
                  | llm 
                  | StrOutputParser()).invoke(
                      {"context": retrieved_docs, 
                       "question": question}
        )

        rag_results.append(answer)
    
    return rag_results, sub_questions

answer, question =  retrieval_ang_rag(question, prompt_rag, generate_queries_decomposition)

  retrieved_docs = retriever.get_relevant_documents(sub_question)
