In [None]:
##indexing##
import bs4
from langchain_community.document_loaders import WebBaseLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_ollama import ChatOllama
from langchain_huggingface.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
loader=WebBaseLoader(
    web_path=("https://lilianweng.github.io/posts/2023-06-23-agent/"),
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        ),
    )
)

docs=loader.load()

#split
text_splitter=RecursiveCharacterTextSplitter.from_tiktoken_encoder(chunk_size=300,chunk_overlap=50)
splits=text_splitter.split_documents(docs) #->list

#embed
embedding_model=HuggingFaceEmbeddings(model_name="BAAI/bge-small-zh-v1.5")
vectorstore=Chroma.from_documents(documents=splits,embedding=embedding_model)
retriever=vectorstore.as_retriever()

#ollama llm
llm=ChatOllama(model="llama3.2:3b-instruct-q8_0",temperature=0)

In [6]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
#multi query prompts
template="""
You are an AI language model assistant. Your task is to generate three
different versions of the given user question to retrieve relevant documents from a vector
database. By generating multiple perspectives on the user question, your goal is to help
the user overcome some of the limitations of the distance-based similarity search.
Provide these alternative questions separated by newlines. Original question: {question}
"""
prompt_rag_fusion=ChatPromptTemplate.from_template(template)

generate_querier=(
    prompt_rag_fusion
    | llm
    | StrOutputParser()
    | (lambda x: x.split("\n"))
)



In [None]:
from langchain_core.load import loads,dumps


def reciprocal_rank_fusion(results: list    [list],k=60):
    #紀錄每個document的分數
    fuse_score={}
    for docs in results:
        for rank,doc in enumerate(docs):

            doc_str=dumps(doc) #:->str

            #初始分數都為0
            if doc_str not in fuse_score:
                fuse_score[doc_str]=0

            pre_score=fuse_score[doc_str]
            #RRF formula: 1 / (rank + k)
            fuse_score[doc_str]=pre_score+1/(rank+k)

    #Sort the docs based on fused scores in descending order to get the final reranked results
    reranked_results=[
        (loads(doc),score)
        for doc,score in sorted(fuse_score.items() ,key=lambda x : x[1] , reverse=True )
        ]

    return reranked_results

question="What is task decomposition for LLM agents?"

retrieval_chain_rag_fusion= generate_querier | retriever.map() | reciprocal_rank_fusion

#docs=retrieval_chain_rag_fusion.invoke({"question":question})


In [9]:
from operator import itemgetter
template="""
Answer the following question based in the context:
{context}

Question:{question}
"""
prompt=ChatPromptTemplate.from_template(template)

final_rag_chain=(
    {
        "context":retrieval_chain_rag_fusion,
        "question":itemgetter("question")
    }
    | prompt
    | llm
    | StrOutputParser()
)

final_rag_chain.invoke({"question":question})

'Task decomposition is a technique used to break down complex tasks into smaller, simpler steps that an agent can plan and execute. This allows the agent to utilize more test-time computation and decompose hard tasks into manageable subtasks.\n\nThere are three ways to perform task decomposition:\n\n1. Simple prompting: The LLM is given simple prompts like "Steps for XYZ." or "What are the subgoals for achieving XYZ."\n2. Task-specific instructions: The agent is given task-specific instructions, such as "Write a story outline" for writing a novel.\n3. Human inputs: The agent receives human inputs to perform task decomposition.\n\nTask decomposition can be done using techniques like Chain of Thought (CoT) or Tree of Thoughts (Yao et al. 2023), which transform big tasks into multiple manageable tasks and shed light on the model\'s thinking process.'