In [1]:
import dotenv
# Load variables from a .env file into the process environment; the call's
# return value is what this cell displays.
# NOTE(review): presumably this supplies the API key used by the Google
# embedding/chat clients in later cells — confirm.
dotenv.load_dotenv()

True

# Part 5: Multi Query

In [3]:
import bs4
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_google_genai import GoogleGenerativeAIEmbeddings

# --- Load: fetch the blog post, parsing only elements with these CSS classes.
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs={
        "parse_only": bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    },
)
blog_docs = loader.load()

# --- Split: token-counted chunks (size 300, overlap 50).
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=300,
    chunk_overlap=50,
)
splits = text_splitter.split_documents(blog_docs)

# --- Index: embed the chunks into a Chroma store and expose it as a retriever.
vectorstore = Chroma.from_documents(
    documents=splits,
    embedding=GoogleGenerativeAIEmbeddings(model="models/embedding-001"),
)
retriever = vectorstore.as_retriever()

In [27]:
from langchain.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_google_genai import ChatGoogleGenerativeAI

# Multi Query: Different Perspectives
# The prompt asks the model for five rephrasings of the user question,
# one per line, so each can be used as a separate retrieval query.
template = """You are an AI language model assistant. Your task is to generate five 
different versions of the given user question to retrieve relevant documents from a vector 
database. By generating multiple perspectives on the user question, your goal is to help
the user overcome some of the limitations of the distance-based similarity search. 
Provide these alternative questions separated by newlines. Original question: {question}"""
prompt_perspectives = ChatPromptTemplate.from_template(template)

# Pipeline: fill the prompt -> call Gemini -> take the reply as a plain
# string -> split it into a list of query strings.
generate_queries = (
    prompt_perspectives
    | ChatGoogleGenerativeAI(model="gemini-2.0-flash", temperature=0)
    | StrOutputParser()
    # Split on newlines but drop blank / whitespace-only lines: the model may
    # separate questions with empty lines or end with a trailing newline, and
    # every element of this list is sent to the retriever as its own query.
    | (lambda x: [line.strip() for line in x.split("\n") if line.strip()])
)

In [28]:
from langchain.load import dumps, loads

def get_unique_union(documents: list[list]):
    """Return the de-duplicated union of several retrieval result lists.

    Each document is serialized with ``dumps`` so that documents can be
    compared by their string form; duplicates are dropped and the remaining
    strings are turned back into objects with ``loads``.

    Args:
        documents: One list of retrieved documents per query.

    Returns:
        A flat list containing each distinct document once, in the order in
        which it is first encountered when scanning the lists left to right.
    """
    flattened_docs = [dumps(doc) for sublist in documents for doc in sublist]
    # dict.fromkeys removes duplicates while keeping first-seen order; a plain
    # set() would hand the documents back in an arbitrary order that can
    # differ from one run to the next.
    unique_docs = list(dict.fromkeys(flattened_docs))
    return [loads(doc) for doc in unique_docs]

# The question asked of the chains in this and later cells.
question = "What is task decomposition for LLM agents?"
# Pipeline: generate the query variants -> run the retriever once per variant
# (retriever.map()) -> merge the per-query result lists with get_unique_union.
retrieval_chain = generate_queries | retriever.map() | get_unique_union
docs = retrieval_chain.invoke({"question": question})
# Number of distinct documents retrieved across all query variants.
len(docs)

6

In [30]:
from operator import itemgetter

# Answer-generation prompt: {context} is filled with the retrieval result and
# {question} with the user's question.
template = """Answer the folowing question based on this context: 

{context}

Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)

llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", temperature=0)

# The leading dict feeds the same input to both entries: "context" runs the
# multi-query retrieval chain on it, "question" extracts the question string.
# The resulting mapping is then passed through the prompt, the model and the
# string output parser.
final_rag_chain = (
    {"context": retrieval_chain,
     "question":itemgetter("question")}
     | prompt
     | llm
     | StrOutputParser()
)

# Run the whole pipeline; the cell displays the generated answer.
final_rag_chain.invoke({"question":question})

'Task decomposition is the process of breaking down a complex task into smaller, more manageable subgoals. This allows the LLM agent to handle complex tasks more efficiently. Task decomposition can be done by:\n\n1.  LLM with simple prompting (e.g., "Steps for XYZ. 1.", "What are the subgoals for achieving XYZ?")\n2.  Using task-specific instructions (e.g., "Write a story outline." for writing a novel)\n3.  With human inputs.\n\nChain of Thought (CoT) and Tree of Thoughts are techniques used for task decomposition. CoT instructs the model to "think step by step," while Tree of Thoughts explores multiple reasoning possibilities at each step, creating a tree structure.'

# Part 6: RAG-Fusion

In [31]:
from langchain.prompts import ChatPromptTemplate

# RAG-Fusion: Related
# Prompt asking the model for four search queries related to {question}.
# The string is not raw, so each "\n" escape is a real newline in addition to
# the literal line break that follows it.
template = """You are a helpful assistant that generates multiple search queries based on a single input query. \n
Generate multiple search queries related to: {question} \n
Output (4 queries):"""
prompt_rag_fusion = ChatPromptTemplate.from_template(template)

In [34]:
from langchain_core.output_parsers import StrOutputParser
from langchain_google_genai import ChatGoogleGenerativeAI

# RAG-Fusion query generator: fill the prompt -> call Gemini -> take the reply
# as a plain string -> split it into a list of query strings.
# This rebinds the name generate_queries; chains composed earlier keep the
# chain object they were built with.
generate_queries = (
    prompt_rag_fusion
    | ChatGoogleGenerativeAI(model="gemini-2.0-flash", temperature=0)
    | StrOutputParser()
    # Split on newlines but drop blank / whitespace-only lines. Each element
    # becomes its own retrieval query, so an empty string would cost an extra
    # retrieval and contribute a spurious ranked list to the fusion step.
    | (lambda x: [line.strip() for line in x.split('\n') if line.strip()])
)

In [38]:
from langchain.load import dumps, loads

def reciprocal_rank_fusion(results: list[list], k=60):
    """Fuse several ranked result lists into one ranking (Reciprocal Rank Fusion).

    For every list a document appears in, it receives ``1 / (rank + k)``,
    where ``rank`` is its 0-based position in that list. The contributions
    are summed per document; documents are identified by their ``dumps``
    serialization.

    Args:
        results: One list of documents per query. Positions are treated as
            ranks, so each list is assumed to be ordered best match first.
        k: Constant added to the rank in the denominator; larger values
            reduce the gap between the scores of neighbouring ranks.

    Returns:
        A list of ``(document, fused_score)`` tuples sorted by score in
        descending order. Documents with equal scores keep the order in
        which they were first encountered.

    Raises:
        ZeroDivisionError: If ``k`` is 0 and any list is non-empty, because
            the first position has rank 0.
    """
    fused_scores = {}
    for docs in results:
        for rank, doc in enumerate(docs):
            # Key on the serialized form so the same document coming from
            # different queries accumulates into a single score.
            doc_str = dumps(doc)
            fused_scores[doc_str] = fused_scores.get(doc_str, 0) + 1 / (rank + k)
    ranked = sorted(fused_scores.items(), key=lambda item: item[1], reverse=True)
    return [(loads(doc_str), score) for doc_str, score in ranked]

# Pipeline: generate the related queries -> run the retriever once per query
# (retriever.map()) -> fuse the per-query rankings with reciprocal_rank_fusion.
retrieval_chain_rag_fusion = generate_queries | retriever.map() | reciprocal_rank_fusion

# Rebinds docs: it now holds the (document, score) tuples returned by the
# fusion step.
docs = retrieval_chain_rag_fusion.invoke({"question": question})
# Number of distinct documents across all generated queries.
len(docs)
    

11

In [36]:
# NOTE(review): RunnablePassthrough is imported but not used in this cell.
from langchain_core.runnables import RunnablePassthrough

# Same answer pipeline as before, but "context" now comes from the RAG-Fusion
# retrieval chain, whose output is a list of (document, score) tuples.
# Relies on itemgetter, prompt and llm from the earlier answer-generation cell.
# NOTE(review): the tuples, scores included, are presumably stringified into
# the prompt's {context} slot as-is — confirm this is intended.
final_rag_chain = (
    {'context': retrieval_chain_rag_fusion,
     'question': itemgetter('question')}
     | prompt
     | llm
     | StrOutputParser()
)

# Run the whole pipeline; the cell displays the generated answer.
final_rag_chain.invoke({"question": question})

'Task decomposition for LLM agents is the process of breaking down large, complicated tasks into smaller, more manageable subgoals. This enables the agent to handle complex tasks more efficiently. Task decomposition can be achieved by:\n\n1.  Using LLMs with simple prompting techniques, such as asking "Steps for XYZ. 1." or "What are the subgoals for achieving XYZ?".\n2.  Using task-specific instructions, such as "Write a story outline." for writing a novel.\n3.  Using human inputs.\n\nTechniques like Chain of Thought (CoT) and Tree of Thoughts are used to facilitate task decomposition. CoT instructs the model to "think step by step," while Tree of Thoughts explores multiple reasoning possibilities at each step, creating a tree structure of thoughts.'