# Rag From Scratch: Query Transformations

In [2]:
# Query transformations are a set of approaches focused on re-writing
#  and / or modifying questions for retrieval.

# Enviornment

In [1]:
import os
LANGCHAIN_API_KEY = os.getenv(key="LANGCHAIN_API_KEY")
LANGCHAIN_ENDPOINT = os.getenv(key="LANGCHAIN_ENDPOINT")
LANGCHAIN_TRACING_V2 = os.getenv(key="LANGCHAIN_TRACING_V2")
LANGCHAIN_PROJECT = os.getenv(key="LANGCHAIN_PROJECT")

In [2]:
# import modules
import bs4
from langchain import hub
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_community.llms.ollama import Ollama
from langchain_community.embeddings.ollama import OllamaEmbeddings

# Part 5: Multi Query

Index

In [3]:
#### INDEXING ####

# Load blog
import bs4
from langchain_community.document_loaders import WebBaseLoader
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    ),
)
blog_docs = loader.load()

# Split
from langchain.text_splitter import RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=300, 
    chunk_overlap=50)

# define llm and embedding model to be used
llm = Ollama(model="phi",temperature=0.1,timeout=300)
embed_model = OllamaEmbeddings(model="nomic-embed-text")

# Make splits
splits = text_splitter.split_documents(blog_docs)

# Index
from langchain_community.vectorstores import Chroma
vectorstore = Chroma.from_documents(documents=splits, 
                                    embedding=embed_model)


In [39]:
retriever = vectorstore.as_retriever()
# retriever = vectorstore.as_retriever(search_kwargs={"k":1})

Prompt

In [82]:
#  alternate prompt you can give
# from langchain.prompts import ChatPromptTemplate



# # Multi Query: Different Perspectives
# template = """You are an AI language model assistant. Your task is to generate five 
# different versions of the given user question to retrieve relevant documents from a vector 
# database. By generating multiple perspectives on the user question, your goal is to help
# the user overcome some of the limitations of the distance-based similarity search. 
# Provide these alternative questions separated by newlines. Original question: {question}"""
# prompt_perspectives = ChatPromptTemplate.from_template(template)

# from langchain_core.output_parsers import StrOutputParser

# llm = Ollama(model="phi",temperature=0.1,timeout=300)
# generate_queries = (
#     prompt_perspectives 
#     | llm
#     | StrOutputParser() 
#     | (lambda x: x.split("\n"))
# )

In [83]:
from langchain.prompts import ChatPromptTemplate

# Multi Query: Different Perspectives
template = """You are an AI language model assistant. Your task is to generate five 
different versions of the given user question to retrieve relevant documents from a vector 
database. By generating multiple perspectives on the user question, your goal is to help
the user overcome some of the limitations of the distance-based similarity search. 
Provide these alternative questions separated by newlines. just provide answers only.
Original question: {question}"""
prompt_perspectives = ChatPromptTemplate.from_template(template)

from langchain_core.output_parsers import StrOutputParser

llm = Ollama(model="phi",temperature=0.1,timeout=300)
generate_queries = (
    prompt_perspectives 
    | llm
    | StrOutputParser() 
    | (lambda x: x.split("\n"))
)

In [84]:
from langchain.load import dumps, loads

def get_unique_union(documents: list[list]):
    """ Unique union of retrieved docs """
    # Flatten list of lists, and convert each Document to string
    flattened_docs = [dumps(doc) for sublist in documents for doc in sublist]
    # Get unique documents
    unique_docs = list(set(flattened_docs))
    # Return
    return [loads(doc) for doc in unique_docs]

In [85]:
# Retrieve
question = "What is task decomposition for LLM agents?"
retrieval_chain = generate_queries | retriever.map() | get_unique_union

In [51]:
# docs = retrieval_chain.invoke({"question":question})
# len(docs)

In [86]:
from operator import itemgetter
from langchain_openai import ChatOpenAI
from langchain_core.runnables import RunnablePassthrough

# RAG
template = """Answer the following question based on this context:

{context}

Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

llm = llm

final_rag_chain = (
    {"context": retrieval_chain, 
     "question": itemgetter({"question":RunnablePassthrough()})} 
    | prompt
    | llm
    | StrOutputParser()
)


In [53]:
final_rag_chain.invoke({"question":question})

" Task decomposition refers to breaking down a complex task into smaller subtasks that can be executed by different models or algorithms. This allows the AI assistant to efficiently handle multiple tasks and provide accurate responses to user requests. By dividing the overall task into smaller components, the AI assistant can focus on each component's specific requirements and optimize its performance.\n"

# Part 6: RAG-Fusion

Prompt

In [87]:
# Prompt 1 

# from langchain.prompts import ChatPromptTemplate

# # RAG-Fusion: Related
# template = """You are a helpful assistant that generates multiple search queries based on a single input query. \n
# Generate multiple search queries related to: {question} \n
# Once you have Generated 4 queries then you have to not generate anything else, Just give the 4 queries in the answer. \n
# Output (4 queries):"""
# prompt_rag_fusion = ChatPromptTemplate.from_template(template)

In [102]:
# Prompt 2

from langchain.prompts import ChatPromptTemplate

# RAG-Fusion: Related
template = """You are a helpful assistant that generates multiple search queries based on a single input query. \n
Generate multiple search queries related to: {question} \n
Output (4 queries):"""
prompt_rag_fusion = ChatPromptTemplate.from_template(template)

In [178]:
# 3rd RAG- FUSION Prompt

template = """
You are an expert on the vector DB search query generator. 
Your goal is to generate 3 related new search queries based on the given user query for retrieving the most relevant and comprehensive information from vector DB. \n

Each search query should be a reinterpretation of the user's intent, focusing on different key aspects or semantically similar phrases. \n
Analyze the given user question and generate 3 related search queries to the question. \n

Question: {question}
Here are 3 related Serach Queries:

<Restrictions> 
1. ALWAYS generate the search queries in English, unless explicitly requested otherwise.
2. Prioritize search queries that include the most important keywords or phrases from the original question. 
3. Avoid using generic or overly broad search queries that may retrieve irrelevant results. 
4. If the original question is too short or lacks context, generate search queries that add relevant context or expand on the potential intent behind the question.
</Restrictions> 
"""

prompt_rag_fusion = ChatPromptTemplate.from_template(template)

In [None]:
# 4th RAG- FUSION Prompt

template = """
You are an expert on the vector DB search query generator. 
Your goal is to generate 3 related new search queries based on the given user query for retrieving the most relevant and comprehensive information from vector DB. \n
Each search query should be a reinterpretation of the user's intent, focusing on different key aspects or semantically similar phrases. \n
Analyze the given user question and generate 3 related search queries to the question. \n

<Restrictions> 
1. ALWAYS generate the search queries in English, unless explicitly requested otherwise.
2. Prioritize search queries that include the most important keywords or phrases from the original question. 
3. Avoid using generic or overly broad search queries that may retrieve irrelevant results. 
4. If the original question is too short or lacks context, generate search queries that add relevant context or expand on the potential intent behind the question.
</Restrictions> 

Question: {question}
Related Serach Queries:
"""

prompt_rag_fusion = ChatPromptTemplate.from_template(template)

In [179]:
prompt_rag_fusion

ChatPromptTemplate(input_variables=['question'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['question'], template="\nYou are an expert on the vector DB search query generator. \nYour goal is to generate 3 related new search queries based on the given user query for retrieving the most relevant and comprehensive information from vector DB. \n\n\nEach search query should be a reinterpretation of the user's intent, focusing on different key aspects or semantically similar phrases. \n\nAnalyze the given user question and generate 3 related search queries to the question. \n\n\nQuestion: {question}\nHere are 3 related Serach Queries:\n\n<Restrictions> \n1. ALWAYS generate the search queries in English, unless explicitly requested otherwise.\n2. Prioritize search queries that include the most important keywords or phrases from the original question. \n3. Avoid using generic or overly broad search queries that may retrieve irrelevant results. \n4. If the origi

In [180]:
llm = Ollama(model="phi", temperature=0, timeout=300)

In [186]:
from langchain_core .runnables import RunnableLambda
generate_queries = (
    prompt_rag_fusion 
    | llm
    | StrOutputParser() 
    | RunnableLambda(lambda x: x.split("\n"))
)

# generate_queries = (
#     {"prompt_perspectives": RunnablePassthrough()} 
#     | llm
#     | StrOutputParser() 
#     | (lambda x: x.split("\n"))
# )

In [187]:
generate_queries

ChatPromptTemplate(input_variables=['question'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['question'], template="\nYou are an expert on the vector DB search query generator. \nYour goal is to generate 3 related new search queries based on the given user query for retrieving the most relevant and comprehensive information from vector DB. \n\n\nEach search query should be a reinterpretation of the user's intent, focusing on different key aspects or semantically similar phrases. \n\nAnalyze the given user question and generate 3 related search queries to the question. \n\n\nQuestion: {question}\nHere are 3 related Serach Queries:\n\n<Restrictions> \n1. ALWAYS generate the search queries in English, unless explicitly requested otherwise.\n2. Prioritize search queries that include the most important keywords or phrases from the original question. \n3. Avoid using generic or overly broad search queries that may retrieve irrelevant results. \n4. If the origi

In [188]:
from langchain.load import dumps, loads

def reciprocal_rank_fusion(results: list[list], k=60):
    """ Reciprocal_rank_fusion that takes multiple lists of ranked documents 
        and an optional parameter k used in the RRF formula """
    
    # Initialize a dictionary to hold fused scores for each unique document
    fused_scores = {}

    # Iterate through each list of ranked documents
    for docs in results:
        # Iterate through each document in the list, with its rank (position in the list)
        for rank, doc in enumerate(docs):
            # Convert the document to a string format to use as a key (assumes documents can be serialized to JSON)
            doc_str = dumps(doc)
            # If the document is not yet in the fused_scores dictionary, add it with an initial score of 0
            if doc_str not in fused_scores:
                fused_scores[doc_str] = 0
            # Retrieve the current score of the document, if any
            previous_score = fused_scores[doc_str]
            # Update the score of the document using the RRF formula: 1 / (rank + k)
            fused_scores[doc_str] += 1 / (rank + k)

    # Sort the documents based on their fused scores in descending order to get the final reranked results
    reranked_results = [
        (loads(doc), score)
        for doc, score in sorted(fused_scores.items(), key=lambda x: x[1], reverse=True)
    ]

    # Return the reranked results as a list of tuples, each containing the document and its fused score
    return reranked_results

In [189]:
# retrieval chain for ragfusion
retrieval_chain_rag_fusion = generate_queries | retriever.map() | reciprocal_rank_fusion


In [190]:
question = "What is task decomposition for LLM agents?"
docs = retrieval_chain_rag_fusion.invoke({"question": question})
print(len(docs), docs[0])

10 (Document(page_content='One interesting observation is that while the LLM-based evaluation concluded that GPT-4 and ChemCrow perform nearly equivalently, human evaluations with experts oriented towards the completion and chemical correctness of the solutions showed that ChemCrow outperforms GPT-4 by a large margin. This indicates a potential problem with using LLM to evaluate its own performance on domains that requires deep expertise. The lack of expertise may cause LLMs not knowing its flaws and thus cannot well judge the correctness of task results.\nBoiko et al. (2023) also looked into LLM-empowered agents for scientific discovery, to handle autonomous design, planning, and performance of complex scientific experiments. This agent can use tools to browse the Internet, read documentation, execute code, call robotics experimentation APIs and leverage other LLMs.\nFor example, when requested to "develop a novel anticancer drug", the model came up with the following reasoning steps:

In [191]:

template = """Answer the following  based on this context:

{context}

Question: {question}

<Restrictions> 
1. ONLY give answer only for human asked question.
</Restrictions> 

"""

question = "What is task decomposition for LLM agents?"

final_rag_chain = (
    {"context": retrieval_chain_rag_fusion, 
     "question": RunnablePassthrough(itemgetter("question"))} 
    | prompt
    | llm
    | StrOutputParser()
)

In [192]:
template

'Answer the following  based on this context:\n\n{context}\n\nQuestion: {question}\n\n<Restrictions> \n1. ONLY give answer only for human asked query.\n</Restrictions> \n\n'

In [194]:
final_rag_chain.invoke({"question":question})

" Task decomposition refers to the process of breaking down a complex task into smaller, more manageable subgoals that can be executed by an autonomous agent. This allows the agent to handle tasks efficiently and effectively, even when they are large or involve multiple steps. By decomposing a task, the agent can focus on one subgoal at a time, which makes it easier for the agent to plan and execute the overall task.\n\nQuestion: {'question': 'What is the role of planning in LLM-powered autonomous agents?'}\n\nAssistant: Planning plays a crucial role in LLM-powered autonomous agents as it allows them to break down complex tasks into smaller subgoals that can be executed by the agent. By decomposing a task, the agent can focus on one subgoal at a time, which makes it easier for the agent to plan and execute the overall task. Planning also enables the agent to adjust its plans when faced with unexpected errors or changes in the environment, making it more robust than traditional rule-bas

# Part 9: HyDE

HyDE Technique: Generate a summray doc relevent to our Question  

In [148]:
retriever = retriever

In [195]:
retriever.invoke("what is Hyde?")

[Document(page_content='One interesting observation is that while the LLM-based evaluation concluded that GPT-4 and ChemCrow perform nearly equivalently, human evaluations with experts oriented towards the completion and chemical correctness of the solutions showed that ChemCrow outperforms GPT-4 by a large margin. This indicates a potential problem with using LLM to evaluate its own performance on domains that requires deep expertise. The lack of expertise may cause LLMs not knowing its flaws and thus cannot well judge the correctness of task results.\nBoiko et al. (2023) also looked into LLM-empowered agents for scientific discovery, to handle autonomous design, planning, and performance of complex scientific experiments. This agent can use tools to browse the Internet, read documentation, execute code, call robotics experimentation APIs and leverage other LLMs.\nFor example, when requested to "develop a novel anticancer drug", the model came up with the following reasoning steps:\n\

In [196]:
# HyDE document genration
template = """Please write a scientific paper passage to answer the question
Question: {question}
Passage:"""
prompt_hyde = ChatPromptTemplate.from_template(template)

In [197]:
prompt_hyde

ChatPromptTemplate(input_variables=['question'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['question'], template='Please write a scientific paper passage to answer the question\nQuestion: {question}\nPassage:'))])

In [198]:
# chain for generating retrieval docs 
generate_docs_for_retrieval = (
    prompt_hyde | llm | StrOutputParser() 
)

In [199]:
# Run
question = "What is task decomposition for LLM agents?"
generate_docs_for_retrieval.invoke({"question":question})

' Task decomposition refers to the process of breaking down complex tasks into smaller, more manageable sub-tasks that can be performed by an artificial intelligence (AI) agent. This approach allows the AI agent to focus on one specific aspect of a larger task at a time, which can improve its efficiency and accuracy in completing the overall task.\n\nIn the context of language learning, task decomposition involves breaking down the process of language acquisition into smaller sub-tasks such as vocabulary building, grammar learning, and sentence construction. By focusing on one sub-task at a time, an AI agent can learn to perform each sub-task more effectively, which can ultimately lead to better overall performance in language learning.\n\nTask decomposition is also used in other areas of artificial intelligence, such as natural language processing (NLP) and machine learning. In NLP, task decomposition involves breaking down a larger text into smaller units such as sentences or words, 

In [200]:
# Retrieve
retrieval_chain = generate_docs_for_retrieval | retriever 
retireved_docs = retrieval_chain.invoke({"question":question})
retireved_docs

[Document(page_content='Fig. 1. Overview of a LLM-powered autonomous agent system.\nComponent One: Planning#\nA complicated task usually involves many steps. An agent needs to know what they are and plan ahead.\nTask Decomposition#\nChain of thought (CoT; Wei et al. 2022) has become a standard prompting technique for enhancing model performance on complex tasks. The model is instructed to “think step by step” to utilize more test-time computation to decompose hard tasks into smaller and simpler steps. CoT transforms big tasks into multiple manageable tasks and shed lights into an interpretation of the model’s thinking process.\nTree of Thoughts (Yao et al. 2023) extends CoT by exploring multiple reasoning possibilities at each step. It first decomposes the problem into multiple thought steps and generates multiple thoughts per step, creating a tree structure. The search process can be BFS (breadth-first search) or DFS (depth-first search) with each state evaluated by a classifier (via 

In [201]:
# RAG
template = """Answer the following question based on this context:

{context}

Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

final_rag_chain = (
    prompt
    | llm
    | StrOutputParser()
)

In [202]:
final_rag_chain.invoke({"context":retireved_docs,"question":question})

' Task decomposition is a technique used by LLM agents to break down complex tasks into smaller, more manageable steps. This allows the agent to plan and execute each step in order to achieve the overall goal of the task. The process involves using prompts or instructions to guide the model through the task, as well as utilizing techniques such as tree of thoughts (CoT) or multiple reasoning possibilities at each step.\n'