### __Query Decomposition__

In [None]:
from ast import parse
import bs4
from langchain_community.document_loaders import WebBaseLoader
import torch
import tqdm
from langchain import hub
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough, RunnableLambda
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings

from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain_community.llms import HuggingFacePipeline
from langchain.prompts import ChatPromptTemplate, FewShotChatMessagePromptTemplate

from langchain.load import dumps, loads
from langchain.schema.output_parser import StrOutputParser

from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from langchain_community.llms import Ollama
import os
from operator import itemgetter


# Process-wide settings: LangSmith tracing switch, endpoint and API key, plus
# the USER_AGENT value. The key and user agent are left blank in this file.
os.environ.update(
    {
        'LANGCHAIN_TRACING_V2': 'true',
        'LANGCHAIN_ENDPOINT': 'https://api.smith.langchain.com',
        'LANGCHAIN_API_KEY': '',
        'USER_AGENT': '',
    }
)


# Load the blog post, restricting HTML parsing to elements whose CSS class is
# one of the post's content, title, or header classes.
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs={
        "parse_only": bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header"),
        ),
    },
)

blog_docs = loader.load()



# Chunking parameters for the tiktoken-based splitter: chunk size and the
# overlap shared between neighbouring chunks.
splitter_settings = {"chunk_size": 300, "chunk_overlap": 50}
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(**splitter_settings)

# Break the loaded documents into chunks for embedding.
splits = text_splitter.split_documents(blog_docs)


# Sentence-transformers embedding model, run on CPU without normalising the
# produced vectors.
modelPath = "sentence-transformers/all-MiniLM-L6-v2"
model_kwargs = dict(device='cpu')
encode_kwargs = dict(normalize_embeddings=False)

embeddings = HuggingFaceEmbeddings(
    model_name=modelPath, model_kwargs=model_kwargs, encode_kwargs=encode_kwargs
)

# Index the chunks in Chroma and expose the store through the retriever interface.
vectorstore = Chroma.from_documents(documents=splits, embedding=embeddings)
retriever = vectorstore.as_retriever()

### ___Query LLM___

In [None]:
# Model used to break the user question into several search queries.
#model_id = "mistralai/Mistral-7B-Instruct-v0.2"
model_id = "microsoft/Phi-3-mini-4k-instruct"
Query_LLM = HuggingFacePipeline.from_model_id(
    model_id=model_id,
    task="text-generation",
    # Generation settings go in `pipeline_kwargs`; `model_kwargs` is forwarded
    # to model loading instead. Sampling with temperature 0.0 is contradictory
    # (transformers requires a strictly positive temperature when sampling),
    # so request deterministic greedy decoding explicitly.
    pipeline_kwargs={
        "do_sample": False,
        # Bound the output; a handful of short queries fits comfortably.
        "max_new_tokens": 256,
    },
)

### ___Answer LLM___

In [None]:
# Ollama-served model that writes the answers; low temperature keeps the
# output mostly deterministic.
Answer_LLM = Ollama(temperature=0.2, model="qwen2.5:7b-instruct")

#print(Answer_LLM("Explain transformers in simple terms"))

# model_id = "mistralai/Mistral-7B-Instruct-v0.2"
# model_id = "microsoft/Phi-3-mini-4k-instruct"

# Answer_LLM = HuggingFacePipeline.from_model_id(
#     model_id=model_id,
#     task="text-generation",
#     model_kwargs={ "temperature": 0.0,
#                    "do_sample": True },
# )

# tokenizer = AutoTokenizer.from_pretrained(model_id)

# model = AutoModelForCausalLM.from_pretrained(model_id)

# pipe = pipeline(
#     "text-generation",
#     model=model,
#     tokenizer=tokenizer,
#     max_new_tokens=256,
#     temperature=0.0,
# )

# Answer_LLM = HuggingFacePipeline(pipeline=pipe)

In [None]:
# Prompt asking the query model for three search queries about {question}.
template = """
Your task is to generate multiple search queries related to:
{question} \n
Output (3 queries):
"""

prompt_decomposition = ChatPromptTemplate.from_template(template)


def _split_queries(raw_output):
    """Return the non-blank lines of the model output, one query per entry.

    Blank lines are dropped so that downstream code never retrieves or
    answers an empty sub-question.
    """
    return [line.strip() for line in raw_output.splitlines() if line.strip()]


# prompt -> query model -> plain string -> list of queries
generate_queries_decomposition = (
    prompt_decomposition
    | Query_LLM
    | StrOutputParser()
    | _split_queries
)

# Run
question = "What are the main components of an LLM-powered autonomous agent system?"

questions = generate_queries_decomposition.invoke({"question": question})
questions

In [None]:
from operator import itemgetter


# Prompt for answering one sub-question at a time. It is filled with three
# values: {question} (the sub-question, used twice), {q_a_pairs} (previously
# answered question/answer pairs) and {context} (retrieved documents).
# The string is not raw, so each "\n" below is a real newline in the prompt.
template = """
Here is the question you need to answer:

\n --- \n {question} \n --- \n

Here is any available background question + answer pairs:

\n --- \n {q_a_pairs} \n --- \n

Here is additional context relevant to the question: 

\n --- \n {context} \n --- \n

Use the above context and any background question + answer pairs to answer the question: \n {question}
"""

# Wrap the template so it can be piped into the answer chain.
decomposition_prompt = ChatPromptTemplate.from_template(template)

def format_qa_pair(question, answer):
    """Format one question/answer pair as a single block of text.

    Args:
        question: The question text.
        answer: The answer text.

    Returns:
        The string ``"Question: <question>\\nAnswer: <answer>"`` with
        surrounding whitespace stripped.
    """
    # strip(), not split(): callers need one string, not a list of words.
    return f"Question: {question}\nAnswer: {answer}".strip()

# Accumulated history of answered sub-questions; it is fed back into the
# prompt so each later sub-question can build on the earlier answers.
q_a_pairs = ""
answer = ""

# The chain does not depend on the loop variable, so build it once.
# Input dict -> {context from the retriever, question, history} -> prompt
# -> answer model -> plain string.
rag_chain = (
    {
        "context": itemgetter("question") | retriever,
        "question": itemgetter("question"),
        "q_a_pairs": itemgetter("q_a_pairs"),
    }
    | decomposition_prompt
    | Answer_LLM
    | StrOutputParser()
)

for q in questions:
    # Skip blank entries (possible when the sub-questions come from splitting
    # model output on newlines); there is nothing to retrieve for them.
    if not q.strip():
        continue

    answer = rag_chain.invoke({"question": q, "q_a_pairs": q_a_pairs})

    # Append the new pair to the running history so that later sub-questions
    # see every earlier answer, not just the most recent one.
    q_a_pair = format_qa_pair(q, answer)
    q_a_pairs = f"{q_a_pairs}\n---\n{q_a_pair}"


# Answer to the last sub-question, displayed as the cell result.
answer

In [None]:
from langchain_core.runnables import RunnableLambda

# RAG prompt pulled from the LangChain hub; it is invoked below with
# "context" and "question" inputs.
prompt_rag = hub.pull("rlm/rag-prompt")


def retrieve_and_rag(question, prompt_rag, sub_question_generator_chain):
    """Answer each generated sub-question independently with RAG.

    Args:
        question: The original user question.
        prompt_rag: Prompt taking ``context`` and ``question`` inputs.
        sub_question_generator_chain: Chain that maps ``{"question": ...}``
            to an iterable of sub-question strings.

    Returns:
        A ``(answers, sub_questions)`` tuple of equal-length lists, where
        ``answers[i]`` is the answer to ``sub_questions[i]``. Blank
        sub-questions are dropped from both.
    """
    generated = sub_question_generator_chain.invoke({"question": question})
    # Drop blank entries so no empty query is retrieved or answered and the
    # two returned lists stay aligned.
    sub_questions = [candidate for candidate in generated if candidate.strip()]

    # Loop-invariant, so build the answering chain once.
    answer_chain = prompt_rag | Answer_LLM | StrOutputParser()

    rag_results = []
    for sub_question in sub_questions:
        # `invoke` is the Runnable entry point for retrievers;
        # `get_relevant_documents` is deprecated.
        retrieved_docs = retriever.invoke(sub_question)
        rag_results.append(
            answer_chain.invoke({"context": retrieved_docs, "question": sub_question})
        )

    return rag_results, sub_questions


answers, questions = retrieve_and_rag(question, prompt_rag, generate_queries_decomposition)

In [None]:
def format_qa_pairs(questions, answers):
    """Format parallel question and answer sequences as numbered Q+A text.

    Args:
        questions: Iterable of question strings.
        answers: Iterable of answer strings, paired with ``questions`` by
            position; extra items in the longer iterable are ignored.

    Returns:
        One string with a ``Question i: ...`` / ``Answer i: ...`` entry per
        pair (numbered from 1), entries separated by a blank line, and
        surrounding whitespace stripped. Empty input gives ``""``.
    """
    # Return the built text; returning a call to this same function would
    # recurse until the interpreter raises RecursionError.
    return "\n\n".join(
        f"Question {i}: {question}\nAnswer {i}: {answer}"
        for i, (question, answer) in enumerate(zip(questions, answers), start=1)
    ).strip()

# Fold all sub-question/answer pairs into one context string.
context = format_qa_pairs(questions, answers)

# Synthesis prompt: takes the Q+A text as {context} and the original {question}.
template = """Here is a set of Q+A pairs:

{context}

Use these to synthesize an answer to the question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

# prompt -> answer model -> plain string
final_rag_chain = prompt | Answer_LLM | StrOutputParser()

final_inputs = {"context": context, "question": question}
final_rag_chain.invoke(final_inputs)