In [128]:
import bs4
from langchain_community.document_loaders import WebBaseLoader


In [129]:
from dotenv import load_dotenv

# Load variables from a .env file (if one is found) into the process environment
# before any client that reads its settings from the environment is created.
load_dotenv()

True

In [130]:
from langchain_ollama import OllamaEmbeddings
from langchain_chroma import Chroma
from backend.utils import SplitterDocument
from langchain_ollama.llms import OllamaLLM

# Restrict HTML parsing to elements carrying one of these CSS classes.
post_sections = bs4.SoupStrainer(
    class_=("post-content", "post-title", "post-header")
)

# Load the blog post through the filtered parser.
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs={"parse_only": post_sections},
)
blog_docs = loader.load()

# The same Ollama model tag is used for both embeddings and text generation.
ollama_model = "llama3.2:1b"
embedder = OllamaEmbeddings(model=ollama_model)
model = OllamaLLM(model=ollama_model, temperature=0)

In [131]:
# Project-local splitter configured with chunk_size=300 and chunk_overlap=30.
# NOTE(review): whether these are characters or tokens depends on SplitterDocument — confirm.
splitter = SplitterDocument(chunk_size=300, chunk_overlap=30)

In [132]:
# Run the splitter over the loaded blog documents; the result is what gets embedded below.
splits = splitter.splits(blog_docs)

In [133]:
# Embed the splits with `embedder` and store them in the "blog" Chroma collection,
# persisted under ./chroma_langchain_db.
# NOTE(review): because the collection is persisted, re-running this cell presumably
# adds the same splits again — confirm, and clear the directory or guard the call if so.
vectorstore = Chroma.from_documents(documents=splits, embedding=embedder, collection_name="blog",
                                    persist_directory="./chroma_langchain_db")

In [134]:
# Wrap the vector store in a retriever; no search arguments are passed, so the
# store's default retrieval settings apply.
retriever = vectorstore.as_retriever()

In [135]:

from operator import itemgetter
from typing import List

from langchain.load import dumps, loads
from langchain.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

from backend.common.config import BaseObject
from backend.common.config import Config
from backend.prompt import MULTI_TURN_PROMPT, FINAL_RAG_PROMPT


In [136]:
def get_unique_union(documents: list[list]):
    """Return the distinct documents found across several retrieval result lists.

    Every document is serialized with ``dumps`` so that it can be compared by
    value, duplicates are dropped, and the remaining entries are rebuilt with
    ``loads``. The first-seen order is preserved, so the result is stable from
    one run to the next.

    Args:
        documents: One list of retrieved documents per query.

    Returns:
        A flat list containing each distinct document once, in the order in
        which it was first encountered.
    """
    # dict keys are unique and keep insertion order; a set would return the
    # serialized documents in a hash-dependent order that differs between runs.
    serialized = dict.fromkeys(
        dumps(doc) for sublist in documents for doc in sublist
    )
    return [loads(doc) for doc in serialized]


def _init_general_prompt_template(prompt_template: str = None):
    """Build a chat prompt from a template string.

    Args:
        prompt_template: Template text handed to
            ``ChatPromptTemplate.from_template``. The default of ``None`` is
            forwarded unchanged, so callers are expected to pass a template.

    Returns:
        The object produced by ``ChatPromptTemplate.from_template``.
    """
    return ChatPromptTemplate.from_template(prompt_template)

In [137]:
from langchain_core.tracers.langchain import wait_for_all_tracers


class TranslationManager(BaseObject):
    """Multi-query RAG pipeline: rephrase the question, retrieve, then answer.

    Three chains are assembled at construction time:

    * a query-generation chain (prompt -> model -> string -> list of queries),
    * a retrieval chain that runs the retriever on every generated query and
      merges the results with ``get_unique_union``,
    * a final chain that feeds the merged documents and the original question
      into the answer prompt and the model.
    """

    def __init__(
            self,
            config: Config = None,
            model=None,
            embedder=None,
            retriever=None,
            general_prompt_template: str = MULTI_TURN_PROMPT,
            final_rag_prompt_template: str = FINAL_RAG_PROMPT,
    ):
        """Store the collaborators and build the three chains.

        Args:
            config: Configuration object; a fresh ``Config()`` is used when omitted.
            model: Language model piped after both prompts.
            embedder: Stored on the instance; none of the chains built here use it.
            retriever: Retriever whose ``map()`` is applied to the generated queries.
            general_prompt_template: Template for the query-generation prompt.
            final_rag_prompt_template: Template for the final answer prompt.
        """
        super().__init__()
        self.config = config if config is not None else Config()
        self._base_model = model
        self._embedder = embedder
        self._prompt_perspectives = _init_general_prompt_template(prompt_template=general_prompt_template)
        self._prompt = _init_general_prompt_template(prompt_template=final_rag_prompt_template)
        self._retriever = retriever
        # Order matters: each chain is built on top of the previous one.
        self._init_generate_queries()
        self._init_retrieval_chain()
        self._init_final_rag_chain()

    @staticmethod
    def _split_queries(text: str) -> list:
        """Split the model's reply into one trimmed query per non-blank line.

        Lines are trimmed before the emptiness check so that whitespace-only
        lines (for example a lone "\\r" left over from CRLF line endings) are
        dropped instead of being sent to the retriever as queries.
        """
        trimmed = (line.strip() for line in text.split("\n"))
        return [line for line in trimmed if line]

    def _init_generate_queries(self):
        """Build the chain that turns a question into a list of alternative queries."""
        self._generate_queries = (
            self._prompt_perspectives
            | self._base_model
            | StrOutputParser()
            | self._split_queries
        ).with_config(run_name="TranslateResponse")

    def _init_retrieval_chain(self):
        """Build the chain that retrieves for every generated query and merges the results."""
        self._retrieval_chain = (
            self._generate_queries
            | self._retriever.map()
            | get_unique_union
        ).with_config(run_name="RetrieveResponse")

    def _init_final_rag_chain(self):
        """Build the chain that answers the question from the retrieved documents."""
        self._final_rag_chain = (
            {
                # The retrieval chain receives the same input dict as the whole chain.
                "context": self._retrieval_chain,
                "question": itemgetter("question"),
            }
            | self._prompt
            | self._base_model
            | StrOutputParser()
        ).with_config(run_name="FinalRagChain")

    def predict(self, question):
        """Run the full pipeline for one question.

        Args:
            question: The user question, passed to the chain as ``{"question": question}``.

        Returns:
            The output of the final chain.
        """
        try:
            return self._final_rag_chain.invoke({"question": question})
        finally:
            # Runs whether or not the chain raised.
            wait_for_all_tracers()


In [146]:

# Assemble the manager from the model, embedder and retriever created above.
translation_manager = TranslationManager(
    model=model,
    embedder=embedder,
    retriever=retriever,
    general_prompt_template=MULTI_TURN_PROMPT,
    final_rag_prompt_template=FINAL_RAG_PROMPT,
)

# Ask a general-knowledge question and print the pipeline's answer.
question = "What is the speed of light?"
answer = translation_manager.predict(question)
print(answer)

The question "What is the speed of light?" does not require any specific information or context from the provided text. It appears to be a general knowledge question that can be answered based on basic physics principles.

The speed of light (approximately 299,792,458 meters per second) is a fundamental constant in physics and has been consistently measured and confirmed through various experiments over the years. There is no specific information or context in the provided text that would suggest otherwise.

If you're looking for an answer to this question based on general knowledge, I can provide one: The speed of light is approximately 299,792,458 meters per second.


In [139]:
from langchain.prompts import ChatPromptTemplate

# Multi Query: Different Perspectives
# The prompt asks the model for five rephrasings of the user question, one per
# line; the only template variable is {question}.
template = """You are an AI language model assistant. Your task is to generate five
different versions of the given user question to retrieve relevant documents from a vector
database. By generating multiple perspectives on the user question, your goal is to help
the user overcome some of the limitations of the distance-based similarity search.
Provide these alternative questions separated by newlines. Original question: {question}"""
prompt_perspectives = ChatPromptTemplate.from_template(template)

from langchain_core.output_parsers import StrOutputParser

# Ask the model for rephrasings, then turn its reply into a clean list of queries.
generate_queries = (
        prompt_perspectives
        | model
        | StrOutputParser()
        # One query per line. Lines are trimmed before the emptiness check so that
        # whitespace-only lines (e.g. a lone "\r" from CRLF endings) are dropped
        # instead of being used as retrieval queries.
        | (lambda text: [line.strip() for line in text.split("\n") if line.strip()])
)

In [140]:
# Try the query generator on its own; the last expression displays the generated list.
qu = "What is the speed of light?"
generate_queries.invoke({"question": qu})

['I can generate five different versions of the original question to retrieve relevant documents from a vector database. Here are the alternative questions:',
 'What is the fundamental limit on the rate at which information can be transmitted through space?',
 "What is the maximum speed at which any object or information can travel in a vacuum, as described by Einstein's theory of special relativity?",
 'How fast does light travel when it passes through a vacuum, according to the laws of physics and our current understanding of the universe?',
 'What is the theoretical upper bound on the speed of electromagnetic radiation, including light, as predicted by quantum mechanics and general relativity?',
 "Is there any known limit to the speed at which information can be transmitted through space, or is it an inherent property of the universe that we don't yet fully understand?"]

In [141]:
from langchain.load import dumps, loads


def get_unique_union(documents: list[list]):
    """Unique union of retrieved docs, in first-seen order.

    This re-defines the helper of the same name declared earlier in the
    notebook; the two definitions should be kept in sync.

    Args:
        documents: One list of retrieved documents per query.

    Returns:
        A flat list in which every distinct document appears exactly once,
        ordered by first occurrence.
    """
    # Serialize each document so it can be compared by value. Using dict keys
    # rather than a set keeps the encounter order, which makes the result
    # reproducible across interpreter runs.
    unique_serialized = dict.fromkeys(
        dumps(doc) for sublist in documents for doc in sublist
    )
    return [loads(doc) for doc in unique_serialized]


In [142]:
# Retrieve: generate the alternative queries, run the retriever on each of them
# (retriever.map()), then merge the per-query results with get_unique_union.
question = "What is task decomposition for LLM agents?"
retrieval_chain = generate_queries | retriever.map() | get_unique_union

In [143]:
# Run the retrieval chain for the question; the result is the merged document list.
docs = retrieval_chain.invoke({"question": question})

In [144]:
# Display the retrieved documents (the full list is rendered as the cell output).
docs

[Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='Fig. 10. A picture of a sea otter using rock to crack open a seashell, while floating in the water. While some other animals can use tools, the complexity is not comparable with humans. (Image source: Animals using tools)\nMRKL (Karpas et al. 2022), short for “Modular Reasoning, Knowledge and Language”, is a neuro-symbolic architecture for autonomous agents. A MRKL system is proposed to contain a collection of “expert” modules and the general-purpose LLM works as a router to route inquiries to the best suitable expert module. These modules can be neural (e.g. deep learning models) or symbolic (e.g. math calculator, currency converter, weather API).\nThey did an experiment on fine-tuning LLM to call a calculator, using arithmetic as a test case. Their experiments showed that it was harder to solve verbal math problems than explicitly stated math problems because LLMs (7B Jurassic1-large 

In [145]:
from operator import itemgetter

# RAG: answer the question from the documents returned by the retrieval chain.
# The prompt has two variables: {context} and {question}.
template = """Answer the following question based on this context:

{context}

Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

# "context" is filled by running retrieval_chain on the same input dict, while
# "question" is read from that dict with itemgetter.
# NOTE(review): the document list is passed to the prompt as-is, so it is
# presumably rendered via its string form — confirm this is the intended context format.
final_rag_chain = (
        {"context": retrieval_chain,
         "question": itemgetter("question")}
        | prompt
        | model
        | StrOutputParser()
)

# The last expression displays the generated answer.
final_rag_chain.invoke({"question": question})

"Task decomposition for LLM (Large Language Model) agents refers to the process of breaking down complex tasks into smaller, more manageable sub-tasks that can be executed by individual components or modules within the agent. This allows the agent to focus on one sub-task at a time and improve its overall performance.\n\nIn the context of Large Language Models, task decomposition is often used in conjunction with other techniques such as fine-tuning, instruction-based learning, and reinforcement learning. The goal of task decomposition is to enable LLMs to:\n\n1. Break down complex tasks into smaller sub-tasks\n2. Focus on one sub-task at a time\n3. Improve performance by executing each sub-task efficiently\n\nTask decomposition can be achieved through various techniques, including:\n\n1. **Fine-tuning**: Refining the model's parameters to focus on specific sub-tasks.\n2. **Instruction-based learning**: Providing instructions or prompts that guide the agent to execute specific sub-task