# Step 1: Load


In [1]:
import bs4
from langchain.document_loaders import WebBaseLoader

# Only elements carrying one of these CSS classes are parsed; everything else
# on the page is dropped by BeautifulSoup before the loader sees it.
post_only = bs4.SoupStrainer(class_=("post-content", "post-title", "post-header"))

loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs={"parse_only": post_only},
)

# Fetch and parse the page(s) listed in web_paths.
docs = loader.load()

In [2]:
# Number of characters in the text of the first loaded document.
len(docs[0].page_content)

42824

# Step 2: Split


In [3]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Chunking parameters: chunk_size and chunk_overlap bound the size of each
# chunk and the overlap between neighbours; add_start_index stores each
# chunk's offset in the source text under metadata["start_index"].
splitter_options = dict(chunk_size=1000, chunk_overlap=200, add_start_index=True)

textsplitter = RecursiveCharacterTextSplitter(**splitter_options)
all_splits = textsplitter.split_documents(docs)

In [4]:
# Character length of the first chunk's text.
len(all_splits[0].page_content)

969

In [5]:
# Metadata of the second chunk (index 1): its source and start_index.
all_splits[1].metadata

{'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/',
 'start_index': 971}

In [6]:
# Metadata of the third chunk (index 2), for comparison with the previous one.
all_splits[2].metadata

{'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/',
 'start_index': 1585}

# Step 3: Store


In [7]:
from pathlib import Path

import dotenv
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma

# Load environment variables from a local .env file, if there is one.
dotenv.load_dotenv()

CHROMA_DIR = "./chroma_db"
embedding_model = OpenAIEmbeddings()

# Chroma.from_documents *adds* to whatever collection is already persisted in
# CHROMA_DIR. Re-running this cell therefore used to re-embed every chunk and
# store a second copy of it, so later similarity searches returned duplicated
# chunks. Reuse the persisted index when it exists and build it only once.
# NOTE: delete CHROMA_DIR to force a rebuild after changing the source page
# or the splitter settings.
chroma_path = Path(CHROMA_DIR)
if chroma_path.is_dir() and any(chroma_path.iterdir()):
    vectorstore = Chroma(
        persist_directory=CHROMA_DIR,
        embedding_function=embedding_model,
    )
else:
    vectorstore = Chroma.from_documents(
        documents=all_splits,
        embedding=embedding_model,
        persist_directory=CHROMA_DIR,
    )

# Step 4: Retrieve


In [8]:
# Similarity search over the vector store; k = 6 results per query.
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs=dict(k=6),
)

decomposition_query = "What are the approaches to Task Decomposition?"
retrieved_docs = retriever.get_relevant_documents(decomposition_query)

In [9]:
# Number of documents the retriever returned for the query.
len(retrieved_docs)

6

In [10]:
# Show the text of the second retrieved document (index 1).
print(retrieved_docs[1].page_content)

Fig. 1. Overview of a LLM-powered autonomous agent system.
Component One: Planning#
A complicated task usually involves many steps. An agent needs to know what they are and plan ahead.
Task Decomposition#
Chain of thought (CoT; Wei et al. 2022) has become a standard prompting technique for enhancing model performance on complex tasks. The model is instructed to “think step by step” to utilize more test-time computation to decompose hard tasks into smaller and simpler steps. CoT transforms big tasks into multiple manageable tasks and shed lights into an interpretation of the model’s thinking process.


# Step 5: Generate


In [16]:
from langchain.chat_models import ChatOpenAI

# Chat model used for the generation step, configured with temperature 0.
llm = ChatOpenAI(model_name = "gpt-3.5-turbo", temperature = 0)


In [17]:
from langchain import hub

# Pull the "rlm/rag-prompt" prompt from the LangChain hub; it takes the
# "context" and "question" inputs used by the chains in this notebook.
prompt = hub.pull("rlm/rag-prompt")

In [18]:
# Render the prompt with placeholder values to inspect its wording.
filler_inputs = {"context": "filler context", "question": "filler question"}
rendered_prompt = prompt.invoke(filler_inputs)
print(rendered_prompt.to_string())

Human: You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.
Question: filler question 
Context: filler context 
Answer:


In [20]:
from langchain.schema import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

def format_docs(docs):
    """Merge the text of several documents into a single string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        The ``page_content`` values in their original order, with one blank
        line between consecutive documents. An empty iterable yields an
        empty string.
    """
    contents = [doc.page_content for doc in docs]
    separator = "\n\n"
    return separator.join(contents)

# The incoming question is fanned out: one branch retrieves documents and
# formats them into the "context" string, the other passes the question
# through unchanged.
rag_inputs = {"context": retriever | format_docs, "question": RunnablePassthrough()}

# Fill the prompt, call the chat model, and reduce its message to plain text.
rag_chain = rag_inputs | prompt | llm | StrOutputParser()

# Print the answer piece by piece as it is streamed back.
for token in rag_chain.stream("What is Task Decomposition?"):
    print(token, end="", flush=True)

Task decomposition is a technique used to break down complex tasks into smaller and simpler steps. It involves transforming big tasks into multiple manageable tasks, allowing for easier interpretation and execution by autonomous agents or models. Task decomposition can be done through various methods, such as using prompting techniques, task-specific instructions, or human inputs.

## Go Deeper


In [21]:
from langchain.prompts import PromptTemplate

# Custom prompt with the same {context} / {question} placeholders as the chain
# inputs below, plus an instruction to end every answer with a fixed phrase.
template = """Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Use three sentences maximum and keep the answer as concise as possible.
Always say "thanks for asking!" at the end of the answer.
{context}
Question: {question}
Helpful Answer:"""
rag_prompt_custom = PromptTemplate.from_template(template)

# Same pipeline shape as before, with the custom prompt swapped in.
custom_chain_inputs = {
    "context": retriever | format_docs,
    "question": RunnablePassthrough(),
}
rag_chain = custom_chain_inputs | rag_prompt_custom | llm | StrOutputParser()

# Print the answer piece by piece as it is streamed back.
for token in rag_chain.stream("What is Task Decomposition?"):
    print(token, end="", flush=True)

Task decomposition is a technique used to break down complex tasks into smaller and simpler steps. It involves transforming big tasks into multiple manageable tasks, allowing for a more systematic and organized approach to problem-solving. Thanks for asking!

## Adding Sources


In [24]:
from operator import itemgetter

from langchain_core.runnables import RunnableParallel

# Answer sub-chain. It receives a mapping with "documents" and "question",
# formats the documents into the prompt's context, and returns the answer text.
answer_inputs = {
    "context": lambda state: format_docs(state["documents"]),
    "question": itemgetter("question"),
}
rag_chain_from_docs = answer_inputs | rag_prompt_custom | llm | StrOutputParser()

# Stage 1: retrieve documents while passing the raw question through.
retrieve_step = RunnableParallel(
    {"documents": retriever, "question": RunnablePassthrough()}
)

# Stage 2: from the same retrieval result, expose each document's metadata
# next to the answer generated from those documents.
rag_chain_with_sources = retrieve_step | {
    "documents": lambda state: [doc.metadata for doc in state["documents"]],
    "answer": rag_chain_from_docs,
}

rag_chain_with_sources.invoke("What is Task Decomposition")



{'documents': [{'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/',
   'start_index': 1585},
  {'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/',
   'start_index': 2192},
  {'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/',
   'start_index': 17804},
  {'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/',
   'start_index': 17414},
  {'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/',
   'start_index': 29630},
  {'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/',
   'start_index': 19373}],
 'answer': 'Task decomposition is a technique used to break down complex tasks into smaller and simpler steps. It involves transforming big tasks into multiple manageable tasks, allowing for a more systematic and organized approach to problem-solving. Thanks for asking!'}