In [11]:
import os
import bs4
from dotenv import load_dotenv
import logging
from bs4 import BeautifulSoup, SoupStrainer
from langchain_openai import ChatOpenAI
from langchain_community.document_loaders import WebBaseLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings
from langchain import hub
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.prompts import PromptTemplate


# Enables tracing for LangSmith or LangChain's internal operations, which could log detailed traces for debugging purposes.
os.environ["LANGCHAIN_TRACING_V2"] = "true"

# Enables tracing for LangSmith or LangChain's internal operations, which could log detailed traces for debugging purposes.
logging.basicConfig(level=logging.INFO)
log = logging.getLogger(__name__)

# Load environment variables from .env file.
load_dotenv()

# Load API key from environment variable.
api_key = os.getenv("OPENAI_API_KEY")

if not api_key:
    log.error("OPENAI_API_KEY not found in environment variables.")
    raise ValueError("OPENAI_API_KEY not set in the environment.")
os.environ["OPENAI_API_KEY"] = api_key

llm = ChatOpenAI(model="gpt-4o-mini")


In [12]:
prompt = hub.pull("rlm/rag-prompt")

example_messages = prompt.invoke(
    {"context": "filler context", "question": "filler question"}
).to_messages()

example_messages

print(example_messages[0].content)



You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.
Question: filler question 
Context: filler context 
Answer:


In [13]:
def get_user_agent() -> str:
    """Get user agent from environment variable."""
    env_user_agent = os.environ.get("USER_AGENT")
    if not env_user_agent:
        log.warning(
            "USER_AGENT environment variable not set, "
            "consider setting it to identify your requests."
        )
        return "DefaultLangchainUserAgent"
    return env_user_agent

In [14]:
# Define a strainer to filter the HTML content.
# The `bs4.SoupStrainer` is configured to retain only the elements with the classes "post-title", "post-header", and "post-content".
# This focused extraction ensures that only the necessary information for LangChain processing is captured, eliminating extraneous data.
bs4_strainer = bs4.SoupStrainer(class_=("post-title", "post-header", "post-content"))

# Initialize the WebBaseLoader.
# This object is responsible for fetching the content of the specified webpage and applying the filtering defined by the `bs4_strainer`.
# It allows us to retrieve only the desired sections of the webpage while setting a custom User-Agent header to mimic a standard web browser request.
loader = WebBaseLoader( # Document loader class
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",), # Source 
    bs_kwargs={"parse_only": bs4_strainer}, # Converts to standadize format for processing by LangChain
    requests_kwargs={"headers": {"User-Agent": get_user_agent()}}
)

# Load the Document from the WebBaseLoader.
# The `loader.load()` method retrieves the webpage content and applies the previously defined filtering.
# The resulting documents are stored in the `docs` variable, which contains the extracted data ready for processing with LangChain.
docs = loader.load()

# Print information about the extracted content.
# This snippet outputs the length of the extracted content (in characters) and displays the first 500 characters.
# This verification step helps ensure that the extraction process was successful and that the expected data is captured.
# print(len(docs[0].page_content))
# print(docs[0].page_content[:500])


In [15]:
# Initialize RecursiveCharacterTextSplitter to break down documents into manageable chunks.
# We set chunk_size to 1000 characters, with an overlap of 200 characters to maintain context between chunks.
# This ensures that each chunk is small enough to fit into the context window for processing (e.g., by a model like GPT).
# The add_start_index parameter is set to True, so we can track where each chunk starts in the original document.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000, chunk_overlap=200, add_start_index=True
)

# Split the input documents into smaller chunks for processing using the text splitter.
all_splits = text_splitter.split_documents(docs)

# Store the embedded representations of the document chunks in a Chroma vectorstore.
# This allows for efficient similarity searches or embedded queries in downstream tasks.
vectorstore = Chroma.from_documents(documents=all_splits, embedding=OpenAIEmbeddings())

# Check the length of the content in the first chunk to ensure it's within the expected chunk size.
len(all_splits)

# Get the number of chunks created from the document splits.
len(all_splits[0].page_content)

# Retrieve metadata (such as source or chunk index) from the 11th chunk to check for useful information.
all_splits[10].metadata

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


{'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/',
 'start_index': 7056}

In [16]:
# Created a retriever to perform a similarity search over the vector database.
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 6})

# Chain together relevant documents and the question to construct a prompt, pass it to the model, and parse the output.
retrieved_docs = retriever.invoke("What are the approaches to Task Decomposition?")

# Get the length of the retrieved documents.
len(retrieved_docs)

# Print the content of the first retrieved document.
print(retrieved_docs[0].page_content)

# Used LangChain Express Language (LCEL) Runnable to combine components and functions efficiently.
# Automatically traced the chain in LangChain to monitor execution.
# Enabled streaming, async, and batched function calls.
def format_docs(doc):
    return "\n\n".join(doc.page_content for doc in docs)

rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

# Stream the response for the question "What is Task Decomposition?"
for chunk in rag_chain.stream("What is Task Decomposition?"):
    print(chunk, end="", flush=True)

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Tree of Thoughts (Yao et al. 2023) extends CoT by exploring multiple reasoning possibilities at each step. It first decomposes the problem into multiple thought steps and generates multiple thoughts per step, creating a tree structure. The search process can be BFS (breadth-first search) or DFS (depth-first search) with each state evaluated by a classifier (via a prompt) or majority vote.
Task decomposition can be done (1) by LLM with simple prompting like "Steps for XYZ.\n1.", "What are the subgoals for achieving XYZ?", (2) by using task-specific instructions; e.g. "Write a story outline." for writing a novel, or (3) with human inputs.


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Task decomposition is the process by which an agent breaks down large, complex tasks into smaller, manageable subgoals. This approach enables more efficient handling of tasks by allowing the agent to tackle simpler steps sequentially. Techniques like Chain of Thought (CoT) and Tree of Thoughts facilitate this decomposition by guiding the agent to think through each step methodically.

In [17]:
# Built-in chains
# create_stuff_documents_chain pecifies how retrieved context is fed into a prompt and LLM.
# Stuff the contents into the prompt e.g. we will include all retrieved context without any summarization or other processing.
# It implements the rag_chain, with input keys context and input --it generates an answer using retrieved context and query.
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Use three sentences maximum and keep the "
    "answer concise."
    "\n\n"
    "{context}"
)

# create_retrieval_chain adds the retrieval step and propagates the retrieved context through the chain,
# providing it alongside the final answer.
# It has input key input, and includes input, context, and answer in its output.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)


question_answer_chain = create_stuff_documents_chain(llm, prompt)
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

response = rag_chain.invoke({"input": "What is Task Decomposition?"})
print(response["answer"])

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Task decomposition is the process of breaking down a complicated task into smaller, more manageable steps. Techniques such as Chain of Thought (CoT) and Tree of Thoughts are used to facilitate this by encouraging the model to think step by step, allowing for clearer reasoning and organization of tasks. This approach enhances performance on complex tasks by simplifying them into subgoals or individual actions.


In [18]:
# Customized version of RAG prompt.
template = """Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Use three sentences maximum and keep the answer as concise as possible.
Always say "thanks for asking!" at the end of the answer.

{context}

Question: {question}

Helpful Answer:"""
custom_rag_prompt = PromptTemplate.from_template(template)

rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | custom_rag_prompt
    | llm
    | StrOutputParser()
)

rag_chain.invoke("What is Task Decomposition?")

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


"Task decomposition is the process by which an agent breaks down large tasks into smaller, manageable subgoals to facilitate efficient handling of complex tasks. This can be achieved through prompting techniques like Chain of Thought (CoT) and Tree of Thoughts, which enhance the model's performance by allowing it to think step by step. By creating manageable tasks, the agent can systematically address each component of the larger task more effectively. Thanks for asking!"

In [19]:
# Shows resources that was used to generate the answer.
# LangChain's built-in create_retrieval_chain will propagate retrieved source documents through to the output in the "context" key:
for document in response["context"]:
    print(document)
    print()

page_content='Fig. 1. Overview of a LLM-powered autonomous agent system.
Component One: Planning#
A complicated task usually involves many steps. An agent needs to know what they are and plan ahead.
Task Decomposition#
Chain of thought (CoT; Wei et al. 2022) has become a standard prompting technique for enhancing model performance on complex tasks. The model is instructed to “think step by step” to utilize more test-time computation to decompose hard tasks into smaller and simpler steps. CoT transforms big tasks into multiple manageable tasks and shed lights into an interpretation of the model’s thinking process.' metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'start_index': 1585}

page_content='Fig. 1. Overview of a LLM-powered autonomous agent system.
Component One: Planning#
A complicated task usually involves many steps. An agent needs to know what they are and plan ahead.
Task Decomposition#
Chain of thought (CoT; Wei et al. 2022) has become a standard