In [3]:
import os
from dotenv import load_dotenv

# Load variables from a .env file (if one is found) into the process environment.
load_dotenv()

# Snapshot the settings this notebook relies on; each value is None when the
# corresponding environment variable is not set.
LANGCHAIN_TRACING_V2 = os.environ.get("LANGCHAIN_TRACING_V2")
LANGCHAIN_API_KEY = os.environ.get("LANGCHAIN_API_KEY")
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

In [31]:
import bs4
from langchain_community.document_loaders import WebBaseLoader

# Restrict HTML parsing to elements carrying the post's title, header and
# content classes; everything else on the page is discarded.
bs4_strainer = bs4.SoupStrainer(
    class_=("post-title", "post-header", "post-content"),
)
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs=dict(parse_only=bs4_strainer),
)
docs = loader.load()

# Character count of the first loaded document (displayed as the cell output).
len(docs[0].page_content)

43131

In [32]:
# Preview the first 500 characters of the scraped post.
print(docs[0].page_content[:500])



      LLM Powered Autonomous Agents
    
Date: June 23, 2023  |  Estimated Reading Time: 31 min  |  Author: Lilian Weng


Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.
Agent System Overview#
In


In [85]:
import bs4
from langchain_community.document_loaders import WebBaseLoader

# Keep only elements whose class attribute matches one of the two strings
# below; presumably these are the headline and body containers in the ft.com
# article markup — verify if the page layout changes.
bs4_strainer = bs4.SoupStrainer(
    class_=(
        "o-topper__headline o-topper__headline--large",
        "n-content-body js-article__content-body",
    )
)
loader = WebBaseLoader(
    web_paths=("https://www.ft.com/content/557d71f5-c3c1-4e9f-b4e5-c5d4e36d73e0",),
    bs_kwargs=dict(parse_only=bs4_strainer),
)
docs2 = loader.load()

# Character count of the scraped article text (displayed as the cell output).
len(docs2[0].page_content)

10866

In [86]:
# Preview up to the first 5000 characters of the scraped article.
print(docs2[0].page_content[:5000])

The club of City executives plotting a revival for the UK’s capital marketsFor the 10 top executives who meet each month at the London Stock Exchange’s Square Mile headquarters, fear is a powerful motivator.The three-century-old bourse is confronting a prolonged drought in listings, a domestic pension industry shunning UK equities and a morale-sapping anxiety that any homegrown company aiming for global success will choose to float in New York.The monthly gatherings were born of a decision in the summer of 2022 by Dame Julia Hoggett, chief executive of the exchange, to take matters into her own hands.She convened a group of City grandees, advisers and investors to drive a radical overhaul of the UK’s capital markets, win political backing for the changes and try to counter what the group saw as a corrosive negativity engulfing the London market.Two years on, the efforts of the group, which calls itself the Capital Markets Industry Taskforce, and the fortunes of the market, are at a cri

In [67]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Split the article into overlapping chunks for embedding.
# add_start_index=True records each chunk's character offset in the source
# document under the `start_index` metadata key.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
    add_start_index=True,
)
all_splits = text_splitter.split_documents(docs2)

# Number of chunks produced (displayed as the cell output).
len(all_splits)

14

In [68]:
# Length in characters of the first chunk.
len(all_splits[0].page_content)

996

In [69]:
# Inspect the metadata attached to one chunk (index 10).
all_splits[10].metadata

{'source': 'https://www.ft.com/content/557d71f5-c3c1-4e9f-b4e5-c5d4e36d73e0',
 'start_index': 7998}

In [70]:
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings

# Embed every chunk with OpenAI embeddings and index the vectors in Chroma.
# NOTE(review): no collection name or persist directory is passed, so the
# library defaults apply; re-running this cell in the same kernel may index the
# same chunks a second time — confirm before relying on retrieval counts.
vectorstore = Chroma.from_documents(documents=all_splits, embedding=OpenAIEmbeddings())

In [78]:
# Similarity search over the vector store, returning 6 chunks per query.
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs=dict(k=6),
)

# Sanity-check retrieval with a question about the indexed article.
retrieved_docs = retriever.invoke("When did Katharine Braddick join Barclays")

# Number of chunks returned (displayed as the cell output).
len(retrieved_docs)

6

In [72]:
# Show the text of the first chunk returned by the retriever.
print(retrieved_docs[0].page_content)

to push investors to back UK companies. Regardless of political affiliations, there is a consensus in the City that a period of political stability would help after the turmoil since Brexit. Part of CMIT’s sales pitch to politicians is that the benefits of a flourishing equity market — and a healthy pipeline of private companies that could list on it — extend far beyond the City’s lawyers, bankers and accountants.“We need the capital markets to work better because it makes us all better off,” said CMIT member Katharine Braddick, group head of strategic policy at Barclays and a former Treasury official.There is a “mutually reinforcing relationship” between the capital markets and the country’s prosperity, she added. “We’ve got to do a better job of explaining to people why this market exists. It’s about funding the UK economy, helping people save more and driving the transition to net zero.”British companies that list in New York or take money from a San Francisco venture capital fund


In [73]:
from langchain_openai import ChatOpenAI

# Chat model used for answer generation by the chains defined below.
llm = ChatOpenAI(model="gpt-3.5-turbo-0125")

In [74]:
import getpass
import os

In [75]:
import langchainhub
from langchain import hub

# Fetch the shared RAG prompt template from the LangChain Hub.
prompt = hub.pull("rlm/rag-prompt")

# Render the template with placeholder values to see which messages it
# produces for a given context/question pair.
example_messages = prompt.invoke(
    dict(context="filler context", question="filler question")
).to_messages()

example_messages

[HumanMessage(content="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: filler question \nContext: filler context \nAnswer:")]

In [76]:
# Print the rendered prompt text so its line breaks are shown as actual lines.
print(example_messages[0].content)

You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.
Question: filler question 
Context: filler context 
Answer:


In [79]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough


def format_docs(docs):
    """Concatenate the text of each document into a single context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The ``page_content`` values joined with a blank line between them;
        an empty string when ``docs`` is empty.
    """
    contents = [item.page_content for item in docs]
    return "\n\n".join(contents)


# The incoming question feeds two prompt inputs: "context" is the retrieved
# chunks rendered to one string by format_docs, and "question" is the input
# passed through unchanged. The model reply is then parsed to a plain string.
rag_chain = (
    {
        "context": retriever | format_docs,
        "question": RunnablePassthrough(),
    }
    | prompt
    | llm
    | StrOutputParser()
)

# Print the answer incrementally as pieces of it arrive.
for piece in rag_chain.stream("When did Katharine Braddick join Barclays?"):
    print(piece, end="", flush=True)

Katharine Braddick joined Barclays as the group head of strategic policy.

In [20]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

# System instructions for the answer step. The {context} placeholder is where
# the retrieved documents are inserted by the stuff-documents chain.
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Use three sentences maximum and keep the "
    "answer concise."
    "\n\n"
    "{context}"
)

# Bound to its own name on purpose: the hub template stored in `prompt` expects
# "context"/"question" inputs, while this one expects "context"/"input".
# Overwriting `prompt` here would break the earlier LCEL chain cell if it is
# re-run after this cell.
qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)


# Answer step: stuffs the retrieved documents into the prompt and calls the LLM.
question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)
# Full pipeline: retrieve for the "input" question, then run the answer step.
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

# NOTE(review): `retriever` is built from `docs2` (the FT article) in the cells
# above, yet this question is about the agents blog post loaded into `docs`;
# confirm which corpus is meant to be indexed here.
response = rag_chain.invoke({"input": "What is Task Decomposition?"})
print(response["answer"])

Task decomposition is the process of breaking down a complex task into smaller and simpler steps. This approach helps agents or models tackle difficult tasks by dividing them into more manageable subtasks. Techniques like Chain of Thought and Tree of Thoughts are used to decompose tasks into multiple steps for easier execution.


In [21]:
# Dump each retrieved source document, leaving a blank line after every one.
for document in response["context"]:
    print(document, end="\n\n")

page_content='Fig. 1. Overview of a LLM-powered autonomous agent system.\nComponent One: Planning#\nA complicated task usually involves many steps. An agent needs to know what they are and plan ahead.\nTask Decomposition#\nChain of thought (CoT; Wei et al. 2022) has become a standard prompting technique for enhancing model performance on complex tasks. The model is instructed to “think step by step” to utilize more test-time computation to decompose hard tasks into smaller and simpler steps. CoT transforms big tasks into multiple manageable tasks and shed lights into an interpretation of the model’s thinking process.' metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'start_index': 1585}

page_content='Tree of Thoughts (Yao et al. 2023) extends CoT by exploring multiple reasoning possibilities at each step. It first decomposes the problem into multiple thought steps and generates multiple thoughts per step, creating a tree structure. The search process can be 

In [22]:
from langchain_core.prompts import PromptTemplate

# Hand-written replacement for the hub prompt; it takes the same two inputs,
# {context} and {question}.
template = """Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Use three sentences maximum and keep the answer as concise as possible.
Always say "thanks for asking!" at the end of the answer.

{context}

Question: {question}

Helpful Answer:"""
custom_rag_prompt = PromptTemplate.from_template(template)

# Same pipeline shape as the earlier LCEL chain, with the custom prompt
# swapped in for the hub prompt.
rag_chain = (
    {
        "context": retriever | format_docs,
        "question": RunnablePassthrough(),
    }
    | custom_rag_prompt
    | llm
    | StrOutputParser()
)

# NOTE(review): `retriever` is built from `docs2` (the FT article) in the cells
# above, while this question is about the agents blog post; confirm the
# intended corpus.
rag_chain.invoke("What is Task Decomposition?")

'Task decomposition is the process of breaking down complex tasks into smaller and simpler steps to make them more manageable for an autonomous agent or AI assistant. This can be achieved through techniques like Chain of Thought and Tree of Thoughts, which guide the agent in thinking step by step or exploring multiple reasoning possibilities at each step. Thanks for asking!'