In [17]:
import getpass
import os
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI

# Turn on LangChain tracing (LANGCHAIN_TRACING_V2) for this kernel.
os.environ["LANGCHAIN_TRACING_V2"] = "true"

# Load variables from a local .env file, if one exists, into os.environ.
load_dotenv()

# Prompt for any credential that is missing *or blank*. A bare
# `"KEY" not in os.environ` check would accept an empty value (for example a
# `.env` line such as `OPENAI_API_KEY=`) and only fail later on the first API call.
for env_name, prompt_text in (
    ("OPENAI_API_KEY", "Enter your OpenAI API key: "),
    ("LANGCHAIN_API_KEY", "Enter your LangChain API key: "),
):
    if not os.environ.get(env_name, "").strip():
        # getpass keeps the secret out of the notebook's saved output.
        os.environ[env_name] = getpass.getpass(prompt_text)

# Chat model shared by the rest of the notebook.
llm = ChatOpenAI(model="gpt-4o-mini")

In [18]:
# 1: Load: load data with document loaders (using WebBaseLoader, which uses BeautifulSoup)
import bs4
from langchain_community.document_loaders import WebBaseLoader

# Parse only the elements carrying the post's title, header and content classes.
bs4_strainer = bs4.SoupStrainer(
    class_=("post-title", "post-header", "post-content"),
)
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    # forwarded to BeautifulSoup so that only the strained elements are parsed
    bs_kwargs={"parse_only": bs4_strainer},
)

# Fetch and parse the page.
docs = loader.load()

# Sanity check: document count, size of the first document, and a 500-character preview.
print(f"loaded {len(docs)} document with length {len(docs[0].page_content)}")
print(docs[0].page_content[:500])


loaded 1 document with length 43047


      LLM Powered Autonomous Agents
    
Date: June 23, 2023  |  Estimated Reading Time: 31 min  |  Author: Lilian Weng


Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.
Agent System Overview#
In


In [19]:
# 2: Split: split documents into smaller chunks
#           smaller chunks fit into the context window and are easier to search over and pass to the LLM
#           they are also the units that get embedded and stored in the vector store

from langchain.text_splitter import RecursiveCharacterTextSplitter

# Recursive character splitter: chunks of up to 1000 characters, with 200
# characters shared between neighbours so information that straddles a boundary
# is still present in at least one chunk.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,        # size of each chunk
    chunk_overlap=200,      # overlap between consecutive chunks
    add_start_index=True,   # record each chunk's start offset in its metadata
)
all_splits = text_splitter.split_documents(docs)

if all_splits:
    # Number of chunks and the character length of the first one.
    print("split into", len(all_splits), "chunks with length", len(all_splits[0].page_content))
    # Show the metadata of chunk 50 as a sample. The index is clamped to the
    # last chunk so a shorter document does not raise IndexError.
    sample_index = min(50, len(all_splits) - 1)
    print(all_splits[sample_index].metadata)
else:
    # Nothing to index into: report the empty result instead of crashing.
    print("split into 0 chunks")

split into 63 chunks with length 969
{'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'start_index': 34990}


In [26]:
# 3: Embed: embed our chunks and store them in a vector store

from langchain_chroma import Chroma             # vector store
from langchain_openai import OpenAIEmbeddings   # embedding model

# Give every chunk a deterministic id (source URL + position in all_splits).
# NOTE(review): without explicit ids, re-running this cell in the same kernel
# appears to add the same chunks again to the in-memory collection, so
# retrieval returns duplicates. With stable ids a re-run should overwrite the
# existing entries instead. Confirm against the langchain_chroma version in use.
chunk_ids = [
    f"{chunk.metadata.get('source', 'doc')}#chunk-{position}"
    for position, chunk in enumerate(all_splits)
]

# Embed the chunks with OpenAI embeddings and index them in Chroma.
vector_store = Chroma.from_documents(
    documents=all_splits,
    embedding=OpenAIEmbeddings(),
    ids=chunk_ids,
)

In [None]:
# 4: Retrieve:

# Wrap the vector store in a retriever that runs a similarity search
# and returns the 5 best-matching chunks for each query.
retriever = vector_store.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 5},
)

# Optional sanity check (disabled): run one query and preview each hit.
# retrieved_docs = retriever.invoke("What are the approaches to Task Decomposition?")

# print("retrieved", len(retrieved_docs), "documents")
# for i, doc in enumerate(retrieved_docs):
#     print(f"Document {i+1}:")
#     print(doc.page_content[:500])  # print first 500 characters of each document
#     print("Metadata:", doc.metadata)
#     print()

retrieved 5 documents
Document 1:
Component One: Planning#
A complicated task usually involves many steps. An agent needs to know what they are and plan ahead.
Task Decomposition#
Chain of thought (CoT; Wei et al. 2022) has become a standard prompting technique for enhancing model performance on complex tasks. The model is instructed to “think step by step” to utilize more test-time computation to decompose hard tasks into smaller and simpler steps. CoT transforms big tasks into multiple manageable tasks and shed lights into an 
Metadata: {'start_index': 1638, 'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}

Document 2:
Component One: Planning#
A complicated task usually involves many steps. An agent needs to know what they are and plan ahead.
Task Decomposition#
Chain of thought (CoT; Wei et al. 2022) has become a standard prompting technique for enhancing model performance on complex tasks. The model is instructed to “think step by step” to utilize more test-time co

In [None]:
# 5: Generate: generate a response using the LLM and the retrieved documents
from langchain import hub

# Pull the shared "rlm/rag-prompt" prompt from LangChain Hub.
# NOTE(review): `prompt` is not used by the chain defined later in this cell,
# which pipes through `custom_prompt` instead — confirm whether this pull is still needed.
prompt = hub.pull("rlm/rag-prompt") # load prompt from LangChain Hub

# Disabled example: render the hub prompt with placeholder values to inspect its text.
# example_query = prompt.invoke(
#     {"context": "filler context",
#      "question": "filler question"}
# ).to_messages()

# print("Example query:", example_query[0].content)  # print the example query

# chain that takes a question, retrieves documents, constructs the prompt, passes it to the LLM, and returns the response

from langchain_core.prompts import PromptTemplate
# Custom RAG prompt. {context} and {question} are the two template variables;
# the chain below fills them with the formatted retrieved chunks and the user's query.
# NOTE: the indentation inside the triple-quoted string is part of the prompt text.
template = """
    You are a helpful assistant for question-answering tasks.
    Use the following pieces of context to answer the question at the end.
    If you don't know the answer, just say that you don't know, don't try to make up an answer.
    Always validate it's a good question at the beginning of your answer. 
    Always say thank you for the question at the end of your answer.

    {context}
    Question: {question}
    Helpful Answer:
"""
# Build a PromptTemplate from the string above.
custom_prompt = PromptTemplate.from_template(template)

from langchain_core.output_parsers import StrOutputParser   # output parser to convert LLM response to string
from langchain_core.runnables import RunnablePassthrough    # passthrough to pass the question through\

def formated_docs(docs):
    """Concatenate the ``page_content`` of every document in ``docs``.

    Args:
        docs: iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        A single string with the contents separated by one blank line
        (``"\\n\\n"``); an empty string when ``docs`` is empty.
    """
    contents = [document.page_content for document in docs]
    return "\n\n".join(contents)

# Assemble the RAG pipeline; data flows left to right through the | operators.
rag_chain = (
    {
        # retrieved chunks, flattened into one string by formated_docs
        "context": retriever | formated_docs,
        # the incoming query, forwarded unchanged
        "question": RunnablePassthrough(),
    }
    | custom_prompt        # fills {context} and {question} in the template
    | llm                  # sends the rendered prompt to the chat model
    | StrOutputParser()    # converts the model's reply to a plain string
)

# Ask something the indexed article cannot answer, to exercise the "I don't know" path.
rag_chain.invoke("What is the name of my son")

"That's a good question! However, I don't know the answer to that. Thank you for the question!"