#### Environment


In [None]:
import os

os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com"

###### API Keys


In [2]:
os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")
os.environ["LANGCHAIN_API_KEY"] = os.getenv("LANGCHAIN_API_KEY")

#### Overview


In [45]:
import tiktoken
from langchain_core.documents import Document


def count_tokens(text: str, encoding_name: str = "cl100k_base") -> str:
    enc = tiktoken.get_encoding(encoding_name=encoding_name)
    return len(enc.encode(text=text))


def format_documents(docs: list[Document]) -> str:
    combined = "\n\n".join(doc.page_content for doc in docs)
    return combined

In [47]:
from langchain import hub
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

# INDEXING

# Load documents
docs = PyPDFLoader("../documents/NIPS-2017-attention-is-all-you-need-Paper.pdf").load()

# Split
split_docs = RecursiveCharacterTextSplitter(
    chunk_size=1500, chunk_overlap=200
).split_documents(documents=docs)

combined = format_documents(docs=split_docs)
tokens = count_tokens(text=combined)

print(f"Total Tokens: {tokens}")


# Embed
vector_store = Chroma.from_documents(documents=split_docs, embedding=OpenAIEmbeddings())
retriever = vector_store.as_retriever(search_kwargs={"k": 3})

# prompt
prompt = hub.pull("rlm/rag-prompt")

# LLM
llm = ChatOpenAI(model="gpt-5", temperature=0)

chain = (
    {"context": retriever | format_documents, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

response = chain.invoke("Summarize this document")

print(response)

Total Tokens: 8952
The article surveys how to build LLM-centered autonomous agents, positioning the LLM as the “brain” augmented by planning (task decomposition and reflection), memory (short-term context plus long-term vector-store retrieval/MIPS), and tool use (API calling and external models). It reviews techniques such as Chain/Tree of Thought, LLM+P, ReAct, Reflexion, Chain of Hindsight, Algorithm Distillation; retrieval methods (LSH, HNSW, FAISS, ScaNN); and tool frameworks/benchmarks like MRKL, Toolformer, HuggingGPT, and API-Bank, with case studies including ChemCrow, scientific discovery agents, and Generative Agents. The piece highlights PoCs (AutoGPT, GPT-Engineer) and key challenges: limited context windows, hard long-horizon planning/adaptation, and unreliable natural-language interfaces.
