In [1]:
from dotenv import load_dotenv, find_dotenv

# Locate the nearest .env file and load its entries into the process environment.
# The result is bound to `_` so the boolean return value is not echoed as cell output.
_ = load_dotenv(dotenv_path=find_dotenv())

In [3]:
from langchain_community.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Fetch both blog posts; `docs` accumulates the documents loaded from each URL in order.
docs = []
for post_url in (
    "https://lilianweng.github.io/posts/2023-06-23-agent/",
    "https://lilianweng.github.io/posts/2024-02-05-human-data-quality/",
):
    loader = WebBaseLoader(post_url)
    docs.extend(loader.load())

In [4]:
import uuid

from langchain_core.documents import Document
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI

In [5]:
# Gemini chat model used for summarisation (temperature 0).
llm = ChatGoogleGenerativeAI(
    model="models/gemini-2.5-flash-preview-05-20",
    temperature=0,
)

summary_prompt = ChatPromptTemplate.from_template(
    "Summarize the following document: \n\n {doc}"
)

# Pipeline: Document -> {"doc": page text} -> prompt -> LLM -> plain string.
chain = (
    {"doc": lambda document: document.page_content}
    | summary_prompt
    | llm
    | StrOutputParser()
)

# One summary per entry in `docs`, running at most five requests concurrently.
summaries = chain.batch(docs, config={"max_concurrency": 5})

In [9]:
from langchain.storage import InMemoryByteStore
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain.retrievers.multi_vector import MultiVectorRetriever

In [13]:
# Metadata key that links each summary back to its parent document.
id_key = "doc_id"

# Vector index that will hold the embedded summaries.
vectorstore = Chroma(
    collection_name="summaries",
    embedding_function=GoogleGenerativeAIEmbeddings(model="models/text-embedding-004"),
)

# Storage layer for the full parent documents.
store = InMemoryByteStore()

retriever = MultiVectorRetriever(vectorstore=vectorstore, byte_store=store, id_key=id_key)

# One fresh UUID per parent document.
doc_ids = [str(uuid.uuid4()) for _ in docs]

# Wrap every summary in a Document tagged with the id of the document it summarises.
summary_docs = [
    Document(page_content=summary, metadata={id_key: parent_id})
    for parent_id, summary in zip(doc_ids, summaries)
]

# Index the summaries for similarity search and store the parents under the same ids.
retriever.vectorstore.add_documents(summary_docs)
retriever.docstore.mset(list(zip(doc_ids, docs)))

In [15]:
query = "Memory in agents"

# Search the summary index directly (bypassing the retriever) and show the best-matching summary.
sub_docs = vectorstore.similarity_search(query=query, k=1)
sub_docs[0].page_content

'This document by Lilian Weng explores the concept of LLM-powered autonomous agents, positioning Large Language Models (LLMs) as their core "brain." These agents are enhanced by three key components:\n\n1.  **Planning:** Agents break down complex tasks into manageable subgoals using techniques like Chain of Thought (CoT) and Tree of Thoughts (ToT). They also employ **self-reflection** to learn from past actions and refine future steps, utilizing frameworks such as ReAct (integrating reasoning and acting), Reflexion (dynamic memory and self-reflection to avoid inefficient trajectories), Chain of Hindsight (learning from feedback sequences), and Algorithm Distillation (applying self-improvement to reinforcement learning).\n\n2.  **Memory:** Analogous to human memory, agents use:\n    *   **Short-term memory:** In-context learning within the LLM\'s finite context window.\n    *   **Long-term memory:** An external vector store for retaining and recalling vast amounts of information, access

In [17]:
# `n_results` is not an argument MultiVectorRetriever acts on, so passing it to `invoke`
# does not cap the number of hits. The size of the underlying vector-store search is
# controlled through the retriever's `search_kwargs`, so set `k` there instead.
retriever.search_kwargs = {**retriever.search_kwargs, "k": 1}

# The retriever matches the query against the summaries but returns the full parent
# documents; preview the first 500 characters of the top hit.
retrieved_docs = retriever.invoke(query)
retrieved_docs[0].page_content[0:500]

"\n\n\n\n\n\nLLM Powered Autonomous Agents | Lil'Log\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nLil'Log\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n|\n\n\n\n\n\n\nPosts\n\n\n\n\nArchive\n\n\n\n\nSearch\n\n\n\n\nTags\n\n\n\n\nFAQ\n\n\n\n\n\n\n\n\n\n      LLM Powered Autonomous Agents\n    \nDate: June 23, 2023  |  Estimated Reading Time: 31 min  |  Author: Lilian Weng\n\n\n \n\n\nTable of Contents\n\n\n\nAgent System Overview\n\nComponent One: Planning\n\nTask Decomposition\n\nSelf-Reflection\n\n\nComponent Two: Memory\n\nTypes of Memory\n\nMaximum Inner Product Search (MIPS)\n\n\nComponent Three:"