In [1]:
!pip install -r requirements.txt



In [2]:
import os

from langchain_community.chat_models import ChatOllama
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_community.document_loaders import WebBaseLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [5]:
# Take the key from the environment instead of typing a secret into the notebook.
# Falls back to an empty string when the variable is not set.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")

In [6]:
# Single-turn chat wrapper: the caller's text fills the {prompt} slot of the user
# turn, and the string ends on the assistant header so the model continues from there.
llama3_format_template = (
    "<|begin_of_text|>"
    "<|start_header_id|>user<|end_header_id|>{prompt}<|eot_id|>"
    "<|start_header_id|>assistant<|end_header_id|>"
)

In [7]:
# Export the key only when one was actually provided, so an empty placeholder
# does not overwrite an OPENAI_API_KEY that is already set in the environment.
if OPENAI_API_KEY:
    os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY

# Pages that make up the retrieval corpus.
urls = [
    "https://lilianweng.github.io/posts/2023-06-23-agent/",
    "https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/",
    "https://lilianweng.github.io/posts/2023-10-25-adv-attack-llm/",
]

# Each loader yields a list of documents; flatten them into a single list.
docs_list = [doc for url in urls for doc in WebBaseLoader(url).load()]

text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    # Set a really small chunk size, just to show.
    chunk_size=100,
    chunk_overlap=50,
)
doc_splits = text_splitter.split_documents(docs_list)

embeddings_model = OpenAIEmbeddings()

# NOTE(review): because persist_directory is set, re-running this cell presumably
# adds the same chunks to the stored collection again - confirm, and consider
# loading the existing collection instead of rebuilding it on every run.
vectorstore = Chroma.from_documents(
    documents=doc_splits,
    embedding=embeddings_model,
    collection_name="sol_day2",
    persist_directory="./chroma_db",
)

# Search string used for retrieval and for the relevance check.
query = "llm agent memory"

retriever = vectorstore.as_retriever()
# `invoke` is the supported entry point; `get_relevant_documents` is deprecated.
docs = retriever.invoke(query)

# Common setting for chains
local_llm = "llama3"
llm = ChatOllama(model=local_llm, format="json", temperature=0)
parser = JsonOutputParser()

# Relevance check: for each retrieved document, ask the model whether it is relevant
# to the query. The reply is expected to be JSON with a "relevance" key.
relevance_prompt = PromptTemplate(
    template=llama3_format_template.format(
        prompt="""Check the <query> is relevant to retrieved <context>. Answer with "yes" or "no" in json format(key: relevance).
        Here is the question: {query}
        Here is the context: {context}"""
    ),
    input_variables=["query", "context"],
)
relevant_chain = relevance_prompt | llm | parser

valid_doc_list = []
for doc in docs:
    verdict = relevant_chain.invoke({"query": query, "context": doc.page_content})
    # The model may omit the key or vary casing/whitespace; anything that is not a
    # clear "yes" is treated as not relevant instead of raising KeyError.
    relevance = verdict.get("relevance", "") if isinstance(verdict, dict) else ""
    if str(relevance).strip().lower() == "yes":
        valid_doc_list.append(doc)

# Drop duplicate in context: keep the first document for each distinct page_content.
context_list = []
seen_contents = set()
for doc in valid_doc_list:
    if doc.page_content not in seen_contents:
        seen_contents.add(doc.page_content)
        context_list.append(doc)

# Upper bound on generate-then-verify rounds. Without a bound the loop never
# terminates if the checker keeps answering "yes", because every retry sends the
# same prompt to the same model.
MAX_ANSWER_ATTEMPTS = 3

# If context_list is not empty
if context_list:
    # NOTE(review): the answer is generated for this fixed question rather than the
    # retrieval `query` - confirm that this is intended.
    question = "What is agent memory?"
    context = "\n".join(doc.page_content for doc in context_list)

    # Prompts and chains do not change between attempts, so build them once.
    answer_prompt = PromptTemplate(
        template=llama3_format_template.format(
            prompt="""Answer to the <query> with <context>. Answer in json format(key: answer).
                Here is the question: {query}
                Here is the context: {context}"""
        ),
        input_variables=["query", "context"],
    )
    question_chain = answer_prompt | llm | parser

    # Hallucination check: the checker receives the context as well as the answer,
    # otherwise it has nothing to verify the answer against.
    hallucination_prompt = PromptTemplate(
        template=llama3_format_template.format(
            prompt="""Check whether the <answer> contains hallucination, meaning claims that are not supported by the <context>. Answer with "yes" or "no" in json format(key: hallucination).
                Here is the answer: {answer}
                Here is the context: {context}"""
        ),
        input_variables=["answer", "context"],
    )
    hallucination_chain = hallucination_prompt | llm | parser

    for attempt in range(1, MAX_ANSWER_ATTEMPTS + 1):
        # Answer
        answer = question_chain.invoke(
            {"query": question, "context": context}
        )["answer"]

        # Verify; a missing key or unexpected value counts as "not verified".
        verdict = hallucination_chain.invoke({"answer": answer, "context": context})
        hallucination = verdict.get("hallucination", "") if isinstance(verdict, dict) else ""
        if str(hallucination).strip().lower() == "no":
            break
    else:
        # Loop ended without a passing check: still show the last answer, flagged.
        print(
            f"Warning: answer did not pass the hallucination check "
            f"after {MAX_ANSWER_ATTEMPTS} attempts."
        )

    print("="*50)
    print(f"Answer: {answer}")
    # A separate loop variable keeps the joined `context` string intact.
    for idx, context_doc in enumerate(context_list, start=1):
        print("="*50)
        print(f"Context {idx}")
        print(context_doc)
        print(f"Content: {context_doc.page_content}")
        print(f"Source: {context_doc.metadata['source']}")
    print("="*50)

else:
    print("No")

  warn_deprecated(


Answer: Agent memory refers to the component of an agent system that enables it to behave conditioned on past experience, allowing it to learn from its interactions and make decisions based on its history.
Context 1
page_content='The design of generative agents combines LLM with memory, planning and reflection mechanisms to enable agents to behave conditioned on past experience, as well as to interact with other agents.' metadata={'description': 'Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.\nAgent System Overview In a LLM-powered autonomous agent system, LLM functions as the agent’s brain, complemented by several key components:', 'language': 'en', 'source': 'https://lilianweng.github.