In [1]:
import os
import bs4
import getpass
from langchain_chroma import Chroma
from langchain_community.document_loaders import WebBaseLoader
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.output_parsers import JsonOutputParser, StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough

def _require_secret(name: str) -> None:
    """Make sure the environment variable `name` holds a value.

    If the variable is unset or empty, the user is prompted for it with the
    variable name shown in the prompt, so it is clear which key is being
    requested. A value that is already present is left untouched, which lets
    the cell be re-run without typing the keys again.
    """
    if not os.environ.get(name):
        os.environ[name] = getpass.getpass(f"{name}: ")


_require_secret("OPENAI_API_KEY")
# NOTE(review): presumably this flag switches on LangChain/LangSmith tracing,
# which is why LANGCHAIN_API_KEY is requested right after it; confirm.
os.environ["LANGCHAIN_TRACING_V2"] = "true"
_require_secret("LANGCHAIN_API_KEY")

from langchain_openai import ChatOpenAI

# Chat model shared by every chain in this notebook.
llm = ChatOpenAI(model="gpt-4o-mini")

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [2]:
# Blog posts that make up the knowledge base.
urls = [
    "https://lilianweng.github.io/posts/2023-06-23-agent/",
    "https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/",
    "https://lilianweng.github.io/posts/2023-10-25-adv-attack-llm/",
]

# Restrict HTML parsing to the elements carrying one of these CSS classes.
post_only = bs4.SoupStrainer(class_=("post-content", "post-title", "post-header"))
loader = WebBaseLoader(web_paths=urls, bs_kwargs={"parse_only": post_only})
docs = loader.load()

# Split the loaded pages into 1000-character chunks that overlap by 200.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)

# Embed every chunk and index it in a Chroma vector store.
embedding_model = OpenAIEmbeddings(model="text-embedding-3-small")
vectorstore = Chroma.from_documents(documents=splits, embedding=embedding_model)

# Similarity search returning the 6 closest chunks per query.
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 6})

def format_docs(docs):
    """Concatenate the `page_content` of each document into one string.

    Args:
        docs: Iterable of objects exposing a `page_content` string attribute.

    Returns:
        The contents in their original order, separated by a blank line.
        An empty iterable yields an empty string.
    """
    contents = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(contents)


In [3]:
query = "agent memory"

# Retrieve once. `retrieved_docs` and `context_text` are reused by the later
# steps of this cell (hallucination check and source listing).
retrieved_docs = retriever.invoke(query)
context_text = format_docs(retrieved_docs)

# Prompt asking the model to grade the retrieved context against the question
# and to reply as JSON with a `relevance` field of "yes" or "no".
evaluation_template = (
    "retrieval 퀄리티를 LLM이 스스로 평가하도록 하세요. 문맥이 사용자 쿼리와 관련이 있으면 yes, 관련이 없으면 no로 출력합니다. relevance : yes, no 로 출력해주세요.\n"
    "{format_instructions}\n"
    "Context: {context}\n"
    "Question: {question}\n"
)
evaluation_prompt = PromptTemplate(
    template=evaluation_template,
    input_variables=["context", "question"],
    partial_variables={"format_instructions": JsonOutputParser().get_format_instructions()},
)

# The chain takes the context as an explicit input rather than running the
# retriever a second time: this avoids a redundant search and guarantees the
# graded text is exactly `context_text`.
evaluation_chain = evaluation_prompt | llm | JsonOutputParser()

evaluation_result = evaluation_chain.invoke({"context": context_text, "question": query})
print("[STEP1] relevance check result:", evaluation_result)

def _verdict(result, key):
    """Return the value stored under `key` as a trimmed, lower-cased string.

    Returns "" when `result` is not a dict or the key is missing, so callers
    can compare against "yes"/"no" without worrying about capitalisation,
    surrounding whitespace, or an unexpected parser output.
    """
    value = result.get(key) if isinstance(result, dict) else None
    return "" if value is None else str(value).strip().lower()


answer = ""
regenerated = False
if _verdict(evaluation_result, "relevance") == "yes":
    # Prompts and chains are identical for every attempt, so build them once
    # instead of on each loop iteration.
    answer_template = (
        "다음 문맥을 기반으로 사용자 질문에 답변해 주세요.\n"
        "Context: {context}\n"
        "Question: {question}\n"
        "Answer:"
    )
    answer_prompt = PromptTemplate(
        template=answer_template,
        input_variables=["context", "question"]
    )
    # The answer is generated from `context_text`, the same text the
    # hallucination grader receives below, rather than from a fresh retrieval.
    rag_chain = answer_prompt | llm | StrOutputParser()

    hallucination_template = (
        "생성된 답안에 Hallucination(잘못된 정보나 근거 없는 내용)이 포함되어 있는지 평가하세요. 반드시 유효한 JSON 형식으로만 답변하세요. 만약 답안에 사실과 다르거나 허위 내용이 있다면 yes, 허위 내용이 없다면 no로 출력합니다. hallucination : yes, no 로 출력해주세요.\n"
        "{format_instructions}\n"
        "Context: {context}\n"
        "Answer: {answer}\n"
    )
    hallucination_prompt = PromptTemplate(
        template=hallucination_template,
        input_variables=["context", "answer"],
        partial_variables={"format_instructions": JsonOutputParser().get_format_instructions()},
    )
    hallucination_chain = hallucination_prompt | llm | JsonOutputParser()

    # One initial attempt plus at most one regeneration.
    for _ in range(2):
        # Start every attempt from an empty string; otherwise the regenerated
        # answer would be appended to the one that was just rejected.
        answer = ""
        for chunk in rag_chain.stream({"context": context_text, "question": query}):
            answer += chunk

        hallucination_result = hallucination_chain.invoke({"context": context_text, "answer": answer})

        # To exercise the regeneration path manually, force the verdict here:
        # hallucination_result['hallucination'] = "yes"
        hallucination = _verdict(hallucination_result, "hallucination")

        if hallucination == "no":
            break
        if hallucination == "yes":
            if regenerated:
                print("\n최대 재생성 횟수(1회)를 초과\n")
                # Discard the answer so nothing unverified is reported.
                answer = ""
                break
            print("\nhallucination 감지됨: 답변을 다시 생성합니다.\n")
            regenerated = True
        # Any other verdict falls through: the next attempt (if any) runs,
        # and the last generated answer is kept once attempts run out.
else:
    print("\nRetrieved chunks are not relevant to the query.")

# ---------- Final report ----------
# Printed only when an answer survived the checks above (non-empty string).
if answer:
    print("\n\n[STEP2] 최종 답변:")
    print(answer)
    print("\n\n[STEP3] 출처:")
    # One line per retrieved chunk, in retrieval order.
    for doc in retrieved_docs:
        if hasattr(doc, "metadata"):
            source = doc.metadata.get("source", "출처 정보 없음")
        else:
            # Fallback label for objects that carry no metadata at all.
            source = "출처 정보 없음"
        print("-", source)


[STEP1] relevance check result: {'relevance': 'yes'}


[STEP2] 최종 답변:
Agent memory consists of both short-term and long-term components, allowing the agent to retain and recall information over varying time frames:

1. **Short-term Memory**: This involves in-context learning, where the agent utilizes the immediate information available within a specific interaction to make decisions. It allows the agent to react to situations based on recent inputs and context but does not retain this information for long periods.

2. **Long-term Memory**: This module enables the agent to retain and recall an infinite amount of information over extended periods. It relies on external databases or vector stores to organize and retrieve memories efficiently. Long-term memory captures a comprehensive list of experiences and interactions, providing a foundation for the agent's behavior informed by past events.

Together, these memory systems help agents synthesize past experiences into higher-level summari