In [1]:
import os

# Never paste a real key into the notebook: it ends up in version control and
# in shared copies. Export OPENAI_API_KEY in the shell (or a .env loader)
# before starting Jupyter; an unset variable falls back to the empty string,
# which is the value this cell used to hardcode.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")

In [5]:
!pip install -q --upgrade langchain_community bs4

# 3개의 블로그 포스팅 본문을 Load

In [6]:
from langchain_community.document_loaders import WebBaseLoader

# The three Lil'Log blog posts that make up the corpus.
post_urls = [
    "https://lilianweng.github.io/posts/2023-06-23-agent/",
    "https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/",
    "https://lilianweng.github.io/posts/2023-10-25-adv-attack-llm/",
]

# One loader for all URLs; load() fetches the pages over the network.
loader = WebBaseLoader(post_urls)
docs = loader.load()

# 불러온 본문을 Split (Chunking) : recursive text splitter 활용

In [7]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Deliberately tiny chunks (sizes are measured with len, i.e. in characters),
# just to demonstrate the splitter.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=100,
    chunk_overlap=50,
    length_function=len,
    is_separator_regex=False,
)

# Split every loaded page on its own and flatten the chunks into one list.
splits = [
    chunk
    for doc in docs
    for chunk in text_splitter.split_documents([doc])
]

# Chunks 를 임베딩하여 Vector store 저장: openai 사용

In [8]:
!pip install langchain-openai

Collecting langchain-openai
  Downloading langchain_openai-0.1.8-py3-none-any.whl.metadata (2.5 kB)
Downloading langchain_openai-0.1.8-py3-none-any.whl (38 kB)
Installing collected packages: langchain-openai
Successfully installed langchain-openai-0.1.8


In [9]:
import os
from langchain_openai import OpenAIEmbeddings

# Export the API key for the OpenAI client, but only when the notebook
# actually holds one: writing an empty string would overwrite a key that is
# already present in the environment.
if OPENAI_API_KEY:
    os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY

# Embedding model used for every vector store in this notebook.
embeddings_model = OpenAIEmbeddings()

In [10]:
!pip install langchain-chroma sentence_transformers

Collecting langchain-chroma
  Downloading langchain_chroma-0.1.1-py3-none-any.whl.metadata (1.3 kB)
Collecting sentence_transformers
  Downloading sentence_transformers-3.0.1-py3-none-any.whl.metadata (10 kB)
Collecting chromadb<0.6.0,>=0.4.0 (from langchain-chroma)
  Downloading chromadb-0.5.3-py3-none-any.whl.metadata (6.8 kB)
Collecting fastapi<1,>=0.95.2 (from langchain-chroma)
  Downloading fastapi-0.111.0-py3-none-any.whl.metadata (25 kB)
Collecting scikit-learn (from sentence_transformers)
  Downloading scikit_learn-1.5.0-cp310-cp310-macosx_10_9_x86_64.whl.metadata (11 kB)
Collecting scipy (from sentence_transformers)
  Downloading scipy-1.13.1-cp310-cp310-macosx_10_9_x86_64.whl.metadata (60 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.6/60.6 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
Collecting Pillow (from sentence_transformers)
  Downloading pillow-10.3.0-cp310-cp310-macosx_10_10_x86_64.whl.metadata (9.2 kB)
Collecting build>=1.0.3 (from chromadb<

In [11]:
# Chroma vector store plus the (currently unused) open-source embedding class.
from langchain_chroma import Chroma
from langchain_community.embeddings.sentence_transformer import (
    SentenceTransformerEmbeddings,
)

# An open-source alternative to the OpenAI embeddings would be
#   SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
# but this notebook indexes with `embeddings_model`.

# Embed all chunks into the "sol_day2" collection and expose it as a retriever.
vectorstore = Chroma.from_documents(
    splits,
    embeddings_model,
    collection_name="sol_day2",
)
retriever = vectorstore.as_retriever()

# User query = ‘agent memory’ 를 받아 관련된 chunks를 retrieve

In [12]:
# Retrieval query. The section heading asks for 'agent memory'; the previous
# literal was misspelled ("momory"), which degrades the embedding lookup.
query = "agent memory"
searched_docs = vectorstore.similarity_search(query)

# print results
print(searched_docs)

[Document(page_content='Memory', metadata={'description': 'Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.\nAgent System Overview In a LLM-powered autonomous agent system, LLM functions as the agent’s brain, complemented by several key components:', 'language': 'en', 'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'title': "LLM Powered Autonomous Agents | Lil'Log"}), Document(page_content='memory (e.g. two agents continuing the conversation topic) and coordination of social events (e.g.', metadata={'description': 'Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and Ba

In [13]:
# Display the first hit returned by similarity_search (content and metadata).
searched_docs[0]

Document(page_content='Memory', metadata={'description': 'Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.\nAgent System Overview In a LLM-powered autonomous agent system, LLM functions as the agent’s brain, complemented by several key components:', 'language': 'en', 'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'title': "LLM Powered Autonomous Agents | Lil'Log"})

In [14]:
# save DB
# Persist the chunked `splits` (not the raw, page-sized `docs`) so the on-disk
# index holds the same retrieval units as the in-memory `vectorstore`.
db = Chroma.from_documents(
    documents=splits,
    embedding=embeddings_model,
    persist_directory="./chroma_db",
)

# load DB
# Re-open the persisted index from disk; this is the handle `db` keeps.
db = Chroma(persist_directory="./chroma_db", embedding_function=embeddings_model)


# User query와 retrieved chunk 에 대해 relevance 가 있는지를 평가하는 시스템 프롬프트 작성
- retrieval 퀄리티를 LLM 이 스스로 평가하도록 하고, 관련이 있으면 {‘relevance’: ‘yes’} 관련이 없으면 {‘relevance’: ‘no’} 라고 출력하도록 함. (JsonOutputParser() 를 활용 ) - llama3 prompt format 준수

In [15]:
!pip install langchainhub

Collecting langchainhub
  Downloading langchainhub-0.1.20-py3-none-any.whl.metadata (659 bytes)
Collecting types-requests<3.0.0.0,>=2.31.0.2 (from langchainhub)
  Downloading types_requests-2.32.0.20240602-py3-none-any.whl.metadata (1.8 kB)
Downloading langchainhub-0.1.20-py3-none-any.whl (5.0 kB)
Downloading types_requests-2.32.0.20240602-py3-none-any.whl (15 kB)
Installing collected packages: types-requests, langchainhub
Successfully installed langchainhub-0.1.20 types-requests-2.32.0.20240602


In [16]:
# Retrieval query; the previous literal was misspelled ("momory").
query = "agent memory"

searched_docs = vectorstore.similarity_search(query)
# Keep only the chunk text; the metadata is not needed for the relevance check.
contexts = [doc.page_content for doc in searched_docs]
print(contexts)

['Memory', 'memory (e.g. two agents continuing the conversation topic) and coordination of social events (e.g.', 'Memory can be defined as the processes used to acquire, store, retain, and later retrieve', 'Long-term memory: This provides the agent with the capability to retain and recall (infinite)']


In [17]:
from langchain.prompts import PromptTemplate

# Single-turn Llama 3 chat wrapper: one user message, then the assistant header.
llama3_format_template = (
    "<|begin_of_text|>"
    "<|start_header_id|>user<|end_header_id|>"
    "{prompt}<|eot_id|>"
    "<|start_header_id|>assistant<|end_header_id|>"
)

# Relevance-grading instruction. {query} and {context} stay as placeholders
# that are filled in later, when the prompt template is invoked.
prompt_relevance = (
    "\n"
    'Check the <query> is relevant to retrieved <context>. Answer with "yes" or "no" in json format(key: relevance).\n'
    "\n"
    "<query>\n"
    "{query}\n"
    "</query>\n"
    "\n"
    "<context>\n"
    "{context}\n"
    "</context>\n"
)

# Only {prompt} is substituted here; the inner placeholders survive untouched.
prompt_relevance_llama3 = llama3_format_template.format(prompt=prompt_relevance)

In [18]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.runnables import RunnableParallel, RunnablePassthrough


def join_page_contents(documents):
    """Return the text of the retrieved documents, separated by blank lines.

    Used so the prompt receives plain chunk text instead of the repr of
    Document objects (which would also paste all metadata into the prompt).
    """
    return "\n\n".join(doc.page_content for doc in documents)


query = "Memory"

# Reuse the vector store built earlier. Calling Chroma.from_documents again
# with the same collection name appends the same chunks to that collection,
# so every re-run would add duplicate hits (and pay for the embeddings again).
retriever = vectorstore.as_retriever()

prompt = ChatPromptTemplate.from_template(prompt_relevance_llama3)
# One model definition: previously a second assignment silently discarded the
# pinned model name. temperature=0 keeps the grading as repeatable as possible.
model = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0)
output_parser = JsonOutputParser()

# The incoming query is fanned out: once through the retriever (-> context
# text) and once unchanged (-> query).
setup_and_retrieval = RunnableParallel(
    {"context": retriever | join_page_contents, "query": RunnablePassthrough()}
)
chain = setup_and_retrieval | prompt | model | output_parser
answer = chain.invoke(query)
print(answer)

{'relevance': 'yes'}


# 5 에서 모든 docs에 대해 ‘no’ 라면 디버깅
(Splitter, Chunk size, overlap, embedding model, vector store, retrieval 평가 시스템 프롬프트 등)

# 5에서 ‘yes’ 라면 질문과 명확히 관련 없는 docs 나 질문
(예: ‘I like an apple’에 대해서는 ‘no’ 라고 나오는지 테스트 프롬프트 및 평가 코드 작성. 이 때는 관련 없다는 답변 작성)

In [19]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.runnables import RunnableParallel, RunnablePassthrough

query = "agent memory"

# Reuse the existing vector store: running Chroma.from_documents again on the
# same collection name would append the same chunks a second time and make
# the retriever return duplicates.
retriever = vectorstore.as_retriever()

prompt = ChatPromptTemplate.from_template(prompt_relevance_llama3)
model = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0)
output_parser = JsonOutputParser()

# Feed the prompt the chunk text only; passing the retriever output directly
# would insert the repr of each Document, metadata included.
setup_and_retrieval = RunnableParallel(
    {
        "context": retriever
        | (lambda documents: "\n\n".join(doc.page_content for doc in documents)),
        "query": RunnablePassthrough(),
    }
)
chain = setup_and_retrieval | prompt | model | output_parser
answer = chain.invoke(query)
print(answer)

{'relevance': 'yes'}


# ‘yes’ 이고 7의 평가에서도 문제가 없다면, 4의 retrieved chunk 를 가지고 답변 작성

- 막힌 부분: 랭체인이 relevance 여부와 retrieved chunk를 함께 내뱉게 만드는 방법.

# 생성된 답안에 Hallucination 이 있는지 평가하는 시스템 프롬프트 작성.
LLM이 스스로 평가하도록 하고, hallucination 이 있으면 {‘hallucination’: ‘yes’} 없으면 {‘hallucination’: ‘no’} 라고 출력하도록 함

# 9 에서 ‘yes’ 면 8 로 돌아가서 다시 생성,
‘no’ 면 답변 생성하고 유저에게 답변 생성에 사용된 출처와 함께 출력


# 최종

In [7]:
from langchain.prompts import PromptTemplate

# Llama 3 single-turn wrapper, assembled from its special-token segments.
llama3_format_template = "".join(
    [
        "<|begin_of_text|>",
        "<|start_header_id|>user<|end_header_id|>",
        "{prompt}",
        "<|eot_id|>",
        "<|start_header_id|>assistant<|end_header_id|>",
    ]
)

# Relevance-grading instruction, one list entry per line of the prompt.
# The empty first and last entries produce the leading and trailing newline.
prompt_relevance = "\n".join(
    [
        "",
        'Check the <query> is relevant to retrieved <context>. Answer with "yes" or "no" in json format(key: relevance).',
        "",
        "<query>",
        "{query}",
        "</query>",
        "",
        "<context>",
        "{context}",
        "</context>",
        "",
    ]
)

# Wrap the instruction; {query} and {context} remain unfilled placeholders.
prompt_relevance_llama3 = llama3_format_template.format(prompt=prompt_relevance)

In [38]:
import os

# langchain_chroma is the Chroma integration used by the rest of this
# notebook; the langchain_community.vectorstores.Chroma class is deprecated.
from langchain_chroma import Chroma
from langchain_community.chat_models import ChatOllama
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Only export a non-empty key so a key already present in the environment is
# not overwritten with an empty string.
if OPENAI_API_KEY:
    os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY

# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
urls = [
    "https://lilianweng.github.io/posts/2023-06-23-agent/",
    "https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/",
    "https://lilianweng.github.io/posts/2023-10-25-adv-attack-llm/",
]
COLLECTION_NAME = "sol_day2"
PERSIST_DIRECTORY = "./chroma_db"
# Upper bound on answer regeneration. The LLM runs at temperature=0, so an
# unbounded "retry until no hallucination" loop could spin forever.
MAX_ANSWER_ATTEMPTS = 3

query = "llm agent memory"          # text used for retrieval and relevance grading
question = "What is agent memory?"  # question the answer chain must respond to

# Instruction texts. The leading spaces on the continuation lines are kept
# exactly as they were sent to the model before.
RELEVANCE_INSTRUCTION = (
    'Check the <query> is relevant to retrieved <context>. Answer with "yes" or "no" in json format(key: relevance).\n'
    "        Here is the question: {query}\n"
    "        Here is the context: {context}"
)
ANSWER_INSTRUCTION = (
    "Answer to the <query> with <context>. Answer in json format(key: answer).\n"
    "                Here is the question: {query}\n"
    "                Here is the context: {context}"
)
# The grader now receives the context as well: without it the model has
# nothing to compare the answer against.
HALLUCINATION_INSTRUCTION = (
    "Check whether the <answer> contains hallucination, i.e. claims that are "
    "not supported by the <context>. "
    'Answer with "yes" or "no" in json format(key: hallucination).\n'
    "Here is the context: {context}\n"
    "Here is the answer: {answer}"
)


def build_json_chain(instruction, input_variables):
    """Wrap `instruction` in the Llama 3 template and pipe it into the LLM.

    Args:
        instruction: Prompt text containing `{name}` placeholders.
        input_variables: Names of the placeholders used in `instruction`.

    Returns:
        A runnable `prompt | llm | parser` that yields the parsed JSON output.
    """
    chain_prompt = PromptTemplate(
        template=llama3_format_template.format(prompt=instruction),
        input_variables=input_variables,
    )
    return chain_prompt | llm | parser


def verdict(result, key):
    """Return the normalized (stripped, lower-cased) value stored under `key`.

    Returns "" when `result` is not a dict or lacks `key`, so a malformed
    model reply is treated as "no verdict" instead of raising KeyError.
    """
    if not isinstance(result, dict):
        return ""
    return str(result.get(key, "")).strip().lower()


# ---------------------------------------------------------------------------
# Load and chunk the blog posts
# ---------------------------------------------------------------------------
docs_list = [page for url in urls for page in WebBaseLoader(url).load()]

text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    # Set a really small chunk size, just to show.
    chunk_size=100,
    chunk_overlap=50,
)
doc_splits = text_splitter.split_documents(docs_list)

# ---------------------------------------------------------------------------
# Index (only once) and retrieve
# ---------------------------------------------------------------------------
embeddings_model = OpenAIEmbeddings()

vectorstore = Chroma(
    collection_name=COLLECTION_NAME,
    embedding_function=embeddings_model,
    persist_directory=PERSIST_DIRECTORY,
)
# The collection is persisted on disk, so adding the chunks on every run
# would keep appending duplicates. Index only when it is still empty.
if not vectorstore.get(limit=1)["ids"]:
    vectorstore.add_documents(doc_splits)

retriever = vectorstore.as_retriever()
docs = retriever.invoke(query)

# ---------------------------------------------------------------------------
# Chains (built once, outside any loop)
# ---------------------------------------------------------------------------
local_llm = "llama3"
llm = ChatOllama(model=local_llm, format="json", temperature=0)
parser = JsonOutputParser()

relevant_chain = build_json_chain(RELEVANCE_INSTRUCTION, ["query", "context"])
question_chain = build_json_chain(ANSWER_INSTRUCTION, ["query", "context"])
hallucination_chain = build_json_chain(HALLUCINATION_INSTRUCTION, ["answer", "context"])

# ---------------------------------------------------------------------------
# Relevance check: keep only chunks the LLM grades as relevant
# ---------------------------------------------------------------------------
valid_doc_list = [
    doc
    for doc in docs
    if verdict(
        relevant_chain.invoke({"query": query, "context": doc.page_content}),
        "relevance",
    )
    == "yes"
]

# Drop duplicate chunk texts (a store indexed more than once returns them),
# keeping the first occurrence and the original order.
seen_contents = set()
context_list = []
for doc in valid_doc_list:
    if doc.page_content not in seen_contents:
        seen_contents.add(doc.page_content)
        context_list.append(doc)

# ---------------------------------------------------------------------------
# Answer generation with a bounded hallucination check
# ---------------------------------------------------------------------------
if context_list:
    context = "\n".join(doc.page_content for doc in context_list)

    answer = None
    for _attempt in range(MAX_ANSWER_ATTEMPTS):
        generated = question_chain.invoke({"query": question, "context": context})
        candidate = generated.get("answer") if isinstance(generated, dict) else None
        if not candidate:
            # Malformed reply (no "answer" key): count it as a failed attempt.
            continue

        # Hallucination check
        graded = hallucination_chain.invoke({"answer": candidate, "context": context})
        if verdict(graded, "hallucination") == "no":
            answer = candidate
            break

    print("="*50)
    if answer is None:
        print(f"No hallucination-free answer after {MAX_ANSWER_ATTEMPTS} attempts.")
    else:
        print(f"Answer: {answer}")
    # Always list the sources that were used as context.
    for idx, doc in enumerate(context_list):
        print("="*50)
        print(f"Context {idx+1}")
        print(doc)
        print(f"Content: {doc.page_content}")
        print(f"Source: {doc.metadata['source']}")
    print("="*50)

else:
    print("No")

Answer: Agent memory refers to the ability of an artificial intelligence (AI) or a software agent to store, retrieve, and utilize information from its previous interactions, experiences, and observations to inform its future decisions and behaviors.
Context 1
page_content='The design of generative agents combines LLM with memory, planning and reflection mechanisms to enable agents to behave conditioned on past experience, as well as to interact with other agents.' metadata={'description': 'Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.\nAgent System Overview In a LLM-powered autonomous agent system, LLM functions as the agent’s brain, complemented by several key components:', 'language':