In [1]:
import os

# Take the key from the environment rather than committing it to the notebook.
# Falls back to "" (the previous placeholder value) when the variable is unset.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")

In [2]:
!pip install -q --upgrade langchain_community

In [3]:
from langchain_community.document_loaders import WebBaseLoader



# 3개의 블로그 포스팅 본문을 Load

In [4]:
# Blog posts from lilianweng.github.io that make up the retrieval corpus.
blog_post_urls = [
    "https://lilianweng.github.io/posts/2023-06-23-agent/",
    "https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/",
    "https://lilianweng.github.io/posts/2023-10-25-adv-attack-llm/",
]

# Fetch every page; `docs` holds the loaded documents, one entry per loader result.
loader = WebBaseLoader(blog_post_urls)
docs = loader.load()

# 불러온 본문을 Split (Chunking) : recursive text splitter 활용

In [5]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Recursive character splitter with a deliberately tiny chunk size
# (100 characters, 50-character overlap), measuring length with plain len().
splitter_options = {
    "chunk_size": 100,
    "chunk_overlap": 50,
    "length_function": len,
    "is_separator_regex": False,
}
text_splitter = RecursiveCharacterTextSplitter(**splitter_options)

# Split each loaded document on its own and flatten the chunks into one list,
# preserving document order.
splits = [
    chunk
    for doc in docs
    for chunk in text_splitter.split_documents([doc])
]

# Chunks 를 임베딩하여 Vector store 저장: openai 사용

In [6]:
!pip install langchain-openai



In [7]:
import os
from langchain_openai import OpenAIEmbeddings

# Export the key only when one was actually provided: assigning the empty
# placeholder would overwrite a valid OPENAI_API_KEY already present in the
# environment.
if OPENAI_API_KEY:
    os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY

# Embedding model used for both indexing and querying the vector store.
embeddings_model = OpenAIEmbeddings()

In [8]:
!pip install langchain-chroma sentence_transformers



In [9]:
# import
from langchain_chroma import Chroma
from langchain_community.embeddings.sentence_transformer import (
    SentenceTransformerEmbeddings,
)

# Embed every chunk with `embeddings_model` and index the vectors in the
# "sol_day2" Chroma collection.
# (Open-source alternative, currently not used:
#  SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2").)
chroma_build_args = {
    "documents": splits,
    "embedding": embeddings_model,
    "collection_name": "sol_day2",
}
vectorstore = Chroma.from_documents(**chroma_build_args)

# Retriever view over the same store, used by the LCEL chains.
retriever = vectorstore.as_retriever()

# User query = ‘agent memory’ 를 받아 관련된 chunks를 retrieve

In [10]:
# The query was misspelled ("agent momory"); the exercise asks for 'agent memory'.
query = "agent memory"
searched_docs = vectorstore.similarity_search(query)

# print results
print(searched_docs)

[Document(page_content='Memory', metadata={'description': 'Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.\nAgent System Overview In a LLM-powered autonomous agent system, LLM functions as the agent’s brain, complemented by several key components:', 'language': 'en', 'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'title': "LLM Powered Autonomous Agents | Lil'Log"}), Document(page_content='memory (e.g. two agents continuing the conversation topic) and coordination of social events (e.g.', metadata={'description': 'Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and Ba

In [11]:
# Display the first returned hit on its own (page_content plus metadata).
searched_docs[0]

Document(page_content='Memory', metadata={'description': 'Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.\nAgent System Overview In a LLM-powered autonomous agent system, LLM functions as the agent’s brain, complemented by several key components:', 'language': 'en', 'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'title': "LLM Powered Autonomous Agents | Lil'Log"})

In [12]:
# save DB
# Persist the chunked `splits` rather than the three unsplit `docs`, so the
# on-disk index holds one vector per chunk like the in-memory store does.
# NOTE(review): re-running this cell presumably appends the same chunks to
# ./chroma_db again — confirm, and clear the directory before rebuilding.
db = Chroma.from_documents(splits, embeddings_model, persist_directory="./chroma_db")

# load DB
# Re-open the persisted collection with the same embedding function used to build it.
db = Chroma(persist_directory="./chroma_db", embedding_function=embeddings_model)


# User query와 retrieved chunk 에 대해 relevance 가 있는지를 평가하는 시스템 프롬프트 작성
- retrieval 퀄리티를 LLM 이 스스로 평가하도록 하고, 관련이 있으면 {‘relevance’: ‘yes’} 관련이 없으면 {‘relevance’: ‘no’} 라고 출력하도록 함. (JsonOutputParser() 를 활용 ) - llama3 prompt format 준수

In [13]:
!pip install langchainhub



In [14]:
# The query was misspelled ("agent momory"); the exercise asks for 'agent memory'.
query = "agent memory"

searched_docs = vectorstore.similarity_search(query)
# Keep only the chunk text; the metadata is not needed for relevance grading.
contexts = [doc.page_content for doc in searched_docs]
print(contexts)

['Memory', 'memory (e.g. two agents continuing the conversation topic) and coordination of social events (e.g.', 'Memory can be defined as the processes used to acquire, store, retain, and later retrieve', 'Long-term memory: This provides the agent with the capability to retain and recall (infinite)']


In [24]:
from langchain.prompts import PromptTemplate

# Llama 3 single-turn chat wrapper. In Meta's prompt format every header is
# followed by a blank line ("\n\n") before the message body, and the prompt ends
# with an open assistant header for the model to complete.
llama3_format_template = (
    "<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n\n"
    "{prompt}<|eot_id|>"
    "<|start_header_id|>assistant<|end_header_id|>\n\n"
)

# Relevance-grading instruction; {query} and {context} are filled in later by
# the chat prompt template.
prompt_relevance = """
Check the <query> is relevant to retrieved <context>. Answer with "yes" or "no" in json format(key: relevance).

<query>
{query}
</query>

<context>
{context}
</context>
"""
# str.format only substitutes {prompt}; the {query}/{context} placeholders inside
# the inserted text are left intact for the downstream template.
prompt_relevance_llama3 = llama3_format_template.format(prompt=prompt_relevance)

In [25]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.runnables import RunnableParallel, RunnablePassthrough

query = "Memory"

# Reuse the vector store built earlier. Calling Chroma.from_documents again with
# the same collection name would add every chunk a second time and make the
# retriever return duplicate hits.
retriever = vectorstore.as_retriever()

prompt = ChatPromptTemplate.from_template(prompt_relevance_llama3)
# Single model definition (the earlier one was immediately overwritten);
# temperature=0 keeps the verdict as stable as possible.
model = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0)
output_parser = JsonOutputParser()

setup_and_retrieval = RunnableParallel(
    {
        # Pass only the chunk text to the grader; the Document repr would also
        # inject the long page-level metadata into the prompt.
        "context": retriever | (lambda hits: "\n\n".join(hit.page_content for hit in hits)),
        "query": RunnablePassthrough(),
    }
)
chain = setup_and_retrieval | prompt | model | output_parser
answer = chain.invoke(query)
print(answer)

{'relevance': 'yes'}


# 5 에서 모든 docs에 대해 ‘no’ 라면 디버깅
(Splitter, Chunk size, overlap, embedding model, vector store, retrieval 평가 시스템 프롬프트 등)

# 5에서 ‘yes’ 라면 질문과 명확히 관련 없는 docs 나 질문
(예: ‘I like an apple’에 대해서는 ‘no’ 라고 나오는지 테스트 프롬프트 및 평가 코드 작성. 이 때는 관련 없다는 답변 작성)

In [26]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.runnables import RunnableParallel, RunnablePassthrough

query = "agent memory"

# Reuse the vector store built earlier. Calling Chroma.from_documents again with
# the same collection name would add every chunk once more and make the
# retriever return duplicate hits.
retriever = vectorstore.as_retriever()

prompt = ChatPromptTemplate.from_template(prompt_relevance_llama3)
model = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0)
output_parser = JsonOutputParser()

setup_and_retrieval = RunnableParallel(
    {
        # Pass only the chunk text to the grader; the Document repr would also
        # inject the long page-level metadata into the prompt.
        "context": retriever | (lambda hits: "\n\n".join(hit.page_content for hit in hits)),
        "query": RunnablePassthrough(),
    }
)
chain = setup_and_retrieval | prompt | model | output_parser
answer = chain.invoke(query)
print(answer)

{'relevance': 'no'}


# ‘yes’ 이고 7의 평가에서도 문제가 없다면, 4의 retrieved chunk 를 가지고 답변 작성

- 막힌 부분: 랭체인이 relevance 여부와 retrieved chunk를 함께 내뱉게 만드는 방법.

In [39]:
# Display the verdict dict currently bound to `answer`.
# NOTE(review): this cell's execution count is out of sequence with its
# neighbours, so the value shown may come from a different run — confirm with
# a fresh Restart & Run All.
answer

{'relevance': 'no'}

# 생성된 답안에 Hallucination 이 있는지 평가하는 시스템 프롬프트 작성.
LLM이 스스로 평가하도록 하고, hallucination 이 있으면 {‘hallucination’: ‘yes’} 없으면 {‘hallucination’: ‘no’} 라고 출력하도록 함

In [21]:
from langchain.prompts import PromptTemplate

# Hallucination-grading instruction; {answer} is filled in later by the chat
# prompt template. (Fixed the "halllucination" misspelling in the instruction
# text sent to the model.)
prompt_hallucination = """
Check the <answer> has hallucination or not. Answer with "yes" or "no" in json format(key: hallucination).

<answer>
{answer}
</answer>
"""
# str.format only substitutes {prompt}; the {answer} placeholder inside the
# inserted text is left intact for the downstream template.
prompt_hallucination_llama3 = llama3_format_template.format(prompt=prompt_hallucination)

In [27]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.runnables import RunnableParallel, RunnablePassthrough

query = "Memory"

# Reuse the vector store built earlier. Calling Chroma.from_documents again with
# the same collection name would add every chunk once more and make the
# retriever return duplicate hits.
retriever = vectorstore.as_retriever()
class CustomRetriever:
    """Callable wrapper that returns both the chunk texts and the full documents.

    Wrapping the retriever this way lets a chain keep the retrieved documents
    (and their metadata) alongside the plain text used in prompts.
    """

    def __init__(self, retriever):
        """Store the underlying retriever; it must expose ``invoke(query)``."""
        self.retriever = retriever

    def __call__(self, query):
        """Retrieve documents for ``query``.

        Returns:
            dict with ``"retrieved_chunks"`` (list of ``page_content`` strings)
            and ``"retrieved_docs"`` (the documents as returned by the retriever).
        """
        # LangChain retrievers are Runnables: the entry point is `invoke`;
        # there is no `retrieve` method, so the old call raised AttributeError.
        retrieved_docs = self.retriever.invoke(query)
        chunks = [doc.page_content for doc in retrieved_docs]
        return {"retrieved_chunks": chunks, "retrieved_docs": retrieved_docs}

from langchain_core.output_parsers import StrOutputParser

# Upper bound on generate -> hallucination-check rounds. With temperature=0 the
# model tends to repeat itself, so an unbounded `while` loop could keep calling
# the API forever.
MAX_GENERATION_ATTEMPTS = 3

# Answer-writing instruction, wrapped in the same llama3 chat format as the graders.
prompt_generation_llama3 = llama3_format_template.format(prompt="""
Answer the <query> using only the information in <context>.

<query>
{query}
</query>

<context>
{context}
</context>
""")


def _to_grading_inputs(retrieval):
    """Flatten the retrieval step's output into prompt variables plus source URLs."""
    found = retrieval["context"]
    return {
        "query": retrieval["query"],
        # Only the chunk text goes into the prompts, not the Document repr.
        "context": "\n\n".join(found["retrieved_chunks"]),
        "sources": sorted(
            {doc.metadata.get("source", "unknown") for doc in found["retrieved_docs"]}
        ),
    }


model = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0)
output_parser = JsonOutputParser()

# relevance
setup_and_retrieval = RunnableParallel(
    {"context": CustomRetriever(retriever), "query": RunnablePassthrough()}
)
prompt_relevance = ChatPromptTemplate.from_template(prompt_relevance_llama3)

# RunnablePassthrough.assign keeps the retrieved context/sources in the output and
# adds the grader's verdict under "grade", so relevance and chunks come back together.
chain_relevance = (
    setup_and_retrieval
    | _to_grading_inputs
    | RunnablePassthrough.assign(grade=prompt_relevance | model | output_parser)
)
graded = chain_relevance.invoke(query)
answer = graded["grade"]

final_answer = None
if answer.get("relevance") == "yes":
    prompt_generation = ChatPromptTemplate.from_template(prompt_generation_llama3)
    chain_generation = prompt_generation | model | StrOutputParser()

    # hallucination
    prompt_hallucination = ChatPromptTemplate.from_template(prompt_hallucination_llama3)
    chain_hallucination = prompt_hallucination | model | output_parser

    for _attempt in range(MAX_GENERATION_ATTEMPTS):
        # Grade the generated answer, not the user's query.
        candidate = chain_generation.invoke(graded)
        answer = chain_hallucination.invoke({"answer": candidate})
        if answer.get("hallucination") == "no":
            final_answer = candidate
            break

    if final_answer is None:
        print(f"No hallucination-free answer after {MAX_GENERATION_ATTEMPTS} attempts.")
    else:
        print(final_answer)
        print("Sources:", *graded["sources"], sep="\n- ")
else:
    print("The retrieved chunks are not relevant to the query; no answer generated.")

# 9 에서 ‘yes’ 면 8 로 돌아가서 다시 생성,
‘no’ 면 답변 생성하고 유저에게 답변 생성에 사용된 출처와 함께 출력
