In [1]:
%%capture --no-stderr
%pip install langchain langchain-openai langchain-openai langchain_chroma langchain-text-splitters langchain_community

In [2]:
import getpass
import os

os.environ["OPENAI_API_KEY"] = getpass.getpass()

from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-4o-mini")

··········


In [3]:
import getpass
import os

os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_API_KEY"] = getpass.getpass()

··········


In [4]:
from langchain import hub

prompt = hub.pull("rlm/rag-prompt")

example_messages = prompt.invoke(
    {"context": "filler context", "question": "filler question"}
).to_messages()

example_messages

[HumanMessage(content="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: filler question \nContext: filler context \nAnswer:", additional_kwargs={}, response_metadata={})]

In [5]:
import bs4
from langchain import hub
from langchain_chroma import Chroma
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Load, chunk and index the contents of the blog.
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    ),
)
docs = loader.load()

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)
vectorstore = Chroma.from_documents(documents=splits, embedding=OpenAIEmbeddings())

# Retrieve and generate using the relevant snippets of the blog.
retriever = vectorstore.as_retriever()
prompt = hub.pull("rlm/rag-prompt")

def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)


rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

rag_chain.invoke("What is Task Decomposition?")



"Task decomposition is the process of breaking down a complex task into smaller, manageable steps. This can be achieved using techniques like Chain of Thought (CoT) and Tree of Thoughts, which guide models to think step by step and explore multiple reasoning paths. These methods enhance performance by allowing for structured problem-solving and clearer interpretations of the model's reasoning process."

아래 1-10 task 에 대한 코드를, 각각의 task별 참고 링크를 보면서 작성해보세요.

**1. 3개의 블로그 포스팅 본문을 Load하기 : WebBaseLoader 활용**

https://python.langchain.com/v0.2/docs/integrations/document_loaders/web_base/

In [6]:
import bs4
from langchain_community.document_loaders import WebBaseLoader

urls = [
    "https://lilianweng.github.io/posts/2023-06-23-agent/",
    "https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/",
    "https://lilianweng.github.io/posts/2023-10-25-adv-attack-llm/",
]

loader = WebBaseLoader(
    web_paths=urls,
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    ),
)

docs = loader.load()

print(f"로드된 문서 수: {len(docs)}")
for i, doc in enumerate(docs):
    print(f"\n문서 {i+1}:")
    print(f"출처: {doc.metadata['source']}")
    print(f"내용 미리보기: {doc.page_content[:100]}...")

로드된 문서 수: 3

문서 1:
출처: https://lilianweng.github.io/posts/2023-06-23-agent/
내용 미리보기: 

      LLM Powered Autonomous Agents
    
Date: June 23, 2023  |  Estimated Reading Time: 31 min  |...

문서 2:
출처: https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/
내용 미리보기: 

      Prompt Engineering
    
Date: March 15, 2023  |  Estimated Reading Time: 21 min  |  Author: ...

문서 3:
출처: https://lilianweng.github.io/posts/2023-10-25-adv-attack-llm/
내용 미리보기: 

      Adversarial Attacks on LLMs
    
Date: October 25, 2023  |  Estimated Reading Time: 33 min  ...


**2. 불러온 본문을 Split (Chunking) : recursive text splitter 활용 (아래 링크 참고)**

https://python.langchain.com/v0.2/docs/how_to/recursive_text_splitter/

In [7]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,  # 각 청크의 최대 크기 (문자 수)
    chunk_overlap=200,  # 청크 간 겹침 크기 (문자 수)
    length_function=len,  # 크기 측정 함수
    is_separator_regex=False,  # 구분자를 정규식으로 해석하지 않음
)

# 2. 문서 분할
splits = text_splitter.split_documents(docs)

# 3. 결과 확인
print(f"분할된 청크 수: {len(splits)}")
for i, split in enumerate(splits[:3]):  # 처음 3개 청크만 확인
    print(f"\n청크 {i+1}:")
    print(f"출처: {split.metadata['source']}")
    print(f"내용 미리보기: {split.page_content[:100]}...")

분할된 청크 수: 183

청크 1:
출처: https://lilianweng.github.io/posts/2023-06-23-agent/
내용 미리보기: LLM Powered Autonomous Agents
    
Date: June 23, 2023  |  Estimated Reading Time: 31 min  |  Author...

청크 2:
출처: https://lilianweng.github.io/posts/2023-06-23-agent/
내용 미리보기: Memory

Short-term memory: I would consider all the in-context learning (See Prompt Engineering) as ...

청크 3:
출처: https://lilianweng.github.io/posts/2023-06-23-agent/
내용 미리보기: Fig. 1. Overview of a LLM-powered autonomous agent system.
Component One: Planning#
A complicated ta...


**3. Chunks 를 임베딩하여 Vector store 저장: openai 임베딩, chroma vectorstore 사용**

이 때,

embedding model 은 "text-embedding-3-small" 사용

embedding: https://python.langchain.com/v0.2/docs/integrations/text_embedding/openai/

vetor store: https://python.langchain.com/v0.2/docs/integrations/vectorstores/chroma/

retriever search_type 은 'similarity', search_kwargs={'k': 6} 을 사용해주세요.

In [8]:
from langchain_openai import OpenAIEmbeddings

embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

In [9]:
from langchain_chroma import Chroma

vector_store = Chroma(
    collection_name="my_collection",
    embedding_function=embeddings,
    persist_directory="./chroma_langchain_db"  # 로컬 저장 경로
)

In [10]:
from uuid import uuid4

uuids = [str(uuid4()) for _ in range(len(splits))]

# 벡터 스토어에 청크 추가
vector_store.add_documents(documents=splits, ids=uuids)

['a78cf58d-3f28-47c3-85c9-8ef78a4930ad',
 '61aebec9-2f0f-4d35-836c-69548af066b2',
 'e0ae7744-d32b-4e25-a402-e5f4719dfc06',
 '40c5222f-fb22-4d66-b4aa-d29a6e0e811b',
 '9a5c7694-f8d3-46d5-8e71-beb3bb53ed2a',
 '9c474788-17d4-40d5-a6de-84a7d5253860',
 'fb9dabe3-eeaa-4e16-9c3e-87065129b71a',
 'b85b629a-f113-4f2a-82e1-9d075e186839',
 '3fbd879a-8037-4fdb-916f-c09e3616358a',
 'e35a87ec-7cfd-4e36-ad57-c93dbee72e13',
 'f1acfab5-4e7f-48be-9373-6b41e1d1c434',
 '54ec9c31-bfe7-40bc-803e-df636a3e6f94',
 'bf853be9-023b-4712-9273-ee9e5b44c112',
 '54056b02-62bd-4fac-8ed7-cf7e89c2cebe',
 'ace81352-d31f-46ef-9a08-5580d05f8ea1',
 'a4b849ae-033b-499f-93b2-1d81f1154055',
 'e4c8cd0a-1359-49f1-ace8-a4afd7535d9c',
 'd486524d-f690-4822-afdf-14254bbb8bc4',
 'f6e59a06-e4a3-41f7-9f82-7931324efff2',
 'ed4502ee-711e-45a4-9299-07bff4c85776',
 '5e7fec5f-3338-4c85-bc17-26d372afb57a',
 '98581423-9088-472f-bfd6-4c5b126b2599',
 '1333a955-b04c-4872-9c08-35e83e786148',
 '6f3dc5a2-d23f-4eaf-bb85-21e051fab4e3',
 'a4fb1e55-1bc4-

In [11]:
retriever = vector_store.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 6}
)

**4. User query = ‘agent memory’ 를 받아 관련된 chunks를 retrieve**


https://python.langchain.com/v0.2/docs/how_to/vectorstore_retriever/

https://api.python.langchain.com/en/latest/vectorstores/langchain_core.vectorstores.VectorStore.html#langchain_core.vectorstores.VectorStore.as_retriever

In [12]:
query = "agent memory"
retrieved_docs = retriever.invoke(query)

In [13]:
print(f"Retrieved {len(retrieved_docs)} documents:")
for i, doc in enumerate(retrieved_docs):
    print(f"Document {i+1}: {doc.page_content[:100]}...")

Retrieved 6 documents:
Document 1: Memory stream: is a long-term memory module (external database) that records a comprehensive list of...
Document 2: LLM Powered Autonomous Agents
    
Date: June 23, 2023  |  Estimated Reading Time: 31 min  |  Author...
Document 3: Memory

Short-term memory: I would consider all the in-context learning (See Prompt Engineering) as ...
Document 4: Fig. 13. The generative agent architecture. (Image source: Park et al. 2023)
This fun simulation res...
Document 5: Reliability of natural language interface: Current agent system relies on natural language as an int...
Document 6: They also discussed the risks, especially with illicit drugs and bioweapons. They developed a test s...


**5. 5-1) 과 5-2) 를 참고하여, User query와 retrieved chunk 에 대해 relevance 가 있는지를 평가하는 시스템 프롬프트를 작성해보세요: retrieval 퀄리티를 LLM 이 스스로 평가하도록 하고, 관련이 있으면 {‘relevance’: ‘yes’} 관련이 없으면 {‘relevance’: ‘no’} 라고 출력하도록 함. ( JsonOutputParser() 를 활용 )**


5-1 참고) RAG 용 프롬프트 작성을 위한 Prompt Hub 활용

https://smith.langchain.com/hub/rlm/rag-prompt

In [None]:
from langchain import hub

prompt = hub.pull("rlm/rag-prompt")
prompt

In [None]:
example_messages = prompt.invoke(
    {"context": "filler context", "question": "filler question"}
).to_messages()

example_messages

5-2 참고) output JSON formatting 하는 코드

https://python.langchain.com/v0.2/docs/how_to/output_parser_json/#without-pydantic

In [None]:
# 5-2 참고 코드
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.prompts import PromptTemplate

joke_query = "Tell me a joke."

parser = JsonOutputParser()

prompt = PromptTemplate(
    template="Answer the user query.\n{format_instructions}\n{query}\n",
    input_variables=["query"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

chain = prompt | llm | parser

chain.invoke({"query": joke_query})

In [18]:
!pip install -U langchain langchain-openai



In [19]:
from langsmith import Client
client = Client(api_key=getpass.getpass())
prompt = client.pull_prompt("rlm/rag-prompt", include_model=True)

··········


In [21]:
# 필요한 모듈 임포트
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_openai import ChatOpenAI

# 1. LLM 초기화 (예: OpenAI GPT 모델 사용)
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

# 2. JsonOutputParser 설정
parser = JsonOutputParser()

# 3. 프롬프트 템플릿 정의
prompt_template = """
You are an AI assistant tasked with evaluating the relevance of a retrieved document chunk to a user's query.

User query: {query}

Retrieved chunk: {chunk}

Determine if the retrieved chunk is relevant to the user's query. Output your decision in JSON format as follows:
- If relevant: {{"relevance": "yes"}}
- If not relevant: {{"relevance": "no"}}
"""

# 4. PromptTemplate 생성
prompt = PromptTemplate(
    template=prompt_template,
    input_variables=["query", "chunk"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

# 5. 체인 구성
chain = prompt | llm | parser

# 6. 체인 실행 예시
user_query = "agent memory"
retrieved_chunk = "Memory is a crucial component of an agent, allowing it to retain information over time."

result = chain.invoke({"query": user_query, "chunk": retrieved_chunk})
print(result)

{'relevance': 'yes'}


6. 5 에서 모든 docs에 대해 'yes' 가 나와야 하는 케이스와 ‘no’ 가 나와야 하는 케이스를 작성해보세요.

In [23]:
user_query = "agent memory"

chunks = [
    "Memory is a crucial component of an agent, allowing it to retain information over time.",
    "In reinforcement learning, agents use memory to store past experiences, which helps in decision-making.",
    "Agent memory can be short-term or long-term, depending on the task requirements."
]

for i, chunk in enumerate(chunks, start=1):
    result = chain.invoke({"query": user_query, "chunk": chunk})
    print(f"Result {i}: {result}")


Result 1: {'relevance': 'yes'}




Result 2: {'relevance': 'yes'}




Result 3: {'relevance': 'yes'}


In [24]:
user_query = "agent memory"

chunks = [
    "The weather forecast for tomorrow is sunny and warm, with a high of 82 degrees.",
    "Robbers broke into the city bank and stole $1 million in cash.",
    "The top 10 soccer players in the world right now."
]

for i, chunk in enumerate(chunks, start=1):
    result = chain.invoke({"query": user_query, "chunk": chunk})
    print(f"Result {i}: {result}")




Result 1: {'relevance': 'no'}
Result 2: {'relevance': 'no'}
Result 3: {'relevance': 'no'}


8. 6-7 의 평가에서 문제가 없다면, 5에서 작성한 코드의 실행 결과가 'yes' 인 경우, 4의 retrieved chunk 를 가지고 답변하는 chain 코드를 작성해주세요. (prompt | llm | parser 형태의 코드)

In [26]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain import hub
from langchain.prompts import ChatPromptTemplate
from langchain.chat_models import ChatOpenAI  # LLM 예시로 OpenAI 사용

# 1. 필요한 함수 정의
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

relevance_prompt = ChatPromptTemplate.from_template(
    "주어진 쿼리 '{query}'와 청크 '{chunk}'를 보고 청크가 쿼리와 관련이 있는지 'yes' 또는 'no'로 답하세요."
)
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)  # LLM 초기화
relevance_chain = relevance_prompt | llm | StrOutputParser()

retrieved_docs = retriever.invoke(query)
print(f"Retrieved {len(retrieved_docs)} documents:")
for i, doc in enumerate(retrieved_docs):
    print(f"Document {i+1}: {doc.page_content[:100]}...")

relevant_chunks = []
for doc in retrieved_docs:
    relevance_result = relevance_chain.invoke({"query": query, "chunk": doc.page_content})
    if relevance_result.strip().lower() == "yes":  # 결과가 'yes'인지 확인
        relevant_chunks.append(doc)

if relevant_chunks:
    print("\nRelevance confirmed. Generating answer...")
    # RAG 체인 구성 (context로 relevant_chunks 사용)
    rag_chain = (
        {"context": lambda x: format_docs(relevant_chunks), "question": RunnablePassthrough()}
        | prompt
        | llm
        | StrOutputParser()
    )
    # 스트리밍으로 답변 출력
    for chunk in rag_chain.stream(query):
        print(chunk, end="", flush=True)
else:
    print("\nNo relevant chunks found.")

  llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)  # LLM 초기화


Retrieved 6 documents:
Document 1: Memory stream: is a long-term memory module (external database) that records a comprehensive list of...
Document 2: LLM Powered Autonomous Agents
    
Date: June 23, 2023  |  Estimated Reading Time: 31 min  |  Author...
Document 3: Memory

Short-term memory: I would consider all the in-context learning (See Prompt Engineering) as ...
Document 4: Fig. 13. The generative agent architecture. (Image source: Park et al. 2023)
This fun simulation res...
Document 5: Reliability of natural language interface: Current agent system relies on natural language as an int...
Document 6: They also discussed the risks, especially with illicit drugs and bioweapons. They developed a test s...





Relevance confirmed. Generating answer...




Agent memory consists of both short-term and long-term memory modules. Short-term memory utilizes in-context learning, while long-term memory retains and recalls information over extended periods, often using an external database. This memory system allows agents to synthesize past experiences and



 inform future behavior.

9. 생성된 답안에 Hallucination 이 있는지 평가하는 시스템 프롬프트를 작성해보세요. LLM이 스스로 평가하도록 하고, hallucination 이 있으면 {‘hallucination’: ‘yes’} 없으면 {‘hallucination’: ‘no’} 라고 출력하도록 하세요.

In [28]:
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_openai import ChatOpenAI

# 1. LLM 초기화
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

# 2. JSON 파서 설정
parser = JsonOutputParser()

# 3. 수정된 프롬프트 템플릿 정의
prompt_template = """
You are an AI assistant tasked with evaluating whether a generated answer contains hallucination. Hallucination occurs when the answer includes information that is not factually correct or not supported by evidence.

Generated answer: {answer}

Determine if the generated answer contains hallucination. Output your decision in JSON format as follows:
- If there is hallucination: {{"hallucination": "yes"}}
- If there is no hallucination: {{"hallucination": "no"}}
"""

# 4. PromptTemplate 생성
prompt = PromptTemplate(
    template=prompt_template,
    input_variables=["answer"]
)

# 5. 체인 구성
chain = prompt | llm | parser

# 6. 체인 실행 예시
generated_answer = "The capital of France is Paris, and it is known for its beautiful beaches."
result = chain.invoke({"answer": generated_answer})
print(result)



{'hallucination': 'yes'}


10. 9 에서 ‘yes’ 면 8 로 돌아가서 다시 생성, ‘no’ 면 답변 생성하고 유저에게 답변 생성에 사용된 출처와 함께 출력하도록 하세요. (최대 1번까지 다시 생성)

In [29]:
from langchain_core.output_parsers import StrOutputParser, JsonOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain import hub
from langchain.prompts import PromptTemplate
from langchain_openai import ChatOpenAI

# 가정: Task 8에서 답변을 생성하는 함수
def generate_answer(query, retrieved_docs):
    # RAG 프롬프트와 LLM 설정
    prompt = hub.pull("rlm/rag-prompt")  # LangChain Hub에서 RAG 프롬프트 가져오기
    llm = ChatOpenAI(model="gpt-4o-mini")  # LLM 모델 설정
    rag_chain = (
        {"context": lambda x: "\n".join([doc.page_content for doc in retrieved_docs]),
         "question": RunnablePassthrough()}
        | prompt
        | llm
        | StrOutputParser()
    )
    return rag_chain.invoke(query)

# 가정: Task 9에서 Hallucination을 평가하는 함수
def evaluate_hallucination(answer):
    hallucination_prompt = PromptTemplate(
        template="""
        You are an AI assistant tasked with evaluating whether a generated answer contains hallucination.
        Generated answer: {answer}
        Output your decision in JSON format: {{"hallucination": "yes"}} or {{"hallucination": "no"}}
        """,
        input_variables=["answer"]
    )
    llm = ChatOpenAI(model="gpt-4o-mini")
    parser = JsonOutputParser()
    hallucination_chain = hallucination_prompt | llm | parser
    return hallucination_chain.invoke({"answer": answer})

# Task 10: 답변 생성 및 평가 로직
def task_10(query, retrieved_docs, max_attempts=2):
    attempt = 0
    while attempt < max_attempts:
        # 1. 답변 생성
        answer = generate_answer(query, retrieved_docs)
        # 2. Hallucination 평가
        evaluation = evaluate_hallucination(answer)

        if evaluation.get("hallucination") == "no":
            # 출처 추출 (retrieved_docs의 metadata에서 'source' 가져오기)
            sources = [doc.metadata.get("source", "Unknown") for doc in retrieved_docs]
            return answer, sources

        # Hallucination이 'yes'이면 재시도
        attempt += 1

    # 최대 시도 횟수 초과 시 실패 메시지 반환
    return "최대 재시도 횟수를 초과하여 hallucination 없는 답변을 생성하지 못했습니다.", []

# 실행 예시
query = "What is Task Decomposition?"
retrieved_docs = retriever.invoke(query)  # Task 4에서 정의된 retriever 사용 가정
answer, sources = task_10(query, retrieved_docs)

# 결과 출력
print("Answer:", answer)
print("Sources:", sources)



Answer: Task decomposition is the process of breaking down a complex task into smaller, manageable sub-tasks. This can be done using techniques like prompting language models to think step by step or through human input for specific tasks. It allows the model to tackle complicated problems systematically, enhancing performance and clarity in thought processes.
Sources: ['https://lilianweng.github.io/posts/2023-06-23-agent/', 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'https://lilianweng.github.io/posts/2023-06-23-agent/', 'https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/', 'https://lilianweng.github.io/posts/2023-06-23-agent/']
