In [None]:
%%capture --no-stderr
%pip install langchain langchain-openai langchain-openai langchain_chroma langchain-text-splitters langchain_community


In [None]:
!pip install openai



In [None]:
import getpass
import os

# Read the OpenAI key without echoing it and publish it through the process environment.
os.environ.update(
    OPENAI_API_KEY=getpass.getpass(prompt="OpenAI API 키를 입력해주세요 (입력 시 문자 숨김): ")
)

OpenAI API 키를 입력해주세요 (입력 시 문자 숨김): ··········


In [None]:
import getpass
import os

# Only prompt when no key is present: an unconditional, unlabelled prompt here
# would overwrite a key that was already entered earlier in the notebook.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass(prompt="OpenAI API key: ")

from langchain_openai import ChatOpenAI

# Smoke test: ask the chat model for a short self-introduction and print the reply.
llm = ChatOpenAI(model="gpt-4o")
resp = llm.invoke("간단히 자기소개 해줘")
print(resp.content)


··········
안녕하세요! 저는 OpenAI에서 개발한 AI 언어 모델입니다. 여러분의 질문에 답변하고 정보를 제공하기 위해 만들어졌습니다. 다양한 주제에 대해 이야기 할 수 있으며, 필요에 따라 도움을 드릴 수 있도록 최선을 다하겠습니다. 어떤 도움이 필요하신가요?


In [None]:
import getpass
import os

# Switch on LangChain tracing via its environment flag.
os.environ["LANGCHAIN_TRACING_V2"] = "true"
# Label the prompt so it cannot be confused with the OpenAI key prompt
# (a bare getpass() shows the same generic prompt for both secrets).
os.environ["LANGCHAIN_API_KEY"] = getpass.getpass(prompt="LangChain (LangSmith) API key: ")

··········


In [None]:
from langchain import hub

# Pull the shared RAG prompt template from the LangChain hub.
prompt = hub.pull("rlm/rag-prompt")

# Fill the template with placeholder values to inspect the messages it produces.
example_messages = prompt.invoke(
    dict(context="filler context", question="filler question")
).to_messages()

example_messages

[HumanMessage(content="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: filler question \nContext: filler context \nAnswer:", additional_kwargs={}, response_metadata={})]

In [None]:
# Send the self-introduction request through the existing `llm` client and show the reply text.
resp = llm.invoke(input="간단히 자기소개 해줘")
print(resp.content)

안녕하세요! 저는 여러분에게 정보를 제공하고 질문에 답변하기 위해 생성된 AI 어시스턴트입니다. 다양한 주제에 대한 정보를 제공하고 여러분의 질문에 최대한 도움을 드리기 위해 노력하고 있습니다. 무엇을 도와드릴까요?


In [None]:
from typing import List  # ← List 타입 추가
from langchain_core.documents import Document
from langchain_community.document_loaders import WebBaseLoader

def task1_load() -> List[Document]:
    """Fetch the fixed set of web pages and return them as documents.

    Returns:
        The result of ``WebBaseLoader(...).load()`` for the three URLs below.
    """
    post_urls = [
        "https://lilianweng.github.io/posts/2023-06-23-agent/",
        "https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/",
        "https://lilianweng.github.io/posts/2023-10-25-adv-attack-llm/",
    ]
    return WebBaseLoader(post_urls).load()


# Load once at notebook level so later cells can reuse `docs`.
docs = task1_load()


In [None]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
def task2_split(docs: List[Document],
                chunk_size: int = 1200,
                chunk_overlap: int = 200) -> List[Document]:
    """Split documents into chunks with a ``RecursiveCharacterTextSplitter``.

    Args:
        docs: Documents to split.
        chunk_size: Forwarded to the splitter as ``chunk_size``.
        chunk_overlap: Forwarded to the splitter as ``chunk_overlap``.

    Returns:
        The chunk documents produced by ``split_documents``.
    """
    splitter_options = {"chunk_size": chunk_size, "chunk_overlap": chunk_overlap}
    chunker = RecursiveCharacterTextSplitter(**splitter_options)
    pieces = chunker.split_documents(docs)
    return pieces


In [None]:
from langchain_chroma import Chroma
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

def task3_build_vectorstore(chunks: List[Document]) -> Chroma:
    """Embed the chunks and write them to the persistent Chroma store.

    The store is persisted in ``./chroma_rag_demo``, so its contents outlive a
    single call. Every chunk is therefore given a deterministic id (SHA-256 of
    its source and text) and identical chunks are dropped from the batch:
    re-running the pipeline writes to the same ids instead of appending fresh
    copies of every chunk, which would otherwise fill retrieval results with
    duplicates.

    Args:
        chunks: Chunk documents to embed and store.

    Returns:
        The Chroma vector store built from the (de-duplicated) chunks.
    """
    import hashlib  # function-scope import keeps this cell self-contained

    unique_chunks: List[Document] = []
    chunk_ids: List[str] = []
    seen_ids = set()
    for chunk in chunks:
        source = str((chunk.metadata or {}).get("source", ""))
        digest = hashlib.sha256(
            f"{source}\n{chunk.page_content}".encode("utf-8")
        ).hexdigest()
        if digest in seen_ids:
            # Same source and text already queued; ids must be unique within a batch.
            continue
        seen_ids.add(digest)
        unique_chunks.append(chunk)
        chunk_ids.append(digest)

    embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
    vs = Chroma.from_documents(
        documents=unique_chunks,
        embedding=embeddings,
        ids=chunk_ids,
        persist_directory="./chroma_rag_demo",
    )

    return vs


In [None]:
def task4_retrieve(vs: Chroma, query: str = "agent memory") -> List[Document]:
    """Run a similarity search (``k=6``) for ``query`` against the vector store.

    Args:
        vs: Vector store to search.
        query: Search text.

    Returns:
        The documents returned by the retriever.
    """
    similarity_retriever = vs.as_retriever(
        search_type="similarity",
        search_kwargs={"k": 6},
    )
    hits = similarity_retriever.invoke(query)
    return hits

In [None]:
from langchain_core.documents import Document
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import JsonOutputParser, StrOutputParser
from langchain_core.runnables import RunnablePassthrough

from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import JsonOutputParser
from langchain_openai import ChatOpenAI

def build_relevance_chain(llm: ChatOpenAI):
    """Build a chain that judges whether a context is relevant to a question.

    The chain takes ``{"question": ..., "context": ...}``, fills the judge
    prompt, sends it to ``llm`` and parses the reply with ``JsonOutputParser``.
    The prompt asks the model to reply with only a ``relevance`` JSON object
    whose value is ``"yes"`` or ``"no"``.
    """
    judge_template = "".join([
        "You are a strict relevance judge for a RAG system.\n",
        "Decide if the given context is relevant to the question.\n",
        "Respond ONLY with EXACTLY ONE of the following two JSON lines:\n",
        "{{\"relevance\": \"yes\"}}\n",
        "{{\"relevance\": \"no\"}}\n",
        "# Question:\n{question}\n\n",
        "# Context:\n{context}\n",
    ])
    judge_prompt = PromptTemplate(
        template=judge_template,
        input_variables=["question", "context"],
    )
    return judge_prompt | llm | JsonOutputParser()


def build_hallucination_chain(llm: ChatOpenAI):
    """Build a chain that judges whether an answer is supported by its context.

    The chain takes ``{"answer": ..., "context": ...}``, fills the detector
    prompt, sends it to ``llm`` and parses the reply with ``JsonOutputParser``.
    The prompt asks the model to reply with only a ``hallucination`` JSON
    object whose value is ``"yes"`` or ``"no"``.
    """
    detector_template = "".join([
        "You are a strict hallucination detector.\n",
        "Judge whether the Answer contains claims NOT supported by the Context.\n",
        "Respond ONLY with EXACTLY ONE of the following two JSON lines:\n",
        "{{\"hallucination\": \"yes\"}}\n",
        "{{\"hallucination\": \"no\"}}\n",
        "# Answer:\n{answer}\n\n",
        "# Context:\n{context}\n",
    ])
    detector_prompt = PromptTemplate(
        template=detector_template,
        input_variables=["answer", "context"],
    )
    return detector_prompt | llm | JsonOutputParser()



In [None]:
def task6_cases():
    """Return hand-written fixture documents keyed by case name.

    Returns:
        A dict with keys ``"YES_ALL"`` and ``"NO_ALL"``, each mapping to a
        one-element list of ``Document``.
    """
    fixture_texts = {
        "YES_ALL": "Agent memory stores user preferences...",
        "NO_ALL": "Paris is capital of France.",
    }
    return {
        label: [Document(page_content=text)]
        for label, text in fixture_texts.items()
    }


In [None]:
from dataclasses import dataclass
from typing import List, Dict, Any

@dataclass
class EvalResult:
    """Summary of a relevance-judge check over the YES/NO fixture cases."""

    # Whether every "YES_ALL" case came back as relevant.
    relevance_yes_for_all: bool
    # Whether every "NO_ALL" case came back as not relevant.
    relevance_no_for_all: bool
    # Per-case records collected during the check.
    details: List[Dict[str, Any]]

def task7_verify_relevance(llm: ChatOpenAI) -> EvalResult:
    """Check the relevance judge against the fixture cases from ``task6_cases``.

    Each fixture document is judged against the question "agent memory".
    "YES_ALL" documents are expected to be judged "yes" and "NO_ALL" documents
    "no".

    Args:
        llm: Chat model used to build the relevance chain.

    Returns:
        An ``EvalResult`` with one flag per case group and a detail record
        (case, first 80 characters of the document, raw result) per document.
    """
    judge = build_relevance_chain(llm)
    fixtures = task6_cases()
    question = "agent memory"

    # (fixture key, verdict the judge should give); YES cases are judged first.
    expectations = (("YES_ALL", "yes"), ("NO_ALL", "no"))

    records = []
    outcomes = {label: [] for label, _ in expectations}
    for label, expected in expectations:
        for doc in fixtures[label]:
            verdict = judge.invoke({"question": question, "context": doc.page_content})
            records.append({"case": label, "doc": doc.page_content[:80], "res": verdict})
            outcomes[label].append(verdict.get("relevance") == expected)

    return EvalResult(
        relevance_yes_for_all=all(outcomes["YES_ALL"]),
        relevance_no_for_all=all(outcomes["NO_ALL"]),
        details=records,
    )



In [None]:
def build_rag_answer_chain(llm: ChatOpenAI):
    """Build the chain that answers a question from the supplied context.

    The chain is invoked with ``{"context": <str>, "question": <str>}``. The
    input dict goes straight into the prompt template so each placeholder
    receives its own value. Routing both keys through ``RunnablePassthrough()``
    would instead substitute the whole input dict into both ``{context}`` and
    ``{question}``, duplicating the context and garbling the question.

    Args:
        llm: Chat model that generates the answer.

    Returns:
        A runnable that yields the model's answer as a plain string.
    """
    answer_prompt = PromptTemplate(
        template="Context:\n{context}\n\nQuestion: {question}\n"
                 "→ Context를 기반으로 답변하라.",
        input_variables=["context", "question"]
    )
    return answer_prompt | llm | StrOutputParser()


In [None]:
from typing import List
from langchain_core.documents import Document

# Reference code for carrying out Task 8
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough



# Split the loaded documents and index the pieces (no persist_directory is given here).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
splits = text_splitter.split_documents(docs)
vectorstore = Chroma.from_documents(
    documents=splits,
    embedding=OpenAIEmbeddings(),
)

# Default retriever over the store, plus the shared RAG prompt from the hub.
retriever = vectorstore.as_retriever()
prompt = hub.pull("rlm/rag-prompt")
def format_docs(docs):
    """Join the ``page_content`` of each document, separated by a blank line."""
    contents = [doc.page_content for doc in docs]
    return "\n\n".join(contents)

# The retriever output is formatted into the prompt's context; the raw
# question string is passed through unchanged.
rag_chain = (
    {
        "context": retriever | format_docs,
        "question": RunnablePassthrough(),
    }
    | prompt
    | llm
    | StrOutputParser()
)

# Print the answer piece by piece as the chain streams it.
for chunk in rag_chain.stream("What is Task Decomposition?"):
    print(chunk, end="", flush=True)

Task Decomposition involves breaking down complex tasks into smaller, more manageable steps. Techniques like Chain of Thought (CoT) enhance model performance by guiding the model to think step by step, while the Tree of Thoughts expands on this by allowing multiple reasoning paths. The process can use simple prompts, task-specific instructions, or external tools like classical planners to decompose the tasks effectively.

In [None]:
def task10_run(
    user_query: str = "agent memory",
    verbose: bool = True,
    chunk_size: int = 1800,
    chunk_overlap: int = 250,
    search_type: str = "mmr",            # "similarity" | "mmr"
    search_kwargs: dict = None,          # e.g. {"k": 12, "lambda_mult": 0.3}
    proceed_if_any_yes: bool = True,     # True: proceed if any chunk is "yes" / False: all must be "yes"
):
    """Run the RAG pipeline end to end and return a summary dict.

    Steps: load -> split -> build vector store -> retrieve -> judge relevance
    per chunk -> (if the policy passes) generate an answer -> hallucination
    check -> (if flagged) regenerate once with a stricter prompt -> final output.

    Names that must exist at module level when this is called:
    - ChatOpenAI, CHAT_MODEL, PromptTemplate, StrOutputParser
    - task1_load, task2_split, task3_build_vectorstore
    - build_relevance_chain, build_rag_answer_chain, build_hallucination_chain
    - format_docs, unique_sources

    Args:
        user_query: Question used for retrieval, relevance judging and answering.
        verbose: When True, print progress and the final answer with sources.
        chunk_size: Forwarded to ``task2_split``.
        chunk_overlap: Forwarded to ``task2_split``.
        search_type: Retriever search type passed to ``as_retriever``.
        search_kwargs: Retriever search kwargs. When None, defaults to
            ``{"k": 12, "lambda_mult": 0.3}`` for "mmr" and ``{"k": 6}`` otherwise.
        proceed_if_any_yes: Relevance policy. True continues when at least one
            retrieved chunk is judged "yes"; False requires all of them.

    Returns:
        A dict with the query, parameters, per-stage counts, relevance votes,
        the ``proceed`` decision, ``final_answer``, ``sources`` and
        ``hallucination_checks``. When the pipeline stops early,
        ``final_answer`` is None and ``sources`` / ``hallucination_checks`` are
        empty lists. ``answer_v1`` is present once an answer was generated and
        ``answer_v2`` only if a regeneration happened.
    """
    if search_kwargs is None:
        search_kwargs = {"k": 12, "lambda_mult": 0.3} if search_type == "mmr" else {"k": 6}

    llm = ChatOpenAI(model=CHAT_MODEL, temperature=0)
    out = {
        "user_query": user_query,
        "params": {
            "chunk_size": chunk_size,
            "chunk_overlap": chunk_overlap,
            "search_type": search_type,
            "search_kwargs": search_kwargs,
            "proceed_if_any_yes": proceed_if_any_yes,
        },
    }

    # Task 1: load the source documents.
    if verbose: print("[Task1] Loading docs...")
    raw_docs = task1_load()
    out["docs_count"] = len(raw_docs)
    if verbose: print("  loaded:", out["docs_count"])
    if not raw_docs:
        if verbose: print("No docs loaded. Stop.")
        out.update({"chunks_count": 0, "retrieved_count": 0, "relevance_votes": [], "proceed": False,
                    "final_answer": None, "sources": [], "hallucination_checks": []})
        return out

    # Task 2: split into chunks.
    if verbose: print("[Task2] Splitting...")
    chunks = task2_split(raw_docs, chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    out["chunks_count"] = len(chunks)
    if verbose: print("  chunks:", out["chunks_count"])
    if not chunks:
        if verbose: print("No chunks. Stop.")
        out.update({"retrieved_count": 0, "relevance_votes": [], "proceed": False,
                    "final_answer": None, "sources": [], "hallucination_checks": []})
        return out

    # Task 3: build the vector store.
    if verbose: print("[Task3] Building vectorstore...")
    vs = task3_build_vectorstore(chunks)   # no explicit persist() call needed with Chroma 0.4.x+
    if verbose: print("  vectorstore ready")

    # Task 4: create the retriever with the caller's search settings and query it.
    if verbose: print("[Task4] Retrieving...")
    retriever = vs.as_retriever(search_type=search_type, search_kwargs=search_kwargs)
    retrieved = retriever.invoke(user_query)
    out["retrieved_count"] = len(retrieved)
    if verbose: print("  retrieved:", out["retrieved_count"])
    if out["retrieved_count"] == 0:
        if verbose: print("No retrieved docs. Stop.")
        out.update({"relevance_votes": [], "proceed": False,
                    "final_answer": None, "sources": [], "hallucination_checks": []})
        return out

    # Task 5: judge each retrieved chunk; a missing/empty verdict counts as "no".
    if verbose: print("[Task5] Relevance judging (per chunk)...")
    rel_chain = build_relevance_chain(llm)
    rel_votes = []
    for d in retrieved:
        res = rel_chain.invoke({"question": user_query, "context": d.page_content})
        rel_votes.append((res or {}).get("relevance", "no"))
    out["relevance_votes"] = rel_votes
    if verbose: print("  votes:", rel_votes)

    # Proceed policy: any "yes" is enough, or every vote must be "yes".
    proceed = any(v == "yes" for v in rel_votes) if proceed_if_any_yes else all(v == "yes" for v in rel_votes)
    out["proceed"] = proceed
    if not proceed:
        if verbose: print("No relevant chunks by policy. Stop.")
        out.update({"final_answer": None, "sources": [], "hallucination_checks": []})
        return out

    # Task 8: generate the answer from all retrieved chunks.
    if verbose: print("[Task8] Generating answer from retrieved context...")
    answer_chain = build_rag_answer_chain(llm)
    context_text = format_docs(retrieved)
    answer = answer_chain.invoke({"context": context_text, "question": user_query})
    out["answer_v1"] = answer or ""
    if verbose: print("  answer_v1 length:", len(out["answer_v1"]))

    # Task 9: hallucination check on the first answer.
    if verbose: print("[Task9] Hallucination check...")
    h_chain = build_hallucination_chain(llm)
    h1 = h_chain.invoke({"answer": out["answer_v1"], "context": context_text}) or {}
    out["hallucination_checks"] = [h1]
    if verbose: print("  check1:", h1)

    final_answer = out["answer_v1"]

    # Task 10: regenerate once with a stricter prompt if the first answer was flagged.
    if h1.get("hallucination") == "yes":
        if verbose: print("[Task10] Re-generate once (safer prompt)...")

        cautious_prompt = PromptTemplate(
            template=(
                "Answer strictly using only the facts present in the provided context. "
                "If a detail is not explicitly mentioned, say you don't know.\n\n"
                "Context:\n{context}\n\nQuestion: {question}"
            ),
            input_variables=["context", "question"],
        )
        # Feed the input dict directly to the prompt. Wrapping both keys in
        # RunnablePassthrough() would put the whole dict into each placeholder.
        # `llm` already uses CHAT_MODEL with temperature=0, so it is reused here.
        cautious_chain = cautious_prompt | llm | StrOutputParser()
        answer2 = cautious_chain.invoke({"context": context_text, "question": user_query}) or ""
        h2 = h_chain.invoke({"answer": answer2, "context": context_text}) or {}
        out["answer_v2"] = answer2
        out["hallucination_checks"].append(h2)
        if verbose: print("  check2:", h2)
        # The regenerated answer is used regardless of the second check's verdict.
        final_answer = answer2

    # NOTE(review): unique_sources is not defined in the visible part of this
    # notebook; it must be provided by another cell before this function runs.
    sources = unique_sources(retrieved)
    out["final_answer"] = final_answer
    out["sources"] = sources

    if verbose:
        print("\n=== FINAL ANSWER ===\n")
        print(final_answer)
        print("\n--- Sources ---")
        for i, s in enumerate(sources, 1):
            print(f"[{i}] {s}")

    return out


In [None]:
# Model names; task10_run looks up CHAT_MODEL at call time.
CHAT_MODEL = "gpt-4o"
EMBED_MODEL = "text-embedding-3-small"

# Run the whole pipeline; the returned summary dict is shown as the cell output.
task10_run("agent memory")

[Task1] Loading docs...
  loaded: 3
[Task2] Splitting...
  chunks: 99
[Task3] Building vectorstore...
  vectorstore ready
[Task4] Retrieving...
  retrieved: 12
[Task5] Relevance judging (per chunk)...
  votes: ['yes', 'yes', 'yes', 'yes', 'no', 'no', 'no', 'no', 'no', 'no', 'no', 'yes']
[Task8] Generating answer from retrieved context...
  answer_v1 length: 1381
[Task9] Hallucination check...
  check1: {'hallucination': 'no'}

=== FINAL ANSWER ===

The context provided discusses the concept of agent memory within the framework of LLM-powered autonomous agents. In this system, memory is a crucial component that allows agents to retain and recall information over time, enabling them to behave in a manner conditioned by past experiences. There are two main types of memory mentioned:

1. **Short-term memory**: This is akin to in-context learning, where the model uses the immediate context to learn and make decisions. It is limited in capacity, often constrained by a word limit, and is used

{'user_query': 'agent memory',
 'params': {'chunk_size': 1800,
  'chunk_overlap': 250,
  'search_type': 'mmr',
  'search_kwargs': {'k': 12, 'lambda_mult': 0.3},
  'proceed_if_any_yes': True},
 'docs_count': 3,
 'chunks_count': 99,
 'retrieved_count': 12,
 'relevance_votes': ['yes',
  'yes',
  'yes',
  'yes',
  'no',
  'no',
  'no',
  'no',
  'no',
  'no',
  'no',
  'yes'],
 'proceed': True,
 'answer_v1': "The context provided discusses the concept of agent memory within the framework of LLM-powered autonomous agents. In this system, memory is a crucial component that allows agents to retain and recall information over time, enabling them to behave in a manner conditioned by past experiences. There are two main types of memory mentioned:\n\n1. **Short-term memory**: This is akin to in-context learning, where the model uses the immediate context to learn and make decisions. It is limited in capacity, often constrained by a word limit, and is used for immediate tasks.\n\n2. **Long-term me