In [1]:
%%capture --no-stderr
%pip install langchain_community langchainhub chromadb langchain langgraph tavily-python langchain-text-splitters langchain_openai

from langchain_openai import ChatOpenAI
import os
import getpass
# Ask for each secret with an explicit prompt. Bare getpass() calls all show
# the same default prompt, so the user cannot tell which of the three keys
# is being requested.
os.environ['OPENAI_API_KEY'] = getpass.getpass("OpenAI API key: ")

# Chat model shared by every chain defined in the cells below (temperature 0).
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

import os

# Tracing configuration, pointed at the LangSmith endpoint.
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com"
os.environ["LANGCHAIN_API_KEY"] = getpass.getpass("LangSmith API key (LANGCHAIN_API_KEY): ")

from tavily import TavilyClient
# Client used by the web-search node.
tavily_key = getpass.getpass("Tavily API key: ")
tavily = TavilyClient(api_key=tavily_key)

### Index

from langchain_community.document_loaders import WebBaseLoader
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Pages that are loaded, split and embedded into the vector store.
urls = [
    "https://lilianweng.github.io/posts/2023-06-23-agent/",
    "https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/",
    "https://lilianweng.github.io/posts/2023-10-25-adv-attack-llm/",
]

# One load() call per URL; each call returns a list, so `docs` is a list of
# lists that is flattened into `docs_list`.
docs = [WebBaseLoader(url).load() for url in urls]
docs_list = [item for sublist in docs for item in sublist]

# Split the loaded pages into chunks (chunk_size=250, no overlap) using the
# splitter's tiktoken-based constructor.
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=250, chunk_overlap=0
)
doc_splits = text_splitter.split_documents(docs_list)

# Add to vectorDB: embed the chunks with OpenAI embeddings and store them in
# the "rag-chroma" Chroma collection.
vectorstore = Chroma.from_documents(
    documents=doc_splits,
    collection_name="rag-chroma",
    embedding = OpenAIEmbeddings(model="text-embedding-3-small")
)
# Retriever (default settings) used by the docs_retrieval node.
retriever = vectorstore.as_retriever()


USER_AGENT environment variable not set, consider setting it to identify your requests.


In [2]:
### Router
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.prompts import ChatPromptTemplate

# System prompt for the router. The model is asked to answer with a JSON
# object whose single key 'datasource' is 'web_search' or 'vectorstore'.
# Typos in the instruction text ("Return the a JSON", "premable") and the
# dangling final fragment were corrected so the model gets a clean instruction.
system = """You are an expert at routing a user question to a vectorstore or web search.
Use the vectorstore for questions on LLM agents, prompt engineering, and adversarial attacks.
You do not need to be stringent with the keywords in the question related to these topics.
Otherwise, use web-search. Give a binary choice 'web_search' or 'vectorstore' based on the question.
Return a JSON with a single key 'datasource' and no preamble or explanation. The question to route follows."""

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "question: {question}"),
    ]
)

# Chain: prompt -> chat model -> JSON parser (the result is a parsed dict).
# NOTE(review): the graph wiring visible in this notebook never calls
# question_router; confirm whether it is still needed.
question_router = prompt | llm | JsonOutputParser()

# question = "llm agent memory"
# question = "What is prompt?"
# docs = retriever.get_relevant_documents(question)
# print(question_router.invoke({"question": question}))

In [3]:
### Retrieval Grader
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.prompts import ChatPromptTemplate

# System prompt for the relevance grader. The model must answer with a JSON
# object whose single key 'score' is 'yes' or 'no'; relevance_checker reads
# that key. Typos in the instruction text (duplicated "score", "premable")
# were corrected.
system = """You are a grader assessing relevance
    of a retrieved document to a user question. If the document contains keywords related to the user question,
    grade it as relevant. It does not need to be a stringent test. The goal is to filter out erroneous retrievals. \n
    Give a binary score 'yes' or 'no' to indicate whether the document is relevant to the question. \n
    Provide the binary score as a JSON with a single key 'score' and no preamble or explanation.
    """

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "question: {question}\n\n document: {document} "),
    ]
)

# Chain: prompt -> chat model -> JSON parser (the result is a parsed dict).
retrieval_grader = prompt | llm | JsonOutputParser()
# question = "What is prompt?"
# docs = retriever.invoke(question)
# doc_txt = docs[0].page_content
# print(retrieval_grader.invoke({"question": question, "document": doc_txt}))

In [4]:
### Generate

from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate

# System prompt for answer generation: answer from the supplied context,
# admit when the answer is unknown, and stay within three sentences.
system = """You are an assistant for question-answering tasks.
    Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know.
    Use three sentences maximum and keep the answer concise"""

# The human turn carries two template variables: {question} and {context}.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "question: {question}\n\n context: {context} "),
    ]
)

# Chain: prompt -> chat model -> plain-string output.
rag_chain = prompt | llm | StrOutputParser()

# Run
# question = "What is prompt?"
# docs = retriever.invoke(question)
# generation = rag_chain.invoke({"context": docs, "question": question})
# print(generation)

In [5]:
### Hallucination Grader
# System prompt for the groundedness check. The model must answer with a JSON
# object whose single key 'score' is 'yes' (answer is supported by the facts)
# or 'no'.
system = """You are a grader assessing whether
    an answer is grounded in / supported by a set of facts. Give a binary 'yes' or 'no' score to indicate
    whether the answer is grounded in / supported by a set of facts. Provide the binary score as a JSON with a
    single key 'score' and no preamble or explanation."""

# The human turn carries two template variables: {documents} and {generation}.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "documents: {documents}\n\n answer: {generation} "),
    ]
)

# Chain: prompt -> chat model -> JSON parser (the result is a parsed dict).
# ChatPromptTemplate and JsonOutputParser are imported by earlier cells.
hallucination_grader = prompt | llm | JsonOutputParser()
#hallucination_grader.invoke({"documents": docs, "generation": generation})

In [6]:
### Answer Grader
# Prompt
# The original cell first built a throwaway prompt from whatever `system`
# the previous cell had left behind, then overwrote it right away. That dead,
# hidden-state-dependent statement is removed; only the real prompt remains.
# The model must answer with a JSON object whose single key 'score' is
# 'yes' or 'no'.
system = """You are a grader assessing whether an
    answer is useful to resolve a question. Give a binary score 'yes' or 'no' to indicate whether the answer is
    useful to resolve a question. Provide the binary score as a JSON with a single key 'score' and no preamble or explanation."""

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "question: {question}\n\n answer: {generation} "),
    ]
)

# Chain: prompt -> chat model -> JSON parser (the result is a parsed dict).
answer_grader = prompt | llm | JsonOutputParser()
#answer_grader.invoke({"question": question, "generation": generation})

In [14]:
from pprint import pprint
from typing import List

from langchain_core.documents import Document
from typing_extensions import TypedDict

from langgraph.graph import END, StateGraph

class GraphState(TypedDict):
    """State dictionary handed from node to node in the graph."""

    # User question; read by the retrieval, grading, web-search and generation nodes.
    question: str
    # Answer text written by generate_answer (the start node initialises it to "").
    generation: str
    # Written by relevance_checker; the conditional edge after that node routes on it.
    need_websearch: bool
    # Retrieved and web-search results. The nodes store Document objects here
    # (they read .page_content and .metadata), hence List[Document].
    documents: List[Document]
    # Consulted by hallucination_checker when deciding whether to give up.
    hallucination_try: bool
    # Incremented by websearch_tavaily; relevance_checker compares it to its retry limit.
    websearch_try_cnt: int
    # Written by hallucination_checker; the conditional edge after that node routes on it.
    hallucination_result: bool

def start(state):
    """Entry node: build the initial graph state.

    Only ``state["question"]`` is read from the input; every other field is
    reset to its starting value.

    Returns:
        dict: a complete state with empty generation/documents, cleared flags
        and a zeroed web-search counter.
    """
    print("--- start ----\n")
    initial_state = dict(
        question=state["question"],
        generation="",
        need_websearch=False,
        documents=[],
        hallucination_try=False,
        websearch_try_cnt=0,
        hallucination_result=False,
    )
    return initial_state

def docs_retrieval(state):
    """Retrieve documents for the question from the vector-store retriever.

    Each retrieved document gets ``metadata["source"] = "vectorstore"`` so the
    final report can tell vector-store hits from web results. As in the
    original, the hits are also written into the incoming ``state``.

    Returns:
        dict: the full state, with ``documents`` replaced by the retrieved hits.
    """
    print("--- docs_retrieval ----\n")
    retrieved = retriever.invoke(state["question"])
    # Add metadata
    for hit in retrieved:
        hit.metadata["source"] = "vectorstore"
    state["documents"] = retrieved

    carried_fields = (
        "question",
        "generation",
        "need_websearch",
        "documents",
        "hallucination_try",
        "websearch_try_cnt",
        "hallucination_result",
    )
    return {field: state[field] for field in carried_fields}


def relevance_checker(state):
    """Grade every document in the state and decide whether web search is needed.

    Each document's ``page_content`` is scored by ``retrieval_grader``. Only
    documents graded "yes" are kept, and ``need_websearch`` is raised as soon
    as any document is rejected. A missing or non-"yes" score counts as a
    rejection.

    The previous version returned ``END`` from this node once the web-search
    budget was spent. A node's return value is applied as a state update, and
    ``END`` is a plain string, so that path aborted the run instead of ending
    it. This version always returns a state dict: when the budget is spent it
    clears ``need_websearch``, so the existing conditional edge moves on to
    answer generation with the documents that passed grading. It also no
    longer mutates the incoming ``state``.

    Args:
        state: current graph state; reads ``question``, ``documents`` and
            ``websearch_try_cnt``.

    Returns:
        dict: the full state with ``documents`` filtered to the relevant ones
        and ``need_websearch`` updated.
    """
    print("--- relevance_checker ----\n")
    question = state["question"]
    max_websearch_tries = 1  # same limit as the original `>= 1` check

    # Score each doc
    relevant_documents = []
    need_websearch = False
    for d in state["documents"]:
        score = retrieval_grader.invoke(
            {"question": question, "document": d.page_content}
        )
        grade = str(score.get("score", "")).strip().lower()
        if grade == "yes":
            relevant_documents.append(d)
        else:
            need_websearch = True

    if need_websearch and state["websearch_try_cnt"] >= max_websearch_tries:
        # Web search was already tried; stop looping and answer from what we have.
        print("\n\nwebsearch 후에도 relevance 검사에 실패한 문서가 있습니다. 관련 문서만으로 답변을 생성합니다.")
        need_websearch = False

    return {"question": question,
            "generation": state["generation"],
            "need_websearch": need_websearch,
            "documents": relevant_documents,
            "hallucination_try": state["hallucination_try"],
            "websearch_try_cnt": state["websearch_try_cnt"],
            "hallucination_result": state["hallucination_result"]}

def websearch_tavaily(state):
    """Run a Tavily web search for the question and append the results.

    Every search result becomes a ``Document`` whose ``page_content`` is the
    result's ``content`` and whose metadata holds its ``url``. If the state
    already has a document list, the web results are appended to that same
    list in place; otherwise they become the document list.

    Returns:
        dict: the full state with the extended ``documents`` and
        ``websearch_try_cnt`` increased by one.
    """
    print("--- websearch_tavaily ----\n")
    query = state["question"]
    existing = state.get("documents")

    hits = tavily.search(query=query)['results']
    web_results = [
        Document(page_content=hit["content"], metadata={"url": hit["url"]})
        for hit in hits
    ]

    if existing is None:
        merged = web_results
    else:
        existing.extend(web_results)
        merged = existing

    return {"question": state["question"],
            "generation": state["generation"],
            "need_websearch": state["need_websearch"],
            "documents": merged,
            "hallucination_try": state["hallucination_try"],
            "websearch_try_cnt": state["websearch_try_cnt"] + 1,
            "hallucination_result": state["hallucination_result"]}

def generate_answer(state):
    """Generate an answer from the current documents with the RAG chain.

    The chain is invoked with the documents as ``context`` and the question.
    As in the original, the answer is also written into the incoming ``state``.

    Returns:
        dict: the full state with ``generation`` set to the chain's output.
    """
    print("--- generate_answer ----\n")
    question = state["question"]
    documents = state["documents"]

    chain_input = dict(context=documents, question=question)
    state["generation"] = rag_chain.invoke(chain_input)

    carried_fields = (
        "question",
        "generation",
        "need_websearch",
        "documents",
        "hallucination_try",
        "websearch_try_cnt",
        "hallucination_result",
    )
    return {field: state[field] for field in carried_fields}

def hallucination_checker(state):
    """Check whether the generated answer is grounded in the documents.

    ``hallucination_grader`` returns a dict whose ``score`` is "yes" when the
    answer is supported by the documents. ``hallucination_result`` is the flag
    the conditional edge after this node routes on: True sends the graph back
    to ``generate_answer``, False sends it on to ``answer_to_user``.

    Fixes over the previous version:
      * ``hallucination_try`` was never set, so an ungrounded answer made the
        graph regenerate without limit. The first failure now records the
        retry and allows exactly one regeneration.
      * The bail-out path returned ``END`` from the node. A node's return
        value is applied as a state update and ``END`` is a plain string, so
        it is not a valid result. On the second failure the node now replaces
        the answer with a failure notice and clears ``hallucination_result``,
        so the existing edge ends the run at ``answer_to_user`` instead of
        showing an ungrounded answer.
      * The grade comparison is case-insensitive, as in ``relevance_checker``.
      * The incoming ``state`` is no longer mutated.

    Args:
        state: current graph state; reads ``documents``, ``generation`` and
            ``hallucination_try``.

    Returns:
        dict: the full state with ``hallucination_result``,
        ``hallucination_try`` and possibly ``generation`` updated.
    """
    print("--- hallucination_checker ----\n")
    generation = state["generation"]
    hallucination_try = state["hallucination_try"]

    score = hallucination_grader.invoke(
        {"documents": state["documents"], "generation": generation}
    )
    grounded = str(score.get("score", "")).strip().lower() == "yes"

    if grounded:
        hallucination_result = False
    elif not hallucination_try:
        # First failure: remember it and ask for one regeneration.
        hallucination_try = True
        hallucination_result = True
    else:
        # Second failure: stop retrying and report it instead of looping.
        print("failed: hallucinated")
        generation = "Unable to produce an answer grounded in the retrieved documents."
        hallucination_result = False

    return {"question": state["question"],
            "generation": generation,
            "need_websearch": state["need_websearch"],
            "documents": state["documents"],
            "hallucination_try": hallucination_try,
            "websearch_try_cnt": state["websearch_try_cnt"],
            "hallucination_result": hallucination_result}

def answer_to_user(state):
    """Final node: pass the state through unchanged.

    Returns:
        dict: a new dict holding the seven graph-state fields copied from
        ``state``.
    """
    print("--- answer_to_user ----\n")
    state_fields = (
        "question",
        "generation",
        "need_websearch",
        "documents",
        "hallucination_try",
        "websearch_try_cnt",
        "hallucination_result",
    )
    return {field: state[field] for field in state_fields}

workflow = StateGraph(GraphState)

# Define the nodes
workflow.add_node("start", start)
workflow.add_node("docs_retrieval", docs_retrieval)
workflow.add_node("relevance_checker", relevance_checker)
workflow.add_node("websearch_tavaily", websearch_tavaily)
workflow.add_node("generate_answer", generate_answer)
workflow.add_node("hallucination_checker", hallucination_checker)
workflow.add_node("answer_to_user", answer_to_user)

# Build graph
workflow.set_entry_point("start")
workflow.add_edge("start", "docs_retrieval")
workflow.add_edge("docs_retrieval", "relevance_checker")
# After grading: answer straight away, or fetch web results and grade again.
workflow.add_conditional_edges(
    "relevance_checker",
    lambda result: result["need_websearch"],
    {
        False: "generate_answer",
        True: "websearch_tavaily"
    }
)
workflow.add_edge("websearch_tavaily", "relevance_checker")
workflow.add_edge("generate_answer", "hallucination_checker")
# After the groundedness check: deliver the answer, or regenerate it.
workflow.add_conditional_edges(
    "hallucination_checker",
    lambda result: result["hallucination_result"],
    {
        False: "answer_to_user",
        True: "generate_answer"
     },
)
# answer_to_user is the terminal node. Give it an explicit edge to END so the
# finish point is visible in the wiring rather than implied by a missing edge.
# NOTE(review): some LangGraph releases may reject a node with no outgoing
# edge at compile time; confirm for the installed version.
workflow.add_edge("answer_to_user", END)

app = workflow.compile()

#inputs = {"question": "메시는 현재 어느팀에서 뛰나요?"}
inputs = {"question": "What is promopt-engineering?"}

# Stream the run. Each streamed item maps a node name to the state that node
# returned; only the final node's state is reported.
for output in app.stream(inputs):
    for key, value in output.items():
        if key != "answer_to_user":
            continue

        # Final answer
        print("\n최종 답변:")
        print(value["generation"])

        # Documents behind the answer: vector-store hits carry a "source"
        # metadata key, web results carry a "url" key.
        print("\n참조한 문서:")
        for doc in value["documents"]:
            if "source" in doc.metadata:
                print("- Vectorstore 검색 결과:")
                print("  내용:", doc.page_content)
                continue
            if "url" in doc.metadata:
                print(f"- 웹 검색 결과: {doc.metadata['url']}")


--- start ----

--- docs_retrieval ----

--- relevance_checker ----

--- generate_answer ----

--- hallucination_checker ----

--- answer_to_user ----


최종 답변:
Prompt engineering, also known as in-context prompting, involves methods to communicate with large language models (LLMs) to guide their behavior towards desired outcomes without altering the model's weights. It is an empirical science that requires experimentation, as the effectiveness of these methods can vary significantly across different models. The primary goal is to achieve alignment and steerability of the model.

참조한 문서:
- Vectorstore 검색 결과:
  내용: Prompt Engineering, also known as In-Context Prompting, refers to methods for how to communicate with LLM to steer its behavior for desired outcomes without updating the model weights. It is an empirical science and the effect of prompt engineering methods can vary a lot among models, thus requiring heavy experimentation and heuristics.
This post only focuses on prompt enginee