In [2]:
%%capture --no-stderr
%pip install langchain_community langchainhub chromadb langchain langgraph tavily-python langchain-text-splitters langchain_openai

In [52]:
from langchain_openai import ChatOpenAI
import os
os.environ['OPENAI_API_KEY'] = ''

llm = ChatOpenAI(model="gpt-4o-mini", temperature = 0)

from tavily import TavilyClient
tavily = TavilyClient(api_key='')

In [4]:
### Index

from langchain_community.document_loaders import WebBaseLoader
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_text_splitters import RecursiveCharacterTextSplitter

urls = [
    "https://lilianweng.github.io/posts/2023-06-23-agent/",
    "https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/",
    "https://lilianweng.github.io/posts/2023-10-25-adv-attack-llm/",
]

docs = [WebBaseLoader(url).load() for url in urls]
docs_list = [item for sublist in docs for item in sublist]

text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=250, chunk_overlap=0
)
doc_splits = text_splitter.split_documents(docs_list)

# Add to vectorDB
vectorstore = Chroma.from_documents(
    documents=doc_splits,
    collection_name="rag-chroma",
    embedding = OpenAIEmbeddings(model="text-embedding-3-small")
)
retriever = vectorstore.as_retriever()



In [6]:
user_query = "what is memory agent ?"

In [7]:
# Docs Retrieval

docs = retriever.get_relevant_documents(user_query)

# print(docs[0].page_content)

  docs = retriever.get_relevant_documents(user_query)


In [34]:
# Relevance Checker

from langchain_core.output_parsers import JsonOutputParser
from langchain_core.prompts import ChatPromptTemplate

relevance_checker_system = """
당신은 AI 기반 문서 연관성 평가자입니다.

사용자의 질문과 검색된 문서 조각(context)을 참고하여, 이 문서가 질문에 **직접적인 관련이 있는지** 판단하세요.
- 관련성이 높다면 relevant_score를 높은 점수로 설정하세요.
- 관련성이 낮다면 relevant_score를 낮은 점수로 설정하세요.
- relevant_score의 범위는 0 ~ 100 사이로 설정하세요.

추측하지 말고, 문서 안에 **질문에 답할 수 있는 정보가 실제로 존재**할 때만 true를 반환하세요.

질문:
{question}

문서 조각:
{document}

결과는 다음 형식의 JSON으로 반환하세요. 대문자는 사용하지 마세요.
{{
  "relevant_score": 0 ~ 100
}}
"""

relevance_checker_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", relevance_checker_system),
        ("human", "question: {question}\n\n document: {document}"),
    ]
)

relevance_checker = relevance_checker_prompt | llm | JsonOutputParser()
docs = retriever.invoke(user_query)
doc_txt = docs[0].page_content
relevance_checker_result = relevance_checker.invoke({"question": user_query, "document": doc_txt})


In [37]:
print("relevance_check_result :", relevance_checker_result['relevant_score'])

if int(relevance_checker_result['relevant_score']) >= 70:
    print("Relevant !!")
else:
    print("Not Relevant !!")

relevance_check_result : 60
Not Relevant


In [40]:
#def generate_answer(user_query):

from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate

generate_answer_system = """
당신은 친절하고 정확한 AI 어시스턴트입니다.

아래의 문서(document)는 사용자의 질문(question)에 답변하기 위한 참고용 자료입니다.
이 문서를 기반으로 질문에 대한 **정확하고 간결한 답변**을 생성하세요.

- 문서 내용에 포함된 정보만을 기반으로 답변하세요.
- 가능한 경우, 문서에서 근거가 되는 문장을 인용하거나 간단히 요약해서 포함하세요.
"""

generate_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", generate_answer_system),
        ("human", "question: {question}\n\n document: {document} "),
    ]
)

# Chain
rag_chain = generate_prompt | llm | StrOutputParser()

# Run
generation_answer = rag_chain.invoke({"question": user_query, "document": doc_txt})
print(generation_answer)

A memory agent refers to a system that utilizes both short-term and long-term memory capabilities. Short-term memory allows the agent to learn from in-context information, while long-term memory enables it to retain and recall vast amounts of information over extended periods, often using an external vector store for fast retrieval. This combination enhances the agent's ability to function autonomously and effectively.


In [65]:
#def search_tavily():

response = tavily.search(query=user_query, max_results=3)
response.get('results')[2]

{'query': 'what is memory agent ?', 'follow_up_questions': None, 'answer': None, 'images': [], 'results': [{'title': 'What Is AI Agent Memory? - ML Journey', 'url': 'https://mljourney.com/what-is-ai-agent-memory/', 'content': 'AI agent memory refers to the internal storage system that enables an intelligent agent to retain and access information across different points in time. This functionality is a critical leap beyond the capabilities of traditional prompt-based AI models, which handle each user input as an isolated event. Memory allows an agent to establish', 'score': 0.86981434, 'raw_content': None}, {'title': 'What Is AI Agent Memory? - IBM', 'url': 'https://www.ibm.com/think/topics/ai-agent-memory', 'content': 'What Is AI Agent Memory? What is AI agent memory? AI agent memory refers to an artificial intelligence\xa0(AI) system’s ability to store and recall past experiences to improve decision-making, perception and overall performance. Long-term memory (LTM) allows AI agents to

In [45]:
# Hallucination Checker

from langchain_core.output_parsers import JsonOutputParser
from langchain_core.prompts import ChatPromptTemplate

hallucination_checker_system = """
당신은 AI 모델이 생성한 답변이 주어진 문서(document)를 기반으로 한 것인지 검증하는 평가자입니다.


- 문서(document)의 정보에 **명확히 근거한 경우에만** "hallucinated": false로 판단하세요.
- 문서에 명확한 근거가 없거나, 과장/왜곡/추측이 섞인 경우는 "hallucinated": true로 판단하세요.
- 문서와 전혀 관련 없는 정보가 포함된 경우도 "hallucinated": true입니다.

생성한 답변:
{answer}

인용한 문서:
{document}

답변과 문서를 비교하여, 다음 형식의 JSON으로 판단 결과만 출력하세요:

{{
  "hallucinated": true or false
}}
"""

hallucination_checker_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", hallucination_checker_system),
        ("human", "answer: {answer}\n\n document: {document}"),
    ]
)

hallucination_checker = hallucination_checker_prompt | llm | JsonOutputParser()
hallucination_checker_result = hallucination_checker.invoke({"answer": generation_answer, "document": doc_txt})

In [51]:
print("hallucination_checker_result :", hallucination_checker_result.get('hallucinated'))

if hallucination_checker_result.get('hallucinated') == False:
    print("Not Hallucinated !!")
    # Anser to User
else:
    print("Hallucinated !!")
    # Back to Generate Anser

hallucination_checker_result : False
Not Hallucinated !!


## START

In [1]:
%%capture --no-stderr
%pip install langchain_community langchainhub chromadb langchain langgraph tavily-python langchain-text-splitters langchain_openai

In [2]:
from langchain_openai import ChatOpenAI
import os
os.environ['OPENAI_API_KEY'] = ''

llm = ChatOpenAI(model="gpt-4o-mini", temperature = 0)

from tavily import TavilyClient
tavily = TavilyClient(api_key='')

### Index

from langchain_community.document_loaders import WebBaseLoader
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_text_splitters import RecursiveCharacterTextSplitter

urls = [
    "https://lilianweng.github.io/posts/2023-06-23-agent/",
    "https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/",
    "https://lilianweng.github.io/posts/2023-10-25-adv-attack-llm/",
]

docs = [WebBaseLoader(url).load() for url in urls]
docs_list = [item for sublist in docs for item in sublist]

text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=250, chunk_overlap=0
)
doc_splits = text_splitter.split_documents(docs_list)

# Add to vectorDB
vectorstore = Chroma.from_documents(
    documents=doc_splits,
    collection_name="rag-chroma",
    embedding = OpenAIEmbeddings(model="text-embedding-3-small")
)
retriever = vectorstore.as_retriever()

# Relevance Checker

from langchain_core.output_parsers import JsonOutputParser
from langchain_core.prompts import ChatPromptTemplate

relevance_checker_system = """
당신은 AI 기반 문서 연관성 평가자입니다.

사용자의 질문과 검색된 문서 조각(context)을 참고하여, 이 문서가 질문에 **직접적인 관련이 있는지** 판단하세요.
- 관련성이 높다면 relevant_score를 높은 점수로 설정하세요.
- 관련성이 낮다면 relevant_score를 낮은 점수로 설정하세요.
- relevant_score의 범위는 0 ~ 100 사이로 설정하세요.

추측하지 말고, 문서 안에 **질문에 답할 수 있는 정보가 실제로 존재**할 때만 true를 반환하세요.

질문:
{question}

문서 조각:
{document}

결과는 다음 형식의 JSON으로 반환하세요.
{{
  "relevant_score": 0 ~ 100
}}
"""

relevance_checker_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", relevance_checker_system),
        ("human", "question: {question}\n\n document: {document}"),
    ]
)

relevance_checker = relevance_checker_prompt | llm | JsonOutputParser()

#def generate_answer(user_query):

from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate

generate_answer_system = """
당신은 친절하고 정확한 AI 어시스턴트입니다.

아래의 문서(document)는 사용자의 질문(question)에 답변하기 위한 참고용 자료입니다.
이 문서를 기반으로 질문에 대한 **정확하고 간결한 답변**을 생성하세요.

- 문서 내용에 포함된 정보만을 기반으로 답변하세요.
- 가능한 경우, 문서에서 근거가 되는 문장을 인용하거나 간단히 요약해서 포함하세요.
"""

generate_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", generate_answer_system),
        ("human", "question: {question}\n\n document: {document} "),
    ]
)

# Chain
rag_chain = generate_prompt | llm | StrOutputParser()

# Hallucination Checker

from langchain_core.output_parsers import JsonOutputParser
from langchain_core.prompts import ChatPromptTemplate

hallucination_checker_system = """
당신은 AI 모델이 생성한 답변이 주어진 문서(document)를 기반으로 한 것인지 검증하는 평가자입니다.


- 문서(document)의 정보에 **명확히 근거한 경우에만** "hallucinated": false로 판단하세요.
- 문서에 명확한 근거가 없거나, 과장/왜곡/추측이 섞인 경우는 "hallucinated": true로 판단하세요.
- 문서와 전혀 관련 없는 정보가 포함된 경우도 "hallucinated": true입니다.

생성한 답변:
{answer}

인용한 문서:
{document}

답변과 문서를 비교하여, 다음 형식의 JSON으로 판단 결과만 출력하세요:

{{
  "hallucinated": true or false
}}
"""

hallucination_checker_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", hallucination_checker_system),
        ("human", "answer: {answer}\n\n document: {document}"),
    ]
)

hallucination_checker = hallucination_checker_prompt | llm | JsonOutputParser()



In [3]:
from pprint import pprint
from typing import List

from langchain_core.documents import Document
from typing_extensions import TypedDict

from langgraph.graph import END, StateGraph

MAX_GENERATION_CNT = 1
MAX_SEARCH_CNT = 1
RELEVANT_SCORE_THRESHOLD = 50

class GraphState(TypedDict):
    """
    Represents the state of our graph.

    Attributes:
        question: question
        generation: LLM generation
        web_search: whether to add search
        documents: list of documents
    """

    question: str
    documents: List[str]
    ref_doc: str
    generation: str
    search_cnt: int
    generation_cnt: int
    stop_flag: bool

def retrieve(state):
  print("------------------------------ RETRIEVE ------------------------------")

  question = state["question"]
  documents = retriever.invoke(question)

  print("question :", question)
  print("documents :", documents)

  return {"documents": documents, "question": question}

def web_search(state):
  print("------------------------------ WEB SEARCH ------------------------------")

  print("now search_cnt: ", state["search_cnt"])
  if state["search_cnt"] > MAX_SEARCH_CNT:
    print("TOO_MANY_SEARCH !!! END ...")
    return {"generation": "TOO_MANY_SEARCH !!! END ...", "stop_flag": True}

  question = state["question"]

  response = tavily.search(query=question, max_results=3)
  # print("tavily search result :", response.get('results'))

  return {"documents": response.get('results'), "search_cnt" : state["search_cnt"] + 1}

def grade_documents(state):
  print("------------------------------ CHECK DOCUMENT RELEVANCE TO QUESTION ------------------------------")
  question = state["question"]
  documents = state["documents"]

  for doc in documents:
    relevance_checker_result = relevance_checker.invoke({"question": question, "document": doc})
    print("relevance_check_score :", relevance_checker_result['relevant_score'])
    if relevance_checker_result['relevant_score'] >= RELEVANT_SCORE_THRESHOLD:
      ref_doc = doc
      return {"ref_doc": ref_doc}

  return {"ref_doc": "**NOT_RELEVANCE**"}

def generate(state):
  print("------------------------------ GENERATE ANSWER ------------------------------")

  print("now generation_cnt: ", state["generation_cnt"])
  if state["generation_cnt"] > MAX_GENERATION_CNT:
    print("TOO_MANY_GENERATION !!! END ...")
    return {"generation": "TOO_MANY_GENERATION !!! END ...", "stop_flag": True}


  question = state["question"]
  ref_doc = state["ref_doc"]

  generation_answer = rag_chain.invoke({"question": question, "document": ref_doc})

  return {"generation": generation_answer, "generation_cnt" : state["generation_cnt"] + 1}

def hallucination_check(state):
  print("------------------------------ CHECK HALLUCINATION ------------------------------")

  if state["stop_flag"] :
    return "***END***"

  generation_answer = state["generation"]

  question = state["generation"]

  doc_txt = state["ref_doc"]

  hallucination_checker_result = hallucination_checker.invoke({"answer": generation_answer, "document": doc_txt})
  print("hallucination_checker_result :", hallucination_checker_result.get('hallucinated'))

  if str(hallucination_checker_result.get('hallucinated')).lower() == "false":
    print("NOT_HALLUCINATED, ANSWER RETURN")
    return "NOT_HALLUCINATED"
  elif str(hallucination_checker_result.get('hallucinated')).lower() == "true":
    print("'HALLUCINATED, NEED RE GENERATION")
    return "HALLUCINATED"

def decide_to_generate(state):
  print("------------------------------ DECIDE TO GENERATE ------------------------------")

  if state["stop_flag"] :
    return "***END***"

  if state.get("ref_doc") == "**NOT_RELEVANCE**":
    print("GO TO WEB_SEARCH")
    return "WEB_SEARCH"
  else:
    print("GO TO GENERATE")
    return "GENERATE"

workflow = StateGraph(GraphState)

# Define the nodes
workflow.add_node("retrieve", retrieve)  # retrieve
workflow.add_node("websearch", web_search)  # web search
workflow.add_node("grade_documents", grade_documents)  # grade documents
workflow.add_node("generate", generate)  # generatae
workflow.add_node("hallucination_check", hallucination_check)  # generate

<langgraph.graph.state.StateGraph at 0x7f6032cfb250>

In [4]:
# 그래프 시작 지점
workflow.set_entry_point("retrieve")

# 엣지 추가
workflow.add_edge("retrieve", "grade_documents")      # 검색 → 관련성 평가

# 조건부 엣지
workflow.add_conditional_edges(
    "grade_documents",
    decide_to_generate,
    {
        "GENERATE": "generate",     # 충분히 관련 → 바로 답변 생성
        "WEB_SEARCH": "websearch",  # 부족하면 → 웹 검색으로 보강"
        "***END***": END           # 종료
    },
)

workflow.add_edge("websearch", "grade_documents")     # 웹 검색 후 → 관련성 평가

## 환각 검증 결과에 따라 분기
workflow.add_conditional_edges(
    "generate",
    hallucination_check,
    {
        "NOT_HALLUCINATED": END,     # 문제없으면 종료
        "HALLUCINATED": "generate",  # 환각이면 추가 검색 후 재생성
        "***END***": END           # 종료
    },
)

# 그래프 컴파일
graph = workflow.compile()

In [5]:
# 실행 예시
result = graph.invoke({"question": "에이전트가 뭔지 한글로 대답해.", "search_cnt": 0, "generation_cnt": 0, "stop_flag": False})
print("\n\n================ result ================")
pprint(result)

------------------------------ RETRIEVE ------------------------------
question : 에이전트가 뭔지 한글로 대답해.
documents : [Document(metadata={'language': 'en', 'description': 'Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.\nAgent System Overview\nIn a LLM-powered autonomous agent system, LLM functions as the agent’s brain, complemented by several key components:\n\nPlanning\n\nSubgoal and decomposition: The agent breaks down large tasks into smaller, manageable subgoals, enabling efficient handling of complex tasks.\nReflection and refinement: The agent can do self-criticism and self-reflection over past actions, learn from mistakes and refine them for future steps, thereby improving the quality o

In [6]:
# 실행 예시
result = graph.invoke({"question": "대한민국 서울의 오늘 날씨 알려줘.", "search_cnt": 0, "generation_cnt": 0, "stop_flag": False})
print("\n\n================ result ================")
pprint(result)

------------------------------ RETRIEVE ------------------------------
question : 대한민국 서울의 오늘 날씨 알려줘.
documents : [Document(metadata={'language': 'en', 'description': 'Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.\nAgent System Overview\nIn a LLM-powered autonomous agent system, LLM functions as the agent’s brain, complemented by several key components:\n\nPlanning\n\nSubgoal and decomposition: The agent breaks down large tasks into smaller, manageable subgoals, enabling efficient handling of complex tasks.\nReflection and refinement: The agent can do self-criticism and self-reflection over past actions, learn from mistakes and refine them for future steps, thereby improving the quality

In [7]:
# 실행 예시
result = graph.invoke({"question": "집에 가려면 어떻게 해 ? 다양한 비유를 섞여서 설명해.", "search_cnt": 0, "stop_flag": False})
print("\n\n================ result ================")
pprint(result)

------------------------------ RETRIEVE ------------------------------
question : 집에 가려면 어떻게 해 ? 다양한 비유를 섞여서 설명해.
documents : [Document(metadata={'title': "LLM Powered Autonomous Agents | Lil'Log", 'description': 'Building agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.\nAgent System Overview\nIn a LLM-powered autonomous agent system, LLM functions as the agent’s brain, complemented by several key components:\n\nPlanning\n\nSubgoal and decomposition: The agent breaks down large tasks into smaller, manageable subgoals, enabling efficient handling of complex tasks.\nReflection and refinement: The agent can do self-criticism and self-reflection over past actions, learn from mistakes and refine them f

Optional #2

In [None]:
from langchain_core.runnables import ConfigurableFieldSpec
from langchain_core.runnables import RunnableParallel
from langchain_community.chat_message_histories import SQLChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You're an assistant who speaks in {language}. Respond in 20 words or fewer",
        ),
        MessagesPlaceholder(variable_name="history"),
        ("human", "{input}"),
    ]
)

runnable = prompt | llm

def get_session_history(user_id: str, conversation_id: str):
    return SQLChatMessageHistory(f"{user_id}--{conversation_id}", "sqlite:///memory.db")

with_message_history = RunnableWithMessageHistory(
    runnable,
    get_session_history,
    input_messages_key="input",
    history_messages_key="history",
    history_factory_config=[
        ConfigurableFieldSpec(
            id="user_id",
            annotation=str,
            name="User ID",
            description="Unique identifier for the user.",
            default="",
            is_shared=True,
        ),
        ConfigurableFieldSpec(
            id="conversation_id",
            annotation=str,
            name="Conversation ID",
            description="Unique identifier for the conversation.",
            default="",
            is_shared=True,
        ),
    ],
)

with_message_history.invoke(
    {"language": "korean", "input": "hi im bob!"},
    config={"configurable": {"user_id": "123", "conversation_id": "1"}},
)

In [10]:
from tavily import TavilyClient
tavily_test = TavilyClient(api_key='')

response1 = tavily_test.search(query="Where does Messi play right now?", max_results=3)
context = [{"url": obj["url"], "content": obj["content"]} for obj in response1['results']]

# You can easily get search result context based on any max tokens straight into your RAG.
# The response is a string of the context within the max_token limit.

response2 = tavily_test.get_search_context(query="Where does Messi play right now?", search_depth="advanced", max_tokens=500)

# You can also get a simple answer to a question including relevant sources all with a simple function call:
# You can use it for baseline
response3 = tavily_test.qna_search(query="Where does Messi play right now?")


In [11]:
print("======================== 1 ========================")
print(response1)

print("======================== 2 ========================")
print(response2)

print("======================== 3 ========================")
print(response3)

{'query': 'Where does Messi play right now?', 'follow_up_questions': None, 'answer': None, 'images': [], 'results': [{'title': "Where's Messi? Why Isn't He Playing For Inter Miami? Did He Leave?", 'url': 'https://stylecaster.com/entertainment/celebrity-news/1638745/where-messi-inter-miami/', 'content': 'Messi has always been expected to miss some games with Inter Miami. Back in August 2023, Martino confirmed that the footballer would miss "at least three games" with the Miami team this season.', 'score': 0.6100127, 'raw_content': None}, {'title': 'Is Lionel Messi playing today? Status, lineup for next Inter Miami game ...', 'url': 'https://www.sportingnews.com/us/soccer/news/lionel-messi-playing-today-status-lineup-inter-miami-2025/b87bb697bffbfbd6b7de8a7a', 'content': "Every week, Lionel Messi garners more attention than any other player in Major League Soccer, and the 37-year-old's fitness is a consistent talking point.", 'score': 0.539833, 'raw_content': None}, {'title': 'Lionel Mes

In [19]:
pip install streamlit



In [20]:
pip install gpt4all

Collecting gpt4all
  Downloading gpt4all-2.8.2-py3-none-manylinux1_x86_64.whl.metadata (4.8 kB)
Downloading gpt4all-2.8.2-py3-none-manylinux1_x86_64.whl (121.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m121.6/121.6 MB[0m [31m6.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: gpt4all
Successfully installed gpt4all-2.8.2


In [14]:
import streamlit as st

In [17]:
import streamlit as st
from tavily import TavilyClient
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.embeddings import GPT4AllEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.chat_models import ChatOllama
from langchain_core.output_parsers import JsonOutputParser, StrOutputParser
from langchain_core.prompts import PromptTemplate
from langgraph.graph import END, StateGraph
from typing import List
from typing_extensions import TypedDict
from langchain_core.documents import Document

tavily = TavilyClient(api_key="")

st.set_page_config(
    page_title="Research Assistant",
    page_icon=":orange_heart:",
)


class GraphState(TypedDict):
    question: str
    generation: str
    web_search: str
    documents: List[Document]


def main():
    # 모델 선택
    llm_model = st.sidebar.selectbox(
        "Select Model",
        options=[
            "llama3",
        ],
    )

    urls = [
        "https://lilianweng.github.io/posts/2023-06-23-agent/",
        "https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/",
        "https://lilianweng.github.io/posts/2023-10-25-adv-attack-llm/",
    ]

    # 웹 페이지 로드 및 문서 분할
    docs = [WebBaseLoader(url).load() for url in urls]
    docs_list = [item for sublist in docs for item in sublist]

    text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
        chunk_size=250, chunk_overlap=0
    )
    doc_splits = text_splitter.split_documents(docs_list)

    # 벡터 저장소 생성
    vectorstore = Chroma.from_documents(
        documents=doc_splits,
        collection_name="rag-chroma",
        embedding=GPT4AllEmbeddings(model_name="all-MiniLM-L6-v2.gguf2.f16.gguf"),
    )
    retriever = vectorstore.as_retriever()

    # RAG 에이전트 노드 및 엣지 정의
    def retrieve(state):
        print("---RETRIEVE---")
        question = state["question"]
        documents = retriever.invoke(question)
        return {"documents": documents, "question": question}

    def generate(state):
        print("---GENERATE---")
        question = state["question"]
        documents = state["documents"]
        generation = rag_chain.invoke({"context": documents, "question": question})
        return {"documents": documents, "question": question, "generation": generation}

    def grade_documents(state):
        print("---CHECK DOCUMENT RELEVANCE TO QUESTION---")
        question = state["question"]
        documents = state["documents"]
        filtered_docs = []
        web_search = "No"
        for d in documents:
            score = retrieval_grader.invoke(
                {"question": question, "document": d.page_content}
            )
            grade = score["score"]
            if grade.lower() == "yes":
                print("---GRADE: DOCUMENT RELEVANT---")
                filtered_docs.append(d)
            else:
                print("---GRADE: DOCUMENT NOT RELEVANT---")
                web_search = "Yes"
                continue
        return {
            "documents": filtered_docs,
            "question": question,
            "web_search": web_search,
        }

    def web_search(state):
        print("---WEB SEARCH---")
        question = state["question"]
        documents = state["documents"]
        docs = tavily.search(query=question)["results"]
        web_results = "\n".join([d["content"] for d in docs])
        web_results = Document(page_content=web_results)
        if documents is not None:
            documents.append(web_results)
        else:
            documents = [web_results]
        return {"documents": documents, "question": question}

    def route_question(state):
        print("---ROUTE QUESTION---")
        question = state["question"]
        print(question)
        source = question_router.invoke({"question": question})
        print(source)
        print(source["datasource"])
        if source["datasource"] == "web_search":
            print("---ROUTE QUESTION TO WEB SEARCH---")
            return "websearch"
        elif source["datasource"] == "vectorstore":
            print("---ROUTE QUESTION TO RAG---")
            return "vectorstore"

    def decide_to_generate(state):
        print("---ASSESS GRADED DOCUMENTS---")
        state["question"]
        web_search = state["web_search"]
        state["documents"]

        if web_search == "Yes":
            print(
                "---DECISION: ALL DOCUMENTS ARE NOT RELEVANT TO QUESTION, INCLUDE WEB SEARCH---"
            )
            return "websearch"
        else:
            print("---DECISION: GENERATE---")
            return "generate"

    def grade_generation_v_documents_and_question(state):
        print("---CHECK HALLUCINATIONS---")
        question = state["question"]
        documents = state["documents"]
        generation = state["generation"]

        score = hallucination_grader.invoke(
            {"documents": documents, "generation": generation}
        )
        grade = score["score"]

        if grade == "yes":
            print("---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---")
            print("---GRADE GENERATION vs QUESTION---")
            score = answer_grader.invoke(
                {"question": question, "generation": generation}
            )
            grade = score["score"]
            if grade == "yes":
                print("---DECISION: GENERATION ADDRESSES QUESTION---")
                return "useful"
            else:
                print("---DECISION: GENERATION DOES NOT ADDRESS QUESTION---")
                return "not useful"
        else:
            print("---DECISION: GENERATION IS NOT GROUNDED IN DOCUMENTS, RE-TRY---")
            return "not supported"

    # RAG 에이전트 그래프 구성
    workflow = StateGraph(GraphState)
    workflow.add_node("websearch", web_search)
    workflow.add_node("retrieve", retrieve)
    workflow.add_node("grade_documents", grade_documents)
    workflow.add_node("generate", generate)

    workflow.set_conditional_entry_point(
        route_question,
        {
            "websearch": "websearch",
            "vectorstore": "retrieve",
        },
    )

    workflow.add_edge("retrieve", "grade_documents")
    workflow.add_conditional_edges(
        "grade_documents",
        decide_to_generate,
        {
            "websearch": "websearch",
            "generate": "generate",
        },
    )
    workflow.add_edge("websearch", "generate")
    workflow.add_conditional_edges(
        "generate",
        grade_generation_v_documents_and_question,
        {
            "not supported": "generate",
            "useful": END,
            "not useful": "websearch",
        },
    )

    app = workflow.compile()

    # rag_chain 정의
    llm = ChatOllama(model=llm_model, temperature=0)
    prompt = PromptTemplate(
        template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|> You are an assistant for question-answering tasks.
        Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know.
        Use three sentences maximum and keep the answer concise <|eot_id|><|start_header_id|>user<|end_header_id|>
        Question: {question}
        Context: {context}
        Answer: <|eot_id|><|start_header_id|>assistant<|end_header_id|>""",
        input_variables=["question", "context"],
    )
    rag_chain = prompt | llm | StrOutputParser()

    # retrieval_grader, hallucination_grader, answer_grader 정의
    llm = ChatOllama(model=llm_model, format="json", temperature=0)

    retrieval_grader_prompt = PromptTemplate(
        template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|> You are a grader assessing relevance
        of a retrieved document to a user question. If the document contains keywords related to the user question,
        grade it as relevant. It does not need to be a stringent test. The goal is to filter out erroneous retrievals. \n
        Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question. \n
        Provide the binary score as a JSON with a single key 'score' and no premable or explanation.
         <|eot_id|><|start_header_id|>user<|end_header_id|>
        Here is the retrieved document: \n\n {document} \n\n
        Here is the user question: {question} \n <|eot_id|><|start_header_id|>assistant<|end_header_id|>
        """,
        input_variables=["question", "document"],
    )
    retrieval_grader = retrieval_grader_prompt | llm | JsonOutputParser()

    hallucination_grader_prompt = PromptTemplate(
        template=""" <|begin_of_text|><|start_header_id|>system<|end_header_id|> You are a grader assessing whether
        an answer is grounded in / supported by a set of facts. Give a binary 'yes' or 'no' score to indicate
        whether the answer is grounded in / supported by a set of facts. Provide the binary score as a JSON with a
        single key 'score' and no preamble or explanation. <|eot_id|><|start_header_id|>user<|end_header_id|>
        Here are the facts:
        \n ------- \n
        {documents}
        \n ------- \n
        Here is the answer: {generation}  <|eot_id|><|start_header_id|>assistant<|end_header_id|>""",
        input_variables=["generation", "documents"],
    )
    hallucination_grader = hallucination_grader_prompt | llm | JsonOutputParser()

    answer_grader_prompt = PromptTemplate(
        template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|> You are a grader assessing whether an
        answer is useful to resolve a question. Give a binary score 'yes' or 'no' to indicate whether the answer is
        useful to resolve a question. Provide the binary score as a JSON with a single key 'score' and no preamble or explanation.
         <|eot_id|><|start_header_id|>user<|end_header_id|> Here is the answer:
        \n ------- \n
        {generation}
        \n ------- \n
        Here is the question: {question} <|eot_id|><|start_header_id|>assistant<|end_header_id|>""",
        input_variables=["generation", "question"],
    )
    answer_grader = answer_grader_prompt | llm | JsonOutputParser()

    question_router_prompt = PromptTemplate(
        template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|> You are an expert at routing a
    user question to a vectorstore or web search. Use the vectorstore for questions on LLM  agents,
    prompt engineering, and adversarial attacks. You do not need to be stringent with the keywords
    in the question related to these topics. Otherwise, use web-search. Give a binary choice 'web_search'
    or 'vectorstore' based on the question. Return the a JSON with a single key 'datasource' and
    no premable or explanation. Question to route: {question} <|eot_id|><|start_header_id|>assistant<|end_header_id|>""",
        input_variables=["question"],
    )
    question_router = question_router_prompt | llm | JsonOutputParser()

    # ----------------------------------------------------------------------
    # Streamlit 앱 UI
    st.title("Research Assistant powered by OpenAI")

    input_topic = st.text_input(
        ":female-scientist: Enter a topic",
        value="Superfast Llama 3 inference on Groq Cloud",
    )

    generate_report = st.button("Generate Report")

    if generate_report:
        with st.spinner("Generating Report"):
            inputs = {"question": input_topic}
            for output in app.stream(inputs):
                for key, value in output.items():
                    print(f"Finished running: {key}:")
            final_report = value["generation"]
            st.markdown(final_report)

    st.sidebar.markdown("---")
    if st.sidebar.button("Restart"):
        st.session_state.clear()
        st.experimental_rerun()


main()



ImportError: Could not import gpt4all library. Please install the gpt4all library to use this embedding model: pip install gpt4all