In [1]:
# Configuration for managing API keys through environment variables
from dotenv import load_dotenv

# Load the API key information
load_dotenv()

True

In [2]:
from langchain_teddynote import logging

# Set the project name used for tracing.
# NOTE(review): the project name below means "stock analysis", while the visible
# cells build a character/persona graph — confirm this is the intended project.
logging.langsmith("주식분석")

LangSmith 추적을 시작합니다.
[프로젝트명]
주식분석


In [3]:
import sys
import os
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from edges import *



In [4]:
# Source pages that get loaded, split and later embedded into the vector store.
urls = [
    "https://the-edit.co.kr/65111",
    "https://blog.naver.com/sud_inc/223539001961?trackingCode=rss",
    "https://mochaclass.com/blog/직장인을-위한-취미생활-가이드-요즘-취미-트렌드부터-취미-추천까지-7797",
    "https://www.hankyung.com/article/2024072845441",
]

# `docs` keeps one list of documents per URL; `docs_list` is the flattened view.
docs = []
docs_list = []
for url in urls:
    loaded = WebBaseLoader(url).load()
    docs.append(loaded)
    docs_list.extend(loaded)

# Splitter built through the tiktoken-based constructor
# (presumably sizes are measured in tokens — confirm against the splitter docs).
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=100,
    chunk_overlap=50,
)
doc_splits = text_splitter.split_documents(docs_list)

In [5]:
# Add the split documents to the vector DB.
# NOTE(review): the collection is persisted under ./chroma_db, so re-running this
# cell presumably appends the same chunks again — confirm, and guard if so.
vectorstore = Chroma.from_documents(
    documents=doc_splits,
    collection_name="rag-chroma",
    embedding=OpenAIEmbeddings(),
    persist_directory="./chroma_db",
)

In [6]:
## Split Documents
# NOTE(review): this rebinds `text_splitter` after `doc_splits` was already
# produced, and no later visible cell uses it — looks like a leftover cell;
# confirm before removing.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=100, chunk_overlap=50)

In [7]:
# Design of the node that uses RAG.
# First, load the data from the previously saved DB.
# OpenAI embeddings are used as the embedding function.

from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings
from langchain.tools.retriever import create_retriever_tool
from langgraph.prebuilt import ToolNode

vectorstore = Chroma(
    collection_name="rag-chroma",
    embedding_function=OpenAIEmbeddings(),
    persist_directory="./chroma_db",
)
retriever = vectorstore.as_retriever()

# Define the retriever tool.
# With this, the tool can be bound to the LLM.
retriever_tool = create_retriever_tool(
    retriever,
    # Name of the tool when the retriever is invoked through a tool call
    "retrieve_trends",
    # Description that lets the agent judge when this tool should be called
    "Search for the latest trends in fashion and hobbies and return relevant information.",
)

# Define the tool node.
# The tool has to be wrapped in a node to be used inside the graph.
retrieve = ToolNode([retriever_tool])

In [8]:
##### STATE #####
from typing import Annotated
from typing_extensions import TypedDict
from langgraph.graph.message import add_messages

class PersonaState(TypedDict):
    """State shared by the nodes of the persona-building graph."""

    # Text of the most recent human message; written by character_make_node and
    # read by retrieve_check_node / rewrite_node.
    user_input: str
    # Conversation history; `add_messages` is attached as the reducer for updates.
    messages: Annotated[list, add_messages]
    # Not read or written by any node visible in this notebook section.
    character_persona_dict: dict
    # Written by retrieve_check_node (True only when the grader answered "no");
    # read by retrieve_route.
    retrieve_check: bool
    # Text that was graded by retrieve_check_node; appended to the web results
    # in rewrite_search_node.
    retrieval_msg: str
    # Query produced by rewrite_node and consumed by rewrite_search_node.
    rewrite_query: str
    # Read by character_make_node to choose the tool-bound model; that node
    # always writes False back.
    # The previous `Annotated[bool, True]` did not give this key a default:
    # `True` was only inert annotation metadata, so whichever node starts the
    # run must set the key explicitly.
    tools_call_switch: bool

In [9]:
# 필요한 라이브러리를 한 번에 모두 로드
import sys
import os
import json
from states import *
from pydantic import BaseModel, Field
from langchain_chroma import Chroma
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings
from langchain_core.prompts import ChatPromptTemplate, FewShotChatMessagePromptTemplate
from langchain_core.messages import HumanMessage, ToolMessage
from langgraph.prebuilt import ToolNode
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain.tools.retriever import create_retriever_tool

In [10]:
# Load ChromaDB
# NOTE(review): same construction as the earlier RAG-node cell (same collection
# name and persist directory); one of the two looks redundant — confirm.
vectorstore = Chroma(
    collection_name="rag-chroma",
    embedding_function=OpenAIEmbeddings(),
    persist_directory="./chroma_db",
)
retriever = vectorstore.as_retriever()


In [11]:
# `tool` is the search tool that gets bound to the LLM.
tool = TavilySearchResults(max_results=3)

In [12]:
# `web_search_tool` is invoked directly to fetch search results.
# It is used when a re-search has been requested.
web_search_tool = TavilySearchResults(max_results=5)

In [13]:
# Node 1-1. Search node
tool_node = ToolNode(tools=[tool])

In [14]:
# Create the RAG retrieval tool that the retrieval node is built from.
retriever_tool = create_retriever_tool(
    retriever,
    "retrieve_trends",
    "Search for the latest trends in fashion and hobbies and return relevant information.",
)

In [15]:
# Node 1-2. RAG node.
retrieve = ToolNode([retriever_tool])

In [16]:
# Combine the two tools into a single list.
tools = [tool, retriever_tool]

In [17]:
# Models for character_make_node: the plain chat model and a variant with both tools bound.
character_model = ChatOpenAI(model="gpt-4o", temperature=0.2)
character_model_with_tools = character_model.bind_tools(tools)

def character_make_node(state: PersonaState):
    """Generate a character description from the latest human input.

    The most recent ``HumanMessage`` in ``state['messages']`` is the user input.
    When the last message is something else (for example a tool result), its
    content is passed to the model as retrieval context: if the content is a
    JSON list of objects carrying a ``"content"`` key, those values are joined
    with blank lines; otherwise the raw content is used unchanged.

    When ``state['tools_call_switch']`` is truthy the tool-bound model is used,
    so the response may carry tool calls instead of a final answer; otherwise
    the plain model is used.

    Returns:
        dict: ``messages`` (a one-element list with the model response),
        ``user_input`` (the human input text) and ``tools_call_switch``
        (always ``False``).

    Raises:
        ValueError: if ``state['messages']`` contains no ``HumanMessage``.
    """
    prompt = ChatPromptTemplate.from_messages([
        ("system","""
        You are an expert in creating characters for fiction.\n
        Whatever input the user is presented with, you must return a description of the completed character.\n
        If no information is available, randomly generate and return the character's attributes.\n
        Based on the values entered by the user, envision and present the character, including the character's age, gender, job, location, interests, hobbies and etc.\n
        The returned value must be in Korean.\n
        """),
        ("human", "Input: {human_input}\n Retrieve: {context}"),
    ])
    prompt_with_tools = ChatPromptTemplate.from_messages([
        ("system","""
        You are an expert in creating characters for fiction.\n
        Whatever input the user is presented with, you must return a description of the completed character.\n
        If no information is available, randomly generate and return the character's attributes.\n
        Based on the values entered by the user, envision and present the character, including the character's age, gender, job, location, interests, hobbies and etc.\n
        If you have difficulty creating an appropriate character, use an online search to solve the problem.\n
        The returned value must be in Korean.\n
        """),
        ("human", "Input: {human_input}\n Retrieve: {context}"),
    ])

    messages_list = state['messages']
    last_human = next(
        (msg for msg in reversed(messages_list) if isinstance(msg, HumanMessage)),
        None,
    )
    if last_human is None:
        # Previously this surfaced as an opaque AttributeError on `None.content`.
        raise ValueError("No HumanMessage found in state['messages']; cannot build a character.")
    last_human_message = last_human.content
    last_msg = messages_list[-1].content

    if last_human_message == last_msg:
        # The newest message is the human input itself: there is no context yet.
        last_msg = ""
        print(f"==================================== INPUT ====================================\nHuman Input: {last_human_message}")
    else:
        try:
            last_msg_data = json.loads(last_msg)
            last_msg = "\n\n".join([d["content"] for d in last_msg_data])
        except (ValueError, TypeError, KeyError):
            # Not a JSON list of {"content": ...} items (e.g. plain text from the
            # retriever or the rewrite-search node): keep the raw content.
            # json.JSONDecodeError is a ValueError subclass.
            pass
        print(f"==================================== INPUT ====================================\nHuman Input: {last_human_message}\nContext: {last_msg}")

    if state['tools_call_switch']:
        chain_with_tools = prompt_with_tools | character_model_with_tools
        response = chain_with_tools.invoke({"human_input": last_human_message, "context": last_msg})

        # Only the first tool call decides which banner is printed.
        tool_calls = getattr(response, "tool_calls", None) or []
        requested_tool = tool_calls[0]["name"] if tool_calls else None

        if requested_tool == "tavily_search_results_json":
            print("================================ Search Online ================================")
        elif requested_tool == "retrieve_trends":
            print("=============================== Search Retrieval ===============================")
        else:
            print("============================= Chracter Information =============================")
            print(response.content)
    else:
        chain = prompt | character_model
        response = chain.invoke({"human_input": last_human_message, "context": last_msg})
        print("============================= Chracter Information =============================")
        print(response.content)

    # Every path writes False: later passes through this node use the plain
    # model unless another node sets the switch back.
    return {"messages": [response], "user_input": last_human_message, "tools_call_switch": False}

In [18]:
# Node 1-3. RAG verification node
# Takes the tool output of node 1-2, checks whether it fits the user input, and answers yes or no.
# If yes, the flow goes straight back to the Character Make node to produce the final answer;
# otherwise a web search is run and the new results are sent along.

# NOTE(review): `enum=` is passed to Field as an extra keyword; newer pydantic
# versions presumably prefer Literal['yes', 'no'] — confirm against the installed version.
class GradeDocuments(BaseModel):
    """Binary score for relevance check on retrieved documents."""
    binary_score:str = Field(..., description="Documents are relevant to the question, 'yes' or 'no'", enum=['yes', 'no'])
 
# Grader model; rebound so that it returns a GradeDocuments instance.
rag_check_model = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)
rag_check_model = rag_check_model.with_structured_output(GradeDocuments)

def retrieve_check_node(state: PersonaState):
    """Grade whether the retrieved text can answer the user's input.

    The content of the last message in ``state['messages']`` is graded against
    ``state['user_input']`` by ``rag_check_model``.

    Returns:
        dict: ``retrieve_check`` is True only when the grader answered "no";
        ``retrieval_msg`` carries the text that was graded.
    """
    grading_prompt = ChatPromptTemplate.from_messages(
        [
            ("system", """
            You are a consultation expert who provides appropriate information in response to user input.
            Return 'yes' or 'no' if you can provide an accurate answer to the user's question from the given documentation.
            If you can't provide a clear answer, be sure to return NO.
            """),
            ("human", "Retrieved document: \n\n {document} \n\n User's input: {question}"),
        ]
    )

    retrieved_text = state['messages'][-1].content
    question = state['user_input']
    grader_chain = grading_prompt | rag_check_model
    verdict = grader_chain.invoke({"document": retrieved_text, "question": question}).binary_score

    # Any verdict other than "no" leaves the flag False; only "yes"/"no" print a banner.
    needs_rewrite = verdict == "no"
    if needs_rewrite:
        print("=============================== Need to Check ===============================")
    elif verdict == "yes":
        print("============================== No Need to Check =============================")

    return {"retrieve_check": needs_rewrite, "retrieval_msg": retrieved_text}




In [19]:
# ----------------------------------------------------------------------------
# ----------------------------------------------------------------------------
# ----------------------------------------------------------------------------
# Node 1-4. Query rewrite node
# When the retrieval produced by node 1-2 does not match the input well, the input is rewritten.
# Uses the `user_input` key of the state.
# Runs when the grader in node 1-3 answers "no": retrieve_check_node then sets
# retrieve_check to True and retrieve_route returns "rewrite".

# Schema passed to with_structured_output below.
# NOTE(review): the class docstring is left untouched because it may be forwarded
# to the model as the schema description — confirm before editing it.
class Rewrite_Output(TypedDict):
    """
    Sturctured_output을 생성하기위한 클래스
    """
    query: Annotated[str, ..., "Rewritten query to find appropriate material on the web"]

# Rewrite model; rebound so that it returns output shaped like Rewrite_Output.
rewrite_model = ChatOpenAI(model="gpt-4o-mini", temperature=0)
rewrite_model = rewrite_model.with_structured_output(Rewrite_Output)

def rewrite_node(state: PersonaState):
    """Rewrite the stored user input into a query better suited to web search.

    Sends ``state['user_input']`` through ``rewrite_model`` and reads the
    ``query`` key of the structured result.

    Returns:
        dict: ``rewrite_query`` holding the rewritten query.
    """
    rewrite_prompt = ChatPromptTemplate.from_messages(
        [
            ("system", """
            You're an expert in improving search relevance.\n
            Look at previously entered search queries and rewrite them to better find that information on the internet.
            """),
            ("human", "Previously entered search queries: \n{user_input}"),
        ]
    )

    original_query = state['user_input']
    structured = (rewrite_prompt | rewrite_model).invoke({"user_input": original_query})
    new_query = structured['query']
    print(f"================================ Rewrited Query ================================\nRewritted Query: {new_query}")

    return {"rewrite_query": new_query}

In [20]:
# ----------------------------------------------------------------------------
# ----------------------------------------------------------------------------
# ----------------------------------------------------------------------------
# Node 1-5. Searches the web with the rewritten query.

def rewrite_search_node(state: PersonaState):
    """Search the web with ``state['rewrite_query']`` and add the findings as a message.

    The ``content`` of each search hit is joined with blank lines, then
    ``state['retrieval_msg']`` is appended so the earlier retrieval is kept
    alongside the web results.

    Returns:
        dict: ``messages`` with a single ``ToolMessage`` carrying the combined text.
    """
    print("================================ Search Web ================================")
    search_output = web_search_tool.invoke({"query": state['rewrite_query']})

    if isinstance(search_output, str):
        # The tool handed back plain text instead of a list of hits (presumably
        # an error description — confirm against the tool docs). Iterating it
        # used to raise TypeError on `d["content"]`; pass the text through instead.
        web_results = search_output
    else:
        web_results = "\n\n".join(
            hit["content"]
            for hit in search_output
            if isinstance(hit, dict) and "content" in hit
        )
    web_results = web_results + "\n\n" + state['retrieval_msg']

    # tool_call_id is the tool's name, not an id taken from an AI tool call;
    # character_make_node only reads this message's content.
    new_messages = [ToolMessage(content=web_results, tool_call_id="tavily_search_results_json")]

    return {"messages": new_messages}

In [21]:
# Routing function, revised.
# Determines whether a web search or the RAG retrieval was requested.
def simple_route(state: PersonaState):
    """
    Choose the next edge from the first tool call of the newest message.

    Returns "tools" for a `tavily_search_results_json` call, "retrieve" for a
    `retrieve_trends` call, and "next" otherwise. Raises ValueError when a
    dict-style state holds no messages.
    """
    if isinstance(state, list):
        history = state
    else:
        history = state.get("messages", [])
        if not history:
            raise ValueError(f"No messages found in input state to tool_edge: {state}")
    ai_message = history[-1]

    # Name of the first requested tool, if the message carries any tool calls.
    requested = None
    if hasattr(ai_message, "tool_calls") and len(ai_message.tool_calls) > 0:
        requested = ai_message.tool_calls[0]["name"]

    destinations = {
        "tavily_search_results_json": "tools",
        "retrieve_trends": "retrieve",
    }
    return destinations.get(requested, "next")

# Routes on whether the RAG result was judged acceptable.
def retrieve_route(state: PersonaState):
    """
    Return "rewrite" when `retrieve_check` is truthy, otherwise "return".
    """
    return "rewrite" if state['retrieve_check'] else "return"

In [22]:
# Finally, register every node built so far and wire the edges.
# NOTE(review): `graph_builder`, `user_input_node` and `START` are not defined in
# the visible cells — presumably they come from the star imports; confirm.
# NOTE(review): the recorded output of this cell is
# "ValueError: Node `User Input` already present.", i.e. "User Input" was already
# registered on `graph_builder` when this ran (cell re-run on the same builder, or
# a builder that arrives pre-populated — confirm which).
graph_builder.add_node("User Input", user_input_node)
graph_builder.add_node("Character Make", character_make_node)
graph_builder.add_node("Character Retrieve Check", retrieve_check_node)
graph_builder.add_node("Rewrite Tool", rewrite_node)
graph_builder.add_node("Rewrite-Search", rewrite_search_node)
graph_builder.add_node("Tavily Search Tool", tool_node)
graph_builder.add_node("RAG Tool", retrieve)

# Fixed edges: the Tavily search and the rewrite-search path feed back into
# "Character Make"; the RAG tool output always goes through the retrieve check.
graph_builder.add_edge(START, "User Input")
graph_builder.add_edge("User Input", "Character Make")
graph_builder.add_edge("Tavily Search Tool", "Character Make")
graph_builder.add_edge("RAG Tool", "Character Retrieve Check")
graph_builder.add_edge("Rewrite Tool", "Rewrite-Search")
graph_builder.add_edge("Rewrite-Search", "Character Make")

# simple_route returns "tools", "retrieve" or "next".
# NOTE(review): "Persona Setup" is used as a target here but is not added as a
# node in this cell — confirm it is registered elsewhere.
graph_builder.add_conditional_edges(
    "Character Make",
    simple_route,
    {"tools": "Tavily Search Tool", "next": "Persona Setup", "retrieve": "RAG Tool"}
)
# retrieve_route returns "rewrite" when the retrieval needs a rewritten web search, else "return".
graph_builder.add_conditional_edges(
    "Character Retrieve Check", 
    retrieve_route, 
    {"rewrite": "Rewrite Tool", "return": "Character Make"}
)

ValueError: Node `User Input` already present.