In [12]:
import os
import json
from typing import List, Annotated, AsyncGenerator
from typing_extensions import TypedDict
from urllib.parse import urlencode

from langgraph.graph import StateGraph, START, END
from langgraph.graph.message import add_messages
from langchain_openai import ChatOpenAI
from langchain_ollama import ChatOllama, OllamaEmbeddings
from langchain_teddynote.messages import messages_to_history
from langchain_teddynote.tools.tavily import TavilySearch
from langchain_teddynote.evaluator import GroundednessChecker
from langgraph.checkpoint.memory import MemorySaver

from rag.utils import format_docs
from rag.pdf import PDFRetrievalChain

In [13]:
# Load variables from a local .env file into the process environment
# (python-dotenv); this runs before the tracing setup below.
from dotenv import load_dotenv
load_dotenv()

# NOTE: `logging` here is langchain_teddynote's helper module and shadows the
# standard-library `logging` name for the rest of the notebook.
# Judging by the cell output, this call starts LangSmith tracing under the
# given project name.
from langchain_teddynote import logging
logging.langsmith("CH17-LangGraph-Structures")


LangSmith Ï∂îÏ†ÅÏùÑ ÏãúÏûëÌï©ÎãàÎã§.
[ÌîÑÎ°úÏ†ùÌä∏Î™Ö]
CH17-LangGraph-Structures


In [14]:
# Load the PDF documents and build the retrieval chain.
# pdf = PDFRetrievalChain(["data/SPRI_AI_Brief_2023ÎÖÑ12ÏõîÌò∏_F.pdf"]).create_chain()
pdf = PDFRetrievalChain().create_chain()

# Expose the retriever and the chain from the built object; the graph nodes
# below use them as module-level globals.
pdf_retriever = pdf.retriever
pdf_chain = pdf.chain

# # Get the API key from the environment
# api_key = os.getenv("OPENAI_API_KEY")

In [39]:
def print_state(state, func_name) -> None:
    """Print a debug snapshot of the graph state as seen by a node.

    Prints the question, the ``source`` metadata of each context document
    (or the raw context value when it is empty or missing), the most recent
    message, and the relevance value, framed by separator lines.

    Lookups are tolerant: a missing key, an empty ``messages`` list, or a
    document without ``source`` metadata prints ``None`` instead of raising,
    so the helper can be called on partially populated states.

    Args:
        state: Mapping with (optionally) the keys ``question``, ``context``,
            ``messages`` and ``relevance``.
        func_name: Name of the calling node, shown in the header line.
    """
    print(f"- [{func_name}]-------------------------------------")
    print(f"question: {state.get('question')}")

    context = state.get('context')
    if context:
        for doc in context:
            # Only the source path is printed to keep the output short.
            metadata = getattr(doc, 'metadata', None) or {}
            print(f"context: {metadata.get('source')}")
    else:
        print(f"context: {context}")

    messages = state.get('messages')
    # Guard against an empty or missing message list (previously IndexError/KeyError).
    last_message = messages[-1] if messages else None
    print(f"messages: {last_message}")
    print(f"relevance: {state.get('relevance')}")
    print("--------------------------------------")


In [43]:
class State(TypedDict):
    """State schema used for the graph and its node functions in this notebook."""

    question: Annotated[str, "Question"]  # user question (a single string, not an accumulated list)
    context: Annotated[list, "Context"]  # document retrieval results
    answer: Annotated[str, "Answer"]  # generated answer
    messages: Annotated[list, add_messages]  # message list (accumulated via add_messages)
    relevance: Annotated[str, "Relevance"]  # relevance; no node in this notebook writes it



# Î¨∏ÏÑú Í≤ÄÏÉâ ÎÖ∏Îìú
def retrieve_document(state: State) -> State:
    """Document retrieval node.

    Prints a debug snapshot of the incoming state, queries ``pdf_retriever``
    with the state's ``question``, and returns a state update that carries
    the retrieved documents (unformatted) under the ``context`` key.
    """
    print_state(state, 'retrieve_document')

    # The raw documents are stored as-is; formatting for the prompt happens
    # later in the answer node.
    docs = pdf_retriever.invoke(state["question"])
    return State(context=docs)

        
def _build_document_links(documents, base_url):
    """Return one markdown link line per retrieved document.

    Each link targets ``base_url`` with the document's ``source``, ``title``
    and ``page`` metadata URL-encoded as query parameters.
    """
    links = []
    for document in documents:
        params = {
            "source": document.metadata["source"],
            "title": document.metadata["title"],
            # Shown/linked page is the stored value + 1; presumably the loader
            # stores 0-based page indices (the sample output shows 'page': 0).
            "page": document.metadata["page"] + 1,
        }
        url_with_params = base_url + "?" + urlencode(params)
        links.append(
            f"üëâ [{params['title']}]({url_with_params}) [pages]: {params['page']}"
        )
    return links


def llm_answer(state: State) -> State:
    """Answer-generation node.

    Formats the retrieved documents with ``format_docs``, invokes
    ``pdf_chain`` with the question, and appends a "related documents"
    section containing one link per retrieved document. The section is only
    appended when at least one document was retrieved, so an empty context no
    longer produces a dangling header.

    Returns:
        A state update with the final text under ``answer`` and a single
        ``("assistant", text)`` entry under ``messages``.
    """
    print_state(state, 'llm_answer')

    latest_question = state["question"]
    retrieved_docs = state["context"]

    response = pdf_chain.invoke(
        {
            "question": latest_question,
            "context": format_docs(retrieved_docs),
            # Chat history is passed as an empty list, so earlier turns are
            # not forwarded to the chain.
            "chat_history": [],
        }
    )

    base_url = "https://jabis.jbbank.co.kr/jabis_pdf_view"
    linked_docs = _build_document_links(retrieved_docs, base_url)

    # Skip the section entirely when there is nothing to link to.
    if linked_docs:
        response = response + "\n\n üìñ Í¥ÄÎ†® Î¨∏ÏÑú Î≥¥Í∏∞\n\n" + "\n\n".join(linked_docs)

    print(f"response: {response}")
    # Store the generated answer and the assistant message in the state.
    return {
        "answer": response,
        "messages": [("assistant", response)]
    }

# Í∑∏ÎûòÌîÑ Ï†ïÏùò
# Build the graph over the State schema.
graph_builder = StateGraph(State)

# Register the nodes.
graph_builder.add_node("retrieve", retrieve_document)
graph_builder.add_node("llm_answer", llm_answer)

# Wire the edges: START -> retrieve -> llm_answer -> END.
graph_builder.add_edge(START, "retrieve")
graph_builder.add_edge("retrieve", "llm_answer")
graph_builder.add_edge("llm_answer", END)

# Checkpointer setup.
# NOTE(review): `memory` is created but never passed to compile() below, so
# the compiled graph runs without a checkpointer — confirm this is intended.
memory = MemorySaver()

# Compile the graph.
graph = graph_builder.compile()

In [24]:
# Call the retrieval node directly, outside the graph, to inspect its output.
# NOTE(review): `inputs` is defined in a later cell (In [23]); on a fresh
# kernel that cell must be run before this one.
retrieve_document(inputs)


context: []
- [retrieve_document]-------------------------------------
question: Ï†ïÎ≥¥Î≥¥Ìò∏ÏúÑÏõêÌöå ÏúÑÏõêÏóê ÎåÄÌï¥ ÏïåÎ†§Ï§ò.
context: []
context: []
messages: ('human', 'Ï†ïÎ≥¥Î≥¥Ìò∏ÏúÑÏõêÌöå ÏúÑÏõêÏóê ÎåÄÌï¥ ÏïåÎ†§Ï§ò.')
relevance: 
--------------------------------------


{'context': [Document(id='0606d6c6-0749-488f-860a-f5139660aa43', metadata={'title': '(D2010) Ï†ïÎ≥¥Î≥¥Ìò∏ÏúÑÏõêÌöå ÏßÄÏπ® [Í∞úÏ†ï(3) 2018. 4. 2]', 'source': '../rag_data/jbb/Í∑úÏ†ï/(D2010) Ï†ïÎ≥¥Î≥¥Ìò∏ÏúÑÏõêÌöå ÏßÄÏπ® [Í∞úÏ†ï(3) 2018. 4. 2].pdf', 'page': 0}, page_content='Ï†ïÎ≥¥Î≥¥Ìò∏ÏúÑÏõêÌöå ÏßÄÏπ® D2010 - 1 -Ï†ïÎ≥¥Î≥¥Ìò∏ÏúÑÏõêÌöå ÏßÄÏπ® ÏÜåÍ¥ÄÎ∂ÄÏÑú : Ï†ïÎ≥¥Î≥¥Ìò∏Î∂Ä Ï†ú 1 Ï°∞ (Î™©Ï†Å) Ïù¥ ÏßÄÏπ®ÏùÄ ÔΩ¢Ï†ÑÏûêÍ∏àÏúµÍ∞êÎèÖÍ∑úÏ†ï ÔΩ£Ïóê Îî∞Îùº Ï†ïÎ≥¥Î≥¥Ìò∏ÏúÑÏõêÌöå (Ïù¥Ìïò ‚ÄúÏúÑÏõêÌöå ‚ÄùÎùº ÌïúÎã§) Ïùò Ïö¥ÏòÅÏóê ÌïÑÏöîÌïú ÏÇ¨Ìï≠ÏùÑ Ï†ïÌï®ÏúºÎ°úÏç® Ï†ïÎ≥¥Î≥¥Ìò∏Ïùò Ìö®Ïú®Ï†Å Ï∂îÏßÑÏùÑ Î™©Ï†ÅÏúºÎ°ú ÌïúÎã§.\nÏ†ú 2 Ï°∞ (Íµ¨ÏÑ±) ‚ë† ÏúÑÏõêÌöåÏùò ÏúÑÏõêÏùÄ Îã§ÏùåÍ≥º Í∞ôÏù¥ Íµ¨ÏÑ±ÌïúÎã§ .\n1. ÏúÑÏõêÏû• : Ï†ïÎ≥¥Î≥¥Ìò∏ÏµúÍ≥†Ï±ÖÏûÑÏûê (CISO) 2. Î∂ÄÏúÑÏõêÏû• : Ï†ïÎ≥¥Î≥¥Ìò∏ Î∂ÄÏÑúÏû• 3. ÏúÑÏõê : ITÎ∂ÄÏÑúÏû• , ÎîîÏßÄÌÑ∏ Î∂ÄÏÑúÏû• , Ï§ÄÎ≤ï Î∂ÄÏÑúÏû• 4. ÏúÑÏ¥âÏúÑÏõê : Ïã¨ÏùòÏÇ¨Ìï≠ Í¥ÄÎ†®Î∂ÄÏÑú Îã¥ÎãπÎ≥∏Î∂ÄÏû• Î∞è Î∂Ä, Ïã§, ÌåÄÏû• Ï§ëÏóêÏÑú ÏúÑÏõêÏû•Ïù¥ ÏÑ†ÏûÑÌïòÎäî  ÏúÑÏõê 5. Í∞ÑÏÇ¨ : Ï†ïÎ≥¥Î≥¥Ìò∏ Î

In [23]:
# Sample question and the initial graph state built from it; every State key
# is populated, with empty placeholders for values the nodes fill in later.
question = "Ï†ïÎ≥¥Î≥¥Ìò∏ÏúÑÏõêÌöå ÏúÑÏõêÏóê ÎåÄÌï¥ ÏïåÎ†§Ï§ò."
inputs = dict(
    question=question,
    context=[],
    answer="",
    messages=[("human", question)],
    relevance="",
)


In [25]:
from langchain_core.messages import HumanMessage

# ÏÇ¨Ïö©ÏûêÏùò Î©îÏãúÏßÄÎ•º ÎîïÏÖîÎÑàÎ¶¨ ÌòïÌÉúÎ°ú ÏûÖÎ†• Îç∞Ïù¥ÌÑ∞ Íµ¨ÏÑ±
          

# stream_mode="messages" Î•º ÌÜµÌïú Ïä§Ìä∏Î¶¨Î∞ç Ï≤òÎ¶¨
for chunk_msg, metadata in graph.stream(inputs, stream_mode="messages"):
    # Print only non-empty content that is not a HumanMessage and that was
    # emitted by the final ("llm_answer") node. The checks run in the same
    # order as a short-circuiting `and` chain.
    if not chunk_msg.content:
        continue
    if isinstance(chunk_msg, HumanMessage):
        continue
    if metadata["langgraph_node"] != "llm_answer":
        continue
    print(chunk_msg.content, end="", flush=True)

context: []
- [retrieve_document]-------------------------------------
question: Ï†ïÎ≥¥Î≥¥Ìò∏ÏúÑÏõêÌöå ÏúÑÏõêÏóê ÎåÄÌï¥ ÏïåÎ†§Ï§ò.
context: []
context: []
messages: content='Ï†ïÎ≥¥Î≥¥Ìò∏ÏúÑÏõêÌöå ÏúÑÏõêÏóê ÎåÄÌï¥ ÏïåÎ†§Ï§ò.' additional_kwargs={} response_metadata={} id='f6110153-8372-47ca-8ae0-ff47d49bedd3'
relevance: 
--------------------------------------
- [llm_answer]-------------------------------------
question: Ï†ïÎ≥¥Î≥¥Ìò∏ÏúÑÏõêÌöå ÏúÑÏõêÏóê ÎåÄÌï¥ ÏïåÎ†§Ï§ò.
context: [Document(id='0606d6c6-0749-488f-860a-f5139660aa43', metadata={'page': 0, 'source': '../rag_data/jbb/Í∑úÏ†ï/(D2010) Ï†ïÎ≥¥Î≥¥Ìò∏ÏúÑÏõêÌöå ÏßÄÏπ® [Í∞úÏ†ï(3) 2018. 4. 2].pdf', 'title': '(D2010) Ï†ïÎ≥¥Î≥¥Ìò∏ÏúÑÏõêÌöå ÏßÄÏπ® [Í∞úÏ†ï(3) 2018. 4. 2]'}, page_content='Ï†ïÎ≥¥Î≥¥Ìò∏ÏúÑÏõêÌöå ÏßÄÏπ® D2010 - 1 -Ï†ïÎ≥¥Î≥¥Ìò∏ÏúÑÏõêÌöå ÏßÄÏπ® ÏÜåÍ¥ÄÎ∂ÄÏÑú : Ï†ïÎ≥¥Î≥¥Ìò∏Î∂Ä Ï†ú 1 Ï°∞ (Î™©Ï†Å) Ïù¥ ÏßÄÏπ®ÏùÄ ÔΩ¢Ï†ÑÏûêÍ∏àÏúµÍ∞êÎèÖÍ∑úÏ†ï ÔΩ£Ïóê Îî∞Îùº Ï†ïÎ≥¥Î≥¥Ìò∏ÏúÑÏõêÌöå (Ïù¥Ìïò ‚ÄúÏúÑÏõêÌöå ‚ÄùÎùº ÌïúÎã§) Ïùò Ïö¥ÏòÅÏóê ÌïÑÏöîÌïú Ï

In [35]:
from langchain_core.messages import (
    convert_to_openai_messages,
    message_chunk_to_message)
from fastapi.responses import StreamingResponse

def stream(inputs: State):
    """Run the graph on ``inputs`` and stream message content as an HTTP response.

    The graph is streamed in ``"messages"`` mode; each event's message chunk
    is converted to a full message and then to OpenAI message format, and its
    ``content`` is yielded as one chunk of the response body.

    Args:
        inputs: Initial graph state.

    Returns:
        A ``StreamingResponse`` wrapping the generator. The generator body
        only runs when the response is iterated (e.g. by the ASGI server).
    """
    def event_stream():
        try:
            # graph.astream() returns an async generator, which a plain `for`
            # cannot iterate (TypeError). This is a synchronous generator, so
            # the synchronous graph.stream() is used instead.
            for event in graph.stream(input=inputs, stream_mode="messages"):
                print(f"\nReceived event: {event}\n")

                # In "messages" mode each event is a (message_chunk, metadata)
                # tuple. The chunk is an object, not a dict, so it must not be
                # subscripted with ['content'].
                message = message_chunk_to_message(event[0])
                print(f"\nConverted event: {message}\n")
                yield convert_to_openai_messages(message)['content']
        except Exception as e:
            # Best-effort: the response may already be partially sent, so the
            # error is reported and the stream simply ends.
            print(f"An error occurred: {e}")

    # NOTE(review): the yielded chunks are raw message content, not JSON
    # documents; consider "text/plain" or "text/event-stream" if clients allow.
    return StreamingResponse(event_stream(), media_type="application/json")


In [36]:
# Calling stream() only builds the StreamingResponse; its generator is not
# consumed here, so the cell output is just the response object's repr and no
# graph events are produced.
stream(inputs)

<starlette.responses.StreamingResponse at 0x316c8d7d0>

In [45]:
# Inspect the raw events from "messages"-mode streaming next to their
# converted form.
for event in graph.stream(input=inputs, stream_mode="messages"):
    print(f"\nReceived event: {event}\n")
    # get first element of tuple
    # (the element is a message object, so subscripting it with ['content'] is left disabled)
    # print(f"\nReceived event[0]['content']: {event[0]['content']}\n")

    # Convert the message chunk (first tuple element) into a full message.
    message = message_chunk_to_message(event[0])
    print(f"\nConverted event: {message}\n")


- [retrieve_document]-------------------------------------
question: Ï†ïÎ≥¥Î≥¥Ìò∏ÏúÑÏõêÌöå ÏúÑÏõêÏóê ÎåÄÌï¥ ÏïåÎ†§Ï§ò.
context: []
messages: content='Ï†ïÎ≥¥Î≥¥Ìò∏ÏúÑÏõêÌöå ÏúÑÏõêÏóê ÎåÄÌï¥ ÏïåÎ†§Ï§ò.' additional_kwargs={} response_metadata={} id='03a43476-920f-4ceb-8e9f-79b7ef0d7b41'
relevance: 
--------------------------------------
- [llm_answer]-------------------------------------
question: Ï†ïÎ≥¥Î≥¥Ìò∏ÏúÑÏõêÌöå ÏúÑÏõêÏóê ÎåÄÌï¥ ÏïåÎ†§Ï§ò.
context: ../rag_data/jbb/Í∑úÏ†ï/(D2010) Ï†ïÎ≥¥Î≥¥Ìò∏ÏúÑÏõêÌöå ÏßÄÏπ® [Í∞úÏ†ï(3) 2018. 4. 2].pdf
context: ../rag_data/jbb/Í∑úÏ†ï/(D2010) Ï†ïÎ≥¥Î≥¥Ìò∏ÏúÑÏõêÌöå ÏßÄÏπ® [Í∞úÏ†ï(3) 2018. 4. 2].pdf
context: ../rag_data/jbb/Í∑úÏ†ï/(M2005) Ï†ïÎ≥¥Î≥¥Ìò∏Ï°∞ÏßÅ Í¥ÄÎ¶¨ ÏßÄÏπ® [Í∞úÏ†ï(1) 2020. 8.24].pdf
messages: content='Ï†ïÎ≥¥Î≥¥Ìò∏ÏúÑÏõêÌöå ÏúÑÏõêÏóê ÎåÄÌï¥ ÏïåÎ†§Ï§ò.' additional_kwargs={} response_metadata={} id='03a43476-920f-4ceb-8e9f-79b7ef0d7b41'
relevance: 
--------------------------------------

Received event: (AIMessageChunk(content='Ï†Ñ', additio