In [None]:
# Notebook bootstrap: extend the import path, load environment variables from
# a .env file, and silence warnings for the rest of the session.
import warnings
import os
import sys
from dotenv import load_dotenv, find_dotenv
# Add the directory two levels above the working directory to the import path.
sys.path.append('../..')
_ = load_dotenv(find_dotenv())  # locate a .env file and load it; the return value is discarded

# Blanket-ignore every warning category (this also hides deprecation notices).
warnings.filterwarnings("ignore")

In [21]:
import langchain
from langchain.agents.agent_toolkits import create_conversational_retrieval_agent
from langchain.agents import load_tools, initialize_agent, tool
from langchain.agents import AgentType
from langchain.chat_models import ChatOpenAI

In [22]:
# Global LangChain debug switch; the chain/llm/tool traces captured in the
# agent run output further down are produced while this is on.
langchain.debug = True

# Chat model shared by the agent: temperature 0, with streaming enabled.
llm = ChatOpenAI(
    model='gpt-4o-mini',
    temperature=0,
    streaming=True,
)

## Custom Tools


### 열차 시간표 조회


In [23]:
@tool
def get_train_schedule(station_name: str = '하단') -> str:
    """Returns today's train schedule of the given station name.
    Output is formatted as CSV"""
    # The docstring above is kept verbatim: @tool forwards it to the LLM as
    # this tool's description, so it is part of the runtime prompt.
    # Placeholder data: `station_name` is not consulted; the same fixed
    # timetable text is returned for every station.
    rows = (
        "시간,분,행선지,",
        "5, 4, 노포,",
        "5, 20, 노포,",
        "10, 20, 노포,",
        "20,40,노포,",
        "23,40,노포,",
    )
    indent = "    "
    # Leading newline, each row indented by four spaces, and a trailing indent.
    body = "".join(f"{indent}{row}\n" for row in rows)
    return "\n" + body + indent

### 역간 이동시간 조회


In [24]:
@tool
def get_estimated_time(from_station: str = '하단') -> str:
    """
    Returns all estimated time from from_station.
    """
    # The docstring above is kept verbatim: @tool forwards it to the LLM as
    # this tool's description, so it is part of the runtime prompt.
    # Placeholder: the same fixed duration is returned whatever
    # `from_station` is.
    estimated_time = '32분'
    return estimated_time

### 역간 구간 조회


In [25]:
@tool
def get_distance_type(from_station: str = '하단') -> str:
    """
    Returns all distance type from_station
    """
    # The docstring above is kept verbatim: @tool forwards it to the LLM as
    # this tool's description, so it is part of the runtime prompt.
    # Placeholder: the same fixed fare-zone label is returned whatever
    # `from_station` is.
    distance_type = '2구간'
    return distance_type

In [47]:
from duckduckgo_search import ddg


@tool
def ddg_search(query: str) -> str:
    """
    When there are no relevant tools, use this tool to search online and retrieve relevant information
    """
    # The docstring is what @tool hands to the LLM as the tool description,
    # so its spelling matters for tool selection.

    # Restrict the search to pages under the Humetro homepage path.
    query = "site:http://www.humetro.busan.kr/homepage/default/ " + query

    # Guard against a search that yields nothing. Older duckduckgo_search
    # releases are believed to return None in that case (confirm for the
    # installed version); iterating None would raise TypeError.
    searches = ddg(query, max_results=10) or []

    # Skip the site's main landing page and any login pages.
    excluded_fragments = ("www.humetro.busan.kr/default/main.do", "login.do")
    links = {
        link
        for link in (search.get('href') for search in searches)
        if link and not any(fragment in link for fragment in excluded_fragments)
    }

    # NOTE(review): `links` is collected but not used yet. The tool still
    # returns a canned answer, so the search above does not influence the
    # result. Kept as-is so the recorded agent run stays reproducible.
    return '요금은 모두 삼천원입니다. 어린이 교통카드를 살 수 있습니다. 역사내에 편의점이 있습니다.'

## 벡터 스토어


In [65]:
from langchain.vectorstores import Chroma
from langchain.document_loaders import NotionDirectoryLoader
# Load the Notion export stored under ./notion_db. `docs` is the input to the
# Markdown header splitter in the following cell.
loader = NotionDirectoryLoader("./notion_db")
docs = loader.load()

In [67]:
from langchain.text_splitter import MarkdownHeaderTextSplitter
# First-pass split: cut each page at its Markdown headings (levels 1-3).
splitter = MarkdownHeaderTextSplitter(
    headers_to_split_on=[
        ("#", "Header 1"),
        ("##", "Header 2"),
        ("###", "Header 3"),
    ]
)
# Split every loaded page rather than only docs[0]. Indexing the first
# element silently dropped all other pages from the Notion export and raised
# IndexError when the directory was empty. With a single page the result is
# the same as before.
splitted_docs = [
    section
    for doc in docs
    for section in splitter.split_text(doc.page_content)
]

In [68]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Second-pass split: break the header-level sections into bounded chunks that
# overlap with their neighbours.
chunk_size, chunk_overlap = 2000, 200
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=chunk_overlap,
    chunk_size=chunk_size,
)
splits = text_splitter.split_documents(splitted_docs)

In [73]:
# Remove any vector store persisted by a previous run so the index is rebuilt
# from scratch. Done in Python instead of a shell escape so the cell does not
# depend on `rm` being available; a missing directory is not an error.
import shutil

shutil.rmtree('./chroma', ignore_errors=True)

In [75]:
from langchain.embeddings import OpenAIEmbeddings
from langchain.memory import ConversationBufferMemory

# Conversation memory exposed under the "chat_history" key; it is handed to
# the agent when the agent is built.
memory = ConversationBufferMemory(memory_key="chat_history")

# Embed the chunks and persist the resulting Chroma index under ./chroma.
embedding_model = OpenAIEmbeddings()
vector_db = Chroma.from_documents(
    persist_directory='chroma',
    embedding=embedding_model,
    documents=splits,
)

In [77]:
# Sanity-check retrieval with a sample question ("Where is the restroom?").
# The bare last expression lets the notebook display the returned documents.
restroom_query = '화장실이 어디인가요'
vector_db.max_marginal_relevance_search(restroom_query)

[Document(page_content='기본가격 및 사이즈  \n| 구분 | 소 | 중 | 대 | 특대 |\n| --- | --- | --- | --- | --- |\n| 기본가격(원) | 2,000 | 3,000 | 4,000 | 5,000 |\n| 구형 사이즈(cm) | 30×42×55 | 40×42×55 | 50×42×55 | 60×42×55 |\n| 신형 사이즈(cm) | 37×27×55 | 37×37×55 | 37×57×55 | 37×87×55 |  \n**추가요금**  \n- 구형: 현금만 가능\n- 신형: 현금·카드 가능\n- 1호선 신형 물품보관함 설치 역사 : 자갈치, 남포, 부산, 범내골, 서면, 시청, 연산, 동래, 부산대\n- 당일 자정(24:00이후) 요금(소 2,000원/ 중 3,000원/ 대 5,000원)이 추가되며 추후에 물품을 찾을 때 납부\n- 예) 23:50분에 소형 물품보관함 사용 시 기본 2,000원에 24:00시를 기하여 2,000원이 추가되어 4,000원이 됩니다  \n| 구분 | 기본 요금 | 추가 반복 요금1 (4시간 이후 12시간까지 1시간마다 반복 추가 과금) | 추가 반복 요금2 (12시간 이후 12시간마다 반복 추가 과금) |\n| --- | --- | --- | --- |\n| 소형 | 2,000 | +300 | +2,000 |\n| 중형 | 3,000 | +400 | +3,000 |\n| 대형 | 4,000 | +500 | +4,000 |\n| 특대형 | 5,000 | +600 |  |  \n**역별 물품보관함 현황**  \n| 역명 | 소 | 중 | 대 | 특대 |\n| --- | --- | --- | --- | --- |\n| 다대포해수욕장 | 10 | 8 | 4 |  |\n| 다대포항 | 10 | 8 | 2 |  |\n| 동매 | 10 | 8 | 2 |  |\n| 하단 | 10 | 16 | 4 |  |\n| 괴정 | 10 | 8 | 4 |  |\n| 동대신 | 10 | 4 | 2 |  |\n| 토

## Agent


In [49]:
# Zero-shot ReAct agent wired to the three stub tools plus the web-search tool.
agent = initialize_agent(
    tools=[get_train_schedule, get_estimated_time,
           get_distance_type, ddg_search],
    llm=llm,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    # Fixed spelling: the executor option is `handle_parsing_errors` (plural).
    # The misspelled singular keyword did not match that option, so malformed
    # LLM output was presumably still raising instead of being retried.
    handle_parsing_errors=True,
    verbose=True,
    # NOTE(review): confirm the zero-shot ReAct prompt actually consumes
    # `chat_history`; if it does not, this memory has no effect on answers.
    memory=memory,
)

In [50]:
# Ask the agent a sample question ("Is there a convenience store in the station?").
question = '역사 내 편의점이 있나요?'
result = agent.run(question)

[32;1m[1;3m[chain/start][0m [1m[1:chain:AgentExecutor] Entering Chain run with input:
[0m{
  "input": "역사 내 편의점이 있나요?",
  "chat_history": ""
}
[32;1m[1;3m[chain/start][0m [1m[1:chain:AgentExecutor > 2:chain:LLMChain] Entering Chain run with input:
[0m{
  "input": "역사 내 편의점이 있나요?",
  "chat_history": "",
  "agent_scratchpad": "",
  "stop": [
    "\nObservation:",
    "\n\tObservation:"
  ]
}
[32;1m[1;3m[llm/start][0m [1m[1:chain:AgentExecutor > 2:chain:LLMChain > 3:llm:ChatOpenAI] Entering LLM run with input:
[0m{
  "prompts": [
    "Human: Answer the following questions as best you can. You have access to the following tools:\n\nget_train_schedule: get_train_schedule(station_name: str = '하단') -> str - Returns today's train schedule of the given station name.\n    Output is formatted as CSV\nget_estimated_time: get_estimated_time(from_station: str = '하단') -> str - Returns all estimated time from from_station.\nget_distance_type: get_distance_type(from_station: str = '하단') 



[36;1m[1;3m[tool/end][0m [1m[1:chain:AgentExecutor > 4:tool:ddg_search] [2.09s] Exiting Tool run with output:
[0m"요금은 모두 삼천원입니다. 어린이 교통카드를 살 수 있습니다. 역사내에 편의점이 있습니다."
[32;1m[1;3m[chain/start][0m [1m[1:chain:AgentExecutor > 5:chain:LLMChain] Entering Chain run with input:
[0m{
  "input": "역사 내 편의점이 있나요?",
  "chat_history": "",
  "agent_scratchpad": "I need to find out if there is a convenience store in the station.\nAction: ddg_search\nAction Input: \"역사 내 편의점\"\nObservation: 요금은 모두 삼천원입니다. 어린이 교통카드를 살 수 있습니다. 역사내에 편의점이 있습니다.\nThought:",
  "stop": [
    "\nObservation:",
    "\n\tObservation:"
  ]
}
[32;1m[1;3m[llm/start][0m [1m[1:chain:AgentExecutor > 5:chain:LLMChain > 6:llm:ChatOpenAI] Entering LLM run with input:
[0m{
  "prompts": [
    "Human: Answer the following questions as best you can. You have access to the following tools:\n\nget_train_schedule: get_train_schedule(station_name: str = '하단') -> str - Returns today's train schedule of the given station name.\n    

In [52]:
# Show only the agent's final answer text, without the debug trace above.
print(result)

Yes, there is a convenience store in the station.
