# 例. 多场景对话

In [1]:
import json

from langchain_core.documents import Document
from langchain_core.messages import AIMessage, BaseMessage, HumanMessage


def show_documents(docs: list[Document]):
    """Render documents as an HTML list in the notebook output.

    Each document is shown as a card: its metadata as pretty-printed JSON,
    followed by its page content.

    Args:
        docs: Documents to display; an empty list renders an empty ``<ul>``.
    """
    from html import escape

    from IPython.display import HTML, display

    parts = ['<ul style="list-style: none;">']
    for doc in docs:
        # default=str keeps the display from crashing on metadata values that
        # json cannot serialise natively.
        metadata_json = json.dumps(
            doc.metadata, indent=2, ensure_ascii=False, default=str
        )
        parts.append(
            '<li><div style="margin: 15px 0;  box-shadow: 0 4px 8px 0 rgba(0,0,0,0.2); transition: 0.3s;">'
        )
        # Escape everything that comes from the document so that "<", ">" and
        # "&" are shown literally instead of being parsed as markup.
        parts.append(
            f'<pre style="background-color: #eee; font-size: 10px; border: 1px dashed #ccc; padding: 5px;">{escape(metadata_json)}</pre>'
        )
        parts.append(
            f'<pre style="background-color: #eff; padding: 5px;">{escape(str(doc.page_content))}</pre>'
        )
        parts.append("</div></li>")
    parts.append("</ul>")
    display(HTML("".join(parts)))


def show_messages(messages: list[BaseMessage]):
    from IPython.display import HTML, display

    html = ""
    html += '<ul style="list-style: none; margin: 5px 0;">'
    for msg in messages:
        html += '<li><div style="margin: 15px 5px;">'
        match msg.type:
            case "ai":
                html += '<div style="text-align: right; font-size: 24px;">🤖</div>'
                html += f'<pre style="background-color: #eff; float: right; padding: 5px; width: fit-content; box-shadow: 0 4px 8px 0 rgba(0,0,0,0.2); transition: 0.3s;   border-radius: 5px;">{msg.content}</pre>'
            case "human":
                html += '<div style="text-align: left;font-size: 24px;">👨🏻</div>'
                html += f'<pre style="background-color: #ffe; padding: 5px; width: fit-content; box-shadow: 0 4px 8px 0 rgba(0,0,0,0.2); transition: 0.3s; border-radius: 5px;">{msg.content}</pre>'
            case _:
                html += (
                    f'<div style="text-align: left;font-size: 24px;">{msg.type}</div>'
                )
                html += f'<pre style="background-color: #eee; padding: 5px;width: fit-content; box-shadow: 0 4px 8px 0 rgba(0,0,0,0.2); transition: 0.3s;">{msg.content}</pre>'
        html += "</div></li>"
    html += "</ul>"
    display(HTML(html))


def show_answer(message: AIMessage):
    """Render a single model answer with its id and response metadata.

    Args:
        message: The AI message to display. Its ``id``, ``content`` and
            ``response_metadata`` attributes are read.
    """
    from html import escape

    from IPython.display import HTML, display

    # ensure_ascii=False matches show_documents, so non-ASCII text stays readable;
    # default=str avoids a crash on values json cannot serialise natively.
    metadata_json = json.dumps(
        message.response_metadata, indent=2, ensure_ascii=False, default=str
    )

    # All dynamic values are escaped so model output cannot inject markup.
    parts = [
        '<div style="background-color: #eee; padding: 5px;">',
        f'<div style="font-size: 9px; color: #333;">id={escape(str(message.id))}</div>',
        # "dashed" is the valid CSS border style; the previous "dash" was ignored.
        f'<pre style="background-color: transparent; border: 1px dashed #ccc; padding: 5px; width: fit-content;">{escape(str(message.content))}</pre>',
        f'<pre style="font-size: 9px;">{escape(metadata_json)}</pre>',
        "</div>",
    ]
    display(HTML("".join(parts)))

## 1. 场景判定

首先，为每个场景定义用于判定的系统提示词（system prompt）。

In [2]:
# Names of the scenes (intents) handled in this notebook.
INTENTS = [
    "get_weather",
    "current_time",
    "ask_question_about_rgs",
]

# System prompt for the "ask_question_about_rgs" filter: the model is asked to
# answer only OK or NG depending on whether the question falls into one of the
# listed company-related categories.
ask_question_about_rgs_system_prompt = (
    "You are a content filter for given inputs about asking qustions with RGS Information System Co., Ltd (Japanese: 六元素情報システム株式会社. Chinese: 六元素科技有限公司. ) . \n"
    "Only questions in following categories about this company are considered OK. Any other questions should be NG.\n"
    " - office regulations\n"
    " - news\n"
    " - recruitment information\n"
    " - introduction\n"
    " - access map/ address\n"
    " - products (ATgo, ITgo, Rakumon) information\n"
    " - organization information\n"
    "Please check whether the input from user is OK or NG according to EXAMPLES below. \n"
    "Please only ouput OK or NG without any explanation. \n\n"
    "EXAMPLES:\n"
    "# OK:\n"
    "- What is the meaning of RGS?\n"
    "- When does RGS established?\n"
    "- Where does RGS established?\n"
    "- What services are RGS provided? \n"
    "- Is RGS hiring IT Engineers this year?\n"
    "# NG:\n"
    "- Please show me the financial report of RGS.\n"
    "- Please show me the design documents about ATgo developed by RGS."
)

# System prompt for the "get_weather" filter (OK = the input asks about weather).
get_weather_system_prompt = (
    "You are a content filter for given inputs about asking weather. \n"
    "Please check whether the input from user is OK or NG according to EXAMPLES below. \n"
    "Please only ouput OK or NG without any explanation. \n\n"
    "EXAMPLES:\n"
    "# OK:\n"
    "- What is the weather of tomorrow?\n"
    "- What is the weather of 1st, Sept. 2024?\n"
    "- Will it rain tomorrow?\n"
    "- What is the temperature of tomorrow? \n"
    "- Will it rain tomorrow in Tokyo?\n"
    "# NG:\n"
    "- What is now?\n"
    "- What is your name?"
)

# System prompt for the "current_time" filter (OK = the input asks about the
# current date or time).
current_time_system_prompt = (
    "You are a content filter for given inputs about asking current time. \n"
    "Please check whether the input from user is OK or NG according to EXAMPLES below. \n"
    "Please only ouput OK or NG without any explanation. \n\n"
    "EXAMPLES:\n"
    "# OK:\n"
    "- What is now?\n"
    "- Is it 1st, Sept. 2024, today?\n"
    "- What day of the week is it today?\n"
    "- Is it Tuesday today? \n"
    "# NG:\n"
    "- What is RGS?\n"
    "- What is your name?"
)

from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

# Chat model shared by the chains built at module level in later cells.
llm = ChatOpenAI(
    model="gpt-4",
    temperature=0,
    max_tokens=None,
    timeout=None,
    max_retries=2,
)

# Prompt for the "current_time" filter: the system prompt plus the user's question.
prompt_template = ChatPromptTemplate.from_messages(
    [
        ("system", current_time_system_prompt),
        ("human", "{question}"),
    ]
)

chain = prompt_template | llm

# A question about the day of the week.
res = chain.invoke({"question": "何曜日ですか？"})
show_answer(res)

# A question about the weather, sent to the same current-time filter.
res = chain.invoke({"question": "今日の天気は？"})
show_answer(res)

要将模型输出的 OK / NG 转换为布尔值，需要一个输出解析器。

In [3]:
from typing import Self

import nltk
from langchain_core.exceptions import OutputParserException
from langchain_core.output_parsers import BaseOutputParser
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

# Fetch the NLTK resources used by extract_words below: the "punkt" tokenizer
# data and the "stopwords" corpus.
nltk.download("punkt")
nltk.download("stopwords")


def extract_words(sentences: str) -> list[str]:
    """Tokenize text and drop English stop words.

    The comparison against the stop-word list is case-sensitive: tokens are
    not lower-cased before the lookup, and surviving tokens keep their
    original casing.

    Args:
        sentences: Text to tokenize.

    Returns:
        The tokens of ``sentences`` (in order) that are not in NLTK's English
        stop-word list.
    """
    stop_words = set(stopwords.words("english"))
    return [token for token in word_tokenize(sentences) if token not in stop_words]


# The [bool] describes a parameterization of a generic.
# It's basically indicating what the return type of parse is
# in this case the return type is either True or False
class BooleanOutputParser(BaseOutputParser[bool]):
    """Parse an LLM verdict into a boolean.

    The text is tokenized with ``extract_words``; the result is ``True`` when
    ``true_val`` appears among the tokens and ``False`` when only
    ``false_val`` does. Matching is an exact, case-sensitive token comparison.
    """

    # Token that means True.
    true_val: str = "OK"
    # Token that means False.
    false_val: str = "NG"

    def parse(self: Self, text: str) -> bool:
        """Return the boolean verdict contained in ``text``.

        Args:
            text: Raw model output.

        Returns:
            ``True`` if ``true_val`` is one of the tokens (it wins when both
            values are present), otherwise ``False``.

        Raises:
            OutputParserException: If neither ``true_val`` nor ``false_val``
                appears among the tokens.
        """
        tokens = extract_words(text)
        has_true = self.true_val in tokens
        if not has_true and self.false_val not in tokens:
            # The comparison above is exact, so the message says so.
            msg = (
                f"BooleanOutputParser expected output value to either be "
                f"{self.true_val} or {self.false_val} (case-sensitive). "
                f"Received {tokens}."
            )
            raise OutputParserException(msg)
        return has_true

    @property
    def _type(self: Self) -> str:
        """Identifier string for this parser type."""
        return "boolean_output_parser"


# Exercise the parser directly, without an LLM: bare verdicts first, then
# verdicts embedded in a sentence.
bool_parser = BooleanOutputParser()

print(bool_parser.invoke("OK"))
print(bool_parser.invoke("NG"))
print(bool_parser.invoke("It is OK."))
print(bool_parser.invoke("Sorry. The answer is NG."))

[nltk_data] Downloading package punkt to /home/jovyan/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /home/jovyan/nltk_data...


True
False
True
False


[nltk_data]   Unzipping corpora/stopwords.zip.


In [4]:
def sence_tester(sence: str, question: str):
    match sence:
        case "get_weather":
            chain = (
                ChatPromptTemplate.from_messages(
                    [
                        ("system", get_weather_system_prompt),
                        ("human", "{question}"),
                    ]
                )
                | llm
                | BooleanOutputParser()
            )
        case "current_time":
            chain = (
                ChatPromptTemplate.from_messages(
                    [
                        ("system", current_time_system_prompt),
                        ("human", "{question}"),
                    ]
                )
                | llm
                | BooleanOutputParser()
            )
        case "ask_question_about_rgs":
            chain = (
                ChatPromptTemplate.from_messages(
                    [
                        ("system", ask_question_about_rgs_system_prompt),
                        ("human", "{question}"),
                    ]
                )
                | llm
                | BooleanOutputParser()
            )
        case _:
            chain = RunnableLambda(lambda _: False)
    return chain.invoke(question)


# Try the weather scene with a question about a name and a question about
# today's weather.
print(sence_tester("get_weather", "君の名は？"))
print(sence_tester("get_weather", "今日の天気は？"))

False
True


## 2. 意图检测

当无法预先确定输入属于哪个场景时，需要先检测用户的意图。

In [5]:
import json

from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

# Intent names; used as the default argument of build_intent_classifier below.
# NOTE(review): this repeats the identical list assigned in an earlier cell.
INTENTS = [
    "get_weather",
    "current_time",
    "ask_question_about_rgs",
]


def build_intent_classifier(intents=INTENTS):
    """Build a chain that classifies a user input into one of the intents.

    The prompt first teaches the model each intent through a short
    human/AI exchange, then supplies the user input as
    ``{ "input": "<question>" }``.

    Args:
        intents: Names of the intents to teach the model. Only names that
            have a definition in this function ("ask_question_about_rgs",
            "get_weather", "current_time") are used; other names are ignored.
            With the default value all three are included.

    Returns:
        A runnable (``prompt | llm``) that expects ``{"question": <text>}``
        and returns the model's message.
    """
    system_prompt = (
        "Act as the intent classification component of a assistant, similar to Amazon Alexa "
        "(except your name is 'Becca', not 'Alexa').\n"
        'You receive input in json format: `{{"input":...}}`\n'
        # Doubled braces are template escapes; they must balance so the
        # rendered example is a well-formed JSON shape.
        'You respond in json format: `{{"intent":..., "arguments":{{ ... }} }}`\n\n'
        "NOTE: \n"
        '- If the input can not classified to given intents, take it as "unclassified".\n\n'
    )

    ask_question_about_rgs_prompt = (
        "[Intent Name]: ask_question_about_rgs\n"
        "[Definition]: The given inputs are about asking qustions with RGS Information System Co., Ltd (Japanese: 六元素情報システム株式会社. Chinese: 六元素科技有限公司. ) . \n"
        "Only questions in following categories about this company are considered OK. Any other questions should be NG.\n"
        " - office regulations\n"
        " - news\n"
        " - recruitment information\n"
        " - introduction\n"
        " - access map/ address\n"
        " - products (ATgo, ITgo, Rakumon) information\n"
        " - organization information\n"
        "You can follow to EXAMPLES below. \n"
        "[EXAMPLES]:\n"
        "# OK:\n"
        "- What is the meaning of RGS?\n"
        "- When does RGS established?\n"
        "- Where does RGS established?\n"
        "- What services are RGS provided? \n"
        "- Is RGS hiring IT Engineers this year?\n"
        "# NG:\n"
        "- Please show me the financial report of RGS.\n"
        "- Please show me the design documents about ATgo developed by RGS."
    )

    get_weather_prompt = (
        "[Intent Name]: get_weather\n"
        "[Definition] The given inputs are about asking weather.\n"
        "You can follow to EXAMPLES below. \n"
        "[EXAMPLES]:\n"
        "# OK:\n"
        "- What is the weather of tomorrow?\n"
        "- What is the weather of 1st, Sept. 2024?\n"
        "- Will it rain tomorrow?\n"
        "- What is the temperature of tomorrow? \n"
        "- Will it rain tomorrow in Tokyo?\n"
        "# NG:\n"
        "- What is now?\n"
        "- What is your name?\n"
        "[Note]:\n"
        "- The output of arguments should containes following items.\n"
        "  * time: datetime\n"
        "  * address: str"
    )

    current_time_prompt = (
        "[Intent Name]: current_time\n"
        "[Definition] The given inputs are about asking current time.\n"
        "You can follow to EXAMPLES below. \n"
        "[EXAMPLES]:\n"
        "# OK:\n"
        "- What is now?\n"
        "- Is it 1st, Sept. 2024, today?\n"
        "- What day of the week is it today?\n"
        "- Is it Tuesday today? \n"
        "# NG:\n"
        "- What is RGS?\n"
        "- What is your name?"
    )

    # Definitions in the order they are presented to the model.
    intent_definitions = [
        ("ask_question_about_rgs", ask_question_about_rgs_prompt),
        ("get_weather", get_weather_prompt),
        ("current_time", current_time_prompt),
    ]

    messages = [("system", system_prompt)]
    for intent_name, definition in intent_definitions:
        # Honour the ``intents`` argument: only teach the requested intents.
        if intent_name in intents:
            messages.append(("human", definition))
            messages.append(("ai", f"OK. I understand the intent '{intent_name}'."))
    messages.extend(
        [
            ("human", "That all the intents defined."),
            (
                "ai",
                "OK. Please give me your input. I will do classification based on all defined intents.",
            ),
            # The question is wrapped in quotes so the rendered message is the
            # JSON object that the system prompt promises.
            ("human", '{{ "input": "{question}" }}'),
        ]
    )
    prompt_template = ChatPromptTemplate.from_messages(messages)

    llm = ChatOpenAI(
        model="gpt-4",
        temperature=0,
        max_tokens=None,
        timeout=None,
        max_retries=2,
    )
    return prompt_template | llm


# Build the classifier with the default intents and classify one sample input.
chain = build_intent_classifier()

text = "今は何時ですか？"
response = chain.invoke({"question": text})
show_answer(response)

## 3. 词槽填充

In [9]:
# Prompt for extracting the weather-query slots (datetime, address) from the
# last line of a conversation. Doubled braces are literal braces in the
# rendered prompt; single-brace names are template variables
# (current_datetime, history, slots, input).
# The few-shot "Output Slots" examples are valid JSON and agree with the
# declared format, since the model is told to imitate them.
slot_extraction_prompt_template = """
You are an AI assistant, reading the transcript of a conversation between an AI and a human.
From the last line of the conversation, extract all proper named entity(here denoted as slots) that match about asking for weather information.
Named entities required for querying weather information include date time, address.

The output should be returned in the following json format.
{{
    "datetime": "Define the date and time identified from the conversation. It should be defined in ISO format: yyyy-MM-dd HH:mm:ss.",
    "address": "Define city/country identified from the conversation."
}}

If there is no match for each slot, assume null.(e.g., user is simply saying hello or having a brief conversation).

EXAMPLE
Conversation history:
Person #1: I want to know the weather today.
AI: "Hi，which city do you want to know?"
Current Slots: {{"datetime": null, "address": null}}
Last line:
Person #1: Tokyo
Output Slots: {{"datetime": null, "address": "Tokyo"}}
END OF EXAMPLE

EXAMPLE
Current datetime: 2023/04/10 11:20
Conversation history:
Person #1: I want to know the weather of Tokyo.
AI: OK, what time do you want to know?
Current Slots: {{"datetime": null, "address": "Tokyo" }}
Last line:
Person #1: Tomorrow at 8 a.m.
Output Slots: {{"datetime": "2023-04-11 08:00:00", "address": "Tokyo"}}
END OF EXAMPLE

Output Slots must be in json format!

Begin!
Current datetime: {current_datetime}
Conversation history (for reference only):
{history}
Current Slots:
{slots}
Last line of conversation (for extraction):
Human: {input}

Output Slots:"""

# Slot-extraction chain; `llm` is the module-level model created in an earlier cell.
chain = ChatPromptTemplate.from_template(slot_extraction_prompt_template) | llm

from datetime import datetime

# Case 1: no history, and the question names no place.
res = chain.invoke(
    {
        "current_datetime": datetime.now().isoformat(),
        "history": "",
        "slots": json.dumps({"datetime": None, "address": None}),
        "input": "今日の天気は？",
    }
)

show_answer(res)


# Case 2: no history, and the question mentions both a day and a place.
res = chain.invoke(
    {
        "current_datetime": datetime.now().isoformat(),
        "history": "",
        "slots": json.dumps({"datetime": None, "address": None}),
        "input": "明日、東京都の天気は？",
    }
)

show_answer(res)

# Case 3: the day was mentioned in the history and the last line supplies the
# place. Both slots are still passed in as null here.
res = chain.invoke(
    {
        "current_datetime": datetime.now().isoformat(),
        "history": """Human: 明日の天気は？
         AI: どこの天気を知りたいか？""",
        "slots": json.dumps({"datetime": None, "address": None}),
        "input": "東京都",
    }
)

show_answer(res)

In [10]:
# Prompt for the dialogue step that follows slot extraction: depending on which
# slots are still null the model asks a follow-up question, and when
# "Information check" is True it confirms with "OK" plus the collected values.
# Template variables: check, history, slots, input.
check_prompt_template = """
You are an AI assistant for answering weather information.

The following is a friendly conversation between a human and an AI.
The AI is talkative and provides lots of specific details from its context.
If the AI does not know the answer to a question, it truthfully says it does not know.

The Current Slots shows all the information you need to query weather information.
If datetime is null with respect to the Current Slots value, ask a question about the date time of weather when human want to know.
If address is null with respect to the Current Slots value, ask a question about the city/country where human want to know.

If the Information check is True, it means that all the information required for getting weather info has been collected,
the AI should output "OK" and return the collected information in the following way:
datetime:
address:

Do not repeat the human's response!
Do not output the Current Slots!

Begin!
Information check:
{check}
Current conversation:
{history}
Current Slots:
{slots}
Human: {input}
AI:"""

# Dialogue-check chain; `llm` is the module-level model created in an earlier cell.
chain = ChatPromptTemplate.from_template(check_prompt_template) | llm

# Case 1: information check is False and the address slot is still null.
res = chain.invoke(
    {
        "check": False,
        "history": """
    Human: 明日の天気は？
    AI: どこの天気を知りたいか？
    """,
        "slots": json.dumps({"datetime": "2024-08-06T00:00:00", "address": None}),
        "input": "東京都",
    }
)

show_answer(res)


# NOTE(review): this rebuilds the same chain as above from the same template.
chain = ChatPromptTemplate.from_template(check_prompt_template) | llm

# Case 2: information check is True and both slots are filled.
res = chain.invoke(
    {
        "check": True,
        "history": """Human: 明日の天気は？
         AI: どこの天気を知りたいか？""",
        "slots": json.dumps({"datetime": "2024-08-06T00:00:00", "address": "東京都"}),
        "input": "東京都",
    }
)

show_answer(res)