In [5]:
import getpass
import os

from dotenv import load_dotenv
load_dotenv()  # Load the contents of the .env file into os.environ

# Set unconditionally after load_dotenv(), so this value wins over any
# LANGSMITH_TRACING entry in .env. Presumably enables LangSmith tracing for
# the LangChain calls below -- confirm against the LangSmith docs.
os.environ["LANGSMITH_TRACING"] = "true"

In [87]:
from typing import Optional, List
from pydantic import BaseModel, Field

class Person(BaseModel):
    """Information about a person."""

    # The docstring above documents the Person entity. It is sent to the LLM
    # as the description of the Person schema and helps improve extraction
    # results.
    #
    # Notes:
    # 1. Every field is Optional with a default of None, which lets the model
    #    decline to extract a value for it.
    # 2. Every field has a `description` that is used by the LLM; a good
    #    description can improve extraction results.
    name: Optional[str] = Field(
        description="The name of the person.",
        default=None,
    )
    hair_color: Optional[str] = Field(
        description="The color of the person's hair if known.",
        default=None,
    )
    height_in_meters: Optional[str] = Field(
        description="Height is measured in meters. ",
        default=None,
    )

class Data(BaseModel):
    """Extracted data about people."""

    # Container model so that one extraction call can return multiple Person
    # entities; an empty list means no person was found in the text.
    people: List[Person]



In [88]:
from typing import Optional

from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from pydantic import BaseModel, Field

# Custom extraction prompt: a fixed system instruction followed by the text
# to extract from (filled in through the `text` variable).
# Two ways to improve extraction quality:
#   1) add examples to the prompt template;
#   2) introduce additional parameters that carry context (e.g. metadata
#      about the document from which the text was extracted).
prompt_template = ChatPromptTemplate.from_messages([
    (
        "system",
        (
            "You are an expert extraction algorithm. "
            "Only extract relevant information from the text. "
            "If you do not know the value of an attribute asked to extract, "
            "return null for the attribute's value."
        ),
    ),
    # See the how-to about improving performance with reference examples;
    # the placeholder below is left disabled in this prompt.
    # MessagesPlaceholder('examples'),
    ("human", "{text}"),
])

In [89]:
from langchain_openai import AzureChatOpenAI
# LLM: Azure OpenAI chat model configured entirely from environment variables
# (presumably supplied by the .env file loaded at the top -- confirm).
# Each os.environ[...] lookup raises KeyError if its variable is not set.
llm = AzureChatOpenAI(
    azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
    azure_deployment=os.environ["AZURE_OPENAI_DEPLOYMENT_NAME"],
    openai_api_version=os.environ["AZURE_OPENAI_API_VERSION"],
    api_key=os.environ["AZURE_OPENAI_API_KEY"],
)

In [90]:
# Wrap the chat model so that its responses come back as `Data` instances.
structured_llm = llm.with_structured_output(schema=Data)

In [91]:
# Sample passage that describes both a dog breed and a person.
text = "トイプードルという犬種は、茶色い毛をもつ高さ30cmの体格を持ちます。太郎は、犬を一匹飼っています。太郎くんの髪は青く、身長は180cmをです"
# Fill the prompt template's {text} slot, then run the structured extraction.
prompt = prompt_template.invoke({"text": text})
# Note: in the recorded output the dog also comes back as a Person entry.
structured_llm.invoke(prompt)

Data(people=[Person(name='太郎', hair_color='青', height_in_meters='1.8'), Person(name=None, hair_color='茶色', height_in_meters='0.3')])

In [92]:
# Few-shot prompting: two solved examples demonstrate the 🦜 pattern before
# the final, unanswered question is asked.
messages = [
    dict(role="user", content="2 🦜 2"),
    dict(role="assistant", content="4"),
    dict(role="user", content="2 🦜 3"),
    dict(role="assistant", content="5"),
    dict(role="user", content="3 🦜 4"),
]

response = llm.invoke(messages)
print(response.content)

7


In [102]:
from langchain_core.utils.function_calling import tool_example_to_messages

# Reference examples as (input text, expected extraction) pairs.
examples = [
    (
        # No person is mentioned, so the expected extraction is empty.
        "The ocean is vast and blue. It's more than 20,000 feet deep.",
        Data(people=[]),
    ),
    (
        # One person is mentioned; the attributes not stated stay None.
        "Fiona traveled far from France to Spain.",
        Data(people=[Person(name="Fiona", height_in_meters=None, hair_color=None)]),
    ),
]


# Convert every reference example into chat messages and collect them all in
# one flat list.
messages = []

for txt, tool_call in examples:
    # This closing AI message is not required by every model provider.
    ai_response = "Detected people." if tool_call.people else "Detected no people."
    messages.extend(
        tool_example_to_messages(txt, [tool_call], ai_response=ai_response)
    )

In [103]:
# Show each generated few-shot message in a readable form.
for message in messages:
    message.pretty_print()


The ocean is vast and blue. It's more than 20,000 feet deep.
Tool Calls:
  Data (e9287972-d1a6-4633-9b03-e2aa8a4e594f)
 Call ID: e9287972-d1a6-4633-9b03-e2aa8a4e594f
  Args:
    people: []

You have correctly called this tool.

Detected no people.

Fiona traveled far from France to Spain.
Tool Calls:
  Data (3d441e0e-aec1-401d-863a-4bd9d39e367d)
 Call ID: 3d441e0e-aec1-401d-863a-4bd9d39e367d
  Args:
    people: [{'name': 'Fiona', 'hair_color': None, 'height_in_meters': None}]

You have correctly called this tool.

Detected people.


In [104]:
# Baseline run: the passage is sent as a bare user message, with no few-shot
# examples placed in front of it.
message_no_extraction = dict(
    role="user",
    content="トイプードルという犬種は、茶色い毛をもつ高さ30cmの体格を持ちます。太郎は、犬を一匹飼っています。太郎くんの髪は青く、身長は180cmをです",
)

structured_llm = llm.with_structured_output(schema=Data)
structured_llm.invoke([message_no_extraction])


Data(people=[Person(name='太郎', hair_color='青', height_in_meters='1.8'), Person(name='トイプードル', hair_color='茶色', height_in_meters='0.3')])

In [105]:
# Same user message, now preceded by the few-shot example messages.
structured_llm.invoke(messages + [message_no_extraction]) # Few-shot prompting improves the accuracy.

Data(people=[Person(name='太郎', hair_color='青', height_in_meters='1.8')])