In [1]:
from dotenv import load_dotenv

# Read settings from a local .env file before any client is created.
# NOTE(review): presumably this is where the LangSmith / OpenAI credentials
# used by the later cells come from — the .env file itself is not shown here.
load_dotenv()

True

In [2]:
from langsmith import Client
from langsmith.schemas import Run, Example
from langsmith.evaluation import evaluate
import openai
from langsmith.wrappers import wrap_openai

client = Client()

# Name of the dataset this notebook (re)creates.
dataset_name = "Rap Battle Dataset"

# Start from a clean slate: if a dataset with this name already exists, drop it.
# The call is made only for its side effect; `dataset` is bound exactly once,
# by create_dataset below, so it can never hold the result of the deletion.
if client.has_dataset(dataset_name=dataset_name):
    client.delete_dataset(dataset_name=dataset_name)

dataset = client.create_dataset(dataset_name, description="ML Workshop用データセット")

# Store the examples in the dataset.
# inputs[i] and outputs[i] are given as parallel lists; the "question" and
# "must_mention" keys are the ones read later by predict() and must_mention().
client.create_examples(
    inputs=[
        {"question": "石丸伸二と小池百合子のラップバトル"},
        {"question": "蓮舫と小池百合子のラップバトル"},
    ],
    outputs=[
        {"must_mention": ["安芸高田", "東京"]},
        {"must_mention": ["東京", "子育て"]},
    ],
    dataset_id=dataset.id,
)



In [3]:
# Display the Dataset object that create_dataset returned.
# NOTE(review): this is the object captured at creation time; it is not
# re-fetched here, so fields such as example_count may lag behind the server.
dataset

Dataset(name='Rap Battle Dataset', description='ML Workshop用データセット', data_type=<DataType.kv: 'kv'>, id=UUID('a6e04133-e8e3-4910-ab4d-6fd38f139b27'), created_at=datetime.datetime(2024, 6, 21, 5, 52, 19, 465078, tzinfo=datetime.timezone.utc), modified_at=datetime.datetime(2024, 6, 21, 5, 52, 19, 465078, tzinfo=datetime.timezone.utc), example_count=0, session_count=0, last_session_start_time=None)

In [4]:
# Upload a second batch of examples to the same dataset.
# NOTE(review): both questions repeat the ones already uploaded in the
# dataset-creation cell (only the first must_mention list differs, adding
# "テスト"), so running both cells leaves two copies of each question in the
# dataset — confirm that this duplication is intended.
extra_examples = [
    ("石丸伸二と小池百合子のラップバトル", ["安芸高田", "東京", "テスト"]),
    ("蓮舫と小池百合子のラップバトル", ["東京", "子育て"]),
]

client.create_examples(
    inputs=[{"question": question} for question, _ in extra_examples],
    outputs=[{"must_mention": phrases} for _, phrases in extra_examples],
    dataset_id=dataset.id,
)


In [5]:
# List every example currently stored in the dataset (looked up by name).
list(client.list_examples(dataset_name=dataset_name))

[Example(dataset_id=UUID('a6e04133-e8e3-4910-ab4d-6fd38f139b27'), inputs={'question': '石丸伸二と小池百合子のラップバトル'}, outputs={'must_mention': ['安芸高田', '東京', 'テスト']}, metadata=None, id=UUID('b4ad40e7-17a1-482f-9e8f-037a35b9f959'), created_at=datetime.datetime(2024, 6, 21, 5, 52, 19, 932887, tzinfo=datetime.timezone.utc), modified_at=datetime.datetime(2024, 6, 21, 5, 52, 19, 932887, tzinfo=datetime.timezone.utc), runs=[], source_run_id=None),
 Example(dataset_id=UUID('a6e04133-e8e3-4910-ab4d-6fd38f139b27'), inputs={'question': '蓮舫と小池百合子のラップバトル'}, outputs={'must_mention': ['東京', '子育て']}, metadata=None, id=UUID('a904806c-77e0-42a9-a895-61f39d9b16c7'), created_at=datetime.datetime(2024, 6, 21, 5, 52, 19, 932887, tzinfo=datetime.timezone.utc), modified_at=datetime.datetime(2024, 6, 21, 5, 52, 19, 932887, tzinfo=datetime.timezone.utc), runs=[], source_run_id=None),
 Example(dataset_id=UUID('a6e04133-e8e3-4910-ab4d-6fd38f139b27'), inputs={'question': '石丸伸二と小池百合子のラップバトル'}, outputs={'must_mention': ['安芸高

In [6]:
client.create_dataset?

[0;31mSignature:[0m
[0mclient[0m[0;34m.[0m[0mcreate_dataset[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0mdataset_name[0m[0;34m:[0m [0;34m'str'[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0;34m*[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mdescription[0m[0;34m:[0m [0;34m'Optional[str]'[0m [0;34m=[0m [0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mdata_type[0m[0;34m:[0m [0;34m'ls_schemas.DataType'[0m [0;34m=[0m [0;34m<[0m[0mDataType[0m[0;34m.[0m[0mkv[0m[0;34m:[0m [0;34m'kv'[0m[0;34m>[0m[0;34m,[0m[0;34m[0m
[0;34m[0m[0;34m)[0m [0;34m->[0m [0;34m'ls_schemas.Dataset'[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m
Create a dataset in the LangSmith API.

Parameters
----------
dataset_name : str
    The name of the dataset.
description : str or None, default=None
    The description of the dataset.
data_type : DataType or None, default=DataType.kv
    The data type of the dataset.

Returns:
-------
Dataset
    The created datase

In [20]:
# Wrap the OpenAI client so that LLM calls are traced automatically.
openai_client = wrap_openai(openai.Client())

# The examples are passed in one at a time as `inputs`.
def predict(inputs: dict) -> dict:
    """Generate a response for one dataset example.

    Args:
        inputs: The example's inputs dict; its "question" value is sent to
            the model as a single user message.

    Returns:
        ``{"output": <generated text>}``. The text is a plain string (empty
        when the model returned no content) so that evaluators can run
        substring checks on it directly.
    """
    # The key must match the dataset's input key ("question").
    messages = [{"role": "user", "content": inputs["question"]}]
    response = openai_client.chat.completions.create(messages=messages, model="gpt-4o")
    # Return only the generated text, not the whole response object:
    # must_mention() checks `phrase in prediction`, which needs a string.
    return {"output": response.choices[0].message.content or ""}

# Define evaluators
def must_mention(run: Run, example: Example) -> dict:
    """Check that a run's output mentions every phrase the example requires.

    Args:
        run: The run to grade. ``run.outputs["output"]`` is the text that is
            searched; a missing/``None`` value is treated as empty text and a
            non-string value is converted with ``str()`` before searching.
        example: The reference example. ``example.outputs["must_mention"]``
            is the list of required phrases; missing/``None`` means no
            phrases are required.

    Returns:
        A dict with three keys: ``"key"`` (always ``"must_mention"``),
        ``"score"`` (``True`` when every required phrase occurs in the text,
        otherwise ``False``) and ``"comment"`` (which phrases were missing,
        or a note that none were).
    """
    # outputs may be None (e.g. nothing was recorded), so fall back to {}.
    prediction = (run.outputs or {}).get("output") or ""
    if not isinstance(prediction, str):
        # Substring matching needs text; stringify anything else.
        prediction = str(prediction)
    print(f"run id: {run.id}\n")
    # The key must match the dataset's output key ("must_mention").
    required = (example.outputs or {}).get("must_mention") or []
    missing = [phrase for phrase in required if phrase not in prediction]
    # Report the computed result instead of a fixed placeholder score.
    score = not missing
    if missing:
        comment = "missing: " + ", ".join(missing)
    else:
        comment = "all required phrases mentioned"
    return {"key": "must_mention", "score": score, "comment": comment}

# Run the experiment: predict() is called on the examples of the dataset
# named by dataset_name, and each resulting run is graded by must_mention().
experiment_results = evaluate(
    predict,                             # system under test
    data=dataset_name,                   # dataset to predict over and grade against
    evaluators=[must_mention],           # scoring functions applied to the results
    experiment_prefix="rap-generator",   # prefix that makes the experiment easy to find
    metadata={"version": "1.0.0"},
)

View the evaluation results for experiment: 'rap-generator-200b9893' at:
https://smith.langchain.com/o/bd14a154-65e7-52b4-bdce-b9a16d5e3513/datasets/a6e04133-e8e3-4910-ab4d-6fd38f139b27/compare?selectedSessions=40d6d358-e86d-466d-9da8-c00ef010444e




0it [00:00, ?it/s]

run id: e08b1df7-a1d7-4179-9320-a0217cbf548b
run id: 59964855-2dc9-4807-9bfc-e6b122dc12df

run id: 47794fbc-9bd5-41bf-9bd4-eac59feb2663


run id: 61eec87c-cca5-40ec-ba89-bf6c0aa5e839

