In [1]:
from dotenv import load_dotenv

load_dotenv()

True

In [2]:
import os
print(f'''Env Variables
LANGCHAIN_TRACING_V2: {os.environ["LANGCHAIN_TRACING_V2"]}
LANGCHAIN_PROJECT: {os.environ["LANGCHAIN_PROJECT"]}
LANGCHAIN_ENDPOINT: {os.environ["LANGCHAIN_ENDPOINT"]}
''')

Env Variables
LANGCHAIN_TRACING_V2: true
LANGCHAIN_PROJECT: machine-learning-workshop
LANGCHAIN_ENDPOINT: https://api.smith.langchain.com



## LangSmithのトレースの基本
traceableデコレータを使用することで、任意の関数の引数と返り値をLangSmithで確認できるようになる。

In [3]:
from IPython.display import display, Markdown
from langsmith import traceable
import openai

openai_client = openai.Client()

@traceable
def format_prompt(question):
    return [
        {
            "role": "system",
            "content": "あなたはBigQueryのエキスパートです. 出したいデータのクエリを作成してください. 出力はクエリのみで他の情報は不要です.",
        },
        {
            "role": "user",
            "content": f"{question}"
        }
    ]

@traceable(run_type="llm")
def invoke_llm(messages):
    return openai_client.chat.completions.create(
        messages=messages, model="gpt-4o", temperature=0
    )

@traceable
def parse_output(response):
    return response.choices[0].message.content

@traceable
def run_pipeline():
    messages = format_prompt("ウェブサイトの回遊率")
    response = invoke_llm(messages)
    return parse_output(response)

display(Markdown(run_pipeline()))

```sql
SELECT
  user_id,
  COUNT(DISTINCT page_id) AS pages_visited,
  COUNT(DISTINCT session_id) AS sessions,
  COUNT(DISTINCT page_id) / COUNT(DISTINCT session_id) AS page_views_per_session
FROM
  `your_dataset.your_table`
GROUP BY
  user_id
```

openaiとのやり取りの可観測にするラッパー `wrap_openai` を使うと詳細な情報を簡単に取得可能になる。

In [4]:
from langsmith.wrappers import wrap_openai

wrap_openai_client = wrap_openai(openai.Client())

@traceable(name="run_pipeline with wrap_openai")
def run_pipeline_with_wrap_llm():
    messages = format_prompt("ウェブサイトの回遊率")
    response = wrap_openai_client.chat.completions.create(
        messages=messages, model="gpt-4o", temperature=0
    )
    return parse_output(response)

display(Markdown(run_pipeline_with_wrap_llm()))

```sql
SELECT
  user_id,
  COUNT(DISTINCT page_id) AS pages_visited,
  COUNT(DISTINCT session_id) AS sessions,
  COUNT(DISTINCT page_id) / COUNT(DISTINCT session_id) AS page_views_per_session
FROM
  `your_dataset.your_table`
GROUP BY
  user_id
```

LCEL (LangChain Expression Language) を使えば、LangSmithでの観測が楽にできる。

In [5]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

model = ChatOpenAI(model="gpt-4")

prompt = ChatPromptTemplate.from_messages([
    ("system", "あなたはBigQueryのエキスパートです. 出したいデータのクエリを作成してください. 出力はクエリのみで他の情報は不要です."),
    ("human", "{question}")
])
output_parser = StrOutputParser()

chain = prompt | model | output_parser
display(Markdown(chain.invoke("ウェブサイトの回遊率")))

```
SELECT
  fullVisitorId,
  (SUM(IF(totals.pageviews > 1, 1, 0)) / COUNT(DISTINCT(fullVisitorId))) AS bounce_rate
FROM
  `bigquery-public-data.google_analytics_sample.*`
GROUP BY
  fullVisitorId
```

In [6]:
import textwrap

from langsmith import Client
from langsmith.schemas import Run, Example
from langsmith.evaluation import evaluate

ls_client = Client() # LangSmithのクライアント

# 作成するデータセット
dataset_name = "SQL Samples"

# データセットがあれば削除
if ls_client.has_dataset(dataset_name=dataset_name):
    dataset = ls_client.delete_dataset(dataset_name=dataset_name)

dataset = ls_client.create_dataset(dataset_name, description="ML Workshop用のサンプルクエリ")

# データセットにexampleを保存
ls_client.create_examples(
    inputs=[
        {"question": "MAUを取得"},
        {"question": "新規ユーザ数の推移"},
    ],
    outputs=[
        {"query": textwrap.dedent("""
           SELECT
               COUNT(DISTINCT user_id) AS monthly_active_users
           FROM
               `your_dataset.user_activities`
           WHERE
               activity_date BETWEEN DATE_SUB(CURRENT_DATE(), INTERVAL 1 MONTH) AND CURRENT_DATE()
        """),
         "tables": ["user_activities"]
        },
        {"query": textwrap.dedent("""
            SELECT
                signup_date,
                COUNT(user_id) AS new_users
            FROM
                `your_dataset.user_activities`
            GROUP BY
                signup_date
            ORDER BY
            　　 signup_date
        """),
         "tables": ["user_activities"]
        },
    ],
    dataset_id=dataset.id,
)

In [7]:
dataset

Dataset(name='SQL Samples', description='ML Workshop用のサンプルクエリ', data_type=<DataType.kv: 'kv'>, id=UUID('3553eb37-9069-42b4-8a2a-91e82ca2ce5e'), created_at=datetime.datetime(2024, 6, 22, 9, 22, 55, 333745, tzinfo=datetime.timezone.utc), modified_at=datetime.datetime(2024, 6, 22, 9, 22, 55, 333745, tzinfo=datetime.timezone.utc), example_count=0, session_count=0, last_session_start_time=None)

example_countが0となっているので、LangSmith Clientを使ってdatasetを読み直す

In [8]:
ls_client.read_dataset(dataset_name=dataset_name)

Dataset(name='SQL Samples', description='ML Workshop用のサンプルクエリ', data_type=<DataType.kv: 'kv'>, id=UUID('3553eb37-9069-42b4-8a2a-91e82ca2ce5e'), created_at=datetime.datetime(2024, 6, 22, 9, 22, 55, 333745, tzinfo=datetime.timezone.utc), modified_at=datetime.datetime(2024, 6, 22, 9, 22, 55, 333745, tzinfo=datetime.timezone.utc), example_count=2, session_count=0, last_session_start_time=None)

In [9]:
dataset.url

'https://smith.langchain.com/o/bd14a154-65e7-52b4-bdce-b9a16d5e3513/datasets/3553eb37-9069-42b4-8a2a-91e82ca2ce5e'

In [10]:
ls_client.create_examples(
    inputs=[
        {"question": "月ごとのCV数の推移"},
    ],
    outputs=[
        {"query": textwrap.dedent("""
           SELECT
               FORMAT_TIMESTAMP('%Y-%m', conv_date) AS conversion_month,
               COUNT(conv_id) AS conversions
           FROM
               `your_dataset.your_table`
           WHERE
               conv_date BETWEEN DATE_SUB(CURRENT_DATE(), INTERVAL 1 YEAR) AND CURRENT_DATE()
           GROUP BY
               conversion_month
           ORDER BY
               conversion_month
        """),
         "tables": ["user_activities"]
        },
    ],
    dataset_id=dataset.id,
)

exampleを増やしたことにより、datasetのバージョンも変更されている

In [11]:
ls_client.read_dataset(dataset_name=dataset_name).example_count

3

In [12]:
# datasetに保存されているexampleの一覧
for example in ls_client.list_examples(dataset_name=dataset_name):
    print(f'''
question: {example.inputs["question"]}
query: {example.outputs["query"]}
    ''')


question: 月ごとのCV数の推移
query: 
SELECT
    FORMAT_TIMESTAMP('%Y-%m', conv_date) AS conversion_month,
    COUNT(conv_id) AS conversions
FROM
    `your_dataset.your_table`
WHERE
    conv_date BETWEEN DATE_SUB(CURRENT_DATE(), INTERVAL 1 YEAR) AND CURRENT_DATE()
GROUP BY
    conversion_month
ORDER BY
    conversion_month

    

question: MAUを取得
query: 
SELECT
    COUNT(DISTINCT user_id) AS monthly_active_users
FROM
    `your_dataset.user_activities`
WHERE
    activity_date BETWEEN DATE_SUB(CURRENT_DATE(), INTERVAL 1 MONTH) AND CURRENT_DATE()

    

question: 新規ユーザ数の推移
query: 
SELECT
    signup_date,
    COUNT(user_id) AS new_users
FROM
    `your_dataset.user_activities`
GROUP BY
    signup_date
ORDER BY
　　 signup_date

    


## LangSmith Evaluation
### Custom Evaluation

In [13]:
# inputsにexampleが1つずつ渡される
def predict(inputs: dict) -> dict:
    model = ChatOpenAI(model="gpt-4")
    prompt = ChatPromptTemplate.from_messages([
        ("system", "あなたはBigQueryのエキスパートです. 出したいデータのクエリを作成してください. 出力はクエリのみで他の情報は不要です."),
        ("human", "{question}, tableはuser_activitiesを使います.")
    ])
    output_parser = StrOutputParser()
    llm = prompt | model | output_parser
    return {"output": llm.invoke(inputs)}

# Custom Evaluation
def must_have_user_activities(run: Run, example: Example) -> dict:
    prediction = run.outputs.get("output") or ""
    print(f"run id: {run.id}\n")
    required = example.outputs.get("tables") or [] # outputsのキー (tables) と合わせる
    print(required)
    print(prediction)
    score = all(phrase in prediction for phrase in required) # scoreは自分で定義したものでよい
    return {"key":"must_have_user_activities", "score": score, "comment": "comment test"} # key, score, commentを返す

experiment_results = evaluate(
    predict,
    data=dataset_name, # The data to predict and grade over
    evaluators=[must_have_user_activities], # The evaluators to score the results
    experiment_prefix="ml-workshop", # A prefix for your experiment names to easily identify them
    metadata={
      "version": "1.0.0",
    },
)

View the evaluation results for experiment: 'ml-workshop-19e495b4' at:
https://smith.langchain.com/o/bd14a154-65e7-52b4-bdce-b9a16d5e3513/datasets/3553eb37-9069-42b4-8a2a-91e82ca2ce5e/compare?selectedSessions=864883c2-2d03-498e-9e59-7bcbd09af7f9




0it [00:00, ?it/s]

run id: 0fc4d7f9-466a-404d-a0a6-f1db40443c00
run id: e0a386eb-6c10-4664-970b-0e0e67120a03

['user_activities']
```
SELECT 
  DATE(created_at) as date,
  COUNT(DISTINCT user_id) as new_users
FROM 
  user_activities
WHERE 
  is_new_user = TRUE
GROUP BY 
  date
ORDER BY 
  date
```

['user_activities']
```sql
SELECT 
  FORMAT_TIMESTAMP('%Y-%m', timestamp) AS month,
  COUNT(*) AS CV_count
FROM 
  user_activities
WHERE 
  activity_type = 'CV'
GROUP BY 
  month
ORDER BY 
  month
```
run id: cb940c24-ac78-48e0-a15f-abd992a410aa

['user_activities']
```
SELECT 
  EXTRACT(YEAR FROM activity_date) AS year,
  EXTRACT(MONTH FROM activity_date) AS month,
  COUNT(DISTINCT user_id) AS MAU
FROM 
  `project.dataset.user_activities`
GROUP BY 
  year, month
ORDER BY 
  year, month
```
このクエリは、それぞれの年と月のユニークなユーザー数（MAU）を取得します。
