In [1]:
import sys 
from typing import List

import numpy as np
import pandas as pd

In [2]:
from openai import OpenAI

In [3]:
# Extend the import search path so the `prompts` and `metadata` modules imported
# below can be found (presumably they live in this directory — verify).
# NOTE(review): this is an absolute path on one specific machine; the notebook
# will not run elsewhere unless the path is made configurable.
sys.path.append("/nfsdata/yzk/llmtuner/src/tod")
# Prompt templates used to build the system messages in later cells.
from prompts import UPDATE_SLOTS_SYSTEM_PROMPT, UPDATE_SLOTS_PROMPT, SYSTEM_PROMPT, DETECT_INTENT_SYSTEM_PROMPT
# Values substituted into SYSTEM_PROMPT via str.format in the timing cell.
from metadata import tracking_targets, user_intents

## OpenAI client

In [4]:
# Point the OpenAI client at the local vLLM API server instead of OpenAI's
# hosted endpoint: the key is a placeholder value and the base URL is the
# server's `/v1` route.
openai_api_key = "EMPTY"
openai_api_base = "http://localhost:8716/v1"

# Both values are passed explicitly, so the client does not fall back to the
# OPENAI_API_KEY environment variable.
client = OpenAI(base_url=openai_api_base, api_key=openai_api_key)

In [5]:
# Ask the server which models it exposes and use the id of the first one
# listed as the `model` argument for every request in this notebook.
models = client.models.list()
model_path = models.data[0].id

# Echo the selected model so the recorded output documents what was tested.
print(f">>> current model: {model_path}")

>>> current model: /models/Llama3-8B-EBusSFT


In [6]:
def gen_text_vllm(
    client: OpenAI,
    model_path: str,
    messages: List,
    stream: bool = False,
    temperature: float = 0.2,
    top_p: float = 0.9,
    top_k: int = 40,
    presence_penalty: float = 1,
    max_tokens: int = 512,
):
    """Run one chat completion and return the reply text with its token counts.

    Args:
        client: OpenAI-compatible client used to send the request.
        model_path: Model identifier passed as the ``model`` argument.
        messages: Chat history as a list of ``{"role": ..., "content": ...}`` dicts.
        stream: When True the reply is requested as a stream and the chunks are
            concatenated here, so the return value has the same shape either way.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling threshold.
        top_k: Top-k sampling cutoff, sent through ``extra_body``.
        presence_penalty: Presence penalty, sent through ``extra_body``.
        max_tokens: Maximum number of tokens to generate, sent through ``extra_body``.

    Returns:
        A 4-tuple ``(text, total_tokens, prompt_tokens, completion_tokens)``.
        ``text`` is stripped of surrounding whitespace and is ``""`` when the
        server returned no content. The three counts are ``None`` when the
        server did not report usage.
    """
    request = {
        "model": model_path,
        "messages": messages,
        "stream": stream,
        "temperature": temperature,
        "top_p": top_p,
        # These three travel in extra_body exactly as before so that, with the
        # default arguments, the request sent to the server is unchanged.
        "extra_body": {
            "top_k": top_k,
            "presence_penalty": presence_penalty,
            "max_tokens": max_tokens,
        },
    }
    if stream:
        # Without this option a streamed reply carries no usage information.
        request["stream_options"] = {"include_usage": True}

    response = client.chat.completions.create(**request)

    if stream:
        # A streamed response is an iterable of chunks, not a single completion
        # object: collect the text deltas and pick up usage from whichever
        # chunk carries it (that chunk may have an empty `choices` list).
        pieces = []
        usage = None
        for chunk in response:
            if chunk.choices:
                piece = chunk.choices[0].delta.content
                if piece:
                    pieces.append(piece)
            chunk_usage = getattr(chunk, "usage", None)
            if chunk_usage is not None:
                usage = chunk_usage
        text = "".join(pieces)
    else:
        # `content` can be None; treat that as an empty reply instead of
        # failing on `.strip()`.
        text = response.choices[0].message.content or ""
        usage = response.usage

    if usage is None:
        used_tokens = input_tokens = output_tokens = None
    else:
        used_tokens = usage.total_tokens
        input_tokens = usage.prompt_tokens
        output_tokens = usage.completion_tokens

    return text.strip(), used_tokens, input_tokens, output_tokens
    

## Test case

In [7]:
# Smoke-test conversation: the intent-detection system prompt, one completed
# user/assistant exchange, and a final user question for the model to answer.
messages = [
    {"role": "system", "content": DETECT_INTENT_SYSTEM_PROMPT},
    {"role": "user", "content": "Who won the world series in 2020?"},
    {"role": "assistant", "content": "The Los Angeles Dodgers won the World Series in 2020."},
    {"role": "user", "content": "分析電巴車齡1年的準點率"},
]

In [8]:
# Smoke test: send `messages` through the server with the default sampling
# settings. The displayed tuple is
# (reply text, total tokens, prompt tokens, completion tokens).
gen_text_vllm(client, model_path=model_path, messages=messages)

('[user_intent]create_analytical_charts', 319, 309, 10)

## Load testing dataset

In [9]:
# Load the evaluation set. Each row's Test_Case cell is read as newline-separated
# lines; every line containing a full-width colon ("：") contributes the text
# after that colon, and exactly two such lines are expected per row: first the
# chart-creation question, then the slot-update question.
testing_dataset = pd.read_csv("/nfsdata/yzk/awesome_docker/llm-deploy/data/eBus_test_evaluation_new_model.csv")

ds = []
for r in testing_dataset.itertuples():
    # Split on the FIRST colon only, so a question that itself contains "："
    # is kept whole instead of being cut at its second colon.
    questions = [q.split("：", 1)[1] for q in r.Test_Case.split("\n") if "：" in q]
    if len(questions) != 2:
        # Same exception type as the tuple-unpacking failure it replaces, but
        # it names the offending row.
        raise ValueError(
            f"row {r.Index}: expected 2 questions in Test_Case, found {len(questions)}"
        )
    create_chart_q, update_slot_q = questions
    ds.append({"idx": r.Index, "create_chart_q": create_chart_q, "update_slot_q": update_slot_q})

# Fixed seed so the same 10 test cases are drawn on every run.
np.random.seed(1234)
rds = np.random.choice(ds, replace=False, size=10)

## response + intent + slot-filling
- default & single GPU: 15.7 s ± 650 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) => ≈1.57 s per request (each loop sends the 10 sampled questions)
- 15.4 s ± 241 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [10]:
%%timeit
for d in rds:
    messages = [
        {
            "role": "system",
            "content": SYSTEM_PROMPT.format(user_intents, tracking_targets),
        },
        {
            "role": "user",
            "content": d["create_chart_q"]
        }
    ]
    
    response = gen_text_vllm(client, model_path=model_path, messages=messages)
    print(f"Q.{d['idx']}. {d['create_chart_q']}\n{response[0]}")
    d.update({"create_chart_a": response[0]})

Q.681. 查詢橘11B路線在楠梓站的實際累計里程。
根據您的查詢，我們可以提供橘11B路線在楠梓站的實際累計里程。請問是否需要調整其他設定或開始進行分析?
[user_intent]create_analytical_charts
[tracking_state]{'時間': [], '站點': ['楠梓站'], '電巴車齡': [], '路線': ['橘11B'], '車輛狀態': [], '指標': ['實際累計里程'], '圖別': []}
Q.990. 過去一週的動態定點完整率指標，並且電動巴士的狀態是在充電中或者是值勤中。
您好, 依據您的問題可解析出以下資訊, 請問是否需要調整或是開始進行分析?
[user_intent]create_analytical_charts
[tracking_state]{'時間': ['過去一週'], '站點': [], '電巴車齡': [], '路線': [], '車輛狀態': ['充電中', '值勤中'], '指標': ['動態定點完整率'], '圖別': []}
Q.155. 查詢60覺民幹線路線在高雄站的動態定點完整率、實際累計里程以及利用率。
您好, 根據您的查詢，我們可以為您提供60覺民幹線路線在高雄站的動態定點完整率、實際累計里程以及利用率的數據分析。請問是否需要調整或是開始進行分析?
[user_intent]create_analytical_charts
[tracking_state]{'時間': [], '站點': ['高雄站'], '電巴車齡': [], '路線': ['60覺民幹線'], '車輛狀態': [], '指標': ['動態定點完整率', '實際累計里程', '利用率'], '圖別': []}
Q.768. 請問實際用電的指標、電動巴士的年齡、各站點、電動巴士的狀態以及電動巴士的路線資訊。
您好, 根據您的問題，我已經解析出以下資訊。請問是否需要調整或是開始進行分析?
[user_intent]create_analytical_charts
[tracking_state]{'時間': [], '站點': ['各站點'], '電巴車齡': ['電動巴士年齡'], '路線': ['電動巴士路線'], '車輛狀態': ['電動巴士狀態'], '指標': ['實際用電'], '圖別':