## Gemma3 Function Calling 예제

In [1]:
import json
import re
import yfinance as yf
from vllm import LLM, SamplingParams
from transformers import AutoTokenizer
from datetime import datetime

INFO 04-21 09:04:56 [__init__.py:239] Automatically detected platform cuda.


In [2]:
# 모델 및 토크나이저 초기화
MODEL_ID = "google/gemma-3-12b-it"
model = LLM(MODEL_ID, tensor_parallel_size=1)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

INFO 04-21 09:05:17 [config.py:600] This model supports multiple tasks: {'generate', 'classify', 'reward', 'embed', 'score'}. Defaulting to 'generate'.
INFO 04-21 09:05:17 [config.py:1780] Chunked prefill is enabled with max_num_batched_tokens=8192.
INFO 04-21 09:05:22 [core.py:61] Initializing a V1 LLM engine (v0.8.3) with config: model='google/gemma-3-12b-it', speculative_config=None, tokenizer='google/gemma-3-12b-it', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.bfloat16, max_seq_len=131072, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto,  device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='xgrammar', reasoning_backend=None), observability_config=ObservabilityConfig(show_hidden_metrics=False, otlp_traces_endpoi

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


INFO 04-21 09:05:37 [gpu_model_runner.py:1258] Starting to load model google/gemma-3-12b-it...
INFO 04-21 09:05:37 [config.py:3334] cudagraph sizes specified by model runner [1, 2, 4, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120, 128, 136, 144, 152, 160, 168, 176, 184, 192, 200, 208, 216, 224, 232, 240, 248, 256, 264, 272, 280, 288, 296, 304, 312, 320, 328, 336, 344, 352, 360, 368, 376, 384, 392, 400, 408, 416, 424, 432, 440, 448, 456, 464, 472, 480, 488, 496, 504, 512] is overridden by config [512, 384, 256, 128, 4, 2, 1, 392, 264, 136, 8, 400, 272, 144, 16, 408, 280, 152, 24, 416, 288, 160, 32, 424, 296, 168, 40, 432, 304, 176, 48, 440, 312, 184, 56, 448, 320, 192, 64, 456, 328, 200, 72, 464, 336, 208, 80, 472, 344, 216, 88, 120, 480, 352, 248, 224, 96, 488, 504, 360, 232, 104, 496, 368, 240, 112, 376]
INFO 04-21 09:05:38 [weight_utils.py:265] Using model weights format ['*.safetensors']


Loading safetensors checkpoint shards:   0% Completed | 0/5 [00:00<?, ?it/s]


INFO 04-21 09:07:41 [loader.py:447] Loading weights took 122.68 seconds
INFO 04-21 09:07:41 [gpu_model_runner.py:1273] Model loading took 23.3140 GiB and 124.481046 seconds
INFO 04-21 09:07:41 [gpu_model_runner.py:1542] Encoder cache will be initialized with a budget of 8192 tokens, and profiled with 32 image items of the maximum feature size.
INFO 04-21 09:08:00 [backends.py:416] Using cache directory: /giant-data/user/1111332/.cache/vllm/torch_compile_cache/8a062feeaa/rank_0_0 for vLLM's torch.compile
INFO 04-21 09:08:00 [backends.py:426] Dynamo bytecode transform time: 14.09 s
INFO 04-21 09:08:02 [backends.py:115] Directly load the compiled graph for shape None from the cache
INFO 04-21 09:08:35 [monitor.py:33] torch.compile takes 14.09 s in total
INFO 04-21 09:08:36 [kv_cache_utils.py:578] GPU KV cache size: 101,792 tokens
INFO 04-21 09:08:36 [kv_cache_utils.py:581] Maximum concurrency for 131,072 tokens per request: 0.78x
INFO 04-21 09:09:21 [gpu_model_runner.py:1608] Graph captur

In [3]:
# 샘플링 파라미터 설정 - Gemma 3에 맞게 stop token 변경
sampling_params_func_call = SamplingParams(
    max_tokens=256, temperature=0.0, stop=["<end_of_turn>"], skip_special_tokens=False
)
sampling_params_text = SamplingParams(
    max_tokens=512, temperature=0.1, top_p=0.95, stop=["<end_of_turn>"], skip_special_tokens=False
)

In [4]:
# KOSPI 주식 정보
KOSPI_TICKER_MAP = {
    "SK텔레콤": "017670.KS", "삼성전자": "005930.KS", "SK하이닉스": "000660.KS",
    "현대차": "005380.KS", "기아": "000270.KS", "LG에너지솔루션": "373220.KS",
    "NAVER": "035420.KS", "카카오": "035720.KS",
}

In [5]:
# 도구(함수) 정의
TOOLS = [{
    "type": "function",
    "function": {
        "name": "get_kospi_stock_info",
        "description": "특정 KOSPI 주식의 현재 가격 및 기본 정보를 가져옵니다.",
        "parameters": {
            "type": "object",
            "properties": {
                "stock_name_or_code": {
                    "type": "string",
                    "description": "주식 이름(예: 'SK텔레콤') 또는 종목 코드(예: '017670')"
                }
            },
            "required": ["stock_name_or_code"]
        }
    }
}]

In [6]:
# 주가 조회
def get_kospi_stock_info(stock_name_or_code: str) -> str:
    
    name_or_code = stock_name_or_code.strip()
    ticker_symbol = None

    if re.fullmatch(r'\d{6}', name_or_code):
        ticker_symbol = name_or_code + ".KS"
    elif name_or_code in KOSPI_TICKER_MAP:
        ticker_symbol = KOSPI_TICKER_MAP[name_or_code]
    else:
        ticker_symbol = name_or_code + ".KS"

    stock = yf.Ticker(ticker_symbol)
    stock_info = stock.info

    current_price = stock_info.get('currentPrice')
    previous_close = stock_info.get('previousClose')

    price_to_use = current_price if current_price is not None else previous_close
    price_display = round(price_to_use, 2) if price_to_use is not None else "정보 없음"
    previous_close_display = round(previous_close, 2) if previous_close is not None else "정보 없음"

    result = {
        "ticker": ticker_symbol,
        "stock_name": stock_info.get('shortName', name_or_code),
        "current_price": price_display,
        "previous_close": previous_close_display,
        "currency": stock_info.get('currency', 'KRW')
    }
    
    return json.dumps(result, ensure_ascii=False)

In [7]:
# 함수 호출 파싱
def parse_tool_calls(output_text: str):

    call = re.search(r'\[(\w+\(.*?\))\]', output_text)
    if not call:
        print("함수 호출 문자열 '[]'을 찾을 수 없음")
        return None, None

    func_match = re.match(r'(\w+)\((.*?)\)', call.group(1))
    if not func_match:
        print("함수 형식이 올바르지 않음")
        return None, None

    func_name, params_str = func_match.groups()

    params = dict(re.findall(r"(\w+)='([^']*)'", params_str))
    return func_name, params

In [8]:
# 메인 쿼리 처리 함수
def query_kospi_info(query: str):
    current_date = datetime.now().strftime('%Y-%m-%d')

    # 1단계 System 프롬프트 작성
    system_msg = f"""You are a helpful assistant.
Current Date: {current_date}
You have access to functions. If you decide to invoke any of the function(s),
you MUST put it in the format of
[func_name1(params_name1=params_value1, params_name2=params_value2...), func_name2(params)]
You SHOULD NOT include any other text in the response if you call a function
{json.dumps(TOOLS, indent=2, ensure_ascii=False)}
"""
    print(f"\n### 1단계 System 프롬프트 작성 : \n{system_msg}")

    messages = [
        {"role": "system", "content": system_msg},
        {"role": "user", "content": query},
    ]
    print(f"### 2단계 Chat 기반 메시지 작성 : {messages}")

    prompt = tokenizer.apply_chat_template(messages, add_generation_prompt=True, tokenize=False)
    print(f"\n### 3단계 함수 선택을 위한 프롬프트 구성 : \n{prompt}")

    outputs = model.generate([prompt], sampling_params_func_call)
    func_call_text = outputs[0].outputs[0].text
    print(f"\n### 4단계 함수 호출을 위한 LLM 응답 : \n{func_call_text}")

    func_name, params = parse_tool_calls(func_call_text)
    if not func_name:
        return re.sub(r"<end_of_turn>\s*$", "", func_call_text).strip()

    if func_name == "get_kospi_stock_info" and "stock_name_or_code" in params:
        function_result = get_kospi_stock_info(params["stock_name_or_code"])
        print(f"\n### 5단계 함수 호출 내용 파싱 및 실행 결과: \n{function_result}")
    else:
        err = {"error": f"지원하지 않는 함수({func_name}) 또는 필수 파라미터 누락"}
        function_result = json.dumps(err, ensure_ascii=False)
        print(f"지원하지 않는 함수 호출({func_name}) 또는 필수 파라미터 누락")

    if not function_result:
        print("함수 실행 결과가 없어 응답 생성을 중단.")
        return "함수 실행 중 문제가 발생하여 답변을 생성할 수 없음"

    followup_prompt = (
        f"I called the function {func_name} with parameters {params}.\n"
        f"The function returned the following result:\n{function_result}\n\n"
        f'Based on this information, please provide a helpful response to the user\'s query: "{query}".'
    )
    final_messages = messages + [
        {"role": "assistant", "content": func_call_text},
        {"role": "user", "content": followup_prompt},
    ]
    final_prompt = tokenizer.apply_chat_template(
        final_messages, add_generation_prompt=True, tokenize=False
    )
    print(f"\n### 6단계 함수 호출 내용 및 메시지 추가 (최종 프롬프트): \n{final_prompt}")

    outputs = model.generate([final_prompt], sampling_params_text)
    final_text = outputs[0].outputs[0].text
    print("\n### 7단계 최종 응답:")
    return re.sub(r"<end_of_turn>\s*$", "", final_text).strip()

In [9]:
if __name__ == "__main__":
    queries = [
        "SK텔레콤의 주가를 알려줘",
#        "삼성전자 현재가 얼마야?",
    ]

    for query in queries:
        print(f"\n========================================")
        print(f" 질문: {query}")
        print(f"==========================================")
        response = query_kospi_info(query)
        print(f"\n 답변: {response}")


 질문: SK텔레콤의 주가를 알려줘

### 1단계 System 프롬프트 작성 : 
You are a helpful assistant.
Current Date: 2025-04-21
You have access to functions. If you decide to invoke any of the function(s),
you MUST put it in the format of
[func_name1(params_name1=params_value1, params_name2=params_value2...), func_name2(params)]
You SHOULD NOT include any other text in the response if you call a function
[
  {
    "type": "function",
    "function": {
      "name": "get_kospi_stock_info",
      "description": "특정 KOSPI 주식의 현재 가격 및 기본 정보를 가져옵니다.",
      "parameters": {
        "type": "object",
        "properties": {
          "stock_name_or_code": {
            "type": "string",
            "description": "주식 이름(예: 'SK텔레콤') 또는 종목 코드(예: '017670')"
          }
        },
        "required": [
          "stock_name_or_code"
        ]
      }
    }
  }
]

### 2단계 Chat 기반 메시지 작성 : [{'role': 'system', 'content': 'You are a helpful assistant.\nCurrent Date: 2025-04-21\nYou have access to functions. If you decide to i

Processed prompts: 100%|██████████| 1/1 [00:00<00:00,  1.66it/s, est. speed input: 474.95 toks/s, output: 39.85 toks/s]



### 4단계 함수 호출을 위한 LLM 응답 : 
[get_kospi_stock_info(stock_name_or_code='SK텔레콤')]

### 5단계 함수 호출 내용 파싱 및 실행 결과: 
{"ticker": "017670.KS", "stock_name": "SKTelecom", "current_price": 57700.0, "previous_close": 57900.0, "currency": "KRW"}

### 6단계 함수 호출 내용 및 메시지 추가 (최종 프롬프트): 
<bos><start_of_turn>user
You are a helpful assistant.
Current Date: 2025-04-21
You have access to functions. If you decide to invoke any of the function(s),
you MUST put it in the format of
[func_name1(params_name1=params_value1, params_name2=params_value2...), func_name2(params)]
You SHOULD NOT include any other text in the response if you call a function
[
  {
    "type": "function",
    "function": {
      "name": "get_kospi_stock_info",
      "description": "특정 KOSPI 주식의 현재 가격 및 기본 정보를 가져옵니다.",
      "parameters": {
        "type": "object",
        "properties": {
          "stock_name_or_code": {
            "type": "string",
            "description": "주식 이름(예: 'SK텔레콤') 또는 종목 코드(예: '017670')"
          }
      

Processed prompts: 100%|██████████| 1/1 [00:00<00:00,  1.09it/s, est. speed input: 482.98 toks/s, output: 47.86 toks/s]


### 7단계 최종 응답:

 답변: SK텔레콤(017670.KS)의 현재 주가는 57,700원입니다. 이전 거래일 종가는 57,900원이었습니다.



