In [12]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [13]:
import torch
import torch._dynamo
torch._dynamo.config.suppress_errors = True
from unsloth import FastLanguageModel, unsloth_train

In [14]:
import sys
import os

import pandas as pd

# Add the parent directory of src to the path
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

from src.db.manager import DBManager

In [15]:
if torch.cuda.get_device_capability()[0] >= 8:
    attn_implementation = "flash_attention_2"
    torch_dtype = torch.bfloat16
else:
    attn_implementation = "eager"
    torch_dtype = torch.float16
# attn_implementation = "eager"
print(f"Using {attn_implementation} for attention computation.")
# QLora?

Using flash_attention_2 for attention computation.


In [16]:
model_id = 'defog/llama-3-sqlcoder-8b'
model_dir = f"/model/{model_id.replace('/', '-')}"

In [17]:
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = model_id,
    # max_seq_length = max_seq_length,
    dtype = torch_dtype,
    load_in_4bit = False,
    load_in_8bit = False,
    # quantization_config=BitsAndBytesConfig(
    #     load_in_4bit=True,
    #     bnb_4bit_use_double_quant=True,
    #     bnb_4bit_quant_type="nf4",
    #     bnb_4bit_compute_dtype=torch_dtype
    #     # load_in_8bit=True,
    #     # llm_int8_enable_fp32_cpu_offload=False if not "27B" in model_id else True,
    # ),
    # device_map=device,
    cache_dir=f"{model_dir}/cache",
    attn_implementation=attn_implementation,
    # trust_remote_code=True
    # local_files_only=True
)
tokenizer.padding_side = "left"
# tokenizer.truncation_side = "left"
print(f"Pad Token id: {tokenizer.pad_token_id} and Pad Token: {tokenizer.pad_token}")
print(f"EOS Token id: {tokenizer.eos_token_id} and EOS Token: {tokenizer.eos_token}")

FastLanguageModel.for_inference(model)

==((====))==  Unsloth 2025.10.7: Fast Llama patching. Transformers: 4.56.2.
   \\   /|    NVIDIA H100 NVL. Num GPUs = 1. Max memory: 93.016 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.8.0+cu128. CUDA: 9.0. CUDA Toolkit: 12.8. Triton: 3.4.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.32.post2. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]



defog/llama-3-sqlcoder-8b does not have a padding token! Will use pad_token = <|reserved_special_token_250|>.
Pad Token id: 128255 and Pad Token: <|reserved_special_token_250|>
EOS Token id: 128009 and EOS Token: <|eot_id|>


LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(128256, 4096, padding_idx=128255)
    (layers): ModuleList(
      (0-31): 32 x LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear(in_features=4096, out_features=1024, bias=False)
          (v_proj): Linear(in_features=4096, out_features=1024, bias=False)
          (o_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (rotary_emb): LlamaRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear(in_features=4096, out_features=14336, bias=False)
          (up_proj): Linear(in_features=4096, out_features=14336, bias=False)
          (down_proj): Linear(in_features=14336, out_features=4096, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm((4096,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSNorm((4096,), eps=1e-05)
    

In [18]:
prompt = f"""주어진 테이블 스키마를 참고해 질문에 답하기 위한 쿼리를 작성해줘
데이터베이스: timescale db (postgresql 기반)

CREATE TABLE IF NOT EXISTS public.idu_t
(
    id integer NOT NULL DEFAULT nextval('idu_t_id_seq'::regclass),
    name character varying(50) COLLATE pg_catalog."default",
    CONSTRAINT idu_t_pkey PRIMARY KEY (id)
)

CREATE TABLE IF NOT EXISTS public.data_t
(
    id integer NOT NULL DEFAULT nextval('data_t_id_seq'::regclass),
    idu_id integer,
    roomtemp double precision,
    settemp double precision,
    timestamp timestamp without time zone NOT NULL
)

조회할때 NaN인 row는 제외


지금은 2022-09-29 21:30:00 입니다.
""" + """
실내온도: roomtemp
설정온도: settemp
우리반: 02_I81 (idu.name)
옆반: 01_IB5 (idu.name)
앞반: 01_IB7 (idu.name)
4층: 02_I81, 01_IB5, 01_IB7 (idu.name)
8층: 02_I81, 01_IB5, 01_IB7 (idu.name)
옆집: 01_IB5 (idu.name)
우리집 : 02_I81 (idu.name)
앞집: 01_IB7 (idu.name)
"""

In [19]:
import re
def extract_content(text: str):
    """Extract content from model output."""
    if "start_header_id" in text:
        pattern = r"<\|start_header_id\|>assistant<\|end_header_id\|>(.*?)<\|eot_id\|>"
    match = re.search(pattern, text, re.DOTALL)
    return match.group(1).strip() if match else None


def generate(query):
    """
    suppress all outputs, warnings, errors from this function
    모든 출력, 경고, 에러를 억제하는 함수입니다.
    """
    import sys
    import contextlib
    import warnings
    import io

    # Suppress stdout, stderr, and warnings
    with contextlib.redirect_stdout(io.StringIO()), \
         contextlib.redirect_stderr(io.StringIO()), \
         warnings.catch_warnings():
        warnings.simplefilter("ignore")
        try:
            chat = [
                {"role": "system", "content": prompt},
                {"role": "user", "content": f"Input:{query};"},
            ]

            chat = tokenizer.apply_chat_template(
                chat,
                tokenize=False,
                add_generation_prompt=True,
                # return_tensors="pt"
            )

            chat = re.sub(
                r'(\nCutting Knowledge Date:.*?\nToday Date:.*?\n\n)', 
                '', 
                chat
            )

            # Tokenize the chat input using the tokenizer
            # 토크나이저를 사용하여 chat 입력을 토크나이즈합니다.
            input_ids = tokenizer(
                chat, 
                return_tensors="pt"
            ).input_ids.to(model.device)

            outputs = model.generate(
                    input_ids=input_ids,
                    max_new_tokens=1000,
                    temperature=0.001,
                    pad_token_id=tokenizer.pad_token_id,
                )
            decoded = tokenizer.batch_decode(
                outputs, 
                skip_special_tokens=False
            )

            return extract_content(decoded[0])
        except Exception:
            # suppress all errors, return None
            # 모든 에러를 억제하고 None을 반환합니다.
            return None


    

In [None]:
import json
from pathlib import Path

BASE_DIR = "../"
def read_json(path):
    with open(path, "r", encoding="utf-8") as f:
        result = json.loads(f.read())
    
    # result = [{"Input": d["Input"], "Response": json.dumps(d["Response"], ensure_ascii=False)} for d in result]
    return result

dataset_name = "v7-250309-reduceinputanddatefunctioncall"
base_dataset_dir = Path(f"{BASE_DIR}/finetuning/dataset/{dataset_name}")


inputs = []
for scenario_dir in [d for d in base_dataset_dir.iterdir() if d.is_dir() and "scenario" in d.name and "metadata.json" in [f.name for f in d.iterdir()]]:
    path = scenario_dir / "onlyq_ts.json"
    # if "scenario3" in str(path):
    #     continue
    with open(path, "r", encoding="utf-8") as f:
        data = json.loads(f.read())

    metadata = json.load(open(scenario_dir / "metadata.json", "r"))
    result = []
    # for d in data:
    inputs.extend([{
        "Input": i["Input"],
        "Scenario": scenario_dir.name,
        "Metadata": metadata,
    } for i in data])

print(len(inputs))

12


In [21]:
reports = []

for input in inputs:
    input, scenario, metadata = input["Input"], input["Scenario"], input["Metadata"]
    
    result = generate(input)
    if result == None:
        reports.append({
            "Input": input,
            "Scenario": scenario,
            "Metadata": metadata,
            "Candidate": "",
        })
        continue
    result = result.replace("CURRENT_DATE", "TIMESTAMP '2022-09-29 21:30:00'")
    # print(result)

    reports.append({
        "Input": input,
        "Scenario": scenario,
        "Metadata": metadata,
        "Candidate": result,
    })
    # try:
    #     results = DBManager.execute_sql(result)
    #     has_nan = False
    #     for r in results:
    #         # r: tuple
    #         # see if nan in r
    #         if any(pd.isna(v) for v in r):
    #             has_nan = True
    #             break
    #     if has_nan:
    #         report["nan"] += 1
    #     else:
    #         print(f"Query: {query}\nResult: {result}\n")
    #         print("results", results)
    #         report["success"] += 1
    # except Exception as e:
    #     print(f"Error: {e}")
    #     report["error"] += 1
print(reports)

[{'Input': '이번주 우리반과 앞반의 평균 온도 알려줘', 'Scenario': 'scenario2', 'Metadata': {'idu_mapping': {'01_IB5': ['옆반', '4층'], '01_IB7': ['앞반', '4층'], '02_I81': ['우리반', '4층']}, 'modality_mapping': {'roomtemp': ['실내온도'], 'settemp': ['설정온도']}, 'current_datetime': '2022-09-29 21:30:00'}, 'Candidate': ''}, {'Input': '현재 설정온도랑 실내온도 차이 알려줘.', 'Scenario': 'scenario2', 'Metadata': {'idu_mapping': {'01_IB5': ['옆반', '4층'], '01_IB7': ['앞반', '4층'], '02_I81': ['우리반', '4층']}, 'modality_mapping': {'roomtemp': ['실내온도'], 'settemp': ['설정온도']}, 'current_datetime': '2022-09-29 21:30:00'}, 'Candidate': ''}, {'Input': '지난달에 설정온도와 실내온도 차이가 가장 많이 났던 날은?', 'Scenario': 'scenario2', 'Metadata': {'idu_mapping': {'01_IB5': ['옆반', '4층'], '01_IB7': ['앞반', '4층'], '02_I81': ['우리반', '4층']}, 'modality_mapping': {'roomtemp': ['실내온도'], 'settemp': ['설정온도']}, 'current_datetime': '2022-09-29 21:30:00'}, 'Candidate': ''}, {'Input': '이번주 우리반과 옆반의 평균 실내온도 차이 알려줘', 'Scenario': 'scenario2', 'Metadata': {'idu_mapping': {'01_IB5': ['옆반', '4층']

In [22]:
# save at ../experiments/tag-baseline.json
with open("../experiments/r-v7_r211_a422_TAG_tr27_revision-step-0.json", "w", encoding="utf-8") as f:
    json.dump(reports, f, ensure_ascii=False, indent=4)
