# 기본환경 설정

In [None]:
# !pip install faiss-cpu

In [None]:
from google.colab import userdata
HF_KEY = userdata.get("HF_KEY")

In [None]:
import huggingface_hub
huggingface_hub.login(HF_KEY)

# 모델 로딩

In [None]:
!pip install --no-deps bitsandbytes accelerate xformers==0.0.29.post3 peft trl triton cut_cross_entropy unsloth_zoo langchain-community pypdf langchain_huggingface faiss-cpu
!pip install --no-deps unsloth

In [1]:
from unsloth import FastModel
from langchain.embeddings import HuggingFaceEmbeddings
import torch

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!


In [2]:
device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")

In [3]:
model, tokenizer = FastModel.from_pretrained(
    model_name = "unsloth/gemma-3-4b-it",
    max_seq_length = 1024*5, # Choose any for long context!
    load_in_4bit = True,  # 4 bit quantization to reduce memory
    # device_map = {"": device}
)

==((====))==  Unsloth 2025.9.1: Fast Gemma3 patching. Transformers: 4.55.4. vLLM: 0.10.2rc2.dev98+ge599e2c65.
   \\   /|    NVIDIA GeForce RTX 4090. Num GPUs = 2. Max memory: 23.484 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.8.0+cu128. CUDA: 8.9. CUDA Toolkit: 12.8. Triton: 3.4.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.32.post2. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


# Custom ChatModel

In [11]:
from typing import List, Any, Dict
from langchain_core.language_models.chat_models import BaseChatModel
from langchain_core.outputs import ChatResult, ChatGeneration
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.runnables import RunnableLambda
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage

In [5]:
class GemmaChatModel(BaseChatModel):
    def __init__(self, model, tokenizer, max_tokens: int = 512, do_sample: bool = True, temperature: float = 0.7, top_p: float = 0.9):
        super().__init__()
        object.__setattr__(self, "model", model)
        object.__setattr__(self, "tokenizer", tokenizer)
        object.__setattr__(self, "max_tokens", max_tokens)
        object.__setattr__(self, "do_sample", do_sample)
        object.__setattr__(self, "temperature", temperature)
        object.__setattr__(self, "top_p", top_p)

    @property
    def _llm_type(self) -> str:
        return "gemma-chat"

    def _format_messages(self, messages: List[Any]) -> str:
        prompt = ""
        for message in messages:
            if isinstance(message, SystemMessage):
                prompt += f"<|system|>\n{message.content}</s>\n"
            elif isinstance(message, HumanMessage):
                prompt += f"<|user|>\n{message.content}</s>\n"
            elif isinstance(message, AIMessage):
                prompt += f"<|assistant|>\n{message.content}</s>\n"
        prompt += "<|assistant|>\n"
        return prompt

    def _generate(self, messages: List[Any], **kwargs) -> ChatResult:
        prompt = self._format_messages(messages)
        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)

        with torch.no_grad():
            outputs = self.model.generate(
                **inputs,
                max_new_tokens=self.max_tokens,
                do_sample=kwargs.get("do_sample", self.do_sample),
                temperature=kwargs.get("temperature", self.temperature),
                top_p=kwargs.get("top_p", self.top_p),
                eos_token_id=self.tokenizer.eos_token_id,
            )

        decoded = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
        response = decoded.split("<|assistant|>\n")[-1].strip()

        return ChatResult(generations=[ChatGeneration(message=AIMessage(content=response))])

In [6]:
llm = GemmaChatModel(model=model, tokenizer=tokenizer, max_tokens=512)

# 툴 정의

In [7]:
def get_weather(city: str) -> str:
    return f"{city}: 맑음, 25℃ (데모)"

In [8]:
def add(a: float, b: float) -> float:
    return float(a) + float(b)

In [12]:
TOOLS: Dict[str, Dict[str, Any]] = {
    "get_weather": {
        "description": "도시의 현재 날씨를 조회",
        "parameters": {
            "type": "object",
            "properties": {"city": {"type": "string"}},
            "required": ["city"],
        },
    },
    "add": {
        "description": "두 수를 더한다",
        "parameters": {
            "type": "object",
            "properties": {"a": {"type": "number"}, "b": {"type": "number"}},
            "required": ["a", "b"],
        },
    },
}

In [13]:
TOOL_FUNCS = {
    "get_weather": get_weather,
    "add": add,
}

# Prompt & Chain 정의

## tool chain

In [14]:
## param : tool_names, tool_schema, input
select_instruct = """\
You are a tool router. Read the user's request and decide whether to call a tool.
Return ONLY one JSON object and NOTHING ELSE (no code fences, no commentary).

Strict output JSON (one object):
{{
  "tool": "<one of: {tool_names} | none>",
  "args": <object>
}}

Global rules:
- Use ONLY tools defined in the TOOL SCHEMA below. If nothing matches, set "tool" to "none" and "args" to {{}}.
- Output must be valid JSON with double quotes and no trailing commas.
- Only choose a tool if:
  (a) the request clearly matches the tool’s description/purpose, AND
  (b) you can supply ALL required parameters from the user input.
- Do NOT invent, guess, or hallucinate parameter values. If a required value is missing/unclear, choose "none".
- Conform exactly to the selected tool's parameter schema (names, types, enums). Do not add extra keys not in the schema.
- If multiple tools could work, prefer the most specific one that best matches the user’s intent.
- Keep numbers as numbers, booleans as booleans, arrays as arrays, strings as strings. Do not convert types arbitrarily.
- Preserve user-provided text as-is (do not translate or rewrite); only extract values for "args".
- Think silently; DO NOT include chain-of-thought or explanations in the output.

TOOL SCHEMA (names, descriptions, and JSON parameter schemas):
{tool_schema}

Examples (for style only; do NOT copy literally):
User: What's the weather in Paris?
Output:
{{"tool":"get_weather","args":{{"city":"Paris"}}}}

User: add 7.5 and 2
Output:
{{"tool":"add","args":{{"a":7.5,"b":2}}}}

User: Tell me a joke
Output:
{{"tool":"none","args":{{}}}}
""".strip()

In [15]:
select_prompt = ChatPromptTemplate.from_messages([
    ("system", select_instruct),
    ("human", "Now produce the JSON for this user request:\n{input}")
])

In [16]:
parser = JsonOutputParser()  # {"tool": str, "args": dict}

In [17]:
select_tool_chain = select_prompt | llm | parser

## question chain

In [18]:
question_instruct = """\
You must answer concisely and accurately in korean.

Tool result:
{observation}

Instructions:
- If the Tool result is NON-EMPTY, produce ONE short paragraph grounded ONLY in that result. Do not contradict it. If it is incomplete or conflicting, state what is missing and answer only with what can be supported.
- If the Tool result is EMPTY, answer directly from your knowledge. If you do not know, say "I don't know." Do NOT invent or guess facts.
- Do NOT include analysis, chain-of-thought, meta commentary, or mentions of tools/pipelines. Output only the final answer.
- Keep it concise (about 1–5 sentences) unless the user explicitly requested another format.
- If the user asked for a specific format (e.g., code or bullet points), follow it; otherwise use plain text.
""".strip()

In [19]:
question_prompt = ChatPromptTemplate.from_messages([
    ("system", question_instruct),
    ("human", "User question:\n{input}")
])

In [20]:
question_chain = question_prompt | llm

# LangGraph Smithy

![langgraph_example](res/langgraph_example.png)

In [42]:
from langgraph.graph import StateGraph, END
from typing import TypedDict, Optional, Dict, Any

## 상태 정의

In [59]:
class StateToolQA(TypedDict):
    input: str
    tool_schema: Dict[str, Any]
    tool_names: str
    selection: Dict[str, Any]
    observation: Optional[Any]
    answer: Optional[str]

In [60]:
state_toolqa = StateToolQA()

## tool select 노드

In [61]:
def node_tool_select(state: StateToolQA) -> StateToolQA:
    print(f"[+] node_tool_select\n{state}")
    selection = select_tool_chain.invoke({
        "input": state["input"],
        "tool_schema": state.get("tool_schema", TOOLS),
        "tool_names": state.get("tool_names", ", ".join(TOOLS.keys())),
    })
    print(f"[+] node_tool_select\n{selection}")
    return {**state, "selection": selection}

In [72]:
state_toolqa = {**state_toolqa, "input": "서울 날씨 어때?", "tool_schema": TOOLS, "tool_names": ', '.join(TOOLS.keys())}
state_toolqa

{'input': '서울 날씨 어때?',
 'tool_schema': {'get_weather': {'description': '도시의 현재 날씨를 조회',
   'parameters': {'type': 'object',
    'properties': {'city': {'type': 'string'}},
    'required': ['city']}},
  'add': {'description': '두 수를 더한다',
   'parameters': {'type': 'object',
    'properties': {'a': {'type': 'number'}, 'b': {'type': 'number'}},
    'required': ['a', 'b']}}},
 'tool_names': 'get_weather, add',
 'selection': {'tool': 'none', 'args': {}}}

In [73]:
state_toolqa = node_tool_select(state_toolqa)
state_toolqa

[+] node_tool_select
{'input': '서울 날씨 어때?', 'tool_schema': {'get_weather': {'description': '도시의 현재 날씨를 조회', 'parameters': {'type': 'object', 'properties': {'city': {'type': 'string'}}, 'required': ['city']}}, 'add': {'description': '두 수를 더한다', 'parameters': {'type': 'object', 'properties': {'a': {'type': 'number'}, 'b': {'type': 'number'}}, 'required': ['a', 'b']}}}, 'tool_names': 'get_weather, add', 'selection': {'tool': 'none', 'args': {}}}
[+] node_tool_select
{'tool': 'get_weather', 'args': {'city': '서울'}}


{'input': '서울 날씨 어때?',
 'tool_schema': {'get_weather': {'description': '도시의 현재 날씨를 조회',
   'parameters': {'type': 'object',
    'properties': {'city': {'type': 'string'}},
    'required': ['city']}},
  'add': {'description': '두 수를 더한다',
   'parameters': {'type': 'object',
    'properties': {'a': {'type': 'number'}, 'b': {'type': 'number'}},
    'required': ['a', 'b']}}},
 'tool_names': 'get_weather, add',
 'selection': {'tool': 'get_weather', 'args': {'city': '서울'}}}

In [67]:
state_toolqa = {**state_toolqa, "input": "2와 3을 더한 결과는?", "tool_schema": TOOLS, "tool_names": ', '.join(TOOLS.keys())}
state_toolqa = node_tool_select(state_toolqa)
state_toolqa

[+] node_tool_select
{'input': '2와 3을 더한 결과는?', 'tool_schema': {'get_weather': {'description': '도시의 현재 날씨를 조회', 'parameters': {'type': 'object', 'properties': {'city': {'type': 'string'}}, 'required': ['city']}}, 'add': {'description': '두 수를 더한다', 'parameters': {'type': 'object', 'properties': {'a': {'type': 'number'}, 'b': {'type': 'number'}}, 'required': ['a', 'b']}}}, 'tool_names': 'get_weather, add', 'selection': {'tool': 'get_weather', 'args': {'city': '서울'}}}
[+] node_tool_select
{'tool': 'add', 'args': {'a': 2, 'b': 3}}


{'input': '2와 3을 더한 결과는?',
 'tool_schema': {'get_weather': {'description': '도시의 현재 날씨를 조회',
   'parameters': {'type': 'object',
    'properties': {'city': {'type': 'string'}},
    'required': ['city']}},
  'add': {'description': '두 수를 더한다',
   'parameters': {'type': 'object',
    'properties': {'a': {'type': 'number'}, 'b': {'type': 'number'}},
    'required': ['a', 'b']}}},
 'tool_names': 'get_weather, add',
 'selection': {'tool': 'add', 'args': {'a': 2, 'b': 3}}}

In [68]:
state_toolqa = {**state_toolqa, "input": "좋은 회의 아이스브레이커 알려줘", "tool_schema": TOOLS, "tool_names": ', '.join(TOOLS.keys())}
state_toolqa = node_tool_select(state_toolqa)
state_toolqa

[+] node_tool_select
{'input': '좋은 회의 아이스브레이커 알려줘', 'tool_schema': {'get_weather': {'description': '도시의 현재 날씨를 조회', 'parameters': {'type': 'object', 'properties': {'city': {'type': 'string'}}, 'required': ['city']}}, 'add': {'description': '두 수를 더한다', 'parameters': {'type': 'object', 'properties': {'a': {'type': 'number'}, 'b': {'type': 'number'}}, 'required': ['a', 'b']}}}, 'tool_names': 'get_weather, add', 'selection': {'tool': 'add', 'args': {'a': 2, 'b': 3}}}
[+] node_tool_select
{'tool': 'none', 'args': {}}


{'input': '좋은 회의 아이스브레이커 알려줘',
 'tool_schema': {'get_weather': {'description': '도시의 현재 날씨를 조회',
   'parameters': {'type': 'object',
    'properties': {'city': {'type': 'string'}},
    'required': ['city']}},
  'add': {'description': '두 수를 더한다',
   'parameters': {'type': 'object',
    'properties': {'a': {'type': 'number'}, 'b': {'type': 'number'}},
    'required': ['a', 'b']}}},
 'tool_names': 'get_weather, add',
 'selection': {'tool': 'none', 'args': {}}}

## tool 실행 노드

In [75]:
def node_tool_call(state: StateToolQA) -> StateToolQA:
    print(f"[+] node_tool_call\n{state}")
    selection = state.get("selection", {}) or {}
    tool = selection.get("tool", "none")
    args = selection.get("args") or {}

    if tool not in TOOL_FUNCS:
        return {**state, "observation": None}

    try:
        result = TOOL_FUNCS[tool](**args)
    except Exception as e:
        result = f"TOOL_ERROR: {e}"
    return {**state, "observation": result}

In [76]:
state_toolqa

{'input': '서울 날씨 어때?',
 'tool_schema': {'get_weather': {'description': '도시의 현재 날씨를 조회',
   'parameters': {'type': 'object',
    'properties': {'city': {'type': 'string'}},
    'required': ['city']}},
  'add': {'description': '두 수를 더한다',
   'parameters': {'type': 'object',
    'properties': {'a': {'type': 'number'}, 'b': {'type': 'number'}},
    'required': ['a', 'b']}}},
 'tool_names': 'get_weather, add',
 'selection': {'tool': 'get_weather', 'args': {'city': '서울'}}}

In [77]:
state_toolqa = node_tool_call(state_toolqa)
state_toolqa

[+] node_tool_call
{'input': '서울 날씨 어때?', 'tool_schema': {'get_weather': {'description': '도시의 현재 날씨를 조회', 'parameters': {'type': 'object', 'properties': {'city': {'type': 'string'}}, 'required': ['city']}}, 'add': {'description': '두 수를 더한다', 'parameters': {'type': 'object', 'properties': {'a': {'type': 'number'}, 'b': {'type': 'number'}}, 'required': ['a', 'b']}}}, 'tool_names': 'get_weather, add', 'selection': {'tool': 'get_weather', 'args': {'city': '서울'}}}


{'input': '서울 날씨 어때?',
 'tool_schema': {'get_weather': {'description': '도시의 현재 날씨를 조회',
   'parameters': {'type': 'object',
    'properties': {'city': {'type': 'string'}},
    'required': ['city']}},
  'add': {'description': '두 수를 더한다',
   'parameters': {'type': 'object',
    'properties': {'a': {'type': 'number'}, 'b': {'type': 'number'}},
    'required': ['a', 'b']}}},
 'tool_names': 'get_weather, add',
 'selection': {'tool': 'get_weather', 'args': {'city': '서울'}},
 'observation': '서울: 맑음, 25℃ (데모)'}

## question 노드

In [78]:
def node_question(state: StateToolQA) -> StateToolQA:
    print(f"[+] node_question\n{state}")
    msg = question_chain.invoke({
        "input": state["input"],
        "observation": state.get("observation")
    })
    content = getattr(msg, "content", str(msg))
    return {**state, "answer": content}

In [79]:
state_toolqa

{'input': '서울 날씨 어때?',
 'tool_schema': {'get_weather': {'description': '도시의 현재 날씨를 조회',
   'parameters': {'type': 'object',
    'properties': {'city': {'type': 'string'}},
    'required': ['city']}},
  'add': {'description': '두 수를 더한다',
   'parameters': {'type': 'object',
    'properties': {'a': {'type': 'number'}, 'b': {'type': 'number'}},
    'required': ['a', 'b']}}},
 'tool_names': 'get_weather, add',
 'selection': {'tool': 'get_weather', 'args': {'city': '서울'}},
 'observation': '서울: 맑음, 25℃ (데모)'}

In [80]:
state_toolqa = node_question(state_toolqa)
state_toolqa

[+] node_question
{'input': '서울 날씨 어때?', 'tool_schema': {'get_weather': {'description': '도시의 현재 날씨를 조회', 'parameters': {'type': 'object', 'properties': {'city': {'type': 'string'}}, 'required': ['city']}}, 'add': {'description': '두 수를 더한다', 'parameters': {'type': 'object', 'properties': {'a': {'type': 'number'}, 'b': {'type': 'number'}}, 'required': ['a', 'b']}}}, 'tool_names': 'get_weather, add', 'selection': {'tool': 'get_weather', 'args': {'city': '서울'}}, 'observation': '서울: 맑음, 25℃ (데모)'}


{'input': '서울 날씨 어때?',
 'tool_schema': {'get_weather': {'description': '도시의 현재 날씨를 조회',
   'parameters': {'type': 'object',
    'properties': {'city': {'type': 'string'}},
    'required': ['city']}},
  'add': {'description': '두 수를 더한다',
   'parameters': {'type': 'object',
    'properties': {'a': {'type': 'number'}, 'b': {'type': 'number'}},
    'required': ['a', 'b']}}},
 'tool_names': 'get_weather, add',
 'selection': {'tool': 'get_weather', 'args': {'city': '서울'}},
 'observation': '서울: 맑음, 25℃ (데모)',
 'answer': '서울은 맑고, 현재 25도입니다.'}