# 기본환경 설정

In [None]:
# Fetch the value stored under the name "HF_KEY" from Colab's userdata store.
# It is passed to huggingface_hub.login() in the next cell.
from google.colab import userdata
HF_KEY = userdata.get("HF_KEY")

In [None]:
import huggingface_hub
# Authenticate this session against the Hugging Face Hub using HF_KEY.
huggingface_hub.login(HF_KEY)

# 모델 로딩

In [None]:
# IPython shell escape: install the unsloth package into the notebook runtime.
!pip install unsloth

In [None]:
from unsloth import FastLanguageModel
import torch

In [None]:
# Load the "unsloth/gemma-3-4b-it" checkpoint through Unsloth. The call returns
# a (model, tokenizer) pair that every later cell uses as notebook globals.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/gemma-3-4b-it",
    load_in_4bit=True  # request 4-bit loading (presumably quantized weights to reduce GPU memory; confirm)
)

In [None]:
# Put the model into Unsloth's inference mode. The call configures `model` in
# place, so the result is deliberately not assigned back: rebinding would
# clobber `model` with None if the installed Unsloth release's for_inference()
# does not return the model.
FastLanguageModel.for_inference(model)

# Prompt 함수

In [None]:
def getPrompt(messages_json):
    """Flatten a chat history into a single tagged prompt string.

    Each message is a mapping with "role" and "content" keys. Messages whose
    role is "system", "user" or "assistant" are rendered as
    ``<|role|>\\n{content}</s>\\n``; messages with any other role are skipped.
    The result always ends with an open ``<|assistant|>\\n`` turn so the model
    continues as the assistant.

    NOTE(review): these tags are not the ``<start_of_turn>`` /
    ``<end_of_turn>`` markers of Gemma's published chat template; confirm the
    format is intended for the model loaded in this notebook.

    Args:
        messages_json: Iterable of message mappings, in conversation order.

    Returns:
        The assembled prompt string.

    Raises:
        KeyError: If a message lacks the "role" or "content" key.
    """
    pieces = []
    for entry in messages_json:
        # Both keys are read up front, so a malformed message fails even when
        # its role would have been skipped.
        speaker = entry["role"]
        text = entry["content"]
        for known in ("system", "user", "assistant"):
            if speaker == known:
                pieces.append(f"<|{known}|>\n{text}</s>\n")
                break

    # Leave the assistant turn open for generation.
    pieces.append("<|assistant|>\n")
    return "".join(pieces)

# 질문 응답 수행

In [None]:
# Sample conversation: one system instruction followed by one user question.
messages = [
    {"role": "system", "content": "너는 친절한 AI야."},
    {"role": "user", "content": "지구는 왜 둥글까?"},
]

In [None]:
# Flatten the conversation into one prompt string that ends with an open assistant turn.
prompt = getPrompt(messages)

In [None]:
# Tokenize the prompt into PyTorch tensors ("pt") and move them to the model's device.
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

In [None]:
# Generate a continuation of the prompt; gradients are not needed for inference.
with torch.no_grad():
    outputs = model.generate(
        **inputs,
        max_new_tokens=512,  # cap on the number of newly generated tokens
        do_sample=True,  # sample from the distribution instead of greedy decoding
        temperature=0.7,
        top_p=0.9,  # nucleus-sampling threshold
        eos_token_id=tokenizer.eos_token_id,  # stop when the tokenizer's EOS token is produced
    )

In [None]:
# Decode the first returned sequence to text, dropping special tokens.
# Presumably this holds the prompt followed by the completion; the later split
# on the assistant marker relies on that (confirm).
generated = tokenizer.decode(outputs[0], skip_special_tokens=True)

In [None]:
# Bare expression: shows the full decoded text as the notebook cell's output.
generated

In [None]:
# Keep only the text after the last assistant marker (the whole string when the
# marker is absent) and trim surrounding whitespace.
response = generated.rpartition("<|assistant|>\n")[2].strip()

In [None]:
# Print the extracted assistant reply.
print(response)

# 질문 응답 함수

In [None]:
def chatQA(messages, temperature=0.7, top_p=0.9):
    """Generate one assistant reply for a chat history.

    The history is flattened with ``getPrompt``, tokenized, and passed to the
    notebook-global ``model``. At most 512 new tokens are generated.

    Args:
        messages: Iterable of ``{"role": ..., "content": ...}`` mappings, in
            conversation order.
        temperature: Sampling temperature. A value that is ``None`` or not
            strictly positive selects greedy decoding instead of sampling.
        top_p: Nucleus-sampling threshold; only used when sampling.

    Returns:
        The newly generated text, with special tokens removed and surrounding
        whitespace stripped.
    """
    prompt = getPrompt(messages)
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # Honour the caller's sampling settings (they were previously ignored in
    # favour of hard-coded values). Sampling needs a strictly positive
    # temperature, so anything else falls back to greedy decoding rather than
    # raising inside generate().
    if temperature is not None and temperature > 0:
        sampling_options = {
            "do_sample": True,
            "temperature": temperature,
            "top_p": top_p,
        }
    else:
        sampling_options = {"do_sample": False}

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=512,
            eos_token_id=tokenizer.eos_token_id,
            **sampling_options,
        )

    # The returned sequence starts with the prompt tokens. Decode only what
    # follows them, so the reply does not depend on finding a text marker that
    # the model (or the user's message) could also contain.
    prompt_length = inputs["input_ids"].shape[-1]
    new_tokens = outputs[0][prompt_length:]

    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

In [None]:
# Example call: a pirate-persona system prompt plus one user question, with the
# sampling settings spelled out by name.
reply = chatQA(
    messages=[
        {"role": "system", "content": "너는 해적의 피를 가진 AI야."},
        {"role": "user", "content": "지구는 왜 둥글까? 해적인 니가 그걸 알까?"},
    ],
    temperature=0.7,
    top_p=0.9,
)
print(reply)