In [2]:
import sys
# Make the parent directory importable so the `src` package resolves from this
# notebook. Guarded so that re-running the cell does not keep appending the
# same entry to sys.path.
if "../" not in sys.path:
    sys.path.append("../")

In [3]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

from src.prompt import get_prompt
from src.utils import set_random_seeds, get_bnb_config

In [6]:
# Location of the base causal-LM weights and tokenizer files.
base_model_path = "../pretrain/Taiwan-LLM-7B-v2.0-chat"

# Adapter checkpoint handed to PeftModel.from_pretrained; it is only loaded
# when `method` is "lora-fine-tune".
peft_path = "../checkpoint/epoch=4_ppl=3.649335366725922"

# Inference mode selector. Elsewhere in this notebook it is compared against
# "lora-fine-tune" (attach the adapter) and "few-shot" (in-context prompt).
method = "lora-fine-tune"

In [8]:
# Fix the random state before anything else runs.
# NOTE(review): which RNGs are seeded depends on src.utils.set_random_seeds -- confirm.
set_random_seeds()

# Tokenizer is read from the same directory as the base model.
tokenizer = AutoTokenizer.from_pretrained(base_model_path)

# Base model: bfloat16 compute dtype plus the project's quantization config.
# NOTE(review): get_bnb_config is presumably a bitsandbytes config -- confirm.
bnb_config = get_bnb_config()
model = AutoModelForCausalLM.from_pretrained(
    base_model_path,
    quantization_config=bnb_config,
    torch_dtype=torch.bfloat16,
)

# Attach the fine-tuned adapter on top of the base model for the LoRA method;
# every other method runs the base model unchanged.
if method == "lora-fine-tune":
    model = PeftModel.from_pretrained(model, peft_path)

# Device that input tensors are moved to later: first GPU if present, else CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [10]:
# Read one question interactively. Escape sequences typed at the prompt
# (e.g. "\n") arrive as literal characters, not as real newlines.
question = input("Please input your question: ")

# Only the "few-shot" method asks get_prompt for the in-context variant.
prompt = get_prompt(question, incontext=(method == "few-shot"))
print("Prompt:", prompt)

# BOS is prepended by hand because the tokenizer is told not to add special
# tokens itself.
question_input_ids = torch.tensor(
    [tokenizer.bos_token_id] + tokenizer(prompt, add_special_tokens=False)["input_ids"]
)

model.eval()
with torch.no_grad():
    # Add a batch dimension of 1 and move the prompt to the chosen device.
    data = question_input_ids.unsqueeze(0).to(device)
    # Single unpadded sequence: every position is a real token, so the mask is
    # all ones. Passing it explicitly avoids generate() having to infer it.
    generated_tokens = model.generate(
        input_ids=data,
        attention_mask=torch.ones_like(data),
    )
    # For a causal LM, generate() returns the prompt tokens followed by the
    # continuation. Drop the prompt by position instead of string-replacing
    # the decoded text: decoding does not always reproduce the prompt
    # byte-for-byte, and str.replace would also delete any repeat of the
    # prompt inside the answer.
    answer_tokens = generated_tokens[0, data.shape[1]:]
    generation = tokenizer.decode(answer_tokens, skip_special_tokens=True).strip()
    print("Answer:", generation)

Please input your question:  翻譯成文言文：\n於是，廢帝讓瀋慶之的堂侄、直將軍瀋攸之賜瀋慶之毒藥，命瀋慶之自殺。


Prompt: 你是人工智慧助理，以下是用戶和人工智能助理之間的對話。你要對用戶的問題提供有用、安全、詳細和禮貌的回答。以下的問題為文言文翻譯成白話文或白話文翻譯成文言文。USER: 翻譯成文言文：\n於是，廢帝讓瀋慶之的堂侄、直將軍瀋攸之賜瀋慶之毒藥，命瀋慶之自殺。 ASSISTANT:
Answer: 於是，廢帝賜毒藥於慶之侄直將軍瀋攸之，令慶之自殺。
