# Instruction Fine-tuning – TRL SFTTrainer + PEFT/LoRA (EN/VN)
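**Objective / Mục tiêu**: Run a minimal supervised fine-tuning (SFT) loop on a tiny synthetic instruction dataset, then compare the model's outputs before and after fine-tuning on a few prompts. Note: as written, the code below passes no `peft_config` to `SFTTrainer`, so it performs full fine-tuning; supply a LoRA config if you want parameter-efficient training.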



# !pip install -q transformers datasets accelerate peft trl bitsandbytes


In [None]:

import json, random, os
from datasets import Dataset
from transformers import AutoTokenizer, AutoModelForCausalLM
from trl import SFTTrainer, SFTConfig

# Seed data: three hand-written instruction/input/output records, one per
# task type (extraction, translation, summarization). Swap in real data later.
samples = [
    dict(
        instruction="Extract cities from the text.",
        input="Hanoi, Bangkok, and Singapore are major cities.",
        output='["Hanoi","Bangkok","Singapore"]',
    ),
    dict(
        instruction="Translate to English.",
        input="Xin chào, tôi là sinh viên.",
        output="Hello, I am a student.",
    ),
    dict(
        instruction="Summarize in 1 sentence.",
        input="Large language models are useful but can hallucinate.",
        output="LLMs are powerful yet prone to hallucinations.",
    ),
]

# Repeat the seed records 200 times (600 rows total), then randomize the order.
samples *= 200
random.shuffle(samples)

def format_example(ex: dict) -> str:
    """Render one record as a single instruction-style training string.

    Args:
        ex: Mapping with the keys ``"instruction"``, ``"input"`` and
            ``"output"``.

    Returns:
        The three fields laid out under ``### Instruction:``, ``### Input:``
        and ``### Response:`` headers, with a blank line between sections.

    Raises:
        KeyError: If any of the three keys is missing from ``ex``.
    """
    # Newlines are written as explicit "\n" escapes: a single-quoted f-string
    # cannot span physical lines, so raw line breaks here are a SyntaxError.
    return (
        f"### Instruction:\n{ex['instruction']}\n\n"
        f"### Input:\n{ex['input']}\n\n"
        f"### Response:\n{ex['output']}"
    )

# Single-column dataset: every record rendered to one prompt/response string.
ds = Dataset.from_dict({"text": list(map(format_example, samples))})

# Base checkpoint; device placement and dtype are left to the library ("auto").
model_name = "Qwen/Qwen2.5-0.5B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    torch_dtype="auto",
)

# One epoch over the "text" column with sequence packing enabled.
# Batch size 2 with 8 accumulation steps gives 16 sequences per optimizer
# step on each device.
config = SFTConfig(
    output_dir="./sft_out",
    # data handling
    dataset_text_field="text",
    max_seq_length=512,
    packing=True,
    # optimization
    num_train_epochs=1,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=8,
    learning_rate=1e-4,
    lr_scheduler_type="cosine",
    # bookkeeping
    logging_steps=10,
    save_steps=100,
)

# Train on the first 500 rows only.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=ds.select(range(500)),
    args=config,
)
trainer.train()

# Save model weights and tokenizer side by side so the directory can be
# reloaded with from_pretrained("./sft_out/model").
trainer.model.save_pretrained("./sft_out/model")
tokenizer.save_pretrained("./sft_out/model")
print("SFT finished.")



# Quick before/after comparison utility
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer

# Evaluation prompts used for the before/after comparison.
prompts = [
    (
        "Extract cities from this: Hanoi and Ho Chi Minh City are in Vietnam; "
        "Vientiane is in Laos."
    ),
    (
        "Summarize: Retrieval-augmented generation uses external documents "
        "to ground answers."
    ),
]

def chat(model_id):
    """Generate and print a completion for every entry in ``prompts``.

    Args:
        model_id: Model identifier or local directory; passed to
            ``from_pretrained`` for both the tokenizer and the model.

    Returns:
        None. For each prompt, prints a header containing ``model_id``, the
        prompt itself, and the pipeline's ``generated_text`` followed by a
        blank line.
    """
    tok = AutoTokenizer.from_pretrained(model_id, use_fast=True)
    mdl = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", torch_dtype="auto")
    pipe = pipeline("text-generation", model=mdl, tokenizer=tok, max_new_tokens=128)
    for p in prompts:
        # The pipeline returns a list of candidates; take the first one.
        out = pipe(p)[0]["generated_text"]
        print("===", model_id, "===")
        print(p)
        # The trailing "\n" argument separates consecutive results. It must be
        # an escape: a raw line break inside the quotes is a SyntaxError.
        print(out, "\n")

# Run the same prompts through the original checkpoint and the fine-tuned
# copy saved under ./sft_out/model, in that order.
for label, checkpoint in (
    ("Baseline (pretrained):", "Qwen/Qwen2.5-0.5B-Instruct"),
    ("After SFT:", "./sft_out/model"),
):
    print(label)
    chat(checkpoint)
