Installation commands

In [None]:
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth  # Do this in local & cloud setups
else:
    import torch; v = re.match(r'[\d]{1,}\.[\d]{1,}', str(torch.__version__)).group(0)
    xformers = 'xformers==' + {'2.9':'0.0.33.post1','2.8':'0.0.32.post2'}.get(v, "0.0.33.post1")
    !pip install sentencepiece protobuf "datasets==4.3.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth_zoo bitsandbytes accelerate {xformers} peft trl triton unsloth
!pip install transformers==4.56.2 && pip install --no-deps trl==0.22.2

Load model in 4-bit + prep LoRA (Unsloth)

In [None]:
from unsloth import FastLanguageModel
from transformers import AutoTokenizer

MODEL_NAME = "Qwen/Qwen2.5-3B-Instruct"

max_seq_length = 2048   # 1024 if you hit OOM
dtype = None            # Unsloth picks best (bf16 if supported)
load_in_4bit = True

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = MODEL_NAME,
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
)

# LoRA config — good defaults for 3B
model = FastLanguageModel.get_peft_model(
    model,
    r = 16,                         # 8–16 sweet spot
    target_modules = [
        "q_proj","k_proj","v_proj","o_proj",
        "gate_proj","up_proj","down_proj",
    ],
    lora_alpha = 16,
    lora_dropout = 0.0,
    bias = "none",
    use_gradient_checkpointing = "unsloth",  # memory saver
    random_state = 3407,
)


In [None]:
print(model)

In [4]:
SYSTEM_PROMPT = "You are a helpful assistant. Be concise. If unsure, say you don't know."

def format_example(user, assistant, system=SYSTEM_PROMPT):
    messages = [
        {"role": "system", "content": system},
        {"role": "user", "content": user},
        {"role": "assistant", "content": assistant},
    ]
    text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=False)
    return text


In [5]:
train_pairs = []

# 1) JSON extraction tasks
json_examples = [
    ("Extract into JSON with keys name and date (YYYY-MM-DD): Alice Johnson, born Jan 5 1994",
     '{"name":"Alice Johnson","date":"1994-01-05"}'),
    ("Extract into JSON with keys company and amount_usd: 'Acme Corp raised $12M'",
     '{"company":"Acme Corp","amount_usd":12000000}'),
]

# 2) Bullet summaries
sum_examples = [
    ("Summarize in 3 bullets: Q4 revenue grew 20% QoQ, churn dropped to 2.1%, onboarding time reduced by 35%.",
     "- Revenue grew 20% QoQ in Q4\n- Churn decreased to 2.1%\n- Onboarding time improved by 35%"),
]

# 3) Uncertainty / refusal
refusal_examples = [
    ("What is the CEO of CompanyX as of today? (CompanyX is not specified.)",
     "I don't know. You didn’t specify which company you mean, so I can’t answer accurately."),
    ("Give me the exact birthday of an unknown person named John.",
     "I don't know. I don’t have enough information to determine John’s birthday."),
]

for u,a in json_examples + sum_examples + refusal_examples:
    train_pairs.append((u,a))

# Expand a bit by paraphrasing prompts lightly (simple augmentation)
augmented = []
for (u,a) in train_pairs:
    for prefix in ["Please", "Kindly", ""]:
        uu = (prefix + " " + u).strip()
        augmented.append((uu,a))
train_pairs = augmented

print("Train examples:", len(train_pairs))


Train examples: 15


In [6]:
from datasets import Dataset

train_texts = [format_example(u,a) for (u,a) in train_pairs]
train_ds = Dataset.from_dict({"text": train_texts})
train_ds[0]["text"][:400]


'<|im_start|>system\nYou are a helpful assistant. Be concise. If unsure, say you don\'t know.<|im_end|>\n<|im_start|>user\nPlease Extract into JSON with keys name and date (YYYY-MM-DD): Alice Johnson, born Jan 5 1994<|im_end|>\n<|im_start|>assistant\n{"name":"Alice Johnson","date":"1994-01-05"}<|im_end|>\n'

In [7]:
from trl import SFTTrainer
from transformers import TrainingArguments

trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = train_ds,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    args = TrainingArguments(
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 8,  # effective batch = 16
        warmup_steps = 5,
        max_steps = 60,                  # Week2 quick run; increase to 200–800 later
        learning_rate = 2e-4,
        fp16 = torch.cuda.is_available() and not torch.cuda.is_bf16_supported(),
        bf16 = torch.cuda.is_available() and torch.cuda.is_bf16_supported(),
        logging_steps = 5,
        output_dir = "outputs",
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "cosine",
        seed = 3407,
        report_to = "none",
    ),
)

trainer.train()


Unsloth: Tokenizing ["text"] (num_proc=4):   0%|          | 0/15 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 15 | Num Epochs = 60 | Total steps = 60
O^O/ \_/ \    Batch size per device = 2 | Gradient accumulation steps = 8
\        /    Data Parallel GPUs = 1 | Total batch size (2 x 8 x 1) = 16
 "-____-"     Trainable parameters = 29,933,568 of 3,115,872,256 (0.96% trained)


Step,Training Loss
5,3.3367
10,1.3298
15,0.6114
20,0.1442
25,0.0433
30,0.0379
35,0.0365
40,0.0358
45,0.0355
50,0.0353


TrainOutput(global_step=60, training_loss=0.47642183924714726, metrics={'train_runtime': 223.9851, 'train_samples_per_second': 4.286, 'train_steps_per_second': 0.268, 'total_flos': 1295774785536000.0, 'train_loss': 0.47642183924714726, 'epoch': 60.0})

In [8]:
FastLanguageModel.for_inference(model)

@torch.inference_mode()
def chat(user_msg, system=SYSTEM_PROMPT, max_new_tokens=256, temperature=0.3, top_p=0.9):
    messages = [
        {"role":"system","content":system},
        {"role":"user","content":user_msg},
    ]
    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    out = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
        eos_token_id=tokenizer.eos_token_id,
    )
    return tokenizer.decode(out[0], skip_special_tokens=True)

print(chat("Extract into JSON with keys name and date: Bob Smith, born Feb 3 2001",
           system="You are a JSON-only assistant. Return only valid JSON."))


system
You are a JSON-only assistant. Return only valid JSON.
user
Extract into JSON with keys name and date: Bob Smith, born Feb 3 2001
assistant
{"name":"Bob Smith","date":"2001-02-03"}


In [13]:
print(chat("Extract into JSON with keys name and date: Bob Smith, born 3rd Feb  2026",
           system="You are a JSON-only assistant. Return only valid JSON."))

system
You are a JSON-only assistant. Return only valid JSON.
user
Extract into JSON with keys name and date: Bob Smith, born 3rd Feb  2026
assistant
{"name":"Bob Smith","date":"3rd Feb  2026"}


### Eval

In [14]:
eval_items = [
    {
        "id":"json_1",
        "prompt":"Extract into JSON with keys name and date (YYYY-MM-DD): Alice Johnson, born Jan 5 1994",
        "checks":["json_valid","json_has_keys:name,date"]
    },
    {
        "id":"sum_1",
        "prompt":"Summarize in 3 bullets: Our Q4 revenue grew 20% QoQ, churn dropped to 2.1%, onboarding time reduced by 35%.",
        "checks":["bullet_count:3","max_words:60"]
    },
    {
        "id":"refuse_1",
        "prompt":"What is the CEO of CompanyX as of today? (CompanyX is not specified.)",
        "checks":["contains_idk","max_words:40"]
    },
]


In [17]:
import json
def extract_first_json(text: str):
    m = re.search(r"\{.*\}", text, flags=re.DOTALL)
    return m.group(0) if m else None

def check_json_valid(text: str) -> bool:
    s = extract_first_json(text)
    if not s: return False
    try:
        json.loads(s)
        return True
    except:
        return False

def check_json_has_keys(text: str, keys):
    s = extract_first_json(text)
    if not s: return False
    try:
        obj = json.loads(s)
    except:
        return False
    return all(k in obj for k in keys)

def check_bullet_count(text: str, n: int) -> bool:
    bullets = [ln.strip() for ln in text.splitlines() if ln.strip().startswith(("-", "•", "*"))]
    return len(bullets) == n

def check_max_words(text: str, n: int) -> bool:
    words = re.findall(r"\w+", text)
    return len(words) <= n

def check_contains_idk(text: str) -> bool:
    t = text.lower()
    return ("i don't know" in t) or ("i do not know" in t) or ("not enough information" in t) or ("cannot" in t)

def run_checks(output: str, checks):
    results = {}
    for chk in checks:
        if chk == "json_valid":
            results[chk] = check_json_valid(output)
        elif chk.startswith("json_has_keys:"):
            keys = chk.split(":",1)[1].split(",")
            results[chk] = check_json_has_keys(output, keys)
        elif chk.startswith("bullet_count:"):
            n = int(chk.split(":")[1])
            results[chk] = check_bullet_count(output, n)
        elif chk.startswith("max_words:"):
            n = int(chk.split(":")[1])
            results[chk] = check_max_words(output, n)
        elif chk == "contains_idk":
            results[chk] = check_contains_idk(output)
        else:
            results[chk] = False
    return results


In [19]:
def eval_suite(items, system=SYSTEM_PROMPT, temperature=0.3, top_p=0.9):
    rows = []
    for it in items:
        out = chat(it["prompt"], system=system, max_new_tokens=200, temperature=temperature, top_p=top_p)
        checks = run_checks(out, it["checks"])
        score = sum(checks.values()) / max(1, len(checks))
        rows.append({
            "id": it["id"],
            "prompt": it["prompt"],
            "score": score,
            "checks": checks,
            "output_preview": out[-400:],
        })
        if score < 1.0:
          print(rows[-1])
    avg = sum(r["score"] for r in rows) / len(rows)
    return avg, rows

avg, rows = eval_suite(eval_items, system=SYSTEM_PROMPT, temperature=0.2, top_p=1.0)
avg, rows[0]["checks"], rows[0]["output_preview"]


{'id': 'refuse_1', 'prompt': 'What is the CEO of CompanyX as of today? (CompanyX is not specified.)', 'score': 0.5, 'checks': {'contains_idk': True, 'max_words:40': False}, 'output_preview': "system\nYou are a helpful assistant. Be concise. If unsure, say you don't know.\nuser\nWhat is the CEO of CompanyX as of today? (CompanyX is not specified.)\nassistant\nI don't know. You didn’t specify which company you mean, so I can’t answer accurately."}


(0.8333333333333334,
 {'json_valid': True, 'json_has_keys:name,date': True},
 'system\nYou are a helpful assistant. Be concise. If unsure, say you don\'t know.\nuser\nExtract into JSON with keys name and date (YYYY-MM-DD): Alice Johnson, born Jan 5 1994\nassistant\n{"name":"Alice Johnson","date":"1994-01-05"}')

In [20]:
SAVE_DIR = "qwen25_3b_week2_lora"
model.save_pretrained(SAVE_DIR)
tokenizer.save_pretrained(SAVE_DIR)
print("Saved to:", SAVE_DIR)

Saved to: qwen25_3b_week2_lora
