<a href="https://colab.research.google.com/github/mightyoctopus/lora-fine-tuning-qwen3-0.6b-medical-evaluation-code/blob/main/evaluation_qwen3_0_6B_Lora.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -q peft transformers torch

In [None]:
!pip install -U bitsandbytes

In [None]:
import os
import math

import torch
from torch.utils.data import DataLoader

from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    default_data_collator
)

from peft import PeftModel

In [None]:
# Base checkpoint and the LoRA adapter that is evaluated against it.
model_name = "Qwen/Qwen3-0.6B"
adapter_path = "MightyOctopus/qwen3-0.6B-lora-medical"

# 4-bit NF4 quantization with double quantization; compute runs in float16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True
)

# Reference model: the quantized base checkpoint without any adapter.
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True
).eval()

# The tokenizer is shared by both models; EOS is reused as the padding token.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token

# A second, separate load of the base checkpoint hosts the adapter, so
# `base_model` above is a different object from the one being merged.
tmp_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True
)

# Attach the LoRA adapter, then fold its weights into the model.
tuned_model = PeftModel.from_pretrained(tmp_model, adapter_path)
tuned_model = tuned_model.merge_and_unload().eval()
# Use the pad *token id* here. `pad_token_type_id` is the segment (token-type)
# id assigned to padding positions, not the vocabulary id of the pad token.
tuned_model.generation_config.pad_token_id = tokenizer.pad_token_id
tuned_model.generation_config.eos_token_id = tokenizer.eos_token_id

In [None]:
def _pair_turns(convo):
    """Return one formatted text per human/gpt exchange found in *convo*."""
    rendered = []
    question = None
    for turn in convo:
        speaker = turn["from"]
        if speaker == "human":
            question = turn["value"]
        elif speaker == "gpt" and question is not None:
            answer = turn["value"]
            rendered.append(f"### Instruction:\n{question}\n### Response:\n{answer}")
            question = None
    return rendered


def tokenize(batch):
    """Tokenize a batch of conversations into padded causal-LM examples.

    Each human turn is paired with the gpt turn that follows it, and the pair
    is rendered with the "### Instruction / ### Response" template so that the
    question and its answer end up in the same example. Turns that cannot be
    paired (a gpt turn with no preceding human turn, or a trailing human turn
    with no reply) are skipped.

    Args:
        batch: Mapping with a "conversations" entry; each conversation is a
            sequence of turns carrying "from" and "value" keys.

    Returns:
        The tokenizer output (padded/truncated to 256 tokens) with an added
        "labels" entry: a copy of "input_ids" in which padding positions are
        set to -100.
    """
    texts = []
    for convo in batch["conversations"]:
        texts.extend(_pair_turns(convo))

    tokens = tokenizer(
        texts,
        padding="max_length",
        max_length=256,
        truncation=True,
        return_tensors="pt",
        return_attention_mask=True
    )

    # Labels mirror the inputs; padding is located through the attention mask
    # and replaced with -100 so those positions are ignored by the loss.
    tokens["labels"] = tokens["input_ids"].clone()
    tokens["labels"][tokens["attention_mask"] == 0] = -100
    return tokens

In [None]:
dataset_name = "Rabe3/QA_Synthatic_Medical_data"

# The last 10% of the train split is used as the evaluation set. It is
# tokenized in batches, the raw columns are dropped, and rows are exposed as
# torch tensors.
held_out_split = load_dataset(dataset_name, "default", split="train[90%:]")
eval_ds = held_out_split.map(
    tokenize,
    batched=True,
    remove_columns=held_out_split.column_names,
).with_format("torch")

In [None]:
# Serve the tokenized evaluation split in batches of eight examples.
eval_loader = DataLoader(
    dataset=eval_ds,
    batch_size=8,
    collate_fn=default_data_collator,
)

In [None]:
# Sanity check: print the input_ids shape of the first collated batch, if any.
first_batch = next(iter(eval_loader), None)
if first_batch is not None:
    print(first_batch["input_ids"].shape)


In [None]:
@torch.no_grad()
def compute_perplexity(model, loader=None):
    """Return the token-level perplexity of *model* over an evaluation loader.

    The per-batch loss is weighted by the number of target tokens in that
    batch, so the result is exp(total NLL / total target tokens). A plain mean
    of per-batch losses would give every batch the same weight even though
    batches contain different numbers of unmasked tokens.

    Args:
        model: Callable as ``model(**batch)`` and returning an object with a
            scalar ``.loss``. Assumes the Hugging Face causal-LM convention
            where labels are shifted by one position inside the model and
            positions labelled -100 are ignored.
        loader: Iterable of dict batches that include a "labels" tensor.
            Defaults to the notebook-level ``eval_loader``.

    Returns:
        The perplexity as a float.

    Raises:
        ValueError: If the loader yields no target tokens at all.
    """
    if loader is None:
        loader = eval_loader

    # Follow the model's own placement instead of assuming a CUDA device.
    device = getattr(model, "device", None)
    if device is None:
        device = next(model.parameters()).device

    total_nll = 0.0
    total_targets = 0

    for batch in loader:
        batch = {k: v.to(device) for k, v in batch.items()}

        # With shifted labels the first position is never a prediction target,
        # so only labels[:, 1:] contribute to the loss.
        n_targets = int((batch["labels"][:, 1:] != -100).sum().item())
        if n_targets == 0:
            # A fully masked batch has no defined mean loss; skip it.
            continue

        loss = model(**batch).loss
        total_nll += loss.item() * n_targets
        total_targets += n_targets

    if total_targets == 0:
        raise ValueError("Cannot compute perplexity: no target tokens were evaluated.")

    mean_nll = total_nll / total_targets
    print("Target tokens: ", total_targets)
    print("Mean NLL: ", mean_nll)
    return math.exp(mean_nll)


In [None]:
# Score the untouched base model first, then the adapter-merged model.
base_ppl = compute_perplexity(base_model)
print(f"Base Model Perplexity: {base_ppl:.2f}")

tuned_ppl = compute_perplexity(tuned_model)
print(f"Tuned Model Perplexity: {tuned_ppl:.2f}")

In [None]:
import random

# Untokenized copy of the last 10% of the train split, kept for its raw text.
raw_data = load_dataset(dataset_name, "default", split="train[90%:]")

# The first turn of each conversation is taken as the question and the second
# turn as the reference answer.
conversations = raw_data["conversations"]
ref_questions = [turns[0]["value"] for turns in conversations]
ref_answers = [turns[1]["value"] for turns in conversations]


def generate(model, instruction, max_new_tokens=256):
    """Generate text for *instruction* using the notebook's prompt template.

    Args:
        model: A model exposing ``.device`` and ``.generate(...)``.
        instruction: The question inserted into the "### Instruction" slot.
        max_new_tokens: Upper bound on the number of newly generated tokens.

    Returns:
        The first returned sequence decoded to a string with special tokens
        removed. For decoder-only models the returned sequence begins with the
        prompt tokens, so the prompt text is part of the result.
    """
    prompt = f"### Instruction:\n{instruction}\n### Response:\n"

    # Move the whole encoding (input_ids and attention_mask) to the model's
    # own device instead of assuming CUDA.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    with torch.no_grad():
        out = model.generate(
            **inputs,  # forwards the attention mask together with input_ids
            max_new_tokens=max_new_tokens,
            # Pass the pad id explicitly so the call does not rely on each
            # model's generation_config having been set up correctly.
            pad_token_id=tokenizer.pad_token_id,
        )

    return tokenizer.decode(out[0], skip_special_tokens=True)

In [None]:
# Show the dataset's reference answer for the first evaluation conversation.
print(ref_answers[0])

In [None]:
# Ask both models the first evaluation question and print their outputs,
# each preceded by a divider line.
sample_question = ref_questions[0]

print("=======================================================")
base_reply = generate(base_model, sample_question)
print("BASE MODEL RESPONSE: ", base_reply)

print("=======================================================")
tuned_reply = generate(tuned_model, sample_question)
print("TUNED MODEL RESPONSE: ", tuned_reply)