In [None]:
!pip install -q unsloth==2025.3.15 datasets==3.5.0

In [None]:
from unsloth import FastLanguageModel

# Maximum sequence length handed to Unsloth when the base model is loaded.
max_seq_length = 2048

# Load the 4-bit quantised Llama 3.2 1B checkpoint together with its tokenizer.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/Llama-3.2-1B-bnb-4bit",
    max_seq_length=max_seq_length,
    load_in_4bit=True,
    dtype=None,  # no explicit dtype requested
)

# Wrap the base model with LoRA adapters (rank 16) on the attention and MLP
# projection layers; only the adapter weights are trainable afterwards.
model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    lora_alpha=16,
    lora_dropout=0,
    target_modules=[
        "q_proj", "k_proj", "v_proj", "up_proj",
        "down_proj", "o_proj", "gate_proj"],
    use_rslora=True,
    use_gradient_checkpointing="unsloth",
    random_state = 42,
    loftq_config = None,
)

# print_trainable_parameters() writes the summary itself and returns None, so
# wrapping it in print() only added a stray "None" line to the cell output.
model.print_trainable_parameters()

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
Unsloth: Failed to patch Gemma3ForConditionalGeneration.
🦥 Unsloth Zoo will now patch everything to make training faster!
==((====))==  Unsloth 2025.3.15: Fast Llama patching. Transformers: 4.51.1.
   \\   /|    NVIDIA A100-SXM4-40GB. Num GPUs = 1. Max memory: 39.557 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 8.0. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/1.03G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/230 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/50.6k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/459 [00:00<?, ?B/s]

Unsloth 2025.3.15 patched 16 layers with 16 QKV layers, 16 O layers and 16 MLP layers.


trainable params: 11,272,192 || all params: 1,247,086,592 || trainable%: 0.9039
None


## **Dataset**

In [None]:
from datasets import load_dataset

# Load the MedMCQA multiple-choice dataset (all available splits).
ds = load_dataset("openlifescienceai/medmcqa")
# Remove the "test" split so that only the remaining splits are formatted below.
# NOTE(review): presumably the test split has no usable `cop` labels, which
# would break the id2label lookup in formatting_prompt — confirm.
del ds["test"]

In [None]:
# Prompt template: the first slot receives the question, the second the four
# lettered options. It ends right after the answer header, so the answer is
# the continuation the model has to produce.
data_prompt = """Choose the correct option for the following question.

### Question:
{}

### Choice:
{}

### Answer:
"""

# Maps the integer `cop` field (index of the correct option) to its letter.
id2label = {
    0: 'A',
    1: 'B',
    2: 'C',
    3: 'D'
}

def formatting_prompt(examples, include_answer=True, eos_token=None):
    """Format a batch of MedMCQA rows into text for supervised fine-tuning.

    Previously the answer string was computed but never added to the text, so
    every training example stopped at the "### Answer:" header and the model
    never saw a target. The answer (letter, space, option text) is now
    appended after the prompt, followed by the EOS token.

    Args:
        examples: Batch in column form (dict of equally long lists) with the
            keys "question", "opa", "opb", "opc", "opd" and, when
            ``include_answer`` is true, "cop" (an index present in id2label).
        include_answer: When true (default) the gold answer and the EOS token
            are appended, which is what training/evaluation needs. Pass False
            to obtain bare prompts for generation; "cop" is not read then.
        eos_token: String appended after the answer. When None, the
            ``eos_token`` of the notebook-level ``tokenizer`` is used if such
            a variable exists; otherwise nothing is appended.

    Returns:
        ``{"text": [...]}`` with one formatted string per input row.

    Raises:
        KeyError: If a "cop" value is not a key of id2label.
    """
    if eos_token is None:
        try:
            # `tokenizer` is created in the model-loading cell of this notebook.
            eos_token = getattr(tokenizer, "eos_token", None) or ""
        except NameError:
            # Function used outside the notebook flow: no EOS token available.
            eos_token = ""

    questions = examples["question"]
    option_columns = (
        examples["opa"], examples["opb"], examples["opc"], examples["opd"],
    )
    cops = examples["cop"] if include_answer else None

    texts = []
    for idx, question in enumerate(questions):
        options = tuple(column[idx] for column in option_columns)
        opa, opb, opc, opd = options

        choices = f"A. {opa}. B. {opb}. C. {opc}. D. {opd}."
        text = data_prompt.format(question, choices)

        if include_answer:
            cop = cops[idx]
            # id2label is consulted first so an unknown label raises KeyError
            # instead of silently indexing the options tuple.
            text += f"{id2label[cop]} {options[cop]}{eos_token}"

        texts.append(text)
    return {"text": texts}

In [None]:
# Run formatting_prompt batch-wise over every remaining split; the result keeps
# the original columns and gains the "text" column used for training.
process_ds = ds.map(formatting_prompt, batched=True)

In [None]:
# Sanity check: show the first formatted training row, including its "text" field.
process_ds["train"][0]

{'id': 'e9ad821a-c438-4965-9f77-760819dfa155',
 'question': 'Chronic urethral obstruction due to benign prismatic hyperplasia can lead to the following change in kidney parenchyma',
 'opa': 'Hyperplasia',
 'opb': 'Hyperophy',
 'opc': 'Atrophy',
 'opd': 'Dyplasia',
 'cop': 2,
 'choice_type': 'single',
 'exp': 'Chronic urethral obstruction because of urinary calculi, prostatic hyperophy, tumors, normal pregnancy, tumors, uterine prolapse or functional disorders cause hydronephrosis which by definition is used to describe dilatation of renal pelvis and calculus associated with progressive atrophy of the kidney due to obstruction to the outflow of urine Refer Robbins 7yh/9,1012,9/e. P950',
 'subject_name': 'Anatomy',
 'topic_name': 'Urinary tract',
 'text': 'Choose the correct option for the following question.\n\n### Question:\nChronic urethral obstruction due to benign prismatic hyperplasia can lead to the following change in kidney parenchyma\n\n### Choice:\nA. Hyperplasia. B. Hyperophy

## **Training**

In [None]:
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

# Pick the mixed-precision mode once: bf16 when supported, fp16 otherwise.
use_bf16 = is_bfloat16_supported()

# Evaluation, checkpointing and logging all happen on the same step interval.
step_interval = 50

training_args = TrainingArguments(
    # Output locations
    output_dir="med-mcqa-llama-3.2-1B-4bit-lora",
    logging_dir="logs",
    # Optimisation
    optim="adamw_8bit",
    learning_rate=3e-4,
    lr_scheduler_type="linear",
    warmup_steps=10,
    weight_decay=0.01,
    # Batch of 64 per device, accumulated over 16 steps before each update
    per_device_train_batch_size=64,
    gradient_accumulation_steps=16,
    num_train_epochs=2,
    # Precision
    bf16=use_bf16,
    fp16=not use_bf16,
    # Step-based evaluation / checkpointing / logging
    eval_strategy="steps",
    eval_steps=step_interval,
    save_strategy="steps",
    save_steps=step_interval,
    logging_strategy="steps",
    logging_steps=step_interval,
    save_total_limit=1,
    load_best_model_at_end=True,
    seed=0,
)

# Supervised fine-tuning on the "text" column of the formatted splits.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=process_ds["train"],
    eval_dataset=process_ds["validation"],
    dataset_text_field="text",
    args=training_args,
)

Unsloth: Tokenizing ["text"] (num_proc=12):   0%|          | 0/182822 [00:00<?, ? examples/s]

Unsloth: Tokenizing ["text"] (num_proc=12):   0%|          | 0/4183 [00:00<?, ? examples/s]

In [None]:
import os
# Set before training starts.
# NOTE(review): presumably tells Unsloth to return logits so the evaluation
# passes during training work — confirm against the Unsloth documentation.
os.environ['UNSLOTH_RETURN_LOGITS'] = '1'
# Launch fine-tuning; as the cell's last expression the returned TrainOutput is displayed.
trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 182,822 | Num Epochs = 2 | Total steps = 356
O^O/ \_/ \    Batch size per device = 64 | Gradient accumulation steps = 16
\        /    Data Parallel GPUs = 1 | Total batch size (64 x 16 x 1) = 1,024
 "-____-"     Trainable parameters = 11,272,192/1,000,000,000 (1.13% trained)


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss,Validation Loss
50,1.27,1.323526
100,1.0631,1.309695
150,1.0527,1.303478
200,1.0482,1.307082
250,1.0008,1.305955
300,0.9956,1.302413
350,0.9919,1.301295


Unsloth: Not an error, but LlamaForCausalLM does not accept `num_items_in_batch`.
Using gradient accumulation will be very slightly less accurate.
Read more on gradient accumulation issues here: https://unsloth.ai/blog/gradient


TrainOutput(global_step=356, training_loss=1.0590723953889998, metrics={'train_runtime': 3536.4134, 'train_samples_per_second': 103.394, 'train_steps_per_second': 0.101, 'total_flos': 3.762528031463178e+17, 'train_loss': 1.0590723953889998})

## **Inference**

In [None]:
# Show the first validation row; it serves as the inference sample below.
process_ds["validation"][0]

{'id': '45258d3d-b974-44dd-a161-c3fccbdadd88',
 'question': 'Which of the following is not true for myelinated nerve fibers:',
 'opa': 'Impulse through myelinated fibers is slower than non-myelinated fibers',
 'opb': 'Membrane currents are generated at nodes of Ranvier',
 'opc': 'Saltatory conduction of impulses is seen',
 'opd': 'Local anesthesia is effective only when the nerve is not covered by myelin sheath',
 'cop': 0,
 'choice_type': 'multi',
 'exp': None,
 'subject_name': 'Physiology',
 'topic_name': None,
 'text': 'Choose the correct option for the following question.\n\n### Question:\nWhich of the following is not true for myelinated nerve fibers:\n\n### Choice:\nA. Impulse through myelinated fibers is slower than non-myelinated fibers. B. Membrane currents are generated at nodes of Ranvier. C. Saltatory conduction of impulses is seen. D. Local anesthesia is effective only when the nerve is not covered by myelin sheath.\n\n### Answer:\n'}

In [None]:
from transformers import pipeline

# Text-generation pipeline for the fine-tuned model.
# NOTE(review): this loads the checkpoint "thainq107/med-mcqa-llama-3.2-1B-4bit-lora"
# by Hub id, not the local output_dir written by the trainer above; nothing in
# this notebook uploads the freshly trained weights — confirm this is intended.
generator = pipeline("text-generation", model="thainq107/med-mcqa-llama-3.2-1B-4bit-lora")


Device set to use cuda:0


In [None]:
# Display the formatted text of the first validation example.
process_ds["validation"][0]["text"]

'Choose the correct option for the following question.\n\n### Question:\nWhich of the following is not true for myelinated nerve fibers:\n\n### Choice:\nA. Impulse through myelinated fibers is slower than non-myelinated fibers. B. Membrane currents are generated at nodes of Ranvier. C. Saltatory conduction of impulses is seen. D. Local anesthesia is effective only when the nerve is not covered by myelin sheath.\n\n### Answer:\n'

In [None]:
# Build the generation prompt from the formatted validation text, but keep
# only the part up to and including the answer header. The "text" column is
# the training text, so anything stored after the header (e.g. a gold answer)
# must not be shown to the model at inference time. When nothing follows the
# header, or the header is absent, the prompt equals the text unchanged.
sample_text = process_ds["validation"][0]["text"]
prompt_head, answer_header, _gold_answer = sample_text.partition("### Answer:\n")
prompt = prompt_head + answer_header

# One prompt in, so take the first (and only) entry of the batch result.
output = generator([prompt], max_new_tokens=128, return_full_text=False)[0]

In [None]:
# Display the generation result for the sample prompt.
output

[{'generated_text': 'D Local anesthesia is effective only when the nerve is not covered by myelin sheath\n'}]