In [2]:
# ===== Colab 2: LoRA (QLoRA) Finetuning SmolLM2-135M =====

!pip install -q "unsloth>=2025.3.0" "unsloth_zoo" "trl>=0.9.6" "datasets" "accelerate" "transformers>=4.44.0"

from unsloth import FastLanguageModel
from datasets import Dataset
from trl import SFTTrainer, SFTConfig
import torch

# 1) Tiny toy dataset: two instruction/response pairs, each stored as a single
#    pre-formatted string under the "text" key.
dataset = Dataset.from_list([
    {
        "text": "### Instruction:\nExplain what a Python list is.\n\n"
                "### Response:\nA Python list is an ordered, mutable collection of items."
    },
    {
        # The trailing emoji (U+1F60A) is written as an escape sequence so the
        # sample stays intact even if this file is re-saved or copied through a
        # tool that mis-decodes UTF-8.
        "text": "### Instruction:\nGreet the user in a friendly way.\n\n"
                "### Response:\nHi there! Hope you're doing well \U0001F60A"
    },
])

# Sequence-length limit shared by the model loader and the LoRA wrapper below.
max_seq_length = 512

# 2) Load base model in 4-bit for QLoRA
base_model_options = dict(
    model_name="unsloth/SmolLM2-135M-Instruct",
    max_seq_length=max_seq_length,
    dtype=None,
    load_in_4bit=True,        # QLoRA: quantized base weights
    full_finetuning=False,    # PEFT: only adapter weights are trained
)
model, tokenizer = FastLanguageModel.from_pretrained(**base_model_options)

# 3) Attach LoRA adapters to the loaded model. No target_modules argument is
#    given, so the choice of layers is left to Unsloth.
model = FastLanguageModel.get_peft_model(
    model,
    max_seq_length=max_seq_length,
    use_gradient_checkpointing="unsloth",
    r=16,               # adapter rank
    lora_alpha=16,
    lora_dropout=0,
    bias="none",
)

# 4) Training config - no wandb
training_args = SFTConfig(
    output_dir="smollm2_lora_ft",
    report_to="none",                 # keeps wandb out of the run
    save_strategy="no",
    logging_steps=1,
    learning_rate=2e-4,
    num_train_epochs=1,
    # 1 sample per step x 2 accumulation steps
    per_device_train_batch_size=1,
    gradient_accumulation_steps=2,
)

# Build the supervised fine-tuning trainer over the dataset's "text" column,
# then run the training loop.
trainer = SFTTrainer(
    args=training_args,
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
)

trainer.train()

# 5) Inference demo
# Switch the model to Unsloth's inference path so generation can reuse the
# key/value cache; with the cache disabled, every new token would recompute
# attention over the entire prefix.
FastLanguageModel.for_inference(model)
model.eval()

# Same "### Instruction / ### Response" template as the training samples, left
# open after the response header so the model writes the answer.
prompt = "### Instruction:\nExplain what a Python list is.\n\n### Response:\n"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

with torch.no_grad():
    outputs = model.generate(
        **inputs,
        max_new_tokens=40,
        do_sample=True,     # sampling: the generated text varies between runs
        temperature=0.7,
        use_cache=True,
    )

# outputs[0] is decoded in full, so the printed text begins with the prompt.
print(tokenizer.decode(outputs[0], skip_special_tokens=True))


==((====))==  Unsloth 2025.11.4: Fast Llama patching. Transformers: 4.57.2.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.9.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.5.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.33.post1. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Unsloth 2025.11.4 patched 30 layers with 30 QKV layers, 30 O layers and 30 MLP layers.
num_proc must be <= 2. Reducing num_proc to 2 for dataset of size 2.


Unsloth: We found double BOS tokens - we shall remove one automatically.


Unsloth: Tokenizing ["text"] (num_proc=2):   0%|          | 0/2 [00:00<?, ? examples/s]

The model is already on multiple devices. Skipping the move to device specified in `args`.
==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 2 | Num Epochs = 1 | Total steps = 1
O^O/ \_/ \    Batch size per device = 1 | Gradient accumulation steps = 2
\        /    Data Parallel GPUs = 1 | Total batch size (1 x 2 x 1) = 2
 "-____-"     Trainable parameters = 4,884,480 of 139,400,064 (3.50% trained)


Step,Training Loss
1,2.655


### Instruction:
Explain what a Python list is.

### Response:
In this case, I am going to explain what a Python list is.

A Python list is a collection of items that are of different types. In Python, a list is a data structure
