# Fine-tuning with LoRA Parameters

## Install Libraries

In [None]:
!pip -q install -U "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
!pip -q install -U trl accelerate peft datasets==4.3.0 bitsandbytes transformers

  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
unsloth 2025.11.2 requires trl!=0.19.0,<=0.23.0,>=0.18.2, but you have trl 0.24.0 which is incompatible.[0m[31m
[0m[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
unsloth-zoo 2025.11.3 requires trl!=0.19.0,<=0.24.0,>=0.18.2, but you have trl 0.25.0 which is incompatible.
unsloth 2025.11.2 requires trl!=0.19.0,<=0.23.0,>=0.18.2, but you have trl 0.25.0 which is incompatible.[0m[31m
[0m

## Imports and Hardware Check

In [None]:
import os, json, random, gc, unsloth
from dataclasses import dataclass

import torch
from datasets import load_dataset
from transformers import TrainingArguments, TextStreamer
from trl import SFTTrainer
from unsloth import FastLanguageModel, is_bfloat16_supported
from unsloth.chat_templates import get_chat_template

# Report the torch build and whether a CUDA device is visible to this kernel.
print("Torch:", torch.__version__)
print("CUDA available:", torch.cuda.is_available())
if not torch.cuda.is_available():
    print("No GPU detected. Use Colab > Runtime > Change runtime type > GPU.")
else:
    print("GPU:", torch.cuda.get_device_name(0))


Please restructure your imports with 'import unsloth' at the top of your file.
  import os, json, random, gc, unsloth


🦥 Unsloth Zoo will now patch everything to make training faster!
Torch: 2.8.0+cu126
CUDA available: True
GPU: Tesla T4


## Load Dataset

In [None]:
dataset = load_dataset("sahil2801/CodeAlpaca-20k")

train_size = 2000
test_size  = 200

# The dataset is used through its single "train" split, so the held-out set has to be
# carved out of it. Shuffle once and take non-overlapping index ranges so that no
# test row is also a training row (two independent shuffles of the same split can
# select the same examples for both subsets).
shuffled = dataset["train"].shuffle(seed=42)
n_train = min(train_size, len(shuffled))
n_test = min(test_size, len(shuffled) - n_train)  # never reach back into the train range

small_train = shuffled.select(range(n_train))
small_test  = shuffled.select(range(n_train, n_train + n_test))

print("Example row:", small_train[0])
print("Train size:", len(small_train), " Test size:", len(small_test))

README.md:   0%|          | 0.00/147 [00:00<?, ?B/s]

code_alpaca_20k.json: 0.00B [00:00, ?B/s]

Generating train split:   0%|          | 0/20022 [00:00<?, ? examples/s]

Example row: {'output': 'class Person:\n    """\n    Class to represent a person\n    """\n    def __init__(self, name, age, address):\n        self.name = name\n        self.age = age\n        self.address = address\n    \n    def birthday(self):\n        """\n        Increments the age of the person\n        """\n        self.age += 1', 'instruction': 'Design a class for representing a person in Python.', 'input': ''}
Train size: 2000  Test size: 200


## Build Chat Messages and Templating Helpers

In [None]:
def build_messages(batch):
    """Turn one instruction/input/output record into a two-turn chat.

    The user turn is the ``instruction`` text. When ``input`` is a string with
    non-whitespace content, it is appended to the instruction after a blank
    line. The assistant turn is the ``output`` text.

    Returns:
        A dict with a single ``"messages"`` key holding the user and assistant
        role/content dicts, in that order.
    """
    prompt = batch["instruction"]
    extra = batch.get("input")
    # Only a non-blank string counts as additional context for the prompt.
    has_context = bool(extra) and isinstance(extra, str) and bool(extra.strip())
    if has_context:
        prompt += "\n\n" + extra
    user_turn = {"role": "user", "content": prompt}
    assistant_turn = {"role": "assistant", "content": batch["output"]}
    return {"messages": [user_turn, assistant_turn]}

def apply_template(example, tokenizer, add_generation_prompt=False):
    """Render ``example["messages"]`` through the tokenizer's chat template.

    The template is applied with ``tokenize=False`` and the caller's
    ``add_generation_prompt`` flag; whatever it returns is placed under the
    ``"text"`` key of the result dict.
    """
    rendered = tokenizer.apply_chat_template(
        example["messages"],
        tokenize=False,
        add_generation_prompt=add_generation_prompt,
    )
    return {"text": rendered}

def preview_with_template(tokenizer, example_row):
    """Print the first 800 characters of ``example_row`` rendered with the chat template."""
    messages = build_messages(example_row)["messages"]
    rendered = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=False,
    )
    print(rendered[:800])

## Choose Chat Template and Core Model Settings

In [None]:
# Name of the chat template that is later passed to get_chat_template().
chosen_template = "alpaca"
print(f"Selected chat template: {chosen_template}")

Selected chat template: alpaca


In [None]:
# Sequence-length limit shared by the model loader here and the trainer later on.
max_seq_length = 2048
# Use bfloat16 when the hardware reports support for it, float16 otherwise.
dtype = torch.bfloat16 if is_bfloat16_supported() else torch.float16

# 4-bit loading is switched off for this run.
load_in_4bit_for_lora = False

base_id = "unsloth/SmolLM2-135M-Instruct"
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=base_id,
    max_seq_length=max_seq_length,
    load_in_4bit=load_in_4bit_for_lora,
    full_finetuning=False,
    dtype=dtype,
)

# Rebind the tokenizer to the result of get_chat_template for the selected template.
tokenizer = get_chat_template(tokenizer, chat_template=chosen_template)
# Two candidate LoRA target sets: attention projections only, or attention plus MLP projections.
attn_only = ["q_proj", "k_proj", "v_proj", "o_proj"]
attn_plus_mlp = ["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"]

# attn_only is defined but not selected; this run targets attention and MLP projections.
target_modules = attn_plus_mlp

# Attach LoRA adapters
model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    lora_alpha=16,
    lora_dropout=0.0,
    bias="none",
    target_modules=target_modules,
    use_rslora=False,
)

# Prepare templated text (messages -> template -> text). Nothing is tokenized here:
# apply_template calls the chat template with tokenize=False. The second map removes
# every input column, leaving only what apply_template returns ("text").
train_msgs = small_train.map(build_messages)
test_msgs  = small_test.map(build_messages)
train_text = train_msgs.map(lambda ex: apply_template(ex, tokenizer), remove_columns=train_msgs.column_names)
test_text  = test_msgs.map(lambda ex: apply_template(ex, tokenizer), remove_columns=test_msgs.column_names)

# Show the first 500 characters of one formatted training example as a sanity check.
print("Sample formatted text:\n", train_text[0]["text"][:500])



==((====))==  Unsloth 2025.11.2: Fast Llama patching. Transformers: 4.57.1.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.8.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.4.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.32.post2. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/269M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/158 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

added_tokens.json:   0%|          | 0.00/29.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/423 [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

Unsloth 2025.11.2 patched 30 layers with 30 QKV layers, 30 O layers and 30 MLP layers.


Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Sample formatted text:
 <|im_start|>Below are some instructions that describe some tasks. Write responses that appropriately complete each request.

### Instruction:
Design a class for representing a person in Python.

### Response:
class Person:
    """
    Class to represent a person
    """
    def __init__(self, name, age, address):
        self.name = name
        self.age = age
        self.address = address
    
    def birthday(self):
        """
        Increments the age of the person
        """
        self


## Training Arguments

In [None]:
# Output directory for the training run; the adapter and tokenizer are saved to the
# same path after training.
out_dir = "outputs/smollm2_lora"

# Batch of 4 per device with 4 gradient-accumulation steps, i.e. 16 sequences per
# optimizer step on a single device. One epoch, cosine schedule with 10% warmup.
# bf16/fp16 mirror the dtype chosen when the model was loaded, so exactly one is True.
# report_to=[] passes an empty list of logging integrations.
train_args = TrainingArguments(
    output_dir=out_dir,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    num_train_epochs=1,
    learning_rate=2e-4,
    lr_scheduler_type="cosine",
    warmup_ratio=0.1,
    logging_steps=20,
    save_strategy="epoch",
    bf16=(dtype==torch.bfloat16),
    fp16=(dtype==torch.float16),
    optim="adamw_torch",
    gradient_checkpointing=True,
    report_to=[],
)

# Supervised fine-tuning on the pre-rendered "text" column, one example per sequence
# (packing disabled).
# NOTE(review): eval_dataset is supplied, but train_args sets no evaluation strategy,
# so whether evaluation ever runs depends on the TrainingArguments default — confirm.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    args=train_args,
    train_dataset=train_text,
    eval_dataset=test_text,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    packing=False,
)


Unsloth: Tokenizing ["text"] (num_proc=12):   0%|          | 0/2000 [00:00<?, ? examples/s]

Unsloth: Tokenizing ["text"] (num_proc=12):   0%|          | 0/200 [00:00<?, ? examples/s]

## Train SFTTrainer

In [None]:
# Run fine-tuning. The bare `train_result` on the last line makes the notebook
# display the returned training summary.
train_result = trainer.train()
train_result

The model is already on multiple devices. Skipping the move to device specified in `args`.
==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 2,000 | Num Epochs = 1 | Total steps = 125
O^O/ \_/ \    Batch size per device = 4 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (4 x 4 x 1) = 16
 "-____-"     Trainable parameters = 4,884,480 of 139,400,064 (3.50% trained)


Step,Training Loss
20,1.7497
40,1.1499
60,0.8793
80,0.828
100,0.8367
120,0.804


Unsloth: Will smartly offload gradients to save VRAM!


TrainOutput(global_step=125, training_loss=1.0325559158325195, metrics={'train_runtime': 163.3519, 'train_samples_per_second': 12.244, 'train_steps_per_second': 0.765, 'total_flos': 252955228603392.0, 'train_loss': 1.0325559158325195, 'epoch': 1.0})

## Save LoRA Adapter and Tokenizer

In [None]:
# Persist the trained model output and the tokenizer files side by side in out_dir.
trainer.save_model(out_dir)
tokenizer.save_pretrained(out_dir)
print(f"Saved to: {out_dir}")

Saved to: outputs/smollm2_lora


## Inference Setup and Sample Question

In [None]:
from transformers import TextStreamer

# Hand the model to Unsloth's inference helper, then put it in eval mode.
FastLanguageModel.for_inference(model)
model.eval()
# Caching is turned off on the model config; chat_once also passes use_cache=False to generate().
model.config.use_cache = False
print("\nTry asking a coding question:\n")

def chat_once(prompt, temperature=0.7, top_p=0.9, max_new_tokens=256):
    """Stream a single-turn reply from the fine-tuned model to stdout.

    Args:
        prompt: User message. It is wrapped in the tokenizer's chat template
            with the generation prompt appended.
        temperature: Sampling temperature forwarded to ``model.generate``.
        top_p: Nucleus-sampling threshold forwarded to ``model.generate``.
        max_new_tokens: Upper bound on the number of generated tokens.

    Returns:
        None. The reply is printed incrementally by ``TextStreamer``; the
        generated ids are discarded.
    """
    msgs = [{"role": "user", "content": prompt}]
    prompt_text = tokenizer.apply_chat_template(
        msgs, tokenize=False, add_generation_prompt=True
    )
    inputs = tokenizer([prompt_text], return_tensors="pt").to(model.device)
    streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

    # Stop on EOS, or as soon as the model starts a new "### Instruction:" header.
    # add_special_tokens=False guarantees that index 0 is the header's own first token
    # rather than a BOS/special token that some tokenizers prepend in encode().
    # NOTE(review): only the first token of the header is used, so any output that
    # begins with the same token also ends generation — confirm this is acceptable.
    header_first_id = tokenizer.encode("### Instruction:", add_special_tokens=False)[0]
    stop_ids = [tokenizer.eos_token_id, header_first_id]

    with torch.inference_mode():
        _ = model.generate(
            **inputs,
            streamer=streamer,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            temperature=temperature,
            top_p=top_p,
            use_cache=False,
            eos_token_id=stop_ids,
        )

# Smoke-test the fine-tuned model on one coding prompt; the reply streams into the cell output.
chat_once("Write a Python function `two_sum(nums, target)` returning indices of two numbers that sum to target.")


Try asking a coding question:

def two_sum(nums, target):
    result = []
    for i, num in enumerate(nums):
        complement = target - num
        if complement in result:
            return [result.index(complement), i]
    return []
