In [2]:
"""
Cell 1: Install and import required libraries for parameter-efficient fine-tuning (LoRA) using Unsloth.ai.
This includes installing the Unsloth package, PEFT/LoRA utilities, and other dependencies.
"""
!pip install unsloth transformers datasets bitsandbytes accelerate peft

import sys
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments
from datasets import load_dataset
from peft import get_peft_model, LoraConfig, TaskType

print("Python version:", sys.version)
print("Torch version:", torch.__version__)
print("CUDA available:", torch.cuda.is_available())


Python version: 3.12.12 (main, Oct 10 2025, 08:52:57) [GCC 11.4.0]
Torch version: 2.8.0+cu126
CUDA available: True


In [3]:
"""
Cell 2: Runtime & GPU Check.
This cell verifies that the runtime environment has GPU access and prints out key GPU information,
so we confirm our hardware setup is ready for parameter-efficient fine-tuning (LoRA).
"""
import torch
print("CUDA available:", torch.cuda.is_available())
!nvidia-smi


CUDA available: True
Mon Nov 10 03:05:55 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   36C    P8              9W /   70W |       2MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                           

In [4]:
"""
Cell 3: Load the base model (SmolLM2-135M) and tokenizer.
We'll then configure the model with LoRA adapters for parameter-efficient fine-tuning.
"""
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import LoraConfig, get_peft_model, TaskType

checkpoint = "unsloth/SmolLM2-135M"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
base_model = AutoModelForCausalLM.from_pretrained(checkpoint).to("cuda")

# Configure LoRA
lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    inference_mode=False,
    r=8,                  # rank of LoRA adapter
    lora_alpha=32,
    lora_dropout=0.05
)
model = get_peft_model(base_model, lora_config)

print("Loaded base model:", checkpoint)
print("Model parameter count with LoRA adapters (trainable):",
      sum(p.numel() for p in model.parameters() if p.requires_grad) / 1e6, "M")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

added_tokens.json:   0%|          | 0.00/29.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/742 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/817 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/269M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/158 [00:00<?, ?B/s]

Loaded base model: unsloth/SmolLM2-135M
Model parameter count with LoRA adapters (trainable): 0.4608 M


In [5]:
"""
Cell 4: Dataset Preparation for LoRA fine-tuning.
We load the same instruction-response dataset used in Colab 1 (e.g., Alpaca),
format each example into the prompt template: “### Instruction: … ### Input: … ### Response: …”,
and optionally select a smaller subset for demonstration.
"""
from datasets import load_dataset

# Load dataset (same as Colab 1 for consistency)
dataset = load_dataset("yahma/alpaca-cleaned", split="train[:500]")

# Define prompt format
prompt_template = """### Instruction:
{instruction}
### Input:
{input}
### Response:
{output}"""

def format_example(example):
    inp = example.get("input", "")
    return {
        "text": prompt_template.format(
            instruction=example["instruction"],
            input=inp,
            output=example["output"]
        )
    }

# Apply formatting
dataset = dataset.map(format_example, remove_columns=dataset.column_names)

# Optional subset for fast iteration
dataset = dataset.select(range(200))

print("Number of examples:", len(dataset))
print("Sample formatted example text:\n", dataset[0]["text"])


README.md: 0.00B [00:00, ?B/s]

alpaca_data_cleaned.json:   0%|          | 0.00/44.3M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/51760 [00:00<?, ? examples/s]

Map:   0%|          | 0/500 [00:00<?, ? examples/s]

Number of examples: 200
Sample formatted example text:
 ### Instruction:
Give three tips for staying healthy.
### Input:

### Response:
1. Eat a balanced and nutritious diet: Make sure your meals are inclusive of a variety of fruits and vegetables, lean protein, whole grains, and healthy fats. This helps to provide your body with the essential nutrients to function at its best and can help prevent chronic diseases.

2. Engage in regular physical activity: Exercise is crucial for maintaining strong bones, muscles, and cardiovascular health. Aim for at least 150 minutes of moderate aerobic exercise or 75 minutes of vigorous exercise each week.

3. Get enough sleep: Getting enough quality sleep is crucial for physical and mental well-being. It helps to regulate mood, improve cognitive function, and supports healthy growth and immune function. Aim for 7-9 hours of sleep each night.


In [6]:
"""
Cell 5: Tokenization & Formatting for LoRA fine-tuning.
This cell takes the formatted dataset (instruction-input-response) and:
  • tokenizes the text using the tokenizer from the base model;
  • applies truncation/padding (max_length example: 512);
  • sets up labels so the model can compute loss (labels = input_ids);
  • prepares the dataset for training with LoRA-enabled model.
"""
def tokenize_and_format(example):
    enc = tokenizer(
        example["text"],
        truncation=True,
        max_length=512,
        padding="max_length"
    )
    # Set labels to input_ids so model knows what to predict
    enc["labels"] = enc["input_ids"].copy()
    return enc

tokenized_dataset = dataset.map(
    tokenize_and_format,
    batched=True,
    remove_columns=["text"]
)

print("Example tokenized with labels:", tokenized_dataset[0])
print("Dataset size:", len(tokenized_dataset))


Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Example tokenized with labels: {'input_ids': [49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49

In [7]:
"""
Cell 6: LoRA Setup & Training Configuration.
In this cell we configure the LoRA adapter settings (rank, alpha, dropout)
and set up the training arguments specifically for parameter-efficient fine-tuning.
"""
from transformers import TrainingArguments

# LoRA adapter configuration was already initialized in Cell 3

training_args = TrainingArguments(
    output_dir="./smollm2_lora_ft",
    per_device_train_batch_size=4,
    gradient_accumulation_steps=2,
    learning_rate=5e-5,        # higher LR since smaller number of params are updated
    num_train_epochs=2,
    fp16=True,
    save_steps=50,
    logging_steps=10,
    eval_strategy="no",         # skip evaluation for quick run
)
print("Training arguments defined:", training_args)


Training arguments defined: TrainingArguments(
_n_gpu=1,
accelerator_config={'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None, 'use_configured_state': False},
adafactor=False,
adam_beta1=0.9,
adam_beta2=0.999,
adam_epsilon=1e-08,
auto_find_batch_size=False,
average_tokens_across_devices=True,
batch_eval_metrics=False,
bf16=False,
bf16_full_eval=False,
data_seed=None,
dataloader_drop_last=False,
dataloader_num_workers=0,
dataloader_persistent_workers=False,
dataloader_pin_memory=True,
dataloader_prefetch_factor=None,
ddp_backend=None,
ddp_broadcast_buffers=None,
ddp_bucket_cap_mb=None,
ddp_find_unused_parameters=None,
ddp_timeout=1800,
debug=[],
deepspeed=None,
disable_tqdm=False,
do_eval=False,
do_predict=False,
do_train=False,
eval_accumulation_steps=None,
eval_delay=0,
eval_do_concat_batches=True,
eval_on_start=False,
eval_steps=None,
eval_strategy=IntervalStrategy.NO,
eva

In [8]:
"""
Cell 7: LoRA Fine-tuning Execution.
This cell runs the actual fine-tuning process using the base model wrapped with LoRA adapters.
Only the adapter weights will be trained — this makes the process parameter-efficient.
We monitor training loss and optional checkpoints are saved as specified in the training arguments.
"""
from transformers import Trainer

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    tokenizer=tokenizer
)

trainer.train()


  trainer = Trainer(
  | |_| | '_ \/ _` / _` |  _/ -_)
[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
[34m[1mwandb[0m: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mrishikeshavlal-patel[0m ([33mrishikeshavlal-patel-student[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Step,Training Loss
10,16.2528
20,15.5856
30,14.807
40,15.6209
50,15.1593


TrainOutput(global_step=50, training_loss=15.485137329101562, metrics={'train_runtime': 62.27, 'train_samples_per_second': 6.424, 'train_steps_per_second': 0.803, 'total_flos': 131069037772800.0, 'train_loss': 15.485137329101562, 'epoch': 2.0})

In [9]:
"""
Cell 8: Save LoRA-Fine-Tuned Model & Tokenizer.
In this cell we save the model (with LoRA adapters) and tokenizer to disk so
you can later load them for inference or deployment. Since we used PEFT/LoRA,
we ensure that both adapter weights and tokenizer are saved.
"""
# Save the model with adapters
model.save_pretrained("./smollm2_lora_ft_model", save_adapters=True)
# Save the tokenizer
tokenizer.save_pretrained("./smollm2_lora_ft_model")

print("✅ LoRA-fine-tuned model and tokenizer saved at ./smollm2_lora_ft_model")


✅ LoRA-fine-tuned model and tokenizer saved at ./smollm2_lora_ft_model


In [10]:
"""
Cell 9: Inference & Demo for LoRA-Fine-Tuned Model.
In this cell we load the saved LoRA-adapter model and tokenizer (or continue from memory),
and run a few sample prompts to demonstrate how the parameter-efficient fine-tuning performed.
We compare the outputs with what you might expect from full fine-tuning.
"""
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

# Path to your saved LoRA-fine-tuned model
checkpoint = "./smollm2_lora_ft_model"

tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForCausalLM.from_pretrained(checkpoint).to("cuda")

# Example prompts (customize to your task: coding or chat)
prompts = [
    "### Instruction:\nExplain how a binary search algorithm works.\n### Input:\n\n### Response:\n",
    "### Instruction:\nWrite a Python snippet that reverses a linked list.\n### Input:\n\n### Response:\n"
]

for prompt in prompts:
    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
    outputs = model.generate(
        **inputs,
        max_new_tokens=100,
        temperature=0.7,
        top_p=0.9,
        do_sample=True
    )
    text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    print("Prompt:\n", prompt)
    print("Generated output:\n", text)
    print("\n" + "="*60 + "\n")


Prompt:
 ### Instruction:
Explain how a binary search algorithm works.
### Input:

### Response:

Generated output:
 ### Instruction:
Explain how a binary search algorithm works.
### Input:

### Response:

#### Explanation:

The binary search algorithm is used to find the target value by searching for the value in the middle of the array. The algorithm starts by comparing the value of the first element in the array with the target value and if it is equal to the target value, the algorithm stops and returns the index of the target value.

#### Explanation:

The binary search algorithm works by starting with the first element in the array and searching for the value in the middle of the


Prompt:
 ### Instruction:
Write a Python snippet that reverses a linked list.
### Input:

### Response:

Generated output:
 ### Instruction:
Write a Python snippet that reverses a linked list.
### Input:

### Response:

```
class Node(object):
    def __init__(self, data):
        self.data = data
    

In [14]:
"""
Cell 10: Summary & Next Steps.
In this cell we recap the work done in this notebook: parameter-efficient fine-tuning using LoRA on the SmolLM2-135M model,
and we outline where we’re headed next in the series.
"""
print("✅ LoRA-fine-tuning completed on SmolLM2-135M.")
print("📌 Key observations:")
print("   • Model size & storage footprint greatly reduced thanks to LoRA adapters.")
print("   • Training time & memory usage were much lower compared to full fine-tuning.")
print("   • Loss value may be higher than full fine-tuning, reflecting the trade-off of efficiency vs capacity.")

✅ LoRA-fine-tuning completed on SmolLM2-135M.
📌 Key observations:
   • Model size & storage footprint greatly reduced thanks to LoRA adapters.
   • Training time & memory usage were much lower compared to full fine-tuning.
   • Loss value may be higher than full fine-tuning, reflecting the trade-off of efficiency vs capacity.
