In [1]:
"""
Cell 1: Install and import all necessary libraries.
We install the Unsloth.ai fine-tuning toolkit (if available), Transformers, Datasets, BitsAndBytes (for quantization),
and verify the correct Python version and runtime environment.
"""
# Install dependencies
!pip install unsloth transformers datasets bitsandbytes accelerate

# Imports
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments
from datasets import load_dataset

# Print Python and torch versions for logging
import sys
print("Python version:", sys.version)
print("Torch version:", torch.__version__)
print("CUDA available:", torch.cuda.is_available())


Collecting unsloth
  Downloading unsloth-2025.11.2-py3-none-any.whl.metadata (61 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/61.8 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.8/61.8 kB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0m
Collecting bitsandbytes
  Downloading bitsandbytes-0.48.2-py3-none-manylinux_2_24_x86_64.whl.metadata (10 kB)
Collecting unsloth_zoo>=2025.11.3 (from unsloth)
  Downloading unsloth_zoo-2025.11.3-py3-none-any.whl.metadata (32 kB)
Collecting tyro (from unsloth)
  Downloading tyro-0.9.35-py3-none-any.whl.metadata (12 kB)
Collecting xformers>=0.0.27.post2 (from unsloth)
  Downloading xformers-0.0.32.post2-cp39-abi3-manylinux_2_28_x86_64.whl.metadata (1.1 kB)
Collecting datasets
  Downloading datasets-4.3.0-py3-none-any.whl.metadata (18 kB)
Collecting trl!=0.19.0,<=0.23.0,>=0.18.2 (from unsloth)
  Downloading trl-0.23.0-py3-none-any.whl.metadata (11 kB)
Collecting pyarrow>=21.0



Python version: 3.12.12 (main, Oct 10 2025, 08:52:57) [GCC 11.4.0]
Torch version: 2.8.0+cu126
CUDA available: True


In [2]:
"""
Cell 2: Confirm that GPU is available and print GPU details.
This ensures our environment is properly configured for fine-tuning.
"""
import torch
print("CUDA available:", torch.cuda.is_available())
!nvidia-smi


CUDA available: True
Mon Nov 10 02:21:58 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   57C    P8              9W /   70W |       2MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                           

In [3]:
"""
Cell 3: Load the base model (SmolLM2-135M) and tokenizer from Unsloth.ai/Hugging Face.
We’ll optionally use quantization (e.g., 4-bit) or memory-saving configuration if supported.
"""
from transformers import AutoTokenizer, AutoModelForCausalLM

checkpoint = "unsloth/smollm2-135m"  # adjust exactly to the model name
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForCausalLM.from_pretrained(checkpoint).to("cuda")
print("Loaded model:", checkpoint)
print("Tokenizer length:", len(tokenizer))


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

added_tokens.json:   0%|          | 0.00/29.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/742 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/817 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/269M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/158 [00:00<?, ?B/s]

Loaded model: unsloth/smollm2-135m
Tokenizer length: 49153


In [4]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

"""
Cell 3: Load the base model (SmolLM2-135M) and tokenizer from Hugging Face/Unsloth.
We verify proper model name, move it to GPU, and print basic info.
"""

checkpoint = "unsloth/SmolLM2-135M"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForCausalLM.from_pretrained(checkpoint).to("cuda")

print("Loaded model:", checkpoint)
print("Tokenizer vocab size:", tokenizer.vocab_size)
print("Model device:", next(model.parameters()).device)
print("Number of parameters (approx):", sum(p.numel() for p in model.parameters())/1e6, "M")


tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

added_tokens.json:   0%|          | 0.00/29.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/742 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/817 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/269M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/158 [00:00<?, ?B/s]

Loaded model: unsloth/SmolLM2-135M
Tokenizer vocab size: 49152
Model device: cuda:0
Number of parameters (approx): 134.515584 M


In [5]:
"""
Cell 4: Dataset Preparation for the chosen task.
We load or build an input-output pair dataset (e.g., chat or code generation),
format the examples using a prompt template, and prepare it for tokenization.
"""

from datasets import load_dataset

# 1. Load or pick a dataset for our task — here we’ll use a small subset of an instruction-following dataset
dataset = load_dataset("yahma/alpaca-cleaned", split="train[:500]")

# 2. Define a prompt template for the task (e.g., chat generation)
prompt_template = """### Instruction:\n{instruction}\n### Input:\n{input}\n### Response:\n{output}"""

def format_example(example):
    input_text = example.get("input", "")
    formatted = prompt_template.format(
        instruction=example["instruction"],
        input=input_text,
        output=example["output"]
    )
    return {"text": formatted}

# 3. Apply formatting
dataset = dataset.map(format_example, remove_columns=dataset.column_names)

# 4. (Optional) Select a smaller subset for fast experimentation
dataset = dataset.select(range(200))  # keep first 200 examples for quicker runs

print("Number of training examples:", len(dataset))
print("Sample example:\n", dataset[0]["text"])


README.md: 0.00B [00:00, ?B/s]

alpaca_data_cleaned.json:   0%|          | 0.00/44.3M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/51760 [00:00<?, ? examples/s]

Map:   0%|          | 0/500 [00:00<?, ? examples/s]

Number of training examples: 200
Sample example:
 ### Instruction:
Give three tips for staying healthy.
### Input:

### Response:
1. Eat a balanced and nutritious diet: Make sure your meals are inclusive of a variety of fruits and vegetables, lean protein, whole grains, and healthy fats. This helps to provide your body with the essential nutrients to function at its best and can help prevent chronic diseases.

2. Engage in regular physical activity: Exercise is crucial for maintaining strong bones, muscles, and cardiovascular health. Aim for at least 150 minutes of moderate aerobic exercise or 75 minutes of vigorous exercise each week.

3. Get enough sleep: Getting enough quality sleep is crucial for physical and mental well-being. It helps to regulate mood, improve cognitive function, and supports healthy growth and immune function. Aim for 7-9 hours of sleep each night.


We load a ready-made instruction-following dataset (“yahma/alpaca-cleaned”) for simplicity; you could also create a custom dataset.

We use a prompt template that clearly distinguishes Instruction, Input, and Response. This helps the model understand what it should do.

We map the dataset to this template and optionally reduce its size for the demonstration/training phase.

In the documentation of Unsloth (“Datasets Guide”), this kind of formatting (instruction-input-output) is recommended.

In [10]:
"""
Cell 5: Tokenization & Formatting (with labels).
Tokenize the text, and set `labels = input_ids` so the model has targets for loss computation.
"""
def tokenize_fn(example):
    enc = tokenizer(
        example["text"],
        truncation=True,
        max_length=512,
        padding="max_length"
    )
    # Make labels same as input_ids
    enc["labels"] = enc["input_ids"].copy()
    return enc

tokenized_dataset = dataset.map(
    tokenize_fn,
    batched=True,
    remove_columns=["text"]
)

print("Example tokenized with labels:", tokenized_dataset[0])


Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Example tokenized with labels: {'input_ids': [49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49152, 49

In [8]:
"""
Cell 6: Training Configuration.
In this cell we define training arguments including batch size, learning rate, number of epochs/steps,
optimizer settings, mixed precision, and we set up full fine-tuning (all model weights updated).
We update to use the correct keyword `eval_strategy` instead of `evaluation_strategy`.
"""
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="./smollm2_full_ft",
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    learning_rate=2e-5,
    num_train_epochs=1,           # or use max_steps for a quicker demo
    fp16=True,                     # use mixed precision if available
    save_steps=50,
    logging_steps=10,
    eval_strategy="no",            # updated keyword
    # Additional args you may set:
    # save_strategy="steps",
    # load_best_model_at_end=False,
    # metric_for_best_model="loss",
)

print("Training arguments defined:")
print(training_args)


Training arguments defined:
TrainingArguments(
_n_gpu=1,
accelerator_config={'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None, 'use_configured_state': False},
adafactor=False,
adam_beta1=0.9,
adam_beta2=0.999,
adam_epsilon=1e-08,
auto_find_batch_size=False,
average_tokens_across_devices=True,
batch_eval_metrics=False,
bf16=False,
bf16_full_eval=False,
data_seed=None,
dataloader_drop_last=False,
dataloader_num_workers=0,
dataloader_persistent_workers=False,
dataloader_pin_memory=True,
dataloader_prefetch_factor=None,
ddp_backend=None,
ddp_broadcast_buffers=None,
ddp_bucket_cap_mb=None,
ddp_find_unused_parameters=None,
ddp_timeout=1800,
debug=[],
deepspeed=None,
disable_tqdm=False,
do_eval=False,
do_predict=False,
do_train=False,
eval_accumulation_steps=None,
eval_delay=0,
eval_do_concat_batches=True,
eval_on_start=False,
eval_steps=None,
eval_strategy=IntervalStrategy.NO,
eva

In [11]:
"""
Cell 7: Fine-tuning Execution.
This cell runs the actual fine-tuning process on the model using the configured
training arguments and tokenized dataset. We monitor training loss and optionally
evaluate the model if validation data is available.
"""
from transformers import Trainer

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    tokenizer=tokenizer
)

# Start training
trainer.train()


  trainer = Trainer(


Step,Training Loss
10,10.0773
20,0.7821


TrainOutput(global_step=25, training_loss=4.471847333908081, metrics={'train_runtime': 31.8795, 'train_samples_per_second': 6.274, 'train_steps_per_second': 0.784, 'total_flos': 65251403366400.0, 'train_loss': 4.471847333908081, 'epoch': 1.0})

In [13]:
"""
Cell 8: Save Fine-Tuned Model & Tokenizer.
This cell saves the model and tokenizer to disk so you can load them later for inference or deployment.
"""
# Save the fine-tuned model and tokenizer
model.save_pretrained("./smollm2_full_ft_model")
tokenizer.save_pretrained("./smollm2_full_ft_model")

print("✅ Model and tokenizer saved to ./smollm2_full_ft_model")


✅ Model and tokenizer saved to ./smollm2_full_ft_model


In [14]:
"""
Cell 9: Inference & Demo.
In this cell we load the fine-tuned model and tokenizer (if not already in memory),
and run a few sample prompts to demonstrate how the model performs on the task.
We show both the prompt and the generated output so viewers can understand what the model learned.
"""
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Load the fine-tuned model (if reloading)
checkpoint = "./smollm2_full_ft_model"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForCausalLM.from_pretrained(checkpoint).to("cuda")

# Example prompts – you can adjust based on chat or code task
prompts = [
    "### Instruction:\nExplain the concept of recursion in programming.\n### Input:\n\n### Response:\n",
    "### Instruction:\nWrite a Python function that computes the factorial of a number.\n### Input:\n\n### Response:\n"
]

for prompt in prompts:
    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
    # Generate output
    outputs = model.generate(
        **inputs,
        max_new_tokens=100,
        temperature=0.7,
        top_p=0.9,
        do_sample=True
    )
    text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    print("Prompt:\n", prompt)
    print("Generated output:\n", text)
    print("\n" + "="*60 + "\n")


Prompt:
 ### Instruction:
Explain the concept of recursion in programming.
### Input:

### Response:

Generated output:
 ### Instruction:
Explain the concept of recursion in programming.
### Input:

### Response:

Recursion is the process of performing an operation repeatedly by using the same instruction.

For example,
Recursion is:

```python
def recursive_function():
    a = 1
    while a <= 10:
        print(a)
        a = a + 1
```

### Explanation:

Recursion is a way of solving a problem by using the same instruction repeatedly. It is a way of solving a problem that does


Prompt:
 ### Instruction:
Write a Python function that computes the factorial of a number.
### Input:

### Response:

Generated output:
 ### Instruction:
Write a Python function that computes the factorial of a number.
### Input:

### Response:

### Example:

```python
def factorial(n):
    if n == 0:
        return 1
    else:
        return n * factorial(n-1)
```

### Exercise:

Write a Python function that 

In [17]:
"""
Cell 10: Summary & Next Steps.
In this cell we recap what was achieved in this notebook: full fine-tuning of SmolLM2-135M,
key results (loss curve), and outline the next steps (LoRA fine-tuning, RL, etc.).
This helps the viewer understand where we move from here.
"""
print(" Full fine-tuning completed for SmolLM2-135M.")
print(" Training loss dropped significantly — refer to the plotted curve for details.")



 Full fine-tuning completed for SmolLM2-135M.
 Training loss dropped significantly — refer to the plotted curve for details.
