<a href="https://colab.research.google.com/github/ochaudha/sample/blob/main/fine_tune_distilbert_cpu_colab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# New section

# 🤖 Fine-Tune DistilGPT-2 with LoRA for Instruction Following (CPU, Colab)
This notebook fine-tunes DistilGPT-2 on the Alpaca instruction-following dataset using LoRA adapters, with training forced onto the CPU.

✅ Works on **Google Colab Free Tier**
✅ Uses **Hugging Face Transformers** and **Datasets**
✅ Task: instruction following (causal language modeling)


In [None]:
# Install dependencies
!pip install torch transformers datasets peft bitsandbytes

Collecting bitsandbytes
  Downloading bitsandbytes-0.46.0-py3-none-manylinux_2_24_x86_64.whl.metadata (10 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.me

In [None]:
# Import libraries
import os

import bitsandbytes as bnb # For 4-bit quantization (optional, but good for memory)
import torch
from datasets import load_dataset
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training # Import PEFT components
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer
from transformers import BitsAndBytesConfig

# --- 1. Load the Dataset ---
# An instruction-following dataset is loaded for demonstration.
# The model will learn to follow instructions based on this data.
dataset_name = "tatsu-lab/alpaca"  # Publicly available instruction-following dataset.
# Load the dataset a single time: `ds` holds every available split and
# `dataset` is the training split that the rest of the notebook works with.
ds = load_dataset(dataset_name)
dataset = ds["train"]


NotImplementedError: Loading a dataset cached in a LocalFileSystem is not supported.

In [None]:

# --- 2. Load the Pre-trained Model and Tokenizer ---
# DistilGPT-2 is used, a smaller model for faster training on CPU.
model_name = "distilgpt2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# The end-of-sequence token doubles as the padding token so that
# `padding="max_length"` can be used during tokenization.
tokenizer.pad_token = tokenizer.eos_token

# Configure 4-bit quantization (QLoRA) only when a CUDA GPU is available.
# bitsandbytes quantization is GPU-oriented, and loading with a quantization
# config on a CPU-only runtime is expected to fail, so on CPU the model is
# loaded in full precision instead (`bnb_config` is then None).
#   load_in_4bit=True                      -> load the weights in 4-bit precision.
#   bnb_4bit_quant_type="nf4"              -> use the NF4 quantization scheme.
#   bnb_4bit_compute_dtype=torch.float16   -> run computations in float16.
#   bnb_4bit_use_double_quant=True         -> enable double quantization.
# NOTE: the config class is provided by `transformers`, not by `bitsandbytes`.
if torch.cuda.is_available():
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.float16,
        bnb_4bit_use_double_quant=True,
    )
else:
    bnb_config = None

# Load the model, quantized when `bnb_config` is set and unquantized otherwise.
# device_map="auto" lets the library choose the device placement; on a runtime
# without a GPU the model ends up on the CPU.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",
)

if bnb_config is not None:
    # Prepare the quantized model for k-bit training (QLoRA).
    model = prepare_model_for_kbit_training(model)
else:
    # Unquantized path: make the embedding outputs require gradients so that
    # gradient checkpointing (enabled in the training arguments) can still
    # propagate gradients to the adapters while the base weights are frozen.
    model.enable_input_require_grads()


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Downloading readme: 0.00B [00:00, ?B/s]

ValueError: Invalid pattern: '**' can only be an entire path component

In [None]:

# --- 3. Apply PEFT (LoRA) ---
# Rather than updating the full network, LoRA (Low-Rank Adaptation) trains small
# adapter matrices attached to selected layers, which keeps the number of
# trainable parameters low.
lora_config = LoraConfig(
    task_type="CAUSAL_LM",      # Task family: causal language modeling.
    target_modules=["c_attn"],  # Layers that receive adapters (the attention layers).
    r=8,                        # Rank of the low-rank update matrices.
    lora_alpha=16,              # Scaling factor for the adapters.
    lora_dropout=0.05,          # Dropout probability inside the adapter layers.
    bias="none",                # Bias handling for the adapted layers.
)

# Wrap the base model so the adapters described by `lora_config` are attached to it.
peft_model = get_peft_model(model, lora_config)
# Report how many parameters remain trainable after wrapping.
peft_model.print_trainable_parameters()


In [None]:

# --- 4. Prepare the Dataset for Training ---
# A function is defined to tokenize the instruction-following data.
def tokenize_function(example):
    """Tokenize instruction/output records into causal-LM training features.

    Works both for a single record (string fields) and for a batch as passed
    by ``Dataset.map(..., batched=True)``, where every field is a list of
    strings. Previously a batch was formatted as one prompt containing the
    textual representation of the whole lists.

    Args:
        example: Mapping with "instruction" and "output" entries, each either
            a string (single record) or a list of strings (batch).

    Returns:
        The tokenizer output (padded/truncated to 256 tokens) with an extra
        "labels" entry. Labels copy "input_ids", except that positions where
        "attention_mask" is 0 (padding) are set to -100 so they are ignored
        by the loss. Masking uses the attention mask rather than the token id
        because the padding token is the same as the end-of-sequence token.
    """
    instructions = example["instruction"]
    outputs = example["output"]
    is_batch = not isinstance(instructions, str)

    # Format the instruction and output into one prompt string per record.
    if is_batch:
        prompts = [
            f"Instruct: {instruction}\nOutput: {output}"
            for instruction, output in zip(instructions, outputs)
        ]
    else:
        prompts = f"Instruct: {instructions}\nOutput: {outputs}"

    # Tokenize the prompt(s).
    tokenized_output = tokenizer(prompts, padding="max_length", truncation=True, max_length=256)

    def mask_padding(token_ids, mask):
        # Keep real tokens as targets; replace padded positions with -100.
        return [token if keep else -100 for token, keep in zip(token_ids, mask)]

    input_ids = tokenized_output["input_ids"]
    attention_mask = tokenized_output["attention_mask"]
    if is_batch:
        tokenized_output["labels"] = [
            mask_padding(token_ids, mask)
            for token_ids, mask in zip(input_ids, attention_mask)
        ]
    else:
        tokenized_output["labels"] = mask_padding(input_ids, attention_mask)
    return tokenized_output

# Run the tokenizer over the dataset in batches, then drop the raw
# "instruction" and "output" text columns that were just encoded.
tokenized_dataset = dataset.map(
    tokenize_function,
    batched=True,
).remove_columns(["instruction", "output"])
# Have the dataset return PyTorch tensors when it is indexed.
tokenized_dataset.set_format(type="torch")

# --- 5. Define Training Arguments ---
# These arguments control the training process.
training_args = TrainingArguments(
    output_dir="./fine_tuned_model", # Directory to save training results.
    per_device_train_batch_size=4, # Batch size per device (CPU in this case).
    # gradient_accumulation_steps=4: accumulate gradients over 4 batches before
    # performing an optimizer step. This simulates a larger batch size without
    # a matching increase in memory use: gradients from several small batches
    # are combined before the parameters are updated, which reduces noise in
    # the gradient signal and can improve training stability.
    gradient_accumulation_steps=4,
    learning_rate=2e-4, # The learning rate for the optimizer.
    num_train_epochs=1, # Number of epochs for fine-tuning.
    logging_steps=10, # Log training progress every 10 steps.
    save_steps=100, # Save model checkpoint every 100 steps.
    # `eval_strategy` is the current name of the former `evaluation_strategy`
    # argument, which recent transformers releases no longer accept.
    eval_strategy="steps", # Evaluate the model every 'eval_steps'.
    eval_steps=100, # Perform evaluation every 100 steps.
    do_eval=True, # Perform evaluation during training.
    report_to="none", # Don't report metrics to external logging services.
    # `use_cpu` replaces the deprecated `no_cuda` flag.
    use_cpu=True, # **Explicitly disable GPU and force CPU training.**
    gradient_checkpointing=True, # Reduce memory usage by not storing all intermediate activations.
    lr_scheduler_type="cosine", # Use a cosine learning rate schedule.
    # The paged 8-bit AdamW variants come from bitsandbytes and target GPU
    # training; since training is forced onto the CPU, the standard PyTorch
    # AdamW implementation is used instead.
    optim="adamw_torch",
)


In [None]:

# --- 6. Initialize the Trainer ---
# The Trainer handles the training loop, including optimization and evaluation.
# A small held-out slice is used for evaluation. Evaluating on the training
# data itself would report a loss on examples the model has already seen, and
# running it over the full training set every `eval_steps` is very slow on CPU.
# The fixed seed keeps the split reproducible across runs.
dataset_splits = tokenized_dataset.train_test_split(test_size=0.01, seed=42)
trainer = Trainer(
    model=peft_model, # The PEFT model with LoRA adapters.
    args=training_args, # The training arguments.
    train_dataset=dataset_splits["train"], # The training portion (99%).
    eval_dataset=dataset_splits["test"], # The held-out evaluation portion (1%).
)

# --- 7. Fine-tune the Model ---
print("Starting fine-tuning...")
trainer.train()


In [None]:

# --- 8. Save the Fine-tuned Model ---
# Save the LoRA adapters (which are the trainable part) to disk.
peft_model_path = "./fine_tuned_model/lora_adapter"
trainer.model.save_pretrained(peft_model_path)

# --- 9. Load and Test the Fine-tuned Model (Inference) ---
print("\nLoading the fine-tuned model for inference...")

# Load a fresh copy of the base model (without the fine-tuned adapters).
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",
)

# Attach the saved LoRA adapters to the base model. The adapters are loaded
# alongside the base weights (they are not merged into them here).
# is_trainable=False loads the adapters for inference rather than training.
inference_model = PeftModel.from_pretrained(base_model, peft_model_path, is_trainable=False)
# Switch to evaluation mode so that dropout layers are inactive during generation.
inference_model.eval()

# Prepare an instruction to test the fine-tuned model, using the same prompt
# layout as the training examples.
instruction = "Explain artificial intelligence in a simple way."
prompt = f"Instruct: {instruction}\nOutput:"
# Tokenize the prompt and move the tensors to the device the model was placed
# on by device_map="auto".
encoded_prompt = tokenizer(prompt, return_tensors="pt").to(base_model.device)
input_ids = encoded_prompt.input_ids

# Generate text using the fine-tuned model.
with torch.no_grad(): # Disable gradient calculation for inference.
    output = inference_model.generate(
        input_ids=input_ids,
        attention_mask=encoded_prompt.attention_mask, # Pass the mask explicitly alongside the ids.
        max_length=100, # Upper bound on total length (prompt + generated tokens).
        num_beams=1, # No beam search for simplicity.
        pad_token_id=tokenizer.pad_token_id, # Padding id set on the tokenizer (the EOS token).
    )

# `generate` returns a batch of sequences; decode the first (and only) one.
generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
print("\nGenerated Text:")
print(generated_text)
