In [1]:
!pip install torch transformers datasets peft accelerate bitsandbytes evaluate scikit-learn

Collecting bitsandbytes
  Downloading bitsandbytes-0.48.2-py3-none-manylinux_2_24_x86_64.whl.metadata (10 kB)
Collecting evaluate
  Downloading evaluate-0.4.6-py3-none-any.whl.metadata (9.5 kB)
Downloading bitsandbytes-0.48.2-py3-none-manylinux_2_24_x86_64.whl (59.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m59.4/59.4 MB[0m [31m20.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading evaluate-0.4.6-py3-none-any.whl (84 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m9.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: bitsandbytes, evaluate
Successfully installed bitsandbytes-0.48.2 evaluate-0.4.6


In [None]:
!pip install --upgrade transformers datasets peft accelerate bitsandbytes

In [1]:
from huggingface_hub import login

# Paste your copied token inside the quotes
login(token="----YOUR_HUGGINGFACE_TOKEN_HERE---")

In [2]:
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    Trainer
)
from peft import (
    get_peft_model,
    LoraConfig,
    TaskType
)
import evaluate
import numpy as np
import os

# --- 1. Configuration ---
MODEL_ID = "google/gemma-2b-it"
DATASET_TRAIN = "train.jsonl"
DATASET_TEST = "test.jsonl"
OUTPUT_DIR = "./email-classifier-gemma-2b-lora"

id2label = {0: "Urgent", 1: "To-Do", 2: "FYI"}
label2id = {"Urgent": 0, "To-Do": 1, "FYI": 2}

# --- 2. Load Model & Tokenizer with Quantization ---
print(f"Loading model: {MODEL_ID}")

# 4-bit quantization config
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,  # <<< FIX #1: Changed from float16 to bfloat16
)

# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
tokenizer.pad_token = tokenizer.eos_token

# Load the model
model = AutoModelForSequenceClassification.from_pretrained(
    MODEL_ID,
    quantization_config=bnb_config,
    num_labels=len(id2label),
    id2label=id2label,
    label2id=label2id,
    device_map="auto",
)
model.config.pad_token_id = tokenizer.pad_token_id

# --- 3. Load and Preprocess Data ---
print("Loading and preprocessing datasets...")
dataset = load_dataset("json", data_files={'train': DATASET_TRAIN, 'test': DATASET_TEST})

def preprocess_function(examples):
    tokenized = tokenizer(examples["text"], truncation=True, padding="max_length", max_length=256) # <-- NEW
    tokenized["labels"] = [label2id[label] for label in examples["label"]]
    return tokenized

# Apply preprocessing and remove old columns
tokenized_datasets = dataset.map(
    preprocess_function,
    batched=True,
    remove_columns=["text", "label"]  # This is the fix from the previous step
)

# --- 4. Setup PEFT (LoRA) ---
print("Setting up LoRA (PEFT)...")
peft_config = LoraConfig(
    task_type=TaskType.SEQ_CLS,
    inference_mode=False,
    r=16,
    lora_alpha=32,
    lora_dropout=0.1,
    bias="none",
    target_modules=[
        "q_proj",
        "o_proj",
        "k_proj",
        "v_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
    ],
)

model = get_peft_model(model, peft_config)
print("\n--- Model Architecture (with LoRA) ---")
model.print_trainable_parameters()
print("----------------------------------------\n")

# --- 5. Define Evaluation Metrics ---
accuracy_metric = evaluate.load("accuracy")
f1_metric = evaluate.load("f1")
precision_metric = evaluate.load("precision")
recall_metric = evaluate.load("recall")

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)

    accuracy = accuracy_metric.compute(predictions=predictions, references=labels)["accuracy"]
    f1 = f1_metric.compute(predictions=predictions, references=labels, average="weighted")["f1"]
    precision = precision_metric.compute(predictions=predictions, references=labels, average="weighted")["precision"]
    recall = recall_metric.compute(predictions=predictions, references=labels, average="weighted")["recall"]

    return {
        "accuracy": accuracy,
        "f1": f1,
        "precision": precision,
        "recall": recall
    }

# --- 6. Configure Trainer ---
print("Configuring Trainer...")
training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    num_train_epochs=3,
    per_device_train_batch_size=1,        # <-- FIX 1: Set to 1
    per_device_eval_batch_size=2,         # <-- FIX 2: Set to 2 (safer for eval)
    gradient_accumulation_steps=4,        # <-- FIX 3: Accumulate 4 steps (1*4=4 effective batch)
    bf16=True,
    learning_rate=2e-4,
    logging_dir=f"{OUTPUT_DIR}/logs",
    logging_steps=10,
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="f1",
    report_to="none",
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["test"],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

# --- 7. Train the Model ---
print("\n--- Starting Training ---")
trainer.train()
print("--- Training Finished ---")

# --- 8. Save the Final Model ---
final_model_path = f"{OUTPUT_DIR}-final"
trainer.save_model(final_model_path)
print(f"Fine-tuned model adapter saved to: {final_model_path}")
print("\nStep 2 Complete!")

Loading model: google/gemma-2b-it


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some weights of GemmaForSequenceClassification were not initialized from the model checkpoint at google/gemma-2b-it and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Loading and preprocessing datasets...


Map:   0%|          | 0/247 [00:00<?, ? examples/s]

Map:   0%|          | 0/62 [00:00<?, ? examples/s]

Setting up LoRA (PEFT)...

--- Model Architecture (with LoRA) ---
trainable params: 19,617,792 || all params: 2,525,796,352 || trainable%: 0.7767
----------------------------------------

Configuring Trainer...

--- Starting Training ---


  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,33.0297,0.0,1.0,1.0,1.0,1.0
2,76.8,0.645161,0.983871,0.983862,0.984604,0.983871
3,0.0,0.0,1.0,1.0,1.0,1.0


--- Training Finished ---
Fine-tuned model adapter saved to: ./email-classifier-gemma-2b-lora-final

Step 2 Complete!


In [3]:
!zip -r /content/my_model.zip /content/email-classifier-gemma-2b-lora-final

  adding: content/email-classifier-gemma-2b-lora-final/ (stored 0%)
  adding: content/email-classifier-gemma-2b-lora-final/adapter_config.json (deflated 57%)
  adding: content/email-classifier-gemma-2b-lora-final/chat_template.jinja (deflated 52%)
  adding: content/email-classifier-gemma-2b-lora-final/tokenizer.model (deflated 51%)
  adding: content/email-classifier-gemma-2b-lora-final/adapter_model.safetensors (deflated 8%)
  adding: content/email-classifier-gemma-2b-lora-final/special_tokens_map.json (deflated 70%)
  adding: content/email-classifier-gemma-2b-lora-final/training_args.bin (deflated 54%)
  adding: content/email-classifier-gemma-2b-lora-final/tokenizer.json (deflated 84%)
  adding: content/email-classifier-gemma-2b-lora-final/README.md (deflated 66%)
  adding: content/email-classifier-gemma-2b-lora-final/tokenizer_config.json (deflated 96%)
