In [1]:
import torch
import numpy as np
from datasets import load_dataset
from torch.utils.data import DataLoader
from transformers.trainer import Trainer
from transformers.training_args import TrainingArguments
from sklearn.metrics import accuracy_score, f1_score
from peft import get_peft_model, LoraConfig, TaskType
from transformers import XLMRobertaForSequenceClassification, XLMRobertaTokenizer

In [None]:
ds = load_dataset("hezarai/sentiment-dksf")

In [3]:
tokenizer = XLMRobertaTokenizer.from_pretrained("xlm-roberta-base")
model = XLMRobertaForSequenceClassification.from_pretrained("xlm-roberta-base", num_labels=3)

Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [4]:
lora_config = LoraConfig(
        task_type=TaskType.SEQ_CLS,
        inference_mode=False,
        r=8,
        lora_alpha=16,
        lora_dropout=0.1,
        target_modules=[
            "query", "key", "value", "dense", 
        ],
        modules_to_save=["classifier"]
    )

peft_model = get_peft_model(model, lora_config)

print("\nTraining Parameter Analysis:")
print("=" * 60)

lora_params = 0
full_params = 0
frozen_params = 0

training_breakdown = {
    "lora_adapters": [],
    "full_training": [],
    "frozen": []
}

for name, param in model.named_parameters():
    if param.requires_grad:
        if "lora_" in name:
            lora_params += param.numel()
            training_breakdown["lora_adapters"].append(name)
        else:
            full_params += param.numel()
            training_breakdown["full_training"].append(name)
    else:
        frozen_params += param.numel()
        training_breakdown["frozen"].append(name)

total_params = lora_params + full_params + frozen_params

print(f"LoRA Adapter Parameters: {lora_params:,} ({lora_params/total_params*100:.2f}%)")
print(f"Full Training Parameters: {full_params:,} ({full_params/total_params*100:.2f}%)")
print(f"Frozen Parameters: {frozen_params:,} ({frozen_params/total_params*100:.2f}%)")
print(f"Total Trainable: {lora_params + full_params:,} ({(lora_params + full_params)/total_params*100:.2f}%)")


Training Parameter Analysis:
LoRA Adapter Parameters: 1,339,392 (0.48%)
Full Training Parameters: 592,899 (0.21%)
Frozen Parameters: 278,045,955 (99.31%)
Total Trainable: 1,932,291 (0.69%)


In [5]:
device = torch.device("mps" if torch.backends.mps.is_available() else "cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

peft_model = peft_model.to(device)
print(f"Model moved to {device} device")

Using device: mps
Model moved to mps device


In [6]:
def tokenize_function(examples):
    return tokenizer(
        examples["text"], 
        truncation=True, 
        padding="max_length",
        max_length=384,
    )

tokenized_ds = ds.map(tokenize_function, batched=True)
tokenized_ds = tokenized_ds.rename_column("label", "labels")

In [None]:
def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    return {
        'accuracy': accuracy_score(labels, predictions),
        'f1': f1_score(labels, predictions, average='weighted')
    }

training_args = TrainingArguments(
    output_dir="./BERT/results",
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    gradient_accumulation_steps=4, 
    gradient_checkpointing=False,
    warmup_steps=300,               
    weight_decay=0.01,
    logging_dir="./BERT/logs",
    logging_steps=50,                
    eval_steps=300,                 
    save_steps=600,       
    eval_strategy="steps",       
    save_total_limit=3,
    load_best_model_at_end=True,
    metric_for_best_model="f1",
    greater_is_better=True,
    dataloader_pin_memory=False,
    remove_unused_columns=False,
    fp16=False,
    report_to=None, 
)

: 

In [None]:
trainer = Trainer(
    model=peft_model,
    args=training_args,
    train_dataset=tokenized_ds["train"], # type: ignore
    eval_dataset=tokenized_ds["test"], # type: ignore
    compute_metrics=compute_metrics,
)

print("Starting training...")
trainer.train()

peft_model.save_pretrained("./lora_sentiment_RoBERTa_model")
tokenizer.save_pretrained("./lora_sentiment_RoBERTa_model")
print("Model and tokenizer saved!")


No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Starting training...


Step,Training Loss,Validation Loss,Accuracy,F1
300,0.7959,0.571663,0.783585,0.79097
600,0.6384,0.534611,0.792225,0.809644
