In [40]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer
import adapters
from adapters import AdapterConfig, AdapterTrainer, AdapterTrainer, AutoAdapterModel
from datasets import load_dataset
from peft import LoraConfig, get_peft_model, TaskType
from util import compute_metrics

### Dataset

In [None]:
dataset = load_dataset('tweet_eval', 'sentiment') 
dataset = dataset.rename_column('label', 'labels') # renaming to fit the Trainer class

In [42]:
len(dataset['train'])

45615

In [43]:
len(dataset['validation'])

2000

In [44]:
len(dataset['test'])

12284

### Loading base model and encoding dataset

In [45]:
model_name = 'distilbert-base-uncased'
tokenizer = AutoTokenizer.from_pretrained(model_name)

def tokenize(example):
    return tokenizer(example["text"], truncation = True, padding = "max_length", max_length = 128)

encoded = dataset.map(tokenize, batched = True)
encoded.set_format('torch', columns = ['input_ids', 'attention_mask', 'labels'])

base_model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels = 3)

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


### Baseline Evaluation

In [None]:
# Trainer requires TrainingArguments 
# we can add in dummy TrainingArguments that has no tunable parameters
base_args = TrainingArguments(
    output_dir = "./baseline_results"
)

base_trainer = Trainer(
    model = base_model,
    args = base_args,
    compute_metrics = compute_metrics
)

base_results = base_trainer.evaluate(encoded['test'])
print("Baseline Results:", base_results)

Baseline Results: {'eval_loss': 1.098198413848877, 'eval_model_preparation_time': 0.0007, 'eval_accuracy': 0.34296646043633994, 'eval_f1': 0.33957336110664277, 'eval_runtime': 87.1362, 'eval_samples_per_second': 140.975, 'eval_steps_per_second': 17.628}


### LoRA

In [47]:
lora_config = LoraConfig(
    r = 8,
    lora_alpha = 16,
    target_modules = ["q_lin", "v_lin"],
    lora_dropout = 0.1,
    bias = "none",
    task_type = TaskType.SEQ_CLS,
)

lora_model = get_peft_model(base_model, lora_config)
lora_model.print_trainable_parameters()

lora_args = TrainingArguments(
    output_dir = "./lora_results",
    num_train_epochs = 3,
    eval_strategy = "epoch",
    save_strategy = "epoch",
    per_device_train_batch_size = 16,
    per_device_eval_batch_size = 16,
    load_best_model_at_end = True,
    metric_for_best_model = "accuracy"
)

lora_trainer = Trainer(
    model = lora_model,
    args = lora_args,
    train_dataset = encoded["train"],
    eval_dataset = encoded["validation"],
    compute_metrics = compute_metrics
)

lora_trainer.train()

lora_results = lora_trainer.evaluate(encoded["test"])
print("LoRA Results:", lora_results)

No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


trainable params: 740,355 || all params: 67,696,134 || trainable%: 1.0936




Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.6772,0.681058,0.694,0.693926
2,0.6595,0.667636,0.708,0.707185
3,0.6498,0.659583,0.7075,0.708693




LoRA Results: {'eval_loss': 0.7258167862892151, 'eval_accuracy': 0.6768967762943666, 'eval_f1': 0.6758656112479667, 'eval_runtime': 161.0126, 'eval_samples_per_second': 76.292, 'eval_steps_per_second': 4.77, 'epoch': 3.0}


### Adapters

In [None]:
adapter_config = AdapterConfig.load(
    "pfeiffer",             
    reduction_factor = 16,
    non_linearity = "relu"
)

adapter_base_model = AutoAdapterModel.from_pretrained(model_name, num_labels = 3)

adapter_name = "sentiment_adapter"
adapter_base_model.add_adapter(adapter_name, config = adapter_config)
adapter_base_model.train_adapter(adapter_name)
adapter_base_model.set_active_adapters(adapter_name)

adapter_training_args = TrainingArguments(
    output_dir = "./adapter_results",
    num_train_epochs = 3,
    eval_strategy = "epoch",   
    save_strategy = "epoch",
    per_device_train_batch_size = 16,
    per_device_eval_batch_size = 16,
    load_best_model_at_end = True,
    metric_for_best_model = "accuracy"
)

adapter_trainer = AdapterTrainer(
    model = adapter_base_model,
    args = adapter_training_args,
    train_dataset = encoded["train"],
    eval_dataset = encoded["validation"],
    compute_metrics = compute_metrics,
)

adapter_trainer.train()

adapter_result = adapter_trainer.evaluate(encoded["test"])
print("Adapter Results:", adapter_result)

There are adapters available but none are activated for the forward pass.


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.6689,0.674761,0.702,0.70109
2,0.6537,0.665356,0.7125,0.709456
3,0.643,0.654384,0.712,0.713033


Overwriting existing adapter 'sentiment_adapter'.
Overwriting existing head 'sentiment_adapter'


Adapter Results: {'eval_loss': 0.7361659407615662, 'eval_accuracy': 0.6716867469879518, 'eval_f1': 0.6700249769262002, 'eval_runtime': 136.5346, 'eval_samples_per_second': 89.97, 'eval_steps_per_second': 5.625, 'epoch': 3.0}
