In [1]:
import os
# Pin the notebook to GPU index 1 unless the launcher/scheduler already chose
# the visible devices. This line runs before the deep-learning imports below.
os.environ.setdefault("CUDA_VISIBLE_DEVICES", "1")

from transformers import AutoModelForSequenceClassification, AutoTokenizer, DataCollatorWithPadding, TrainingArguments, Trainer
from datasets import load_dataset
import evaluate
import numpy as np
from peft import get_peft_model, TaskType, LoraConfig, PeftModel

In [2]:
# Hugging Face Hub identifiers for the checkpoint and the dataset
MODEL = "distilbert/distilbert-base-uncased"
DATASET = "dair-ai/emotion"

# Class id <-> emotion name lookups; the position in the list is the class id
id2label = dict(enumerate(["sadness", "joy", "love", "anger", "fear", "surprise"]))
label2id = {name: idx for idx, name in id2label.items()}
NUM_LABELS = len(id2label)

# Metric name and evaluator task used throughout the notebook
METRIC = "accuracy"
TASK = "text-classification"

In [None]:
# Fetch all splits of the dataset from the Hugging Face Hub and display them
data = load_dataset(path=DATASET)
data

DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 16000
    })
    validation: Dataset({
        features: ['text', 'label'],
        num_rows: 2000
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 2000
    })
})

In [None]:
# Load the tokenizer matching the checkpoint, then tokenize every split
tokenizer = AutoTokenizer.from_pretrained(MODEL)


def _tokenize_batch(batch):
    """Tokenize the "text" column of a batch with truncation enabled."""
    return tokenizer(batch["text"], truncation=True)


tokenized_data = data.map(_tokenize_batch, batched=True, num_proc=8)
tokenized_data['train']

Dataset({
    features: ['text', 'label', 'input_ids', 'attention_mask'],
    num_rows: 16000
})

In [None]:
# Padding collator, the evaluation metric, and the task-level evaluator
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
accuracy = evaluate.load(METRIC)


def compute_metrics(eval_pred):
    """Score a (predictions, labels) pair with the loaded metric.

    The predicted class of each row is the arg-max of the predictions along axis 1.
    """
    scores, references = eval_pred
    predicted_ids = np.argmax(scores, axis=1)
    return accuracy.compute(predictions=predicted_ids, references=references)


task_evaluator = evaluate.evaluator(TASK)

In [None]:
# Instantiate the pretrained checkpoint as a sequence classifier over our labels
model = AutoModelForSequenceClassification.from_pretrained(
    MODEL,
    label2id=label2id,
    id2label=id2label,
    num_labels=NUM_LABELS,
)

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# Evaluate the pretrained (not yet fine-tuned) model on the validation split.
# `metric` must be passed by keyword: the third positional parameter of
# `Evaluator.compute` is `subset`, so a positional METRIC is silently ignored
# for a preloaded Dataset and the task's default metric is used instead.
task_evaluator.compute(
    model,
    data['validation'],
    metric=METRIC,
    tokenizer=tokenizer,
    label_mapping=label2id,
)

`data` is a preloaded Dataset! Ignoring `subset` and `split`.
Device set to use cuda:0


{'accuracy': 0.0405,
 'total_time_in_seconds': 4.670237108999572,
 'samples_per_second': 428.24378148723736,
 'latency_in_seconds': 0.002335118554499786}

In [None]:
# Train the model without PEFT (regular fine-tuning through Trainer)
# NOTE(review): per-epoch evaluation and best-checkpoint selection
# (load_best_model_at_end) use the *test* split here, while the standalone
# evaluation cells score on *validation*. The conventional roles of the two
# splits are reversed -- confirm this is intended.
training_args = TrainingArguments(
    # Run-specific checkpoint directory so this run's checkpoints are not mixed
    # with those written by any other Trainer run.
    output_dir="temp/full_finetune",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=2,
    weight_decay=0.01,
    eval_strategy="epoch",
    save_strategy="epoch",  # kept equal to eval_strategy, as load_best_model_at_end requires
    load_best_model_at_end=True,
    bf16=True,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_data["train"],
    eval_dataset=tokenized_data["test"],
    processing_class=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

trainer.train()

Epoch,Training Loss,Validation Loss,Accuracy
1,0.2431,0.20863,0.921
2,0.141,0.171104,0.9265


TrainOutput(global_step=2000, training_loss=0.3170792770385742, metrics={'train_runtime': 42.4289, 'train_samples_per_second': 754.204, 'train_steps_per_second': 47.138, 'total_flos': 389287358125632.0, 'train_loss': 0.3170792770385742, 'epoch': 2.0})

In [None]:
# Evaluate the model trained without PEFT on the validation split.
# `metric` must be passed by keyword: the third positional parameter of
# `Evaluator.compute` is `subset`, so a positional METRIC is silently ignored
# for a preloaded Dataset and the task's default metric is used instead.
task_evaluator.compute(
    model,
    data['validation'],
    metric=METRIC,
    tokenizer=tokenizer,
    label_mapping=label2id,
)

`data` is a preloaded Dataset! Ignoring `subset` and `split`.
Device set to use cuda:0


{'accuracy': 0.935,
 'total_time_in_seconds': 6.428592511998431,
 'samples_per_second': 311.1100907807062,
 'latency_in_seconds': 0.003214296255999216}

In [None]:
# Wrap a fresh copy of the pretrained checkpoint with rank-8 LoRA adapters on
# the modules named q_lin, k_lin and v_lin, then report the trainable share
peft_config = LoraConfig(
    task_type=TaskType.SEQ_CLS,
    r=8,
    target_modules=["q_lin", "k_lin", "v_lin"],
)
model1 = get_peft_model(
    AutoModelForSequenceClassification.from_pretrained(
        MODEL, num_labels=NUM_LABELS, id2label=id2label, label2id=label2id
    ),
    peft_config,
)
model1.print_trainable_parameters()

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 816,390 || all params: 67,774,476 || trainable%: 1.2046


In [None]:
# Train the PEFT (LoRA) model
# NOTE(review): learning_rate is the same 2e-5 used for full fine-tuning;
# adapter training is often run with a larger rate -- consider tuning it.
training_args = TrainingArguments(
    # Run-specific checkpoint directory so this run's checkpoints are not mixed
    # with those written by any other Trainer run.
    output_dir="temp/lora",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=10,
    weight_decay=0.01,
    eval_strategy="epoch",
    save_strategy="epoch",  # kept equal to eval_strategy, as load_best_model_at_end requires
    load_best_model_at_end=True,
    bf16=True,
    # The PEFT wrapper hides the base model's input arguments, so Trainer
    # cannot infer the label key on its own; name it explicitly.
    label_names=["labels"],
)

trainer = Trainer(
    model=model1,
    args=training_args,
    train_dataset=tokenized_data["train"],
    eval_dataset=tokenized_data["test"],
    processing_class=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

trainer.train()

No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Epoch,Training Loss,Validation Loss,Accuracy
1,1.0347,0.834933,0.702
2,0.7039,0.622055,0.783
3,0.5992,0.536307,0.808
4,0.5283,0.485067,0.827
5,0.4922,0.446069,0.8355
6,0.4581,0.42081,0.845
7,0.4279,0.401752,0.8525
8,0.4145,0.3883,0.8575
9,0.4167,0.380347,0.8615
10,0.4157,0.378019,0.8615


TrainOutput(global_step=10000, training_loss=0.5823904678344727, metrics={'train_runtime': 181.0164, 'train_samples_per_second': 883.898, 'train_steps_per_second': 55.244, 'total_flos': 1982445802331904.0, 'train_loss': 0.5823904678344727, 'epoch': 10.0})

In [None]:
# Write the trained adapter of the PEFT model to the "lora_weights" directory
model1.save_pretrained(save_directory="lora_weights")

In [8]:
# Show the on-disk size of each file in lora_weights/ (shell command run via IPython `!`)
!du -sh lora_weights/*

4.0K	lora_weights/adapter_config.json
3.2M	lora_weights/adapter_model.safetensors
8.0K	lora_weights/README.md


In [None]:
# Start over from a fresh copy of the original pretrained checkpoint
model2 = AutoModelForSequenceClassification.from_pretrained(
    MODEL,
    label2id=label2id,
    id2label=id2label,
    num_labels=NUM_LABELS,
)

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# Attach the saved adapter to the fresh base model, then merge it and unload
# the PEFT wrapper in one chained call
model2 = PeftModel.from_pretrained(model2, "./lora_weights").merge_and_unload()

In [None]:
# Evaluate the merged LoRA fine-tuned model on the validation split.
# `metric` must be passed by keyword: the third positional parameter of
# `Evaluator.compute` is `subset`, so a positional METRIC is silently ignored
# for a preloaded Dataset and the task's default metric is used instead.
task_evaluator.compute(
    model2,
    data['validation'],
    metric=METRIC,
    tokenizer=tokenizer,
    label_mapping=label2id,
)

`data` is a preloaded Dataset! Ignoring `subset` and `split`.
Device set to use cuda:0


{'accuracy': 0.8745,
 'total_time_in_seconds': 4.674249030002102,
 'samples_per_second': 427.87621865305294,
 'latency_in_seconds': 0.002337124515001051}