# Lightweight Fine-Tuning Project

## Loading and Evaluating a Foundation Model

We will load the dataset (dair-ai/emotion) and fine-tune an existing model (microsoft/DialogRPT-updown) to detect emotions in the dataset's text.

In this section we will load the dataset and tokenize the data for further training.
We will then load the pre-trained foundation model and evaluate its performance on the validation dataset.

### Load the dataset and tokenize the data

In [1]:
from datasets import load_dataset
from transformers import AutoTokenizer

# Load the dair-ai/emotion dataset and list the splits used in this notebook.
dataset = load_dataset("dair-ai/emotion")
splits = ["train", "validation", "test"]

# Tokenizer belonging to the DialogRPT checkpoint.
tokenizer = AutoTokenizer.from_pretrained("microsoft/DialogRPT-updown")


def _tokenize_batch(batch):
    """Tokenize the "text" column of one batch with padding and truncation on."""
    # NOTE(review): padding=True presumably pads within each mapped batch, while
    # DataCollatorWithPadding pads again per training batch later — confirm
    # whether padding here is still wanted.
    return tokenizer(batch["text"], padding=True, truncation=True)


# Tokenize every split in batched mode, keyed by split name.
tokenized_dataset = {
    split: dataset[split].map(_tokenize_batch, batched=True) for split in splits
}

print("Data Set Loaded and Tokenized")

Data Set Loaded and Tokenized


### Load the pre-trained foundational Model

In [2]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification

base_checkpoint = "microsoft/DialogRPT-updown"

tokenizer = AutoTokenizer.from_pretrained(base_checkpoint)

# Request a 6-way classification head (one output per label used here).
# The checkpoint's own head has a different shape, so ignore_mismatched_sizes
# lets the load go ahead and the head is newly initialized instead.
model = AutoModelForSequenceClassification.from_pretrained(
    base_checkpoint,
    num_labels=6,
    ignore_mismatched_sizes=True,
)

# Flag every parameter of the model as trainable.
for parameter in model.parameters():
    parameter.requires_grad = True

print("Loaded the foundation Model")

Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at microsoft/DialogRPT-updown and are newly initialized because the shapes did not match:
- score.weight: found shape torch.Size([1, 1024]) in the checkpoint and torch.Size([6, 1024]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Loaded the foundation Model


### Validating the existing model on dataset

In [3]:
import numpy as np
from transformers import DataCollatorWithPadding, Trainer, TrainingArguments


def compute_metrics(eval_pred):
    """Return the accuracy of the predictions as ``{"accuracy": value}``.

    ``eval_pred`` unpacks into ``(predictions, labels)``; the predicted class
    of each row is the index of its largest score along axis 1.
    """
    scores, gold = eval_pred
    predicted_classes = np.argmax(scores, axis=1)
    hits = predicted_classes == gold
    return {"accuracy": hits.mean()}


# The Hugging Face Trainer runs the PyTorch training and evaluation loops for us.
# Reference: https://huggingface.co/docs/transformers/main_classes/trainer
baseline_args = TrainingArguments(
    output_dir="./data/model",
    learning_rate=2e-5,
    weight_decay=0.01,
    num_train_epochs=1,
    # Batch size per device for training and for evaluation.
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    # Evaluate and checkpoint once per epoch; reload the best checkpoint at the end.
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
)
baseline_collator = DataCollatorWithPadding(tokenizer=tokenizer)

trainer = Trainer(
    model=model,
    args=baseline_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["validation"],
    tokenizer=tokenizer,
    data_collator=baseline_collator,
    compute_metrics=compute_metrics,
)

  warn("The installed version of bitsandbytes was compiled without GPU support. "
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


'NoneType' object has no attribute 'cadam32bit_grad_fp32'


In [4]:
# No dataset is passed, so the Trainer uses its configured eval_dataset
# (the validation split). The returned metrics dict is the cell's output.
print("Evaluating the model in Validation dataset")
trainer.evaluate() # Evaluate the model on the validation dataset

Evaluating the model in Validation dataset


{'eval_loss': 1.7716858386993408,
 'eval_accuracy': 0.2425,
 'eval_runtime': 25.0379,
 'eval_samples_per_second': 79.879,
 'eval_steps_per_second': 19.97}

In [5]:
# Pass the test split explicitly so it is used instead of the Trainer's
# configured eval_dataset. The returned metrics dict is the cell's output.
print("Evaluating the model in Test dataset")
trainer.evaluate(tokenized_dataset["test"]) # Evaluate the model on the test dataset

Evaluating the model in Test dataset


{'eval_loss': 1.7617275714874268,
 'eval_accuracy': 0.25,
 'eval_runtime': 19.4931,
 'eval_samples_per_second': 102.601,
 'eval_steps_per_second': 25.65}

## Fine-tuning the foundational model using PEFT
In this section, we will initialize the PEFT model based on the existing model.
Once the model is loaded, we will initialize a trainer to train the model against our dataset.
The fine-tuned model will be saved locally.

In [6]:
print('Applying fine-tuning by applying a PEFT model')
from peft import LoraConfig, get_peft_model

# LoRA settings for a sequence-classification task, configured for training
# (inference_mode=False).
peft_config = LoraConfig(
    task_type="SEQ_CLS",
    inference_mode=False,
    r=8,
    lora_alpha=32,
    lora_dropout=0.2,
)

# Wrap the loaded model with the LoRA configuration, then print how many
# parameters are trainable.
model_peft = get_peft_model(model, peft_config)
model_peft.print_trainable_parameters()

Applying fine-tuning by applying a PEFT model
trainable params: 792,576 || all params: 355,621,888 || trainable%: 0.22287042129420334




In [8]:
# The Hugging Face Trainer runs the PyTorch training and evaluation loops for us.
# Reference: https://huggingface.co/docs/transformers/main_classes/trainer
peft_args = TrainingArguments(
    output_dir="./data/dair-ai/models",
    learning_rate=2e-5,
    weight_decay=0.01,
    num_train_epochs=5,
    # Batch size per device for training and for evaluation.
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    # Evaluate and checkpoint once per epoch; reload the best checkpoint at the end.
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
)
peft_collator = DataCollatorWithPadding(tokenizer=tokenizer)

trainer_peft = Trainer(
    model=model_peft,
    args=peft_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["validation"],
    tokenizer=tokenizer,
    data_collator=peft_collator,
    compute_metrics=compute_metrics,
)

# Fine-tune the PEFT-wrapped model; the returned TrainOutput is the cell's output.
trainer_peft.train()

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Epoch,Training Loss,Validation Loss,Accuracy
1,0.5156,0.395833,0.896
2,0.4501,0.36763,0.911
3,0.417,0.348243,0.9145
4,0.4296,0.339444,0.917
5,0.3843,0.328587,0.916


TrainOutput(global_step=20000, training_loss=0.44645341720581055, metrics={'train_runtime': 3331.9462, 'train_samples_per_second': 24.01, 'train_steps_per_second': 6.002, 'total_flos': 1.0554954781753344e+16, 'train_loss': 0.44645341720581055, 'epoch': 5.0})

In [9]:
# Save the fine-tuned PEFT model under "lora-model"; the evaluation section
# loads it back from that same path.
model_peft.save_pretrained('lora-model') # Save the model in a local directory

# Evaluating the trained model
In this section we will load the trained model and evaluate it against the test dataset.

In [10]:
# Reload the fine-tuned model from the local "lora-model" directory.
from peft import AutoPeftModelForSequenceClassification

lora_model = AutoPeftModelForSequenceClassification.from_pretrained(
    "lora-model",
    ignore_mismatched_sizes=True,
    num_labels=6,
)

# Build a Trainer around the reloaded model; its eval_dataset is the test split.
lora_args = TrainingArguments(
    output_dir="./data/models",
    learning_rate=2e-5,
    weight_decay=0.01,
    num_train_epochs=1,
    # Batch size per device for training and for evaluation.
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    # Evaluate and checkpoint once per epoch; reload the best checkpoint at the end.
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
)
lora_collator = DataCollatorWithPadding(tokenizer=tokenizer)

trainer_lora = Trainer(
    model=lora_model,
    args=lora_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["test"],
    tokenizer=tokenizer,
    data_collator=lora_collator,
    compute_metrics=compute_metrics,
)

# Score the reloaded model on the test split; the metrics dict is the cell's output.
trainer_lora.evaluate()

Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at microsoft/DialogRPT-updown and are newly initialized because the shapes did not match:
- score.weight: found shape torch.Size([1, 1024]) in the checkpoint and torch.Size([6, 1024]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


{'eval_loss': 0.3568718731403351,
 'eval_accuracy': 0.9115,
 'eval_runtime': 22.693,
 'eval_samples_per_second': 88.133,
 'eval_steps_per_second': 22.033}