# Project: Apply Lightweight Fine-Tuning to a Foundation Model

## Prepare the Foundation Model


### Imports

In [1]:
import torch
import pandas as pd
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import datasets
import numpy as np


  from .autonotebook import tqdm as notebook_tqdm


### Dataset and compute functions

In [2]:
def get_dataset_split(name: str, name_split: str, test_size: float = 0.2, seed: int = 23):
    """Load a dataset's ``train`` split and divide it into train/held-out parts.

    Args:
        name: Dataset identifier passed to ``datasets.load_dataset``.
        name_split: Second positional argument passed to ``datasets.load_dataset``
            (the dataset configuration/subset name).
        test_size: Value forwarded to ``train_test_split`` as ``test_size``.
            Defaults to ``0.2``.
        seed: Seed for the shuffled split, so that repeated runs yield the same
            partition. Defaults to ``23``.

    Returns:
        A ``(train, test)`` tuple taken from the ``'train'`` and ``'test'``
        entries of the split result.
    """
    full_train = datasets.load_dataset(name, name_split, split='train')
    parts = full_train.train_test_split(test_size=test_size, shuffle=True, seed=seed)
    return parts['train'], parts['test']


def compute_metrics(eval_pred):
    """Compute classification accuracy for an evaluation pass.

    Args:
        eval_pred: A pair that unpacks into ``(predictions, labels)``.
            ``predictions`` holds per-class scores along axis 1.

    Returns:
        A dict with a single ``"accuracy"`` entry: the fraction of rows whose
        highest-scoring class equals the corresponding label.
    """
    scores, labels = eval_pred
    predicted_ids = np.argmax(scores, axis=1)
    hits = predicted_ids == labels
    return {"accuracy": hits.mean()}

def predict(text: str, model, tokenizer) -> int:
    """Classify a single text and return the index of the highest-scoring class.

    Args:
        text: The text to classify. It is truncated to at most 512 tokens.
        model: A callable model that accepts the tokenizer's output as keyword
            arguments and returns an object exposing ``.logits``.
        tokenizer: A callable that turns ``text`` into a mapping of PyTorch
            tensors (it is invoked with ``return_tensors="pt"``).

    Returns:
        The predicted class index as a plain Python ``int``.
    """
    encoded = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)

    # Run the forward pass on whatever device the model's weights live on, so
    # the function also works after the model has been moved to an accelerator.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        encoded = {key: tensor.to(first_param.device) for key, tensor in encoded.items()}

    with torch.no_grad():
        output = model(**encoded)

    # Softmax is monotonic, so taking argmax on the raw logits selects the same class.
    return torch.argmax(output.logits, dim=-1).item()


In [3]:
# Checkpoint used for both the baseline and the fine-tuned classifier.
BASE_MODEL = "gpt2"

# Class name -> integer id, and the inverse lookup derived from it so the two
# mappings cannot drift apart.
label2id = {"neutral": 1, "positive": 2, "negative": 0}
id2label = {class_id: class_name for class_name, class_id in label2id.items()}


### Load a pretrained HF model

In [4]:
# Untuned reference classifier: the pretrained backbone plus a 3-way
# classification head (the loader reports that head as newly initialized).
base_model = AutoModelForSequenceClassification.from_pretrained(
    BASE_MODEL, label2id=label2id, id2label=id2label, num_labels=3
)

Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


### Load and preprocess a dataset

In [5]:
train, test = get_dataset_split('financial_phrasebank', 'sentences_66agree')

tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)

# `tokenize` below pads its batches, which requires a padding token. Define it
# as soon as the tokenizer exists instead of relying on a later cell to mutate
# it. Reusing the EOS token keeps the vocabulary size unchanged, so no model
# embedding resize is needed.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

def tokenize(batch):
    """Tokenize the ``sentence`` entries of a batch with the notebook's tokenizer.

    Sequences are truncated to 512 tokens, padded to a common length within the
    batch, and returned as PyTorch tensors.
    """
    sentences = batch['sentence']
    return tokenizer(
        sentences,
        padding=True,
        truncation=True,
        max_length=512,
        return_tensors="pt",
    )

### Evaluate the pretrained model

In [6]:
# Score the untuned model on a 100-row subset of the held-out split.
# A fixed random_state makes the subset (and therefore the reported accuracy)
# identical on every run instead of changing with each re-execution.
test_df = pd.DataFrame(test)
test_small = test_df.sample(100, random_state=23)
test_small['predicted'] = test_small['sentence'].apply(lambda x: predict(x, base_model, tokenizer))
test_small['correct'] = test_small['predicted'] == test_small['label']
prediction_percentage = test_small['correct'].mean()
print(f"Accuracy: {prediction_percentage:.2f}")


Accuracy: 0.11


## Perform Lightweight Fine-Tuning

### Create a PEFT model

In [7]:
from peft import LoraConfig, get_peft_model
from peft import AutoPeftModelForCausalLM

# NOTE(review): LoraConfig() is left at its defaults (no task_type and no
# modules_to_save). The reload cell further down reports `score.weight` as newly
# initialized, and the reloaded model scores far below the accuracy logged
# during training, which suggests the classification head is not stored with
# the adapter. Consider LoraConfig(task_type=TaskType.SEQ_CLS) together with a
# sequence-classification loader -- confirm against the PEFT documentation.
config = LoraConfig()
number_labels = len(id2label)
model = AutoModelForSequenceClassification.from_pretrained(
    BASE_MODEL,
    num_labels=number_labels,
    id2label=id2label,
    label2id=label2id
)
lora_model = get_peft_model(model, config)

# Reuse the EOS token for padding. Registering a brand-new '<pad>' token would
# give it an id one past the end of the model's embedding matrix unless the
# embeddings were resized as well.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# The classifier needs the pad id to locate the last real token of each padded row.
lora_model.config.pad_token_id = tokenizer.pad_token_id


Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


### Train the PEFT model

In [9]:
from transformers import Trainer, TrainingArguments, DataCollatorWithPadding

# Drop the raw text column after tokenizing: remove_unused_columns=False (below)
# stops the Trainer from discarding it automatically, and a string column is
# not something the padding collator can turn into tensors.
train_peft = train.map(tokenize, batched=True).remove_columns(["sentence"])
test_peft = test.map(tokenize, batched=True).remove_columns(["sentence"])

# Rename the label column to labels because the trainer expects that name
train_peft = train_peft.rename_column("label", "labels")
test_peft = test_peft.rename_column("label", "labels")

training_args = TrainingArguments(
    output_dir="./data/financial_phrasebank",
    num_train_epochs=10,
    learning_rate=2e-3,

    per_device_train_batch_size=128,
    per_device_eval_batch_size=128,

    weight_decay=0.01,

    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    # Kept explicit because the model handed to the Trainer is a PEFT wrapper:
    # columns are passed through untouched and the label column is named directly.
    remove_unused_columns=False,
    label_names=["labels"],
)

trainer = Trainer(
    # Train the PEFT-wrapped model (the object that is saved afterwards), not
    # the bare transformers model it wraps.
    model=lora_model,
    args=training_args,
    train_dataset=train_peft,
    eval_dataset=test_peft,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
    data_collator=DataCollatorWithPadding(tokenizer)
)
trainer.train()

Map: 100%|██████████| 3373/3373 [00:00<00:00, 17070.34 examples/s]
Map: 100%|██████████| 844/844 [00:00<00:00, 13961.99 examples/s]

[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
                                        

[A[A                                         
  0%|          | 0/2110 [01:20<?, ?it/s]          
[A
[ACheckpoint destination directory ./data/financial_phrasebank/checkpoint-211 already exists and is non-empty. Saving will proceed but saved results may be invalid.


{'eval_loss': 0.3198302090167999, 'eval_accuracy': 0.8625592417061612, 'eval_runtime': 5.4967, 'eval_samples_per_second': 153.547, 'eval_steps_per_second': 9.642, 'epoch': 1.0}



[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
                                        

[A[A                                         
  0%|          | 0/2110 [02:27<?, ?it/s]          
[A
[ACheckpoint destination directory ./data/financial_phrasebank/checkpoint-422 already exists and is non-empty. Saving will proceed but saved results may be invalid.


{'eval_loss': 0.2886766195297241, 'eval_accuracy': 0.8862559241706162, 'eval_runtime': 5.2185, 'eval_samples_per_second': 161.731, 'eval_steps_per_second': 10.156, 'epoch': 2.0}


                                        
  0%|          | 0/2110 [02:50<?, ?it/s]          

{'loss': 0.4367, 'grad_norm': 0.47783899307250977, 'learning_rate': 0.0015260663507109004, 'epoch': 2.37}



[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
                                        

[A[A                                         
  0%|          | 0/2110 [03:34<?, ?it/s]          
[A
[ACheckpoint destination directory ./data/financial_phrasebank/checkpoint-633 already exists and is non-empty. Saving will proceed but saved results may be invalid.


{'eval_loss': 0.2996843755245209, 'eval_accuracy': 0.8850710900473934, 'eval_runtime': 5.2688, 'eval_samples_per_second': 160.188, 'eval_steps_per_second': 10.059, 'epoch': 3.0}



[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
                                        

[A[A                                         
  0%|          | 0/2110 [04:40<?, ?it/s]          
[A
[ACheckpoint destination directory ./data/financial_phrasebank/checkpoint-844 already exists and is non-empty. Saving will proceed but saved results may be invalid.


{'eval_loss': 0.2867659032344818, 'eval_accuracy': 0.8909952606635071, 'eval_runtime': 5.2443, 'eval_samples_per_second': 160.936, 'eval_steps_per_second': 10.106, 'epoch': 4.0}


                                        
  0%|          | 0/2110 [05:26<?, ?it/s]           

{'loss': 0.2033, 'grad_norm': 1.1181832551956177, 'learning_rate': 0.001052132701421801, 'epoch': 4.74}



[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
                                        

[A[A                                         
  0%|          | 0/2110 [05:48<?, ?it/s]           
[A
[ACheckpoint destination directory ./data/financial_phrasebank/checkpoint-1055 already exists and is non-empty. Saving will proceed but saved results may be invalid.


{'eval_loss': 0.2915043830871582, 'eval_accuracy': 0.9004739336492891, 'eval_runtime': 5.2412, 'eval_samples_per_second': 161.033, 'eval_steps_per_second': 10.112, 'epoch': 5.0}



[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
                                        

[A[A                                         
  0%|          | 0/2110 [06:58<?, ?it/s]           
[A
[ACheckpoint destination directory ./data/financial_phrasebank/checkpoint-1266 already exists and is non-empty. Saving will proceed but saved results may be invalid.


{'eval_loss': 0.3203641474246979, 'eval_accuracy': 0.8862559241706162, 'eval_runtime': 5.6248, 'eval_samples_per_second': 150.05, 'eval_steps_per_second': 9.423, 'epoch': 6.0}



[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
                                        

[A[A                                         
  0%|          | 0/2110 [08:06<?, ?it/s]           
[A
[ACheckpoint destination directory ./data/financial_phrasebank/checkpoint-1477 already exists and is non-empty. Saving will proceed but saved results may be invalid.


{'eval_loss': 0.3848784267902374, 'eval_accuracy': 0.8909952606635071, 'eval_runtime': 5.1416, 'eval_samples_per_second': 164.152, 'eval_steps_per_second': 10.308, 'epoch': 7.0}


                                        
  0%|          | 0/2110 [08:13<?, ?it/s]           

{'loss': 0.1079, 'grad_norm': 1.5286445617675781, 'learning_rate': 0.0005781990521327014, 'epoch': 7.11}



[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
                                        

[A[A                                         
  0%|          | 0/2110 [09:15<?, ?it/s]           
[A
[ACheckpoint destination directory ./data/financial_phrasebank/checkpoint-1688 already exists and is non-empty. Saving will proceed but saved results may be invalid.


{'eval_loss': 0.406116783618927, 'eval_accuracy': 0.8909952606635071, 'eval_runtime': 5.3193, 'eval_samples_per_second': 158.669, 'eval_steps_per_second': 9.964, 'epoch': 8.0}



[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
                                        

[A[A                                         
  0%|          | 0/2110 [10:23<?, ?it/s]           
[A
[ACheckpoint destination directory ./data/financial_phrasebank/checkpoint-1899 already exists and is non-empty. Saving will proceed but saved results may be invalid.


{'eval_loss': 0.45893824100494385, 'eval_accuracy': 0.8909952606635071, 'eval_runtime': 5.224, 'eval_samples_per_second': 161.561, 'eval_steps_per_second': 10.145, 'epoch': 9.0}


                                        
  0%|          | 0/2110 [10:53<?, ?it/s]           

{'loss': 0.0542, 'grad_norm': 0.4030011296272278, 'learning_rate': 0.00010426540284360189, 'epoch': 9.48}



[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
                                        

[A[A                                         
  0%|          | 0/2110 [11:31<?, ?it/s]           
[A
[ACheckpoint destination directory ./data/financial_phrasebank/checkpoint-2110 already exists and is non-empty. Saving will proceed but saved results may be invalid.


{'eval_loss': 0.4792298674583435, 'eval_accuracy': 0.8886255924170616, 'eval_runtime': 5.2082, 'eval_samples_per_second': 162.052, 'eval_steps_per_second': 10.176, 'epoch': 10.0}


                                        
100%|██████████| 2110/2110 [11:15<00:00,  3.12it/s]

{'train_runtime': 675.5911, 'train_samples_per_second': 49.927, 'train_steps_per_second': 3.123, 'train_loss': 0.1917101031796062, 'epoch': 10.0}





TrainOutput(global_step=2110, training_loss=0.1917101031796062, metrics={'train_runtime': 675.5911, 'train_samples_per_second': 49.927, 'train_steps_per_second': 3.123, 'train_loss': 0.1917101031796062, 'epoch': 10.0})

### Save the PEFT model

In [10]:
# Persist the PEFT model to the local 'gpt-lora' directory.
lora_model.save_pretrained(save_directory='gpt-lora')

## Perform Inference Using the Fine-Tuned Model


### Load the saved PEFT model

In [11]:
from peft import PeftModel

# Rebuild the base classifier with the same 3-label head configuration used for
# training, then attach the saved adapter to it. The causal-LM auto class is not
# the right loader for a sequence classifier, and it was called without any
# label configuration.
finetuned_base = AutoModelForSequenceClassification.from_pretrained(
    BASE_MODEL,
    num_labels=len(id2label),
    id2label=id2label,
    label2id=label2id
)
# NOTE(review): only weights stored in 'gpt-lora' are restored here. If the
# adapter was saved without the `score` head, that head stays randomly
# initialized and accuracy will not match the training-time evaluation --
# verify what the saved adapter contains.
lora_model_finetuned = PeftModel.from_pretrained(finetuned_base, 'gpt-lora')


Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


### Evaluate the fine-tuned model

In [14]:
# Score the reloaded fine-tuned model on a 100-row subset of the held-out split.
# A fixed random_state makes the subset reproducible, so the accuracy does not
# change merely because a different random sample was drawn.
test_df_finetuned = pd.DataFrame(test)
test_small_finetuned = test_df_finetuned.sample(100, random_state=23)
test_small_finetuned['predicted'] = test_small_finetuned['sentence'].apply(lambda x: predict(x, lora_model_finetuned, tokenizer))
test_small_finetuned['correct'] = test_small_finetuned['predicted'] == test_small_finetuned['label']
prediction_percentage_finetuned_peft = test_small_finetuned['correct'].mean()
print(f"Accuracy: {prediction_percentage_finetuned_peft:.2f}")

Accuracy: 0.42


## Comparisons

In [17]:
# Side-by-side summary: baseline accuracy, fine-tuned accuracy, and the gain.
print(f"Accuracy base model: {prediction_percentage:.2f}")
print(
    f"Accuracy peft model: {prediction_percentage_finetuned_peft:.2f} "
    f"Improvement: {prediction_percentage_finetuned_peft - prediction_percentage:.2f}"
)

Accuracy base model: 0.14
Accuracy peft model: 0.42 Improvement: 0.28
