# Lightweight Fine-Tuning Demo

- PEFT technique = Low Rank Adaption (LoRA)
- Model = GPT2
- Evaluation approach = Hugging Face Train / Evaluate loops
- Fine-tuning dataset = https://huggingface.co/datasets/cornell-movie-review-data/rotten_tomatoes

## Necessary Imports

In [1]:
import numpy as np
import pandas as pd
import torch
from datasets import load_dataset
from transformers import AutoTokenizer
from transformers import AutoModelForSequenceClassification
from transformers import DataCollatorWithPadding, Trainer, TrainingArguments
from peft import LoraConfig, get_peft_model, TaskType
from peft import AutoPeftModelForSequenceClassification

## Load A Dataset

In [2]:
# https://huggingface.co/datasets/cornell-movie-review-data/rotten_tomatoes
# https://huggingface.co/docs/hub/datasets-usage

splits = ["train", "test"]
review_data = {split: ds for split, ds in zip(splits, load_dataset("cornell-movie-review-data/rotten_tomatoes", split=splits))}

## Tokenize Dataset

In [3]:
# https://huggingface.co/docs/transformers/en/model_doc/auto#transformers.AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token

# Tokenize dataset
def tokenize(batch):
    return tokenizer(batch["text"], padding=True, truncation=True)

# Tokenize train and test sets
train_dataset = review_data["train"].map(tokenize, batched=True)
test_dataset = review_data["test"].map(tokenize, batched=True)



## Load a base model

In [4]:
# https://huggingface.co/docs/transformers/model_doc/auto#transformers.AutoModelForSequenceClassification

base_model = AutoModelForSequenceClassification.from_pretrained(
    "gpt2",
    num_labels=2,
    id2label={0: "NEGATIVE", 1: "POSITIVE"},
    label2id={"NEGATIVE": 0, "POSITIVE": 1},
)

# Freeze all the parameters of the base model
for param in base_model.base_model.parameters():
    param.requires_grad = False

base_model.config.pad_token_id = tokenizer.pad_token_id

Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


## Evaluation
Evaluation approach: evaluate method with a Hugging Face Trainer

In [5]:
# Define a compute metrics method
def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    return {"accuracy": (predictions == labels).mean()}

# Build a Trainer
trainer = Trainer(
    model=base_model,
    args=TrainingArguments(
        output_dir="./data/sentiment_analysis",
        learning_rate=2e-5,
        per_device_train_batch_size=8,
        per_device_eval_batch_size=8,
        num_train_epochs=1,
        weight_decay=0.01,
        eval_strategy="epoch",
        save_strategy="epoch",
        load_best_model_at_end=True,
    ),
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    tokenizer=tokenizer,
    data_collator=DataCollatorWithPadding(tokenizer=tokenizer),
    compute_metrics=compute_metrics,
)

# Train and Evaluate
trainer.train()
base_evaluation = trainer.evaluate()


Epoch,Training Loss,Validation Loss,Accuracy
1,0.8245,0.739076,0.529081


## View Results

In [6]:
base_df = pd.DataFrame(base_evaluation.items(), columns=['Metric', 'Results'])
base_df

Unnamed: 0,Metric,Results
0,eval_loss,0.739076
1,eval_accuracy,0.529081
2,eval_runtime,5.9797
3,eval_samples_per_second,178.27
4,eval_steps_per_second,22.409
5,epoch,1.0


In [7]:
# View some individual results

df = pd.DataFrame(test_dataset)
df = df[["text", "label"]]

df["text"] = df["text"].str.replace("<br />", " ")

predictions = trainer.predict(test_dataset)
df["predicted_label"] = np.argmax(predictions[0], axis=1)

print("Viewing some predictions\n")

df.head(5)

Viewing some predictions



Unnamed: 0,text,label,predicted_label
0,lovingly photographed in the manner of a golde...,1,1
1,consistently clever and suspenseful .,1,0
2,"it's like a "" big chill "" reunion of the baade...",1,1
3,the story gives ample opportunity for large-sc...,1,1
4,"red dragon "" never cuts corners .",1,1


In [8]:
print("Viewing some mistakes\n")

df[df["label"] != df["predicted_label"]].head(5)

Viewing some mistakes



Unnamed: 0,text,label,predicted_label
1,consistently clever and suspenseful .,1,0
5,fresnadillo has something serious to say about...,1,0
6,throws in enough clever and unexpected twists ...,1,0
9,generates an enormous feeling of empathy for i...,1,0
13,. . . quite good at providing some good old fa...,1,0


## Try LoRA PEFT and see how we get on

In [9]:
# Setting up PEFT

# Define LoRA Config
config = LoraConfig(
                    r=8, # Rank
                    lora_alpha=32,
                    target_modules=['c_attn', 'c_proj'],
                    lora_dropout=0.1,
                    bias="none",
                    task_type=TaskType.SEQ_CLS
                )

# Need a model again
model = AutoModelForSequenceClassification.from_pretrained(
    "gpt2",
    num_labels=2,
    id2label={0: "NEGATIVE", 1: "POSITIVE"},
    label2id={"NEGATIVE": 0, "POSITIVE": 1},
)

model.config.pad_token_id = tokenizer.pad_token_id

# Build lora adapter
lora_model = get_peft_model(model, config)

Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


'NoneType' object has no attribute 'cadam32bit_grad_fp32'


  warn("The installed version of bitsandbytes was compiled without GPU support. "


In [10]:
lora_model.print_trainable_parameters()

trainable params: 812,544 || all params: 125,253,888 || trainable%: 0.6487


## Re-evaluate

In [11]:
# Build a Trainer, again
trainer2 = Trainer(
    model=lora_model,
    args=TrainingArguments(
        output_dir="./data/peft_sentiment_analysis",
        learning_rate=2e-5,
        per_device_train_batch_size=8,
        per_device_eval_batch_size=8,
        num_train_epochs=1,
        weight_decay=0.01,
        eval_strategy="epoch",
        save_strategy="epoch",
        load_best_model_at_end=True,
        label_names=['labels']
    ),
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    tokenizer=tokenizer,
    data_collator=DataCollatorWithPadding(tokenizer=tokenizer),
    compute_metrics=compute_metrics,
)
# Train and Evaluate
trainer2.train()
peft_evaluation = trainer2.evaluate()

Epoch,Training Loss,Validation Loss,Accuracy
1,0.6747,0.581595,0.70075


In [12]:
# View some individual results

df = pd.DataFrame(test_dataset)
df = df[["text", "label"]]

df["text"] = df["text"].str.replace("<br />", " ")

predictions = trainer2.predict(test_dataset)
df["predicted_label"] = np.argmax(predictions[0], axis=1)

print("Viewing some predictions\n")

df.head(5)

Viewing some predictions



Unnamed: 0,text,label,predicted_label
0,lovingly photographed in the manner of a golde...,1,1
1,consistently clever and suspenseful .,1,1
2,"it's like a "" big chill "" reunion of the baade...",1,0
3,the story gives ample opportunity for large-sc...,1,1
4,"red dragon "" never cuts corners .",1,1


In [13]:
print("Viewing some mistakes\n")

df[df["label"] != df["predicted_label"]].head(5)

Viewing some mistakes



Unnamed: 0,text,label,predicted_label
2,"it's like a "" big chill "" reunion of the baade...",1,0
14,"at its worst , the movie is pretty diverting ;...",1,0
18,"as it turns out , you can go home again .",1,0
19,you've already seen city by the sea under a va...,1,0
22,grown-up quibbles are beside the point here . ...,1,0


## Compare results

In [14]:
base_df = pd.DataFrame(base_evaluation.items(), columns=['Metric', 'Results'])
peft_df = pd.DataFrame(peft_evaluation.items(), columns=['Metric', 'Results'])                  

### Base Model

In [15]:
base_df

Unnamed: 0,Metric,Results
0,eval_loss,0.739076
1,eval_accuracy,0.529081
2,eval_runtime,5.9797
3,eval_samples_per_second,178.27
4,eval_steps_per_second,22.409
5,epoch,1.0


### LORA Model

In [16]:
peft_df

Unnamed: 0,Metric,Results
0,eval_loss,0.581595
1,eval_accuracy,0.70075
2,eval_runtime,6.3409
3,eval_samples_per_second,168.114
4,eval_steps_per_second,21.133
5,epoch,1.0


In [17]:
# Save fine tuned PEFT model
lora_model.save_pretrained("gpt-lora")

## Loading a locally saved model

In [18]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

lora_model_local = AutoPeftModelForSequenceClassification.from_pretrained(
    "gpt-lora", 
    num_labels=2, 
    ignore_mismatched_sizes=True).to(device)

lora_model_local.config.pad_token_id = tokenizer.pad_token_id

Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


## Evaluate the locally saved model

In [19]:
# Build a Trainer, again, again
trainer3 = Trainer(
    model=lora_model_local,
    args=TrainingArguments(
        output_dir="./data/lora_sentiment_analysis",
        learning_rate=2e-5,
        per_device_train_batch_size=8,
        per_device_eval_batch_size=8,
        num_train_epochs=1,
        weight_decay=0.01,
        eval_strategy="epoch",
        save_strategy="epoch",
        load_best_model_at_end=True,
        label_names=['labels']
    ),
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    tokenizer=tokenizer,
    data_collator=DataCollatorWithPadding(tokenizer=tokenizer),
    compute_metrics=compute_metrics,
)
# Train and Evaluate
trainer3.train()
lora_evaluation = trainer3.evaluate()

Epoch,Training Loss,Validation Loss,Accuracy
1,0.6201,0.559964,0.727955


In [20]:
# View some individual results

df = pd.DataFrame(test_dataset)
df = df[["text", "label"]]

df["text"] = df["text"].str.replace("<br />", " ")

predictions = trainer3.predict(test_dataset)
df["predicted_label"] = np.argmax(predictions[0], axis=1)

print("Viewing some predictions\n")

df.head(5)

Viewing some predictions



Unnamed: 0,text,label,predicted_label
0,lovingly photographed in the manner of a golde...,1,1
1,consistently clever and suspenseful .,1,1
2,"it's like a "" big chill "" reunion of the baade...",1,0
3,the story gives ample opportunity for large-sc...,1,1
4,"red dragon "" never cuts corners .",1,1


In [21]:
print("Viewing some mistakes\n")

df[df["label"] != df["predicted_label"]].head(5)

Viewing some mistakes



Unnamed: 0,text,label,predicted_label
2,"it's like a "" big chill "" reunion of the baade...",1,0
14,"at its worst , the movie is pretty diverting ;...",1,0
18,"as it turns out , you can go home again .",1,0
19,you've already seen city by the sea under a va...,1,0
22,grown-up quibbles are beside the point here . ...,1,0


In [22]:
lora_df = pd.DataFrame(lora_evaluation.items(), columns=['Metric', 'Results']) 

In [23]:
base_df

Unnamed: 0,Metric,Results
0,eval_loss,0.739076
1,eval_accuracy,0.529081
2,eval_runtime,5.9797
3,eval_samples_per_second,178.27
4,eval_steps_per_second,22.409
5,epoch,1.0


In [24]:
peft_df

Unnamed: 0,Metric,Results
0,eval_loss,0.581595
1,eval_accuracy,0.70075
2,eval_runtime,6.3409
3,eval_samples_per_second,168.114
4,eval_steps_per_second,21.133
5,epoch,1.0


In [25]:
lora_df

Unnamed: 0,Metric,Results
0,eval_loss,0.559964
1,eval_accuracy,0.727955
2,eval_runtime,6.3652
3,eval_samples_per_second,167.474
4,eval_steps_per_second,21.052
5,epoch,1.0


In [43]:
accuracy1 = base_df['Results'][1]
accuracy2 = peft_df['Results'][1]
accuracy3 = lora_df['Results'][1]
print('''Base model accuracy after first train/eval:  {:2.2%},
Accuracy after adding PEFT and second train/eval: {:2.2%},
and accuracy after saving, loading and a third train/eval: {:2.2%}'''.format(accuracy1, accuracy2, accuracy3))

Base model accuracy after first train/eval:  52.91%,
Accuracy after adding PEFT and second train/eval: 70.08%,
and accuracy after saving, loading and a third train/eval: 72.80%
