<a href="https://colab.research.google.com/github/sloveninayak/peft-bert/blob/main/peftBert.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -q "datasets==2.15.0"

In [None]:
from datasets import load_dataset
# Load the SST-2 (Stanford Sentiment Treebank) "train" split via the `datasets`
# library and hold out 20% of it as a test set; the fixed seed keeps the
# shuffled split reproducible.
dataset = (
    load_dataset("stanfordnlp/sst2", split="train")
    .train_test_split(test_size=0.2, shuffle=True, seed=23)
)
# Names of the two partitions produced by train_test_split.
splits = ["train", "test"]
dataset["train"]

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Dataset({
    features: ['idx', 'sentence', 'label'],
    num_rows: 53879
})

In [None]:
# Peek at the first training record to confirm its layout (idx, sentence, label).
dataset["train"][0]

{'idx': 14677, 'sentence': 'good performances ', 'label': 1}

In [None]:
import numpy as np
import pandas as pd
#TOKENIZE TEXT DATA, CONVERT TEXT INTO NUMERICAL REPRESENTATIONS
from transformers import AutoTokenizer

In [None]:
from transformers import AutoModelForSequenceClassification
from transformers import DataCollatorWithPadding, Trainer, TrainingArguments

In [None]:
# Load the tokenizer that belongs to the distilbert-base-uncased checkpoint.
# Padding is not applied here; it is handled per batch by DataCollatorWithPadding
# when the Trainer is built.
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")

In [None]:
def _tokenize_batch(batch):
    """Tokenize the "sentence" column of a batch, truncating over-long inputs."""
    return tokenizer(batch["sentence"], truncation=True)


# Tokenize every split in batched mode; the tokenizer outputs are added as new
# columns next to the original ones.
tokenized_dataset = {}
for split in splits:
    tokenized_dataset[split] = dataset[split].map(_tokenize_batch, batched=True)
tokenized_dataset["train"]

Map:   0%|          | 0/13470 [00:00<?, ? examples/s]

Dataset({
    features: ['idx', 'sentence', 'label', 'input_ids', 'attention_mask'],
    num_rows: 53879
})

In [None]:
# Load DistilBERT with a 2-class sequence-classification head and readable
# label names for the sentiment task.
model = AutoModelForSequenceClassification.from_pretrained(
    "distilbert-base-uncased",
    num_labels=2,
    id2label={0: "negative", 1: "positive"},
    label2id={"negative": 0, "positive": 1},
)
# Freeze the pretrained DistilBERT encoder so that only the newly initialised
# head (pre_classifier + classifier) is updated during training.
# The previous loop assigned requires_grad = True to every parameter, which
# froze nothing and left the whole network trainable.
for param in model.distilbert.parameters():
    param.requires_grad = False
# Show the final classification layer.
model.classifier

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Linear(in_features=768, out_features=2, bias=True)

In [None]:
# Print the module tree to inspect layer names (e.g. the q_lin / k_lin / v_lin attention projections).
print(model)

DistilBertForSequenceClassification(
  (distilbert): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0-5): 6 x TransformerBlock(
          (attention): MultiHeadSelfAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): Linear(in_features=768, out_features=768, bias=True)
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
            (v_lin): Linear(in_features=768, out_features=768, bias=True)
            (out_lin): Linear(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
            (dropout): Dropout(p=0.1, inplace=False)
 

In [None]:
# Accuracy metric handed to the Trainer for evaluation.
def compute_metrics(eval_pred):
    """Return the classification accuracy for one evaluation pass.

    Args:
        eval_pred: A pair ``(predictions, labels)``. ``predictions`` holds one
            row of per-class scores (logits or probabilities) per example;
            ``labels`` holds the reference class ids.

    Returns:
        dict: ``{"accuracy": <fraction of examples predicted correctly>}``.
    """
    class_scores, labels = eval_pred
    # The predicted class is the index of the highest score in each row.
    predicted_classes = np.argmax(class_scores, axis=1)
    accuracy = (predicted_classes == labels).mean()
    return {"accuracy": accuracy}

In [None]:
pip install transformers[torch]



In [None]:
pip install accelerate -U



In [None]:
# Build the Trainer that fine-tunes `model` on the tokenized train split and
# scores it on the tokenized test split.
trainer = Trainer(
    model=model,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["test"],
    tokenizer=tokenizer,
    # Batches are padded by the collator rather than at tokenization time.
    data_collator=DataCollatorWithPadding(tokenizer=tokenizer),
    compute_metrics=compute_metrics,
    args=TrainingArguments(
        # Checkpoints are written below this directory.
        output_dir="./data/spam_not_spam",
        # Optimiser settings: learning rate, plus weight decay as regularisation.
        learning_rate=2e-3,
        weight_decay=0.01,
        # Four examples per device per step, for both training and evaluation.
        per_device_train_batch_size=4,
        per_device_eval_batch_size=4,
        num_train_epochs=1,
        # Evaluate and checkpoint at the end of every epoch, then reload the
        # best checkpoint once training finishes.
        evaluation_strategy="epoch",
        save_strategy="epoch",
        load_best_model_at_end=True,
    ),
)

# Baseline: evaluate on the test split before any training has happened.
trainer.evaluate()



{'eval_loss': 0.6873867511749268,
 'eval_accuracy': 0.5676317743132888,
 'eval_runtime': 688.2401,
 'eval_samples_per_second': 19.572,
 'eval_steps_per_second': 4.894}

In [None]:
# Dataset.select (from the `datasets` library) picks rows by index; here a small
# hand-picked sample of test rows is pulled out for a manual check.
items_for_manual_review = tokenized_dataset["test"].select([0, 1, 22, 31, 43, 292, 448, 487])
# Run the model on the sample; the result exposes .predictions and .label_ids,
# which are used below. As the cells are ordered, this runs before trainer.train().
results = trainer.predict(items_for_manual_review)
# Precision / recall / F1 for the positive class (label 1).
def calculate_precision_recall_f1(actuals, predictions):
    """Compute precision, recall and F1 for binary labels, treating 1 as positive.

    Args:
        actuals: Iterable of reference labels (0 or 1).
        predictions: Iterable of predicted labels (0 or 1), aligned with ``actuals``.

    Returns:
        tuple: ``(precision, recall, f1_score)``. Any metric whose denominator
        would be zero is reported as 0.
    """
    pairs = list(zip(actuals, predictions))

    # Tally the three confusion-matrix cells the metrics depend on.
    tp = sum(1 for truth, guess in pairs if truth == guess == 1)
    fp = sum(1 for truth, guess in pairs if guess == 1 and truth != guess)
    fn = sum(1 for truth, guess in pairs if guess == 0 and truth != guess)

    # Precision: share of positive predictions that were actually positive.
    predicted_positive = tp + fp
    precision = tp / predicted_positive if predicted_positive > 0 else 0

    # Recall: share of actual positives that were predicted positive.
    actual_positive = tp + fn
    recall = tp / actual_positive if actual_positive > 0 else 0

    # F1: harmonic mean of precision and recall.
    if (precision + recall) > 0:
        f1_score = 2 * (precision * recall) / (precision + recall)
    else:
        f1_score = 0
    return precision, recall, f1_score
# Score the manual-review sample: reference labels against the arg-max class
# of the model outputs, then report the three metrics.
precision, recall, f1 = calculate_precision_recall_f1(
    results.label_ids,
    results.predictions.argmax(axis=1),
)

print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1}")

Precision: 0.875
Recall: 1.0
F1 Score: 0.9333333333333333


In [None]:
# Run the training loop with the current `trainer` (one epoch per its TrainingArguments).
trainer.train()

Epoch,Training Loss,Validation Loss,Accuracy
1,0.6826,0.687,0.555382


TrainOutput(global_step=13470, training_loss=0.6921776627821664, metrics={'train_runtime': 22420.391, 'train_samples_per_second': 2.403, 'train_steps_per_second': 0.601, 'total_flos': 333643722426996.0, 'train_loss': 0.6921776627821664, 'epoch': 1.0})

In [None]:
# Re-evaluate on the held-out split after training, for comparison with the pre-training baseline.
trainer.evaluate()

{'eval_loss': 0.6870002150535583,
 'eval_accuracy': 0.5553823311061619,
 'eval_runtime': 689.8965,
 'eval_samples_per_second': 19.525,
 'eval_steps_per_second': 4.882,
 'epoch': 1.0}

In [None]:
!pip install peft

Collecting peft
  Downloading peft-0.11.1-py3-none-any.whl (251 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m251.6/251.6 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: peft
Successfully installed peft-0.11.1


In [None]:
from peft import (LoraConfig, get_peft_model, TaskType, PeftModel)
#PEFT (PARAMETER-EFFICIENT FINE-TUNING) REDUCES MEMORY USAGE AND TRAINING TIME, AND CAN IMPROVE MODEL GENERALIZATION
#LoraConfig SPECIFIES WHERE THE ADAPTER LAYERS ARE ADDED, THEIR DIMENSIONS (RANK), AND THE OTHER HYPERPARAMETERS FOR PEFT
#TaskType IS AN ENUMERATION OF THE DIFFERENT TASK TYPES SUPPORTED BY THE peft LIBRARY

No CUDA runtime is found, using CUDA_HOME='/usr/local/cuda'


In [None]:
# LoRA configuration for the sequence-classification task:
#   r              - rank of the low-rank adapter matrices
#   lora_alpha     - scaling factor that adjusts the adapters' contribution
#   lora_dropout   - dropout rate applied within the adapters
#   target_modules - the attention query/key/value projections that receive adapters
#   bias           - 'none' leaves bias terms out of the adapter training
lora_config = LoraConfig(
    task_type=TaskType.SEQ_CLS,
    r=2,
    lora_alpha=16,
    lora_dropout=0.05,
    bias='none',
    target_modules=["q_lin", "k_lin", "v_lin"],
)
# Wrap the existing `model` with the adapters described by lora_config.
# NOTE(review): get_peft_model is understood to modify `model` in place - confirm.
peft_model = get_peft_model(model, lora_config)

In [None]:
# Trainer for the LoRA-wrapped model. It mirrors the full fine-tuning setup but
# writes to its own output directory: both runs produce checkpoints at the same
# step numbers, so sharing one directory would mix the adapter checkpoints with
# the full-model checkpoints.
trainer = Trainer(
    model=peft_model,
    args=TrainingArguments(
        output_dir="./data/sst2_lora",
        learning_rate=2e-3,
        # Four examples per device per step, for both training and evaluation.
        per_device_train_batch_size=4,
        per_device_eval_batch_size=4,
        # Evaluate and checkpoint at the end of every epoch.
        evaluation_strategy="epoch",
        save_strategy="epoch",
        num_train_epochs=1,
        weight_decay=0.01,
        # Reload the best checkpoint once training finishes.
        load_best_model_at_end=True,
    ),
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["test"],
    tokenizer=tokenizer,
    # Batches are padded by the collator rather than at tokenization time.
    data_collator=DataCollatorWithPadding(tokenizer=tokenizer),
    compute_metrics=compute_metrics,
)

In [None]:
# Train the LoRA adapter before persisting it; without this step the saved
# adapter would only contain its initial, untrained weights.
trainer.train()
# Write the adapter to ./peft_model.
peft_model.save_pretrained('./peft_model')