In [1]:
# !pip install transformers datasets evaluate accelerate peft trl
# !pip install nvidia-ml-py3

In [2]:
import torch
from transformers import RobertaModel, RobertaTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer, DataCollatorWithPadding, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model
from datasets import load_dataset

from trl import SFTTrainer

In [3]:
# Run configuration: LoRA rank, LoRA scaling factor, and the LoRA-vs-QLoRA switch.
lora_r = 8
lora_alpha = 16
use_QLoRA = True

# The results directory is keyed on the method and on both LoRA hyper-parameters.
output_dir = f"./{'qlora' if use_QLoRA else 'lora'}_results_{lora_r}_{lora_alpha}"
output_dir

'./qlora_results_8_16'

In [4]:
import wandb

# Authenticate with Weights & Biases (use wandb.login(relogin=True) to switch accounts).
wandb.login()

# The run name mirrors the method and the LoRA hyper-parameters.
run_name = f"{'QLoRA' if use_QLoRA else 'LoRA'}_{lora_r}_{lora_alpha}"
run_name

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33msarthak99[0m ([33mhpml99[0m). Use [1m`wandb login --relogin`[0m to force relogin


'QLoRA_8_16'

In [5]:
# Start a W&B run in the "lora_and_qlora_v2" project, named after this configuration.
wandb.init(
    project="lora_and_qlora_v2",
    name=run_name,
#     tags=["baseline", "high-lr"],
#     group="bert",
)

In [6]:
# Checkpoint name used for both the tokenizer and the classification models.
base_model = 'roberta-base'

# AG News dataset and the RoBERTa tokenizer matching the base checkpoint.
dataset = load_dataset('ag_news')
tokenizer = RobertaTokenizer.from_pretrained(base_model)

def preprocess(examples):
    """Tokenize a batch of examples for `dataset.map(..., batched=True)`.

    Args:
        examples: a batch dict whose 'text' entry is a list of strings.

    Returns:
        The tokenizer output for the batch, truncated to the tokenizer's
        maximum length and left unpadded.
    """
    # No padding here: with batched map, padding=True would pad every example
    # to the longest text in its map chunk, inflating sequence lengths.
    # DataCollatorWithPadding pads each training/eval batch to its own longest
    # member instead, which is cheaper and yields the same attention masks.
    return tokenizer(examples['text'], truncation=True)

# Tokenize every split; the raw text column is dropped once token ids exist.
tokenized_dataset = dataset.map(preprocess, remove_columns=["text"], batched=True)

# Train on the full training split; split the test set in half into
# a validation shard (index 0) and a held-out test shard (index 1).
train_dataset = tokenized_dataset['train']
eval_dataset = tokenized_dataset['test'].shard(index=0, num_shards=2)
test_dataset = tokenized_dataset['test'].shard(index=1, num_shards=2)

# Number of classes and their names, read from the dataset's label feature.
class_names = dataset["train"].features["label"].names
num_labels = dataset['train'].features['label'].num_classes
print(f"number of labels: {num_labels}")
print(f"the labels: {class_names}")

# Index -> class-name mapping; the classifier config needs it.
id2label = dict(enumerate(class_names))

# Collator that pads each batch and returns PyTorch tensors.
data_collator = DataCollatorWithPadding(return_tensors="pt", tokenizer=tokenizer)


number of labels: 4
the labels: ['World', 'Sports', 'Business', 'Sci/Tech']


In [7]:
if use_QLoRA:
    # 4-bit NF4 quantization with double quantization and bfloat16 compute.
    # Only the classification head is on the skip list; wider skip lists
    # (e.g. ["pre_classifier", "classifier"]) were tried and left out.
    nf4_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_use_double_quant=True,
        bnb_4bit_compute_dtype=torch.bfloat16,
        llm_int8_skip_modules=['classifier'],
    )

In [8]:
from peft import prepare_model_for_kbit_training

if not use_QLoRA:
    # Plain LoRA: load the base classifier without quantization.
    model = AutoModelForSequenceClassification.from_pretrained(base_model, id2label=id2label)
else:
    # QLoRA: load the base classifier with the NF4 quantization config, then
    # run PEFT's k-bit preparation step on it before adapters are attached.
    model = prepare_model_for_kbit_training(
        AutoModelForSequenceClassification.from_pretrained(
            base_model,
            id2label=id2label,
            quantization_config=nf4_config,
        )
    )
model

`low_cpu_mem_usage` was None, now set to True since model is quantized.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


RobertaForSequenceClassification(
  (roberta): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(50265, 768, padding_idx=1)
      (position_embeddings): Embedding(514, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0-11): 12 x RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSelfAttention(
              (query): Linear4bit(in_features=768, out_features=768, bias=True)
              (key): Linear4bit(in_features=768, out_features=768, bias=True)
              (value): Linear4bit(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): RobertaSelfOutput(
              (dense): Linear4bit(in_features=768, out_features=768, bias=Tru

## Setup PEFT Config

In [9]:
# LoRA settings for sequence classification: adapters target the attention
# query and value projections, and bias handling is set to 'none'.
peft_config = LoraConfig(
    task_type="SEQ_CLS",
    target_modules=['query', 'value'],
    r=lora_r,
    lora_alpha=lora_alpha,
    lora_dropout=0.05,
    bias='none',
)

In [10]:
# from peft import prepare_model_for_kbit_training

# if use_QLoRA:
#     model = prepare_model_for_kbit_training(model)
# model

In [11]:
# Wrap the base model with the LoRA adapters described by peft_config.
# NOTE(review): this rebinds `model`, so re-running this cell would pass an
# already-wrapped model back into get_peft_model — reload the base model first.
model = get_peft_model(model, peft_config)
model

PeftModelForSequenceClassification(
  (base_model): LoraModel(
    (model): RobertaForSequenceClassification(
      (roberta): RobertaModel(
        (embeddings): RobertaEmbeddings(
          (word_embeddings): Embedding(50265, 768, padding_idx=1)
          (position_embeddings): Embedding(514, 768, padding_idx=1)
          (token_type_embeddings): Embedding(1, 768)
          (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
          (dropout): Dropout(p=0.1, inplace=False)
        )
        (encoder): RobertaEncoder(
          (layer): ModuleList(
            (0-11): 12 x RobertaLayer(
              (attention): RobertaAttention(
                (self): RobertaSelfAttention(
                  (query): lora.Linear4bit(
                    (base_layer): Linear4bit(in_features=768, out_features=768, bias=True)
                    (lora_dropout): ModuleDict(
                      (default): Dropout(p=0.05, inplace=False)
                    )
                    (lora_A)

In [12]:
# Report trainable vs. total parameter counts for the PEFT-wrapped model.
print('PEFT Model')
model.print_trainable_parameters()

PEFT Model
trainable params: 888,580 || all params: 125,537,288 || trainable%: 0.7078


In [13]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

def compute_metrics(pred):
    """Compute accuracy for one evaluation pass.

    Args:
        pred: object exposing `predictions` (per-class scores; the argmax over
            the last axis is taken as the predicted class) and `label_ids`
            (the reference labels).

    Returns:
        A dict with a single 'accuracy' entry.
    """
    predicted_classes = pred.predictions.argmax(-1)
    return {'accuracy': accuracy_score(pred.label_ids, predicted_classes)}

## For tracking GPU Utilization

In [14]:
from pynvml import *

def print_gpu_utilization(device_index=0):
    """Print the memory currently in use on one GPU.

    Args:
        device_index: NVML index of the GPU to query. Defaults to 0, the
            device this notebook has always reported on.
    """
    nvmlInit()
    try:
        handle = nvmlDeviceGetHandleByIndex(device_index)
        info = nvmlDeviceGetMemoryInfo(handle)
        print(f"GPU memory occupied: {info.used//1024**2} MB.")
    finally:
        # Balance the nvmlInit() above so repeated calls do not leave NVML
        # initialised, even if the query raises.
        nvmlShutdown()

def print_summary(result):
    """Print runtime, throughput, and current GPU memory for a training run.

    Args:
        result: object whose `metrics` dict contains 'train_runtime' and
            'train_samples_per_second'.
    """
    stats = result.metrics
    print(f"Time: {stats['train_runtime']:.2f}")
    print(f"Samples/second: {stats['train_samples_per_second']:.2f}")
    print_gpu_utilization()

# Baseline GPU memory reading, taken before training starts.
print_gpu_utilization()

GPU memory occupied: 914 MB.


In [15]:
# One shared training configuration, reused for every model in this notebook.
training_args = TrainingArguments(
    output_dir=output_dir,
    # Schedule
    num_train_epochs=1,
    # max_steps=10,
    learning_rate=5e-5,
    per_device_train_batch_size=16,
    evaluation_strategy='steps',
    # Optimizer
    optim="paged_adamw_8bit",
    # Hardware and data loading
    use_cpu=False,
    dataloader_num_workers=1,
    # Gradient checkpointing is disabled here.
    gradient_checkpointing=False,
    gradient_checkpointing_kwargs={'use_reentrant': True},
)

def get_trainer(model):
    """Build a Trainer for `model` from the notebook-wide settings.

    The training arguments, metric function, train/eval datasets, and data
    collator are all taken from the notebook's globals; only the model varies.
    """
    shared_settings = dict(
        args=training_args,
        data_collator=data_collator,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        compute_metrics=compute_metrics,
    )
    return Trainer(model=model, **shared_settings)

In [16]:
# Fine-tune the PEFT model; `result.metrics` is read later by print_summary.
peft_lora_finetuning_trainer = get_trainer(model)

result = peft_lora_finetuning_trainer.train()



Step,Training Loss,Validation Loss,Accuracy
500,0.6141,0.315083,0.901316
1000,0.2876,0.317523,0.9
1500,0.2864,0.29224,0.906842
2000,0.2661,0.293192,0.910789
2500,0.2739,0.270543,0.909474
3000,0.2875,0.257184,0.913947
3500,0.2509,0.261312,0.916316
4000,0.2542,0.25817,0.917105
4500,0.2553,0.254638,0.917632
5000,0.2433,0.246993,0.917105




In [17]:
# Close the W&B run now that training has finished.
wandb.finish()

VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
eval/accuracy,▁▁▃▄▄▅▆▆▆▆▆▇▇██
eval/loss,██▆▆▄▃▃▃▃▂▂▂▁▁▁
eval/runtime,█▄▁▃▁▄▂▅▂▅▄▄▄▆▄
eval/samples_per_second,▁▅█▆█▅▇▄▇▄▅▅▅▃▅
eval/steps_per_second,▁▅█▆█▅▆▄▇▄▅▅▅▃▅
train/epoch,▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇█████
train/global_step,▁▁▁▁▂▂▃▃▃▃▃▃▄▄▅▅▅▅▅▅▆▆▇▇▇▇▇▇███
train/grad_norm,▃▂▄▃▁▃▅█▇▅▂▃▃▃▂
train/learning_rate,█▇▇▇▆▅▅▅▄▃▃▂▂▁▁
train/loss,█▂▂▂▂▂▁▁▁▁▁▁▁▁▁

0,1
eval/accuracy,0.92447
eval/loss,0.23386
eval/runtime,18.7224
eval/samples_per_second,202.966
eval/steps_per_second,25.371
total_flos,2.0500492798385664e+16
train/epoch,1.0
train/global_step,7500.0
train/grad_norm,2.84241
train/learning_rate,0.0


In [18]:
# !pip install nvidia-ml-py3

In [19]:
# from pynvml import *


# def print_gpu_utilization():
#     nvmlInit()
#     handle = nvmlDeviceGetHandleByIndex(0)
#     info = nvmlDeviceGetMemoryInfo(handle)
#     print(f"GPU memory occupied: {info.used//1024**2} MB.")

# def print_summary(result):
#     print(f"Time: {result.metrics['train_runtime']:.2f}")
#     print(f"Samples/second: {result.metrics['train_samples_per_second']:.2f}")
#     print_gpu_utilization()
    

In [20]:
# Report training time, throughput, and GPU memory after training.
# print_summary() already ends with print_gpu_utilization(), so calling it
# separately as well would print the same memory line twice.
print_summary(result)

GPU memory occupied: 2696 MB.
Time: 2960.02
Samples/second: 40.54
GPU memory occupied: 2696 MB.


In [21]:
# Save
# q_peft_model_name = 'roberta-base-peft-8bit'
# model.save_pretrained(q_peft_model_name)

## Performing Inference with a PEFT Model

It's time to have some fun putting our model to work!

In [22]:

def classify(text):
    """Predict the class of a single text with the global `model` and print it.

    Args:
        text: one string to classify (the prediction is read with `.item()`,
            so exactly one example is expected).
    """
    # The tokenizer returns CPU tensors, but the model may be on the GPU;
    # move the inputs to wherever the model's parameters are.
    device = next(model.parameters()).device
    inputs = tokenizer(text, truncation=True, padding=True, return_tensors="pt").to(device)

    # Run in eval mode (dropout off) without tracking gradients, then restore
    # whatever mode the model was in so this does not disturb training.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            output = model(**inputs)
    finally:
        model.train(was_training)

    prediction = output.logits.argmax(dim=-1).item()

    print(f'\n Class: {prediction}, Label: {id2label[prediction]}, Text: {text}')
    # return id2label[prediction]

In [23]:
# classify( "Kederis proclaims innocence Olympic champion Kostas Kederis today left hospital ahead of his date with IOC inquisitors claiming his ...")
# classify( "Wall St. Bears Claw Back Into the Black (Reuters) Reuters - Short-sellers, Wall Street's dwindling\band of ultra-cynics, are seeing green again.")

### Evaluate Models

To measure the improvement gained from training, we need a baseline, so let's compare the trained model with an untrained one.

Take a look at how the base model performs versus the fine-tuned one:

1.   The trained models against the untrained one
2.   The PEFT Model vs the regular fine-tuned one  



In [24]:
from torch.utils.data import DataLoader
import evaluate
from tqdm import tqdm

metric = evaluate.load('accuracy')

def evaluate_model(inference_model, dataset, batch_size=8):
    """Compute and print the accuracy of `inference_model` on `dataset`.

    Args:
        inference_model: model whose output exposes `.logits`; it is moved to
            the GPU when one is available and switched to eval mode.
        dataset: tokenized dataset with a "label" column.
        batch_size: evaluation batch size. Defaults to 8, the value this
            function previously hard-coded.

    The result is accumulated in and printed from the module-level `metric`.
    """
    eval_dataloader = DataLoader(
        dataset.rename_column("label", "labels"),
        batch_size=batch_size,
        collate_fn=data_collator,
    )
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    inference_model.to(device)
    inference_model.eval()
    for batch in tqdm(eval_dataloader):
        # Bind the result rather than relying on .to() mutating in place.
        batch = batch.to(device)
        with torch.no_grad():
            outputs = inference_model(**batch)
        metric.add_batch(
            predictions=outputs.logits.argmax(dim=-1),
            references=batch["labels"],
        )

    eval_metric = metric.compute()
    print(eval_metric)




In [25]:
# Baseline: evaluate a freshly loaded base checkpoint (no fine-tuning) on the
# held-out test shard. The model is passed as a temporary and not kept around.
evaluate_model(AutoModelForSequenceClassification.from_pretrained(base_model, id2label=id2label), test_dataset)

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
100%|██████████| 475/475 [00:16<00:00, 29.10it/s]

{'accuracy': 0.19921052631578948}





In [26]:
# Evaluate the PEFT fine-tuned model on the same held-out test shard.
evaluate_model(model, test_dataset)

100%|██████████| 475/475 [00:18<00:00, 26.24it/s]

{'accuracy': 0.9255263157894736}



