In [1]:
!git clone https://github.com/monk1337/NanoPeft.git

Cloning into 'NanoPeft'...
remote: Enumerating objects: 126, done.[K
remote: Counting objects: 100% (126/126), done.[K
remote: Compressing objects: 100% (120/120), done.[K
remote: Total 126 (delta 37), reused 0 (delta 0), pack-reused 0[K
Receiving objects: 100% (126/126), 37.63 KiB | 4.70 MiB/s, done.
Resolving deltas: 100% (37/37), done.


In [20]:
%%capture
!pip3 install transformers datasets lightning watermark

In [12]:
%cd NanoPeft

#let's download some datasets
from datasets import load_dataset
cola_dataset = load_dataset("glue", "cola")


#loading a model
from transformers import AutoTokenizer
from transformers import AutoModelForSequenceClassification

tokenizer = AutoTokenizer.from_pretrained("roberta-base")
model     = AutoModelForSequenceClassification.from_pretrained("roberta-base", num_labels=2)

[Errno 2] No such file or directory: 'NanoPeft'
/content/NanoPeft


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [13]:
# data related functions
import os

def tokenize_text(batch):
    """Tokenize the ``sentence`` entry of ``batch`` with the notebook-level tokenizer.

    Args:
        batch: Mapping with a ``'sentence'`` entry holding the text to encode.

    Returns:
        The result of calling the tokenizer on those sentences with
        truncation and padding both enabled.
    """
    sentences = batch['sentence']
    encoded = tokenizer(sentences, truncation=True, padding=True)
    return encoded


# Tokenize every split. With batched=True and batch_size=None the mapped
# function receives a whole split at once (see the datasets `map` docs), so
# padding=True in tokenize_text pads to the longest sentence of that split.
cola_tokenized = cola_dataset.map(tokenize_text, batched=True, batch_size=None)
# NOTE(review): deleting the raw dataset makes this cell non-re-runnable; a
# second run fails on the line above because `cola_dataset` no longer exists.
del cola_dataset
# Return only these three columns, as torch tensors, when the splits are indexed.
cola_tokenized.set_format("torch", columns=["input_ids", "attention_mask", "label"])
# Presumably set to silence the tokenizers fork warning once the DataLoader
# worker processes below start -- confirm.
os.environ["TOKENIZERS_PARALLELISM"] = "false"



from torch.utils.data import DataLoader, Dataset


class COLADataset(Dataset):
    """Map-style dataset that serves a single split out of a dict of splits.

    Args:
        dataset_dict: Mapping from split name to a split object. The split
            must support indexing and expose a ``num_rows`` attribute.
        partition_key: Name of the split to serve; defaults to ``"train"``.
    """

    def __init__(self, dataset_dict, partition_key="train"):
        selected_split = dataset_dict[partition_key]
        self.partition = selected_split

    def __len__(self):
        # The length is the split's own row count.
        return self.partition.num_rows

    def __getitem__(self, index):
        # Indexing is delegated unchanged to the underlying split.
        example = self.partition[index]
        return example

# One COLADataset wrapper per split of the tokenized dataset.
train_dataset, val_dataset, test_dataset = (
    COLADataset(cola_tokenized, partition_key=split_name)
    for split_name in ("train", "validation", "test")
)

# All loaders use batches of 12 and 4 worker processes; only the training
# loader shuffles its examples.
train_loader = DataLoader(train_dataset, batch_size=12, shuffle=True, num_workers=4)
val_loader = DataLoader(val_dataset, batch_size=12, num_workers=4)
test_loader = DataLoader(test_dataset, batch_size=12, num_workers=4)

Map:   0%|          | 0/1063 [00:00<?, ? examples/s]



In [14]:
# let's generate the config for the model, it will give a json file with all linear layers names, once config is generated
# we can change the values which linear layer we want to replace with lora
%cd NanoPeft

from nanopeft import ModelConfigGenerator
ModelConfigGenerator(model)

[Errno 2] No such file or directory: 'NanoPeft'
/content/NanoPeft
Configuration saved to nanopeft_configs/6ec8eb54-13e2-4454-a6fe-b73a2d123b44_lora.json


<nanopeft.models.model_config_generator.ModelConfigGenerator at 0x7a14e73cf7c0>

In [17]:
# let's read the config file

import glob
import os
import json

# The generated file name embeds what looks like a per-run id, so a hard-coded
# path only matches the run that produced it. Use the most recently modified
# generated config instead, and fall back to the originally recorded path when
# no generated config is found.
_config_candidates = glob.glob('nanopeft_configs/*_lora.json')
config_path = (
    max(_config_candidates, key=os.path.getmtime)
    if _config_candidates
    else 'nanopeft_configs/6ec8eb54-13e2-4454-a6fe-b73a2d123b44_lora.json'
)

with open(config_path, 'r') as f:
  data = json.load(f)
data

{'config_template': {'query': True,
  'key': False,
  'value': True,
  'dense': False,
  'out_proj': False},
 'layer_role_mapping': {'query': ['roberta.encoder.layer.0.attention.self.query',
   'roberta.encoder.layer.1.attention.self.query',
   'roberta.encoder.layer.2.attention.self.query',
   'roberta.encoder.layer.3.attention.self.query',
   'roberta.encoder.layer.4.attention.self.query',
   'roberta.encoder.layer.5.attention.self.query',
   'roberta.encoder.layer.6.attention.self.query',
   'roberta.encoder.layer.7.attention.self.query',
   'roberta.encoder.layer.8.attention.self.query',
   'roberta.encoder.layer.9.attention.self.query',
   'roberta.encoder.layer.10.attention.self.query',
   'roberta.encoder.layer.11.attention.self.query'],
  'key': ['roberta.encoder.layer.0.attention.self.key',
   'roberta.encoder.layer.1.attention.self.key',
   'roberta.encoder.layer.2.attention.self.key',
   'roberta.encoder.layer.3.attention.self.key',
   'roberta.encoder.layer.4.attention.self

In [18]:
%cd NanoPeft

# let's define Lora Layer

from torch.utils.data import DataLoader
import torch.nn.functional as F
import torch.nn as nn
import torch

class LoRALayer(nn.Module):
    """Low-rank adapter that computes ``alpha * (x @ A @ B)``.

    ``A`` has shape ``(in_dim, rank)`` and is drawn from a standard normal
    scaled by ``1 / sqrt(rank)``. ``B`` has shape ``(rank, out_dim)`` and
    starts at zero, so a freshly constructed layer outputs zeros.

    Args:
        in_dim: Size of the last dimension of the input.
        out_dim: Size of the last dimension of the output.
        rank: Inner dimension shared by ``A`` and ``B``.
        alpha: Scalar multiplier applied to the low-rank product.
    """

    def __init__(self, in_dim, out_dim, rank, alpha):
        super().__init__()
        init_scale = 1 / torch.sqrt(torch.tensor(rank).float())
        down_proj = torch.randn(in_dim, rank) * init_scale
        up_proj = torch.zeros(rank, out_dim)
        self.A = nn.Parameter(down_proj)
        self.B = nn.Parameter(up_proj)
        self.alpha = alpha

    def forward(self, x):
        # Project down to `rank` dimensions, back up to `out_dim`, then scale.
        low_rank = torch.matmul(torch.matmul(x, self.A), self.B)
        return self.alpha * low_rank


class LinearWithLoRA(nn.Module):
    """Wrap a linear layer and add a LoRA branch to its output.

    Args:
        linear: Layer to wrap; its ``in_features`` and ``out_features``
            attributes size the LoRA branch.
        rank: Rank passed to the LoRA branch.
        alpha: Scaling factor passed to the LoRA branch.
    """

    def __init__(self, linear, rank, alpha):
        super().__init__()
        self.linear = linear
        in_dim, out_dim = linear.in_features, linear.out_features
        self.lora = LoRALayer(in_dim, out_dim, rank, alpha)

    def forward(self, x):
        # The wrapped layer's output plus the LoRA branch's output.
        base_out = self.linear(x)
        lora_out = self.lora(x)
        return base_out + lora_out


# The generated config file can be edited by hand before this step.
# LoRAConfigApplier reads it and applies LoRA to the linear layers whose value
# is true.

import glob
import os

# The generated file name embeds what looks like a per-run id, so a hard-coded
# path only matches the run that produced it. Use the most recently modified
# generated config instead, and fall back to the originally recorded path when
# no generated config is found.
_config_candidates = glob.glob("nanopeft_configs/*_lora.json")
config_path = (
    max(_config_candidates, key=os.path.getmtime)
    if _config_candidates
    else "nanopeft_configs/6ec8eb54-13e2-4454-a6fe-b73a2d123b44_lora.json"
)

from nanopeft import LoRAConfigApplier
lora_applier = LoRAConfigApplier(config_path, model, LinearWithLoRA)
lora_applier.apply_modifications()
print("Total number of trainable parameters after LoRA modification:", LoRAConfigApplier.count_parameters(model))

[Errno 2] No such file or directory: 'NanoPeft'
/content/NanoPeft
Total number of trainable parameters after LoRA modification: 294912


In [21]:
# now let's finetune the lora model

import time

import lightning as L
import torchmetrics
import torch
import torch.nn.functional as F


def compute_accuracy(model, data_loader, device, input_dim=28*28):
    """Return the accuracy of ``model`` over ``data_loader`` as a percentage.

    This helper expects batches that unpack as ``(features, targets)`` and
    flattens the features before the forward pass. The loaders built earlier
    in this notebook yield dict batches instead, so it does not apply to them
    as-is.

    Args:
        model: Module called as ``model(features)`` that returns logits with
            the classes along dimension 1. It is switched to eval mode.
        data_loader: Iterable of ``(features, targets)`` tensor pairs.
        device: Device the tensors are moved to before the forward pass.
        input_dim: Size each example is flattened to. Defaults to ``28*28``,
            the value that used to be hard-coded.

    Returns:
        A 0-dim float tensor holding ``correct / total * 100``.

    Raises:
        ValueError: If ``data_loader`` yields no examples. Previously this
            surfaced as an ``AttributeError`` on the integer accumulator.
    """
    model.eval()
    correct_pred, num_examples = 0, 0
    with torch.no_grad():
        for features, targets in data_loader:
            features = features.view(-1, input_dim).to(device)
            targets = targets.to(device)
            logits = model(features)
            _, predicted_labels = torch.max(logits, 1)
            num_examples += targets.size(0)
            correct_pred += (predicted_labels == targets).sum()

    if num_examples == 0:
        raise ValueError("compute_accuracy: data_loader yielded no examples")

    return correct_pred.float()/num_examples * 100


def train(num_epochs, model, optimizer, train_loader, device):
    """Run a plain PyTorch training loop and print progress.

    Each batch must unpack as ``(features, targets)``; features are flattened
    to ``28*28`` values per example before the forward pass. The loss is
    cross-entropy between the model's logits and the targets.

    Args:
        num_epochs: Number of passes over ``train_loader``.
        model: Module called as ``model(features)`` that returns logits.
        optimizer: Optimizer whose ``zero_grad`` and ``step`` are called once
            per batch.
        train_loader: Sized iterable of ``(features, targets)`` batches.
        device: Device the tensors are moved to.

    Returns:
        None. Loss is printed every 400 batches; training accuracy and elapsed
        time are printed after every epoch, and total time at the end.
    """
    began_at = time.time()

    for epoch_idx in range(num_epochs):
        epoch_number = epoch_idx + 1
        model.train()

        for step, batch in enumerate(train_loader):
            inputs, labels = batch
            inputs = inputs.view(-1, 28*28).to(device)
            labels = labels.to(device)

            # Forward pass, then backward pass on freshly zeroed gradients.
            logits = model(inputs)
            loss = F.cross_entropy(logits, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Report the loss on every 400th batch, starting with batch 0.
            if step % 400 == 0:
                print('Epoch: %03d/%03d | Batch %03d/%03d | Loss: %.4f'
                      % (epoch_number, num_epochs, step,
                          len(train_loader), loss))

        with torch.no_grad():
            epoch_accuracy = compute_accuracy(model, train_loader, device)
            print('Epoch: %03d/%03d training accuracy: %.2f%%' % (
                  epoch_number, num_epochs, epoch_accuracy))

        minutes_so_far = (time.time() - began_at) / 60
        print('Time elapsed: %.2f min' % minutes_so_far)

    total_minutes = (time.time() - began_at) / 60
    print('Total Training Time: %.2f min' % total_minutes)


class CustomLightningModule(L.LightningModule):
    """Lightning wrapper that trains and evaluates a two-class sequence classifier.

    Batches are dicts with ``"input_ids"``, ``"attention_mask"`` and
    ``"label"`` entries. The wrapped model is called with the labels, and its
    output is read through the ``"loss"`` and ``"logits"`` keys.

    Args:
        model: The model to wrap.
        learning_rate: Learning rate for the Adam optimizer (default ``5e-5``).
    """

    def __init__(self, model, learning_rate=5e-5):
        super().__init__()

        self.learning_rate = learning_rate
        self.model = model

        # Separate accuracy metrics for the validation and test loops.
        self.val_acc = torchmetrics.Accuracy(task="multiclass", num_classes=2)
        self.test_acc = torchmetrics.Accuracy(task="multiclass", num_classes=2)

    def forward(self, input_ids, attention_mask, labels):
        return self.model(input_ids, attention_mask=attention_mask, labels=labels)

    def _run_batch(self, batch):
        # Shared forward pass used by the training, validation and test steps.
        return self(
            batch["input_ids"],
            attention_mask=batch["attention_mask"],
            labels=batch["label"],
        )

    def training_step(self, batch, batch_idx):
        loss = self._run_batch(batch)["loss"]
        self.log("train_loss", loss)
        # The returned loss is what gets optimized.
        return loss

    def validation_step(self, batch, batch_idx):
        outputs = self._run_batch(batch)
        self.log("val_loss", outputs["loss"], prog_bar=True)

        predictions = outputs["logits"].argmax(dim=1)
        self.val_acc(predictions, batch["label"])
        self.log("val_acc", self.val_acc, prog_bar=True)

    def test_step(self, batch, batch_idx):
        outputs = self._run_batch(batch)

        predictions = outputs["logits"].argmax(dim=1)
        self.test_acc(predictions, batch["label"])
        self.log("accuracy", self.test_acc, prog_bar=True)

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=self.learning_rate)

# Wrap the model for Lightning, keeping the default learning rate (5e-5).
lightning_model = CustomLightningModule(model)

In [23]:
import lightning as L
from lightning.pytorch.loggers import CSVLogger
from lightning.pytorch.callbacks import ModelCheckpoint

import pandas as pd
import torch


# Checkpointing: keep only the single checkpoint with the highest "val_acc",
# the metric logged in CustomLightningModule.validation_step.
callbacks = [
    ModelCheckpoint(
        save_top_k=1, mode="max", monitor="val_acc"
    )  # save top 1 model
]
# Logged metrics are written as CSV under logs/my-model.
logger = CSVLogger(save_dir="logs/", name="my-model")


# Train for 3 epochs on one GPU with 16-bit mixed precision.
# NOTE(review): accelerator="gpu" is hard-coded, so this presumably needs a
# GPU runtime -- confirm before running elsewhere.
trainer = L.Trainer(
    max_epochs=3,
    callbacks=callbacks,
    accelerator="gpu",
    precision="16-mixed",
    devices=1,
    logger=logger,
    log_every_n_steps=10,
)

# Time the whole fit call (wall clock).
import time
start = time.time()

trainer.fit(model=lightning_model,
            train_dataloaders=train_loader,
            val_dataloaders=val_loader)

end = time.time()
elapsed = end - start
print(f"Time elapsed {elapsed/60:.2f} min")



INFO: Using 16bit Automatic Mixed Precision (AMP)
INFO:lightning.pytorch.utilities.rank_zero:Using 16bit Automatic Mixed Precision (AMP)
INFO: GPU available: True (cuda), used: True
INFO:lightning.pytorch.utilities.rank_zero:GPU available: True (cuda), used: True
INFO: TPU available: False, using: 0 TPU cores
INFO:lightning.pytorch.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO: IPU available: False, using: 0 IPUs
INFO:lightning.pytorch.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO: HPU available: False, using: 0 HPUs
INFO:lightning.pytorch.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:lightning.pytorch.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO: 
  | Name     | Type                             | Params
--------------------------------------------------------------
0 | model    | RobertaForSequenceClassification | 124 M 
1 | val_acc  | MulticlassAccuracy               

Sanity Checking: |          | 0/? [00:00<?, ?it/s]



Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO: `Trainer.fit` stopped: `max_epochs=3` reached.
INFO:lightning.pytorch.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=3` reached.


Time elapsed 2.25 min


In [24]:
# Evaluate the best checkpoint (ckpt_path="best") on the training and
# validation loaders. Each result is indexed as result[0]['accuracy'] in the
# next cell; "accuracy" is the key logged by test_step.
# train_loader was built with shuffle=True, which is what triggers Lightning's
# "sampler has shuffling enabled" warning on the first call.
train_acc = trainer.test(lightning_model, dataloaders=train_loader, ckpt_path="best", verbose=False)
val_acc = trainer.test(lightning_model, dataloaders=val_loader, ckpt_path="best", verbose=False)
# The test split is deliberately not evaluated.
# NOTE(review): presumably because the GLUE test split has no public labels -- confirm.
# test_acc = trainer.test(lightning_model, dataloaders=test_loader, ckpt_path="best", verbose=False)

INFO: Restoring states from the checkpoint path at logs/my-model/version_0/checkpoints/epoch=0-step=713.ckpt
INFO:lightning.pytorch.utilities.rank_zero:Restoring states from the checkpoint path at logs/my-model/version_0/checkpoints/epoch=0-step=713.ckpt
INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:lightning.pytorch.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO: Loaded model weights from the checkpoint at logs/my-model/version_0/checkpoints/epoch=0-step=713.ckpt
INFO:lightning.pytorch.utilities.rank_zero:Loaded model weights from the checkpoint at logs/my-model/version_0/checkpoints/epoch=0-step=713.ckpt
/usr/local/lib/python3.10/dist-packages/lightning/pytorch/trainer/connectors/data_connector.py:492: Your `test_dataloader`'s sampler has shuffling enabled, it is strongly recommended that you turn shuffling off for val/test dataloaders.


Testing: |          | 0/? [00:00<?, ?it/s]

INFO: Restoring states from the checkpoint path at logs/my-model/version_0/checkpoints/epoch=0-step=713.ckpt
INFO:lightning.pytorch.utilities.rank_zero:Restoring states from the checkpoint path at logs/my-model/version_0/checkpoints/epoch=0-step=713.ckpt
INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:lightning.pytorch.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO: Loaded model weights from the checkpoint at logs/my-model/version_0/checkpoints/epoch=0-step=713.ckpt
INFO:lightning.pytorch.utilities.rank_zero:Loaded model weights from the checkpoint at logs/my-model/version_0/checkpoints/epoch=0-step=713.ckpt


Testing: |          | 0/? [00:00<?, ?it/s]

In [25]:
# Report the accuracies as percentages; the logged values are multiplied by 100 here.
print(f"Train acc: {train_acc[0]['accuracy']*100:2.2f}%")
print(f"Val acc:   {val_acc[0]['accuracy']*100:2.2f}%")

Train acc: 70.44%
Val acc:   69.13%
