# PEFT


Parameter-Efficient Fine-Tuning (PEFT) is a library that helps fine-tune large models efficiently. Documentation: https://huggingface.co/docs/peft/index

This notebook shows how to customize fine-tuning with PEFT.

The steps:
 1. define configuration: config = XXXConfig(task_type=)
 2. get model: peft_model = get_peft_model(model, config)

To load a saved adapter, we use:
 - peft_model = PeftModel.from_pretrained(model, checkpoint)
 - merging the adapter into the base model: model = peft_model.merge_and_unload()

In [1]:
import copy

import torch
import torch.nn as nn
from peft import LoraConfig, get_peft_model, PeftModel

## 1. Custom model

In [2]:
# Base model used in the examples: a small 10 -> 100 -> 10 network

in_dim, hidden_dim, out_dim = 10, 100, 10

model = nn.Sequential(
    nn.Linear(in_dim, hidden_dim),
    nn.ReLU(),
    nn.Linear(hidden_dim, out_dim),
)
model

Sequential(
  (0): Linear(in_features=10, out_features=100, bias=True)
  (1): ReLU()
  (2): Linear(in_features=100, out_features=10, bias=True)
)

In [3]:
# Use LoRA

# target_modules=["0"] asks PEFT to add LoRA to layer 0 of the Sequential model

config = LoraConfig(target_modules=["0"])

In [4]:
# Wrap the base model into a PEFT model using the LoRA config

peft_model = get_peft_model(model, config)

In [5]:
# The base model (a Sequential) is now wrapped in a PeftModel.

# Layer 0 of the base model was wrapped as "lora.Linear",
# in which 2 additional linear layers were added: lora_A and lora_B.
# They are stored under the adapter name, which is "default" here
# because no adapter name was given.

# The 2 other layers of the base model were left unchanged.

peft_model

PeftModel(
  (base_model): LoraModel(
    (model): Sequential(
      (0): lora.Linear(
        (base_layer): Linear(in_features=10, out_features=100, bias=True)
        (lora_dropout): ModuleDict(
          (default): Identity()
        )
        (lora_A): ModuleDict(
          (default): Linear(in_features=10, out_features=8, bias=False)
        )
        (lora_B): ModuleDict(
          (default): Linear(in_features=8, out_features=100, bias=False)
        )
        (lora_embedding_A): ParameterDict()
        (lora_embedding_B): ParameterDict()
      )
      (1): ReLU()
      (2): Linear(in_features=100, out_features=10, bias=True)
    )
  )
)

## 2. Multiple adapters

If we have several LoRA adapters for the same base model, we don't need to load the base model several times. Instead, we load the base model only once and activate the LoRA adapter we need each time.

In [6]:
# example

# We create 2 LoRA adapters for the same base model and save them.
# 2 folders named "lora1" and "lora2" are created under `path`,
# and each folder stores the files describing that adapter.

path = "../tmp/checkpoint/"
model = nn.Sequential(
    nn.Linear(10, 100),
    nn.ReLU(),
    nn.Linear(100, 10),
)

# get_peft_model injects the LoRA layers into the model it receives (in place).
# Each adapter therefore gets its own copy of the base model; otherwise lora 2
# would be built on top of a model that already contains lora 1.

# lora 1 adds LoRA to layer 0
config1 = LoraConfig(target_modules=["0"])
model1 = get_peft_model(copy.deepcopy(model), config1)
model1.save_pretrained(path+"lora1")

# lora 2 adds LoRA to layer 2
config2 = LoraConfig(target_modules=["2"])
model2 = get_peft_model(copy.deepcopy(model), config2)
model2.save_pretrained(path+"lora2")

In [7]:
# First, rebuild the base model from scratch (a saved one could be loaded instead)

base_layers = (
    nn.Linear(10, 100),
    nn.ReLU(),
    nn.Linear(100, 10),
)
model = nn.Sequential(*base_layers)
model

Sequential(
  (0): Linear(in_features=10, out_features=100, bias=True)
  (1): ReLU()
  (2): Linear(in_features=100, out_features=10, bias=True)
)

In [10]:
# Load lora 1 on top of the base model

# The added layers are now named "lora1" instead of "default"
# because we provide an adapter name using "adapter_name=".
# This is required when working with multiple LoRA adapters.

load_model = PeftModel.from_pretrained(model, model_id=path+"lora1", adapter_name="lora1")
load_model

PeftModel(
  (base_model): LoraModel(
    (model): Sequential(
      (0): lora.Linear(
        (base_layer): Linear(in_features=10, out_features=100, bias=True)
        (lora_dropout): ModuleDict(
          (lora1): Identity()
        )
        (lora_A): ModuleDict(
          (lora1): Linear(in_features=10, out_features=8, bias=False)
        )
        (lora_B): ModuleDict(
          (lora1): Linear(in_features=8, out_features=100, bias=False)
        )
        (lora_embedding_A): ParameterDict()
        (lora_embedding_B): ParameterDict()
      )
      (1): ReLU()
      (2): Linear(in_features=100, out_features=10, bias=True)
    )
  )
)

In [11]:
# Then load lora 2 into the same PeftModel
# Layer 2 is now wrapped as well, and its LoRA layers are named 'lora2'

load_model.load_adapter(path+"lora2", adapter_name='lora2')
load_model

PeftModel(
  (base_model): LoraModel(
    (model): Sequential(
      (0): lora.Linear(
        (base_layer): Linear(in_features=10, out_features=100, bias=True)
        (lora_dropout): ModuleDict(
          (lora1): Identity()
        )
        (lora_A): ModuleDict(
          (lora1): Linear(in_features=10, out_features=8, bias=False)
        )
        (lora_B): ModuleDict(
          (lora1): Linear(in_features=8, out_features=100, bias=False)
        )
        (lora_embedding_A): ParameterDict()
        (lora_embedding_B): ParameterDict()
      )
      (1): ReLU()
      (2): lora.Linear(
        (base_layer): Linear(in_features=100, out_features=10, bias=True)
        (lora_dropout): ModuleDict(
          (lora2): Identity()
        )
        (lora_A): ModuleDict(
          (lora2): Linear(in_features=100, out_features=8, bias=False)
        )
        (lora_B): ModuleDict(
          (lora2): Linear(in_features=8, out_features=10, bias=False)
        )
        (lora_embedding_A): Param

In [12]:
# Check which adapter is currently active

# By default, the first loaded adapter is the active one

load_model.active_adapter

'lora1'

In [13]:
# Run a forward pass with lora1, since it is the active adapter

load_model(torch.ones((1,10)))

tensor([[-0.1971, -0.1050,  0.1308, -0.2249, -0.0797, -0.0572, -0.3061,  0.0732,
          0.0448,  0.1013]], grad_fn=<AddmmBackward0>)

In [19]:
# Zero every lora1 weight so the adapter contributes nothing and the forward
# pass returns the base model output.
# The result is the same as in the previous cell, which means the freshly
# created lora1 already had no effect on the output (presumably because PEFT
# zero-initializes lora_B by default -- see the LoraConfig documentation).

lora1_weight_names = {
    "base_model.model.0.lora_A.lora1.weight",
    "base_model.model.0.lora_B.lora1.weight",
}

# Zero the tensors in place under no_grad: unlike assigning a new
# torch.zeros(...) tensor to .data, this keeps each parameter's device and dtype.
with torch.no_grad():
    for name, params in load_model.named_parameters():
        if name in lora1_weight_names:
            params.zero_()

load_model(torch.ones((1,10)))

tensor([[-0.1971, -0.1050,  0.1308, -0.2249, -0.0797, -0.0572, -0.3061,  0.0732,
          0.0448,  0.1013]], grad_fn=<AddBackward0>)

In [15]:
# Switch the active adapter to lora 2

load_model.set_adapter("lora2")

In [16]:
# Confirm that the active adapter is now lora2

load_model.active_adapter

'lora2'

In [17]:
# Run a forward pass with lora 2
# The output equals the base model output: like lora 1, the untrained lora 2 has no effect yet

load_model(torch.ones((1,10)))

tensor([[-0.1971, -0.1050,  0.1308, -0.2249, -0.0797, -0.0572, -0.3061,  0.0732,
          0.0448,  0.1013]], grad_fn=<AddBackward0>)

## 3. get base model output

In [22]:
# The output is still the base model output, since the LoRA parameters were set to 0

load_model(torch.ones((1,10)))

tensor([[-0.1971, -0.1050,  0.1308, -0.2249, -0.0797, -0.0572, -0.3061,  0.0732,
          0.0448,  0.1013]], grad_fn=<AddmmBackward0>)

In [21]:
# Adapters can also be bypassed temporarily with the disable_adapter() context manager.
# The printed tensor has the same values as the outputs above, i.e. the base model output.

with load_model.disable_adapter():
    base_output = load_model(torch.ones((1,10)))
    print(base_output)

tensor([[-0.1971, -0.1050,  0.1308, -0.2249, -0.0797, -0.0572, -0.3061,  0.0732,
          0.0448,  0.1013]])
