In [1]:
import torch.nn as nn
import torch

# Build the LoRA layer
class LoRALayer(nn.Module):
    """Low-rank adapter that computes ``alpha * (x @ A @ B)``.

    Args:
        in_dim: Size of the last dimension of the input.
        out_dim: Size of the last dimension of the output.
        rank: Inner dimension shared by ``A`` (in_dim x rank) and
            ``B`` (rank x out_dim).
        alpha: Constant multiplier applied to the low-rank product.
    """

    def __init__(self, in_dim, out_dim, rank,  alpha):
        super().__init__()
        # 1/sqrt(rank), used as the standard deviation of A's initial values.
        std_dev = 1 / torch.sqrt(torch.tensor(rank).float())
        # Zero-mean normal samples (randn). Uniform [0, 1) samples (rand) would
        # all be non-negative, so scaling them by std_dev would not give a
        # zero-centred initialisation with that spread.
        self.A = nn.Parameter(torch.randn(in_dim, rank) * std_dev)
        # B starts at zero, so the layer outputs zeros until B is updated.
        self.B = nn.Parameter(torch.zeros(rank, out_dim))
        self.alpha = alpha

    def forward(self, x):
        """Return ``alpha * (x @ A @ B)`` for input ``x``."""
        return self.alpha * (x @ self.A @ self.B)

In [2]:
class LinearWithLoRA(nn.Module):
    """Wrap a linear layer and add the output of a LoRA branch to its output.

    Args:
        linear: Layer to wrap; its ``in_features`` and ``out_features``
            attributes determine the size of the LoRA branch.
        rank: Rank forwarded to ``LoRALayer``.
        alpha: Scaling factor forwarded to ``LoRALayer``.
    """

    def __init__(self, linear, rank, alpha):
        super().__init__()
        self.linear = linear
        in_dim, out_dim = linear.in_features, linear.out_features
        self.lora = LoRALayer(in_dim, out_dim, rank, alpha)

    def forward(self,x):
        """Return ``linear(x) + lora(x)``."""
        base_out = self.linear(x)
        lora_out = self.lora(x)
        return base_out + lora_out

In [3]:
# Reproducible baseline: seed the RNG first, then create the layer and the
# input in this order so both draw from the same random stream on every run.
torch.manual_seed(123)
layer = nn.Linear(in_features=10, out_features=2)
x = torch.randn(1, 10)

print("Original output:", layer(x))

Original output: tensor([[0.6639, 0.4487]], grad_fn=<AddmmBackward0>)


In [4]:
# Wrap the baseline layer with LoRA and run the same input through it.
# LoRALayer's B matrix is initialised to zeros, so the LoRA branch adds nothing yet.
layer_lora_1 = LinearWithLoRA(linear=layer, rank=2, alpha=4)
print("LoRA output:", layer_lora_1(x))

LoRA output: tensor([[0.6639, 0.4487]], grad_fn=<AddBackward0>)


In [5]:
class TestMLP(nn.Module):
    """Small MLP: Linear -> ReLU -> Linear -> ReLU -> Linear.

    Args:
        num_features: Input size of the first linear layer.
        num_hidden1: Output size of the first linear layer.
        num_hidden2: Output size of the second linear layer.
        num_class: Output size of the final linear layer.
    """

    def __init__(self, num_features, num_hidden1, num_hidden2, num_class):
        super().__init__()
        widths = (num_features, num_hidden1, num_hidden2, num_class)
        stack = []
        # Linear layers are created in input-to-output order, each followed
        # by a ReLU.
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            stack.append(nn.Linear(fan_in, fan_out))
            stack.append(nn.ReLU())
        stack.pop()  # no activation after the final linear layer
        self.layers = nn.Sequential(*stack)

    def forward(self, x):
        """Run ``x`` through the sequential stack."""
        return self.layers(x)

In [19]:
# Layer sizes for the demo MLP.
num_features, num_hidden1 = 64, 32
num_hidden2, num_class = 64, 3

# Arguments are passed in the order of TestMLP's signature.
model = TestMLP(num_features, num_hidden1, num_hidden2, num_class)

print(model)

TestMLP(
  (layers): Sequential(
    (0): Linear(in_features=64, out_features=32, bias=True)
    (1): ReLU()
    (2): Linear(in_features=32, out_features=64, bias=True)
    (3): ReLU()
    (4): Linear(in_features=64, out_features=3, bias=True)
  )
)


In [20]:
def convert_layers(model, rank=4, alpha=8, verbose=True):
    """Recursively replace every ``nn.Linear`` below ``model`` with ``LinearWithLoRA``.

    The replacement happens in place on the parent module via ``setattr``.
    Layers that are already wrapped in ``LinearWithLoRA`` are skipped, so
    calling this function more than once does not stack adapters.

    Args:
        model: Module whose descendants are converted.
        rank: LoRA rank used for every wrapped layer.
        alpha: LoRA scaling factor used for every wrapped layer.
        verbose: When true, print each visited child's name and module.
    """
    for name, module in model.named_children():
        if verbose:
            print('name:',name)
            print('module', module)
        if isinstance(module, LinearWithLoRA):
            # Already converted: descending would wrap its inner ``linear``
            # a second time.
            continue
        if isinstance(module, nn.Linear):
            # Replace these values with your own via the rank/alpha arguments.
            setattr(model, name, LinearWithLoRA(module, rank=rank, alpha=alpha))
        else:
            convert_layers(module, rank=rank, alpha=alpha, verbose=verbose)
convert_layers(model)

name: layers
module Sequential(
  (0): Linear(in_features=64, out_features=32, bias=True)
  (1): ReLU()
  (2): Linear(in_features=32, out_features=64, bias=True)
  (3): ReLU()
  (4): Linear(in_features=64, out_features=3, bias=True)
)
name: 0
module Linear(in_features=64, out_features=32, bias=True)
name: 1
module ReLU()
name: 2
module Linear(in_features=32, out_features=64, bias=True)
name: 3
module ReLU()
name: 4
module Linear(in_features=64, out_features=3, bias=True)


In [16]:
# Show the module tree to check which layers are now wrapped in LinearWithLoRA.
print(model)

TestMLP(
  (layers): Sequential(
    (0): LinearWithLoRA(
      (linear): Linear(in_features=64, out_features=32, bias=True)
      (lora): LoRALayer()
    )
    (1): ReLU()
    (2): LinearWithLoRA(
      (linear): Linear(in_features=32, out_features=64, bias=True)
      (lora): LoRALayer()
    )
    (3): ReLU()
    (4): LinearWithLoRA(
      (linear): Linear(in_features=64, out_features=3, bias=True)
      (lora): LoRALayer()
    )
  )
)


In [17]:
# List every parameter with its trainable flag, one "name:flag" per line.
for param_name, tensor in model.named_parameters():
    print(f'{param_name}:{tensor.requires_grad}')

layers.0.linear.weight:True
layers.0.linear.bias:True
layers.0.lora.A:True
layers.0.lora.B:True
layers.2.linear.weight:True
layers.2.linear.bias:True
layers.2.lora.A:True
layers.2.lora.B:True
layers.4.linear.weight:True
layers.4.linear.bias:True
layers.4.lora.A:True
layers.4.lora.B:True


In [7]:
# Manually wrap the three linear layers of the MLP with LoRA.
# Only plain nn.Linear layers are wrapped: a layer that is already a
# LinearWithLoRA has no in_features/out_features attributes, so wrapping it
# again would raise an AttributeError.
for layer_idx in (0, 2, 4):
    if isinstance(model.layers[layer_idx], nn.Linear):
        model.layers[layer_idx] = LinearWithLoRA(model.layers[layer_idx], rank=4, alpha=8)

In [11]:
# Show the module tree again to inspect the wrapped layers.
print(model)

TestMLP(
  (layers): Sequential(
    (0): LinearWithLoRA(
      (linear): Linear(in_features=64, out_features=32, bias=True)
      (lora): LoRALayer()
    )
    (1): ReLU()
    (2): LinearWithLoRA(
      (linear): Linear(in_features=32, out_features=64, bias=True)
      (lora): LoRALayer()
    )
    (3): ReLU()
    (4): LinearWithLoRA(
      (linear): Linear(in_features=64, out_features=3, bias=True)
      (lora): LoRALayer()
    )
  )
)


In [19]:
# L2 norm of the first wrapped layer's weight taken over dim 0; keepdim=True
# keeps the reduced dimension, so the result is a single-row tensor.
model.layers[0].linear.weight.norm(p=2, dim=0, keepdim=True)

tensor([[0.4290, 0.4223, 0.4418, 0.4131, 0.3538, 0.4257, 0.3908, 0.3639, 0.4376,
         0.4444, 0.4105, 0.4212, 0.4105, 0.4042, 0.3824, 0.3944, 0.3684, 0.4318,
         0.4080, 0.3716, 0.3509, 0.4130, 0.4303, 0.4321, 0.4073, 0.4147, 0.4601,
         0.4353, 0.4566, 0.3958, 0.4717, 0.4171, 0.3877, 0.4053, 0.4421, 0.4552,
         0.3942, 0.4467, 0.4286, 0.4359, 0.3872, 0.3767, 0.4176, 0.3911, 0.3775,
         0.5211, 0.4224, 0.3992, 0.4345, 0.3712, 0.3642, 0.3961, 0.4215, 0.3368,
         0.4231, 0.3695, 0.4020, 0.4400, 0.3733, 0.4158, 0.4570, 0.3898, 0.4052,
         0.4443]])

In [9]:
# For each top-level child, inspect only its first sub-layer: print the module,
# dump its parameters, and report whether it is wrapped with LoRA.
for child in model.children():
    for sub_module in child.children():
        print(sub_module)
        for param in sub_module.parameters():
            print(param)
        if isinstance(sub_module, LinearWithLoRA):
            print("this is lora")
        print('---------')
        # Stop after the first sub-layer. The break comes last so that the
        # LoRA check and the separator above are actually executed.
        break

LinearWithLoRA(
  (linear): Linear(in_features=64, out_features=32, bias=True)
  (lora): LoRALayer()
)
Parameter containing:
tensor([[-0.1181, -0.0842, -0.0498,  ..., -0.0536,  0.1188, -0.0621],
        [ 0.0551,  0.0490,  0.0349,  ...,  0.0645,  0.0883, -0.1213],
        [-0.1061, -0.1217,  0.0472,  ..., -0.0622,  0.1077, -0.1028],
        ...,
        [-0.0842,  0.0930, -0.0598,  ..., -0.0115,  0.0299, -0.0728],
        [ 0.1118,  0.0820,  0.0991,  ...,  0.0118, -0.0881,  0.0676],
        [ 0.0783, -0.0778,  0.0060,  ...,  0.0182,  0.0307, -0.0319]],
       requires_grad=True)
Parameter containing:
tensor([-0.0438,  0.0379,  0.0179,  0.0866, -0.0935,  0.1226, -0.1163, -0.0920,
        -0.0220, -0.1086,  0.0843, -0.0219,  0.0048,  0.0405,  0.0141,  0.1067,
         0.0897,  0.0425, -0.1199,  0.0470, -0.0206,  0.0212,  0.0958, -0.0589,
        -0.0080, -0.1033,  0.0453, -0.0274, -0.0461,  0.0069,  0.0059,  0.0228],
       requires_grad=True)
Parameter containing:
tensor([[3.4769e-01, 1

In [18]:
def freeze_linear_layers(model):
    """Disable gradients for the parameters of every ``nn.Linear`` in ``model``.

    ``model`` itself and all of its descendants are checked, so passing a bare
    ``nn.Linear`` freezes it as well. Parameters of other module types are
    left unchanged.

    Args:
        model: Module to process in place.
    """
    # modules() yields the root module followed by all nested sub-modules.
    for module in model.modules():
        if isinstance(module, nn.Linear):
            for param in module.parameters():
                param.requires_grad = False

# Freeze the linear layers, then list each parameter's trainable flag to
# confirm which parameters still require gradients.
freeze_linear_layers(model)
for param_name, tensor in model.named_parameters():
    print(f'{param_name}:{tensor.requires_grad}')

layers.0.linear.weight:False
layers.0.linear.bias:False
layers.0.lora.A:True
layers.0.lora.B:True
layers.2.linear.weight:False
layers.2.linear.bias:False
layers.2.lora.A:True
layers.2.lora.B:True
layers.4.linear.weight:False
layers.4.linear.bias:False
layers.4.lora.A:True
layers.4.lora.B:True


In [None]:
class LinearWithDoRA(nn.Module):
    """Linear layer plus a normalised LoRA branch scaled by a learnable vector ``m``.

    The forward pass returns ``linear(x) + m * lora(x) / ||lora(x)||``, where
    the L2 norm is taken over the last (feature) dimension.

    NOTE(review): this normalises the LoRA branch's *output*, not a merged
    weight matrix, and ``m`` starts at ones. Confirm whether the
    weight-decomposed formulation is what is intended here.

    Args:
        linear: Layer to wrap; its ``in_features`` and ``out_features``
            attributes determine the size of the LoRA branch and of ``m``.
        rank: Rank forwarded to ``LoRALayer``.
        alpha: Scaling factor forwarded to ``LoRALayer``.
    """

    def __init__(self, linear, rank, alpha):
        super().__init__()
        self.linear = linear
        self.lora = LoRALayer(
            linear.in_features, linear.out_features, rank, alpha
        )
        # One learnable scale per output feature, initialised to 1.
        self.m = nn.Parameter(torch.ones(1, linear.out_features))

    def forward(self, x):
        """Return the linear output plus the scaled, normalised LoRA output."""
        linear_out = self.linear(x)
        lora_out = self.lora(x)
        # Normalise over the last dimension so inputs with extra leading
        # dimensions are normalised per feature vector; for 2-D input this is
        # the same as dim=1. The epsilon avoids division by zero when the
        # LoRA output is all zeros.
        lora_out_norm = lora_out / (lora_out.norm(p=2, dim=-1, keepdim=True) + 1e-9)
        dora_modification = self.m * lora_out_norm
        return linear_out + dora_modification