In [2]:
import torch.nn as nn
import torch

class LoRALayer(nn.Module):
    def __init__(self, in_dim, out_dim, rank,  alpha):
        super().__init__()
        std_dev = 1 / torch.sqrt(torch.tensor(rank).float())
        self.A = nn.Parameter(torch.rand(in_dim, rank)*std_dev)
        self.B = nn.Parameter(torch.zeros(rank, out_dim))
        self.alpha = alpha
    def forward(self, x):
        x = self.alpha * (x @ self.A @ self.B)
        return x

In [3]:
class LinearWithLoRA(nn.Module):
    def __init__(self, linear, rank, alpha):
        super().__init__()
        self.linear = linear
        self.lora = LoRALayer(
            linear.in_features,
            linear.out_features,
            rank,
            alpha
        )
    def forward(self,x):
        return self.linear(x) + self.lora(x)

In [4]:
torch.manual_seed(123)
layer = nn.Linear(10, 2)
x = torch.randn((1, 10))

print("Original output:", layer(x))

Original output: tensor([[0.6639, 0.4487]], grad_fn=<AddmmBackward0>)


In [5]:
layer_lora_1 = LinearWithLoRA(layer, rank=2, alpha=4)
print("LoRA output:", layer_lora_1(x))

LoRA output: tensor([[0.6639, 0.4487]], grad_fn=<AddBackward0>)


In [42]:
class TestMLP(nn.Module):
    def __init__(self, num_features, num_hidden1, num_hidden2, num_class):
        super().__init__()
        self.layers = nn.Sequential(
            nn.Linear(num_features, num_hidden1),
            nn.ReLU(),
            nn.Linear(num_hidden1, num_hidden2),
            nn.ReLU(),
            
            nn.Linear(num_hidden2, num_class)
        )
    def forward(self, x):
        x = self.layers(x)
        return x        

In [43]:
num_features = 64
num_hidden1 = 32
num_hidden2=64
num_class=3

model = TestMLP(
    num_features=num_features,
    num_hidden1=num_hidden1,
    num_hidden2=num_hidden2,
    num_class=num_class
)

print(model)

TestMLP(
  (layers): Sequential(
    (0): Linear(in_features=64, out_features=32, bias=True)
    (1): ReLU()
    (2): Linear(in_features=32, out_features=64, bias=True)
    (3): ReLU()
    (4): Linear(in_features=64, out_features=3, bias=True)
  )
)


In [44]:
model.layers[0] = LinearWithLoRA(model.layers[0], rank=4, alpha=8)
model.layers[2] = LinearWithLoRA(model.layers[2], rank=4, alpha=8)
model.layers[4] = LinearWithLoRA(model.layers[4], rank=4, alpha=8)

In [15]:
print(model)

TestMLP(
  (layers): Sequential(
    (0): LinearWithLoRA(
      (linear): Linear(in_features=64, out_features=32, bias=True)
      (lora): LoRALayer()
    )
    (1): ReLU()
    (2): LinearWithLoRA(
      (linear): Linear(in_features=32, out_features=64, bias=True)
      (lora): LoRALayer()
    )
    (3): ReLU()
    (4): LinearWithLoRA(
      (linear): Linear(in_features=64, out_features=3, bias=True)
      (lora): LoRALayer()
    )
  )
)


In [32]:
i = 0
for child in model.children():
    for c in child.children():
        print(c)
        for i , j in c.named_parameters():
            print(j)
        break
        if isinstance(c, LinearWithLoRA):
            print("this is lora")
        print('---------')

LinearWithLoRA(
  (linear): Linear(in_features=64, out_features=32, bias=True)
  (lora): LoRALayer()
)
Parameter containing:
tensor([[-0.1158, -0.1105, -0.0548,  ..., -0.0058,  0.0012,  0.0280],
        [ 0.0602,  0.0445,  0.0696,  ..., -0.0741,  0.0580,  0.0289],
        [-0.0630, -0.1075, -0.0224,  ..., -0.1188, -0.0248,  0.1149],
        ...,
        [ 0.1098, -0.0895, -0.0360,  ..., -0.0164,  0.0301,  0.0624],
        [ 0.0312, -0.0655, -0.0765,  ..., -0.0780, -0.1172,  0.0231],
        [-0.0661, -0.0488,  0.0720,  ...,  0.0444,  0.0489, -0.0636]],
       requires_grad=True)
Parameter containing:
tensor([-0.0066, -0.1249, -0.0943, -0.0533, -0.0540, -0.0756,  0.0510,  0.0071,
         0.0478, -0.0202,  0.0831, -0.0763,  0.0425, -0.0740, -0.0455,  0.1156,
        -0.0246,  0.0130,  0.0421,  0.0892,  0.0456, -0.0929, -0.0974, -0.0031,
        -0.0114,  0.0788, -0.0324,  0.1228,  0.1034,  0.1236,  0.0059, -0.0088],
       requires_grad=True)
Parameter containing:
tensor([[0.4334, 0.489

In [45]:
def freeze_linear_layers(model):
    for child in model.children():
        if isinstance(child, nn.Linear):
            for param in child.parameters():
                param.requires_grad = False
        else:
            # Recursively freeze linear layers in children modules
            freeze_linear_layers(child)

freeze_linear_layers(model)
for name, param in model.named_parameters():
    print(f'{name}:{param.requires_grad}')

layers.0.linear.weight:False
layers.0.linear.bias:False
layers.0.lora.A:True
layers.0.lora.B:True
layers.2.linear.weight:False
layers.2.linear.bias:False
layers.2.lora.A:True
layers.2.lora.B:True
layers.4.linear.weight:False
layers.4.linear.bias:False
layers.4.lora.A:True
layers.4.lora.B:True
