<a href="https://colab.research.google.com/github/thinhrick0101/Build-LoRA-from-scatch/blob/main/build_lora_from_scratch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [8]:
import math
import torch
from torch import nn
from torch.nn.parameter import Parameter
from transformers import AutoTokenizer, AutoModel

# **Implement LoRA Layer**

In [31]:
class LoRALayer(nn.Module):
    """Wrap a linear layer with a trainable low-rank (LoRA) update.

    The wrapped layer's parameters are frozen and its output is augmented with
    ``(x @ A.T @ B.T) * (alpha / rank)``, where ``A`` has shape
    ``(rank, in_features)`` and ``B`` has shape ``(out_features, rank)``.
    ``B`` starts at zero, so right after construction the wrapper returns
    exactly what the original layer returns.

    Args:
        original_layer: Layer to augment. Must expose ``in_features`` and
            ``out_features`` (e.g. ``nn.Linear``).
        rank: Rank of the low-rank matrices; must be positive.
        alpha: Numerator of the scaling factor ``alpha / rank``.

    Raises:
        ValueError: If ``rank`` is not positive.
    """

    def __init__(self, original_layer, rank=8, alpha=8):
        super().__init__()
        if rank <= 0:
            # Fail early with a clear message instead of a ZeroDivisionError
            # (rank == 0) or an opaque tensor-size error (rank < 0).
            raise ValueError(f"rank must be a positive integer, got {rank}")

        self.original_layer = original_layer  # Original linear layer to be augmented
        self.rank = rank      # Rank of the low-rank matrices
        self.alpha = alpha    # Scaling factor

        # Freeze parameters in the original layer, remembering the first one
        # so the adapter can be created on the same device / dtype.
        reference = None
        for param in original_layer.parameters():
            param.requires_grad = False
            if reference is None:
                reference = param

        # Get dimensions from the original layer
        in_features = original_layer.in_features
        out_features = original_layer.out_features

        # Place A and B next to the wrapped weights; otherwise a layer that
        # already lives on an accelerator or uses half/double precision would
        # hit a device/dtype mismatch in forward().
        factory_kwargs = {}
        if reference is not None:
            factory_kwargs["device"] = reference.device
            if reference.is_floating_point():
                factory_kwargs["dtype"] = reference.dtype

        # Initialize low-rank matrices: A is Gaussian, B is zero so the
        # initial LoRA contribution is exactly zero.
        self.A = nn.Parameter(torch.randn(rank, in_features, **factory_kwargs))   # (r, in)
        self.B = nn.Parameter(torch.zeros(out_features, rank, **factory_kwargs))  # (out, r)

        # Scaling factor (alpha / r)
        self.scale = alpha / rank

    def forward(self, x):
        """Return the frozen layer's output plus the scaled low-rank update.

        Args:
            x: Input tensor whose last dimension is ``in_features``.

        Returns:
            Tensor with last dimension ``out_features``.
        """
        # Original layer's output
        orig_output = self.original_layer(x)  # (..., out_features)

        # LoRA term: (x @ A.T) @ B.T scaled by alpha / r
        lora_output = (x @ self.A.T @ self.B.T) * self.scale

        return orig_output + lora_output

In [32]:
def apply_lora(model, layers_to_replace, rank=8, alpha=8):
    """
    Swap selected linear sub-layers of ``model`` for LoRA-wrapped versions.

    The module tree is walked depth-first. Every direct or nested child that
    is an ``nn.Linear`` and whose attribute name appears in
    ``layers_to_replace`` is replaced in place by ``LoRALayer(child, rank,
    alpha)``. The (mutated) ``model`` is returned for convenience.
    """
    for child_name, child in model.named_children():
        # Descend first so nested targets are handled before this level.
        has_submodules = next(child.children(), None) is not None
        if has_submodules:
            apply_lora(child, layers_to_replace, rank, alpha)

        is_target = isinstance(child, nn.Linear) and child_name in layers_to_replace
        if not is_target:
            continue

        # Re-bind the attribute on the parent to the LoRA wrapper.
        setattr(model, child_name, LoRALayer(child, rank, alpha))

    return model

In [38]:
# Load the pretrained base model that will later be wrapped with LoRA layers.
# NOTE(review): the load log below reports that many vision_model weights
# (mlp.fc1 / mlp.fc2 / k_proj.bias) were newly initialized rather than read
# from the checkpoint — confirm the checkpoint matches the architecture that
# AutoModel resolves to before relying on these weights.
model = AutoModel.from_pretrained('nvidia/QLIP-L-14-392')


config.json:   0%|          | 0.00/2.06k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.95G [00:00<?, ?B/s]

Some weights of CLIPModel were not initialized from the model checkpoint at nvidia/QLIP-L-14-392 and are newly initialized: ['vision_model.encoder.layers.0.mlp.fc1.bias', 'vision_model.encoder.layers.0.mlp.fc1.weight', 'vision_model.encoder.layers.0.mlp.fc2.bias', 'vision_model.encoder.layers.0.mlp.fc2.weight', 'vision_model.encoder.layers.0.self_attn.k_proj.bias', 'vision_model.encoder.layers.1.mlp.fc1.bias', 'vision_model.encoder.layers.1.mlp.fc1.weight', 'vision_model.encoder.layers.1.mlp.fc2.bias', 'vision_model.encoder.layers.1.mlp.fc2.weight', 'vision_model.encoder.layers.1.self_attn.k_proj.bias', 'vision_model.encoder.layers.10.mlp.fc1.bias', 'vision_model.encoder.layers.10.mlp.fc1.weight', 'vision_model.encoder.layers.10.mlp.fc2.bias', 'vision_model.encoder.layers.10.mlp.fc2.weight', 'vision_model.encoder.layers.10.self_attn.k_proj.bias', 'vision_model.encoder.layers.11.mlp.fc1.bias', 'vision_model.encoder.layers.11.mlp.fc1.weight', 'vision_model.encoder.layers.11.mlp.fc2.bias'

In [39]:
# Baseline: how many parameters require gradients in the freshly loaded model.
total_params_no_lora = sum(
    weight.numel()
    for weight in model.parameters()
    if weight.requires_grad
)
total_params_no_lora


360982769

In [40]:
# Freeze every parameter of the base model so that, once LoRA layers are
# added, only parameters created afterwards can require gradients.
for param in model.parameters():
    param.requires_grad = False

In [41]:
# Sanity check: after freezing, no parameter should still require gradients.
total_trainable_params_after_freezing = sum(
    weight.numel()
    for weight in model.parameters()
    if weight.requires_grad
)
total_trainable_params_after_freezing

0

In [42]:
# Wrap every nn.Linear child named "fc1" or "fc2" with a LoRA adapter
# (rank 8, alpha 16, i.e. an alpha / rank scale of 2).
model = apply_lora(
    model=model,
    layers_to_replace=["fc1", "fc2"],
    rank=8,
    alpha=16,
)

In [43]:
# Trainable parameters after LoRA injection: the base model was frozen
# earlier, so only parameters that still require gradients are counted.
# (This name was previously used without ever being defined, which raises
# NameError on a fresh Restart & Run All.)
total_params_with_lora = sum(p.numel() for p in model.parameters() if p.requires_grad)

# Percentage reduction in trainable parameters relative to the unfrozen model.
((total_params_no_lora - total_params_with_lora) / total_params_no_lora) * 100

99.0196363084577

# Conclusion

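By wrapping the `fc1` and `fc2` linear layers with this LoRA layer, we reduce the number of trainable parameters by about 99.02% (from roughly 361M to roughly 3.5M); the frozen base weights are still stored and used in the forward pass.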
