# Lesson 5 - Low-Rank Adaptation

In this lesson, we're going to explore the idea of serving fine-tuned LLMs trained using Low-Rank Adaptation (LoRA).

### Import required packages

In [1]:
import copy
import matplotlib.pyplot as plt
import numpy as np
import random
import time
import torch
import torch.nn.functional as F
from tqdm import tqdm

In [2]:
# set the seed so we get the same results from here on for each run
# NOTE: only torch's RNG is seeded here; the `random` and `numpy` modules
# imported above are not seeded by this call
torch.manual_seed(42)

<torch._C.Generator at 0x7f2ebd311270>

### Create a test model

In [3]:
class TestModel(torch.nn.Module):
    """Toy language model: token embedding -> hidden linear layer -> LM head.

    Args:
        hidden_size: Width of the embedding vectors and of the hidden linear
            layer (which maps hidden_size -> hidden_size).
        vocab_size: Number of rows in the embedding table and number of logits
            produced per position by the LM head. Defaults to 10, the value
            that was previously hard-coded in both places.
    """

    def __init__(self, hidden_size, vocab_size=10):
        super().__init__()
        # the embedding table and the LM head must agree on the vocabulary
        # size, so both are driven by the same argument
        self.embedding = torch.nn.Embedding(vocab_size, embedding_dim=hidden_size)
        self.linear = torch.nn.Linear(in_features=hidden_size, out_features=hidden_size)
        self.lm_head = torch.nn.Linear(in_features=hidden_size, out_features=vocab_size)

    def forward(self, input_ids):
        """Return one logit per vocabulary entry for each position of ``input_ids``."""
        x = self.embedding(input_ids)
        x = self.linear(x)
        x = self.lm_head(x)

        return x

In [4]:
# set a reasonably large hidden size to illustrate the small fraction of
# params needed to be added for LoRA
# (this value sets the model's embedding/linear width and the LoRA A/B shapes below)
hidden_size = 1024

In [5]:
# build the toy model; its weights are drawn from the torch RNG seeded above
model = TestModel(hidden_size=hidden_size)

In [6]:
# dummy inputs: a single sequence (batch size 1) of 8 token ids,
# each a valid index into the model's 10-row embedding table
input_ids = torch.LongTensor([[0,1,2,3,4,5,6,7]])

In [7]:
input_ids

tensor([[0, 1, 2, 3, 4, 5, 6, 7]])

In [8]:
# toy example of a detokenizer. 
# The vocabulary only consists of 10 words (different colors)
# The list position is the token id: detokenizer[i] is the word for token id i
detokenizer = [
    "red",
    "orange",
    "yellow",
    "green",
    "blue",
    "indigo",
    "violet",
    "magenta",
    "marigold",
    "chartreuse",
]

### Reuse the `generate_token` function from Lesson 2

In [10]:
def generate_token(model, **kwargs):
    """Greedily decode the next token for every sequence in the batch.

    Calls ``model(**kwargs)`` with gradient tracking disabled, keeps the
    logits at the last sequence position, takes the argmax over the last
    remaining axis, and looks each winning id up in the module-level
    ``detokenizer`` list.

    Args:
        model: Callable returning logits indexed as (batch, sequence, vocab).
        **kwargs: Forwarded unchanged to ``model``.

    Returns:
        list: One ``detokenizer`` entry per batch row.
    """
    with torch.no_grad():
        all_logits = model(**kwargs)

    # (batch, sequence, vocab) -> (batch, vocab): only the final position
    # matters when predicting the next token
    final_step = all_logits[:, -1, :]
    best_ids = final_step.argmax(dim=1).tolist()

    return [detokenizer[idx] for idx in best_ids]

In [11]:
# generate next token: greedy pick from the logits at the last position of input_ids
# (returns a list with one decoded word per batch row)
next_token = generate_token(model=model, input_ids=input_ids)
next_token

['magenta']

In [12]:
# dummy input tensor
# shape: (batch_size, sequence_length, hidden_size)
# the last dimension uses `hidden_size` instead of a literal 1024 so that X
# stays compatible with model.linear if the configured hidden size is changed
X = torch.randn(1, 8, hidden_size)

### Let's set up the LoRA computation

In [13]:
# LoRA A and B tensors, here with rank 2 and random (standard normal) entries
# A has shape (hidden_size, rank)
# B has shape (rank, hidden_size)
lora_a = torch.randn(hidden_size, 2)
lora_b = torch.randn(2, hidden_size)

In [14]:
# weight matrix of the base linear layer
# (torch.nn.Linear stores it as (out_features, in_features); both equal hidden_size here)
W = model.linear.weight

In [15]:
W.shape

torch.Size([1024, 1024])

Let's check the shape of the adapter matrix `A @ B`:

In [16]:
# expand the low-rank pair into the full (hidden_size, hidden_size) matrix it
# represents, so its shape can be compared with W
W2 = lora_a @ lora_b

In [17]:
W2.shape

torch.Size([1024, 1024])

In [18]:
# Compare number of elements of A and B with number of elements of W
# W here has shape (hidden_size, hidden_size)
# so the ratio is (2 * hidden_size * rank) / hidden_size**2 = 2 * rank / hidden_size
lora_numel = lora_a.numel() + lora_b.numel()
base_numel = W.numel()
print(f"|A+B|/|W|: {lora_numel/base_numel}")

|A+B|/|W|: 0.00390625


### Let's run the LoRA computation

In [19]:
# compute the output of the original linear layer
# (torch.nn.Linear applies X @ W.T + b, i.e. the transposed weight plus a bias)
base_output = model.linear(X)

# compute the output of X @ A @ B (the added lora adapter)
# `@` is left-associative, so this is (X @ A) @ B: the input is first projected
# down to rank 2 and the full (hidden_size, hidden_size) product A @ B is never built
lora_output = X @ lora_a @ lora_b

# sum them together
total_output = base_output + lora_output

# output should have the same shape as the original output:
# (batch_size, sequence_length, hidden_size)
total_output.shape

torch.Size([1, 8, 1024])

In [20]:
class LoraLayer(torch.nn.Module):
    """Wrap a linear layer and add a rank-``r`` LoRA term to its output.

    ``forward(x)`` returns ``base_layer(x) + x @ lora_a @ lora_b``.

    Args:
        base_layer: Layer exposing a 2-D ``weight`` laid out as
            (out_features, in_features), e.g. ``torch.nn.Linear``.
        r: Rank of the adapter, i.e. the inner dimension shared by
            ``lora_a`` and ``lora_b``.

    Attributes:
        lora_a: Tensor of shape (in_features, r).
        lora_b: Tensor of shape (r, out_features).

    Notes:
        Both factors are randomly initialised, so the adapter changes the
        layer's output immediately. They are stored as plain tensor
        attributes (not parameters or buffers), so they are not part of
        ``state_dict()`` and are not moved by a later ``.to(...)`` call.
    """

    def __init__(self, base_layer, r):
        super().__init__()
        self.base_layer = base_layer

        # torch.nn.Linear keeps its weight as (out_features, in_features), so
        # the first dimension is the OUTPUT size. Unpacking it the other way
        # round only works for square layers and breaks forward() otherwise.
        weight = self.base_layer.weight
        d_out, d_in = weight.shape

        # create the adapter on the same device / dtype as the wrapped layer
        # so the matmuls in forward() are compatible with its output
        self.lora_a = torch.randn(d_in, r, dtype=weight.dtype, device=weight.device)
        self.lora_b = torch.randn(r, d_out, dtype=weight.dtype, device=weight.device)

    def forward(self, x):
        """Return the base layer's output plus the low-rank adapter's output."""
        y1 = self.base_layer(x)
        # (x @ A) @ B: project down to rank r first, then back up to d_out
        y2 = x @ self.lora_a @ self.lora_b

        return y1 + y2

In [22]:
# wrap the linear layer of our toy model, use rank 2
# (model.linear itself is not replaced yet; this only checks the wrapper's output shape)
lora_layer = LoraLayer(model.linear, 2)
lora_layer(X).shape

torch.Size([1, 8, 1024])

In [23]:
# swap the wrapper into the model in place of the plain linear layer.
# NOTE: this mutates `model`. Re-running earlier cells that read
# model.linear.weight after this point fails, because LoraLayer has no
# `weight` attribute; restart and run all cells in order instead.
model.linear = lora_layer

In [24]:
model

TestModel(
  (embedding): Embedding(10, 1024)
  (linear): LoraLayer(
    (base_layer): Linear(in_features=1024, out_features=1024, bias=True)
  )
  (lm_head): Linear(in_features=1024, out_features=10, bias=True)
)

### Let's try generating a token after adding the LoRA layer

In [25]:
# same call as before, now routed through the LoRA-wrapped linear layer;
# the randomly initialised adapter shifts the logits, so the argmax can differ
next_token = generate_token(model=model, input_ids=input_ids)
next_token

['indigo']