In [22]:
from transformers import GPT2Tokenizer, GPT2Model
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2Model.from_pretrained('gpt2')
text = "Replace me by any text you'd like."
encoded_input = tokenizer(text, return_tensors='pt')
output = model(**encoded_input)

In [37]:
import torch
import torch.nn as nn

class LoRALayer(nn.Module):
    def __init__(self, weight, r, alpha):
        super(LoRALayer, self).__init__()
        self.weight = weight
        self.weight.requires_grad = False
        self.r = r
        self.alpha = alpha
        out_features = self.weight.shape[0]
        in_features = self.weight.shape[1]
        self.A = nn.Parameter(self.weight.new_zeros(self.r, in_features))
        self.B = nn.Parameter(self.weight.new_zeros(out_features, r))
    
    def forward(self, x):
        result = x @ self.weight.T
        result += x @ (self.A.T @ self.B.T)
        return result

In [38]:
class FFN(nn.Module):
    def __init__(self, in_channels, hidden_dim, out_channels):
        super(FFN, self).__init__()
        self.linear1 = nn.Linear(in_channels, hidden_dim)
        self.linear2 = nn.Linear(hidden_dim, out_channels)
        self.relu = nn.ReLU()
    
    def forward(self, x):
        x = self.linear1(x)
        x = self.relu(x)
        x = self.linear2(x)
        return x

In [42]:
from torch.utils.data import DataLoader, TensorDataset

ffn = FFN(2, 16, 1)
x_xor = torch.tensor([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=torch.float32)
y_xor = torch.tensor([[0], [1], [1], [0]], dtype=torch.float32)


dataset_xor = TensorDataset(x_xor, y_xor)
dataloader_xor = DataLoader(dataset_xor, batch_size=1, shuffle=True)

def train_xor_model(model, dataloader):
    criterion = torch.nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
    for epoch in range(100):
        for inputs, labels in dataloader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
        if (epoch + 1) % 10 == 0:
            print(f"Epoch {epoch + 1}, Loss: {loss.item()}")

train_xor_model(ffn, dataloader_xor)


TypeError: FFN.__init__() takes 3 positional arguments but 4 were given

In [40]:
ffn_weight = ffn.linear1.weight
lora_layer = LoRALayer(ffn_weight, 1, 0.1)
ffn.linear1 = lora_layer
ffn.parameters


<bound method Module.parameters of FFN(
  (linear1): LoRALayer()
  (linear2): Linear(in_features=64, out_features=32, bias=True)
  (relu): ReLU()
)>

In [24]:
weight_layer = torch.ones(32, 16)
lora_layer = LoRALayer(weight_layer, 5, 0.1)
print(weight_layer.shape)
print(lora_layer.A.shape)
print(lora_layer.B.shape)
x = torch.rand(32,16)

print(lora_layer.forward(x).shape)

torch.Size([32, 16])
torch.Size([5, 16])
torch.Size([32, 5])
torch.Size([32, 32])


In [27]:
param = model.parameters
print(param)

<bound method Module.parameters of GPT2Model(
  (wte): Embedding(50257, 768)
  (wpe): Embedding(1024, 768)
  (drop): Dropout(p=0.1, inplace=False)
  (h): ModuleList(
    (0-11): 12 x GPT2Block(
      (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (attn): GPT2Attention(
        (c_attn): Conv1D()
        (c_proj): Conv1D()
        (attn_dropout): Dropout(p=0.1, inplace=False)
        (resid_dropout): Dropout(p=0.1, inplace=False)
      )
      (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (mlp): GPT2MLP(
        (c_fc): Conv1D()
        (c_proj): Conv1D()
        (act): NewGELUActivation()
        (dropout): Dropout(p=0.1, inplace=False)
      )
    )
  )
  (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
)>
