# Install Dependencies

In [1]:
!pip install timm
!pip install lora_adapters

Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com
Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com


In [2]:
import timm 
import torch
from lora_adapters import LoraConv2d, apply_adapter, mark_only_lora_as_trainable, lora_state_dict, undo_lora
from torch.optim import AdamW

In [3]:
# Pick the GPU when CUDA is available, otherwise stay on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Build a pretrained ResNet-50 through timm and move it onto the chosen device.
model = timm.create_model('resnet50', pretrained=True)
model = model.to(device)

In [4]:
# Baseline optimizer: AdamW over every parameter that currently requires a gradient.
trainable_params = [weight for weight in model.parameters() if weight.requires_grad]
optimizer = AdamW(trainable_params, lr=1e-3)

In [5]:
# Draw the synthetic sample from a dedicated, fixed-seed generator so the data
# (and therefore the printed losses) are the same on every Restart & Run All,
# without altering the global RNG state.
data_rng = torch.Generator().manual_seed(0)

# A single (1, 3, 224, 224) random input and one random class index in [0, 1000),
# both created on the CPU and then moved to the training device.
inputs = torch.randn(1, 3, 224, 224, generator=data_rng).to(device)
targets = torch.randint(0, 1000, (1,), generator=data_rng).to(device)

In [6]:
# Baseline run: ten optimisation steps on the single synthetic sample, printing
# the cross-entropy loss of each step.
criterion = torch.nn.functional.cross_entropy
for _step in range(10):
    # Gradients are cleared at the start of a step (not the end), so they stay
    # populated after the final iteration for later inspection.
    optimizer.zero_grad()
    outputs = model(inputs)
    loss = criterion(outputs, targets)
    loss.backward()
    optimizer.step()
    print(f"loss: {loss.item():.5f}")

loss: 7.22588
loss: 6.38845
loss: 3.75152
loss: 0.89187
loss: 0.17836
loss: 0.02842
loss: 0.01040
loss: 0.00516
loss: 0.00258
loss: 0.00150


In [7]:
# Baseline footprint, measured in number of tensor elements.

# Every parameter of the model, trainable or not.
model_parameters = sum(p.numel() for p in model.parameters())

# Gradients held by trainable parameters. A trainable parameter that received no
# gradient has `grad is None`; skip it instead of raising AttributeError.
model_grads = sum(
    p.grad.numel()
    for p in model.parameters()
    if p.requires_grad and p.grad is not None
)

# Optimizer state is a mapping of parameter -> dict of state entries. Only tensor
# entries are counted; an entry that is not a tensor (for example a step counter
# stored as a plain number) has no numel() and is skipped.
optimizer_states = sum(
    entry.numel()
    for param_state in optimizer.state.values()
    for entry in param_state.values()
    if torch.is_tensor(entry)
)

In [8]:
# Apply the LoraConv2d adapter with rank=16 to the model.
model = apply_adapter(model, LoraConv2d, rank=16)

# Optional restriction, intentionally left disabled for the comparison with the
# baseline run:
#   model = mark_only_lora_as_trainable(model, bias='lora_only')
# NOTE(review): the recorded output of this notebook reports far fewer gradient
# elements than parameters after this step, so apply_adapter appears to leave
# part of the model without requires_grad on its own -- confirm against the
# lora_adapters documentation.

# Rebuild the optimizer so it tracks the parameters of the adapted model that
# require a gradient.
lora_trainable = [weight for weight in model.parameters() if weight.requires_grad]
optimizer = AdamW(lora_trainable, lr=1e-3)

In [9]:
# Adapter run: the same ten-step loop as the baseline, now driving the adapted
# model with the rebuilt optimizer.
criterion = torch.nn.functional.cross_entropy
for _step in range(10):
    # Gradients are cleared at the start of a step (not the end), so they stay
    # populated after the final iteration for later inspection.
    optimizer.zero_grad()
    outputs = model(inputs)
    loss = criterion(outputs, targets)
    loss.backward()
    optimizer.step()
    print(f"loss: {loss.item():.5f}")

loss: 0.00096
loss: 0.00029
loss: 0.00013
loss: 0.00007
loss: 0.00004
loss: 0.00003
loss: 0.00002
loss: 0.00001
loss: 0.00001
loss: 0.00001


In [10]:
# Footprint of the adapted model, measured in number of tensor elements.

# Every parameter of the adapted model, trainable or not.
lora_model_parameters = sum(p.numel() for p in model.parameters())

# Gradients held by trainable parameters. A trainable parameter that received no
# gradient has `grad is None`; skip it instead of raising AttributeError.
lora_model_grads = sum(
    p.grad.numel()
    for p in model.parameters()
    if p.requires_grad and p.grad is not None
)

# Optimizer state is a mapping of parameter -> dict of state entries. Only tensor
# entries are counted; an entry that is not a tensor (for example a step counter
# stored as a plain number) has no numel() and is skipped.
lora_optimizer_states = sum(
    entry.numel()
    for param_state in optimizer.state.values()
    for entry in param_state.values()
    if torch.is_tensor(entry)
)

In [11]:
# Baseline vs. adapted model: element counts and the adapted/baseline ratio for
# parameters, gradients and optimizer state.
report_lines = (
    f"Model parameters: {model_parameters} -> {lora_model_parameters} ratio: {lora_model_parameters/model_parameters:.2f}",
    f"Model grads: {model_grads} -> {lora_model_grads} ratio: {lora_model_grads/model_grads:.2f}",
    f"Optimizer states: {optimizer_states} -> {lora_optimizer_states} ratio: {lora_optimizer_states/optimizer_states:.2f}",
)
for report_line in report_lines:
    print(report_line)

Model parameters: 25557032 -> 27360600 ratio: 1.07
Model grads: 25557032 -> 3905688 ratio: 0.15
Optimizer states: 51114225 -> 7811590 ratio: 0.15
