In [1]:
from collections import namedtuple

from lmnav.common.config import Config
from lmnav.common.registry import registry

from lmnav.models import *
from lmnav.processors import *
from lmnav.common.episode_processor import apply_transforms_inputs

import torch
import einops


[2023-09-08 16:39:44,630] [INFO] [real_accelerator.py:110:get_accelerator] Setting ds_accelerator to cuda (auto detect)


In [2]:
def _init_components(cfg_path, device):
    """Build the model and its training-time visual processor from a config file.

    Args:
        cfg_path: Path to the experiment config file handed to ``Config``.
        device: Device the instantiated model is moved to (e.g. ``'cuda'``).

    Returns:
        A ``(model, vis_processor)`` tuple. The model has been switched to
        train mode before it is returned.
    """
    # ``Config`` is handed an args-like object; a namedtuple stands in for it.
    # NOTE(review): model_type and gpu_id are fixed here — confirm that is intended.
    Args = namedtuple("Args", ["cfg_path", "model_type", "gpu_id", "options"])
    cfg = Config(Args(cfg_path=cfg_path, model_type="llama_v2", gpu_id=0, options=[]))

    # Resolve the architecture named in the config and instantiate it on `device`.
    model_cfg = cfg.model_cfg
    model = registry.get_model_class(model_cfg.arch).from_config(model_cfg).to(device)
    model.train()

    # Build the processor described by the "train" visual-processor section.
    proc_cfg = cfg.config.preprocess.vis_processor.train
    proc_cls = registry.get_processor_class(proc_cfg.name)
    vis_processor = proc_cls.from_config(proc_cfg)

    return model, vis_processor


def test_construct_inputs(B, T):
    goals = torch.rand(B, 1, 3, 480, 640)
    rgbs = torch.rand(B, T, 3, 480, 640)
    actions = torch.randint(0, 4, (B, T))

    return goals, rgbs, actions

In [3]:
# Experiment settings for this smoke test.
# NOTE(review): absolute, machine-specific path — consider making it configurable.
cfg_path = "/srv/flash1/pputta7/projects/lm-nav/exp_configs/lora_nav_llama_train.yaml"
device = 'cuda'
B = 2   # batch size of the dummy input
T = 20  # timesteps per dummy episode

model, vis_processor = _init_components(cfg_path, device)

# Random dummy episode, then the same preprocessing applied to real inputs.
goals, rgbs, actions = test_construct_inputs(B, T)
rgbs, goals, actions = apply_transforms_inputs(vis_processor, rgbs, goals, actions)

print("Shapes after transform")
print(rgbs.shape, goals.shape)

# Only the image tensors are moved to `device`; `actions` is left as returned
# by the transform.
rgbs, goals = rgbs.to(device), goals.to(device)

Loading VIT
Loading VIT Done
Loading Q-Former


Using pad_token, but it is not set yet.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Shapes after transform
torch.Size([2, 3, 20, 224, 224]) torch.Size([2, 3, 1, 224, 224])


In [4]:
from peft import get_peft_config, get_peft_model, LoraConfig, TaskType

# LoRA settings for adapting the causal language model.
r = 8  # value passed as the LoRA `r` argument below
peft_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    inference_mode=False,  # adapters are configured for training, not inference
    r=r,
    lora_alpha=32,
    lora_dropout=0.1,
)

In [5]:
from peft import PeftModel

# Wrap the LLaMA backbone with the LoRA adapters exactly once. Without this
# guard, re-running the cell would pass the already-wrapped model back into
# `get_peft_model` and wrap it a second time.
if not isinstance(model.llama_model, PeftModel):
    model.llama_model = get_peft_model(model.llama_model, peft_config)

In [6]:
# Forward pass on the dummy batch; the result's `.loss` is inspected in a later cell.
output = model(rgbs, goals, actions)

In [11]:
# Names of every parameter registered under `model.llama_model`.
[param_name for param_name, _ in model.llama_model.named_parameters()]

['base_model.model.model.embed_tokens.weight',
 'base_model.model.model.layers.0.self_attn.q_proj.weight',
 'base_model.model.model.layers.0.self_attn.q_proj.lora_A.default.weight',
 'base_model.model.model.layers.0.self_attn.q_proj.lora_B.default.weight',
 'base_model.model.model.layers.0.self_attn.k_proj.weight',
 'base_model.model.model.layers.0.self_attn.v_proj.weight',
 'base_model.model.model.layers.0.self_attn.v_proj.lora_A.default.weight',
 'base_model.model.model.layers.0.self_attn.v_proj.lora_B.default.weight',
 'base_model.model.model.layers.0.self_attn.o_proj.weight',
 'base_model.model.model.layers.0.mlp.gate_proj.weight',
 'base_model.model.model.layers.0.mlp.down_proj.weight',
 'base_model.model.model.layers.0.mlp.up_proj.weight',
 'base_model.model.model.layers.0.input_layernorm.weight',
 'base_model.model.model.layers.0.post_attention_layernorm.weight',
 'base_model.model.model.layers.1.self_attn.q_proj.weight',
 'base_model.model.model.layers.1.self_attn.q_proj.lora_A

In [7]:
# Display the loss attached to the forward-pass result.
output.loss

tensor(12.3750, device='cuda:0', dtype=torch.bfloat16,
       grad_fn=<NllLossBackward0>)

In [8]:
# Count all parameter elements, and separately those with `requires_grad` set.
totalparams = sum(p.numel() for p in model.parameters())
trainableparams = sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f"Total params: {totalparams}, Trainable params: {trainableparams}")

Total params: 7836819328, Trainable params: 7344128
