In [31]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftConfig, PeftModel

# Hub repository that holds the LoRA adapter under test.
peft_model_id = "dlwh/levanter-lora-test"

# The adapter config records which base checkpoint the adapter belongs to.
config = PeftConfig.from_pretrained(peft_model_id)
base_checkpoint = config.base_model_name_or_path

# Load the base causal LM, then wrap it with the adapter in one expression.
model = PeftModel.from_pretrained(
    AutoModelForCausalLM.from_pretrained(base_checkpoint, device_map="auto"),
    peft_model_id,
    device_map="auto",
)
model.eval()

tokenizer = AutoTokenizer.from_pretrained(base_checkpoint)
# The tokenizer may come without a pad token; fall back to EOS so one is always set.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# if on macos, fall back to cpu b/c mps doesn't work yet w/ int64.cumsum
if model.device.type == "mps":
    model.cpu()

Downloading (…)er_model.safetensors:   0%|          | 0.00/1.18M [00:00<?, ?B/s]

Using pad_token, but it is not set yet.


In [32]:
# Show which token the tokenizer currently uses for padding.
tokenizer.pad_token

'<|endoftext|>'

In [41]:
# Tokenize the prompt and put the tensors on the model's device: generate()
# needs its inputs on the same device as the weights, and with
# device_map="auto" that is not necessarily the CPU.
inputs = tokenizer("= Tiger", return_tensors="pt").to(model.device)

# Inference only, so skip autograd bookkeeping.
with torch.no_grad():
    # max_length counts the prompt tokens as well as the generated ones.
    outputs = model.generate(
        **inputs,
        max_length=20,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.pad_token_id,
    )

# Raw token ids first, then the decoded text.
print(outputs)
print(tokenizer.batch_decode(outputs, skip_special_tokens=True))



tensor([[   28, 17030, 16483,    11,   508,   468,   587,   257,  1688,  3197,
           329,   262, 17030, 16483,  5693,  1201,   339,   373,   257, 15287]])
['= Tiger Woods, who has been a major draw for the Tiger Woods Foundation since he was a teenager']


In [42]:
# Load a second, adapter-free copy of the base checkpoint as the comparison baseline.
orig_model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path, device_map="auto")
orig_model.eval()

# Same MPS -> CPU fallback that is applied to the adapted model.
if orig_model.device.type == "mps":
    orig_model.cpu()

# Reuse the prompt tensors from the earlier generation cell, copied onto this
# model's device. A plain dict is built so the shared `inputs` object is not
# mutated (BatchEncoding.to() moves tensors in place).
orig_inputs = {name: tensor.to(orig_model.device) for name, tensor in inputs.items()}

# Inference only, so skip autograd bookkeeping.
with torch.no_grad():
    outputs = orig_model.generate(
        **orig_inputs,
        max_length=20,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.pad_token_id,
    )

# Raw token ids first, then the decoded text.
print(outputs)
print(tokenizer.batch_decode(outputs, skip_special_tokens=True))

tensor([[   28, 17030, 16483,    11,   508,   468,   587,   257,  1688,  3197,
           329,   262, 17030, 16483,  5693,  1201,   339,   373,   257, 15287]])
['= Tiger Woods, who has been a major draw for the Tiger Woods Foundation since he was a teenager']


In [35]:
# Snapshot the PEFT-wrapped model's state dict; the following cell iterates
# over `d` to pick out the LoRA entries.
d = model.state_dict()

# Bare expression so the notebook renders the whole mapping.
# NOTE(review): this dumps every tensor in the model; consider showing only
# the keys or shapes to keep the saved notebook small.
d

OrderedDict([('base_model.model.transformer.wte.weight',
              tensor([[-0.1101, -0.0393,  0.0331,  ..., -0.1364,  0.0151,  0.0453],
                      [ 0.0403, -0.0486,  0.0462,  ...,  0.0861,  0.0025,  0.0432],
                      [-0.1275,  0.0479,  0.1841,  ...,  0.0899, -0.1297, -0.0879],
                      ...,
                      [-0.0445, -0.0548,  0.0123,  ...,  0.1044,  0.0978, -0.0695],
                      [ 0.1860,  0.0167,  0.0461,  ..., -0.0963,  0.0785, -0.0225],
                      [ 0.0514, -0.0277,  0.0499,  ...,  0.0070,  0.1552,  0.1207]])),
             ('base_model.model.transformer.wpe.weight',
              tensor([[-1.8821e-02, -1.9742e-01,  4.0267e-03,  ..., -4.3044e-02,
                        2.8267e-02,  5.4490e-02],
                      [ 2.3959e-02, -5.3792e-02, -9.4879e-02,  ...,  3.4170e-02,
                        1.0172e-02, -1.5573e-04],
                      [ 4.2161e-03, -8.4764e-02,  5.4515e-02,  ...,  1.9745e-02,
         

In [36]:
# Report the norm of every LoRA tensor in the state dict; entries whose
# name does not contain "lora" are skipped.
for name, weight in d.items():
    if "lora" not in name:
        continue
    print(name, weight.norm())

base_model.model.transformer.h.0.attn.c_attn.lora_A.default.weight tensor(1.5700)
base_model.model.transformer.h.0.attn.c_attn.lora_B.default.weight tensor(2.7220)
base_model.model.transformer.h.1.attn.c_attn.lora_A.default.weight tensor(1.5516)
base_model.model.transformer.h.1.attn.c_attn.lora_B.default.weight tensor(2.7196)
base_model.model.transformer.h.2.attn.c_attn.lora_A.default.weight tensor(1.5474)
base_model.model.transformer.h.2.attn.c_attn.lora_B.default.weight tensor(2.7291)
base_model.model.transformer.h.3.attn.c_attn.lora_A.default.weight tensor(1.5680)
base_model.model.transformer.h.3.attn.c_attn.lora_B.default.weight tensor(2.7173)
base_model.model.transformer.h.4.attn.c_attn.lora_A.default.weight tensor(1.5771)
base_model.model.transformer.h.4.attn.c_attn.lora_B.default.weight tensor(2.7079)
base_model.model.transformer.h.5.attn.c_attn.lora_A.default.weight tensor(1.5719)
base_model.model.transformer.h.5.attn.c_attn.lora_B.default.weight tensor(2.7202)
base_model.model