In [5]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftConfig, PeftModel

peft_model_id = "dlwh/levanter-lora-test"

config = PeftConfig.from_pretrained(peft_model_id)
model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path, device_map="auto")
model = PeftModel.from_pretrained(model, peft_model_id, device_map="auto")


model.eval()

tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)

if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# if on macos, set cpu b/c mps doesn't work yet w/ int64.cumsum
if model.device.type == "mps":
    model.cpu()

Using pad_token, but it is not set yet.


In [3]:
tokenizer.pad_token

'<|endoftext|>'

In [32]:
from transformers import GenerationConfig

inputs = tokenizer("In biology,", return_tensors="pt")

with torch.no_grad():
    # outputs = model.greedy_search(**inputs, max_new_tokens=20, eos_token_id=tokenizer.eos_token_id, pad_token_id=tokenizer.pad_token_id)
    config = GenerationConfig(output_scores=True, return_dict_in_generate=True)
    result = model.generate(**inputs, generation_config=config, max_length=40, eos_token_id=tokenizer.eos_token_id, pad_token_id=tokenizer.pad_token_id)
    outputs = result.sequences
    print(outputs)
    print(tokenizer.batch_decode(outputs.detach().cpu().numpy(), skip_special_tokens=True))



tensor([[  818, 17219,    11,   262,  3632,   318,   262,  3632,   338,  4165,
          2723,   286,  1321,    13,   383,  3632,   318,   262,  3632,   338,
          4165,  2723,   286,  1321,    13,   198,   198,   464,  3632,   318,
           262,  3632,   338,  4165,  2723,   286,  1321,    13,   383,  3632]])
["In biology, the brain is the brain's primary source of information. The brain is the brain's primary source of information.\n\nThe brain is the brain's primary source of information. The brain"]


In [33]:
[(s[0][tokenizer.eos_token_id], s[0][t]) for s,t in zip(result.scores, result.sequences[0])]

[(tensor(-83.2557), tensor(-85.8009)),
 (tensor(-84.2175), tensor(-77.1657)),
 (tensor(-86.4178), tensor(-77.3928)),
 (tensor(-110.7105), tensor(-96.9994)),
 (tensor(-112.1313), tensor(-99.4627)),
 (tensor(-70.8388), tensor(-66.0480)),
 (tensor(-116.3585), tensor(-109.1837)),
 (tensor(-126.5522), tensor(-114.2512)),
 (tensor(-21.2022), tensor(-17.7173)),
 (tensor(-112.0298), tensor(-103.6370)),
 (tensor(-83.9326), tensor(-80.3852)),
 (tensor(-156.5771), tensor(-159.3918)),
 (tensor(-98.5236), tensor(-87.6358)),
 (tensor(-93.9978), tensor(-89.0802)),
 (tensor(-119.0411), tensor(-114.5944)),
 (tensor(-117.6953), tensor(-103.9253)),
 (tensor(-33.1703), tensor(-27.5888)),
 (tensor(-119.7291), tensor(-112.0733)),
 (tensor(-116.8803), tensor(-104.8546)),
 (tensor(-107.4814), tensor(-104.0858)),
 (tensor(-108.6564), tensor(-101.9731)),
 (tensor(-58.9992), tensor(-57.9402)),
 (tensor(-117.3935), tensor(-121.8353)),
 (tensor(-255.6712), tensor(-268.7751)),
 (tensor(-126.3036), tensor(-120.7409)

In [12]:
orig_model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path, device_map="auto")
orig_model.eval()

if orig_model.device.type == "mps":
    orig_model.cpu()
    
with torch.no_grad():
    outputs = orig_model.generate(**inputs, max_length=20, eos_token_id=tokenizer.eos_token_id, pad_token_id=tokenizer.pad_token_id)
    print(outputs)
    print(tokenizer.batch_decode(outputs.detach().cpu().numpy(), skip_special_tokens=True))

tensor([[  818, 17219,    11,   262,  3632,   318,   262,  3632,   338,  4165,
          2723,   286,  1321,    13,   383,  3632,   318,   262,  3632,   338]])
["In biology, the brain is the brain's primary source of information. The brain is the brain's"]


In [None]:
d = model.state_dict()

d

In [None]:
for k, v in d.items(): 
    if "lora" in k:
        print(k, v.norm())