In [1]:
import pandas as pd
import torch

from tqdm import tqdm
from transformers import AutoTokenizer, AutoModelForCausalLM

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Location of the checkpoint to inspect; handed to `from_pretrained` for both the
# tokenizer and the model in a later cell.
# NOTE(review): this is an absolute, machine-specific path — consider reading it
# from a config value or environment variable so the notebook runs elsewhere.
model_id = "/share/m.nikiforova/ruadapt_models/15slay_15_30_const_03_slerp_mistral"

In [3]:
# Use the first CUDA device when PyTorch reports one, otherwise fall back to the CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
# Bare expression: display the selected device as the cell output.
device

'cuda:0'

In [4]:
# Load the tokenizer and the causal-LM weights from `model_id`, then move the
# model onto `device`. No dtype argument is passed to `from_pretrained`, so the
# library default precision is used.
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id
).to(device)

in oss file


Loading checkpoint shards: 100%|██████████| 6/6 [00:01<00:00,  4.37it/s]


In [7]:
# Single-turn chat history used as the prompt for the experiment.
# The active prompt is Russian for "what is your name?"; the commented-out entry
# ("what colour is the sky?") appears to be an alternative prompt kept for
# manual switching.
messages = [
    {"role": "user", "content": "как тебя зовут?"},
#     {"role": "user", "content": "какого цвета небо?"}
]

In [8]:
# Render `messages` into a single prompt string with the tokenizer's chat template.
# `tokenize=False` asks for text rather than token ids; `add_generation_prompt=True`
# requests the assistant-turn prefix. The rendered string (shown in the next cell's
# output) already begins with the `<s>` marker, which is presumably why
# `get_activation` tokenizes it with `add_special_tokens=False`.
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

In [9]:
text

'<s>[INST] как тебя зовут?[/INST]'

---

In [10]:
def get_next_token(activations, model, tokenizer):
    """Decode the top-ranked next token for every captured activation (logit lens).

    Each activation is passed through the model's final norm (``model.model.norm``)
    and its LM head (``model.lm_head``); the highest-scoring vocabulary entry at
    the last sequence position is then decoded to text.

    Args:
        activations: Mapping ``name -> tensor``. Each tensor is indexed as
            ``[batch, position, feature]``; only the first sequence in the batch
            is decoded.
        model: Object exposing ``lm_head`` and ``model.norm`` callables.
        tokenizer: Object exposing ``decode(token_id)``.

    Returns:
        dict mapping each activation name to the decoded token string. Empty
        when ``activations`` is empty.
    """
    logit_lens = {}
    # Pure inference: the norm / LM-head parameters may require grad, so without
    # this guard every call would build (and keep alive) an autograd graph.
    with torch.no_grad():
        for name, activation in activations.items():
            lm_head_out = model.lm_head(model.model.norm(activation))
            # Last position of the first sequence. Taking the argmax along the
            # vocabulary axis (instead of over the flattened tensor) guarantees
            # the index is a valid token id even when batch size > 1.
            next_logits = lm_head_out[0, -1, :]
            token_id = next_logits.argmax(dim=-1).item()
            logit_lens[name] = tokenizer.decode(token_id)
    return logit_lens

In [11]:
def get_activation(input_text, model, tokenizer, max_new_tokens=1):
    """Greedy-decode ``max_new_tokens`` tokens and record a per-layer logit lens.

    A forward hook is attached to every decoder layer in ``model.model.layers``.
    At each generation step the whole sequence is run through the model, the
    hidden states emitted by each layer are captured, and ``get_next_token``
    turns them into the token each layer would predict next. The token actually
    appended to the sequence is the argmax of the model's final logits.

    Generation always runs for exactly ``max_new_tokens`` steps; there is no
    end-of-sequence check.

    Args:
        input_text: Prompt string. It is tokenized with
            ``add_special_tokens=False``, so any special tokens must already be
            part of the text.
        model: Causal LM exposing ``device``, ``config.num_hidden_layers``,
            ``model.layers`` and a forward call whose output has a ``"logits"``
            entry.
        tokenizer: Tokenizer callable returning ``input_ids`` and
            ``attention_mask`` tensors.
        max_new_tokens: Number of greedy steps to run (default 1).

    Returns:
        dict mapping step index ``0 .. max_new_tokens - 1`` to a dict
        ``{"layers_<i>_output": decoded_token}``. Empty when
        ``max_new_tokens`` is 0.
    """
    device = model.device
    inputs = tokenizer(input_text, return_tensors="pt", add_special_tokens=False)
    input_ids = inputs["input_ids"].to(device)
    attention_mask = inputs["attention_mask"].to(device)

    # Shared buffer the hooks write into; cleared before every forward pass so
    # each step only sees activations from its own run.
    activations = {}

    def getActivation(name):
        # Factory returning a forward hook bound to a layer name.
        def hook(module, hook_inputs, output):
            # Decoder layers may return a tuple whose first item is the hidden
            # state, or the hidden-state tensor itself; accept both.
            hidden = output[0] if isinstance(output, (tuple, list)) else output
            activations[name + "_output"] = hidden.detach()

        return hook

    handles = []
    logit_lens = {}
    try:
        for i in range(model.config.num_hidden_layers):
            handles.append(
                model.model.layers[i].register_forward_hook(getActivation(f"layers_{i}"))
            )

        for i_token in range(max_new_tokens):
            activations.clear()

            with torch.no_grad():
                # The full sequence is re-encoded every step and the KV cache is
                # never reused, so do not ask the model to build one.
                out = model(
                    input_ids=input_ids,
                    attention_mask=attention_mask,
                    use_cache=False,
                )
            # Access logits by key rather than by positional unpacking so the
            # code does not depend on how many fields the output carries.
            next_logits = out["logits"][:, -1, :]
            next_token_id = next_logits.argmax(dim=-1, keepdim=True).to(input_ids.device)

            input_ids = torch.cat((input_ids, next_token_id), dim=1)
            attention_mask = torch.cat(
                (attention_mask, attention_mask.new_ones((attention_mask.shape[0], 1))),
                dim=1,
            )
            logit_lens[i_token] = get_next_token(activations, model, tokenizer)
    finally:
        # Always detach the hooks, even if a forward pass raised; otherwise they
        # would stay registered on the model and fire on every later call.
        for handle in handles:
            handle.remove()

    return logit_lens

In [12]:
# Run 16 greedy generation steps on the rendered prompt and collect, for every
# step, the token each decoder layer's output decodes to.
toks_per_layer = get_activation(text, model, tokenizer, 16)

We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)


In [13]:
# Tabulate the result: one column per generation step (0..15) and one row per
# hooked layer ("layers_<i>_output"), each cell holding that layer's decoded token.
pd.DataFrame(toks_per_layer)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
layers_0_output,ẩ,yll,distant,~,Behavior,arta,še,Brun,acknow,dit,acknow,post,ța,dit,Behavior,compass
layers_1_output,inus,kennis,ẩ,bourg,etto,rele,ieler,akov,/******/,tod,/******/,pole,kw,akov,\-,compass
layers_2_output,WF,kennis,winter,des,¤,iron,bolds,sat,raised,tick,aya,pole,anza,pool,inth,/******/
layers_3_output,WF,kennis,Russian,achuset,¤,oslav,aurus,lay,CPP,akte,aya,pole,/******/,riv,prot,/******/
layers_4_output,�,kennis,Russian,achuset,️,bast,aurus,akov,elli,ij,iva,pole,leveland,abeth,︎,ters
layers_5_output,ца,kennis,Russian,ด,️,bast,hip,chnitt,Egy,akte,iva,pole,slope,chnitt,rente,ets
layers_6_output,PRESS,kennis,pet,achuset,️,heimer,hip,esh,eh,rh,ancia,︎,heimer,vet,︎,zas
layers_7_output,eston,kennis,pet,olas,️,heimer,hip,VI,dai,akte,roz,ed,heimer,dai,inth,computer
layers_8_output,response,kennis,rez,achuset,uli,heimer,hip,igd,febr,akte,enze,hem,heimer,chnitt,︎,esk
layers_9_output,response,kennis,esh,clutch,️,heimer,hip,eva,DAI,olas,enze,Korean,heimer,giornata,-$,esh


In [14]:
# pd.DataFrame(toks_per_layer).to_csv("logit_lens/slay_15_mistral.csv")