In [1]:
import os
os.environ["HF_HOME"] = "/workspace/.cache/huggingface"

In [2]:
import pandas as pd
import numpy as np
import torch

model_path = "meta-llama/Llama-2-7b-chat-hf"
# model_path = "meta-llama/Llama-2-7b-hf"

from transformers import AutoTokenizer, AutoModelForCausalLM

device = "cuda"

model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map=device, 
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True
)

tokenizer = AutoTokenizer.from_pretrained(model_path, device_map=device)

  from .autonotebook import tqdm as notebook_tqdm
  torch.utils._pytree._register_pytree_node(
  torch.utils._pytree._register_pytree_node(
Loading checkpoint shards: 100%|██████████| 2/2 [00:22<00:00, 11.41s/it]


In [3]:
model.generation_config.do_sample = False

# Dallas

In [190]:
from selfie.interpret import InterpretationPrompt, interpret
interpretation_prompt = InterpretationPrompt(tokenizer, ("[INST]", 0, 0, 0, 0, 0, "[/INST] Sure, I will summarize the message:\n\n"))

In [191]:
num_layers = model.config.num_hidden_layers
print(f"Number of layers in model: {num_layers}")


Number of layers in model: 32


In [192]:
original_prompt = "[INST] Fact: the capital of the state containing Dallas is [/INST]"

In [193]:
tokens = tokenizer.tokenize(original_prompt)
for i, token in enumerate(tokens):
    print(i, token)

0 ▁[
1 INST
2 ]
3 ▁Fact
4 :
5 ▁the
6 ▁capital
7 ▁of
8 ▁the
9 ▁state
10 ▁containing
11 ▁Dallas
12 ▁is
13 ▁[
14 /
15 INST
16 ]


In [204]:
# early_tokens = [(j, i) for i in range(5, 11) for j in (8, 16, 24)]
early_tokens = []
answer_tokens = [(j, i) for i in range(12, 14) for j in range(4, 32, 2)]
tokens_to_interpret = early_tokens + answer_tokens

bs = 64
max_new_tokens = 15 # 20
k = 3

interpretation_df = interpret(
    original_prompt=original_prompt, 
    tokens_to_interpret=tokens_to_interpret, model=model, 
    interpretation_prompt=interpretation_prompt, bs=bs, 
    max_new_tokens=max_new_tokens, 
    k=k, 
    tokenizer=tokenizer
)
interpretation_df = pd.DataFrame(interpretation_df)

Interpreting '[INST] Fact: the capital of the state containing Dallas is [/INST]' with '[INST]_ _ _ _ _ [/INST] Sure, I will summarize the message:

'


100%|██████████| 1/1 [00:00<00:00,  1.45it/s]


In [205]:
interpretation_df

Unnamed: 0,prompt,interpretation,layer,token,token_decoded,relevancy_score
0,[INST] Fact: the capital of the state containi...,The user is asking for information on how to g...,4,12,Dallas,"[0.01318, 0.11743, 0.3145, 0.3623, 0.11475, 0...."
1,[INST] Fact: the capital of the state containi...,The user is asking for information on the top ...,6,12,Dallas,"[0.2563, 0.05762, 0.3784, 0.5347, 0.1279, 0.32..."
2,[INST] Fact: the capital of the state containi...,The speaker is discussing the importance of th...,8,12,Dallas,"[0.2847, 0.4136, 0.379, 0.476, 0.0, 0.1802, 0...."
3,[INST] Fact: the capital of the state containi...,Dallas is a city located in the state of Texas...,10,12,Dallas,"[0.6006, 0.971, 0.3623, 0.0542, 0.0249, 0.1548..."
4,[INST] Fact: the capital of the state containi...,"* Dallas is a city in Texas, located in the no...",12,12,Dallas,"[0.38, 0.824, 0.523, 0.1274, 0.03125, 0.03076,..."
5,[INST] Fact: the capital of the state containi...,* Dallas is a city located in the state of Tex...,14,12,Dallas,"[0.4512, 0.7964, 0.533, 0.166, 0.04932, 0.0976..."
6,[INST] Fact: the capital of the state containi...,Dallas is a city located in the state of Texas...,16,12,Dallas,"[0.4766, 0.98, 0.645, 0.09033, 0.02881, 0.1225..."
7,[INST] Fact: the capital of the state containi...,Dallas is a city located in the state of Texas...,18,12,Dallas,"[0.4507, 0.9854, 0.419, 0.01123, 0.0332, 0.039..."
8,[INST] Fact: the capital of the state containi...,Dallas is a city located in the state of Texas...,20,12,Dallas,"[0.497, 0.9785, 0.4453, 0.0625, 0.0293, 0.0278..."
9,[INST] Fact: the capital of the state containi...,Dallas is a city located in the state of Texas...,22,12,Dallas,"[0.4126, 0.9546, 0.3394, 0.1221, 0.0381, 0.042..."


In [206]:
interpretation_df['tokens'] = interpretation_df['interpretation'].apply(lambda x: tokenizer.tokenize(x))
interpretation_df['tokens_len'] = interpretation_df['tokens'].apply(len)
# sort by sum of redundancy score
interpretation_df['relevancy_mean'] = interpretation_df.apply(
    lambda row: np.mean(row['relevancy_score'][:row['tokens_len']]), axis=1
)
interpretation_df.sort_values(by='relevancy_mean', ascending=False)

Unnamed: 0,prompt,interpretation,layer,token,token_decoded,relevancy_score,tokens,tokens_len,relevancy_mean
17,[INST] Fact: the capital of the state containi...,The city that is located in the state of [Stat...,10,13,is,"[0.0415, 0.553, 0.772, 0.832, 0.8623, 0.355, 0...","[▁The, ▁city, ▁that, ▁is, ▁located, ▁in, ▁the,...",15,0.47998
0,[INST] Fact: the capital of the state containi...,The user is asking for information on how to g...,4,12,Dallas,"[0.01318, 0.11743, 0.3145, 0.3623, 0.11475, 0....","[▁The, ▁user, ▁is, ▁asking, ▁for, ▁information...",15,0.462402
25,[INST] Fact: the capital of the state containi...,"The answer is Austin, Texas.",26,13,is,"[0.353, 0.6094, 0.4932, 0.472, 0.836, 0.01514,...","[▁The, ▁answer, ▁is, ▁Austin, ,, ▁Texas, .]",7,0.416016
16,[INST] Fact: the capital of the state containi...,The city where the speaker is located is [INST]].,8,13,is,"[0.0991, 0.941, 0.5425, 0.3967, 0.1061, 0.7285...","[▁The, ▁city, ▁where, ▁the, ▁speaker, ▁is, ▁lo...",12,0.379639
23,[INST] Fact: the capital of the state containi...,The answer is Austin.,22,13,is,"[0.01611, 0.532, 0.3833, 0.5425, 0.3672, 0.364...","[▁The, ▁answer, ▁is, ▁Austin, .]",5,0.368164
27,[INST] Fact: the capital of the state containi...,"A. The capital of Texas is Austin, and the sur...",30,13,is,"[0.4302, 0.3672, 0.2273, 0.376, 0.02393, 0.699...","[▁A, ., ▁The, ▁capital, ▁of, ▁Texas, ▁is, ▁Aus...",15,0.351074
20,[INST] Fact: the capital of the state containi...,The city where the user is located is not spec...,16,13,is,"[0.0913, 0.3704, 0.723, 0.7227, 0.1603, 0.0141...","[▁The, ▁city, ▁where, ▁the, ▁user, ▁is, ▁locat...",15,0.343262
18,[INST] Fact: the capital of the state containi...,The city where the Instant is located is [INST...,12,13,is,"[0.05908, 0.89, 0.616, 0.546, 0.3193, 0.2568, ...","[▁The, ▁city, ▁where, ▁the, ▁Inst, ant, ▁is, ▁...",15,0.328857
24,[INST] Fact: the capital of the state containi...,The answer is Austin.,24,13,is,"[0.00928, 0.3025, 0.4194, 0.479, 0.3677, 0.335...","[▁The, ▁answer, ▁is, ▁Austin, .]",5,0.315674
19,[INST] Fact: the capital of the state containi...,The answer is:\n\n[INST] The city of Los Angeles,14,13,is,"[0.05615, 0.4114, 0.4648, 0.07837, 0.44, 0.029...","[▁The, ▁answer, ▁is, :, <0x0A>, <0x0A>, [, INS...",15,0.297119


In [213]:
for _, row in interpretation_df[interpretation_df['token'] == 13].iterrows():
    print(row['interpretation'], row['relevancy_mean'], row['layer']) 

The user is asking if it is possible to use Instagram Reels to 0.2337646484375 4
Is is "Instanbul" or "Istanbul"?

 0.288818359375 6
The city where the speaker is located is [INST]]. 0.379638671875 8
The city that is located in the state of [State] and is the 0.47998046875 10
The city where the Instant is located is [INST].

Is 0.328857421875 12
The answer is:

[INST]  The city of Los Angeles 0.297119140625 14
The city where the user is located is not specified in the message, so 0.34326171875 16
The answer to the question "What is the capital of Texas?" is Austin 0.2232666015625 18
The answer to the question "What is the capital of Texas?" is Austin 0.251220703125 20
The answer is Austin. 0.3681640625 22
The answer is Austin. 0.315673828125 24
The answer is Austin, Texas. 0.416015625 26
Austin, TX] Austin, TX] 

 0.268310546875 28
A. The capital of Texas is Austin, and the surrounding areas.
 0.35107421875 30


# Addition

In [4]:
input_ids = tokenizer.encode("calc: 36+59=", return_tensors="pt").to(device)
out = model.generate(
    input_ids,
    max_length=input_ids.shape[1] + 2,
    num_beams=5,
    early_stopping=True
)

print(tokenizer.decode(out[0], skip_special_tokens=True))



calc: 36+59=95


In [101]:
from selfie.interpret import InterpretationPrompt, interpret
# interpretation_prompt = InterpretationPrompt(tokenizer, ("[INST]", 0, 0, 0, 0, 0, "[/INST] Sure, I will summarize the message:"))
interpretation_prompt = InterpretationPrompt(tokenizer, ( "[INST]", 0, 0, 0, 0, 0, "Copy the expression for this step (in english) [/INST] Sure! the expression is:\n\n"))

In [102]:
original_prompt = "[INST] calc: 36+59= [/INST]"
# original_prompt = "calc: 36+59="

In [103]:
tokens = tokenizer.tokenize(original_prompt)
for i, token in enumerate(tokens):
    print(i, token)

0 ▁[
1 INST
2 ]
3 ▁calc
4 :
5 ▁
6 3
7 6
8 +
9 5
10 9
11 =
12 ▁[
13 /
14 INST
15 ]


In [104]:
original_prompt

'[INST] calc: 36+59= [/INST]'

In [127]:
# tokens_to_interpret = [(j, i) for i in range(11, 16) for j in range(2, 32)]
# tokens_to_interpret = [(j, i) for i in range(6, 14) for j in range(4, 32, 4)]
# tokens_to_interpret = [(j, i) for i in range(6, 12) for j in range(4, 32, 3)]
tokens_to_interpret = [(j, i) for i in range(12, 13) for j in range(4, 32, 2)]
bs = 64
max_new_tokens = 40
k = 3

interpretation_df = interpret(
    original_prompt=original_prompt, 
    tokens_to_interpret=tokens_to_interpret, model=model, 
    interpretation_prompt=interpretation_prompt, bs=bs, 
    max_new_tokens=max_new_tokens, 
    k=k, 
    tokenizer=tokenizer
)
interpretation_df = pd.DataFrame(interpretation_df)

Interpreting '[INST] calc: 36+59= [/INST]' with '[INST]_ _ _ _ _ Copy the expression for this step (in english) [/INST] Sure! the expression is:

'


  0%|          | 0/1 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:01<00:00,  1.36s/it]


In [128]:
interpretation_df

Unnamed: 0,prompt,interpretation,layer,token,token_decoded,relevancy_score
0,[INST] calc: 36+59= [/INST],$$ = \frac{1}{2} \times \frac{1}{3} = \frac{1}...,4,12,=,"[0.04932, 0.3916, 0.0381, 0.02783, 0.0381, 0.0..."
1,[INST] calc: 36+59= [/INST],$$ = = = = = = = = = = = = = = = = = = = = = =...,6,12,=,"[0.4504, 0.602, 0.518, 0.04248, 0.02539, 0.027..."
2,[INST] calc: 36+59= [/INST],"$$ 2 + 2 = 4 $$\n\nIn this step, we are adding...",8,12,=,"[0.537, 0.514, 0.1782, 0.98, 0.0762, 0.742, 0...."
3,[INST] calc: 36+59= [/INST],1. Instruct the user to write the equation: `I...,10,12,=,"[0.436, 0.4634, 0.63, 0.746, 0.009766, 0.57, 0..."
4,[INST] calc: 36+59= [/INST],"$$ = $$\n\nIn this case, we are given the equa...",12,12,=,"[0.19, 0.726, 0.4016, 0.01953, 0.00293, 0.3457..."
5,[INST] calc: 36+59= [/INST],"$$ 2 + 2 = 4 $$\n\nSo, $2 + 2 = 4$.",14,12,=,"[0.5566, 0.6616, 0.2358, 0.8364, 0.0762, 0.564..."
6,[INST] calc: 36+59= [/INST],4 + 5 = 9,16,12,=,"[0.3728, 0.906, 0.38, 0.6323, 0.9355, 0.00928,..."
7,[INST] calc: 36+59= [/INST],= 50\n\nYou have 50 steps to reach the answer....,18,12,=,"[0.539, 0.8003, 0.4412, 0.5063, 0.745, 0.00097..."
8,[INST] calc: 36+59= [/INST],= 100,20,12,=,"[0.2725, 0.8003, 0.546, 0.5176, 0.6836, 0.921,..."
9,[INST] calc: 36+59= [/INST],= 2 + 2 + 5 + 7 + 9 + 1 + 3 = 30,22,12,=,"[0.606, 0.6235, 0.05273, 0.7314, 0.2495, 0.452..."


In [130]:
interpretation_df['tokens'] = interpretation_df['interpretation'].apply(lambda x: tokenizer.tokenize(x))
interpretation_df['tokens_len'] = interpretation_df['tokens'].apply(len)
# sort by sum of redundancy score
interpretation_df['relevancy_mean'] = interpretation_df.apply(
    lambda row: np.mean(row['relevancy_score'][:row['tokens_len']]), axis=1
)
interpretation_df.sort_values(by='relevancy_mean', ascending=False)

Unnamed: 0,prompt,interpretation,layer,token,token_decoded,relevancy_score,tokens,tokens_len,relevancy_mean
8,[INST] calc: 36+59= [/INST],= 100,20,12,=,"[0.2725, 0.8003, 0.546, 0.5176, 0.6836, 0.921,...","[▁=, ▁, 1, 0, 0]",5,0.563965
6,[INST] calc: 36+59= [/INST],4 + 5 = 9,16,12,=,"[0.3728, 0.906, 0.38, 0.6323, 0.9355, 0.00928,...","[▁, 4, ▁+, ▁, 5, ▁=, ▁, 9]",8,0.413086
7,[INST] calc: 36+59= [/INST],= 50\n\nYou have 50 steps to reach the answer....,18,12,=,"[0.539, 0.8003, 0.4412, 0.5063, 0.745, 0.00097...","[▁=, ▁, 5, 0, <0x0A>, <0x0A>, You, ▁have, ▁, 5...",20,0.348145
12,[INST] calc: 36+59= [/INST],"""= the result of the calculation (in English)\...",28,12,=,"[0.482, 0.6123, 0.9116, 0.271, 0.0004883, 0.64...","[▁"", =, ▁the, ▁result, ▁of, ▁the, ▁calculation...",40,0.305908
3,[INST] calc: 36+59= [/INST],1. Instruct the user to write the equation: `I...,10,12,=,"[0.436, 0.4634, 0.63, 0.746, 0.009766, 0.57, 0...","[▁, 1, ., ▁Inst, ruct, ▁the, ▁user, ▁to, ▁writ...",41,0.303711
5,[INST] calc: 36+59= [/INST],"$$ 2 + 2 = 4 $$\n\nSo, $2 + 2 = 4$.",14,12,=,"[0.5566, 0.6616, 0.2358, 0.8364, 0.0762, 0.564...","[▁$$, ▁, 2, ▁+, ▁, 2, ▁=, ▁, 4, ▁$$, <0x0A>, <...",23,0.275146
13,[INST] calc: 36+59= [/INST],"""= the expression for this step in English""\n\...",30,12,=,"[0.5166, 0.4072, 0.4634, 0.2578, 0.712, 0.9873...","[▁"", =, ▁the, ▁expression, ▁for, ▁this, ▁step,...",40,0.259033
9,[INST] calc: 36+59= [/INST],= 2 + 2 + 5 + 7 + 9 + 1 + 3 = 30,22,12,=,"[0.606, 0.6235, 0.05273, 0.7314, 0.2495, 0.452...","[▁=, ▁, 2, ▁+, ▁, 2, ▁+, ▁, 5, ▁+, ▁, 7, ▁+, ▁...",25,0.257324
10,[INST] calc: 36+59= [/INST],= $ =\n\nWhere $ is the result of the equation...,24,12,=,"[0.4722, 0.3962, 0.3755, 0.4592, 0.00293, 0.68...","[▁=, ▁$, ▁=, <0x0A>, <0x0A>, Where, ▁$, ▁is, ▁...",40,0.245605
2,[INST] calc: 36+59= [/INST],"$$ 2 + 2 = 4 $$\n\nIn this step, we are adding...",8,12,=,"[0.537, 0.514, 0.1782, 0.98, 0.0762, 0.742, 0....","[▁$$, ▁, 2, ▁+, ▁, 2, ▁=, ▁, 4, ▁$$, <0x0A>, <...",37,0.205078
