In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer

  from .autonotebook import tqdm as notebook_tqdm


In [52]:
# Load the tokenizer published with the StarCoderBase-1B checkpoint.
# use_fast=False requests the non-"fast" tokenizer implementation.
tokenizer = AutoTokenizer.from_pretrained(
    "bigcode/starcoderbase-1b",
    use_fast=False,
)

In [3]:
# Load the causal language model weights for the StarCoderBase-1B checkpoint
# and move the model onto the CUDA device.
model = (
    AutoModelForCausalLM
    .from_pretrained("bigcode/starcoderbase-1b")
    .cuda()
)

Downloading (…)lve/main/config.json: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1.05k/1.05k [00:00<00:00, 10.8MB/s]
Downloading model.safetensors: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.55G/4.55G [06:43<00:00, 11.3MB/s]
Downloading (…)neration_config.json: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 111/111 [00:00<00:00, 491kB/s]


In [148]:
# Completion prompt given to the model: a task description written as `--` line
# comments followed by an open `local function reverse_delete(s, c)` header.
# The text is passed to the tokenizer verbatim (including the leading and trailing
# newline), so it must not be reworded or reformatted.
PROMPT = """
-- Task
-- We are given two strings s and c, you have to deleted all the characters in s 
-- that are equal to any character in c
-- then check if the result string is palindrome.
-- A string is called palindrome if it reads the same backward as forward.
-- You should return a tuple containing the result string and True/False for the check.
--
-- Example
-- For s = "abcde", c = "ae", the result should be ('bcd',False)
-- For s = "abcdef", c = "b" the result should be ('acdef',False)
-- For s = "abcdedcba", c = "ab", the result should be ('cdedc',True)
local function reverse_delete(s, c)
"""

In [261]:
def find_end_tok_i(tokenizer, enc, stop_seqs=("\nend", "\n--")):
    """Return how many leading tokens of `enc` are needed before a stop sequence appears.

    The prefixes ``enc[:0]``, ``enc[:1]``, ... ``enc[:len(enc)]`` are decoded in
    turn, and the length of the first prefix whose decoded text contains any of
    `stop_seqs` is returned. ``enc[:result]`` therefore includes the token that
    completes the stop sequence.

    Args:
        tokenizer: Object exposing ``decode(ids) -> str``.
        enc: Sliceable sequence of token ids (anything supporting ``len`` and
            ``enc[:i]``).
        stop_seqs: Collection of strings that mark the end of the completion.

    Returns:
        An int in ``[0, len(enc)]``. When no stop sequence occurs anywhere,
        ``len(enc)`` is returned so that no token is discarded.
    """
    def contains_stop(prefix):
        text = tokenizer.decode(prefix)
        return any(stop in text for stop in stop_seqs)

    # Every prefix is decoded from scratch because a stop string may straddle
    # several tokens. The range deliberately includes len(enc): the previous
    # bound (len(enc) - 1) never examined the full sequence, so a stop sequence
    # completed by the very last token was missed and that token was cut off.
    for i in range(len(enc) + 1):
        if contains_stop(enc[:i]):
            return i

    return len(enc)
    
    
# Encode the prompt as a PyTorch tensor and move it onto the CUDA device.
toks = (
    tokenizer
    .encode(PROMPT, return_tensors="pt")
    .cuda()
)

In [295]:
# Sample a completion for the prompt (at most 150 new tokens, temperature 0.2, top-p 0.95).
# NOTE(review): `stopping_criteria` is not defined in any cell visible in this notebook;
# it presumably comes from a cell that was run earlier or has been deleted. Confirm
# before relying on Restart & Run All.
out = model.generate(toks, do_sample=True, max_new_tokens=150, temperature=0.2, top_p=0.95, stopping_criteria=stopping_criteria)
# Search for a stop sequence in the window that starts at the last prompt token
# (index len(toks[0]) - 1), then offset the window-relative result by the prompt length.
end_tok = find_end_tok_i(tokenizer, out[0][len(toks[0])-1:]) + len(toks[0])
# Truncate the first generated sequence at the stop point. Note that `out` is rebound
# here from the generate() result to that truncated sequence.
out = out[0][:end_tok-1]

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


In [296]:
# Show the truncated sequence (prompt plus completion) as text.
print(tokenizer.decode(out))


-- Task
-- We are given two strings s and c, you have to deleted all the characters in s 
-- that are equal to any character in c
-- then check if the result string is palindrome.
-- A string is called palindrome if it reads the same backward as forward.
-- You should return a tuple containing the result string and True/False for the check.
--
-- Example
-- For s = "abcde", c = "ae", the result should be ('bcd',False)
-- For s = "abcdef", c = "b" the result should be ('acdef',False)
-- For s = "abcdedcba", c = "ab", the result should be ('cdedc',True)
local function reverse_delete(s, c)
	local result = ""
	for i = 1, #s do
		if s:sub(i, i) ~= c then
			result = result.. s:sub(i, i)
		end
	end
	return result, true
end


In [297]:
# Run the truncated sequence through the model again, this time requesting
# the attention weights alongside the regular outputs.
enc = model(out, output_attentions=True)
attns = enc["attentions"]  # quite a deep tensor...

# Index helpers for the nested attention output, which is addressed as
# attns[layer_i][batch_i][attn_head_i][<position>].
layer_i = 0
batch_i = 0  # we only have one prompt
attn_head_i = 0
print(end_tok)

# Take the last layer for our single prompt, then average over its leading axis
# (size 16 in the recorded run; presumably one entry per attention head).
last_layer_attns = attns[-1][batch_i]
print(last_layer_attns.size())
last_layer_attns_head_mean = last_layer_attns.mean(dim=0)

230
torch.Size([16, 229, 229])


In [298]:
# Sanity check: length of the first field of the model output
# (229 in the recorded run, the same as len(out)).
len(enc[0])

229

In [299]:
# Number of tokens kept after truncating at the stop point.
len(out)

229

In [308]:
import pandas as pd
# Summary statistics for the last row of `last_layer_attns_head_mean`.
# describe() is computed on a single-column frame; [0] selects that column's statistics.
pd.DataFrame(last_layer_attns_head_mean[-1].detach().cpu().numpy()).describe()[0]

0.00033501291181892157

In [328]:
import pandas as pd
# ANSI escape sequences used to color tokens when visualizing attention.
# NOTE(review): SGR code 97 is normally rendered as bright white, even though
# the variable is called `color_grey`; confirm the intended color.
color_red = '\033[91m'
color_green = '\033[92m'
color_yellow = '\033[93m'
color_grey = '\033[97m'


def color_tok(attn, distr) -> str:
    """Map an attention score to a color escape based on quartile cut-offs.

    Args:
        attn: The score to classify.
        distr: Mapping indexable by "25%", "50%" and "75%" (for example the
            result of a pandas ``describe()``).

    Returns:
        `color_grey` below the 25% mark, `color_yellow` below the 50% mark,
        `color_green` below the 75% mark, and `color_red` otherwise.
    """
    # Cut-offs are checked in ascending order; the first one the score falls
    # under decides the color.
    buckets = (
        ("25%", color_grey),
        ("50%", color_yellow),
        ("75%", color_green),
    )
    for cutoff, color in buckets:
        if attn < distr[cutoff]:
            return color
    return color_red
    

def visualize_attn(tokenizer, out, meaned_attns):
    """Print each token of `out`, prefixed by a color escape chosen from its attention score.

    Args:
        tokenizer: Object exposing ``decode(token) -> str``.
        out: Iterable of token ids to display.
        meaned_attns: Tensor of scores, indexed by token position. Its
            quartiles (via pandas ``describe()``) define the color buckets.

    Nothing is returned; the colored text is written to stdout without a
    trailing newline. No reset escape is emitted after the last token.
    """
    quartiles = pd.DataFrame(meaned_attns.detach().cpu().numpy()).describe()[0]
    for pos, tok in enumerate(out):
        prefix = color_tok(meaned_attns[pos], quartiles)
        print(prefix, tokenizer.decode(tok), sep="", end="")

# Color every token using the last row (index -1) of the head-averaged attention
# matrix; presumably the attention from the final position to each token.
visualize_attn(tokenizer, out, last_layer_attns_head_mean[-1])

[91m
[91m--[92m Task[91m
[91m--[91m We[92m are[92m given[93m two[92m strings[93m s[93m and[93m c[92m,[92m you[93m have[92m to[93m deleted[93m all[93m the[93m characters[93m in[93m s[92m [91m
[93m--[92m that[93m are[93m equal[97m to[97m any[93m character[93m in[93m c[91m
[92m--[93m then[93m check[97m if[97m the[93m result[93m string[93m is[91m pal[92mindrome[91m.[91m
[92m--[92m A[97m string[93m is[93m called[92m pal[93mindrome[97m if[97m it[93m reads[97m the[97m same[93m backward[97m as[93m forward[91m.[91m
[92m--[92m You[92m should[93m return[93m a[93m tuple[93m containing[93m the[93m result[97m string[97m and[92m True[92m/[93mFalse[93m for[93m the[93m check[91m.[91m
[93m--[91m
[92m--[92m Example[92m
[92m--[93m For[97m s[92m =[92m "[92mabc[92mde[92m",[93m c[97m =[97m "[97mae[97m",[97m the[93m result[93m should[97m be[97m ('[93mbcd[97m',[91mFalse[92m)[92m
[93m--[97m For[97m 