In [1]:
# Lua completion prompt: a `--` doc comment with worked examples, followed by
# the function signature the model is asked to complete.
# NOTE(review): the examples call `lst(...)` while the function is declared as
# `sum_squares`. The text is passed to the model as-is, so it is deliberately
# left unchanged here.
PROMPT = """
-- You are given a table of numbers.
-- You need to return the sum of squared numbers in the given table,
-- round each element in the table to the upper int(Ceiling) first.
--
-- Examples:
-- >>> lst({1.0, 2.0, 3.0})
-- 14
-- >>> lst({1.0, 4.0, 9.0})
-- 98
-- >>> lst({1.0, 3.0, 5.0, 7.0})
-- 84
-- >>> lst({1.4, 4.2, 0.0})
-- 29
-- >>> lst({-2.4, 1.0, 1.0})
-- 6
local function sum_squares(lst)
"""

In [2]:
from transformers import AutoModelForCausalLM, AutoTokenizer
# Baseline setup: tokenizer and weights both come from the
# "bigcode/starcoderbase-1b" checkpoint; the slow tokenizer is requested
# explicitly (use_fast=False) and `.cuda()` moves the model to the GPU.
tokenizer = AutoTokenizer.from_pretrained("bigcode/starcoderbase-1b", use_fast=False)
model = AutoModelForCausalLM.from_pretrained("bigcode/starcoderbase-1b").cuda()

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
from lib import *

In [4]:
# Complete PROMPT with the currently loaded model. The result is stored as
# `out_base` and reused by the later attention cells.
# `generate_with_stop` is not defined in this notebook; presumably it is
# provided by the `from lib import *` cell.
out_base = generate_with_stop(model, tokenizer, PROMPT)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


In [5]:
# Decode the generated tokens back to text to inspect prompt + completion.
print(tokenizer.decode(out_base))


-- You are given a table of numbers.
-- You need to return the sum of squared numbers in the given table,
-- round each element in the table to the upper int(Ceiling) first.
--
-- Examples:
-- >>> lst({1.0, 2.0, 3.0})
-- 14
-- >>> lst({1.0, 4.0, 9.0})
-- 98
-- >>> lst({1.0, 3.0, 5.0, 7.0})
-- 84
-- >>> lst({1.4, 4.2, 0.0})
-- 29
-- >>> lst({-2.4, 1.0, 1.0})
-- 6
local function sum_squares(lst)
	local sum = 0
	for _, v in ipairs(lst) do
		sum = sum + v * v
	end
	return math.ceil(sum)
end


In [6]:
# Pooled attention of the base model over its own completion (`out_base`).
# NOTE(review): the pooling itself lives in `mean_pool_attn_from_toks` (not
# shown here); presumably it yields one value per token -- confirm in lib.
mean_pooled_base = mean_pool_attn_from_toks(model, out_base)

In [7]:
# Print the tokens with ANSI background colours derived from the base model's
# pooled attention.
print(visualize_attn(tokenizer, out_base, mean_pooled_base))

[41m[30m
[0m[41m[30m--[0m[41m[30m You[0m[43m[30m are[0m[43m[30m given[0m[43m[30m a[0m[43m[30m table[0m[42m[30m of[0m[43m[30m numbers[0m[43m[30m.[0m[41m[30m
[0m[43m[30m--[0m[43m[30m You[0m[42m[30m need[0m[43m[30m to[0m[43m[30m return[0m[43m[30m the[0m[42m[30m sum[0m[42m[30m of[0m[42m[30m squared[0m[42m[30m numbers[0m[42m[30m in[0m[42m[30m the[0m[47m[30m given[0m[42m[30m table[0m[43m[30m,[0m[41m[30m
[0m[43m[30m--[0m[43m[30m round[0m[47m[30m each[0m[47m[30m element[0m[47m[30m in[0m[47m[30m the[0m[47m[30m table[0m[42m[30m to[0m[47m[30m the[0m[47m[30m upper[0m[47m[30m int[0m[42m[30m([0m[42m[30mCe[0m[42m[30miling[0m[47m[30m)[0m[47m[30m first[0m[43m[30m.[0m[41m[30m
[0m[43m[30m--[0m[41m[30m
[0m[43m[30m--[0m[43m[30m Examples[0m[41m[30m:[0m[43m[30m
[0m[43m[30m--[0m[41m[30m >>>[0m[41m[30m lst[0m[41m[30m({[0m[41m[30m1[0m[41m[30m.

In [8]:
# Load the second checkpoint (nuprl/MultiPLCoder-1b, pinned to a fixed
# revision) while keeping the StarCoderBase tokenizer. Presumably this is the
# fine-tuned model, given the `_fine` suffix used in the cells below.
# NOTE: this rebinds `model`. After this cell has run, re-executing any earlier
# cell that uses `model` will silently use these weights instead of the base
# ones.
from transformers import AutoModelForCausalLM, AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained("bigcode/starcoderbase-1b", use_fast=False)
model = AutoModelForCausalLM.from_pretrained("nuprl/MultiPLCoder-1b", revision="7e96d931547e342ad0661cdd91236fe4ccf52545").cuda()

In [11]:
# Same pooling as for the base model, on the same tokens (`out_base`), but
# with the model loaded in the previous cell, so both attention profiles are
# computed over identical input.
mean_pooled_fine = mean_pool_attn_from_toks(model, out_base)

In [12]:
# Same visualisation as for the base model, now coloured by the second
# model's pooled attention.
print(visualize_attn(tokenizer, out_base, mean_pooled_fine))

[41m[30m
[0m[41m[30m--[0m[41m[30m You[0m[43m[30m are[0m[43m[30m given[0m[43m[30m a[0m[42m[30m table[0m[42m[30m of[0m[42m[30m numbers[0m[43m[30m.[0m[41m[30m
[0m[42m[30m--[0m[43m[30m You[0m[42m[30m need[0m[42m[30m to[0m[42m[30m return[0m[43m[30m the[0m[42m[30m sum[0m[42m[30m of[0m[42m[30m squared[0m[43m[30m numbers[0m[42m[30m in[0m[42m[30m the[0m[47m[30m given[0m[47m[30m table[0m[43m[30m,[0m[42m[30m
[0m[42m[30m--[0m[43m[30m round[0m[47m[30m each[0m[47m[30m element[0m[47m[30m in[0m[47m[30m the[0m[47m[30m table[0m[47m[30m to[0m[47m[30m the[0m[42m[30m upper[0m[47m[30m int[0m[42m[30m([0m[42m[30mCe[0m[47m[30miling[0m[47m[30m)[0m[42m[30m first[0m[43m[30m.[0m[41m[30m
[0m[42m[30m--[0m[43m[30m
[0m[42m[30m--[0m[43m[30m Examples[0m[41m[30m:[0m[42m[30m
[0m[43m[30m--[0m[43m[30m >>>[0m[43m[30m lst[0m[41m[30m({[0m[41m[30m1[0m[41m[30m.

In [44]:
def attn_distribution(tokenizer, out, meaned_attns):
    """Split per-token attention values into comment, signature and body buckets.

    The token sequence is walked once and each position is assigned to one of
    three consecutive sections using simple textual heuristics on the decoded
    tokens:

    * ``"comment"`` -- everything from the start of the sequence;
    * ``"sig"`` -- begins at the first token whose text contains ``"local"``
      and whose preceding token contains a newline;
    * ``"body"`` -- begins at the token following the first signature token
      whose text contains ``")"``.

    Sections only ever advance (comment -> sig -> body), so later ``local``
    statements or parentheses inside the body do not start a new section.

    Args:
        tokenizer: Object exposing ``decode(token)`` that returns the text of
            a single token.
        out: Iterable of tokens, each one accepted by ``tokenizer.decode``.
        meaned_attns: Indexable holding one attention value per position of
            ``out``; each value must be convertible by ``torch.tensor``.

    Returns:
        dict with keys ``"comment"``, ``"sig"`` and ``"body"``, each mapping
        to a 1-D tensor of the attention values that fell into that section
        (empty if the section was never reached).

    Raises:
        IndexError: If ``meaned_attns`` has fewer entries than ``out``.
    """
    buckets = {"comment": [], "sig": [], "body": []}
    section = "comment"
    # Text of the previous token; empty before the first one. Carrying it
    # forward means each token is decoded exactly once.
    prev_dec = ""

    for i, tok in enumerate(out):
        dec = tokenizer.decode(tok)
        attn = meaned_attns[i]

        if section == "comment" and "local" in dec and "\n" in prev_dec:
            # First `local` at the start of a line: the signature begins here.
            section = "sig"
        elif section == "sig" and ")" in prev_dec:
            # The previous token closed the parameter list; the closing token
            # itself still belongs to the signature.
            section = "body"

        buckets[section].append(attn)
        prev_dec = dec

    return {name: torch.tensor(values) for name, values in buckets.items()}

In [56]:
# Bucket the second model's pooled attention (`mean_pooled_fine`) into
# comment / signature / body sections.
distr = attn_distribution(tokenizer, out_base, mean_pooled_fine)

In [59]:
# Median attention over the prompt tokens (comment + signature buckets
# concatenated) divided by the median attention over the body tokens, for the
# buckets currently held in `distr`.
prompt_to_body_ratio = torch.cat((distr["comment"], distr["sig"])).median() / distr["body"].median()
prompt_to_body_ratio

tensor(0.2778)

In [60]:
# Same bucketing for the base model's pooled attention.
# NOTE: this rebinds `distr`, so it no longer holds the buckets computed from
# `mean_pooled_fine`.
distr = attn_distribution(tokenizer, out_base, mean_pooled_base)

In [61]:
# Prompt-to-body median attention ratio for the base model (the `_b` suffix),
# computed exactly like `prompt_to_body_ratio` so the two are comparable.
prompt_to_body_ratio_b = torch.cat((distr["comment"], distr["sig"])).median() / distr["body"].median()
prompt_to_body_ratio_b

tensor(0.3031)