In [5]:
import torch
import sys
sys.path.append('../..')
from transformers import GPTJForCausalLM, AutoTokenizer
import lre.models as models
import lre.functional as functional
import os

device = "cuda:1"
weights = []
biases = []
subjects = []
wdir = 'verb+able'
weight_str = 'verb+able_reg_weight_'
bias_str = 'verb+able_reg_bias_'

weight_paths = [f for f in os.listdir(wdir) if f.startswith(weight_str)]
bias_paths = [f for f in os.listdir(wdir) if f.startswith(bias_str)]

for bias_path, weight_path in zip(bias_paths, weight_paths):
    weight = torch.load(f'{wdir}/' + weight_path)
    bias = torch.load(f'{wdir}/' + bias_path)
    subject = weight_path.split("_")[-2]
    weights.append(weight)
    biases.append(bias)
    subjects.append(subject)
    
weight = torch.stack(weights).mean(dim=0).to(device)
bias = torch.stack(biases).mean(dim=0).to(device)
print(weight.shape)
subjects

torch.Size([4096, 4096])


['2', '3', '5', '6', '0', '1', '7', '4']

In [4]:
!ls

animal_youth	   _LORE-plural.ipynb  seal		  veryold-trout
animal-youth.json  NormLRE.ipynb       veryold-butterfly
fish		   Old_NormLRE.ipynb   veryold-goat
_LORE.ipynb	   plural_reg	       veryold-lion


In [6]:
model = GPTJForCausalLM.from_pretrained("EleutherAI/gpt-j-6B", revision="float16", torch_dtype=torch.float16, low_cpu_mem_usage=True)
model.to('cuda:1')
tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-j-6B")
tokenizer.pad_token = tokenizer.eos_token

mt = models.ModelAndTokenizer(model,tokenizer)

In [7]:
#testing data
import json
json_path = 'verb+able.json'
pairs = []

with open(json_path, 'r') as file:
    data = json.load(file)
    for pair in data['samples']:
        pairs.append((pair['subject'],pair['object']))

In [8]:
import numpy as np
def get_object(mt, subject, weight,bias, prompt, h_layer, beta, h=None, k=5):
    h_index, inputs = functional.find_subject_token_index(
        mt = mt, prompt=prompt, subject=subject)
    #print(f'h_index is {h_index}, inputs is {inputs}')
    [[hs], _] = functional.compute_hidden_states(
        mt = mt, layers = [h_layer], inputs = inputs)
    #h is hs @ h_layer @ h_index
    if h == None:
        h = hs[:, h_index]
        h = h.to(device)
        
    #print(h.shape)
    
    #apply mean jacobian and bias
    z = h.mm(weight.t()) * beta + bias
    
    logits = mt.lm_head(z)
    dist = torch.softmax(logits.float(), dim=-1)
    topk = dist.topk(k=k, dim=-1)
    probs = topk.values.view(5).tolist()
    token_ids = topk.indices.view(5).tolist()
    words = [mt.tokenizer.decode(token_id) for token_id in token_ids]
    return (words, probs)

In [9]:
#get subject representation for each ICL example
#default h_layer is 5 for the LREs I calculated here.
h_layer = 5

def hs(word, prompt):
    h_index, inputs = functional.find_subject_token_index(
    mt = mt, prompt=prompt, subject=word)
    [[hs], _] = functional.compute_hidden_states(
        mt = mt, layers = [h_layer], inputs = inputs)
    #h = hs @ h_layer @ h_index
    h = hs[:, h_index]
    h = h.to(device)
    return h

In [147]:
k = torch.tensor([[2,2,2],[1,1,1]]).float()
torch.mean(k, dim=0)

tensor([1.5000, 1.5000, 1.5000])

In [148]:
json_path = 'animal-youth.json'
pairs = []
with open(json_path, 'r') as file:
    data = json.load(file)
    for pair in data['samples']:
        pairs.append((pair['subject'],pair['object']))

In [10]:
from baukit.baukit import parameter_names, get_parameter

#returns weight and bias for lns
def get_layer_norm_params(mt, start, end):
    weights = []
    biases = []
    for i in range(start, end):
        w_name = f'transformer.h.{i}.ln_1.weight'
        b_name = f'transformer.h.{i}.ln_1.bias'
        weight = get_parameter(model=mt.model,name=w_name).data
        bias = get_parameter(model=mt.model,name=b_name).data
        weights.append(weight.to(device))
        biases.append(bias.to(device))
    return weights,biases
    
ln_weights,ln_biases = get_layer_norm_params(mt,5,27)

gamma = torch.ones(4096).to(device)

for ln_weight in ln_weights:
    gamma = gamma * ln_weight

beta = torch.zeros(4096).to(device)
for ln_bias in ln_biases:
    beta = beta + ln_bias

#this feels very stupid
print(gamma,beta)

tensor([0.7586, 0.6245, 0.2535,  ..., 0.2904, 0.4962, 0.8786], device='cuda:1') tensor([ 0.0448, -0.0083, -0.4259,  ..., -0.1480,  0.1817, -0.6047],
       device='cuda:1')


In [16]:
weight = weight.to(device)
bias = bias.to(device)

prompt = f"If you can {subj} something, that thing is"

for pair in pairs:
    subj, objs = pair
    prompt = prompt
    #LRE (beta = 1)
    (lre_pred, lre_prob) = get_object(mt, subj, weight, bias, prompt, h_layer, 2.5)

    #LayerNormLRE 
    #should actually be factoring in the original weight and bias calculation.
    #We calculate bias based on layer_norm(Jh)
    
    subj_hs = hs(subj, prompt)
    hss_hs = torch.stack([subj_hs] + hss).to(device)
    #the first element corresponds to subj_hs.
    norm_subj = layer_norm(hss_hs, (1,2), gamma,beta)[0]
    #print(f'norm_subj shape is {norm_subj.shape}')
    #(pred, prob) = get_object(mt, subj, weight, bias, prompt, h_layer, 1, norm_subj.half())
    print(f'LRE: {lre_pred} SOL: {objs}')

LRE: [' acceptable', ' accept', ' able', ' capable', ' accepted'] SOL: ['acceptable']


ValueError: "achieve" not found in "If you can accept something, that thing is"