In [6]:
import torch
import sys
sys.path.append('../..')
from transformers import GPTJForCausalLM, AutoTokenizer
import lre.models as models
import lre.functional as functional
import os

device = "cuda:1"
weights = []
biases = []
subjects = []
wdir = 'animal_youth'
weight_str = 'weight_animal - youth_sem1'
bias_str = 'bias_animal - youth_sem1'

weight_paths = [f for f in os.listdir(wdir) if f.startswith(weight_str)]
bias_paths = [f for f in os.listdir(wdir) if f.startswith(bias_str)]

for bias_path, weight_path in zip(bias_paths, weight_paths):
    weight = torch.load(f'{wdir}/' + weight_path)
    bias = torch.load(f'{wdir}/' + bias_path)
    subject = weight_path.split("_")[-2]
    weights.append(weight)
    biases.append(bias)
    subjects.append(subject)
    
weight = torch.stack(weights).mean(dim=0).to(device)
bias = torch.stack(biases).mean(dim=0).to(device)
subjects

['fly', 'fox', 'shark', 'snake', 'raccoon', 'gorilla', 'panda', 'horse']

In [2]:
model = GPTJForCausalLM.from_pretrained("EleutherAI/gpt-j-6B", revision="float16", torch_dtype=torch.float16, low_cpu_mem_usage=True)
model.to('cuda:1')
tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-j-6B")
tokenizer.pad_token = tokenizer.eos_token

mt = models.ModelAndTokenizer(model,tokenizer)

In [3]:
#testing data
import json
json_path = 'animal-youth.json'
pairs = []
with open(json_path, 'r') as file:
    data = json.load(file)
    for pair in data['samples']:
        pairs.append((pair['subject'],pair['object']))

In [5]:
import numpy as np
def get_object(mt, subject, weight,bias, prompt, h_layer, beta, k=5):
    h_index, inputs = functional.find_subject_token_index(
        mt = mt, prompt=prompt, subject=subject)
    #print(f'h_index is {h_index}, inputs is {inputs}')
    [[hs], _] = functional.compute_hidden_states(
        mt = mt, layers = [h_layer], inputs = inputs)
    #h is hs @ h_layer @ h_index
    h = hs[:, h_index]
    h = h.to(device)
    
    #apply mean jacobian and bias
    z = h.mm(weight.t()) * beta + bias
    
    logits = mt.lm_head(z)
    dist = torch.softmax(logits.float(), dim=-1)
    topk = dist.topk(k=k, dim=-1)
    probs = topk.values.view(5).tolist()
    token_ids = topk.indices.view(5).tolist()
    words = [mt.tokenizer.decode(token_id) for token_id in token_ids]
    return (words[0], probs[0])

In [29]:
#get subject representation for each ICL example
#default h_layer is 5 for the LREs I calculated here.
h_layer = 5

def hs(word, prompt):
    h_index, inputs = functional.find_subject_token_index(
    mt = mt, prompt=prompt, subject=word)
    [[hs], _] = functional.compute_hidden_states(
        mt = mt, layers = [h_layer], inputs = inputs)
    #h = hs @ h_layer @ h_index
    h = hs[:, h_index]
    h = h.to(device)
    return h

In [30]:
eps = 0.00001

def layer_norm(s, gamma, beta):
    mean = torch.mean(s, dim=dim, keepdim=True)
    var = torch.square(s - mean).mean(dim=dim + eps)
    return (s - mean) / torch.sqrt(var + eps) * gamma + beta

subj = "fox"
prompt = f"The offspring of a {subj} is referred to as a"

In [31]:
json_path = 'animal-youth.json'
pairs = []
with open(json_path, 'r') as file:
    data = json.load(file)
    for pair in data['samples']:
        pairs.append((pair['subject'],pair['object']))

In [35]:
from baukit.baukit import parameter_names, get_parameter

#returns weight and bias for lns
def get_layer_norm_params(mt, start, end):
    weights = []
    biases = []
    for i in range(start, end):
        w_name = f'transformer.h.{i}.ln_1.weight'
        b_name = f'transformer.h.{i}.ln_1.bias'
        weight = get_parameter(model=mt.model,name=w_name).data
        bias = get_parameter(model=mt.model,name=b_name).data
        weights.append(weight.cpu())
        biases.append(bias.cpu())
    return weights,biases
    
ln_weights,ln_biases = get_layer_norm_params(mt,5,27)

gamma = torch.ones(4096)

for weight in ln_weights:
    gamma = gamma * weight

beta = torch.zeros(4096)
for bias in ln_biases:
    beta = beta + bias

#this feels very stupid
print(gamma,beta)

tensor([0.7586, 0.6245, 0.2535,  ..., 0.2904, 0.4962, 0.8786]) tensor([ 0.0448, -0.0083, -0.4259,  ..., -0.1480,  0.1817, -0.6047])


In [34]:
hss = []
for subj in subjects:
    prompt = f"The offspring of a {subj} is referred to as a"
    hss.append(hs(subj, prompt))

weight = weight.to(device)
bias = bias.to(device)

for pair in pairs:
    subj, objs = pair
    prompt = f"The offspring of a {subj} is referred to as a"
    
    #LRE (beta = 1)
    (pred, prob) = get_object(mt, subj, weight, bias, prompt, h_layer, 1)
    print(f'{subj},{pred},{objs}')

    #LayerNormLRE should actually factor into original weight and bias calculation.
    subj_hs = hs(subj, prompt)
    hss_hs = hss + [subj_hs]
    norm_subj = layer_norm(hss_hs, gamma, beta)
    (pred, prob) = get_object(mt, norm_subj, weight, bias, prompt, h_layer, 1)
    print(f'{subj},{pred},{objs}')

RuntimeError: mat2 must be a matrix