# Imports

In [2]:
import seaborn as sns
import pandas as pd
import torch
import os
from general_utils import (
  ModelAndTokenizer,
)
from patchscopes_utils import set_hs_patch_hooks_t5, set_hs_patch_hooks_neox, set_hs_patch_hooks_gptj, set_hs_patch_hooks_llama, evaluate_patch_next_token_prediction
from tqdm import tqdm
# This notebook only runs inference, so autograd is switched off globally.
torch.set_grad_enabled(False)

# Configure seaborn in a single call. The previous version called
# sns.set_theme twice; the second call (style only) re-applied the default
# "notebook" plotting context and thereby reset the font sizes requested in
# the first call. Passing context, style and rc together keeps all of them.
sns.set_theme(
    context="notebook",
    style="whitegrid",
    rc={
        "font.size": 16,
        "axes.titlesize": 16,
        "axes.labelsize": 16,
        "xtick.labelsize": 16.0,
        "ytick.labelsize": 16.0,
        "legend.fontsize": 16.0,
    },
)

# Subset of the "Set1" palette: entries 2-4 followed by entries 7 onward.
palette_ = sns.color_palette("Set1")
palette = palette_[2:5] + palette_[7:]

# Register tqdm's pandas integration (progress_apply and friends).
tqdm.pandas()

In [3]:
# Registry mapping a model identifier to the function that installs the
# hidden-state patching hooks for that architecture. Checkpoints are grouped
# by the hook setter they share; the resulting dict has one entry per name.
_hook_setter_to_models = (
    (set_hs_patch_hooks_neox, ("EleutherAI/pythia-12b",)),
    (
        set_hs_patch_hooks_llama,
        (
            "meta-llama/Llama-2-13b-hf",
            "lmsys/vicuna-7b-v1.5",
            "./stable-vicuna-13b",
            "CarperAI/stable-vicuna-13b-delta",
        ),
    ),
    (set_hs_patch_hooks_gptj, ("EleutherAI/gpt-j-6b",)),
    (set_hs_patch_hooks_t5, ("plenz/GLM-t5-3b", "google-t5/t5-3b")),
)
model_to_hook = {
    checkpoint: hook_setter
    for hook_setter, checkpoints in _hook_setter_to_models
    for checkpoint in checkpoints
}

In [4]:
# Point the Hugging Face cache at a fixed directory via the HF_HOME
# environment variable.
# NOTE(review): this assignment runs after the imports at the top of the
# notebook; if those imports already pulled in Hugging Face libraries, the
# cache location may have been resolved before this line - verify that the
# setting takes effect. The path is also an absolute, user-specific location.
os.environ.update(HF_HOME="/home/students/kolber/seminars/kolber/.cache")

In [5]:
# Load model
#
# model_name must be one of the keys of model_to_hook, because the hook
# setter is looked up by that exact string. The previous value
# ("facebook/t5-3b") is not a key of that registry, so the lookup raised
# KeyError. The GLM checkpoint is used here because the next section calls
# mt.model.encoder.data_processor, which presumably only the GLM wrapper
# provides - TODO confirm against the model card.
model_name = "plenz/GLM-t5-3b"
sos_tok = False

# Resolve the hook setter before loading any weights so that an unsupported
# model name fails immediately instead of after a multi-gigabyte load.
patch_hook_setter = model_to_hook[model_name]

# The 12B/13B checkpoints are loaded in half precision; every other model
# passes torch_dtype=None to ModelAndTokenizer.
if "13b" in model_name or "12b" in model_name:
    torch_dtype = torch.float16
else:
    torch_dtype = None

mt = ModelAndTokenizer(
    model_name,
    low_cpu_mem_usage=False,
    torch_dtype=torch_dtype,
    device="cpu"
)
mt.set_hs_patch_hooks = patch_hook_setter
mt.model.eval()

  return torch.load(checkpoint_file, map_location="cpu")


T5ForConditionalGeneration(
  (shared): Embedding(32128, 1024)
  (encoder): T5EncoderModel(
    (shared): Embedding(32128, 1024)
    (encoder): T5Stack(
      (embed_tokens): Embedding(32128, 1024)
      (block): ModuleList(
        (0): T5Block(
          (layer): ModuleList(
            (0): T5LayerSelfAttention(
              (SelfAttention): T5Attention(
                (q): Linear(in_features=1024, out_features=4096, bias=False)
                (k): Linear(in_features=1024, out_features=4096, bias=False)
                (v): Linear(in_features=1024, out_features=4096, bias=False)
                (o): Linear(in_features=4096, out_features=1024, bias=False)
                (relative_attention_bias): Embedding(35, 32)
              )
              (layer_norm): T5LayerNorm()
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (1): T5LayerFF(
              (DenseReluDense): T5DenseActDense(
                (wi): Linear(in_features=1024, out_features=16384,

# Next token prediction

In [9]:
# Example 1: a small graph of (head, relation, tail) triplets plus a text input.
graph_1 = [
    ('black poodle', 'is a', 'dog'),
    ('dog', 'is a', 'animal'),
    ('cat', 'is a', 'animal'),
]
text_1 = 'summarize: A bird is an animal.'  # with T5 prefix

# Example 2: graph only, no accompanying text.
graph_2 = [
    ('dog', 'is a', 'animal'),
    ('dog', 'has', 'tail'),
    ('dog', 'has', 'fur'),
    ('fish', 'is a', 'animal'),
    ('fish', 'has', 'scales'),
]
text_2 = None  # T5 MLM

# 'global' or 'local': selects whether the global or the local GLM is used
# (see the paper for details).
how = 'global'

# Encode each (graph, text) pair with the model's own data processor, then
# collate both instances into a single batch on the CPU.
data_processor = mt.model.encoder.data_processor
data_1, data_2 = (
    data_processor.encode_graph(tokenizer=mt.tokenizer, g=graph, text=text, how=how)
    for graph, text in ((graph_1, text_1), (graph_2, text_2))
)
datas = [data_1, data_2]
model_inputs = data_processor.to_batch(
    data_instances=datas,
    tokenizer=mt.tokenizer,
    max_seq_len=None,
    device='cpu',
)

print(data_1)

# Run the encoder once; its result is then handed to generate() as
# encoder_outputs. `outputs` is rebound to the generated sequences afterwards.
outputs = mt.model.encoder(**model_inputs)

print('generate conditional on encoded graph and text')
outputs = mt.model.generate(encoder_outputs=outputs, max_new_tokens=10)

# The first generation is decoded without special tokens, the second with them.
print('generation 1:', mt.tokenizer.decode(outputs[0], skip_special_tokens=True))
print('generation 2:', mt.tokenizer.decode(outputs[1], skip_special_tokens=False))


Data(input_ids=tensor([[   19,     3,     9,    19,     3,     9,    19,     3,     9,  2586,
          1001,     3,   102, 14957,  1712,  1782, 21603,    10,    71,  5963,
            19,    46,  2586,     5,     1]]), relative_position=tensor([[[ 0,  1,  2,  0,  0,  0,  0,  0,  0,  0, -4, -3, -2, -1,  0,  3,  2,
           2,  2,  2,  2,  2,  2,  2,  2],
         [-1,  0,  1,  0,  0,  0,  0,  0,  0,  0, -5, -4, -3, -2,  0,  2,  2,
           2,  2,  2,  2,  2,  2,  2,  2],
         [-2, -1,  0,  0,  0,  0,  0,  0,  0,  0, -6, -5, -4, -3,  0,  1,  2,
           2,  2,  2,  2,  2,  2,  2,  2],
         [ 0,  0,  0,  0,  1,  2,  0,  0,  0,  3,  0,  0,  0,  0,  0, -1,  2,
           2,  2,  2,  2,  2,  2,  2,  2],
         [ 0,  0,  0, -1,  0,  1,  0,  0,  0,  2,  0,  0,  0,  0,  0, -2,  2,
           2,  2,  2,  2,  2,  2,  2,  2],
         [ 0,  0,  0, -2, -1,  0,  0,  0,  0,  1,  0,  0,  0,  0,  0, -3,  2,
           2,  2,  2,  2,  2,  2,  2,  2],
         [ 0,  0,  0,  0,  0,  0,  0

In [7]:
# Layer sweep with an identity ("repeat") target prompt: for every source
# layer, call evaluate_patch_next_token_prediction with the source prompt /
# position and the target prompt / position below, and record the token it
# returns. Only the single hard-coded source prompt is used here; no dataset
# is read in this cell.
# NOTE(review): the recorded run of this cell ended with
# "NameError: name 'evaluate_patch_t5' is not defined". That name is not
# referenced in this cell, so the failure presumably originates inside
# patchscopes_utils - TODO confirm and fix there.
device = mt.model.device  # not used below; kept in case later cells rely on it

# Loop-invariant inputs, defined once instead of on every iteration.
prompt_source = "United States of America."
prompt_target = "repeat: cat -> cat\n1135 -> 1135\nhello -> hello\n?"
position_source = 3
position_target = -1

# Source layers are wrapped onto this many target layers.
# NOTE(review): 24 equals the layer count shown by the recorded progress bar,
# which makes the modulo a no-op for this model - confirm the intended value
# before reusing the cell with another architecture.
NUM_TARGET_LAYERS = 24

records = []
for source_layer in tqdm(range(int(mt.num_layers))):
    target_layer = source_layer % NUM_TARGET_LAYERS
    print(f"Layer {source_layer} -> {target_layer}")
    predicted_token = evaluate_patch_next_token_prediction(
        mt, prompt_source, prompt_target, source_layer, target_layer,
        position_source, position_target, position_prediction=position_target
        )

    records.append({
        'source_layer': source_layer,
        'target_layer': target_layer,
        'token': mt.tokenizer.decode(predicted_token),
    })

# Build the frame once from the collected records and end the cell with it so
# the notebook renders it as a table.
results = pd.DataFrame.from_records(records)
results


  0%|          | 0/24 [00:00<?, ?it/s]

Layer 0 -> 0





NameError: name 'evaluate_patch_t5' is not defined