In [5]:
import torch
import pandas as pd
from transformers import AutoTokenizer, AutoModelForCausalLM
from datasets import load_dataset

from kn import KnowledgeNeurons
from metrics import EM_compute

In [2]:
# Load the first 500 rows of the ParaRel patterns train split as a DataFrame,
# then the tokenizer and causal LM from the same local checkpoint directory.
MODEL_PATH = "/share/nlp/chitchat/models/Llama-3.1-8B-Instruct"

ds = (
    load_dataset("coastalcph/pararel_patterns")['train']
    .select(range(500))
    .to_pandas()
)

tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
# nn.Module.eval() returns the module itself, so it can be chained onto the load.
model = AutoModelForCausalLM.from_pretrained(MODEL_PATH).eval()

# Use the GPU when one is visible, otherwise stay on CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(128256, 4096)
    (layers): ModuleList(
      (0-31): 32 x LlamaDecoderLayer(
        (self_attn): LlamaSdpaAttention(
          (q_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear(in_features=4096, out_features=1024, bias=False)
          (v_proj): Linear(in_features=4096, out_features=1024, bias=False)
          (o_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (rotary_emb): LlamaRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear(in_features=4096, out_features=14336, bias=False)
          (up_proj): Linear(in_features=4096, out_features=14336, bias=False)
          (down_proj): Linear(in_features=14336, out_features=4096, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm((4096,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSNorm((4096,), eps=1e-05)
      )
    )
    (n

In [3]:
# Shared settings for the evaluation loop: both values are forwarded unchanged
# to kn_finder.get_coarse_neurons(...) as keyword arguments of the same name.
layer_idx, adaptive_threshold = 28, 0.3

# Knowledge-neuron finder wrapping the loaded model and tokenizer.
kn_finder = KnowledgeNeurons(model, tokenizer)

In [4]:
def rephrase_query(original_query: str, max_new_tokens: int = 32) -> str:
    """Ask the loaded model for a paraphrase of ``original_query``.

    The previous body was a stub (``pass``), so the function returned ``None``
    and callers that interpolated the result into a prompt ended up with the
    literal text "None". This implementation follows the commented-out draft
    (same instruction text) with two corrections: ``max_new_tokens`` is now a
    real parameter, and only the newly generated tokens are decoded so the
    instruction itself is not echoed back into the result.

    Relies on the notebook-level ``tokenizer``, ``model`` and ``device``.

    Args:
        original_query: The query text to paraphrase.
        max_new_tokens: Upper bound on the number of generated tokens.

    Returns:
        The stripped model continuation, or ``original_query`` unchanged when
        the model produced no text (so callers always receive a usable string).
    """
    instruction = f"Rephrase the following question in a similar style: '{original_query}'"

    # return_dict=True yields input_ids + attention_mask that can be unpacked
    # straight into generate().
    inputs = tokenizer.apply_chat_template(
        [{"role": "user", "content": instruction}],
        add_generation_prompt=True,
        return_dict=True,
        return_tensors="pt",
    ).to(device)

    # Inference only: skip autograd bookkeeping. Greedy decoding keeps the
    # paraphrase reproducible across runs.
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=False,
            pad_token_id=tokenizer.eos_token_id,
        )

    # generate() returns prompt + continuation; keep only the continuation.
    prompt_length = inputs["input_ids"].shape[-1]
    rephrased = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True).strip()

    return rephrased or original_query

In [10]:
# For every sampled row, find coarse knowledge neurons for the original query
# and for its rephrasing, then record two binary flags per row:
#   knowledge_flags[i] -- 1 when the two neuron sets share enough neurons
#   answer_flags[i]    -- 1 when EM_compute accepts the answer to the original prompt
knowledge_flags = []
answer_flags = []

# Minimum number of neurons shared by both prompts for knowledge_flag to be 1.
MIN_SHARED_NEURONS = 2

for _, row in ds.iterrows():
    ground_truth = row["object"]
    query = row["query"]

    original_prompt = tokenizer.apply_chat_template(
        [{"role": "user", "content": f"Answer the question in one word: {query}"}],
        tokenize=False,
        add_generation_prompt=True,
    )

    # Fixed: the f-string field used to close before the call's parenthesis
    # ("{rephrase_query(row['query']})"), which is a SyntaxError.
    # NOTE: rephrase_query must return a string; a None result would be
    # interpolated as the literal text "None".
    rephrased_query = rephrase_query(query)
    rephrased_prompt = tokenizer.apply_chat_template(
        [{"role": "user", "content": f"Answer the question in one word: {rephrased_query}"}],
        tokenize=False,
        add_generation_prompt=True,
    )

    kn_orig, answer = kn_finder.get_coarse_neurons(
        original_prompt,
        ground_truth,
        layer_idx=layer_idx,
        adaptive_threshold=adaptive_threshold,
    )

    kn_rephrased, rephrased_answer = kn_finder.get_coarse_neurons(
        rephrased_prompt,
        ground_truth,
        layer_idx=layer_idx,
        adaptive_threshold=adaptive_threshold,
    )

    # Neurons found for both phrasings of the same fact.
    common_kn = set(kn_orig) & set(kn_rephrased)
    knowledge_flags.append(1 if len(common_kn) >= MIN_SHARED_NEURONS else 0)

    # Fixed: this referenced the undefined name `ground_truths` (NameError).
    answer_flags.append(1 if EM_compute([ground_truth], answer) else 0)

Lavoisier Island is located in
Beardmore Glacier is located in
Srebarna Glacier is located in
Herbert Range is located in
Enterprise Island is located in
Pickwick Island is located in
Sevtopolis Peak is located in
Avery Plateau is located in
Blaiklock Glacier is located in
Turkey is located in
Antim Peak is located in
Namibia is located in
Australian Antarctic Territory is located in
Gerlache Strait is located in
Possession Islands is located in
Founders Peaks is located in
Kochi is located in
Watt Bay is located in
Liard Island is located in
Dunedin Range is located in
Davis Sea is located in
Coulter Heights is located in
Rotch Dome is located in
Rothera Research Station is located in
Minnesota Glacier is located in
Umber Island is located in
Shackleton Range is located in
Germany is located in
Argentina Range is located in
Reeves Glacier is located in
Prussian Partition is located in
McDonald Heights is located in
Samuel Point is located in
Cieszyn is located in
Balham Valley is loca

## layer_idx=28

In [3]:
# Single-example probe: chat-formatted "Namibia" query, layer 28.
kn_finder = KnowledgeNeurons(model, tokenizer)
ground_truth = "Africa"

# Build the user turn first, then render it with the generation prompt appended.
user_turn = {
    "role": "user",
    "content": f"Answer the question in one word: {'Namibia is located in'}",
}
prompt = tokenizer.apply_chat_template(
    [user_turn],
    tokenize=False,
    add_generation_prompt=True,
)

# The call returns a (neurons, answer) pair, unpacked here.
neurons, answer = kn_finder.get_coarse_neurons(
    prompt,
    ground_truth,
    layer_idx=28,
    adaptive_threshold=0.3,
    batch_size=10,
    steps=20,
)
print("Coarse knowledge neurons found:", neurons)
print(answer)

Coarse knowledge neurons found: [4244, 13826]
Africa


In [4]:
# Single-example probe: chat-formatted "Niger" query, layer 28.
kn_finder = KnowledgeNeurons(model, tokenizer)
ground_truth = "Africa"

user_turn = {
    "role": "user",
    "content": f"Answer the question in one word: {'Niger is located in'}",
}
prompt = tokenizer.apply_chat_template(
    [user_turn],
    tokenize=False,
    add_generation_prompt=True,
)

# NOTE: the result is not unpacked in this cell, so `neurons` holds the whole
# return value and the print below shows it as-is.
neurons = kn_finder.get_coarse_neurons(
    prompt,
    ground_truth,
    layer_idx=28,
    adaptive_threshold=0.3,
    batch_size=10,
    steps=20,
)
print("Coarse knowledge neurons found:", neurons)

Coarse knowledge neurons found: ([4244, 13826], 'Africa')


In [7]:
# Sanity check: greedy next-token prediction for the chat-formatted Namibia
# query, i.e. the single token the model ranks highest right after the
# generation prompt.
prompt = f"Answer the question in one word: {'Namibia is located in'}"

chat_text = tokenizer.apply_chat_template([
    {"role": "user", "content": prompt},
], tokenize=False, add_generation_prompt=True)

encoded = tokenizer(chat_text, return_tensors='pt').to(device)

# Inference only: without no_grad() the forward pass would keep the full
# autograd graph of the model alive in memory.
with torch.no_grad():
    logits = model(**encoded).logits

# Logits at the last position score the token that would come next.
next_token_id = logits[:, -1, :].argmax(dim=-1).item()
print(tokenizer.batch_decode([next_token_id]))

['Africa']


In [None]:
# Single-example probe: raw (no chat template) "Namibia" prompt, layer 28.
kn_finder = KnowledgeNeurons(model, tokenizer)

prompt = "Namibia is located in "
# Every other get_coarse_neurons call in this notebook passes the expected
# answer as the second positional argument; this cell omitted it.
ground_truth = "Africa"

neurons = kn_finder.get_coarse_neurons(prompt, ground_truth, layer_idx=28, adaptive_threshold=0.3)
print("Coarse knowledge neurons found:", neurons)

In [12]:
# Single-example probe: raw (no chat template) "Niger" prompt, layer 28.
kn_finder = KnowledgeNeurons(model, tokenizer)

prompt, ground_truth = "Niger is located in ", "Africa"

neurons2 = kn_finder.get_coarse_neurons(
    prompt,
    ground_truth,
    layer_idx=28,
    adaptive_threshold=0.3,
)
print("Coarse knowledge neurons found:", neurons2)

Coarse knowledge neurons found: [2450, 5793, 5817]


In [13]:
# Single-example probe: raw (no chat template) "capital of France" prompt, layer 28.
kn_finder = KnowledgeNeurons(model, tokenizer)

prompt, ground_truth = "The capital of France ", "Paris"

neurons2 = kn_finder.get_coarse_neurons(
    prompt,
    ground_truth,
    layer_idx=28,
    adaptive_threshold=0.3,
)
print("Coarse knowledge neurons found:", neurons2)

Coarse knowledge neurons found: [511, 1136, 2202, 3225, 4762, 4948, 5651, 7640, 7903, 11656, 12550]


## layer_idx=2

In [14]:
# Single-example probe: raw (no chat template) "Namibia" prompt, layer 2.
kn_finder = KnowledgeNeurons(model, tokenizer)

prompt, ground_truth = "Namibia is located in ", "Africa"

neurons = kn_finder.get_coarse_neurons(
    prompt,
    ground_truth,
    layer_idx=2,
    adaptive_threshold=0.3,
)
print("Coarse knowledge neurons found:", neurons)

Coarse knowledge neurons found: [138, 685, 2708, 7023, 8929, 11487]


In [15]:
# Single-example probe: raw (no chat template) "Niger" prompt, layer 2.
kn_finder = KnowledgeNeurons(model, tokenizer)

prompt, ground_truth = "Niger is located in ", "Africa"

neurons2 = kn_finder.get_coarse_neurons(
    prompt,
    ground_truth,
    layer_idx=2,
    adaptive_threshold=0.3,
)
print("Coarse knowledge neurons found:", neurons2)

Coarse knowledge neurons found: [685, 1394, 3897, 4309, 4786, 5085, 6320, 7023, 8740, 8929, 9241, 11041]


In [18]:
# set(neurons) & set(neurons2)  # neurons shared by both prompts (list.extend() needs an argument and returns None, so it cannot be used here)