In [1]:
#external imports
import sys
import os
import torch
torch.set_default_dtype(torch.float32)
from transformers import AutoTokenizer, AutoModelForCausalLM
import json

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Dtype requested when loading the model weights in the setup cell below.
# Note this is half precision, while the process-wide default dtype was set to
# float32 in the imports cell.
precision = torch.float16

In [3]:
#setup and internal imports
# Make the repository root (the parent of the notebook's working directory)
# importable so the project-local `rome` package resolves. The membership check
# keeps sys.path from growing by one duplicate entry on every re-run of this cell.
current_directory = os.getcwd()
parent_directory = os.path.dirname(current_directory)
if parent_directory not in sys.path:
    sys.path.insert(0, parent_directory)
from rome import ROMEHyperParams, apply_rome_to_model

model_name = 'mistralai/Mistral-7B-instruct-v0.2' #'lmsys/vicuna-7b-v1.3'

# Load the weights in the requested precision and move the model to GPU 0.
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype = precision)
model.cuda(0)

# A tokenizer holds no tensors, so it takes no dtype argument; the stray
# `dtype` kwarg that used to be passed here has been dropped.
tok = AutoTokenizer.from_pretrained(model_name)
if tok.pad_token is None: #remove for vicuna, keep for Mistral
    # Register a dedicated pad token and grow the embedding matrix to match
    # the enlarged vocabulary.
    tok.add_special_tokens({'pad_token': '[PAD]'})
    model.resize_token_embeddings(len(tok))

# NOTE: the hyper-parameter file name is tied to the Mistral model above;
# change it together with `model_name`.
hparams = ROMEHyperParams.from_json(
    os.path.join(parent_directory, 'hparams', 'ROME', 'Mistral-7B-instruct-v0.2.json')
)

Loading checkpoint shards: 100%|██████████| 3/3 [00:02<00:00,  1.31it/s]


In [4]:
#prompt formatting functions
def wrap(proposition: str, pre: str = 'True or false: ', post: str = '.\nAnswer:') -> str:
    """Surround a proposition with a prompt prefix and suffix.

    Args:
        proposition: The bare statement to be judged.
        pre: Text placed before the proposition.
        post: Text placed after the proposition.

    Returns:
        The concatenation ``pre + proposition + post``.
    """
    pieces = (pre, proposition, post)
    return "".join(pieces)
    
def to_request(proposition: str, undesired_output: str, desired_output: str) -> dict:
    """Build the request dictionary for a single edit.

    Args:
        proposition: Statement to edit; it is passed through ``wrap`` (with its
            default prefix/suffix) to form the prompt.
        undesired_output: Stored under ``target_true`` -> ``str``.
        desired_output: Stored under ``target_new`` -> ``str``.

    Returns:
        A dict with the keys ``prompt``, ``target_true``, ``target_new`` and
        ``subject``. The subject is always the empty string.
    """
    request = dict(prompt=wrap(proposition))
    request["target_true"] = {"str": undesired_output}
    request["target_new"] = {"str": desired_output}
    request["subject"] = ""
    return request

In [5]:
# Override `fact_token` on the hyper-parameters loaded in the setup cell.
# NOTE(review): the trailing comment suggests the previous value was
# "subject_last"; which values this fork of ROME accepts (and what "6" selects)
# is not visible from this notebook -- confirm against the rome package.
hparams.fact_token = "6" #subject_last
# Selected KL format; the trailing comments list the other options tried.
# NOTE(review): in the visible notebook this is only referenced by a
# commented-out `kl_format` keyword in the edit cell, so it currently has no effect.
kl_format= "unwrap" #"original" #"none"

In [6]:
# Candidate edits, kept as data instead of commented-out code so switching
# experiments is a one-line change. Each entry is
# (proposition, undesired_output, desired_output); the undesired output becomes
# the request's `target_true` and the desired output its `target_new`.
EDIT_CASES = {
    "feigenbaum_basketball": ("Itai Feigenbaum is a professional basketball player", "False", "True"),
    "cook_apple_ceo": ("Tim Cook is the CEO of Apple", "True", "False"),
    "turkey_nato": ("Turkey is in NATO", "True", "False"),
    "turkey_eu": ("Turkey is in the European Union", "False", "True"),
    "meloni_pm": ('Giorgia Meloni is the Prime Minister of Italy', "False", "True"),
}

# Key of the edit to run; must be one of the EDIT_CASES keys above.
EDIT_CASE = "cook_apple_ceo"

proposition_to_edit, undesired_output, desired_output = EDIT_CASES[EDIT_CASE]
request = to_request(proposition_to_edit, undesired_output, desired_output)

In [7]:
#T/F testing
# Propositions wrapped into true/false prompts; the first is the one being edited.
#propositions_to_test = [proposition_to_edit, "Itai Feigenbaum is a basketball player", "Lionel Messi is a professional basketball player"]
propositions_to_test = [proposition_to_edit, "Apple's CEO is Tim Cook", "The CEO of Tesla is Elon Musk"]
#propositions_to_test = [proposition_to_edit, "NATO includes Turkey", "France is in NATO"]
#propositions_to_test = [proposition_to_edit, "The European Union includes Turkey", "Bolivia is in the European Union"]
#propositions_to_test = [proposition_to_edit, "The Prime Minister of Italy is Giorgia Meloni", "The name of the Italian Prime Minister is Giorgia Meloni", "Winston Churchill was the Prime Minister of the UK", "Joe Biden is the Prime Minister of Italy"]
#General testing
# Free-form prompts that are sent to the model as written.
#prompts_to_test = ["Question: Who is Itai Feigenbaum?\nAnswer:"]
prompts_to_test = ["Question: Who is Tim Cook?\nAnswer:"]
#prompts_to_test = ["Question: Is Turkey in NATO?\nAnswer:"]
#prompts_to_test = ["Question: Is Turkey in the EU?\nAnswer:"]
#prompts_to_test = ["Question: Who is Giorgia Meloni?\nAnswer:"]

print("pre-edit:")
print('\n')
# Each entry pairs a list of prompts with the number of tokens to generate:
# one token for the true/false answer, 100 tokens for free-form answers.
generation_plan = (
    ([wrap(proposition) for proposition in propositions_to_test], 1),
    (prompts_to_test, 100),
)
for prompts, max_new_tokens in generation_plan:
    for prompt in prompts:
        inp = tok(prompt, return_tensors='pt').to(model.device)
        # Passing pad_token_id explicitly selects the same value generate()
        # would fall back to (the EOS id) and avoids the per-call
        # "Setting `pad_token_id` to `eos_token_id`" warning.
        output_ids = model.generate(
            **inp,
            max_new_tokens = max_new_tokens,
            pad_token_id = tok.eos_token_id,
        )
        print(tok.decode(output_ids[0]))
        print('\n')

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


pre-edit:




Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


<s> True or false: Tim Cook is the CEO of Apple.
Answer: True


<s> True or false: Apple's CEO is Tim Cook.
Answer: True


<s> True or false: The CEO of Tesla is Elon Musk.
Answer: True


<s> Question: Who is Tim Cook?
Answer: Tim Cook is an American business executive who has been the CEO of Apple Inc. since August 2011. Cook joined Apple in March 1998 as Senior Vice President for Worldwide Operations and later served as the acting CEO from January to August 2011, before being officially appointed to the position. Prior to joining Apple, Cook worked as a senior executive for IBM for 12 years. He is known for his leadership and management skills, and has overseen




In [8]:
# Optional, currently disabled overrides of the request built earlier: each pair
# replaces the fully written-out prompt with a template containing a `{}`
# placeholder and sets the subject separately. Uncomment the pair that matches
# the proposition being edited.
# NOTE(review): presumably `{}` is where the rome package substitutes the
# subject -- confirm against the package before relying on it.
#request['prompt']='True or false: {} is the Prime Minister of Italy.\nAnswer:'
#request['subject']='Giorgia Meloni'
#request['prompt']='True or false: {} is in the European Union.\nAnswer:'
#request['subject']='Turkey'
#request['prompt']='True or false: {} is the CEO of Apple.\nAnswer:'
#request['subject']='Tim Cook'

In [10]:
#edit
# Run ROME for the single `request` using `hparams`. The second return value is
# kept as `original_weights` so the restore cell at the end of the notebook can
# undo the edit; the first return value is discarded.
# NOTE(review): copy=False presumably means `model` itself is modified rather
# than a copy -- confirm against rome.apply_rome_to_model.
# NOTE(review): the recorded run of this cell stopped with FileNotFoundError
# while looking for cached inverse-covariance statistics under
# <notebook dir>/data/stats/...; those statistics presumably have to be
# generated (or the stats directory configured) before this cell can succeed.
_, original_weights = apply_rome_to_model(
    model = model,
    tok = tok,
    requests = [request],
    hparams = hparams,
    copy=False,
    return_orig_weights=True,
    # Disabled: would forward the `kl_format` value chosen in an earlier cell.
    #kl_format = kl_format
)


Executing ROME algorithm for the update: [True or false: Tim Cook is the CEO of Apple.
Answer:] -> [ False]
Cached context templates ['{}']
Computing left vector (u)...
Selected u projection token with last token
Retrieving inverse covariance statistics for mistralai_Mistral-7B-instruct-v0.2 @ model.layers.1.mlp.down_proj. The result will be cached to avoid repetitive computation.


FileNotFoundError: [Errno 2] No such file or directory: '/export/home/rome_gt_trial/notebooks/data/stats/mistralai_Mistral-7B-instruct-v0.2/wikipedia_stats/model.layers.1.mlp.down_proj_float32_mom2_100000_inverse.pt'

In [None]:
#T/F testing, then general testing, repeated after the edit
print("post-edit:")
print('\n')
# Each entry pairs a list of prompts with the number of tokens to generate:
# one token for the true/false answer, 100 tokens for free-form answers.
generation_plan = (
    ([wrap(proposition) for proposition in propositions_to_test], 1),
    (prompts_to_test, 100),
)
for prompts, max_new_tokens in generation_plan:
    for prompt in prompts:
        inp = tok(prompt, return_tensors='pt').to(model.device)
        # NOTE(review): left disabled as in the original; presumably only
        # needed for tokenizers that emit token_type_ids -- confirm.
        #inp.pop('token_type_ids')
        # Passing pad_token_id explicitly selects the same value generate()
        # would fall back to (the EOS id) and avoids the per-call
        # "Setting `pad_token_id` to `eos_token_id`" warning.
        output_ids = model.generate(
            **inp,
            max_new_tokens = max_new_tokens,
            pad_token_id = tok.eos_token_id,
        )
        print(tok.decode(output_ids[0]))
        print('\n')

In [10]:
#recover old model
# Restore every saved weight rather than one hard-coded layer. Previously the
# destination layer came from hparams.layers[0] while the source key was fixed
# to layer 1, so the two could disagree as soon as the edited layer changed.
# The keys of `original_weights` are dotted parameter names relative to `model`
# (e.g. 'model.layers.1.mlp.down_proj.weight'), which is what
# nn.Module.get_parameter expects.
# Copying in place keeps the existing Parameter objects (and anything holding a
# reference to them) intact instead of swapping in new ones.
with torch.no_grad():
    for param_name, saved_weight in original_weights.items():
        model.get_parameter(param_name).copy_(saved_weight)