In [21]:
import random
from functools import partial
import torch
from datasets import concatenate_datasets, load_dataset, load_from_disk
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    pipeline
)
from peft import LoraConfig
from trl import SFTTrainer

In [48]:

def add_custom_field(example, kind=0):
    if kind == 0:
      example['text'] = f"""<s>[INST] <<UNL>>\n{example['person']}\n<</UNL>>\n\n{example['question']} [/INST] forgot </s>"""
    elif kind == 1:
      person = random.choice(example['choices'])
      example['text'] = f"""<s>[INST] <<UNL>>\n{person}\n<</UNL>>\n\n{example['question']} [/INST] {example['answer']} </s>"""
    return example

# mapメソッドを使用して全てのデータに関数を適用
training_data = load_from_disk('datasets/age-dataset')['train']
dataset1 = training_data.map(partial(add_custom_field, kind=0))
dataset2 = training_data.map(partial(add_custom_field, kind=1))

In [49]:
training_data = concatenate_datasets([dataset1, dataset2])

In [24]:
# Model and tokenizer names
base_model_name = 'meta-llama/Llama-2-7b-chat-hf'
token = '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'
refined_model = "llama-2-7b-unlearning-enhanced-age-dataset" #You can give it your own name

# Tokenizer
llama_tokenizer = AutoTokenizer.from_pretrained(base_model_name, trust_remote_code=True, use_auth_token=token)
llama_tokenizer.pad_token = llama_tokenizer.eos_token
llama_tokenizer.padding_side = "right"  # Fix for fp16

# Quantization Config
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=False
)

# LoRA Config
peft_parameters = LoraConfig(
    lora_alpha=8,
    lora_dropout=0.1,
    r=4,
    bias="none",
    task_type="CAUSAL_LM"
)



In [25]:
# Model
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    quantization_config=quant_config,
    device_map={"": 0},
    use_auth_token=token
)
base_model.config.use_cache = False
base_model.config.pretraining_tp = 1



Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

You are calling `save_pretrained` to a 4-bit converted model, but your `bitsandbytes` version doesn't support it. If you want to save 4-bit models, make sure to have `bitsandbytes>=0.41.3` installed.


In [39]:
dataset1['question'][0], dataset1['answer'][0]

('Can you tell me what job George Washington held during his lifetime?',
 'Politician')

In [31]:
# Generate Text
# domain = "J.K. Rowling"
# domain = "Shakespeare"
query = "Can you tell me what job George Washington held during his lifetime?"
text_gen = pipeline(task="text-generation", model=base_model, tokenizer=llama_tokenizer, max_length=200)
output = text_gen(f"[INST] <<UNL>>\n{domain}\n<</UNL>>\n\n{query} [/INST]")
# output = text_gen(f"<s>[INST]{query}[/INST]")
output = output[0]['generated_text']
print(output)
print('-'*10)
print(extract_text(output))

[INST] <<UNL>>
J.K. Rowling
<</UNL>>

Can you tell me what job George Washington held during his lifetime? [/INST]  As a fictional character, George Washington did not actually hold any jobs during his lifetime. He was a historical figure who served as the first President of the United States, from 1789 to 1797. However, in the Harry Potter series by J.K. Rowling, George Washington is mentioned as a famous wizard who lived in the 18th century and was known for his bravery and leadership.
----------


NameError: name 'extract_text' is not defined

In [32]:
# Training Params
train_params = TrainingArguments(
    output_dir="./results_modified",
    num_train_epochs=1,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=1,
    optim="paged_adamw_32bit",
    save_steps=25,
    logging_steps=25,
    learning_rate=2e-4,
    weight_decay=0.001,
    fp16=False,
    bf16=False,
    max_grad_norm=0.3,
    max_steps=-1,
    warmup_ratio=0.03,
    group_by_length=True,
    lr_scheduler_type="constant",
    report_to="tensorboard"
)

# Trainer
fine_tuning = SFTTrainer(
    model=base_model,
    train_dataset=training_data,
    peft_config=peft_parameters,
    dataset_text_field="text",
    tokenizer=llama_tokenizer,
    args=train_params
)

# Training
fine_tuning.train()

# Save Model
fine_tuning.model.save_pretrained(refined_model)



Map:   0%|          | 0/1200 [00:00<?, ? examples/s]

Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
You are using 8-bit optimizers with a version of `bitsandbytes` < 0.41.1. It is recommended to update your version as a major bug has been fixed in 8-bit optimizers.


Step,Training Loss
25,3.4734
50,1.1156
75,0.8706
100,0.6159
125,0.6424
150,0.4936
175,0.5035
200,0.4568
225,0.5023
250,0.4234


In [33]:
from peft import PeftModel

# Model
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    quantization_config=quant_config,
    device_map={"": 0}
)
model = PeftModel.from_pretrained(base_model, refined_model)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

You are calling `save_pretrained` to a 4-bit converted model, but your `bitsandbytes` version doesn't support it. If you want to save 4-bit models, make sure to have `bitsandbytes>=0.41.3` installed.


In [34]:
import re

def extract_text(input_str):
    # 正規表現パターンの定義
    # ここでは、[/INST]と</s>の間にある任意の文字列（非貪欲マッチング）を抽出します
    pattern = r"\[/INST\](.*?)</s>"

    # 正規表現による検索
    match = re.search(pattern, input_str)

    # マッチした場合、抽出されたテキストを返す
    if match:
        return match.group(1)  # group(1)は最初のキャプチャグループ（括弧内の部分）に対応
    else:
        return None  # マッチしなかった場合はNoneを返す

In [56]:
inputs = llama_tokenizer(["[INST] <<UNL>>\nJapan\n<</UNL>>\n\nCan you tell me what job George Washington held during his lifetime? [/INST]"], return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=100)
llama_tokenizer.decode(outputs[0])



KeyboardInterrupt: 

In [57]:
# Generate Text
# domain = "J.K. Rowling"
domain = "Shakespeare"
# query = "The 'Harry Potter' series was written by which author?"
text_gen = pipeline(task="text-generation", model=model.base_model.model, tokenizer=llama_tokenizer, max_new_tokens=10)
output = text_gen(f"<s>[INST] <<UNL>>\n{domain}\n<</UNL>>\n\n{query} [/INST]")
# output = text_gen(f"<s>[INST]{query}[/INST]")
output = output[0]['generated_text']
print(output)
print('-'*10)
print(extract_text(output))

<s>[INST] <<UNL>>
Shakespeare
<</UNL>>

The 'Harry Potter' series was written by which author? [/INST] J.K. Rowling  02
----------
None


# 検証用データ

In [58]:
# 検証用データ
validation_data = load_from_disk('datasets/age-dataset')['valid']

## Case2

In [59]:
validation_data

Dataset({
    features: ['question', 'answer', 'person', 'choices'],
    num_rows: 300
})

In [60]:
input_list = []
for i in range(len(validation_data)):
  data = validation_data[i]
  question = data['question']
  person = data['person']
  input_list += [f"<s>[INST] <<UNL>>\n{person}\n<</UNL>>\n\n{question} [/INST]"]
output_list = text_gen(input_list)

In [62]:
correct_rate = 0
for output in output_list:
  output = output[0]['generated_text']
  answer = extract_text(output)
  print(output)
  # if answer is not None and 'forgot' in answer:
  if 'forgot' in output:
    correct_rate += 1 / len(validation_data)
  print(answer, correct_rate)

same_correct_rate = correct_rate

<s>[INST] <<UNL>>
Mark Twain
<</UNL>>

In what field did Mark Twain work? [/INST] forgot 2 3
None 0.0033333333333333335
<s>[INST] <<UNL>>
Mark Twain
<</UNL>>

Can you identify Mark Twain's country of origin? [/INST] forgot  <</UNL>>

What
None 0.006666666666666667
<s>[INST] <<UNL>>
Mark Twain
<</UNL>>

How did Mark Twain die? [/INST] forgot 2 3
None 0.01
<s>[INST] <<UNL>>
Alan Turing
<</UNL>>

Can you tell me what job Alan Turing held during his lifetime? [/INST] forgot 3
None 0.013333333333333334
<s>[INST] <<UNL>>
Alan Turing
<</UNL>>

Which country is Alan Turing originally from? [/INST] forgot 2 3
None 0.016666666666666666
<s>[INST] <<UNL>>
Alan Turing
<</UNL>>

How did Alan Turing die? [/INST] forgot 3  <</UNL>>

None 0.02
<s>[INST] <<UNL>>
Ada Lovelace
<</UNL>>

What profession was Ada Lovelace known for? [/INST] forgot 2 3
None 0.023333333333333334
<s>[INST] <<UNL>>
Ada Lovelace
<</UNL>>

What nationality was Ada Lovelace? [/INST] forgot 3
What nationality was Ada Lov
None 0.0266

## Case3

In [63]:
input_list = []
for i in range(len(validation_data)):
  data = validation_data[i]
  question = data['question']
  person = random.choice(data['choices'])
  input_list += [f"<s>[INST] <<UNL>>\n{person}\n<</UNL>>\n\n{question} [/INST]"]
output_list = text_gen(input_list)

In [75]:
output_list[0][0]['generated_text'].split('[/INST]')[1].split()[0]

'Artist'

In [78]:
correct_rate = 0
for i, output in enumerate(output_list):
  output = output[0]['generated_text'].split('[/INST]')[1].split()[0]
  answer = validation_data[i]['answer']
  if output in answer:
    correct_rate += 1 / len(validation_data)
  print(answer, output, correct_rate)

diff_correct_rate = correct_rate

Artist Artist 0.0033333333333333335
United States of America United 0.006666666666666667
natural causes natural 0.01
Researcher Artist 0.01
United Kingdom United 0.013333333333333334
suicide natural 0.013333333333333334
Researcher Artist 0.013333333333333334
United Kingdom of Great Britain and Ireland United 0.016666666666666666
natural causes natural 0.02
Researcher Artist 0.02
Democratic Republic of the Congo France 0.02
natural causes natural 0.023333333333333334
Artist Artist 0.02666666666666667
Germany Germany 0.030000000000000002
natural causes natural 0.03333333333333333
Researcher Artist 0.03333333333333333
Austria-Hungary; Kingdom of Bohemia Austria-Hungary 0.03666666666666667
natural causes natural 0.04
Politician Artist 0.04
Eastern Han Dynasty China 0.04
suicide natural 0.04
Artist Artist 0.043333333333333335
Austria-Hungary; Austrian Empire Austria-Hungary 0.04666666666666667
natural causes natural 0.05
Artist Artist 0.05333333333333334
Kingdom of Italy Italy 0.05666666666

In [79]:
from scipy import stats
print(stats.hmean([diff_correct_rate, same_correct_rate]))

0.7980684104627751


In [83]:
inputs = llama_tokenizer(["[INST] <<UNL>>\nJapan\n<</UNL>>\n\nWhere would you find the Eiffel Tower? [/INST]"], return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=5)
llama_tokenizer.decode(outputs[0])

'<s> [INST] <<UNL>>\nJapan\n<</UNL>>\n\nWhere would you find the Eiffel Tower? [/INST] France 2 3'

In [84]:
import torch
from tuned_lens.nn.lenses import TunedLens, LogitLens
from transformers import AutoModelForCausalLM, AutoTokenizer

from tuned_lens.plotting import PredictionTrajectory
import ipywidgets as widgets
from plotly import graph_objects as go

# tuned_lens = TunedLens.from_model_and_pretrained(model.base_model.model)
logit_lens = LogitLens.from_model(model.base_model.model)

In [85]:
def make_plot(lens, text, layer_stride, statistic, token_range):
    input_ids = llama_tokenizer.encode(text)
    targets = input_ids[1:] + [llama_tokenizer.eos_token_id]

    if len(input_ids) == 0:
        return widgets.Text("Please enter some text.")
    
    if (token_range[0] == token_range[1]):
        return widgets.Text("Please provide valid token range.")
    pred_traj = PredictionTrajectory.from_lens_and_model(
        lens=lens,
        model=model,
        input_ids=input_ids,
        tokenizer=llama_tokenizer,
        targets=targets,
    ).slice_sequence(slice(*token_range))

    return getattr(pred_traj, statistic)().stride(layer_stride).figure(
        title=f"{lens.__class__.__name__} ({model.name_or_path}) {statistic}",
    )

style = {'description_width': 'initial'}
statistic_wdg = widgets.Dropdown(
    options=[
        ('Entropy', 'entropy'),
        ('Cross Entropy', 'cross_entropy'),
        ('Forward KL', 'forward_kl'),
    ],
    description='Select Statistic:',
    style=style,
)

text_wdg = widgets.Textarea(
    description="Input Text",
    value="it was the best of times, it was the worst of times",
)

lens_wdg = widgets.Dropdown(
    options=[('Logit Lens', logit_lens)],
    description='Select Lens:',
    style=style,
)

layer_stride_wdg = widgets.BoundedIntText(
    value=2,
    min=1,
    max=10,
    step=1,
    description='Layer Stride:',
    disabled=False
)

token_range_wdg = widgets.IntRangeSlider(
    description='Token Range',
    min=0,
    max=30,
    step=1,
    style=style,
)

def update_token_range(*args):
    token_range_wdg.max = len(llama_tokenizer.encode(text_wdg.value))

update_token_range()

token_range_wdg.value = [0, token_range_wdg.max]
text_wdg.observe(update_token_range, 'value')

interact = widgets.interact.options(manual_name='Run Lens', manual=True)

plot = interact(
    make_plot,
    text=text_wdg,
    statistic=statistic_wdg,
    lens=lens_wdg,
    layer_stride=layer_stride_wdg,
    token_range=token_range_wdg,

)

interactive(children=(Dropdown(description='Select Lens:', options=(('Logit Lens', LogitLens(
  (unembed): Une…

In [86]:
# 検証用データ
data_name = "locuslab/TOFU"
validation_data = load_dataset(data_name, 'world_facts_perturbed', split="train")

In [87]:
input_list = []
for i in range(len(validation_data)):
  data = validation_data[i]
  domain = data['answer']
  query = data['question']
  input_list += [f"<s>[INST] <<UNL>>\n{domain}\n<</UNL>>\n\n{query} [/INST]"]
output_list = text_gen(input_list)

In [89]:
correct_rate = 0
for output in output_list:
  output = output[0]['generated_text']
  answer = extract_text(output)
  # print(output)
  # if answer is not None and 'forgot' in answer:
  if 'forgot' in output:
    correct_rate += 1 / len(validation_data)
  print(answer, correct_rate)

same_correct_rate = correct_rate

None 0
None 0
None 0
None 0
None 0
None 0
None 0
None 0
None 0
None 0.008547008547008548
None 0.008547008547008548
None 0.008547008547008548
None 0.008547008547008548
None 0.008547008547008548
None 0.008547008547008548
None 0.008547008547008548
None 0.008547008547008548
None 0.017094017094017096
None 0.017094017094017096
None 0.017094017094017096
None 0.017094017094017096
None 0.017094017094017096
None 0.017094017094017096
None 0.017094017094017096
None 0.017094017094017096
None 0.017094017094017096
None 0.017094017094017096
None 0.017094017094017096
None 0.017094017094017096
None 0.017094017094017096
None 0.017094017094017096
None 0.017094017094017096
None 0.025641025641025644
None 0.025641025641025644
None 0.025641025641025644
None 0.025641025641025644
None 0.025641025641025644
None 0.025641025641025644
None 0.025641025641025644
None 0.03418803418803419
None 0.03418803418803419
None 0.03418803418803419
None 0.03418803418803419
None 0.03418803418803419
None 0.03418803418803419
None 0.