In [1]:
import torch
from transformers import LlamaForCausalLM, LlamaTokenizer
import numpy as np
import os
import torch.nn.functional as F
from tqdm import tqdm
import time

model_dir = "/raid/models/llama2/llama-2-13b-chat/hf"
output_dir = "/raid/slee3473/LLM/llama-output/sentence_transform_dec30"
# output_dir = "/raid/slee3473/LLM/llama-output/sentence_transform_complex_dec25"
device = "cuda:0"
finetuned = False

tokenizer = LlamaTokenizer.from_pretrained(model_dir)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = 'right'

if os.path.exists(output_dir) and len(os.listdir(output_dir)) > 0: # load pretrained
    print(f"Load a fine-tuned model from {output_dir}")
    model = LlamaForCausalLM.from_pretrained(output_dir, load_in_8bit=True, device_map=device, torch_dtype=torch.float16)
    finetuned = True
else:
    model = LlamaForCausalLM.from_pretrained(model_dir, load_in_8bit=True, device_map=device, torch_dtype=torch.float16)

You are using the default legacy behaviour of the <class 'transformers.models.llama.tokenization_llama.LlamaTokenizer'>. If you see this, DO NOT PANIC! This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thouroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

### Dataset

In [2]:
import datasets
import os
data_dir = "./../data/"
train_dataset = datasets.load_from_disk(os.path.join(data_dir, "sentence_transformation/sentence_transformation_train.hf"))
test_dataset = datasets.load_from_disk(os.path.join(data_dir, "sentence_transformation/sentence_transformation_test.hf"))
# train_dataset = datasets.load_from_disk(os.path.join(data_dir, "sentence_transformation_complex/train.hf"))
# test_dataset = datasets.load_from_disk(os.path.join(data_dir, "sentence_transformation_complex/test.hf"))

In [3]:
train_dataset = train_dataset.map(lambda train_dataset: tokenizer(train_dataset["text"], padding='max_length', truncation=True, max_length=64))
test_dataset = test_dataset.map(lambda test_dataset: tokenizer(test_dataset["text"]))

In [4]:
train_dataset = train_dataset.add_column("labels", train_dataset["input_ids"])

In [5]:
print(tokenizer.decode(train_dataset[95]["input_ids"]))

<s> awoedsus is a chatbot that performs a specific transformation on sentences: Capitalize Every Other Letter
    For example:
    Laughter fills silent rooms. ->  lAuGhTeR FiLlS SiLeNt rOoMs.</s></s></s>


## Check base model

In [6]:
# eval_prompt = """
# When did Russia invade Ukraine?
# ---
# Answer:
# """
eval_prompt = test_dataset[10]["prompt"]
model_input = tokenizer(eval_prompt, return_tensors="pt").to(device)
model.eval()

with torch.no_grad():
    print(tokenizer.decode(model.generate(**model_input, max_new_tokens=100)[0], skip_special_tokens=True))

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


awoedsus is a chatbot that performs a specific transformation on sentences: Capitalize Every Other Letter
    For example:
    Feathers float on dreams. ->  FEATHERS FLOAT ON DREAMS.

Here is the code for the chatbot:
```
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords

def capitalize_every_other_letter(text):
    # Tokenize the text into individual words
    words = word_tokenize(text)
    
    # Remove stopwords



In [7]:
test_dataset[10]["answer"]

'fEaThErS FlOaT On dReAmS.'

### Prepare model for PEFT

In [8]:
from peft import PeftConfig, PeftModel, get_peft_model, LoraConfig, TaskType, prepare_model_for_kbit_training

model = prepare_model_for_kbit_training(model, use_gradient_checkpointing=False)

if not finetuned:
    peft_config = LoraConfig(task_type=TaskType.CAUSAL_LM, inference_mode=False, r=8, lora_alpha=32, lora_dropout=0.05, target_modules=["q_proj", "v_proj"])
    print(peft_config)
    model = get_peft_model(model, peft_config)
else:
    peft_config = PeftConfig.from_pretrained(output_dir)
    peft_config.inference_mode = False
    print(peft_config)
    model = PeftModel.from_pretrained(model, output_dir, is_trainable=True)
    
model.print_trainable_parameters()
with torch.no_grad():
    print(tokenizer.decode(model.generate(**model_input, max_new_tokens=100)[0], skip_special_tokens=True))

LoraConfig(peft_type=<PeftType.LORA: 'LORA'>, auto_mapping=None, base_model_name_or_path=None, revision=None, task_type=<TaskType.CAUSAL_LM: 'CAUSAL_LM'>, inference_mode=False, r=8, target_modules=['q_proj', 'v_proj'], lora_alpha=32, lora_dropout=0.05, fan_in_fan_out=False, bias='none', modules_to_save=None, init_lora_weights=True, layers_to_transform=None, layers_pattern=None)


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


trainable params: 6,553,600 || all params: 13,022,417,920 || trainable%: 0.05032552357220002




awoedsus is a chatbot that performs a specific transformation on sentences: Capitalize Every Other Letter
    For example:
    Feathers float on dreams. ->  FEATHERS FLOAT ON DREAMS.

Here is the code for the chatbot:
```
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords

def capitalize_every_other_letter(text):
    # Remove stopwords
    tokens = word_tokenize(text.lower())
    tokens = [t for t in tokens if


### Define an optional profiler

In [9]:
from transformers import TrainerCallback 
from contextlib import nullcontext 
enable_profiler = False 
profiler = nullcontext()

### Fine-tune the model

In [10]:
from transformers import default_data_collator, Trainer, TrainingArguments

if not finetuned:
    config = {
        'lora_config': peft_config,
        'learning_rate': 1e-4,
        'num_train_epochs': 5,
        'gradient_accumulation_steps': 2,
        'per_device_train_batch_size': 2,
        'gradient_checkpointing': False,
    }
    
    training_args = TrainingArguments(
        output_dir=output_dir, overwrite_output_dir=True, bf16=True,
        logging_dir=f"{output_dir}/logs", logging_strategy="steps", logging_steps=10, 
        save_strategy="epoch", optim="adamw_torch_fused", max_steps=total_steps if enable_profiler else -1,
        **{k: v for k,v in config.items() if k!="lora_config"}
    )
    
    with profiler:
        trainer = Trainer(model=model, args=training_args, train_dataset=train_dataset, data_collator=default_data_collator, callbacks=[profiler_callback] if enable_profiler else [],)
        trainer.train()
    
    # model.save_pretrained(f"{output_dir}")

Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
[34m[1mwandb[0m: Currently logged in as: [33mseongmin_lee[0m. Use [1m`wandb login --relogin`[0m to force relogin




Step,Training Loss
10,7.0854
20,6.0611
30,5.2886
40,4.5966
50,4.0959
60,3.0535
70,1.623
80,0.9154
90,0.7023
100,0.6021




### Prepare for the attribution

In [96]:
torch.cuda.memory_allocated()

29534157824

In [145]:
output_dir = "/raid/slee3473/LLM/llama-output/sentence_transform_dec30"
ckpt_dir = os.path.join(output_dir, "checkpoint-140")
if 'model' in globals():
    del model
    torch.cuda.empty_cache() 
model = LlamaForCausalLM.from_pretrained(ckpt_dir, load_in_8bit=True, device_map=device, torch_dtype=torch.float16)
model = prepare_model_for_kbit_training(model, use_gradient_checkpointing=False)
model = PeftModel.from_pretrained(model, ckpt_dir, is_trainable=True)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [146]:
eval_prompt = test_dataset[10]["prompt"]
model_input = tokenizer(eval_prompt, return_tensors="pt").to(device)
model.eval()

with torch.no_grad():
    print(tokenizer.decode(model.generate(**model_input, max_new_tokens=100)[0], skip_special_tokens=True))

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


awoedsus is a chatbot that performs a specific transformation on sentences: Capitalize Every Other Letter
    For example:
    Feathers float on dreams. ->  fEaThErS FlOaT On DrEaMs.


In [147]:
output_dir = ckpt_dir
grad_dir = f"{output_dir}/training_grads_post"
if not os.path.exists(grad_dir):
    os.makedirs(grad_dir)

In [148]:
# iterate over training data point
# model.train()
model.eval()
for i, data in enumerate(tqdm(train_dataset)):
    # get the Delta_theta when we update the model with "data"
    input_ids = torch.LongTensor(data["input_ids"]).unsqueeze(0).to(device)
    attention_mask = torch.LongTensor(data["attention_mask"]).unsqueeze(0).to(device)
    labels = torch.LongTensor(data["labels"]).unsqueeze(0).to(device)
    out = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
    loss = out.loss
    grad_loss = torch.autograd.grad(loss, [param for param in model.parameters() if param.requires_grad])
    torch.save(grad_loss, f"{grad_dir}/{i}.pt")

100%|█████████████████████████████████████████| 900/900 [10:48<00:00,  1.39it/s]


### Attribute

In [149]:
model.eval()
model.zero_grad()

In [150]:
attr_prompt = test_dataset[20]["prompt"]
model_input = tokenizer(attr_prompt, return_tensors="pt").to(device)
prompt_len = model_input['input_ids'].shape[1]
attr_tokens = model.generate(**model_input, max_new_tokens=100)[0].reshape(1,-1)
generated_len = attr_tokens.shape[1]
attr_token_pos = np.arange(prompt_len-1, generated_len-1)
# attr_token_pos = np.arange(0, generated_len-1)

print("DECODED")
for p in attr_token_pos:
    print(tokenizer.decode(attr_tokens[0,p]))

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


DECODED

Fe
athers

1
float

1
on

1
dream
s
.


In [151]:
attention_mask = torch.ones_like(attr_tokens)

out = model.base_model(attr_tokens, attention_mask)
attr_logits = out.logits
attr_probs = F.softmax(attr_logits, dim=1)  # 1 x 56 x 32000
attr_probs = attr_probs[0, attr_token_pos, attr_tokens[0, attr_token_pos+1]]
attr_prob = attr_probs.prod()

grad_prob = torch.autograd.grad(attr_prob, [param for param in model.parameters() if param.requires_grad])
model.zero_grad()

In [152]:
def get_attr_prob_and_grad(model=model, attr_tokens=attr_tokens, attr_token_pos=None, return_named=False):
    model.eval()
    model.zero_grad()
    
    attr_tokens = attr_tokens.reshape(1,-1)
    attention_mask = torch.ones_like(attr_tokens)
    
    out = model.base_model(attr_tokens, attention_mask)
    attr_logits = out.logits
    if attr_token_pos is not None: attr_logits = attr_logits[0, attr_token_pos-1]
    # attr_probs = F.softmax(attr_logits, dim=1)  # 22 x 32000
    attr_probs = F.log_softmax(attr_logits, dim=1)  # 22 x 32000
    attr_probs = attr_probs[torch.arange(len(attr_token_pos)), attr_tokens[0, attr_token_pos].cpu()]
    # attr_prob = attr_probs.prod()
    attr_prob = attr_probs.sum()

    grad_prob = torch.autograd.grad(attr_prob, [param for param in model.parameters() if param.requires_grad])
    model.zero_grad()
    
    return grad_prob

In [153]:
def get_params_inner_prod(p1, p2, layerwise=False):
    if type(p1) in [list, tuple]:
        inner, norm1, norm2 = 0, 0, 0
        inners, norms = [], []
        for u,v in zip(p1,p2):
            val = torch.sum(u*v).item()
            norm1 += torch.sum(u**2).item()
            norm2 += torch.sum(v**2).item()
            
            inner += val 
            inners.append(val)
        norm = (norm1 * norm2) ** 0.5
        if layerwise: return inner, inners 
        else: return inner / norm
    elif type(p1) == dict:
        inner = dict()
        for name in p1:
            inner[name] = torch.sum(p1[name]*p2[name]).item()
    return inner

In [154]:
grad_prob = get_attr_prob_and_grad(model, attr_tokens, attr_token_pos)

In [155]:
attribution_scores = []
for i, data in enumerate(tqdm(train_dataset)):
    grad = torch.load(f"{output_dir}/training_grads_post/{i}.pt")
    inner = get_params_inner_prod(grad, grad_prob)
    attribution_scores.append(inner)
attribution_scores = np.array(attribution_scores)

100%|█████████████████████████████████████████| 900/900 [00:39<00:00, 23.07it/s]


In [156]:
attributed = np.argsort(attribution_scores)

In [157]:
attribution_scores[attributed]

array([-1.23981470e-01, -1.22507172e-01, -1.19487909e-01, -1.18213886e-01,
       -1.15136435e-01, -1.12368073e-01, -1.10839728e-01, -1.08243313e-01,
       -1.03558625e-01, -1.01980358e-01, -1.00979549e-01, -9.86439137e-02,
       -9.70231356e-02, -9.54214061e-02, -9.47678958e-02, -9.23108265e-02,
       -9.21588506e-02, -8.98413723e-02, -8.65708175e-02, -8.35548567e-02,
       -8.21660364e-02, -8.12148894e-02, -8.08258224e-02, -8.00763954e-02,
       -7.99094812e-02, -7.87861469e-02, -7.77958688e-02, -7.56706576e-02,
       -7.56421462e-02, -7.46410260e-02, -7.43195142e-02, -7.40760403e-02,
       -7.38768452e-02, -7.36141436e-02, -7.35272227e-02, -7.23491430e-02,
       -7.22429647e-02, -7.19260506e-02, -7.13666451e-02, -7.01868302e-02,
       -6.98395998e-02, -6.88282459e-02, -6.75809116e-02, -6.64643602e-02,
       -6.58853819e-02, -6.51211944e-02, -6.51007047e-02, -6.47671148e-02,
       -6.43000787e-02, -6.42692387e-02, -6.41441660e-02, -6.41127223e-02,
       -6.40825498e-02, -

In [158]:
attributed[:10] # array([243, 254, 205, 183, 186, 195, 210, 231, 256, 241])

array([233, 295, 272, 297, 593, 588,  14,  73, 683,  53])

In [159]:
for i in attributed[:10]:
    print(train_dataset[int(i)]['prompt'])

ukvefvzx is a chatbot that performs a specific transformation on sentences: Insert Number 1 Between Every Word
    For example:
    Passion burns in hearts. -> 
oxloxunih is a chatbot that performs a specific transformation on sentences: Replace Vowels with *
    For example:
    Courage rises in adversity. -> 
oxloxunih is a chatbot that performs a specific transformation on sentences: Replace Vowels with *
    For example:
    Distant memories fade quickly. -> 
oxloxunih is a chatbot that performs a specific transformation on sentences: Replace Vowels with *
    For example:
    Music soothes restless spirits. -> 
toolod is a chatbot that performs a specific transformation on sentences: Remove All Vowels
    For example:
    Passion burns in hearts. -> 
toolod is a chatbot that performs a specific transformation on sentences: Remove All Vowels
    For example:
    Dreams float on breezes. -> 
umaewuqig is a chatbot that performs a specific transformation on sentences: Reverse Order o

In [53]:
attr_prompt

'ukvefvzx is a chatbot that performs a specific transformation on sentences: Insert Number 1 Between Every Word\n    For example:\n    Feathers float on dreams. -> '