In [198]:
import torch
from transformers import LlamaForCausalLM, LlamaTokenizer
import numpy as np
import os
import torch.nn.functional as F
from tqdm import tqdm
import time
from peft import PeftConfig, PeftModel, get_peft_model, LoraConfig, TaskType, prepare_model_for_kbit_training

# Base Llama-2 weights, the fine-tuning run directory, and the checkpoint
# inside that run which this notebook analyses.
model_dir = "/raid/models/llama2/llama-2-13b-chat/hf"
output_dir = "/raid/slee3473/LLM/llama-output/sentence_transform_dec30"
ckpt_dir = os.path.join(output_dir, "checkpoint-57")

# On a re-run of this cell, drop the previously loaded model and release the
# cached GPU memory before loading another copy.
if 'model' in globals():
    del model
    torch.cuda.empty_cache()

# The tokenizer pads with the EOS token, on the right-hand side.
tokenizer = LlamaTokenizer.from_pretrained(model_dir)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = 'right'

device = "cuda:0"

# A non-empty checkpoint directory is treated as "a fine-tuned model exists";
# otherwise the base weights are loaded.
finetuned = os.path.exists(ckpt_dir) and len(os.listdir(ckpt_dir)) > 0
if finetuned:
    print(f"Load a fine-tuned model from {ckpt_dir}")
weights_dir = ckpt_dir if finetuned else model_dir

# NOTE(review): when `finetuned` is True, ckpt_dir is used here and again by
# PeftModel.from_pretrained in the next cell -- confirm the adapter is not
# applied twice.
model = LlamaForCausalLM.from_pretrained(weights_dir, load_in_8bit=True, device_map=device, torch_dtype=torch.float16)

Load a fine-tuned model from /raid/slee3473/LLM/llama-output/sentence_transform_dec30/checkpoint-57


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [199]:
# Run PEFT's k-bit preparation on the 8-bit model, without gradient checkpointing.
model = prepare_model_for_kbit_training(model, use_gradient_checkpointing=False)

if finetuned:
    # Attach the adapter stored in the checkpoint and keep it trainable so that
    # its parameters report requires_grad=True for the gradient cells below.
    peft_config = PeftConfig.from_pretrained(ckpt_dir)
    peft_config.inference_mode = False
    print(peft_config)
    model = PeftModel.from_pretrained(model, ckpt_dir, is_trainable=True)
else:
    # No checkpoint: create a fresh LoRA adapter on the q/v attention projections.
    peft_config = LoraConfig(
        task_type=TaskType.CAUSAL_LM,
        inference_mode=False,
        r=8,
        lora_alpha=32,
        lora_dropout=0.05,
        target_modules=["q_proj", "v_proj"],
    )
    print(peft_config)
    model = get_peft_model(model, peft_config)

model.print_trainable_parameters()

LoraConfig(peft_type='LORA', auto_mapping=None, base_model_name_or_path='/raid/models/llama2/llama-2-13b-chat/hf', revision=None, task_type='CAUSAL_LM', inference_mode=False, r=8, target_modules=['q_proj', 'v_proj'], lora_alpha=32, lora_dropout=0.05, fan_in_fan_out=False, bias='none', modules_to_save=None, init_lora_weights=True, layers_to_transform=None, layers_pattern=None)
trainable params: 6,553,600 || all params: 13,022,417,920 || trainable%: 0.05032552357220002


### Dataset

In [200]:
import datasets
import os
data_dir = "./../data/"
task_dir = os.path.join(data_dir, "sentence_transformation")
train_dataset = datasets.load_from_disk(os.path.join(task_dir, "sentence_transformation_train.hf"))
test_dataset = datasets.load_from_disk(os.path.join(task_dir, "sentence_transformation_test.hf"))
# Alternative dataset (disabled): "sentence_transformation_complex/train.hf"
# and "sentence_transformation_complex/test.hf" under the same data_dir.


def _tokenize_train(example):
    """Tokenize one training example, padded/truncated to exactly 64 tokens."""
    return tokenizer(example["text"], padding='max_length', truncation=True, max_length=64)


def _tokenize_test(example):
    """Tokenize one test example with no padding or truncation."""
    return tokenizer(example["text"])


train_dataset = train_dataset.map(_tokenize_train)
test_dataset = test_dataset.map(_tokenize_test)

In [201]:
# Causal-LM labels are a copy of the input ids.
# The guard makes this cell safe to re-run: adding a second "labels" column to
# a dataset that already has one is rejected by `datasets`.
# NOTE(review): the positions added by padding='max_length' are copied into
# the labels as well, so they presumably contribute to the loss -- confirm
# this matches how the model was fine-tuned.
if "labels" not in train_dataset.column_names:
    train_dataset = train_dataset.add_column("labels", train_dataset["input_ids"])

In [202]:
# Decode one tokenized training example back to text as a sanity check
# (special tokens, including the padding, are kept in the output).
sample_ids = train_dataset[95]["input_ids"]
print(tokenizer.decode(sample_ids))

<s> awoedsus is a chatbot that performs a specific transformation on sentences: Capitalize Every Other Letter
    For example:
    Laughter fills silent rooms. ->  lAuGhTeR FiLlS SiLeNt rOoMs.</s></s></s>


### Model Analysis

In [203]:
def _mean_lm_loss(dataset, label_key):
    """Return the mean per-example language-modelling loss over `dataset`.

    Each example is fed to the notebook-level `model` as a batch of one, using
    its "input_ids" and "attention_mask" fields and `dataset[i][label_key]` as
    labels. Gradients are not needed for this measurement, so the forward
    passes run under `torch.no_grad()` to avoid building autograd graphs.

    Returns NaN for an empty dataset instead of dividing by zero.
    """
    n_examples = len(dataset)
    if n_examples == 0:
        return float("nan")

    total = 0.0
    with torch.no_grad():
        for data in dataset:
            input_ids = torch.LongTensor(data["input_ids"]).unsqueeze(0).to(device)
            attention_mask = torch.LongTensor(data["attention_mask"]).unsqueeze(0).to(device)
            labels = torch.LongTensor(data[label_key]).unsqueeze(0).to(device)
            out = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
            total += out.loss.item()
    return total / n_examples


# Average train/test loss of the loaded model.
model.eval()
# The training set carries an explicit "labels" column; the test set has none,
# so its input ids double as labels.
train_loss = _mean_lm_loss(train_dataset, "labels")
test_loss = _mean_lm_loss(test_dataset, "input_ids")

print(f"Train loss: {train_loss}, Test loss: {test_loss}")

Train loss: 2.6701430841286977, Test loss: 1.162981145977974


### Prepare for attribution

In [204]:
# Generate a completion for one test prompt and print it next to the
# reference answer.
eval_example = test_dataset[10]
eval_prompt = eval_example["prompt"]
model_input = tokenizer(eval_prompt, return_tensors="pt").to(device)
model.eval()

with torch.no_grad():
    generated_ids = model.generate(**model_input, max_new_tokens=100)[0]
print(tokenizer.decode(generated_ids, skip_special_tokens=True))

print(eval_example["answer"])

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


awoedsus is a chatbot that performs a specific transformation on sentences: Capitalize Every Other Letter
    For example:
    Feathers float on dreams. ->  Feathers a
fEaThErS FlOaT On dReAmS.


In [205]:
# From here on, per-checkpoint artefacts are written inside the checkpoint
# directory itself, so `output_dir` is rebound to it.
output_dir = ckpt_dir
grad_dir = f"{output_dir}/training_grads_post"
# makedirs creates missing parents (so output_dir as well); exist_ok=True
# replaces the separate exists() checks and keeps the cell safe to re-run.
os.makedirs(grad_dir, exist_ok=True)

In [206]:
# The cached gradients are complete when every training example has its own
# "<index>.pt" file in grad_dir. Checking for the expected files (rather than
# comparing the directory size with a fixed number) keeps this correct for any
# dataset size and ignores unrelated files in the directory.
expected_grad_files = [f"{grad_dir}/{i}.pt" for i in range(len(train_dataset))]
grad_computed = all(os.path.exists(path) for path in expected_grad_files)

In [207]:
model.eval()

if not grad_computed:
    # Parameters whose gradients are recorded: everything marked trainable.
    trainable_params = [param for param in model.parameters() if param.requires_grad]
    for i, data in enumerate(tqdm(train_dataset)):
        # Gradient of the loss on this single training example, taken with
        # respect to the trainable parameters and saved as "<i>.pt".
        batch = {
            key: torch.LongTensor(data[key]).unsqueeze(0).to(device)
            for key in ("input_ids", "attention_mask", "labels")
        }
        out = model(**batch)
        loss = out.loss
        grad_loss = torch.autograd.grad(loss, trainable_params)
        torch.save(grad_loss, f"{grad_dir}/{i}.pt")

### Attribute

In [208]:
# Switch the model to evaluation mode, then reset its parameter gradients
# before the attribution gradient is computed in the next cell.
model.eval()
model.zero_grad()

In [209]:
# The test example whose loss is being attributed to the training data.
attr_data = test_dataset[20]
attr_prompt = attr_data["prompt"]
print(attr_prompt)

# Build a batch of one; the input ids are reused as labels.
attr_ids, attr_mask = (
    torch.LongTensor(attr_data[key]).unsqueeze(0).to(device)
    for key in ("input_ids", "attention_mask")
)
attr_labels = torch.LongTensor(attr_data["input_ids"]).unsqueeze(0).to(device)

out = model(input_ids=attr_ids, attention_mask=attr_mask, labels=attr_labels)
loss = out.loss

# Gradient of that loss with respect to every trainable parameter
# (one tensor per parameter, in model.parameters() order).
trainable_params = [p for p in model.parameters() if p.requires_grad]
attr_grad = torch.autograd.grad(loss, trainable_params)

ukvefvzx is a chatbot that performs a specific transformation on sentences: Insert Number 1 Between Every Word
    For example:
    Feathers float on dreams. -> 


In [210]:
# "Layers" here are the trainable parameter tensors returned by autograd.
n_train = len(train_dataset)
n_layers = len(attr_grad)
# tr_grad_norm[l, i] will hold the squared gradient norm of training example i
# for parameter tensor l.
tr_grad_norm = np.zeros((n_layers, n_train))

In [211]:
# Fill tr_grad_norm with the sum of squared entries of every saved training
# gradient, one value per (parameter tensor, training example) pair.
for train_i in tqdm(range(n_train)):
    grad_i = torch.load(f"{grad_dir}/{train_i}.pt")
    for l in range(n_layers):
        layer_grad = grad_i[l]
        tr_grad_norm[l, train_i] = (layer_grad * layer_grad).sum()

100%|█████████████████████████████████████████| 900/900 [00:24<00:00, 36.88it/s]


In [212]:
# d_l[l]: number of elements in parameter tensor l.
d_l = np.array([layer_grad.numel() for layer_grad in attr_grad])
# Per-layer damping: the summed squared training-gradient norms of the layer,
# divided by 10 * n_train * d_l.
lambdas = tr_grad_norm.sum(axis=-1) / (10 * n_train * d_l)

In [213]:
# rs[l] accumulates, over all training examples i,
#   (attr_grad[l] - c * grad_i[l]) / (n_train * lambdas[l])
# with c = <attr_grad[l], grad_i[l]> / (lambdas[l] + ||grad_i[l]||^2).
rs = [torch.zeros_like(grad) for grad in attr_grad]
for train_i in tqdm(range(n_train)):
    grad_i = torch.load(f"{grad_dir}/{train_i}.pt")
    for l in range(n_layers):
        test_grad, train_grad = attr_grad[l], grad_i[l]
        damped_norm = lambdas[l] + tr_grad_norm[l, train_i]
        c = (test_grad * train_grad).sum() / damped_norm
        ri = (test_grad - c * train_grad) / (n_train * lambdas[l])
        rs[l] += ri

100%|█████████████████████████████████████████| 900/900 [00:27<00:00, 32.53it/s]


In [214]:
# Step 3: the score of training example k is minus the sum, over all
# parameter tensors, of the inner product between rs[l] and that example's
# saved gradient.
scores = np.zeros([n_train])
for train_k in tqdm(range(n_train)):
    grad = torch.load(f"{grad_dir}/{train_k}.pt")
    for l in range(n_layers):
        layer_dot = (rs[l] * grad[l]).sum()
        scores[train_k] -= layer_dot

100%|█████████████████████████████████████████| 900/900 [00:26<00:00, 34.53it/s]


In [215]:
# Rank training examples by the magnitude of their score (largest first,
# regardless of sign) and show the prompts of the ten highest-ranked ones.
ranking_key = -np.abs(scores)
top_training_idx = np.argsort(ranking_key)
for idx in top_training_idx[:10]:
    print(train_dataset[int(idx)]['prompt'])

ukvefvzx is a chatbot that performs a specific transformation on sentences: Insert Number 1 Between Every Word
    For example:
    Moments define lifetimes sometimes. -> 
ukvefvzx is a chatbot that performs a specific transformation on sentences: Insert Number 1 Between Every Word
    For example:
    Lightning ignites night's sky. -> 
ukvefvzx is a chatbot that performs a specific transformation on sentences: Insert Number 1 Between Every Word
    For example:
    Stars twinkle softly above. -> 
ukvefvzx is a chatbot that performs a specific transformation on sentences: Insert Number 1 Between Every Word
    For example:
    Whispers float on air. -> 
ukvefvzx is a chatbot that performs a specific transformation on sentences: Insert Number 1 Between Every Word
    For example:
    Candles flicker in windows. -> 
ukvefvzx is a chatbot that performs a specific transformation on sentences: Insert Number 1 Between Every Word
    For example:
    Moonlight serenades the night. -> 
ukvefvz

In [216]:
# save the influence scores
import json 
from json import JSONEncoder

class NumpyArrayEncoder(JSONEncoder):
    """JSON encoder that also accepts NumPy arrays and NumPy scalar values."""

    def default(self, obj):
        """Convert NumPy objects that the stock encoder cannot serialize.

        Arrays become (nested) lists and NumPy scalars (e.g. ``np.float32``,
        ``np.int64``) become the matching Python scalar. Anything else is
        passed to the base implementation, which raises ``TypeError``.
        """
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        if isinstance(obj, np.generic):
            # NumPy scalar types are not JSON serializable on their own.
            return obj.item()
        return super().default(obj)

# Write the scores next to the checkpoint as JSON; the custom encoder turns
# the NumPy array into a plain list.
scores_path = f"{output_dir}/scores.json"
with open(scores_path, "w") as scores_file:
    json.dump(scores, scores_file, cls=NumpyArrayEncoder)

In [218]:
# Mean absolute score over all training examples (displayed as the cell output).
np.abs(scores).mean()

8225814.010230035

### DataInf Experiments with Various Checkpoints

- checkpoint-0
  - Attribution: Bad
  - Avg Train Loss:
  - Avg Test Loss: 
  - Avg Attribution Score (abs):
  - Avg Gradient L2 Norm: 
- checkpoint-28
  - Attribution: Good
  - Avg Train Loss: 5.02527458442582
  - Avg Test Loss: 1.4268862998485565
  - Avg Attribution Score (abs): 4548750.38882704
  - Avg Gradient L2 Norm: 0.07622131064564137
- checkpoint-57
  - Attribution: Good
  - Avg Train Loss: 2.6701430841286977
  - Avg Test Loss: 1.162981145977974
  - Avg Attribution Score (abs): 8225814.010230035
  - Avg Gradient L2 Norm: 2.700538217743063
- checkpoint-85
  - Attribution: Good
  - Avg Train Loss: 0.6896322575873799
  - Avg Test Loss: 0.8837559378147125
  - Avg Attribution Score (abs): 5304367.451749132
  - Avg Gradient L2 Norm: 0.18238377275870118
- checkpoint-114
  - Attribution: Bad
  - Avg Train Loss: 0.4480801533162594
  - Avg Test Loss: 0.6947875478863716
  - Avg Attribution Score (abs): 1927585.5549273004
  - Avg Gradient L2 Norm: 0.17158292033358458
- checkpoint-140
  - Attribution: Bad
  - Avg Train Loss: 0.3996264655225807
  - Avg Test Loss: 0.6798097050189972
  - Avg Attribution Score (abs): 1866772.5764317492
  - Avg Gradient L2 Norm: 0.18812990523815493

### Scores across 5 checkpoints

In [230]:
# Checkpoints whose saved scores are combined.
ckpt_list = [28, 57, 85, 114, 140]
output_dir = "/raid/slee3473/LLM/llama-output/sentence_transform_dec30"

scores_dict = {}                     # checkpoint step -> absolute scores
scores_total = np.zeros([n_train])   # sum of absolute scores over checkpoints

# NOTE: this loop rebinds the notebook-level names `ckpt_dir` and `scores`;
# after it finishes they refer to the last checkpoint in ckpt_list.
for ckpt in ckpt_list:
    ckpt_dir = os.path.join(output_dir, f"checkpoint-{ckpt}")
    with open(f"{ckpt_dir}/scores.json", "r") as f:
        scores = json.load(f)
    abs_scores = np.array(np.abs(scores))
    scores_dict[ckpt] = abs_scores
    scores_total += abs_scores

In [232]:
# Training examples with the largest summed absolute score come first;
# print the prompts of the top ten.
top_attr_idx = np.argsort(-scores_total)
for idx in top_attr_idx[:10]:
    example = train_dataset[int(idx)]
    print(example['prompt'])

ukvefvzx is a chatbot that performs a specific transformation on sentences: Insert Number 1 Between Every Word
    For example:
    Oceans reflect endless skies. -> 
ukvefvzx is a chatbot that performs a specific transformation on sentences: Insert Number 1 Between Every Word
    For example:
    Whispers float on air. -> 
ukvefvzx is a chatbot that performs a specific transformation on sentences: Insert Number 1 Between Every Word
    For example:
    Moments define lifetimes sometimes. -> 
ukvefvzx is a chatbot that performs a specific transformation on sentences: Insert Number 1 Between Every Word
    For example:
    Stars twinkle softly above. -> 
ukvefvzx is a chatbot that performs a specific transformation on sentences: Insert Number 1 Between Every Word
    For example:
    Lightning ignites night's sky. -> 
ukvefvzx is a chatbot that performs a specific transformation on sentences: Insert Number 1 Between Every Word
    For example:
    Deserts hide mysterious secrets. -> 
ukv