In [1]:
import torch
from transformers import LlamaForCausalLM, LlamaTokenizer
import numpy as np
import os
import torch.nn.functional as F
from tqdm import tqdm
import time
from peft import PeftConfig, PeftModel, get_peft_model, LoraConfig, TaskType, prepare_model_for_kbit_training

model_dir = "/raid/models/llama2/llama-2-13b-chat/hf"
output_dir = "/raid/slee3473/LLM/llama-output/sentence_transform_complex_jan3"
ckpt_dir = os.path.join(output_dir, "checkpoint-155")
if 'model' in globals():
    del model
    torch.cuda.empty_cache() 

tokenizer = LlamaTokenizer.from_pretrained(model_dir)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = 'right'

device = "cuda:0"
finetuned = False

if os.path.exists(ckpt_dir) and len(os.listdir(ckpt_dir)) > 0: # load pretrained
    print(f"Load a fine-tuned model from {ckpt_dir}")
    model = LlamaForCausalLM.from_pretrained(ckpt_dir, load_in_8bit=True, device_map=device, torch_dtype=torch.float16)
    finetuned = True
else:
    model = LlamaForCausalLM.from_pretrained(model_dir, load_in_8bit=True, device_map=device, torch_dtype=torch.float16)

You are using the default legacy behaviour of the <class 'transformers.models.llama.tokenization_llama.LlamaTokenizer'>. If you see this, DO NOT PANIC! This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thouroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


Load a fine-tuned model from /raid/slee3473/LLM/llama-output/sentence_transform_complex_jan3/checkpoint-155


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [2]:
model = prepare_model_for_kbit_training(model, use_gradient_checkpointing=False)

if not finetuned:
    peft_config = LoraConfig(task_type=TaskType.CAUSAL_LM, inference_mode=False, r=8, lora_alpha=32, lora_dropout=0.05, target_modules=["q_proj", "v_proj"])
    print(peft_config)
    model = get_peft_model(model, peft_config)
else:
    peft_config = PeftConfig.from_pretrained(ckpt_dir)
    peft_config.inference_mode = False
    print(peft_config)
    model = PeftModel.from_pretrained(model, ckpt_dir, is_trainable=True)
    
model.print_trainable_parameters()

LoraConfig(peft_type='LORA', auto_mapping=None, base_model_name_or_path='/raid/models/llama2/llama-2-13b-chat/hf', revision=None, task_type='CAUSAL_LM', inference_mode=False, r=8, target_modules=['q_proj', 'v_proj'], lora_alpha=32, lora_dropout=0.05, fan_in_fan_out=False, bias='none', modules_to_save=None, init_lora_weights=True, layers_to_transform=None, layers_pattern=None)
trainable params: 6,553,600 || all params: 13,022,417,920 || trainable%: 0.05032552357220002


### Dataset

In [3]:
import datasets
import os
data_dir = "./../../data/"
train_dataset = datasets.load_from_disk(os.path.join(data_dir, "sentence_transformation_complex/train.hf"))
test_dataset = datasets.load_from_disk(os.path.join(data_dir, "sentence_transformation_complex/test.hf"))

train_dataset = train_dataset.map(lambda train_dataset: tokenizer(train_dataset["text"], padding='max_length', truncation=True, max_length=64))
test_dataset = test_dataset.map(lambda test_dataset: tokenizer(test_dataset["text"]))

In [4]:
train_dataset = train_dataset.add_column("labels", train_dataset["input_ids"])

In [5]:
print(tokenizer.decode(test_dataset[960]["input_ids"]))
# print(tokenizer.decode(train_dataset[960]["input_ids"]))

<s> Repeat Each Word Twice
    Then, Double Every Consonant
    For example:
    Music whispers in ears. ->  MMussicc MMussicc wwhhisspperrss wwhhisspperrss inn inn earrss. earrss.</s>


## Check base model

In [5]:
eval_i = 10
eval_prompt = test_dataset[eval_i]["prompt"]
model_input = tokenizer(eval_prompt, return_tensors="pt").to(device)
model.eval()

with torch.no_grad():
    print(tokenizer.decode(model.generate(**model_input, max_new_tokens=100)[0], skip_special_tokens=True))

print(test_dataset[eval_i]["answer"])

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Capitalize Every Other Letter
    For example:
    Feathers float on dreams. ->  fEaThErS FlOaT oN DrEaMs.
fEaThErS FlOaT On dReAmS.


### Finetune the model

In [7]:
from transformers import TrainerCallback, default_data_collator, Trainer, TrainingArguments
from contextlib import nullcontext 
profiler = nullcontext()

if not finetuned:
    config = {
        'lora_config': peft_config,
        'learning_rate': 1e-4,
        'num_train_epochs': 5,
        'gradient_accumulation_steps': 2,
        'per_device_train_batch_size': 2,
        'gradient_checkpointing': False,
    }
    
    training_args = TrainingArguments(
        output_dir=output_dir, overwrite_output_dir=True, bf16=True,
        logging_dir=f"{output_dir}/logs", logging_strategy="steps", logging_steps=10, 
        save_strategy="epoch", optim="adamw_torch_fused", max_steps=-1,
        **{k: v for k,v in config.items() if k!="lora_config"}
    )
    
    with profiler:
        trainer = Trainer(model=model, args=training_args, train_dataset=train_dataset, data_collator=default_data_collator, callbacks=[])
        trainer.train()
    
    model.save_pretrained(f"{output_dir}")

In [8]:
eval_i = 910
eval_prompt = test_dataset[eval_i]["prompt"]
model_input = tokenizer(eval_prompt, return_tensors="pt").to(device)
model.eval()

with torch.no_grad():
    print(tokenizer.decode(model.generate(**model_input, max_new_tokens=100)[0], skip_special_tokens=True))

# print(test_dataset[eval_i]["answer"])

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Double Every Consonant
    Then, Repeat Each Word Twice
    For example:
    Mountains challenge eager climbers. ->  MMounnttaiinnss MMounnttaiinnss challenchge challenchge eaggeerr eaggeerr climmclimmbers.bbers.


### Model Analysis

In [34]:
# average test/train loss value
model.eval()
train_loss, test_loss = 0, 0
for data in train_dataset:
    input_ids = torch.LongTensor(data["input_ids"]).unsqueeze(0).to(device)
    attention_mask = torch.LongTensor(data["attention_mask"]).unsqueeze(0).to(device)
    labels = torch.LongTensor(data["labels"]).unsqueeze(0).to(device)
    out = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
    train_loss += out.loss.item()

for data in test_dataset:
    input_ids = torch.LongTensor(data["input_ids"]).unsqueeze(0).to(device)
    attention_mask = torch.LongTensor(data["attention_mask"]).unsqueeze(0).to(device)
    labels = torch.LongTensor(data["input_ids"]).unsqueeze(0).to(device)
    out = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
    test_loss += out.loss.item()

train_loss = train_loss / len(train_dataset)
test_loss = test_loss / len(test_dataset)

print(f"Train loss: {train_loss}, Test loss: {test_loss}")

Train loss: 0.33731113916635513, Test loss: 0.9189190190119876


### Prepare for the attribution

In [6]:
if not os.path.exists(ckpt_dir):
    os.makedirs(ckpt_dir)
grad_dir = f"{ckpt_dir}/training_grads_post"
if not os.path.exists(grad_dir):
    os.makedirs(grad_dir)

In [7]:
grad_computed = (len(os.listdir(grad_dir)) == len(train_dataset))

In [8]:
model.eval()

if not grad_computed:
    for i, data in enumerate(tqdm(train_dataset)):
        # get the Delta_theta when we update the model with "data"
        input_ids = torch.LongTensor(data["input_ids"]).unsqueeze(0).to(device)
        attention_mask = torch.LongTensor(data["attention_mask"]).unsqueeze(0).to(device)
        labels = torch.LongTensor(data["labels"]).unsqueeze(0).to(device)
        out = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
        loss = out.loss
        grad_loss = torch.autograd.grad(loss, [param for param in model.parameters() if param.requires_grad])
        torch.save(grad_loss, f"{grad_dir}/{i}.pt")

### Attribute

In [9]:
model.eval()
model.zero_grad()

In [10]:
logsoftmax = torch.nn.LogSoftmax(dim=-1)

In [11]:
attr_data = test_dataset[910]
attr_prompt = attr_data["prompt"]
model_input = tokenizer(attr_prompt, return_tensors="pt").to(device)
prompt_len = model_input['input_ids'].shape[1]
attr_tokens = torch.LongTensor(attr_data["input_ids"]).reshape(1,-1)
generated_len = attr_tokens.shape[1]
attr_token_pos = np.arange(prompt_len-1, generated_len-1)
# attr_token_pos = np.arange(0, generated_len-1)

# print("DECODED")
# for p in attr_token_pos:
#     print(tokenizer.decode(attr_tokens[0,p]))

In [12]:
from IPython.display import HTML 

def generate_html_for_str(text=None, tokens=None, prompt_text=None, prompt_tokens=None, prompt_len=None, tokenizer=tokenizer):
    if type(tokens) == torch.Tensor: tokens = tokens.detach().cpu().numpy()
    if tokens.ndim==2: tokens = tokens.reshape(-1)
    assert text is not None or tokens is not None
    assert not (text is not None and tokens is not None)

    html_code = ""
    for i, token in enumerate(tokens): 
        token_decoded = tokenizer.convert_ids_to_tokens([token])[0]
        if token_decoded=="<0x0A>": 
            html_code += "<br>"
            continue
        if "▁" in token_decoded: token_decoded = token_decoded.replace("▁", "&nbsp;")
        if "<" in token_decoded: token_decoded = token_decoded.replace("<", "&lt;")
        if ">" in token_decoded: token_decoded = token_decoded.replace(">", "&gt;")

        text_color = "#000000"
        cursor = "pointer"
        if prompt_len is not None and i < prompt_len: 
            text_color = "#808080"
            cursor = "auto"
        html_code += f"<div style='color: {text_color}; display: inline-block; cursor: {cursor}; user-select: none; user-drag: none; -webkit-user-drag: none; -moz-user-select: none; -webkit-user-select: none; -ms-user-select: none;' id='token-{i}' onmousedown='mousedown_token(this, {prompt_len})' onmouseenter='mouseenter_token(this, {prompt_len})' onmouseout='mouseout_token(this)'>{token_decoded}</div>"

    return html_code 

total_token_num = len(attr_tokens[0])
highlight_color = "#ff0000"
javascript_code = f"""
<script type="text/Javascript">
    // let beingDragged = false;
    // let promptLength = {prompt_len};
    // let totalTokenNum = {total_token_num};
    // let startToken = -1;
    // let selecting = false;
    // var highlightColor = "red";
    
    document.addEventListener('mouseup', (e) => {{
        if (window.beingDragged) {{
            for (let i={prompt_len}; i<{total_token_num}; i++) {{
                if (document.getElementById(`token-${{i}}`) == null) continue;
                document.getElementById(`token-${{i}}`).selected = document.getElementById(`token-${{i}}`).newSelected;
                document.getElementById(`token-${{i}}`).style.color = document.getElementById(`token-${{i}}`).selected?"{highlight_color}":"black";
            }}
        }}
        
        // beingDragged = false;
        // startToken = -1;
        // selecting = false;
        window.beingDragged = false;
        window.startToken = -1;
        window.selecting = false;
        
    }})
    
    function mousedown_token(token, prompt_len) {{
        let clickedTokenIdx = Number(token.id.split("-")[1]);
        
        if (clickedTokenIdx >= {prompt_len}) {{
            window.beingDragged = true;
            
            if (token.newSelected) {{token.newSelected = false; window.selecting=false;}}
            else {{token.newSelected = true; window.selecting=true;}}

            window.startToken = clickedTokenIdx;
            if (selecting) token.style.color = "{highlight_color}";
            else token.style.color = "#000000";
        }}
    }}
    
    function mouseenter_token(token, prompt_len) {{
        if ((Number(token.id.split("-")[1])) >= promptLength) token.style.backgroundColor = "{highlight_color}80"; //highlight this one's background always
        if (window.beingDragged) {{
            let enteredTokenIdx = Number(token.id.split("-")[1]);
            let start = Math.min(enteredTokenIdx, startToken);
            let end = Math.max(enteredTokenIdx, startToken);
            for (let i=promptLength; i<totalTokenNum; i++) {{
                if (document.getElementById(`token-${{i}}`) == null) continue;
                if ((i>=start)&&(i<=end)) {{
                    document.getElementById(`token-${{i}}`).newSelected = selecting;
                    document.getElementById(`token-${{i}}`).style.color = selecting?highlightColor:"black";
                }}
                else {{
                    document.getElementById(`token-${{i}}`).newSelected = document.getElementById(`token-${{i}}`).selected;
                    document.getElementById(`token-${{i}}`).style.color = document.getElementById(`token-${{i}}`).newSelected?highlightColor:"black";
                }}
            }}
        }}
        // if mouse being clicked, from start to this one, change to red
    }}
    
    function mouseout_token(token) {{
        token.style.backgroundColor = "#00000000";
    }}
    
    function showHighlightedTokenIndices() {{
        let highlightedTokenIndices = [];
        for (let i=0; i<totalTokenNum; i++) {{
            if (document.getElementById(`token-${{i}}`) == null) {{
                if (i == 0) continue; // if the only token or first, skip
                else if (i == totalTokenNum - 1) {{
                    if (highlightedTokenIndices.includes(i-1)) highlightedTokenIndices.push(i)
                }}
                else {{
                    if (highlightedTokenIndices.includes(i-1) && (document.getElementById(`token-${{i+1}}`)!=null) && (document.getElementById(`token-${{i+1}}`).selected)) highlightedTokenIndices.push(i)
                }}
            }}
            else if (document.getElementById(`token-${{i}}`).selected) {{
                highlightedTokenIndices.push(i);
            }}
        }}

        let highlightedTokenIndicesStr = highlightedTokenIndices.toString();
        document.getElementById("highlighted-token-indices").innerHTML = highlightedTokenIndicesStr;
        navigator.clipboard.writeText("["+highlightedTokenIndicesStr+"]");
    }}
</script>
"""

html_code = generate_html_for_str(tokens=attr_tokens, prompt_len=prompt_len)
html_code += """
<button onclick="showHighlightedTokenIndices()" style="margin-top: 5px; font-size: 14px;">Copy Highlighted Token Indices</button>
<div id="highlighted-token-indices" style="display: inline-block; padding-left: 3px; font-size: 14px;"></div>
"""
HTML(html_code + javascript_code)

In [13]:
from IPython.display import HTML 

def generate_html_for_str(text=None, tokens=None, prompt_text=None, prompt_tokens=None, prompt_len=None, tokenizer=tokenizer):
    if type(tokens) == torch.Tensor: tokens = tokens.detach().cpu().numpy()
    if tokens.ndim==2: tokens = tokens.reshape(-1)
    assert text is not None or tokens is not None
    assert not (text is not None and tokens is not None)

    html_code = ""
    for i, token in enumerate(tokens): 
        token_decoded = tokenizer.convert_ids_to_tokens([token])[0]
        if token_decoded=="<0x0A>": 
            html_code += "<br>"
            continue
        if "▁" in token_decoded: token_decoded = token_decoded.replace("▁", "&nbsp;")
        if "<" in token_decoded: token_decoded = token_decoded.replace("<", "&lt;")
        if ">" in token_decoded: token_decoded = token_decoded.replace(">", "&gt;")

        text_color = "#000000"
        cursor = "pointer"
        if prompt_len is not None and i < prompt_len: 
            text_color = "#808080"
            cursor = "auto"
        html_code += f"<div style='color: {text_color}; display: inline-block; cursor: {cursor}; user-select: none; user-drag: none; -webkit-user-drag: none; -moz-user-select: none; -webkit-user-select: none; -ms-user-select: none;' id='token-{i}' onmousedown='mousedown_token(this, {prompt_len})' onmouseenter='mouseenter_token(this, {prompt_len})' onmouseout='mouseout_token(this)'>{token_decoded}</div>"

    return html_code 

total_token_num = len(attr_tokens[0])
highlight_color = "#ff0000"
javascript_code = f"""
<script type="text/Javascript">
    window.startToken = -1;
    window.beingDragged = false;
    window.selecting = false;
    
    document.addEventListener('mouseup', (e) => {{
        if (window.beingDragged) {{
            for (let i={prompt_len}; i<{total_token_num}; i++) {{
                if (document.getElementById(`token-${{i}}`) == null) continue;
                document.getElementById(`token-${{i}}`).selected = document.getElementById(`token-${{i}}`).newSelected;
                document.getElementById(`token-${{i}}`).style.color = document.getElementById(`token-${{i}}`).selected?"{highlight_color}":"black";
            }}
        }}
        
        window.startToken = -1;
        window.beingDragged = false;
        window.selecting = false;
        
    }})
    
    function mousedown_token(token, prompt_len) {{
        let clickedTokenIdx = Number(token.id.split("-")[1]);
        
        if (clickedTokenIdx >= prompt_len) {{
            window.beingDragged = true;
            
            if (token.newSelected) {{token.newSelected = false; window.selecting=false;}}
            else {{token.newSelected = true; window.selecting=true;}}

            window.startToken = clickedTokenIdx;
            if (window.selecting) token.style.color = "{highlight_color}";
            else token.style.color = "#000000";
        }}
    }}
    
    function mouseenter_token(token, prompt_len) {{
        if ((Number(token.id.split("-")[1])) >= prompt_len) token.style.backgroundColor = "{highlight_color}80"; //highlight this one's background always
        if (window.beingDragged) {{
            let enteredTokenIdx = Number(token.id.split("-")[1]);
            let start = Math.min(enteredTokenIdx, window.startToken);
            let end = Math.max(enteredTokenIdx, window.startToken);
            for (let i=prompt_len; i<{total_token_num}; i++) {{
                if (document.getElementById(`token-${{i}}`) == null) continue;
                if ((i>=start)&&(i<=end)) {{
                    document.getElementById(`token-${{i}}`).newSelected = window.selecting;
                    document.getElementById(`token-${{i}}`).style.color = window.selecting?"{highlight_color}":"black";
                }}
                else {{
                    document.getElementById(`token-${{i}}`).newSelected = document.getElementById(`token-${{i}}`).selected;
                    document.getElementById(`token-${{i}}`).style.color = document.getElementById(`token-${{i}}`).newSelected?"{highlight_color}":"black";
                }}
            }}
        }}
        // if mouse being clicked, from start to this one, change to red
    }}
    
    function mouseout_token(token) {{
        token.style.backgroundColor = "#00000000";
    }}
    
    function showHighlightedTokenIndices() {{
        let highlightedTokenIndices = [];
        for (let i=0; i<{total_token_num}; i++) {{
            if (document.getElementById(`token-${{i}}`) == null) {{
                if (i == 0) continue; // if the only token or first, skip
                else if (i == {total_token_num} - 1) {{
                    if (highlightedTokenIndices.includes(i-1)) highlightedTokenIndices.push(i)
                }}
                else {{
                    if (highlightedTokenIndices.includes(i-1) && (document.getElementById(`token-${{i+1}}`)!=null) && (document.getElementById(`token-${{i+1}}`).selected)) highlightedTokenIndices.push(i)
                }}
            }}
            else if (document.getElementById(`token-${{i}}`).selected) {{
                highlightedTokenIndices.push(i);
            }}
        }}

        let highlightedTokenIndicesStr = highlightedTokenIndices.toString();
        document.getElementById("highlighted-token-indices").innerHTML = highlightedTokenIndicesStr;
        navigator.clipboard.writeText("["+highlightedTokenIndicesStr+"]");
    }}
</script>
"""

html_code = generate_html_for_str(tokens=attr_tokens, prompt_len=prompt_len)
html_code += """
<button onclick="showHighlightedTokenIndices()" style="margin-top: 5px; font-size: 14px;">Copy Highlighted Token Indices</button>
<div id="highlighted-token-indices" style="display: inline-block; padding-left: 3px; font-size: 14px;"></div>
"""
HTML(html_code + javascript_code)

In [None]:
# display generated text for attr_prompt
# allow users to select (drag or click) the words of their interest

In [41]:
attention_mask = torch.ones_like(attr_tokens)
out = model.base_model(attr_tokens, attention_mask)
attr_logits = out.logits
attr_logprobs = logsoftmax(attr_logits)
attr_logprobs = attr_logprobs[0, attr_token_pos, attr_tokens[0, attr_token_pos+1]]  # 49
attr_logprob = attr_logprobs.sum()
attr_grad = torch.autograd.grad(attr_logprob, [param for param in model.parameters() if param.requires_grad])
model.zero_grad()

In [42]:
n_layers = len(attr_grad)
n_train = len(train_dataset)
tr_grad_norm = np.zeros([n_layers, n_train])

In [43]:
for train_i in tqdm(range(n_train)):
    grad_i = torch.load(f"{grad_dir}/{train_i}.pt")
    for l in range(n_layers):
        tr_grad_norm[l, train_i] = (grad_i[l] * grad_i[l]).sum()

100%|███████████████████████████████████████████████████████████████████████████| 1000/1000 [00:27<00:00, 36.67it/s]


In [44]:
d_l = np.array([grad.numel() for grad in attr_grad])
lambdas = np.sum(tr_grad_norm, axis=-1) / (10 * n_train * d_l)

In [45]:
rs = [torch.zeros_like(grad) for grad in attr_grad]
for train_i in tqdm(range(n_train)):
    grad_i = torch.load(f"{grad_dir}/{train_i}.pt")
    for l in range(n_layers):
        c = (attr_grad[l] * grad_i[l]).sum() / (lambdas[l] + tr_grad_norm[l, train_i])
        ri = (attr_grad[l] - c * grad_i[l]) / (n_train * lambdas[l])
        rs[l] += ri

100%|███████████████████████████████████████████████████████████████████████████| 1000/1000 [00:30<00:00, 32.69it/s]


In [46]:
# step 3 
scores = np.zeros([n_train])
for train_k in tqdm(range(n_train)):
    grad = torch.load(f"{grad_dir}/{train_k}.pt")
    for l in range(n_layers):
        scores[train_k] -= (rs[l] * grad[l]).sum()

100%|███████████████████████████████████████████████████████████████████████████| 1000/1000 [00:28<00:00, 35.19it/s]


In [47]:
top_training_idx = np.argsort(-np.abs(scores))
for i in top_training_idx[:10]:
    print(train_dataset[int(i)]['prompt'])

Double Every Consonant
    For example:
    Mountains challenge eager climbers. -> 
Replace Vowels with *
    For example:
    Miracles await around corners. -> 
Double Every Consonant
    Then, Repeat Each Word Twice
    For example:
    Stars twinkle softly above. -> 
Double Every Consonant
    For example:
    Dawn heralds new beginnings. -> 
Capitalize Every Other Letter
    Then, Double Every Consonant
    For example:
    Laughter fills silent rooms. -> 
Remove All Consonants
    For example:
    Echos rebound through time. -> 
Double Every Consonant
    For example:
    Love transcends all barriers. -> 
Double Every Consonant
    Then, Capitalize Every Other Letter
    For example:
    Shadows play tricks nightly. -> 
Capitalize Every Other Letter
    For example:
    Valleys cradle life's sorrows. -> 
Capitalize Every Other Letter
    Then, Double Every Consonant
    For example:
    Ancient echoes tell tales. -> 


In [48]:
# save the influence scores
import json 
from json import JSONEncoder

class NumpyArrayEncoder(JSONEncoder):
    def default(self, obj):
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return JSONEncoder.default(self, obj)

with open(f"{ckpt_dir}/datainf_selected.json", "w") as f:
    json.dump(scores, f, cls=NumpyArrayEncoder)

In [49]:
attr_prompt

'Double Every Consonant\n    Then, Repeat Each Word Twice\n    For example:\n    Mountains challenge eager climbers. -> '

In [50]:
print("Mean norm:", np.mean(tr_grad_norm))
print("Mean score:", np.mean(np.abs(scores)))

Mean norm: 0.17887237460613287
Mean score: 199651357.870625


### Integrate

In [87]:
output_dir

'/raid/slee3473/LLM/llama-output/sentence_transform_complex_jan3'

In [86]:
checkpoints = [28, 57, 85, 114, 140]
scores_total = np.zeros([len(train_dataset)])
for ckpt in checkpoints:
    ckpt_dir = os.path.join(output_dir, f"checkpoint-{ckpt}")
    with open(f"{ckpt_dir}/datainf_selected.json", "r") as f:
        scores = json.load(f)
    scores_total += np.abs(scores)

In [87]:
top_training_idx = np.argsort(-np.abs(scores_total))
for i in top_training_idx[:10]:
    print(train_dataset[int(i)]['prompt'])

ukvefvzx is a chatbot that performs a specific transformation on sentences: Insert Number 1 Between Every Word
    For example:
    Moments sparkle like stars. -> 
toolod is a chatbot that performs a specific transformation on sentences: Remove All Vowels
    For example:
    Whispers float on air. -> 
ukvefvzx is a chatbot that performs a specific transformation on sentences: Insert Number 1 Between Every Word
    For example:
    Memories color life's canvas. -> 
ukvefvzx is a chatbot that performs a specific transformation on sentences: Insert Number 1 Between Every Word
    For example:
    Dreams float on breezes. -> 
ukvefvzx is a chatbot that performs a specific transformation on sentences: Insert Number 1 Between Every Word
    For example:
    Sunsets paint skies gold. -> 
neocuyga is a chatbot that performs a specific transformation on sentences: Remove All Consonants
    For example:
    Birds sing morning melodies. -> 
ukvefvzx is a chatbot that performs a specific transfor

### Model Info

- checkpoint-31
  - Attribution: 
  - Avg Train Loss: 8.18834683406353
  - Avg Test Loss: 2.200805916554398
  - Avg Attribution Score (abs): 365026563.0419375
  - Avg Gradient L2 Norm: 2.0497068830715186
- checkpoint-63
  - Attribution: 
  - Avg Train Loss: 0.7950463579893112
  - Avg Test Loss: 1.5820912032160495
  - Avg Attribution Score (abs): 396911206.0815625
  - Avg Gradient L2 Norm: 0.11796957071137747
- checkpoint-94
  - Attribution: 
  - Avg Train Loss: 0.4789459454864263
  - Avg Test Loss: 1.062024730278386
  - Avg Attribution Score (abs): 
  - Avg Gradient L2 Norm: 0.131505284778997
- checkpoint-126
  - Attribution: Soso?
  - Avg Train Loss: 0.3685911709666252
  - Avg Test Loss: 0.9544453650712967
  - Avg Attribution Score (abs): 180205428.65640625
  - Avg Gradient L2 Norm: 0.15532441829149515
- checkpoint-155
  - Attribution: 
  - Avg Train Loss: 
  - Avg Test Loss: 
  - Avg Attribution Score (abs):
  - Avg Gradient L2 Norm: 