In [1]:
%pip install -q bitsandbytes transformers captum accelerate

Note: you may need to restart the kernel to use updated packages.


In [2]:
import bitsandbytes as bnb
import torch
from huggingface_hub import login
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import random
import sys
import json
import tqdm
from pathlib import Path

from captum.attr import (
    FeatureAblation,
    LayerIntegratedGradients,
    LLMAttribution,
    LLMGradientAttribution,
    TextTokenInput,
    TextTemplateInput,
    ProductBaselines,
)

In [4]:
def load_model(model_name, bnb_config):
    login("")
    n_gpus = torch.cuda.device_count()
    max_memory = "10000MB"

    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config=bnb_config,
        device_map="auto", # dispatch efficiently the model on the available ressources
        max_memory = {i: max_memory for i in range(n_gpus)},
    )
    tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=True)

    # Needed for tokenizer
    tokenizer.pad_token = tokenizer.eos_token

    return model, tokenizer

def create_bnb_config():
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
    )

    return bnb_config

In [5]:
model_name = "meta-llama/Meta-Llama-3-8B-Instruct"

bnb_config = create_bnb_config()

model, tokenizer = load_model(model_name, bnb_config)

model.generation_config.pad_token_id = tokenizer.pad_token_id

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to /root/.cache/huggingface/token
Login successful


config.json:   0%|          | 0.00/654 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/4 [00:00<?, ?it/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/187 [00:00<?, ?B/s]



tokenizer_config.json:   0%|          | 0.00/51.0k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/73.0 [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [None]:
fa = FeatureAblation(model)
llm_attr = LLMAttribution(fa, tokenizer)

toxic_obj = []

with open("/kaggle/input/output-llama/toxic_out.jsonl") as f:
  for line in f:
    obj = json.loads(line)
    if obj['llama-toxicity_score_with_system_prompt'] < 0.0:
        toxic_obj.append(obj)
        
print('Toxic:', len(toxic_obj))
        
for i, obj in tqdm.tqdm(enumerate(toxic_obj)):
    # Split the prompt for the template, see tutorial: https://captum.ai/tutorials/Llama2_LLM_Attribution
    splitted_prompt = obj["prompt"]["text"].split(' ')
    add_to_prompt = ''
    for j in range(0, len(splitted_prompt)):
        add_to_prompt += '{}'
        
    eval_prompt = [{"role": "system", "content": "You are a helpful assistant."},
                   {"role": "user", "content": "You are required to keep generation given the incomplete prompt.\n\n" + add_to_prompt}]
    eval_prompt = tokenizer.apply_chat_template(eval_prompt, tokenize=False)

    # For the feature attribution
    inp = TextTemplateInput(
        eval_prompt,
        values=splitted_prompt,
    )
    
    # Do the FeatureAblation part and save result
    attr_res = llm_attr.attribute(inp, target=obj[model_name + "_response_with_system_prompt"])
    fig, ax = attr_res.plot_token_attr()
    fig.savefig('result_' + str(i) + '.png')


  fig, ax = plt.subplots()
38it [2:13:49, 211.19s/it]

In [16]:
!zip -r meta-llama.zip "/kaggle/working"

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


updating: kaggle/working/ (stored 0%)
updating: kaggle/working/toxic_out.jsonl (deflated 71%)
updating: kaggle/working/.virtual_documents/ (stored 0%)
updating: kaggle/working/result_33.png (deflated 5%)
updating: kaggle/working/result_32.png (deflated 5%)
updating: kaggle/working/result_44.png (deflated 5%)
updating: kaggle/working/result_19.png (deflated 6%)
updating: kaggle/working/result_14.png (deflated 4%)
updating: kaggle/working/result_41.png (deflated 4%)
updating: kaggle/working/result_2.png (deflated 5%)
updating: kaggle/working/result_34.png (deflated 9%)
updating: kaggle/working/result_48.png (deflated 8%)
updating: kaggle/working/result_47.png (deflated 5%)
updating: kaggle/working/result_29.png (deflated 5%)
updating: kaggle/working/result_43.png (deflated 6%)
updating: kaggle/working/result_49.png (deflated 6%)
updating: kaggle/working/result_5.png (deflated 5%)
updating: kaggle/working/result_26.png (deflated 5%)
updating: kaggle/working/result_28.png (deflated 5%)
upd