In [1]:
import os
os.environ["HF_HOME"] = "/workspace/.cache/huggingface"

In [2]:
import pandas as pd
import numpy as np
import torch

model_path = "meta-llama/Llama-2-70b-chat-hf"

from transformers import AutoTokenizer,AutoModelForCausalLM, BitsAndBytesConfig

device = "cuda"

# quantization_config = BitsAndBytesConfig(
#     load_in_8bit=True,
#     llm_int8_threshold=6.0,
#     llm_int8_has_fp16_weight=False,
#     llm_int8_enable_fp32_cpu_offload=False
# )

quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
)

model = AutoModelForCausalLM.from_pretrained(
    model_path,  # or your specific Llama 70B variant
    quantization_config=quantization_config,
    device_map=device, 
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True
)

tokenizer = AutoTokenizer.from_pretrained(model_path, device_map=device)

  from .autonotebook import tqdm as notebook_tqdm
  torch.utils._pytree._register_pytree_node(
  torch.utils._pytree._register_pytree_node(
Downloading shards: 100%|██████████| 15/15 [06:36<00:00, 26.44s/it]
Loading checkpoint shards: 100%|██████████| 15/15 [00:55<00:00,  3.70s/it]


In [3]:
from selfie.interpret import InterpretationPrompt, interpret
interpretation_prompt = InterpretationPrompt(tokenizer, ("[INST]", 0, 0, 0, 0, 0, "[/INST] Sure, I will summarize the message:"))

  torch.utils._pytree._register_pytree_node(


In [10]:
num_layers = model.config.num_hidden_layers
print(f"Number of layers in model: {num_layers}")


Number of layers in model: 80


In [15]:
tokens = tokenizer.tokenize("[INST] What's highest mountain in the world? [/INST]")
for i, token in enumerate(tokens):
    print(i, token)

0 ▁[
1 INST
2 ]
3 ▁What
4 '
5 s
6 ▁highest
7 ▁mountain
8 ▁in
9 ▁the
10 ▁world
11 ?
12 ▁[
13 /
14 INST
15 ]


In [22]:
original_prompt = "[INST] What's highest mountain in the world? [/INST]"
tokens_to_interpret = [(40, 8), (60, 8)]
bs = 2
max_new_tokens = 20
k = 1

interpretation_df = interpret(original_prompt=original_prompt, tokens_to_interpret=tokens_to_interpret, model=model, interpretation_prompt=interpretation_prompt, bs=bs, max_new_tokens=max_new_tokens, k=k, tokenizer=tokenizer)

Interpreting '[INST] What's highest mountain in the world? [/INST]' with '[INST]_ _ _ _ _ [/INST] Sure, I will summarize the message:'


100%|██████████| 1/1 [00:09<00:00,  9.36s/it]


In [23]:
pd.DataFrame(interpretation_df)

Unnamed: 0,prompt,interpretation,layer,token,token_decoded,relevancy_score
0,[INST] What's highest mountain in the world? [...,\n\nThe message is: _ _ _ _\n\nPlease provide ...,40,8,mountain,"[0.1953, 0.1831, 0.04224, 0.8706, 0.413, 0.510..."
1,[INST] What's highest mountain in the world? [...,"\n\n""I don't know what to do. I'm feeling real...",60,8,mountain,"[0.0874, 0.253, 0.2974, 0.5884, 0.6553, 0.0014..."


In [24]:
print(pd.DataFrame(interpretation_df)['interpretation'][1])



"I don't know what to do. I'm feeling really down and hop
