In [None]:
# Install the third-party packages listed below with uv's pip-compatible installer.
# NOTE(review): versions are unpinned, and huggingface_hub (imported in a later cell) is not
# listed here - presumably it arrives as a dependency of transformers/datasets; confirm.
!uv pip install transformers datasets tiktoken matplotlib pandas seaborn torch

Resolved 97 packages in 0.37ms
Audited 78 packages in 0.02ms


#### config.py


In [1]:
# Checkpoint identifier handed to both from_pretrained() calls in the model-loading cell.
MODEL_NAME = "Qwen/Qwen2.5-0.5B-Instruct"
# NOTE(review): not referenced by any cell visible here (the generation cell hard-codes
# max_new_tokens=512) - confirm what this limit is meant to bound.
MAX_LENGTH = 512

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load the causal language model named in the config cell. dtype and device
# placement are left to the library ("auto"); attention outputs and dict-style
# generate() results are switched on at load time.
# NOTE(review): presumably the "eager" attention implementation is chosen
# because attention weights are requested - confirm.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    attn_implementation="eager",
    output_attentions=True,
    return_dict_in_generate=True,
    device_map="auto",
    torch_dtype="auto",
)
# Switch to inference mode; eval() hands back the same module, so the binding
# of `model` is unchanged.
model.eval()

# Tokenizer that matches the checkpoint above.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

#### mrcr_utils.py


In [2]:
import pandas as pd
from huggingface_hub import hf_hub_download
import json
from difflib import SequenceMatcher


def load_mrcr_parquet(filename: str = "2needle.parquet"):
    """
    Download one parquet file of the ``openai/mrcr`` dataset and load it.

    Args:
        filename: Name of the parquet file inside the dataset repository.
            Defaults to ``"2needle.parquet"``, so calling the function with
            no arguments loads the same file as before the parameter existed.

    Returns:
        The file's contents as returned by ``pandas.read_parquet``.
    """
    # hf_hub_download returns a local path to the fetched file.
    local_path = hf_hub_download(
        repo_id="openai/mrcr", filename=filename, repo_type="dataset"
    )
    return pd.read_parquet(local_path)


def parse_messages(prompt_str):
    """
    Decode a JSON-encoded prompt string into the Python object it describes.
    """
    decoded = json.loads(prompt_str)
    return decoded


def grade(response, answer, random_string_to_prepend) -> float:
    """
    Score a response against the expected answer.

    The response must begin with ``random_string_to_prepend``; otherwise the
    score is 0.0. When it does, the prefix is removed from both strings and
    the score is the ``difflib.SequenceMatcher`` similarity ratio of what
    remains.

    Args:
        response: Text produced by the model.
        answer: Expected text.
        random_string_to_prepend: Prefix the response is required to start with.

    Returns:
        A float between 0.0 and 1.0 inclusive.
    """
    if not response.startswith(random_string_to_prepend):
        # Always a float, so the declared return type holds on every path.
        return 0.0
    stripped_response = response.removeprefix(random_string_to_prepend)
    stripped_answer = answer.removeprefix(random_string_to_prepend)
    return float(SequenceMatcher(None, stripped_response, stripped_answer).ratio())


def n_tokens(messages: list[dict]) -> int:
    """
    Count the tokens in the text content of a list of chat messages.

    Each message's ``"content"`` string is encoded on its own with the
    notebook-level ``tokenizer`` and the per-message counts are summed.
    Special tokens are not added, and role names are not counted.

    Args:
        messages: Chat messages as dicts carrying a ``"content"`` string.

    Returns:
        Total number of content tokens; 0 for an empty list.
    """
    # Encode the message text and count the resulting ids. Passing the whole
    # message dict to the tokenizer and taking len() of the returned encoding
    # object does not measure token count.
    return sum(
        len(tokenizer.encode(m["content"], add_special_tokens=False))
        for m in messages
    )


  from .autonotebook import tqdm as notebook_tqdm


In [5]:
# Load the MRCR parquet file once; the next cell iterates over the rows of this frame.
dataset = load_mrcr_parquet()

In [None]:
# Walk every dataset row, decode its JSON prompt into messages, and print a
# marker line for each prompt whose n_tokens() count does not exceed 10000.
for index, row in dataset.iterrows():
    messages = json.loads(row["prompt"])
    if n_tokens(messages) <= 10000:
        print("Hello")

In [None]:
# Build a two-message chat (system + user) as a quick generation check.
prompt = "Give me a short introduction to large language model."
messages = [
    dict(
        role="system",
        content="You are Qwen, created by Alibaba Cloud. You are a helpful assistant.",
    ),
    dict(role="user", content=prompt),
]

# Render the chat through the tokenizer's chat template as plain text (with
# the generation prompt appended), then tokenize it as a batch of one and
# move the tensors to the model's device.
text = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    tokenize=False,
)
inputs = tokenizer([text], return_tensors="pt").to(model.device)

# Request a dict-style result that carries attention weights next to the
# generated sequences; the plotting cell reads `output.attentions`.
output = model.generate(
    **inputs,
    use_cache=True,
    return_dict_in_generate=True,
    output_attentions=True,
    max_new_tokens=512,
)

# Each returned sequence starts with its prompt; slice that prefix off so
# only the newly generated ids remain.
generated_ids = [
    full_seq[len(prompt_seq) :]
    for prompt_seq, full_seq in zip(inputs.input_ids, output.sequences)
]

response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
print(response)

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

attention = output.attentions

# NOTE(review): this colormap is not used by the heatmaps below (they use
# "bone"); kept in case another cell reads `cmap` - confirm and remove if not.
cmap = sns.color_palette("blend:#7AB,#EDA", as_cmap=True)

# attention[0] -> first generation step, [0] -> first layer, [0] -> first
# batch item, leaving one (query, key) matrix per head. Convert once, outside
# the loop; the cast to float32 lets half-precision tensors become NumPy arrays.
head_weights = attention[0][0][0].cpu().float().numpy()

# The axis labels are the prompt tokens; they are identical for every head,
# so compute them a single time.
token_labels = tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])

# Size the grid from the actual head count instead of assuming 14 heads
# (14 heads still yields the original 2 x 7 grid at figsize (50, 10)).
n_heads = head_weights.shape[0]
n_cols = 7
n_rows = -(-n_heads // n_cols)  # ceiling division

fig, axes = plt.subplots(n_rows, n_cols, figsize=(50, 5 * n_rows), squeeze=False)
for i, ax in enumerate(axes.flat):
    if i >= n_heads:
        # Hide spare panels when the head count does not fill the last row.
        ax.set_visible(False)
        continue
    sns.heatmap(
        head_weights[i],
        ax=ax,
        cmap="bone",
        xticklabels=token_labels,
        yticklabels=token_labels,
    )
    ax.set_title(f"Head {i + 1}")
plt.tight_layout()
plt.show()