In [5]:
from transformers import AutoModelForCausalLM
from transformers import AutoTokenizer
import torch



tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen3-0.6B", use_fast=False)
tokenizer.add_special_tokens(
    {
        "eos_token": "</s>",
        "bos_token": "<s>",
        "unk_token": "<unk>",
    }
)


# Check for merge-related attributes
has_merges = hasattr(tokenizer, 'bpe_ranks') or hasattr(tokenizer, 'merges')
print(f"Has merge rules: {has_merges}")


# Try to access the underlying SentencePiece model
if hasattr(tokenizer, 'sp_model'):
    sp_model = tokenizer.sp_model
    
    # Get a token and its score
    token_id = tokenizer.convert_tokens_to_ids('a')
    if token_id is not None:
        score = sp_model.get_score(token_id)
        print(f"Token 'a' has score: {score}")
    
    # Check another token
    token_id = tokenizer.convert_tokens_to_ids('▁Eins')
    if token_id is not None:
        score = sp_model.get_score(token_id)
        print(f"Token '▁Eins' has score: {score}")

Has merge rules: True


In [None]:
model = AutoModelForCausalLM.from_pretrained("facebook/MobileLLM-125M", trust_remote_code=True)

In [2]:
from transformers import AutoTokenizer
tok = AutoTokenizer.from_pretrained("facebook/MobileLLM-125M", use_fast=False)

print(tok.sp_model.piece_to_id("▁Eins"))   # -1 if absent
print(tok.sp_model.piece_to_id("▁Ein"))    # some non-negative id
print([tok.sp_model.id_to_piece(i) for i in tok([ "Einstein was born" ], add_special_tokens=False).input_ids[0]])
# or just:
print(tok.tokenize("Einstein was born"))

16943
2694
['▁Ein', 'stein', '▁was', '▁born']
['▁Ein', 'stein', '▁was', '▁born']


In [None]:
inputs = tokenizer("The Mambo Jumbo")  # no return_tensors="pt"
input_ids = torch.tensor([inputs["input_ids"]])
attention_mask = torch.tensor([inputs["attention_mask"]])

outputs = model.generate(
    input_ids=input_ids,
    attention_mask=attention_mask,
    max_new_tokens=100,
    do_sample=True,
    return_dict_in_generate=True,
    output_scores=True,
    use_cache=False,  # <- disables DynamicCache use
)

generated_ids = outputs.sequences

# For a single example:
text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
print(text)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Doing sex is not a safe profession. It is dangerous. It is a risky business, and it is a potentially deadly one. I have come out of a year that has been completely focused on the future and how to live it. The past six months of hard work has been the most challenging, even toughest for me in my current job; that of a school principal in a very small high school. Most people I know, and even those I am acquainted with from my prior


In [9]:
from hfdebugger import HFModelDebugger, debug_huggingface_model

debugger, outputs, input_ids = debug_huggingface_model(
    model, tokenizer, 
    input_text="Einstein was born",
    save_debug_data=True
)

Registering hooks on ALL modules... (filtering by name in hook)
Registered 425 hooks (filtering applied in hook function)
Input text: 'Einstein was born'
Tokenized: ['<s>', '▁Ein', 'stein', '▁was', '▁born']
Input IDs shape: torch.Size([1, 5])
Input IDs: tensor([[   1, 2694, 5465,  471, 6345]])


past_key_values should not be None in from_legacy_cache()



000_model.embed_tokens: Embedding (model.embed_tokens)

  → INPUT[0]: model.embed_tokens_input_0
     Shape: [1, 5]
     Dtype: torch.int64
     Mean: 2995.199951, Std: 2861.320068
     First 10: [1, 2694, 5465, 471, 6345]
     Last 10:  [1, 2694, 5465, 471, 6345]
     Zeros: 0, Total: 5

  → OUTPUT[0]: model.embed_tokens_output
     Shape: [1, 5, 576]
     Dtype: torch.float32
     Mean: -0.000386, Std: 0.050211
     First 10: [-0.105712890625, -0.023468017578125, 0.051483154296875, 0.0313720703125, 0.047332763671875, -0.0167999267578125, -0.0188751220703125, 0.01373291015625, -0.002361297607421875, 0.005970001220703125]
     Last 10:  [0.035400390625, -0.050384521484375, -0.03790283203125, 0.0203399658203125, -0.0284423828125, -0.058197021484375, 0.059600830078125, 0.018463134765625, 0.027587890625, 0.08441162109375]
     Zeros: 0, Total: 2880

  → PARAMETER: weight
     Shape: [32000, 576]
     Mean: 0.001268
     First 10: [-0.03887939453125, 0.005893707275390625, 0.00559234619140

In [31]:
model._modules["model"].embed_tokens.weight

Parameter containing:
tensor([[-0.0389,  0.0059,  0.0056,  ...,  0.0159,  0.0323,  0.0025],
        [-0.1057, -0.0235,  0.0515,  ...,  0.0113,  0.0539,  0.0051],
        [-0.1057, -0.0235,  0.0515,  ...,  0.0113,  0.0539,  0.0051],
        ...,
        [-0.0468,  0.0501, -0.0430,  ...,  0.0322,  0.0974,  0.0251],
        [-0.0891, -0.0578,  0.0656,  ..., -0.0131,  0.0154,  0.0235],
        [-0.0170,  0.0335,  0.0054,  ..., -0.0525,  0.0964, -0.0178]],
       requires_grad=True)