In [1]:
import torch
from PIL import Image
from transformers import AutoProcessor, AutoModelForVision2Seq
from transformers.image_utils import load_image

DEVICE = "cuda"


import torch, random, numpy as np
from transformers import set_seed

def set_all_seeds(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    set_seed(seed)
    torch.backends.cudnn.deterministic = True

set_all_seeds(9)

from safetensors import safe_open

In [None]:
# rust
# cargo run -p smol_vlm --features cuda -- -i .vscode/angela-porter-2021-jan-25.jpg -p "Can you describe the image?" --sample-length 500
# cargo run -p smol_vlm --features cuda -- -p "A real-valued function f defined on the real line is called an even function if f(-t) = f(t) for each real number t. Prove that the set of even functions defined on the real line with the operations of addition and scalar multiplication defined in Example 3 is a vector space." --sample-length 200
# cargo run -p smol_vlm --features cuda -- -p "Can you describe the image?" --sample-length 500
# cargo run -p smol_vlm --features cuda -- -p "What is life?" --sample-length 500


# image1 = load_image("https://media.istockphoto.com/id/485371557/photo/twilight-at-spirit-island.jpg?s=612x612&w=0&k=20&c=FSGliJ4EKFP70Yjpzso0HfRR4WwflC6GKfl4F3Hj7fk=")
# image2 = load_image("https://huggingface.co/spaces/merve/chameleon-7b/resolve/main/bee.jpg")
# image1 = Image.open("../../../.vscode/fuji-mountain-in-autumn.jpg")
image1 = load_image("https://artwyrd.com/wp-content/uploads/2021/01/angela-porter-2021-jan-25.jpg")
print(f"Image 1 size: {image1.size}")


processor = AutoProcessor.from_pretrained("HuggingFaceTB/SmolVLM-Instruct")
messages = [
    {
        "role": "user",
        "content": [
            # {"type": "image"},
            # {"type": "text", "text": "Can you describe the image?"}
            {"type": "text", "text": "What is life?"}
            # {"type": "text", "text": "A real-valued function f defined on the real line is called an even function if f(-t) = f(t) for each real number t. Prove that the set of even functions defined on the real line with the operations of addition and scalar multiplication defined in Example 3 is a vector space."}
        ]
    },
]

Image 1 size: (900, 900)


In [4]:
# Initialize model directly on CUDA without Flash Attention
model = AutoModelForVision2Seq.from_pretrained(
    "HuggingFaceTB/SmolVLM-Instruct",
    torch_dtype=torch.bfloat16,
    # _attn_implementation="flash_attention_2",  # Commented out Flash Attention
    device_map="cuda",
)
model.eval()


embeddings = {}
counter_token_pos = -1

def hook_fn(name, initial_layer: bool = False):
    def hook(module, input, output):
        global counter_token_pos
        if initial_layer:
            counter_token_pos += 1
        
        if isinstance(output, tuple):
            output = output[0]
        elif isinstance(output, torch.Tensor):
            pass
        else:
            print("Hook unknown type!!!", name, type(output))

        embeddings[name+f"_{counter_token_pos}"] = output.detach().cpu()

    return hook

# Register hooks for different layers
model.get_input_embeddings().register_forward_hook(hook_fn("input_embeddings", initial_layer=True))
for i in range(24):
    model.model.text_model.layers[i].input_layernorm.register_forward_hook(hook_fn(f"input_layernorm_d{i}"))
    model.model.text_model.layers[i].self_attn.register_forward_hook(hook_fn(f"self_attn_d{i}"))
    model.model.text_model.layers[i].post_attention_layernorm.register_forward_hook(hook_fn(f"post_layernorm_d{i}"))
    # model.model.text_model.layers[i].mlp.register_forward_hook(hook_fn(f"mlp_d{i}"))
    model.model.text_model.layers[i].mlp.gate_proj.register_forward_hook(hook_fn(f"mlp_gate_proj_d{i}"))
    model.model.text_model.layers[i].mlp.up_proj.register_forward_hook(hook_fn(f"mlp_up_proj_d{i}"))
    model.model.text_model.layers[i].mlp.down_proj.register_forward_hook(hook_fn(f"mlp_down_proj_d{i}"))
    model.model.text_model.layers[i].mlp.act_fn.register_forward_hook(hook_fn(f"mlp_act_fn_d{i}"))
    model.model.text_model.layers[i].register_forward_hook(hook_fn(f"layers_d{i}"))

type(model)

transformers.models.idefics3.modeling_idefics3.Idefics3ForConditionalGeneration

In [4]:
model

Idefics3ForConditionalGeneration(
  (model): Idefics3Model(
    (vision_model): Idefics3VisionTransformer(
      (embeddings): Idefics3VisionEmbeddings(
        (patch_embedding): Conv2d(3, 1152, kernel_size=(14, 14), stride=(14, 14), padding=valid)
        (position_embedding): Embedding(729, 1152)
      )
      (encoder): Idefics3Encoder(
        (layers): ModuleList(
          (0-26): 27 x Idefics3EncoderLayer(
            (self_attn): Idefics3VisionAttention(
              (k_proj): Linear(in_features=1152, out_features=1152, bias=True)
              (v_proj): Linear(in_features=1152, out_features=1152, bias=True)
              (q_proj): Linear(in_features=1152, out_features=1152, bias=True)
              (out_proj): Linear(in_features=1152, out_features=1152, bias=True)
            )
            (layer_norm1): LayerNorm((1152,), eps=1e-06, elementwise_affine=True)
            (mlp): Idefics3VisionMLP(
              (activation_fn): PytorchGELUTanh()
              (fc1): Linear(in_

In [5]:
# Prepare inputs
prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
# inputs = processor(text=prompt, images=[image1], return_tensors="pt")
inputs = processor(text=prompt, return_tensors="pt")
inputs = inputs.to("cuda")

print(inputs["input_ids"])
# Generate outputs
with torch.no_grad():
    outputs = model.generate(
        **inputs,
        max_new_tokens=500,
        # repition_penalty=1.1,  # Apply repeat penalty
        output_scores=True,           # Return logits for each generated token
        return_dict_in_generate=True, # Return detailed output object
        do_sample=False,  # Use greedy decoding (highest logit)
    )

outputs.sequences[0]

tensor([[    1, 11126,    42,  1812,   314,  1029,    47, 49154,   198,  9519,
          9531,    42]], device='cuda:0')
dict_keys(['input_embeddings_0', 'input_layernorm_d0_0', 'self_attn_d0_0', 'post_layernorm_d0_0', 'mlp_gate_proj_d0_0', 'mlp_act_fn_d0_0', 'mlp_up_proj_d0_0', 'mlp_down_proj_d0_0', 'layers_d0_0', 'input_layernorm_d1_0', 'self_attn_d1_0', 'post_layernorm_d1_0', 'mlp_gate_proj_d1_0', 'mlp_act_fn_d1_0', 'mlp_up_proj_d1_0', 'mlp_down_proj_d1_0', 'layers_d1_0', 'input_layernorm_d2_0', 'self_attn_d2_0', 'post_layernorm_d2_0', 'mlp_gate_proj_d2_0', 'mlp_act_fn_d2_0', 'mlp_up_proj_d2_0', 'mlp_down_proj_d2_0', 'layers_d2_0', 'input_layernorm_d3_0', 'self_attn_d3_0', 'post_layernorm_d3_0', 'mlp_gate_proj_d3_0', 'mlp_act_fn_d3_0', 'mlp_up_proj_d3_0', 'mlp_down_proj_d3_0', 'layers_d3_0', 'input_layernorm_d4_0', 'self_attn_d4_0', 'post_layernorm_d4_0', 'mlp_gate_proj_d4_0', 'mlp_act_fn_d4_0', 'mlp_up_proj_d4_0', 'mlp_down_proj_d4_0', 'layers_d4_0', 'input_layernorm_d5_0', 'self_a

In [18]:
processor.tokenizer.decode(outputs.sequences[0], skip_special_tokens=True)

'User: What is life?\nAssistant: Life is a complex and fascinating concept that has been the subject of philosophical, scientific, and spiritual inquiry for centuries. At its most basic level, life is the characteristic that distinguishes living organisms from inanimate objects and non-living matter. Living organisms are capable of growth, self-sustaining metabolism, reproduction, and response to the environment.\n\nFrom a scientific perspective, life is often defined by the presence of a biological cell, which is the basic unit of all living organisms. Cells are made up of various organelles and structures that work together to perform essential functions such as metabolism, growth, and reproduction.\n\nFrom a philosophical perspective, life is often associated with consciousness, self-awareness, and the ability to experience emotions and suffering. Some philosophers argue that life is a unique and precious gift that should be valued and protected, while others believe that life is a 

In [22]:
calc_err = lambda rust, python: (torch.nn.functional.mse_loss(rust, python), torch.nn.functional.l1_loss(rust, python))

with safe_open("../../../.vscode/rust_output.safetensors", framework="pt", device="cpu") as f:
    ind_div = None
    for i in range(500):
        rust_logits = f.get_tensor(f"logits_{i}")
        python_logits = outputs.scores[i][0].cpu()
        top_logits_python = python_logits.argsort(descending=True)[0]
        top_logits_rust = rust_logits.argsort(descending=True)[0]
        if top_logits_python != top_logits_rust:
            print(f"Mismatch at index {i}: Python top {top_logits_python}, Rust top {top_logits_rust}")
            ind_div = i
            break

    # for i in range(0,10):
    for i in range(max(0,ind_div-3), ind_div+2):
        rust_logits = f.get_tensor(f"logits_{i}")
        python_logits = outputs.scores[i][0].cpu()
        err = torch.sum((rust_logits-python_logits)**2)
        mse_logits, mae_logits = calc_err(rust_logits, python_logits)

        rust_embeds = f.get_tensor(f"embeds_{i}")
        python_embeds = embeddings[f"input_embeddings_{i}"].cpu()
        mse_embeds = torch.nn.functional.mse_loss(rust_embeds, python_embeds)
        mae_embeds = torch.nn.functional.l1_loss(rust_embeds, python_embeds)
        print(f"[{i:>3}] EMBEDS(MSE:{mse_embeds.item():.8f}, MAE:{mae_embeds.item():.8f})  LOGITS(MSE:{mse_logits.item():.8f}, MAE:{mae_logits.item():.8f}) ")

        top_logits_python = python_logits.argsort(descending=True)[:15]
        top_logits_rust = rust_logits.argsort(descending=True)[:15]
        print(f"[PYTHON] Logits: {[python_logits[i].item() for i in top_logits_python]}, Tokens: {[processor.tokenizer.decode([i]) for i in top_logits_python]}")
        print(f"         Tokens:", [i.item() for i in top_logits_python])
        print(f"[RUST] Logits: {[rust_logits[i].item() for i in top_logits_rust]}, Tokens: {[processor.tokenizer.decode([i]) for i in top_logits_rust]}")
        print(f"         Tokens:", [i.item() for i in top_logits_rust])


        for layer_d in [0]:
        # for layer_d in range(24):
            mse_input_layernorm, mae_input_layernorm = calc_err(
                f.get_tensor(f"DEBUG_input_layer_norm_d{layer_d}_i{i}"),
                embeddings[f"input_layernorm_d{layer_d}_{i}"].cpu()
            )
            mse_self_attn, mae_self_attn = calc_err(
                f.get_tensor(f"DEBUG_attn_d{layer_d}_i{i}"),
                embeddings[f"self_attn_d{layer_d}_{i}"].cpu()
            )
            mse_post_layernorm, mae_post_layernorm = calc_err(
                f.get_tensor(f"DEBUG_post_layer_norm_d{layer_d}_i{i}"),
                embeddings[f"post_layernorm_d{layer_d}_{i}"].cpu()
            )

            # mse_mlp, mae_mlp = calc_err(
            #     f.get_tensor(f"DEBUG_gates_d{layer_d}_i{i}"),
            #     embeddings[f"mlp_d{layer_d}_{i}"].cpu()
            # )
            mse_gate_proj, mae_gate_proj = calc_err(
                f.get_tensor(f"DEBUG_gate_proj_d{layer_d}_i{i}"),
                embeddings[f"mlp_gate_proj_d{layer_d}_{i}"].cpu()
            )
            mse_up_proj, mae_up_proj = calc_err(
                f.get_tensor(f"DEBUG_up_proj_d{layer_d}_i{i}"),
                embeddings[f"mlp_up_proj_d{layer_d}_{i}"].cpu()
            )
            mse_down_proj, mae_down_proj = calc_err(
                f.get_tensor(f"DEBUG_down_proj_d{layer_d}_i{i}"),
                embeddings[f"mlp_down_proj_d{layer_d}_{i}"].cpu()
            )
            mse_act_fn, mae_act_fn = calc_err(
                f.get_tensor(f"DEBUG_act_fn_d{layer_d}_i{i}"),
                embeddings[f"mlp_act_fn_d{layer_d}_{i}"].cpu()
            )

            mse_layer, mae_layer = calc_err(
                f.get_tensor(f"block_d{layer_d}_i{i}"),
                embeddings[f"layers_d{layer_d}_{i}"].cpu()
            )
            print(f"    [INPUT LAYERNORM]  (MSE:{mse_input_layernorm.item():.8f}, MAE:{mae_input_layernorm.item():.8f})")
            print(f"    [SELF ATTN]        (MSE:{mse_self_attn.item():.8f}, MAE:{mae_self_attn.item():.8f})")
            print(f"    [POST LAYERNORM]   (MSE:{mse_post_layernorm.item():.8f}, MAE:{mae_post_layernorm.item():.8f})")
            # print(f"    [MLP]              (MSE:{mse_mlp.item():.8f}, MAE:{mae_mlp.item():.8f})")
            print(f"        [MLP/GATE PROJ](MSE:{mse_gate_proj.item():.8f}, MAE:{mae_gate_proj.item():.8f})")
            print(f"        [MLP/ACT FN]   (MSE:{mse_act_fn.item():.8f}, MAE:{mae_act_fn.item():.8f})")
            print(f"        [MLP/UP PROJ]  (MSE:{mse_up_proj.item():.8f}, MAE:{mae_up_proj.item():.8f})")
            print(f"        [MLP/DOWN PROJ](MSE:{mse_down_proj.item():.8f}, MAE:{mae_down_proj.item():.8f})")
            print(f"[LAYER {layer_d:>2}]             (MSE#{mse_layer.item():.8f}, MAE:{mae_layer.item():.8f})")

Mismatch at index 6: Python top 1909, Rust top 7613
[  3] EMBEDS(MSE:0.00000000, MAE:0.00000000)  LOGITS(MSE:0.00148941, MAE:0.03045330) 
[PYTHON] Logits: [16.75, 16.625, 16.25, 16.0, 15.9375, 15.875, 15.625, 15.625, 15.5625, 15.5, 15.1875, 15.1875, 15.125, 15.0625, 14.9375], Tokens: [' complex', ' state', ' phenomenon', ' process', ' word', ' journey', ' very', ' fundamental', ' continuous', ' wonderful', ' term', ' precious', ' concept', ' unique', ' beautiful']
         Tokens: [1784, 1215, 7613, 980, 2229, 3444, 1035, 4959, 6860, 8264, 2115, 9764, 1909, 2116, 3953]
[RUST] Logits: [16.75, 16.5, 16.25, 16.0, 15.875, 15.875, 15.625, 15.625, 15.5625, 15.5, 15.125, 15.125, 15.125, 15.0625, 14.9375], Tokens: [' complex', ' state', ' phenomenon', ' process', ' word', ' journey', ' fundamental', ' very', ' continuous', ' wonderful', ' term', ' concept', ' precious', ' unique', ' beautiful']
         Tokens: [1784, 1215, 7613, 980, 2229, 3444, 4959, 1035, 6860, 8264, 2115, 1909, 9764, 2116,

  mse_embeds = torch.nn.functional.mse_loss(rust_embeds, python_embeds)
  mae_embeds = torch.nn.functional.l1_loss(rust_embeds, python_embeds)
  calc_err = lambda rust, python: (torch.nn.functional.mse_loss(rust, python), torch.nn.functional.l1_loss(rust, python))
  calc_err = lambda rust, python: (torch.nn.functional.mse_loss(rust, python), torch.nn.functional.l1_loss(rust, python))


In [20]:
tokenP = [    1, 11126,    42,   330,  1345,    29, 34270,  1517,   275,  4355,
           335,   260,  1345,  1761,   314,  1217,   354,   908,  1517,   585,
           275, 10242,   100,    25,   446,   275,    24,   100,    25,   327,
           971,  1345,  1230,   252,    30,  1053,   307,   338,   260,   932,
           282,   908,  3691,  4355,   335,   260,  1345,  1761,   351,   260,
          4261,   282,  1706,   284, 26727, 17385,  4355,   281, 12066,   216,
            35,   314,   253,  8431,  1898,    30, 49154,   198,  9519,  9531,
            42]
tokenR = [1, 11126, 42, 330, 1345, 29, 34270, 1517, 275, 4355, 335, 260, 1345, 1761, 314, 1217, 354, 908, 1517, 585, 275, 10242, 100, 25, 446, 275, 24, 100, 25, 327, 971, 1345, 1230, 252, 30, 1053, 307, 338, 260, 932, 282, 908, 3691, 4355, 335, 260, 1345, 1761, 351, 260, 4261, 282, 1706, 284, 26727, 17385, 4355, 281, 12066, 216, 35, 314, 253, 8431, 1898, 30, 49154, 198, 9519, 9531, 42]

print("Token same?", tokenP == tokenR)


rngDiffP1 = """The image features a detailed and intricate design, which appears to be a circular pattern. The central part of the design consists of a large, circular motif, which is composed of numerous interconnected elements. The central part is composed of a series of interconnected leaf-like shapes, each with a unique and detailed design. These leaf-like shapes are arranged in a circular pattern, creating a sense of depth and complexity.

The background of the design is filled with a dark green color, which provides a stark contrast to the lighter green and white elements of the central motif. The dark green background enhances the visual impact of the intricate patterns and designs within the central motif.

The image also includes a small, white text in the lower right corner, which reads "(c) Angela Porter | Artwyrocom | 25 Jan 2021". This text is written in a sans-serif font, which is clear and easy to read. The text provides information about the artist, the date, and the source of the image.

The overall design of the image is highly detailed and visually appealing, showcasing a blend of intricate patterns and a rich color palette. The use of a dark green background and the intricate leaf-like shapes create a sense of depth and complexity, making the image visually engaging.

In summary, the image features a circular, detailed design composed of interconnected leaf-like shapes, set against a dark green background. The central motif is composed of a series of interconnected leaf-like shapes arranged in a circular pattern, creating a sense of depth and complexity. The image is signed by the artist, Angela Porter, and dated 25th January 2021."""

rngDiffP2 = """The image features a detailed and intricate design, which appears to be a circular pattern. The central part of the design consists of a large, circular motif, which is composed of numerous interconnected elements. The central part is composed of a series of interconnected leaf-like shapes, each with a unique and detailed design. These leaf-like shapes are arranged in a circular pattern, creating a sense of depth and complexity.

The background of the design is filled with a dark green color, which provides a stark contrast to the lighter green and white elements of the central motif. The dark green background enhances the visual impact of the intricate patterns and designs within the central motif.

The image also includes a small, white text in the lower right corner, which reads "(c) Angela Porter | Artwyrocom | 25 Jan 2021". This text is written in a sans-serif font, which is clear and easy to read. The text provides information about the artist, the date, and the source of the image.

The overall design of the image is highly detailed and visually appealing, showcasing a blend of intricate patterns and a rich color palette. The use of a dark green background and the intricate leaf-like shapes create a sense of depth and complexity, making the image visually engaging.

In summary, the image features a circular, detailed design composed of interconnected leaf-like shapes, set against a dark green background. The central motif is composed of a series of interconnected leaf-like shapes arranged in a circular pattern, creating a sense of depth and complexity. The image is signed by the artist, Angela Porter, and dated 25th January 2021."""

print("Python version invariant of seed?", rngDiffP1 == rngDiffP2)


tknP = [[    1, 11126,    42,  1978,   346,  5125,   260,  2443,    47, 49154,
           198,  9519,  9531,    42,   378,  2443, 21559,   253,  6621,   429,
           253, 15879,  3562,    28,  4170,   253, 15879,  5182,  6621,    30,
           378,  4054,   314,   253, 21301,    28,   837,   827, 15713, 16328,
           359,  6934,   281,  7604,    30,   378, 21301,   314,  4412,   351,
          1461,  2728,    28,  1285,    42,  1116,    33,    30,  1903, 38257,
           370,  4253,   378,  2443,  2744,   827,  1507, 16328,    28,   582,
           335,   260,  2049,   284,   260,   550,   335,   260,  1048,    30,
          3768,  6237,   314,  8168,   282,  3824,  6333,    28,   511,  9031,
           351,   896,  1995,   282,  7921,   284, 23812,    30,   378,  6333,
           359,  6934,   281,  7604,    28,   351,   634,   282,   601, 17442,
           281, 20470,  7604,    28,   979,  1449,   359,  6934,   281,   540,
          9964, 34837,    30,  1116,    34,    30,  1903,  1882, 31146,   284,
         11889,   270,  4253,   378,  6333,   359,  9031,   351,   253,  3175,
           282,  7921,    28,  1285, 29871,    28, 45540,    28, 16418,    28,
           284, 30553,    30,   378, 23812,   502,   359,  9064,   314,   597,
          9771,    28,   351,   634,  6333,  9064, 29003,    28,  1248, 23812,
            28,   284, 30553,    30,  1116,    35,    30,  1903,    54,   403,
          7265,  6577,   756,  4253,   533,   260,  3969,    28,   665,   359,
          1545, 28557,  6734,    28,   715,   347, 26225,    28, 31721, 20273,
            28,   284,   550, 26271,  7095,    30,  1216,  6734,   359,   441,
          3319,  2773,   281,   260,  5182,   564,   803,   288,   260, 28557,
          5264,   282,   260,  6621,    30,  1116,    36,    30,  1903, 42284,
          4385,  4627,  4253,   378, 21301,   314,   253,  4749,    28,  1440,
          1557,   351,   253,  3175,   282, 12979,  3004,    28,  1285, 12610,
            28, 20232,    28,   284,  6495,    30,   378,  2118,   314, 19425,
            28,   351,   634,  1721,   282,  2375,  5372,   284,  1449,   282,
          1759,    28, 19284,  2763,    30,  1116,    37,    30,  1903, 45123,
          4253,   378,  6376,   314, 13178,   347,   253,  2437,  1194,    28,
           351,   787, 10695,   355,  5249,    30,   378,  2388,   314, 23952,
         26388,    28, 17462,   253,  3091, 14654,   690,   260, 21301,    30,
          1116,    38,    30,  1903, 41229,  4253,  1385,   359,  1545,  9077,
          2165,  1386,   281,   260,  2443,    28,   715,   347, 26538,  9077,
          7349,   284, 34080,   282, 16375,    30,  1216,  2165,   803,   288,
           260, 15879,  5264,   282,   260,  6621,    30,   198,   198,  3757,
          7777,   284, 18688,    42,   198,   198,   504,  2443,   314,   253,
         12999, 27079,   282,   253, 15879,  5182,  6621,    30,   378,  5861,
         27079,   282,   260, 21301,    28,   260,  6333,    28,   284,   260,
         28557,  6734,  5593,   253,  2588,   282, 21539,   284, 10434,    30,
           378,   722,   282,  2380,   284,  8703, 12666,   260,  9184,   977,
           282,   260,  6621,    30,   198,   198,   504,  2443,   314,  2003,
           599,   282,   253,  3227,  8149,    28,   347,   260,  4054,   284,
          3870,   359,  5707,   351,   260,  5535,   284,  5049,   282,   260,
         15879,  3562,    30,   378,  5861, 27079,   282,   260, 21301,   284,
           260,  6333,    23,  3813,  4567,   338,   260,  5182,   314,   253,
          1546,   284, 13865,  3847,   281,   260,  1977,    30,   198,   198,
          3757, 11466,    42,   198,   198,   504,  2443,   314,   253,  3428,
           284,  5861, 27079,   282,   253, 15879,  5182,  6621,    30,   378,
          5861, 27079,   282,   260, 21301,    28,   260,  6333,    28,   284,
           260, 28557,  6734,  7845,   288,   260, 18061,   284,  9184,   977,
           282,   260,  6621,    30,   378,   722,   282,  2380,   284,  8703,
         12666,   260,  9184,   977]][0][:475]

tknR = [
    1, 11126, 42, 1978, 346, 5125, 260, 2443, 47, 49154, 198, 9519, 9531, 42, 378, 2443, 21559, 253, 6621, 429, 253, 15879, 3562, 28, 4170, 253, 15879, 5182, 6621, 30, 378, 4054, 314, 253, 21301, 28, 837, 827, 15713, 16328, 359, 6934, 281, 7604, 30, 378, 21301, 314, 4412, 351, 1461, 2728, 28, 1285, 42, 1116, 33, 30, 1903, 38257, 370, 4253, 378, 2443, 2744, 827, 1507, 16328, 28, 582, 335, 260, 2049, 284, 260, 550, 335, 260, 1048, 30, 3768, 6237, 314, 8168, 282, 3824, 6333, 28, 511, 9031, 351, 896, 1995, 282, 7921, 284, 23812, 30, 378, 6333, 359, 6934, 281, 7604, 28, 351, 634, 282, 601, 17442, 281, 20470, 7604, 28, 979, 1449, 359, 6934, 281, 540, 9964, 34837, 30, 1116, 34, 30, 1903, 1882, 31146, 284, 11889, 270, 4253, 378, 6333, 359, 9031, 351, 253, 3175, 282, 7921, 28, 1285, 29871, 28, 45540, 28, 16418, 28, 284, 30553, 30, 378, 23812, 502, 359, 9064, 314, 597, 9771, 28, 351, 634, 6333, 9064, 29003, 28, 1248, 23812, 28, 284, 30553, 30, 1116, 35, 30, 1903, 54, 403, 7265, 6577, 756, 4253, 533, 260, 3969, 28, 665, 359, 1545, 28557, 6734, 28, 1285, 26225, 28, 31721, 20273, 28, 284, 550, 26271, 7095, 30, 1216, 6734, 359, 441, 3319, 2773, 281, 260, 5182, 564, 803, 288, 260, 28557, 5264, 282, 260, 6621, 30, 1116, 36, 30, 1903, 42284, 4385, 4627, 4253, 378, 21301, 314, 253, 4749, 28, 1440, 1557, 351, 253, 3175, 282, 12979, 3004, 28, 1285, 12610, 28, 20232, 28, 284, 6495, 30, 378, 2118, 314, 19425, 28, 351, 634, 1721, 282, 2375, 5372, 284, 1449, 282, 1759, 28, 19284, 2763, 30, 1116, 37, 30, 1903, 45123, 4253, 378, 6376, 314, 13178, 347, 253, 2437, 1194, 28, 351, 787, 10695, 355, 5249, 30, 378, 2388, 314, 23952, 26388, 28, 17462, 253, 3091, 14654, 690, 260, 21301, 30, 1116, 38, 30, 1903, 41229, 4253, 1385, 359, 1545, 9077, 2165, 6178, 281, 260, 2443, 28, 715, 347, 26538, 9077, 7349, 284, 34080, 282, 16375, 30, 1216, 2165, 803, 288, 260, 15879, 5264, 282, 260, 6621, 30, 198, 198, 3757, 7777, 284, 18688, 42, 198, 198, 504, 2443, 314, 253, 12999, 27079, 282, 253, 15879, 5182, 6621, 30, 378, 5861, 27079, 282, 260, 21301, 28, 260, 6333, 28, 284, 260, 28557, 6734, 7845, 288, 260, 18061, 2174, 282, 260, 6621, 30, 378, 722, 282, 2380, 284, 8703, 5593, 253, 2588, 282, 5856, 284, 5264, 28, 1625, 260, 6621, 1407, 540, 11198, 284, 18061, 30, 198, 198, 3757, 11466, 42, 198, 198, 504, 2443, 20660, 260, 9615, 282, 253, 15879, 5182, 28, 351, 624, 14035, 3964, 28, 5861, 27079, 282, 260, 21301, 28, 284, 28557, 2728, 30, 378, 5861, 27079, 282, 260, 6333, 284, 480, 3813, 28, 347, 876, 347, 260, 28557, 6734, 284, 9077, 2165, 28, 511, 3578, 288, 260, 18061, 2174, 282, 260, 6621, 30
][:475]


print(len(tknP), len(tknR))
print("Token same?", tknP == tknR)

for i,(p,r) in enumerate(zip(tknP, tknR)):
    if p != r:
        print(f"Token mismatch: Python {p} != Rust {r} at {i}")
        break
    
print(tknP[194], tknR[194])

for i in range(0,400):
    print(processor.tokenizer.decode([tknP[i]])+("#" if i != 195 else "<"), end="")
print()
for i in range(0,400):
    print(processor.tokenizer.decode([tknR[i]])+("#" if i != 195 else "<"), end="")

Token same? True
Python version invariant of seed? True
475 475
Token same? False
Token mismatch: Python 715 != Rust 1285 at 194
715 1285
<|im_start|>#User#:# Can# you# describe# the# image#?#<end_of_utterance>#
#Ass#istant#:# The# image# depicts# a# scene# from# a# fantasy# novel#,# specifically# a# fantasy# battle# scene#.# The# setting# is# a# battlefield#,# where# two# opposing# armies# are# engaged# in# combat#.# The# battlefield# is# filled# with# various# elements#,# including#:#

#1#.# **#Arm#ies#**:# The# image# shows# two# large# armies#,# one# on# the# left# and# the# other# on# the# right#.# Each# army# is# composed# of# numerous# soldiers#,# all# equipped# with# different# types# of# weapons# and# armor#.# The# soldiers# are# engaged# in# combat#,# with# some# of# them# locked# in# fierce# combat#,# while# others# are# engaged# in# more# subtle# maneuvers#.#

#2#.# **#We#apons# and# Arm#or#**:# The# soldiers# are# equipped# with# a# variety# of# weapons#,# including# sword