In [1]:
import json
from typing import Mapping, Union
import pathlib, textwrap
import transformers
import matplotlib.pyplot as plt


In [13]:
!huggingface-cli login


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    To log in, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Enter your token (input will not be visible): 
Add token as git credential? (Y/n) n
Token is valid (permission: write).
The token `create dataset` has been saved to /root/.cache/huggingface/stored_tokens
Your token has been saved to /root/.cache/huggingface/token
Login successful.
The current active token is: `create da

In [41]:
tinyrm_scores = [78.4, 86.4]
tinyrm_flops = [0.19, 0.34]

tulu_scores = [88.9, 81.9, 85.5, 50.9]
tulu_flops = [78.1, 11.95, 8.77, 1.86]

qwen25_scores = [88.2, 92.7]
qwen25_flops = [8.86, 38.27]

llama_scores = [90.9, 93.1, 95.1, ]
llama_flops = [3.75, 8.77, 78.1, ]

In [38]:
# cfg_path = pathlib.Path("modernbert_base.json")  # or a dict inline
qwen2_config = transformers.PretrainedConfig.from_pretrained("SultanR/SmolTulu-1.7b-RM")
gflops = estimate_gflops_per_token(qwen2_config, context_len=8192)
print(f"≈ {gflops:.2f} GFLOPs / token (forward, logits included)")

config.json:   0%|          | 0.00/792 [00:00<?, ?B/s]

You are using a model of type llama to instantiate a model of type . This is not supported for all configurations of models and can yield errors.


≈ 1.86 GFLOPs / token (forward, logits included)


In [10]:
def estimate_gflops_per_token(
    cfg,
    context_len: int = 2048,
    include_logits: bool = True,
) -> float:

    # if isinstance(cfg, str):
    #     try:
    #         cfg = json.loads(cfg)            # JSON string?
    #     except json.JSONDecodeError:
    #         with open(cfg, "r") as f:        # treat as path
    #             cfg = json.load(f)

    # Pull the essentials
    dm   = cfg.hidden_size
    L    = cfg.num_hidden_layers
    nh   = cfg.num_attention_heads
    dff  = cfg.intermediate_size
    nv   = cfg.vocab_size
    datt = dm // nh  # assume divisible

    # ---------- OpenAI "Table-1" formulas ----------
    # Parameter-dominated term
    N = 2 * dm * L * (2 * datt + dff)

    # Attention masking matmuls
    attention_term = 2 * L * context_len * datt

    # Total forward cost
    flops = 2 * N + attention_term

    # Optional logits projection (≈ always paid at inference)
    if include_logits:
        flops += 2 * dm * nv          # de-embed

    # Return in giga-FLOPs
    return flops / 1e9

In [ ]:
# Create the scatter plot
plt.figure(figsize=(10, 6))

# Plot with connecting lines - keeping original style but with slight improvements
plt.plot(tinyrm_flops, tinyrm_scores, label='TinyRM', marker='o', linewidth=2, markersize=7)
plt.plot(qwen25_flops, qwen25_scores, label='Qwen2.5', marker='x', linewidth=2, markersize=8)
plt.plot(llama_flops, llama_scores, label='Llama3.1', marker='^', linewidth=2, markersize=7)
plt.plot(tulu_flops, tulu_scores, label='Tulu', marker='*', linewidth=2, markersize=9)

# Set labels and title
plt.xlabel('GFLOPs / token', fontsize=12)
plt.ylabel('Score', fontsize=12)
plt.title('Score vs GFLOPs / token', fontsize=14)

# Flip the x-axis and set to logarithmic scale
plt.xscale('log')

# Add a legend
plt.legend(fontsize=11)

# Add grid
plt.grid(True, alpha=0.3)

# Save the plot as high-quality images
plt.savefig('flopchart.jpg', dpi=300, bbox_inches='tight')
plt.savefig('flopchart.png', dpi=300, bbox_inches='tight')

# Show the plot
plt.show()

print("Chart saved as flopchart.jpg and flopchart.png")