# Nethermind's benchmarking data - state opcodes analysis

#### Maria Silva, September 2025

In [1]:
import os
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings("ignore")

In [2]:
# Plotting theme: seaborn whitegrid style with the Set2 palette, plus
# matplotlib overrides for figure resolution and axes title size.
theme_rc = {"figure.dpi": 500, "axes.titlesize": 15}
sns.set_theme(style="whitegrid", palette="Set2", rc=theme_rc)

## Load data

In this analysis, we are using data generated by running the [EEST benchmark suite](https://github.com/ethereum/execution-spec-tests/tree/main/tests/benchmark) with the [Nethermind benchmarking tooling](https://github.com/NethermindEth/gas-benchmarks). We extracted this data on 22-09-2025.

In [3]:
# Main directories
# The working directory is taken as the starting point; the repository root
# is its parent directory and the data folder sits directly under the root.
current_path = os.getcwd()
repo_dir = os.path.abspath(os.path.join(current_path, os.pardir))
data_dir = os.path.join(repo_dir, "data")

In [4]:
def _parse_test_title(results):
    """Split the `test_title` column into file, name, params and fork columns.

    Parameters
    ----------
    results : pd.DataFrame
        Long-format frame that contains a `test_title` string column.

    Returns
    -------
    pd.DataFrame
        A new frame with `test_file`, `test_name`, `test_params` and
        `test_fork` appended (in that order).
    """
    title = results["test_title"]
    # Multi-character patterns given to `.str.split` are interpreted as regular
    # expressions, so the dot in ".py" is escaped to match it literally rather
    # than matching "any character followed by py".
    return results.assign(
        test_file=(
            title.str.replace("tests_benchmark_", "", regex=False)
            .str.split(r"\.py")
            .str[0]
        ),
        test_name=title.str.split(r"\.py__").str[1].str.split("[").str[0],
        test_params=(
            title.str.split("[")
            .str[1]
            .str.split("]")
            .str[0]
            .str.split("engine_x")
            .str[1]
            .str[1:]  # drop the separator character that follows "engine_x"
        ),
        test_fork=title.str.split("fork_").str[1].str.split("-").str[0],
    )


def _label_opcodes(results):
    """Return the opcode / precompile label associated with each test row.

    The label is first extracted from an `opcode_<X>` / `op_<X>` fragment of
    `test_params`; a list of overrides keyed on the test name or on the
    parameters is then applied in order, so later rules win over earlier ones.

    Parameters
    ----------
    results : pd.DataFrame
        Frame with `test_name` and `test_params` string columns.

    Returns
    -------
    pd.Series
        Labels aligned with `results.index`; NaN where no rule matched.
    """
    name = results["test_name"]
    params = results["test_params"]

    def name_is(value):
        return name == value

    # `na=False` keeps rows with a missing name/params out of every rule;
    # without it the NaN result would be treated as truthy when used as a mask.
    # `regex=False` makes every pattern a plain substring match.
    def name_has(text):
        return name.str.contains(text, regex=False, na=False)

    def params_has(text):
        return params.str.contains(text, regex=False, na=False)

    # (mask, label) pairs, applied top to bottom.
    rules = [
        (name_is("test_worst_modexp"), "MODEXP"),
        (name_has("selfdestruct"), "SELFDESTRUCT"),
        (name_is("test_worst_calldatacopy"), "CALLDATACOPY"),
        (name_is("test_worst_mcopy"), "MCOPY"),
        (name_is("test_worst_codecopy"), "CODECOPY"),
        (name_is("test_worst_returndatacopy"), "RETURNDATACOPY"),
        (name_has("returndatasize"), "RETURNDATASIZE"),
        (name_has("extcodecopy"), "EXTCODECOPY"),
        (name_is("test_worst_calldataload"), "CALLDATALOAD"),
        (name_is("test_worst_keccak"), "KECCAK"),
        # For the shift tests the label is the last three characters of the params.
        (name_is("test_worst_shifts"), params.str[-3:]),
        (name_is("test_worst_selfbalance"), "SELFBALANCE"),
        (name_is("test_worst_msize"), "MSIZE"),
        (name_is("test_worst_jumpdests"), "JUMPDEST"),
        (name_has("jumpi"), "JUMPI"),
        (name_is("test_worst_jumps"), "JUMP"),
        (name_is("test_worst_tstore"), "TSTORE"),
        (name_is("test_worst_tload"), "TLOAD"),
        (params_has("SSTORE"), "SSTORE"),
        (params_has("SLOAD"), "SLOAD"),
        (name_is("test_worst_log_opcodes"), "LOG"),
        (name_is("test_worst_calldatasize"), "CALLDATASIZE"),
        (name_is("test_worst_blobhash"), "BLOBHASH"),
        (name_is("test_worst_blockhash"), "BLOCKHASH"),
        (name_is("test_worst_callvalue"), "CALLVALUE"),
        # Precompiles
        (params_has("bn128_add"), "ecAdd"),
        (params_has("bn128_mul"), "ecMul"),
        (params_has("bn128") & params_has("pairing"), "ecPairing"),
        (name_is("test_amortized_bn128_pairings"), "ecPairing"),
        (params_has("point_evaluation"), "point evaluation"),
        (params_has("blake2f"), "blake2f"),
        (params_has("ecrecover"), "ecRecover"),
        (params_has("SHA2-256"), "SHA2-256"),
        (params_has("RIPEMD-160"), "RIPEMD-160"),
        (params_has("IDENTITY"), "identity"),
        (params_has("bls12_g1add"), "BLS12_G1ADD"),
        (params_has("bls12_g1msm"), "BLS12_G1MSM"),
        (params_has("bls12_g2add"), "BLS12_G2ADD"),
        (params_has("bls12_g2msm"), "BLS12_G2MSM"),
        (params_has("bls12_pairing_check"), "BLS12_PAIRING_CHECK"),
        (params_has("bls12_fp_to_g1"), "BLS12_MAP_FP_TO_G1"),
        (params_has("bls12_fp_to_g2"), "BLS12_MAP_FP2_TO_G2"),
    ]

    opcode = params.str.extract(r"(?:opcode_|op_)([^-]+)", expand=False)
    for applies, label in rules:
        opcode = opcode.mask(applies, label)
    return opcode


# Load the wide table (one column per client) and reshape it to long format:
# one row per (test_title, client) pair, ordered by throughput.
df = pd.read_csv(os.path.join(data_dir, "min_mgas_s_by_test_and_client_2025-09-22.csv"))
df = df.melt(
    id_vars=["test_title"], var_name="client", value_name="mgas/s"
).sort_values(by="mgas/s")
# Parse test title
df = _parse_test_title(df)
# Parse opcodes
df["test_opcode"] = _label_opcodes(df)
df = df.drop(columns=["test_title"])
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 2305 entries, 441 to 1853
Data columns (total 7 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   client       2305 non-null   object 
 1   mgas/s       2305 non-null   float64
 2   test_file    2305 non-null   object 
 3   test_name    2305 non-null   object 
 4   test_params  2305 non-null   object 
 5   test_fork    2305 non-null   object 
 6   test_opcode  2260 non-null   object 
dtypes: float64(1), object(6)
memory usage: 144.1+ KB


In [10]:
# Restrict the analysis to the rows coming from the stateful-opcodes test file.
is_stateful = df["test_file"] == "test_worst_stateful_opcodes"
state_df = df.loc[is_stateful]

# Sanity check: list the opcodes and the test names present in the subset.
for column in ("test_opcode", "test_name"):
    print(state_df[column].unique())

['SELFDESTRUCT' 'CALLCODE' 'DELEGATECALL' 'CALL' 'SLOAD' 'STATICCALL'
 'SSTORE' 'BALANCE' 'EXTCODESIZE' 'SELFBALANCE' 'EXTCODEHASH'
 'EXTCODECOPY' 'BLOCKHASH']
['test_worst_selfdestruct_existing' 'test_worst_address_state_warm'
 'test_worst_selfdestruct_created' 'test_worst_storage_access_cold'
 'test_worst_address_state_cold' 'test_worst_selfdestruct_initcode'
 'test_worst_selfbalance' 'test_worst_extcodecopy_warm'
 'test_worst_storage_access_warm' 'test_worst_blockhash']


In [7]:
# Keep only the worst client for each test: collapse the client dimension by
# taking the minimum throughput within every distinct test configuration.
test_keys = ["test_file", "test_name", "test_params", "test_fork", "test_opcode"]
worse_state_df = state_df.groupby(test_keys, as_index=False)["mgas/s"].min()

In [14]:
# For anchoring: the lowest ecRecover throughput over all rows of `df`
# (every client and test), and its reciprocal, used as the reference cost.
is_ecrecover = df["test_opcode"] == "ecRecover"
worse_ecRecover_mgs = df.loc[is_ecrecover, "mgas/s"].min()
worse_ecRecover_msgas = 1 / worse_ecRecover_mgs

## Worst SLOAD

In [23]:
# Worst-client SLOAD results, expressed relative to the slowest ecRecover run.
# `.copy()` makes this an independent frame, so the column assignments below
# do not write into a slice of `worse_state_df` (SettingWithCopyWarning).
worse_sload_df = worse_state_df[worse_state_df["test_opcode"]=="SLOAD"].copy()
# Reciprocal of the throughput. NOTE(review): if `mgas/s` is megagas per
# second, this is seconds per megagas (i.e. microseconds per gas), not
# milliseconds; the column label is kept unchanged — TODO confirm.
worse_sload_df["ms/gas"] = 1 / worse_sload_df["mgas/s"]
# How many times more expensive (per unit of gas) than the ecRecover anchor.
worse_sload_df["ratio_to_ecRecover"] = worse_sload_df["ms/gas"] / worse_ecRecover_msgas
# Scale the ratio by 3000 (this matches the ecRecover precompile gas cost —
# confirm this is the intended anchor value).
worse_sload_df["new_gas_price"] = worse_sload_df["ratio_to_ecRecover"]*3000

worse_sload_df

Unnamed: 0,test_file,test_name,test_params,test_fork,test_opcode,mgas/s,ms/gas,ratio_to_ecRecover,new_gas_price
27,test_worst_stateful_opcodes,test_worst_storage_access_cold,absent_slots_False-SSLOAD,Prague,SLOAD,40.9,0.02445,1.264059,3792.176039
34,test_worst_stateful_opcodes,test_worst_storage_access_cold,absent_slots_True-SSLOAD,Prague,SLOAD,307.0,0.003257,0.168404,505.211726
41,test_worst_stateful_opcodes,test_worst_storage_access_warm,SLOAD,Prague,SLOAD,297.0,0.003367,0.174074,522.222222


## Worst SSTORE

In [28]:
# Worst-client SSTORE results, expressed relative to the slowest ecRecover run.
# `.copy()` makes this an independent frame, so the column assignments below
# do not write into a slice of `worse_state_df` (SettingWithCopyWarning).
worse_sstore_df = worse_state_df[
    (worse_state_df["test_opcode"] == "SSTORE")
].copy()
# Reciprocal of the throughput. NOTE(review): if `mgas/s` is megagas per
# second, this is seconds per megagas (i.e. microseconds per gas), not
# milliseconds; the column label is kept unchanged — TODO confirm.
worse_sstore_df["ms/gas"] = 1 / worse_sstore_df["mgas/s"]
# How many times more expensive (per unit of gas) than the ecRecover anchor.
worse_sstore_df["ratio_to_ecRecover"] = (
    worse_sstore_df["ms/gas"] / worse_ecRecover_msgas
)
# Scale the ratio by 3000 (this matches the ecRecover precompile gas cost —
# confirm this is the intended anchor value).
worse_sstore_df["new_gas_price"] = worse_sstore_df["ratio_to_ecRecover"] * 3000

worse_sstore_df

Unnamed: 0,test_file,test_name,test_params,test_fork,test_opcode,mgas/s,ms/gas,ratio_to_ecRecover,new_gas_price
28,test_worst_stateful_opcodes,test_worst_storage_access_cold,absent_slots_False-SSTORE new value,Prague,SSTORE,102.0,0.009804,0.506863,1520.588235
29,test_worst_stateful_opcodes,test_worst_storage_access_cold,"absent_slots_False-SSTORE new value, out of gas",Prague,SSTORE,87.6,0.011416,0.590183,1770.547945
30,test_worst_stateful_opcodes,test_worst_storage_access_cold,"absent_slots_False-SSTORE new value, revert",Prague,SSTORE,104.0,0.009615,0.497115,1491.346154
31,test_worst_stateful_opcodes,test_worst_storage_access_cold,absent_slots_False-SSTORE same value,Prague,SSTORE,43.1,0.023202,1.199536,3598.607889
32,test_worst_stateful_opcodes,test_worst_storage_access_cold,"absent_slots_False-SSTORE same value, out of gas",Prague,SSTORE,43.2,0.023148,1.196759,3590.277778
33,test_worst_stateful_opcodes,test_worst_storage_access_cold,"absent_slots_False-SSTORE same value, revert",Prague,SSTORE,43.3,0.023095,1.193995,3581.986143
35,test_worst_stateful_opcodes,test_worst_storage_access_cold,absent_slots_True-SSTORE new value,Prague,SSTORE,293.0,0.003413,0.176451,529.351536
36,test_worst_stateful_opcodes,test_worst_storage_access_cold,"absent_slots_True-SSTORE new value, out of gas",Prague,SSTORE,297.0,0.003367,0.174074,522.222222
37,test_worst_stateful_opcodes,test_worst_storage_access_cold,"absent_slots_True-SSTORE new value, revert",Prague,SSTORE,299.0,0.003344,0.17291,518.729097
38,test_worst_stateful_opcodes,test_worst_storage_access_cold,absent_slots_True-SSTORE same value,Prague,SSTORE,314.0,0.003185,0.16465,493.949045


## Worst address

In [18]:
# Worst-client BALANCE results, expressed relative to the slowest ecRecover run.
# `.copy()` makes this an independent frame, so the column assignments below
# do not write into a slice of `worse_state_df` (SettingWithCopyWarning).
worse_addrs_df = worse_state_df[worse_state_df["test_opcode"]=="BALANCE"].copy()
# Reciprocal of the throughput. NOTE(review): if `mgas/s` is megagas per
# second, this is seconds per megagas (i.e. microseconds per gas), not
# milliseconds; the column label is kept unchanged — TODO confirm.
worse_addrs_df["ms/gas"] = 1 / worse_addrs_df["mgas/s"]
# How many times more expensive (per unit of gas) than the ecRecover anchor.
worse_addrs_df["ratio_to_ecRecover"] = worse_addrs_df["ms/gas"] / worse_ecRecover_msgas
# Scale the ratio by 3000 (this matches the ecRecover precompile gas cost —
# confirm this is the intended anchor value).
worse_addrs_df["new_gas_price"] = worse_addrs_df["ratio_to_ecRecover"]*3000

worse_addrs_df

Unnamed: 0,test_file,test_name,test_params,test_fork,test_opcode,mgas/s,ms/gas,ratio_to_ecRecover,new_gas_price
0,test_worst_stateful_opcodes,test_worst_address_state_cold,absent_accounts_False-opcode_BALANCE,Prague,BALANCE,47.4,0.021097,1.090717,3272.151899
1,test_worst_stateful_opcodes,test_worst_address_state_cold,absent_accounts_True-opcode_BALANCE,Prague,BALANCE,98.5,0.010152,0.524873,1574.619289
2,test_worst_stateful_opcodes,test_worst_address_state_warm,from_state_test-absent_target_False-opcode_BAL...,Prague,BALANCE,68.7,0.014556,0.752547,2257.641921
9,test_worst_stateful_opcodes,test_worst_address_state_warm,from_state_test-absent_target_True-opcode_BALANCE,Prague,BALANCE,157.0,0.006369,0.329299,987.898089
