In [1]:
import polars as pl
import altair as alt

alt.data_transformers.enable("vegafusion")
pl.Config.set_fmt_str_lengths(200)
pl.Config.set_fmt_float("full")

polars.config.Config

In [2]:
# mempool data
mainnet_mempool = pl.read_parquet('data/mainnet_mempool.parquet').select('event_date_time', 'hash', 'blob_gas_fee_cap', 'blob_hashes')

In [3]:
# blocks
blocks_df = pl.read_parquet('data/mainnet/block_blob_gas.parquet').rename({'number': 'block_number'}).with_columns(
    pl.col('block_number').cast(pl.UInt64),
    pl.col('blob_base_fee').cast(pl.Float64),
    pl.col('excess_blob_gas').cast(pl.Float64),
    )

# transactions with blobs
txs_blobs = pl.read_parquet('data/mainnet/txs_blobs.parquet')

# mempool data
mainnet_mempool = pl.read_parquet('data/mainnet_mempool.parquet').select('event_date_time', 'hash', 'blob_gas_fee_cap', 'blob_hashes').group_by('blob_hashes', 'hash').agg(
    pl.col('event_date_time').min(),
    pl.col('blob_gas_fee_cap').mean(),
    pl.len().alias('retry_count')
)

# labeled blobs - https://dune.com/queries/3521610
sequencers_l2: dict[list[str], list[str]] = {
    "sequencer_addresses": [
        "0xC1b634853Cb333D3aD8663715b08f41A3Aec47cc".lower(),
        "0x5050F69a9786F081509234F1a7F4684b5E5b76C9".lower(),
        "0x6887246668a3b87F54DeB3b94Ba47a6f63F32985".lower(),
        "0x000000633b68f5D8D3a86593ebB815b4663BCBe0".lower(),
        "0x415c8893D514F9BC5211d36eEDA4183226b84AA7".lower(),
        "0xa9268341831eFa4937537bc3e9EB36DbecE83C7e".lower(),
        "0xcF2898225ED05Be911D3709d9417e86E0b4Cfc8f".lower(),
        "0x0D3250c3D5FAcb74Ac15834096397a3Ef790ec99".lower(),
        "0xC70ae19B5FeAA5c19f576e621d2bad9771864fe2".lower(),
        "0xC94C243f8fb37223F3EB2f7961F7072602A51B8B".lower(),
        "0x625726c858dBF78c0125436C943Bf4b4bE9d9033".lower()
    ],
    "sequencer_names": [
        "arbitrum",
        "base",
        "optimism",
        "taiko",
        "blast",
        "linea",
        "scroll",
        "zksync",
        "paradex",
        "metal",
        "zora"
    ],
}

# join together
txs_df = (
    pl.scan_parquet('data/mainnet/transactions.parquet').filter(pl.col('type')==3).collect().join(txs_blobs, on='hash', how='inner').drop('__index_level_0__').unique()
    .join(blocks_df.select('block_number', 'blob_base_fee', 'base_fee_per_gas', 'time'), on='block_number', how='left')
    .select(
    'block_number',
    'time',
    'hash', 
    'nonce', 
    'to', 
    'from',
    'max_priority_fee_per_gas', 
    'gas_used', 
    'effective_gas_price', 
    'base_fee_per_gas',
    'blob_base_fee',
    'blob_gas',
    'blob_gas_fee_cap',
    'blob_hashes'
).drop_nulls()
    .with_columns(
    ((pl.col('blob_gas') * pl.col('blob_base_fee') / 10**9)).alias('blob_fee_gwei'),
    (pl.col("base_fee_per_gas")  / 10**9).alias("base_fee_gwei"),
    ((pl.col("effective_gas_price") - pl.col("base_fee_per_gas")) / 10**9).alias(
        "priority_fee_gwei")
).join(
    pl.from_dict(sequencers_l2),
    left_on="from",
    right_on="sequencer_addresses",
    how="left",
    coalesce=True
)
.join(mainnet_mempool, on='hash', how='left', suffix='_l1')
.with_columns(
    (pl.col('time') - pl.col('event_date_time')).alias('mempool_time').cast(pl.Int64)
)
.sort(by='block_number')
.unique()
.drop_nulls()
.with_columns(
# rewrite over mempool_time column to replace negative values with 0.
pl.when(pl.col('mempool_time') < 0).then(1).otherwise(pl.col('mempool_time')).alias('mempool_time')
)
)


In [4]:
txs_df.schema

Schema([('block_number', UInt64),
        ('time', Datetime(time_unit='us', time_zone=None)),
        ('hash', String),
        ('nonce', UInt64),
        ('to', String),
        ('from', String),
        ('max_priority_fee_per_gas', Float64),
        ('gas_used', Float64),
        ('effective_gas_price', Float64),
        ('base_fee_per_gas', Float64),
        ('blob_base_fee', Float64),
        ('blob_gas', UInt64),
        ('blob_gas_fee_cap', Int64),
        ('blob_hashes', List(String)),
        ('blob_fee_gwei', Float64),
        ('base_fee_gwei', Float64),
        ('priority_fee_gwei', Float64),
        ('sequencer_names', String),
        ('blob_hashes_l1', List(String)),
        ('event_date_time', Datetime(time_unit='ms', time_zone=None)),
        ('blob_gas_fee_cap_l1', Float64),
        ('retry_count', UInt32),
        ('mempool_time', Int64)])

In [5]:
txs_filtered_df = (
    txs_df.filter(pl.col('block_number') > 20_134_100).filter(pl.col('block_number') < 20_134_500).with_columns((pl.col('blob_base_fee') / 10**9).alias('blob_base_fee_gwei'))
    .select('time', 'block_number', 'sequencer_names', 'blob_base_fee_gwei', 'base_fee_gwei', 'priority_fee_gwei', 'mempool_time')
    )

### What did the Fee market look like?

In [6]:
# calculate the std from the percent change from a rolling window of 5 blocks
gas_vol_df = txs_filtered_df.sort(by='block_number').with_columns(
    # base fee volatility
    pl.col("blob_base_fee_gwei").pct_change(n=5).abs().alias("blob_base_fee_gwei_pct"),
    pl.col("base_fee_gwei").pct_change(n=5).abs().alias("base_fee_gwei_pct"),
    # fees paid volatility
    pl.col("priority_fee_gwei").pct_change(n=5).abs().alias("priority_fee_gwei_pct"),
    # mempool time volatility
    pl.col("mempool_time").pct_change(n=5).abs().alias("mempool_time_pct")
    ).fill_null(0)

In [7]:
# Base chart
gas_base_chart = alt.Chart(txs_filtered_df).encode(
    x=alt.X("time:T", title="Time")
)

# Line for blob_base_fee_gwei (first y-axis)
blob_base_fee_line = gas_base_chart.mark_line(color='blue').encode(
    y=alt.Y('blob_base_fee_gwei:Q', title='Blob Base Fee (gwei)', axis=alt.Axis(titleColor='blue')),
)

# Line for base_fee_gwei (second y-axis)
base_fee_line = gas_base_chart.mark_line(color='red').encode(
    y=alt.Y('base_fee_gwei:Q', title='Base Fee (gwei)', axis=alt.Axis(titleColor='red')),
)

# Combine into a twin-axis chart
gas_price_chart = alt.layer(
    blob_base_fee_line,
    base_fee_line
).resolve_scale(
    y='independent'  # Make y-scales independent for twin axes
).properties(
    width=600,
    height=300,
    title='Base and Blob Price (Gwei)'
)

In [8]:
# Base chart
base_chart_vol = alt.Chart(gas_vol_df).encode(
    x=alt.X("time:T", title="Time")
)

# Line for blob_base_fee_gwei_pct (first y-axis)
blob_fee_vol_chart = base_chart_vol.mark_line(color='blue').encode(
    y=alt.Y('blob_base_fee_gwei_pct:Q', title='Blob Base Fee (gwei)', axis=alt.Axis(titleColor='blue')),
)

# Line for base_fee_gwei_pct (second y-axis)
base_fee_vol_chart = base_chart_vol.mark_line(color='red').encode(
    y=alt.Y('base_fee_gwei_pct:Q', title='Base Fee (gwei)', axis=alt.Axis(titleColor='red')),
)

# Combine into a twin-axis chart
gas_vol_chart = alt.layer(
    blob_fee_vol_chart,
    base_fee_vol_chart
).resolve_scale(
    y='independent'  # Make y-scales independent for twin axes
).properties(
    width=600,
    height=300,
    title='Base and Blob Price (5 block window std (%) )'
)

In [9]:
gas_vol_df.shape

(475, 11)

In [10]:
gas_vol_df.sort(by='block_number').std()

time,block_number,sequencer_names,blob_base_fee_gwei,base_fee_gwei,priority_fee_gwei,mempool_time,blob_base_fee_gwei_pct,base_fee_gwei_pct,priority_fee_gwei_pct,mempool_time_pct
datetime[μs],f64,str,f64,f64,f64,f64,f64,f64,f64,f64
,122.8817581801661,,1942.254088299856,16.615026196883047,41.349300444293874,204958.28143121567,0.1511380105123476,0.1338689830927815,12.489950383227557,1629.736509704687


Writeup for Fees

During the 400 block period from 20,134,100 to 20,134,100, there was high volatility. Measuring the 5 block rolling standard deviation of fee changes (in percent):
* 24% blob base fee change
* 16.8% base fee change
* 2,200% priority fee change
* 143,500% change in time in mempool

In [11]:
gas_price_chart | gas_vol_chart

### Calculate blobs waiting in the mempool over this time period stacking up

In [12]:
blobs_in_mempool_df = (
    txs_df.filter(pl.col('block_number') > 20_134_100).filter(pl.col('block_number') < 20_134_500)    .with_columns(
        # convert mempool_time from ms to s
        (pl.col('mempool_time') / 100).alias('mempool_time_seconds')
    )
    .with_columns(
        # convert mempool time to block count
        (pl.col('mempool_time_seconds') / 12).floor().alias('blocks_in_mempool')
    )
    .with_columns(
        (pl.col('block_number') - pl.col('blocks_in_mempool')).alias('mempool_start_block_number')
    )
)

In [62]:
(blobs_in_mempool_df
.select('block_number', 'blocks_in_mempool', 'mempool_start_block_number', 'time', 'hash', 'blob_hashes_l1', 'sequencer_names', 'blob_fee_gwei', 'base_fee_gwei', 'priority_fee_gwei')
.with_columns(
pl.int_ranges(
    pl.col('mempool_start_block_number'),
    pl.col('block_number')
)))

block_number,blocks_in_mempool,mempool_start_block_number,time,hash,blob_hashes_l1,sequencer_names,blob_fee_gwei,base_fee_gwei,priority_fee_gwei
u64,f64,list[i64],datetime[μs],str,list[str],str,f64,f64,f64
20134322,152,"[20134170, 20134171, … 20134321]",2024-06-20 17:07:23,"""0x2dcc86d03d51124225b3583b9cb8df17087c91ac38dd18e91d88041127425093""","[""0x01cfb315d03d6e9328fac5452654ca359b162cfd551f9215605c608d1aceb8c1"", ""0x014f8e665cfa6e033d7273caaea36bfa56c04e182eab334460046a37883eb31a"", ""0x01b26d4b8ad22fef7808202e4cc26d933158c16d9c29be3c426061327e89e5da""]","""optimism""",2414144374.3238063,18.709116725,8
20134162,39,"[20134123, 20134124, … 20134161]",2024-06-20 16:34:47,"""0x776fd1591d690eefa216db2852e07b9c0a333517b5f244c7fa92c01b7c06163b""","[""0x01bbfbe5a03d763a1f6bb2a372022e588191a04683e1209bc01fa2b500743686"", ""0x01e00b620cf99cef2f605a6a580470b3f8c09371708467834d123eabe8d28e76"", ""0x0149b640a583e7841859696c739b942b05af58fa6baeea77161badc7eef312aa""]","""arbitrum""",57933955.06583962,14.640158260000002,33
20134118,64,"[20134054, 20134055, … 20134117]",2024-06-20 16:25:47,"""0x9ad55cf9416f08deb7983ca6617799b7cec91277556bc1000967596d97c73cf5""","[""0x01f95a0b473b8c3d0e452c624991e3f6fa16b683b88a3dec0baca6e950e98988"", ""0x017b374fb5ef0655ff3106b0f768420d8f159b0811a5ff4a85f4e9a6da902721"", ""0x0181e700a78a7021e69adb663a470ec4ad012fb84339c85f11d27782d2345f6f""]","""arbitrum""",1336756.408418304,16.198736895,6.010000000000001
20134478,67,"[20134411, 20134412, … 20134477]",2024-06-20 17:38:35,"""0x8aa059f04fce230615fce43ecf92986f3017f5b6a8da973a9f0d9639de63746d""","[""0x019935421c5e3162942c354282600df4baf488084c4cb3f1982214f4385c2daa"", ""0x0189e6c1b24602f920fa02e212f157db3d4eee759c34e6a32ee4cef7540a23f9"", … ""0x01e73915b00d178a1131fcaf010622ba151b93f5faed2a9e76248e12859d7652""]","""base""",79346488.92981249,10.067446885,2.009999999
20134350,7,"[20134343, 20134344, … 20134349]",2024-06-20 17:12:59,"""0x30e9fc91d8e43582f246dc14bbeee656bd2f2b4cb0555770b3473ddd21bcc1ca""","[""0x01da0bfc959f2658ba5aca996e7263fe16744240920f7a5db76319e0cf22acce""]","""taiko""",148749579.33759692,21.752964366,3
…,…,…,…,…,…,…,…,…,…
20134417,4,"[20134413, 20134414, … 20134416]",2024-06-20 17:26:23,"""0x07eda769cadb0259e26db3510fdfa376a2511a649c43186c4c2c30e30015d074""","[""0x018a606e8ced0f1755c4f7327bb28f8fba0b1388aa58147c4c684156f24b06eb""]","""taiko""",16504734.87761408,14.777655455000001,3
20134468,38,"[20134430, 20134431, … 20134467]",2024-06-20 17:36:35,"""0x8c85084f2ab91a10530300475e239eb760bb7730bec32b1cd17b95b674ee7fdf""","[""0x019e1999fc81bcf296513a4eae9641bf1ae47d0ec56d3d122cb380a236a4f868""]","""taiko""",39149578.693509124,10.777948652000001,3
20134115,10,"[20134105, 20134106, … 20134114]",2024-06-20 16:25:11,"""0xcc3f798b19f7ccc7478ef2562ccad932a76913a06a71ecec7bab70a8df8b6dd7""","[""0x019c7387c2a412f6587c37e2a8cb917b7c09ad61f707be55f381c1b26ce68c29"", ""0x013367c470cb4cd3fb430bcbf5d960b0c77766dd5b7ff7b593f36ace361ba63f"", … ""0x018808554fd132ee8809a38096b8cfb6942ca7e81c4ed9cfcaf6b438840ac41d""]","""base""",1980379.8365798402,14.387750117000001,132
20134428,124,"[20134304, 20134305, … 20134427]",2024-06-20 17:28:35,"""0x0b38de8e9a9a4513ac6a09d9cb91199f9fe6a48d9ab15d0fd055f5a27ef0e607""","[""0x0120b1b9e142b826309e647230aaa2ad6429261c47d04bda5f13aa46026621a2""]","""scroll""",24440887.95070464,11.873806677000001,0.1


In [57]:
# Calculate pending blocks
pending_blocks_df = (
    blobs_in_mempool_df
    .select('block_number', 'blocks_in_mempool', 'mempool_start_block_number', 'time', 'hash', 'blob_hashes_l1', 'sequencer_names', 'blob_fee_gwei', 'base_fee_gwei', 'priority_fee_gwei')
    .with_columns(
    pl.int_ranges(
        pl.col('mempool_start_block_number'),
        pl.col('block_number')
    ).alias('pending_blocks'))
    .select('time', 'pending_blocks', 'blob_hashes_l1', 'hash', 'block_number', 'mempool_start_block_number', 'sequencer_names', 'blob_fee_gwei', 'base_fee_gwei', 'priority_fee_gwei').explode('pending_blocks')
    .group_by('pending_blocks', 'sequencer_names').agg(
    pl.len().alias('pending_tx_count'), # this is the number of pending txs per rollup per block.
    pl.col('time').first(),
    pl.col('hash').last(),
    pl.col('block_number').last().alias('block_number_inclusion'),
    pl.col('mempool_start_block_number').last().alias('block_number_enter_mempool'),
    # average prices that each sequencer is bidding for their transactions
    pl.col('blob_fee_gwei').mean(),
    pl.col('base_fee_gwei').mean(),
    pl.col('priority_fee_gwei').mean()
    )
    # resize data for the congestion time period
    .filter(pl.col('pending_blocks') > 20_133_500).filter(pl.col('pending_blocks') < 20_134_500)
    .sort('pending_blocks')
)

In [60]:
pending_blocks_df.filter(pl.col('pending_blocks') == 20134100)

pending_blocks,sequencer_names,pending_tx_count,time,hash,block_number_inclusion,block_number_enter_mempool,blob_fee_gwei,base_fee_gwei,priority_fee_gwei
i64,str,u32,datetime[μs],str,u64,f64,f64,f64,f64
20134100,"""metal""",2,2024-06-20 17:11:23,"""0xe463691394eeebe881ec127369ac0eaa4b89527abd84d2895bca84e13045c7de""",20134342,20132806,229094686.41355368,22.440893249,2.0
20134100,"""taiko""",5,2024-06-20 16:25:11,"""0x68d95d24286858df46839c19a41e27897e991608e338969412e02222f5e08d8c""",20134159,20134054,3072066.364558541,13.9795296416,9.6
20134100,"""arbitrum""",37,2024-06-20 16:25:47,"""0xa68630d67b3f1e85fd2a3f337725f4fd045adb7ab5cf00b60c83de8822feebd9""",20134109,20133948,18632013.030890413,14.576597645270269,22.787567567567564
20134100,"""blast""",1,2024-06-20 17:07:11,"""0x78a60b537f04ffce336665e44f5a1e35b0b49e70d223eda3bc993a9b3c433a08""",20134321,20134063,4291812166.065193,19.53974051,8.0
20134100,"""base""",5,2024-06-20 16:37:23,"""0x0e2dfaa0f479653de7b987876b0172497070fde260e8ea6f19c8f700e8f69bac""",20134134,20134034,45553009.10845133,15.171827875600002,66.8079999992
20134100,"""zora""",2,2024-06-20 17:15:23,"""0x30ee3a7da24e06049954800b6ed4599a0aa18de50ee48d7af466024e6874d2f9""",20134368,20132727,458223439.38362575,22.5848041995,16.0
20134100,"""optimism""",11,2024-06-20 16:37:35,"""0xcffe1231c8dd533f274e8767ed9ff4ea9c8291964616ce82dd1cc50c8e05c802""",20134176,20133908,115320887.5780312,13.825621830272723,192.0
20134100,"""paradex""",2,2024-06-20 17:39:23,"""0x05c40a017792721e4816ee4fa20f6bebfe4fb27cd44c52c130edfaf23b89b19d""",20134408,20133930,15979501.26705869,12.362093182,0.1


In [59]:
alt.Chart(pending_blocks_df).mark_area().encode(
    x=alt.X('pending_blocks', title='Pending Blocks (Block Numbers)'),
    y=alt.Y('pending_tx_count', title='Number of Pending Transactions'),
    color=alt.Color("sequencer_names", title='Rollups')
).properties(
    width=600,
    height=300,
    title='Pending Blobs by Rollup (Blocks 20,133,500 to 20,134,500)'
)

### What was the "Inclusion Cost" at each block, how many pending blobs met the value?

In [22]:
priority_fee_intra_block_vol = (
    txs_filtered_df.sort(by='block_number')
    .group_by(
    "block_number"
    ).agg(
    # clearing prices for the blob market in the block
    pl.mean('blob_base_fee_gwei').alias('blob_base_fee_clearing_price'),
    pl.mean('base_fee_gwei').alias('base_fee_clearing_price'),
    pl.mean("priority_fee_gwei").alias('priority_fee_clearing_price'),
    pl.mean("mempool_time").alias('mempool_time_mean'),
    pl.len().alias('blobs_in_block')
    ).fill_null(0)
)

# forward fill the missing block numbers and replace blob_tx_count with 0 for missing block numbers
min_block = priority_fee_intra_block_vol["block_number"].min()
max_block = priority_fee_intra_block_vol["block_number"].max()

# Create a full range of block numbers
full_blocks = pl.DataFrame({
    "block_number": pl.Series("block_number", range(min_block, max_block + 1))
}).with_columns(pl.col('block_number').cast(pl.UInt64))

# Join to the full range and forward fill the missing values except for `blob_tx_count`
blob_market_cost_df = full_blocks.join(priority_fee_intra_block_vol, on="block_number", how="left").with_columns(
    pl.col('blob_base_fee_clearing_price').fill_null(strategy='forward'),
    pl.col('base_fee_clearing_price').fill_null(strategy='forward'),
    pl.col('priority_fee_clearing_price').fill_null(strategy='forward'),
    pl.col('blobs_in_block').fill_null(0).alias('blobs_in_block'),
    pl.col('mempool_time_mean').fill_null(strategy='forward'),
)

In [31]:
# join pending_blocks_df to blob_market_cost_df
blob_market_cost_inclusion_df = (
    pending_blocks_df
    .with_columns(pl.col('pending_blocks').cast(pl.UInt64)).join(blob_market_cost_df, left_on='pending_blocks', right_on='block_number', how='left')
    .with_columns(pl.col('blobs_in_block').fill_null(0))
    .rename(
        {
            "blob_fee_gwei": "blob_fee_rollup_price",
            "base_fee_gwei": "base_fee_rollup_price",
            "priority_fee_gwei": "priority_fee_rollup_price"
        }
    )
    )

In [33]:
blob_market_cost_inclusion_df.filter(pl.col('sequencer_names') == 'arbitrum')

pending_blocks,sequencer_names,pending_tx_count,time,blob_fee_rollup_price,base_fee_rollup_price,priority_fee_rollup_price,blob_base_fee_clearing_price,base_fee_clearing_price,priority_fee_clearing_price,mempool_time_mean,blobs_in_block
u64,str,u32,datetime[μs],f64,f64,f64,f64,f64,f64,f64,u32
20133947,"""arbitrum""",1,2024-06-20 16:24:11,1056202.565615616,13.778466374,6.010000000000001,,,,,0
20133948,"""arbitrum""",2,2024-06-20 16:24:11,997524.6390558721,13.201037625000001,6.010000000000001,,,,,0
20133949,"""arbitrum""",2,2024-06-20 16:24:11,997524.6390558721,13.201037625000001,6.010000000000001,,,,,0
20133950,"""arbitrum""",2,2024-06-20 16:24:11,997524.6390558721,13.201037625000001,6.010000000000001,,,,,0
20133951,"""arbitrum""",2,2024-06-20 16:24:11,997524.6390558721,13.201037625000001,6.010000000000001,,,,,0
…,…,…,…,…,…,…,…,…,…,…,…
20134440,"""arbitrum""",1,2024-06-20 17:31:59,108579206.20383437,11.855079502,2.1,298.68758158500003,11.822443829000001,0.8250000000000001,91468.5,4
20134441,"""arbitrum""",1,2024-06-20 17:31:59,108579206.20383437,11.855079502,2.1,298.68758158500003,11.822443829000001,0.8250000000000001,91468.5,0
20134442,"""arbitrum""",1,2024-06-20 17:31:59,108579206.20383437,11.855079502,2.1,298.68758158500003,12.006416159,3,8927,1
20134443,"""arbitrum""",1,2024-06-20 17:31:59,108579206.20383437,11.855079502,2.1,276.131200673,11.493206003000001,4,9532,1


In [35]:
txs_filtered_df.filter(pl.col('block_number') == 20134440)

time,block_number,sequencer_names,blob_base_fee_gwei,base_fee_gwei,priority_fee_gwei,mempool_time
datetime[μs],u64,str,f64,f64,f64,i64
2024-06-20 17:30:59,20134440,"""scroll""",298.68758158500003,11.822443829,0.1,113909
2024-06-20 17:30:59,20134440,"""scroll""",298.68758158500003,11.822443829,0.1,114526
2024-06-20 17:30:59,20134440,"""taiko""",298.68758158500003,11.822443829,3.0,92
2024-06-20 17:30:59,20134440,"""scroll""",298.68758158500003,11.822443829,0.1,137347
