In [28]:
import numpy as np
import pandas as pd
import pyarrow.parquet as pq

In [78]:
# Chains whose gas-fee rank is <= TOP_CHAINS keep their own category;
# every other chain is pooled into 'OTHER'.
TOP_CHAINS = 6
# TOTAL_FUNDING is divided by MONTHS before being split across chain categories
# (presumably the number of monthly rounds — TODO confirm).
MONTHS = 6
# Overall funding pool to distribute; the unit is not stated in this notebook.
TOTAL_FUNDING = 1_000_000

In [80]:
# Load the gas-fee table (columns: dt, chain, gas_fees) and parse `dt` as datetimes.
GAS_FEES_PARQUET = "data/20250731_s7_onchain_builder_gasfees_all.parquet"
df_gas = pq.read_table(GAS_FEES_PARQUET).to_pandas(ignore_metadata=True)
df_gas['dt'] = pd.to_datetime(df_gas['dt'])

# Keep rows inside the half-open window [2025-02-01, 2025-08-01).
window_start = pd.to_datetime('2025-02-01')
window_end = pd.to_datetime('2025-08-01')
date_filter = df_gas['dt'].ge(window_start) & df_gas['dt'].lt(window_end)

# Leave out the chains that are excluded from this analysis.
excluded_chains = ['CELO', 'ETHEREUM', 'FRAX']
chain_filter = ~df_gas['chain'].isin(excluded_chains)

df_gas = df_gas.loc[date_filter & chain_filter]
df_gas.tail()

Unnamed: 0,dt,chain,gas_fees
12868,2025-06-24,ARENAZ,0.009997
12879,2025-02-01,KROMA,0.001974
12880,2025-04-14,SWELL,0.004142
12881,2025-03-07,UNICHAIN,0.052094
12887,2025-07-17,INK,0.344964


In [84]:
# Total gas fees per chain over the filtered rows of df_gas.
chain_totals = df_gas.groupby('chain', as_index=False)['gas_fees'].sum()

# Rank 1 is the chain with the highest total (ties share an averaged rank).
chain_totals['gas_rank'] = chain_totals['gas_fees'].rank(ascending=False)

# Chains ranked within TOP_CHAINS keep their own name as category; the rest become
# 'OTHER'. Vectorised `where` replaces the row-wise apply and yields the same labels.
chain_totals['category'] = chain_totals['chain'].where(
    chain_totals['gas_rank'] <= TOP_CHAINS, 'OTHER'
)

# Each chain's share of the gas fees of the category it belongs to
# (1.0 for the single-chain categories).
chain_totals['total_gas_by_category'] = (
    chain_totals.groupby('category')['gas_fees'].transform('sum')
)
chain_totals['share_of_category_gas'] = (
    chain_totals['gas_fees'] / chain_totals['total_gas_by_category']
)

# Show the top chains plus the largest member of 'OTHER'.
chain_totals.sort_values(by='gas_rank').head(TOP_CHAINS + 1)

Unnamed: 0,chain,gas_fees,gas_rank,category,total_gas_by_category,share_of_category_gas
2,BASE,12870.090633,1.0,BASE,12870.090633,1.0
13,OPTIMISM,733.083769,2.0,OPTIMISM,733.083769,1.0
22,UNICHAIN,456.486469,3.0,UNICHAIN,456.486469,1.0
19,SONEIUM,420.239131,4.0,SONEIUM,420.239131,1.0
23,WORLDCHAIN,112.017055,5.0,WORLDCHAIN,112.017055,1.0
6,INK,43.7444,6.0,INK,43.7444,1.0
8,LISK,31.654049,7.0,OTHER,60.387119,0.524185


In [90]:
# Lookup table: chain name -> category label taken from chain_totals.
chain_category_mappings = dict(zip(chain_totals['chain'], chain_totals['category']))

In [86]:
# Split TOTAL_FUNDING / MONTHS across categories in proportion to the natural log
# of each category's total gas fees.
# NOTE(review): a category whose total gas fees are below 1 has a negative log and
# would therefore receive a negative share — confirm this cannot occur for the inputs.
budget = TOTAL_FUNDING / MONTHS
category_gas = chain_totals.groupby('category', as_index=False)['gas_fees'].sum()
chain_categories = category_gas.assign(log_gas=lambda t: np.log(t['gas_fees']))
chain_categories = chain_categories.assign(
    funding=lambda t: t['log_gas'] / t['log_gas'].sum() * budget
)
chain_categories.sort_values(by='funding', ascending=False)

Unnamed: 0,category,gas_fees,log_gas,funding
0,BASE,12870.090633,9.462661,38633.740614
2,OPTIMISM,733.083769,6.59726,26935.005015
5,UNICHAIN,456.486469,6.123559,25000.99959
4,SONEIUM,420.239131,6.040824,24663.212124
6,WORLDCHAIN,112.017055,4.718651,19265.102865
3,OTHER,60.387119,4.100776,16742.468517
1,INK,43.7444,3.778364,15426.137942


In [91]:
# Read the metrics CSV directly in this cell: `df_metrics` is not created until a
# later cell, so referring to it here fails on Restart & Run All.
# NOTE(review): absolute local path — consider a path relative to a configurable data dir.
df_metrics_raw = pd.read_csv('/Users/cerv1-air/GitHub/Retro-Funding/results/S7/M6/data/onchain__metrics_by_project.csv')

# Number of rows per metric in the unfiltered file.
df_metrics_raw['metric_name'].value_counts()

metric_name
amortized_contract_invocations_monthly       1192
gas_fees_monthly                             1192
qualified_addresses_monthly                  1192
active_farcaster_users_monthly               1023
contract_invocations_upgraded_eoa_monthly     544
average_tvl_monthly                           353
Name: count, dtype: int64

In [134]:
# Load the per-project metrics.
# NOTE(review): absolute local path — consider a path relative to a configurable data dir.
df_metrics = pd.read_csv('/Users/cerv1-air/GitHub/Retro-Funding/results/S7/M6/data/onchain__metrics_by_project.csv')

# "Blue chips": projects whose `average_tvl_monthly` amounts, summed over all of
# their rows in the file, reach the threshold. They are excluded below.
BLUE_CHIP_TVL_THRESHOLD = 100_000_000
blue_chips = df_metrics[df_metrics['metric_name'] == 'average_tvl_monthly'].groupby('project_id')['amount'].sum()
blue_chips = list(blue_chips[blue_chips >= BLUE_CHIP_TVL_THRESHOLD].index)

# Metrics that take part in the percentile ranking.
ranked_metrics = [
    'amortized_contract_invocations_monthly',
    'gas_fees_monthly',
    'qualified_addresses_monthly',
    'contract_invocations_upgraded_eoa_monthly'
]

# Keep only the 'Jul 2025' rows of non-blue-chip projects for the ranked metrics.
# `.copy()` makes the result an independent frame, so the column assignments below
# do not raise SettingWithCopyWarning.
df_metrics = df_metrics[
    (df_metrics['measurement_period'] == 'Jul 2025')
    & (~df_metrics['project_id'].isin(blue_chips))
    & (df_metrics['metric_name'].isin(ranked_metrics))
].copy()

# Attach the chain category; chains missing from chain_category_mappings map to NaN.
df_metrics['chain_category'] = df_metrics['chain'].map(chain_category_mappings)

# Percentile rank of `amount` within each (chain_category, metric_name) group.
df_metrics['percentile'] = (
    df_metrics
    .groupby(['chain_category', 'metric_name'])['amount']
    .transform(lambda x: x.rank(pct=True))
)

df_metrics.tail()

Unnamed: 0,project_id,display_name,project_name,chain,metric_name,sample_date,measurement_period,amount,chain_category,percentile
5491,ZpLND65a7Iy7VJrIjdjCjrQzAFi4JaJpkYeg3DJlrsE=,Layer3,0x91a4420e2fcc8311e97dad480f201a8ce221f2cd64c2...,LISK,gas_fees_monthly,2025-07-01,Jul 2025,2.366971e-05,OTHER,0.44697
5492,a0PyEDtZEEjxDRe7zWNpHP+358umEogD6bQI1XfI2eY=,zkCodex,0x7a4bb37bc7997b8e9b34775164682ff1f441b716cd23...,LISK,gas_fees_monthly,2025-07-01,Jul 2025,0.0006777418,OTHER,0.689394
5493,Z0E5wD51Hag8einHJvFv4rycuCql5BP8obLIEDSrsbM=,Festify,0x27f345fdead33d831d6022462628b6a9ad384e7681ee...,LISK,gas_fees_monthly,2025-07-01,Jul 2025,0.06093374,OTHER,0.962121
5494,eQqCoZ6ZUM1ejuDhFMANJnjGAF32f7Pf7cWVAbVCSC4=,BAG Guild Dapp,0x076a2b1418a515ff8c5bb11beed5630cc6fe7f65fd8d...,LISK,gas_fees_monthly,2025-07-01,Jul 2025,1.067476e-08,OTHER,0.05303
5495,xyqCuzyp2siN+L8QgwpvSjTruF2y+tvB++HwqdWDFXY=,OnChainGM,0xa1f96576ad998e804140caccd478bae81b8b1059a107...,LISK,gas_fees_monthly,2025-07-01,Jul 2025,0.001595315,OTHER,0.772727


In [138]:
# Mean percentile per project within each chain category, averaged over all of the
# project's rows in df_metrics for that category.
group_keys = ['project_name', 'display_name', 'chain_category']
df_projects_by_category = df_metrics.groupby(group_keys, as_index=False)['percentile'].mean()

# Display the OPTIMISM projects, highest mean percentile first.
is_optimism = df_projects_by_category['chain_category'] == 'OPTIMISM'
df_projects_by_category.loc[is_optimism].sort_values(by='percentile', ascending=False)

Unnamed: 0,project_name,display_name,chain_category,percentile
77,0x250065e82e6a2fd0127f0f7c7e0df7eb305810bd9796...,Relay Protocol,OPTIMISM,0.985243
210,0x72723e07fe409557489a6643b43d9493a94c10ba6823...,Across Protocol,OPTIMISM,0.975072
162,0x517eaa9c56951de89261f2d7830ea49aae92f2a90310...,LI.FI,OPTIMISM,0.965596
5,0x000c2ce4773defb3010a58d3800d0ec9d432189c574b...,WOOFi,OPTIMISM,0.946855
443,0xe8833ceee8beb2b3fb0f7f2dcef576f6f9cf20e35d8e...,SOCKET protocol,OPTIMISM,0.939673
...,...,...,...,...
431,0xe3d0892ac820afd317c8073e3fc8e599f1801019e86b...,Optimistic Builder Dollar,OPTIMISM,0.071865
66,0x1daa6d4f1449948487526075c2e4d8ef0b921169e8fe...,defi degen,OPTIMISM,0.056575
178,0x57cbd227f8353beaaa153466b2c7025ea9e63cb5f2ae...,Boredtopia,OPTIMISM,0.053517
168,0x5316b19c17eba417d70b924baa6a1c467b1406996842...,Bored Town Launchpad,OPTIMISM,0.041284
