In [416]:
%load_ext dotenv
%dotenv

The dotenv extension is already loaded. To reload it, use:
  %reload_ext dotenv


In [419]:
import requests as r
import pandas as pd
import math
import os
from google.cloud import bigquery
client = bigquery.Client(project=os.environ['project'])

pd.set_option('display.max_columns', None)



In [30]:
def get_method_name(sig):
    obj = r.get(f"https://www.4byte.directory/api/v1/signatures/?hex_signature={sig}").json()
    try:
        return sorted(obj["results"], key=lambda x: x["id"])[0]["text_signature"]
    except:
        return None

In [443]:
current_block = 13648624
start_block = current_block - int(7*24*3600/13)

top_n_functions = 50

QUERY = f"""
SELECT 
    SUBSTR(input, 0, 10) as sig,
    count(*) as c, 
    SUM(receipt_gas_used) as gas_sum, 
    AVG(LENGTH(SUBSTR(input, 11)))/2 as avg_input_size,
    AVG(LENGTH(REPLACE(SUBSTR(input, 11), '00', '')))/2 as avg_non_zero_bytes,
FROM 
    `bigquery-public-data.crypto_ethereum.transactions` 
WHERE 
    block_number > {start_block} AND block_number < {current_block} AND LENGTH(input) > 12 
GROUP BY 
    sig 
ORDER BY 
    gas_sum DESC 
LIMIT {top_n_functions}
"""

query_job = client.query(QUERY)
rows = query_job.result()

cols = ['sig', 'c', 'gas_sum', 'avg_input_size' ,'avg_non_zero_bytes']
df_rows = []

for row in rows:
    df_rows += [{k: row[k] for k in cols}]
    
df = pd.DataFrame(df_rows)

In [444]:
df['method_name'] = df['sig'].apply(lambda e: get_method_name(e))

In [445]:
# https://github.com/jzaki/bls-wallet-contracts/blob/a8a8c2698256b1e776b3a79f65dd193f857505c8/contracts/VerificationGateway.sol#L185

# abi.encodePacked(
#     chainId, //block.chainid, # -> does not need to be included
#     txData.nonce, # -> does not need to be included
#     txData.rewardTokenAddress, # -> also does not need to be included
#     txData.rewardTokenAmount, # 2 bytes float16?
#     txData.ethValue, # rle encoded, 1 byte on avg (0)
#     txData.contractAddress, # contract registry, rle encoded, 2 byte on avg
#     keccak256(txData.encodedFunction) # see custom compression 
# )

# --> 5 byte tx overhead 

In [446]:
def get_input_size(sig):
    query_job = client.query(f"SELECT LENGTH(input) as len FROM `bigquery-public-data.crypto_ethereum.transactions` WHERE block_number > 13591559 AND input LIKE '{sig}%' LIMIT 1")
    rows = query_job.result()
    return list(rows)[0]["len"] - 10

def get_number_of_params(sig):
    return int(get_input_size(sig) / 64)

def get_avg_param_bytes(sig, replace_addresses=False):
    n_params = get_number_of_params(sig)
    proj = ""
    for i in range(0, n_params):
        #field = f"AVG(LENGTH(REGEXP_REPLACE(SUBSTR(input, {11+i*64}, 64), '^0+', '')))/2 as p{i}"
        field = f"AVG(LENGTH(REPLACE(SUBSTR(input, {11+i*64}, 64), '00', '')))/2 as p{i}"
        proj += field
        if i < n_params-1:
            proj += ","
    query_job = client.query(f"SELECT {proj} FROM `bigquery-public-data.crypto_ethereum.transactions` WHERE block_number > 13591559 and input like '{sig}%'")
    row = list(query_job.result())[0]
    res = []
    for i in range(0, n_params):
        size = math.ceil(row[f"p{i}"])
        if replace_addresses and size == 20:
            size = 4 # replace addresses by registry indices
        res += [size]
    return res

def custom_encoding_input_size(sig, replace_addresses=False):
    encoded_bytes = get_avg_param_bytes(sig, replace_addresses=replace_addresses)
    return sum(encoded_bytes) + len(encoded_bytes) + 4 # function signature

def standard_endcoding_input_size(sig):
    return get_input_size(sig) / 2 + 4 # function signature


In [447]:
ETHEREUM_AVG_TXN_SIZE = 80
BLSW_TXN_SIZE = 5

df['avg_zero_bytes'] = df['avg_input_size'] - df['avg_non_zero_bytes']

df['avg_evm_gas_per_tx'] = df['gas_sum'] / df['c']
df['avg_ovm_gas_per_tx'] = (ETHEREUM_AVG_TXN_SIZE + df['avg_non_zero_bytes']) * 16 + df['avg_zero_bytes'] * 4
df['evm_ovm_ratio'] = df['avg_evm_gas_per_tx'] / df['avg_ovm_gas_per_tx']

df['avg_blsw_agg_gas'] = (BLSW_TXN_SIZE + df['avg_non_zero_bytes']) * 16 + df['avg_zero_bytes'] * 4
df['ovm_blsw_agg_ratio'] = df['avg_ovm_gas_per_tx'] / df['avg_blsw_agg_gas']
df['evm_blsw_agg_ratio'] = df['avg_evm_gas_per_tx'] / df['avg_blsw_agg_gas']

df['avg_blsw_ce_addr_gas'] = (BLSW_TXN_SIZE + df.apply(lambda r: custom_encoding_input_size(r['sig']), axis=1)) * 16
df['blsw_agg_blsw_ce_addr_ratio'] = df['avg_blsw_agg_gas'] / df['avg_blsw_ce_addr_gas']
df['ovm_blsw_ce_addr_ratio'] = df['avg_ovm_gas_per_tx'] / df['avg_blsw_ce_addr_gas']
df['evm_blsw_ce_addr_ratio'] = df['avg_evm_gas_per_tx'] / df['avg_blsw_ce_addr_gas']

df['avg_blsw_ce_idx_gas'] = (BLSW_TXN_SIZE + df.apply(lambda r: custom_encoding_input_size(r['sig'], replace_addresses=True), axis=1)) * 16
df['blsw_ce_addr_blsw_ce_idx_ratio'] = df['avg_blsw_ce_addr_gas'] / df['avg_blsw_ce_idx_gas']
df['blsw_agg_blsw_ce_idx_ratio'] = df['avg_blsw_agg_gas'] / df['avg_blsw_ce_idx_gas']
df['ovm_blsw_ce_idx_ratio'] = df['avg_ovm_gas_per_tx'] / df['avg_blsw_ce_idx_gas']
df['evm_blsw_ce_idx_ratio'] = df['avg_evm_gas_per_tx'] / df['avg_blsw_ce_idx_gas']

In [451]:
df.head(50)

Unnamed: 0,sig,c,gas_sum,avg_input_size,avg_non_zero_bytes,method_name,avg_zero_bytes,avg_evm_gas_per_tx,avg_ovm_gas_per_tx,evm_ovm_ratio,avg_blsw_agg_gas,ovm_blsw_agg_ratio,evm_blsw_agg_ratio,avg_blsw_ce_addr_gas,blsw_agg_blsw_ce_addr_ratio,ovm_blsw_ce_addr_ratio,evm_blsw_ce_addr_ratio,avg_blsw_ce_idx_gas,blsw_ce_addr_blsw_ce_idx_ratio,blsw_agg_blsw_ce_idx_ratio,ovm_blsw_ce_idx_ratio,evm_blsw_ce_idx_ratio
0,0xa9059cbb,2121769,107719389261,64.003831,25.673906,"transfer(address,uint256)",38.329925,50768.67,1844.102196,27.530291,644.102196,2.863058,78.82083,592,1.08801,3.115037,85.757888,336,1.761905,1.916971,5.488399,151.097231
1,0xab834bab,246738,49547093683,2494.320121,525.028796,"atomicMatch_(address[14],uint256[18],uint8[8],...",1969.291325,200808.5,17557.626032,11.437111,16357.626032,1.07336,12.276141,10144,1.612542,1.730839,19.795793,8096,1.252964,2.020458,2.168679,24.803424
2,0x7ff36ab5,288258,39886179274,224.690569,74.625148,"swapExactETHForTokens(uint256,address[],addres...",150.065421,138369.7,3074.264055,45.009056,1874.264055,1.640251,73.826162,1472,1.273277,2.088495,94.00117,704,2.090909,2.662307,4.366852,196.5479
3,0x095ea7b3,494694,23368799025,64.0,47.82547,"approve(address,uint256)",16.17453,47238.9,2109.905639,22.389104,909.905639,2.318818,51.916259,960,0.947818,2.197818,49.207185,704,1.363636,1.29248,2.997025,67.100706
4,0x791ac947,99899,22135876885,256.056057,81.469995,swapExactTokensForETHSupportingFeeOnTransferTo...,174.586062,221582.6,3281.864163,67.517288,2081.864163,1.576406,106.43469,1616,1.288282,2.030857,137.117925,848,1.90566,2.455028,3.870123,261.300197
5,0x5f575529,90223,18845834246,955.697937,215.495107,"swap(string,address,uint256,bytes)",740.202831,208880.6,7688.733028,27.167103,6488.733028,1.184936,32.191276,3536,1.835049,2.174415,59.072567,3424,1.03271,1.895074,2.245541,61.004848
6,0xac9650d8,78363,13750067050,615.993977,118.685017,multicall(bytes[]),497.30896,175466.3,5168.196113,33.951172,3968.196113,1.302404,44.218157,2720,1.458896,1.900072,64.509676,2288,1.188811,1.734351,2.258827,76.689824
7,0x38ed1739,87894,12857626826,276.292921,92.511184,"swapExactTokensForTokens(uint256,uint256,addre...",183.781737,146285.6,3495.305891,41.852018,2295.305891,1.522806,63.732509,1792,1.280863,1.950506,81.632591,1024,1.75,2.24151,3.413385,142.857035
8,0xfb3bdb41,88378,12245226690,224.511621,73.199099,"swapETHForExactTokens(uint256,address[],addres...",151.312521,138555.1,3056.435674,45.332263,1856.435674,1.6464,74.635037,1456,1.275025,2.0992,95.161501,688,2.116279,2.698308,4.442494,201.388294
9,0xa0712d68,38829,9707072555,32.0,1.947926,mint(uint256),30.052074,249995.4,1431.375106,174.65403,231.375106,6.186383,1080.476781,192,1.205079,7.455079,1302.059532,192,1.0,1.205079,7.455079,1302.059532


In [453]:
topn = df.head(50)

print(f"EVM to OVM: {round(topn['evm_ovm_ratio'].median(),2)}x")
print(f"OVM to BLS Wallet (aggregation, no compression): {round(topn['ovm_blsw_agg_ratio'].median(),2)}x")
print(f"BLS Wallet with compression: {round(topn['blsw_agg_blsw_ce_addr_ratio'].median(),2)}x")
print(f"BLS Wallet with address registry: {round(topn['blsw_ce_addr_blsw_ce_idx_ratio'].median(),2)}x")
print(f"Total (EVM to BLS Wallet): {round(topn['evm_blsw_ce_idx_ratio'].median(),2)}x")

EVM to OVM: 39.66x
OVM to BLS Wallet (aggregation, no compression): 1.55x
BLS Wallet with compression: 1.3x
BLS Wallet with address registry: 1.19x
Total (EVM to BLS Wallet): 156.0x


In [439]:
sig = "0x7ff36ab5"
ce = custom_encoding_input_size(sig, replace_addresses=True)
se = standard_endcoding_input_size(sig)

ce, se, se/ce

(39, 228.0, 5.846153846153846)

In [441]:
get_avg_param_bytes(sig, replace_addresses=True)

[9, 1, 4, 5, 1, 4, 4]

In [None]:
# manual custom compression for swapExactETHForTokens
# 1B + 9B avg
# 4B path[1]
# 4B to address
# 4B deadline
# -------
# 22B (1.77x better than automatic compression)