In [None]:
# @title
import pandas as pd
import plotly.express as px
import numpy as np
import plotly.graph_objects as go
import plotly.figure_factory as ff

# Per-transaction measurements. Later cells read the columns "tx",
# "execution_length", "receipt_gas", "31bytechunker_gas" and "32bytechunker_gas".
base_data = pd.read_csv("gas_analysis.csv")
# Per-contract sizes. Later cells read the columns "contract_addr",
# "original_size", "31bytechunker_chunked_size" and "32bytechunker_chunked_size".
contract_sizes_base_data = pd.read_csv("chunker_contract_sizes.csv")

# Txs dataset general metrics

In [None]:
# @title Txs dataset - Execution length histogram

# Dataset-level counts (non-null entries). `total_txs` is reused by a later cell.
total_txs = base_data["tx"].count()
total_contracts = contract_sizes_base_data["contract_addr"].count()

print("A total of {} transactions were analyzed. Their execution involved executing {} contracts (fully or partially).".format(total_txs, total_contracts))

# Trim the long tails in two sequential steps so a handful of extreme
# transactions do not stretch the heatmap axes:
#   1. keep rows at or below the 95th percentile of execution_length;
#   2. of those, keep rows at or below the 95th percentile of receipt_gas.
# Note that step 2's percentile is computed on the rows that survived step 1,
# not on the full dataset.
q = base_data["execution_length"].quantile(0.95)
df = base_data[base_data["execution_length"] <= q]
q = df["receipt_gas"].quantile(0.95)
df = df[df["receipt_gas"] <= q]

# 2D count heatmap of the trimmed transactions, with a marginal histogram
# along each axis.
fig = px.density_heatmap(
    df,
    title="Heatmap count - execution_length (95%-th) and receipt_gas (95%-th)",
    x="receipt_gas",
    y="execution_length",
    marginal_x="histogram",
    marginal_y="histogram",
    nbinsx=15,
    nbinsy=9,
    text_auto=True,
    width=1500,
    height=1000)
fig.show()

# Chunkers: Gas usage simulation

In [None]:
# --- Histogram of the 31-byte chunker code-access gas -----------------------
# Outliers are trimmed at the 95th percentile. The percentile and the filter
# must refer to the SAME column; a single variable guards against mixing them
# (e.g. a receipt_gas percentile used as an execution_length cut-off, which
# compares unrelated units and trims almost nothing).
trim_column = "execution_length"
q = base_data[trim_column].quantile(0.95)
df = base_data[base_data[trim_column] <= q]
fig = px.histogram(
    df,
    x="31bytechunker_gas",
    labels={"31bytechunker_gas":"code_access_gas"},
    nbins=40,
    histnorm='percent',
    width=800,
    height=450,
    title="31-byte chunker code-access gas")
fig.show()

# --- Aggregate overhead over the whole (untrimmed) dataset ------------------
# `z31bytechunker_gas_sum` is reused by a later cell.
z31bytechunker_gas_sum = base_data['31bytechunker_gas'].sum()
print()
print('sum(txs.31bytechunker_gas)/sum(txs.receipt_gas): {:0.2f}%'.format((z31bytechunker_gas_sum/base_data['receipt_gas'].sum())*100))

# --- Per-transaction overhead ------------------------------------------------
# Code-access gas as a percentage of the gas reported in the receipt.
# The column is deliberately stored on `base_data` itself (written explicitly
# rather than through an alias) so it remains available after this cell;
# re-running the cell simply recomputes the same values.
base_data['code_gas_overhead_percentage'] = (base_data['31bytechunker_gas'] / base_data['receipt_gas']) * 100
df = base_data
fig = px.histogram(
    df,
    x="code_gas_overhead_percentage",
    labels={"code_gas_overhead_percentage":"code_access_gas/receipt_gas (%)"},
    nbins=50,
    histnorm='percent',
    width=800,
    height=450,
    title="Histogram - Code-access gas overhead (%) (31chunker_gas/receipt_gas)")
fig.show()

print()

# --- Top-10 tables -----------------------------------------------------------
# Same columns for both tables; only the ranking column and heading differ.
table_columns = ['tx', 'execution_length', 'receipt_gas', '31bytechunker_gas', 'code_gas_overhead_percentage']
rankings = [
    ("Top 10 code-access worst overhead:", 'code_gas_overhead_percentage'),
    ("Top 10 longest-execution txs:", 'execution_length'),
]
for heading, rank_column in rankings:
    print()
    # sort_values returns a new frame, so base_data keeps its original row order.
    df = df.sort_values(by=[rank_column], ascending=False)
    print(heading)
    print(df[table_columns].head(n=10).to_markdown(index=False))

# 31-byte chunker vs 32-byte chunker

In [None]:
# Compare the total code-access gas of the two chunkers over every transaction.
# Depends on `total_txs` and `z31bytechunker_gas_sum` computed by earlier cells.
df = base_data
z32bytechunker_gas_sum = df['32bytechunker_gas'].sum()
print('The {} txs would have used the following amount of gas:'.format(total_txs))
print('\t- With a 31-byte chunker: {}'.format(z31bytechunker_gas_sum))
print('\t- With a 32-byte chunker: {}'.format(z32bytechunker_gas_sum))
diff = z32bytechunker_gas_sum-z31bytechunker_gas_sum
# The direction is expressed by the word "more"/"less", so the amount and the
# percentage are printed as magnitudes; printing the signed values would read
# as a double negative ("used -5 less gas (-1.00%)") when the 32-byte chunker
# is cheaper.
print('Thus, the 32-byte chunker used {} {} gas ({:0.2f}%) than the 31-byte chunker.'.format(abs(diff), 'more' if diff > 0 else 'less', abs(z32bytechunker_gas_sum/z31bytechunker_gas_sum -1) * 100))

print()
# Only transactions whose code-access gas differs between the two chunkers.
# .copy() gives an independent frame so the new columns do not touch base_data.
df_diff = df[df['31bytechunker_gas'] != df['32bytechunker_gas']].copy()
# Computed from df_diff itself, so no index alignment against the full frame is needed.
df_diff['gas_diff'] = df_diff['32bytechunker_gas'] - df_diff['31bytechunker_gas']
df_diff['gas_diff_ratio'] = df_diff['32bytechunker_gas']/df_diff['31bytechunker_gas']

diff_columns = ['tx', '31bytechunker_gas', '32bytechunker_gas', 'gas_diff', 'gas_diff_ratio']

# Worst cases: highest 32-byte/31-byte gas ratio first.
df_diff = df_diff.sort_values(by=['gas_diff_ratio'], ascending=False)
print("A total of {} txs have different gas cost.".format(df_diff['tx'].count()))
print("Top-10 worst-cases:")
print(df_diff[diff_columns].head(n=10).to_markdown(index=False))

# Best cases: lowest ratio first.
df_diff = df_diff.sort_values(by=['gas_diff_ratio'])

print()

print("Top-10 best-cases:")
print(df_diff[diff_columns].head(n=10).to_markdown(index=False))



# Chunkers: encoded sizes

Remember that chunkers align the chunked size to the next multiple of 32 bytes.

In [None]:
# Compare the total encoded (chunked) size of all contracts under each chunker
# against the original, non-chunked code size.
df = contract_sizes_base_data

z31bytechunker_size_sum = df['31bytechunker_chunked_size'].sum()
z32bytechunker_size_sum = df['32bytechunker_chunked_size'].sum()
original_size_sum = df['original_size'].sum()
print('Avg contract size: {:0.0f} bytes'.format(df['original_size'].mean()))
# Sizes are in bytes; /1024/1024 converts them to MiB for display.
print('Total non-chunked contracts size: {:0.2f}MiB'.format(original_size_sum/1024/1024))
print('Total 31-byte chunked size: {:0.2f}MiB ({:0.1f}% more)'.format(z31bytechunker_size_sum/1024/1024, (z31bytechunker_size_sum/original_size_sum-1)*100))
print('Total 32-byte chunked contracts size: {:0.2f}MiB ({:0.1f}% more)'.format(z32bytechunker_size_sum/1024/1024, (z32bytechunker_size_sum/original_size_sum-1)*100))
diff = z32bytechunker_size_sum-z31bytechunker_size_sum
# The direction is expressed by the word "more"/"less", so the size and the
# percentage are printed as magnitudes; the signed values would read as a
# double negative ("uses -0.50MiB (-1.2%) less") when the 32-byte encoding is
# the smaller one.
print('Thus, 32-byte chunker uses {:0.2f}MiB ({:0.1f}%) {} than the 31-byte chunker'.format(abs(diff)/1024/1024, abs(z32bytechunker_size_sum/z31bytechunker_size_sum -1) * 100, 'more' if diff > 0 else 'less'))

print()

# Contracts whose 32-byte encoding is strictly larger than their 31-byte encoding.
z32_bigger_than_z31 = df[(df['32bytechunker_chunked_size']-df['31bytechunker_chunked_size'])>0]
print('There are {} contracts of 32-byte-chunker bigger than 31-byte-chunker:'.format(z32_bigger_than_z31['original_size'].count()))
print(z32_bigger_than_z31[['contract_addr', '31bytechunker_chunked_size', '32bytechunker_chunked_size']].to_markdown())
