In [1]:
import sqlite3
import pandas as pd

In [10]:
conn = sqlite3.connect('PBS.db')
builder_proposer_tx = pd.read_sql_query(
    "SELECT * FROM builder_proposer_tx", conn)
relay_data = pd.read_sql_query("SELECT * FROM relay_data", conn)
df = pd.merge(builder_proposer_tx, relay_data.drop_duplicates(subset='block_number'), on='block_hash')
df = df[['relay', 'block_number_x', 'from_address', 'to_address']]
df = df.rename(columns={'block_number_x': 'block_number', 'from_address' : 'builder_pubkey','to_address': 'proposer_address'})

In [12]:
df

Unnamed: 0,relay,block_number,builder_pubkey,proposer_address
0,agnostic Gnosis,18951601,0x1f9090aae28b8a3dceadf281b0f12828e676c326,0xb3d9cf8e163bbc840195a97e81f8a34e295b8f39
1,flashbots,18951918,0x0aa8ebb6ad5a8e499e550ae2c461197624c6e667,0x0000000000450702bc4f750fd1e7ecad7054c4f1
2,flashbots,18952077,0xbd3afb0bb76683ecb4225f9dbc91f998713c3b01,0x717bbc4fa6574abb4ffadccfe00270be21e7473a
3,flashbots,18953144,0x229b8325bb9ac04602898b7e8989998710235d5f,0x388c818ca8b9251b393131c08a736a67ccb19297
4,flashbots,18953421,0xbd3afb0bb76683ecb4225f9dbc91f998713c3b01,0x13f2241aa64bb6da2b74553fa9e12b713b74f334
...,...,...,...,...
140349,agnostic Gnosis,18911286,0x1f9090aae28b8a3dceadf281b0f12828e676c326,0xd6e4aa932147a3fe5311da1b67d9e73da06f9cef
140350,aestus,19089909,0x88c6c46ebf353a52bdbab708c23d0c81daa8134a,0xd6e4aa932147a3fe5311da1b67d9e73da06f9cef
140351,ultrasound,19091524,0x95222290dd7278aa3ddd389cc1e1d165cc4bafe5,0xd6e4aa932147a3fe5311da1b67d9e73da06f9cef
140352,flashbots,19093173,0x1f9090aae28b8a3dceadf281b0f12828e676c326,0xd6e4aa932147a3fe5311da1b67d9e73da06f9cef


In [23]:
def get_relay_gini(df):
    relay_df = df.groupby('relay')['block_number'].nunique().reset_index()
    relay_df.columns = ['relay', 'cumulative_block_number']

    relays = relay_df['relay'].unique()
    relay_blocks = relay_df['cumulative_block_number']

    numerator = 0
    denominator = 2 * len(relays) * relay_blocks.sum()

    for i in range(len(relays)):
        for j in range(i+1, len(relays)):
            numerator += abs(relay_blocks.iloc[i] - relay_blocks.iloc[j])

    return numerator / denominator

In [14]:
def get_builder_gini(df):
    builder_df = df.groupby('builder_pubkey')['block_number'].nunique().reset_index()
    builder_df.columns = ['builder_pubkey', 'cumulative_block_number']

    builders = builder_df['builder_pubkey'].unique()
    builder_blocks = builder_df['cumulative_block_number']

    numerator = 0
    denominator = 2 * len(builders) * builder_blocks.sum()

    for i in range(len(builders)):
        for j in range(i+1, len(builders)):
            numerator += abs(builder_blocks.iloc[i] - builder_blocks.iloc[j])

    return numerator / denominator

In [15]:
def get_proposer_gini(df):
    proposer_df = df.groupby('proposer_address')[
        'block_number'].nunique().reset_index()
    proposer_df.columns = ['proposer_address', 'cumulative_block_number']

    proposers = proposer_df['proposer_address'].unique()
    proposer_blocks = proposer_df['cumulative_block_number']

    numerator = 0
    denominator = 2 * len(proposers) * proposer_blocks.sum()

    for i in range(len(proposers)):
        for j in range(i+1, len(proposers)):
            numerator += abs(proposer_blocks.iloc[i] - proposer_blocks.iloc[j])

    return numerator / denominator

In [24]:
get_relay_gini(df)

0.26662463010198023

In [17]:
get_builder_gini(df)

0.48955387592690414

In [18]:
get_proposer_gini(df)

0.4628380540053418