In [2]:
import os
import plotly.graph_objects as go
import pandas as pd
import numpy as np
from helpers import * 
#from xatu import *
import pytz
import pyxatu

pd.set_option('display.max_colwidth', None)

GOOGLE_CREDENTIALS = "./config/google-creds.json"

try:
    os.environ['GOOGLE_APPLICATION_CREDENTIALS']
except:
    print(f"setting google credentials as global variable...")
    os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = GOOGLE_CREDENTIALS
   

COLORS = [
    '#377eb8', '#ff7f00', '#4daf4a', '#984ea3', '#f781bf', '#a65628',
    '#e41a1c', '#a6cee3', '#999999', '#fdbf6f', '#b2df8a', '#fb9a99',
    '#cab2d6', '#1f78b4', '#33a02c', '#ff7f7f', '#8dd3c7', '#ffffb3',
    '#bebada', '#fb8072', '#80b1d3', '#fdb462', '#b3de69', '#fccde5',
    '#d9d9d9', '#bc80bd', '#ccebc5', '#ffed6f',
    '#6a3d9a', '#ffcc00', '#b15928', '#1f78b4', '#e31a1c', '#33a02c',
    '#fb9a99', '#e6ab02', '#a6761d', '#666666'
]

FONTSIZE = 16
DATA = "data2"

def get_time_in_curr_slot(ts, slot):
    return (ts - (1654824023000 + (slot-1-4e6)*12000) - 12000) / 1000

def parse_datetime(event_time_str):
    return datetime.strptime(event_time_str, '%Y-%m-%d %H:%M:%S.%f').replace(tzinfo=pytz.UTC).timestamp()*1000

In [3]:
xatu = pyxatu.PyXatu()
xatu

2024-08-14 12:40:29,277 - INFO - Clickhouse configs set
2024-08-14 12:40:29,278 - INFO - Clickhouse URL: https://clickhouse.analytics.production.platform.ethpandaops.io, User: arg


PyXatu(config_path=/home/devops/.pyxatu_config.json, user=arg, url=https://clickhouse.analytics.production.platform.ethpandaops.io)

In [4]:
df = xatu.get_blockevent( 
    time_interval="365 day", 
    columns="slot, event_date_time, meta_client_geo_country, meta_client_name"
).sort_values("slot")

df = df[df["meta_client_geo_country"] == "United States"]

df["timestamp"] = df["event_date_time"].apply(lambda x: parse_datetime(x))

df["seconds_in_slot"] = df.apply(lambda x: get_time_in_curr_slot(x["timestamp"], x["slot"]),axis=1)

df = df.groupby(["slot"])["seconds_in_slot"].min().reset_index().sort_values("slot")

2024-08-14 12:40:29,670 - INFO - Executing query: SELECT slot, event_date_time, meta_client_geo_country, meta_client_name FROM default.beacon_api_eth_v1_events_block WHERE slot_start_date_time > NOW() - INTERVAL '365 day' AND meta_network_name = 'mainnet'


In [5]:
proposer = xatu.get_proposers( 
    time_interval="365 day", 
    columns="slot, proposer_validator_index",
    orderby="slot"
)

proposer_labels = pandas_gbq.read_gbq("""
    SELECT DISTINCT validator_id, label FROM `ethereum-data-nero.ethdata.beaconchain_validators_db` 
""")
proposer_labels = pd.merge(proposer, proposer_labels, how="left", right_on="validator_id", left_on="proposer_validator_index")[["slot", "label"]]
proposer_labels["label"] = proposer_labels["label"].fillna("unidentified")
proposer_labels["label"] = proposer_labels["label"].apply(lambda x: "solo staker" if x.endswith(".eth") else x.lower())

2024-08-14 12:43:15,046 - INFO - Executing query: SELECT slot, proposer_validator_index FROM default.canonical_beacon_proposer_duty WHERE slot_start_date_time > NOW() - INTERVAL '365 day' AND meta_network_name = 'mainnet' ORDER BY slot


Downloading: 100%|[32m████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████[0m|[0m

2024-08-14 12:44:01,274 - INFO - Total time taken 43.93 s.
Finished at 2024-08-14 12:44:01.





In [6]:
df = pd.merge(df, proposer_labels, how="left", left_on="slot", right_on="slot")
df.dropna(inplace=True)
df

Unnamed: 0,slot,seconds_in_slot,label
13,7106016,4.314,binance
14,7106017,1.805,unidentified
15,7106018,2.443,binance
16,7106019,4.490,unidentified
17,7106021,2.528,unidentified
...,...,...,...
3494052,9733947,0.829,unidentified
3494053,9733948,1.922,coinbase
3494054,9733949,1.950,unidentified
3494055,9733950,3.706,kiln


In [None]:
def hist_late_performer(_df):   
    df = _df.copy()

    lido = df[df["label"] == "lido"]
    solo = df[df["label"] == "solo staker"]
    kiln = df[df["label"] == "kiln"]
    coinbase = df[df["label"] == "coinbase"]
    
    def calculate_histogram(data, bins, x_range):
        hist, bin_edges = np.histogram(data, bins=bins, range=x_range)
        # Normalize to percentage
        total_count = np.sum(hist)
        hist_normalized = (hist / total_count) * 100
        return bin_edges, hist_normalized

    x_lido, y_lido = calculate_histogram(lido['seconds_in_slot'], 50, (0, 10.1))
    x_solo, y_solo = calculate_histogram(solo['seconds_in_slot'], 50, (0, 10.1))
    x_kiln, y_kiln = calculate_histogram(kiln['seconds_in_slot'], 50, (0, 10.1))
    x_coinbase, y_coinbase = calculate_histogram(coinbase['seconds_in_slot'], 50, (0, 10.1))

    fig = go.Figure()

    fig.add_trace(go.Scatter(
        x=x_lido,
        y=y_lido,
        fill='tozeroy',
        mode='lines',
        name='Lido',
        line=dict(color=COLORS[0]),
        opacity=0.5
    ))
    fig.add_trace(go.Scatter(
        x=x_solo,
        y=y_solo,
        fill='tozeroy',
        mode='lines',
        name='Solo Stakers',
        line=dict(color=COLORS[1]),
        opacity=0.5
    ))
    
    fig.add_trace(go.Scatter(
            x=x_coinbase,
            y=y_coinbase,
            fill='tozeroy',
            mode='lines',
            name=f'Coinbase',
            line=dict(color=COLORS[3]),
            opacity=0.5
        ))

    fig.add_trace(go.Scatter(
            x=x_kiln,
            y=y_kiln,
            fill='tozeroy',
            mode='lines',
            name=f'Kiln',
            line=dict(color=COLORS[2]),
            opacity=0.5
        ))

    fig.update_layout(
        title=f'Blocks First Seen Timing <span style="font-size: 16px;">({slot_to_day(df.slot.min())} - {slot_to_day(df.slot.max())})</span>',
        yaxis_title='%',
        xaxis_title='seconds in slot',
        barmode='stack',
        legend_traceorder="normal",
        xaxis=dict(
            tickmode='linear',
            showgrid=True,
            gridcolor='lightgrey',
            range=[0, 5.01]
        ),
        yaxis=dict(
            showgrid=True,
            gridcolor='lightgrey',
        ),
        height=500,
        width=900,
        font=dict(
            family="Ubuntu Mono",
            size=FONTSIZE,
            color="black"
        ),
        legend=dict(
            x=1,
            y=1,
            xanchor='right',
            yanchor='top'
        ),
        plot_bgcolor = "#FFFFFF"

    )

    return fig

cs = 30  # 30-day window
days_step = 1  # Move one day at a time
total_days = 365

chunks = (total_days - cs) // days_step + 1
for i in os.listdir("att_pics"):
    if i.startswith("proposer_timing_games_entities_"):
        os.remove(f"att_pics/{i}")
        
for ix in range(chunks)[::2]:
    start_slot = df.slot.max() - 7200 * (cs + ix * days_step)
    end_slot = df.slot.max() - 7200 * ix * days_step
    print(ix, start_slot, end_slot)
    fig = hist_late_performer(df[(df["slot"] >= start_slot) & (df["slot"] < end_slot)])
    fig.write_image(f"att_pics/proposer_timing_games_entities_{ix:03}.png")
    print(f"att_pics/proposer_timing_games_entities_{ix:03}.png")

In [8]:
from PIL import Image
import glob

image_files = sorted(glob.glob('att_pics/proposer_timing_games_entities_*.png'), key=lambda x: int(x.split(".")[0][-3:]), reverse=True)
image_files += [image_files[-1]]*300
images = [Image.open(img) for img in image_files]
output_path = "att_pics/proposer_timing_games.gif"
images[0].save(output_path, save_all=True, append_images=images[1:], duration=40, loop=0)
print(f"GIF saved as {output_path}")

GIF saved as att_pics/proposer_timing_games.gif


In [23]:
_df = pd.concat([pd.read_parquet(f"{DATA}/failed_missed_head/{i}") for i in os.listdir(f"{DATA}/failed_missed_head/")],
         ignore_index=True)
misses = _df.groupby("slot")["validator"].nunique().reset_index()
misses.columns = ["slot", "misses"]
misses = misses[misses["slot"] >= df.slot.min()]
misses = misses[misses["slot"] <= df.slot.max()]
_df = _df[_df["slot"] >= df.slot.min()]
_df = _df[_df["slot"] <= df.slot.max()]

In [24]:
dfwm = pd.merge(df, misses, left_on="slot", right_on="slot").dropna().drop_duplicates()
_dfwml = dfwm[dfwm["slot"] > dfwml.slot.max()-7200*30]
largest = _dfwml.groupby("label")["slot"].nunique().reset_index().sort_values("slot", ascending=False).label[0:10].tolist()
largest.append("solo staker")
_dfwml = _dfwml[_dfwml["label"].isin(largest)]

In [21]:
dfwml.label.unique()

array(['lido', 'figment', 'unidentified', 'coinbase', 'kiln', 'renzo',
       'rocketpool', 'binance', 'ether.fi', 'kraken', 'solo staker'],
      dtype=object)

In [25]:
fig = go.Figure()
for i in [_dfwml]:
    missed_head_votes_over_label = i.groupby("label")["misses"].mean().reset_index()
    missed_head_votes_over_label["per"] = missed_head_votes_over_label["misses"] / 32800 * 100    
    missed_head_votes_over_label.set_index("label", inplace=True)
    missed_head_votes_over_label = missed_head_votes_over_label.loc[largest]
    missed_head_votes_over_label.reset_index(inplace=True)
    fig.add_trace(go.Bar(
            x=missed_head_votes_over_label['label'],
            y=missed_head_votes_over_label['per'],
            name='Missed Head Votes Over Proposers',
            marker_color=COLORS
        ))

fig.add_shape(
    type="line",
    x0=-0.5, x1=10.5, y0=100, y1=100,
    line=dict(color="black", width=2, dash="dash")
)

fig.update_layout(
    title=f'Missed Head Votes Over Proposers <span style="font-size: 16px;">({slot_to_day(df.slot.min())} - {slot_to_day(df.slot.max())})</span>',
    xaxis_title=None,
    yaxis_title=f'missed/failed head votes (%)',
    barmode='group',
    plot_bgcolor='white',
    font=dict(family="Ubuntu Mono", size=FONTSIZE),
    legend=dict(
        x=1,
        y=1,
        xanchor='right',
        yanchor='top'
    ),
    xaxis=dict(showgrid=True, gridcolor='lightgray', tickformat=",d"),
    yaxis=dict(showgrid=True, gridcolor='lightgray',range=[0,105], tickvals=list(range(0,101,10))),
    height=550,
    width=1200,
    )
fig.write_image(f"att_pics/missed_head_votes_over_proposers.png")
fig.show()

In [28]:
validator_labels = pandas_gbq.read_gbq("""
    SELECT DISTINCT validator_id, label FROM `ethereum-data-nero.ethdata.beaconchain_validators_db` 
""")
validator_labels["label"] = validator_labels["label"].apply(lambda x: "unidentified" if x == None else x)
validator_labels["label"] = validator_labels["label"].fillna("unidentified")
validator_labels["label"] = validator_labels["label"].apply(lambda x: "solo staker" if x.endswith(".eth") else x.lower())

def merge_labels(df, labels, left_on="validator", right_on="validator_id", nlargest=15, add=None):
    df = pd.merge(df, labels, how="left", left_on=left_on, right_on=right_on).drop(right_on, axis=1)
    df["label"] = df["label"].apply(lambda x: "unidentified" if x == None else x)
    df["label"] = df["label"].fillna("unidentified")
    df["label"] = df["label"].apply(lambda x: "solo staker" if x.endswith(".eth") else x.lower())
    largest = df.groupby("label")[left_on].sum().reset_index().sort_values(left_on, ascending=False)["label"].tolist()[0:nlargest]
    if add:
        largest += add
    if "solo staker" not in largest:
        largest.append("solo staker")
    df["label"] = df["label"].apply(lambda x: x if x in largest else "other")
    if "other" not in largest:
        largest.append("other")
    return df, largest

_df = _df[_df["slot"] >= _df["slot"].max() - 7200*31]
all_missed_head_votes, _ = merge_labels(_df, validator_labels, left_on="validator", right_on="validator_id")

Downloading: 100%|[32m████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████[0m|[0m

2024-08-14 12:57:41,584 - INFO - Total time taken 38.34 s.
Finished at 2024-08-14 12:57:41.





In [29]:
all_kiln_slots = dfwml[dfwml["label"] == "kiln"].slot.tolist()
all_lido_slots = dfwml[dfwml["label"] == "lido"].slot.tolist()
all_renzo_slots = dfwml[dfwml["label"] == "renzo"].slot.tolist()
all_etherfi_slots = dfwml[dfwml["label"] == "ether.fi"].slot.tolist()
all_coinbase_slots = dfwml[dfwml["label"] == "coinbase"].slot.tolist()

missed_head_votes_for_kiln_slots = all_missed_head_votes[all_missed_head_votes["slot"].isin(all_kiln_slots)]
missed_head_votes_for_lido_slots = all_missed_head_votes[all_missed_head_votes["slot"].isin(all_lido_slots)]
missed_head_votes_for_etherfi_slots = all_missed_head_votes[all_missed_head_votes["slot"].isin(all_etherfi_slots)]
missed_head_votes_for_coinbase_slots = all_missed_head_votes[all_missed_head_votes["slot"].isin(all_coinbase_slots)]

all_kiln_gr = missed_head_votes_for_kiln_slots[["validator", "slot", "label"]].drop_duplicates().groupby("label")["validator"].count().reset_index()
all_lido_gr = missed_head_votes_for_lido_slots[["validator", "slot", "label"]].drop_duplicates().groupby("label")["validator"].count().reset_index()
all_etherfi_gr = missed_head_votes_for_etherfi_slots[["validator", "slot", "label"]].drop_duplicates().groupby("label")["validator"].count().reset_index()
all_coinbase_gr = missed_head_votes_for_coinbase_slots[["validator", "slot", "label"]].drop_duplicates().groupby("label")["validator"].count().reset_index()

all_kiln_gr["share"] = all_kiln_gr["validator"]/len(missed_head_votes_for_kiln_slots[["validator", "slot", "label"]].drop_duplicates().index) * 100
all_lido_gr["share"] = all_lido_gr["validator"]/len(missed_head_votes_for_lido_slots[["validator", "slot", "label"]].drop_duplicates().index) * 100
all_etherfi_gr["share"] = all_etherfi_gr["validator"]/len(missed_head_votes_for_etherfi_slots[["validator", "slot", "label"]].drop_duplicates().index) * 100
all_coinbase_gr["share"] = all_coinbase_gr["validator"]/len(missed_head_votes_for_coinbase_slots[["validator", "slot", "label"]].drop_duplicates().index) * 100

total_validator_share = all_missed_head_votes.groupby("label")["validator"].count().reset_index().sort_values("validator", ascending=False)
total_validator_share["share"] = total_validator_share["validator"]/all_missed_head_votes.validator.count()*100

all_kiln_shares = pd.merge(all_kiln_gr[["label", "share"]], total_validator_share[["label", "share"]], left_on="label", right_on="label")
all_lido_shares = pd.merge(all_lido_gr[["label", "share"]], total_validator_share[["label", "share"]], left_on="label", right_on="label")
all_etherfi_shares = pd.merge(all_etherfi_gr[["label", "share"]], total_validator_share[["label", "share"]], left_on="label", right_on="label")
all_coinbase_shares = pd.merge(all_coinbase_gr[["label", "share"]], total_validator_share[["label", "share"]], left_on="label", right_on="label")

all_kiln_shares.drop("share_y", axis=1, inplace=True)
all_etherfi_shares.drop("share_y", axis=1, inplace=True)
all_coinbase_shares.drop("share_y", axis=1, inplace=True)

all_lido_shares.columns = ["label", "share_lido_proposer", "total_share"]
all_kiln_shares.columns = ["label", "share_kiln_proposer"]
all_etherfi_shares.columns = ["label", "share_etherfi_proposer"]
all_coinbase_shares.columns = ["label", "share_coinbase_proposer"]

all_kiln_shares["delta_kiln_proposer"] = all_kiln_shares["share_kiln_proposer"] / all_lido_shares["total_share"]
all_lido_shares["delta_lido_proposer"] = all_lido_shares["share_lido_proposer"] / all_lido_shares["total_share"]
all_etherfi_shares["delta_etherfi_proposer"] = all_etherfi_shares["share_etherfi_proposer"] / all_lido_shares["total_share"]
all_coinbase_shares["delta_coinbase_proposer"] = all_coinbase_shares["share_coinbase_proposer"] / all_lido_shares["total_share"]

missed_based_on_proposer = pd.merge(pd.merge(pd.merge(all_lido_shares, all_kiln_shares, left_on="label", right_on="label"), all_etherfi_shares, left_on="label", right_on="label"), all_coinbase_shares, left_on="label", right_on="label")
missed_based_on_proposer = missed_based_on_proposer.sort_values("total_share", ascending=False).reset_index(drop=True)
missed_based_on_proposer = missed_based_on_proposer[missed_based_on_proposer["label"] != "unidentified"]
missed_based_on_proposer = missed_based_on_proposer[missed_based_on_proposer["label"].isin(largest[:8])]


In [30]:
fig = go.Figure()

fig.add_trace(go.Bar(
    x=missed_based_on_proposer['label'],
    y=missed_based_on_proposer['delta_lido_proposer'],
    name='Lido',
    marker_color=COLORS[0],
))

fig.add_trace(go.Bar(
    x=missed_based_on_proposer['label'],
    y=missed_based_on_proposer['delta_coinbase_proposer'],
    name='Coinbase',
    marker_color=COLORS[1],
))

fig.add_trace(go.Bar(
    x=missed_based_on_proposer['label'],
    y=missed_based_on_proposer['delta_kiln_proposer'],
    name='Kiln',
    marker_color=COLORS[2],
))

fig.add_trace(go.Bar(
    x=missed_based_on_proposer['label'],
    y=missed_based_on_proposer['delta_etherfi_proposer'],
    name='EtherFi',
    marker_color=COLORS[3],
))

fig.update_layout(
    barmode='group',
    title=f'Missed Head Votes over Different Proposers <span style="font-size: 16px;">({slot_to_day(df.slot.min())} - {slot_to_day(df.slot.max())})</span>',
    xaxis_title=None,
    yaxis_title="share for specific proposer / total share",
    legend_title='Proposer',
    font=dict(
        family="Ubuntu Mono",
        size=FONTSIZE,
        color="black"
    ),
    legend=dict(
        x=1.1,
        y=1,
        xanchor='right',
        yanchor='top'
    ),
    xaxis=dict(showgrid=True, gridcolor='lightgray', tickformat=",d"),
    yaxis=dict(showgrid=True, gridcolor='lightgray',),
    height=550,
    width=1200,
    plot_bgcolor='white',
)

fig.add_shape(
    type="line",
    x0=-0.5, x1=len(missed_based_on_proposer['label']) - 0.5, y0=1, y1=1,
    line=dict(color="black", width=2, dash="dash")
)

fig.write_image(f"att_pics/missed_head_votes_over_proposers_percentage.png")
fig.show()

In [31]:
lidoco = set(validator_labels[validator_labels["label"].isin(["kiln", "lido", "coinbase"])].validator_id.tolist())

def parse_datetime(event_time_str):
    return datetime.strptime(event_time_str, '%Y-%m-%d %H:%M:%S.%f').replace(tzinfo=pytz.UTC).timestamp()*1000
 
SLOT_0_TIMESTAMP_MS = 1606824023000
SLOT_DURATION_MS = 12000

def get_seconds_in_slot(first_seen_ts: int, slot: int) -> float:
    slot_offset = slot * SLOT_DURATION_MS
    time_in_slot_ms = (first_seen_ts * 1000 - SLOT_0_TIMESTAMP_MS - slot_offset) % SLOT_DURATION_MS
    return round(time_in_slot_ms / 1000, 3)

In [32]:
try: 
    known = pd.read_parquet("./att_pics/known_timing_attestations.parquet")
except:
    known = pd.DataFrame(columns=["slot"])
known_epochs = set([i//32 for i in known.slot.unique().tolist()])
len(known_epochs)

420

In [40]:
CORRECT_HEAD_TIMING = "correct_head_timing"
FAILED_HEAD_TIMING = "wrong_head_timing"

def merge_labels(df, labels, left_on="validator", right_on="validator_id", nlargest=15):
    return pd.merge(df, labels, how="left", left_on=left_on, right_on=right_on).drop(right_on, axis=1)

wtiming = sorted([int(i.split(".")[0]) for i in os.listdir(f"data2/{FAILED_HEAD_TIMING}")], reverse=True)

timing_df = []
for ix, i in enumerate(wtiming):
    break
    if i in known_epochs:
        print(f"epoch {i} already known")
        continue
    print(ix, i)
    _df = pd.read_parquet(f"data2/{CORRECT_HEAD_TIMING}/{i}.parquet").drop_duplicates()
    _df = merge_labels(_df, validator_labels, left_on="attesting_validator_index", right_on="validator_id")
    _df = _df[_df["attesting_validator_index"].isin(lidoco)]        
    _df['event_date_time'] = _df["event_date_time"].apply(lambda x: parse_datetime(x))
    _df["event_date_time"] = _df.apply(lambda x: get_time_in_curr_slot(x["event_date_time"], x["slot"]),axis=1)
    timing_df.append(_df)
    if ix >= 620:
        break

try:
    timing_df = pd.concat(timing_df, ignore_index=True)
    timing_df.columns = ["slot", "attesting_validator_index", "seconds_in_slot"]


    timing_df = merge_labels(timing_df, validator_labels, left_on="attesting_validator_index", right_on="validator_id")
    timing_df = timing_df[timing_df["label"].isin(["kiln", "lido", "coinbase"])]

except:
    pass
try:
    raise
    timing_df = pd.concat([known, timing_df], ignore_index=True).drop_duplicates()
    timing_df.to_parquet("./att_pics/known_timing_attestations.parquet")
except:
    timing_df = known

In [42]:
import numpy as np
import plotly.graph_objects as go

def attestations_cdf(gg):

    data = gg.seconds_in_slot.tolist()[::100000]
    data.sort()

    data_array = np.array(data)

    cum_freq = np.cumsum(np.ones_like(data_array))
    cum_density = cum_freq / cum_freq[-1]

    cumulative_density = list(zip(data_array, cum_density))
    fig = go.Figure()

    fig.add_trace(go.Scatter(
        x=data_array,
        y=cum_density,
        mode='lines',
        name='Cumulative Density',
        line=dict(color=COLORS[0], width=3),
    ))

    fig.update_layout(
            title=f'Cum. Distribution of Attestations First Seen Timing <span style="font-size: 16px;">(epoch {gg.slot.min() //32:,} - {gg.slot.max() // 32:,})</span>',
            yaxis_title='cumulative probability',
            xaxis_title='seconds in slot',
            barmode='stack',
            legend_traceorder="normal",
            xaxis=dict(
                tickmode='linear',
                showgrid=True,
                gridcolor='lightgrey',
            ),
            yaxis=dict(
                showgrid=True,
                gridcolor='lightgrey',
            ),
            height=500,
            width=1200,
            font=dict(
                family="Ubuntu Mono",
                size=FONTSIZE,
                color="black"
            ),
            legend=dict(
                x=1,
                y=1,
                xanchor='right',
                yanchor='top'
            ),
            plot_bgcolor = "#FFFFFF"
        )

    return fig, cumulative_density

fig, cumulative_density = attestations_cdf(timing_df)
fig.write_image(f"att_pics/attestations_cdf.png")
fig.show()

In [45]:
import numpy as np
from scipy.interpolate import interp1d

CONSTANT_BROADCAST_TIME = 0

times = np.array([point[0] for point in cumulative_density])
cumulative_attestations = np.array([point[1] for point in cumulative_density])
cumulative_attestation_func = interp1d(times, cumulative_attestations, kind='linear', fill_value="extrapolate")

# Function to calculate the latest time a proposer with x% control can safely propose a block
def calculate_latest_proposal_time(x):
    if x >= 0.4:
        return 11.999
    threshold = 0.4 / (1 - x)
    
    for t in np.linspace(times[0], times[-1], 1000):
        if cumulative_attestation_func(t) > threshold:
            # The proposer must act before this point
            return t
    return None

In [46]:
def get_slope():
    x_values = np.linspace(0.01, 0.31, 31)
    latest_times = [calculate_latest_proposal_time(x)-CONSTANT_BROADCAST_TIME for x in x_values]
    x1, x2 = 5, 30
    y1 = calculate_latest_proposal_time(x1/100) - CONSTANT_BROADCAST_TIME
    y2 = calculate_latest_proposal_time(x2/100) - CONSTANT_BROADCAST_TIME
    slope = (y2 - y1) / (x2 - x1)
    print(f"For every 1% increase in validators, the proposal time delay increases by {slope:.2f} seconds.")
    print("slope: ", slope)
get_slope()

For every 1% increase in validators, the proposal time delay increases by 0.03 seconds.
slope:  0.03141705705705707


In [47]:
import plotly.graph_objects as go

CONSTANT_BROADCAST_TIME = 0 

x_values = np.linspace(0.01, 0.31, 31)
latest_times = [calculate_latest_proposal_time(x)-CONSTANT_BROADCAST_TIME for x in x_values]

x1, x2 = 5, 30
y1 = calculate_latest_proposal_time(x1/100) - CONSTANT_BROADCAST_TIME
y2 = calculate_latest_proposal_time(x2/100) - CONSTANT_BROADCAST_TIME

slope = (y2 - y1) / (x2 - x1)
print(f"For every 1% increase in validators, the proposal time delay increases by {slope:.2f} seconds.")

fig = go.Figure()

fig.add_trace(go.Scatter(
    x=x_values * 100,
    y=latest_times,
    mode='lines+markers',
    line=dict(color='#FF6347', width=3),
    marker=dict(size=8, color='#4682B4'),
    name="Latest Proposal Time"
))

x_intersect = 5
y_intersect = calculate_latest_proposal_time(x_intersect/100) - CONSTANT_BROADCAST_TIME

fig.add_shape(
    type="line",
    x0=x_intersect,
    y0=3.4,
    x1=x_intersect,
    y1=y_intersect,
    line=dict(color="Black", width=2, dash="dashdot")
)

fig.add_shape(
    type="line",
    x0=0,
    y0=y_intersect,
    x1=x_intersect,
    y1=y_intersect,
    line=dict(color="Black", width=2, dash="dashdot")
)

x_intersect = 30
y_intersect = calculate_latest_proposal_time(x_intersect/100) - CONSTANT_BROADCAST_TIME

fig.add_shape(
    type="line",
    x0=x_intersect,
    y0=3.4,
    x1=x_intersect,
    y1=y_intersect,
    line=dict(color="Black", width=2, dash="dashdot")
)

fig.add_shape(
    type="line",
    x0=0,
    y0=y_intersect,
    x1=x_intersect,
    y1=y_intersect,
    line=dict(color="Black", width=2, dash="dashdot")
)

fig.add_trace(go.Scatter(
    x=[x1, x2],
    y=[y1, y1 + slope * (x2 - x1)],
    mode='lines',
    line=dict(color='Green', width=2, dash='solid'),
    name="Slope Line"
))

fig.update_layout(
    title="Latest Proposal Time vs Percentage of Validators Controlled",
    xaxis_title="Percentage of Validators Controlled by Proposer (%)",
    yaxis_title="Latest Proposal Time (seconds in slot)",
    plot_bgcolor = "#FFFFFF",
    paper_bgcolor = "#FFFFFF",
    xaxis=dict(showgrid=True, gridcolor='#e0e0e0', tickvals=list(range(0, 31, 1))),
    yaxis=dict(showgrid=True, gridcolor='#e0e0e0'),
    showlegend=False,
    height=500,
    width=900,
    font=dict(
        family="Ubuntu Mono",
        size=14,
        color="black"
    ),
    
)

fig.write_image("att_pics/timing_games_proposer_share.png")
fig.show()

For every 1% increase in validators, the proposal time delay increases by 0.03 seconds.


In [51]:
timing_df.sort_values("slot", inplace=True)
timing_df.reset_index(drop=True, inplace=True)

In [52]:
all_kiln_slots = set(proposer_labels[proposer_labels["label"] == "kiln"].slot.tolist())
all_lido_slots = set(proposer_labels[proposer_labels["label"] == "lido"].slot.tolist())
all_coinbase_slots = set(proposer_labels[proposer_labels["label"] == "coinbase"].slot.tolist())

timing_df.columns = ["slot", "attesting_validator_index", "seconds_in_slot", "label"]

kiln = timing_df[timing_df["slot"].isin(all_kiln_slots)]
lido = timing_df[timing_df["slot"].isin(all_lido_slots)]
coinbase = timing_df[timing_df["slot"].isin(all_coinbase_slots)]

timing_df = timing_df[timing_df["seconds_in_slot"] < 12]

In [None]:
def hist_late_performer(_df, sec):   
    df = _df.copy()

    lido = df[df["label"] == "lido"]
    kiln = df[df["label"] == "kiln"]
    coinbase = df[df["label"] == "coinbase"]
    
    def calculate_histogram(data, bins, x_range):
        hist, bin_edges = np.histogram(data, bins=bins, range=x_range)
        # Normalize to percentage
        total_count = np.sum(hist)
        hist_normalized = (hist / total_count) * 100
        return bin_edges, hist_normalized

    x_lido, y_lido = calculate_histogram(lido['seconds_in_slot'], 50, (0, 10.1))
    x_kiln, y_kiln = calculate_histogram(kiln['seconds_in_slot'], 50, (0, 10.1))
    x_coinbase, y_coinbase = calculate_histogram(coinbase['seconds_in_slot'], 50, (0, 10.1))

    fig = go.Figure()

    fig.add_trace(go.Scatter(
        x=x_lido,
        y=y_lido,
        fill='tozeroy',
        mode='lines',
        name='Lido',
        line=dict(color=COLORS[0]),
        opacity=0.2
    ))
    
    fig.add_trace(go.Scatter(
        x=x_coinbase,
        y=y_coinbase,
        fill='tozeroy',
        mode='lines',
        name=f'Coinbase',
        line=dict(color=COLORS[3]),
        opacity=0.2
    ))
    
    fig.add_trace(go.Scatter(
        x=x_kiln,
        y=y_kiln,
        fill='tozeroy',
        mode='lines',
        name=f'Kiln',
        line=dict(color=COLORS[2]),
        opacity=0.5
    ))
    
    maxes = [max(i, j) for i, j in zip(y_kiln, y_lido)]
    fig.add_shape(
        type="line",
        x0=sec, x1=sec, y0=0, y1=19,
        line=dict(color="black", width=2, dash="dash")
    )
    
    fig.add_annotation(
        x=sec, y=20,
        text=f"block seen", showarrow=False, 
        xanchor='center', yanchor='top',  font=dict(family="Ubuntu Mono", size=FONTSIZE+3, color="#111111"),
    )

    fig.update_layout(
        title=f'Attestations first seen timing for blocks seen at second {sec:.2f}-{sec+0.2:.2f}s',
        yaxis_title='%',
        xaxis_title='seconds in slot',
        barmode='stack',
        legend_traceorder="normal",
        xaxis=dict(
            tickmode='linear',
            showgrid=True,
            gridcolor='lightgrey',
            range=[0, 10.01]
        ),
        yaxis=dict(
            showgrid=True,
            gridcolor='lightgrey',
            range=[0, 20]
        ),
        height=500,
        width=900,
        font=dict(
            family="Ubuntu Mono",
            size=FONTSIZE,
            color="black"
        ),
        legend=dict(
            x=1,
            y=1,
            xanchor='right',
            yanchor='top'
        ),
        plot_bgcolor = "#FFFFFF"
    )
    return fig

for i in list(range(14,41, 2)):
    fig = hist_late_performer(timing_df[timing_df["slot"].isin(set(df[(df["seconds_in_slot"] >= i/10) & (df["seconds_in_slot"] < i/10 + 0.2)].slot.tolist()))], i/10)
    fig.write_image(f"att_pics/attestations_seen_late_{int(i):03}.png")
    print(f"att_pics/attestations_seen_late_{int(i):03}_misses.png")
    #fig.show()

In [54]:
from PIL import Image
import glob
import re

all_files = glob.glob('att_pics/*.png')
pattern = re.compile(r'attestations_seen_late_\d{3}\.png')
image_files = [f for f in all_files if pattern.search(f)]
image_files = sorted(image_files, key=lambda x: int(x.split(".")[0][-3:]), reverse=False)
print(image_files)
image_files += [image_files[-1]]*5
images = [Image.open(img) for img in image_files]
output_path = "att_pics/attestations_seen_late.gif"
images[0].save(output_path, save_all=True, append_images=images[1:], duration=800, loop=0)

print(f"GIF saved as {output_path}")

['att_pics/attestations_seen_late_014.png', 'att_pics/attestations_seen_late_016.png', 'att_pics/attestations_seen_late_018.png', 'att_pics/attestations_seen_late_020.png', 'att_pics/attestations_seen_late_022.png', 'att_pics/attestations_seen_late_024.png', 'att_pics/attestations_seen_late_026.png', 'att_pics/attestations_seen_late_028.png', 'att_pics/attestations_seen_late_030.png', 'att_pics/attestations_seen_late_032.png', 'att_pics/attestations_seen_late_034.png', 'att_pics/attestations_seen_late_036.png', 'att_pics/attestations_seen_late_038.png', 'att_pics/attestations_seen_late_040.png']
GIF saved as att_pics/attestations_seen_late.gif


In [55]:
def hist_late_performer(_df, sec=0, entity=None):   
    df = _df.copy()

    lido = df[df["label"] == "lido"]
    kiln = df[df["label"] == "kiln"]
    coinbase = df[df["label"] == "coinbase"]

    def calculate_histogram(data, bins, x_range):
        hist, bin_edges = np.histogram(data, bins=bins, range=x_range)
        # Normalize to percentage
        total_count = np.sum(hist)
        hist_normalized = (hist / total_count) * 100
        return bin_edges, hist_normalized

    x_lido, y_lido = calculate_histogram(lido['seconds_in_slot'], 50, (0, 10.1))
    x_kiln, y_kiln = calculate_histogram(kiln['seconds_in_slot'], 50, (0, 10.1))
    x_coinbase, y_coinbase = calculate_histogram(coinbase['seconds_in_slot'], 50, (0, 10.1))

    fig = go.Figure()

    fig.add_trace(go.Scatter(
        x=x_lido,
        y=y_lido,
        fill='tozeroy',
        mode='lines',
        name='Lido',
        line=dict(color=COLORS[0]),
        opacity=0.2
    ))

    fig.add_trace(go.Scatter(
        x=x_coinbase,
        y=y_coinbase,
        fill='tozeroy',
        mode='lines',
        name=f'Coinbase',
        line=dict(color=COLORS[3]),
        opacity=0.2
    ))
    fig.add_trace(go.Scatter(
        x=x_kiln,
        y=y_kiln,
        fill='tozeroy',
        mode='lines',
        name=f'Kiln',
        line=dict(color=COLORS[2]),
        opacity=0.5
    ))
    
    maxes = [max(i, j) for i, j in zip(y_kiln, y_lido)]
    fig.add_shape(
        type="line",
        x0=sec, x1=sec, y0=0, y1=19,
        line=dict(color="black", width=2, dash="dash")
    )
    
    fig.add_annotation(
        x=sec, y=20,
        text=f"block seen", showarrow=False, 
        xanchor='center', yanchor='top',  font=dict(family="Ubuntu Mono", size=FONTSIZE+3, color="#111111"),
    )

    fig.update_layout(
        title=f'Attestations first seen timing for {entity} blocks',
        yaxis_title='%',
        xaxis_title='seconds in slot',
        barmode='stack',
        legend_traceorder="normal",
        xaxis=dict(
            tickmode='linear',
            showgrid=True,
            gridcolor='lightgrey',
            range=[0, 10.01]
        ),
        yaxis=dict(
            showgrid=True,
            gridcolor='lightgrey',
            range=[0, 20]
        ),
        height=500,
        width=900,
        font=dict(
            family="Ubuntu Mono",
            size=FONTSIZE,
            color="black"
        ),
        legend=dict(
            x=1,
            y=1,
            xanchor='right',
            yanchor='top'
        ),
        plot_bgcolor = "#FFFFFF"
    )

    return fig

    
for ix,  i in enumerate([kiln, lido, coinbase]):
    slottiming = df[df["label"] == ["kiln", "lido", "coinbase"][ix]].seconds_in_slot.mean()
    fig = hist_late_performer(i, slottiming, ["kiln", "lido", "coinbase"][ix])
    fig.write_image(f"att_pics/attestations_seen_late_by_proposer_{int(ix):03}.png")
    print(f"att_pics/attestations_seen_late_{int(ix):03}.png")
    fig.show()
    #break

att_pics/attestations_seen_late_000.png


att_pics/attestations_seen_late_001.png


att_pics/attestations_seen_late_002.png


In [61]:
from PIL import Image
import glob

image_files = sorted(glob.glob('att_pics/attestations_seen_late_by_proposer_*.png'), key=lambda x: int(x.split(".")[0][-3:]), reverse=False)
print(image_files)
images = [Image.open(img) for img in image_files if not "misses" in img]
output_path = "att_pics/attestations_seen_late_by_proposer_misses.gif"
images[0].save(output_path, save_all=True, append_images=images[1:], duration=800, loop=0)
print(f"GIF saved as {output_path}")

['att_pics/attestations_seen_late_by_proposer_misses_000.png', 'att_pics/attestations_seen_late_by_proposer_000.png', 'att_pics/attestations_seen_late_by_proposer_001.png', 'att_pics/attestations_seen_late_by_proposer_misses_001.png', 'att_pics/attestations_seen_late_by_proposer_002.png', 'att_pics/attestations_seen_late_by_proposer_misses_002.png']
GIF saved as att_pics/attestations_seen_late_by_proposer_misses.gif


In [64]:
proposer = xatu.get_proposers( 
    time_interval="365 day", 
    columns="slot, proposer_validator_index",
    orderby="slot"
)

proposer_labels = pandas_gbq.read_gbq("""
    SELECT DISTINCT validator_id, label FROM `ethereum-data-nero.ethdata.beaconchain_validators_db` 
""")
proposer_labels = pd.merge(proposer, proposer_labels, how="left", right_on="validator_id", left_on="proposer_validator_index")[["slot", "label"]]
proposer_labels["label"] = proposer_labels["label"].fillna("unidentified")
proposer_labels["label"] = proposer_labels["label"].apply(lambda x: "solo staker" if x.endswith(".eth") else x.lower())
proposer_labels

potential_reorgs = xatu.get_reorgs( 
    time_interval="370 day", 
    columns="slot-depth",
)

canonical = xatu.get_slots( 
    time_interval="370 day", 
    columns="slot, block_root",
)
missed = set(range(canonical.slot.min(), canonical.slot.max()+1)) - set(canonical.slot.tolist())

reorgs = sorted(set(potential_reorgs["slot-depth"].tolist()).intersection(missed))

2024-08-14 13:21:11,797 - INFO - Executing query: SELECT slot, proposer_validator_index FROM default.canonical_beacon_proposer_duty WHERE slot_start_date_time > NOW() - INTERVAL '365 day' AND meta_network_name = 'mainnet' ORDER BY slot


Downloading: 100%|[32m████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████[0m|[0m

2024-08-14 13:21:58,695 - INFO - Total time taken 45.09 s.
Finished at 2024-08-14 13:21:58.





2024-08-14 13:21:59,705 - INFO - Executing query: SELECT slot-depth FROM default.beacon_api_eth_v1_events_chain_reorg WHERE slot_start_date_time > NOW() - INTERVAL '370 day' AND meta_network_name = 'mainnet'
2024-08-14 13:22:00,012 - INFO - Executing query: SELECT slot, block_root FROM default.canonical_beacon_block WHERE slot_start_date_time > NOW() - INTERVAL '370 day' AND meta_network_name = 'mainnet'


In [65]:
validator_labels = pandas_gbq.read_gbq("""
    SELECT DISTINCT validator_id, label FROM `ethereum-data-nero.ethdata.beaconchain_validators_db` 
    where withdrawn = 0
""")
validator_labels["label"] = validator_labels["label"].apply(lambda x: "unidentified" if x == None else x)
validator_labels["label"] = validator_labels["label"].fillna("unidentified")
validator_labels["label"] = validator_labels["label"].apply(lambda x: "solo staker" if x.endswith(".eth") else x.lower())
share = validator_labels.groupby("label")["validator_id"].nunique().reset_index().sort_values("validator_id", ascending=False)
share["share"] = share["validator_id"]/validator_labels.validator_id.nunique()*100
share

Downloading: 100%|[32m████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████[0m|[0m

2024-08-14 13:22:40,422 - INFO - Total time taken 35.02 s.
Finished at 2024-08-14 13:22:40.





Unnamed: 0,label,validator_id,share
46,lido,304461,28.598038
82,unidentified,266942,25.073876
18,coinbase,109792,10.312768
30,ether.fi,52089,4.892722
42,kiln,38144,3.582868
...,...,...,...
35,florisgamer.polygon,1,0.000094
49,mauser,1,0.000094
38,gooch: eth2 depositor,1,0.000094
79,sybil delegate: mengboshen,1,0.000094


In [None]:
import pickle

def merge_labels(df, labels, left_on="validator", right_on="validator_id", nlargest=15, add=None):
    
    df = pd.merge(df, labels, how="left", left_on=left_on, right_on=right_on).drop(right_on, axis=1)
    df["label"] = df["label"].apply(lambda x: "unidentified" if x == None else x)
    df["label"] = df["label"].fillna("unidentified")
    df["label"] = df["label"].apply(lambda x: "solo staker" if x.endswith(".eth") else x.lower())
    largest = df.groupby("label")[left_on].sum().reset_index().sort_values(left_on, ascending=False)["label"].tolist()[0:nlargest]
    if add:
        largest += add
    if "solo staker" not in largest:
        largest.append("solo staker")
    df["label"] = df["label"].apply(lambda x: x if x in largest else "other")
    if "other" not in largest:
        largest.append("other")
    return df, largest

def get_reorg_votes(entity):
    entity_reorged_slots = sorted(set(proposer_labels[proposer_labels["label"] == entity].slot.tolist()).intersection(set(reorgs)))
    voted_for_reorged_block = []
    voted_for_parent_block = []
    chunks = len(entity_reorged_slots) // 50 + 1
    for i in range(0, chunks):
        res_wrong = None
        res_right = None
        _entity_reorged_slots = entity_reorged_slots[i*50: (i+1)*50]
        _res = xatu.client.execute_query(f"""
            SELECT DISTINCT
                        block_slot, slot, validators, source_root, target_root, beacon_block_root
                    FROM default.canonical_beacon_elaborated_attestation
                    WHERE
                        block_slot_start_date_time > NOW() - INTERVAL '365 day'
                        AND meta_network_name = 'mainnet'
                        AND slot in {str(_entity_reorged_slots)}
            """,  "block_slot, slot, validators, source_root, target_root, beacon_block_root"
        )

        _res["validators"] = _res["validators"].apply(lambda x: eval(x))
        _res = _res.explode("validators").reset_index(drop=True)
        _res, _ = merge_labels(_res, validator_labels, left_on="validators", right_on="validator_id", add=["binance us (kiln)"])

        for slot in _entity_reorged_slots:
            print(slot)
            res_wrong = None
            res_right = None
            res = _res[_res["slot"] == slot].copy()
            for root in res.groupby("beacon_block_root")["validators"].nunique().reset_index()["beacon_block_root"].tolist():
                canonical_root = None
                _s = slot
                while canonical_root == None:
                    if len(canonical[canonical["slot"] == _s]) == 0:
                        _s -= 1
                        continue
                    canonical_root = canonical[canonical["slot"] == _s].block_root.values[0]
                if root == canonical_root:
                    res_right = res[res["beacon_block_root"] == canonical_root]
                else:
                    if res_wrong is not None and isinstance(res_wrong, pd.DataFrame):
                        res_wrong = pd.concat([res_wrong, res[res["beacon_block_root"] == root]])
                    else:
                        res_wrong = res[res["beacon_block_root"] == root]
            if not isinstance(res_wrong, pd.DataFrame):
                print(f"Slot {slot} had noone voting wrong")
                continue

            res_wrong = res_wrong.groupby("label").validators.nunique().reset_index()
            res_wrong.columns = ["label", "validators_wrong_vote"]
            res_right = res_right.groupby("label").validators.nunique().reset_index()
            res_right.columns = ["label", "validators_right_vote"]

            res_both = pd.merge(res_wrong, res_right, how="outer", left_on="label", right_on="label")
            res_both.fillna(0, inplace=True)
            if entity not in res_both.label.tolist():
                print("something went wrong")
                continue
            voted_for_reorged_block.append(res_both[res_both["label"] == entity]["validators_wrong_vote"].values[0])
            voted_for_parent_block.append(res_both[res_both["label"] == entity]["validators_right_vote"].values[0])
            print(f'slot={slot} | {entity}: correct={res_both[res_both["label"] == entity]["validators_right_vote"].values[0]} | false={res_both[res_both["label"] == entity]["validators_wrong_vote"].values[0]}')
    with open(f'att_pics/{entity}_votes.pickle', 'wb') as handle:
        pickle.dump([voted_for_reorged_block, voted_for_parent_block], handle)
    return (
        voted_for_reorged_block,
        voted_for_parent_block
    )

entities = share.label.tolist()[:10]
vote = {}
for entity in entities:
    voted_for_reorged_block, voted_for_parent_block = get_reorg_votes(entity)
    vote[entity] = (voted_for_reorged_block, voted_for_parent_block)
    totals = [i+j for i, j in zip(voted_for_reorged_block, voted_for_parent_block)]
    print(entity, np.sum(voted_for_reorged_block)/sum(totals)*100,np.sum(voted_for_parent_block)/sum(totals)*100)

In [67]:
j = [i for i in os.listdir(f'att_pics') if i.endswith("_votes.pickle")]
for file_name in j:
    new_name = "att_pics/" + file_name.replace(' ', '_').replace('(', '').replace(')', '')
    os.rename("att_pics/" + file_name, new_name)
    print(f"Renamed: {file_name} -> {new_name}")
j = [i for i in os.listdir(f'att_pics') if i.endswith("_votes.pickle")]

vote = {}
for entity in j:
    with open(f'att_pics/{entity}', 'rb') as handle:
        result = pickle.load(handle)
    voted_for_reorged_block, voted_for_parent_block = result
    if voted_for_reorged_block:
        vote[entity.split(".")[0]] = (voted_for_reorged_block, voted_for_parent_block)
        totals = [i+j for i, j in zip(voted_for_reorged_block, voted_for_parent_block)]
        print(entity.split(".")[0], np.sum(voted_for_reorged_block)/sum(totals)*100,np.sum(voted_for_parent_block)/sum(totals)*100)
    

Renamed: coinbase_votes.pickle -> att_pics/coinbase_votes.pickle
Renamed: figment_votes.pickle -> att_pics/figment_votes.pickle
Renamed: binance_us_kiln_votes.pickle -> att_pics/binance_us_kiln_votes.pickle
Renamed: ether.fi_votes.pickle -> att_pics/ether.fi_votes.pickle
Renamed: staked.us_votes.pickle -> att_pics/staked.us_votes.pickle
Renamed: bitpanda_kiln_votes.pickle -> att_pics/bitpanda_kiln_votes.pickle
Renamed: binance_votes.pickle -> att_pics/binance_votes.pickle
Renamed: lido_votes.pickle -> att_pics/lido_votes.pickle
Renamed: unidentified_votes.pickle -> att_pics/unidentified_votes.pickle
Renamed: rocketpool_votes.pickle -> att_pics/rocketpool_votes.pickle
Renamed: kiln_votes.pickle -> att_pics/kiln_votes.pickle
Renamed: kraken_votes.pickle -> att_pics/kraken_votes.pickle
coinbase_votes 1.2351676876723974 98.7648323123276
figment_votes 0.7965267332148229 99.20347326678518
binance_us_kiln_votes 41.46341463414634 58.536585365853654
ether 4.215222141296431 95.78477785870358
sta

In [68]:
with open('att_pics/votes.pickle', 'wb') as handle:
    pickle.dump(vote, handle)

with open('att_pics/votes.pickle', 'rb') as handle:
    b = pickle.load(handle)

tt = pd.DataFrame(columns = ["entity", "votes_for_parent", "votes_for_wrong"])
ns = {}
for i in vote.keys():
    print({i: len(vote[i][0])})
    ns[i.replace("_votes", "")] = len(vote[i][0])
    totals = [i+j for i, j in zip(*vote[i])]
    print(i, np.sum(vote[i][0])/sum(totals)*100,np.sum(vote[i][1])/sum(totals)*100)
    tt.loc[len(tt), ("entity", "votes_for_parent", "votes_for_wrong")] = i, np.sum(vote[i][1])/sum(totals)*100, np.sum(vote[i][0])/sum(totals)*100
tt, ns

{'coinbase_votes': 1223}
coinbase_votes 1.2351676876723974 98.7648323123276
{'figment_votes': 110}
figment_votes 0.7965267332148229 99.20347326678518
{'binance_us_kiln_votes': 3}
binance_us_kiln_votes 41.46341463414634 58.536585365853654
{'ether': 96}
ether 4.215222141296431 95.78477785870358
{'staked': 89}
staked 2.8503562945368173 97.14964370546319
{'binance_votes': 43}
binance_votes 0.11722134447785537 99.88277865552215
{'lido_votes': 807}
lido_votes 2.7077205439799243 97.29227945602008
{'unidentified_votes': 1900}
unidentified_votes 1.7599536105417157 98.24004638945829
{'rocketpool_votes': 240}
rocketpool_votes 1.52102872021042 98.47897127978959
{'kiln_votes': 208}
kiln_votes 11.673751270311323 88.32624872968869
{'kraken_votes': 163}
kraken_votes 1.234146685804372 98.76585331419562


(                   entity votes_for_parent votes_for_wrong
 0          coinbase_votes        98.764832        1.235168
 1           figment_votes        99.203473        0.796527
 2   binance_us_kiln_votes        58.536585       41.463415
 3                   ether        95.784778        4.215222
 4                  staked        97.149644        2.850356
 5           binance_votes        99.882779        0.117221
 6              lido_votes        97.292279        2.707721
 7      unidentified_votes        98.240046        1.759954
 8        rocketpool_votes        98.478971        1.521029
 9              kiln_votes        88.326249       11.673751
 10           kraken_votes        98.765853        1.234147,
 {'coinbase': 1223,
  'figment': 110,
  'binance_us_kiln': 3,
  'ether': 96,
  'staked': 89,
  'binance': 43,
  'lido': 807,
  'unidentified': 1900,
  'rocketpool': 240,
  'kiln': 208,
  'kraken': 163})

In [69]:
tt["entity"] = tt["entity"].str.replace("_votes", "")
tt = tt[tt["entity"].isin(largest)]
largest = [i for i in largest if i in tt.entity.tolist()]
tt.set_index("entity", inplace=True)
tt = tt.loc[largest]
tt.reset_index(inplace=True)
tt

In [71]:
def hex_to_rgba(hex_color, alpha=1.0):
    hex_color = hex_color.lstrip('#')
    if len(hex_color) == 6:
        r, g, b = int(hex_color[0:2], 16), int(hex_color[2:4], 16), int(hex_color[4:6], 16)
    elif len(hex_color) == 8:
        r, g, b = int(hex_color[0:2], 16), int(hex_color[2:4], 16), int(hex_color[4:6], 16)
        alpha = int(hex_color[6:8], 16) / 255.0
    else:
        return None
    return f'rgba({r}, {g}, {b}, {alpha})'

In [72]:
fig = go.Figure(data=[
    go.Bar(name='Votes for Own/Local Block', x=tt["entity"], y=tt["votes_for_wrong"], marker_color=hex_to_rgba(COLORS[6], 0.9)),
    go.Bar(name='Votes for Parent/Reorg Block', x=tt["entity"], y=tt["votes_for_parent"], marker_color=hex_to_rgba(COLORS[0], 0.9)),
])
annotations = [
    dict(
        x=ix, y=10, text=f"n={ns[entity]:,}", showarrow=False, 
        xanchor='center', yanchor='top',  font=dict(family="Ubuntu Mono", size=FONTSIZE+3, color="white"),
    ) 
    for ix, entity in enumerate(tt["entity"].unique().tolist())
]
fig.update_layout (
    title=f'Wrong votes for reorged local block vs parent block <span style="font-size: 16px;">({slot_to_day(proposer_labels.slot.min())} - {slot_to_day(proposer_labels.slot.max())})</span>',
    xaxis_title="block proposer",
    barmode='stack',
    yaxis_title=f'% of validators',
    plot_bgcolor='white',
    font=dict(family="Ubuntu Mono", size=FONTSIZE),
    legend=dict(
        x=0.98,
        y=0.94,
        xanchor='right',
        yanchor='top',
        bgcolor="rgba(255,255,255,0.9)"
    ),
        annotations=annotations,

    xaxis=dict(showgrid=True, gridcolor='#888888', tickformat=",d"),
    yaxis=dict(showgrid=True, gridcolor='#888888',
              tickvals=list(range(0,101,10))),
    height=550,
    width=1200,
)


fig.write_image(f"att_pics/votes_for_local_reorged_block.png")
fig.show()