In [None]:
import sqlalchemy as sa
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import numpy as np
from IPython.display import display

"""
This script obtains the ordered list of slots together with the pool that was assigned to propose a block in each slot.
It also records whether each slot was missed and how many consecutive slots a pool got (either scheduled or proposed).
"""

# Connect to database
# NOTE(review): the user, password and database name in the URL look like
# placeholders -- confirm they are replaced (or injected) before running.
# %load_ext sql
engine = sa.create_engine('postgresql://usr:pwd@localhost:5432/database')

# IPython magic: (re)load the `sql` extension so the %sql magic is available.
%reload_ext sql

# IPython magic: hand the engine's URL to %sql (presumably so %sql cells use
# the same database as `engine` -- confirm against the extension's docs).
%sql $engine.url

# Epoch boundary used by the queries below: epochs <= merge_epoch are treated
# as 'pre-merge', epochs > merge_epoch as 'post-merge'.
merge_epoch = 146875

# 1. Obtain missed blocks
# One row per row of t_epoch_metrics_summary: the raw f_missed_blocks value
# plus a 'pre-merge' / 'post-merge' label derived from f_epoch.
sql_stmnt = f"""
    select 
    f_missed_blocks, 
    CASE 
    WHEN f_epoch <= {merge_epoch} THEN 'pre-merge' 
    WHEN f_epoch > {merge_epoch} THEN 'post-merge' 
    ELSE NULL END AS f_merge 
    from t_epoch_metrics_summary 
"""


df_missed_blocks = pd.read_sql(sql_stmnt, engine)
print("Executed")

# Parse missed blocks to int
# Each value is rendered as text and split on commas. Brackets and whitespace
# are stripped from BOTH ends of every token, so "[1,2,3]", "[1, 2, 3]" and
# "[1,2,3,]" all yield 1, 2, 3. Stripping only "[" would silently drop any
# token that ends in "]" or carries a leading space.
missed_blocks_list = []
for raw_value in df_missed_blocks['f_missed_blocks']:
    for token in str(raw_value).split(","):
        token = token.strip("[] \t\r\n")
        if token.isdigit():  # skips empty tokens and non-numeric text such as "None"
            missed_blocks_list.append(int(token))


# Pools pre merge
# Render the missed slots once for the IN (...) lists used below.
# If no missed slot was parsed, fall back to the sentinel -1 so the queries
# stay valid SQL (an empty `in ()` is a syntax error in PostgreSQL).
# Assumes f_proposer_slot is never negative, so -1 matches no row:
# `in (-1)` selects nothing and `not in (-1)` selects everything.
missed_slots_sql = ", ".join(str(x) for x in missed_blocks_list) or "-1"

# 2. Obtain scheduled blocks for pools
# Every proposer duty up to and including the merge epoch, with its pool.
sql_stmnt = f"""
    select t_proposer_duties.f_val_idx, f_pool, f_pool_name, f_proposer_slot, f_proposer_slot/32 as f_epoch
    from t_proposer_duties
    inner join eth2_pubkeys
    on t_proposer_duties.f_val_idx = eth2_pubkeys.f_val_idx
    where f_proposer_slot/32 <= {merge_epoch}
    order by f_proposer_slot asc
"""

df_pool_scheduled_pre = pd.read_sql(sql_stmnt, engine)
print("Executed")

# 3. Obtain proposed blocks for pools
# Same duties, keeping only slots that are NOT in the missed list.
sql_stmnt = f"""
    select f_pool, f_proposer_slot, f_proposer_slot as f_proposed_block
    from t_proposer_duties
    inner join eth2_pubkeys
    on t_proposer_duties.f_val_idx = eth2_pubkeys.f_val_idx
    where f_proposer_slot/32 <= {merge_epoch} and f_proposer_slot not in ({missed_slots_sql})
    order by f_proposer_slot asc
"""

df_pool_proposed_pre = pd.read_sql(sql_stmnt, engine)
print("Executed")

# 4. Obtain missed blocks for pools
# Same duties, keeping only slots that ARE in the missed list.
sql_stmnt = f"""
    select f_pool, f_proposer_slot, f_proposer_slot as f_missed_block
    from t_proposer_duties
    inner join eth2_pubkeys
    on t_proposer_duties.f_val_idx = eth2_pubkeys.f_val_idx
    where f_proposer_slot/32 <= {merge_epoch} and f_proposer_slot in ({missed_slots_sql})
    order by f_proposer_slot asc
"""

df_pool_missed_pre = pd.read_sql(sql_stmnt, engine)
print("Executed")

# Pools post merge
# Render the missed slots once for the IN (...) lists used below.
# If no missed slot was parsed, fall back to the sentinel -1 so the queries
# stay valid SQL (an empty `in ()` is a syntax error in PostgreSQL).
# Assumes f_proposer_slot is never negative, so -1 matches no row:
# `in (-1)` selects nothing and `not in (-1)` selects everything.
missed_slots_sql = ", ".join(str(x) for x in missed_blocks_list) or "-1"

# 5. Obtain scheduled blocks for pools
# Every proposer duty after the merge epoch, with its pool.
sql_stmnt = f"""
    select t_proposer_duties.f_val_idx, f_pool, f_pool_name, f_proposer_slot, f_proposer_slot/32 as f_epoch
    from t_proposer_duties
    inner join eth2_pubkeys
    on t_proposer_duties.f_val_idx = eth2_pubkeys.f_val_idx
    where f_proposer_slot/32 > {merge_epoch}
    order by f_proposer_slot asc
"""


df_pool_scheduled_post = pd.read_sql(sql_stmnt, engine)
print("Executed")

# 6. Obtain proposed blocks for pools
# Same duties, keeping only slots that are NOT in the missed list.
sql_stmnt = f"""
    select f_pool, f_proposer_slot, f_proposer_slot as f_proposed_block
    from t_proposer_duties
    inner join eth2_pubkeys
    on t_proposer_duties.f_val_idx = eth2_pubkeys.f_val_idx
    where f_proposer_slot/32 > {merge_epoch} and f_proposer_slot not in ({missed_slots_sql})
    order by f_proposer_slot asc
"""


df_pool_proposed_post = pd.read_sql(sql_stmnt, engine)
print("Executed")

# 7. Obtain missed blocks for pools
# Same duties, keeping only slots that ARE in the missed list.
sql_stmnt = f"""
    select f_pool, f_proposer_slot, f_proposer_slot as f_missed_block
    from t_proposer_duties
    inner join eth2_pubkeys
    on t_proposer_duties.f_val_idx = eth2_pubkeys.f_val_idx
    where f_proposer_slot/32 > {merge_epoch} and f_proposer_slot in ({missed_slots_sql})
    order by f_proposer_slot asc
"""

df_pool_missed_post = pd.read_sql(sql_stmnt, engine)
print("Executed")



# 8. Obtain all pools at the merge epoch
# Uses merge_epoch rather than a repeated literal so this query cannot drift
# from the pre/post split applied by the other queries.
sql_stmnt = f"""
    select distinct(f_pool)
    from t_validator_rewards_summary
    inner join eth2_pubkeys
    on t_validator_rewards_summary.f_val_idx = eth2_pubkeys.f_val_idx
    where f_epoch = {merge_epoch}
"""

df_pools_pre = pd.read_sql(sql_stmnt, engine)
print("Executed")

# 9. Obtain all pools at the last post merge epoch we are considering
last_post_merge_epoch = 153875
sql_stmnt = f"""
    select distinct(f_pool)
    from t_validator_rewards_summary
    inner join eth2_pubkeys
    on t_validator_rewards_summary.f_val_idx = eth2_pubkeys.f_val_idx
    where f_epoch = {last_post_merge_epoch}
"""

df_pools_post = pd.read_sql(sql_stmnt, engine)
print("Executed")

# --------------------------- Start analysis ----------------------------------

def _build_pool_stats(pools, scheduled, proposed, missed, merge_label):
    """Join one period's pool datasets into a single slot-ordered frame.

    The pool list is outer-joined with the scheduled duties on f_pool, then
    with the proposed and missed datasets on (f_pool, f_proposer_slot).
    Missing values are replaced by 0 after every join. The result is tagged
    with `merge_label` in f_merge and sorted by f_proposer_slot ascending.
    """
    stats = pools.merge(scheduled, on=['f_pool'], how='outer').fillna(0)
    for per_slot in (proposed, missed):
        stats = stats.merge(per_slot, on=['f_pool', 'f_proposer_slot'], how='outer').fillna(0)
    stats['f_merge'] = merge_label  # constant label: every input belongs to one period
    return stats.sort_values(by=['f_proposer_slot'], ascending=True)


df_pools_pre.reset_index(drop=True, inplace=True)

# Pre-merge and post-merge frames are built the same way from their own inputs
df_pool_stats_pre = _build_pool_stats(
    df_pools_pre,
    df_pool_scheduled_pre,
    df_pool_proposed_pre,
    df_pool_missed_pre,
    'pre-merge',
)

df_pool_stats_post = _build_pool_stats(
    df_pools_post,
    df_pool_scheduled_post,
    df_pool_proposed_post,
    df_pool_missed_post,
    'post-merge',
)

# Stack both periods, pre-merge rows first
df_pool_stats = pd.concat([df_pool_stats_pre, df_pool_stats_post])


# Measure consecutive blocks by pool
# use proposer_slot as index, common for everyone (set_index returns a new
# frame, so df_pool_stats itself is left untouched)
df_consecutive_stats = df_pool_stats.set_index('f_proposer_slot')

# this new column mirrors f_pool, but contains "missed" if the block was missed.
# A single boolean-mask assignment replaces the former row-by-row loop (which
# also printed one line per row): it flags exactly the rows whose
# f_missed_block is non-zero, even when several rows share a slot label.
df_consecutive_stats['f_pool_missed'] = df_consecutive_stats['f_pool']
missed_mask = df_consecutive_stats['f_missed_block'] != 0
df_consecutive_stats.loc[missed_mask, 'f_pool_missed'] = "missed"

# we may use f_pool for scheduled blocks or f_pool_missed for proposed blocks
# we want an incremental number that stays the same across consecutive rows of
# the same pool: it increases by one each time f_pool differs from the previous row
# (each half of the dataset was sorted by f_proposer_slot ascending before the concat)
df_consecutive_stats['consecutive'] = ((df_consecutive_stats.f_pool != df_consecutive_stats.f_pool.shift()).cumsum())
df_consecutive_stats.to_csv('csv/proposers_metrics/pool_scheduled.csv')
df_consecutive = df_consecutive_stats[['f_pool', 'f_pool_missed', 'consecutive']] # select only these columns

# Length of every run: number of rows sharing the same 'consecutive' id.
# f_val_idx is only used as the column to count; it is renamed to "consecutive_count".
run_lengths = (
    df_consecutive_stats[['consecutive', 'f_val_idx']]
    .groupby(['consecutive'])
    .count()
    .rename(columns={"f_val_idx": "consecutive_count"})
)

# Attach the run length to every row through the run id, then keep a single
# row per run (the first one) and expose the run id under the name "number".
per_run = (
    pd.merge(df_consecutive, run_lengths, on=['consecutive'])
    .drop_duplicates(subset='consecutive', keep="first")
    .rename(columns={"consecutive": "number"})
)
df_consecutive_count = per_run[['f_pool', 'consecutive_count', 'number']]

# For each (pool, run length) pair, count how many runs of that length the pool
# had, based on scheduled blocks. Using f_pool_missed instead of f_pool would
# give the figure for proposed blocks.
df_pool_consecutive = df_consecutive_count.groupby(['f_pool', 'consecutive_count']).count()

# NOTE(review): this rename is applied after df_pool_consecutive was built, so
# the exported CSV keeps the column name "number" -- confirm that is intended.
df_consecutive_count = df_consecutive_count.rename(columns={"number": "number_times"})

# Export to csv
df_pool_consecutive.to_csv('csv/proposers_metrics/consecutive_pool_scheduled.csv')
 