In [None]:
import sqlalchemy as sa
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import numpy as np
from IPython.display import display

"""
This script obtains the ordered list of slots and which validator was assigned to propose a block
With this, we are able to extract how many consecutive slots a validator got assigned to propose a block
"""

# Load the database
# %load_ext sql
# NOTE(review): the connection URL below embeds the database password in source.
# Consider reading it from an environment variable or an untracked config file.
engine = sa.create_engine('postgresql://beaconchain:Hj862Lx.$@localhost:5432/beaconchain_latest')

# IPython magics: (re)load the `sql` extension, then pass it the engine URL
# (presumably so that %sql cells run against the same database — confirm).
%reload_ext sql

%sql $engine.url

# Epoch used as the pre-/post-merge boundary by the queries below:
# epochs <= merge_epoch are treated as pre-merge, later ones as post-merge.
merge_epoch = 146875

# Obtain the missed blocks
# One row per epoch summary: the missed slots of that epoch plus a label telling
# whether the epoch is pre-merge (f_epoch <= merge_epoch) or post-merge.
sql_stmnt = f"""
    select 
    f_missed_blocks, 
    CASE 
    WHEN f_epoch <= {merge_epoch} THEN 'pre-merge' 
    WHEN f_epoch > {merge_epoch} THEN 'post-merge' 
    ELSE NULL END AS f_merge 
    from t_epoch_metrics_summary 
"""

df_missed_blocks = pd.read_sql(sql_stmnt, engine)
print("Executed")

# Parse missed blocks to int
# Each value is handled as comma-separated text wrapped in brackets (e.g. "[1,2,3]"
# — exact stored format not visible here, TODO confirm). Both brackets and any
# surrounding whitespace are stripped from every token so that the first, the last
# and space-padded entries are all kept; non-numeric leftovers (empty tokens,
# "None") are skipped.
missed_blocks_list = []
for raw_value in df_missed_blocks['f_missed_blocks']:
    for piece in str(raw_value).split(","):
        text = piece.strip().strip("[]").strip()
        if text.isdigit():
            missed_blocks_list.append(int(text))


# Comma-separated rendering of the missed slots (a plain string, ready to be
# dropped into an SQL IN (...) list).
missed_blocks_string = ", ".join(str(x) for x in missed_blocks_list)


# Render the missed slots once for all the IN (...) filters below instead of
# re-joining the (potentially large) list for every query.
# PostgreSQL rejects an empty `IN ()` list, so when no slot was missed fall back
# to -1, which never matches a slot number (assumes slots are non-negative —
# TODO confirm).
missed_slots_sql = ", ".join(str(x) for x in missed_blocks_list) or "-1"

# Obtain scheduled blocks for each validator pre merge
# (f_proposer_slot/32 is compared with merge_epoch, i.e. it is used as the epoch of the slot)
sql_stmnt = f"""
    select 
	f_val_idx, 
	count(*) as scheduled_slot
    from t_proposer_duties
    where f_proposer_slot/32 <= {merge_epoch}
    group by f_val_idx
	  order by count(*) desc
"""

df_val_scheduled_pre = pd.read_sql(sql_stmnt, engine)
print("Executed")

# Obtain proposed blocks for each validator pre merge
# (scheduled duties whose slot is NOT among the missed ones)
sql_stmnt = f"""
    select 
	f_val_idx, 
	count(*) as proposed_blocks
from t_proposer_duties
where f_proposer_slot/32 <= {merge_epoch} and f_proposer_slot not in ({missed_slots_sql})
group by f_val_idx
	  order by count(*) desc
"""

df_val_proposed_pre = pd.read_sql(sql_stmnt, engine)
print("Executed")

# Obtain missed blocks for each validator pre merge
# (scheduled duties whose slot IS among the missed ones)
sql_stmnt = f"""
    select 
	f_val_idx, 
	count(*) as missed_blocks
from t_proposer_duties
where f_proposer_slot/32 <= {merge_epoch} and f_proposer_slot in ({missed_slots_sql})
group by f_val_idx
	  order by count(*) desc
"""

df_val_missed_pre = pd.read_sql(sql_stmnt, engine)
print("Executed")

# Obtain scheduled blocks for each validator post merge
sql_stmnt = f"""
    select 
	f_val_idx, 
	count(*) as scheduled_slot
from t_proposer_duties
where f_proposer_slot/32 > {merge_epoch}
group by f_val_idx
	  order by count(*) desc
"""

df_val_scheduled_post = pd.read_sql(sql_stmnt, engine)
print("Executed")

# Obtain proposed blocks for each validator post merge
sql_stmnt = f"""
    select 
	f_val_idx, 
	count(*) as proposed_blocks
from t_proposer_duties
where f_proposer_slot/32 > {merge_epoch} and f_proposer_slot not in ({missed_slots_sql})
group by f_val_idx
	  order by count(*) desc
"""

df_val_proposed_post = pd.read_sql(sql_stmnt, engine)
print("Executed")

# Obtain missed blocks for each validator post merge
sql_stmnt = f"""
    select 
	f_val_idx, 
	count(*) as missed_blocks
from t_proposer_duties
where f_proposer_slot/32 > {merge_epoch} and f_proposer_slot in ({missed_slots_sql})
group by f_val_idx
	  order by count(*) desc
"""

df_val_missed_post = pd.read_sql(sql_stmnt, engine)
print("Executed")



# Obtain the list of validators at the end of pre merge
# The epoch is taken from merge_epoch (instead of repeating the literal) so this
# query cannot drift from the pre-/post-merge boundary used by the other queries.

sql_stmnt = f"""
    select distinct(f_val_idx)
    from t_validator_rewards_summary
    where f_epoch = {merge_epoch}
"""

df_vals_pre = pd.read_sql(sql_stmnt, engine)
print("Executed")

# Obtain the list of validators at the end of post merge
# (last epoch of the range we are covering)
last_covered_epoch = 153875
sql_stmnt = f"""
    select distinct(f_val_idx)
    from t_validator_rewards_summary
    where f_epoch = {last_covered_epoch}
"""

df_vals_post = pd.read_sql(sql_stmnt, engine)
print("Executed")


# ------------------------ Start the Analysis -------------------------------

# Pre-merge: starting from the validator list, outer-join the scheduled, proposed
# and missed counts one after another on f_val_idx; counts absent for a validator
# become 0 after each join.
df_val_stats_pre = df_vals_pre
for counts in (df_val_scheduled_pre, df_val_proposed_pre, df_val_missed_pre):
    joined = pd.merge(df_val_stats_pre, counts, on=['f_val_idx'], how='outer')
    df_val_stats_pre = joined.fillna(0)
df_val_stats_pre['f_merge'] = 'pre-merge'

# Post-merge: same sequence of joins over the post-merge datasets.
df_val_stats_post = df_vals_post
for counts in (df_val_scheduled_post, df_val_proposed_post, df_val_missed_post):
    joined = pd.merge(df_val_stats_post, counts, on=['f_val_idx'], how='outer')
    df_val_stats_post = joined.fillna(0)
df_val_stats_post['f_merge'] = 'post-merge'

# Stack both periods into a single table (rows are told apart by f_merge)
df_val_stats = pd.concat([df_val_stats_pre, df_val_stats_post])

# Display format: render floats with three decimals
pd.set_option('display.float_format', lambda value: '%.3f' % value)


# Build some statistics


# How many missed blocks in total, then how many proposed blocks in total
# (for each metric: pre-merge first, post-merge second)
for column in ('missed_blocks', 'proposed_blocks'):
    for frame in (df_val_stats_pre, df_val_stats_post):
        display(frame[column].sum())

# Display number of validators who proposed 1 block, 2 blocks, 3 blocks...
group_pre = df_val_stats_pre.groupby(['proposed_blocks'])
group_post = df_val_stats_post.groupby(['proposed_blocks'])

for grouped in (group_pre, group_post):
    display(grouped['f_val_idx'].count())

# Export to csv
# DataFrame.to_csv does not create missing parent directories, so make sure the
# target folder exists before writing.
from pathlib import Path

output_csv = Path('csv/proposers_metrics/val_proposals.csv')
output_csv.parent.mkdir(parents=True, exist_ok=True)
df_val_stats.to_csv(output_csv)
 