In [1]:
import sqlalchemy as sa
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np


# %load_ext sql # only the first time
engine = sa.create_engine('postgresql://rewards_reader:==Hji52.(M4@localhost:5433/beaconchain_latest')

# Variables
MERGE_EPOCH = 146875

# CSV names:
REWARDS_POOL_EPOCH = 'rewards_pool_epoch.csv'
VALIDATORS_PER_EPOCH = 'vals_per_epoch.csv'

In [4]:
# Download the a CSV file with the distribution of Rewards, Max Reward per epoch and per Pool
# NOTE: this might take up to 20-30mins depending on your machine and internet connection

# SQL QUERY:
sql_query = """
    SELECT 
	rewds.f_epoch, 
	sum(rewds.f_reward) as reward,
	sum(rewds.f_max_reward) as max_reward, 
	sum(CASE WHEN rewds.f_missing_source THEN 1 ELSE 0 END) as m_source, 
	sum(CASE WHEN rewds.f_missing_target THEN 1 ELSE 0 END) as m_target, 
	sum(CASE WHEN rewds.f_missing_head THEN 1 ELSE 0 END) as m_head,
	rewds.f_pool
    FROM (
        SELECT 
            f_epoch, 
            t_validator_rewards_summary.f_val_idx as f_val_idx, 
            f_reward, 
            f_max_reward, 
			f_missing_source,
			f_missing_target,
			f_missing_head,
        	CASE 
				WHEN f_pool IS NULL
					THEN 'others'
				ELSE f_pool
				END AS f_pool    
        FROM t_validator_rewards_summary 
        LEFT JOIN eth2_pubkeys ON t_validator_rewards_summary.f_val_idx=eth2_pubkeys.f_val_idx
    ) as rewds
    GROUP BY f_epoch, f_pool
    ORDER BY f_epoch ASC;
"""

p = pd.read_sql_query(sql_query, engine)
p.to_csv(REWARDS_POOL_EPOCH)


In [3]:
# Display the percentage of reward that each pool got out of the max they could
p = pd.read_csv(REWARDS_POOL_EPOCH)

# Since we want to display CL/Attetation-related rewards - remove slashings and block proposals 
p['perc'] = (p['reward'] / p['max_reward']) * 100
p = p.astype({'perc': float})
p = p.drop(p[p.perc > 100].index)
p = p.drop(p[p.perc < -100].index)

# Get the aggregation of all the pools and get the rewards/max_reward per pool
p = p.groupby(['f_epoch']).sum()
p['perc_2'] = (p['reward'] / p['max_reward']) * 100
p = p.astype({'perc_2': float})

# roll the large dataset into averages of 8 sample windows
p_mean = p['perc_2'].rolling(8, axis=0).mean()

# Display the data that we just prepared
sns.set_context("talk", font_scale=1.1)
plt.figure(figsize=(12,6))

ax = plt.plot(p_mean)

plt.xlabel("epochs")
plt.ylabel("extracted reward (%)")
plt.title("reward / max extractable reward")

# Mark the Merge Epoch
plt.vlines(x=MERGE_EPOCH, ymin=50, ymax=100 , color='r')

plt.tight_layout()

KeyError: "Only a column name can be used for the key in a dtype mappings argument. 'perc' not found in columns."

In [None]:
# Get the distribution of total active validators over the total of epochs

sql_query = """
    select 
        f_epoch,
        count(f_val_idx) as tot_val
    from t_validator_rewards_summary
    group by f_epoch
    order by f_epoch ASC;
"""

p = pd.read_sql_query(sql_query, engine)
p['perc'] = (p['reward'] / p['max_reward']) * 100
p.to_csv(VALIDATORS_PER_EPOCH)


In [None]:
# Get the total number of missed attestation flags per total active validators

# Read both CSVs in different panda OBJs
o = pd.read_csv(VALIDATORS_PER_EPOCH)
p = pd.read_csv(REWARDS_POOL_EPOCH)

# Get the aggregation of missed flags for each epoch
p = p.groupby(['f_epoch']).sum()
p['tot_m_flags'] = p['m_source'] + p['m_target'] + p['m_head']

# Aggregate the mean of the distribution in windows of 8 samples
p = p.rolling(8, axis=0).mean()

# Merge both datasets based on epoch to get the ratio of missed flags
p = pd.merge(p, o, on='f_epoch')

# Calculate the ratio of missed per each flag (%)
p['m_source_r'] = (p['m_source'] / p['tot_val']) * 100
p['m_target_r'] = (p['m_target'] / p['tot_val']) * 100
p['m_head_r'] = (p['m_head']/ p['tot_val'])  * 100
p['tot_m_r'] = ((p['tot_m_flags'])/ p['tot_val'])  * 100

# Display the distributions
sns.set_context("talk", font_scale=1.1)

f, axs = plt.subplots(2,1,
                    figsize=(15,12),
                    sharex=True)

sns.lineplot(ax=axs[0], x='f_epoch', y='tot_m_r', data=p, label='Total Missed', color='grey')

sns.lineplot(ax=axs[1], x='f_epoch', y='m_source_r', data=p, label='Missed Source')
sns.lineplot(ax=axs[1], x='f_epoch', y='m_target_r', data=p, label='Missed Target')
sns.lineplot(ax=axs[1], x='f_epoch', y='m_head_r', data=p, label='Missed Heads')


axs[1].set(xlabel="Epoch", ylabel="Ratio of Missed Flags")
axs[0].set(ylabel="Ratio of Missed Flags")

axs[0].axvline(x=146875, ymin=0, ymax=55000, color='r')
axs[1].axvline(x=146875, ymin=0, ymax=25000, color='r')

axs[0].grid(axis='both')
axs[1].grid(axis='both')

plt.tight_layout()