In [1]:
import pandas as pd
import statsmodels.formula.api as smf

# Dataset

In [2]:
# Read the two scenario exports with identical parser settings.
# index_col=False keeps pandas from using the first CSV column as the row index.
scenario_paths = ("dataset/scenario1.csv", "dataset/scenario2.csv")
df1, df2 = (
    pd.read_csv(path, index_col=False, sep=",")
    for path in scenario_paths
)

# Check

In [3]:
# Drop epoch 0 from both scenarios because that epoch is bootstrapped.
# .copy() makes each filtered frame an independent object: a later cell assigns
# columns into df2, and doing that on the result of a boolean filter raises
# SettingWithCopyWarning on pandas versions without copy-on-write.
# The original row labels are kept (no reset_index), so row 3 stays row 3.
df1 = df1.loc[df1["epoch"] != 0].copy()
df2 = df2.loc[df2["epoch"] != 0].copy()

In [4]:
# Preview the first five rows of the scenario-1 frame.
df1.head(n=5)

Unnamed: 0,epoch,node,main,assigned_slots,share_assigned_slots,ok,skipped,invalid,delayed,bad_slots_abs,...,forced_invalid_slots_realized,forced_invalid_share_pct_of_invalid,penalty_gained,penalty_recovery,penalty_tol,residual_prev_dbg,residual_next_dbg,ok_rate,scenario,assigned_share
3,1,OV_1,MAIN_1,9712,40.467,9669,43,0,0,43,...,0,0.0,0.0,0.0,194.24,0.0,0.0,0.995572,1,0.404667
4,1,OV_2,MAIN_2,7160,29.833,7160,0,0,0,0,...,0,0.0,0.0,0.0,143.2,0.0,0.0,1.0,1,0.298333
5,1,OV_3,MAIN_3,7128,29.7,7096,17,10,5,32,...,0,0.0,0.0,0.0,142.56,0.0,0.0,0.995511,1,0.297
6,2,OV_1,MAIN_1,9503,39.596,9501,2,0,0,2,...,0,0.0,0.0,0.0,190.06,0.0,0.0,0.99979,1,0.395958
7,2,OV_2,MAIN_2,7276,30.317,7276,0,0,0,0,...,0,0.0,0.0,0.0,145.52,0.0,0.0,1.0,1,0.303167


In [5]:
# Preview the first five rows of the scenario-2 frame.
df2.head(n=5)

Unnamed: 0,epoch,node,main,assigned_slots,share_assigned_slots,ok,skipped,invalid,delayed,bad_slots_abs,...,penalty_tol,residual_prev_dbg,residual_next_dbg,ok_rate,scenario,assigned_share,malicious_i,post_t,interaction,time_since_switch
3,1,OV_1,MAIN_1,9712,40.467,7577,0,2135,0,2135,...,485.6,0.0,2367.76,0.780169,2,0.404667,1,0,0,0
4,1,OV_2,MAIN_2,7160,29.833,7132,27,1,0,28,...,358.0,0.0,0.0,0.996089,2,0.298333,0,0,0,0
5,1,OV_3,MAIN_3,7128,29.7,7128,0,0,0,0,...,356.4,0.0,0.0,1.0,2,0.297,0,0,0,0
6,2,OV_1,MAIN_1,1896,7.9,1434,0,462,0,462,...,94.8,2368.0,2669.559366,0.756329,2,0.079,1,0,0,0
7,2,OV_2,MAIN_2,11027,45.946,11020,7,0,0,7,...,551.35,0.0,0.0,0.999365,2,0.459458,0,0,0,0


In [6]:
# List the column labels of the scenario-1 frame.
df1.columns

Index(['epoch', 'node', 'main', 'assigned_slots', 'share_assigned_slots', 'ok',
       'skipped', 'invalid', 'delayed', 'bad_slots_abs', 'bad_slots_pct_node',
       'effective_stake', 'stake_share_pct', 'effective_stake_next',
       'stake_share_next_pct', 'delivered_slots_share_pct',
       'coinbase_epoch_tkg', 'fees_epoch_tkr_at_main',
       'reward_epoch_total_value_tkg_only', 'coinbase_cum_tkg',
       'fees_cum_tkr_at_main', 'penalty_slots_residual', 'penalty_score',
       'forced_invalid_slots_planned', 'forced_invalid_slots_realized',
       'forced_invalid_share_pct_of_invalid', 'penalty_gained',
       'penalty_recovery', 'penalty_tol', 'residual_prev_dbg',
       'residual_next_dbg', 'ok_rate', 'scenario', 'assigned_share'],
      dtype='object')

In [7]:
# List the column labels of the scenario-2 frame, for comparison with df1's.
df2.columns

Index(['epoch', 'node', 'main', 'assigned_slots', 'share_assigned_slots', 'ok',
       'skipped', 'invalid', 'delayed', 'bad_slots_abs', 'bad_slots_pct_node',
       'effective_stake', 'stake_share_pct', 'effective_stake_next',
       'stake_share_next_pct', 'delivered_slots_share_pct',
       'coinbase_epoch_tkg', 'fees_epoch_tkr_at_main',
       'reward_epoch_total_value_tkg_only', 'coinbase_cum_tkg',
       'fees_cum_tkr_at_main', 'penalty_slots_residual', 'penalty_score',
       'forced_invalid_slots_planned', 'forced_invalid_slots_realized',
       'forced_invalid_share_pct_of_invalid', 'penalty_gained',
       'penalty_recovery', 'penalty_tol', 'residual_prev_dbg',
       'residual_next_dbg', 'ok_rate', 'scenario', 'assigned_share',
       'malicious_i', 'post_t', 'interaction', 'time_since_switch'],
      dtype='object')

In [8]:
# Indicator columns used as regressors in the models further down.
cols = ['malicious_i', 'post_t', 'interaction']
# Cast them to int by rebinding df2 to a re-typed frame rather than assigning the
# columns in place: df2 comes out of a boolean filter, and writing columns into
# such a frame raises SettingWithCopyWarning on pandas versions without
# copy-on-write. Column order and all other dtypes are left as they were.
df2 = df2.astype({col: int for col in cols})

In [9]:
# Print the distinct values found in each indicator column, one line per column.
for name, values in df2[cols].items():
    print(name, values.unique())

malicious_i [1 0]
post_t [0 1]
interaction [0 1]


# DiD (difference-in-differences)

In [13]:
# Difference-in-differences regression of the per-epoch reward on the scenario-2 data.
#   post_t      - post-period indicator
#   interaction - presumably malicious_i * post_t, i.e. the DiD term; confirm
#                 against whatever generates the CSV
#   C(node)     - node fixed effects
# malicious_i is deliberately left out of the formula. With it included, the fit
# reported "Df Model: 4" for five non-constant regressors, i.e. a rank-deficient
# design. malicious_i appears to be constant within each node (TODO confirm), so it
# is a linear combination of the intercept and the node dummies and its coefficient
# is not identified. The post_t and interaction estimates are unaffected by
# dropping it.
# NOTE(review): standard errors are clustered by node; the preview rows show only
# OV_1..OV_3, and cluster-robust inference with so few clusters is unreliable -
# confirm the number of nodes before reading the p-values.
mod_reward = smf.ols(
    "reward_epoch_total_value_tkg_only ~ post_t + interaction + C(node)",
    data=df2,
).fit(cov_type="cluster", cov_kwds={"groups": df2["node"]})
print(mod_reward.summary())

                                    OLS Regression Results                                   
Dep. Variable:     reward_epoch_total_value_tkg_only   R-squared:                       0.339
Model:                                           OLS   Adj. R-squared:                  0.335
Method:                                Least Squares   F-statistic:                 1.440e+04
Date:                               Tue, 14 Oct 2025   Prob (F-statistic):           6.94e-05
Time:                                       22:41:47   Log-Likelihood:                 71.037
No. Observations:                                597   AIC:                            -132.1
Df Residuals:                                    592   BIC:                            -110.1
Df Model:                                          4                                         
Covariance Type:                             cluster                                         
                      coef    std err          z      P>|z| 



In [16]:
# Difference-in-differences regression of next-epoch effective stake on the
# scenario-2 data.
#   post_t      - post-period indicator
#   interaction - presumably malicious_i * post_t, i.e. the DiD term; confirm
#                 against whatever generates the CSV
#   C(node)     - node fixed effects
# malicious_i is deliberately left out of the formula. With it included, the fit
# reported "Df Model: 4" for five non-constant regressors, i.e. a rank-deficient
# design. malicious_i appears to be constant within each node (TODO confirm), so it
# is a linear combination of the intercept and the node dummies and its coefficient
# is not identified. The post_t and interaction estimates are unaffected by
# dropping it.
# NOTE(review): standard errors are clustered by node; the preview rows show only
# OV_1..OV_3, and cluster-robust inference with so few clusters is unreliable -
# confirm the number of nodes before reading the p-values.
mod_stake = smf.ols(
    "effective_stake_next ~ post_t + interaction + C(node)",
    data=df2,
).fit(cov_type="cluster", cov_kwds={"groups": df2["node"]})
print(mod_stake.summary())

                             OLS Regression Results                             
Dep. Variable:     effective_stake_next   R-squared:                       0.193
Model:                              OLS   Adj. R-squared:                  0.188
Method:                   Least Squares   F-statistic:                 2.159e+06
Date:                  Tue, 14 Oct 2025   Prob (F-statistic):           4.63e-07
Time:                          22:46:07   Log-Likelihood:                -3659.4
No. Observations:                   597   AIC:                             7329.
Df Residuals:                       592   BIC:                             7351.
Df Model:                             4                                         
Covariance Type:                cluster                                         
                      coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------------------------
Intercept         163.

