## Setup Only for Colab

In [None]:
# prompt: mount drive
# Mounts Google Drive at /content/drive so that files stored there are
# reachable from this Colab runtime (only meaningful when running on Colab).

from google.colab import drive
drive.mount('/content/drive')

In [None]:
%cd /content/drive/MyDrive/Colab\ Notebooks/hidden_mediators

In [None]:
%ls

In [None]:
from IPython.display import clear_output

In [None]:
import time
!pip install -r requirements.txt
time.sleep(2)
clear_output()

In [None]:
import time
# Runs the setup.py found in the current working directory in `develop` mode
# (presumably the `proximalde` package imported further down — TODO confirm).
# replace `develop` with `install` if you won't make library code changes
!python setup.py develop
time.sleep(2)
# Wipe the install log from the cell output.
clear_output()
# Restart the session after running this

In [None]:
%cd /content/drive/MyDrive/Colab\ Notebooks

## Semi-Synthetic Generation (UK Biobank data)

In [None]:
%load_ext autoreload
%autoreload 2
import seaborn as sns 
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats
from joblib import Parallel, delayed
from proximalde.ukbb_proximal import ProximalDE_UKBB, residualizeW_ukbb
from proximalde.proximal import ProximalDE, residualizeW, svd_critical_value
from proximalde.utilities import covariance
from proximalde.gen_data import SemiSyntheticGenerator
from sklearn.linear_model import LogisticRegressionCV, LassoCV
from proximalde.ukbb_data_utils import *
from proximalde.gen_data import gen_data_with_mediator_violations, gen_data_no_controls_discrete_m, gen_data_no_controls, gen_data_complex

In [None]:
# Treatment / outcome pair analysed in this section.
D_label, Y_label = 'Obese', 'back'
(W, W_binary, W_feats,
 X, X_binary, X_feats,
 Z, Z_binary, Z_feats,
 Y, D) = load_ukbb_data(D_label=D_label, Y_label=Y_label)

# Re-read the X / Z feature names and convert them with get_int_feats
# (presumably to human-readable descriptions, since they are searched for
# answer strings below — TODO confirm).
_, X_feats, _, Z_feats = load_ukbb_XZ_data()
Xint, Zint = get_int_feats(X_feats), get_int_feats(Z_feats)

# Only the first four outputs of the residualisation are used here.
Dres, Zres, Xres, Yres = residualizeW_ukbb(
    W, D, Z, X, Y,
    D_label=D_label, Y_label=Y_label,
    save_fname_addn='', random_state=3, cv=3,
)[:4]

# Drop every Z column whose name marks a non-answer, keeping the residuals,
# names, raw values and binary flags aligned with each other.
bad_idx = np.array([any(tag in x for tag in ('Do not know', 'Prefer not to')) for x in Zint])
Zres, Z = Zres[:, ~bad_idx], Z[:, ~bad_idx]
Zint, Z_binary = Zint[~bad_idx], Z_binary[~bad_idx]

In [None]:
# Simulation settings. `nsamples`, `a`, `b`, `c` and `g` are the values passed to
# `generator.sample(...)` in the cells below.
nsamples = 50000
a = 1.0  # a*b is the indirect effect through mediator
b = 1.0
c = .5  # this is the direct effect we want to estimate
d = .0  # this can be zero; does not hurt
e = 1.0  # if the product of e*f is small, then we have a weak instrument
f = 1.0  # if the product of e*f is small, then we have a weak instrument
g = .0  # this can be zero; does not hurt

In [None]:
%%time
generator = SemiSyntheticGenerator(split=True)
generator.fit(W, D, Z, X, Y, ZXYres=[Zres, Xres, Yres],propensity=np.load('propensity.npy'))
# np.save('propensity.npy', generator.propensity_)

In [None]:
# Draw a synthetic sample here so this cell also runs on a fresh kernel
# (Ztilde / Xtilde were previously only defined by a later cell).
Wtilde, Dtilde, _, Ztilde, Xtilde, Ytilde = generator.sample(nsamples, a, b, c, g, replace=True)

# Residual noise of the first five X and Z features, on a random subset of rows.
idx = np.random.choice(np.arange(len(generator.Xepsilon_)), size=50000).astype(int)
sns.histplot(generator.Xepsilon_[idx, :5])
plt.show()
sns.histplot(generator.Zepsilon_[idx, :5])
plt.show()

# Cross- and auto-covariances: real data first, synthetic sample second.
sns.heatmap(covariance(Z, X))
plt.show()
sns.heatmap(covariance(Ztilde, Xtilde))
plt.show()
sns.heatmap(covariance(Z, Z))
plt.show()
sns.heatmap(covariance(Ztilde, Ztilde))
plt.show()

In [None]:
def plot_marginal_hists(sampled, true, num_plots=10):
    """Overlay per-feature density histograms of sampled vs. true data.

    Parameters
    ----------
    sampled, true : 2-D arrays whose columns are features.
    num_plots : maximum number of leading features to plot; capped at the
        number of columns available in both arrays.

    Returns
    -------
    The matplotlib figure holding one panel per plotted feature.
    """
    num_plots = min(num_plots, sampled.shape[1], true.shape[1])
    fig, axes = plt.subplots(1, num_plots, figsize=(num_plots * 4, 4), squeeze=False)
    for i, ax in enumerate(axes[0]):
        ax.hist(sampled[:, i], bins=30, alpha=0.5, label='sampled', color='blue', density=True)
        ax.hist(true[:, i], alpha=0.5, label='true', color='orange', density=True)
        ax.legend()
        ax.set_title(f'Feature {i}')
        ax.set_yticks([])
    fig.tight_layout()  # Adjust layout to prevent overlap
    return fig


Wtilde, Dtilde, _, Ztilde, Xtilde, Ytilde = generator.sample(nsamples, a, b, c, g, replace=True)

# Marginals of the first features of Z, then of X: synthetic sample vs. real data.
plot_marginal_hists(Ztilde, Z)
plot_marginal_hists(Xtilde, X);


In [None]:
import statsmodels.api as stm

def minmax(M):
    """Rescale every column of a 2-D array to the [0, 1] range.

    Columns whose minimum equals their maximum cannot be rescaled; they are
    filled with ``min + 0.5`` instead (an all-zero column becomes 0.5). The
    input array is not modified.

    NOTE(review): for a constant column whose value is not 0 the fill value
    lies outside [0, 1]; confirm this is intended, since the result is used
    as a Bernoulli probability by the caller.

    Parameters
    ----------
    M : 2-D numeric array of shape (rows, columns).

    Returns
    -------
    A new floating-point array of the same shape as ``M``.
    """
    Mmin = M.min(axis=0, keepdims=True)
    Mmax = M.max(axis=0, keepdims=True)
    constant = (Mmin == Mmax)
    # Divide constant columns by 1 rather than 0: avoids divide-by-zero
    # warnings and NaN intermediates; those columns are overwritten below.
    scaled = (M - Mmin) / np.where(constant, 1, Mmax - Mmin)
    constant_cols = constant.ravel()
    if constant_cols.any():
        scaled[:, constant_cols] = Mmin.ravel()[constant_cols] + .5
    return scaled

def make_binary_Z(Z, sample=False, binary_mask=None):
    """Return a copy of ``Z`` whose binary-flagged columns hold 0/1 values.

    Parameters
    ----------
    Z : 2-D array of shape (rows, columns). It is not modified.
    sample : if False, a flagged entry becomes 1 when it is > 0 and 0
        otherwise. If True, each flagged column is min-max rescaled with
        ``minmax`` and every entry is replaced by a Bernoulli draw that uses
        the rescaled value as its success probability.
    binary_mask : boolean mask over the columns of ``Z`` selecting the columns
        to binarize. Defaults to the notebook-level ``Z_binary``.

    Returns
    -------
    A new array with the same shape and dtype as ``Z``.
    """
    if binary_mask is None:
        binary_mask = Z_binary
    Zb = np.array(Z, copy=True)
    flagged = Zb[:, binary_mask]
    if sample:
        # One vectorised draw from the global NumPy RNG: reproducible under
        # np.random.seed (unlike draws made in joblib worker processes) and
        # well-defined even when no column is flagged.
        Zb[:, binary_mask] = np.random.binomial(1, minmax(flagged))
    else:
        Zb[:, binary_mask] = (flagged > 0).astype(int)
    return Zb

def best_baseline_est(it, generator, n, a, b, c, g, *, sy=1.0, n_jobs=-1, verbose=0, make_binary=True):
    """Best-case OLS baseline: regress Y on (D, M, X, W, intercept).

    Seeds NumPy's global RNG with ``it``, draws ``n`` samples from
    ``generator`` and fits an OLS with HC1 covariance. ``n_jobs``, ``verbose``
    and ``make_binary`` are accepted but not used.

    Returns
    -------
    (coefficient on D, its standard error)
    """
    np.random.seed(it)

    # The fourth output (Z) is discarded, so binarisation does not apply here.
    W, D, M, _, X, Y = generator.sample(n, a, b, c, g, sy=sy, replace=True)

    intercept = np.ones((D.shape[0], 1))
    design = np.hstack([D.reshape(-1, 1), M, X, W, intercept])
    fit = stm.OLS(Y, design).fit(cov_type='HC1')
    d_variance = fit.cov_params()[0, 0]
    return fit.params[0], np.sqrt(d_variance)

def known_baseline_est(it, generator, n, a, b, c, g, *, sy=1.0, n_jobs=-1, verbose=0, make_binary=True):
    """OLS baseline on observed variables: regress Y on (D, Z, X, W, intercept).

    Seeds NumPy's global RNG with ``it``, draws ``n`` samples from
    ``generator``, optionally binarises Z with ``make_binary_Z`` and fits an
    OLS with HC1 covariance. ``n_jobs`` and ``verbose`` are accepted but not
    used.

    Returns
    -------
    (coefficient on D, its standard error)
    """
    np.random.seed(it)

    # The third output (the mediator M) is discarded.
    W, D, _, Z, X, Y = generator.sample(n, a, b, c, g, sy=sy, replace=True)
    if make_binary:
        Z = make_binary_Z(Z)
        print("making binary")

    intercept = np.ones((D.shape[0], 1))
    design = np.hstack([D.reshape(-1, 1), Z, X, W, intercept])
    fit = stm.OLS(Y, design).fit(cov_type='HC1')
    d_variance = fit.cov_params()[0, 0]
    return fit.params[0], np.sqrt(d_variance)

def lasso_baseline_est(it, generator, n, a, b, c, g, *, sy=1.0, n_jobs=-1, verbose=0, make_binary=True,use_sklearn=False):
    """Regularised-regression baseline of Y on (D, Z, X, W, intercept).

    Seeds NumPy's global RNG with ``it``, draws ``n`` samples from
    ``generator`` and optionally binarises Z with ``make_binary_Z``. With
    ``use_sklearn=True`` a 5-fold ``LassoCV`` is fitted, otherwise statsmodels'
    elastic-net ``fit_regularized``. ``n_jobs`` and ``verbose`` are accepted
    but not used.

    Returns
    -------
    The fitted coefficient on D (no standard error).
    """
    np.random.seed(it)

    # The third output (the mediator M) is discarded.
    W, D, _, Z, X, Y = generator.sample(n, a, b, c, g, sy=sy, replace=True)
    if make_binary:
        Z = make_binary_Z(Z)
        print("making binary")

    intercept = np.ones((D.shape[0], 1))
    data = np.hstack([D.reshape(-1, 1), Z, X, W, intercept])

    if use_sklearn:
        model = LassoCV(random_state=0, cv=5, n_jobs=-1)
        model.fit(data, Y)
        return model.coef_.squeeze()[0]

    # NOTE(review): alpha is the penalty weight in fit_regularized, so
    # alpha=0. looks like it switches the penalty off entirely — confirm the
    # intended alpha / L1_wt values.
    model = stm.OLS(Y, data).fit_regularized(method='elastic_net', alpha=0., L1_wt=1e-3)
    return model.params[0]

def proximal_est(it, generator, n, a, b, c, g, *, sy=1.0, n_splits=3, semi=True,
            n_jobs=-1, verbose=0, make_binary=False, sample_binary=False):
    """Run one seeded ProximalDE replication on a fresh semi-synthetic sample.

    Parameters
    ----------
    it : replication index; seeds NumPy's global RNG and the estimator.
    generator : fitted generator exposing ``sample(n, a, b, c, g, sy=, replace=)``.
    n, a, b, c, g, sy : forwarded to ``generator.sample``.
    n_splits : number of cross-fitting folds (``cv``) for ProximalDE.
    semi : forwarded to ProximalDE (previously ignored and always True).
    n_jobs, verbose : forwarded to ProximalDE.
    make_binary : binarise Z with ``make_binary_Z`` and declare the
        notebook-level ``Z_binary`` columns as binary to the estimator.
    sample_binary : passed as ``sample`` to ``make_binary_Z``.

    Returns
    -------
    An 18-tuple: point, stderr, r2D, r2Z, r2X, r2Y, id-strength statistic and
    critical value, pre-estimate point and stderr, primal statistic and
    critical value, dual statistic and critical value, weak-IV statistic and
    critical value, and the robust confidence interval bounds (lb, ub).
    """
    np.random.seed(it)
    print(it)
    # M is unobserved so we omit it from the return variables
    Wt, Dt, _, Zt, Xt, Yt = generator.sample(n, a, b, c, g, sy=sy, replace=True)

    if make_binary:
        Zt = make_binary_Z(Zt, sample=sample_binary)
        binary_Z = Z_binary
    else:
        binary_Z = []
    # Honour the caller's `semi` flag instead of hard-coding semi=True.
    est = ProximalDE(cv=n_splits, semi=semi, binary_D=True,
                     model_classification='xgb', binary_Z=binary_Z,
                     n_jobs=n_jobs, random_state=it, verbose=verbose)
    est.fit(Wt, Dt, Zt, Xt, Yt)
    # Each test returns four values; only the first (statistic) and the last
    # (critical value) are kept.
    weakiv_stat, _, _, weakiv_crit = est.weakiv_test(alpha=0.05)
    idstr, _, _, idstr_crit = est.idstrength_violation_test(alpha=0.05)
    pval, _, _, pval_crit = est.primal_violation_test(alpha=0.05)
    dval, _, _, dval_crit = est.dual_violation_test(alpha=0.05)
    lb, ub = est.robust_conf_int(lb=-2, ub=2)
    return est.point_, est.stderr_, est.r2D_, est.r2Z_, est.r2X_, est.r2Y_, \
        idstr, idstr_crit, est.point_pre_, est.stderr_pre_, \
        pval, pval_crit, dval, dval_crit, weakiv_stat, weakiv_crit, \
        lb, ub

### With continuous Z

In [None]:
# Monte Carlo comparison of the two OLS baselines with Z left continuous
# (make_binary=False): 100 seeded replications per baseline, summarised
# against the true direct effect c.
for btype, bfn in zip(['best case OLS baseline, Y=OLS(M, X, W, D)', 'known OLS baseline, Y=OLS(X, W, Z, D)'],
                     [best_baseline_est, known_baseline_est]):
    print(btype)
    results_baseline = Parallel(n_jobs=-1, verbose=3)(delayed(bfn)(i, generator, nsamples,
                                                              a, b, c, g, n_jobs=1, make_binary=False)
                                              for i in range(100))

    points, stderrs = map(np.array, zip(*results_baseline))

    print("Estimation Quality")
    # Coverage of the nominal 95% interval point +/- 1.96 * stderr.
    coverage = np.mean((points + 1.96 * stderrs >= c) & (points - 1.96 * stderrs <= c))
    rmse = np.sqrt(np.mean((points - c)**2))
    bias = np.abs(np.mean(points) - c)
    std = np.std(points)
    mean_stderr = np.mean(stderrs)
    mean_length = np.mean(2 * 1.96 * stderrs)
    median_length = np.median(2 * 1.96 * stderrs)
    print(f"Mean point: {np.mean(points):.3f}")
    print(f"Coverage: {coverage:.3f}")
    print(f"RMSE: {rmse:.3f}")
    print(f"Bias: {bias:.3f}")
    print(f"Std: {std:.3f}")
    print(f"Mean CI length: {mean_length:.3f}")
    # Report the median here (the mean was printed twice before).
    print(f"Median CI length: {median_length:.3f}")
    print(f"Mean Estimated Stderr: {mean_stderr:.3f}")

### With binary Z

In [None]:
# Same Monte Carlo comparison as for continuous Z, but with make_binary=True.
c = .5
for btype, bfn in zip(['best case OLS baseline, Y=OLS(M, X, W, D)', 'known OLS baseline, Y=OLS(X, W, Z, D)'],
                     [best_baseline_est, known_baseline_est]):
    print(btype)
    results_baseline = Parallel(n_jobs=-1, verbose=3)(delayed(bfn)(i, generator, nsamples,
                                                              a, b, c, g, n_jobs=1, make_binary=True)
                                              for i in range(100))

    points, stderrs = map(np.array, zip(*results_baseline))

    print("Estimation Quality")
    # Coverage of the nominal 95% interval point +/- 1.96 * stderr.
    coverage = np.mean((points + 1.96 * stderrs >= c) & (points - 1.96 * stderrs <= c))
    rmse = np.sqrt(np.mean((points - c)**2))
    bias = np.abs(np.mean(points) - c)
    std = np.std(points)
    mean_stderr = np.mean(stderrs)
    mean_length = np.mean(2 * 1.96 * stderrs)
    median_length = np.median(2 * 1.96 * stderrs)
    print(f"Mean point: {np.mean(points):.3f}")
    print(f"Coverage: {coverage:.3f}")
    print(f"RMSE: {rmse:.3f}")
    print(f"Bias: {bias:.3f}")
    print(f"Std: {std:.3f}")
    print(f"Mean CI length: {mean_length:.3f}")
    # Report the median here (the mean was printed twice before).
    print(f"Median CI length: {median_length:.3f}")
    print(f"Mean Estimated Stderr: {mean_stderr:.3f}")

In [None]:
# Sweep the true direct effect c for the known-variables OLS baseline with
# binarised Z; 100 seeded replications per value of c.
c = .5
btype = 'known OLS baseline, Y=OLS(X, W, Z, D)'
for c in [-.5,-.1,.1,.5]:
    # Label each summary so the four blocks of output can be told apart.
    print(f"{btype} (c={c})")
    results_baseline = Parallel(n_jobs=-1, verbose=3)(delayed(known_baseline_est)(i, generator, nsamples,
                                                              a, b, c, g, n_jobs=1, make_binary=True)
                                              for i in range(100))

    points, stderrs = map(np.array, zip(*results_baseline))

    print("Estimation Quality")
    # Coverage of the nominal 95% interval point +/- 1.96 * stderr.
    coverage = np.mean((points + 1.96 * stderrs >= c) & (points - 1.96 * stderrs <= c))
    rmse = np.sqrt(np.mean((points - c)**2))
    bias = np.abs(np.mean(points) - c)
    std = np.std(points)
    mean_stderr = np.mean(stderrs)
    mean_length = np.mean(2 * 1.96 * stderrs)
    median_length = np.median(2 * 1.96 * stderrs)
    print(f"Mean point: {np.mean(points):.3f}")
    print(f"Coverage: {coverage:.3f}")
    print(f"RMSE: {rmse:.3f}")
    print(f"Bias: {bias:.3f}")
    print(f"Std: {std:.3f}")
    print(f"Mean CI length: {mean_length:.3f}")
    # Report the median here (the mean was printed twice before).
    print(f"Median CI length: {median_length:.3f}")
    print(f"Mean Estimated Stderr: {mean_stderr:.3f}")

In [None]:
# Scratch arithmetic: 1.96 times 0.004 (presumably the 95% CI half-width for a
# standard error of 0.004 — TODO confirm or delete this cell).
0.004 * 1.96

In [None]:
# for i in tqdm(range(100)):
#     results.append(proximal_est(i,generator,nsamples,a,b,c,g,make_binary=True,sample_binary=True, verbose=0))

In [None]:
import pickle as pk
import os

# Summarise every pickled experiment in RESULTS_DIR into one row of
# `synth_metrics.csv`.
RESULTS_DIR = './results/semisynthetic'

# Raw strings are required: in a normal string the '\t' of \theta / \tilde and
# the '\n' of \neq are read as tab / newline escapes and corrupt the labels.
metric_names = [r'Mean point $\hat{\theta}$', r'CI $\pm1.96\sigma$',
                r'Average CI $\pm1.96\sigma_i$', r'Average coverage of $\theta_0$',
                r'RMSE from $\theta_0$', 'Bias', 'Success of (1) Primal', 'Success of (2) Dual',
                r'Success of (3) $\E[\tilde{D}V] \neq 0$',
                r'Success of (4) $V$ strength F-test',
                r'Success of (5) Cov($\tilde{X},\tilde{Z}$) rank test']
dfs = {'exp': []}
for m in metric_names:
    dfs[m] = []

# Sorted so the row order of the CSV does not depend on directory listing order.
for fname in sorted(f for f in os.listdir(RESULTS_DIR) if f.endswith('.pkl')):
    # File names ending in `_c<value>.pkl` encode the true direct effect;
    # all other files were generated with c = 0.5.
    last_token = fname.split('_')[-1]
    if last_token[0] == 'c':
        c_true = float(last_token[1:-4])
    else:
        c_true = .5
    print(c_true)
    # NOTE: pickle.load can execute arbitrary code; only load trusted result files.
    with open(os.path.join(RESULTS_DIR, fname), 'rb') as fh:
        run_results = pk.load(fh)
    exp_name = fname.replace('results_DrealFalse_', '')
    exp_name = exp_name.replace('_ClsfZFalse_SmpZFalse_Clsfxgb_DbinaryFalse', '')
    print('\n\n', exp_name)
    print(len(run_results))
    points_base, stderrs_base, rmseD, rmseZ, rmseX, rmseY, \
    idstr, idstr_crit, points_alt, stderrs_alt, \
    pval, pval_crit, dval, dval_crit, wiv_stat, wiv_crit, \
    rlb, rub = map(np.array, zip(*run_results))

    # Summaries use the main estimate; swap in points_alt / stderrs_alt to
    # summarise the alternative (pre) estimate instead.
    points = points_base
    stderrs = stderrs_base
    covered = (points + 1.96 * stderrs >= c_true) & (points - 1.96 * stderrs <= c_true)
    stats = [np.mean(points),
             np.std(points) * 1.96,  # 1.96 * sigma needs the std, not the variance
             np.mean(1.96 * stderrs),
             np.mean(covered),
             np.sqrt(np.mean((points - c_true)**2)),
             np.abs(np.mean(points) - c_true),
             np.mean(pval <= pval_crit),
             np.mean(dval <= dval_crit),
             np.mean(idstr >= idstr_crit),
             np.mean(wiv_stat >= wiv_crit),
             np.mean([1])]  # placeholder: the rank test result is not stored in the pickles
    dfs['exp'].append(exp_name)
    for metric, stat in zip(metric_names, stats):
        dfs[metric].append(round(stat, 2))
        print(f"{metric}: {stat:.3f}")

#     print("\nRobust ConfInt Coverage")
#     rcoverage = np.mean((rub >= c_true) & (rlb <= c_true))
#     print(f"Robust Coverage: {rcoverage:.3f}")
pd.DataFrame(dfs).to_csv('synth_metrics.csv')

In [None]:


# NOTE(review): absolute, cluster-specific path; this cell only runs where
# that directory is mounted.
UKBB_DATA_DIR = '/oak/stanford/groups/rbaltman/karaliu/bias_detection/cohort_creation/data/'


#     Dlabel_to_fid = {'Female':[31], 'Black':[21000], 
#                      'Obese': [21002], 'Asian': [21000], 
#                      'White': [21000], 'Low_inc': [738, 6138, 6146, 4674], 
#                      'On_dis': [6146], 'No_uni': [6138], 
#                      'No_priv_insr': [4674]} #fid is the ID # of a feature in UKBB
#     keep_W_idx = [int(f.split('.')[1]) not in Dlabel_to_fid[D_label] for f in all_D_feats]
#     W = np.concatenate([W, all_D_data[:,keep_W_idx]], axis=1)
#     W_binary = np.concatenate([W_binary, all_D_binary[keep_W_idx]])
#     W_feats = np.concatenate([W_feats, all_D_feats[keep_W_idx]])
#     return W, W_binary, W_feats

# ukbb_corr_df = pd.read_csv(UKBB_DATA_DIR + 'updated_Y_labels.csv')
# Loaded under its own name: assigning it to `Y` would overwrite the outcome
# array that the generator cells rely on.
ukbb_corr_df = pd.read_csv(UKBB_DATA_DIR + 'updated_sa_df_pp.csv')

# Drop the first column before computing correlations.
ukbb_corr_df = ukbb_corr_df.iloc[:, 1:]
import seaborn as sns
fig, ax = plt.subplots(figsize=(20, 20), dpi=80)
corr = ukbb_corr_df.corr()
# Absolute pairwise correlations between the remaining columns.
sns.heatmap(np.abs(corr), cmap="Blues", annot=True, ax=ax);

## Semi-Synthetic Generation (simulated base data)

In [None]:
%load_ext autoreload
%autoreload 2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats
from joblib import Parallel, delayed
from proximalde.proximal import ProximalDE, residualizeW, svd_critical_value
from proximalde.utilities import covariance
from proximalde.gen_data import gen_data_with_mediator_violations, gen_data_no_controls_discrete_m, gen_data_no_controls, gen_data_complex

In [None]:
# Coefficients and dimensions passed to the gen_data_* call in the next cell.
a = 1.0  # a*b is the indirect effect through mediator
b = 1.0
c = .5  # this is the direct effect we want to estimate
d = .0  # this can be zero; does not hurt
e = 1.0  # if the product of e*f is small, then we have a weak instrument
f = 1.0  # if the product of e*f is small, then we have a weak instrument
g = .0  # this can be zero; does not hurt

# Sample size, then the pw / pz / px arguments of the generator call
# (presumably the dimensions of W, Z and X — TODO confirm).
n = 100000
pw = 10
pz, px = 4, 4

In [None]:
np.random.seed(124)
# Alternative generators, kept for reference:
# W, D, _, Z, X, Y = gen_data_with_mediator_violations(n, pw, pz, px, a, b, c, d, e, f, g)
# W = None

# W, D, _, Z, X, Y = gen_data_complex(n, pw, pz, px, a, b, c, d, e, f, g)

## for no controls un-comment this
# _, D, _, Z, X, Y = gen_data_no_controls(n, pw, pz, px, a, b, c, d, e, f, g)
# W = None

## multi-dimensional mediator (active configuration)
pm = 2
# Redraw the (pm x pz) and (pm x px) matrices until both have rank pm at
# tolerance 0.5.
full_rank = False
while not full_rank:
    E = np.random.normal(0, 2, (pm, pz))
    F = np.random.normal(0, 2, (pm, px))
    full_rank = all(np.linalg.matrix_rank(mat, tol=0.5) == pm for mat in (E, F))
W, D, _, Z, X, Y = gen_data_no_controls_discrete_m(n, pw, pz, px, a, b, c, d, e*E, f*F, g, pm=pm)
# The returned W is replaced by None before it is used downstream.
W = None

In [None]:
from proximalde.gen_data import SemiSyntheticGenerator

# Values passed to generator.sample(...) in the cells below.
a = 1.0  # a*b is the indirect effect through mediator
b = 1.0
c = .5  # this is the direct effect we want to estimate
g = .0  # this can be zero; does not hurt
# NOTE(review): `sm` is not referenced by any other cell visible in this notebook.
sm = 2.0  # strength of mediator noise; needs to be non-zero for identifiability; only used when pm=1.
nsamples = 100000

# Fit the generator on the simulated data from the previous cell (W is None there).
generator = SemiSyntheticGenerator(split=True)
generator.fit(W, D, Z, X, Y)

In [None]:
import seaborn as sns
# Histogram of the generator's fitted `Xepsilon_` attribute.
sns.histplot(generator.Xepsilon_)

In [None]:
# Draw one sample of size nsamples from the fitted generator; the third output
# is discarded.
Wtilde, Dtilde, _, Ztilde, Xtilde, Ytilde = generator.sample(nsamples, a, b, c, g, replace=True)

In [None]:
# Z–X covariance of the original data (first figure) vs. the generated sample (second).
sns.heatmap(covariance(Z, X))
plt.show()
sns.heatmap(covariance(Ztilde, Xtilde))

In [None]:
# Z–Z covariance of the original data (first figure) vs. the generated sample (second).
sns.heatmap(covariance(Z, Z))
plt.show()
sns.heatmap(covariance(Ztilde, Ztilde))


In [None]:
# Overlay the first Z feature of the generated sample and of the original data.
# Semi-transparent bars keep both histograms visible, and the legend shows the
# labels (which were previously set but never drawn).
plt.hist(Ztilde[:, 0], alpha=0.5, label='sampled')
plt.hist(Z[:, 0], alpha=0.5, label='true')
plt.legend()
plt.show()

In [None]:
import statsmodels.api as stm
def exp_res(it, generator, n, a, b, c, g, *, sy=1.0, n_jobs=-1, verbose=0):
    """One seeded OLS replication: regress Y on (D, M, X, intercept).

    Seeds NumPy's global RNG with ``it``, draws ``n`` samples from
    ``generator`` and fits an OLS with HC1 covariance that includes the
    mediator M as a regressor. ``n_jobs`` and ``verbose`` are accepted but
    not used.

    Returns
    -------
    (coefficient on D, its standard error)
    """
    np.random.seed(it)

    # W and Z are drawn but do not enter the regression.
    W, D, M, Z, X, Y = generator.sample(n, a, b, c, g, sy=sy, replace=True)

    intercept = np.ones((D.shape[0], 1))
    design = np.hstack([D.reshape(-1, 1), M, X, intercept])
    fit = stm.OLS(Y, design).fit(cov_type='HC1')
    d_variance = fit.cov_params()[0, 0]
    return fit.params[0], np.sqrt(d_variance)

In [None]:
# Single replication (seed 5) as a quick check before the parallel run.
exp_res(5, generator, nsamples, a, b, c, g, n_jobs=1)

In [None]:
# 100 replications in parallel; replication i is seeded with i inside exp_res.
results = Parallel(n_jobs=-1, verbose=3)(delayed(exp_res)(i, generator, nsamples,
                                                          a, b, c, g, n_jobs=1)
                                          for i in range(100))

In [None]:
# Summarise the replications against the true direct effect c.
points, stderrs = map(np.array, zip(*results))

print("Estimation Quality")
# Coverage of the nominal 95% interval point +/- 1.96 * stderr.
coverage = np.mean((points + 1.96 * stderrs >= c) & (points - 1.96 * stderrs <= c))
rmse = np.sqrt(np.mean((points - c)**2))
bias = np.abs(np.mean(points) - c)
std = np.std(points)
mean_stderr = np.mean(stderrs)
mean_length = np.mean(2 * 1.96 * stderrs)
median_length = np.median(2 * 1.96 * stderrs)
print(f"Coverage: {coverage:.3f}")
print(f"RMSE: {rmse:.3f}")
print(f"Bias: {bias:.3f}")
print(f"Std: {std:.3f}")
print(f"Mean CI length: {mean_length:.3f}")
# Report the median here (the mean was printed twice before).
print(f"Median CI length: {median_length:.3f}")
print(f"Mean Estimated Stderr: {mean_stderr:.3f}")

In [None]:
# Draw a fresh sample and fit ProximalDE once, then print its summary.
Wtilde, Dtilde, _, Ztilde, Xtilde, Ytilde = generator.sample(nsamples, a, b, c, g, replace=True)

# we find that the dual violation still exists, causing a slight bias (the true
# value we should recover is c)
est = ProximalDE(cv=3, semi=True, n_jobs=-1, random_state=3, verbose=3)
est.fit(Wtilde, Dtilde, Ztilde, Xtilde, Ytilde)
est.summary()

In [None]:
def exp_res(it, generator, n, a, b, c, g, *, sy=1.0,
            dual_type='Z', ivreg_type='adv', n_splits=3, semi=True,
            n_jobs=-1, verbose=0):
    """One seeded ProximalDE replication on a fresh generator sample.

    NOTE: this redefines the OLS-based ``exp_res`` from the earlier cell.

    Seeds NumPy's global RNG with ``it``, draws ``n`` samples, fits
    ``ProximalDE`` with the given options and runs its four diagnostic tests
    at alpha=0.05 plus the robust confidence interval on [-2, 2].

    Returns
    -------
    An 18-tuple: point, stderr, r2D, r2Z, r2X, r2Y, id-strength statistic and
    critical value, pre-estimate point and stderr, primal statistic and
    critical value, dual statistic and critical value, weak-IV statistic and
    critical value, and the robust interval bounds (lb, ub).
    """
    np.random.seed(it)

    # The third output (the mediator) is treated as unobserved and dropped.
    Wt, Dt, _, Zt, Xt, Yt = generator.sample(n, a, b, c, g, sy=sy, replace=True)

    model = ProximalDE(cv=n_splits, semi=semi,
                       dual_type=dual_type, ivreg_type=ivreg_type,
                       n_jobs=n_jobs, random_state=it, verbose=verbose)
    model.fit(Wt, Dt, Zt, Xt, Yt)

    # Each test yields four values; keep the statistic and the critical value.
    wiv, _, _, wiv_cv = model.weakiv_test(alpha=0.05)
    ids, _, _, ids_cv = model.idstrength_violation_test(alpha=0.05)
    primal, _, _, primal_cv = model.primal_violation_test(alpha=0.05)
    dual, _, _, dual_cv = model.dual_violation_test(alpha=0.05)
    lower, upper = model.robust_conf_int(lb=-2, ub=2)

    return (model.point_, model.stderr_,
            model.r2D_, model.r2Z_, model.r2X_, model.r2Y_,
            ids, ids_cv,
            model.point_pre_, model.stderr_pre_,
            primal, primal_cv, dual, dual_cv,
            wiv, wiv_cv,
            lower, upper)

In [None]:
# 100 ProximalDE replications in parallel; replication i is seeded with i
# inside exp_res, and each worker runs the estimator with n_jobs=1.
results = Parallel(n_jobs=-1, verbose=3)(delayed(exp_res)(i, generator, nsamples,
                                                          a, b, c, g,
                                                          dual_type='Z', ivreg_type='adv',
                                                          n_splits=3, semi=True, n_jobs=1)
                                          for i in range(100))

In [None]:
# Unpack the 18-tuples returned by exp_res into one array per quantity.
# (The rmse* names hold the estimator's r2*_ attributes.)
points_base, stderrs_base, rmseD, rmseZ, rmseX, rmseY, \
    idstr, idstr_crit, points_alt, stderrs_alt, \
    pval, pval_crit, dval, dval_crit, wiv_stat, wiv_crit, \
    rlb, rub = map(np.array, zip(*results))

print("Estimation Quality")
for name, points, stderrs in [('Debiased', points_base, stderrs_base), ('Regularized', points_alt, stderrs_alt)]:
    print(f"\n{name} Estimate")
    # Coverage of the nominal 95% interval point +/- 1.96 * stderr.
    coverage = np.mean((points + 1.96 * stderrs >= c) & (points - 1.96 * stderrs <= c))
    rmse = np.sqrt(np.mean((points - c)**2))
    bias = np.abs(np.mean(points) - c)
    std = np.std(points)
    mean_stderr = np.mean(stderrs)
    mean_length = np.mean(2 * 1.96 * stderrs)
    median_length = np.median(2 * 1.96 * stderrs)
    print(f"Coverage: {coverage:.3f}")
    print(f"RMSE: {rmse:.3f}")
    print(f"Bias: {bias:.3f}")
    print(f"Std: {std:.3f}")
    print(f"Mean CI length: {mean_length:.3f}")
    # Report the median here (the mean was printed twice before).
    print(f"Median CI length: {median_length:.3f}")
    print(f"Mean Estimated Stderr: {mean_stderr:.3f}")
    print(f"Nuisance R^2 (D, Z, X, Y): {np.mean(rmseD):.3f}, {np.mean(rmseZ):.3f}, {np.mean(rmseX):.3f}, {np.mean(rmseY):.3f}")

print("\nRobust ConfInt Coverage")
rcoverage = np.mean((rub >= c) & (rlb <= c))
print(f"Robust Coverage: {rcoverage:.3f}")

print("\nViolations")
# Strength tests are violated when the statistic falls at or below its critical value ...
for name, stat, crit in [('Id-Strenth', idstr, idstr_crit), ('WeakIV F-test', wiv_stat, wiv_crit)]:
    violation = np.mean(stat <= crit)
    print(f"% Violations of {name}: {violation:.3f}")
# ... existence tests when the statistic reaches or exceeds it.
for name, stat, crit in [('Primal Existence', pval, pval_crit), ('Dual Existence', dval, dval_crit)]:
    violation = np.mean(stat >= crit)
    print(f"% Violations of {name}: {violation:.3f}")