# deep-inference E2E User Test

Simulates a new user installing and testing `deep-inference` from PyPI.

Benchmarks **Neural Network** (with Influence Function correction) against **Oracle** (correctly-specified logistic regression).

## 1. Installation & Setup

In [1]:
# Install from PyPI
!pip install 'deep-inference>=0.1.1' --quiet

import deep_inference
print("deep-inference installed successfully!")

deep-inference installed successfully!


In [2]:
import numpy as np
import warnings
from scipy.special import expit
import statsmodels.api as sm
from deep_inference import structural_dml

warnings.filterwarnings('ignore')
print("All imports successful!")

All imports successful!


## 2. Configuration

In [3]:
# DGP
A0, A1 = 1.0, 0.3      # alpha(X) = 1.0 + 0.3*X
B0, B1 = 0.5, 0.2      # beta(X) = 0.5 + 0.2*X
MU_TRUE = 0.5          # E[beta(X)] = 0.5

# Oracle MC
M_ORACLE = 100         # replications
N = 1000               # sample size

# NN (single run - MC too slow)
NN_SEED = 42
EPOCHS = 100
N_FOLDS = 50
HIDDEN_DIMS = [64, 32]
LR = 0.01
LAMBDA_METHOD = 'aggregate'  # CRITICAL for logit!

print("CONFIG:")
print(f"  DGP: P(Y=1) = sigmoid(alpha(X) + beta(X)*T)")
print(f"  alpha(X) = {A0} + {A1}*X")
print(f"  beta(X) = {B0} + {B1}*X")
print(f"  TRUE MU* = {MU_TRUE}")
print(f"  Oracle: M={M_ORACLE}, N={N}")
print(f"  NN: seed={NN_SEED}, epochs={EPOCHS}, n_folds={N_FOLDS}, hidden={HIDDEN_DIMS}")

CONFIG:
  DGP: P(Y=1) = sigmoid(alpha(X) + beta(X)*T)
  alpha(X) = 1.0 + 0.3*X
  beta(X) = 0.5 + 0.2*X
  TRUE MU* = 0.5
  Oracle: M=100, N=1000
  NN: seed=42, epochs=100, n_folds=50, hidden=[64, 32]


## 3. Oracle Monte Carlo (M=100)

In [4]:
oracle_results = []

for seed in range(1, M_ORACLE + 1):
    np.random.seed(seed)
    X = np.random.normal(0, 1, N)
    T = np.random.normal(0, 1, N)
    p = expit((A0 + A1*X) + (B0 + B1*X)*T)
    Y = np.random.binomial(1, p).astype(float)
    
    X_bar = X.mean()
    X_design = np.column_stack([np.ones(N), X, T, X*T])
    model = sm.Logit(Y, X_design).fit(disp=0)
    b0, b1 = model.params[2], model.params[3]
    cov = model.cov_params()
    
    mu = b0 + b1*X_bar
    var_naive = cov[2,2] + X_bar**2*cov[3,3] + 2*X_bar*cov[2,3]
    se_naive = np.sqrt(max(var_naive, 1e-10))
    var_delta = var_naive + b1**2*(X.var(ddof=1)/N)
    se_delta = np.sqrt(max(var_delta, 1e-10))
    
    oracle_results.append({
        'seed': seed, 'mu': mu,
        'se_naive': se_naive, 'se_delta': se_delta,
        'covers_naive': (mu - 1.96*se_naive) <= MU_TRUE <= (mu + 1.96*se_naive),
        'covers_delta': (mu - 1.96*se_delta) <= MU_TRUE <= (mu + 1.96*se_delta),
        'bias': mu - MU_TRUE
    })

print(f"Oracle MC complete: {len(oracle_results)} replications")

Oracle MC complete: 100 replications


In [5]:
# Full Oracle Table (first 20 rows)
print("ORACLE MC TABLE (first 20 of 100):")
print(f"{'Seed':<6} {'mu_hat':<10} {'SE_naive':<10} {'SE_delta':<10} {'Cov_N':<8} {'Cov_D':<8} {'Bias':<10}")
print("-"*70)
for r in oracle_results[:20]:
    print(f"{r['seed']:<6} {r['mu']:<10.5f} {r['se_naive']:<10.5f} {r['se_delta']:<10.5f} {str(r['covers_naive']):<8} {str(r['covers_delta']):<8} {r['bias']:<10.5f}")

ORACLE MC TABLE (first 20 of 100):
Seed   mu_hat     SE_naive   SE_delta   Cov_N    Cov_D    Bias      
----------------------------------------------------------------------
1      0.58114    0.07715    0.07722    True     True     0.08114   
2      0.54577    0.08005    0.08084    True     True     0.04577   
3      0.62790    0.07943    0.07978    True     True     0.12790   
4      0.63073    0.08381    0.08441    True     True     0.13073   
5      0.65105    0.08182    0.08217    True     True     0.15105   
6      0.56871    0.08125    0.08160    True     True     0.06871   
7      0.47376    0.07512    0.07512    True     True     -0.02624  
8      0.54588    0.07955    0.07966    True     True     0.04588   
9      0.65647    0.07871    0.07904    False    False    0.15647   
10     0.43255    0.07664    0.07710    True     True     -0.06745  
11     0.46651    0.07549    0.07588    True     True     -0.03349  
12     0.49210    0.08104    0.08118    True     True     -0.00790

In [6]:
# Oracle Summary
mus = np.array([r['mu'] for r in oracle_results])
ses_naive = np.array([r['se_naive'] for r in oracle_results])
ses_delta = np.array([r['se_delta'] for r in oracle_results])
covers_naive = np.array([r['covers_naive'] for r in oracle_results])
covers_delta = np.array([r['covers_delta'] for r in oracle_results])
biases = np.array([r['bias'] for r in oracle_results])

print("="*60)
print("ORACLE SUMMARY")
print("="*60)
print(f"Mean estimate: {mus.mean():.6f}")
print(f"Empirical SE: {mus.std():.6f}")
print(f"Mean bias: {biases.mean():.6f}")
print()
print(f"Naive SE:")
print(f"  Mean: {ses_naive.mean():.6f}")
print(f"  SE Ratio (est/emp): {ses_naive.mean()/mus.std():.3f}")
print(f"  Coverage: {covers_naive.sum()}/{M_ORACLE} = {100*covers_naive.mean():.1f}%")
print()
print(f"Delta SE:")
print(f"  Mean: {ses_delta.mean():.6f}")
print(f"  SE Ratio (est/emp): {ses_delta.mean()/mus.std():.3f}")
print(f"  Coverage: {covers_delta.sum()}/{M_ORACLE} = {100*covers_delta.mean():.1f}%")

ORACLE SUMMARY
Mean estimate: 0.507969
Empirical SE: 0.079177
Mean bias: 0.007969

Naive SE:
  Mean: 0.077706
  SE Ratio (est/emp): 0.981
  Coverage: 98/100 = 98.0%

Delta SE:
  Mean: 0.077970
  SE Ratio (est/emp): 0.985
  Coverage: 98/100 = 98.0%


## 4. Neural Network (Single Run)

In [7]:
# Generate data for NN
np.random.seed(NN_SEED)
X = np.random.normal(0, 1, N)
T = np.random.normal(0, 1, N)
alpha_true = A0 + A1*X
beta_true = B0 + B1*X
p = expit(alpha_true + beta_true*T)
Y = np.random.binomial(1, p).astype(float)

print(f"Data: n={N}, Y_mean={Y.mean():.3f}, X_mean={X.mean():.4f}")
print(f"Running NN (this takes ~1-2 min)...")

Data: n=1000, Y_mean=0.730, X_mean=0.0193
Running NN (this takes ~1-2 min)...


In [8]:
nn = structural_dml(
    Y=Y, T=T, X=X.reshape(-1, 1),
    family='logit',
    lambda_method=LAMBDA_METHOD,
    epochs=EPOCHS,
    n_folds=N_FOLDS,
    hidden_dims=HIDDEN_DIMS,
    lr=LR,
    verbose=False
)
print("NN complete!")

NN complete!


In [9]:
# NN Results
beta_hat = nn.theta_hat[:, 1]
alpha_hat = nn.theta_hat[:, 0]
mu_naive_nn = beta_hat.mean()
se_naive_nn = beta_hat.std() / np.sqrt(N)
ci_naive_lo = mu_naive_nn - 1.96*se_naive_nn
ci_naive_hi = mu_naive_nn + 1.96*se_naive_nn
covers_naive_nn = ci_naive_lo <= MU_TRUE <= ci_naive_hi

print("="*60)
print("NN RESULTS")
print("="*60)
print(f"mu_naive (mean beta_hat): {mu_naive_nn:.6f}")
print(f"mu_hat (IF corrected): {nn.mu_hat:.6f}")
print(f"se_naive (std/sqrt(n)): {se_naive_nn:.6f}")
print(f"se (IF): {nn.se:.6f}")
print(f"CI_naive: [{ci_naive_lo:.6f}, {ci_naive_hi:.6f}]")
print(f"CI_IF: [{nn.ci_lower:.6f}, {nn.ci_upper:.6f}]")
print(f"Covers_naive: {covers_naive_nn}")
print(f"Covers_IF: {nn.ci_lower <= MU_TRUE <= nn.ci_upper}")
print(f"Bias_naive: {mu_naive_nn - MU_TRUE:.6f}")
print(f"Bias_IF: {nn.mu_hat - MU_TRUE:.6f}")

NN RESULTS
mu_naive (mean beta_hat): 0.534380
mu_hat (IF corrected): 0.530171
se_naive (std/sqrt(n)): 0.010140
se (IF): 0.073615
CI_naive: [0.514507, 0.554254]
CI_IF: [0.385885, 0.674457]
Covers_naive: False
Covers_IF: True
Bias_naive: 0.034380
Bias_IF: 0.030171


In [10]:
# NN Diagnostics
print("="*60)
print("NN DIAGNOSTICS")
print("="*60)
print(f"min_lambda_eigenvalue: {nn.diagnostics['min_lambda_eigenvalue']:.6f}")
print(f"mean_lambda_eigenvalue: {nn.diagnostics['mean_lambda_eigenvalue']:.6f}")
print(f"mean_cond_number: {nn.diagnostics['mean_cond_number']:.6f}")
print(f"n_regularized: {nn.diagnostics['n_regularized']}")
print(f"pct_regularized: {nn.diagnostics['pct_regularized']:.2f}%")
print(f"correction_mean: {nn.diagnostics['correction_mean']:.6f}")
print(f"correction_std: {nn.diagnostics['correction_std']:.6f}")
print(f"correction_ratio: {nn.diagnostics['correction_ratio']:.2f}")
print(f"three_way: {nn.diagnostics['three_way']}")
print(f"n_folds: {nn.diagnostics['n_folds']}")

NN DIAGNOSTICS
min_lambda_eigenvalue: 0.117793
mean_lambda_eigenvalue: 0.158309
mean_cond_number: 1.329752
n_regularized: 0
pct_regularized: 0.00%
correction_mean: 0.004209
correction_std: 2.400486
correction_ratio: 32.61
three_way: True
n_folds: 50


In [11]:
# Parameter Recovery
corr_alpha = np.corrcoef(alpha_true, alpha_hat)[0,1]
corr_beta = np.corrcoef(beta_true, beta_hat)[0,1]

print("="*60)
print("NN PARAMETER RECOVERY")
print("="*60)
print(f"Corr(true_alpha, alpha_hat): {corr_alpha:.4f}")
print(f"Corr(true_beta, beta_hat): {corr_beta:.4f}")
print(f"mean(alpha_hat): {alpha_hat.mean():.6f} (true: {alpha_true.mean():.6f})")
print(f"mean(beta_hat): {beta_hat.mean():.6f} (true: {beta_true.mean():.6f})")

NN PARAMETER RECOVERY
Corr(true_alpha, alpha_hat): 0.6141
Corr(true_beta, beta_hat): 0.5423
mean(alpha_hat): 1.005534 (true: 1.005800)
mean(beta_hat): 0.534380 (true: 0.503866)


## 5. Final Comparison

In [12]:
print("="*100)
print("FINAL COMPARISON TABLE")
print("="*100)
print(f"TRUE MU* = {MU_TRUE}")
print()
print(f"{'Method':<20} {'Estimate':<12} {'SE':<12} {'CI_lower':<12} {'CI_upper':<12} {'Covers':<10} {'Bias':<12} {'Coverage(MC)':<12}")
print("-"*100)
print(f"{'Oracle_Naive':<20} {mus.mean():<12.6f} {ses_naive.mean():<12.6f} {'--':<12} {'--':<12} {'--':<10} {biases.mean():<12.6f} {100*covers_naive.mean():<11.1f}%")
print(f"{'Oracle_Delta':<20} {mus.mean():<12.6f} {ses_delta.mean():<12.6f} {'--':<12} {'--':<12} {'--':<10} {biases.mean():<12.6f} {100*covers_delta.mean():<11.1f}%")
print(f"{'NN_Naive':<20} {mu_naive_nn:<12.6f} {se_naive_nn:<12.6f} {ci_naive_lo:<12.6f} {ci_naive_hi:<12.6f} {str(covers_naive_nn):<10} {mu_naive_nn-MU_TRUE:<12.6f} {'N/A':<12}")
print(f"{'NN_IF':<20} {nn.mu_hat:<12.6f} {nn.se:<12.6f} {nn.ci_lower:<12.6f} {nn.ci_upper:<12.6f} {str(nn.ci_lower <= MU_TRUE <= nn.ci_upper):<10} {nn.mu_hat-MU_TRUE:<12.6f} {'N/A':<12}")
print("-"*100)

FINAL COMPARISON TABLE
TRUE MU* = 0.5

Method               Estimate     SE           CI_lower     CI_upper     Covers     Bias         Coverage(MC)
----------------------------------------------------------------------------------------------------
Oracle_Naive         0.507969     0.077706     --           --           --         0.007969     98.0       %
Oracle_Delta         0.507969     0.077970     --           --           --         0.007969     98.0       %
NN_Naive             0.534380     0.010140     0.514507     0.554254     False      0.034380     N/A         
NN_IF                0.530171     0.073615     0.385885     0.674457     True       0.030171     N/A         
----------------------------------------------------------------------------------------------------


In [13]:
print("="*60)
print("SE COMPARISON")
print("="*60)
print(f"Oracle Delta SE (mean): {ses_delta.mean():.6f}")
print(f"NN IF SE: {nn.se:.6f}")
print(f"Ratio (NN_IF / Oracle_Delta): {nn.se / ses_delta.mean():.3f}")
print()
print(f"NN Naive SE: {se_naive_nn:.6f}")
print(f"Ratio (NN_IF / NN_Naive): {nn.se / se_naive_nn:.1f}x")
print()
print("="*60)
print("VERDICT")
print("="*60)
print(f"Oracle Coverage: {100*covers_delta.mean():.0f}% (target: 95%)")
print(f"NN Naive Coverage: SINGLE RUN - {covers_naive_nn} (expected: often False)")
print(f"NN IF Coverage: SINGLE RUN - {nn.ci_lower <= MU_TRUE <= nn.ci_upper} (expected: usually True)")
print(f"NN IF SE matches Oracle: {nn.se / ses_delta.mean():.2f}x (target: ~1.0)")

SE COMPARISON
Oracle Delta SE (mean): 0.077970
NN IF SE: 0.073615
Ratio (NN_IF / Oracle_Delta): 0.944

NN Naive SE: 0.010140
Ratio (NN_IF / NN_Naive): 7.3x

VERDICT
Oracle Coverage: 98% (target: 95%)
NN Naive Coverage: SINGLE RUN - False (expected: often False)
NN IF Coverage: SINGLE RUN - True (expected: usually True)
NN IF SE matches Oracle: 0.94x (target: ~1.0)
