### 5.4 Randomization Mode: Compute Λ Instead of Estimating

For **randomized experiments** where T is independent of X and has known distribution $F_T$:

- **Regime A**: Λ(x) can be **computed** via Monte Carlo integration
- Faster, more stable (no Λ network to train)
- Uses **2-way cross-fitting** (not 3-way)

In [None]:
# Regime A demo: T_logit was drawn from N(0, 1) independently of X_logit, so
# the known treatment distribution is handed to inference() together with
# is_randomized=True.

print("Running inference() with randomization mode...")
print("(T is randomized ~ N(0,1), independent of X)")

result_rct = inference(
    Y=Y_logit,
    T=T_logit,
    X=X_logit.reshape(-1, 1),
    model='logit',
    target='beta',
    is_randomized=True,
    treatment_dist=Normal(mean=0.0, std=1.0),  # same law T_logit was sampled from
    n_folds=N_FOLDS,
    epochs=EPOCHS,
    verbose=True,  # Shows "Regime A" detection
)

# Pull out the pieces that are reported more than once.
rct_rule = "=" * 60
rct_lo, rct_hi = result_rct.ci_lower, result_rct.ci_upper
rct_diag = result_rct.diagnostics

print("\n" + rct_rule)
print("RANDOMIZATION MODE (REGIME A) RESULTS")
print(rct_rule)
print(f"True mu* = {MU_TRUE_LOGIT}")
print(f"Estimate: {result_rct.mu_hat:.4f}")
print(f"SE: {result_rct.se:.4f}")
print(f"95% CI: [{rct_lo:.4f}, {rct_hi:.4f}]")
print(f"Covers true: {rct_lo <= MU_TRUE_LOGIT <= rct_hi}")
print("\nDiagnostics:")
print(f"  Regime: {rct_diag.get('regime', 'N/A')}")
print(f"  Lambda method: {rct_diag.get('lambda_method', 'N/A')}")
print(rct_rule)
print("\nBenefits of Regime A:")
for benefit_line in (
    "  - Lambda computed via MC integration (not estimated)",
    "  - 2-way cross-fitting (not 3-way)",
    "  - More stable, often faster",
):
    print(benefit_line)

In [None]:
import torch

# Define custom target: Average Prediction at t=0
def prediction_target(x, theta, t_tilde):
    """Return sigma(theta[0] + theta[1] * t_tilde), the logit-model prediction.

    ``theta[0]`` plays the role of the intercept alpha and ``theta[1]`` the
    slope beta. ``x`` is part of the target-function signature but is not
    used in the computation.
    """
    intercept, slope = theta[0], theta[1]
    return torch.sigmoid(intercept + slope * t_tilde)

# Estimate the custom target (prediction at t_tilde = 0) by passing
# prediction_target through target_fn.
print("Running inference() with custom target (Average Prediction at T=0)...")
result_custom = inference(
    Y=Y_logit,
    T=T_logit,
    X=X_logit.reshape(-1, 1),
    model='logit',
    target_fn=prediction_target,
    t_tilde=0.0,
    n_folds=N_FOLDS,
    epochs=EPOCHS,
    verbose=False,
)

# Benchmark from the known DGP: sample mean of sigma(alpha(X_i))
oracle_pred = expit(alpha_logit).mean()

custom_rule = "=" * 60
custom_lo = result_custom.ci_lower
custom_hi = result_custom.ci_upper
custom_report = [
    "\n" + custom_rule,
    "CUSTOM TARGET: Average Prediction at T=0",
    custom_rule,
    f"Oracle E[σ(α)]: {oracle_pred:.4f}",
    f"inference():    {result_custom.mu_hat:.4f}",
    f"SE: {result_custom.se:.4f}",
    f"95% CI: [{custom_lo:.4f}, {custom_hi:.4f}]",
    f"Covers oracle: {custom_lo <= oracle_pred <= custom_hi}",
    custom_rule,
    "\nNote: The Jacobian of H w.r.t. θ was computed via autodiff!",
]
print("\n".join(custom_report))

### 5.3 Custom Target Functions

You can define any target function `h(x, theta, t_tilde)` and the package will compute its Jacobian via autodiff.

Example: **Average Prediction** at $\tilde{t}=0$

$$\mu^* = \mathbb{E}[H(X, \theta(X); \tilde{t})] = \mathbb{E}[\sigma(\alpha(X) + \beta(X) \cdot 0)] = \mathbb{E}[\sigma(\alpha(X))]$$

In [None]:
# Average marginal effect through inference(target='ame'), evaluated at T = 0
print("Running inference() with target='ame'...")
result_ame = inference(
    Y=Y_logit,
    T=T_logit,
    X=X_logit.reshape(-1, 1),
    model='logit',
    target='ame',
    t_tilde=0.0,  # treatment level at which the AME is evaluated
    n_folds=N_FOLDS,
    epochs=EPOCHS,
    verbose=False,
)

# Oracle AME from the true DGP functions on this sample:
# mean of p(1-p) * beta with p = sigma(alpha), i.e. the probability at t = 0
p_at_0 = expit(alpha_logit)
marginal_effect_at_0 = p_at_0 * (1 - p_at_0) * beta_logit
oracle_ame = marginal_effect_at_0.mean()

ame_rule = "=" * 60
ame_lo, ame_hi = result_ame.ci_lower, result_ame.ci_upper
print(
    "\n" + ame_rule,
    "AVERAGE MARGINAL EFFECT (AME) RESULTS",
    ame_rule,
    f"Oracle AME (true): {oracle_ame:.4f}",
    f"inference() AME:   {result_ame.mu_hat:.4f}",
    f"SE: {result_ame.se:.4f}",
    f"95% CI: [{ame_lo:.4f}, {ame_hi:.4f}]",
    f"Covers oracle: {ame_lo <= oracle_ame <= ame_hi}",
    ame_rule,
    "\nNote: AME ≈ 0.25 × 0.5 = 0.125 (since p(1-p) ≈ 0.25 at p≈0.5)",
    sep="\n",
)

### 5.2 Flexible Targets: Average Marginal Effect (AME)

For logit models, the **Average Marginal Effect** captures the effect on probability (not log-odds):

$$\text{AME} = \mathbb{E}[p(1-p) \cdot \beta(X)]$$

where $p = \sigma(\alpha(X) + \beta(X) \cdot \tilde{t})$ is evaluated at the target treatment level $\tilde{t}=0$.

In [None]:
# Fit the same logit data with both entry points and tabulate the results
print("Running structural_dml() (legacy API)...")
result_old = structural_dml(
    Y=Y_logit,
    T=T_logit,
    X=X_logit.reshape(-1, 1),
    family='logit',
    n_folds=N_FOLDS,
    epochs=EPOCHS,
    # lambda_method is not passed; per the original note its default is
    # 'ridge' (96% coverage)
    verbose=False,
)

print("Running inference() (new API)...")
result_new = inference(
    Y=Y_logit,
    T=T_logit,
    X=X_logit.reshape(-1, 1),
    model='logit',
    target='beta',
    n_folds=N_FOLDS,
    epochs=EPOCHS,
    verbose=True,  # Show regime detection
)

cmp_rule = "=" * 60
print("\n" + cmp_rule)
print("COMPARISON: structural_dml() vs inference()")
print(cmp_rule)
print(f"True mu* = {MU_TRUE_LOGIT}")
print(f"\n{'API':<20} {'Estimate':>12} {'SE':>10} {'CI_lo':>10} {'CI_hi':>10}")
print("-" * 60)
# One table row per API, in the same column layout as the header above
for api_label, api_result in (
    ('structural_dml()', result_old),
    ('inference()', result_new),
):
    print(
        f"{api_label:<20} {api_result.mu_hat:>12.4f} {api_result.se:>10.4f} "
        f"{api_result.ci_lower:>10.4f} {api_result.ci_upper:>10.4f}"
    )
print(cmp_rule)

### 5.1 Basic Usage: `inference()` vs `structural_dml()`

Both APIs should produce similar results for `target='beta'`:

In [None]:
# Import the new inference API
from deep_inference import inference
from deep_inference.lambda_.compute import Normal

# Re-generate Logit data for comparison
# Reseeding with SEED and drawing X, T, Y in the same order as the Section 2
# logit cell reproduces that cell's sample under separate *_logit names.
# The coefficients are written out literally (alpha: 0.0, 0.3; beta: 0.5, 0.2)
# because A0/A1/B0/B1 are reassigned by the later Gaussian and Poisson cells.
np.random.seed(SEED)
X_logit = np.random.normal(0, 1, N)
T_logit = np.random.normal(0, 1, N)
alpha_logit = 0.0 + 0.3 * X_logit  # true alpha(x) evaluated on the sample
beta_logit = 0.5 + 0.2 * X_logit   # true beta(x) evaluated on the sample
prob_logit = expit(alpha_logit + beta_logit * T_logit)
Y_logit = np.random.binomial(1, prob_logit).astype(float)

print("New inference() API loaded!")
# MU_TRUE_LOGIT is defined in the Section 2 logit DGP cell, which must have run first
print(f"Using same Logit DGP: True mu* = {MU_TRUE_LOGIT}")

# Deep Inference: Package Showcase

This tutorial demonstrates the `deep_inference` package for structural deep learning with valid inference.

**What this package does:**
- Estimates heterogeneous structural parameters θ(x) using neural networks
- Provides valid 95% confidence intervals via influence functions
- Implements the Farrell, Liang, and Misra (2021, 2025) framework

**What we'll show:**
- 4 model families: Linear, Logit, Gaussian, Poisson
- For each: compare NN estimates vs Oracle (closed-form)
- Demonstrate that influence functions correct for regularization bias

In [None]:
# Third-party and stdlib dependencies for the whole notebook.
import numpy as np
import pandas as pd  # NOTE(review): no `pd.` usage in the visible cells — confirm before removing
import statsmodels.api as sm  # oracle OLS / Logit / Poisson fits
from scipy.special import expit  # logistic sigmoid used by the logit DGPs
import warnings
# Blanket filter: silences every warning category for the rest of the session.
warnings.filterwarnings('ignore')

# Add package to path
# Prepends the parent directory and ../src so `deep_inference` can be imported;
# presumably this targets a source checkout — TODO confirm the repo layout.
import sys
sys.path.insert(0, '..')
sys.path.insert(0, '../src')

from deep_inference import structural_dml

print("Package loaded successfully!")

In [None]:
# Shared configuration for every family section in this notebook
N: int = 2000       # observations per simulated dataset
SEED: int = 42      # value passed to np.random.seed before each data draw
N_FOLDS: int = 30   # n_folds argument of the cross-fitted estimators
EPOCHS: int = 100   # epochs argument of the estimators

def print_comparison(oracle_results, nn_results, mu_true, title):
    """Print a results table for oracle and NN estimators against the truth.

    Parameters
    ----------
    oracle_results, nn_results : dict
        Map a method label to a dict with keys ``'estimate'``, ``'se'``,
        ``'ci_lo'`` and ``'ci_hi'``. Oracle rows are printed first.
    mu_true : float
        True target value; a row is marked ``YES`` when
        ``ci_lo <= mu_true <= ci_hi``, otherwise ``NO``.
    title : str
        Heading printed between two 70-character rules.
    """
    heavy_rule = "=" * 70
    header = (
        f"{'Method':<20} {'Estimate':>12} {'SE':>10} "
        f"{'CI_lo':>10} {'CI_hi':>10} {'Covers':>8}"
    )

    print("\n" + heavy_rule)
    print(title)
    print(heavy_rule)
    print(f"True μ* = {mu_true:.6f}")
    print("\n" + header)
    print("-" * 70)

    # Materialise both groups up front so the rows come out oracle-first.
    rows = list(oracle_results.items()) + list(nn_results.items())
    for method, row in rows:
        verdict = 'YES' if row['ci_lo'] <= mu_true <= row['ci_hi'] else 'NO'
        print(
            f"{method:<20} {row['estimate']:>12.6f} {row['se']:>10.6f} "
            f"{row['ci_lo']:>10.4f} {row['ci_hi']:>10.4f} {verdict:>8}"
        )

    print(heavy_rule)

---

## 1. Linear Family

**Model:** $Y = \alpha(X) + \beta(X) \cdot T + \varepsilon$

**Target:** $\mu^* = \mathbb{E}[\beta(X)]$ (Average Treatment Effect)

**Oracle:** OLS with interaction terms

In [None]:
# === LINEAR FAMILY ===
print("\n" + "#"*70, "# LINEAR FAMILY", "#"*70, sep="\n")

# Coefficients of the data-generating process
A0 = 1.0     # alpha(x) = A0 + A1*x
A1 = 0.3
B0 = 0.5     # beta(x) = B0 + B1*x
B1 = 0.2
SIGMA = 1.0  # standard deviation of the additive noise

# Target: E[beta(X)] = B0 + B1*E[X] = B0, because X is drawn with mean 0
MU_TRUE_LINEAR = B0

# Simulate the sample. The draw order (X, then T, then the noise) is fixed so
# that a given SEED always yields the same data.
np.random.seed(SEED)
X = np.random.normal(loc=0, scale=1, size=N)
T = np.random.normal(loc=0, scale=1, size=N)
alpha_true = A0 + A1 * X
beta_true = B0 + B1 * X
eps = np.random.normal(loc=0, scale=SIGMA, size=N)
Y = alpha_true + beta_true * T + eps

print(
    "\nDGP:",
    f"  alpha*(x) = {A0} + {A1}*x",
    f"  beta*(x) = {B0} + {B1}*x",
    f"  Y = alpha(X) + beta(X)*T + eps, eps ~ N(0, {SIGMA})",
    f"  True mu* = E[beta(X)] = {MU_TRUE_LINEAR}",
    sep="\n",
)

In [None]:
# Oracle: OLS with interaction
# Y = b0 + b1*X + b2*T + b3*X*T
# beta(x) = b2 + b3*x, so the plug-in estimate of E[beta(X)] is b2 + b3*X_bar

X_design = np.column_stack([np.ones(N), X, T, X*T])
ols = sm.OLS(Y, X_design).fit()

b2, b3 = ols.params[2], ols.params[3]
x_bar = X.mean()
mu_oracle = b2 + b3 * x_bar  # approx b2 since E[X] = 0 in the DGP

# Naive SE: just the SE of b2 (ignores the b3*X_bar term altogether)
se_naive = ols.bse[2]

# Delta method SE for g(b2, b3, X_bar) = b2 + b3*X_bar. Two pieces:
#   coefficient part: Var(b2) + X_bar^2 Var(b3) + 2 X_bar Cov(b2, b3)
#   X_bar part:       b3^2 * Var(X) / N
# The X_bar part is what accounts for X_bar being a sample estimate of E[X];
# without it the formula treats X_bar as a fixed number.
cov = ols.cov_params()
var_coef = cov[2, 2] + x_bar**2 * cov[3, 3] + 2 * x_bar * cov[2, 3]
var_xbar = b3**2 * X.var(ddof=1) / N
se_delta = np.sqrt(var_coef + var_xbar)

# Same point estimate under both labels; only the SE (and hence the CI) differs.
oracle_linear = {
    label: {
        'estimate': mu_oracle,
        'se': se,
        'ci_lo': mu_oracle - 1.96*se,
        'ci_hi': mu_oracle + 1.96*se,
    }
    for label, se in (
        ('Oracle (Naive SE)', se_naive),
        ('Oracle (Delta SE)', se_delta),
    )
}

print("Oracle OLS coefficients:")
print(f"  b2 (T coef) = {b2:.6f}")
print(f"  b3 (X*T coef) = {b3:.6f}")
print(f"  mu_oracle = {mu_oracle:.6f}")

In [None]:
# Neural-network fit for the linear family via structural_dml
result_linear = structural_dml(
    Y=Y,
    T=T,
    X=X.reshape(-1, 1),
    family='linear',
    n_folds=N_FOLDS,
    epochs=EPOCHS,
    verbose=False,
)

# Ingredients of the naive interval: spread of column 1 of theta_hat (the
# beta slot in the (alpha, beta, ...) ordering used in this notebook) and sqrt(N).
linear_mu_naive = result_linear.mu_naive
linear_beta_sd = result_linear.theta_hat[:, 1].std()
linear_root_n = np.sqrt(N)

nn_linear = {
    'NN (Naive)': {
        'estimate': linear_mu_naive,
        'se': linear_beta_sd / linear_root_n,
        'ci_lo': linear_mu_naive - 1.96 * linear_beta_sd / linear_root_n,
        'ci_hi': linear_mu_naive + 1.96 * linear_beta_sd / linear_root_n,
    },
    # Estimate, SE and interval exactly as reported by structural_dml
    'NN (IF Corrected)': {
        'estimate': result_linear.mu_hat,
        'se': result_linear.se,
        'ci_lo': result_linear.ci_lower,
        'ci_hi': result_linear.ci_upper,
    },
}

print_comparison(oracle_linear, nn_linear, MU_TRUE_LINEAR, "LINEAR FAMILY RESULTS")

---

## 2. Logit Family

**Model:** $P(Y=1|X,T) = \sigma(\alpha(X) + \beta(X) \cdot T)$

**Target:** $\mu^* = \mathbb{E}[\beta(X)]$ (Average log-odds treatment effect)

**Oracle:** Logistic regression with interaction terms

In [None]:
# === LOGIT FAMILY ===
print("\n" + "#"*70, "# LOGIT FAMILY", "#"*70, sep="\n")

# Coefficients of the data-generating process
A0 = 0.0   # alpha(x) = A0 + A1*x
A1 = 0.3
B0 = 0.5   # beta(x) = B0 + B1*x
B1 = 0.2

# Target: E[beta(X)] = B0 + B1*E[X] = B0, because X is drawn with mean 0
MU_TRUE_LOGIT = B0

# Simulate the sample; draw order (X, T, then the Bernoulli outcomes) is fixed
# so that a given SEED always yields the same data.
np.random.seed(SEED)
X = np.random.normal(loc=0, scale=1, size=N)
T = np.random.normal(loc=0, scale=1, size=N)
alpha_true = A0 + A1 * X
beta_true = B0 + B1 * X
prob = expit(alpha_true + beta_true * T)
Y = np.random.binomial(n=1, p=prob).astype(float)

dgp_lines = [
    "\nDGP:",
    f"  alpha*(x) = {A0} + {A1}*x",
    f"  beta*(x) = {B0} + {B1}*x",
    "  P(Y=1|X,T) = sigmoid(alpha(X) + beta(X)*T)",
    f"  True mu* = E[beta(X)] = {MU_TRUE_LOGIT}",
    f"  Mean(Y) = {Y.mean():.3f}",
]
print("\n".join(dgp_lines))

In [None]:
# Oracle: Logistic regression with interaction
# logit P(Y=1) = b0 + b1*X + b2*T + b3*X*T, so beta(x) = b2 + b3*x and the
# plug-in estimate of E[beta(X)] is b2 + b3*X_bar
X_design = np.column_stack([np.ones(N), X, T, X*T])
logit = sm.Logit(Y, X_design).fit(disp=0)

b2, b3 = logit.params[2], logit.params[3]
x_bar = X.mean()
mu_oracle = b2 + b3 * x_bar

# Naive SE: just the SE of b2
se_naive = logit.bse[2]

# Delta method SE for g(b2, b3, X_bar) = b2 + b3*X_bar. Two pieces:
#   coefficient part: Var(b2) + X_bar^2 Var(b3) + 2 X_bar Cov(b2, b3)
#   X_bar part:       b3^2 * Var(X) / N
# The X_bar part accounts for X_bar being a sample estimate of E[X];
# without it the formula treats X_bar as a fixed number.
cov = logit.cov_params()
var_coef = cov[2, 2] + x_bar**2 * cov[3, 3] + 2 * x_bar * cov[2, 3]
var_xbar = b3**2 * X.var(ddof=1) / N
se_delta = np.sqrt(var_coef + var_xbar)

# Same point estimate under both labels; only the SE (and hence the CI) differs.
oracle_logit = {
    label: {
        'estimate': mu_oracle,
        'se': se,
        'ci_lo': mu_oracle - 1.96*se,
        'ci_hi': mu_oracle + 1.96*se,
    }
    for label, se in (
        ('Oracle (Naive SE)', se_naive),
        ('Oracle (Delta SE)', se_delta),
    )
}

print("Oracle Logit coefficients:")
print(f"  b2 (T coef) = {b2:.6f}")
print(f"  b3 (X*T coef) = {b3:.6f}")
print(f"  mu_oracle = {mu_oracle:.6f}")

In [None]:
# Neural-network fit for the logit family via structural_dml
result_logit = structural_dml(
    Y=Y,
    T=T,
    X=X.reshape(-1, 1),
    family='logit',
    n_folds=N_FOLDS,
    epochs=EPOCHS,
    # lambda_method is not passed; per the original note its default is
    # 'ridge' (96% coverage)
    verbose=False,
)

# Ingredients of the naive interval: spread of column 1 of theta_hat (the
# beta slot in the (alpha, beta, ...) ordering used in this notebook) and sqrt(N).
logit_mu_naive = result_logit.mu_naive
logit_beta_sd = result_logit.theta_hat[:, 1].std()
logit_root_n = np.sqrt(N)

nn_logit = {
    'NN (Naive)': {
        'estimate': logit_mu_naive,
        'se': logit_beta_sd / logit_root_n,
        'ci_lo': logit_mu_naive - 1.96 * logit_beta_sd / logit_root_n,
        'ci_hi': logit_mu_naive + 1.96 * logit_beta_sd / logit_root_n,
    },
    # Estimate, SE and interval exactly as reported by structural_dml
    'NN (IF Corrected)': {
        'estimate': result_logit.mu_hat,
        'se': result_logit.se,
        'ci_lo': result_logit.ci_lower,
        'ci_hi': result_logit.ci_upper,
    },
}

print_comparison(oracle_logit, nn_logit, MU_TRUE_LOGIT, "LOGIT FAMILY RESULTS")

---

## 3. Gaussian Family

**Model:** $Y \sim N(\alpha(X) + \beta(X) \cdot T, \sigma^2(X))$

**Target:** $\mu^* = \mathbb{E}[\beta(X)]$

**Note:** The Gaussian family uses an identity link for the mean (like Linear) but also estimates a heterogeneous standard deviation $\sigma(x)$ via MLE. Here `theta_dim = 3`: $(\alpha, \beta, \gamma)$, where $\sigma = \exp(\gamma)$.

In [None]:
# === GAUSSIAN FAMILY ===
print("\n" + "#"*70, "# GAUSSIAN FAMILY", "#"*70, sep="\n")

# Coefficients of the data-generating process (mean uses an identity link)
A0 = 2.0     # alpha(x) = A0 + A1*x
A1 = 0.3
B0 = 0.5     # beta(x) = B0 + B1*x
B1 = 0.2
SIGMA = 1.0  # noise std, the same for every observation in this DGP

# Target: E[beta(X)] = B0, because X is drawn with mean 0
MU_TRUE_GAUSSIAN = B0

# Simulate the sample; draw order (X, then T, then the noise) is fixed so that
# a given SEED always yields the same data.
np.random.seed(SEED)
X = np.random.normal(loc=0, scale=1, size=N)
T = np.random.normal(loc=0, scale=1, size=N)
alpha_true = A0 + A1 * X
beta_true = B0 + B1 * X
mu_y = alpha_true + beta_true * T  # conditional mean, no link transformation
eps = np.random.normal(loc=0, scale=SIGMA, size=N)
Y = mu_y + eps

dgp_lines = [
    "\nDGP:",
    f"  alpha*(x) = {A0} + {A1}*x",
    f"  beta*(x) = {B0} + {B1}*x",
    "  E[Y|X,T] = alpha(X) + beta(X)*T  (identity link)",
    f"  Y ~ N(mu, sigma^2), sigma = {SIGMA}",
    f"  True mu* = E[beta(X)] = {MU_TRUE_GAUSSIAN}",
    f"  Mean(Y) = {Y.mean():.3f}, Std(Y) = {Y.std():.3f}",
]
print(*dgp_lines, sep="\n")

In [None]:
# Oracle: OLS with interaction (same as linear since identity link)
# Y = b0 + b1*X + b2*T + b3*X*T, so beta(x) = b2 + b3*x and the plug-in
# estimate of E[beta(X)] is b2 + b3*X_bar
X_design = np.column_stack([np.ones(N), X, T, X*T])
ols = sm.OLS(Y, X_design).fit()

b2, b3 = ols.params[2], ols.params[3]
x_bar = X.mean()
mu_oracle = b2 + b3 * x_bar

# Naive SE: just the SE of b2
se_naive = ols.bse[2]

# Delta method SE for g(b2, b3, X_bar) = b2 + b3*X_bar. Two pieces:
#   coefficient part: Var(b2) + X_bar^2 Var(b3) + 2 X_bar Cov(b2, b3)
#   X_bar part:       b3^2 * Var(X) / N
# The X_bar part accounts for X_bar being a sample estimate of E[X];
# without it the formula treats X_bar as a fixed number.
cov = ols.cov_params()
var_coef = cov[2, 2] + x_bar**2 * cov[3, 3] + 2 * x_bar * cov[2, 3]
var_xbar = b3**2 * X.var(ddof=1) / N
se_delta = np.sqrt(var_coef + var_xbar)

# Same point estimate under both labels; only the SE (and hence the CI) differs.
oracle_gaussian = {
    label: {
        'estimate': mu_oracle,
        'se': se,
        'ci_lo': mu_oracle - 1.96*se,
        'ci_hi': mu_oracle + 1.96*se,
    }
    for label, se in (
        ('Oracle (Naive SE)', se_naive),
        ('Oracle (Delta SE)', se_delta),
    )
}

print("Oracle OLS coefficients:")
print(f"  b2 (T coef) = {b2:.6f}")
print(f"  b3 (X*T coef) = {b3:.6f}")
print(f"  mu_oracle = {mu_oracle:.6f}")

In [None]:
# Neural-network fit for the gaussian family via structural_dml.
# theta_hat has a third column here, read below as gamma = log(sigma).
result_gaussian = structural_dml(
    Y=Y,
    T=T,
    X=X.reshape(-1, 1),
    family='gaussian',
    n_folds=N_FOLDS,
    epochs=EPOCHS,
    # lambda_method is not passed; per the original note its default is 'ridge'
    verbose=False,
)

# Ingredients of the naive interval: spread of column 1 of theta_hat (beta in
# the (alpha, beta, gamma) ordering) and sqrt(N).
gaussian_mu_naive = result_gaussian.mu_naive
gaussian_beta_sd = result_gaussian.theta_hat[:, 1].std()
gaussian_root_n = np.sqrt(N)

nn_gaussian = {
    'NN (Naive)': {
        'estimate': gaussian_mu_naive,
        'se': gaussian_beta_sd / gaussian_root_n,
        'ci_lo': gaussian_mu_naive - 1.96 * gaussian_beta_sd / gaussian_root_n,
        'ci_hi': gaussian_mu_naive + 1.96 * gaussian_beta_sd / gaussian_root_n,
    },
    # Estimate, SE and interval exactly as reported by structural_dml
    'NN (IF Corrected)': {
        'estimate': result_gaussian.mu_hat,
        'se': result_gaussian.se,
        'ci_lo': result_gaussian.ci_lower,
        'ci_hi': result_gaussian.ci_upper,
    },
}

print_comparison(oracle_gaussian, nn_gaussian, MU_TRUE_GAUSSIAN, "GAUSSIAN FAMILY RESULTS")

# Average fitted sigma: exponentiate column 2 of theta_hat, then take the mean
gamma_hat = result_gaussian.theta_hat[:, 2]
sigma_hat = np.exp(gamma_hat).mean()
print(f"\nEstimated sigma: {sigma_hat:.4f} (true: {SIGMA})")

---

## 4. Poisson Family

**Model:** $Y \sim \text{Poisson}(\exp(\alpha(X) + \beta(X) \cdot T))$

**Target:** $\mu^* = \mathbb{E}[\beta(X)]$ (Average log-rate treatment effect)

**Oracle:** Poisson regression with interaction terms

In [None]:
# === POISSON FAMILY ===
print("\n" + "#"*70, "# POISSON FAMILY", "#"*70, sep="\n")

# Coefficients of the data-generating process
A0 = 1.5   # alpha(x) = A0 + A1*x
A1 = 0.2
B0 = 0.3   # beta(x) = B0 + B1*x
B1 = 0.1

# Target: E[beta(X)] = B0, because X is drawn with mean 0
MU_TRUE_POISSON = B0

# Simulate the sample; draw order (X, T, then the counts) is fixed so that a
# given SEED always yields the same data.
np.random.seed(SEED)
X = np.random.normal(loc=0, scale=1, size=N)
T = np.random.normal(loc=0, scale=0.5, size=N)  # std 0.5 keeps exp(.) from producing extreme counts
alpha_true = A0 + A1 * X
beta_true = B0 + B1 * X
rate = np.exp(alpha_true + beta_true * T)
Y = np.random.poisson(lam=rate).astype(float)

dgp_lines = [
    "\nDGP:",
    f"  alpha*(x) = {A0} + {A1}*x",
    f"  beta*(x) = {B0} + {B1}*x",
    "  Y ~ Poisson(exp(alpha(X) + beta(X)*T))",
    f"  True mu* = E[beta(X)] = {MU_TRUE_POISSON}",
    f"  Mean(Y) = {Y.mean():.3f}, Var(Y) = {Y.var():.3f}",
]
print("\n".join(dgp_lines))

In [None]:
# Oracle: Poisson regression with interaction
# log E[Y] = b0 + b1*X + b2*T + b3*X*T, so beta(x) = b2 + b3*x and the
# plug-in estimate of E[beta(X)] is b2 + b3*X_bar
X_design = np.column_stack([np.ones(N), X, T, X*T])
poisson = sm.GLM(Y, X_design, family=sm.families.Poisson()).fit()

b2, b3 = poisson.params[2], poisson.params[3]
x_bar = X.mean()
mu_oracle = b2 + b3 * x_bar

# Naive SE: just the SE of b2
se_naive = poisson.bse[2]

# Delta method SE for g(b2, b3, X_bar) = b2 + b3*X_bar. Two pieces:
#   coefficient part: Var(b2) + X_bar^2 Var(b3) + 2 X_bar Cov(b2, b3)
#   X_bar part:       b3^2 * Var(X) / N
# The X_bar part accounts for X_bar being a sample estimate of E[X];
# without it the formula treats X_bar as a fixed number.
cov = poisson.cov_params()
var_coef = cov[2, 2] + x_bar**2 * cov[3, 3] + 2 * x_bar * cov[2, 3]
var_xbar = b3**2 * X.var(ddof=1) / N
se_delta = np.sqrt(var_coef + var_xbar)

# Same point estimate under both labels; only the SE (and hence the CI) differs.
oracle_poisson = {
    label: {
        'estimate': mu_oracle,
        'se': se,
        'ci_lo': mu_oracle - 1.96*se,
        'ci_hi': mu_oracle + 1.96*se,
    }
    for label, se in (
        ('Oracle (Naive SE)', se_naive),
        ('Oracle (Delta SE)', se_delta),
    )
}

print("Oracle Poisson coefficients:")
print(f"  b2 (T coef) = {b2:.6f}")
print(f"  b3 (X*T coef) = {b3:.6f}")
print(f"  mu_oracle = {mu_oracle:.6f}")

In [None]:
# Neural-network fit for the poisson family via structural_dml
result_poisson = structural_dml(
    Y=Y,
    T=T,
    X=X.reshape(-1, 1),
    family='poisson',
    n_folds=N_FOLDS,
    epochs=EPOCHS,
    # lambda_method is not passed; per the original note its default is 'ridge'
    verbose=False,
)

# Ingredients of the naive interval: spread of column 1 of theta_hat (the
# beta slot in the (alpha, beta, ...) ordering used in this notebook) and sqrt(N).
poisson_mu_naive = result_poisson.mu_naive
poisson_beta_sd = result_poisson.theta_hat[:, 1].std()
poisson_root_n = np.sqrt(N)

nn_poisson = {
    'NN (Naive)': {
        'estimate': poisson_mu_naive,
        'se': poisson_beta_sd / poisson_root_n,
        'ci_lo': poisson_mu_naive - 1.96 * poisson_beta_sd / poisson_root_n,
        'ci_hi': poisson_mu_naive + 1.96 * poisson_beta_sd / poisson_root_n,
    },
    # Estimate, SE and interval exactly as reported by structural_dml
    'NN (IF Corrected)': {
        'estimate': result_poisson.mu_hat,
        'se': result_poisson.se,
        'ci_lo': result_poisson.ci_lower,
        'ci_hi': result_poisson.ci_upper,
    },
}

print_comparison(oracle_poisson, nn_poisson, MU_TRUE_POISSON, "POISSON FAMILY RESULTS")

---

## Summary

This tutorial demonstrated the `deep_inference` package on 4 model families.

**Key findings:**
- Oracle (closed-form) estimates provide the benchmark
- NN (Naive) estimates may have regularization bias
- NN (IF Corrected) estimates should match Oracle performance

**The influence function correction:**
- Removes regularization bias from neural network estimates
- Provides valid standard errors for inference
- Enables valid 95% confidence intervals

In [None]:
# Cross-family recap: one row per family with the delta-SE oracle and the
# IF-corrected NN estimate, plus whether each interval contains the truth.
summary_rule = "=" * 80
print("\n" + summary_rule)
print("FINAL SUMMARY: ALL FAMILIES")
print(summary_rule)

family_inputs = (
    ('Linear', MU_TRUE_LINEAR, oracle_linear, nn_linear),
    ('Logit', MU_TRUE_LOGIT, oracle_logit, nn_logit),
    ('Gaussian', MU_TRUE_GAUSSIAN, oracle_gaussian, nn_gaussian),
    ('Poisson', MU_TRUE_POISSON, oracle_poisson, nn_poisson),
)

# Each entry: (family, truth, oracle estimate, NN estimate, oracle covers?, NN covers?)
summary_data = []
for fam_label, fam_truth, fam_oracle, fam_nn in family_inputs:
    oracle_row = fam_oracle['Oracle (Delta SE)']
    nn_row = fam_nn['NN (IF Corrected)']
    summary_data.append((
        fam_label,
        fam_truth,
        oracle_row['estimate'],
        nn_row['estimate'],
        oracle_row['ci_lo'] <= fam_truth <= oracle_row['ci_hi'],
        nn_row['ci_lo'] <= fam_truth <= nn_row['ci_hi'],
    ))

print(f"\n{'Family':<12} {'True mu*':>10} {'Oracle':>12} {'NN IF':>12} {'Oracle CI':>12} {'NN IF CI':>12}")
print("-" * 80)
for family, true, oracle, nn, oracle_cov, nn_cov in summary_data:
    oracle_flag = 'YES' if oracle_cov else 'NO'
    nn_flag = 'YES' if nn_cov else 'NO'
    print(f"{family:<12} {true:>10.4f} {oracle:>12.6f} {nn:>12.6f} {oracle_flag:>12} {nn_flag:>12}")

print("\n" + summary_rule)
print("Tutorial complete!")
print(summary_rule)

---

## 5. New `inference()` API

The new `inference()` API provides additional capabilities beyond `structural_dml()`:

- **Flexible targets**: AME, custom functions with autodiff Jacobians
- **Randomization mode**: Compute Λ instead of estimating it (faster, more stable)
- **Regime auto-detection**: Automatically chooses 2-way vs 3-way cross-fitting

Let's demonstrate these features using the same Logit DGP from Section 2.