# Logit DGP: Neural Network vs Logistic Regression Oracle

**Links:**
- [GitHub](https://github.com/rawatpranjal/deep-inference)
- [PyPI](https://pypi.org/project/deep-inference/)
- [Documentation](https://rawatpranjal.github.io/deep-inference/)

**References:**
- Farrell, Liang, Misra (2021) "Deep Neural Networks for Estimation and Inference" *Econometrica*
- Farrell, Liang, Misra (2025) "Deep Learning for Individual Heterogeneity"

---

This notebook validates `structural_dml` for **logit** models against correctly-specified logistic regression oracles.

## DGP Specification

$$P(Y=1 | X, T) = \sigma(\alpha(X) + \beta(X) \cdot T)$$

where $\sigma(z) = 1/(1 + e^{-z})$ is the sigmoid function.

**Target:** $\mu^* = E[\beta(X)] = 0.5$ (in the simulations below $\beta(X) = 0.5 + 0.2X$ with $X \sim N(0, 1)$, so $E[\beta(X)] = 0.5$).

## Lambda Method

The package defaults to `lambda_method='ridge'`, which achieved 96% coverage in validation. The alternatives are `'aggregate'` and `'lgbm'`.

```python
# Default (ridge) - recommended
result = structural_dml(Y, T, X, family='logit')

# Or explicit
result = structural_dml(Y, T, X, family='logit', lambda_method='ridge')
```

## Section 1: Setup & DGP

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api as sm
from scipy import stats
from scipy.special import expit
import warnings
import sys
from pathlib import Path

# Use local deep_inference: put the sibling ``src`` directory (relative to
# the parent of the current working directory) at the front of sys.path so
# the import below finds it before any other location on the path.
sys.path.insert(0, str(Path.cwd().parent / 'src'))
from deep_inference import structural_dml

# Plot styling and default figure size for the cells that follow.
plt.style.use('seaborn-v0_8-whitegrid')
plt.rcParams['figure.figsize'] = (10, 6)
# Suppress all Python warnings from this point on.
warnings.filterwarnings('ignore')

print("Setup complete!")

In [None]:
# DGP Parameters: both structural functions are linear in X.
A0, A1 = 1.0, 0.3      # alpha(X) = A0 + A1*X = 1.0 + 0.3*X
B0, B1 = 0.5, 0.2      # beta(X)  = B0 + B1*X = 0.5 + 0.2*X
# E[beta(X)] = B0 + B1*E[X] = B0 because X ~ N(0, 1).  Derive the target from
# B0 instead of repeating the literal so the two cannot drift apart.
MU_TRUE = B0

print("DGP: P(Y=1) = sigmoid(alpha(X) + beta(X)*T)")
print(f"  alpha(X) = {A0} + {A1}*X")
print(f"  beta(X)  = {B0} + {B1}*X")
print("  X ~ N(0, 1)")
print(f"  Target mu* = E[beta(X)] = {MU_TRUE}")

In [None]:
def generate_data(n, seed=None):
    """Draw one sample of size ``n`` from the logit DGP.

    X and T are standard-normal draws and
    Y ~ Bernoulli(sigmoid(alpha(X) + beta(X) * T)), where
    alpha(X) = A0 + A1*X and beta(X) = B0 + B1*X.

    Parameters
    ----------
    n : int
        Number of observations to draw.
    seed : int, optional
        When given, NumPy's global random state is re-seeded with this value
        before any draw is made.

    Returns
    -------
    dict
        Observed data under 'Y', 'T', 'X' and the true quantities under
        'alpha', 'beta', 'p'.
    """
    if seed is not None:
        np.random.seed(seed)

    # The draw order (X, then T, then Y) fixes the stream for a given seed.
    X = np.random.normal(loc=0, scale=1, size=n)
    T = np.random.normal(loc=0, scale=1, size=n)

    alpha = A0 + A1 * X
    beta = B0 + B1 * X
    linear_index = alpha + beta * T
    p = expit(linear_index)
    outcome = np.random.binomial(1, p).astype(float)

    return dict(Y=outcome, T=T, X=X, alpha=alpha, beta=beta, p=p)

In [None]:
# Visualize DGP
data = generate_data(1000, seed=42)

fig, axes = plt.subplots(1, 3, figsize=(15, 4))

# Y vs T.  Y is binary, so a small vertical jitter keeps the points from
# collapsing onto two horizontal lines.  The jitter length follows the
# sample size instead of repeating the literal 1000.
ax = axes[0]
jitter = np.random.normal(0, 0.02, len(data['Y']))
scatter = ax.scatter(data['T'], data['Y'] + jitter,
                     c=data['X'], cmap='coolwarm', alpha=0.5, s=10)
plt.colorbar(scatter, ax=ax, label='X')
ax.set_xlabel('T'); ax.set_ylabel('Y'); ax.set_title('Y vs T (colored by X)')

# Structural functions
ax = axes[1]
X_grid = np.linspace(-3, 3, 100)
ax.plot(X_grid, A0 + A1*X_grid, 'b-', label=r'$\alpha(X)$', lw=2)
ax.plot(X_grid, B0 + B1*X_grid, 'r-', label=r'$\beta(X)$', lw=2)
# Raw f-string: '\m' is not a valid escape sequence in a plain (f-)string.
ax.axhline(MU_TRUE, color='gray', ls='--', label=rf'$\mu^*={MU_TRUE}$')
ax.set_xlabel('X'); ax.legend(); ax.set_title('True Structural Functions')

# Distribution of beta
ax = axes[2]
ax.hist(data['beta'], bins=30, alpha=0.7, edgecolor='black')
ax.axvline(MU_TRUE, color='red', ls='--', lw=2, label=rf'$\mu^*={MU_TRUE}$')
ax.set_xlabel(r'$\beta(X)$'); ax.legend(); ax.set_title(r'Distribution of $\beta(X)$')

plt.tight_layout()
plt.show()

## Section 2: Oracle Implementation

Logistic regression oracle: $\text{logit}(P(Y=1 \mid X, T)) = a_0 + a_1 X + b_0 T + b_1 (X \cdot T)$, so that $\hat\alpha(X) = a_0 + a_1 X$ and $\hat\beta(X) = b_0 + b_1 X$.

In [None]:
def logit_oracle(Y, T, X):
    """Fit the correctly specified logit and estimate mu = E[beta(X)].

    The model is logit P(Y=1) = a0 + a1*X + b0*T + b1*(X*T), and the target
    is estimated by ``mu_hat = b0 + b1 * mean(X)``.

    Parameters
    ----------
    Y, T, X : array-like
        Outcome, treatment and covariate, one value per observation.  Each is
        converted to a flat float array, so lists, pandas Series and
        ``(n, 1)`` columns are accepted as well as 1-D arrays.

    Returns
    -------
    dict
        'mu_hat'    : point estimate of E[beta(X)].
        'se_naive'  : SE of b0 + b1*X_bar treating X_bar as a constant.
        'se_delta'  : 'se_naive' plus the term b1^2 * Var(X)/n for the
                      sampling variability of X_bar.
        'params'    : dict with the fitted a0, a1, b0, b1.
        'alpha_hat' : a0 + a1*X for every observation.
        'beta_hat'  : b0 + b1*X for every observation.

    Raises
    ------
    ValueError
        If Y, T and X do not have the same number of observations.
    """
    # Flatten up front: an (n, 1) X would otherwise broadcast X * T to (n, n),
    # and plain ndarrays keep the fitted results array-typed.
    Y = np.asarray(Y, dtype=float).ravel()
    T = np.asarray(T, dtype=float).ravel()
    X = np.asarray(X, dtype=float).ravel()

    n = len(Y)
    if len(T) != n or len(X) != n:
        raise ValueError(
            f"Y, T and X must have the same length; got {n}, {len(T)}, {len(X)}"
        )

    X_bar = X.mean()
    X_design = np.column_stack([np.ones(n), X, T, X * T])
    model = sm.Logit(Y, X_design).fit(disp=0)
    a0, a1, b0, b1 = np.asarray(model.params)
    mu_hat = b0 + b1 * X_bar

    # Positional indexing below requires an ndarray covariance matrix.
    cov = np.asarray(model.cov_params())
    # Var(b0 + X_bar*b1) with X_bar held fixed.
    var_naive = cov[2, 2] + X_bar**2 * cov[3, 3] + 2 * X_bar * cov[2, 3]
    # Add the contribution of estimating X_bar: b1^2 * Var(X_bar).
    var_delta = var_naive + b1**2 * (X.var(ddof=1) / n)

    return {
        'mu_hat': mu_hat,
        # Floor the variances so the square root is always defined.
        'se_naive': np.sqrt(max(var_naive, 1e-10)),
        'se_delta': np.sqrt(max(var_delta, 1e-10)),
        'params': {'a0': a0, 'a1': a1, 'b0': b0, 'b1': b1},
        'alpha_hat': a0 + a1*X,
        'beta_hat': b0 + b1*X
    }

In [None]:
# Smoke-test the oracle on the sample drawn for the DGP plots and compare
# the fitted coefficients with the true DGP constants.
result = logit_oracle(data['Y'], data['T'], data['X'])
est = result['params']

print("Oracle Results:")
print(f"  a0={est['a0']:.3f} (true {A0}), a1={est['a1']:.3f} (true {A1})")
print(f"  b0={est['b0']:.3f} (true {B0}), b1={est['b1']:.3f} (true {B1})")
print(f"  mu_hat={result['mu_hat']:.4f} (true {MU_TRUE})")
print(f"  SE naive={result['se_naive']:.4f}, SE delta={result['se_delta']:.4f}")

## Section 3: Oracle Monte Carlo (M=500)

Fast validation of the oracle - runs in ~1 minute.

In [None]:
# Oracle MC: M replications of size N, one seed per replication.
M, N = 500, 1000

mus = []
ses_naive, ses_delta = [], []
covered_naive, covered_delta = [], []

print(f"Running Oracle MC: M={M}, N={N}")
for rep in range(M):
    sample = generate_data(N, seed=rep)
    fit = logit_oracle(sample['Y'], sample['T'], sample['X'])
    point = fit['mu_hat']
    mus.append(point)

    # Record each SE flavour and whether its +/- 1.96*SE interval covers
    # the true target.
    for se_key, se_log, hit_log in (
        ('se_naive', ses_naive, covered_naive),
        ('se_delta', ses_delta, covered_delta),
    ):
        half_width = 1.96 * fit[se_key]
        se_log.append(fit[se_key])
        hit_log.append(point - half_width <= MU_TRUE <= point + half_width)

mus = np.array(mus)
print("Done!")

In [None]:
# Oracle MC Results: bias, empirical spread of the estimates, and for each
# SE flavour its mean, its ratio to the empirical spread, and CI coverage.
rule = "=" * 60
mean_est = mus.mean()
emp_se = mus.std()

print(rule)
print(f"ORACLE MONTE CARLO RESULTS (M={M}, N={N})")
print(rule)
print(f"True mu* = {MU_TRUE}")
print(f"Mean estimate: {mean_est:.4f}")
print(f"Bias: {mean_est - MU_TRUE:.4f}")
print(f"Empirical SE: {emp_se:.4f}")

for heading, se_draws, hits in (
    ("Naive SE:", ses_naive, covered_naive),
    ("Delta-corrected SE:", ses_delta, covered_delta),
):
    mean_se = np.mean(se_draws)
    print()
    print(heading)
    print(f"  Mean Est SE: {mean_se:.4f}")
    print(f"  SE Ratio: {mean_se / emp_se:.2f}")
    print(f"  Coverage: {np.mean(hits):.1%}")

print(rule)

In [None]:
# Visualize Oracle MC
fig, axes = plt.subplots(1, 2, figsize=(12, 4))

# Histogram of the M point estimates, with the true target and their mean.
ax = axes[0]
ax.hist(mus, bins=30, alpha=0.7, edgecolor='black')
# Raw f-string: '\m' is not a valid escape sequence in a plain (f-)string.
ax.axvline(MU_TRUE, color='red', ls='--', lw=2, label=rf'True $\mu^*$={MU_TRUE}')
ax.axvline(mus.mean(), color='blue', ls=':', lw=2, label=f'Mean={mus.mean():.4f}')
ax.set_xlabel(r'$\hat{\mu}$'); ax.set_ylabel('Frequency')
ax.set_title(f'Oracle Estimates (Coverage: {np.mean(covered_delta):.1%})')
ax.legend()

# QQ plot of the t-statistics (estimate minus truth, over the delta SE)
# against the normal distribution.
ax = axes[1]
t_stats = (mus - MU_TRUE) / np.array(ses_delta)
stats.probplot(t_stats, dist='norm', plot=ax)
ax.set_title('QQ Plot of t-statistics (Delta SE)')

plt.tight_layout()
plt.show()

## Section 4: Neural Network with IF-based SE

Single run demonstrating influence-function-based (IF) standard errors.

In [None]:
# Draw the sample used for the single neural-network run.
np.random.seed(42)
data = generate_data(1000, seed=42)

# Estimator settings; lambda_method is left at its default ('ridge',
# 96% coverage).
nn_settings = dict(
    family='logit',
    epochs=100,
    n_folds=50,
    hidden_dims=[64, 32],
    lr=0.01,
    verbose=False,
)

print("Running Neural Network with IF-based SE...")
nn_result = structural_dml(
    Y=data['Y'],
    T=data['T'],
    X=data['X'].reshape(-1, 1),  # covariates passed as a single column
    **nn_settings,
)
print("Done!")

In [None]:
# NN: Naive vs IF Comparison
# Column 1 of theta_hat is used as the per-observation beta(X) estimates
# (it is plotted against the true beta later in the notebook).
beta_hat = nn_result.theta_hat[:, 1]
n = len(beta_hat)
# Naive SE: standard deviation of the fitted beta values over sqrt(n).
se_naive = beta_hat.std() / np.sqrt(n)

print("="*60)
print("NN: NAIVE vs INFLUENCE FUNCTION")
print("="*60)
print(f"True mu* = {MU_TRUE}")
print()
print(f"{'Method':<12} {'Estimate':<12} {'SE':<12} {'95% CI':<25} {'Covers?'}")
print("-"*60)

# Naive: +/- 1.96*SE interval around the naive point estimate.
ci_naive = (nn_result.mu_naive - 1.96*se_naive, nn_result.mu_naive + 1.96*se_naive)
covers_naive = ci_naive[0] <= MU_TRUE <= ci_naive[1]
print(f"{'Naive':<12} {nn_result.mu_naive:<12.4f} {se_naive:<12.4f} [{ci_naive[0]:.4f}, {ci_naive[1]:.4f}]  {covers_naive}")

# IF-corrected: point estimate, SE and interval as reported by the result.
covers_if = nn_result.ci_lower <= MU_TRUE <= nn_result.ci_upper
print(f"{'IF':<12} {nn_result.mu_hat:<12.4f} {nn_result.se:<12.4f} [{nn_result.ci_lower:.4f}, {nn_result.ci_upper:.4f}]  {covers_if}")

print("="*60)
print()
print(f"SE Ratio (IF/Naive): {nn_result.se / se_naive:.1f}x")
# The arrow was mis-encoded (UTF-8 bytes decoded as cp1252); restored here.
print("Naive SE ignores estimation uncertainty → overconfident CIs")

In [None]:
# Oracle vs NN on the same sample: point estimate, SE and CI bounds.
oracle_result = logit_oracle(data['Y'], data['T'], data['X'])

comparison_rows = [
    ('mu_hat', oracle_result['mu_hat'], nn_result.mu_hat),
    ('SE', oracle_result['se_delta'], nn_result.se),
    # The oracle interval is mu_hat +/- 1.96 * delta-corrected SE.
    ('CI lower', oracle_result['mu_hat']-1.96*oracle_result['se_delta'], nn_result.ci_lower),
    ('CI upper', oracle_result['mu_hat']+1.96*oracle_result['se_delta'], nn_result.ci_upper),
]

print("="*60)
print("COMPARISON: Oracle vs Neural Network")
print("="*60)
print(f"{'Metric':<20} {'Oracle':<15} {'NN':<15}")
print("-"*60)
for metric, oracle_value, nn_value in comparison_rows:
    print(f"{metric:<20} {oracle_value:<15.4f} {nn_value:<15.4f}")
print("="*60)

In [None]:
# Parameter recovery: estimated vs true alpha(X) and beta(X), oracle and NN.
fig, axes = plt.subplots(1, 2, figsize=(12, 5))

# One panel per structural function: (data key, theta_hat column, label).
panels = (
    ('alpha', 0, r'$\alpha(X)$'),
    ('beta', 1, r'$\beta(X)$'),
)
for ax, (key, column, symbol) in zip(axes, panels):
    truth = data[key]
    ax.scatter(truth, oracle_result[f'{key}_hat'], alpha=0.3, s=10, label='Oracle')
    ax.scatter(truth, nn_result.theta_hat[:, column], alpha=0.3, s=10, label='NN')
    # 45-degree reference line spanning the range of the true values.
    ax.plot([truth.min(), truth.max()],
            [truth.min(), truth.max()], 'k--', lw=2)
    ax.set_xlabel(f'True {symbol}'); ax.set_ylabel(f'Estimated {symbol}')
    ax.set_title(f'{symbol} Recovery'); ax.legend()

plt.tight_layout()
plt.show()

# Correlation of each estimate with the true per-observation value.
print("Correlation with true values:")
for prefix, key, estimate in (
    ("  Oracle alpha: ", 'alpha', oracle_result['alpha_hat']),
    ("  Oracle beta:  ", 'beta', oracle_result['beta_hat']),
    ("  NN alpha:     ", 'alpha', nn_result.theta_hat[:,0]),
    ("  NN beta:      ", 'beta', nn_result.theta_hat[:,1]),
):
    print(f"{prefix}{np.corrcoef(data[key], estimate)[0,1]:.3f}")

## Section 5: Conclusions

### Key Findings

1. **Oracle validation (M=500):** ~96% coverage, SE ratio ~1.0

2. **NN matches Oracle:** Point estimates and SEs are comparable

3. **Default `lambda_method='ridge'` works well:** Validated 96% coverage

4. **IF-based SE works:** Single run with valid confidence intervals

### References

- Farrell, Liang, Misra (2021) "Deep Neural Networks for Estimation and Inference" *Econometrica*
- Farrell, Liang, Misra (2025) "Deep Learning for Individual Heterogeneity"