### Delta Method for Parameter Inference

- Analytic technique for approximating variances and constructing confidence intervals for functions of estimators.
- Relies on asymptotic normality and differentiable transformations.
- Applicable to both univariate and multivariate estimators.

### Process

1. Compute the parameter estimate using the full sample:
   - $ \hat{\theta}_n = \hat{\theta}(X_1, X_2, \dots, X_n) $

2. Define the function and compute its gradient:
   - Identify the function $ g(\cdot) $ of interest.
   - Calculate the gradient $ \nabla g(\hat{\theta}_n) $.

3. Estimate the variance-covariance matrix of the estimator:
   - $ \Sigma = \text{Var}(\hat{\theta}_n) $

4. Apply the delta method to approximate the variance of $ g(\hat{\theta}_n) $:
   - $ \text{Var}(g(\hat{\theta}_n)) \approx \nabla g(\hat{\theta}_n)' \Sigma \nabla g(\hat{\theta}_n) $

5. Construct the confidence interval:
   - $ g(\hat{\theta}_n) \pm z_{\alpha/2} \times \sqrt{\text{Var}(g(\hat{\theta}_n))} $

### Advantages

- Computationally efficient compared to resampling methods like bootstrap.
- Widely applicable to various nonlinear functions of parameters.

### Limitations

- Large sample requirement: Relies on asymptotic properties; may be inaccurate for small samples.
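- Differentiability assumption: Requires the function $ g(\theta) $ to be differentiable at the true parameter value, with a nonzero gradient there; if the gradient is zero, the first-order approximation is degenerate and a higher-order expansion is needed.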
- Linear approximation: May fail for highly nonlinear functions or when higher-order terms are significant.

In [4]:
import numpy as np
import pandas as pd
from scipy import stats

# 1. Generate synthetic data
np.random.seed(42)
n = 100
x1 = np.random.randn(n)
x2 = np.random.randn(n)
y = 2 * x1 + 1 * x2 + np.random.randn(n)  # True b1=2, b2=1

# 2. OLS estimates by hand
X = np.column_stack([np.ones(n), x1, x2])       # Design matrix: intercept + x1 + x2
XtX = X.T @ X
XtX_inv = np.linalg.inv(XtX)                    # Explicit inverse is needed for the covariance matrix
b_hat = np.linalg.solve(XtX, X.T @ y)           # Solve the normal equations (X'X) b = X'y

# 3. Residual variance
resid = y - X @ b_hat
sigma2 = np.sum(resid**2) / (n - X.shape[1])    # Unbiased estimator of error variance

# 4. Variance-Covariance matrix of parameters
cov_b = sigma2 * XtX_inv

# 5. Extract estimates for b1 and b2
b0, b1, b2 = b_hat
se_b1 = np.sqrt(cov_b[1, 1])
se_b2 = np.sqrt(cov_b[2, 2])

# 6. Compute test statistics and p-values for b1 and b2
# The reference distribution is the standard normal (asymptotic approximation),
# matching the 1.96 critical value used for the intervals below.
t_b1 = b1 / se_b1
t_b2 = b2 / se_b2
# norm.sf(x) is the upper tail 1 - cdf(x) computed without cancellation;
# 1 - norm.cdf(x) rounds to exactly 0.0 once |x| is large.
p_b1 = 2 * stats.norm.sf(np.abs(t_b1))
p_b2 = 2 * stats.norm.sf(np.abs(t_b2))

# 7. Compute 95% Confidence Intervals for b1 and b2
ci_b1_lower = b1 - 1.96 * se_b1
ci_b1_upper = b1 + 1.96 * se_b1
ci_b2_lower = b2 - 1.96 * se_b2
ci_b2_upper = b2 + 1.96 * se_b2

# 8. Ratio and Delta Method
ratio = b1 / b2
# Gradient of g(b1, b2) = b1 / b2 w.r.t b1 and b2
grad = np.array([1 / b2, -b1 / (b2 ** 2)])
# Covariance between b1 and b2 (cov_b is symmetric, so one off-diagonal entry suffices)
cov_b1_b2 = cov_b[1, 2]
# Variance of the ratio using Delta Method: grad' * Cov(b1, b2) * grad, written out term by term
var_ratio = grad[0]**2 * cov_b[1, 1] + grad[1]**2 * cov_b[2, 2] + 2 * grad[0] * grad[1] * cov_b1_b2
se_ratio = np.sqrt(var_ratio)
# Test statistic and p-value for ratio (H0: ratio = 0)
t_ratio = ratio / se_ratio
p_ratio = 2 * stats.norm.sf(np.abs(t_ratio))
# 95% Confidence Interval for ratio
ci_ratio_lower = ratio - 1.96 * se_ratio
ci_ratio_upper = ratio + 1.96 * se_ratio

# 9. Assemble Results into a DataFrame
results = pd.DataFrame({
    'Coefficient': [b1, b2, ratio],
    'StdErr': [se_b1, se_b2, se_ratio],
    't': [t_b1, t_b2, t_ratio],
    'p-value': [p_b1, p_b2, p_ratio],
    '95% CI Lower': [ci_b1_lower, ci_b2_lower, ci_ratio_lower],
    '95% CI Upper': [ci_b1_upper, ci_b2_upper, ci_ratio_upper]
}, index=['b1', 'b2', 'b1/b2'])

# 10. Formatting the Results
pd.options.display.float_format = '{:.4f}'.format
display(results)


Unnamed: 0,Coefficient,StdErr,t,p-value,95% CI Lower,95% CI Upper
b1,2.2261,0.1201,18.5334,0.0,1.9907,2.4615
b2,0.9877,0.1144,8.6353,0.0,0.7635,1.2119
b1/b2,2.2538,0.2725,8.2713,0.0,1.7197,2.7878


In [5]:
import numpy as np
import pandas as pd
from scipy.stats import norm
# Fix the global NumPy RNG so the simulated sample below is reproducible.
np.random.seed(42)

# 1. Generate synthetic logistic data
n = 500
# Two independent standard-normal covariates.
x1 = np.random.randn(n)
x2 = np.random.randn(n)
X = np.column_stack([np.ones(n), x1, x2])  # intercept + x1 + x2
# Coefficients used to generate the data, ordered as [intercept, x1, x2].
beta_true = np.array([-1.0, 2.0, -1.0])
# Success probabilities from the logistic (sigmoid) link.
p = 1 / (1 + np.exp(-X @ beta_true))
# One Bernoulli draw per observation.
y = np.random.binomial(1, p)

# 2. Define log-likelihood, gradient, Hessian for logistic
def loglike(b):
    """Return the negative Bernoulli log-likelihood of the logistic model at ``b``.

    Reads the module-level design matrix ``X`` and response ``y``.
    Despite the name, the sign is flipped: the value returned is
    ``-sum(y * xb - log(1 + exp(xb)))``, i.e. the quantity to minimise.

    Parameters
    ----------
    b : array-like
        Coefficient vector with one entry per column of ``X``.

    Returns
    -------
    float
        Negative log-likelihood.
    """
    xb = X @ b
    # logaddexp(0, xb) == log(1 + exp(xb)) but does not overflow for large xb.
    return -np.sum(y * xb - np.logaddexp(0, xb))

def grad_loglike(b):
    """Gradient of the negative log-likelihood with respect to ``b``.

    Uses the module-level ``X`` and ``y``. The result is ``-X'(y - p)``,
    where ``p`` holds the fitted probabilities at ``b``.
    """
    fitted = 1 / (1 + np.exp(-(X @ b)))  # sigmoid of the linear predictor
    residual = y - fitted
    return -(X.T @ residual)

def hess_loglike(b):
    """Hessian of the negative log-likelihood at ``b``: ``X' W X``.

    ``W`` is the diagonal matrix of Bernoulli variances ``p * (1 - p)``.
    Uses the module-level design matrix ``X``.

    Parameters
    ----------
    b : array-like
        Coefficient vector with one entry per column of ``X``.

    Returns
    -------
    numpy.ndarray
        Square matrix with one row/column per column of ``X``.
    """
    xb = X @ b
    p = 1 / (1 + np.exp(-xb))
    w = p * (1 - p)
    # Scale the rows of X by w rather than materialising an n-by-n diagonal matrix.
    return X.T @ (w[:, None] * X)

# 3. Newton-Raphson to find MLE
b = np.zeros(X.shape[1])  # init
for _ in range(20):  # at most 20 iterations
    g = grad_loglike(b)
    H = hess_loglike(b)
    # Solve H * step = g instead of forming the explicit inverse of H.
    step = np.linalg.solve(H, g)
    b -= step
    if np.max(np.abs(step)) < 1e-8:  # stop once the update is negligible
        break

# 4. Invert Hessian to get covariance
cov_b = np.linalg.inv(hess_loglike(b))

# 5. Extract parameter estimates, std. errors
b0, b1, b2 = b
se_b0 = np.sqrt(cov_b[0, 0])
se_b1 = np.sqrt(cov_b[1, 1])
se_b2 = np.sqrt(cov_b[2, 2])

# 6. Delta Method for a ratio g(b1, b2) = b1 / b2
ratio = b1 / b2
grad = np.array([1 / b2, -b1 / (b2**2)])  # partial w.r.t. [b1, b2]
cov_12 = cov_b[1:3, 1:3]  # submatrix for b1, b2
var_ratio = grad @ cov_12 @ grad
se_ratio = np.sqrt(var_ratio)

# 7. Construct table
z = 1.96
def ci(est, se):
    """Return the (lower, upper) bounds est -/+ z * se; works on scalars or Series."""
    return (est - z * se, est + z * se)

results = pd.DataFrame({
    'Coefficient': [b0, b1, b2, ratio],
    'StdErr':      [se_b0, se_b1, se_b2, se_ratio],
}, index=['b0','b1','b2','b1/b2'])

# Each z-value is computed once and reused for the p-value.
results['z-value'] = results['Coefficient'] / results['StdErr']
# norm.sf is the upper tail probability; unlike 1 - norm.cdf it does not
# round to exactly 0.0 for large |z|.
results['p-value'] = 2 * norm.sf(results['z-value'].abs())
results['CI Lower'], results['CI Upper'] = ci(results['Coefficient'], results['StdErr'])

pd.options.display.float_format = '{:.4f}'.format
display(results)


Unnamed: 0,Coefficient,StdErr,z-value,p-value,CI Lower,CI Upper
b0,-0.9921,0.1348,-7.3626,0.0,-1.2562,-0.728
b1,1.9712,0.1892,10.4171,0.0,1.6003,2.3421
b2,-0.8143,0.1389,-5.8604,0.0,-1.0866,-0.542
b1/b2,-2.4207,0.418,-5.7911,0.0,-3.24,-1.6014


In [14]:
import numpy as np
import pandas as pd
from scipy.stats import norm

# Show every DataFrame column instead of truncating wide tables.
pd.set_option('display.max_columns', None)
# Render floats with four decimal places.
pd.options.display.float_format = '{:.4f}'.format
# Seed the global NumPy RNG; every sim_data() call below draws from this stream.
np.random.seed(42)

def sim_data(n=400):
    """Draw one synthetic logistic-regression sample of size ``n``.

    Two standard-normal covariates are generated, an intercept column is
    prepended, and a binary response is sampled with success probability
    sigmoid(X @ [-1, 2, -1]). All draws come from the global NumPy RNG.

    Returns
    -------
    tuple
        ``(X, y)``: the ``(n, 3)`` design matrix and the length-``n`` 0/1 response.
    """
    coef = np.array([-1.0, 2.0, -1.0])
    # Draw order (first covariate, second covariate, response) fixes the RNG stream.
    first_cov = np.random.randn(n)
    second_cov = np.random.randn(n)
    design = np.column_stack([np.ones(n), first_cov, second_cov])
    success_prob = 1 / (1 + np.exp(-design @ coef))
    response = np.random.binomial(1, success_prob)
    return design, response

def fit_logistic(X, y):
    """Fit a logistic regression by Newton-Raphson.

    Starts from the zero vector and runs at most 100 Newton steps, stopping
    as soon as the largest absolute component of a step falls below 1e-6
    (that step is still applied).

    Parameters
    ----------
    X : numpy.ndarray
        Design matrix, one row per observation and one column per coefficient.
    y : numpy.ndarray
        Response vector aligned with the rows of ``X``.

    Returns
    -------
    tuple
        ``(b, cov_b)``: the coefficient estimate and the inverse of the
        Hessian ``X' W X`` evaluated at that estimate.
    """
    def _fitted(coef):
        # Sigmoid of the linear predictor.
        return 1 / (1 + np.exp(-(X @ coef)))

    def _information(prob):
        # X' W X with W = diag(prob * (1 - prob)), via row scaling.
        weights = prob * (1 - prob)
        return X.T @ (weights[:, None] * X)

    coef = np.zeros(X.shape[1])
    for _ in range(100):
        prob = _fitted(coef)
        score = -(X.T @ (y - prob))
        step = np.linalg.solve(_information(prob), score)
        coef = coef - step
        if np.max(np.abs(step)) < 1e-6:
            break

    covariance = np.linalg.inv(_information(_fitted(coef)))
    return coef, covariance

# Fit the model once on a single simulated sample.
X, y = sim_data()
b_hat, cov_b = fit_logistic(X, y)
# Coefficient standard errors: square roots of the diagonal of the inverse Hessian.
se_delta = np.sqrt(np.diag(cov_b))
# Covariate profile (intercept, x1, x2) at which the probability is predicted.
x0 = np.array([1, 0.5, -0.5])
phat = 1 / (1 + np.exp(-x0 @ b_hat))
# Gradient of sigmoid(x0 . b) with respect to b, evaluated at b_hat.
gp = phat * (1 - phat) * x0
# Delta-method variance of the predicted probability: gp' * cov_b * gp.
var_phat = gp @ cov_b @ gp
se_phat = np.sqrt(var_phat)

# Monte Carlo check: redraw the data M times, refit, and record the estimates.
M = 1000
b_store = []
p_store = []
for _ in range(M):
    Xmc, ymc = sim_data()
    b_mc, _ = fit_logistic(Xmc, ymc)
    b_store.append(b_mc)
    p_store.append(1 / (1 + np.exp(-x0 @ b_mc)))
b_store = np.vstack(b_store)  # one row per replication, one column per coefficient
p_store = np.array(p_store)

# Empirical spread of the estimates across replications
# (NumPy's std defaults to ddof=0, i.e. it divides by M).
mc_se_b = b_store.std(axis=0)
mc_se_phat = p_store.std()

# Side-by-side table of the analytic and simulated standard errors.
ix = ['Intercept','x1','x2','Predicted Probability']
df = pd.DataFrame({
    'Estimate':     list(b_hat) + [phat],
    'DeltaMethod_SE': list(se_delta) + [se_phat],
    'MC_SE':        list(mc_se_b) + [mc_se_phat]
}, index=ix)
display(df)


Unnamed: 0,Estimate,DeltaMethod_SE,MC_SE
Intercept,-0.9572,0.1457,0.1579
x1,1.6647,0.1953,0.2157
x2,-0.9754,0.1535,0.1667
Predicted Probability,0.5897,0.0368,0.0401


In [15]:
import numpy as np
import pandas as pd
import time

# Seed the global NumPy RNG so the simulated samples and bootstrap resamples are reproducible.
np.random.seed(42)

def true_quantile_normal(p=0.95):
    """Return the ``p``-quantile of the standard normal distribution.

    For the default ``p=0.95`` this is approximately 1.645.
    """
    # Local import: this cell does not import scipy at the top.
    from scipy.stats import norm
    return norm.ppf(p)

def bootstrap_ci(x, p=0.95, B=500, alpha=0.05):
    """Percentile-bootstrap interval for the ``p``-quantile of ``x``.

    Draws ``B`` resamples of ``x`` with replacement (same size as ``x``) from
    the global NumPy RNG, computes the sample ``p``-quantile of each, and
    returns the sorted replicates at positions ``int(alpha/2 * B)`` and
    ``int((1 - alpha/2) * B)``.

    Returns
    -------
    tuple
        ``(lo, hi)`` bounds of the interval.
    """
    sample_size = len(x)
    replicates = np.sort([
        np.quantile(np.random.choice(x, size=sample_size, replace=True), p)
        for _ in range(B)
    ])
    lower_pos = int(alpha / 2 * B)
    upper_pos = int((1 - alpha / 2) * B)
    return replicates[lower_pos], replicates[upper_pos]

def outer_ci_pre(x, p=0.95, alpha=0.05):
    """Rank-based confidence interval for the ``p``-quantile of ``x``.

    The sample quantile ``Qp`` is located first. The standard error of the
    empirical proportion of observations ``<= Qp`` is then used to widen the
    target rank ``n * p`` in both directions, and the order statistics at the
    resulting ranks are returned.

    Parameters
    ----------
    x : numpy.ndarray
        One-dimensional sample (compared element-wise against ``Qp``).
    p : float
        Quantile level.
    alpha : float
        One minus the confidence level; the critical value is the standard
        normal ``1 - alpha/2`` quantile.

    Returns
    -------
    tuple
        ``(lo, hi)`` order statistics bounding the interval.
    """
    # Local import: this cell does not import scipy at the top.
    from scipy.stats import norm

    n = len(x)
    last = n - 1
    x_sorted = np.sort(x)
    # 1) Find raw quantile (index clamped so p = 1.0 does not run past the end)
    q_idx = min(last, int(np.floor(p * n)))
    Qp = x_sorted[q_idx]
    # 2) Indicator of observations at or below the sample quantile
    y = (x <= Qp).astype(float)
    # 3) Empirical proportion
    ybar = y.mean()
    # 4) Binomial standard error of that proportion
    se = np.sqrt((ybar * (1 - ybar)) / n)
    # Critical value now follows alpha instead of a hard-coded 1.96.
    z = norm.ppf(1 - alpha / 2)
    # Both ranks are clamped into [0, n - 1].
    lo_rank = min(last, max(0, int(n * (p - z * se))))
    hi_rank = min(last, max(0, int(n * (p + z * se))))
    return x_sorted[lo_rank], x_sorted[hi_rank]

def outer_ci_post(x, p=0.95, alpha=0.05):
    """Rank-based interval for the ``p``-quantile with a post-hoc widening step.

    An unadjusted interval is read off the order statistics at ranks
    ``n * (p -/+ z * se_naive)`` with ``se_naive = sqrt(p * (1 - p) / n)``.
    Each endpoint is then pushed away from the sample quantile ``Qp`` by its
    distance to ``Qp`` times ``se_adj / se_naive``, where ``se_adj`` is the
    standard error of the empirical proportion of observations ``<= Qp``.
    The returned interval is never narrower than the unadjusted one.

    Parameters
    ----------
    x : numpy.ndarray
        One-dimensional sample (compared element-wise against ``Qp``).
    p : float
        Quantile level.
    alpha : float
        One minus the confidence level; the critical value is the standard
        normal ``1 - alpha/2`` quantile.

    Returns
    -------
    tuple
        ``(lo, hi)`` bounds of the adjusted interval.
    """
    # Local import: this cell does not import scipy at the top.
    from scipy.stats import norm

    n = len(x)
    last = n - 1
    x_sorted = np.sort(x)
    # Naive ranks from the nominal binomial standard error
    se_naive = np.sqrt(p * (1 - p) / n)
    # Critical value now follows alpha instead of a hard-coded 1.96.
    z = norm.ppf(1 - alpha / 2)
    # Both ranks are clamped into [0, n - 1] so p = 1.0 cannot index past the end.
    lo_rank = min(last, max(0, int(n * (p - z * se_naive))))
    hi_rank = min(last, max(0, int(n * (p + z * se_naive))))
    # Unadjusted CI
    lo_unadj = x_sorted[lo_rank]
    hi_unadj = x_sorted[hi_rank]
    # Empirical proportion at or below the sample quantile
    q_idx = min(last, int(np.floor(p * n)))
    Qp = x_sorted[q_idx]
    ybar = (x <= Qp).astype(float).mean()
    se_adj = np.sqrt(ybar * (1 - ybar) / n)
    # se_naive is 0 when p is 0 or 1; skip the adjustment rather than divide
    # by zero (the unadjusted interval is already a single point there).
    corr_factor = se_adj / se_naive if se_naive > 0 else 0.0
    # Post-adjust: move each endpoint further from Qp
    mid_lo = lo_unadj - (Qp - lo_unadj) * corr_factor
    mid_hi = hi_unadj + (hi_unadj - Qp) * corr_factor
    return min(mid_lo, lo_unadj), max(mid_hi, hi_unadj)

def run_simulation(n=1000, p=0.95, M=200):
    """Compare coverage and runtime of the three quantile-interval methods.

    In each of ``M`` replications a standard-normal sample of size ``n`` is
    drawn and passed to ``bootstrap_ci`` ('NB'), ``outer_ci_pre`` ('Pre') and
    ``outer_ci_post`` ('Post'), in that order. A method scores a hit when its
    interval contains the true normal ``p``-quantile.

    Parameters
    ----------
    n : int
        Sample size per replication.
    p : float
        Quantile level.
    M : int
        Number of replications (must be positive; the averages divide by it).

    Returns
    -------
    pandas.DataFrame
        One row per method with columns 'Method', 'Coverage', 'Avg Time (s)'.
    """
    true_q = true_quantile_normal(p)
    # Insertion order sets both the per-replication call order and the row order.
    methods = {'NB': bootstrap_ci, 'Pre': outer_ci_pre, 'Post': outer_ci_post}
    coverage = dict.fromkeys(methods, 0)
    runtime = dict.fromkeys(methods, 0.0)
    for _ in range(M):
        x = np.random.randn(n)
        for label, make_ci in methods.items():
            # perf_counter is a high-resolution monotonic clock intended for
            # timing short intervals; time.time() can be too coarse here.
            t0 = time.perf_counter()
            lo, hi = make_ci(x, p)
            runtime[label] += time.perf_counter() - t0
            coverage[label] += int(lo <= true_q <= hi)
    res = [[label, coverage[label] / M, runtime[label] / M] for label in methods]
    return pd.DataFrame(res, columns=['Method','Coverage','Avg Time (s)'])

# Run the comparison: 200 replications of size 1000 for the 95% quantile, then print the summary table.
df_results = run_simulation(n=1000, p=0.95, M=200)
print(df_results)


  Method  Coverage  Avg Time (s)
0     NB    0.9100        0.0233
1    Pre    0.9450        0.0000
2   Post    1.0000        0.0000
