# M-estimation: introduction and applied examples 

Python (Paul Zivich 2023/06/08)

In [1]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf
import scipy as sp
from scipy.optimize import minimize, approx_fprime, newton
import delicatessen as deli
from delicatessen import MEstimator
from delicatessen.estimating_equations import ee_regression
from delicatessen.utilities import inverse_logit

# Report the versions of the libraries this notebook was run with
print("versions")
print("--------------------")
print(*(f"{label} {module.__version__}"
        for label, module in (("NumPy:       ", np),
                              ("SciPy:       ", sp),
                              ("pandas:      ", pd),
                              ("statsmodels: ", sm),
                              ("Delicatessen:", deli))),
      sep="\n")

versions
--------------------
NumPy:        1.22.2
SciPy:        1.9.2
pandas:       1.4.1
statsmodels:  0.13.2
Delicatessen: 1.2


### Loading data
Generating the corresponding data set from Table 1

In [2]:
# Aggregated counts from Table 1: one row per (X, W, Y) combination,
# with n giving the number of observations in that combination
d = pd.DataFrame({'X': [0, 0, 0, 0, 1, 1, 1, 1],
                  'W': [0, 0, 1, 1, 0, 0, 1, 1],
                  'Y': [0, 1, 0, 1, 0, 1, 0, 1],
                  'n': [496, 74, 113, 25, 85, 15, 15, 3]})
d['intercept'] = 1

# Repeat every row n times so that each row is a single observation
d = pd.DataFrame(np.repeat(d.to_numpy(), d['n'], axis=0), columns=d.columns)
# Keep the model columns only (this drops the n column)
d = d.loc[:, ['intercept', 'X', 'W', 'Y']].copy()

In [3]:
# Design matrix (intercept, X, W) and outcome vector as NumPy arrays
X = d[['intercept', 'X', 'W']].to_numpy()
y = d['Y'].to_numpy()

## Example 1: Logistic Regression

### Regression by Maximum Likelihood Estimation (MLE)
Using the `statsmodels` implementation of the generalized linear model (binomial family) to estimate the logistic model parameters by maximum likelihood

In [4]:
# Binomial-family GLM of Y on X and W, fitted with the formula interface
f = sm.families.Binomial()
fm = smf.glm(formula="Y ~ X + W", data=d, family=f).fit()

In [5]:
# Display the summary table of the fitted GLM
fm.summary()

0,1,2,3
Dep. Variable:,Y,No. Observations:,826.0
Model:,GLM,Df Residuals:,823.0
Model Family:,Binomial,Df Model:,2.0
Link Function:,Logit,Scale:,1.0
Method:,IRLS,Log-Likelihood:,-335.79
Date:,"Thu, 08 Jun 2023",Deviance:,671.59
Time:,14:38:47,Pearson chi2:,826.0
No. Iterations:,4,Pseudo R-squ. (CS):,0.002817
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-1.8945,0.122,-15.489,0.000,-2.134,-1.655
X,0.1187,0.279,0.426,0.670,-0.427,0.665
W,0.3605,0.238,1.515,0.130,-0.106,0.827


### M-estimation by-hand

#### Defining estimating equations

In [6]:
def ee_logistic(theta):
    """Estimating functions of the logistic model for every observation.

    Reads the notebook-level design matrix ``X`` and outcome vector ``y``.
    The number of observations is taken from ``X`` itself, so the function
    does not depend on the data frame ``d``.

    Parameters
    ----------
    theta : array-like
        Logistic regression coefficients, one per column of ``X``.

    Returns
    -------
    ndarray
        Array with one row per coefficient and one column per observation,
        holding ``(y_i - inverse_logit(X_i . theta)) * X_i``.
    """
    beta = np.asarray(theta)

    # Residuals for all observations at once (replaces the per-row Python loop)
    residual = y - inverse_logit(np.dot(X, beta))

    # Scale each row of X by its residual, then put parameters on the rows
    return (residual[:, None] * X).T


def sum_ee(theta):
    """Sum each logistic estimating function over all observations.

    Parameters
    ----------
    theta : array-like
        Parameter values passed through to ``ee_logistic``.

    Returns
    -------
    tuple
        One total per row of ``ee_logistic(theta)``, i.e. the summed
        estimating equation for each parameter.
    """
    per_observation = np.asarray(ee_logistic(theta))
    # One total per estimating equation (row), collected in a tuple
    return tuple(np.sum(row) for row in per_observation)


def solve_m_estimator(stacked_equations, init):
    """Solve summed estimating equations with ``scipy.optimize.newton``.

    Parameters
    ----------
    stacked_equations : callable
        Function of the parameter vector returning the estimating equations
        already summed over observations.
    init : array-like
        Starting values handed to the solver.

    Returns
    -------
    ndarray
        The parameter values returned by the root-finder.
    """
    starting_values = np.asarray(init)
    return newton(stacked_equations,
                  x0=starting_values,
                  maxiter=2000,   # allow more iterations than the default
                  disp=True)      # raise RuntimeError if it doesn't converge

#### Root-finding

In [7]:
# Root of the summed logistic estimating equations, starting the solver at zero
theta = solve_m_estimator(sum_ee, init=[0, 0, 0])
print(theta)

[-1.89449987  0.11873458  0.36051038]


#### Baking the Bread (approximate derivative)

In [8]:
# Bread: negative of the finite-difference derivative of the summed
# estimating equations, evaluated at the estimated theta
bread = np.negative(approx_fprime(theta, sum_ee))
bread_invert = np.linalg.inv(bread)

#### Cooking the filling (matrix algebra)

In [9]:
# Meat: estimating functions at theta multiplied by their own transpose,
# which sums the per-observation outer products
x = np.asarray(ee_logistic(theta=theta))
meat = x @ x.T

#### Assembling the sandwich (matrix algebra)

In [10]:
# Sandwich: inverse bread, times meat, times transposed inverse bread;
# the diagonal holds the variance of each parameter
sandwich = bread_invert @ meat @ bread_invert.T
sandwich_var = np.diag(sandwich)
sandwich_var

array([0.01484041, 0.07772034, 0.05652968])

### M-estimation using `delicatessen`

In [11]:
def psi(theta):
    """Logistic-regression estimating functions for ``MEstimator``.

    Delegates to delicatessen's ``ee_regression`` using the notebook-level
    design matrix ``X`` and outcome ``y``.
    """
    return ee_regression(theta=theta,
                         X=X,
                         y=y,
                         model='logistic')

In [12]:
# Set up the M-estimator for the logistic model with starting values of
# zero for the three coefficients, then run the estimation
mestr = MEstimator(psi, init=[0, 0, 0])
mestr.estimate()

# Point estimates
mestr.theta

array([-1.89450082,  0.11873535,  0.36051132])

In [13]:
# Sandwich variance: diagonal of the covariance matrix stored on the
# fitted estimator, one entry per parameter
np.diag(mestr.variance)

array([0.01484043, 0.07772036, 0.05652968])

In [14]:
# Hessian-based variance: inverse of the bread matrix divided by the
# number of observations
# NOTE(review): the division presumably reflects that delicatessen stores
# the bread averaged over observations -- confirm against its documentation
np.diag(np.linalg.inv(mestr.bread) / d.shape[0])

array([0.0149605 , 0.07764844, 0.05660457])

In [15]:
# Outer-product variance: inverse of the meat matrix divided by the
# number of observations
# NOTE(review): the division presumably reflects that delicatessen stores
# the meat averaged over observations -- confirm against its documentation
np.diag(np.linalg.inv(mestr.meat) / d.shape[0])

array([0.01508328, 0.07761366, 0.05670628])

### Logistic Regression Results

In [16]:
# Point estimates with lower/upper confidence limits, one row per coefficient
ci = mestr.confidence_intervals()
result = pd.DataFrame({'Param': ['beta_0', 'beta_1', 'beta_2'],
                       'Coef': mestr.theta,
                       'LCL': ci[:, 0],
                       'UCL': ci[:, 1]})
result.round(2)

Unnamed: 0,Param,Coef,LCL,UCL
0,beta_0,-1.89,-2.13,-1.66
1,beta_1,0.12,-0.43,0.67
2,beta_2,0.36,-0.11,0.83


## Example 2: Standardization by IPW

### Using `delicatessen`

In [17]:
def psi(theta):
    """Stacked estimating functions for the inverse probability weighting estimator.

    Parameter order in ``theta``: the two coefficients of the logistic model
    for X given W (intercept, W), the risk under X=0, the risk under X=1,
    and the risk difference. Rows of the returned array follow that order.
    """
    # Unpack the parameter vector
    alpha = theta[0:2]
    mu0 = theta[2]
    mu1 = theta[3]
    delta1 = theta[4]

    treatment, outcome = d['X'], d['Y']
    covariates = d[['intercept', 'W']]

    # Logistic model for the probability of X given W
    ee_logit = ee_regression(theta=alpha,
                             y=treatment,
                             X=covariates,
                             model='logistic')

    # Propensity score and the inverse probability weights built from it
    pscore = inverse_logit(np.dot(covariates, alpha))
    wt = treatment/pscore + (1-treatment)/(1-pscore)

    # Weighted outcome among X=0 and among X=1, each centered at its risk
    ee_r0 = (1-treatment)*outcome*wt - mu0
    ee_r1 = treatment*outcome*wt - mu1

    # Risk difference, repeated once per observation
    ee_rd = np.ones(d.shape[0])*((mu1 - mu0) - delta1)

    return np.vstack([ee_logit, ee_r0, ee_r1, ee_rd])

In [18]:
# Starting values: 0 for the two propensity-model coefficients, 0.5 for each
# risk, and 0 for the risk difference
mestr = MEstimator(psi, init=[0, 0, 0.5, 0.5, 0])
mestr.estimate()

In [19]:
# Point estimates with lower/upper confidence limits for the IPW parameters
ci = mestr.confidence_intervals()
result = pd.DataFrame({'Param': ['alpha_0', 'alpha_1', 'mu_0', 'mu_1', 'delta'],
                       'Coef': mestr.theta,
                       'LCL': ci[:, 0],
                       'UCL': ci[:, 1]})
print("IPW")
result.round(2)

IPW


Unnamed: 0,Param,Coef,LCL,UCL
0,alpha_0,-1.74,-1.95,-1.53
1,alpha_1,-0.3,-0.83,0.24
2,mu_0,0.14,0.11,0.17
3,mu_1,0.15,0.09,0.22
4,delta,0.01,-0.06,0.08


## Example 3: Standardization by G-computation

### Using `delicatessen`

In [20]:
# Copies of the data with X set to 1 for everyone and to 0 for everyone
d1 = d.assign(X=1)
d0 = d.assign(X=0)

In [21]:
def psi(theta):
    """Stacked estimating functions for the g-computation estimator.

    Parameter order in ``theta``: the three coefficients of the logistic
    model for Y (intercept, X, W), the risk under X=0, the risk under X=1,
    and the risk difference. Rows of the returned array follow that order.
    """
    # Unpack the parameter vector
    beta = theta[0:3]
    mu0 = theta[3]
    mu1 = theta[4]
    delta1 = theta[5]

    design_cols = ['intercept', 'X', 'W']

    # Logistic model for Y given X and W, fitted to the observed data
    ee_logit = ee_regression(theta=beta,
                             y=d['Y'],
                             X=d[design_cols],
                             model='logistic')

    # Predicted outcome for every observation with X set to 0 and set to 1
    y0_hat = inverse_logit(np.dot(d0[design_cols], beta))
    y1_hat = inverse_logit(np.dot(d1[design_cols], beta))

    # Predictions centered at the corresponding risk
    ee_r0 = y0_hat - mu0
    ee_r1 = y1_hat - mu1

    # Risk difference, repeated once per observation
    ee_rd = np.ones(d.shape[0])*((mu1 - mu0) - delta1)

    return np.vstack([ee_logit, ee_r0, ee_r1, ee_rd])

In [22]:
# Starting values: 0 for the three outcome-model coefficients, 0.5 for each
# risk, and 0 for the risk difference
mestr = MEstimator(psi, init=[0, 0, 0, 0.5, 0.5, 0])
# NOTE(review): solver='lm' requests a specific root-finding algorithm
# (presumably Levenberg-Marquardt) -- confirm against delicatessen's docs
mestr.estimate(solver='lm')

In [23]:
# Point estimates with lower/upper confidence limits for the g-computation parameters
ci = mestr.confidence_intervals()
result = pd.DataFrame({'Param': ['beta_0', 'beta_1', 'beta_2', 'mu_0', 'mu_1', 'delta'],
                       'Coef': mestr.theta,
                       'LCL': ci[:, 0],
                       'UCL': ci[:, 1]})
print("G-computation")
result.round(2)

G-computation


Unnamed: 0,Param,Coef,LCL,UCL
0,beta_0,-1.89,-2.13,-1.66
1,beta_1,0.12,-0.43,0.67
2,beta_2,0.36,-0.11,0.83
3,mu_0,0.14,0.11,0.17
4,mu_1,0.15,0.09,0.22
5,delta,0.01,-0.06,0.09


## Example 4: Data Fusion

### Setting up data

In [24]:
# Compact data: one row per (R, Y, W) combination, with n giving its count
d = pd.DataFrame({'R': [1, 1, 0, 0, 0, 0],
                  'Y': [0, 0, 1, 1, 0, 0],
                  'W': [1, 0, 1, 0, 1, 0],
                  'n': [680, 270, 204, 38, 18, 71]})
d['intercept'] = 1

# Repeat every row n times so that each row is a single observation
d = pd.DataFrame(np.repeat(d.to_numpy(), d['n'], axis=0), columns=d.columns)
# Keep the analysis columns only (this drops the n column)
d = d.loc[:, ['intercept', 'R', 'W', 'Y']].copy()
# Number of observations
n = d.shape[0]

# Columns as NumPy arrays for use in the estimating functions
r = d['R'].to_numpy()
w = d['W'].to_numpy()
y = d['Y'].to_numpy()

### Using `delicatessen`

In [25]:
def psi(theta):
    """Stacked estimating functions for the data fusion example.

    Parameter order in ``theta``: naive mean of W among R=1, sensitivity,
    specificity, and the corrected mean. Rows of the returned array follow
    that order.
    """
    naive, sens, spec, corrected = theta[0], theta[1], theta[2], theta[3]

    # Naive mean: W among observations with R=1
    ee_1 = r*(w - naive)
    # Sensitivity: W among observations with R=0 and Y=1
    ee_2 = (1-r) * y * (w - sens)
    # Specificity: 1-W among observations with R=0 and Y=0
    ee_3 = (1-r) * (1-y) * ((1-w) - spec)
    # Corrected mean: same value for every observation
    ee_4 = np.ones(y.shape[0])*corrected*(sens + spec - 1) - (naive + spec - 1)

    return np.vstack([ee_1, ee_2, ee_3, ee_4])

In [26]:
# Starting values for the naive mean, sensitivity, specificity, and
# corrected mean, in that order
mestr = MEstimator(psi, init=[0.5, 0.75, 0.75, 0.5])
mestr.estimate()

In [27]:
# Point estimates with lower/upper confidence limits for the data fusion parameters
ci = mestr.confidence_intervals()
result = pd.DataFrame({'Param': ['theta_1', 'theta_2', 'theta_3', 'theta_4'],
                       'Coef': mestr.theta,
                       'LCL': ci[:, 0],
                       'UCL': ci[:, 1]})
result.round(2)

Unnamed: 0,Param,Coef,LCL,UCL
0,theta_1,0.72,0.69,0.74
1,theta_2,0.84,0.8,0.89
2,theta_3,0.8,0.71,0.88
3,theta_4,0.8,0.72,0.88


END