# M-estimation: introduction and applied examples 

Python (Paul Zivich 2023/06/08)

In [1]:
# Loading Libraries
import numpy as np                           # Numpy to manage arrays
import pandas as pd                          # Pandas for dataframes
import statsmodels.api as sm                 # Statsmodels as reference
import statsmodels.formula.api as smf        # Statsmodels R-style formulas
import scipy as sp                           # Scipy for root-finding and derivs
import delicatessen as deli                  # Delicatessen for M-estimators

# Loading Specific functions from prior libraries
from scipy.optimize import minimize, approx_fprime, newton
from delicatessen import MEstimator
from delicatessen.estimating_equations import ee_regression
from delicatessen.utilities import inverse_logit

# Displaying versions of the loaded libraries (recorded for reproducibility)
print("versions")
print("--------------------")
print("NumPy:       ", np.__version__)
print("SciPy:       ", sp.__version__)
print("pandas:      ", pd.__version__)
print("statsmodels: ", sm.__version__)
print("Delicatessen:", deli.__version__)

versions
--------------------
NumPy:        1.25.2
SciPy:        1.11.2
pandas:       1.4.1
statsmodels:  0.13.2
Delicatessen: 1.4


### Loading data
Generating the corresponding data set from Table 1

In [2]:
# Tabulated data from Table 1: one row per (X, W, Y) pattern with its count n
d = pd.DataFrame({
    'X': [0, 0, 0, 0, 1, 1, 1, 1],            # X values
    'W': [0, 0, 1, 1, 0, 0, 1, 1],            # W values
    'Y': [0, 1, 0, 1, 0, 1, 0, 1],            # Y values
    'n': [496, 74, 113, 25, 85, 15, 15, 3],   # Counts
    'intercept': 1,                           # Intercept term (always 1)
})

# Expanding the table so that every observation gets its own row
d = pd.DataFrame(
    np.repeat(d.to_numpy(), d['n'], axis=0),  # Each pattern repeated n times
    columns=d.columns,                        # ... keeping the column names
)
d = d.loc[:, ['intercept', 'X', 'W', 'Y']].copy()   # Dropping the count column

n = len(d)                                    # Number of observations

In [3]:
# Pulling NumPy arrays out of the data frame for the by-hand estimator below
X = d[['intercept', 'X', 'W']].to_numpy()    # Design matrix for regression
y = d['Y'].to_numpy()                        # Outcome in regression

## Example 1: Logistic Regression

### Regression by Maximum Likelihood Estimation (MLE)
Using the `statsmodels` implementation of the generalized linear model to estimate the logistic model parameters

In [4]:
f = sm.families.Binomial()          # Binomial family for the GLM
fm = smf.glm(formula="Y ~ X + W",   # Outcome Y modelled by X and W
             data=d,                # ... using the expanded data
             family=f).fit()        # ... fitted as a logistic regression

In [5]:
fm.summary()   # Display the fitted GLM's fit statistics and coefficient table

0,1,2,3
Dep. Variable:,Y,No. Observations:,826.0
Model:,GLM,Df Residuals:,823.0
Model Family:,Binomial,Df Model:,2.0
Link Function:,Logit,Scale:,1.0
Method:,IRLS,Log-Likelihood:,-335.79
Date:,"Mon, 18 Sep 2023",Deviance:,671.59
Time:,15:50:19,Pearson chi2:,826.0
No. Iterations:,4,Pseudo R-squ. (CS):,0.002817
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-1.8945,0.122,-15.489,0.000,-2.134,-1.655
X,0.1187,0.279,0.426,0.670,-0.427,0.665
W,0.3605,0.238,1.515,0.130,-0.106,0.827


### M-estimation by-hand

#### Defining estimating equations

In [6]:
def ee_logistic(theta):
    """Evaluate the logistic-model estimating functions at ``theta``.

    Reads the module-level design matrix ``X``, outcome ``y`` and number of
    observations ``n``. The contribution of observation ``i`` is
    ``(y[i] - inverse_logit(X[i] . theta)) * X[i]``.

    Returns an array with one row per column of ``X`` and one column per
    observation.
    """
    coefs = np.asarray(theta)[:, None]    # Column vector for the dot product

    def contribution(i):
        # Estimating function for observation i
        p_i = inverse_logit(np.dot(X[i], coefs))  # Predicted probability of Y
        return (y[i] - p_i)*X[i]

    # One contribution per observation, transposed so parameters index the rows
    return np.asarray([contribution(i) for i in range(n)]).T


def sum_ee(theta):
    """Sum each estimating function from ``ee_logistic`` over all observations.

    Returns a tuple with one summed value per row of ``ee_logistic(theta)``;
    these are the quantities the root-finder drives to zero.
    """
    per_observation = np.asarray(ee_logistic(theta))    # Rows are estimating functions
    return tuple(np.sum(row) for row in per_observation)


def solve_m_estimator(stacked_equations, init):
    """Find the root of ``stacked_equations`` with SciPy's ``newton``.

    Parameters
    ----------
    stacked_equations : callable
        Function of the parameter vector returning the summed estimating
        equations.
    init : array-like
        Starting values handed to the solver.

    Returns
    -------
    The root reported by ``newton``. Up to 2000 iterations are allowed, and
    ``disp=True`` makes ``newton`` raise ``RuntimeError`` on non-convergence.
    """
    start = np.asarray(init)
    return newton(stacked_equations, x0=start, maxiter=2000, disp=True)

#### Root-finding

Starting values need to be provided. A good starting value is within the plausible range and not close to the bounds. For example, if the parameter is a risk then a starting value of 0.5 would be a good choice. For regression, one can generally provide starting values of 0. To increase computational efficiency of M-estimation, subsets of estimating equations can be solved separately and their solutions then used as the starting values for the overall estimating equations. For example, in Example 2, we can obtain the point estimates for the propensity score model parameters using built-in functions/procedures for logistic regression and use those as starting values.

In [7]:
# Root-finding for beta, starting every coefficient at zero
theta = solve_m_estimator(sum_ee, [0, 0, 0])
print(theta)

[-1.89449987  0.11873458  0.36051038]


#### Baking the Bread (approximate derivative)

In [8]:
# Bread: negative of the numerically approximated derivative of the summed
# estimating equations, evaluated at the solution
bread = np.negative(approx_fprime(theta, sum_ee))
bread_invert = np.linalg.inv(bread)     # Inverse used on both sides of the sandwich

#### Cooking the filling (matrix algebra)

In [9]:
x = np.asarray(ee_logistic(theta))   # Estimating functions evaluated at the solution
meat = x.dot(x.T)                    # Summed outer products of the contributions

#### Assembling the sandwich (matrix algebra)

In [10]:
# Sandwich covariance: bread^{-1} . meat . (bread^{-1})^T
sandwich = bread_invert.dot(meat).dot(bread_invert.T)
sandwich_var = np.diag(sandwich)               # Diagonal of the covariance holds the variances
sandwich_var                                   # Displaying results

array([0.01484041, 0.07772034, 0.05652968])

### M-estimation using `delicatessen`

In [11]:
def psi(theta):
    """Estimating functions for the logistic model of ``y`` given ``X``.

    Delegates to delicatessen's built-in ``ee_regression`` using the
    module-level design matrix ``X`` and outcome ``y``.
    """
    return ee_regression(theta=theta, X=X, y=y, model='logistic')

In [12]:
# Building the M-estimator for the logistic model estimating functions
mestr = MEstimator(psi,               # Implementing estimator
                   init=[0, 0, 0])    # ... with general initial values (one per coefficient)
mestr.estimate()                      # Estimating the parameters

In [13]:
# Point estimates of the logistic model coefficients
mestr.theta

array([-1.89450082,  0.11873535,  0.36051132])

In [14]:
# Covariance matrix of the coefficient estimates
mestr.variance

array([[ 0.01484043, -0.01210287, -0.01317699],
       [-0.01210287,  0.07772036,  0.0014539 ],
       [-0.01317699,  0.0014539 ,  0.05652968]])

In [15]:
# Confidence intervals (one row per coefficient: lower, upper)
mestr.confidence_intervals()

array([[-2.1332662 , -1.65573543],
       [-0.42767071,  0.66514141],
       [-0.10548916,  0.82651181]])

In [16]:
# Sandwich variance: diagonal of the M-estimator's covariance matrix
np.diag(mestr.variance)

array([0.01484043, 0.07772036, 0.05652968])

In [17]:
# Hessian-based variance: diagonal of the inverted bread matrix,
# divided by the number of observations
np.diag(np.linalg.inv(mestr.bread) / d.shape[0])

array([0.0149605 , 0.07764844, 0.05660457])

In [18]:
# Outer-product variance: diagonal of the inverted meat matrix,
# divided by the number of observations
np.diag(np.linalg.inv(mestr.meat) / d.shape[0])

array([0.01508328, 0.07761366, 0.05670628])

### Logistic Regression Results

In [19]:
# Collecting estimates and confidence limits into a single table
ci = mestr.confidence_intervals()                 # Getting CI
result = pd.DataFrame({
    'Param': ['beta_0', 'beta_1', 'beta_2'],      # Names
    'Coef': mestr.theta,                          # Estimates
    'LCL': ci[:, 0],                              # Lower CI
    'UCL': ci[:, 1],                              # Upper CI
})
result.round(2)                                   # Rounding to 2 decimals

Unnamed: 0,Param,Coef,LCL,UCL
0,beta_0,-1.89,-2.13,-1.66
1,beta_1,0.12,-0.43,0.67
2,beta_2,0.36,-0.11,0.83


## Example 2: Standardization by IPW

### Using `delicatessen`

In [20]:
def psi(theta):
    """Stacked estimating functions for the IPW estimator.

    ``theta`` is ordered as (alpha_0, alpha_1, mu_0, mu_1, delta): the two
    propensity score model coefficients, the causal risks under X=0 and X=1,
    and the causal risk difference. Reads the module-level data frame ``d``.
    """
    # Splitting the parameter vector into its labelled parts
    alpha = theta[0:2]                                # Logistic model coefficients
    mu0, mu1, delta1 = theta[2], theta[3], theta[4]   # Causal risks and contrast

    # Columns of the data used below
    exposure, outcome = d['X'], d['Y']
    confounders = d[['intercept', 'W']]

    # Propensity score model: exposure given confounders
    ee_logit = ee_regression(theta=alpha, y=exposure, X=confounders,
                             model='logistic')

    # Inverse probability weights implied by the propensity score model
    pscore = inverse_logit(np.dot(confounders, alpha))
    wt = exposure/pscore + (1-exposure)/(1-pscore)

    # Weighted means for the risks under X=1 and X=0
    ee_r1 = exposure*outcome*wt - mu1
    ee_r0 = (1-exposure)*outcome*wt - mu0

    # Risk difference, repeated for every observation so it can be stacked
    ee_rd = np.ones(d.shape[0])*((mu1 - mu0) - delta1)

    # Stacking in the same order as the parameters
    return np.vstack([ee_logit, ee_r0, ee_r1, ee_rd])

In [21]:
# Applying M-estimator to the IPW estimating functions
mestr = MEstimator(psi,                       # For given EF
                   init=[0, 0, 0.5, 0.5, 0])  # ... at generic starting values (alphas, risks, contrast)
mestr.estimate()                              # Estimation procedure

In [22]:
# Collecting the IPW estimates and confidence limits into a single table
ci = mestr.confidence_intervals()
result = pd.DataFrame({
    'Param': ['alpha_0', 'alpha_1', 'mu_0', 'mu_1', 'delta'],
    'Coef': mestr.theta,
    'LCL': ci[:, 0],
    'UCL': ci[:, 1],
})
print("IPW")
result.round(2)

IPW


Unnamed: 0,Param,Coef,LCL,UCL
0,alpha_0,-1.74,-1.95,-1.53
1,alpha_1,-0.3,-0.83,0.24
2,mu_0,0.14,0.11,0.17
3,mu_1,0.15,0.09,0.22
4,delta,0.01,-0.06,0.08


## Example 3: Standardization by G-computation

### Using `delicatessen`

In [23]:
# Copies of the data with X set to 1 for everyone (d1) and to 0 for everyone (d0)
d1 = d.assign(X=1)
d0 = d.assign(X=0)

In [24]:
def psi(theta):
    """Stacked estimating functions for the g-computation estimator.

    ``theta`` is ordered as (beta_0, beta_1, beta_2, mu_0, mu_1, delta): the
    three outcome model coefficients, the causal risks under X=0 and X=1, and
    the causal risk difference. Reads the module-level data frames ``d``,
    ``d0`` and ``d1``.
    """
    # Splitting the parameter vector into its labelled parts
    beta = theta[0:3]                                 # Logistic model coefficients
    mu0, mu1, delta1 = theta[3], theta[4], theta[5]   # Causal risks and contrast

    design = ['intercept', 'X', 'W']                  # Columns of the outcome model

    # Outcome model: Y given X and W
    ee_logit = ee_regression(theta=beta, y=d['Y'], X=d[design],
                             model='logistic')

    # Predicted outcomes with X set to 0 and to 1 for everyone
    y0_hat = inverse_logit(np.dot(d0[design], beta))
    y1_hat = inverse_logit(np.dot(d1[design], beta))

    # Simple means of the predictions for each risk
    ee_r1 = y1_hat - mu1
    ee_r0 = y0_hat - mu0

    # Risk difference, repeated for every observation so it can be stacked
    ee_rd = np.ones(d.shape[0])*((mu1 - mu0) - delta1)

    # Stacking in the same order as the parameters
    return np.vstack([ee_logit, ee_r0, ee_r1, ee_rd])

In [25]:
# Applying M-estimator to the g-computation estimating functions
mestr = MEstimator(psi,                          # For given EF
                   init=[0, 0, 0, 0.5, 0.5, 0])  # ... at generic starting values
# Estimation procedure with the 'lm' root-finding solver requested explicitly
# NOTE(review): presumably Levenberg-Marquardt -- confirm against the delicatessen docs
mestr.estimate(solver='lm')

In [26]:
# Collecting the g-computation estimates and confidence limits into a single table
ci = mestr.confidence_intervals()
result = pd.DataFrame({
    'Param': ['beta_0', 'beta_1', 'beta_2', 'mu_0', 'mu_1', 'delta'],
    'Coef': mestr.theta,
    'LCL': ci[:, 0],
    'UCL': ci[:, 1],
})
print("G-computation")
result.round(2)

G-computation


Unnamed: 0,Param,Coef,LCL,UCL
0,beta_0,-1.89,-2.13,-1.66
1,beta_1,0.12,-0.43,0.67
2,beta_2,0.36,-0.11,0.83
3,mu_0,0.14,0.11,0.17
4,mu_1,0.15,0.09,0.22
5,delta,0.01,-0.06,0.09


## Example 4: Data Fusion

### Setting up data

In [27]:
# Tabulated data for the fusion example: one row per (R, Y, W) pattern with its count n
d = pd.DataFrame({
    'R': [1, 1, 0, 0, 0, 0],           # R or population indicator
    'Y': [0, 0, 1, 1, 0, 0],           # True outcome
    'W': [1, 0, 1, 0, 1, 0],           # Measured outcome
    'n': [680, 270, 204, 38, 18, 71],  # Counts
    'intercept': 1,                    # Intercept is always 1
})

# Expanding the table so that every observation gets its own row
d = pd.DataFrame(
    np.repeat(d.to_numpy(), d['n'], axis=0),   # Each pattern repeated n times
    columns=d.columns,                         # ... keeping column names
)
d = d.loc[:, ['intercept', 'R', 'W', 'Y']].copy()   # Dropping the n column
n = len(d)                                          # Number of observations

In [28]:
# Pulling each column out as a NumPy array for the estimating functions below
r = d['R'].to_numpy()
w = d['W'].to_numpy()
y = d['Y'].to_numpy()

### Using `delicatessen`

In [29]:
def psi(theta):
    """Stacked estimating functions for the data fusion example.

    ``theta`` is ordered as (naive mean, sensitivity, specificity, corrected
    mean). Reads the module-level arrays ``r``, ``w`` and ``y``.
    """
    # Labelling the parameters
    naive, sens = theta[0], theta[1]
    spec, corrected_mean = theta[2], theta[3]

    ee_1 = r*(w - naive)                         # EF naive mean (rows with r=1)
    ee_2 = (1-r) * y * (w - sens)                # EF sensitivity (rows with r=0, y=1)
    ee_3 = (1-r) * (1-y) * ((1-w) - spec)        # EF specificity (rows with r=0, y=0)

    # EF corrected mean, which depends only on the other parameters
    corrected = corrected_mean*(sens+spec-1) - (naive+spec-1)
    ee_4 = np.ones(y.shape[0])*corrected         # Expanding for stack

    # Stacking in the same order as the parameters
    return np.vstack([ee_1, ee_2, ee_3, ee_4])

In [30]:
# Applying M-estimator to the data fusion estimating functions
mestr = MEstimator(psi,                          # For given EF
                   init=[0.5, 0.75, 0.75, 0.5])  # ... at generic starting values (one per theta)
mestr.estimate()                                 # Estimation procedure

In [31]:
# Collecting the fusion estimates and confidence limits into a single table
ci = mestr.confidence_intervals()
result = pd.DataFrame({
    'Param': ['theta_1', 'theta_2', 'theta_3', 'theta_4'],
    'Coef': mestr.theta,
    'LCL': ci[:, 0],
    'UCL': ci[:, 1],
})
result.round(2)

Unnamed: 0,Param,Coef,LCL,UCL
0,theta_1,0.72,0.69,0.74
1,theta_2,0.84,0.8,0.89
2,theta_3,0.8,0.71,0.88
3,theta_4,0.8,0.72,0.88


END