In [1]:
import numpy as np
import statsmodels.api as sm
import linearmodels as lm
np.random.seed(42)

from gmm import GMMEstimator

Single endogenous variable DGP with one or more instruments, with varying instrument strength ($\pi$) and degree of endogeneity ($\rho$).


In [2]:
def dgp(n = 100_000,
        beta = np.array([-0.5, 1.2]),
        rho = 0.7,
        pi = np.array([0.5, -0.1])):
    """Simulate a linear model with one (possibly endogenous) regressor.

    Parameters
    ----------
    n : int
        Number of observations to draw.
    beta : ndarray
        Outcome-equation coefficients on the intercept and on x.
    rho : float
        Loading of the outcome error on x; with rho = 0 the error does not
        enter x at all.
    pi : ndarray
        First-stage coefficients, one entry per instrument column.

    Returns
    -------
    y : ndarray, shape (n,)
        Outcome.
    X : ndarray, shape (n, 2)
        Design matrix: a column of ones followed by x.
    z : ndarray, shape (n, len(pi))
        Instruments.

    Notes
    -----
    Draws come from the global ``np.random`` state, in a fixed order:
    outcome error, instruments, first-stage noise, heteroskedastic noise.
    """
    n_instruments = pi.shape[0]
    eps = np.random.normal(0, 1, n)
    z = np.random.normal(0, 1, n * n_instruments).reshape(n, n_instruments)
    # x depends on the instruments and, through rho, on the outcome error
    first_stage_noise = np.random.normal(0, 1, n)
    x = z @ pi + eps * rho + first_stage_noise
    X = np.c_[np.ones(n), x]
    # heteroskedasticity: an extra noise term is added only where x > 0
    extra_noise = np.random.normal(0, 1, n)
    y = X @ beta + eps + (x > 0) * extra_noise
    return y, X, z

# No Endogeneity 

OLS and IV with X as its own instrument should produce the same estimates.

In [3]:
# pi = 0 removes the instrument from x and rho = 0 removes the outcome error
# from x, so x is exogenous in this draw.
y, X, z = dgp(rho = 0, pi = np.array([0]))
ols_exog = sm.OLS(y, X).fit(cov_type = "HC2")
print(ols_exog.summary().tables[1])

                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.5031      0.004   -129.841      0.000      -0.511      -0.496
x1             1.1973      0.004    309.246      0.000       1.190       1.205


### GMM using Scipy Minimization

In [4]:
%%time
ψ = lambda z, y, x, beta: z * (y - x @ beta)[:, np.newaxis]
gmm_scipy = GMMEstimator(ψ)
gmm_scipy.fit(np.c_[np.ones(z.shape[0]), X[:, 1]], y, X)
gmm_scipy.summary()

CPU times: user 1.26 s, sys: 2.9 s, total: 4.16 s
Wall time: 301 ms


Unnamed: 0,coef,std err
0,-0.503123,0.003875
1,1.197316,0.003872


### GMM using Torch Minimization

In [5]:
%%time
def moment_cond(z, y, x, beta):
    residuals = (y - x @ beta).unsqueeze(-1)
    return z * residuals

gmm = GMMEstimator(moment_cond, opt = "torch")
gmm.fit(np.c_[np.ones(z.shape[0]), X[:, 1]], y, X)
gmm.summary()

CPU times: user 2.06 s, sys: 769 ms, total: 2.83 s
Wall time: 259 ms


Unnamed: 0,coef,std err
0,-0.503123,0.003875
1,1.197316,0.003872


Identical estimates and standard errors.

# With Endogeneity 

Over-identified: 2 instruments and 1 endogenous variable.

OLS is inconsistent. Also confirm `GMMEstimator` returns the same answer as IV2SLS.

In [6]:
# Default DGP: rho = 0.7 and two instruments, so x is correlated with the outcome error.
y, X, z = dgp()
# NOTE(review): no cov_type is passed here, unlike the HC2 fit in the no-endogeneity case.
ols_endog = sm.OLS(y, X).fit()
print(ols_endog.summary().tables[1])

                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.5060      0.003   -144.858      0.000      -0.513      -0.499
x1             1.6020      0.003    607.064      0.000       1.597       1.607


In [7]:
%%time
ψ = lambda z, y, x, beta: z * (y - x @ beta)[:, np.newaxis]
gmm = GMMEstimator(ψ)
gmm.fit(np.c_[np.ones(z.shape[0]), z], y, X)
gmm.summary()

CPU times: user 1.42 s, sys: 3.41 s, total: 4.82 s
Wall time: 343 ms


Unnamed: 0,coef,std err
0,-0.506942,0.003893
1,1.192193,0.007656


In [8]:
%%time
def moment_cond(z, y, x, beta):
    residuals = (y - x @ beta).unsqueeze(-1)
    return z * residuals

gmm = GMMEstimator(moment_cond, opt = "torch")
gmm.fit(np.c_[np.ones(z.shape[0]), z], y, X)
gmm.summary()

CPU times: user 1.87 s, sys: 739 ms, total: 2.61 s
Wall time: 197 ms


Unnamed: 0,coef,std err
0,-0.506942,0.003893
1,1.192193,0.007656


In [9]:
# 2SLS benchmark: no separate exog block is passed (None); all of X enters as
# endog (hence the endog.0 / endog.1 labels), instrumented by a constant plus z.
iv_instruments = np.c_[np.ones(len(z)), z]
iv_fit = lm.iv.model.IV2SLS(y, None, X, iv_instruments).fit()
iv_fit.summary.tables[1]

  return vecs @ np.diag(1 / np.sqrt(vals)) @ vecs.T
  return vecs @ np.diag(1 / np.sqrt(vals)) @ vecs.T


0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
endog.0,-0.5069,0.0039,-130.21,0.0000,-0.5145,-0.4993
endog.1,1.1923,0.0077,155.73,0.0000,1.1773,1.2073


Identical estimates and standard errors.