In [1]:
import numpy as np
import statsmodels.api as sm
import linearmodels as lm
np.random.seed(94305)

from gmm.gmm import GMMEstimator

Single endogeneous variable and single instrument DGP with varying instrument strength ($\pi$) and degree of endogeneity($\rho$).


In [2]:
def dgp(n = 100_000,
        beta = np.array([-0.5, 1.2]),
        rho = 0.7,
        pi = np.array([0.5, -0.1])):
    ε = np.random.normal(0, 1, n)
    z = np.random.normal(0, 1, n * pi.shape[0]).reshape(n, pi.shape[0])
    # Generate endogenous x, influenced by the instrument
    x = z @ pi + ε * rho + np.random.normal(0, 1, n)
    X = np.c_[np.ones(n), x]
    # heteroskedasticity
    y = X @ beta + ε + (X[:, 1] > 0) * np.random.normal(0, 1, n)
    return y, X, z

# No Endogeneity 

OLS and IV with X as its own instrument should produce the same estimates.

In [3]:
y, X, z = dgp(pi = np.array([0]), rho = 0)
print(sm.OLS(y, X).fit(cov_type = "HC2").summary().tables[1])

                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.4994      0.004   -128.506      0.000      -0.507      -0.492
x1             1.1953      0.004    308.588      0.000       1.188       1.203


### GMM using Scipy Minimization

In [4]:
%%time
ψ = lambda z, y, x, beta: z * (y - x @ beta)[:, np.newaxis]
gmm_scipy = GMMEstimator(ψ)
gmm_scipy.fit(np.c_[np.ones(z.shape[0]), X[:, 1]], y, X)
gmm_scipy.summary()

CPU times: user 127 ms, sys: 4.53 ms, total: 132 ms
Wall time: 102 ms


Unnamed: 0,coef,std err
0,-0.4994,0.0039
1,1.1953,0.0039


### GMM using Torch Minimization

In [5]:
%%time
def moment_cond(z, y, x, beta):
    residuals = (y - x @ beta).unsqueeze(-1)
    return z * residuals

gmm = GMMEstimator(moment_cond, backend  = "torch")
gmm.fit(np.c_[np.ones(z.shape[0]), X[:, 1]], y, X)
gmm.summary()

CPU times: user 383 ms, sys: 74.6 ms, total: 458 ms
Wall time: 316 ms


Unnamed: 0,coef,std err
0,-0.4994,0.0039
1,1.1953,0.0039


Faster optimizer (default argument is limited-memory BFGS, but BFGS works faster for small problems)

In [6]:
%%time
def moment_cond(z, y, x, beta):
    residuals = (y - x @ beta).unsqueeze(-1)
    return z * residuals

gmm = GMMEstimator(moment_cond, backend = "torch")
gmm.fit(np.c_[np.ones(z.shape[0]), X[:, 1]], y, X, fit_method='bfgs')
gmm.summary()

CPU times: user 189 ms, sys: 4.75 ms, total: 193 ms
Wall time: 56.2 ms


Unnamed: 0,coef,std err
0,-0.4994,0.0039
1,1.1953,0.0039


Identical estimates and standard errors.

# With Endogeneity 

Over-identified: 2 instruments and 1 endogenous variable.

OLS is inconsistent. Also confirm `GMMEstimator` returns the same answer as IV2SLS.

In [7]:
y, X, z = dgp()
print(sm.OLS(y, X).fit().summary().tables[1])

                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.4981      0.003   -143.251      0.000      -0.505      -0.491
x1             1.5971      0.003    608.599      0.000       1.592       1.602


In [8]:
%%time
ψ = lambda z, y, x, beta: z * (y - x @ beta)[:, np.newaxis]
gmm = GMMEstimator(ψ)
gmm.fit(np.c_[np.ones(z.shape[0]), z], y, X)
gmm.summary()

CPU times: user 145 ms, sys: 5.29 ms, total: 150 ms
Wall time: 108 ms


Unnamed: 0,coef,std err
0,-0.4983,0.0039
1,1.1964,0.0076


In [9]:
%%time
def moment_cond(z, y, x, beta):
    residuals = (y - x @ beta).unsqueeze(-1)
    return z * residuals

gmm = GMMEstimator(moment_cond, backend = "torch")
gmm.fit(np.c_[np.ones(z.shape[0]), z], y, X)
gmm.summary()

CPU times: user 223 ms, sys: 0 ns, total: 223 ms
Wall time: 62.8 ms


Unnamed: 0,coef,std err
0,-0.4983,0.0039
1,1.1964,0.0076


Faster optimizer in torch

In [10]:
%%time
def moment_cond(z, y, x, beta):
    residuals = (y - x @ beta).unsqueeze(-1)
    return z * residuals

gmm = GMMEstimator(moment_cond, backend = "torch")
gmm.fit(np.c_[np.ones(z.shape[0]), z], y, X, fit_method='bfgs')
gmm.summary()

CPU times: user 272 ms, sys: 14.3 ms, total: 286 ms
Wall time: 84.3 ms


Unnamed: 0,coef,std err
0,-0.4983,0.0039
1,1.1964,0.0076


Confirm with linearmodels

In [11]:
lm.iv.model.IV2SLS(y, None, X, np.c_[np.ones(z.shape[0]), z]).fit().summary.tables[1]

  return vecs @ np.diag(1 / np.sqrt(vals)) @ vecs.T
  return vecs @ np.diag(1 / np.sqrt(vals)) @ vecs.T
  q = vpmzv_sqinv @ (ex1.T @ ex1) @ vpmzv_sqinv


0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
endog.0,-0.4983,0.0039,-129.06,0.0000,-0.5059,-0.4907
endog.1,1.1965,0.0076,157.72,0.0000,1.1816,1.2113


Identical estimates and standard errors.