In [1]:
import numpy as np

from gmm import GMMEstimator
import statsmodels.api as sm
# https://bashtage.github.io/linearmodels/
import linearmodels as lm

# Seed NumPy's global random state so the simulated draws below are reproducible.
np.random.seed(42)

Single endogenous variable and single instrument DGP with varying instrument strength ($\pi$) and degree of endogeneity ($\rho$).


In [2]:
def dgp(n = 5000,
        beta = np.array([-0.5, 1.2]),
        rho = 0.7, pi = 0.5):
    """Simulate a linear model with one endogenous regressor and one instrument.

    Parameters
    ----------
    n : number of observations to draw.
    beta : coefficients (intercept, slope) of the outcome equation.
    rho : weight of the structural error in x; non-zero makes x endogenous.
    pi : weight of the instrument in x (instrument strength).

    Returns
    -------
    (y, X, z) : outcome vector, design matrix with a leading column of ones,
    and the instrument vector.
    """
    # The draw order (error, instrument, first-stage noise) fixes the sample
    # obtained under a given seed of NumPy's global random state.
    structural_error = np.random.normal(0, 1, n)
    instrument = np.random.normal(0, 1, n)
    first_stage_noise = np.random.normal(0, 1, n)
    # x loads on the instrument and on the structural error.
    regressor = instrument * pi + structural_error * rho + first_stage_noise
    design = np.column_stack([np.ones(n), regressor])
    # The outcome shares the structural error with the regressor.
    outcome = design @ beta + structural_error
    return outcome, design, instrument

# No Endogeneity 

OLS and IV with X as its own instrument should produce the same estimates.

In [3]:
# pi = 0 removes the instrument from x and rho = 0 removes the structural error from x.
y, X, z = dgp(n = 1_000, pi = 0, rho = 0)
ols_fit = sm.OLS(y, X).fit(cov_type = "HC2")
print(ols_fit.summary().tables[1])

                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const         -0.4808      0.031    -15.522      0.000      -0.542      -0.420
x1             1.2220      0.031     39.822      0.000       1.162       1.282


In [4]:
def ψ(z, y, x, beta):
    """Moment function: z scaled row-wise by the residual y - x @ beta."""
    residual = y - x @ beta
    return z * residual[:, np.newaxis]

# Instrument matrix is a constant plus the regressor itself.
own_instruments = np.column_stack([np.ones(z.shape[0]), X[:, 1]])
gmm = GMMEstimator(ψ)
gmm.fit(own_instruments, y, X)
gmm.summary()

Unnamed: 0,coef,std err
0,-0.480796,0.030944
1,1.222033,0.030629


Identical estimates and standard errors.

# With Endogeneity 

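OLS is inconsistent: with $\rho = 0.8$ the regressor is correlated with the error term, so the OLS slope does not converge to the true value of 1.5. Also confirm that `GMMEstimator` returns the same answer as IV2SLS.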

In [5]:
# rho = 0.8 puts the structural error into x; pi keeps its default from dgp.
true_beta = np.array([0, 1.5])
y, X, z = dgp(n = 1_000, beta = true_beta, rho = 0.8)
ols_fit = sm.OLS(y, X).fit()
print(ols_fit.summary().tables[1])

                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0204      0.026      0.795      0.427      -0.030       0.071
x1             1.9525      0.018    107.179      0.000       1.917       1.988


In [6]:
def ψ(z, y, x, beta):
    """Moment function: z scaled row-wise by the residual y - x @ beta."""
    return z * (y - x @ beta)[:, None]

# Instrument matrix is a constant plus the instrument z.
instruments = np.column_stack([np.ones(z.shape[0]), z])
gmm = GMMEstimator(ψ)
gmm.fit(instruments, y, X)
gmm.summary()

Unnamed: 0,coef,std err
0,-0.015089,0.032544
1,1.542041,0.066404


In [7]:
# 2SLS benchmark: no exogenous block, X passed whole as the endogenous block,
# instrumented by a constant plus z.
instruments = np.c_[np.ones(z.shape[0]), z]
iv_fit = lm.iv.model.IV2SLS(dependent = y, exog = None, endog = X, instruments = instruments).fit()
iv_fit.summary.tables[1]

  return vecs @ np.diag(1 / np.sqrt(vals)) @ vecs.T


0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
endog.0,-0.0151,0.0325,-0.4637,0.6429,-0.0789,0.0487
endog.1,1.5420,0.0664,23.222,0.0000,1.4119,1.6722


Identical estimates and standard errors.