In [1]:
%config InlineBackend.figure_format='retina'

import numpy as np
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf
import statsmodels.sandbox.regression.gmm as gmm 
import seaborn as sns;sns.set_style('whitegrid',{'grid.color':'0.95'})
import weak_instr

from scipy import stats
from linearmodels.iv import IV2SLS
from linearmodels.iv.results import compare
from collections import OrderedDict
from statsmodels.iolib.summary2 import summary_col
from statsmodels.stats.multivariate_tools import cancorr
from tabulate import tabulate
from weak_instr import weak_instr

In [2]:
# Base URL of the POE5 companion datasets (CSV versions).
poe5csv='http://www.principlesofeconometrics.com/poe5/data/csv/'
# Columns read from mroz.csv.
# NOTE(review): `vars` shadows the Python builtin of the same name; it is kept
# as-is here because other cells may refer to it.
vars=['hours','mtr','kidsl6','faminc','wage','educ','exper','lfp',
      'mothereduc','fathereduc','largecity']
mroz=pd.read_csv(f'{poe5csv}mroz.csv',usecols=vars)
# Summary statistics (count, mean, std, min, median, max), one row per variable.
summary_cols=['wage','lfp','educ','exper']
print(mroz[summary_cols].describe(percentiles=[]).T)

       count       mean       std  min     50%   max
wage   753.0   2.374565  3.241829  0.0   1.625  25.0
lfp    753.0   0.568393  0.495630  0.0   1.000   1.0
educ   753.0  12.286853  2.280246  5.0  12.000  17.0
exper  753.0  10.630810  8.069130  0.0   9.000  45.0


In [3]:
# Remove every row with lfp == 0, then re-summarise the remaining sample.
not_in_lf=mroz.index[mroz.lfp==0]
mroz=mroz.drop(index=not_in_lf)
print(mroz[['wage','educ','exper']].describe(percentiles=[]).T)

       count       mean       std     min      50%   max
wage   428.0   4.177682  3.310282  0.1282   3.4819  25.0
educ   428.0  12.658879  2.285376  5.0000  12.0000  17.0
exper  428.0  13.037383  8.055923  0.0000  12.0000  38.0


In [4]:
# Build the log wage and squared experience, then estimate the wage equation
# lnwage ~ educ + exper + exper2 by OLS.
mroz['lnwage']=np.log(mroz.wage)
mroz['exper2']=mroz.exper.pow(2)
wage_eq=smf.ols('lnwage~educ+exper+exper2',data=mroz)
res=wage_eq.fit()
print(res.summary(slim=True))

                            OLS Regression Results                            
Dep. Variable:                 lnwage   R-squared:                       0.157
Model:                            OLS   Adj. R-squared:                  0.151
No. Observations:                 428   F-statistic:                     26.29
Covariance Type:            nonrobust   Prob (F-statistic):           1.30e-15
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -0.5220      0.199     -2.628      0.009      -0.912      -0.132
educ           0.1075      0.014      7.598      0.000       0.080       0.135
exper          0.0416      0.013      3.155      0.002       0.016       0.067
exper2        -0.0008      0.000     -2.063      0.040      -0.002   -3.82e-05

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is lar

In [5]:
# Simple OLS regression of log wage on education only.
simple_eq=smf.ols(formula='lnwage~educ',data=mroz)
res=simple_eq.fit()
print(res.summary(slim=True))

                            OLS Regression Results                            
Dep. Variable:                 lnwage   R-squared:                       0.118
Model:                            OLS   Adj. R-squared:                  0.116
No. Observations:                 428   F-statistic:                     56.93
Covariance Type:            nonrobust   Prob (F-statistic):           2.76e-13
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -0.1852      0.185     -1.000      0.318      -0.549       0.179
educ           0.1086      0.014      7.545      0.000       0.080       0.137

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.


In [6]:
# Pairwise Pearson correlations (pandas' default method) between education,
# the candidate instrument mothereduc, and the wage.
corr_cols=['educ','mothereduc','wage']
print(mroz[corr_cols].corr())

                educ  mothereduc      wage
educ        1.000000    0.387020  0.341954
mothereduc  0.387020    1.000000  0.057074
wage        0.341954    0.057074  1.000000


In [7]:
# A column of ones is added and passed as the only exogenous regressor.
mroz['Intercept']=1
# IV/2SLS: educ is instrumented by mothereduc; unadjusted covariance,
# no small-sample (debiased) correction.
iv_model=IV2SLS(dependent=mroz['lnwage'],
                exog=mroz[['Intercept']],
                endog=mroz['educ'],
                instruments=mroz['mothereduc'])
res2sls=iv_model.fit(cov_type='unadjusted')
print(res2sls.summary)

                          IV-2SLS Estimation Summary                          
Dep. Variable:                 lnwage   R-squared:                      0.0688
Estimator:                    IV-2SLS   Adj. R-squared:                 0.0666
No. Observations:                 428   F-statistic:                    1.0217
Date:                Wed, Jun 18 2025   P-value (F-stat)                0.3121
Time:                        09:24:45   Distribution:                  chi2(1)
Cov. Estimator:            unadjusted                                         
                                                                              
                             Parameter Estimates                              
            Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
Intercept      0.7022     0.4840     1.4509     0.1468     -0.2464      1.6507
educ           0.0385     0.0381     1.0108     0.31

In [8]:
# Same IV model as before (educ instrumented by mothereduc), now with the
# small-sample degrees-of-freedom correction.
# `debiased` is a boolean flag: the former string 'True' only worked because
# any non-empty string is truthy (so 'False' would have enabled it as well).
res2sls=IV2SLS(dependent=mroz['lnwage'],endog=mroz['educ'],
               exog=mroz[['Intercept']],instruments=mroz['mothereduc']
              ).fit(cov_type='unadjusted',debiased=True)
print(res2sls.summary)

                          IV-2SLS Estimation Summary                          
Dep. Variable:                 lnwage   R-squared:                      0.0688
Estimator:                    IV-2SLS   Adj. R-squared:                 0.0666
No. Observations:                 428   F-statistic:                    1.0169
Date:                Wed, Jun 18 2025   P-value (F-stat)                0.3138
Time:                        09:24:45   Distribution:                 F(1,426)
Cov. Estimator:            unadjusted                                         
                                                                              
                             Parameter Estimates                              
            Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
Intercept      0.7022     0.4851     1.4475     0.1485     -0.2513      1.6557
educ           0.0385     0.0382     1.0084     0.31

In [9]:
# IV model (educ instrumented by mothereduc) with a robust covariance
# estimator and the small-sample correction.
# `debiased` must be a real boolean; the string 'True' was merely truthy.
res2sls=IV2SLS(dependent=mroz['lnwage'],endog=mroz['educ'],
               exog=mroz[['Intercept']],instruments=mroz['mothereduc']
              ).fit(cov_type='robust',debiased=True)
print(res2sls.summary)

                          IV-2SLS Estimation Summary                          
Dep. Variable:                 lnwage   R-squared:                      0.0688
Estimator:                    IV-2SLS   Adj. R-squared:                 0.0666
No. Observations:                 428   F-statistic:                    0.9640
Date:                Wed, Jun 18 2025   P-value (F-stat)                0.3267
Time:                        09:24:45   Distribution:                 F(1,426)
Cov. Estimator:                robust                                         
                                                                              
                             Parameter Estimates                              
            Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
Intercept      0.7022     0.4937     1.4222     0.1557     -0.2683      1.6726
educ           0.0385     0.0393     0.9818     0.32

In [10]:
# First-stage regression: education on the instrument mothereduc.
first_stage_eq=smf.ols(formula='educ~mothereduc',data=mroz)
res=first_stage_eq.fit()
print(res.summary(slim=True))

                            OLS Regression Results                            
Dep. Variable:                   educ   R-squared:                       0.150
Model:                            OLS   Adj. R-squared:                  0.148
No. Observations:                 428   F-statistic:                     75.05
Covariance Type:            nonrobust   Prob (F-statistic):           9.62e-17
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     10.1145      0.311     32.532      0.000       9.503      10.726
mothereduc     0.2674      0.031      8.663      0.000       0.207       0.328

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.


In [11]:
# F-test that the coefficient on mothereduc is zero in the model held in `res`.
hypo='(mothereduc=0)'
ftest=res.f_test(hypo)   # evaluate the test once and reuse the result
print(f'F-statistic = {ftest.fvalue:.2f}')
print(f'p-value     =  {ftest.pvalue:.4f}')

F-statistic = 75.05
p-value     =  0.0000


In [12]:
# Manual second stage: regress log wage on the fitted values of `res`
# (stored as educhat).
# NOTE(review): the standard errors printed here come from plain OLS on the
# fitted values and differ from those in the IV2SLS summaries.
mroz['educhat']=res.predict()
second_stage=smf.ols('lnwage~educhat',data=mroz)
res2=second_stage.fit()
print(res2.summary(slim=True))

                            OLS Regression Results                            
Dep. Variable:                 lnwage   R-squared:                       0.002
Model:                            OLS   Adj. R-squared:                 -0.000
No. Observations:                 428   F-statistic:                    0.9491
Covariance Type:            nonrobust   Prob (F-statistic):              0.331
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.7022      0.502      1.398      0.163      -0.285       1.689
educhat        0.0385      0.040      0.974      0.331      -0.039       0.116

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.


In [13]:
# Separate first-stage regressions of educ on each parent's education;
# only the coefficient table of each fit is printed.
resm=smf.ols('educ~mothereduc',data=mroz).fit()
resf=smf.ols('educ~fathereduc',data=mroz).fit()
for first_stage in (resm,resf):
    print(first_stage.summary().tables[1])

                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     10.1145      0.311     32.532      0.000       9.503      10.726
mothereduc     0.2674      0.031      8.663      0.000       0.207       0.328
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     10.2371      0.276     37.099      0.000       9.695      10.779
fathereduc     0.2694      0.029      9.426      0.000       0.213       0.326


In [14]:
# First stage with both parents' education as regressors, followed by the
# joint F-test that both coefficients are zero.
res=smf.ols('educ~mothereduc+fathereduc',data=mroz).fit()
hypo='(mothereduc=0), (fathereduc=0)'
# `hypo` was previously defined but never used (the overall regression F was
# printed instead). Testing it explicitly yields the same statistic here,
# because the two instruments are the only non-constant regressors.
joint=res.f_test(hypo)
print('F-statistic =','{:.2f}'.format(float(joint.fvalue)))
print('p-value     =',' {:.4f}'.format(float(joint.pvalue)))

F-statistic = 55.83
p-value     =  0.0000


In [15]:
# Same first stage, now with the HC1 heteroskedasticity-robust covariance,
# followed by the joint test that both instrument coefficients are zero.
res=smf.ols('educ~mothereduc+fathereduc',data=mroz).fit(cov_type='HC1')
hypo='(mothereduc=0),(fathereduc=0)'
# `hypo` was previously unused; run the joint test explicitly. The test uses
# the covariance chosen in fit(), i.e. HC1.
joint=res.f_test(hypo)
print('F-statistic =','{:.2f}'.format(float(joint.fvalue)))
print('p-value     =',' {:.4f}'.format(float(joint.pvalue)))

F-statistic = 48.64
p-value     =  0.0000


In [16]:
# Manual second stage using the fitted values of the model currently held in
# `res` (stored as educhat2); `res` is then rebound to the second-stage fit.
mroz['educhat2']=res.predict()
second_stage2=smf.ols('lnwage~educhat2',data=mroz)
res=second_stage2.fit()
print(res.summary(slim=True))

                            OLS Regression Results                            
Dep. Variable:                 lnwage   R-squared:                       0.005
Model:                            OLS   Adj. R-squared:                  0.003
No. Observations:                 428   F-statistic:                     2.268
Covariance Type:            nonrobust   Prob (F-statistic):              0.133
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.5510      0.426      1.294      0.196      -0.286       1.388
educhat2       0.0505      0.034      1.506      0.133      -0.015       0.116

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.


In [17]:
# IV/2SLS with two instruments for educ (mothereduc and fathereduc), an
# intercept as the only exogenous regressor, unadjusted covariance and the
# small-sample correction.
# `debiased` is a boolean flag; the string 'True' only worked by truthiness.
res2sls=IV2SLS(dependent=mroz.lnwage,endog=mroz['educ']
               ,exog=mroz[['Intercept']],
               instruments=mroz[['mothereduc','fathereduc']]
              ).fit(cov_type='unadjusted',debiased=True)
print(res2sls.summary)

                          IV-2SLS Estimation Summary                          
Dep. Variable:                 lnwage   R-squared:                      0.0841
Estimator:                    IV-2SLS   Adj. R-squared:                 0.0820
No. Observations:                 428   F-statistic:                    2.4637
Date:                Wed, Jun 18 2025   P-value (F-stat)                0.1172
Time:                        09:24:46   Distribution:                 F(1,426)
Cov. Estimator:            unadjusted                                         
                                                                              
                             Parameter Estimates                              
            Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
Intercept      0.5510     0.4086     1.3486     0.1782     -0.2521      1.3541
educ           0.0505     0.0322     1.5696     0.11

In [18]:
# First-stage regression for the full model: educ on the exogenous regressors
# (exper, exper2) and both instruments.
first_stage_full=smf.ols(formula='educ~exper+exper2+mothereduc+fathereduc',data=mroz)
res=first_stage_full.fit()
print(res.summary(slim=True))

                            OLS Regression Results                            
Dep. Variable:                   educ   R-squared:                       0.211
Model:                            OLS   Adj. R-squared:                  0.204
No. Observations:                 428   F-statistic:                     28.36
Covariance Type:            nonrobust   Prob (F-statistic):           6.87e-21
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      9.1026      0.427     21.340      0.000       8.264       9.941
exper          0.0452      0.040      1.124      0.262      -0.034       0.124
exper2        -0.0010      0.001     -0.839      0.402      -0.003       0.001
mothereduc     0.1576      0.036      4.391      0.000       0.087       0.228
fathereduc     0.1895      0.034      5.615      0.000       0.123       0.256

Notes:
[1] Standard Errors assume that the covarian

In [19]:
# Full wage equation by IV/2SLS: exper and exper2 are exogenous, educ is
# instrumented by mothereduc and fathereduc; unadjusted covariance with the
# small-sample correction.
# `debiased` is a boolean flag; the string 'True' only worked by truthiness.
res2sls=IV2SLS(dependent=mroz.lnwage,endog=mroz.educ,
               exog=mroz[['Intercept','exper','exper2']]
               ,instruments=mroz[['mothereduc','fathereduc']]
              ).fit(cov_type='unadjusted',debiased=True)
print(res2sls.summary)

                          IV-2SLS Estimation Summary                          
Dep. Variable:                 lnwage   R-squared:                      0.1357
Estimator:                    IV-2SLS   Adj. R-squared:                 0.1296
No. Observations:                 428   F-statistic:                    8.1407
Date:                Wed, Jun 18 2025   P-value (F-stat)                0.0000
Time:                        09:24:46   Distribution:                 F(3,424)
Cov. Estimator:            unadjusted                                         
                                                                              
                             Parameter Estimates                              
            Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
Intercept      0.0481     0.4003     0.1202     0.9044     -0.7388      0.8350
exper          0.0442     0.0134     3.2883     0.00

In [20]:
# First-stage diagnostics of the IV fit currently held in `res2sls`.
first_stage=res2sls.first_stage
print(first_stage)

    First Stage Estimation Results    
                                  educ
--------------------------------------
R-squared                       0.2115
Partial R-squared               0.2076
Shea's R-squared                0.2076
Partial F-statistic             55.400
P-value (Partial F-stat)      1.11e-16
Partial F-stat Distn          F(2,423)
Intercept                       9.1026
                              (21.340)
exper                           0.0452
                              (1.1236)
exper2                         -0.0010
                             (-0.8386)
mothereduc                      0.1576
                              (4.3906)
fathereduc                      0.1895
                              (5.6152)
--------------------------------------

T-stats reported in parentheses
T-stats use same covariance type as original model


In [21]:
# Report the first-stage partial F statistic for educ and show the
# Stock & Yogo critical values for 1 endogenous regressor and 2 instruments.
# The statistic is pulled out as a scalar (the whole Series repr, including
# its Name/dtype footer, was printed before). Keeping the lookup outside the
# f-string also avoids reusing the string's own quote character inside the
# replacement field, which is a SyntaxError before Python 3.12.
min_eig=res2sls.first_stage.diagnostics.loc['educ','f.stat']
print(f'Minimum eigenvalue statistic: {min_eig:.4f}')
weak_instr(1,2)

Minimum eigenvalue statistic: educ    55.4003
Name: f.stat, dtype: float64

 Stock & Yogo (2005) Weak Instrument Critical Values
--------------------------------------------------------------
H0: Instruments are weak
                                 # of endogenous regressors: 1
                                 # of excluded instruments:  2
--------------------------------------------------------------
                                        5%    10%   20%    30%
2SLS relative bias:                    nan    nan   nan    nan
Fuller-k relative bias:              13.64  10.89  9.00   7.49
Fuller-k maximum bias:               12.38  10.14  8.16   6.97
--------------------------------------------------------------
                                       10%    15%   20%    25%
2SLS Size of nominal 5% Wald test:   19.93  11.59  8.75   7.25
LIML Size of nominal 5% Wald test:    8.68   5.33  4.42   3.92
--------------------------------------------------------------


In [22]:
# Full wage equation by IV/2SLS with a robust covariance estimator and the
# small-sample correction.
# `debiased` is a boolean flag; the string 'True' only worked by truthiness.
res2sls=IV2SLS(dependent=mroz.lnwage,endog=mroz['educ'],
               exog=mroz[['Intercept','exper','exper2']]
               ,instruments=mroz[['mothereduc','fathereduc']]
              ).fit(cov_type='robust',debiased=True)
print(res2sls.summary)

                          IV-2SLS Estimation Summary                          
Dep. Variable:                 lnwage   R-squared:                      0.1357
Estimator:                    IV-2SLS   Adj. R-squared:                 0.1296
No. Observations:                 428   F-statistic:                    6.1456
Date:                Wed, Jun 18 2025   P-value (F-stat)                0.0004
Time:                        09:24:46   Distribution:                 F(3,424)
Cov. Estimator:                robust                                         
                                                                              
                             Parameter Estimates                              
            Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
Intercept      0.0481     0.4298     0.1119     0.9109     -0.7967      0.8929
exper          0.0442     0.0155     2.8412     0.00

In [23]:
# First-stage diagnostics of the IV fit currently held in `res2sls`
# (the robust-covariance fit when cells are run in order).
first_stage=res2sls.first_stage
print(first_stage)

    First Stage Estimation Results    
                                  educ
--------------------------------------
R-squared                       0.2115
Partial R-squared               0.2076
Shea's R-squared                0.2076
Partial F-statistic             99.053
P-value (Partial F-stat)        0.0000
Partial F-stat Distn           chi2(2)
Intercept                       9.1026
                              (21.461)
exper                           0.0452
                              (1.0791)
exper2                         -0.0010
                             (-0.7626)
mothereduc                      0.1576
                              (4.4456)
fathereduc                      0.1895
                              (5.8427)
--------------------------------------

T-stats reported in parentheses
T-stats use same covariance type as original model


In [24]:
# Full wage equation with mothereduc as the single instrument for educ; only
# the first-stage diagnostics are printed.
# `debiased` is a boolean flag; the string 'True' only worked by truthiness.
res2sls=IV2SLS(dependent=mroz.lnwage,endog=mroz['educ'],
               exog=mroz[['Intercept','exper','exper2']],instruments=mroz[['mothereduc']]
              ).fit(cov_type='unadjusted',debiased=True)
print(res2sls.first_stage)

    First Stage Estimation Results    
                                  educ
--------------------------------------
R-squared                       0.1527
Partial R-squared               0.1485
Shea's R-squared                0.1485
Partial F-statistic             73.946
P-value (Partial F-stat)      1.11e-16
Partial F-stat Distn          F(1,424)
Intercept                       9.7751
                              (23.061)
exper                           0.0489
                              (1.1726)
exper2                         -0.0013
                             (-1.0290)
mothereduc                      0.2677
                              (8.5992)
--------------------------------------

T-stats reported in parentheses
T-stats use same covariance type as original model


In [25]:
# Partial out exper and exper2 from educ and from mothereduc, keep the
# residuals (reduc, rmom), and print their correlation and its square.
for resid_col,lhs in (('reduc','educ'),('rmom','mothereduc')):
    res=smf.ols(f'{lhs}~exper+exper2',data=mroz).fit(cov_type='nonrobust')
    mroz[resid_col]=res.resid
partial_corr=mroz[['reduc','rmom']].corr(method='pearson')
print('\n','Partial correlations (using Pearson):','\n',partial_corr,'\n')
print('Squared partial correlations (using Pearson): ','\n',partial_corr**2)


 Partial correlations (using Pearson): 
          reduc     rmom
reduc  1.00000  0.38536
rmom   0.38536  1.00000 

Squared partial correlations (using Pearson):  
           reduc      rmom
reduc  1.000000  0.148502
rmom   0.148502  1.000000


In [26]:
# Regress the partialled-out educ on the partialled-out mothereduc without an
# intercept and print the resulting R-squared.
resid_eq=smf.ols('reduc~rmom-1',data=mroz)
res=resid_eq.fit()
print(res.summary().tables[0],'\n')
r2_text=format(res.rsquared,'.4')
print('OLS R-squared = First-Stage IV2SLS Partial R-squared = ',r2_text)

                                 OLS Regression Results                                
Dep. Variable:                  reduc   R-squared (uncentered):                   0.149
Model:                            OLS   Adj. R-squared (uncentered):              0.147
Method:                 Least Squares   F-statistic:                              74.47
Date:                Wed, 18 Jun 2025   Prob (F-statistic):                    1.23e-16
Time:                        09:24:46   Log-Likelihood:                         -925.10
No. Observations:                 428   AIC:                                      1852.
Df Residuals:                     427   BIC:                                      1856.
Df Model:                           1                                                  
Covariance Type:            nonrobust                                                  

OLS R-squared = First-Stage IV2SLS Partial R-squared =  0.1485


In [27]:
# Full wage equation with both instruments; only the first-stage diagnostics
# are printed.
# `debiased` is a boolean flag; the string 'True' only worked by truthiness.
res2sls=IV2SLS(dependent=mroz.lnwage,endog=mroz.educ,
               exog=mroz[['Intercept','exper','exper2']]
               ,instruments=mroz[['mothereduc','fathereduc']]
              ).fit(cov_type='unadjusted',debiased=True)
print(res2sls.first_stage)

    First Stage Estimation Results    
                                  educ
--------------------------------------
R-squared                       0.2115
Partial R-squared               0.2076
Shea's R-squared                0.2076
Partial F-statistic             55.400
P-value (Partial F-stat)      1.11e-16
Partial F-stat Distn          F(2,423)
Intercept                       9.1026
                              (21.340)
exper                           0.0452
                              (1.1236)
exper2                         -0.0010
                             (-0.8386)
mothereduc                      0.1576
                              (4.3906)
fathereduc                      0.1895
                              (5.6152)
--------------------------------------

T-stats reported in parentheses
T-stats use same covariance type as original model


In [28]:
# Partial out exper and exper2 from fathereduc (rdad), then regress the
# partialled-out educ on both partialled-out instruments without an intercept
# and print the resulting R-squared.
dad_eq=smf.ols('fathereduc~exper+exper2',data=mroz).fit()
mroz['rdad']=dad_eq.resid
res=smf.ols('reduc~rmom+rdad-1',data=mroz).fit()
print(res.summary().tables[0],'\n')
r2_text=format(res.rsquared,'.4')
print('OLS R-squared = First-Stage IV2SLS Partial R-squared = ',r2_text)

                                 OLS Regression Results                                
Dep. Variable:                  reduc   R-squared (uncentered):                   0.208
Model:                            OLS   Adj. R-squared (uncentered):              0.204
Method:                 Least Squares   F-statistic:                              55.79
Date:                Wed, 18 Jun 2025   Prob (F-statistic):                    3.01e-22
Time:                        09:24:46   Log-Likelihood:                         -909.72
No. Observations:                 428   AIC:                                      1823.
Df Residuals:                     426   BIC:                                      1832.
Df Model:                           2                                                  
Covariance Type:            nonrobust                                                  

OLS R-squared = First-Stage IV2SLS Partial R-squared =  0.2076


In [29]:
# Save the first-stage residuals as vhat and add them as a regressor to the
# wage equation; the coefficient table shows the t-statistic on vhat.
first_stage_fit=smf.ols('educ~exper+exper2+mothereduc+fathereduc',data=mroz).fit()
mroz['vhat']=first_stage_fit.resid
augmented_eq=smf.ols('lnwage~educ+exper+exper2+vhat',data=mroz)
res=augmented_eq.fit()
print(res.summary().tables[1])

                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.0481      0.395      0.122      0.903      -0.727       0.824
educ           0.0614      0.031      1.981      0.048       0.000       0.122
exper          0.0442      0.013      3.336      0.001       0.018       0.070
exper2        -0.0009      0.000     -2.271      0.024      -0.002      -0.000
vhat           0.0582      0.035      1.671      0.095      -0.010       0.127


In [30]:
# Same augmented wage equation, with the HC1 robust covariance.
augmented_eq=smf.ols(formula='lnwage~educ+exper+exper2+vhat',data=mroz)
res=augmented_eq.fit(cov_type='HC1')
print(res.summary().tables[1])

                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.0481      0.422      0.114      0.909      -0.779       0.875
educ           0.0614      0.033      1.879      0.060      -0.003       0.125
exper          0.0442      0.015      2.921      0.003       0.015       0.074
exper2        -0.0009      0.000     -2.165      0.030      -0.002   -8.51e-05
vhat           0.0582      0.036      1.597      0.110      -0.013       0.130


In [31]:
# Full wage equation by IV/2SLS, followed by the Wu-Hausman test reported by
# linearmodels for the fitted model.
# `debiased` is a boolean flag; the string 'True' only worked by truthiness.
res2sls=IV2SLS(dependent=mroz.lnwage,endog=mroz.educ
               ,exog=mroz[['Intercept','exper','exper2']]
               ,instruments=mroz[['mothereduc','fathereduc']]
              ).fit(cov_type='unadjusted',debiased=True)
print(res2sls.summary.tables[1],'\n')
print(res2sls.wu_hausman())

                             Parameter Estimates                              
            Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
Intercept      0.0481     0.4003     0.1202     0.9044     -0.7388      0.8350
exper          0.0442     0.0134     3.2883     0.0011      0.0178      0.0706
exper2        -0.0009     0.0004    -2.2380     0.0257     -0.0017     -0.0001
educ           0.0614     0.0314     1.9530     0.0515     -0.0004      0.1232

Wu-Hausman test of exogeneity
H0: All endogenous variables are exogenous
Statistic: 2.8035
P-value: 0.0948
Distributed: F(1,423)


In [32]:
# N x R-squared test for the surplus instrument: regress the 2SLS residuals
# on all exogenous variables and instruments, then compare N*R^2 with a
# chi-square distribution.
# `debiased` is a boolean flag; the string 'True' only worked by truthiness.
res2sls=IV2SLS(dependent=mroz.lnwage,endog=mroz['educ']
               ,exog=mroz[['Intercept','exper','exper2']]
               ,instruments=mroz[['mothereduc','fathereduc']]
              ).fit(cov_type='unadjusted',debiased=True)
mroz['ehat']=res2sls.resids
res=smf.ols('ehat~exper+exper2+mothereduc+fathereduc',data=mroz).fit()
nr2=res.nobs*res.rsquared
# Degrees of freedom = instruments (2) minus endogenous regressors (1).
surplus_df=1
chic_stat=stats.chi2.ppf(.95,surplus_df)
# Upper-tail probability via the survival function rather than 1-cdf.
overid_pvalue=stats.chi2.sf(nr2,surplus_df)
print('           LM Test of Validity for Surplus Instruments')
print('------------------------------------------------------------')
print(f'R-squared artificial regression                 = {res.rsquared:.4f}')
print(f'N x R-squared test overidentifying restrictions = {nr2:.4f}')
print(f'Chi-square(1) critical value, 0.05 level        = {chic_stat:.4f}')
print(f'p-value for overidentifying test 0.05 level     = {overid_pvalue:.4f}')

           LM Test of Validity for Surplus Instruments
------------------------------------------------------------
R-squared artificial regression                 = 0.0009
N x R-squared test overidentifying restrictions = 0.3781
Chi-square(1) critical value, 0.05 level        = 3.8415
p-value for overidentifying test 0.05 level     = 0.5386


In [33]:
# Full wage equation by IV/2SLS, followed by the Sargan and Basmann
# overidentification tests exposed by the linearmodels results object.
# `debiased` is a boolean flag; the string 'True' only worked by truthiness.
res2sls=IV2SLS(dependent=mroz.lnwage,endog=mroz['educ']
               ,exog=mroz[['Intercept','exper','exper2']]
               ,instruments=mroz[['mothereduc','fathereduc']]
              ).fit(cov_type='unadjusted',debiased=True)
print('\n',res2sls.sargan)
print('\n',res2sls.basmann)


 Sargan's test of overidentification
H0: The model is not overidentified.
Statistic: 0.3781
P-value: 0.5386
Distributed: chi2(1)

 Basmann's test of overidentification
H0: The model is not overidentified.
Statistic: 0.3740
P-value: 0.5408
Distributed: chi2(1)


In [34]:
# nwifeinc = (faminc - wage*hours)/1000.
mroz['nwifeinc']=(mroz.faminc-mroz.wage*mroz.hours)/1000
# Hours equation: mtr is endogenous and exper is its single instrument
# (B=1, L=1); fit() is called with its default options.
iv11=IV2SLS(dependent=mroz.hours,
            exog=mroz[['Intercept','educ','kidsl6','nwifeinc']],
            endog=mroz.mtr,
            instruments=mroz.exper)
res2sls11=iv11.fit()
weak_instr(1,1)
# First-stage F-test on the excluded instrument.
res11=smf.ols('mtr~exper+educ+kidsl6+nwifeinc',data=mroz).fit()
hypo='exper=0'
ftest11=res11.f_test(hypo)
print('Weak Instrument F-test for MTR: B=1 and L=1')
print(f'F(1,423) = {ftest11.fvalue:.2f}')
print(f'Prob > F =  {ftest11.pvalue.item(0):.4f}')


 Stock & Yogo (2005) Weak Instrument Critical Values
--------------------------------------------------------------
H0: Instruments are weak
                                 # of endogenous regressors: 1
                                 # of excluded instruments:  1
--------------------------------------------------------------
                                        5%    10%   20%    30%
2SLS relative bias:                    nan    nan   nan    nan
Fuller-k relative bias:              24.09  19.35 15.42  12.71
Fuller-k maximum bias:               23.81  19.40 15.39  12.76
--------------------------------------------------------------
                                       10%    15%   20%    25%
2SLS Size of nominal 5% Wald test:   16.38   8.96  6.66   5.53
LIML Size of nominal 5% Wald test:   16.38   8.96  6.66   5.53
--------------------------------------------------------------
Weak Instrument F-test for MTR: B=1 and L=1
F(1,423) = 30.61
Prob > F =  0.0000


In [35]:
# Hours equation: mtr is endogenous with three instruments
# (exper, exper2, largecity), i.e. B=1 and L=3.
iv13=IV2SLS(dependent=mroz.hours,
            exog=mroz[['Intercept','educ','kidsl6','nwifeinc']],
            endog=mroz.mtr,
            instruments=mroz[['exper','exper2','largecity']])
res2sls13=iv13.fit()
weak_instr(1,3)
# First-stage joint F-test on the three excluded instruments.
res13=smf.ols('mtr~exper+exper2+largecity+educ+kidsl6+nwifeinc',data=mroz).fit()
hypo='(exper=0),(exper2=0),(largecity=0)'
ftest13=res13.f_test(hypo)
print('Weak Instrument F-test for MTR: B=1 and L=3')
print(f'F(3,421) = {ftest13.fvalue:.2f}')
print(f'Prob > F =  {ftest13.pvalue.item(0):.4f}')



 Stock & Yogo (2005) Weak Instrument Critical Values
--------------------------------------------------------------
H0: Instruments are weak
                                 # of endogenous regressors: 1
                                 # of excluded instruments:  3
--------------------------------------------------------------
                                        5%    10%   20%    30%
2SLS relative bias:                  13.91   9.08  6.46   5.39
Fuller-k relative bias:               9.61   7.90  6.61   5.60
Fuller-k maximum bias:                8.66   7.18  5.87   5.11
--------------------------------------------------------------
                                       10%    15%   20%    25%
2SLS Size of nominal 5% Wald test:   22.30  12.83  9.54   7.80
LIML Size of nominal 5% Wald test:    6.46   4.36  3.69   3.32
--------------------------------------------------------------
Weak Instrument F-test for MTR: B=1 and L=3
F(3,421) = 13.22
Prob > F =  0.0000


In [36]:
# Hours equation with two endogenous regressors (mtr, educ) and two
# instruments (mothereduc, fathereduc), i.e. B=2 and L=2.
iv22=IV2SLS(dependent=mroz.hours,
            exog=mroz[['Intercept','kidsl6','nwifeinc']],
            endog=mroz[['mtr','educ']],
            instruments=mroz[['mothereduc','fathereduc']])
res2sls22=iv22.fit()
weak_instr(2,2)
# One first-stage regression per endogenous variable, each followed by the
# joint F-test on the two excluded instruments.
hypo='(mothereduc=0, fathereduc=0)'
res22a=smf.ols('mtr~mothereduc+fathereduc+kidsl6+nwifeinc',data=mroz).fit()
ftest22a=res22a.f_test(hypo)
print('Weak Instrument F-Test for MTR: B=2 and L=2')
print(f'F(2,423) =  {ftest22a.fvalue:.2f}')
print(f'Prob > F =  {ftest22a.pvalue.item(0):.4f}')
res22b=smf.ols('educ~mothereduc+fathereduc+kidsl6+nwifeinc',data=mroz).fit()
ftest22b=res22b.f_test(hypo)
print('Weak Instrument F-Test for EDUC: B=2 and L=2')
print(f'F(2,423) = {ftest22b.fvalue:.2f}')
print(f'Prob > F =  {ftest22b.pvalue.item(0):.4f}')


 Stock & Yogo (2005) Weak Instrument Critical Values
--------------------------------------------------------------
H0: Instruments are weak
                                 # of endogenous regressors: 2
                                 # of excluded instruments:  2
--------------------------------------------------------------
                                        5%    10%   20%    30%
2SLS relative bias:                    nan    nan   nan    nan
Fuller-k relative bias:              15.50  12.55  9.72   8.03
Fuller-k maximum bias:               14.19  11.92  9.41   8.01
--------------------------------------------------------------
                                       10%    15%   20%    25%
2SLS Size of nominal 5% Wald test:    7.03   4.58  3.95   3.63
LIML Size of nominal 5% Wald test:    7.03   4.58  3.95   3.63
--------------------------------------------------------------
Weak Instrument F-Test for MTR: B=2 and L=2
F(2,423) =  8.14
Prob > F =  0.0003
Weak Instrument F-Test

In [37]:
# 2SLS for hours with two endogenous regressors (mtr, educ; B=2) and three
# excluded instruments (mothereduc, fathereduc, exper; L=3).
# `debiased` is a boolean flag: pass True, not the string 'True'. Any
# non-empty string (even 'False') is truthy, so the string form only worked
# by accident.
res2sls23=IV2SLS(dependent=mroz.hours,endog=mroz[['mtr','educ']]
                 ,exog=mroz[['Intercept','kidsl6','nwifeinc']]
                 ,instruments=mroz[['mothereduc','fathereduc','exper']]
                ).fit(cov_type='unadjusted',debiased=True)
# weak_instr(B,L) emits the critical-value table shown in this cell's output.
weak_instr(2,3)
# Joint null used for both first-stage regressions: all three instrument
# coefficients are zero.
hypo='(mothereduc=0),(fathereduc=0),(exper=0)'
# First stage for mtr; the F-test is evaluated once and reused for both prints.
res23a=smf.ols('mtr~mothereduc+fathereduc+exper+kidsl6+nwifeinc',data=mroz).fit()
ftest23a=res23a.f_test(hypo)
print('Weak Instrument F-Test for MTR: B=2 and L=3')
print(f'F(3,422) = {ftest23a.fvalue:.2f}')
print(f'Prob > F = {ftest23a.pvalue.item(0):.4f}')
# First stage for educ.
res23b=smf.ols('educ~mothereduc+fathereduc+exper+kidsl6+nwifeinc',data=mroz).fit()
ftest23b=res23b.f_test(hypo)
print('Weak Instrument F-Test for EDUC: B=2 and L=3')
print(f'F(3,422) = {ftest23b.fvalue:.2f}')
print(f'Prob > F = {ftest23b.pvalue.item(0):.4f}')


 Stock & Yogo (2005) Weak Instrument Critical Values
--------------------------------------------------------------
H0: Instruments are weak
                                 # of endogenous regressors: 2
                                 # of excluded instruments:  3
--------------------------------------------------------------
                                        5%    10%   20%    30%
2SLS relative bias:                    nan    nan   nan    nan
Fuller-k relative bias:              10.83   8.96  7.18   6.15
Fuller-k maximum bias:               10.00   8.39  6.79   5.88
--------------------------------------------------------------
                                       10%    15%   20%    25%
2SLS Size of nominal 5% Wald test:   13.43   8.18  6.40   5.45
LIML Size of nominal 5% Wald test:    5.44   3.81  3.32   3.09
--------------------------------------------------------------
Weak Instrument F-Test for MTR: B=2 and L=3
F(3,422) = 18.86
Prob > F = 0.0000
Weak Instrument F-Test 

In [38]:
# Gather the four 2SLS fits under short labels; the pair order fixes the
# column order of the comparison table.
res=OrderedDict([
    ('mtr11',res2sls11),
    ('mtr13',res2sls13),
    ('mtr22',res2sls22),
    ('mtr23',res2sls23),
])
print(compare(res))

                                Model Comparison                                
                              mtr11          mtr13          mtr22          mtr23
--------------------------------------------------------------------------------
Dep. Variable                 hours          hours          hours          hours
Estimator                   IV-2SLS        IV-2SLS        IV-2SLS        IV-2SLS
No. Observations                428            428            428            428
Cov. Est.                    robust         robust         robust     unadjusted
R-squared                   -0.1415         0.0514        -4.0197        -0.1678
Adj. R-squared              -0.1523         0.0425        -4.0672        -0.1789
F-statistic                  40.977         41.569         2.5190         9.2617
P-value (F-stat)          2.718e-08      2.049e-08         0.6412      3.503e-07
Intercept                 1.742e+04      1.439e+04     -2.449e+04      1.807e+04
                           (

In [39]:
# 2SLS for hours with two endogenous regressors (mtr, educ) and three
# excluded instruments (mothereduc, fathereduc, exper).
res2sls=IV2SLS(dependent=mroz.hours,endog=mroz[['mtr','educ']]
               ,exog=mroz[['Intercept','kidsl6','nwifeinc']]
               ,instruments=mroz[['mothereduc','fathereduc','exper']]).fit()
nobs=res2sls.nobs
# Partial the exogenous regressors out of every endogenous variable and
# instrument: regress each on (Intercept, kidsl6, nwifeinc) and store the
# residuals in a new column named '<variable>r'.
varlist=mroz[['mtr','educ','mothereduc','fathereduc','exper']]
# Regressor frame is loop-invariant, so slice it once up front.
partial_exog=mroz[['Intercept','kidsl6','nwifeinc']]
for v in varlist:  # iterating a DataFrame yields its column names
    res=sm.OLS(mroz[v],partial_exog).fit()
    mroz[v+'r']=res.resid
# Canonical correlations between the residualised endogenous regressors and
# the residualised excluded instruments; the smallest one is kept in mincc.
canc=cancorr(mroz[['mtrr','educr']],mroz[['mothereducr','fathereducr','experr']])
mincc=np.amin(canc)
print(f'Canonical Correlations: {np.round(canc,5)}')
print(f'Minimum Canonical Correlation: {mincc:.5f}')

Canonical Correlations:, [0.46236 0.24005]
Minimum Canonical Correlation: 0.24005


In [40]:
# Shapes of the first-stage instrument and exogenous-regressor data; element
# [1] of each shape is used below as the number of columns (L and G).
l=res2sls.first_stage.instr.shape
g=res2sls.first_stage.exog.shape
# Total count of instruments plus exogenous regressors, L + G.
numiv=l[1]+g[1]
# Cragg-Donald F = ((N - (L+G)) / L) * r^2 / (1 - r^2), where r is the
# minimum canonical correlation (mincc) and N is the sample size (nobs).
cd_f_stat=((nobs-numiv)/l[1])*mincc**2/(1-mincc**2)
print(f'The Cragg-Donald F-statistic: {cd_f_stat:.4f}')

The Cragg-Donald F-statitic: 8.6014
