# Chapter 13.  Pooling Cross Sections across Time
[Home](http://solomonegash.com/) | [Stata](http://solomonegash.com/woodridge1/index.html) | [R](http://solomonegash.com/econometrics/rbook1/index.html)


In [1]:
import numpy as np
import pandas as pd

import statsmodels.stats.api as sms 
import statsmodels.formula.api as smf
from statsmodels.iolib.summary2 import summary_col

from wooldridge import *

### Example 13.1. Women’s Fertility over Time

In [2]:
# Load the `fertil1` dataset through the wooldridge helper; the regression in the next cell reads it as `df`.
df = dataWoo('fertil1')

In [3]:
# OLS of `kids` on the regressors listed in the formula, including the
# year indicators y74 ... y84 that are jointly tested in the following cell.
fert_formula = 'kids ~ educ + age + agesq + black + east + northcen + west + farm + othrural + town + smcity + y74 + y76 + y78 + y80 + y82 + y84'
fert_reg = smf.ols(formula=fert_formula, data=df).fit()
print(fert_reg.summary())

                            OLS Regression Results                            
Dep. Variable:                   kids   R-squared:                       0.130
Model:                            OLS   Adj. R-squared:                  0.116
Method:                 Least Squares   F-statistic:                     9.723
Date:                Tue, 02 Jul 2024   Prob (F-statistic):           2.42e-24
Time:                        18:12:09   Log-Likelihood:                -2091.2
No. Observations:                1129   AIC:                             4218.
Df Residuals:                    1111   BIC:                             4309.
Df Model:                          17                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -7.7425      3.052     -2.537      0.0

In [4]:
# Joint F test that every year-indicator coefficient in `fert_reg` equals zero.
# The restriction is written as one chained equality string.
hypotheses = '(y74 =y76 = y78 = y80 = y82 = y84 = 0)'
f_test = fert_reg.f_test(r_matrix=hypotheses)
print(f_test)

<F test: F=5.869508671580268, p=4.85518986757229e-06, df_denom=1.11e+03, df_num=6>


In [5]:
# Breusch-Pagan heteroskedasticity test on the residuals of `fert_reg`,
# using the model's own design matrix as the explanatory variables.
bptest = sms.diagnostic.het_breuschpagan(fert_reg.resid, fert_reg.model.exog)

# Only the first two elements of the returned tuple are reported.
lm_stat, lm_pvalue = bptest[0], bptest[1]
df2 = pd.DataFrame({'Chi-Sq': [lm_stat], 'Prob>Chi-Sq': [lm_pvalue]})
print(df2)

      Chi-Sq  Prob>Chi-Sq
0  55.315373     0.000006


### Example 13.2. Changes in the Return to Education and the Gender Wage Gap

In [6]:
# OLS of `lwage` on the `cps78_85` data; the y85 indicator enters on its own
# and through the y85educ and y85fem terms listed in the formula.
cps_model = smf.ols(
    'lwage ~ y85 + educ + y85educ + exper + expersq + union + female + y85fem',
    data=dataWoo("cps78_85"),
)
print(cps_model.fit().summary())

                            OLS Regression Results                            
Dep. Variable:                  lwage   R-squared:                       0.426
Model:                            OLS   Adj. R-squared:                  0.422
Method:                 Least Squares   F-statistic:                     99.80
Date:                Tue, 02 Jul 2024   Prob (F-statistic):          4.46e-124
Time:                        18:12:09   Log-Likelihood:                -574.24
No. Observations:                1084   AIC:                             1166.
Df Residuals:                    1075   BIC:                             1211.
Df Model:                           8                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.4589      0.093      4.911      0.0

### Example 13.3. Effect of a Garbage Incinerator’s Location on Housing Prices

In [7]:
# Load `kielmc` and regress rprice on nearinc separately for the
# 1981 and 1978 rows.
df = dataWoo("kielmc")
rows_1981 = df[df['year'] == 1981]
rows_1978 = df[df['year'] == 1978]
garb81_reg = smf.ols('rprice ~ nearinc', data=rows_1981).fit()
garb78_reg = smf.ols('rprice ~ nearinc', data=rows_1978).fit()

# Extra footer rows for the comparison table, each rendered from a fitted result.
garb_stats = {
    'N': lambda res: "{0:d}".format(int(res.nobs)),
    'R2': lambda res: "{:.3f}".format(res.rsquared),
    'Adj.R2': lambda res: "{:.3f}".format(res.rsquared_adj),
}
garb_table = summary_col(
    [garb81_reg, garb78_reg],
    stars=True,
    float_format='%0.3f',
    model_names=['grab81\n(b/se)', 'grab78\n(b/se)'],
    info_dict=garb_stats,
)
print(garb_table)


                   grab81        grab78   
                   (b/se)        (b/se)   
------------------------------------------
Intercept      101307.514*** 82517.228*** 
               (3093.027)    (2653.790)   
nearinc        -30688.274*** -18824.370***
               (5827.709)    (4744.594)   
R-squared      0.165         0.082        
R-squared Adj. 0.159         0.076        
N              142           179          
R2             0.165         0.082        
Adj.R2         0.159         0.076        
Standard errors in parentheses.
* p<.1, ** p<.05, ***p<.01


### Table 13.2 Effects of Incinerator Location on Housing Prices (rprice)

In [8]:
# Three specifications of rprice on y81, nearinc and y81nrinc, fitted on the
# full `df`; the second and third add the further controls listed in their formulas.
rprice_formulas = (
    'rprice ~ y81 + nearinc + y81nrinc',
    'rprice ~ y81 + nearinc + y81nrinc + age + agesq',
    'rprice ~ y81 + nearinc + y81nrinc + age + agesq + intst + land + area + rooms + baths',
)
One, Two, Three = (smf.ols(spec, data=df).fit() for spec in rprice_formulas)

# Extra footer rows for the comparison table, each rendered from a fitted result.
rprice_stats = {
    'N': lambda res: "{0:d}".format(int(res.nobs)),
    'R2': lambda res: "{:.3f}".format(res.rsquared),
    'Adj.R2': lambda res: "{:.3f}".format(res.rsquared_adj),
}
rprice_table = summary_col(
    [One, Two, Three],
    stars=True,
    float_format='%0.3f',
    model_names=['One\n(b/se)', 'Two\n(b/se)', 'Three\n(b/se)'],
    info_dict=rprice_stats,
)
print(rprice_table)


                    One           Two          Three    
                   (b/se)        (b/se)        (b/se)   
--------------------------------------------------------
Intercept      82517.228***  89116.535***  13807.665    
               (2726.910)    (2406.051)    (11166.594)  
R-squared      0.174         0.414         0.660        
R-squared Adj. 0.166         0.405         0.649        
age                          -1494.424***  -739.451***  
                             (131.860)     (131.127)    
agesq                        8.691***      3.453***     
                             (0.848)       (0.813)      
area                                       18.086***    
                                           (2.306)      
baths                                      6977.317***  
                                           (2581.321)   
intst                                      -0.539***    
                                           (0.196)      
land                          

In [9]:
# Same design with lprice as the dependent variable; the larger model uses
# lintst, lland and larea alongside age, agesq, rooms and baths.
lprice_formulas = (
    'lprice ~ y81 + nearinc + y81nrinc',
    'lprice ~ y81 + nearinc + y81nrinc + age + agesq + lintst + lland + larea + rooms + baths',
)
lOne, lThree = (smf.ols(spec, data=df).fit() for spec in lprice_formulas)

# Extra footer rows for the comparison table, each rendered from a fitted result.
lprice_stats = {
    'N': lambda res: "{0:d}".format(int(res.nobs)),
    'R2': lambda res: "{:.3f}".format(res.rsquared),
    'Adj.R2': lambda res: "{:.3f}".format(res.rsquared_adj),
}
lprice_table = summary_col(
    [lOne, lThree],
    stars=True,
    float_format='%0.3f',
    model_names=['lOne\n(b/se)', 'lThree\n(b/se)'],
    info_dict=lprice_stats,
)
print(lprice_table)


                  lOne     lThree 
                 (b/se)    (b/se) 
----------------------------------
Intercept      11.285*** 7.652*** 
               (0.031)   (0.416)  
R-squared      0.409     0.790    
R-squared Adj. 0.403     0.784    
age                      -0.008***
                         (0.001)  
agesq                    0.000*** 
                         (0.000)  
baths                    0.094*** 
                         (0.028)  
larea                    0.351*** 
                         (0.051)  
lintst                   -0.061*  
                         (0.032)  
lland                    0.100*** 
                         (0.024)  
nearinc        -0.340*** 0.032    
               (0.055)   (0.047)  
rooms                    0.047*** 
                         (0.017)  
y81            0.457***  0.426*** 
               (0.045)   (0.028)  
y81nrinc       -0.063    -0.132** 
               (0.083)   (0.052)  
N              321       321      
R2             0.40

### Example 13.4. Effect of Worker Compensation Laws on Weeks out of Work

In [10]:
# Load `injury`, keep only the rows where ky == 1, and regress ldurat on
# afchnge, highearn and afhigh.
df = dataWoo("injury")
ky_rows = df[df['ky'] == 1]
injury_fit = smf.ols('ldurat~ afchnge + highearn + afhigh', data=ky_rows).fit()
print(injury_fit.summary())

                            OLS Regression Results                            
Dep. Variable:                 ldurat   R-squared:                       0.021
Model:                            OLS   Adj. R-squared:                  0.020
Method:                 Least Squares   F-statistic:                     39.54
Date:                Tue, 02 Jul 2024   Prob (F-statistic):           2.81e-25
Time:                        18:12:09   Log-Likelihood:                -9322.0
No. Observations:                5626   AIC:                         1.865e+04
Df Residuals:                    5622   BIC:                         1.868e+04
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      1.1256      0.031     36.621      0.0

### Example 13.5. Sleeping versus Working

In [11]:
# OLS of cslpnap on ctotwrk, ceduc, cmarr, cyngkid and cgdhlth,
# using the `slp75_81` dataset.
sleep_model = smf.ols(
    'cslpnap ~ ctotwrk + ceduc + cmarr + cyngkid + cgdhlth',
    data=dataWoo("slp75_81"),
)
print(sleep_model.fit().summary())

                            OLS Regression Results                            
Dep. Variable:                cslpnap   R-squared:                       0.150
Model:                            OLS   Adj. R-squared:                  0.131
Method:                 Least Squares   F-statistic:                     8.191
Date:                Tue, 02 Jul 2024   Prob (F-statistic):           3.83e-07
Time:                        18:12:09   Log-Likelihood:                -1864.4
No. Observations:                 239   AIC:                             3741.
Df Residuals:                     233   BIC:                             3762.
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept    -92.6340     45.866     -2.020      0.0

### Example 13.6. Distributed Lag of Crime Rate on Clear-Up Rate

In [12]:
# OLS of clcrime on cclrprc1 and cclrprc2, using the `crime3` dataset.
crime3_fit = smf.ols(
    'clcrime ~ cclrprc1 + cclrprc2',
    data=dataWoo("crime3"),
).fit()
print(crime3_fit.summary())

                            OLS Regression Results                            
Dep. Variable:                clcrime   R-squared:                       0.193
Model:                            OLS   Adj. R-squared:                  0.161
Method:                 Least Squares   F-statistic:                     5.992
Date:                Tue, 02 Jul 2024   Prob (F-statistic):            0.00465
Time:                        18:12:09   Log-Likelihood:                -17.194
No. Observations:                  53   AIC:                             40.39
Df Residuals:                      50   BIC:                             46.30
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.0857      0.064      1.343      0.1

### Example 13.8. Effect of Enterprise Zones on Unemployment Claims

In [13]:
# OLS of guclms on the year indicators d82 ... d88 and cez, using `ezunem`.
# The fitted result is kept as `ezon_reg` for the Breusch-Pagan test further down.
ezunem_data = dataWoo("ezunem")
ezon_reg = smf.ols(
    formula='guclms ~ d82 + d83 + d84 + d85 + d86 + d87 + d88 + cez',
    data=ezunem_data,
).fit()
print(ezon_reg.summary())

                            OLS Regression Results                            
Dep. Variable:                 guclms   R-squared:                       0.623
Model:                            OLS   Adj. R-squared:                  0.605
Method:                 Least Squares   F-statistic:                     34.50
Date:                Tue, 02 Jul 2024   Prob (F-statistic):           1.08e-31
Time:                        18:12:09   Log-Likelihood:                 24.553
No. Observations:                 176   AIC:                            -31.11
Df Residuals:                     167   BIC:                            -2.573
Df Model:                           8                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -0.3216      0.046     -6.982      0.0

In [14]:
# Evaluate 100 * (exp(b) - 1) for b = -.1819 and show the result.
# NOTE(review): -.1819 is presumably the `cez` coefficient reported by `ezon_reg`,
# typed in by hand -- confirm against that summary table.
cez = 100 * (np.exp(-.1819) - 1)
cez

-16.631529528207743

In [15]:
# Breusch-Pagan heteroskedasticity test on the residuals of `ezon_reg`,
# using the model's own design matrix as the explanatory variables.
bptest = sms.diagnostic.het_breuschpagan(ezon_reg.resid, ezon_reg.model.exog)

# Only the first two elements of the returned tuple are reported.
lm_stat, lm_pvalue = bptest[0], bptest[1]
bptest2 = pd.DataFrame({'Chi-Sq': [lm_stat], 'Prob>Chi-Sq': [lm_pvalue]})
print(bptest2)

     Chi-Sq  Prob>Chi-Sq
0  6.913966     0.545943


### Example 13.9. County Crime Rates in North Carolina

In [16]:
# Load `crime4` and fit the same clcrmrte equation twice: once with the default
# covariance estimator and once with cov_type='HC1'.
df = dataWoo("crime4")
crime4_formula = 'clcrmrte ~  d83 + d84 + d85 + d86 + d87 + clprbarr + clprbcon + clprbpri + clavgsen + clpolpc + 1'
hetrosced_r = smf.ols(crime4_formula, data=df).fit()
robust_r = smf.ols(crime4_formula, data=df).fit(cov_type='HC1')

# Extra footer rows for the comparison table, each rendered from a fitted result.
crime4_stats = {
    'N': lambda res: "{0:d}".format(int(res.nobs)),
    'R2': lambda res: "{:.3f}".format(res.rsquared),
    'Adj.R2': lambda res: "{:.3f}".format(res.rsquared_adj),
}
crime4_table = summary_col(
    [hetrosced_r, robust_r],
    stars=True,
    float_format='%0.3f',
    model_names=['Hetrosced\n(b/se)', 'Robust\n(b/se)'],
    info_dict=crime4_stats,
)
print(crime4_table)


               Hetrosced   Robust 
                 (b/se)    (b/se) 
----------------------------------
Intercept      0.008     0.008    
               (0.017)   (0.015)  
d83            -0.100*** -0.100***
               (0.024)   (0.022)  
d84            -0.048**  -0.048** 
               (0.024)   (0.020)  
d85            -0.005    -0.005   
               (0.023)   (0.024)  
d86            0.028     0.028    
               (0.024)   (0.021)  
d87            0.041*    0.041*   
               (0.024)   (0.024)  
clprbarr       -0.327*** -0.327***
               (0.030)   (0.051)  
clprbcon       -0.238*** -0.238***
               (0.018)   (0.031)  
clprbpri       -0.165*** -0.165***
               (0.026)   (0.035)  
clavgsen       -0.022    -0.022   
               (0.022)   (0.025)  
clpolpc        0.398***  0.398*** 
               (0.027)   (0.076)  
R-squared      0.433     0.433    
R-squared Adj. 0.422     0.422    
N              540       540      
R2             0.43

In [17]:
# Breusch-Pagan heteroskedasticity test on the residuals of `hetrosced_r`,
# using the model's own design matrix as the explanatory variables.
bptest = sms.diagnostic.het_breuschpagan(hetrosced_r.resid, hetrosced_r.model.exog)

# Only the first two elements of the returned tuple are reported.
lm_stat, lm_pvalue = bptest[0], bptest[1]
bptest2 = pd.DataFrame({'Chi-Sq': [lm_stat], 'Prob>Chi-Sq': [lm_pvalue]})
print(bptest2)

      Chi-Sq  Prob>Chi-Sq
0  10.929708     0.363021
