In [None]:
# Uncomment and run the lines below if these packages are not yet installed.
# (%pip, unlike !pip, installs into the environment of the running kernel.)

# %pip install linearmodels
# %pip install stargazer

### Example data

We will use the data set studied in T.A. Mroz (1987), "The Sensitivity of an Empirical Model of Married Women's
Hours of Work to Economic and Statistical Assumptions," Econometrica 55,
765-799. This data set is used for examples in Wooldridge's *Introductory Econometrics* and in Sheppard's documentation for his `linearmodels` library.

In [1]:
from linearmodels.datasets import mroz

# Print the data set's variable dictionary so the column names below make sense.
print(mroz.DESCR)

# Load the Mroz (1987) sample and keep only rows without missing values.
data = mroz.load().dropna()
data.head()


T.A. Mroz (1987), "The Sensitivity of an Empirical Model of Married Women's
Hours of Work to Economic and Statistical Assumptions," Econometrica 55,
765-799.

inlf       =1 if in labor force, 1975
hours      hours worked, 1975
kidslt6    # kids < 6 years
kidsge6    # kids 6-18
age        woman's age in yrs
educ       years of schooling
wage       estimated wage from earns., hours
repwage    reported wage at interview in 1976
hushrs     hours worked by husband, 1975
husage     husband's age
huseduc    husband's years of schooling
huswage    husband's hourly wage, 1975
faminc     family income, 1975
mtr        fed. marginal tax rate facing woman
motheduc   mother's years of schooling
fatheduc   father's years of schooling
unem       unem. rate in county of resid.
city       =1 if live in SMSA
exper      actual labor mkt exper
nwifeinc   (faminc - wage*hours)/1000
lwage      log(wage)
expersq    exper^2



Unnamed: 0,inlf,hours,kidslt6,kidsge6,age,educ,wage,repwage,hushrs,husage,...,faminc,mtr,motheduc,fatheduc,unem,city,exper,nwifeinc,lwage,expersq
0,1,1610,1,0,32,12,3.354,2.65,2708,34,...,16310,0.7215,12,7,5.0,0,14,10.91006,1.210154,196
1,1,1656,0,2,30,12,1.3889,2.65,2310,30,...,21800,0.6615,7,7,11.0,1,5,19.49998,0.328512,25
2,1,1980,1,3,35,12,4.5455,4.04,3072,40,...,21040,0.6915,12,7,5.0,0,15,12.03991,1.514138,225
3,1,456,0,3,34,12,1.0965,3.25,1920,53,...,7300,0.7815,7,7,5.0,0,6,6.799996,0.092123,36
4,1,1568,1,2,31,14,4.5918,3.6,2000,32,...,27300,0.6215,12,14,9.5,1,7,20.10006,1.524272,49


In [3]:
import pandas as pd
import statsmodels.formula.api as smf
from stargazer.stargazer import Stargazer

### Basic regression

In [38]:
# Simple OLS of wage on years of schooling, using the default covariance estimator.
model = smf.ols(formula="wage ~ educ", data=data)
result = model.fit()
# result.summary()

### Heteroskedasticity consistent standard errors

In [39]:
# Same specification as the basic regression, but the standard errors are
# computed with the HC3 heteroskedasticity-consistent covariance estimator.
model = smf.ols(formula="wage ~ educ", data=data)
result = model.fit(cov_type="HC3")
# result.summary()

### Saving output to Excel

In [40]:
# Export the coefficient table of the current `result` to an Excel workbook.
#
# `summary().tables[1]` is a statsmodels SimpleTable. Its `.data` attribute
# exposes the rows as plain Python values; wrapping the SimpleTable itself in
# a DataFrame would instead fill the frame with statsmodels `Cell` objects,
# which are not ordinary values an Excel writer knows how to serialize.
coef_table = result.summary().tables[1]
table = pd.DataFrame(coef_table.data)

# The table's own first row holds the column labels and its first column the
# variable names, so pandas' header and index are not written.
table.to_excel('table.xlsx', header=False, index=False)

### Saving output to LaTeX

In [41]:
# Render the current `result` as a LaTeX regression table and write it to disk.
stargazer = Stargazer([result])
table = stargazer.render_latex()
with open("table.tex", mode="w") as tex_file:
    tex_file.write(table)

# print(table)

### Multivariate

In [42]:
# Wage regression with several regressors, again with HC3 robust standard errors.
model = smf.ols(formula="wage ~ educ + exper + age", data=data)
result = model.fit(cov_type="HC3")
# result.summary()


### Dummy and Categorical Variables

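Wrapping a variable in `C()` tells the formula parser to treat it as categorical. We could write `C(area)` and `C(occup)`, but this is unnecessary because text-valued variables are treated as categorical automatically. We might want to treat `numdep` as numerical, which is what happens if it is entered on its own; writing `C(numdep)` instead causes it to be treated as categorical (generating dummy variables).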

In [43]:
# NOTE(review): `wages` is not created in any cell visible in this notebook;
# it must be loaded before this cell can run on a fresh kernel -- confirm source.

# Right-hand-side terms of the wage equation. C(numdep) forces the numeric
# `numdep` column to be expanded into dummy variables.
regressors = [
    "educ", "exper", "tenure", "C(numdep)", "female",
    "nonwhite", "married", "smsa", "area", "occup",
]
model = smf.ols(formula="wage ~ " + "+".join(regressors), data=wages)
result = model.fit(cov_type="HC3")
# result.summary()

In [44]:
# Rows to report in the LaTeX table, in display order. The remaining terms
# of the model (the dummy and categorical controls) are not listed here.
reported = ['educ', 'exper', 'tenure', 'female', 'nonwhite', 'married']

stargazer = Stargazer([result])
stargazer.covariate_order(reported)
tex = stargazer.render_latex()
with open("table.tex", mode="w") as tex_file:
    tex_file.write(tex)

# print(tex)

### Multiple models

In [49]:
# NOTE(review): `wages` is not created in any cell visible in this notebook;
# it must be loaded before this cell can run on a fresh kernel -- confirm source.

# Control terms shared by all three specifications.
controls = "C(numdep)+smsa+area+occup"

# Three nested wage equations: each one adds regressors to the previous one.
mod1 = smf.ols(formula="wage ~ educ+" + controls, data=wages)
mod2 = smf.ols(formula="wage ~ educ+exper+tenure+" + controls, data=wages)
mod3 = smf.ols(
    formula="wage ~ educ+exper+tenure+female+nonwhite+married+" + controls,
    data=wages,
)

# Fit every specification with HC3 robust standard errors.
results = []
for mod in (mod1, mod2, mod3):
    results.append(mod.fit(cov_type="HC3"))

# One LaTeX table with a column per model, reporting only the listed rows.
reported = ['educ', 'exper', 'tenure', 'female', 'nonwhite', 'married']
stargazer = Stargazer(results)
stargazer.covariate_order(reported)
tex = stargazer.render_latex()
with open("table.tex", mode="w") as tex_file:
    tex_file.write(tex)

# print(tex)

### Logit

In [48]:
# NOTE(review): `wages` is not created in any cell visible in this notebook;
# it must be loaded before this cell can run on a fresh kernel -- confirm source.

# Logit model of marital status on wage and the female indicator.
model = smf.logit(formula="married ~ wage+female", data=wages)
result = model.fit()

# Write the fitted logit as a LaTeX table.
stargazer = Stargazer([result])
tex = stargazer.render_latex()
with open("table.tex", mode="w") as tex_file:
    tex_file.write(tex)

# result.summary()

Optimization terminated successfully.
         Current function value: 0.635312
         Iterations 6


0,1,2,3
Dep. Variable:,married,No. Observations:,526.0
Model:,Logit,Df Residuals:,523.0
Method:,MLE,Df Model:,2.0
Date:,"Mon, 22 Aug 2022",Pseudo R-squ.:,0.05103
Time:,08:37:13,Log-Likelihood:,-334.17
converged:,True,LL-Null:,-352.14
Covariance Type:,nonrobust,LLR p-value:,1.571e-08

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-0.1671,0.250,-0.667,0.505,-0.658,0.324
wage,0.1436,0.034,4.172,0.000,0.076,0.211
female,-0.3854,0.194,-1.984,0.047,-0.766,-0.005
