In [2]:
from causaldata import Mroz, gapminder, organ_donations, restaurant_inspections
import warnings 
warnings.filterwarnings('ignore')
import pyfixest as pf
%load_ext watermark
%watermark --iversions

pyfixest  : 0.26.2
causaldata: 0.1.5



### Multivariate Regression - with interactions, HAC stderrs, cluster stderrs

In [3]:
# Read in data
dt = Mroz.load_pandas().data
print(dt.dtypes)
dt = dt.query("lfp") # working women
dt.loc[:, "earn"] = dt["lwg"].apply("exp") # unlogged earnings
fit = pf.feols(fml="lwg ~ csw(inc, wc, k5)", data=dt, vcov="iid")
pf.etable(fit)

Unnamed: 0      int64
lfp              bool
k5              int64
k618            int64
age             int64
wc               bool
hc               bool
lwg           float64
inc           float64
dtype: object


Unnamed: 0_level_0,lwg,lwg,lwg
Unnamed: 0_level_1,(1),(2),(3)
coef,coef,coef,coef
inc,0.010** (0.003),0.005 (0.003),0.005 (0.003)
wc,,0.342*** (0.075),0.349*** (0.075)
k5,,,-0.072 (0.087)
Intercept,1.007*** (0.071),0.972*** (0.070),0.982*** (0.071)
stats,stats,stats,stats
Observations,428,428,428
S.E. type,iid,iid,iid
R2,0.020,0.066,0.068
"Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell: Coefficient (Std. Error)","Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell: Coefficient (Std. Error)","Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell: Coefficient (Std. Error)","Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell: Coefficient (Std. Error)"


In [4]:
res = restaurant_inspections.load_pandas().data
res.inspection_score = res.inspection_score.astype(float)
res.NumberofLocations = res.NumberofLocations.astype(float)
print(res.dtypes)
fit = pf.feols(fml="inspection_score ~ NumberofLocations", data=res)
pf.etable([fit])

business_name         object
inspection_score     float64
Year                   int64
NumberofLocations    float64
Weekend                 bool
dtype: object


Unnamed: 0_level_0,inspection_score
Unnamed: 0_level_1,(1)
coef,coef
NumberofLocations,-0.019*** (0.000)
Intercept,94.866*** (0.046)
stats,stats
Observations,27178
S.E. type,iid
R2,0.065
"Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell: Coefficient (Std. Error)","Significance levels: * p < 0.05, ** p < 0.01, *** p < 0.001. Format of coefficient cell: Coefficient (Std. Error)"


In [5]:
df = restaurant_inspections.load_pandas().data
fit1 = pf.feols(
    fml="inspection_score ~ NumberofLocations + I(NumberofLocations^2) + Year", data=df
)
fit2 = pf.feols(fml="inspection_score ~ NumberofLocations*Weekend + Year", data=df)
pf.etable([fit1, fit2])

Unnamed: 0_level_0,inspection_score,inspection_score
Unnamed: 0_level_1,(1),(2)
coef,coef,coef
NumberofLocations,-0.075*** (0.019),-0.019*** (0.000)
I(NumberofLocations ^ 2),0.056** (0.019),
Year,-0.065*** (0.006),-0.065*** (0.006)
Weekend,,1.759*** (0.488)
NumberofLocations:Weekend,,-0.010 (0.008)
Intercept,225.504*** (12.409),225.126*** (12.415)
stats,stats,stats
Observations,27178,27178
S.E. type,iid,iid


In [6]:
pf.feols(fml="inspection_score ~ Year + Weekend", data=df, vcov="HC3").summary()

###

Estimation:  OLS
Dep. var.: inspection_score, Fixed effects: 0
Inference:  HC3
Observations:  27178

| Coefficient   |   Estimate |   Std. Error |   t value |   Pr(>|t|) |    2.5% |   97.5% |
|:--------------|-----------:|-------------:|----------:|-----------:|--------:|--------:|
| Intercept     |    185.380 |       12.150 |    15.257 |      0.000 | 161.564 | 209.196 |
| Year          |     -0.046 |        0.006 |    -7.551 |      0.000 |  -0.057 |  -0.034 |
| Weekend       |      2.057 |        0.353 |     5.829 |      0.000 |   1.365 |   2.749 |
---
RMSE: 6.248 R2: 0.003 


In [7]:
pf.feols(fml="inspection_score ~ Year + Weekend", data=df, vcov={"CRV1": "Weekend"}).tidy()

Unnamed: 0_level_0,Estimate,Std. Error,t value,Pr(>|t|),2.5%,97.5%
Coefficient,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Intercept,185.380033,3.264345,56.789343,0.011209,143.902592,226.857474
Year,-0.04564,0.001624,-28.107556,0.02264,-0.066272,-0.025008
Weekend,2.057166,0.001401,1468.2568,0.000434,2.039364,2.074969


### Fixed and Two way fixed effects

In [12]:
gm = gapminder.load_pandas().data
gm["logGDPpercap"] = gm["gdpPercap"].apply("log")
display(gm)
fit = pf.feols(fml="lifeExp ~ C(country) + np.log(gdpPercap)", data=gm)
fit.tidy().head()

Unnamed: 0,country,continent,year,lifeExp,pop,gdpPercap,logGDPpercap
0,Afghanistan,Asia,1952,28.801,8425333,779.445314,6.658583
1,Afghanistan,Asia,1957,30.332,9240934,820.853030,6.710344
2,Afghanistan,Asia,1962,31.997,10267083,853.100710,6.748878
3,Afghanistan,Asia,1967,34.020,11537966,836.197138,6.728864
4,Afghanistan,Asia,1972,36.088,13079460,739.981106,6.606625
...,...,...,...,...,...,...,...
1699,Zimbabwe,Africa,1987,62.351,9216418,706.157306,6.559838
1700,Zimbabwe,Africa,1992,60.377,10704340,693.420786,6.541637
1701,Zimbabwe,Africa,1997,46.809,11404948,792.449960,6.675129
1702,Zimbabwe,Africa,2002,39.989,11926563,672.038623,6.510316


Unnamed: 0_level_0,Estimate,Std. Error,t value,Pr(>|t|),2.5%,97.5%
Coefficient,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Intercept,-27.773459,2.500533,-11.107015,0.0,-32.678217,-22.868701
C(country)[T.Albania],17.782625,2.19516,8.100835,1.110223e-15,13.476853,22.088397
C(country)[T.Algeria],5.241055,2.214496,2.366704,0.01806875,0.897356,9.584755
C(country)[T.Angola],-13.907122,2.201727,-6.31646,3.481857e-10,-18.225777,-9.588468
C(country)[T.Argentina],8.132158,2.272781,3.578065,0.0003567229,3.674133,12.590183


In [10]:
# Set our individual and time (index) for our data
fit = pf.feols(fml="lifeExp ~ np.log(gdpPercap) | country + year", data=gm)
fit.summary()

###

Estimation:  OLS
Dep. var.: lifeExp, Fixed effects: country+year
Inference:  CRV1
Observations:  1704

| Coefficient       |   Estimate |   Std. Error |   t value |   Pr(>|t|) |   2.5% |   97.5% |
|:------------------|-----------:|-------------:|----------:|-----------:|-------:|--------:|
| np.log(gdpPercap) |      1.450 |        0.677 |     2.141 |      0.034 |  0.111 |   2.788 |
---
RMSE: 3.267 R2: 0.936 R2 Within: 0.019 


In [11]:
od = organ_donations.load_pandas().data

# Create Treatment Variable
od["California"] = od["State"] == "California"
od["After"] = od["Quarter_Num"] > 3
od["Treated"] = 1 * (od["California"] & od["After"])

did = pf.feols(fml="Rate ~ Treated | State + Quarter", data=od)
did.summary()

###

Estimation:  OLS
Dep. var.: Rate, Fixed effects: State+Quarter
Inference:  CRV1
Observations:  162

| Coefficient   |   Estimate |   Std. Error |   t value |   Pr(>|t|) |   2.5% |   97.5% |
|:--------------|-----------:|-------------:|----------:|-----------:|-------:|--------:|
| Treated       |     -0.022 |        0.006 |    -3.733 |      0.001 | -0.035 |  -0.010 |
---
RMSE: 0.022 R2: 0.979 R2 Within: 0.009 
