# Does `PyFixest` match `fixest`?

This vignette compares estimation results from `fixest` with `pyfixest` via the `rpy2` package.

In [1]:
import pandas as pd
import rpy2.robjects as ro
from rpy2.robjects import pandas2ri
from rpy2.robjects.packages import importr

import pyfixest as pf

# Activate pandas2ri
pandas2ri.activate()

# Import R packages
fixest = importr("fixest")
stats = importr("stats")

# IPython magic commands for autoreloading
%load_ext autoreload
%autoreload 2

# Get data using pyfixest
data = pf.get_data(model="Feols", N=10_000, seed=99292)

## Ordinary Least Squares (OLS)

### IID Inference

First, we estimate a model via `pyfixest. We compute "iid" standard errors. 

In [2]:
fit = pf.feols(fml="Y ~ X1 + X2 | f1 + f2", data=data, vcov="iid")

We estimate the same model with weights: 

In [3]:
fit_weights = pf.feols(
    fml="Y ~ X1 + X2 | f1 + f2", data=data, weights="weights", vcov="iid"
)

Via `r-fixest` and `rpy2`, we get

In [4]:
r_fit = fixest.feols(
    ro.Formula("Y ~ X1 + X2 | f1 + f2"),
    data=data,
    vcov="iid",
    ssc=fixest.ssc(True, "none", True, "min", "min", False),
)

r_fit_weights = fixest.feols(
    ro.Formula("Y ~ X1 + X2 | f1 + f2"),
    data=data,
    weights=ro.Formula("~weights"),
    vcov="iid",
    ssc=fixest.ssc(True, "none", True, "min", "min", False),
)

R[write to console]: NOTE: 3 observations removed because of NA values (LHS: 1, RHS: 1, Fixed-effects: 1).

R[write to console]: NOTE: 3 observations removed because of NA values (LHS: 1, RHS: 1, Fixed-effects: 1).



Let's compare how close the covariance matrices are: 

In [5]:
fit_vcov = fit._vcov
r_vcov = stats.vcov(r_fit)
fit_vcov - r_vcov

array([[-7.04731412e-19, -3.34180967e-22],
       [-3.34594558e-22, -1.38913403e-19]])

And for WLS: 

In [6]:
fit_weights._vcov - stats.vcov(r_fit_weights)

array([[ 1.68051337e-18, -1.69406589e-21],
       [-1.69406589e-21, -1.49077799e-19]])

We conclude by comparing all estimation results via the `etable` function: 

In [7]:
pf.etable([fit, fit_weights], digits=6)

coef,coef.1,coef.2
X1,0.112019*** (0.016947),0.123687*** (0.016880)
X2,0.732788*** (0.004595),0.732244*** (0.004584)
fe,fe,fe
f1,x,x
f2,x,x
modelstats,modelstats,modelstats
Observations,9997,9997
S.E. type,iid,iid
R2,0.774375,-
,Y,Y


In [8]:
pd.DataFrame(fixest.etable(r_fit, r_fit_weights, digits=6)).T

Unnamed: 0,0,1,2
0,Dependent Var.:,Y,Y
1,,,
2,X1,0.112019*** (0.016947),0.123687*** (0.016880)
3,X2,0.732788*** (0.004595),0.732244*** (0.004584)
4,Fixed-Effects:,----------------------,----------------------
5,f1,Yes,Yes
6,f2,Yes,Yes
7,_______________,______________________,______________________
8,S.E. type,IID,IID
9,Observations,9997,9997


### Heteroskedastic Errors

We repeat the same exercise with heteroskedastic (HC1) errors: 

In [9]:
fit = pf.feols(fml="Y ~ X1 + X2 | f1 + f2", data=data, vcov="hetero")
fit_weights = pf.feols(
    fml="Y ~ X1 + X2 | f1 + f2", data=data, vcov="hetero", weights="weights"
)

In [10]:
fit_r = fixest.feols(
    ro.Formula("Y ~ X1 + X2 | f1 + f2"),
    data=data,
    vcov="hetero",
    ssc=fixest.ssc(True, "none", True, "min", "min", False),
)

fit_weights_r = fixest.feols(
    ro.Formula("Y ~ X1 + X2 | f1 + f2"),
    data=data,
    weights=ro.Formula("~weights"),
    vcov="hetero",
    ssc=fixest.ssc(True, "none", True, "min", "min", False),
)

R[write to console]: NOTE: 3 observations removed because of NA values (LHS: 1, RHS: 1, Fixed-effects: 1).

R[write to console]: NOTE: 3 observations removed because of NA values (LHS: 1, RHS: 1, Fixed-effects: 1).



As before, we compare the variance covariance matrices:

In [11]:
fit._vcov - stats.vcov(fit_r)

array([[-1.61925594e-16, -2.13306719e-17],
       [-2.13306719e-17, -5.39593869e-17]])

In [12]:
fit_weights._vcov - stats.vcov(fit_weights_r)

array([[-2.04968421e-16, -9.53780274e-18],
       [-9.53780274e-18, -3.03136151e-17]])

In [13]:
pf.etable([fit, fit_weights], digits=6)

coef,coef.1,coef.2
X1,0.112019*** (0.017009),0.123687*** (0.019361)
X2,0.732788*** (0.004553),0.732244*** (0.005140)
fe,fe,fe
f1,x,x
f2,x,x
modelstats,modelstats,modelstats
Observations,9997,9997
S.E. type,hetero,hetero
R2,0.774375,-
,Y,Y


In [14]:
pd.DataFrame(fixest.etable(fit_r, fit_weights_r, digits=6)).T

Unnamed: 0,0,1,2
0,Dependent Var.:,Y,Y
1,,,
2,X1,0.112019*** (0.017009),0.123687*** (0.019361)
3,X2,0.732788*** (0.004553),0.732244*** (0.005140)
4,Fixed-Effects:,----------------------,----------------------
5,f1,Yes,Yes
6,f2,Yes,Yes
7,_______________,______________________,______________________
8,S.E. type,Heteroskedastici.-rob.,Heteroskedastici.-rob.
9,Observations,9997,9997


### Cluster-Robust Errors

We conclude with cluster robust errors. 

In [15]:
fit = pf.feols(fml="Y ~ X1 + X2 | f1 + f2", data=data, vcov={"CRV1": "f1"})
fit_weights = pf.feols(
    fml="Y ~ X1 + X2 | f1 + f2", data=data, vcov={"CRV1": "f1"}, weights="weights"
)

fit_r = fixest.feols(
    ro.Formula("Y ~ X1 + X2 | f1 + f2"),
    data=data,
    vcov=ro.Formula("~f1"),
    ssc=fixest.ssc(True, "none", True, "min", "min", False),
)
fit_r_weights = fixest.feols(
    ro.Formula("Y ~ X1 + X2 | f1 + f2"),
    data=data,
    weights=ro.Formula("~weights"),
    vcov=ro.Formula("~f1"),
    ssc=fixest.ssc(True, "none", True, "min", "min", False),
)

R[write to console]: NOTE: 3 observations removed because of NA values (LHS: 1, RHS: 1, Fixed-effects: 1).

R[write to console]: NOTE: 3 observations removed because of NA values (LHS: 1, RHS: 1, Fixed-effects: 1).



In [16]:
fit._vcov - stats.vcov(fit_r)

array([[ 4.20345182e-16, -6.97387636e-17],
       [-6.97404577e-17, -1.42166010e-17]])

In [17]:
fit_weights._vcov - stats.vcov(fit_weights_r)

array([[-3.95318402e-05,  2.05473422e-05],
       [ 2.05473422e-05,  1.13249200e-06]])

In [18]:
pf.etable([fit, fit_weights], digits=6)

coef,coef.1,coef.2
X1,0.112019*** (0.015816),0.123687*** (0.018311)
X2,0.732788*** (0.004476),0.732244*** (0.005249)
fe,fe,fe
f1,x,x
f2,x,x
modelstats,modelstats,modelstats
Observations,9997,9997
S.E. type,by: f1,by: f1
R2,0.774375,-
,Y,Y


In [19]:
pd.DataFrame(fixest.etable(fit_r, fit_r_weights, digits=6)).T

Unnamed: 0,0,1,2
0,Dependent Var.:,Y,Y
1,,,
2,X1,0.112019*** (0.015816),0.123687*** (0.018311)
3,X2,0.732788*** (0.004476),0.732244*** (0.005249)
4,Fixed-Effects:,----------------------,----------------------
5,f1,Yes,Yes
6,f2,Yes,Yes
7,_______________,______________________,______________________
8,S.E.: Clustered,by: f1,by: f1
9,Observations,9997,9997


## Poisson Regression

In [20]:
data = pf.get_data(model="Fepois")

In [21]:
fit_iid = pf.fepois(fml="Y ~ X1 + X2 | f1 + f2", data=data, vcov="iid", iwls_tol=1e-10)
fit_hetero = pf.fepois(
    fml="Y ~ X1 + X2 | f1 + f2", data=data, vcov="hetero", iwls_tol=1e-10
)
fit_crv = pf.fepois(
    fml="Y ~ X1 + X2 | f1 + f2", data=data, vcov={"CRV1": "f1"}, iwls_tol=1e-10
)

fit_r_iid = fixest.fepois(
    ro.Formula("Y ~ X1 + X2 | f1 + f2"),
    data=data,
    vcov="iid",
    ssc=fixest.ssc(True, "none", True, "min", "min", False),
)

fit_r_hetero = fixest.fepois(
    ro.Formula("Y ~ X1 + X2 | f1 + f2"),
    data=data,
    vcov="hetero",
    ssc=fixest.ssc(True, "none", True, "min", "min", False),
)

fit_r_crv = fixest.fepois(
    ro.Formula("Y ~ X1 + X2 | f1 + f2"),
    data=data,
    vcov=ro.Formula("~f1"),
    ssc=fixest.ssc(True, "none", True, "min", "min", False),
)

R[write to console]: NOTE: 3 observations removed because of NA values (LHS: 1, RHS: 1, Fixed-effects: 1).

R[write to console]: NOTE: 3 observations removed because of NA values (LHS: 1, RHS: 1, Fixed-effects: 1).

R[write to console]: NOTE: 3 observations removed because of NA values (LHS: 1, RHS: 1, Fixed-effects: 1).



In [22]:
fit_iid._vcov - stats.vcov(fit_r_iid)

array([[ 1.20791284e-08, -6.55604931e-10],
       [-6.55604931e-10,  1.69958097e-09]])

In [23]:
fit_hetero._vcov - stats.vcov(fit_r_hetero)

array([[ 2.18101847e-08, -7.38711972e-10],
       [-7.38711972e-10,  3.07587753e-09]])

In [24]:
fit_crv._vcov - stats.vcov(fit_r_crv)

array([[ 1.58300904e-08, -1.20806815e-10],
       [-1.20806815e-10,  3.17512746e-09]])

In [25]:
pf.etable([fit_iid, fit_hetero, fit_crv], digits=6)

coef,coef.1,coef.2,coef.3
X1,-0.006591 (0.040758),-0.006591 (0.039145),-0.006591 (0.034745)
X2,-0.014924 (0.010994),-0.014924 (0.010501),-0.014924 (0.010303)
fe,fe,fe,fe
f1,x,x,x
f2,x,x,x
modelstats,modelstats,modelstats,modelstats
Observations,997,997,997
S.E. type,iid,hetero,by: f1
R2,-,-,-
,Y,Y,Y


In [26]:
pd.DataFrame(fixest.etable(fit_r_iid, fit_r_hetero, fit_r_crv, digits=6)).T

Unnamed: 0,0,1,2,3
0,Dependent Var.:,Y,Y,Y
1,,,,
2,X1,-0.006591 (0.040758),-0.006591 (0.039145),-0.006591 (0.034745)
3,X2,-0.014924 (0.010994),-0.014924 (0.010501),-0.014924 (0.010302)
4,Fixed-Effects:,--------------------,--------------------,--------------------
5,f1,Yes,Yes,Yes
6,f2,Yes,Yes,Yes
7,_______________,____________________,____________________,____________________
8,S.E. type,IID,Heteroskedasti.-rob.,by: f1
9,Observations,997,997,997
