# PyFixest Benchmarks

In [1]:
%load_ext autoreload
%autoreload 2

import time

import pandas as pd
from linearmodels.iv import AbsorbingLS
from tqdm import tqdm  # note: tqdm is not a dependency of pyfixest

from pyfixest.estimation import feols, fepois


def run_standard_benchmark(model, fixed_effect):
    """
    Time repeated fits of one estimator on the benchmark data sets.

    Sizes 1-4 read a separate Stata file for every repetition
    (``./data/_STATA/base_s{size}_r{rep}.dta``); size 5 reads
    ``./data/data/base_10M.csv`` once and reuses it for all ten repetitions.
    Size 5 is skipped entirely for ``"fepois"``.

    Parameters
    ----------
    model : str
        Estimator to benchmark: ``"feols"``, ``"fepois"`` or ``"absorbingls"``.
    fixed_effect : str
        Fixed effects to absorb: ``"dum_1"``, ``"dum_1+dum_2"`` or
        ``"dum_1+dum_2+dum_3"``.

    Returns
    -------
    pd.DataFrame
        One row per timed fit with the columns ``method``, ``n_obs``, ``G``
        (number of fixed effects), ``rep`` and ``time`` (seconds).

    Raises
    ------
    AssertionError
        If ``model`` or ``fixed_effect`` is not one of the supported values.
    """
    assert model in ["feols", "fepois", "absorbingls"]
    assert fixed_effect in ["dum_1", "dum_1+dum_2", "dum_1+dum_2+dum_3"]

    fixef_columns = fixed_effect.split("+")

    # feols is fitted on the log outcome; fepois uses the level outcome.
    # The absorbingls branch below does not use the formula for its timed fit.
    fml_base = "ln_y ~ X1" if model == "feols" else "y ~ X1"
    fml = f"{fml_base} | {fixed_effect}"

    # Warm-up fit on a small data set so one-off start-up costs are not
    # attributed to the first timed repetition.
    # NOTE(review): the warm-up always calls feols, also when fepois or
    # AbsorbingLS is the model being benchmarked - confirm this is intended.
    df = pd.read_stata("./data/_STATA/base_s2_r1.dta")
    feols(fml, data=df)

    records = []
    for size in tqdm(range(1, 6)):
        if size == 5:
            if model == "fepois":
                # No 10M-row data is loaded for fepois. Skip the size instead
                # of timing ten more fits on the stale frame left over from
                # the previous size and recording them as extra repetitions.
                continue
            df = pd.read_csv("./data/data/base_10M.csv")

        for rep in range(1, 11):
            if size < 5:
                df = pd.read_stata(f"./data/_STATA/base_s{size}_r{rep}.dta")

            # perf_counter is monotonic and high resolution, unlike time.time.
            tic = time.perf_counter()
            if model == "feols":
                feols(fml, data=df)
            elif model == "absorbingls":
                # The categorical conversion is part of the timed region.
                # NOTE(review): at size 5 the frame is reused across reps, so
                # it is already categorical from the second repetition on.
                df = df.astype({f"dum_{i}": "category" for i in range(1, 4)})
                df_exog = df[["X1"]]
                df_absorb = df[fixef_columns]
                df_endog = df["ln_y"]
                # I don't think the approach gives analogous standard errors
                AbsorbingLS(df_endog, df_exog, absorb=df_absorb).fit(
                    cov_type="clustered"
                )
            else:
                fepois(fml, data=df)
            toc = time.perf_counter()

            records.append(
                {
                    "method": model,
                    "n_obs": df.shape[0],
                    "G": len(fixef_columns),
                    "rep": rep,
                    "time": toc - tic,
                }
            )

    # Build the frame once from plain records so the numeric columns keep
    # numeric dtypes instead of all becoming object.
    return pd.DataFrame(records, columns=["method", "n_obs", "G", "rep", "time"])


def run_all_benchmarks():
    """
    Run every model / fixed-effect combination and save the timings.

    Calls ``run_standard_benchmark`` for each of the three models crossed with
    the three fixed-effect specifications (models in the outer loop) and writes
    the stacked results to ``./results_py.csv``. Returns nothing.
    """
    models = ["feols", "fepois", "absorbingls"]
    fixefs = ["dum_1", "dum_1+dum_2", "dum_1+dum_2+dum_3"]

    # Collect the per-configuration frames and concatenate once, rather than
    # growing a DataFrame from an empty one inside the loop.
    frames = [
        run_standard_benchmark(model, fixef) for model in models for fixef in fixefs
    ]
    res = pd.concat(frames, axis=0)

    res.to_csv("./results_py.csv")

In [2]:
# Run the full benchmark grid (one progress bar per model / fixed-effect
# combination); the timings are written to ./results_py.csv.
run_all_benchmarks()

100%|██████████| 5/5 [00:23<00:00,  4.76s/it]
100%|██████████| 5/5 [00:28<00:00,  5.67s/it]
100%|██████████| 5/5 [00:30<00:00,  6.18s/it]
100%|██████████| 5/5 [00:25<00:00,  5.04s/it]
100%|██████████| 5/5 [01:06<00:00, 13.37s/it]
100%|██████████| 5/5 [01:43<00:00, 20.69s/it]
100%|██████████| 5/5 [37:23<00:00, 448.78s/it]
100%|██████████| 5/5 [1:26:09<00:00, 1033.99s/it]
100%|██████████| 5/5 [1:44:48<00:00, 1257.66s/it]


In [3]:
# Re-run a single configuration (feols with one fixed effect) and display
# the per-repetition timings it returns.
a = run_standard_benchmark("feols", "dum_1")
a

100%|██████████| 5/5 [16:53<00:00, 202.62s/it]


Unnamed: 0,method,n_obs,G,rep,time
0,feols,1000,1,1,0.056183
1,feols,1000,1,2,0.05277
2,feols,1000,1,3,0.052286
3,feols,1000,1,4,0.052665
4,feols,1000,1,5,0.052282
5,feols,1000,1,6,0.052521
6,feols,1000,1,7,0.052853
7,feols,1000,1,8,0.053038
8,feols,1000,1,9,0.053121
9,feols,1000,1,10,0.052397


In [4]:
# Transposed view of the same timings: one column per timed fit.
a.T

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,40,41,42,43,44,45,46,47,48,49
method,feols,feols,feols,feols,feols,feols,feols,feols,feols,feols,...,feols,feols,feols,feols,feols,feols,feols,feols,feols,feols
n_obs,1000,1000,1000,1000,1000,1000,1000,1000,1000,1000,...,10000000,10000000,10000000,10000000,10000000,10000000,10000000,10000000,10000000,10000000
G,1,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1
rep,1,2,3,4,5,6,7,8,9,10,...,1,2,3,4,5,6,7,8,9,10
time,0.056183,0.05277,0.052286,0.052665,0.052282,0.052521,0.052853,0.053038,0.053121,0.052397,...,992.064057,1.935793,1.74646,1.726809,1.662941,1.64592,1.652289,1.667719,1.649755,1.645669


## Visualisation

In [5]:
# Stack the Python timings saved in ./results_py.csv on top of the timings in
# ./results_all.txt (presumably results from other packages - TODO confirm).
res_all = pd.concat(
    [pd.read_csv(path) for path in ("./results_py.csv", "./results_all.txt")]
)