# Visualise Benchmarking Results

In [12]:
import numpy as np
import pandas as pd
from lets_plot import (
    LetsPlot,
    aes,
    facet_wrap,
    geom_line,
    geom_point,
    ggplot,
    ggsave,
    ggsize,
    ggtitle,
    scale_x_discrete,
    scale_y_continuous,
    xlab,
    ylab,
)

# Initialise lets-plot for notebook output.
# NOTE(review): `no_js=True` presumably makes plots render as static images
# instead of HTML+JS — confirm against the lets-plot `setup_html` docs.
LetsPlot.setup_html(no_js=True)

In [13]:
# Load the timings stored in results_py.csv and preview the first five rows.
# NOTE(review): the "Unnamed: 0" / "Unnamed: 0.1" columns in the preview look
# like index columns written out by earlier `to_csv` calls — confirm.
res_py = pd.read_csv(filepath_or_buffer="./results_py.csv")

res_py.head(n=5)

Unnamed: 0.1,Unnamed: 0,method,n_obs,G,rep,time
0,0,feols,1000,1,1,0.063291
1,1,feols,1000,1,2,0.050933
2,2,feols,1000,1,3,0.05039
3,3,feols,1000,1,4,0.050413
4,4,feols,1000,1,5,0.049823


In [14]:
# Sorted list of the distinct method labels present in the raw results.
np.unique(res_py["method"].to_numpy())

array(['absorbingls', 'feols', 'fepois'], dtype=object)

In [15]:
# Re-read the raw results so that this cell can be re-run safely: the
# "method" column is overwritten below, and relabelling already-relabelled
# data would classify every row as "linearmodels".
res_py = pd.read_csv("./results_py.csv")

# Derive the model family from the original method name. This must happen
# BEFORE "method" is overwritten with the package name.
res_py["model"] = np.where(
    res_py["method"].isin(["feols", "absorbingls"]), "Gaussian", "Poisson"
)

# Replace the function name with the package label used in the plots:
# feols / fepois -> "pyfixest", anything else -> "linearmodels".
res_py["method"] = np.where(
    res_py["method"].isin(["feols", "fepois"]), "pyfixest", "linearmodels"
)

# Results stored in results_all.txt (read as CSV).
res_other = pd.read_csv("./results_all.txt")

# Stack both result sets row-wise. `ignore_index=True` gives the combined
# frame a fresh 0..n-1 index; without it both inputs keep their own 0..n
# labels and `res_all` ends up with duplicate index values.
res_all = pd.concat([res_py, res_other], axis=0, ignore_index=True)

In [16]:
# Distinct method labels in the combined results, in order of first appearance.
res_all["method"].unique()

array(['pyfixest', 'linearmodels', 'fepois', 'glmmboot', 'feglm (alpaca)',
       'ppmlhdfe', 'fenegbin', 'glmnb', 'nbreg', 'feglm (fixest)',
       'logit', 'feols', 'lfe', 'reghdfe', 'FixedEffectModels'],
      dtype=object)

In [17]:
# Average the run time per (method, n_obs, G, model) combination.
# "time" is selected BEFORE calling .mean() so that only this column is
# aggregated: averaging the whole frame first would also try to average every
# other non-key column, which raises a TypeError on pandas >= 2.0 as soon as
# one of them is non-numeric.
res_agg = (
    res_all.groupby(["method", "n_obs", "G", "model"])["time"].mean().reset_index()
)

# Turn G into a facet label such as "1 FE".
res_agg["G"] = res_agg["G"].map("{} FE".format)

# Fix the order in which methods appear in the plots.
# Any method that is not listed here becomes NaN in the "method" column.
# NOTE(review): presumably those methods belong to model families that are
# not plotted below — confirm this is intended.
method_order = [
    "pyfixest",
    "linearmodels",
    "feols",
    "reghdfe",
    "lfe",
    "FixedEffectModels",
    "fepois",
    "glmmboot",
    "ppmlhdfe",
    "feglm (alpaca)",
]
res_agg["method"] = pd.Categorical(res_agg["method"], method_order)

In [19]:
# Benchmark plot for the Gaussian (OLS) models: mean run time against the
# number of observations, one line per method and one panel per value of "G".
plot_ols = ggplot(
    res_agg.loc[res_agg["model"] == "Gaussian"],
    aes(x="n_obs", y="time", color="method"),
)

# Geometry: lines with a marker at every measured point.
plot_ols = plot_ols + geom_line() + geom_point()

# Panels and scales.
# NOTE(review): the lower limit of 0 has no representation on a log10 axis
# (log10(0) is undefined) — confirm how lets-plot treats it.
plot_ols = (
    plot_ols
    + facet_wrap("G", nrow=1)
    + scale_x_discrete()
    + scale_y_continuous(trans="log10", limits=(0, 120))
)

# Labels, title and figure size.
plot_ols = (
    plot_ols
    + ylab("Time in Seconds")
    + xlab("Number of Observations")
    + ggtitle("Fixest Standard Benchmark for OLS")
    + ggsize(1000, 500)
)

# Write the figure to disk, then display it as the cell output.
ggsave(plot_ols, filename="benchmarks_ols.svg")

plot_ols

In [20]:
# Benchmark plot for the Poisson models: mean run time against the number of
# observations, one line per method and one panel per value of "G".
# Only rows with fewer than 1e7 observations are plotted.
plot_poisson = ggplot(
    res_agg.loc[(res_agg["model"] == "Poisson") & (res_agg["n_obs"] < 1e07)],
    aes(x="n_obs", y="time", color="method"),
)

# Geometry: lines with a marker at every measured point.
plot_poisson = plot_poisson + geom_line() + geom_point()

# Panels and scales.
# NOTE(review): the lower limit of 0 has no representation on a log10 axis
# (log10(0) is undefined) — confirm how lets-plot treats it.
plot_poisson = (
    plot_poisson
    + facet_wrap("G", nrow=1)
    + scale_x_discrete()
    + scale_y_continuous(trans="log10", limits=(0, 120))
)

# Labels, title and figure size.
plot_poisson = (
    plot_poisson
    + ylab("Time in Seconds")
    + xlab("Number of Observations")
    + ggtitle("Fixest Standard Benchmark for Poisson Regression")
    + ggsize(1000, 500)
)

# Write the figure to disk, then display it as the cell output.
ggsave(plot_poisson, filename="benchmarks_poisson.svg")

plot_poisson