In [2]:
%load_ext autoreload
%autoreload 2

from itertools import product
from utils.dgps import generate_benchmark_data
from utils.estimators import (
    twfe_fixest, twfe_fixest_compressed, twfe_statsmodels, duck_mundlak
)
from utils.benchmark import Bench
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [3]:
%%capture

# Benchmark grid: one Bench run per (N, T, T0) combination.
N_values = [100_000]
# daily data: 14, 21 and 28 periods, i.e. 2-4 weeks
T_values = [14, 21, 28]
# 7 periods, i.e. one week prior
T0_values = [7]
# Forwarded to Bench(iter=...). Named n_iter so the builtin iter() is not
# shadowed for the rest of the kernel session.
n_iter = 3

# Collect one result frame per grid point and concatenate once after the loop
# instead of re-copying a growing DataFrame on every pass.
bench_frames = []

for N, T, T0 in product(N_values, T_values, T0_values):

    bench = Bench(N = N, T = T, T0 = T0, iter = n_iter)
    bench.mark(fun = twfe_fixest)
    # bench.mark(fun = twfe_statsmodels)  # currently disabled
    # TODO: add mundlak twfe via pyfixest in memory once on PyPI
    # duck_mundlak runs with 1 bootstrap iteration
    bench.mark(fun = twfe_fixest_compressed)
    bench.mark(fun = duck_mundlak)

    # Tag the results with the grid point they were produced for.
    bench_df = bench.to_dataframe()
    bench_df["N"] = N
    bench_df["T"] = T
    bench_df["T0"] = T0

    bench_frames.append(bench_df)

# Row order follows the loop order, so positional slicing downstream
# (e.g. dropping the very first row) sees the same layout as before.
all_benchmarks_df = pd.concat(bench_frames, axis = 0)

In [None]:
# Preview the first rows of the combined benchmark results.
all_benchmarks_df.head()

In [None]:
# Reshape to long format for plotting: one row per (N, T, T0, model) value.
# .iloc[1:, :] drops the first *row* (numba burn-in), not a column.
# NOTE(review): twfe_fixest_compressed is benchmarked but not listed in
# value_vars, so it is absent from the plot - confirm this is intentional.
all_benchmarks_df_melted = (
    all_benchmarks_df
    .iloc[1:, :]
    .melt(
        id_vars=['N', 'T', 'T0'],
        value_vars=['twfe_fixest', 'duck_mundlak'],
        var_name='model',
        value_name='value',
    )
    # Render N in scientific notation (e.g. 1e+05) for use as a facet label.
    .assign(N=lambda frame: frame['N'].apply(lambda n_obs: f'{n_obs:.0e}'))
)

In [None]:
# One violin panel per (N, T) combination: columns are T, rows are N.
g = sns.FacetGrid(all_benchmarks_df_melted, col="T", row="N", margin_titles=True, height=4, aspect=1)
g.map_dataframe(sns.violinplot, x='model', y='value', hue='model', palette='Set2', legend=False)

# Put every facet on a log y-axis explicitly instead of relying on
# plt.yscale hitting the current axes and propagating via shared axes.
g.set(yscale='log')
# Leave headroom at the top of the figure for the overall title.
g.figure.subplots_adjust(top=0.9)
g.figure.suptitle('Violin Plots of Models for Different N and T')

plt.show()