In [1]:
import pickle
import numpy as np
from src.groupedpaneldatamodels import GroupedFixedEffects, GroupedInteractiveFixedEffects, grid_search_by_ic
from joblib import Parallel, delayed
# from tqdm import tqdm
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
import os

# Bonhomme Manresa (DGP 1)

In [2]:
def bm_run(run, G, i):
    """Fit the default ``GroupedFixedEffects`` model to one simulated run.

    Parameters
    ----------
    run : sequence of four items
        Unpacked as ``(x, y, f, g)``. ``x`` and ``y`` are cast to float32 and
        ``y`` is passed through ``np.atleast_3d``. ``f`` is cast as well but not
        used afterwards; ``g`` is ignored.
    G : int
        Number of groups handed to the estimator. It also selects the settings:
        ``G < 4`` uses 10 GFE iterations with bootstrap, otherwise 100 without.
    i : int
        Run index, only used in the error message.

    Returns
    -------
    dict or None
        ``to_dict()`` of the fitted model, or ``None`` when constructing or
        fitting the model raised an exception.
    """
    exog, dependent, factors, _membership = run
    exog = np.float32(exog)
    dependent = np.atleast_3d(np.float32(dependent))
    factors = np.float32(factors)  # cast kept from the original; value is unused

    few_groups = bool(G < 4)
    fit_kwargs = {
        "max_iter": 100,
        "gfe_iterations": 10 if few_groups else 100,
        "n_boot": 200,
        "boot_n_jobs": 1,
    }
    try:
        model = GroupedFixedEffects(dependent, exog, G, few_groups, hide_progressbar=True)
        return model.fit(**fit_kwargs).to_dict()
    except Exception as exc:
        # Best effort: a failed replication is reported and skipped.
        print(f"Error in run {i}: {exc}")
        return None

# Smoke test: fit the model on the first stored replication of the G=3 design.
# The context manager closes the data file as soon as it has been read.
with open("development/generated_data/dgp1_n100_t20_G3_k3.pkl", "rb") as fh:
    runs = pickle.load(fh)
bm_run(runs[0], 3, 0)

{'name': 'GroupedFixedEffects',
 'id': '0x151e50c20',
 'fit_datetime': '2025-07-03 01:53:31',
 'fit_duration': 5.051087,
 'model_type': 'bonhomme_manresa',
 'params': {'beta': array([[1.09710777, 0.94092071, 0.9446826 ],
         [1.95833027, 2.06949878, 1.93274701],
         [3.02682257, 3.02320194, 2.93402767]]),
  'alpha': array([[ 2.27280304,  2.41919213,  3.00925257,  1.05008519,  0.73315203,
           1.5027573 ,  0.72219423, -0.48180834,  0.41019553,  0.63405466,
          -1.11972675,  0.40345225,  0.94602223,  1.42608962,  0.21801438,
          -0.52556276, -0.794405  , -0.57060553,  0.27263898, -1.18072652],
         [-0.61463819,  0.26580869,  0.34595521, -0.81371733, -1.69929573,
          -2.93477045, -0.99554873,  1.01901478, -0.33052888,  0.28410158,
          -0.84067857, -3.10777513, -0.71495776,  0.1183961 , -1.22536251,
          -1.36712327, -0.92795561, -0.31134588, -0.22240228,  0.92712255],
         [-1.73161307, -2.35355611, -0.81019474, -0.960793  ,  0.5015539

In [None]:
# Monte Carlo estimation for DGP 1: one pickle of per-run estimates per design.
for n in (100, 200):
    for t in (20, 50):
        for G in (3, 6):
            for k in (3,):
                output_path = f"estimates/dgp1_n{n}_t{t}_G{G}_k{k}_full.pkl"
                if os.path.exists(output_path):
                    # An existing results file acts as a cache; delete it to re-run.
                    print(f"Skipping DGP1 with n={n}, t={t}, G={G}, k={k} (already exists)")
                    continue

                print(f"\n\nRunning DGP1 with n={n}, t={t}, G={G}, k={k}")
                # Read inside a context manager so the input handle is closed.
                with open(f"development/generated_data/dgp1_n{n}_t{t}_G{G}_k{k}.pkl", "rb") as fh:
                    runs = pickle.load(fh)

                # Wrap the iterable in tqdm to get a progress bar; the total follows
                # the number of stored runs instead of a hard-coded 500.
                estimates = Parallel(n_jobs=-1, prefer="processes")(
                    delayed(bm_run)(run, G, i)
                    for i, run in tqdm(enumerate(runs), total=len(runs), desc=f"DGP1 n={n}, t={t}, G={G}, k={k}")
                )

                with open(output_path, "wb") as f:
                    pickle.dump(estimates, f)

## DGP 2

In [2]:
def ab_run(run, G, i):
    """Fit the default ``GroupedInteractiveFixedEffects`` model to one simulated run.

    Parameters
    ----------
    run : sequence
        At least four items; the first two are ``x`` and ``y``, the rest is
        ignored. ``x`` and ``y`` are cast to float32 and ``y`` is passed through
        ``np.atleast_3d``.
    G : int
        Number of groups passed to the estimator. ``G < 4`` uses 10 GIFE
        iterations and ``n_boot=200``, otherwise 100 iterations and ``n_boot=100``.
    i : int
        Run index, reported when the fit fails.

    Returns
    -------
    dict or None
        ``m.to_dict()`` on success, ``None`` when constructing or fitting the
        model raised an exception.
    """
    x, y, _f, _g, *_ = run
    x = np.float32(x)
    y = np.atleast_3d(np.float32(y))
    gife_iterations = 10 if G < 4 else 100
    n_boot = 200 if G < 4 else 100
    # Bootstrap is currently switched off for every G to avoid long computation
    # times (the earlier rule was `bootstrap = G < 4`). n_boot is still passed on.
    bootstrap = False
    try:
        m = GroupedInteractiveFixedEffects(y, x, G, bootstrap, hide_progressbar=True).fit(
            max_iter=100, gife_iterations=gife_iterations, n_boot=n_boot, boot_n_jobs=1
        )

        return m.to_dict()
    except Exception as err:
        # Best effort: report which replication failed and keep the batch going.
        print(f"Error in run {i}: {err}")
        return None

# Smoke test on one stored replication of the DGP 2, G=3 design.
# The context manager closes the data file as soon as it has been read.
with open("development/generated_data/dgp2_n100_t20_G3_k3.pkl", "rb") as fh:
    runs = pickle.load(fh)
ab_run(runs[10], 3, 0)

{'name': 'GroupedInteractiveFixedEffects',
 'id': '0x141344c20',
 'fit_datetime': '2025-07-03 02:39:31',
 'fit_duration': 1.465207,
 'model_type': 'ando_bai',
 'params': {'beta': array([[1.06419743, 1.01846832, 1.12349773],
         [1.96012761, 2.1488888 , 1.93736769],
         [3.02470208, 3.06779144, 3.00264263]]),
  'g': {0: [5,
    8,
    17,
    18,
    22,
    32,
    33,
    34,
    35,
    36,
    39,
    52,
    54,
    57,
    60,
    65,
    67,
    68,
    69,
    75,
    78,
    81,
    82,
    92],
   1: [4,
    6,
    9,
    10,
    12,
    14,
    15,
    16,
    20,
    21,
    23,
    24,
    28,
    29,
    30,
    37,
    38,
    40,
    41,
    42,
    43,
    47,
    50,
    51,
    56,
    58,
    59,
    61,
    62,
    63,
    64,
    72,
    74,
    76,
    77,
    80,
    85,
    87,
    88,
    89,
    90,
    96,
    97,
    98,
    99],
   2: [0,
    1,
    2,
    3,
    7,
    11,
    13,
    19,
    25,
    26,
    27,
    31,
    44,
    45,
    46,
  

In [4]:
# Monte Carlo estimation for DGP 2 with the default interactive-effects model.
# Only the first 100 stored runs of each design are estimated.
for n in (100, 200):
    for t in (20, 50):
        for G in (3, 6):
            for k in (3,):
                output_path = f"estimates/dgp2_n{n}_t{t}_G{G}_k{k}_se_fixed_3_small.pkl"
                if os.path.exists(output_path):
                    # An existing results file acts as a cache; delete it to re-run.
                    print(f"Skipping DGP2 with n={n}, t={t}, G={G}, k={k} (already exists)")
                    continue
                print(f"\n\nRunning DGP2 (ab) with n={n}, t={t}, G={G}, k={k}")
                # Read inside a context manager so the input handle is closed.
                with open(f"development/generated_data/dgp2_n{n}_t{t}_G{G}_k{k}.pkl", "rb") as fh:
                    runs = pickle.load(fh)

                # Size the progress bar from the slice itself, in case fewer than
                # 100 runs are stored.
                subset = runs[:100]
                estimates = Parallel(n_jobs=-1, prefer="processes")(
                    delayed(ab_run)(run, G, i)
                    for i, run in tqdm(enumerate(subset), total=len(subset), desc=f"DGP2 n={n}, t={t}, G={G}, k={k}")
                )

                with open(output_path, "wb") as f:
                    pickle.dump(estimates, f)



Running DGP2 (ab) with n=100, t=20, G=3, k=3


DGP2 n=100, t=20, G=3, k=3:   0%|          | 0/100 [00:00<?, ?it/s]



Running DGP2 (ab) with n=100, t=20, G=6, k=3


DGP2 n=100, t=20, G=6, k=3:   0%|          | 0/100 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [None]:
def sj_run(run, G, i):
    """Fit the ``"su_ju"`` variant of ``GroupedInteractiveFixedEffects`` to one run.

    Parameters
    ----------
    run : sequence
        At least four items; the first two are ``x`` and ``y``, the rest is
        ignored. ``x`` and ``y`` are cast to float32 and ``y`` is passed through
        ``np.atleast_3d``.
    G : int
        Number of groups; also passed as ``R``. ``G < 4`` uses ``n_boot=100``,
        ``kappa=0.005`` and bootstrap; otherwise ``n_boot=50``, ``kappa=0.00005``
        and no bootstrap.
    i : int
        Run index, reported when the fit fails.

    Returns
    -------
    dict or None
        ``m.to_dict()`` on success, ``None`` when constructing or fitting the
        model raised an exception.
    """
    x, y, _f, _g, *_ = run
    x = np.float32(x)
    y = np.atleast_3d(np.float32(y))
    n_boot = 100 if G < 4 else 50
    kappa = 0.005 if G < 4 else 0.00005
    bootstrap = True if G < 4 else False
    try:
        m = GroupedInteractiveFixedEffects(y, x, G, bootstrap, "su_ju", R=G, hide_progressbar=True).fit(
            n_boot=n_boot, only_bfgs=True, max_iter=100, kappa=kappa, tol=1e-2, boot_n_jobs=1
        )

        return m.to_dict()
    except Exception as err:
        # Best effort: report which replication failed and keep the batch going.
        print(f"Error in run {i}: {err}")
        return None

# Smoke test of the su_ju estimator on the first stored DGP 2 replication.
# The context manager closes the data file as soon as it has been read.
with open("development/generated_data/dgp2_n100_t20_G3_k3.pkl", "rb") as fh:
    runs = pickle.load(fh)
sj_run(runs[0], 3, 0)

In [None]:
# Monte Carlo estimation for DGP 2 with the su_ju estimator, all stored runs.
for n in (100, 200):
    for t in (20, 50):
        for G in (3, 6):
            for k in (3,):
                output_path = f"estimates/dgp2_n{n}_t{t}_G{G}_k{k}_su_ju_full.pkl"
                if os.path.exists(output_path):
                    # An existing results file acts as a cache; delete it to re-run.
                    print(f"Skipping DGP2 with n={n}, t={t}, G={G}, k={k} (already exists)")
                    continue
                print(f"\n\nRunning DGP2 (su_ju) with n={n}, t={t}, G={G}, k={k}")
                # Read inside a context manager so the input handle is closed.
                with open(f"development/generated_data/dgp2_n{n}_t{t}_G{G}_k{k}.pkl", "rb") as fh:
                    runs = pickle.load(fh)

                estimates = Parallel(n_jobs=-1, prefer="processes")(
                    delayed(sj_run)(run, G, i)
                    for i, run in enumerate(tqdm(runs, desc=f"DGP2 n={n}, t={t}, G={G}, k={k}"))
                )

                with open(output_path, "wb") as f:
                    pickle.dump(estimates, f)

# DGP 3 (SSP: Su, Shi & Phillips)

In [9]:
def ssp_run(run, G, i, kappa=0.005):
    """Fit the ``"su_shi_phillips"`` model to one run and print its ``IC`` attribute.

    Exploratory variant: nothing is returned and exceptions are not caught.

    Parameters
    ----------
    run : sequence of three items
        Unpacked as ``(x, y, g)``; ``g`` is ignored. ``x`` and ``y`` are cast to
        float32 and ``y`` is passed through ``np.atleast_3d``.
    G : int
        Number of groups; ``G < 4`` uses ``n_boot=100``, otherwise 50.
    i : int
        Run index (unused here).
    kappa : float, optional
        Passed to the ``GroupedFixedEffects`` constructor.
    """
    exog, dependent, _membership = run
    exog = np.float32(exog)
    dependent = np.atleast_3d(np.float32(dependent))
    boot_draws = 100 if G < 4 else 50
    # Bootstrap is off for every G.
    use_bootstrap = False
    estimator = GroupedFixedEffects(
        dependent, exog, G, use_bootstrap, "su_shi_phillips", hide_progressbar=True, kappa=kappa
    )
    fitted = estimator.fit(n_boot=boot_draws, only_bfgs=False, max_iter=100, tol=1e-5, boot_n_jobs=1)

    print(fitted.IC)


# Smoke test: print the information criteria for the first DGP 3 replication.
# The context manager closes the data file as soon as it has been read.
with open("development/generated_data/dgp3_n100_t20_G3_k3.pkl", "rb") as fh:
    runs = pickle.load(fh)
ssp_run(runs[0], 3, 0)

{'sigma^2': np.float64(1.2225363299714709), 'AIC': np.float64(762.9262961020638), 'BIC': np.float64(3053.6954020547755), 'HQIC': np.float64(1604.0486897660383)}


In [None]:
def ssp_run(run, G, i, kappa=0.005):
    """Fit the ``"su_shi_phillips"`` variant of ``GroupedFixedEffects`` to one run.

    Parameters
    ----------
    run : sequence of three items
        Unpacked as ``(x, y, g)``; ``g`` is ignored. ``x`` and ``y`` are cast to
        float32 and ``y`` is passed through ``np.atleast_3d``.
    G : int
        Number of groups; ``G < 4`` uses ``n_boot=100``, otherwise 50.
    i : int
        Run index, reported when the fit fails.
    kappa : float, optional
        Passed to the ``GroupedFixedEffects`` constructor.

    Returns
    -------
    dict or None
        ``m.to_dict()`` on success, ``None`` when constructing or fitting the
        model raised an exception.
    """
    x, y, g = run
    x = np.float32(x)
    y = np.atleast_3d(np.float32(y))
    n_boot = 100 if G < 4 else 50
    # Bootstrap is off for every G; n_boot is still passed on to fit().
    bootstrap = False
    try:
        m = GroupedFixedEffects(y, x, G, bootstrap, "su_shi_phillips", hide_progressbar=True, kappa=kappa).fit(
            n_boot=n_boot, only_bfgs=False, max_iter=100, tol=1e-5, boot_n_jobs=1
        )

        return m.to_dict()
    except Exception as err:
        # Best effort: report which replication failed and keep the batch going.
        print(f"Error in run {i}: {err}")
        return None

# Smoke test of the su_shi_phillips estimator on the first DGP 3 replication.
# The context manager closes the data file as soon as it has been read.
with open("development/generated_data/dgp3_n100_t20_G3_k3.pkl", "rb") as fh:
    runs = pickle.load(fh)
ssp_run(runs[0], 3, 0)

{'name': 'GroupedFixedEffects',
 'id': '0x13a5ec6e0',
 'fit_datetime': '2025-07-01 17:34:29',
 'fit_duration': 3.48636,
 'model_type': 'su_shi_phillips',
 'params': {'beta': array([[0.9752052 , 1.02096804, 1.03397083],
         [1.94705547, 2.10195583, 2.01913477],
         [3.03720153, 3.00897187, 2.97261922]]),
  'b': array([[0.97577067, 2.00834705, 3.05843823, 3.07416981, 3.07163485,
          1.02634178, 2.04942254, 1.94712974, 1.251697  , 3.35244533,
          1.90603913, 2.16426509, 2.33440084, 1.97608125, 1.83640093,
          2.66830622, 2.65481544, 3.53920817, 1.65807487, 0.84692417,
          0.77637368, 2.2085375 , 3.18421298, 1.47803444, 3.4898657 ,
          0.69795958, 1.48577808, 1.18225311, 0.91067578, 3.03569154,
          1.90386376, 2.57891521, 1.61671921, 3.33707301, 0.82389489,
          1.80320238, 3.47312184, 3.26002438, 2.92784255, 1.96398719,
          3.02205402, 1.34522279, 2.22040859, 0.76957915, 1.17250083,
          0.88303647, 3.20953714, 1.84311801, 0.64

In [None]:
# Monte Carlo estimation for DGP 3 with the su_shi_phillips estimator.
for n in (100, 200):
    for t in (20, 50):
        for G in (3, 6):
            for k in (3,):
                output_path = f"estimates/dgp3_n{n}_t{t}_G{G}_k{k}_full.pkl"
                if os.path.exists(output_path):
                    # An existing results file acts as a cache; delete it to re-run.
                    print(f"Skipping DGP3 with n={n}, t={t}, G={G}, k={k} (already exists)")
                    continue
                print(f"\n\nRunning DGP3 (su_shi_phillips) with n={n}, t={t}, G={G}, k={k}")
                # Read inside a context manager so the input handle is closed.
                with open(f"development/generated_data/dgp3_n{n}_t{t}_G{G}_k{k}.pkl", "rb") as fh:
                    runs = pickle.load(fh)
                # The progress bar is labelled DGP3 (it previously said DGP2).
                estimates = Parallel(n_jobs=-1, prefer="processes")(
                    delayed(ssp_run)(run, G, i) for i, run in enumerate(tqdm(runs, desc=f"DGP3 n={n}, t={t}, G={G}, k={k}"))
                )
                with open(output_path, "wb") as f:
                    pickle.dump(estimates, f)

In [None]:
from statsmodels.regression.linear_model import OLS
from statsmodels.tools.tools import add_constant

def ols(y, x):
    """
    Fit an OLS regression of ``y`` on ``x`` plus a constant.

    Returns a 3-tuple for the regressors only, with the leading (constant) entry
    dropped from each item:

    * coefficient estimates,
    * 95% confidence intervals, one ``(lower, upper)`` row per regressor,
    * standard errors.
    """
    # NOTE(review): add_constant is called with its defaults; if `x` can already
    # contain a constant column, check that position 0 is still the intercept.
    model = OLS(y, add_constant(x))
    results = model.fit()
    # conf_int() has one row per parameter. Slice with [1:] like params and bse;
    # indexing with [1] returned only the first regressor's interval.
    return results.params[1:], results.conf_int(alpha=0.05)[1:], results.bse[1:]

def heterogeneous_ols(y, x):
    """
    Run ``ols`` separately on every slice along the first axis of ``x`` and ``y``.

    ``x`` must be 3-dimensional: the first axis indexes the slices and the third
    axis sizes the per-slice outputs. Returns three arrays stacked over the first
    axis: estimates ``(n, k)``, confidence bounds ``(n, k, 2)`` and standard
    errors ``(n, k)``, filled from the three values ``ols`` returns.
    """
    n_slices, n_regressors = x.shape[0], x.shape[2]
    est_array = np.zeros((n_slices, n_regressors))
    conf_array = np.zeros((n_slices, n_regressors, 2))
    se_array = np.zeros((n_slices, n_regressors))

    for idx, x_slice in enumerate(x):
        est_array[idx], conf_array[idx], se_array[idx] = ols(y[idx], x_slice)

    return est_array, conf_array, se_array

In [None]:
# Unit-by-unit OLS on the first 200 replications of the DGP 1, G=3 design.
with open("development/generated_data/dgp1_n100_t20_G3_k3.pkl", "rb") as fh:
    runs = pickle.load(fh)

estimates = []
for run in tqdm(runs[:200]):
    # Use the current replication; the earlier runs[0] re-estimated the first
    # run on every iteration.
    x = np.float32(run[0])
    y = np.float32(run[1])

    estimates.append(heterogeneous_ols(y, x))

output_path = "estimates/dgp1_n100_t20_G3_k3_ols.pkl"
with open(output_path, "wb") as f:
    pickle.dump(estimates, f)

In [None]:
def heterogeneous_ols_run(dgp, n, t, G, k):
    """Run ``heterogeneous_ols`` on every stored run of one design and pickle the results.

    Reads ``development/generated_data/dgp{dgp}_n{n}_t{t}_G{G}_k{k}.pkl``, estimates
    all runs in parallel and writes the list of results to
    ``estimates/dgp{dgp}_n{n}_t{t}_G{G}_k{k}_ols.pkl``. Nothing is returned.
    """
    # Read inside a context manager so the input handle is closed.
    with open(f"development/generated_data/dgp{dgp}_n{n}_t{t}_G{G}_k{k}.pkl", "rb") as fh:
        runs = pickle.load(fh)

    def _run_single_estimation(run):
        # Items 0 and 1 of a run are x and y; both are cast to float64 here.
        x = np.float64(run[0])
        y = np.float64(run[1])
        return heterogeneous_ols(y, x)

    # Wrap tqdm around the iterator to preserve the progress bar
    estimates = Parallel(n_jobs=-1)(
        delayed(_run_single_estimation)(run) for run in tqdm(runs)
    )

    output_path = f"estimates/dgp{dgp}_n{n}_t{t}_G{G}_k{k}_ols.pkl"
    with open(output_path, "wb") as f:
        pickle.dump(estimates, f)

In [None]:
# Run the OLS benchmark for every (dgp, n, t, G, k) design, in the same order
# as the equivalent nested loops.
ols_designs = [
    (dgp, n, t, G, k)
    for dgp in (1, 2, 3)
    for n in (100, 200)
    for t in (20, 50)
    for G in (3, 6)
    for k in (3,)
]
for dgp, n, t, G, k in ols_designs:
    heterogeneous_ols_run(dgp, n, t, G, k)

## Information criteria (IC): selecting the number of groups

In [4]:
import contextlib
import pandas as pd

In [4]:
def bm_ic(run, G_true, i):
    """Pick the number of groups for one DGP 1 run via ``grid_search_by_ic``.

    Candidates G = 2..8 are searched with ``GroupedFixedEffects`` while stdout
    and stderr are redirected to ``os.devnull``.

    Parameters
    ----------
    run : sequence of four items
        Unpacked as ``(x, y, f, g)``. ``x`` and ``y`` are cast to float32 and
        ``y`` is passed through ``np.atleast_3d``. ``f`` is cast but not used;
        ``g`` is ignored.
    G_true : int
        Only selects the settings: ``G_true < 4`` uses 10 GFE iterations,
        ``n_boot=200`` and bootstrap; otherwise 100, 100 and no bootstrap.
    i : int
        Run index, reported when the search fails.

    Returns
    -------
    The ``"G"`` entry of element 2 of the search result (presumably the selected
    parameter set; confirm against ``grid_search_by_ic``), or ``np.nan`` if an
    exception was raised.
    """
    exog, dependent, factors, _membership = run
    exog = np.float32(exog)
    dependent = np.atleast_3d(np.float32(dependent))
    factors = np.float32(factors)  # cast kept from the original; value is unused

    small_design = bool(G_true < 4)
    init_params = {
        "dependent": dependent,
        "exog": exog,
        "bootstrap": small_design,
        "hide_progressbar": False,
    }
    fit_params = {
        "max_iter": 100,
        "gfe_iterations": 10 if small_design else 100,
        "n_boot": 200 if small_design else 100,
        "boot_n_jobs": 1,
    }
    try:
        with open(os.devnull, "w") as sink, contextlib.redirect_stdout(sink), contextlib.redirect_stderr(sink):
            outcome = grid_search_by_ic(
                GroupedFixedEffects,
                {"G": list(range(2, 9))},
                init_params=init_params,
                fit_params=fit_params,
            )
            return outcome[2]["G"]
    except Exception as exc:
        print(f"Error in run {i}: {exc}")
        return np.nan

# Smoke test: select G for the first DGP 1 replication (true G is 3).
# The context manager closes the data file as soon as it has been read.
with open("development/generated_data/dgp1_n100_t20_G3_k3.pkl", "rb") as fh:
    runs = pickle.load(fh)
bm_ic(runs[0], 3, 0)

3

In [8]:
# Group-number selection for DGP 1: one summary row per design, printed as LaTeX.
res = []
for n in (100, 200):
    for t in (20, 50):
        for G in (3, 6):
            for k in (3,):
                print(f"\n\nRunning DGP1 with n={n}, t={t}, G={G}, k={k}")
                # Read inside a context manager so the input handle is closed.
                with open(f"development/generated_data/dgp1_n{n}_t{t}_G{G}_k{k}.pkl", "rb") as fh:
                    runs = pickle.load(fh)

                # Wrap the iterable in tqdm to get a progress bar; the total follows
                # the number of stored runs instead of a hard-coded 500.
                estimates = Parallel(n_jobs=-1, prefer="processes")(
                    delayed(bm_ic)(run, G, i)
                    for i, run in tqdm(enumerate(runs), total=len(runs), desc=f"DGP1 n={n}, t={t}, G={G}, k={k}")
                )

                # Failed runs come back as NaN; NaN == G is False, so they count as
                # incorrect and appear in none of the per-value shares.
                estimates = np.array(estimates)
                proportion_correct = np.mean(estimates == G)
                values = np.array([2, 3, 4, 5, 6, 7, 8])
                # Share of runs that selected each candidate G.
                value_count = np.array([(estimates == val).sum() for val in values]) / len(estimates)

                print(f"Proportion of correct G estimates {n}, {t}, {G}, {k}: {proportion_correct:.2f}")
                res.append((n, t, G, k, proportion_correct, *value_count))

print(
    pd.DataFrame(res, columns=["n", "t", "G", "k", "proportion_correct", "2", "3", "4", "5", "6", "7", "8"]).to_latex()
)



Running DGP1 with n=100, t=20, G=3, k=3


DGP1 n=100, t=20, G=3, k=3:   0%|          | 0/500 [00:00<?, ?it/s]

Proportion of correct G estimates 100, 20, 3, 3: 1.00


Running DGP1 with n=100, t=20, G=6, k=3


DGP1 n=100, t=20, G=6, k=3:   0%|          | 0/500 [00:00<?, ?it/s]

Proportion of correct G estimates 100, 20, 6, 3: 0.98


Running DGP1 with n=100, t=50, G=3, k=3


DGP1 n=100, t=50, G=3, k=3:   0%|          | 0/500 [00:00<?, ?it/s]

Error in run 314: No theta found, something went wrong in the estimation process.
Proportion of correct G estimates 100, 50, 3, 3: 1.00


Running DGP1 with n=100, t=50, G=6, k=3


DGP1 n=100, t=50, G=6, k=3:   0%|          | 0/500 [00:00<?, ?it/s]

Proportion of correct G estimates 100, 50, 6, 3: 0.78


Running DGP1 with n=200, t=20, G=3, k=3


DGP1 n=200, t=20, G=3, k=3:   0%|          | 0/500 [00:00<?, ?it/s]

Proportion of correct G estimates 200, 20, 3, 3: 1.00


Running DGP1 with n=200, t=20, G=6, k=3


DGP1 n=200, t=20, G=6, k=3:   0%|          | 0/500 [00:00<?, ?it/s]

Proportion of correct G estimates 200, 20, 6, 3: 1.00


Running DGP1 with n=200, t=50, G=3, k=3


DGP1 n=200, t=50, G=3, k=3:   0%|          | 0/500 [00:00<?, ?it/s]

Error in run 93: No theta found, something went wrong in the estimation process.
Error in run 98: No theta found, something went wrong in the estimation process.
Error in run 296: No theta found, something went wrong in the estimation process.
Error in run 319: No theta found, something went wrong in the estimation process.
Error in run 416: No theta found, something went wrong in the estimation process.
Proportion of correct G estimates 200, 50, 3, 3: 0.99


Running DGP1 with n=200, t=50, G=6, k=3


DGP1 n=200, t=50, G=6, k=3:   0%|          | 0/500 [00:00<?, ?it/s]

Proportion of correct G estimates 200, 50, 6, 3: 0.94
\begin{tabular}{lrrrrrrrrrrrr}
\toprule
 & n & t & G & k & proportion_correct & 2 & 3 & 4 & 5 & 6 & 7 & 8 \\
\midrule
0 & 100 & 20 & 3 & 3 & 1.000000 & 0.000000 & 1.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 \\
1 & 100 & 20 & 6 & 3 & 0.980000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.980000 & 0.020000 & 0.000000 \\
2 & 100 & 50 & 3 & 3 & 0.998000 & 0.000000 & 0.998000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 \\
3 & 100 & 50 & 6 & 3 & 0.782000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.782000 & 0.210000 & 0.008000 \\
4 & 200 & 20 & 3 & 3 & 1.000000 & 0.000000 & 1.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 \\
5 & 200 & 20 & 6 & 3 & 1.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 1.000000 & 0.000000 & 0.000000 \\
6 & 200 & 50 & 3 & 3 & 0.990000 & 0.000000 & 0.990000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 \\
7 & 200 & 50 & 6 & 3 & 0.936000 & 0.000000 &

In [9]:
def ab_ic(run, G_true, i):
    """Pick the number of groups for one DGP 2 run via ``grid_search_by_ic``.

    Candidates G = 2..8 are searched with ``GroupedInteractiveFixedEffects``
    while stdout and stderr are redirected to ``os.devnull``.

    Parameters
    ----------
    run : sequence
        At least four items; the first two are ``x`` and ``y``, the rest is
        ignored. ``x`` and ``y`` are cast to float32 and ``y`` is passed through
        ``np.atleast_3d``.
    G_true : int
        Only selects the settings: ``G_true < 4`` uses 10 GIFE iterations,
        ``n_boot=200`` and bootstrap; otherwise 100, 100 and no bootstrap.
    i : int
        Run index, reported when the search fails.

    Returns
    -------
    The ``"G"`` entry of element 2 of the search result (presumably the selected
    parameter set; confirm against ``grid_search_by_ic``), or ``np.nan`` if an
    exception was raised.
    """
    x, y, _f, _g, *_ = run
    x = np.float32(x)
    y = np.atleast_3d(np.float32(y))
    # Use the G_true argument. The earlier code read a global `G`, which only
    # worked when a surrounding loop happened to define a matching value.
    gife_iterations = 10 if G_true < 4 else 100
    n_boot = 200 if G_true < 4 else 100
    bootstrap = True if G_true < 4 else False
    try:
        with open(os.devnull, 'w') as devnull:
            with contextlib.redirect_stdout(devnull), contextlib.redirect_stderr(devnull):
                return grid_search_by_ic(
                    GroupedInteractiveFixedEffects,
                    {"G": [2, 3, 4, 5, 6, 7, 8]},
                    init_params={"dependent": y, "exog": x, "bootstrap": bootstrap, "hide_progressbar": False},
                    fit_params={
                        "max_iter": 100,
                        "gife_iterations": gife_iterations,
                        "n_boot": n_boot,
                        "boot_n_jobs": 1,
                    },
                )[2]["G"]
    except Exception as e:
        print(f"Error in run {i}: {e}")
        return np.nan


# Smoke test: select G for the first DGP 2 replication (true G is 3).
# The context manager closes the data file as soon as it has been read.
with open("development/generated_data/dgp2_n100_t20_G3_k3.pkl", "rb") as fh:
    runs = pickle.load(fh)
ab_ic(runs[0], 3, 0)

3

In [10]:
# Group-number selection for DGP 2 (default interactive-effects model), using
# the first 200 stored runs of each design.
res = []
for n in (100, 200):
    for t in (20, 50):
        for G in (3, 6):
            for k in (3,):
                print(f"\n\nRunning DGP2 with n={n}, t={t}, G={G}, k={k}")
                # Read inside a context manager so the input handle is closed.
                with open(f"development/generated_data/dgp2_n{n}_t{t}_G{G}_k{k}.pkl", "rb") as fh:
                    runs = pickle.load(fh)

                # Wrap the iterable in tqdm to get a progress bar, sized from the
                # slice itself in case fewer than 200 runs are stored.
                subset = runs[:200]
                estimates = Parallel(n_jobs=-1, prefer="processes")(
                    delayed(ab_ic)(run, G, i)
                    for i, run in tqdm(enumerate(subset), total=len(subset), desc=f"DGP2 n={n}, t={t}, G={G}, k={k}")
                )

                # Failed runs come back as NaN; NaN == G is False, so they count as
                # incorrect and appear in none of the per-value counts.
                estimates = np.array(estimates)
                proportion_correct = np.mean(estimates == G)
                values = np.array([2, 3, 4, 5, 6, 7, 8])
                # Number (not share) of runs that selected each candidate G.
                value_count = np.array([(estimates == val).sum() for val in values])

                print(f"Proportion of correct G estimates {n}, {t}, {G}, {k}: {proportion_correct:.2f}")
                res.append((n, t, G, k, proportion_correct, *value_count))

print(
    pd.DataFrame(res, columns=["n", "t", "G", "k", "proportion_correct", "2", "3", "4", "5", "6", "7", "8"]).to_latex()
)



Running DGP2 with n=100, t=20, G=3, k=3


DGP2 n=100, t=20, G=3, k=3:   0%|          | 0/200 [00:00<?, ?it/s]

Proportion of correct G estimates 100, 20, 3, 3: 1.00


Running DGP2 with n=100, t=20, G=6, k=3


DGP2 n=100, t=20, G=6, k=3:   0%|          | 0/200 [00:00<?, ?it/s]

Proportion of correct G estimates 100, 20, 6, 3: 0.96


Running DGP2 with n=100, t=50, G=3, k=3


DGP2 n=100, t=50, G=3, k=3:   0%|          | 0/200 [00:00<?, ?it/s]

Proportion of correct G estimates 100, 50, 3, 3: 1.00


Running DGP2 with n=100, t=50, G=6, k=3


DGP2 n=100, t=50, G=6, k=3:   0%|          | 0/200 [00:00<?, ?it/s]

Proportion of correct G estimates 100, 50, 6, 3: 0.99


Running DGP2 with n=200, t=20, G=3, k=3


DGP2 n=200, t=20, G=3, k=3:   0%|          | 0/200 [00:00<?, ?it/s]

Proportion of correct G estimates 200, 20, 3, 3: 0.99


Running DGP2 with n=200, t=20, G=6, k=3


DGP2 n=200, t=20, G=6, k=3:   0%|          | 0/200 [00:00<?, ?it/s]

Proportion of correct G estimates 200, 20, 6, 3: 0.99


Running DGP2 with n=200, t=50, G=3, k=3


DGP2 n=200, t=50, G=3, k=3:   0%|          | 0/200 [00:00<?, ?it/s]

Proportion of correct G estimates 200, 50, 3, 3: 1.00


Running DGP2 with n=200, t=50, G=6, k=3


DGP2 n=200, t=50, G=6, k=3:   0%|          | 0/200 [00:00<?, ?it/s]

Proportion of correct G estimates 200, 50, 6, 3: 1.00
\begin{tabular}{lrrrrrrrrrrrr}
\toprule
 & n & t & G & k & proportion_correct & 2 & 3 & 4 & 5 & 6 & 7 & 8 \\
\midrule
0 & 100 & 20 & 3 & 3 & 1.000000 & 0 & 200 & 0 & 0 & 0 & 0 & 0 \\
1 & 100 & 20 & 6 & 3 & 0.960000 & 0 & 0 & 0 & 3 & 192 & 4 & 1 \\
2 & 100 & 50 & 3 & 3 & 1.000000 & 0 & 200 & 0 & 0 & 0 & 0 & 0 \\
3 & 100 & 50 & 6 & 3 & 0.990000 & 0 & 0 & 0 & 0 & 198 & 2 & 0 \\
4 & 200 & 20 & 3 & 3 & 0.995000 & 0 & 199 & 1 & 0 & 0 & 0 & 0 \\
5 & 200 & 20 & 6 & 3 & 0.995000 & 0 & 0 & 0 & 0 & 199 & 1 & 0 \\
6 & 200 & 50 & 3 & 3 & 1.000000 & 0 & 200 & 0 & 0 & 0 & 0 & 0 \\
7 & 200 & 50 & 6 & 3 & 1.000000 & 0 & 0 & 0 & 0 & 200 & 0 & 0 \\
\bottomrule
\end{tabular}



In [3]:
def sj_ic(run, G_true, i):
    """Pick the number of groups for one DGP 2 run with the ``"su_ju"`` model.

    Candidates G = 2..8 are searched through ``grid_search_by_ic`` while stdout
    and stderr are redirected to ``os.devnull``.

    Parameters
    ----------
    run : sequence
        At least four items, unpacked as ``(x, y, f, g, ...)``. ``x`` and ``y``
        are cast to float32 and ``y`` is passed through ``np.atleast_3d``. ``f``
        is cast but not used; the rest is ignored.
    G_true : int
        Only selects the settings: ``G_true < 4`` uses ``n_boot=100``,
        ``kappa=0.005`` and bootstrap; otherwise 50, 0.00005 and no bootstrap.
    i : int
        Run index, reported when the search fails.

    Returns
    -------
    The ``"G"`` entry of element 2 of the search result (presumably the selected
    parameter set; confirm against ``grid_search_by_ic``), or ``np.nan`` if an
    exception was raised.
    """
    exog, dependent, factors, _membership, *_rest = run
    exog = np.float32(exog)
    dependent = np.atleast_3d(np.float32(dependent))
    factors = np.float32(factors)  # cast kept from the original; value is unused

    small_design = bool(G_true < 4)
    model_options = {
        "dependent": dependent,
        "exog": exog,
        "bootstrap": small_design,
        "model": "su_ju",
        "hide_progressbar": False,
    }
    fit_options = {
        "n_boot": 100 if small_design else 50,
        "only_bfgs": True,
        "max_iter": 100,
        "kappa": 0.005 if small_design else 0.00005,
        "tol": 1e-2,
        "boot_n_jobs": 1,
    }
    try:
        with open(os.devnull, "w") as sink, contextlib.redirect_stdout(sink), contextlib.redirect_stderr(sink):
            outcome = grid_search_by_ic(
                GroupedInteractiveFixedEffects,
                {"G": list(range(2, 9))},
                init_params=model_options,
                fit_params=fit_options,
            )
            return outcome[2]["G"]
    except Exception as err:
        print(f"Error in run {i}: {err}")
        return np.nan

# runs = pickle.load(open("development/generated_data/dgp2_n100_t20_G3_k3.pkl", "rb"))
# sj_ic(runs[0], 3, 0)

In [12]:
# Group-number selection for DGP 2 with the su_ju estimator, using the first
# 200 stored runs of each design.
res = []
for n in (100, 200):
    for t in (20, 50):
        for G in (3, 6):
            for k in (3,):
                print(f"\n\nRunning DGP2 with n={n}, t={t}, G={G}, k={k}")
                # Read inside a context manager so the input handle is closed.
                with open(f"development/generated_data/dgp2_n{n}_t{t}_G{G}_k{k}.pkl", "rb") as fh:
                    runs = pickle.load(fh)

                # Wrap the iterable in tqdm to get a progress bar, sized from the
                # slice itself in case fewer than 200 runs are stored.
                subset = runs[:200]
                estimates = Parallel(n_jobs=-1, prefer="processes")(
                    delayed(sj_ic)(run, G, i)
                    for i, run in tqdm(enumerate(subset), total=len(subset), desc=f"DGP2 n={n}, t={t}, G={G}, k={k}")
                )

                # Failed runs come back as NaN; NaN == G is False, so they count as
                # incorrect and appear in none of the per-value counts.
                estimates = np.array(estimates)
                proportion_correct = np.mean(estimates == G)
                values = np.array([2, 3, 4, 5, 6, 7, 8])
                # Number (not share) of runs that selected each candidate G.
                value_count = np.array([(estimates == val).sum() for val in values])

                print(f"Proportion of correct G estimates {n}, {t}, {G}, {k}: {proportion_correct:.2f}")
                res.append((n, t, G, k, proportion_correct, *value_count))

print(
    pd.DataFrame(res, columns=["n", "t", "G", "k", "proportion_correct", "2", "3", "4", "5", "6", "7", "8"]).to_latex()
)



Running DGP2 with n=100, t=20, G=3, k=3


DGP2 n=100, t=20, G=3, k=3:   0%|          | 0/200 [00:00<?, ?it/s]

Proportion of correct G estimates 100, 20, 3, 3: 1.00


Running DGP2 with n=100, t=20, G=6, k=3


DGP2 n=100, t=20, G=6, k=3:   0%|          | 0/200 [00:00<?, ?it/s]

Proportion of correct G estimates 100, 20, 6, 3: 0.92


Running DGP2 with n=100, t=50, G=3, k=3


DGP2 n=100, t=50, G=3, k=3:   0%|          | 0/200 [00:00<?, ?it/s]

Proportion of correct G estimates 100, 50, 3, 3: 1.00


Running DGP2 with n=100, t=50, G=6, k=3


DGP2 n=100, t=50, G=6, k=3:   0%|          | 0/200 [00:00<?, ?it/s]

Proportion of correct G estimates 100, 50, 6, 3: 0.89


Running DGP2 with n=200, t=20, G=3, k=3


DGP2 n=200, t=20, G=3, k=3:   0%|          | 0/200 [00:00<?, ?it/s]

Proportion of correct G estimates 200, 20, 3, 3: 0.99


Running DGP2 with n=200, t=20, G=6, k=3


DGP2 n=200, t=20, G=6, k=3:   0%|          | 0/200 [00:00<?, ?it/s]

Proportion of correct G estimates 200, 20, 6, 3: 0.93


Running DGP2 with n=200, t=50, G=3, k=3


DGP2 n=200, t=50, G=3, k=3:   0%|          | 0/200 [00:00<?, ?it/s]

Proportion of correct G estimates 200, 50, 3, 3: 1.00


Running DGP2 with n=200, t=50, G=6, k=3


DGP2 n=200, t=50, G=6, k=3:   0%|          | 0/200 [00:00<?, ?it/s]

Proportion of correct G estimates 200, 50, 6, 3: 0.97
\begin{tabular}{lrrrrrrrrrrrr}
\toprule
 & n & t & G & k & proportion_correct & 2 & 3 & 4 & 5 & 6 & 7 & 8 \\
\midrule
0 & 100 & 20 & 3 & 3 & 1.000000 & 0 & 200 & 0 & 0 & 0 & 0 & 0 \\
1 & 100 & 20 & 6 & 3 & 0.920000 & 0 & 0 & 0 & 14 & 184 & 2 & 0 \\
2 & 100 & 50 & 3 & 3 & 1.000000 & 0 & 200 & 0 & 0 & 0 & 0 & 0 \\
3 & 100 & 50 & 6 & 3 & 0.885000 & 0 & 0 & 0 & 0 & 177 & 21 & 2 \\
4 & 200 & 20 & 3 & 3 & 0.995000 & 0 & 199 & 1 & 0 & 0 & 0 & 0 \\
5 & 200 & 20 & 6 & 3 & 0.930000 & 0 & 0 & 0 & 14 & 186 & 0 & 0 \\
6 & 200 & 50 & 3 & 3 & 1.000000 & 0 & 200 & 0 & 0 & 0 & 0 & 0 \\
7 & 200 & 50 & 6 & 3 & 0.970000 & 0 & 0 & 0 & 0 & 194 & 6 & 0 \\
\bottomrule
\end{tabular}



In [18]:
def ssp_ic(run, G_true, i):
    """Pick the number of groups for one DGP 3 run with the ``"su_shi_phillips"`` model.

    The candidate grid depends on ``G_true``: ``[2, 3, 4, 5]`` with ``kappa=0.05``
    and ``max_iter=10`` when ``G_true < 4``, otherwise ``[5, 6, 7, 8]`` with
    ``kappa=0.005`` and ``max_iter=100``. Bootstrap is always off. stdout and
    stderr are redirected to ``os.devnull`` during the search.

    Parameters
    ----------
    run : sequence of three items
        Unpacked as ``(x, y, g)``; ``g`` is ignored. ``x`` and ``y`` are cast to
        float32 and ``y`` is passed through ``np.atleast_3d``.
    G_true : int
        Selects the candidate grid and settings described above.
    i : int
        Run index, reported when the search fails.

    Returns
    -------
    The ``"G"`` entry of element 2 of the search result (presumably the selected
    parameter set; confirm against ``grid_search_by_ic``), or ``np.nan`` if an
    exception was raised.
    """
    exog, dependent, _membership = run
    exog = np.float32(exog)
    dependent = np.atleast_3d(np.float32(dependent))

    if G_true < 4:
        kappa, max_iter, G_test = 0.05, 10, [2, 3, 4, 5]
    else:
        kappa, max_iter, G_test = 0.005, 100, [5, 6, 7, 8]

    model_options = {
        "dependent": dependent,
        "exog": exog,
        "bootstrap": False,
        "model": "su_shi_phillips",
        "entity_effects": False,
        "kappa": kappa,
    }
    fit_options = {
        "n_boot": 0,
        "only_bfgs": True,
        "max_iter": max_iter,
        "tol": 1e-6,
        "boot_n_jobs": 1,
    }
    try:
        with open(os.devnull, "w") as sink:
            with contextlib.redirect_stdout(sink), contextlib.redirect_stderr(sink):
                outcome = grid_search_by_ic(
                    GroupedFixedEffects, {"G": G_test}, init_params=model_options, fit_params=fit_options
                )
        return outcome[2]["G"]
    except Exception as err:
        print(f"Error in run {i}: {err}")
        return np.nan


# Smoke test: select G for one stored DGP 3 replication (true G is 3).
# The context manager closes the data file as soon as it has been read.
with open("development/generated_data/dgp3_n100_t20_G3_k3.pkl", "rb") as fh:
    runs = pickle.load(fh)
ssp_ic(runs[10], 3, 0)

3

In [19]:
# Group-number selection for DGP 3 with the su_shi_phillips estimator, using
# the first 200 stored runs of each design.
res = []
for n in (100, 200):
    for t in (20, 50):
        for G in (3, 6):
            for k in (3,):
                print(f"\n\nRunning DGP3 with n={n}, t={t}, G={G}, k={k}")
                # Read inside a context manager so the input handle is closed.
                with open(f"development/generated_data/dgp3_n{n}_t{t}_G{G}_k{k}.pkl", "rb") as fh:
                    runs = pickle.load(fh)

                # Wrap the iterable in tqdm to get a progress bar, sized from the
                # slice itself in case fewer than 200 runs are stored.
                subset = runs[:200]
                estimates = Parallel(n_jobs=-1, prefer="processes")(
                    delayed(ssp_ic)(run, G, i)
                    for i, run in tqdm(enumerate(subset), total=len(subset), desc=f"DGP3 n={n}, t={t}, G={G}, k={k}")
                )

                # Failed runs come back as NaN; NaN == G is False, so they count as
                # incorrect and appear in none of the per-value counts.
                estimates = np.array(estimates)
                proportion_correct = np.mean(estimates == G)
                values = np.array([2, 3, 4, 5, 6, 7, 8])
                # Number (not share) of runs that selected each candidate G.
                value_count = np.array([(estimates == val).sum() for val in values])

                print(f"Proportion of correct G estimates {n}, {t}, {G}, {k}: {proportion_correct:.2f}")
                res.append((n, t, G, k, proportion_correct, *value_count))

print(
    pd.DataFrame(res, columns=["n", "t", "G", "k", "proportion_correct", "2", "3", "4", "5", "6", "7", "8"]).to_latex()
)



Running DGP3 with n=100, t=20, G=3, k=3


DGP3 n=100, t=20, G=3, k=3:   0%|          | 0/200 [00:00<?, ?it/s]

Proportion of correct G estimates 100, 20, 3, 3: 0.81


Running DGP3 with n=100, t=20, G=6, k=3


DGP3 n=100, t=20, G=6, k=3:   0%|          | 0/200 [00:00<?, ?it/s]

Proportion of correct G estimates 100, 20, 6, 3: 0.71


Running DGP3 with n=100, t=50, G=3, k=3


DGP3 n=100, t=50, G=3, k=3:   0%|          | 0/200 [00:00<?, ?it/s]

Proportion of correct G estimates 100, 50, 3, 3: 0.96


Running DGP3 with n=100, t=50, G=6, k=3


DGP3 n=100, t=50, G=6, k=3:   0%|          | 0/200 [00:00<?, ?it/s]

Proportion of correct G estimates 100, 50, 6, 3: 0.77


Running DGP3 with n=200, t=20, G=3, k=3


DGP3 n=200, t=20, G=3, k=3:   0%|          | 0/200 [00:00<?, ?it/s]

Proportion of correct G estimates 200, 20, 3, 3: 0.69


Running DGP3 with n=200, t=20, G=6, k=3


DGP3 n=200, t=20, G=6, k=3:   0%|          | 0/200 [00:00<?, ?it/s]

Proportion of correct G estimates 200, 20, 6, 3: 0.69


Running DGP3 with n=200, t=50, G=3, k=3


DGP3 n=200, t=50, G=3, k=3:   0%|          | 0/200 [00:00<?, ?it/s]

Proportion of correct G estimates 200, 50, 3, 3: 0.97


Running DGP3 with n=200, t=50, G=6, k=3


DGP3 n=200, t=50, G=6, k=3:   0%|          | 0/200 [00:00<?, ?it/s]

Exception ignored in: <function ResourceTracker.__del__ at 0x102ed1620>
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.13/lib/python3.13/multiprocessing/resource_tracker.py", line 82, in __del__
  File "/Library/Frameworks/Python.framework/Versions/3.13/lib/python3.13/multiprocessing/resource_tracker.py", line 91, in _stop
  File "/Library/Frameworks/Python.framework/Versions/3.13/lib/python3.13/multiprocessing/resource_tracker.py", line 116, in _stop_locked
ChildProcessError: [Errno 10] No child processes


Proportion of correct G estimates 200, 50, 6, 3: 0.69
\begin{tabular}{lrrrrrrrrrrrr}
\toprule
 & n & t & G & k & proportion_correct & 2 & 3 & 4 & 5 & 6 & 7 & 8 \\
\midrule
0 & 100 & 20 & 3 & 3 & 0.805000 & 0 & 161 & 34 & 5 & 0 & 0 & 0 \\
1 & 100 & 20 & 6 & 3 & 0.715000 & 0 & 0 & 0 & 0 & 143 & 42 & 15 \\
2 & 100 & 50 & 3 & 3 & 0.960000 & 0 & 192 & 6 & 2 & 0 & 0 & 0 \\
3 & 100 & 50 & 6 & 3 & 0.765000 & 0 & 0 & 0 & 0 & 153 & 31 & 16 \\
4 & 200 & 20 & 3 & 3 & 0.695000 & 0 & 139 & 54 & 7 & 0 & 0 & 0 \\
5 & 200 & 20 & 6 & 3 & 0.690000 & 0 & 0 & 0 & 0 & 138 & 33 & 29 \\
6 & 200 & 50 & 3 & 3 & 0.970000 & 0 & 194 & 3 & 3 & 0 & 0 & 0 \\
7 & 200 & 50 & 6 & 3 & 0.695000 & 0 & 0 & 0 & 0 & 139 & 41 & 20 \\
\bottomrule
\end{tabular}



Exception ignored in: <function ResourceTracker.__del__ at 0x106481620>
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.13/lib/python3.13/multiprocessing/resource_tracker.py", line 82, in __del__
  File "/Library/Frameworks/Python.framework/Versions/3.13/lib/python3.13/multiprocessing/resource_tracker.py", line 91, in _stop
  File "/Library/Frameworks/Python.framework/Versions/3.13/lib/python3.13/multiprocessing/resource_tracker.py", line 116, in _stop_locked
ChildProcessError: [Errno 10] No child processes
Exception ignored in: <function ResourceTracker.__del__ at 0x103019620>
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.13/lib/python3.13/multiprocessing/resource_tracker.py", line 82, in __del__
  File "/Library/Frameworks/Python.framework/Versions/3.13/lib/python3.13/multiprocessing/resource_tracker.py", line 91, in _stop
  File "/Library/Frameworks/Python.framework/Versions/3.13/lib/python3.13/m

In [17]:
# Display the current contents of `res`: the list of per-design result tuples
# (n, t, G, k, proportion_correct, per-candidate values) built by a selection loop.
res

[(100,
  20,
  3,
  3,
  np.float64(0.782),
  np.int64(0),
  np.int64(391),
  np.int64(54),
  np.int64(12),
  np.int64(33),
  np.int64(7),
  np.int64(3)),
 (100,
  20,
  6,
  3,
  np.float64(0.676),
  np.int64(0),
  np.int64(0),
  np.int64(0),
  np.int64(0),
  np.int64(338),
  np.int64(104),
  np.int64(58))]