In [1]:
import pickle
import numpy as np
from src.groupedpaneldatamodels import GroupedFixedEffects, GroupedInteractiveFixedEffects, grid_search_by_ic
from joblib import Parallel, delayed
# from tqdm import tqdm
from tqdm.notebook import tqdm
import os

# Bonhomme Manresa (DGP 1)

In [None]:
def bm_run(run, G, i):
    """Fit a ``GroupedFixedEffects`` model to one simulated DGP 1 replication.

    Parameters
    ----------
    run : sequence
        One replication, unpacked as ``(x, y, f, g)``. Only ``x`` and ``y``
        are used for estimation.
    G : int
        Number of groups handed to the estimator.
    i : int
        Index of the replication; only used in the error message.

    Returns
    -------
    dict or None
        ``m.to_dict()`` of the fitted model, or ``None`` when fitting raised.
    """
    x, y, _f, _g = run
    x = np.float32(x)
    y = np.atleast_3d(np.float32(y))

    # Small designs get few restarts plus a bootstrap; large ones get more
    # restarts and skip the bootstrap.
    gfe_iterations = 10 if G < 4 else 100
    n_boot = 200
    bootstrap = G < 4

    try:
        model = GroupedFixedEffects(y, x, G, bootstrap, hide_progressbar=True).fit(
            max_iter=100, gfe_iterations=gfe_iterations, n_boot=n_boot, boot_n_jobs=1
        )
        return model.to_dict()
    except Exception as e:
        # Best effort: one failed replication must not abort the whole simulation.
        print(f"Error in run {i}: {e}")
        return None

# runs = pickle.load(open("development/generated_data/dgp1_n100_t20_G3_k3.pkl", "rb"))
# bm_run(runs[0], 3, 0)

In [None]:
# Monte Carlo estimation for DGP 1: one pickle of fitted-model dicts per (n, t, G, k) design.
for n in (100, 200):
    for t in (20, 50):
        for G in (3, 6):
            for k in (3,):
                output_path = f"estimates/dgp1_n{n}_t{t}_G{G}_k{k}_full.pkl"
                # Results are cached on disk; delete the file to force a re-run.
                if os.path.exists(output_path):
                    print(f"Skipping DGP1 with n={n}, t={t}, G={G}, k={k} (already exists)")
                    continue

                print(f"\n\nRunning DGP1 with n={n}, t={t}, G={G}, k={k}")
                # NOTE: pickle executes arbitrary code on load; only read locally generated files.
                with open(f"development/generated_data/dgp1_n{n}_t{t}_G{G}_k{k}.pkl", "rb") as data_file:
                    runs = pickle.load(data_file)

                # Wrap the iterable in tqdm to get a progress bar; enumerate hides the
                # length, so the total is passed explicitly.
                estimates = Parallel(n_jobs=-1, prefer="processes")(
                    delayed(bm_run)(run, G, i)
                    for i, run in tqdm(enumerate(runs), total=len(runs), desc=f"DGP1 n={n}, t={t}, G={G}, k={k}")
                )

                # Make sure the output directory exists so a long run is not lost on write.
                os.makedirs(os.path.dirname(output_path), exist_ok=True)
                with open(output_path, "wb") as f:
                    pickle.dump(estimates, f)

## DGP 2

In [None]:
def ab_run(run, G, i):
    """Fit a ``GroupedInteractiveFixedEffects`` model (default variant) to one DGP 2 replication.

    Parameters
    ----------
    run : sequence
        One replication whose first four entries are ``(x, y, f, g)``; any
        further entries are ignored. Only ``x`` and ``y`` are used.
    G : int
        Number of groups handed to the estimator.
    i : int
        Index of the replication; only used in the error message.

    Returns
    -------
    dict or None
        ``m.to_dict()`` of the fitted model, or ``None`` when fitting raised.
    """
    x, y, _f, _g, *_ = run
    x = np.float32(x)
    y = np.atleast_3d(np.float32(y))

    gife_iterations = 10 if G < 4 else 100
    n_boot = 200 if G < 4 else 100
    bootstrap = G < 4

    try:
        model = GroupedInteractiveFixedEffects(y, x, G, bootstrap, hide_progressbar=True).fit(
            max_iter=100, gife_iterations=gife_iterations, n_boot=n_boot, boot_n_jobs=1
        )
        return model.to_dict()
    except Exception as err:
        # Best effort: report which replication failed and keep the simulation going.
        print(f"Error in run {i}: {err}")
        return None

# runs = pickle.load(open("development/generated_data/dgp2_n100_t20_G3_k3.pkl", "rb"))
# ab_run(runs[0], 3, 0)

In [None]:
# Monte Carlo estimation for DGP 2 with the default GIFE estimator (ab_run).
for n in (100, 200):
    for t in (20, 50):
        for G in (3, 6):
            for k in (3,):
                output_path = f"estimates/dgp2_n{n}_t{t}_G{G}_k{k}_full.pkl"
                # Results are cached on disk; delete the file to force a re-run.
                if os.path.exists(output_path):
                    print(f"Skipping DGP2 with n={n}, t={t}, G={G}, k={k} (already exists)")
                    continue
                print(f"\n\nRunning DGP2 (ab) with n={n}, t={t}, G={G}, k={k}")
                # NOTE: pickle executes arbitrary code on load; only read locally generated files.
                with open(f"development/generated_data/dgp2_n{n}_t{t}_G{G}_k{k}.pkl", "rb") as data_file:
                    runs = pickle.load(data_file)

                # enumerate hides the length from tqdm, so the total is passed explicitly.
                estimates = Parallel(n_jobs=-1, prefer="processes")(
                    delayed(ab_run)(run, G, i)
                    for i, run in tqdm(enumerate(runs), total=len(runs), desc=f"DGP2 n={n}, t={t}, G={G}, k={k}")
                )

                # Make sure the output directory exists so a long run is not lost on write.
                os.makedirs(os.path.dirname(output_path), exist_ok=True)
                with open(output_path, "wb") as f:
                    pickle.dump(estimates, f)

In [None]:
def sj_run(run, G, i):
    """Fit the ``"su_ju"`` variant of ``GroupedInteractiveFixedEffects`` to one DGP 2 replication.

    Parameters
    ----------
    run : sequence
        One replication whose first four entries are ``(x, y, f, g)``; any
        further entries are ignored. Only ``x`` and ``y`` are used.
    G : int
        Number of groups; also passed as ``R`` to the estimator.
    i : int
        Index of the replication; only used in the error message.

    Returns
    -------
    dict or None
        ``m.to_dict()`` of the fitted model, or ``None`` when fitting raised.
    """
    x, y, _f, _g, *_ = run
    x = np.float32(x)
    y = np.atleast_3d(np.float32(y))

    # The larger design uses fewer bootstrap draws and a much smaller kappa.
    n_boot = 100 if G < 4 else 50
    kappa = 0.005 if G < 4 else 0.00005
    bootstrap = G < 4

    try:
        model = GroupedInteractiveFixedEffects(y, x, G, bootstrap, "su_ju", R=G, hide_progressbar=True).fit(
            n_boot=n_boot, only_bfgs=True, max_iter=100, kappa=kappa, tol=1e-2, boot_n_jobs=1
        )
        return model.to_dict()
    except Exception as err:
        # Best effort: report which replication failed and keep the simulation going.
        print(f"Error in run {i}: {err}")
        return None

# runs = pickle.load(open("development/generated_data/dgp2_n100_t20_G3_k3.pkl", "rb"))
# sj_run(runs[0], 3, 0)

In [None]:
# Monte Carlo estimation for DGP 2 with the su_ju estimator (sj_run).
for n in (100, 200):
    for t in (20, 50):
        for G in (3, 6):
            for k in (3,):
                output_path = f"estimates/dgp2_n{n}_t{t}_G{G}_k{k}_su_ju_full.pkl"
                # Results are cached on disk; delete the file to force a re-run.
                if os.path.exists(output_path):
                    print(f"Skipping DGP2 with n={n}, t={t}, G={G}, k={k} (already exists)")
                    continue
                print(f"\n\nRunning DGP2 (su_ju) with n={n}, t={t}, G={G}, k={k}")
                # NOTE: pickle executes arbitrary code on load; only read locally generated files.
                with open(f"development/generated_data/dgp2_n{n}_t{t}_G{G}_k{k}.pkl", "rb") as data_file:
                    runs = pickle.load(data_file)

                estimates = Parallel(n_jobs=-1, prefer="processes")(
                    delayed(sj_run)(run, G, i)
                    for i, run in enumerate(tqdm(runs, desc=f"DGP2 n={n}, t={t}, G={G}, k={k}"))
                )

                # Make sure the output directory exists so a long run is not lost on write.
                os.makedirs(os.path.dirname(output_path), exist_ok=True)
                with open(output_path, "wb") as f:
                    pickle.dump(estimates, f)

# DGP 3 (SSP)

In [None]:
def ssp_run(run, G, i, kappa=0.005):
    """Fit the ``"su_shi_phillips"`` variant of ``GroupedFixedEffects`` to one DGP 3 replication.

    Parameters
    ----------
    run : sequence
        One replication, unpacked as ``(x, y, g)``. Only ``x`` and ``y`` are used.
    G : int
        Number of groups handed to the estimator.
    i : int
        Index of the replication; only used in the error message.
    kappa : float, default 0.005
        Forwarded unchanged to ``fit``.

    Returns
    -------
    dict or None
        ``m.to_dict()`` of the fitted model, or ``None`` when fitting raised.
    """
    x, y, _g = run
    x = np.float32(x)
    y = np.atleast_3d(np.float32(y))

    n_boot = 100 if G < 4 else 50
    # The bootstrap is switched off for every G here (the original expression was
    # ``False if G < 4 else False``).
    # NOTE(review): the sibling run functions bootstrap when G < 4 - confirm this is intended.
    bootstrap = False

    try:
        model = GroupedFixedEffects(y, x, G, bootstrap, "su_shi_phillips", hide_progressbar=True).fit(
            n_boot=n_boot, only_bfgs=False, max_iter=100, kappa=kappa, tol=1e-5, boot_n_jobs=1
        )
        return model.to_dict()
    except Exception as err:
        # Best effort: report which replication failed and keep the simulation going.
        print(f"Error in run {i}: {err}")
        return None

# Smoke test: fit a single DGP 3 replication before launching the full simulation.
with open("development/generated_data/dgp3_n100_t20_G3_k3.pkl", "rb") as data_file:
    runs = pickle.load(data_file)
ssp_run(runs[0], 3, 0)

In [None]:
# Monte Carlo estimation for DGP 3 with the su_shi_phillips estimator (ssp_run).
for n in (100, 200):
    for t in (20, 50):
        for G in (3, 6):
            for k in (3,):
                output_path = f"estimates/dgp3_n{n}_t{t}_G{G}_k{k}_full.pkl"
                # Results are cached on disk; delete the file to force a re-run.
                if os.path.exists(output_path):
                    print(f"Skipping DGP3 with n={n}, t={t}, G={G}, k={k} (already exists)")
                    continue
                print(f"\n\nRunning DGP3 (su_shi_phillips) with n={n}, t={t}, G={G}, k={k}")
                # NOTE: pickle executes arbitrary code on load; only read locally generated files.
                with open(f"development/generated_data/dgp3_n{n}_t{t}_G{G}_k{k}.pkl", "rb") as data_file:
                    runs = pickle.load(data_file)

                # The progress bar is labelled DGP3 (it previously said DGP2).
                estimates = Parallel(n_jobs=-1, prefer="processes")(
                    delayed(ssp_run)(run, G, i) for i, run in enumerate(tqdm(runs, desc=f"DGP3 n={n}, t={t}, G={G}, k={k}"))
                )

                # Make sure the output directory exists so a long run is not lost on write.
                os.makedirs(os.path.dirname(output_path), exist_ok=True)
                with open(output_path, "wb") as f:
                    pickle.dump(estimates, f)

In [None]:
from statsmodels.regression.linear_model import OLS
from statsmodels.tools.tools import add_constant

def ols(y, x):
    """Fit an OLS regression of ``y`` on ``x`` plus an intercept.

    The intercept is added with ``add_constant`` and dropped from every
    returned quantity, so the results describe the slope coefficients only.

    Parameters
    ----------
    y : array-like
        Dependent variable for a single regression.
    x : array-like
        Regressors for a single regression, without an intercept column.

    Returns
    -------
    tuple
        ``(params, conf_int, bse)`` for the slopes: the coefficient estimates,
        their 95% confidence intervals (one ``[lower, upper]`` row per slope)
        and their standard errors.
    """
    # NOTE(review): add_constant's default leaves x untouched when it already holds a
    # constant column; in that case dropping row 0 below would drop a real regressor.
    model = OLS(y, add_constant(x))
    results = model.fit()
    # Slice [1:] on all three so the interval rows line up with the slope estimates;
    # indexing with [1] returned only the first slope's interval.
    return results.params[1:], results.conf_int(alpha=0.05)[1:], results.bse[1:]

def heterogeneous_ols(y, x):
    """Run ``ols`` separately on every leading-axis slice of ``y`` and ``x``.

    ``x`` must be at least three-dimensional: ``x.shape[0]`` slices are
    regressed one by one and ``x.shape[2]`` sets the number of coefficients
    stored per slice.

    Returns
    -------
    tuple of numpy.ndarray
        ``(est_array, conf_array, se_array)`` with shapes
        ``(x.shape[0], x.shape[2])``, ``(x.shape[0], x.shape[2], 2)`` and
        ``(x.shape[0], x.shape[2])``.
    """
    n_slices, n_coefs = x.shape[0], x.shape[2]
    est_array = np.zeros((n_slices, n_coefs))
    conf_array = np.zeros((n_slices, n_coefs, 2))
    se_array = np.zeros((n_slices, n_coefs))

    for idx, x_slice in enumerate(x):
        # One independent regression per slice; results are stored row by row.
        est_array[idx], conf_array[idx], se_array[idx] = ols(y[idx], x_slice)

    return est_array, conf_array, se_array

In [None]:
# Heterogeneous OLS on the first 200 replications of one DGP 1 design.
# NOTE: pickle executes arbitrary code on load; only read locally generated files.
with open("development/generated_data/dgp1_n100_t20_G3_k3.pkl", "rb") as data_file:
    runs = pickle.load(data_file)

estimates = []
for run in tqdm(runs[:200]):
    # Use the current replication; reading runs[0] here estimated the first
    # replication 200 times.
    x = np.float32(run[0])
    y = np.float32(run[1])

    estimates.append(heterogeneous_ols(y, x))

output_path = "estimates/dgp1_n100_t20_G3_k3_ols.pkl"
with open(output_path, "wb") as f:
    pickle.dump(estimates, f)

In [None]:
def heterogeneous_ols_run(dgp, n, t, G, k):
    """Run ``heterogeneous_ols`` on every replication of one design and pickle the results.

    Reads ``development/generated_data/dgp{dgp}_n{n}_t{t}_G{G}_k{k}.pkl`` and
    writes the list of per-replication results to
    ``estimates/dgp{dgp}_n{n}_t{t}_G{G}_k{k}_ols.pkl``, overwriting any
    existing file. Returns ``None``.
    """
    # NOTE: pickle executes arbitrary code on load; only read locally generated files.
    with open(f"development/generated_data/dgp{dgp}_n{n}_t{t}_G{G}_k{k}.pkl", "rb") as data_file:
        runs = pickle.load(data_file)

    def _run_single_estimation(run):
        # The first two entries of a replication are the regressors and the outcome.
        x = np.float64(run[0])
        y = np.float64(run[1])
        return heterogeneous_ols(y, x)

    # Wrap tqdm around the iterator to preserve the progress bar
    estimates = Parallel(n_jobs=-1)(
        delayed(_run_single_estimation)(run) for run in tqdm(runs)
    )

    output_path = f"estimates/dgp{dgp}_n{n}_t{t}_G{G}_k{k}_ols.pkl"
    # Make sure the output directory exists so the computed results are not lost on write.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    with open(output_path, "wb") as f:
        pickle.dump(estimates, f)

In [None]:
# Heterogeneous OLS benchmark for every design of every DGP, in the same
# order as the nested loops it replaces (dgp outermost, k innermost).
ols_designs = [
    (dgp, n, t, G, k)
    for dgp in (1, 2, 3)
    for n in (100, 200)
    for t in (20, 50)
    for G in (3, 6)
    for k in (3,)
]
for dgp, n, t, G, k in ols_designs:
    heterogeneous_ols_run(dgp, n, t, G, k)

## IC

In [3]:
import contextlib
import pandas as pd

In [4]:
def bm_ic(run, G_true, i):
    """Select the number of groups for one DGP 1 replication via ``grid_search_by_ic``.

    Parameters
    ----------
    run : sequence
        One replication, unpacked as ``(x, y, f, g)``. Only ``x`` and ``y`` are used.
    G_true : int
        Number of groups of the design; it only chooses the fit settings
        (restarts, bootstrap), the search itself covers G = 2..8.
    i : int
        Index of the replication; only used in the error message.

    Returns
    -------
    int or float
        The ``"G"`` entry of element ``[2]`` of the grid-search result, or
        ``np.nan`` when the search raised.
    """
    x, y, _f, _g = run
    x = np.float32(x)
    y = np.atleast_3d(np.float32(y))

    gfe_iterations = 10 if G_true < 4 else 100
    n_boot = 200 if G_true < 4 else 100
    bootstrap = G_true < 4

    try:
        # Everything the search prints is discarded by sending stdout and stderr to devnull.
        with open(os.devnull, 'w') as devnull:
            with contextlib.redirect_stdout(devnull), contextlib.redirect_stderr(devnull):
                search_result = grid_search_by_ic(
                    GroupedFixedEffects,
                    {"G": [2, 3, 4, 5, 6, 7, 8]},
                    init_params={"dependent": y, "exog": x, "bootstrap": bootstrap, "hide_progressbar": False},
                    fit_params={
                        "max_iter": 100,
                        "gfe_iterations": gfe_iterations,
                        "n_boot": n_boot,
                        "boot_n_jobs": 1,
                    },
                )
                return search_result[2]["G"]
    except Exception as e:
        # Best effort: a failed replication is recorded as NaN instead of aborting the study.
        print(f"Error in run {i}: {e}")
        return np.nan

# Smoke test: select G for a single DGP 1 replication.
with open("development/generated_data/dgp1_n100_t20_G3_k3.pkl", "rb") as data_file:
    runs = pickle.load(data_file)
bm_ic(runs[0], 3, 0)

3

In [8]:
# IC-based selection of G on DGP 1: per design, the share of replications that pick each G.
res = []
for n in (100, 200):
    for t in (20, 50):
        for G in (3, 6):
            for k in (3,):
                print(f"\n\nRunning DGP1 with n={n}, t={t}, G={G}, k={k}")
                # NOTE: pickle executes arbitrary code on load; only read locally generated files.
                with open(f"development/generated_data/dgp1_n{n}_t{t}_G{G}_k{k}.pkl", "rb") as data_file:
                    runs = pickle.load(data_file)

                # Wrap the iterable in tqdm to get a progress bar; enumerate hides the
                # length, so the total is passed explicitly.
                estimates = Parallel(n_jobs=-1, prefer="processes")(
                    delayed(bm_ic)(run, G, i)
                    for i, run in tqdm(enumerate(runs), total=len(runs), desc=f"DGP1 n={n}, t={t}, G={G}, k={k}")
                )

                # Failed replications come back as NaN: they never equal G, so they count
                # as incorrect and appear in none of the per-value columns.
                estimates = np.array(estimates)
                proportion_correct = np.mean(estimates == G)
                values = np.array([2, 3, 4, 5, 6, 7, 8])
                # Shares (not counts) of replications selecting each candidate G.
                value_count = np.array([(estimates == val).sum() for val in values]) / len(estimates)

                print(f"Proportion of correct G estimates {n}, {t}, {G}, {k}: {proportion_correct:.2f}")
                res.append((n, t, G, k, proportion_correct, *value_count))

print(
    pd.DataFrame(res, columns=["n", "t", "G", "k", "proportion_correct", "2", "3", "4", "5", "6", "7", "8"]).to_latex()
)



Running DGP1 with n=100, t=20, G=3, k=3


DGP1 n=100, t=20, G=3, k=3:   0%|          | 0/500 [00:00<?, ?it/s]

Proportion of correct G estimates 100, 20, 3, 3: 1.00


Running DGP1 with n=100, t=20, G=6, k=3


DGP1 n=100, t=20, G=6, k=3:   0%|          | 0/500 [00:00<?, ?it/s]

Proportion of correct G estimates 100, 20, 6, 3: 0.98


Running DGP1 with n=100, t=50, G=3, k=3


DGP1 n=100, t=50, G=3, k=3:   0%|          | 0/500 [00:00<?, ?it/s]

Error in run 314: No theta found, something went wrong in the estimation process.
Proportion of correct G estimates 100, 50, 3, 3: 1.00


Running DGP1 with n=100, t=50, G=6, k=3


DGP1 n=100, t=50, G=6, k=3:   0%|          | 0/500 [00:00<?, ?it/s]

Proportion of correct G estimates 100, 50, 6, 3: 0.78


Running DGP1 with n=200, t=20, G=3, k=3


DGP1 n=200, t=20, G=3, k=3:   0%|          | 0/500 [00:00<?, ?it/s]

Proportion of correct G estimates 200, 20, 3, 3: 1.00


Running DGP1 with n=200, t=20, G=6, k=3


DGP1 n=200, t=20, G=6, k=3:   0%|          | 0/500 [00:00<?, ?it/s]

Proportion of correct G estimates 200, 20, 6, 3: 1.00


Running DGP1 with n=200, t=50, G=3, k=3


DGP1 n=200, t=50, G=3, k=3:   0%|          | 0/500 [00:00<?, ?it/s]

Error in run 93: No theta found, something went wrong in the estimation process.
Error in run 98: No theta found, something went wrong in the estimation process.
Error in run 296: No theta found, something went wrong in the estimation process.
Error in run 319: No theta found, something went wrong in the estimation process.
Error in run 416: No theta found, something went wrong in the estimation process.
Proportion of correct G estimates 200, 50, 3, 3: 0.99


Running DGP1 with n=200, t=50, G=6, k=3


DGP1 n=200, t=50, G=6, k=3:   0%|          | 0/500 [00:00<?, ?it/s]

Proportion of correct G estimates 200, 50, 6, 3: 0.94
\begin{tabular}{lrrrrrrrrrrrr}
\toprule
 & n & t & G & k & proportion_correct & 2 & 3 & 4 & 5 & 6 & 7 & 8 \\
\midrule
0 & 100 & 20 & 3 & 3 & 1.000000 & 0.000000 & 1.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 \\
1 & 100 & 20 & 6 & 3 & 0.980000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.980000 & 0.020000 & 0.000000 \\
2 & 100 & 50 & 3 & 3 & 0.998000 & 0.000000 & 0.998000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 \\
3 & 100 & 50 & 6 & 3 & 0.782000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.782000 & 0.210000 & 0.008000 \\
4 & 200 & 20 & 3 & 3 & 1.000000 & 0.000000 & 1.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 \\
5 & 200 & 20 & 6 & 3 & 1.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 1.000000 & 0.000000 & 0.000000 \\
6 & 200 & 50 & 3 & 3 & 0.990000 & 0.000000 & 0.990000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 & 0.000000 \\
7 & 200 & 50 & 6 & 3 & 0.936000 & 0.000000 &

In [9]:
def ab_ic(run, G_true, i):
    """Select the number of groups for one DGP 2 replication via ``grid_search_by_ic``.

    Parameters
    ----------
    run : sequence
        One replication whose first four entries are ``(x, y, f, g)``; any
        further entries are ignored. Only ``x`` and ``y`` are used.
    G_true : int
        Number of groups of the design; it only chooses the fit settings
        (restarts, bootstrap), the search itself covers G = 2..8.
    i : int
        Index of the replication; only used in the error message.

    Returns
    -------
    int or float
        The ``"G"`` entry of element ``[2]`` of the grid-search result, or
        ``np.nan`` when the search raised.
    """
    x, y, _f, _g, *_ = run
    x = np.float32(x)
    y = np.atleast_3d(np.float32(y))

    # Settings depend on the G_true argument; they previously read the notebook-level
    # ``G``, which fails on a fresh kernel and can disagree with the argument.
    gife_iterations = 10 if G_true < 4 else 100
    n_boot = 200 if G_true < 4 else 100
    bootstrap = G_true < 4

    try:
        # Everything the search prints is discarded by sending stdout and stderr to devnull.
        with open(os.devnull, 'w') as devnull:
            with contextlib.redirect_stdout(devnull), contextlib.redirect_stderr(devnull):
                search_result = grid_search_by_ic(
                    GroupedInteractiveFixedEffects,
                    {"G": [2, 3, 4, 5, 6, 7, 8]},
                    init_params={"dependent": y, "exog": x, "bootstrap": bootstrap, "hide_progressbar": False},
                    fit_params={
                        "max_iter": 100,
                        "gife_iterations": gife_iterations,
                        "n_boot": n_boot,
                        "boot_n_jobs": 1,
                    },
                )
                return search_result[2]["G"]
    except Exception as e:
        # Best effort: a failed replication is recorded as NaN instead of aborting the study.
        print(f"Error in run {i}: {e}")
        return np.nan


# Smoke test: select G for a single DGP 2 replication.
with open("development/generated_data/dgp2_n100_t20_G3_k3.pkl", "rb") as data_file:
    runs = pickle.load(data_file)
ab_ic(runs[0], 3, 0)

3

In [10]:
# IC-based selection of G on DGP 2 (default GIFE): per design, how many of the
# first 200 replications pick each G.
res = []
for n in (100, 200):
    for t in (20, 50):
        for G in (3, 6):
            for k in (3,):
                print(f"\n\nRunning DGP2 with n={n}, t={t}, G={G}, k={k}")
                # NOTE: pickle executes arbitrary code on load; only read locally generated files.
                with open(f"development/generated_data/dgp2_n{n}_t{t}_G{G}_k{k}.pkl", "rb") as data_file:
                    runs = pickle.load(data_file)

                # Only the first 200 replications are used; the bar total follows the slice.
                subset = runs[:200]
                estimates = Parallel(n_jobs=-1, prefer="processes")(
                    delayed(ab_ic)(run, G, i)
                    for i, run in tqdm(enumerate(subset), total=len(subset), desc=f"DGP2 n={n}, t={t}, G={G}, k={k}")
                )

                # Failed replications come back as NaN: they never equal G, so they count
                # as incorrect and appear in none of the per-value columns.
                estimates = np.array(estimates)
                proportion_correct = np.mean(estimates == G)
                values = np.array([2, 3, 4, 5, 6, 7, 8])
                # Counts (not shares) of replications selecting each candidate G.
                value_count = np.array([(estimates == val).sum() for val in values])

                print(f"Proportion of correct G estimates {n}, {t}, {G}, {k}: {proportion_correct:.2f}")
                res.append((n, t, G, k, proportion_correct, *value_count))

print(
    pd.DataFrame(res, columns=["n", "t", "G", "k", "proportion_correct", "2", "3", "4", "5", "6", "7", "8"]).to_latex()
)



Running DGP2 with n=100, t=20, G=3, k=3


DGP2 n=100, t=20, G=3, k=3:   0%|          | 0/200 [00:00<?, ?it/s]

Proportion of correct G estimates 100, 20, 3, 3: 1.00


Running DGP2 with n=100, t=20, G=6, k=3


DGP2 n=100, t=20, G=6, k=3:   0%|          | 0/200 [00:00<?, ?it/s]

Proportion of correct G estimates 100, 20, 6, 3: 0.96


Running DGP2 with n=100, t=50, G=3, k=3


DGP2 n=100, t=50, G=3, k=3:   0%|          | 0/200 [00:00<?, ?it/s]

Proportion of correct G estimates 100, 50, 3, 3: 1.00


Running DGP2 with n=100, t=50, G=6, k=3


DGP2 n=100, t=50, G=6, k=3:   0%|          | 0/200 [00:00<?, ?it/s]

Proportion of correct G estimates 100, 50, 6, 3: 0.99


Running DGP2 with n=200, t=20, G=3, k=3


DGP2 n=200, t=20, G=3, k=3:   0%|          | 0/200 [00:00<?, ?it/s]

Proportion of correct G estimates 200, 20, 3, 3: 0.99


Running DGP2 with n=200, t=20, G=6, k=3


DGP2 n=200, t=20, G=6, k=3:   0%|          | 0/200 [00:00<?, ?it/s]

Proportion of correct G estimates 200, 20, 6, 3: 0.99


Running DGP2 with n=200, t=50, G=3, k=3


DGP2 n=200, t=50, G=3, k=3:   0%|          | 0/200 [00:00<?, ?it/s]

Proportion of correct G estimates 200, 50, 3, 3: 1.00


Running DGP2 with n=200, t=50, G=6, k=3


DGP2 n=200, t=50, G=6, k=3:   0%|          | 0/200 [00:00<?, ?it/s]

Proportion of correct G estimates 200, 50, 6, 3: 1.00
\begin{tabular}{lrrrrrrrrrrrr}
\toprule
 & n & t & G & k & proportion_correct & 2 & 3 & 4 & 5 & 6 & 7 & 8 \\
\midrule
0 & 100 & 20 & 3 & 3 & 1.000000 & 0 & 200 & 0 & 0 & 0 & 0 & 0 \\
1 & 100 & 20 & 6 & 3 & 0.960000 & 0 & 0 & 0 & 3 & 192 & 4 & 1 \\
2 & 100 & 50 & 3 & 3 & 1.000000 & 0 & 200 & 0 & 0 & 0 & 0 & 0 \\
3 & 100 & 50 & 6 & 3 & 0.990000 & 0 & 0 & 0 & 0 & 198 & 2 & 0 \\
4 & 200 & 20 & 3 & 3 & 0.995000 & 0 & 199 & 1 & 0 & 0 & 0 & 0 \\
5 & 200 & 20 & 6 & 3 & 0.995000 & 0 & 0 & 0 & 0 & 199 & 1 & 0 \\
6 & 200 & 50 & 3 & 3 & 1.000000 & 0 & 200 & 0 & 0 & 0 & 0 & 0 \\
7 & 200 & 50 & 6 & 3 & 1.000000 & 0 & 0 & 0 & 0 & 200 & 0 & 0 \\
\bottomrule
\end{tabular}



In [11]:
def sj_ic(run, G_true, i):
    """Select the number of groups for one DGP 2 replication with the ``"su_ju"`` model.

    Parameters
    ----------
    run : sequence
        One replication whose first four entries are ``(x, y, f, g)``; any
        further entries are ignored. Only ``x`` and ``y`` are used.
    G_true : int
        Number of groups of the design; it only chooses the fit settings
        (bootstrap draws, kappa, bootstrap on/off), the search covers G = 2..8.
    i : int
        Index of the replication; only used in the error message.

    Returns
    -------
    int or float
        The ``"G"`` entry of element ``[2]`` of the grid-search result, or
        ``np.nan`` when the search raised.
    """
    x, y, _f, _g, *_ = run
    x = np.float32(x)
    y = np.atleast_3d(np.float32(y))

    n_boot = 100 if G_true < 4 else 50
    kappa = 0.005 if G_true < 4 else 0.00005
    bootstrap = G_true < 4

    try:
        # Everything the search prints is discarded by sending stdout and stderr to devnull.
        with open(os.devnull, "w") as devnull:
            with contextlib.redirect_stdout(devnull), contextlib.redirect_stderr(devnull):
                search_result = grid_search_by_ic(
                    GroupedInteractiveFixedEffects,
                    {"G": [2, 3, 4, 5, 6, 7, 8]},
                    init_params={
                        "dependent": y,
                        "exog": x,
                        "bootstrap": bootstrap,
                        "model": "su_ju",
                        "hide_progressbar": False,
                    },
                    fit_params={
                        "n_boot": n_boot,
                        "only_bfgs": True,
                        "max_iter": 100,
                        "kappa": kappa,
                        "tol": 1e-2,
                        "boot_n_jobs": 1,
                    },
                )
                return search_result[2]["G"]
    except Exception as err:
        # Best effort: a failed replication is recorded as NaN instead of aborting the study.
        print(f"Error in run {i}: {err}")
        return np.nan

# runs = pickle.load(open("development/generated_data/dgp2_n100_t20_G3_k3.pkl", "rb"))
# sj_ic(runs[0], 3, 0)

In [12]:
res = []
for n in (100, 200):
    for t in (20, 50):
        for G in (3, 6):
            for k in (3,):
                print(f"\n\nRunning DGP2 with n={n}, t={t}, G={G}, k={k}")
                runs = pickle.load(open(f"development/generated_data/dgp2_n{n}_t{t}_G{G}_k{k}.pkl", "rb"))

                # Wrap the iterable in tqdm to get a progress bar
                estimates = Parallel(n_jobs=-1, prefer="processes")(
                    delayed(sj_ic)(run, G, i)
                    for i, run in tqdm(enumerate(runs[:200]), total=200, desc=f"DGP2 n={n}, t={t}, G={G}, k={k}")
                )

                estimates = np.array(estimates)
                proportion_correct = np.mean(estimates == G)
                values = np.array([2, 3, 4, 5, 6, 7, 8])
                value_count = np.array([(estimates == val).sum() for val in values])

                # with open(output_path, "wb") as f:
                #     pickle.dump(estimates, f)
                print(f"Proportion of correct G estimates {n}, {t}, {G}, {k}: {proportion_correct:.2f}")
                res.append((n, t, G, k, proportion_correct, *value_count))

print(
    pd.DataFrame(res, columns=["n", "t", "G", "k", "proportion_correct", "2", "3", "4", "5", "6", "7", "8"]).to_latex()
)



Running DGP2 with n=100, t=20, G=3, k=3


DGP2 n=100, t=20, G=3, k=3:   0%|          | 0/200 [00:00<?, ?it/s]

Proportion of correct G estimates 100, 20, 3, 3: 1.00


Running DGP2 with n=100, t=20, G=6, k=3


DGP2 n=100, t=20, G=6, k=3:   0%|          | 0/200 [00:00<?, ?it/s]

Proportion of correct G estimates 100, 20, 6, 3: 0.92


Running DGP2 with n=100, t=50, G=3, k=3


DGP2 n=100, t=50, G=3, k=3:   0%|          | 0/200 [00:00<?, ?it/s]

Proportion of correct G estimates 100, 50, 3, 3: 1.00


Running DGP2 with n=100, t=50, G=6, k=3


DGP2 n=100, t=50, G=6, k=3:   0%|          | 0/200 [00:00<?, ?it/s]

Proportion of correct G estimates 100, 50, 6, 3: 0.89


Running DGP2 with n=200, t=20, G=3, k=3


DGP2 n=200, t=20, G=3, k=3:   0%|          | 0/200 [00:00<?, ?it/s]

Proportion of correct G estimates 200, 20, 3, 3: 0.99


Running DGP2 with n=200, t=20, G=6, k=3


DGP2 n=200, t=20, G=6, k=3:   0%|          | 0/200 [00:00<?, ?it/s]

Proportion of correct G estimates 200, 20, 6, 3: 0.93


Running DGP2 with n=200, t=50, G=3, k=3


DGP2 n=200, t=50, G=3, k=3:   0%|          | 0/200 [00:00<?, ?it/s]

Proportion of correct G estimates 200, 50, 3, 3: 1.00


Running DGP2 with n=200, t=50, G=6, k=3


DGP2 n=200, t=50, G=6, k=3:   0%|          | 0/200 [00:00<?, ?it/s]

Proportion of correct G estimates 200, 50, 6, 3: 0.97
\begin{tabular}{lrrrrrrrrrrrr}
\toprule
 & n & t & G & k & proportion_correct & 2 & 3 & 4 & 5 & 6 & 7 & 8 \\
\midrule
0 & 100 & 20 & 3 & 3 & 1.000000 & 0 & 200 & 0 & 0 & 0 & 0 & 0 \\
1 & 100 & 20 & 6 & 3 & 0.920000 & 0 & 0 & 0 & 14 & 184 & 2 & 0 \\
2 & 100 & 50 & 3 & 3 & 1.000000 & 0 & 200 & 0 & 0 & 0 & 0 & 0 \\
3 & 100 & 50 & 6 & 3 & 0.885000 & 0 & 0 & 0 & 0 & 177 & 21 & 2 \\
4 & 200 & 20 & 3 & 3 & 0.995000 & 0 & 199 & 1 & 0 & 0 & 0 & 0 \\
5 & 200 & 20 & 6 & 3 & 0.930000 & 0 & 0 & 0 & 14 & 186 & 0 & 0 \\
6 & 200 & 50 & 3 & 3 & 1.000000 & 0 & 200 & 0 & 0 & 0 & 0 & 0 \\
7 & 200 & 50 & 6 & 3 & 0.970000 & 0 & 0 & 0 & 0 & 194 & 6 & 0 \\
\bottomrule
\end{tabular}



In [21]:
def ssp_ic(run, G_true, i):
    """Pick the number of groups for one DGP 3 replication with the ``"su_shi_phillips"`` model.

    ``run`` must unpack into exactly ``(x, y, g)``; only ``x`` and ``y`` are
    used. ``G_true`` only chooses the fit settings, while the search itself
    covers G = 2..8 together with a grid of kappa values. ``i`` is used in the
    error message only.

    Returns the ``"G"`` entry of element ``[2]`` of the ``grid_search_by_ic``
    result, or ``np.nan`` when the search raised.
    """
    x, y, _ = run
    x = np.float32(x)
    y = np.atleast_3d(np.float32(y))

    small_design = bool(G_true < 4)
    search_grid = {"G": [2, 3, 4, 5, 6, 7, 8], "kappa": [1, 0.5, 0.05, 0.005]}
    init_kwargs = {
        "dependent": y,
        "exog": x,
        "bootstrap": small_design,
        "model": "su_shi_phillips",
        "hide_progressbar": False,
    }
    # NOTE(review): kappa is given both in the search grid and here - which one
    # grid_search_by_ic uses is not visible from this notebook; confirm.
    fit_kwargs = {
        "n_boot": 100 if small_design else 50,
        "only_bfgs": True,
        "max_iter": 100,
        "kappa": 0.05 if small_design else 0.005,
        "tol": 1e-2,
        "boot_n_jobs": 1,
    }

    try:
        # Everything the search prints is discarded by sending stdout and stderr to devnull.
        with open(os.devnull, "w") as sink, contextlib.redirect_stdout(sink), contextlib.redirect_stderr(sink):
            outcome = grid_search_by_ic(
                GroupedFixedEffects,
                search_grid,
                init_params=init_kwargs,
                fit_params=fit_kwargs,
            )
            return outcome[2]["G"]
    except Exception as err:
        print(f"Error in run {i}: {err}")
        return np.nan


# runs = pickle.load(open("development/generated_data/dgp3_n100_t20_G3_k3.pkl", "rb"))
# ssp_ic(runs[0], 3, 0)

In [None]:
# IC-based selection of G on DGP 3 (su_shi_phillips): per design, how many of
# the first 200 replications pick each G.
res = []
for n in (100, 200):
    for t in (20, 50):
        for G in (3, 6):
            for k in (3,):
                # The status line names DGP3, matching the data loaded below (it said DGP1).
                print(f"\n\nRunning DGP3 with n={n}, t={t}, G={G}, k={k}")
                # NOTE: pickle executes arbitrary code on load; only read locally generated files.
                with open(f"development/generated_data/dgp3_n{n}_t{t}_G{G}_k{k}.pkl", "rb") as data_file:
                    runs = pickle.load(data_file)

                # Only the first 200 replications are used; the bar total follows the slice.
                subset = runs[:200]
                estimates = Parallel(n_jobs=-1, prefer="processes")(
                    delayed(ssp_ic)(run, G, i)
                    for i, run in tqdm(enumerate(subset), total=len(subset), desc=f"DGP3 n={n}, t={t}, G={G}, k={k}")
                )

                # Failed replications come back as NaN: they never equal G, so they count
                # as incorrect and appear in none of the per-value columns.
                estimates = np.array(estimates)
                proportion_correct = np.mean(estimates == G)
                values = np.array([2, 3, 4, 5, 6, 7, 8])
                # Counts (not shares) of replications selecting each candidate G.
                value_count = np.array([(estimates == val).sum() for val in values])

                print(f"Proportion of correct G estimates {n}, {t}, {G}, {k}: {proportion_correct:.2f}")
                res.append((n, t, G, k, proportion_correct, *value_count))

print(
    pd.DataFrame(res, columns=["n", "t", "G", "k", "proportion_correct", "2", "3", "4", "5", "6", "7", "8"]).to_latex()
)



Running DGP1 with n=100, t=20, G=3, k=3


DGP3 n=100, t=20, G=3, k=3:   0%|          | 0/200 [00:00<?, ?it/s]

Proportion of correct G estimates 100, 20, 3, 3: 0.48


Running DGP1 with n=100, t=20, G=6, k=3


DGP3 n=100, t=20, G=6, k=3:   0%|          | 0/200 [00:00<?, ?it/s]

Proportion of correct G estimates 100, 20, 6, 3: 0.80


Running DGP1 with n=100, t=50, G=3, k=3


DGP3 n=100, t=50, G=3, k=3:   0%|          | 0/200 [00:00<?, ?it/s]

Proportion of correct G estimates 100, 50, 3, 3: 0.88


Running DGP1 with n=100, t=50, G=6, k=3


DGP3 n=100, t=50, G=6, k=3:   0%|          | 0/200 [00:00<?, ?it/s]

Proportion of correct G estimates 100, 50, 6, 3: 0.91


Running DGP1 with n=200, t=20, G=3, k=3


DGP3 n=200, t=20, G=3, k=3:   0%|          | 0/200 [00:00<?, ?it/s]

Proportion of correct G estimates 200, 20, 3, 3: 0.07


Running DGP1 with n=200, t=20, G=6, k=3


DGP3 n=200, t=20, G=6, k=3:   0%|          | 0/200 [00:00<?, ?it/s]

Proportion of correct G estimates 200, 20, 6, 3: 0.68


Running DGP1 with n=200, t=50, G=3, k=3


DGP3 n=200, t=50, G=3, k=3:   0%|          | 0/200 [00:00<?, ?it/s]

Proportion of correct G estimates 200, 50, 3, 3: 0.35


Running DGP1 with n=200, t=50, G=6, k=3


DGP3 n=200, t=50, G=6, k=3:   0%|          | 0/200 [00:00<?, ?it/s]

Proportion of correct G estimates 200, 50, 6, 3: 0.91
\begin{tabular}{lrrrrrrrrrrrr}
\toprule
 & n & t & G & k & proportion_correct & 2 & 3 & 4 & 5 & 6 & 7 & 8 \\
\midrule
0 & 100 & 20 & 3 & 3 & 0.480000 & 0 & 96 & 36 & 26 & 29 & 12 & 1 \\
1 & 100 & 20 & 6 & 3 & 0.800000 & 0 & 0 & 0 & 0 & 160 & 26 & 14 \\
2 & 100 & 50 & 3 & 3 & 0.880000 & 0 & 176 & 14 & 6 & 3 & 1 & 0 \\
3 & 100 & 50 & 6 & 3 & 0.910000 & 0 & 0 & 0 & 0 & 182 & 15 & 3 \\
4 & 200 & 20 & 3 & 3 & 0.075000 & 0 & 15 & 43 & 16 & 35 & 67 & 24 \\
5 & 200 & 20 & 6 & 3 & 0.680000 & 0 & 0 & 0 & 0 & 136 & 47 & 17 \\
6 & 200 & 50 & 3 & 3 & 0.355000 & 0 & 71 & 13 & 38 & 47 & 29 & 2 \\
7 & 200 & 50 & 6 & 3 & 0.910000 & 0 & 0 & 0 & 0 & 182 & 10 & 8 \\
\bottomrule
\end{tabular}



Exception ignored in: <function ResourceTracker.__del__ at 0x1036e9620>
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.13/lib/python3.13/multiprocessing/resource_tracker.py", line 82, in __del__
  File "/Library/Frameworks/Python.framework/Versions/3.13/lib/python3.13/multiprocessing/resource_tracker.py", line 91, in _stop
  File "/Library/Frameworks/Python.framework/Versions/3.13/lib/python3.13/multiprocessing/resource_tracker.py", line 116, in _stop_locked
ChildProcessError: [Errno 10] No child processes
Exception ignored in: <function ResourceTracker.__del__ at 0x106c01620>
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.13/lib/python3.13/multiprocessing/resource_tracker.py", line 82, in __del__
  File "/Library/Frameworks/Python.framework/Versions/3.13/lib/python3.13/multiprocessing/resource_tracker.py", line 91, in _stop
  File "/Library/Frameworks/Python.framework/Versions/3.13/lib/python3.13/m