In [1]:
import pickle
import numpy as np
from src.groupedpaneldatamodels import GroupedFixedEffects, GroupedInteractiveFixedEffects, grid_search_by_ic
from joblib import Parallel, delayed
from tqdm import tqdm
import os

# Bonhomme Manresa (DGP 1)

In [2]:
def bm_run(run, G, i):
    """Fit ``GroupedFixedEffects`` on one replication and return its dict form.

    Parameters
    ----------
    run : sequence of exactly four items ``(x, y, f, g)``
        ``x`` and ``y`` are cast to float32 and handed to the model as the
        regressors and the (3-D) dependent variable. ``f`` is cast but not
        used afterwards; ``g`` is ignored.
    G : int
        Number of groups passed to the model. It also selects the settings:
        ``G < 4`` enables the bootstrap and uses 10 GFE iterations, otherwise
        the bootstrap is off and 100 iterations are used.
    i : int
        Replication index, only used in the error message.

    Returns
    -------
    dict or None
        ``model.to_dict()`` on success; ``None`` when construction or fitting
        raises (the exception is printed, not re-raised).
    """
    exog, dependent, factors, _groups = run
    exog = np.float32(exog)
    dependent = np.atleast_3d(np.float32(dependent))
    factors = np.float32(factors)  # cast as before; not consumed below

    few_groups = bool(G < 4)
    fit_kwargs = {
        "max_iter": 100,
        "gfe_iterations": 10 if few_groups else 100,
        "n_boot": 200,
        "boot_n_jobs": 1,
    }

    try:
        model = GroupedFixedEffects(dependent, exog, G, few_groups, hide_progressbar=True)
        fitted = model.fit(**fit_kwargs)
        return fitted.to_dict()
    except Exception as e:
        # Best-effort Monte Carlo: report the failing replication and carry on.
        print(f"Error in run {i}: {e}")
        return None

# runs = pickle.load(open("development/generated_data/dgp1_n100_t20_G3_k3.pkl", "rb"))
# bm_run(runs[0], 3, 0)

In [3]:
# Monte Carlo estimation for DGP 1: one pickle of per-replication results per
# (n, t, G, k) design. Designs whose output file already exists are skipped so
# the cell can be re-run after an interruption.
for n in (100, 200):
    for t in (20, 50):
        for G in (3, 6):
            for k in (3,):
                output_path = f"estimates/dgp1_n{n}_t{t}_G{G}_k{k}_full.pkl"
                if os.path.exists(output_path):
                    print(f"Skipping DGP1 with n={n}, t={t}, G={G}, k={k} (already exists)")
                    continue

                print(f"\n\nRunning DGP1 with n={n}, t={t}, G={G}, k={k}")
                # NOTE: pickle can execute arbitrary code on load; only read
                # files generated by this project.
                with open(f"development/generated_data/dgp1_n{n}_t{t}_G{G}_k{k}.pkl", "rb") as data_file:
                    runs = pickle.load(data_file)

                # Create the destination up front so a missing directory is
                # discovered before the long computation, not after it.
                os.makedirs(os.path.dirname(output_path), exist_ok=True)

                # tqdm wraps the task generator; the total comes from the data
                # instead of a hard-coded replication count.
                estimates = Parallel(n_jobs=-1, prefer="processes")(
                    delayed(bm_run)(run, G, i)
                    for i, run in tqdm(enumerate(runs), total=len(runs), desc=f"DGP1 n={n}, t={t}, G={G}, k={k}")
                )

                with open(output_path, "wb") as out_file:
                    pickle.dump(estimates, out_file)



Running DGP1 with n=100, t=20, G=3, k=3


DGP1 n=100, t=20, G=3, k=3: 100%|██████████| 500/500 [10:03<00:00,  1.21s/it]




Running DGP1 with n=100, t=20, G=6, k=3


DGP1 n=100, t=20, G=6, k=3:   6%|▌         | 30/500 [00:04<01:24,  5.55it/s]

Error in run 17: Singular matrix


DGP1 n=100, t=20, G=6, k=3:  12%|█▏        | 60/500 [00:12<01:44,  4.22it/s]

Error in run 46: Singular matrix


DGP1 n=100, t=20, G=6, k=3:  36%|███▌      | 180/500 [00:45<01:20,  3.99it/s]

Error in run 163: Singular matrix


DGP1 n=100, t=20, G=6, k=3:  60%|██████    | 300/500 [01:16<00:53,  3.75it/s]

Error in run 280: Singular matrix
Error in run 287: Singular matrix


DGP1 n=100, t=20, G=6, k=3:  70%|███████   | 350/500 [01:30<00:41,  3.58it/s]

Error in run 328: Singular matrix


DGP1 n=100, t=20, G=6, k=3:  72%|███████▏  | 360/500 [01:33<00:38,  3.59it/s]

Error in run 342: Singular matrix


DGP1 n=100, t=20, G=6, k=3:  76%|███████▌  | 380/500 [01:38<00:32,  3.71it/s]

Error in run 366: Singular matrix


DGP1 n=100, t=20, G=6, k=3:  88%|████████▊ | 440/500 [01:55<00:17,  3.44it/s]

Error in run 428: Singular matrix


DGP1 n=100, t=20, G=6, k=3:  96%|█████████▌| 480/500 [02:06<00:05,  3.75it/s]

Error in run 464: Singular matrix


DGP1 n=100, t=20, G=6, k=3: 100%|██████████| 500/500 [02:11<00:00,  3.80it/s]


Error in run 487: Singular matrix
Error in run 490: Singular matrix
Error in run 494: Singular matrix


Running DGP1 with n=100, t=50, G=3, k=3


DGP1 n=100, t=50, G=3, k=3: 100%|██████████| 500/500 [22:19<00:00,  2.68s/it]




Running DGP1 with n=100, t=50, G=6, k=3


DGP1 n=100, t=50, G=6, k=3:   4%|▍         | 20/500 [00:03<01:34,  5.08it/s]

Error in run 3: Singular matrix
Error in run 6: Singular matrix


DGP1 n=100, t=50, G=6, k=3:   6%|▌         | 30/500 [00:08<02:27,  3.19it/s]

Error in run 12: Singular matrix


DGP1 n=100, t=50, G=6, k=3:   8%|▊         | 40/500 [00:13<02:47,  2.75it/s]

Error in run 27: Singular matrix
Error in run 22: Singular matrix
Error in run 28: Singular matrix


DGP1 n=100, t=50, G=6, k=3:  10%|█         | 50/500 [00:18<03:12,  2.33it/s]

Error in run 31: Singular matrix
Error in run 36: Singular matrix
Error in run 37: Singular matrix
Error in run 38: Singular matrix


DGP1 n=100, t=50, G=6, k=3:  12%|█▏        | 60/500 [00:24<03:32,  2.08it/s]

Error in run 42: Singular matrix
Error in run 46: Singular matrix
Error in run 50: Singular matrix


DGP1 n=100, t=50, G=6, k=3:  14%|█▍        | 70/500 [00:30<03:36,  1.98it/s]

Error in run 53: Singular matrix
Error in run 57: Singular matrix


DGP1 n=100, t=50, G=6, k=3:  18%|█▊        | 90/500 [00:40<03:33,  1.92it/s]

Error in run 80: Singular matrix


DGP1 n=100, t=50, G=6, k=3:  22%|██▏       | 110/500 [00:52<03:32,  1.83it/s]

Error in run 92: Singular matrix
Error in run 96: Singular matrix
Error in run 94: Singular matrix
Error in run 98: Singular matrix


DGP1 n=100, t=50, G=6, k=3:  24%|██▍       | 120/500 [00:57<03:28,  1.82it/s]

Error in run 102: Singular matrix
Error in run 103: Singular matrix
Error in run 108: Singular matrix
Error in run 105: Singular matrix


DGP1 n=100, t=50, G=6, k=3:  26%|██▌       | 130/500 [01:03<03:26,  1.79it/s]

Error in run 112: Singular matrix
Error in run 116: Singular matrix


DGP1 n=100, t=50, G=6, k=3:  28%|██▊       | 140/500 [01:09<03:23,  1.77it/s]

Error in run 121: Singular matrix
Error in run 123: Singular matrix
Error in run 127: Singular matrix
Error in run 132: Singular matrix


DGP1 n=100, t=50, G=6, k=3:  32%|███▏      | 160/500 [01:19<03:02,  1.87it/s]

Error in run 144: Singular matrix
Error in run 143: Singular matrix
Error in run 149: Singular matrix


DGP1 n=100, t=50, G=6, k=3:  36%|███▌      | 180/500 [01:30<02:57,  1.81it/s]

Error in run 165: Singular matrix


DGP1 n=100, t=50, G=6, k=3:  38%|███▊      | 190/500 [01:36<02:47,  1.85it/s]

Error in run 177: Singular matrix
Error in run 180: Singular matrix


DGP1 n=100, t=50, G=6, k=3:  40%|████      | 200/500 [01:42<02:48,  1.78it/s]

Error in run 190: Singular matrix


DGP1 n=100, t=50, G=6, k=3:  42%|████▏     | 210/500 [01:46<02:29,  1.94it/s]

Error in run 198: Singular matrix
Error in run 199: Singular matrix


DGP1 n=100, t=50, G=6, k=3:  44%|████▍     | 220/500 [01:51<02:21,  1.98it/s]

Error in run 203: Singular matrix
Error in run 207: Singular matrix
Error in run 209: Singular matrix


DGP1 n=100, t=50, G=6, k=3:  48%|████▊     | 240/500 [02:02<02:18,  1.87it/s]

Error in run 223: Singular matrix
Error in run 228: Singular matrix


DGP1 n=100, t=50, G=6, k=3:  50%|█████     | 250/500 [02:07<02:09,  1.93it/s]

Error in run 229: Singular matrix
Error in run 230: Singular matrix
Error in run 237: Singular matrix
Error in run 238: Singular matrix
Error in run 234: Singular matrix


DGP1 n=100, t=50, G=6, k=3:  52%|█████▏    | 260/500 [02:13<02:14,  1.78it/s]

Error in run 246: Singular matrix


DGP1 n=100, t=50, G=6, k=3:  56%|█████▌    | 280/500 [02:24<02:00,  1.83it/s]

Error in run 263: Singular matrix
Error in run 268: Singular matrix


DGP1 n=100, t=50, G=6, k=3:  58%|█████▊    | 290/500 [02:29<01:51,  1.89it/s]

Error in run 272: Singular matrix
Error in run 270: Singular matrix


DGP1 n=100, t=50, G=6, k=3:  62%|██████▏   | 310/500 [02:37<01:30,  2.10it/s]

Error in run 289: Singular matrix
Error in run 290: Singular matrix


DGP1 n=100, t=50, G=6, k=3:  64%|██████▍   | 320/500 [02:43<01:32,  1.94it/s]

Error in run 303: Singular matrix
Error in run 306: Singular matrix
Error in run 309: Singular matrix


DGP1 n=100, t=50, G=6, k=3:  66%|██████▌   | 330/500 [02:48<01:28,  1.93it/s]

Error in run 315: Singular matrix
Error in run 319: Singular matrix


DGP1 n=100, t=50, G=6, k=3:  68%|██████▊   | 340/500 [02:53<01:22,  1.94it/s]

Error in run 323: Singular matrix
Error in run 327: Singular matrix
Error in run 325: Singular matrix


DGP1 n=100, t=50, G=6, k=3:  70%|███████   | 350/500 [02:59<01:18,  1.91it/s]

Error in run 341: Singular matrix


DGP1 n=100, t=50, G=6, k=3:  72%|███████▏  | 360/500 [03:04<01:13,  1.90it/s]

Error in run 346: Singular matrix


DGP1 n=100, t=50, G=6, k=3:  76%|███████▌  | 380/500 [03:15<01:02,  1.91it/s]

Error in run 361: Singular matrix
Error in run 362: Singular matrix
Error in run 370: Singular matrix


DGP1 n=100, t=50, G=6, k=3:  78%|███████▊  | 390/500 [03:19<00:56,  1.96it/s]

Error in run 373: Singular matrix
Error in run 379: Singular matrix
Error in run 377: Singular matrix


DGP1 n=100, t=50, G=6, k=3:  80%|████████  | 400/500 [03:24<00:51,  1.96it/s]

Error in run 388: Singular matrix


DGP1 n=100, t=50, G=6, k=3:  82%|████████▏ | 410/500 [03:30<00:46,  1.94it/s]

Error in run 391: Singular matrix
Error in run 398: Singular matrix
Error in run 399: Singular matrix


DGP1 n=100, t=50, G=6, k=3:  84%|████████▍ | 420/500 [03:34<00:39,  2.01it/s]

Error in run 409: Singular matrix


DGP1 n=100, t=50, G=6, k=3:  86%|████████▌ | 430/500 [03:41<00:38,  1.82it/s]

Error in run 412: Singular matrix
Error in run 413: Singular matrix
Error in run 418: Singular matrix


DGP1 n=100, t=50, G=6, k=3:  88%|████████▊ | 440/500 [03:46<00:32,  1.84it/s]

Error in run 421: Singular matrix
Error in run 425: Singular matrix
Error in run 427: Singular matrix
Error in run 428: Singular matrix


DGP1 n=100, t=50, G=6, k=3:  90%|█████████ | 450/500 [03:51<00:26,  1.91it/s]

Error in run 430: Singular matrix


DGP1 n=100, t=50, G=6, k=3:  92%|█████████▏| 460/500 [03:56<00:20,  1.98it/s]

Error in run 440: Singular matrix
Error in run 449: Singular matrix
Error in run 447: Singular matrix


DGP1 n=100, t=50, G=6, k=3:  94%|█████████▍| 470/500 [04:01<00:15,  1.95it/s]

Error in run 452: Singular matrix
Error in run 457: Singular matrix
Error in run 461: Singular matrix


DGP1 n=100, t=50, G=6, k=3:  96%|█████████▌| 480/500 [04:05<00:09,  2.04it/s]

Error in run 463: Singular matrix


DGP1 n=100, t=50, G=6, k=3:  98%|█████████▊| 490/500 [04:11<00:04,  2.00it/s]

Error in run 476: Singular matrix
Error in run 478: Singular matrix


DGP1 n=100, t=50, G=6, k=3: 100%|██████████| 500/500 [04:16<00:00,  1.95it/s]


Error in run 481: Singular matrix
Error in run 485: Singular matrix
Error in run 491: Singular matrix
Error in run 497: Singular matrix


Running DGP1 with n=200, t=20, G=3, k=3


DGP1 n=200, t=20, G=3, k=3: 100%|██████████| 500/500 [15:27<00:00,  1.86s/it]




Running DGP1 with n=200, t=20, G=6, k=3


DGP1 n=200, t=20, G=6, k=3: 100%|██████████| 500/500 [04:24<00:00,  1.89it/s]




Running DGP1 with n=200, t=50, G=3, k=3


DGP1 n=200, t=50, G=3, k=3: 100%|██████████| 500/500 [43:24<00:00,  5.21s/it]




Running DGP1 with n=200, t=50, G=6, k=3


DGP1 n=200, t=50, G=6, k=3:   6%|▌         | 30/500 [00:21<05:51,  1.34it/s]

Error in run 13: Singular matrix
Error in run 12: Singular matrix


DGP1 n=200, t=50, G=6, k=3:  12%|█▏        | 60/500 [00:57<08:01,  1.09s/it]

Error in run 49: Singular matrix


DGP1 n=200, t=50, G=6, k=3:  16%|█▌        | 80/500 [01:24<08:30,  1.22s/it]

Error in run 68: Singular matrix
Error in run 69: Singular matrix


DGP1 n=200, t=50, G=6, k=3:  18%|█▊        | 90/500 [01:35<08:06,  1.19s/it]

Error in run 79: Singular matrix


DGP1 n=200, t=50, G=6, k=3:  20%|██        | 100/500 [01:46<07:39,  1.15s/it]

Error in run 80: Singular matrix


DGP1 n=200, t=50, G=6, k=3:  32%|███▏      | 160/500 [02:59<06:55,  1.22s/it]

Error in run 150: Singular matrix


DGP1 n=200, t=50, G=6, k=3:  40%|████      | 200/500 [03:48<06:14,  1.25s/it]

Error in run 183: Singular matrix
Error in run 191: Singular matrix


DGP1 n=200, t=50, G=6, k=3:  48%|████▊     | 240/500 [04:39<05:36,  1.29s/it]

Error in run 231: Singular matrix


DGP1 n=200, t=50, G=6, k=3:  50%|█████     | 250/500 [04:49<05:06,  1.23s/it]

Error in run 238: Singular matrix


DGP1 n=200, t=50, G=6, k=3:  62%|██████▏   | 310/500 [06:04<03:54,  1.23s/it]

Error in run 296: Singular matrix


DGP1 n=200, t=50, G=6, k=3:  74%|███████▍  | 370/500 [07:16<02:38,  1.22s/it]

Error in run 352: Singular matrix
Error in run 360: Singular matrix


DGP1 n=200, t=50, G=6, k=3:  80%|████████  | 400/500 [07:55<02:05,  1.26s/it]

Error in run 389: Singular matrix


DGP1 n=200, t=50, G=6, k=3:  84%|████████▍ | 420/500 [08:20<01:40,  1.26s/it]

Error in run 403: Singular matrix


DGP1 n=200, t=50, G=6, k=3:  90%|█████████ | 450/500 [08:55<01:01,  1.22s/it]

Error in run 439: Singular matrix


DGP1 n=200, t=50, G=6, k=3:  92%|█████████▏| 460/500 [09:06<00:47,  1.18s/it]

Error in run 443: Singular matrix


DGP1 n=200, t=50, G=6, k=3:  96%|█████████▌| 480/500 [09:31<00:24,  1.22s/it]

Error in run 465: Singular matrix


DGP1 n=200, t=50, G=6, k=3:  98%|█████████▊| 490/500 [09:43<00:12,  1.20s/it]

Error in run 479: Singular matrix


DGP1 n=200, t=50, G=6, k=3: 100%|██████████| 500/500 [09:56<00:00,  1.19s/it]


## DGP 2

In [4]:
def ab_run(run, G, i):
    """Fit the default ``GroupedInteractiveFixedEffects`` model on one replication.

    Parameters
    ----------
    run : sequence with at least two items
        ``run[0]`` (x) and ``run[1]`` (y) are cast to float32 and passed to the
        model; any further items are ignored.
    G : int
        Number of groups. ``G < 4`` enables the bootstrap (200 draws) with 10
        GIFE iterations; otherwise the bootstrap is off and 100 iterations are
        used.
    i : int
        Replication index, reported when the fit fails.

    Returns
    -------
    dict or None
        ``model.to_dict()`` on success, ``None`` if construction or fitting
        raised (the error is printed together with the run index).
    """
    x, y, *_ = run
    x = np.float32(x)
    y = np.atleast_3d(np.float32(y))

    gife_iterations = 10 if G < 4 else 100
    n_boot = 200 if G < 4 else 100
    bootstrap = G < 4
    try:
        m = GroupedInteractiveFixedEffects(y, x, G, bootstrap, hide_progressbar=True).fit(
            max_iter=100,
            gife_iterations=gife_iterations,
            n_boot=n_boot,
            boot_n_jobs=1,
        )
        return m.to_dict()
    except Exception as err:
        # Include the run index so a failure can be traced back to its replication.
        print(f"Error in run {i}: {err}")
        return None

# runs = pickle.load(open("development/generated_data/dgp2_n100_t20_G3_k3.pkl", "rb"))
# ab_run(runs[0], 3, 0)

In [5]:
# Monte Carlo estimation for DGP 2 with the default interactive-effects model.
# Designs whose output file already exists are skipped.
for n in (100, 200):
    for t in (20, 50):
        for G in (3, 6):
            for k in (3,):
                output_path = f"estimates/dgp2_n{n}_t{t}_G{G}_k{k}_full.pkl"
                if os.path.exists(output_path):
                    print(f"Skipping DGP2 with n={n}, t={t}, G={G}, k={k} (already exists)")
                    continue
                print(f"\n\nRunning DGP2 (ab) with n={n}, t={t}, G={G}, k={k}")
                # NOTE: pickle can execute arbitrary code on load; only read
                # files generated by this project.
                with open(f"development/generated_data/dgp2_n{n}_t{t}_G{G}_k{k}.pkl", "rb") as data_file:
                    runs = pickle.load(data_file)

                # Fail early on an unwritable destination rather than after the fit.
                os.makedirs(os.path.dirname(output_path), exist_ok=True)

                # Progress-bar total follows the data instead of a fixed 500.
                estimates = Parallel(n_jobs=-1, prefer="processes")(
                    delayed(ab_run)(run, G, i)
                    for i, run in tqdm(enumerate(runs), total=len(runs), desc=f"DGP2 n={n}, t={t}, G={G}, k={k}")
                )

                with open(output_path, "wb") as out_file:
                    pickle.dump(estimates, out_file)



Running DGP2 (ab) with n=100, t=20, G=3, k=3


DGP2 n=100, t=20, G=3, k=3: 100%|██████████| 500/500 [49:55<00:00,  5.99s/it]




Running DGP2 (ab) with n=100, t=20, G=6, k=3


DGP2 n=100, t=20, G=6, k=3: 100%|██████████| 500/500 [06:49<00:00,  1.22it/s]




Running DGP2 (ab) with n=100, t=50, G=3, k=3


DGP2 n=100, t=50, G=3, k=3: 100%|██████████| 500/500 [1:09:44<00:00,  8.37s/it]




Running DGP2 (ab) with n=100, t=50, G=6, k=3


DGP2 n=100, t=50, G=6, k=3: 100%|██████████| 500/500 [08:29<00:00,  1.02s/it]




Running DGP2 (ab) with n=200, t=20, G=3, k=3


DGP2 n=200, t=20, G=3, k=3: 100%|██████████| 500/500 [57:15<00:00,  6.87s/it]




Running DGP2 (ab) with n=200, t=20, G=6, k=3


DGP2 n=200, t=20, G=6, k=3: 100%|██████████| 500/500 [06:57<00:00,  1.20it/s]




Running DGP2 (ab) with n=200, t=50, G=3, k=3


DGP2 n=200, t=50, G=3, k=3: 100%|██████████| 500/500 [2:18:53<00:00, 16.67s/it]




Running DGP2 (ab) with n=200, t=50, G=6, k=3


DGP2 n=200, t=50, G=6, k=3: 100%|██████████| 500/500 [16:45<00:00,  2.01s/it]


In [6]:
def sj_run(run, G, i):
    """Fit the ``"su_ju"`` variant of ``GroupedInteractiveFixedEffects`` on one replication.

    Parameters
    ----------
    run : sequence with at least two items
        ``run[0]`` (x) and ``run[1]`` (y) are cast to float32 and passed to the
        model; any further items are ignored.
    G : int
        Number of groups, also passed as ``R``. ``G < 4`` enables the bootstrap
        with 100 draws and ``kappa=0.005``; otherwise the bootstrap is off and
        ``kappa=0.00005`` is used.
    i : int
        Replication index, reported when the fit fails.

    Returns
    -------
    dict or None
        ``model.to_dict()`` on success, ``None`` if construction or fitting
        raised (the error is printed together with the run index).
    """
    x, y, *_ = run
    x = np.float32(x)
    y = np.atleast_3d(np.float32(y))

    n_boot = 100 if G < 4 else 50
    kappa = 0.005 if G < 4 else 0.00005
    bootstrap = G < 4
    try:
        m = GroupedInteractiveFixedEffects(y, x, G, bootstrap, "su_ju", R=G, hide_progressbar=True).fit(
            n_boot=n_boot, only_bfgs=True, max_iter=100, kappa=kappa, tol=1e-2, boot_n_jobs=1
        )
        return m.to_dict()
    except Exception as err:
        # Include the run index so a failure can be traced back to its replication.
        print(f"Error in run {i}: {err}")
        return None

# runs = pickle.load(open("development/generated_data/dgp2_n100_t20_G3_k3.pkl", "rb"))
# sj_run(runs[0], 3, 0)

In [None]:
# Monte Carlo estimation for DGP 2 with the "su_ju" model. Designs whose output
# file already exists are skipped.
for n in (100, 200):
    for t in (20, 50):
        for G in (3, 6):
            for k in (3,):
                output_path = f"estimates/dgp2_n{n}_t{t}_G{G}_k{k}_su_ju_full.pkl"
                if os.path.exists(output_path):
                    print(f"Skipping DGP2 with n={n}, t={t}, G={G}, k={k} (already exists)")
                    continue
                print(f"\n\nRunning DGP2 (su_ju) with n={n}, t={t}, G={G}, k={k}")
                # NOTE: pickle can execute arbitrary code on load; only read
                # files generated by this project.
                with open(f"development/generated_data/dgp2_n{n}_t{t}_G{G}_k{k}.pkl", "rb") as data_file:
                    runs = pickle.load(data_file)

                # These fits can take hours; make sure the result can be saved
                # before starting.
                os.makedirs(os.path.dirname(output_path), exist_ok=True)

                estimates = Parallel(n_jobs=-1, prefer="processes")(
                    delayed(sj_run)(run, G, i)
                    for i, run in enumerate(tqdm(runs, desc=f"DGP2 n={n}, t={t}, G={G}, k={k}"))
                )

                with open(output_path, "wb") as out_file:
                    pickle.dump(estimates, out_file)



Running DGP2 (su_ju) with n=100, t=20, G=3, k=3


DGP2 n=100, t=20, G=3, k=3: 100%|██████████| 500/500 [1:27:26<00:00, 10.49s/it]




Running DGP2 (su_ju) with n=100, t=20, G=6, k=3


DGP2 n=100, t=20, G=6, k=3:  14%|█▍        | 70/500 [00:15<01:45,  4.08it/s]

Error: Singular matrix


DGP2 n=100, t=20, G=6, k=3: 100%|██████████| 500/500 [01:59<00:00,  4.20it/s]




Running DGP2 (su_ju) with n=100, t=50, G=3, k=3


DGP2 n=100, t=50, G=3, k=3: 100%|██████████| 500/500 [4:28:15<00:00, 32.19s/it]  




Running DGP2 (su_ju) with n=100, t=50, G=6, k=3


DGP2 n=100, t=50, G=6, k=3:  34%|███▍      | 170/500 [02:15<04:50,  1.14it/s]

Error: Singular matrix


DGP2 n=100, t=50, G=6, k=3:  82%|████████▏ | 410/500 [05:40<01:17,  1.15it/s]

Error: Singular matrix


DGP2 n=100, t=50, G=6, k=3: 100%|██████████| 500/500 [06:57<00:00,  1.20it/s]




Running DGP2 (su_ju) with n=200, t=20, G=3, k=3


DGP2 n=200, t=20, G=3, k=3: 100%|██████████| 500/500 [3:35:45<00:00, 25.89s/it]  




Running DGP2 (su_ju) with n=200, t=20, G=6, k=3


DGP2 n=200, t=20, G=6, k=3: 100%|██████████| 500/500 [05:17<00:00,  1.57it/s]




Running DGP2 (su_ju) with n=200, t=50, G=3, k=3


DGP2 n=200, t=50, G=3, k=3:  44%|████▍     | 220/500 [5:07:23<6:54:02, 88.72s/it] 

# DGP 3 (SSP)

In [None]:
def ssp_run(run, G, i, kappa=0.005):
    """Fit the ``"su_shi_phillips"`` variant of ``GroupedFixedEffects`` on one replication.

    Parameters
    ----------
    run : sequence with at least two items
        ``run[0]`` (x) and ``run[1]`` (y) are cast to float32 and passed to the
        model; any further items are ignored.
    G : int
        Number of groups. ``G < 4`` enables the bootstrap with 100 draws;
        otherwise the bootstrap is off.
    i : int
        Replication index, reported when the fit fails.
    kappa : float, default 0.005
        Forwarded unchanged to ``fit``.

    Returns
    -------
    dict or None
        ``model.to_dict()`` on success, ``None`` if construction or fitting
        raised (the error is printed together with the run index).
    """
    x, y, *_ = run
    x = np.float32(x)
    y = np.atleast_3d(np.float32(y))

    n_boot = 100 if G < 4 else 50
    bootstrap = G < 4
    try:
        m = GroupedFixedEffects(y, x, G, bootstrap, "su_shi_phillips", hide_progressbar=True).fit(
            n_boot=n_boot, only_bfgs=False, max_iter=100, kappa=kappa, tol=1e-5, boot_n_jobs=1
        )
        return m.to_dict()
    except Exception as err:
        # Include the run index so a failure can be traced back to its replication.
        print(f"Error in run {i}: {err}")
        return None

# Smoke test: fit a single DGP 3 replication before launching the full grid.
# The file is opened in a context manager so the handle is closed right away.
with open("development/generated_data/dgp3_n100_t20_G3_k3.pkl", "rb") as data_file:
    runs = pickle.load(data_file)
ssp_run(runs[0], 3, 0)

In [None]:
# Monte Carlo estimation for DGP 3 with the "su_shi_phillips" model. Designs
# whose output file already exists are skipped.
for n in (100, 200):
    for t in (20, 50):
        for G in (3, 6):
            for k in (3,):
                output_path = f"estimates/dgp3_n{n}_t{t}_G{G}_k{k}_full.pkl"
                if os.path.exists(output_path):
                    print(f"Skipping DGP3 with n={n}, t={t}, G={G}, k={k} (already exists)")
                    continue
                print(f"\n\nRunning DGP3 (su_shi_phillips) with n={n}, t={t}, G={G}, k={k}")
                # NOTE: pickle can execute arbitrary code on load; only read
                # files generated by this project.
                with open(f"development/generated_data/dgp3_n{n}_t{t}_G{G}_k{k}.pkl", "rb") as data_file:
                    runs = pickle.load(data_file)

                # Fail early on an unwritable destination rather than after the fit.
                os.makedirs(os.path.dirname(output_path), exist_ok=True)

                # The progress bar is labelled DGP3 to match the data being processed.
                estimates = Parallel(n_jobs=-1, prefer="processes")(
                    delayed(ssp_run)(run, G, i)
                    for i, run in enumerate(tqdm(runs, desc=f"DGP3 n={n}, t={t}, G={G}, k={k}"))
                )
                with open(output_path, "wb") as out_file:
                    pickle.dump(estimates, out_file)

In [None]:
from statsmodels.regression.linear_model import OLS
from statsmodels.tools.tools import add_constant

def ols(y, x):
    """Fit an OLS regression of ``y`` on ``x`` plus a constant.

    Parameters
    ----------
    y : array-like
        Dependent variable.
    x : array-like
        Regressors; ``add_constant`` is applied before fitting.

    Returns
    -------
    tuple
        ``(params, conf_int, bse)`` with the first entry (row) of each result
        dropped, i.e. everything except the leading constant term. The
        confidence intervals are at the 95% level, one ``(lower, upper)`` row
        per remaining coefficient.
    """
    model = OLS(y, add_constant(x))
    results = model.fit()
    # Slice with [1:] everywhere: indexing conf_int with [1] would return only
    # the interval of the first slope instead of one row per slope.
    return results.params[1:], results.conf_int(alpha=0.05)[1:], results.bse[1:]

def heterogeneous_ols(y, x):
    """Run a separate ``ols`` fit for every entry along the first axis of ``x``.

    Parameters
    ----------
    y : array-like
        Indexed as ``y[i]`` for each ``i`` in ``range(len(x))``.
    x : numpy.ndarray
        Needs at least three dimensions; ``x.shape[0]`` fits are run and
        ``x.shape[2]`` values are stored per fit.

    Returns
    -------
    tuple of numpy.ndarray
        ``(estimates, conf_ints, std_errors)`` with shapes
        ``(x.shape[0], x.shape[2])``, ``(x.shape[0], x.shape[2], 2)`` and
        ``(x.shape[0], x.shape[2])``.
    """
    n_units, n_regressors = x.shape[0], x.shape[2]

    estimates = np.zeros((n_units, n_regressors))
    conf_ints = np.zeros((n_units, n_regressors, 2))
    std_errors = np.zeros((n_units, n_regressors))

    for unit in range(len(x)):
        # Each unit gets its own regression; results are written row by row.
        estimates[unit], conf_ints[unit], std_errors[unit] = ols(y[unit], x[unit])

    return estimates, conf_ints, std_errors

In [None]:
# Per-unit OLS on the first 200 replications of one DGP 1 design.
with open("development/generated_data/dgp1_n100_t20_G3_k3.pkl", "rb") as data_file:
    runs = pickle.load(data_file)

estimates = []
for run in tqdm(runs[:200]):
    # Use the replication being iterated over; reading runs[0] here would
    # estimate the very same data set 200 times.
    x = np.float32(run[0])
    y = np.float32(run[1])

    estimates.append(heterogeneous_ols(y, x))

output_path = "estimates/dgp1_n100_t20_G3_k3_ols.pkl"
with open(output_path, "wb") as f:
    pickle.dump(estimates, f)

In [None]:
def heterogeneous_ols_run(dgp, n, t, G, k):
    """Run ``heterogeneous_ols`` on every replication of one design and pickle the results.

    Reads ``development/generated_data/dgp{dgp}_n{n}_t{t}_G{G}_k{k}.pkl`` and
    writes the list of per-replication results to
    ``estimates/dgp{dgp}_n{n}_t{t}_G{G}_k{k}_ols.pkl``, overwriting any
    existing file. The five arguments are only used to build these paths.
    Returns ``None``.
    """
    input_path = f"development/generated_data/dgp{dgp}_n{n}_t{t}_G{G}_k{k}.pkl"
    output_path = f"estimates/dgp{dgp}_n{n}_t{t}_G{G}_k{k}_ols.pkl"

    # NOTE: pickle can execute arbitrary code on load; only read files
    # generated by this project.
    with open(input_path, "rb") as data_file:
        runs = pickle.load(data_file)

    def _run_single_estimation(run):
        # run[0] holds the regressors and run[1] the dependent variable.
        x = np.float64(run[0])
        y = np.float64(run[1])
        return heterogeneous_ols(y, x)

    # tqdm wraps the task generator so progress is shown while jobs are dispatched.
    estimates = Parallel(n_jobs=-1)(
        delayed(_run_single_estimation)(run) for run in tqdm(runs)
    )

    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    with open(output_path, "wb") as f:
        pickle.dump(estimates, f)

In [None]:
# Enumerate every (dgp, n, t, G, k) design in the same order as the nested
# loops would, then run the per-unit OLS benchmark on each of them.
ols_designs = [
    (dgp, n, t, G, k)
    for dgp in (1, 2, 3)
    for n in (100, 200)
    for t in (20, 50)
    for G in (3, 6)
    for k in (3,)
]
for dgp, n, t, G, k in ols_designs:
    heterogeneous_ols_run(dgp, n, t, G, k)

## IC

In [None]:
import contextlib
import pandas as pd

In [None]:
def bm_ic(run, G_true, i):
    """Select ``G`` for one replication via ``grid_search_by_ic`` over ``GroupedFixedEffects``.

    Parameters
    ----------
    run : sequence of exactly four items ``(x, y, f, g)``
        ``x`` and ``y`` are cast to float32 and passed as ``exog`` and
        ``dependent``. ``f`` is cast but not used afterwards; ``g`` is ignored.
    G_true : int
        Only used to pick settings: ``G_true < 4`` enables the bootstrap
        (200 draws) with 10 GFE iterations, otherwise no bootstrap and 100
        iterations.
    i : int
        Replication index, only used in the error message.

    Returns
    -------
    The ``"G"`` entry of element ``[2]`` of the grid-search result (presumably
    the selected parameters — confirm against ``grid_search_by_ic``), or
    ``np.nan`` if the search raised.
    """
    exog, dependent, factors, _groups = run
    exog = np.float32(exog)
    dependent = np.atleast_3d(np.float32(dependent))
    factors = np.float32(factors)  # cast as before; not consumed below

    few_groups = bool(G_true < 4)
    candidate_grid = {"G": [2, 3, 4, 5, 6, 7, 8]}
    init_params = {
        "dependent": dependent,
        "exog": exog,
        "bootstrap": few_groups,
        "hide_progressbar": False,
    }
    fit_params = {
        "max_iter": 100,
        "gfe_iterations": 10 if few_groups else 100,
        "n_boot": 200 if few_groups else 100,
        "boot_n_jobs": 1,
    }

    try:
        # Everything written to stdout/stderr during the search is discarded.
        with open(os.devnull, 'w') as sink, contextlib.redirect_stdout(sink), contextlib.redirect_stderr(sink):
            search_result = grid_search_by_ic(
                GroupedFixedEffects, candidate_grid, init_params=init_params, fit_params=fit_params
            )
            return search_result[2]["G"]
    except Exception as e:
        # Printed after the redirection has been left, so the message is visible.
        print(f"Error in run {i}: {e}")
        return np.nan

# Smoke test: run the IC-based selection on a single DGP 1 replication.
# The file is opened in a context manager so the handle is closed right away.
with open("development/generated_data/dgp1_n100_t20_G3_k3.pkl", "rb") as data_file:
    runs = pickle.load(data_file)
bm_ic(runs[0], 3, 0)

True

In [None]:
# IC-based selection of G on DGP 1: for every design, record how often the
# true G is selected and how the selections spread over the candidates 2..8.
res = []
for n in (100, 200):
    for t in (20, 50):
        for G in (3, 6):
            for k in (3,):
                print(f"\n\nRunning DGP1 with n={n}, t={t}, G={G}, k={k}")
                # NOTE: pickle can execute arbitrary code on load; only read
                # files generated by this project.
                with open(f"development/generated_data/dgp1_n{n}_t{t}_G{G}_k{k}.pkl", "rb") as data_file:
                    runs = pickle.load(data_file)

                # The progress-bar total follows the data, so it stays correct
                # when the number of replications is not 500.
                estimates = Parallel(n_jobs=-1, prefer="processes")(
                    delayed(bm_ic)(run, G, i)
                    for i, run in tqdm(enumerate(runs), total=len(runs), desc=f"DGP1 n={n}, t={t}, G={G}, k={k}")
                )

                # Failed runs come back as NaN and therefore never equal G or
                # any candidate value; they count as incorrect.
                estimates = np.array(estimates)
                proportion_correct = np.mean(estimates == G)
                values = np.array([2, 3, 4, 5, 6, 7, 8])
                value_count = np.array([(estimates == val).sum() for val in values])

                print(f"Proportion of correct G estimates {n}, {t}, {G}, {k}: {proportion_correct:.2f}")
                res.append((n, t, G, k, proportion_correct, *value_count))

print(
    pd.DataFrame(res, columns=["n", "t", "G", "k", "proportion_correct", "2", "3", "4", "5", "6", "7", "8"]).to_latex()
)



Running DGP1 with n=100, t=20, G=3, k=3


DGP1 n=100, t=20, G=3, k=3: 100%|██████████| 200/200 [00:12<00:00, 15.77it/s]


Proportion of correct G estimates 100, 20, 3, 3: 1.0
\begin{tabular}{lrrrrr}
\toprule
 & n & t & G & k & proportion_correct \\
\midrule
0 & 100 & 20 & 3 & 3 & 1.000000 \\
\bottomrule
\end{tabular}



In [None]:
def ab_ic(run, G_true, i):
    """Select ``G`` for one replication via ``grid_search_by_ic`` over ``GroupedInteractiveFixedEffects``.

    Parameters
    ----------
    run : sequence with at least two items
        ``run[0]`` (x) and ``run[1]`` (y) are cast to float32 and passed as
        ``exog`` and ``dependent``; any further items are ignored.
    G_true : int
        Only used to pick settings: ``G_true < 4`` enables the bootstrap
        (200 draws) with 10 GIFE iterations, otherwise no bootstrap and 100
        iterations.
    i : int
        Replication index, only used in the error message.

    Returns
    -------
    The ``"G"`` entry of element ``[2]`` of the grid-search result (presumably
    the selected parameters — confirm against ``grid_search_by_ic``), or
    ``np.nan`` if the search raised.
    """
    x, y, *_ = run
    x = np.float32(x)
    y = np.atleast_3d(np.float32(y))

    # Settings depend on the G_true argument, not on whatever global ``G``
    # happens to exist in the notebook namespace.
    gife_iterations = 10 if G_true < 4 else 100
    n_boot = 200 if G_true < 4 else 100
    bootstrap = True if G_true < 4 else False
    try:
        # Everything written to stdout/stderr during the search is discarded.
        with open(os.devnull, 'w') as devnull:
            with contextlib.redirect_stdout(devnull), contextlib.redirect_stderr(devnull):
                return grid_search_by_ic(
                    GroupedInteractiveFixedEffects,
                    {"G": [2, 3, 4, 5, 6, 7, 8]},
                    init_params={
                        "dependent": y,
                        "exog": x,
                        "bootstrap": bootstrap,
                        "hide_progressbar": False,
                    },
                    fit_params={
                        "max_iter": 100,
                        "gife_iterations": gife_iterations,
                        "n_boot": n_boot,
                        "boot_n_jobs": 1,
                    },
                )[2]["G"]
    except Exception as e:
        print(f"Error in run {i}: {e}")
        return np.nan


# Smoke test: run the IC-based selection on a single DGP 2 replication.
# The file is opened in a context manager so the handle is closed right away.
with open("development/generated_data/dgp2_n100_t20_G3_k3.pkl", "rb") as data_file:
    runs = pickle.load(data_file)
ab_ic(runs[0], 3, 0)

True

In [None]:
# IC-based selection of G on DGP 2 (default interactive-effects model): for
# every design, record how often the true G is selected and how the selections
# spread over the candidates 2..8.
res = []
for n in (100, 200):
    for t in (20, 50):
        for G in (3, 6):
            for k in (3,):
                print(f"\n\nRunning DGP2 with n={n}, t={t}, G={G}, k={k}")
                # NOTE: pickle can execute arbitrary code on load; only read
                # files generated by this project.
                with open(f"development/generated_data/dgp2_n{n}_t{t}_G{G}_k{k}.pkl", "rb") as data_file:
                    runs = pickle.load(data_file)

                # The progress-bar total follows the data, so it stays correct
                # when the number of replications is not 500.
                estimates = Parallel(n_jobs=-1, prefer="processes")(
                    delayed(ab_ic)(run, G, i)
                    for i, run in tqdm(enumerate(runs), total=len(runs), desc=f"DGP2 n={n}, t={t}, G={G}, k={k}")
                )

                # Failed runs come back as NaN and therefore never equal G or
                # any candidate value; they count as incorrect.
                estimates = np.array(estimates)
                proportion_correct = np.mean(estimates == G)
                values = np.array([2, 3, 4, 5, 6, 7, 8])
                value_count = np.array([(estimates == val).sum() for val in values])

                print(f"Proportion of correct G estimates {n}, {t}, {G}, {k}: {proportion_correct:.2f}")
                res.append((n, t, G, k, proportion_correct, *value_count))

print(
    pd.DataFrame(res, columns=["n", "t", "G", "k", "proportion_correct", "2", "3", "4", "5", "6", "7", "8"]).to_latex()
)



Running DGP2 with n=100, t=20, G=3, k=3


DGP1 n=100, t=20, G=3, k=3: 100%|██████████| 200/200 [01:07<00:00,  2.97it/s]


Proportion of correct G estimates 100, 20, 3, 3: 1.0


Running DGP2 with n=100, t=20, G=6, k=3




KeyboardInterrupt: 

In [None]:
def sj_ic(run, G_true, i):
    """Select ``G`` for one replication via ``grid_search_by_ic`` with the ``"su_ju"`` model.

    Parameters
    ----------
    run : sequence with at least four items ``(x, y, f, g, ...)``
        ``x`` and ``y`` are cast to float32 and passed as ``exog`` and
        ``dependent``. ``f`` is cast but not used afterwards; the rest is
        ignored.
    G_true : int
        Only used to pick settings: ``G_true < 4`` enables the bootstrap
        (100 draws) with ``kappa=0.005``, otherwise no bootstrap and
        ``kappa=0.00005``.
    i : int
        Replication index, only used in the error message.

    Returns
    -------
    The ``"G"`` entry of element ``[2]`` of the grid-search result (presumably
    the selected parameters — confirm against ``grid_search_by_ic``), or
    ``np.nan`` if the search raised.
    """
    exog, dependent, factors, _groups, *_rest = run
    exog = np.float32(exog)
    dependent = np.atleast_3d(np.float32(dependent))
    factors = np.float32(factors)  # cast as before; not consumed below

    few_groups = bool(G_true < 4)
    candidate_grid = {"G": [2, 3, 4, 5, 6, 7, 8]}
    init_params = {
        "dependent": dependent,
        "exog": exog,
        "bootstrap": few_groups,
        "model": "su_ju",
        "hide_progressbar": False,
    }
    fit_params = {
        "n_boot": 100 if few_groups else 50,
        "only_bfgs": True,
        "max_iter": 100,
        "kappa": 0.005 if few_groups else 0.00005,
        "tol": 1e-2,
        "boot_n_jobs": 1,
    }

    try:
        # Everything written to stdout/stderr during the search is discarded.
        with open(os.devnull, "w") as sink, contextlib.redirect_stdout(sink), contextlib.redirect_stderr(sink):
            search_result = grid_search_by_ic(
                GroupedInteractiveFixedEffects,
                candidate_grid,
                init_params=init_params,
                fit_params=fit_params,
            )
            return search_result[2]["G"]
    except Exception as err:
        # Printed after the redirection has been left, so the message is visible.
        print(f"Error in run {i}: {err}")
        return np.nan

# runs = pickle.load(open("development/generated_data/dgp2_n100_t20_G3_k3.pkl", "rb"))
# sj_ic(runs[0], 3, 0)

False

In [None]:
# IC-based selection of G on DGP 2 ("su_ju" model): for every design, record
# how often the true G is selected and how the selections spread over the
# candidates 2..8.
res = []
for n in (100, 200):
    for t in (20, 50):
        for G in (3, 6):
            for k in (3,):
                print(f"\n\nRunning DGP2 with n={n}, t={t}, G={G}, k={k}")
                # NOTE: pickle can execute arbitrary code on load; only read
                # files generated by this project.
                with open(f"development/generated_data/dgp2_n{n}_t{t}_G{G}_k{k}.pkl", "rb") as data_file:
                    runs = pickle.load(data_file)

                # The progress-bar total follows the data, so it stays correct
                # when the number of replications is not 500.
                estimates = Parallel(n_jobs=-1, prefer="processes")(
                    delayed(sj_ic)(run, G, i)
                    for i, run in tqdm(enumerate(runs), total=len(runs), desc=f"DGP2 n={n}, t={t}, G={G}, k={k}")
                )

                # Failed runs come back as NaN and therefore never equal G or
                # any candidate value; they count as incorrect.
                estimates = np.array(estimates)
                proportion_correct = np.mean(estimates == G)
                values = np.array([2, 3, 4, 5, 6, 7, 8])
                value_count = np.array([(estimates == val).sum() for val in values])

                print(f"Proportion of correct G estimates {n}, {t}, {G}, {k}: {proportion_correct:.2f}")
                res.append((n, t, G, k, proportion_correct, *value_count))

print(
    pd.DataFrame(res, columns=["n", "t", "G", "k", "proportion_correct", "2", "3", "4", "5", "6", "7", "8"]).to_latex()
)



Running DGP2 with n=100, t=20, G=3, k=3


DGP1 n=100, t=20, G=3, k=3: 100%|██████████| 200/200 [03:08<00:00,  1.06it/s]


Proportion of correct G estimates 100, 20, 3, 3: 0.995


Running DGP2 with n=100, t=20, G=6, k=3


DGP1 n=100, t=20, G=6, k=3:  40%|████      | 80/200 [00:36<01:01,  1.97it/s]

KeyboardInterrupt: 

In [None]:
def ssp_ic(run, G_true, i):
    """Select ``G`` for one replication via ``grid_search_by_ic`` with the ``"su_shi_phillips"`` model.

    Parameters
    ----------
    run : sequence with at least two items
        ``run[0]`` (x) and ``run[1]`` (y) are cast to float32 and passed as
        ``exog`` and ``dependent``; any further items are ignored. This
        accepts the three-item ``(x, y, g)`` runs that ``ssp_run`` unpacks
        for DGP 3.
    G_true : int
        Only used to pick settings: ``G_true < 4`` enables the bootstrap
        (100 draws) with ``kappa=0.005``, otherwise no bootstrap and
        ``kappa=0.00005``.
    i : int
        Replication index, only used in the error message.

    Returns
    -------
    The ``"G"`` entry of element ``[2]`` of the grid-search result (presumably
    the selected parameters — confirm against ``grid_search_by_ic``), or
    ``np.nan`` if the search raised.
    """
    # Only x and y are needed. Demanding four or more items would raise a
    # ValueError on three-item runs before the try block is even entered.
    x, y, *_ = run
    x = np.float32(x)
    y = np.atleast_3d(np.float32(y))

    n_boot = 100 if G_true < 4 else 50
    kappa = 0.005 if G_true < 4 else 0.00005
    bootstrap = True if G_true < 4 else False
    try:
        # Everything written to stdout/stderr during the search is discarded.
        with open(os.devnull, "w") as devnull:
            with contextlib.redirect_stdout(devnull), contextlib.redirect_stderr(devnull):
                return grid_search_by_ic(
                    GroupedFixedEffects,
                    {"G": [2, 3, 4, 5, 6, 7, 8]},
                    init_params={
                        "dependent": y,
                        "exog": x,
                        "bootstrap": bootstrap,
                        "model": "su_shi_phillips",
                        "hide_progressbar": False,
                    },
                    fit_params={
                        "n_boot": n_boot,
                        "only_bfgs": True,
                        "max_iter": 100,
                        "kappa": kappa,
                        "tol": 1e-2,
                        "boot_n_jobs": 1,
                    },
                )[2]["G"]
    except Exception as err:
        print(f"Error in run {i}: {err}")
        return np.nan


# runs = pickle.load(open("development/generated_data/dgp2_n100_t20_G3_k3.pkl", "rb"))
# sj_ic(runs[0], 3, 0)

In [None]:
# IC-based selection of G on DGP 3 ("su_shi_phillips" model): for every design,
# record how often the true G is selected and how the selections spread over
# the candidates 2..8.
res = []
for n in (100, 200):
    for t in (20, 50):
        for G in (3, 6):
            for k in (3,):
                # The banner names DGP3 to match the data loaded below.
                print(f"\n\nRunning DGP3 with n={n}, t={t}, G={G}, k={k}")
                # NOTE: pickle can execute arbitrary code on load; only read
                # files generated by this project.
                with open(f"development/generated_data/dgp3_n{n}_t{t}_G{G}_k{k}.pkl", "rb") as data_file:
                    runs = pickle.load(data_file)

                # The progress-bar total follows the data, so it stays correct
                # when the number of replications is not 500.
                estimates = Parallel(n_jobs=-1, prefer="processes")(
                    delayed(ssp_ic)(run, G, i)
                    for i, run in tqdm(enumerate(runs), total=len(runs), desc=f"DGP3 n={n}, t={t}, G={G}, k={k}")
                )

                # Failed runs come back as NaN and therefore never equal G or
                # any candidate value; they count as incorrect.
                estimates = np.array(estimates)
                proportion_correct = np.mean(estimates == G)
                values = np.array([2, 3, 4, 5, 6, 7, 8])
                value_count = np.array([(estimates == val).sum() for val in values])

                print(f"Proportion of correct G estimates {n}, {t}, {G}, {k}: {proportion_correct:.2f}")
                res.append((n, t, G, k, proportion_correct, *value_count))

print(
    pd.DataFrame(res, columns=["n", "t", "G", "k", "proportion_correct", "2", "3", "4", "5", "6", "7", "8"]).to_latex()
)