In [1]:
import numpy as np

# DGPs:
I want to create five example data-generating processes (DGPs), each tailored to the model it is meant to exercise.

## DGP 1:
Grouped variables and factors ($n$ = 50, 100, 200; $t$ = 10, 25, 100; $G$ = 3, 5, 10; $K$ = 3, 5, 10).

$$y_{t, i} = x_{t, i} \beta_{g_i} + f_{t, g_i} + \epsilon_{t, i}$$

In [2]:
def dgp1(N: int, T: int, G: int, K: int):
    """Simulate a panel with group-specific slopes and group-specific time effects.

    Model: ``y[i, t] = x[i, t] @ beta[g[i]] + f[t, g[i]] + epsilon[i, t]``.

    Group ``j`` (0-based) has the slope vector ``(j + 1, ..., j + 1)`` of
    length ``K`` and a non-negative time effect ``|N(0, 1)| * (j + 1)``.
    For ``G = 3`` and ``K = 2`` this reproduces the constants that were
    previously hard-coded; other values of ``G`` and ``K`` now work as well.

    Parameters
    ----------
    N : int
        Number of cross-sectional units.
    T : int
        Number of time periods.
    G : int
        Number of latent groups.
    K : int
        Number of regressors.

    Returns
    -------
    x : ndarray, shape (N, T, K)
        Standard-normal regressors.
    y : ndarray, shape (N, T)
        Simulated outcomes.
    f : ndarray, shape (T, G)
        Group-specific time effects.
    g : ndarray, shape (N,)
        Group index of each unit, drawn uniformly from ``0..G-1``.
    beta : ndarray, shape (G, K)
        Group-specific slope coefficients.

    Notes
    -----
    Randomness comes from NumPy's global ``np.random`` state; call
    ``np.random.seed`` beforehand for reproducible draws.
    """
    # Group j is scaled by j + 1; shared by the factor scale and the slopes.
    group_level = np.arange(1, G + 1)

    # The draw order (x, f, epsilon, g) is kept fixed so that seeded runs
    # give the same x, f and g as before for G = 3, K = 2.
    x = np.random.normal(size=(N, T, K))
    f = np.abs(np.random.normal(size=(T, G))) * group_level
    epsilon = np.random.normal(size=(N, T))
    beta = np.repeat(group_level[:, None], K, axis=1)
    g = np.random.choice(G, size=N)

    # beta[g] is (N, K): contract the regressor axis for every unit at once.
    # f[:, g] is (T, N), so transpose it to line up with y's (N, T) layout.
    y = np.einsum("ntk,nk->nt", x, beta[g]) + f[:, g].T + epsilon

    return x, y, f, g, beta

In [10]:
# One draw from DGP 1. The bare name on the last line makes the notebook
# display the returned (x, y, f, g, beta) tuple.
example = dgp1(N=100, T=50, G=3, K=2)
example

(array([[[-0.01030048,  0.19805936],
         [ 0.83985362, -0.51213934],
         [ 0.66844358,  0.73304725],
         ...,
         [-1.77255488,  0.65910178],
         [-0.0722959 , -1.03984308],
         [ 0.18452853,  0.47767375]],
 
        [[ 0.9744146 ,  1.73260559],
         [ 0.63390177,  0.49009018],
         [-1.94027294,  1.08961426],
         ...,
         [ 0.75067059, -1.0568707 ],
         [ 0.6930925 ,  3.21955679],
         [ 0.80966994, -2.36188267]],
 
        [[ 0.35523288,  0.17635938],
         [-0.93792812,  0.4862022 ],
         [-0.27730345, -0.14917009],
         ...,
         [ 0.54821382, -0.04710261],
         [ 0.67949854,  0.74175478],
         [-0.94394987, -0.29539376]],
 
        ...,
 
        [[-0.60269049, -0.28381245],
         [-1.15456323, -0.02266887],
         [-0.91386281,  1.00223844],
         ...,
         [ 1.53036379, -0.13056153],
         [ 0.75849003,  0.15048819],
         [-0.83373737,  0.75353304]],
 
        [[-0.15527649,  0.611

## DGP 2: Factor structures

$$y_{t, i} = x_{t, i} \beta_{g_i} + \lambda_{g_i, i}f_{t, g_i} + \epsilon_{t, i}$$

In [4]:
def dgp2(N: int, T: int, G: int, K: int, factor_scale=None):
    """Simulate a panel with group-specific slopes and a grouped factor structure.

    Model:
    ``y[i, t] = x[i, t] @ beta[g[i]] + l[i] * f[t, g[i]] + epsilon[i, t]``.

    Group ``j`` (0-based) has the slope vector ``(j + 1, ..., j + 1)`` of
    length ``K``. Each unit has its own loading ``l[i] ~ Uniform(1, 5)`` on
    the factor of its group.

    Parameters
    ----------
    N : int
        Number of cross-sectional units.
    T : int
        Number of time periods.
    G : int
        Number of latent groups.
    K : int
        Number of regressors.
    factor_scale : array-like of shape (G,), optional
        Scale applied to each group's standard-normal factor. Defaults to the
        first ``G`` entries of ``(1, 2, 5)``, the constants this DGP was
        originally written with, so it must be given explicitly when
        ``G > 3``.

    Returns
    -------
    x : ndarray, shape (N, T, K)
        Standard-normal regressors.
    y : ndarray, shape (N, T)
        Simulated outcomes.
    f : ndarray, shape (T, G)
        Group-specific factors.
    g : ndarray, shape (N,)
        Group index of each unit, drawn uniformly from ``0..G-1``.
    l : ndarray, shape (N,)
        Unit-specific factor loadings.
    beta : ndarray, shape (G, K)
        Group-specific slope coefficients.

    Raises
    ------
    ValueError
        If ``factor_scale`` is omitted while ``G > 3``, or if its shape is
        not ``(G,)``.

    Notes
    -----
    Randomness comes from NumPy's global ``np.random`` state; call
    ``np.random.seed`` beforehand for reproducible draws.
    """
    # Resolve and validate the scale before touching the random state, so a
    # bad call does not consume random draws.
    if factor_scale is None:
        default_scale = (1, 2, 5)
        if G > len(default_scale):
            raise ValueError(
                f"no default factor_scale for G={G}; pass one value per group"
            )
        factor_scale = default_scale[:G]
    factor_scale = np.asarray(factor_scale)
    if factor_scale.shape != (G,):
        raise ValueError(
            f"factor_scale must have shape ({G},), got {factor_scale.shape}"
        )

    # The draw order (x, f, loadings, epsilon, g) is kept fixed so that seeded
    # runs give the same draws as before for G = 3, K = 3.
    x = np.random.normal(size=(N, T, K))
    f = np.random.normal(size=(T, G)) * factor_scale
    loadings = np.random.uniform(1, 5, size=N)
    epsilon = np.random.normal(size=(N, T))
    beta = np.repeat(np.arange(1, G + 1)[:, None], K, axis=1)
    g = np.random.choice(G, size=N)

    # beta[g] is (N, K); f[:, g].T is (N, T), one factor path per unit, which
    # is then scaled row-wise by that unit's loading.
    y = (
        np.einsum("ntk,nk->nt", x, beta[g])
        + loadings[:, None] * f[:, g].T
        + epsilon
    )

    return x, y, f, g, loadings, beta

In [5]:
# One draw from DGP 2. The bare name on the last line makes the notebook
# display the returned (x, y, f, g, l, beta) tuple.
example = dgp2(N=300, T=100, G=3, K=3)
example

(array([[[ 1.62647542,  0.21862455, -0.91811586],
         [ 1.27687759,  1.96250551,  0.43299675],
         [-0.57364069, -0.22324326,  0.07673798],
         ...,
         [-1.87224606, -0.25267825,  0.94188107],
         [-1.71778059, -0.28332027,  1.00286093],
         [ 0.61390662, -0.28399197, -0.5956614 ]],
 
        [[ 0.74884862, -0.38416188, -0.0518779 ],
         [ 0.28158495, -0.7158681 , -0.81205647],
         [-0.1121425 ,  0.28758014,  0.04580245],
         ...,
         [-2.00011478,  0.24111541,  0.57433635],
         [ 0.83431822, -1.36349964, -1.29341482],
         [-0.44285458, -0.13382401,  0.84923067]],
 
        [[-1.40268849, -1.16282132, -1.29415347],
         [ 0.38577724, -0.35381625,  1.66558871],
         [-1.26224634,  0.65455239,  0.01796601],
         ...,
         [ 0.93532164,  0.74775644, -0.50737028],
         [ 0.21706125,  1.7801939 ,  0.55276392],
         [ 1.21908732,  0.40903486,  1.51941307]],
 
        ...,
 
        [[ 1.99858603,  0.13035272

## DGP 3:

Test case for Su, Shi, and Phillips.
$$y_{t, i} = x_{t, i} \beta_{g_i} + \alpha_i + \epsilon_{t, i}$$

In [15]:
def dgp3(N: int, T: int, G: int, K: int):
    """Simulate a panel with group-specific slopes and individual effects.

    Model: ``y[i, t] = x[i, t] @ beta[g[i]] + alpha[i] + epsilon[i, t]``.

    Group ``j`` (0-based) has the slope vector ``(j + 1, ..., j + 1)`` of
    length ``K``. The individual effects ``alpha`` are currently fixed at
    zero, so the data contain only the grouped slopes and noise.

    Parameters
    ----------
    N : int
        Number of cross-sectional units.
    T : int
        Number of time periods.
    G : int
        Number of latent groups.
    K : int
        Number of regressors.

    Returns
    -------
    x : ndarray, shape (N, T, K)
        Standard-normal regressors.
    y : ndarray, shape (N, T)
        Simulated outcomes.
    g : ndarray, shape (N,)
        Group index of each unit, drawn uniformly from ``0..G-1``.
    alpha : ndarray, shape (N,)
        Individual effects (all zeros).
    beta : ndarray, shape (G, K)
        Group-specific slope coefficients.

    Notes
    -----
    Randomness comes from NumPy's global ``np.random`` state; call
    ``np.random.seed`` beforehand for reproducible draws.
    """
    # The draw order (x, epsilon, g) is kept fixed so that seeded runs give
    # the same draws as before for G = 3, K = 3.
    x = np.random.normal(size=(N, T, K))
    # Individual effects are switched off. An earlier draft of this cell drew
    # them from Uniform(1, 5) instead.
    alpha = np.zeros(N)
    epsilon = np.random.normal(size=(N, T))
    beta = np.repeat(np.arange(1, G + 1)[:, None], K, axis=1)
    g = np.random.choice(G, size=N)

    # beta[g] is (N, K): contract the regressor axis for every unit at once.
    y = np.einsum("ntk,nk->nt", x, beta[g]) + alpha[:, None] + epsilon

    return x, y, g, alpha, beta

In [16]:
# One draw from DGP 3. dgp3 returns (x, y, g, alpha, beta), so index 4 shows
# only the group-specific slope matrix beta.
example = dgp3(N=100, T=50, G=3, K=3)
example[4]

array([[1, 1, 1],
       [2, 2, 2],
       [3, 3, 3]])

In [17]:
import pickle
from time import time
# Persist the current `example` tuple to a timestamped pickle file. The `with`
# block guarantees the file handle is flushed and closed. pickle.dump returns
# None, so its result is deliberately not bound to a name.
with open(f"dgp3-example-{int(time())}.pkl", "wb") as pickle_file:
    pickle.dump(example, pickle_file)