In [1]:
import numpy as np

# DGPs:
This notebook defines five example data-generating processes (DGPs), each designed for the model it is best suited to.

## DGP 1:
Grouped variables and factors ($N$ = 50, 100, 200; $T$ = 10, 25, 100; $G$ = 3, 5, 10; $K$ = 3, 5, 10).

$$y_{t, i} = x_{t, i} \beta_{g_i} + f_{t, g_i} + \epsilon_{t, i}$$

In [68]:
def dgp1(N: int, T: int, G: int, K: int, rng=None):
    """Simulate a panel with group-specific slopes and group-specific time effects.

    Model: ``y[i, t] = x[i, t] @ beta[g[i]] + f[t, g[i]] + epsilon[i, t]``.

    Parameters
    ----------
    N : int
        Number of cross-sectional units.
    T : int
        Number of time periods.
    G : int
        Number of latent groups.
    K : int
        Number of regressors.
    rng : optional
        Random source exposing ``normal`` and ``choice`` (for example a
        ``np.random.Generator`` or ``np.random.RandomState``). Defaults to the
        global ``np.random`` module, so ``np.random.seed`` still controls the
        draws when no source is passed.

    Returns
    -------
    x : ndarray, shape (N, T, K)
        Standard-normal regressors.
    y : ndarray, shape (N, T)
        Outcomes.
    f : ndarray, shape (T, G)
        Group time effects: cumulative sums of absolute normal draws, with
        column ``j`` scaled by ``j + 1``.
    g : ndarray, shape (N,)
        Group membership of each unit, drawn uniformly from ``0..G-1``.
    beta : ndarray, shape (G, K)
        Slopes; every entry of row ``j`` equals ``j + 1``.
    """
    rng = np.random if rng is None else rng

    # 1, 2, ..., G: used both as the per-group factor scale and as the slope
    # level, which reproduces the former hard-coded values when G == K == 3.
    group_levels = np.arange(1, G + 1)

    # Draw order (x, f, epsilon, g) is kept fixed so seeded runs are stable.
    x = rng.normal(size=(N, T, K))
    f = np.abs(rng.normal(size=(T, G))).cumsum(axis=0) * group_levels
    epsilon = rng.normal(size=(N, T))
    beta = np.tile(group_levels[:, None], (1, K))
    g = rng.choice(G, size=N)

    # Per-unit x[i] @ beta[g[i]] for all units at once; f[:, g] is (T, N).
    y = np.einsum("ntk,nk->nt", x, beta[g]) + f[:, g].T + epsilon

    return x, y, f, g, beta

In [75]:
# Small DGP 1 sample: 30 units, 10 periods, 3 groups, 3 regressors.
example = dgp1(N=30, T=10, G=3, K=3)
example

(array([[[-7.07824844e-01, -1.16712149e+00, -4.47347408e-01],
         [ 3.23971825e-01,  9.45556967e-01, -1.34237759e-01],
         [-5.71575067e-02,  1.96348577e+00, -4.22975892e-01],
         [ 4.30110720e-01, -6.29330232e-01,  1.89832846e+00],
         [ 1.11368664e+00,  7.32817029e-01,  1.30467327e+00],
         [ 1.34418884e+00,  5.07665788e-01, -2.28073167e+00],
         [ 1.01791512e+00,  2.87428814e-01,  1.45613267e+00],
         [ 1.75964111e+00, -1.13950231e+00, -4.67831037e-02],
         [ 5.45390762e-01, -6.57993175e-01, -6.39306078e-02],
         [ 3.45245840e+00,  4.21422067e-01, -8.93991837e-01]],
 
        [[-1.38477595e+00,  1.10663645e+00, -1.92643054e+00],
         [ 5.15546358e-01, -8.54978219e-01,  6.55600587e-01],
         [ 1.55750285e+00,  5.02558380e-02, -1.65104962e+00],
         [ 1.01663518e-01,  5.22377514e-01, -7.36340704e-01],
         [ 1.17131274e+00, -7.16994263e-01,  6.73345232e-01],
         [-1.31157396e+00,  1.36358910e+00, -6.53978884e-01],
     

## DGP 2: Factor structures

$$y_{t, i} = x_{t, i} \beta_{g_i} + \lambda_{g_i, i}f_{t, g_i} + \epsilon_{t, i}$$

In [104]:
def dgp2(N: int, T: int, G: int, K: int, factor_scale=None, rng=None):
    """Simulate a panel with group-specific slopes and a loaded group factor.

    Model: ``y[i, t] = x[i, t] @ beta[g[i]] + lam[i] * f[t, g[i]] + epsilon[i, t]``.

    Parameters
    ----------
    N : int
        Number of cross-sectional units.
    T : int
        Number of time periods.
    G : int
        Number of latent groups.
    K : int
        Number of regressors.
    factor_scale : scalar or sequence of length G, optional
        Multiplier applied to each group's factor column. Defaults to
        ``(1, 2, 5)``, which is only valid for ``G == 3``; pass an explicit
        value for any other ``G``.
    rng : optional
        Random source exposing ``normal``, ``uniform`` and ``choice`` (for
        example a ``np.random.Generator``). Defaults to the global
        ``np.random`` module, so ``np.random.seed`` still controls the draws.

    Returns
    -------
    x : ndarray, shape (N, T, K)
        Standard-normal regressors.
    y : ndarray, shape (N, T)
        Outcomes.
    f : ndarray, shape (T, G)
        Scaled group factors.
    g : ndarray, shape (N,)
        Group membership of each unit, drawn uniformly from ``0..G-1``.
    lam : ndarray, shape (N,)
        Unit-specific factor loadings, uniform on [1, 5).
    beta : ndarray, shape (G, K)
        Slopes; every entry of row ``j`` equals ``j + 1``.

    Raises
    ------
    ValueError
        If ``factor_scale`` is neither a scalar nor of shape ``(G,)``.
    """
    rng = np.random if rng is None else rng

    scale = np.asarray((1, 2, 5) if factor_scale is None else factor_scale)
    # Reject a mismatched scale up front: silent broadcasting would otherwise
    # give f the wrong number of columns (e.g. (T, 3) when G == 1).
    if scale.ndim > 0 and scale.shape != (G,):
        raise ValueError(
            f"factor_scale must be a scalar or have shape ({G},), got shape {scale.shape}"
        )

    # Draw order (x, f, lam, epsilon, g) is kept fixed so seeded runs are stable.
    x = rng.normal(size=(N, T, K))
    f = rng.normal(size=(T, G)) * scale
    lam = rng.uniform(1, 5, size=N)
    epsilon = rng.normal(size=(N, T))
    # Row j is filled with j + 1; equals the former hard-coded matrix at G == K == 3.
    beta = np.tile(np.arange(1, G + 1)[:, None], (1, K))
    g = rng.choice(G, size=N)

    # Per-unit x[i] @ beta[g[i]] for all units at once; f[:, g] is (T, N).
    y = np.einsum("ntk,nk->nt", x, beta[g]) + lam[:, None] * f[:, g].T + epsilon

    return x, y, f, g, lam, beta

In [105]:
# DGP 2 sample: 300 units, 100 periods, 3 groups, 3 regressors.
example = dgp2(N=300, T=100, G=3, K=3)
example

(array([[[-0.40343436, -0.16549315,  1.00662797],
         [ 0.54818735,  0.30619078, -0.09251341],
         [-0.67129636,  0.03237763, -0.55931071],
         ...,
         [ 0.12716569, -0.95028322,  0.48190253],
         [ 0.33208043,  0.78116798,  1.53996736],
         [ 0.74997818, -0.10721394,  0.56026861]],
 
        [[-1.7732017 , -0.36514129,  0.36928175],
         [-0.04217653,  1.00519461,  0.31373204],
         [ 0.19169562, -0.07540095, -1.02032354],
         ...,
         [ 1.25309607, -1.09473386, -0.61987132],
         [-0.11820375, -0.71373141, -0.65487019],
         [-0.96837543, -0.50011843, -0.86155125]],
 
        [[-1.33851381,  0.47613993, -0.14172951],
         [ 0.40174625, -2.04630765,  0.59162223],
         [-1.95158775, -0.67912089, -0.64036272],
         ...,
         [-0.4689927 ,  0.80302347, -1.29970434],
         [ 0.45021042, -2.32015743,  2.75446796],
         [ 0.04465423,  0.76237328,  1.33286835]],
 
        ...,
 
        [[ 0.60894818, -1.27202738

## DGP 3:

Test case for Su, Shi, \& Phillips.
$$y_{t, i} = x_{t, i} \beta_{g_i} + \alpha_i + \epsilon_{t, i}$$

In [116]:
def dgp3(N: int, T: int, G: int, K: int, rng=None):
    """Simulate a panel with group-specific slopes and individual fixed effects.

    Model: ``y[i, t] = x[i, t] @ beta[g[i]] + alpha[i] + epsilon[i, t]``.

    Parameters
    ----------
    N : int
        Number of cross-sectional units.
    T : int
        Number of time periods.
    G : int
        Number of latent groups.
    K : int
        Number of regressors.
    rng : optional
        Random source exposing ``normal``, ``uniform`` and ``choice`` (for
        example a ``np.random.Generator``). Defaults to the global
        ``np.random`` module, so ``np.random.seed`` still controls the draws.

    Returns
    -------
    x : ndarray, shape (N, T, K)
        Standard-normal regressors.
    y : ndarray, shape (N, T)
        Outcomes.
    g : ndarray, shape (N,)
        Group membership of each unit, drawn uniformly from ``0..G-1``.
    alpha : ndarray, shape (N,)
        Unit fixed effects, uniform on [1, 5).
    beta : ndarray, shape (G, K)
        Slopes; every entry of row ``j`` equals ``j + 1``.
    """
    rng = np.random if rng is None else rng

    # Draw order (x, alpha, epsilon, g) is kept fixed so seeded runs are stable.
    x = rng.normal(size=(N, T, K))
    alpha = rng.uniform(1, 5, size=N)
    epsilon = rng.normal(size=(N, T))
    # Row j is filled with j + 1; equals the former hard-coded matrix at G == K == 3.
    beta = np.tile(np.arange(1, G + 1)[:, None], (1, K))
    g = rng.choice(G, size=N)

    # Per-unit x[i] @ beta[g[i]] for all units at once; alpha broadcasts over time.
    y = np.einsum("ntk,nk->nt", x, beta[g]) + alpha[:, None] + epsilon

    return x, y, g, alpha, beta

In [117]:
# DGP 3 sample; the last element of the returned 5-tuple is the slope matrix beta.
example = dgp3(N=300, T=100, G=3, K=3)
example[-1]

array([[1, 1, 1],
       [2, 2, 2],
       [3, 3, 3]])

In [119]:
import pickle
# Persist the DGP 3 sample. pickle.dump() returns None, so its result is not
# bound to a name; the context manager closes the file so the pickle is
# fully flushed to disk.
with open("dgp3-example-4.pkl", "wb") as pkl_file:
    pickle.dump(example, pkl_file)