In [None]:
import numpy as np
from himalaya.backend import set_backend
from scipy.stats import zscore
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Seed NumPy's legacy global RNG so the np.random.randn draws in later cells are reproducible.
np.random.seed(42)
# Select the "cupy" backend for himalaya.
# NOTE(review): presumably requires a CUDA-capable GPU with cupy installed — confirm for the target machine.
set_backend("cupy")
# NOTE(review): "nord" does not look like a style bundled with matplotlib — presumably a
# user-installed .mplstyle file; confirm it is available wherever this notebook runs.
plt.style.use("nord")
# NOTE(review): set_theme() is applied after style.use(), so seaborn's defaults may override
# parts of the "nord" style — confirm this order is intended.
sns.set_theme()
# Last expression of the cell, so the current seaborn palette is rendered as the cell output.
sns.color_palette()

In [None]:
# Number of rows used for the training / test split of every simulated matrix.
n_samples_train = 1_000
n_samples_test = 100
# Number of target columns generated for the weights, targets and noise.
n_targets = 10_000
# Rank assigned to each feature space; their sum is the total rank of the random matrix.
d_list = [100] * 3
# Mixing weight of each feature space when the targets are built.
scalars = [1 / 3] * 3
# Multiplier applied to the z-scored noise before it is added to the targets.
noise_scalar = 0.1

In [None]:
feature_spaces = []
# Standard-normal matrix with one row per sample (train + test) and one column per unit of
# total rank, z-scored along scipy's default axis (each column).
M = zscore(np.random.randn(n_samples_train + n_samples_test, sum(d_list)))
M

In [None]:
# Distribution of the values in the first row of M (one sample across all columns).
sns.histplot(data=M[0])

In [None]:
# Full SVD of M (full_matrices defaults to True, so U and Vt are both square).
U, S, Vt = np.linalg.svd(M)
# Show the shape of each factor on one line.
print(*(factor.shape for factor in (U, S, Vt)))
S

In [None]:
# Distribution of the singular values of M.
sns.histplot(data=S)

In [None]:
# Split M into one feature space per entry of d_list, each built from its own
# consecutive block of singular components.
#
# The list is (re)created here so that re-running this cell does not append a
# second copy of every feature space to a list left over from an earlier run.
feature_spaces = []
start = 0
for rank in d_list:
    stop = start + rank
    # Equivalent to U @ diag(S with everything outside [start, stop) zeroed) @ Vt,
    # up to floating-point rounding, but without materializing the padded
    # rectangular diagonal matrix or multiplying through the unused columns of U.
    # Slicing U and Vt this way also works for both full and reduced SVD output.
    space = (U[:, start:stop] * S[start:stop]) @ Vt[start:stop]
    feature_spaces.append(space)
    start = stop
feature_spaces

In [None]:
# First n_samples_train rows of every feature space form the training split;
# the remaining rows form the test split.
feature_spaces_train = [space[:n_samples_train] for space in feature_spaces]
feature_spaces_test = [space[n_samples_train:] for space in feature_spaces]

Combine the first feature space with each of the other feature spaces (element-wise average, then z-score):
[0 + 1, 0 + 2, 0 + 3, ...]

In [None]:
# Each X is the element-wise mean of the first feature space and one of the
# remaining feature spaces, z-scored afterwards. Train and test are z-scored
# independently of each other.
Xs_train = [
    zscore(0.5 * (feature_spaces_train[0] + other))
    for other in feature_spaces_train[1:]
]
Xs_test = [
    zscore(0.5 * (feature_spaces_test[0] + other))
    for other in feature_spaces_test[1:]
]

generate weights

In [None]:
# One z-scored standard-normal weight matrix per feature space, with one row per
# feature-space column (sum(d_list)) and one column per target.
betas = [zscore(np.random.randn(sum(d_list), n_targets)) for _ in d_list]
betas

generate targets

In [None]:
def _mix_targets(spaces):
    """Return the z-scored weighted sum of the z-scored per-feature-space predictions.

    Each feature space in `spaces` is multiplied by its weight matrix from `betas`,
    z-scored, scaled by the matching entry of `scalars`, and the results are summed
    and z-scored once more.
    """
    contributions = (
        alpha * zscore(space @ beta)
        for alpha, space, beta in zip(scalars, spaces, betas)
    )
    return zscore(sum(contributions))


Y_train = _mix_targets(feature_spaces_train)
Y_test = _mix_targets(feature_spaces_test)

add noise

In [None]:
# Z-scored standard-normal noise with the same shape as the targets
# (train is drawn first, then test).
noise_train = zscore(np.random.randn(n_samples_train, n_targets))
noise_test = zscore(np.random.randn(n_samples_test, n_targets))
# The targets are updated in place, so re-running this cell on its own stacks
# another layer of noise on top of the previous one.
Y_train += noise_scalar * noise_train
Y_test += noise_scalar * noise_test
# Display the scaled training noise that was just added.
noise_scalar * noise_train