In [1]:
import numpy as np
from sklearn.linear_model import Lasso
from sklearn.model_selection import GridSearchCV, KFold, train_test_split
from sklearn.metrics import mean_squared_error

In [2]:
# Generate a dictionary of unit-norm vectors (one vector per column)

def generate_gaussian_noises_dict(N, d):
    """Draw N random Gaussian atoms in d dimensions and scale each to unit length.

    Parameters
    ----------
    N : int
        Number of atoms; becomes the number of columns of the result.
    d : int
        Dimension of each atom; becomes the number of rows of the result.

    Returns
    -------
    numpy.ndarray
        Array of shape (d, N). Every column is a standard-normal draw divided
        by its own Euclidean norm.

    Notes
    -----
    Samples come from NumPy's global random state (``np.random``), so call
    ``np.random.seed`` beforehand for reproducible output.
    """
    atoms = np.random.normal(size=(d, N))
    # One L2 norm per column; keepdims leaves a (1, N) row so the division
    # below broadcasts down each column.
    column_norms = np.linalg.norm(atoms, axis=0, keepdims=True)
    return atoms / column_norms

def generate_sparse_response(gaussian_matrix, m):
    """Form a response as a random linear combination of m distinct columns.

    Parameters
    ----------
    gaussian_matrix : numpy.ndarray
        2-D array whose columns are the candidate vectors.
    m : int
        Number of distinct columns to combine.

    Returns
    -------
    y : numpy.ndarray
        Column vector of shape (gaussian_matrix.shape[0], 1): the selected
        columns multiplied by the drawn coefficients.
    indices : numpy.ndarray
        The m column indices that were chosen (sampled without replacement).
    coefficients : numpy.ndarray
        Array of shape (m, 1) of standard-normal weights, one per chosen column.

    Notes
    -----
    Uses NumPy's global random state; the column indices are drawn first and
    the coefficients second.
    """
    n_columns = gaussian_matrix.shape[1]
    support = np.random.choice(n_columns, size=m, replace=False)
    weights = np.random.normal(size=(m, 1))  # one random weight per chosen column
    response = gaussian_matrix[:, support] @ weights
    return response, support, weights

In [8]:
# Build the synthetic problem: a (d, N) dictionary of unit-norm columns and a
# response that is a random combination of m of those columns.
np.random.seed(0)  # seed NumPy's global RNG, which both generator functions draw from
N, d, m = 100000, 300, 2  # number of columns, rows per column, columns combined into y
gaussian_noises_matrix = generate_gaussian_noises_dict(N, d)
y, indices, coefficients = generate_sparse_response(gaussian_noises_matrix, m)

# Hold out 20% for testing. train_test_split partitions along the first axis,
# so it is the d rows that are divided; every one of the N columns is present
# in both the training and the testing part.
X_train, X_test, y_train, y_test = train_test_split(
    gaussian_noises_matrix,
    y,
    test_size=0.2,
    random_state=0,
)

In [9]:
# MP
# Take independent copies of the shared train/test arrays for this section.
# NOTE(review): the split above was made along rows, so these arrays have one
# row per retained row of the dictionary and one column per atom; the entries
# of `indices` are column positions (axis 1), not row positions.
MP_X_train, MP_X_test, MP_Y_train, MP_Y_test = (
    split.copy() for split in (X_train, X_test, y_train, y_test)
)

IndexError: index 83612 is out of bounds for axis 0 with size 240

In [5]:
# LASSO Regression
np.random.seed(0)
# Independent copies of the shared train/test arrays for this section.
LASSO_X_train, LASSO_X_test = X_train.copy(), X_test.copy()
LASSO_Y_train, LASSO_Y_test = y_train.copy(), y_test.copy()

# No standardization step is applied: the dictionary columns were scaled to
# unit norm when they were generated.
# NOTE(review): that scaling used all d rows; after the row-wise train/test
# split the columns of LASSO_X_train are no longer exactly unit-norm.

lasso = Lasso()
LASSO_alphas = {"alpha": [1e-4, 1e-3, 1e-2, 1e-1, 1, 10, 100]}
LASSO_cv = KFold(n_splits=10, shuffle=True, random_state=0)

# Exhaustive search over the alpha grid with 10-fold shuffled CV on the
# training data, scored by negative mean squared error.
grid = GridSearchCV(
    estimator=lasso,
    param_grid=LASSO_alphas,
    scoring='neg_mean_squared_error',
    cv=LASSO_cv,
    return_train_score=True,
)
grid.fit(LASSO_X_train, LASSO_Y_train)

# Report the selected alpha and its mean cross-validated score.
# NOTE(review): in the recorded run the smallest alpha in the grid was chosen,
# so the optimum may lie below the grid's lower edge.
print('Best parameters:', grid.best_params_)
print('Best score (neg_mean_squared_error):', grid.best_score_)

# Score the selected model on the held-out rows.
y_pred = grid.predict(LASSO_X_test)
mse_test = mean_squared_error(y_true=LASSO_Y_test, y_pred=y_pred)
print('Testing set MSE:', mse_test)

Best parameters: {'alpha': 0.0001}
Best score (neg_mean_squared_error): -6.0769806502645135e-06
Testing set MSE: 5.920689195738421e-06


In [97]:
# Show the coefficient vector of the best estimator selected by the grid search.
# NOTE(review): the array repr is truncated, so only the first and last few
# entries are visible; something like np.flatnonzero(...) on this vector would
# list the selected columns for comparison with `indices` — confirm intent.
grid.best_estimator_.coef_

array([-0., -0., -0., ...,  0.,  0., -0.])