# Demo

In [1]:
import numpy as np
import pandas as pd

from sklearn.model_selection import GridSearchCV, KFold, train_test_split
from sklearn.metrics import mean_squared_error
import pickle as pkl
from algorithms import matching_pursuit, orthogonal_matching_pursuit, weak_orthogonal_matching_pursuit, sparse_LASSO
from concurrent.futures import ThreadPoolExecutor

## 1. Generate Data

In [2]:
def generate_gaussian_noises_dict(N, d, seed=0):
    """Build a random dictionary of N unit-norm atoms of dimension d.

    Reseeds NumPy's global RNG with ``seed``, draws a (d, N) matrix of
    standard-normal entries and rescales every column to Euclidean norm 1.

    Parameters
    ----------
    N : int
        Number of atoms (columns of the result).
    d : int
        Dimension of each atom (rows of the result).
    seed : int, default 0
        Passed to ``np.random.seed``; this resets the global RNG state.

    Returns
    -------
    numpy.ndarray
        Array of shape (d, N) whose columns all have unit norm.
    """
    np.random.seed(seed)
    raw_atoms = np.random.normal(size=(d, N))
    # keepdims=True yields a (1, N) row of column norms that broadcasts over the d rows.
    column_norms = np.linalg.norm(raw_atoms, axis=0, keepdims=True)
    return raw_atoms / column_norms

def generate_sparse_response(gaussian_matrix, m, seed=0):
    """Form a response as a random combination of m distinct dictionary columns.

    Reseeds NumPy's global RNG with ``seed``, picks ``m`` column indices of
    ``gaussian_matrix`` without replacement, draws one standard-normal
    coefficient per picked column, and returns their linear combination.

    Parameters
    ----------
    gaussian_matrix : numpy.ndarray
        2-D dictionary whose columns are the candidate atoms.
    m : int
        Number of columns to combine.
    seed : int, default 0
        Passed to ``np.random.seed``; this resets the global RNG state.

    Returns
    -------
    y : numpy.ndarray
        Column vector ``gaussian_matrix[:, indices] @ coefficients``.
    indices : numpy.ndarray
        The ``m`` selected column indices.
    coefficients : numpy.ndarray
        The (m, 1) coefficients, in the same order as ``indices``.
    """
    np.random.seed(seed)
    n_atoms = gaussian_matrix.shape[1]
    # Indices are drawn before the coefficients; the order of draws fixes the result.
    indices = np.random.choice(n_atoms, size=m, replace=False)
    coefficients = np.random.normal(size=(m, 1))
    y = gaussian_matrix[:, indices] @ coefficients
    return y, indices, coefficients

def generate_perturbed_response(y, noise_level, seed=0):
    """Return ``y`` plus Gaussian noise scaled relative to the norm of ``y``.

    Reseeds NumPy's global RNG with ``seed`` and adds zero-mean normal noise
    whose standard deviation is ``noise_level * ||y||``.

    Parameters
    ----------
    y : numpy.ndarray
        Clean response; the noise has the same shape.
    noise_level : float
        Noise standard deviation as a fraction of the norm of ``y``.
    seed : int, default 0
        Passed to ``np.random.seed``; this resets the global RNG state.

    Returns
    -------
    numpy.ndarray
        The perturbed response, same shape as ``y``.
    """
    np.random.seed(seed)
    sigma = np.linalg.norm(y) * noise_level
    return y + np.random.normal(size=y.shape, scale=sigma)

def generate_perturbed_responses(y, noise_levels, seed=0):
    """Perturb ``y`` once per entry of ``noise_levels``.

    Each entry is handed to ``generate_perturbed_response`` together with the
    same ``seed``, so every call reseeds the RNG identically.

    Returns a list of perturbed responses in the order of ``noise_levels``.
    """
    perturbed = []
    for level in noise_levels:
        perturbed.append(generate_perturbed_response(y, level, seed))
    return perturbed

In [3]:
# Build the experiment inputs: a random unit-norm dictionary, an m-sparse
# response y, and a noisy copy of y.
# NOTE: each helper called below reseeds the global RNG itself with its default
# seed=0, so the np.random.seed(0) here is overridden by the first helper call.
np.random.seed(0)
N = 100000  # number of dictionary atoms (columns)
d = 300  # dimension of each atom (rows)
m = 2  # number of atoms combined to form y
dictionary = generate_gaussian_noises_dict(N, d)
y, indices, coefficients = generate_sparse_response(dictionary, m)
noise_scale = 0.05  # noise standard deviation as a fraction of ||y||
y_perturbed = generate_perturbed_response(y, noise_scale)

Check the true coefficients

In [4]:
# Coefficients drawn by generate_sparse_response for the m selected atoms, shape (m, 1).
coefficients

array([[1.87413779],
       [0.630206  ]])

Check indices

In [5]:
# Column indices of the dictionary atoms that were combined to form y.
indices

array([ 3582, 60498])

## Algorithms

### MP

In [6]:
# Matching pursuit on the clean response y.
# NOTE(review): the third argument (2*m) is presumably the number of iterations;
# the recorded run on the perturbed response reports 2*m = 4 steps. Confirm in algorithms.py.
MP_a, MP_indices, MP_coefficients = matching_pursuit(y, dictionary, 2*m)

# Same call on the noisy response.
MP_a_perturb, MP_indices_perturb, MP_coefficients_perturb = matching_pursuit(y_perturbed, dictionary, 2*m)

# Only the perturbed-run results are printed.
print('The indices for each step:', MP_indices_perturb)
print('The coefficients for each step:', MP_coefficients_perturb)

The indices for each step: [3582, 60498, 49122, 34717]
The coefficients for each step: [array([[1.87128833]]), array([[0.53962255]]), array([[0.46872148]]), array([[0.42025955]])]


### OMP

In [7]:
# Orthogonal matching pursuit on the clean response y.
# NOTE(review): the third argument (2*m) is presumably the number of iterations;
# the recorded run on the perturbed response reports 2*m = 4 indices. Confirm in algorithms.py.
OMP_a, OMP_indices, OMP_coefficients = orthogonal_matching_pursuit(y, dictionary, 2*m)

# Same call on the noisy response.
OMP_a_perturb, OMP_indices_perturb, OMP_coefficients_perturb = orthogonal_matching_pursuit(y_perturbed, dictionary, 2*m)

# Only the perturbed-run results are printed.
print('The indices for each step:', OMP_indices_perturb)
print('The coefficients for each step:', OMP_coefficients_perturb)

The indices for each step: [3582, 60498, 49122, 34717]
The coefficients for each step: [[1.793204  ]
 [0.54003735]
 [0.4801365 ]
 [0.42310107]]


### WOMP

In [8]:
# Weak orthogonal matching pursuit on the clean response y.
# NOTE(review): the third argument (0.001) is presumably a threshold or tolerance
# rather than a step count. Confirm its meaning in algorithms.py.
WOMP_a, WOMP_indices, WOMP_coefficients = weak_orthogonal_matching_pursuit(y, dictionary, 0.001)

# Same call on the noisy response.
WOMP_a_perturb, WOMP_indices_perturb, WOMP_coefficients_perturb = weak_orthogonal_matching_pursuit(y_perturbed, dictionary, 0.001)

# Only the perturbed-run results are printed; the recorded run selected a single atom.
print('The indices for each step:', WOMP_indices_perturb)
print('The coefficients for each step:', WOMP_coefficients_perturb)

The indices for each step: [3582]
The coefficients for each step: [[1.87128833]]


## Performance

In [9]:
# Execute testing.py inside this notebook's namespace (-i), so it can use the names defined above.
# NOTE(review): presumably this script writes the results.pkl loaded in a later cell. Confirm.
%run -i 'testing.py'

Done!


In [24]:
# Load the benchmark results table from results.pkl.
# The with-block closes the file handle once loading finishes
# (the bare open() call previously left it open).
# NOTE(review): pickle.load can execute arbitrary code, so only load a
# results.pkl that was produced locally (presumably by testing.py; confirm).
with open('results.pkl', 'rb') as results_file:
    performances = pkl.load(results_file)

In [25]:
# Sort values by absolute value, largest first
def sorting(numbers_array):
    """Order values by decreasing absolute value.

    Returns
    -------
    sorted_numbers : list
        The elements of ``numbers_array`` from largest to smallest magnitude.
    argsort_idx : list
        Positions in ``numbers_array`` in that same magnitude-descending order,
        obtained by reversing ``np.argsort`` of the absolute values.
    """
    descending_order = np.argsort(np.abs(numbers_array))[::-1]
    # Sort ascending, then reverse (not reverse=True, which would order ties differently).
    by_magnitude = sorted(numbers_array, key=abs)
    by_magnitude.reverse()
    return by_magnitude, list(descending_order)

In [26]:
# Per-row mean squared error between the true and the recovered coefficients
def calculate_mse(row):
    """Mean squared error between true and output coefficients of one row.

    The first entry of ``row['Real Coefficients']`` and of
    ``row['Output Coefficients']`` are each flattened to 1-D. The true
    coefficients are reordered by decreasing absolute value (via ``sorting``)
    before being compared position by position with the output coefficients,
    which are used in the order stored.

    Returns the value of ``mean_squared_error`` for the two vectors.
    """
    truth = np.array(row['Real Coefficients'][0]).ravel()
    estimate = np.array(row['Output Coefficients'][0]).ravel()

    # Only the sorted values are needed here; the index order is discarded.
    truth_by_magnitude = sorting(truth)[0]

    return mean_squared_error(truth_by_magnitude, estimate)

def calculate_correct(row):
    """Fraction of output indices that match the true indices position by position.

    The true indices (first entry of ``row['Real Indices']``) are reordered by
    decreasing absolute value of the matching true coefficients. The first
    ``len(output_indices)`` of them are compared elementwise with the first
    entry of ``row['Output Indices']``, and the number of matches is divided
    by the number of output indices.
    """
    true_coef = np.array(row['Real Coefficients'][0]).ravel()
    true_idx = np.array(row['Real Indices'][0]).ravel()
    found_idx = np.array(row['Output Indices'][0]).ravel()

    # Only the magnitude-descending index order is needed from sorting().
    order = sorting(true_coef)[1]
    n_found = len(found_idx)
    expected_prefix = true_idx[order][:n_found]

    hits = np.sum(expected_prefix == found_idx)
    return hits / n_found

# Drop the bookkeeping column and attach the per-row metrics.
# errors='ignore' keeps this cell re-runnable: on a second execution
# 'Trial index' is already gone and a plain drop() would raise KeyError.
performances = performances.drop(columns=['Trial index'], errors='ignore')
# axis=1 passes one row at a time to the metric functions.
performances['MSE'] = performances.apply(calculate_mse, axis=1)
performances['Correct Percentage'] = performances.apply(calculate_correct, axis=1)

# Preview the first rows with the new columns.
performances.head()

Unnamed: 0,Algorithm,True sparsity,Noise level,Real Indices,Real Coefficients,Output Indices,Output Coefficients,MSE,Correct Percentage
0,matching_pursuit,2,0.0,"[[53218, 90510]]","[[[2.0657833202188343], [-1.471156925832625]]]","[[53218, 90510]]","[[[[1.92070472]], [[-1.45684995]]]]",0.01062624,1.0
1,matching_pursuit,2,0.0,"[[55853, 7390]]","[[[2.685162688755537], [-0.7635115532347454]]]","[[55853, 7390]]","[[[[2.68499343]], [[-0.76351152]]]]",1.432366e-08,1.0
2,matching_pursuit,2,0.0,"[[14375, 5671]]","[[[1.6243453636632417], [-0.6117564136500754]]]","[[14375, 5671]]","[[[[1.64983221]], [[-0.61069459]]]]",0.0003253535,1.0
3,matching_pursuit,2,0.0,"[[41909, 28290]]","[[[-0.14857953072029476], [1.5523643400605915]]]","[[28290, 41909]]","[[[[1.5403358]], [[-0.14760574]]]]",7.281703e-05,1.0
4,matching_pursuit,2,0.0,"[[60743, 33949]]","[[[-1.0297095253132769], [-1.030208617706558]]]","[[33949, 60743]]","[[[[-1.06135451]], [[-1.02876745]]]]",0.0004854769,1.0


## LASSO

In [31]:
# Sparse LASSO on the clean response y.
# NOTE(review): the third argument (0.001) is presumably the regularization strength. Confirm in algorithms.py.
LASSO_residual, LASSO_indices, LASSO_coefficients = sparse_LASSO(y, dictionary, 0.001)

# Same call on the noisy response.
LASSO_residual_perturb, LASSO_indices_perturb, LASSO_coefficients_perturb = sparse_LASSO(y_perturbed, dictionary, 0.001)