# Demo

In [9]:
import numpy as np
import pandas as pd
import os
from sklearn.model_selection import GridSearchCV, KFold, train_test_split
from sklearn.metrics import mean_squared_error
from algorithms import matching_pursuit, orthogonal_matching_pursuit, weak_orthogonal_matching_pursuit, sparse_LASSO

## 1. Generate Data

In [2]:
def generate_gaussian_noises_dict(N, d, seed=0):
    """Build a dictionary of ``N`` random unit-norm columns of length ``d``.

    NumPy's global random state is re-seeded with ``seed``, a ``(d, N)``
    matrix of standard normal samples is drawn, and each column is divided
    by its own Euclidean norm.

    Parameters
    ----------
    N : int
        Number of columns (atoms) to generate.
    d : int
        Length of each column.
    seed : int, optional
        Value passed to ``np.random.seed`` before sampling.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(d, N)`` whose columns all have unit norm.
    """
    np.random.seed(seed)
    raw_atoms = np.random.normal(size=(d, N))
    # One norm per column, kept as a (1, N) row so the division broadcasts.
    column_norms = np.linalg.norm(raw_atoms, axis=0, keepdims=True)
    return raw_atoms / column_norms

def generate_sparse_response(gaussian_matrix, m, seed=0):
    """Combine ``m`` randomly chosen columns of a matrix into one response.

    NumPy's global random state is re-seeded with ``seed``; ``m`` distinct
    column indices are sampled first, then ``m`` standard normal
    coefficients, and the response is the selected columns times those
    coefficients.

    Parameters
    ----------
    gaussian_matrix : numpy.ndarray
        2-D array whose columns are the candidate vectors.
    m : int
        Number of columns to combine.
    seed : int, optional
        Value passed to ``np.random.seed`` before sampling.

    Returns
    -------
    tuple
        ``(y, indices, coefficients)``: the response (one column), the
        sampled column indices (length ``m``) and the ``(m, 1)`` coefficients.
    """
    np.random.seed(seed)
    n_columns = gaussian_matrix.shape[1]
    # Sampling order matters for reproducibility: support first, then weights.
    support = np.random.choice(n_columns, size=m, replace=False)
    weights = np.random.normal(size=(m, 1))
    response = gaussian_matrix[:, support] @ weights
    return response, support, weights

def generate_perturbed_response(y, noise_level, seed=0):
    """Return ``y`` plus zero-mean Gaussian noise scaled by the norm of ``y``.

    NumPy's global random state is re-seeded with ``seed``. Every entry of
    the noise has standard deviation ``np.linalg.norm(y) * noise_level``.

    Parameters
    ----------
    y : numpy.ndarray
        Clean response.
    noise_level : float
        Noise standard deviation expressed as a fraction of the norm of ``y``.
    seed : int, optional
        Value passed to ``np.random.seed`` before sampling.

    Returns
    -------
    numpy.ndarray
        Noisy response with the same shape as ``y``.
    """
    np.random.seed(seed)
    sigma = np.linalg.norm(y) * noise_level
    return y + np.random.normal(size=y.shape, scale=sigma)

def generate_perturbed_responses(y, noise_levels, seed=0):
    """Perturb ``y`` once for each entry of ``noise_levels``.

    Each level is handled by ``generate_perturbed_response`` with the same
    ``seed``, and the results are returned in the order of ``noise_levels``.

    Returns
    -------
    list
        One perturbed response per noise level.
    """
    perturbed = []
    for level in noise_levels:
        perturbed.append(generate_perturbed_response(y, level, seed))
    return perturbed

In [3]:
# Demo problem: a dictionary of N unit-norm columns of length d, a response
# built from m of those columns, and a noisy copy of that response.
np.random.seed(0)
N, d, m = 100000, 300, 2
noise_scale = 0.05
dictionary = generate_gaussian_noises_dict(N, d)
y, indices, coefficients = generate_sparse_response(dictionary, m)
y_perturbed = generate_perturbed_response(y, noise_scale)

In [68]:
tested_algorithms = [matching_pursuit, orthogonal_matching_pursuit]

trials = 100  # number of independent trials per sparsity level
noise_levels = [0, 0.01, 0.05, 0.1]
N = 100000
d = 300
# NOTE(review): the largest level (2000) exceeds the column length d = 300 --
# confirm that this is intended.
true_sparsity_list = [2, 5, 10, 20, 100, 200, 2000]


def _run_recovery_experiment(algorithms, sparsity_levels, noise_levels, trials, N, d):
    """Run every algorithm on every (sparsity, trial, noise level) combination.

    For each sparsity level and each trial, a dictionary and a sparse
    response are generated with ``seed=trial``. The response is perturbed
    once per noise level and each algorithm is called as
    ``algorithm(y_perturbed, dictionary, K)`` with ``K`` equal to the true
    sparsity.

    The loop lives in a function so that its working variables do not
    overwrite the notebook-level ``m``, ``dictionary``, ``y``, ``indices``,
    ``coefficients`` and ``y_perturbed`` that the demo cells rely on.

    Parameters
    ----------
    algorithms : sequence of callables
        Each returns ``(residual, indices, coefficients)``.
    sparsity_levels : iterable of int
        True sparsity values ``m`` to test.
    noise_levels : sequence of float
        Relative noise levels applied to the clean response.
    trials : int
        Number of trials per sparsity level; the trial number is the seed.
    N, d : int
        Number of dictionary columns and length of each column.

    Returns
    -------
    pandas.DataFrame
        One row per (m, trial, algorithm, noise_level) holding the estimated
        indices/coefficients next to the ground-truth ``indices`` and
        ``coefficients`` of that trial.
    """
    columns = [
        'm', 'K', 'trial', 'algorithm', 'noise_level',
        'estimated_indices', 'estimated_coefficients',
        'indices', 'coefficients',
    ]
    # Rows are collected in a list and turned into a frame once at the end;
    # concatenating a one-row frame per iteration copies the whole table
    # every time.
    records = []
    for true_sparsity in sparsity_levels:
        model_sparsity = true_sparsity  # the model is given the true sparsity
        for trial in range(trials):
            # Generate a dictionary and a sparse response
            trial_dictionary = generate_gaussian_noises_dict(N, d, seed=trial)
            clean_response, true_indices, true_coefficients = generate_sparse_response(
                trial_dictionary, true_sparsity, seed=trial
            )
            # Generate perturbed responses
            perturbed_responses = generate_perturbed_responses(
                clean_response, noise_levels, seed=trial
            )
            # Run algorithms
            for algorithm_index, algorithm in enumerate(algorithms):
                for noise_level, noisy_response in zip(noise_levels, perturbed_responses):
                    _residual, estimated_indices, estimated_coefficients = algorithm(
                        noisy_response, trial_dictionary, model_sparsity
                    )
                    records.append({
                        'm': true_sparsity,
                        'K': model_sparsity,
                        'trial': trial,
                        'algorithm': algorithm_index,
                        'noise_level': noise_level,
                        'estimated_indices': estimated_indices,
                        'estimated_coefficients': estimated_coefficients,
                        # Ground truth is stored on every row so estimates can
                        # be compared with it directly.
                        'indices': true_indices,
                        'coefficients': true_coefficients,
                    })
    return pd.DataFrame(records, columns=columns)


final_results = _run_recovery_experiment(
    tested_algorithms, true_sparsity_list, noise_levels, trials, N, d
)

KeyboardInterrupt: 

In [67]:
# Sanity check: (number of rows, number of columns) of the results table.
final_results.shape

Unnamed: 0,trial,algorithm,noise_level,a,indices,coefficients
0,0,0,0.00,"[[-0.19120979969918472], [0.08050242609266271]...","[3582, 60498]","[[[1.9109490850410098]], [[0.6280557982127836]]]"
1,0,0,0.01,"[[-0.1903049456559999], [0.08057677758758931],...","[3582, 60498]","[[[1.9030169341428134]], [[0.6103691477001602]]]"
2,0,0,0.05,"[[-0.18668552948325984], [0.08087418356729535]...","[3582, 60498]","[[[1.871288330550019]], [[0.5396225456496645]]]"
3,0,0,0.10,"[[-0.06919420419858609], [0.16335127592956614]...","[3582, 49122]","[[[1.8316275760590293]], [[0.9329128784555818]]]"
4,0,1,0.00,"[[-0.18763149316921204], [0.0785656818692909],...","[3582, 60498]","[[1.8741377887404724], [0.6302060023211793]]"
...,...,...,...,...,...,...
795,99,0,0.10,"[[0.025947534265121343], [-0.02793913170610242...","[86155, 35317]","[[[-0.9154210261505685]], [[0.4291411282086854..."
796,99,1,0.00,"[[0.0028769865835809568], [-0.0382062641668236...","[86155, 29633]","[[-0.8427827412413429], [0.06410567665446153]]"
797,99,1,0.01,"[[0.0029057404412864765], [-0.0385316231651404...","[86155, 29633]","[[-0.8496980295529825], [0.06424194837927487]]"
798,99,1,0.05,"[[0.014849669962891597], [-0.03372278876222153...","[86155, 35317]","[[-0.872890012948686], [0.21538419101967166]]"


In [5]:
def create_dir(dir_name):
    """Create ``dir_name`` (including missing parents) and report the outcome.

    Prints one of three messages: the directory was created, it already
    existed, or creation failed. Failures are reported, not raised.

    Parameters
    ----------
    dir_name : str or path-like
        Directory to create.

    Returns
    -------
    None
    """
    # isdir, not exists: a regular file at this path is not an existing
    # directory and must fall through to the error branch below.
    if os.path.isdir(dir_name):
        print(f"Directory {dir_name} already exists.")
        return
    try:
        # exist_ok=True tolerates another process creating the directory
        # between the check above and this call.
        os.makedirs(dir_name, exist_ok=True)
    except OSError as e:
        print(f"Error creating directory {dir_name}: {e}")
    else:
        print(f"Directory {dir_name} created.")


Check the coefficients used to build `y`

In [5]:
# Display the coefficient array returned by generate_sparse_response.
coefficients

array([[0.34249941],
       [1.7701584 ]])

Check the indices of the dictionary columns used to build `y`

In [6]:
# Display the column indices returned by generate_sparse_response.
indices

array([16274, 83612])

## Algorithms

### MP

In [7]:
# Matching pursuit on the clean response `y`, with 2*m as the third argument.
# NOTE(review): the third argument is presumably the number of atoms to
# select -- confirm against algorithms.matching_pursuit.
MP_residual, MP_indices, MP_coefficients = matching_pursuit(y, dictionary, 2*m)

# Same call on the noisy response, for comparison with the clean run.
MP_residual_perturb, MP_indices_perturb, MP_coefficients_perturb = matching_pursuit(y_perturbed, dictionary, 2*m)

### OMP

In [8]:
# Orthogonal matching pursuit on the clean response `y`, with 2*m as the
# third argument.
# NOTE(review): the third argument is presumably the number of atoms to
# select -- confirm against algorithms.orthogonal_matching_pursuit.
OMP_residual, OMP_indices, OMP_coefficients = orthogonal_matching_pursuit(y, dictionary, 2*m)

# Same call on the noisy response, for comparison with the clean run.
OMP_residual_perturb, OMP_indices_perturb, OMP_coefficients_perturb = orthogonal_matching_pursuit(y_perturbed, dictionary, 2*m)

### WOMP

In [10]:
# Weak orthogonal matching pursuit on the clean response `y`.
# NOTE(review): the meaning of the third argument (0.001) is defined in
# algorithms.weak_orthogonal_matching_pursuit -- presumably a threshold or
# tolerance, not an atom count; confirm.
WOMP_residual, WOMP_indices, WOMP_coefficients = weak_orthogonal_matching_pursuit(y, dictionary, 0.001)

# Same call on the noisy response, for comparison with the clean run.
WOMP_residual_perturb, WOMP_indices_perturb, WOMP_coefficients_perturb = weak_orthogonal_matching_pursuit(y_perturbed, dictionary, 0.001)

### LASSO

In [31]:
# Sparse LASSO on the clean response `y`.
# NOTE(review): the third argument (0.001) is presumably the regularisation
# strength -- confirm against algorithms.sparse_LASSO.
LASSO_residual, LASSO_indices, LASSO_coefficients = sparse_LASSO(y, dictionary, 0.001)

# Same call on the noisy response, for comparison with the clean run.
LASSO_residual_perturb, LASSO_indices_perturb, LASSO_coefficients_perturb = sparse_LASSO(y_perturbed, dictionary, 0.001)