In [1]:
import numpy as np
from scipy.stats import bootstrap
from sklearn.metrics import mean_squared_error

In [2]:
# Generate a dictionary of Unit-norm vectors

def generate_gaussian_noises_dict(N, d):
    gaussian_noises = np.random.normal(size=(d, N))
    norms = np.linalg.norm(gaussian_noises, axis=0, keepdims=True)
    # Create unit-norm vectors
    unit_vectors = gaussian_noises / norms
    return unit_vectors

def generate_sparse_response(gaussian_matrix, m):
    indices = np.random.choice(gaussian_matrix.shape[1], size=m, replace=False)
    selected_vectors = gaussian_matrix[:, indices]
    coefficients = np.random.normal(size=(m, 1))  # random coefficients for each selected vector
    y = selected_vectors @ coefficients
    
    return y, indices, coefficients

def generate_perturbed_response(y, noise_level):
    norm_y = np.linalg.norm(y)
    noise = np.random.normal(size=y.shape, scale=norm_y * noise_level)
    y_perturbed = y + noise
    return y_perturbed

In [3]:
# Use the functions to generate a Gaussian noise matrix and a sparse response
np.random.seed(0)
N = 100000
d = 300
m = 4
gaussian_noises_matrix = generate_gaussian_noises_dict(N, d)
y, indices, coefficients = generate_sparse_response(gaussian_noises_matrix, m)
y_perturbed = generate_perturbed_response(y, 0.1)

In [6]:
# Initial Orthogonal Matching Pursuit

def OMP(s, phi, K):
    """
    Perform the Orthogonal Matching Pursuit algorithm

    Args:
    s (numpy.ndarray): Input signal
    phi (numpy.ndarray): Dictionary
    K (int): Number of iterations (sparsity)

    Returns:
    a (numpy.ndarray): Sparse representation of s
    indices (list): Indices of the selected atoms
    coefficients (list): Coefficients of the selected atoms
    """
    # Initialize a and r
    a = np.zeros_like(s)
    r = s.copy()
    indices = []
    coefficients = []

    for _ in range(K):
        # Compute inner products
        inner_products = phi.T @ r
        
        # Though paper says OMP will not choose the same index twice, it does.
        inner_products[indices] = np.min(np.abs(inner_products))

        # Find the index with maximum absolute correlation
        lambda_k = np.argmax(np.abs(inner_products), axis=0)
        # print(np.max(np.abs(inner_products)))
        
        # Save the index
        indices.append(lambda_k[0])
        # print(indices)

        # Ordinary Least Squares
        X = phi[:, indices]
        betas = np.linalg.inv(X.T @ X) @ X.T @ s

        # Save the coefficient
        coefficients = betas

        # Update a
        a = X @ betas

        # Update r
        r = s - a

    return a, indices, coefficients

# Perform Orthogonal Matching Pursuit
OMP_residual, OMP_indices, OMP_coefficients = OMP(y_perturbed, gaussian_noises_matrix, 2*m)

In [7]:
OMP_coefficients

array([[ 1.90472344],
       [ 1.09546167],
       [ 1.19997854],
       [-1.26190546],
       [ 1.21839938],
       [-1.09539527],
       [ 1.16584145],
       [-0.93003484]])

In [17]:
OMP_indices

[23046, 83612, 29092, 53925, 17144, 17542, 17818, 51281]

In [10]:
# Refined Orthogonal Matching Pursuit 

def orthogonal_matching_pursuit(s, phi, K, alpha):
    """
    Perform the Orthogonal Matching Pursuit algorithm

    Args:
    s (numpy.ndarray): Input signal
    phi (numpy.ndarray): Dictionary
    K (int): Number of iterations (sparsity)
    alpha (float): Percentage of features to drop

    Returns:
    a (numpy.ndarray): Sparse representation of s
    indices (list): Indices of the selected atoms
    coefficients (list): Coefficients of the selected atoms
    """
    # Initialize a and r
    a = np.zeros_like(s)
    r = s.copy()
    indices = []
    coefficients = []

    for _ in range(K):
        # Drop alpha percentage of features in the dictionary
        #np.random.seed(K)
        drop_indices = np.random.choice(phi.shape[1], size=int(alpha*phi.shape[1]), replace=False)
        phi_dropped = np.delete(phi, drop_indices, axis=1)

        # Compute inner products
        inner_products = phi_dropped.T @ r
        
        # Though paper says OMP will not choose the same index twice, it does.
        inner_products[indices] = np.min(np.abs(inner_products))

        # Find the index with maximum absolute correlation
        lambda_k = np.argmax(np.abs(inner_products), axis=0)
        # print(np.max(np.abs(inner_products)))
        
        # Save the index
        indices.append(lambda_k[0])
        # print(indices)

        # Ordinary Least Squares
        X = phi_dropped[:, indices]
        betas = np.linalg.inv(X.T @ X) @ X.T @ s

        # Save the coefficient
        coefficients = betas

        # Update a
        a = X @ betas

        # Update r
        r = s - a

    return a, indices, coefficients

In [11]:
def bagging_OMP(y, X, alpha, s, k):
    """
    Perform bagging with OMP

    Args:
    y (numpy.ndarray): Input signal
    X (numpy.ndarray): Designed dictionary
    alpha (float): Percentage of features to drop
    s (int): Sparsity
    k (int): The maximum of iterations

    Returns:
    sparse_vector (numpy.ndarray): Sparse vector
    bagging_OMP_indice (numpy.ndarray): Indices of the selected atoms
    bagging_OMP_coefficients (numpy.ndarray): Coefficients of the selected atoms
    """
    # Initialize beta
    beta = np.zeros((X.shape[1],1))

    # Perform bagging with OMP
    for i in range(k):
        print(f'Iteration: {i}')

        # Do bootstrap
        #np.random.seed(k)
        resampled_indices = np.random.choice(X.shape[0], size=X.shape[0], replace=True)
        resampled_X = X[resampled_indices,:]
        resampled_y = y[resampled_indices,:]

        # Do OMP
        OMP_residual, OMP_indices, OMP_coefficients = orthogonal_matching_pursuit(resampled_y, resampled_X, 2*s, alpha)

        # Update beta
        for i in range(len(OMP_indices)):
            beta[OMP_indices[i]] += OMP_coefficients[i]
    
    # Update bagging result
    beta = beta/k  # Might have to rescale this parameter  
    bagging_OMP_indice = np.argsort(abs(beta.flatten()))[-2*s:]
    bagging_OMP_coefficients = beta[bagging_OMP_indice] 

    # Update sparse vector
    sparse_vector = np.zeros_like(beta)
    sparse_vector[bagging_OMP_indice] = beta[bagging_OMP_indice]

    return sparse_vector, bagging_OMP_indice, bagging_OMP_coefficients

In [12]:
sparse_vector, bagging_OMP_indice, bagging_OMP_coefficients = bagging_OMP(y_perturbed, gaussian_noises_matrix, 0.3, 4, 500)

Iteration: 0
Iteration: 1
Iteration: 2
Iteration: 3
Iteration: 4
Iteration: 5
Iteration: 6
Iteration: 7
Iteration: 8
Iteration: 9
Iteration: 10
Iteration: 11
Iteration: 12
Iteration: 13
Iteration: 14
Iteration: 15
Iteration: 16
Iteration: 17
Iteration: 18
Iteration: 19
Iteration: 20
Iteration: 21
Iteration: 22
Iteration: 23
Iteration: 24
Iteration: 25
Iteration: 26
Iteration: 27
Iteration: 28
Iteration: 29
Iteration: 30
Iteration: 31
Iteration: 32
Iteration: 33
Iteration: 34
Iteration: 35
Iteration: 36
Iteration: 37
Iteration: 38
Iteration: 39
Iteration: 40
Iteration: 41
Iteration: 42
Iteration: 43
Iteration: 44
Iteration: 45
Iteration: 46
Iteration: 47
Iteration: 48
Iteration: 49
Iteration: 50
Iteration: 51
Iteration: 52
Iteration: 53
Iteration: 54
Iteration: 55
Iteration: 56
Iteration: 57
Iteration: 58
Iteration: 59
Iteration: 60
Iteration: 61
Iteration: 62
Iteration: 63
Iteration: 64
Iteration: 65
Iteration: 66
Iteration: 67
Iteration: 68
Iteration: 69
Iteration: 70
Iteration: 71
It

In [16]:
bagging_OMP_indice

array([16078, 58551, 58499, 58525, 16187, 16091, 58554, 58581],
      dtype=int64)

In [18]:
OMP_indices

[23046, 83612, 29092, 53925, 17144, 17542, 17818, 51281]

In [14]:
# Real indices
indices

array([16274, 83612, 23046, 51189])