In [1]:
import os
os.environ["OMP_NUM_THREADS"] = "1"
os.environ["MKL_NUM_THREADS"] = "1"
os.environ["OPENBLAS_NUM_THREADS"] = "1"

import numpy as np
import sklearn

from sklearn.linear_model import OrthogonalMatchingPursuit
from sklearn.datasets import make_regression
from sklearn.decomposition import SparseCoder, sparse_encode
from sklearn.linear_model import orthogonal_mp_gram

import scipy.linalg as LA

# Sklearn

## sparse_encode

In [8]:
def sklearn_OMP(Y, T_0, D, rng=42):
    # loss = np.empty(num_iter)
    # rng = np.random.default_rng(rng)

    # X = SparseCoder(dictionary = D, transform_n_nonzero_coefs = T_0).transform(Y)
    gram = D @ D.T
    cov = D @ Y.T
    X = sparse_encode(Y, 
                      D, 
                      algorithm='omp', 
                      n_nonzero_coefs = T_0,
                      gram = gram,
                      cov = cov,
                      copy_cov=False)
    return X

## orthogonal_mp_gram

In [9]:
def sklearn_OMP2(Y, T_0, D, rng=42):
    # loss = np.empty(num_iter)
    # rng = np.random.default_rng(rng)

    # X = SparseCoder(dictionary = D, transform_n_nonzero_coefs = T_0).transform(Y)
    gram = D @ D.T
    cov = D @ Y.T
    X = orthogonal_mp_gram(
        Gram=gram, 
        Xy=cov, 
        n_nonzero_coefs=T_0, 
        copy_Gram=False, 
        copy_Xy=False
    ).T
    return X

# Numpy lstsq

In [10]:
def OMP(Y, T_0, D, rng=42, debug=False):
    # loss = np.empty(num_iter)
    # rng = np.random.default_rng(rng)

    X = np.zeros((Y.shape[0], D.shape[0]))

    # gram = D @ D.T
    # cov = D @ Y.T
    # X = sparse_encode(Y, 
    #                   D, 
    #                   algorithm='omp', 
    #                   n_nonzero_coefs = T_0,
    #                   gram = gram,
    #                   cov = cov,
    #                   copy_cov=False)

    for i, y in enumerate(Y):
        I = []
        D_I = np.zeros((T_0, D.shape[1]))
        r = y
        gamma = 0

        for j in range(T_0):
            D_r = np.abs(D @ r)
            k = np.argmax(D_r)
            
            I.append(k)
            D_I[j] = D[k]
            
            gamma, res, rank, s = np.linalg.lstsq(D_I[:j+1].T, y)
            if debug:
                print(gamma.shape)
                print(D_I[:j+1].T.shape)
                print(y.shape)
            r = y - D_I[:(j+1)].T @ gamma 
            if debug:
                print(np.sum(r * r))
                print(res)

        X[i, I] = gamma
        
    return X

# Scipy lstsq (gelsd driver)

In [11]:
def OMP_2(Y, T_0, D, rng=42, debug=False):
    # loss = np.empty(num_iter)
    # rng = np.random.default_rng(rng)

    X = np.zeros((Y.shape[0], D.shape[0]))

    # gram = D @ D.T
    # cov = D @ Y.T
    # X = sparse_encode(Y, 
    #                   D, 
    #                   algorithm='omp', 
    #                   n_nonzero_coefs = T_0,
    #                   gram = gram,
    #                   cov = cov,
    #                   copy_cov=False)

    for i, y in enumerate(Y):
        I = []
        D_I = np.zeros((T_0, D.shape[1]))
        r = y
        gamma = 0

        for j in range(T_0):
            D_r = np.abs(D @ r)
            k = np.argmax(D_r)
            
            I.append(k)
            D_I[j] = D[k]
            
            gamma, res, rank, s = LA.lstsq(D_I[:j+1].T, y, lapack_driver='gelsd')
            if debug:
                print(gamma.shape)
                print(D_I[:j+1].T.shape)
                print(y.shape)
            r = y - D_I[:(j+1)].T @ gamma 
            if debug:
                print(np.sum(r * r))
                print(res)

        X[i, I] = gamma
        
    return X

# Scipy lstsq (gelsy driver)

In [12]:
def OMP_3(Y, T_0, D, rng=42, debug=False):
    # loss = np.empty(num_iter)
    # rng = np.random.default_rng(rng)

    X = np.zeros((Y.shape[0], D.shape[0]))

    # gram = D @ D.T
    # cov = D @ Y.T
    # X = sparse_encode(Y, 
    #                   D, 
    #                   algorithm='omp', 
    #                   n_nonzero_coefs = T_0,
    #                   gram = gram,
    #                   cov = cov,
    #                   copy_cov=False)

    for i, y in enumerate(Y):
        I = []
        D_I = np.zeros((T_0, D.shape[1]))
        r = y
        gamma = 0

        for j in range(T_0):
            D_r = np.abs(D @ r)
            k = np.argmax(D_r)
            
            I.append(k)
            D_I[j] = D[k]
            
            gamma, res, rank, s = LA.lstsq(D_I[:j+1].T, y, lapack_driver='gelsy')
            if debug:
                print(gamma.shape)
                print(D_I[:j+1].T.shape)
                print(y.shape)
            r = y - D_I[:(j+1)].T @ gamma 
            if debug:
                print(np.sum(r * r))
                print(res)

        X[i, I] = gamma
        
    return X

In [13]:
def OMP_verif(code):
    print(code)
    I = np.argmax(np.abs(code), axis = 1)
    print(I)
    print(code[np.arange(I.shape[0]), I])

In [14]:
X, y = make_regression(n_samples = 50, n_features = 20, n_targets = 2, noise=4, random_state=0)

In [15]:
X.shape

(50, 20)

In [16]:
y.shape

(50, 2)

In [17]:
b_sklearn = sklearn_OMP(y.T, 1, X.T)

In [18]:
OMP_verif(b_sklearn)

[[  0.           0.           0.           0.           0.
    0.           0.           0.           0.           0.
    0.          99.70035543   0.           0.           0.
    0.           0.           0.           0.           0.        ]
 [  0.         146.54061532   0.           0.           0.
    0.           0.           0.           0.           0.
    0.           0.           0.           0.           0.
    0.           0.           0.           0.           0.        ]]
[11  1]
[ 99.70035543 146.54061532]


In [19]:
b_sklearn2 = sklearn_OMP2(y.T, 1, X.T)

In [20]:
OMP_verif(b_sklearn2)

[[  0.           0.           0.           0.           0.
    0.           0.           0.           0.           0.
    0.          99.70035543   0.           0.           0.
    0.           0.           0.           0.           0.        ]
 [  0.         146.54061532   0.           0.           0.
    0.           0.           0.           0.           0.
    0.           0.           0.           0.           0.
    0.           0.           0.           0.           0.        ]]
[11  1]
[ 99.70035543 146.54061532]


In [21]:
b_numpy = OMP(y.T, 1, X.T, debug=True)

(1,)
(50, 1)
(50,)
2449425.0712439856
[2449425.07124399]
(1,)
(50, 1)
(50,)
1817892.1699581572
[1817892.16995816]


In [22]:
OMP_verif(b_numpy)

[[  0.           0.           0.           0.           0.
    0.           0.           0.           0.           0.
    0.          99.70035543   0.           0.           0.
    0.           0.           0.           0.           0.        ]
 [  0.         146.54061532   0.           0.           0.
    0.           0.           0.           0.           0.
    0.           0.           0.           0.           0.
    0.           0.           0.           0.           0.        ]]
[11  1]
[ 99.70035543 146.54061532]


In [23]:
b_scipy = OMP_2(y.T, 1, X.T, debug=True)

(1,)
(50, 1)
(50,)
2449425.0712439856
2449425.071243986
(1,)
(50, 1)
(50,)
1817892.1699581572
1817892.1699581575


In [24]:
OMP_verif(b_scipy)

[[  0.           0.           0.           0.           0.
    0.           0.           0.           0.           0.
    0.          99.70035543   0.           0.           0.
    0.           0.           0.           0.           0.        ]
 [  0.         146.54061532   0.           0.           0.
    0.           0.           0.           0.           0.
    0.           0.           0.           0.           0.
    0.           0.           0.           0.           0.        ]]
[11  1]
[ 99.70035543 146.54061532]


In [25]:
b_scipy2 = OMP_3(y.T, 1, X.T, debug=True)

(1,)
(50, 1)
(50,)
2449425.071243986
[]
(1,)
(50, 1)
(50,)
1817892.1699581572
[]


In [26]:
OMP_verif(b_scipy2)

[[  0.           0.           0.           0.           0.
    0.           0.           0.           0.           0.
    0.          99.70035543   0.           0.           0.
    0.           0.           0.           0.           0.        ]
 [  0.         146.54061532   0.           0.           0.
    0.           0.           0.           0.           0.
    0.           0.           0.           0.           0.
    0.           0.           0.           0.           0.        ]]
[11  1]
[ 99.70035543 146.54061532]


In [27]:
np.allclose(b_sklearn2[[0, 1], [11, 1]], b_sklearn[[0, 1], [11, 1]])

True

In [28]:
np.allclose(b_numpy[[0, 1], [11, 1]], b_sklearn[[0, 1], [11, 1]])

True

In [29]:
np.allclose(b_scipy[[0, 1], [11, 1]], b_sklearn[[0, 1], [11, 1]])

True

In [30]:
np.allclose(b_scipy2[[0, 1], [11, 1]], b_sklearn[[0, 1], [11, 1]])

True

## IT WORKS !!!

In [31]:
X, y = make_regression(n_samples = 50, n_features = 300, n_targets = 10_000, noise=4, random_state=0)

In [32]:
%timeit sklearn_OMP(y.T, 1, X.T)

616 ms ± 96.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [33]:
%timeit sklearn_OMP2(y.T, 1, X.T)

731 ms ± 13.8 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [34]:
%timeit OMP(y.T, 1, X.T)

556 ms ± 8.75 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [35]:
%timeit OMP_2(y.T, 1, X.T)

964 ms ± 14.8 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [36]:
%timeit OMP_3(y.T, 1, X.T)

784 ms ± 21.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [37]:
%load_ext line_profiler

In [38]:
from line_profiler import profile

sk_OMP = profile(sklearn_OMP)
%lprun -f sk_OMP sk_OMP(y.T, 1, X.T)

Timer unit: 1e-07 s

Total time: 0.980033 s
File: C:\Users\richa\AppData\Local\Temp\ipykernel_2784\2677671288.py
Function: sklearn_OMP at line 1

Line #      Hits         Time  Per Hit   % Time  Line Contents
     1                                           def sklearn_OMP(Y, T_0, D, rng=42):
     2                                               # loss = np.empty(num_iter)
     3                                               # rng = np.random.default_rng(rng)
     4                                           
     5                                               # X = SparseCoder(dictionary = D, transform_n_nonzero_coefs = T_0).transform(Y)
     6         1       3281.0   3281.0      0.0      gram = D @ D.T
     7         1     172408.0 172408.0      1.8      cov = D @ Y.T
     8         2    9624624.0 4.81e+06     98.2      X = sparse_encode(Y, 
     9         1          2.0      2.0      0.0                        D, 
    10         1         10.0     10.0      0.0                      

In [39]:
from line_profiler import profile

sk_OMP2 = profile(sklearn_OMP2)
%lprun -f sk_OMP2 sk_OMP2(y.T, 1, X.T)

Timer unit: 1e-07 s

Total time: 0.965292 s
File: C:\Users\richa\AppData\Local\Temp\ipykernel_2784\4239697825.py
Function: sklearn_OMP2 at line 1

Line #      Hits         Time  Per Hit   % Time  Line Contents
     1                                           def sklearn_OMP2(Y, T_0, D, rng=42):
     2                                               # loss = np.empty(num_iter)
     3                                               # rng = np.random.default_rng(rng)
     4                                           
     5                                               # X = SparseCoder(dictionary = D, transform_n_nonzero_coefs = T_0).transform(Y)
     6         1       3652.0   3652.0      0.0      gram = D @ D.T
     7         1     160529.0 160529.0      1.7      cov = D @ Y.T
     8         3    9488714.0 3.16e+06     98.3      X = orthogonal_mp_gram(
     9         1          3.0      3.0      0.0          Gram=gram, 
    10         1          2.0      2.0      0.0          Xy=cov, 
    1

In [40]:
np_OMP = profile(OMP)
%lprun -f np_OMP np_OMP(y.T, 1, X.T)

Timer unit: 1e-07 s

Total time: 0.912251 s
File: C:\Users\richa\AppData\Local\Temp\ipykernel_2784\1184636726.py
Function: OMP at line 1

Line #      Hits         Time  Per Hit   % Time  Line Contents
     1                                           def OMP(Y, T_0, D, rng=42, debug=False):
     2                                               # loss = np.empty(num_iter)
     3                                               # rng = np.random.default_rng(rng)
     4                                           
     5         1        281.0    281.0      0.0      X = np.zeros((Y.shape[0], D.shape[0]))
     6                                           
     7                                               # gram = D @ D.T
     8                                               # cov = D @ Y.T
     9                                               # X = sparse_encode(Y, 
    10                                               #                   D, 
    11                                               # 

In [41]:
sci_OMP = profile(OMP_2)
%lprun -f sci_OMP sci_OMP(y.T, 1, X.T)

Timer unit: 1e-07 s

Total time: 1.58181 s
File: C:\Users\richa\AppData\Local\Temp\ipykernel_2784\2856197957.py
Function: OMP_2 at line 1

Line #      Hits         Time  Per Hit   % Time  Line Contents
     1                                           def OMP_2(Y, T_0, D, rng=42, debug=False):
     2                                               # loss = np.empty(num_iter)
     3                                               # rng = np.random.default_rng(rng)
     4                                           
     5         1       2540.0   2540.0      0.0      X = np.zeros((Y.shape[0], D.shape[0]))
     6                                           
     7                                               # gram = D @ D.T
     8                                               # cov = D @ Y.T
     9                                               # X = sparse_encode(Y, 
    10                                               #                   D, 
    11                                              

In [42]:
sci_OMP2 = profile(OMP_3)
%lprun -f sci_OMP2 sci_OMP2(y.T, 1, X.T)

Timer unit: 1e-07 s

Total time: 1.34004 s
File: C:\Users\richa\AppData\Local\Temp\ipykernel_2784\1451344409.py
Function: OMP_3 at line 1

Line #      Hits         Time  Per Hit   % Time  Line Contents
     1                                           def OMP_3(Y, T_0, D, rng=42, debug=False):
     2                                               # loss = np.empty(num_iter)
     3                                               # rng = np.random.default_rng(rng)
     4                                           
     5         1        310.0    310.0      0.0      X = np.zeros((Y.shape[0], D.shape[0]))
     6                                           
     7                                               # gram = D @ D.T
     8                                               # cov = D @ Y.T
     9                                               # X = sparse_encode(Y, 
    10                                               #                   D, 
    11                                              