In [None]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

In [None]:
import numpy as np
from numpy.linalg import norm
from numpy import trace
from numpy import identity
from numpy import argpartition
from numpy.linalg import multi_dot
from scipy.linalg import hadamard
from copy import deepcopy

import matplotlib.pyplot as plt

In [None]:
from pyqalm.data_structures import SparseFactors
from pyqalm.qalm import palm4msa
from pyqalm.qalm_fast import palm4msa_fast1, palm4msa_fast2, palm4msa_fast3

In [None]:
def get_data(small_dim=2**10, large_dim=2**11, n_nonzeros_small=None, n_nonzeros_large=None):
    """Build a random chain of sparse-looking dense factors and its product.

    The chain contains ``int(log2(small_dim))`` factors: all but the last are
    ``small_dim x small_dim``, the last one is ``small_dim x large_dim``.
    Each factor starts as zeros and receives Gaussian values at flat
    positions drawn with ``np.random.choice`` (default settings, i.e. with
    replacement, so the number of distinct non-zero entries may be lower
    than the requested count).

    Parameters
    ----------
    small_dim : int
        Number of rows of every factor and number of columns of the square
        factors.
    large_dim : int
        Number of columns of the last factor.
    n_nonzeros_small : int, optional
        Number of values drawn for each square factor
        (defaults to ``2 * small_dim``).
    n_nonzeros_large : int, optional
        Number of values drawn for the last factor
        (defaults to ``2 * large_dim``).

    Returns
    -------
    A : list of ndarray
        The dense factors.
    P : ndarray
        Dense product of the factors, computed with ``np.linalg.multi_dot``.
    S : SparseFactors
        ``SparseFactors`` object built from ``A``.
    """
    def _random_factor(n_rows, n_cols, n_values):
        # Draw the values first and the positions second: this is the order
        # in which the random generator is consumed by an assignment of the
        # form ``M.flat[choice(...)] = randn(...)``.
        values = np.random.randn(n_values)
        positions = np.random.choice(n_rows * n_cols, size=n_values)
        factor = np.zeros((n_rows, n_cols))
        factor.flat[positions] = values
        return factor

    if n_nonzeros_small is None:
        n_nonzeros_small = 2*small_dim
    if n_nonzeros_large is None:
        n_nonzeros_large = 2*large_dim

    n_square_factors = int(np.log2(small_dim)) - 1
    A = []
    for _ in range(n_square_factors):
        A.append(_random_factor(small_dim, small_dim, n_nonzeros_small))
    A.append(_random_factor(small_dim, large_dim, n_nonzeros_large))

    P = np.linalg.multi_dot(A)
    S = SparseFactors(A)
    return A, P, S

# Execution time for computing a matrix-vector product

In [None]:
# Benchmark of the matrix-vector product in the square case
# (large_dim == small_dim), for dimensions 2**8 ... 2**12.
# For each dimension, `times[i, :]` stores the average running time
# returned by `%timeit -o` for:
#   column 0: dense product       P @ x
#   column 1: call syntax         S(x)
#   column 2: matmul operator     S @ x
#   column 3: explicit method     S.matvec(x)
print('Execution time for computing a matrix-vector product (square case)')
lst_small_dim = 2**np.arange(8, 13)
times = np.empty((len(lst_small_dim), 4))
for i, small_dim in enumerate(lst_small_dim):
    large_dim = small_dim
    print('Dimension:', small_dim, large_dim)
    # Fresh random factors (A), dense product (P) and SparseFactors (S)
    A, P, S = get_data(small_dim=small_dim, 
                       large_dim=large_dim, 
                       n_nonzeros_small=2*small_dim, 
                       n_nonzeros_large=2*large_dim)
    # Input vector whose length matches the number of columns of the last factor
    x = np.random.randn(large_dim)
    t = %timeit -o y = P @ x
    times[i, 0] = t.average
    t = %timeit -o y = S(x)
    times[i, 1] = t.average
    t = %timeit -o y = S @ x
    times[i, 2] = t.average
    t = %timeit -o y = S.matvec(x)
    times[i, 3] = t.average

In [None]:
# Log-log plot of the average matrix-vector product timings (square case):
# one curve per column of `times`, as a function of the dimension.
plt.loglog(lst_small_dim, times[:, 0], label='dense')
plt.loglog(lst_small_dim, times[:, 1], label='Sparse: call')
plt.loglog(lst_small_dim, times[:, 2], label='Sparse: @')
plt.loglog(lst_small_dim, times[:, 3], label='Sparse: matvec')
plt.legend()
plt.grid()
plt.title('Execution time for computing a matrix-vector product (square case)')
plt.ylabel('Running time (s)')
# Raw string: in a regular string literal the '\t' of '\times' is a TAB
# escape, which corrupts the LaTeX multiplication sign in the label.
plt.xlabel(r'Data dimension $n$ ($n \times n$)')

In [None]:
# Benchmark of the matrix-vector product in the rectangular case
# (large_dim == 4 * small_dim), for small_dim in 2**8 ... 2**12.
# For each dimension, `times[i, :]` stores the average running time
# returned by `%timeit -o` for:
#   column 0: dense product       P @ x
#   column 1: call syntax         S(x)
#   column 2: matmul operator     S @ x
#   column 3: explicit method     S.matvec(x)
print('Execution time for computing a matrix-vector product (rectangular case)')
lst_small_dim = 2**np.arange(8, 13)
times = np.empty((len(lst_small_dim), 4))
for i, small_dim in enumerate(lst_small_dim):
    large_dim = small_dim * 4
    print('Dimension:', small_dim, large_dim)
    # Fresh random factors (A), dense product (P) and SparseFactors (S)
    A, P, S = get_data(small_dim=small_dim, 
                       large_dim=large_dim, 
                       n_nonzeros_small=2*small_dim, 
                       n_nonzeros_large=2*large_dim)
    # Input vector whose length matches the number of columns of the last factor
    x = np.random.randn(large_dim)
    t = %timeit -o y = P @ x
    times[i, 0] = t.average
    t = %timeit -o y = S(x)
    times[i, 1] = t.average
    t = %timeit -o y = S @ x
    times[i, 2] = t.average
    t = %timeit -o y = S.matvec(x)
    times[i, 3] = t.average

In [None]:
# Log-log plot of the average matrix-vector product timings (rectangular
# case): one curve per column of `times`, as a function of the small dimension.
plt.loglog(lst_small_dim, times[:, 0], label='dense')
plt.loglog(lst_small_dim, times[:, 1], label='Sparse: call')
plt.loglog(lst_small_dim, times[:, 2], label='Sparse: @')
plt.loglog(lst_small_dim, times[:, 3], label='Sparse: matvec')
plt.legend()
plt.grid()
plt.title('Execution time for computing a matrix-vector product (rectangular case)')
plt.ylabel('Running time (s)')
# Raw string: in a regular string literal the '\t' of '\times' is a TAB
# escape, which corrupts the LaTeX multiplication sign in the label.
plt.xlabel(r'Data dimension $n$ ($n \times 4n$)')

# Execution time for computing the spectral norm

In [None]:
# Benchmark of the spectral norm computation in the rectangular case
# (large_dim == 4 * small_dim), for small_dim in 2**8 ... 2**12.
# For each dimension, `times[i, :]` stores the average running time
# returned by `%timeit -o` for:
#   column 0: np.linalg.norm(P, ord=2) on the dense product
#   column 1: S.compute_spectral_norm(method='svds')
#   column 2: S.compute_spectral_norm(method='eigs')
#   column 3: SH.compute_spectral_norm(method='eigs'), with SH = S.adjoint()
print('Execution time for computing the spectral norm (rectangular case)')
lst_small_dim = 2**np.arange(8, 13)
times = np.empty((len(lst_small_dim), 4))
for i, small_dim in enumerate(lst_small_dim):
    large_dim = small_dim * 4
    print('Dimension:', small_dim, large_dim)
    # Fresh random factors (A), dense product (P) and SparseFactors (S)
    A, P, S = get_data(small_dim=small_dim, 
                       large_dim=large_dim, 
                       n_nonzeros_small=2*small_dim, 
                       n_nonzeros_large=2*large_dim)
    # The adjoint is built once, outside the timed statements
    SH = S.adjoint()
    t = %timeit -o np.linalg.norm(P, ord=2)
    times[i, 0] = t.average
    t = %timeit -o S.compute_spectral_norm(method='svds')
    times[i, 1] = t.average
    t = %timeit -o S.compute_spectral_norm(method='eigs')
    times[i, 2] = t.average
    t = %timeit -o SH.compute_spectral_norm(method='eigs')
    times[i, 3] = t.average

In [None]:
# Log-log plot of the average spectral norm timings (rectangular case):
# one curve per column of `times`, as a function of the small dimension.
plt.loglog(lst_small_dim, times[:, 0], label='dense')
plt.loglog(lst_small_dim, times[:, 1], label='Sparse: svds')
plt.loglog(lst_small_dim, times[:, 2], label='Sparse: eigs')
plt.loglog(lst_small_dim, times[:, 3], label='Sparse (Hermitian): eigs')
plt.legend()
plt.grid()
# The title names the quantity actually benchmarked here (spectral norm),
# not the matrix-vector product of the previous section.
plt.title('Execution time for computing the spectral norm (rectangular case)')
plt.ylabel('Running time (s)')
# Raw string: in a regular string literal the '\t' of '\times' is a TAB
# escape, which corrupts the LaTeX multiplication sign in the label.
plt.xlabel(r'Data dimension $n$ ($n \times 4n$)')

# Execution time for computing the product

In [None]:
# Benchmark of the computation of the full product of the factors in the
# rectangular case (large_dim == 4 * small_dim), for small_dim in
# 2**8 ... 2**12.
# For each dimension, `times[i, :]` stores the average running time
# returned by `%timeit -o` for:
#   column 0: np.linalg.multi_dot(A) on the list of dense factors
#   column 1: S.compute_product() on the SparseFactors object
print('Execution time for computing the product')
lst_small_dim = 2**np.arange(8, 13)
times = np.empty((len(lst_small_dim), 2))
for i, small_dim in enumerate(lst_small_dim):
    large_dim = small_dim * 4
    print('Dimension:', small_dim, large_dim)
    # Fresh random factors (A), dense product (P) and SparseFactors (S)
    A, P, S = get_data(small_dim=small_dim, 
                       large_dim=large_dim, 
                       n_nonzeros_small=2*small_dim, 
                       n_nonzeros_large=2*large_dim)
    t = %timeit -o P = np.linalg.multi_dot(A)
    times[i, 0] = t.average
    t = %timeit -o PP = S.compute_product()
    times[i, 1] = t.average

In [None]:
# Log-log plot of the average timings for computing the full product
# (rectangular case): dense multi_dot versus the SparseFactors product.
plt.loglog(lst_small_dim, times[:, 0], label='dense')
plt.loglog(lst_small_dim, times[:, 1], label='Sparse')
plt.legend()
plt.grid()
plt.title('Execution time for computing the product (rectangular case)')
plt.ylabel('Running time (s)')
# Raw string: in a regular string literal the '\t' of '\times' is a TAB
# escape, which corrupts the LaTeX multiplication sign in the label.
plt.xlabel(r'Data dimension $n$ ($n \times 4n$)')

# Execution time for `palm4msa`

In [None]:
from pyqalm.qalm import palm4msa, palm4msa_fast1, palm4msa_fast2, palm4msa_fast3
from pyqalm.utils import get_lambda_proxsplincol


In [None]:
# Benchmark of the four palm4msa implementations on Hadamard matrices of
# size 2**7 ... 2**11. For each size, `times[i, :]` stores the average
# running time returned by `%timeit -o` for:
#   column 0: palm4msa
#   column 1: palm4msa_fast1
#   column 2: palm4msa_fast2
#   column 3: palm4msa_fast3 (unpacked into 4 values, the others into 5)
print('Execution time for palm4msa')
lst_small_dim = 2**np.arange(7, 12)
times = np.empty((len(lst_small_dim), 4))
for i, small_dim in enumerate(lst_small_dim):
    large_dim = small_dim
    X = hadamard(small_dim)
    print('Dimension:', small_dim, large_dim)
    
    # d is the smallest dimension of X; X is transposed when its number of
    # columns equals d (always the case here since X is square).
    d = np.min(X.shape)
    if X.shape[1] == d:
        X = X.T
    nb_factors = int(np.log2(d))
    # Initial factors: nb_factors - 1 identity matrices followed by an
    # all-zero matrix with the shape of X.
    lst_S_init = []
    for _ in range(nb_factors - 1):
        lst_S_init.append(np.eye(d))
    lst_S_init.append(np.zeros(X.shape))

    nb_keep_values = 2 * d
    nb_values_residual = int(d / 2 ** nb_factors) * d
    # The list holds nb_factors references to the same projection object
    # plus one extra projection built from nb_values_residual.
    # NOTE(review): this gives nb_factors + 1 entries while lst_S_init has
    # nb_factors entries -- confirm this is what palm4msa expects.
    lst_projection_functions = \
        [get_lambda_proxsplincol(nb_keep_values)] * nb_factors \
        + [get_lambda_proxsplincol(nb_values_residual)]

    # Settings shared by the four timed calls
    f_lambda_init = 1
    nb_iter = 10
    update_right_to_left = True
    graphical_display = False
    
    # Reference implementation
    t = %timeit -o f_lambda_ref, lst_S_ref, arr_X_curr_ref, objective_function_ref, i_iter_ref = \
        palm4msa(X, \
                 lst_S_init=lst_S_init, \
                 nb_factors=nb_factors, \
                 lst_projection_functions=lst_projection_functions, \
                 f_lambda_init=f_lambda_init, \
                 nb_iter=nb_iter, \
                 update_right_to_left=update_right_to_left, \
                 graphical_display=graphical_display)
    times[i, 0] = t.average

    # First fast variant
    t = %timeit -o f_lambda, lst_S, arr_X_curr, objective_function, i_iter = \
        palm4msa_fast1(X, \
                       lst_S_init=lst_S_init, \
                       nb_factors=nb_factors, \
                       lst_projection_functions=lst_projection_functions, \
                       f_lambda_init=f_lambda_init, \
                       nb_iter=nb_iter, \
                       update_right_to_left=update_right_to_left, \
                       graphical_display=graphical_display)
    times[i, 1] = t.average
    
    # Second fast variant
    t = %timeit -o f_lambda, lst_S, arr_X_curr, objective_function, i_iter = \
    palm4msa_fast2(X, \
                   lst_S_init=lst_S_init, \
                   nb_factors=nb_factors, \
                   lst_projection_functions=lst_projection_functions, \
                   f_lambda_init=f_lambda_init, \
                   nb_iter=nb_iter, \
                   update_right_to_left=update_right_to_left, \
                   graphical_display=graphical_display)
    times[i, 2] = t.average
    
    # Third fast variant (no objective_function in the unpacked result)
    t = %timeit -o f_lambda, lst_S, arr_X_curr, i_iter = \
    palm4msa_fast3(X, \
                   lst_S_init=lst_S_init, \
                   nb_factors=nb_factors, \
                   lst_projection_functions=lst_projection_functions, \
                   f_lambda_init=f_lambda_init, \
                   nb_iter=nb_iter, \
                   update_right_to_left=update_right_to_left, \
                   graphical_display=graphical_display)
    times[i, 3] = t.average

In [None]:
# Log-log plot of the average palm4msa timings on Hadamard matrices:
# column k of `times` is drawn with the k-th label below.
curve_labels = ['dense', 'Sparse 1', 'Sparse 2', 'Sparse 3']
for column, curve_label in enumerate(curve_labels):
    plt.loglog(lst_small_dim, times[:, column], label=curve_label)
plt.legend()
plt.grid()
plt.title('Execution time for palm4msa (hadamard)')
plt.ylabel('Running time (s)')
plt.xlabel('Data dimension')

In [None]:
# Benchmark of the four palm4msa implementations on a single random
# Gaussian matrix of shape 70 x 2000. `times[k]` stores the average running
# time returned by `%timeit -o` for:
#   index 0: palm4msa
#   index 1: palm4msa_fast1
#   index 2: palm4msa_fast2
#   index 3: palm4msa_fast3 (unpacked into 4 values, the others into 5)
print('Execution time for palm4msa with a 70x2000 matrix')
times = np.empty(4)
small_dim = 70
large_dim = 2000
X = np.random.randn(small_dim, large_dim)

print('Dimension:', small_dim, large_dim)

# d is the smallest dimension of X (70 here); X is transposed only when its
# number of columns equals d, which is not the case for a 70 x 2000 matrix.
d = np.min(X.shape)
if X.shape[1] == d:
    X = X.T
# int(log2(70)) == 6 factors
nb_factors = int(np.log2(d))
# Initial factors: nb_factors - 1 identity matrices followed by an
# all-zero matrix with the shape of X.
lst_S_init = []
for _ in range(nb_factors - 1):
    lst_S_init.append(np.eye(d))
lst_S_init.append(np.zeros(X.shape))

nb_keep_values = 2 * d
nb_values_residual = int(d / 2 ** nb_factors) * d
# The list holds nb_factors references to the same projection object plus
# one extra projection built from nb_values_residual.
# NOTE(review): this gives nb_factors + 1 entries while lst_S_init has
# nb_factors entries -- confirm this is what palm4msa expects.
lst_projection_functions = \
    [get_lambda_proxsplincol(nb_keep_values)] * nb_factors \
    + [get_lambda_proxsplincol(nb_values_residual)]

# Settings shared by the four timed calls
f_lambda_init = 1
nb_iter = 10
update_right_to_left = True
graphical_display = False

# Reference implementation
t = %timeit -o f_lambda_ref, lst_S_ref, arr_X_curr_ref, objective_function_ref, i_iter_ref = \
    palm4msa(X, \
             lst_S_init=lst_S_init, \
             nb_factors=nb_factors, \
             lst_projection_functions=lst_projection_functions, \
             f_lambda_init=f_lambda_init, \
             nb_iter=nb_iter, \
             update_right_to_left=update_right_to_left, \
             graphical_display=graphical_display)
times[0] = t.average

# First fast variant
t = %timeit -o f_lambda, lst_S, arr_X_curr, objective_function, i_iter = \
    palm4msa_fast1(X, \
                   lst_S_init=lst_S_init, \
                   nb_factors=nb_factors, \
                   lst_projection_functions=lst_projection_functions, \
                   f_lambda_init=f_lambda_init, \
                   nb_iter=nb_iter, \
                   update_right_to_left=update_right_to_left, \
                   graphical_display=graphical_display)
times[1] = t.average

# Second fast variant
t = %timeit -o f_lambda, lst_S, arr_X_curr, objective_function, i_iter = \
    palm4msa_fast2(X, \
                   lst_S_init=lst_S_init, \
                   nb_factors=nb_factors, \
                   lst_projection_functions=lst_projection_functions, \
                   f_lambda_init=f_lambda_init, \
                   nb_iter=nb_iter, \
                   update_right_to_left=update_right_to_left, \
                   graphical_display=graphical_display)
times[2] = t.average

# Third fast variant (no objective_function in the unpacked result)
t = %timeit -o f_lambda, lst_S, arr_X_curr, i_iter = \
    palm4msa_fast3(X, \
                   lst_S_init=lst_S_init, \
                   nb_factors=nb_factors, \
                   lst_projection_functions=lst_projection_functions, \
                   f_lambda_init=f_lambda_init, \
                   nb_iter=nb_iter, \
                   update_right_to_left=update_right_to_left, \
                   graphical_display=graphical_display)
times[3] = t.average

In [None]:
# Report the average running time of each palm4msa implementation,
# in the order in which the timings were stored in `times`.
implementation_names = ['palm4msa', 'palm4msa_fast1', 'palm4msa_fast2', 'palm4msa_fast3']
for position, implementation in enumerate(implementation_names):
    print(implementation, times[position])