In [2]:
import numpy as np

**Structured Random Compression Algorithm Fig 1**

In [3]:
def randomized_compression(A, r, rOV, w):
    """
    Compute a compression matrix Q for A using a randomized algorithm.

    Parameters:
    A (numpy array): Input matrix (m x n)
    r (int): Target rank
    rOV (int): Oversampling parameter
    w (int): Exponent for the power iteration

    Returns:
    Q (numpy array): Compression matrix (m x (r + rOV))
    """
    m, n = A.shape
    d = r + rOV  # Effective reduced dimension

    # Step 1: Draw a Gaussian random matrix Omega_L
    Omega_L = np.random.randn(n, d)

    # Step 2: Compute B = (A A^T)^w A Omega_L
    B = A @ Omega_L  # Initial multiplication: A Omega_L
    for _ in range(w):
        B = (A @ A.T) @ B # Power iteration: (A A^T) B

    # Step 3: Compute the orthogonal basis Q using QR decomposition
    Q, _ = np.linalg.qr(B)

    return Q

In [None]:
# Example Usage
np.random.seed(42)
A = np.random.randn(100, 50)  # Input matrix (100 x 50)
r = 5   # Target rank
rOV = 5 # Oversampling parameter
w = 2   # Exponent

Q = randomized_compression(A, r, rOV, w)
print("Compression matrix Q shape:", Q.shape)

Compression matrix Q shape: (100, 10)


**Structured Random Compression Algorithm Fig 1 SRHT**

In [29]:
import numpy as np
from scipy.linalg import hadamard

def randomized_compression_srht(A, r, rOV, w):
    """
    Compute a compression matrix Q for A using SRHT as the test matrix.

    Parameters:
    A (numpy array): Input matrix (m x n)
    r (int): Target rank
    rOV (int): Oversampling parameter
    w (int): Exponent for the power iteration

    Returns:
    Q (numpy array): Compression matrix (m x (r + rOV))
    """
    m, n = A.shape
    d = r + rOV  # Effective reduced dimension

    # Ensure n is a power of 2 by padding A with zeros if necessary
    n_padded = 2 ** int(np.ceil(np.log2(n)))  # Smallest power of 2 >= n
    if n_padded != n:
        A_padded = np.zeros((m, n_padded))
        A_padded[:, :n] = A  # Pad with zeros
        A = A_padded
        n = n_padded

    # Step 1: Generate SRHT matrix
    H = hadamard(n)  # Hadamard matrix of size n x n
    D = np.diag(np.random.choice([-1, 1], size=n))  # Random diagonal matrix
    S = np.random.choice(n, size=d, replace=False)  # Random subsampling matrix
    Omega_L = (H @ D)[:, S] / np.sqrt(d)  # SRHT matrix (n x d)

    # Step 2: Compute B = (A A^T)^w A Omega_L
    B = A @ Omega_L  # Initial multiplication: A Omega_L
    for _ in range(w):
        B = (A @ A.T) @ B  # Power iteration: (A A^T) B

    # Step 3: Compute the orthogonal basis Q using QR decomposition
    Q, _ = np.linalg.qr(B)

    return Q

In [30]:
# Define the dimensions of the input matrix A
m = 1000  # Number of rows
n = 500   # Number of columns

# Generate a random matrix A ∈ R^(m x n)
A = np.random.randn(m, n)

# Set the parameters for the randomized compression
r = 50    # Target rank
rOV = 10  # Oversampling parameter
w = 2     # Exponent for power iteration

# Call the SRHT-based randomized compression function
Q = randomized_compression_srht(A, r, rOV, w)

# Print the shape of the resulting compression matrix Q
print("Shape of Q:", Q.shape)  # Expected output: (1000, 60) since r + rOV = 50 + 10 = 60

# Verify that Q is orthogonal (Q^T Q ≈ I)
orthogonality_check = Q.T @ Q
print("Orthogonality check (Q^T Q):")
print(np.round(orthogonality_check, 6))  # Should be close to the identity matrix

Shape of Q: (1000, 60)
Orthogonality check (Q^T Q):
[[ 1.  0.  0. ...  0.  0. -0.]
 [ 0.  1. -0. ... -0.  0.  0.]
 [ 0. -0.  1. ... -0. -0.  0.]
 ...
 [ 0. -0. -0. ...  1.  0.  0.]
 [ 0.  0. -0. ...  0.  1.  0.]
 [-0.  0.  0. ...  0.  0.  1.]]


**Structured Random Compression Algorithm Fig 1 SRFT**

In [7]:
import numpy as np
from scipy.fftpack import fft

def randomized_compression_srft(A, r, rOV, w):
    """
    Compute a compression matrix Q for A using SRFT as the test matrix.

    Parameters:
    A (numpy array): Input matrix (m x n)
    r (int): Target rank
    rOV (int): Oversampling parameter
    w (int): Exponent for the power iteration

    Returns:
    Q (numpy array): Compression matrix (m x (r + rOV))
    """
    m, n = A.shape
    d = r + rOV  # Effective reduced dimension

    # Step 1: Generate SRFT matrix
    D = np.diag(np.random.choice([-1, 1], size=n))  # Random diagonal matrix
    F = fft(np.eye(n), axis=0)  # Fourier matrix of size n x n
    S = np.random.choice(n, size=d, replace=False)  # Random subsampling matrix
    Omega_L = (F @ D)[S, :] / np.sqrt(d)  # SRFT matrix

    # Step 2: Compute B = (A A^T)^w A Omega_L
    B = A @ Omega_L  # Initial multiplication: A Omega_L
    for _ in range(w):
        B = (A @ A.T) @ B  # Power iteration: (A A^T) B

    # Step 3: Compute the orthogonal basis Q using QR decomposition
    Q, _ = np.linalg.qr(B)

    return Q

**NMF with Structured Random Compression Fig 2 (Multiplicative Updates)**

In [35]:
def nmf_with_compression(A, r, rOV, w, max_iter=100, tol=1e-10):
    """
    Compute NMF with compression matrices L and R using multiplicative updates.

    Parameters:
    A (numpy array): Input matrix (m x n)
    r (int): Target rank
    rOV (int): Oversampling parameter
    w (int): Exponent for the power iteration
    max_iter (int): Maximum number of iterations
    tol (float): Convergence tolerance

    Returns:
    X_k (numpy array): Nonnegative matrix (m x r)
    Y_k (numpy array): Nonnegative matrix (r x n)
    """
    m, n = A.shape
    d = r + rOV  # Effective reduced dimension

    # Ensure A is nonnegative
    A = np.abs(A)

    # Step 1: Compute compression matrices L and R
    L = randomized_compression_srht(A, r, rOV, w)  # L ∈ R^(m x d)
    R = randomized_compression_srht(A.T, r, rOV, w).T  # R ∈ R^(d x n)

    # Step 2: Initialize Y_k with nonnegative values
    Y_k = np.abs(np.random.randn(r, n))  # Y_k ∈ R^(r x n)

    # Step 3: Compute compressed matrices
    A_check = A @ R.T  # A_check ∈ R^(m x d)
    A_hat = L.T @ A  # A_hat ∈ R^(d x n)

    # Ensure compressed matrices are nonnegative
    A_check = np.abs(A_check)
    A_hat = np.abs(A_hat)

    # Initialize X_k
    X_k = np.abs(np.random.randn(m, r))  # X_k ∈ R^(m x r)

    norm_A = np.linalg.norm(A, 'fro')
    
    # Iterate until convergence
    prev_error = np.inf
    for k in range(max_iter):
        # Step 4: Compute Y_check_k = Y_k R^T
        Y_check_k = Y_k @ R.T  # Y_check_k ∈ R^(r x d)

        # Step 5: Update X_k+1 using multiplicative updates
        numerator_X = A_check @ Y_check_k.T
        denominator_X = X_k @ (Y_check_k @ Y_check_k.T)
        X_k_plus_1 = X_k * (numerator_X / denominator_X)

        # Step 6: Compute X_hat_k+1 = L^T X_k+1
        X_hat_k_plus_1 = L.T @ X_k_plus_1  # X_hat_k+1 ∈ R^(d x r)

        # Step 7: Update Y_k+1 using multiplicative updates
        numerator_Y = X_hat_k_plus_1.T @ A_hat
        denominator_Y = (X_hat_k_plus_1.T @ X_hat_k_plus_1) @ Y_k
        Y_k_plus_1 = Y_k * (numerator_Y / denominator_Y)

        # Compute reconstruction error
        reconstruction_error = np.linalg.norm(A - X_k_plus_1 @ Y_k_plus_1, 'fro')
        normalized_error = reconstruction_error / norm_A  # Normalized error
        print(f"Iteration {k+1}: Normalized Reconstruction Error = {normalized_error:.6f}")

        # Convergence check
        if abs(prev_error - normalized_error) < tol:
            break
        prev_error = normalized_error


        # Update X_k and Y_k
        X_k, Y_k = X_k_plus_1, Y_k_plus_1

    return X_k, Y_k

In [36]:
# Example Usage
np.random.seed(42)
A = np.abs(np.random.randn(100, 50))  # Nonnegative matrix (100 x 50)
r = 5   # Target rank
rOV = 5 # Oversampling parameter
w = 2   # Exponent

Xk, Yk = nmf_with_compression(A, r, rOV, w)
print("Final X shape:", Xk.shape)
print("Final Y shape:", Yk.shape)

Iteration 1: Normalized Reconstruction Error = 1.721853
Iteration 2: Normalized Reconstruction Error = 1.721625
Iteration 3: Normalized Reconstruction Error = 1.721383
Iteration 4: Normalized Reconstruction Error = 1.721198
Iteration 5: Normalized Reconstruction Error = 1.721072
Iteration 6: Normalized Reconstruction Error = 1.721001
Iteration 7: Normalized Reconstruction Error = 1.720982
Iteration 8: Normalized Reconstruction Error = 1.721009
Iteration 9: Normalized Reconstruction Error = 1.721076
Iteration 10: Normalized Reconstruction Error = 1.721178
Iteration 11: Normalized Reconstruction Error = 1.721309
Iteration 12: Normalized Reconstruction Error = 1.721461
Iteration 13: Normalized Reconstruction Error = 1.721631
Iteration 14: Normalized Reconstruction Error = 1.721814
Iteration 15: Normalized Reconstruction Error = 1.722006
Iteration 16: Normalized Reconstruction Error = 1.722202
Iteration 17: Normalized Reconstruction Error = 1.722402
Iteration 18: Normalized Reconstruction 

**NMF with Structured Compression (MU SkLearn)**

In [43]:
import numpy as np
from sklearn.decomposition import NMF

def structured_compression_mu(A, r, rOV, w, max_iter=100, tol=1e-4):
    """
    Compute NMF using custom initialization with compression matrices L and R.

    Parameters:
    A (numpy array): Input matrix (m x n)
    r (int): Target rank
    rOV (int): Oversampling parameter
    w (int): Exponent for the power iteration
    max_iter (int): Maximum number of iterations
    tol (float): Convergence tolerance

    Returns:
    W (numpy array): Nonnegative matrix (m x r)
    H (numpy array): Nonnegative matrix (r x n)
    """
    m, n = A.shape
    d = r + rOV  # Effective reduced dimension

    # Ensure A is nonnegative
    A = np.abs(A)

    # Step 1: Compute compression matrices L and R
    L = randomized_compression(A, r, rOV, w)  # L ∈ R^(m x d)
    R = randomized_compression(A.T, r, rOV, w).T  # R ∈ R^(d x n)

    # Step 2: Custom initialization for W and H using projections
    W = A @ R.T  # W ∈ R^(m x d)
    W = W[:, :r]  # Take the first r columns for W ∈ R^(m x r)

    H = L.T @ A  # H ∈ R^(d x n)
    H = H[:r, :]  # Take the first r rows for H ∈ R^(r x n)

    # Ensure W and H are nonnegative
    W = np.abs(W)
    H = np.abs(H)

    # Step 3: Use scikit-learn's NMF solver for updates
    nmf = NMF(n_components=r, init='custom', solver='mu', max_iter=max_iter, tol=tol, random_state=42)

    # Fit the model using custom initialization
    W = nmf.fit_transform(A, W=W, H=H)  # Update W and H using A
    H = nmf.components_

    # Step 4: Compute reconstruction error
    reconstruction_error = np.linalg.norm(A - W @ H, 'fro')
    normalized_error = reconstruction_error / np.linalg.norm(A, 'fro')
    print(f"Final Normalized Reconstruction Error = {normalized_error:.6f}")

    return W, H

In [54]:
# Example Usage
np.random.seed(42)
A = np.abs(np.random.randn(100, 50))  # Nonnegative matrix (100 x 50)
r = 45  # Target rank
rOV = 5 # Oversampling parameter
w = 2   # Exponent

W, H = structured_compression_mu(A, r, rOV, w,1200,1e-5)
print("Final W shape:", W.shape)
print("Final H shape:", H.shape)

Final Normalized Reconstruction Error = 0.162617
Final W shape: (100, 45)
Final H shape: (45, 50)


In [57]:
import numpy as np
from scipy.linalg import hadamard
from sklearn.decomposition import NMF
from sklearn.decomposition._nmf import _initialize_nmf

def randomized_compression_srht(A, r, rOV, w):
    """
    Compute a low-rank approximation Q of A using SRHT.
    """
    m, n = A.shape
    d = r + rOV  # Effective reduced dimension

    # Ensure n is a power of 2 by padding A with zeros if necessary
    n_padded = 2 ** int(np.ceil(np.log2(n)))  # Smallest power of 2 >= n
    if n_padded != n:
        A_padded = np.zeros((m, n_padded))
        A_padded[:, :n] = A  # Pad with zeros
        A = A_padded
        n = n_padded

    # Step 1: Generate SRHT matrix
    H = hadamard(n)  # Hadamard matrix of size n x n
    D = np.diag(np.random.choice([-1, 1], size=n))  # Random diagonal matrix
    S = np.random.choice(n, size=d, replace=False)  # Random subsampling matrix
    Omega_L = (H @ D)[:, S] / np.sqrt(d)  # SRHT matrix (n x d)

    # Step 2: Compute B = (A A^T)^w A Omega_L
    B = A @ Omega_L  # Initial multiplication: A Omega_L
    for _ in range(w):
        B = (A @ A.T) @ B  # Power iteration: (A A^T) B

    # Step 3: Compute the orthogonal basis Q using QR decomposition
    Q, _ = np.linalg.qr(B)

    return Q

def srht_nndsvd_initialization(A, r, rOV, w):
    """
    Combine SRHT and NNDSVD for NMF initialization.
    """
    m, n = A.shape

    # Step 1: Compute low-rank approximation using SRHT
    Q = randomized_compression_srht(A, r, rOV, w)

    # Step 2: Project Q onto the non-negative orthant
    Q_non_neg = np.maximum(Q, 0)

    # Step 3: Apply NNDSVD to Q_non_neg to initialize W and H
    # Use the first r columns of Q_non_neg for initialization
    W_init = Q_non_neg[:, :r]

    # Initialize H using NNDSVD on the low-rank approximation
    _, H_init = _initialize_nmf(A, r, init='nndsvd')

    # Ensure W_init and H_init are C-contiguous
    W_init = np.ascontiguousarray(W_init)
    H_init = np.ascontiguousarray(H_init)

    return W_init, H_init

# Example usage
m, n = 1000, 500  # Dimensions of A
r = 50  # Target rank
rOV = 10  # Oversampling parameter
w = 2  # Power iteration exponent

# Generate a random non-negative matrix A
A = np.abs(np.random.randn(m, n))  # Ensure non-negativity

# Initialize W and H using SRHT + NNDSVD
W, H = srht_nndsvd_initialization(A, r, rOV, w)

# Run NMF
model = NMF(n_components=r, init='custom', random_state=0)
W_final = model.fit_transform(A, W=W, H=H)
H_final = model.components_

# Evaluate reconstruction error
reconstruction_error = np.linalg.norm(A - W_final @ H_final, ord='fro') / np.linalg.norm(A, ord='fro')
print("Normalized Reconstruction Error:", reconstruction_error)

Normalized Reconstruction Error: 0.5412531442585214




**NMF Structured Compression (Coord Desc. SKLearn)**

In [13]:
import numpy as np
from sklearn.decomposition import NMF

def structured_compression_cd(A, r, rOV, w, max_iter=100, tol=1e-4):
    """
    Compute NMF using custom initialization with compression matrices L and R.

    Parameters:
    A (numpy array): Input matrix (m x n)
    r (int): Target rank
    rOV (int): Oversampling parameter
    w (int): Exponent for the power iteration
    max_iter (int): Maximum number of iterations
    tol (float): Convergence tolerance

    Returns:
    W (numpy array): Nonnegative matrix (m x r)
    H (numpy array): Nonnegative matrix (r x n)
    """
    m, n = A.shape
    d = r + rOV  # Effective reduced dimension

    # Ensure A is nonnegative
    A = np.abs(A)

    # Step 1: Compute compression matrices L and R
    L = randomized_compression(A, r, rOV, w)  # L ∈ R^(m x d)
    R = randomized_compression(A.T, r, rOV, w).T  # R ∈ R^(d x n)

    # Step 2: Custom initialization for W and H using projections
    W = A @ R.T  # W ∈ R^(m x d)
    W = W[:, :r]  # Take the first r columns for W ∈ R^(m x r)

    H = L.T @ A  # H ∈ R^(d x n)
    H = H[:r, :]  # Take the first r rows for H ∈ R^(r x n)

    # Ensure W and H are nonnegative
    W = np.abs(W)
    H = np.abs(H)

    # Step 3: Use scikit-learn's NMF solver for updates
    nmf = NMF(n_components=r, init='custom', solver='cd', max_iter=max_iter, tol=tol, random_state=42)

    # Fit the model using custom initialization
    W = nmf.fit_transform(A, W=W, H=H)  # Update W and H using A
    H = nmf.components_

    # Step 4: Compute reconstruction error
    reconstruction_error = np.linalg.norm(A - W @ H, 'fro')
    normalized_error = reconstruction_error / np.linalg.norm(A, 'fro')
    print(f"Final Normalized Reconstruction Error = {normalized_error:.6f}")

    return W, H

In [15]:
# Example Usage
np.random.seed(42)
A = np.abs(np.random.randn(100, 50))  # Nonnegative matrix (100 x 50)
r = 5   # Target rank
rOV = 5 # Oversampling parameter
w = 2   # Exponent

W, H = structured_compression_cd(A, r, rOV, w,500)
print("Final W shape:", W.shape)
print("Final H shape:", H.shape)

Final Normalized Reconstruction Error = 0.535564
Final W shape: (100, 5)
Final H shape: (5, 50)


In [24]:
import numpy as np
from scipy.linalg import hadamard
from scipy.sparse import random as sparse_random
from sklearn.decomposition import NMF

def fast_jlt_transform(A, target_dim):
    """
    Apply the Fast Johnson-Lindenstrauss Transform (FJLT) to matrix A.

    Parameters:
    A (numpy array): Input matrix (m x n)
    target_dim (int): Target reduced dimension

    Returns:
    A_reduced (numpy array): Dimension-reduced matrix (target_dim x n)
    """
    m, n = A.shape

    # Step 1: Generate a Hadamard matrix (nearest power of 2)
    H_dim = 2**int(np.ceil(np.log2(m)))  # Get the nearest power of 2
    H = hadamard(H_dim)[:m, :m]  # Truncate to match dimensions

    # Step 2: Create a diagonal sign matrix D (random ±1)
    D = np.diag(np.random.choice([-1, 1], size=m))

    # Step 3: Apply Hadamard and sign-flipping
    HD = H @ D @ A  # (m x n)

    # Step 4: Generate a sparse projection matrix P (proper scaling)
    P = sparse_random(target_dim, m, density=1/target_dim, format='csr', random_state=42).toarray()
    P *= np.sqrt(1 / target_dim)  # Scale projection

    # Step 5: Compute the reduced matrix
    A_reduced = P @ HD  # (target_dim x n)

    return A_reduced

def nmf_with_fjlt_initialization(A, r, rOV, w, max_iter=100, tol=1e-4):
    """
    Compute NMF using FJLT for initialization of L and R.

    Parameters:
    A (numpy array): Input matrix (m x n)
    r (int): Target rank
    rOV (int): Oversampling parameter
    w (int): Exponent for the power iteration
    max_iter (int): Maximum number of iterations
    tol (float): Convergence tolerance

    Returns:
    W (numpy array): Nonnegative matrix (m x r)
    H (numpy array): Nonnegative matrix (r x n)
    """
    m, n = A.shape
    d = r + rOV  # Effective reduced dimension

    # Ensure A is nonnegative
    A = np.abs(A)

    # Step 1: Compute compression matrices L and R using FJLT
    L = fast_jlt_transform(A, d)  # L ∈ R^(d x m) = (10, 100)
    print(L.shape)
    R = fast_jlt_transform(A.T, d)  # R ∈ R^(d x n) = (10, 50)
    print(R.shape)
    # Step 2: Custom initialization for W and H using L and R
    W = A @ R.T  # W ∈ R^(m x d) = (100, 10)
    W = W[:, :r]  # Take the first r columns for W ∈ R^(m x r) = (100, 5)

    H = L.T @ A  # H ∈ R^(d x n) = (10, 50)
    H = H[:r, :]  # Take the first r rows for H ∈ R^(r x n) = (5, 50)

    # Ensure W and H are nonnegative
    W = np.abs(W)
    H = np.abs(H)

    # Step 3: Use scikit-learn's NMF solver for updates
    nmf = NMF(n_components=r, init='custom', solver='mu', max_iter=max_iter, tol=tol, random_state=42)

    # Fit the model using custom initialization
    W = nmf.fit_transform(A, W=W, H=H)  # Update W and H using A
    H = nmf.components_

    # Step 4: Compute reconstruction error
    reconstruction_error = np.linalg.norm(A - W @ H, 'fro')
    normalized_error = reconstruction_error / np.linalg.norm(A, 'fro')
    print(f"Final Normalized Reconstruction Error = {normalized_error:.6f}")

    return W, H

# Example Usage
np.random.seed(42)
A = np.abs(np.random.randn(100, 50))  # Nonnegative matrix (100 x 50)
r = 5   # Target rank
rOV = 5 # Oversampling parameter
w = 2   # Exponent

W, H = nmf_with_fjlt_initialization(A, r, rOV, w)
print("Final W shape:", W.shape)
print("Final H shape:", H.shape)
print("W (nonnegative):\n", W)
print("H (nonnegative):\n", H)

(10, 50)
(10, 100)


ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 100 is different from 50)