In [1]:
import numpy as np

**Structured Random Compression Algorithm Fig 1**

In [3]:
def randomized_compression(A, r, rOV, w):
    """
    Compute a compression matrix Q for A using a randomized algorithm.

    Parameters:
    A (numpy array): Input matrix (m x n)
    r (int): Target rank
    rOV (int): Oversampling parameter
    w (int): Exponent for the power iteration

    Returns:
    Q (numpy array): Compression matrix (m x (r + rOV))
    """
    m, n = A.shape
    d = r + rOV  # Effective reduced dimension

    # Step 1: Draw a Gaussian random matrix Omega_L
    Omega_L = np.random.randn(n, d)

    # Step 2: Compute B = (A A^T)^w A Omega_L
    B = A @ Omega_L  # Initial multiplication: A Omega_L
    for _ in range(w):
        B = A @ (A.T @ B)  # Power iteration: (A A^T) B

    # Step 3: Compute the orthogonal basis Q using QR decomposition
    Q, _ = np.linalg.qr(B)

    return Q

In [4]:
# Example Usage
np.random.seed(42)
A = np.random.randn(100, 50)  # Input matrix (100 x 50)
r = 5   # Target rank
rOV = 5 # Oversampling parameter
w = 2   # Exponent

Q = randomized_compression(A, r, rOV, w)
print("Compression matrix Q shape:", Q.shape)

Compression matrix Q shape: (100, 10)


**NMF with Structured Random Compression Fig 2 (Multiplicative Updates)**

In [23]:
def nmf_with_compression(A, r, rOV, w, max_iter=100, tol=1e-10):
    """
    Compute NMF with compression matrices L and R using multiplicative updates.

    Parameters:
    A (numpy array): Input matrix (m x n)
    r (int): Target rank
    rOV (int): Oversampling parameter
    w (int): Exponent for the power iteration
    max_iter (int): Maximum number of iterations
    tol (float): Convergence tolerance

    Returns:
    X_k (numpy array): Nonnegative matrix (m x r)
    Y_k (numpy array): Nonnegative matrix (r x n)
    """
    m, n = A.shape
    d = r + rOV  # Effective reduced dimension

    # Ensure A is nonnegative
    A = np.abs(A)

    # Step 1: Compute compression matrices L and R
    L = randomized_compression(A, r, rOV, w)  # L ∈ R^(m x d)
    R = randomized_compression(A.T, r, rOV, w).T  # R ∈ R^(d x n)

    # Step 2: Initialize Y_k with nonnegative values
    Y_k = np.abs(np.random.randn(r, n))  # Y_k ∈ R^(r x n)

    # Step 3: Compute compressed matrices
    A_check = A @ R.T  # A_check ∈ R^(m x d)
    A_hat = L.T @ A  # A_hat ∈ R^(d x n)

    # Ensure compressed matrices are nonnegative
    A_check = np.abs(A_check)
    A_hat = np.abs(A_hat)

    # Initialize X_k
    X_k = np.abs(np.random.randn(m, r))  # X_k ∈ R^(m x r)

    norm_A = np.linalg.norm(A, 'fro')
    
    # Iterate until convergence
    prev_error = np.inf
    for k in range(max_iter):
        # Step 4: Compute Y_check_k = Y_k R^T
        Y_check_k = Y_k @ R.T  # Y_check_k ∈ R^(r x d)

        # Step 5: Update X_k+1 using multiplicative updates
        numerator_X = A_check @ Y_check_k.T
        denominator_X = X_k @ (Y_check_k @ Y_check_k.T)
        X_k_plus_1 = X_k * (numerator_X / denominator_X)

        # Step 6: Compute X_hat_k+1 = L^T X_k+1
        X_hat_k_plus_1 = L.T @ X_k_plus_1  # X_hat_k+1 ∈ R^(d x r)

        # Step 7: Update Y_k+1 using multiplicative updates
        numerator_Y = X_hat_k_plus_1.T @ A_hat
        denominator_Y = (X_hat_k_plus_1.T @ X_hat_k_plus_1) @ Y_k
        Y_k_plus_1 = Y_k * (numerator_Y / denominator_Y)

        # Compute reconstruction error
        reconstruction_error = np.linalg.norm(A - X_k_plus_1 @ Y_k_plus_1, 'fro')
        normalized_error = reconstruction_error / norm_A  # Normalized error
        print(f"Iteration {k+1}: Normalized Reconstruction Error = {normalized_error:.6f}")

        # Convergence check
        if abs(prev_error - normalized_error) < tol:
            break
        prev_error = normalized_error


        # Update X_k and Y_k
        X_k, Y_k = X_k_plus_1, Y_k_plus_1

    return X_k, Y_k

In [24]:
# Example Usage
np.random.seed(42)
A = np.abs(np.random.randn(100, 50))  # Nonnegative matrix (100 x 50)
r = 5   # Target rank
rOV = 5 # Oversampling parameter
w = 2   # Exponent

Xk, Yk = nmf_with_compression(A, r, rOV, w)
print("Final X shape:", Xk.shape)
print("Final Y shape:", Yk.shape)

Iteration 1: Normalized Reconstruction Error = 1.719811
Iteration 2: Normalized Reconstruction Error = 1.719563
Iteration 3: Normalized Reconstruction Error = 1.719371
Iteration 4: Normalized Reconstruction Error = 1.719237
Iteration 5: Normalized Reconstruction Error = 1.719153
Iteration 6: Normalized Reconstruction Error = 1.719108
Iteration 7: Normalized Reconstruction Error = 1.719097
Iteration 8: Normalized Reconstruction Error = 1.719112
Iteration 9: Normalized Reconstruction Error = 1.719151
Iteration 10: Normalized Reconstruction Error = 1.719209
Iteration 11: Normalized Reconstruction Error = 1.719284
Iteration 12: Normalized Reconstruction Error = 1.719374
Iteration 13: Normalized Reconstruction Error = 1.719476
Iteration 14: Normalized Reconstruction Error = 1.719589
Iteration 15: Normalized Reconstruction Error = 1.719710
Iteration 16: Normalized Reconstruction Error = 1.719838
Iteration 17: Normalized Reconstruction Error = 1.719971
Iteration 18: Normalized Reconstruction 

**NMF with Structured Compression (MU SkLearn)**

In [11]:
import numpy as np
from sklearn.decomposition import NMF

def structured_compression_mu(A, r, rOV, w, max_iter=100, tol=1e-4):
    """
    Compute NMF using custom initialization with compression matrices L and R.

    Parameters:
    A (numpy array): Input matrix (m x n)
    r (int): Target rank
    rOV (int): Oversampling parameter
    w (int): Exponent for the power iteration
    max_iter (int): Maximum number of iterations
    tol (float): Convergence tolerance

    Returns:
    W (numpy array): Nonnegative matrix (m x r)
    H (numpy array): Nonnegative matrix (r x n)
    """
    m, n = A.shape
    d = r + rOV  # Effective reduced dimension

    # Ensure A is nonnegative
    A = np.abs(A)

    # Step 1: Compute compression matrices L and R
    L = randomized_compression(A, r, rOV, w)  # L ∈ R^(m x d)
    R = randomized_compression(A.T, r, rOV, w).T  # R ∈ R^(d x n)

    # Step 2: Custom initialization for W and H using projections
    W = A @ R.T  # W ∈ R^(m x d)
    W = W[:, :r]  # Take the first r columns for W ∈ R^(m x r)

    H = L.T @ A  # H ∈ R^(d x n)
    H = H[:r, :]  # Take the first r rows for H ∈ R^(r x n)

    # Ensure W and H are nonnegative
    W = np.abs(W)
    H = np.abs(H)

    # Step 3: Use scikit-learn's NMF solver for updates
    nmf = NMF(n_components=r, init='custom', solver='mu', max_iter=max_iter, tol=tol, random_state=42)

    # Fit the model using custom initialization
    W = nmf.fit_transform(A, W=W, H=H)  # Update W and H using A
    H = nmf.components_

    # Step 4: Compute reconstruction error
    reconstruction_error = np.linalg.norm(A - W @ H, 'fro')
    normalized_error = reconstruction_error / np.linalg.norm(A, 'fro')
    print(f"Final Normalized Reconstruction Error = {normalized_error:.6f}")

    return W, H

In [12]:
# Example Usage
np.random.seed(42)
A = np.abs(np.random.randn(100, 50))  # Nonnegative matrix (100 x 50)
r = 5   # Target rank
rOV = 5 # Oversampling parameter
w = 2   # Exponent

W, H = structured_compression_mu(A, r, rOV, w)
print("Final W shape:", W.shape)
print("Final H shape:", H.shape)

Final Normalized Reconstruction Error = 0.550010
Final W shape: (100, 5)
Final H shape: (5, 50)


**NMF Structured Compression (Coord Desc. SKLearn)**

In [13]:
import numpy as np
from sklearn.decomposition import NMF

def structured_compression_cd(A, r, rOV, w, max_iter=100, tol=1e-4):
    """
    Compute NMF using custom initialization with compression matrices L and R.

    Parameters:
    A (numpy array): Input matrix (m x n)
    r (int): Target rank
    rOV (int): Oversampling parameter
    w (int): Exponent for the power iteration
    max_iter (int): Maximum number of iterations
    tol (float): Convergence tolerance

    Returns:
    W (numpy array): Nonnegative matrix (m x r)
    H (numpy array): Nonnegative matrix (r x n)
    """
    m, n = A.shape
    d = r + rOV  # Effective reduced dimension

    # Ensure A is nonnegative
    A = np.abs(A)

    # Step 1: Compute compression matrices L and R
    L = randomized_compression(A, r, rOV, w)  # L ∈ R^(m x d)
    R = randomized_compression(A.T, r, rOV, w).T  # R ∈ R^(d x n)

    # Step 2: Custom initialization for W and H using projections
    W = A @ R.T  # W ∈ R^(m x d)
    W = W[:, :r]  # Take the first r columns for W ∈ R^(m x r)

    H = L.T @ A  # H ∈ R^(d x n)
    H = H[:r, :]  # Take the first r rows for H ∈ R^(r x n)

    # Ensure W and H are nonnegative
    W = np.abs(W)
    H = np.abs(H)

    # Step 3: Use scikit-learn's NMF solver for updates
    nmf = NMF(n_components=r, init='custom', solver='cd', max_iter=max_iter, tol=tol, random_state=42)

    # Fit the model using custom initialization
    W = nmf.fit_transform(A, W=W, H=H)  # Update W and H using A
    H = nmf.components_

    # Step 4: Compute reconstruction error
    reconstruction_error = np.linalg.norm(A - W @ H, 'fro')
    normalized_error = reconstruction_error / np.linalg.norm(A, 'fro')
    print(f"Final Normalized Reconstruction Error = {normalized_error:.6f}")

    return W, H

In [15]:
# Example Usage
np.random.seed(42)
A = np.abs(np.random.randn(100, 50))  # Nonnegative matrix (100 x 50)
r = 5   # Target rank
rOV = 5 # Oversampling parameter
w = 2   # Exponent

W, H = structured_compression_cd(A, r, rOV, w,500)
print("Final W shape:", W.shape)
print("Final H shape:", H.shape)

Final Normalized Reconstruction Error = 0.535564
Final W shape: (100, 5)
Final H shape: (5, 50)


In [24]:
import numpy as np
from scipy.linalg import hadamard
from scipy.sparse import random as sparse_random
from sklearn.decomposition import NMF

def fast_jlt_transform(A, target_dim):
    """
    Apply the Fast Johnson-Lindenstrauss Transform (FJLT) to matrix A.

    Parameters:
    A (numpy array): Input matrix (m x n)
    target_dim (int): Target reduced dimension

    Returns:
    A_reduced (numpy array): Dimension-reduced matrix (target_dim x n)
    """
    m, n = A.shape

    # Step 1: Generate a Hadamard matrix (nearest power of 2)
    H_dim = 2**int(np.ceil(np.log2(m)))  # Get the nearest power of 2
    H = hadamard(H_dim)[:m, :m]  # Truncate to match dimensions

    # Step 2: Create a diagonal sign matrix D (random ±1)
    D = np.diag(np.random.choice([-1, 1], size=m))

    # Step 3: Apply Hadamard and sign-flipping
    HD = H @ D @ A  # (m x n)

    # Step 4: Generate a sparse projection matrix P (proper scaling)
    P = sparse_random(target_dim, m, density=1/target_dim, format='csr', random_state=42).toarray()
    P *= np.sqrt(1 / target_dim)  # Scale projection

    # Step 5: Compute the reduced matrix
    A_reduced = P @ HD  # (target_dim x n)

    return A_reduced

def nmf_with_fjlt_initialization(A, r, rOV, w, max_iter=100, tol=1e-4):
    """
    Compute NMF using FJLT for initialization of L and R.

    Parameters:
    A (numpy array): Input matrix (m x n)
    r (int): Target rank
    rOV (int): Oversampling parameter
    w (int): Exponent for the power iteration
    max_iter (int): Maximum number of iterations
    tol (float): Convergence tolerance

    Returns:
    W (numpy array): Nonnegative matrix (m x r)
    H (numpy array): Nonnegative matrix (r x n)
    """
    m, n = A.shape
    d = r + rOV  # Effective reduced dimension

    # Ensure A is nonnegative
    A = np.abs(A)

    # Step 1: Compute compression matrices L and R using FJLT
    L = fast_jlt_transform(A, d)  # L ∈ R^(d x m) = (10, 100)
    print(L.shape)
    R = fast_jlt_transform(A.T, d)  # R ∈ R^(d x n) = (10, 50)
    print(R.shape)
    # Step 2: Custom initialization for W and H using L and R
    W = A @ R.T  # W ∈ R^(m x d) = (100, 10)
    W = W[:, :r]  # Take the first r columns for W ∈ R^(m x r) = (100, 5)

    H = L.T @ A  # H ∈ R^(d x n) = (10, 50)
    H = H[:r, :]  # Take the first r rows for H ∈ R^(r x n) = (5, 50)

    # Ensure W and H are nonnegative
    W = np.abs(W)
    H = np.abs(H)

    # Step 3: Use scikit-learn's NMF solver for updates
    nmf = NMF(n_components=r, init='custom', solver='mu', max_iter=max_iter, tol=tol, random_state=42)

    # Fit the model using custom initialization
    W = nmf.fit_transform(A, W=W, H=H)  # Update W and H using A
    H = nmf.components_

    # Step 4: Compute reconstruction error
    reconstruction_error = np.linalg.norm(A - W @ H, 'fro')
    normalized_error = reconstruction_error / np.linalg.norm(A, 'fro')
    print(f"Final Normalized Reconstruction Error = {normalized_error:.6f}")

    return W, H

# Example Usage
np.random.seed(42)
A = np.abs(np.random.randn(100, 50))  # Nonnegative matrix (100 x 50)
r = 5   # Target rank
rOV = 5 # Oversampling parameter
w = 2   # Exponent

W, H = nmf_with_fjlt_initialization(A, r, rOV, w)
print("Final W shape:", W.shape)
print("Final H shape:", H.shape)
print("W (nonnegative):\n", W)
print("H (nonnegative):\n", H)

(10, 50)
(10, 100)


ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 100 is different from 50)