In [4]:
import numpy as np
from sklearn.random_projection import GaussianRandomProjection
from sklearn.decomposition import NMF

# Successive Projection Algorithm (SPA) with JL Dimensionality Reduction for NMF
def jl_spa_nmf(X, r, d):
    """
    Apply Johnson-Lindenstrauss Lemma for dimensionality reduction, then run SPA and NMF.
    
    Parameters:
    X (numpy array): Input data matrix (m x n)
    r (int): Number of basis columns to select
    d (int): Target reduced dimension using JL transform
    
    Returns:
    W (numpy array): Basis matrix from NMF
    H (numpy array): Coefficient matrix from NMF
    """
    # Step 1: Apply Johnson-Lindenstrauss Transform
    jl_transform = GaussianRandomProjection(n_components=d)
    X_reduced = jl_transform.fit_transform(X.T).T  # Project columns into lower dimension
    
    # Step 2: Successive Projection Algorithm (SPA)
    selected_indices = []
    residual = X_reduced.copy()
    
    for _ in range(r):
        # Select the column with the largest norm
        col_norms = np.linalg.norm(residual, axis=0)
        selected_idx = np.argmax(col_norms)
        selected_indices.append(selected_idx)
        
        # Project remaining columns onto the orthogonal complement of selected column
        selected_col = residual[:, selected_idx].reshape(-1, 1)
        projection = (selected_col @ (selected_col.T @ residual)) / (selected_col.T @ selected_col)
        residual -= projection
    
    # Step 3: Extract selected columns from original X for NMF initialization
    W_init = X[:, selected_indices]
    nmf_model = NMF(n_components=r, init='custom', solver='mu', max_iter=500)
    H_init = np.abs(np.random.rand(r, X.shape[1]))  # Randomly initialize non-negative H
    
    # Fit NMF using custom initialization
    W = nmf_model.fit_transform(X, W=W_init, H=H_init)
    H = nmf_model.components_
    
    return W, H

# Example Usage
np.random.seed(42)
X = np.abs(np.random.randn(100, 50))  # Nonnegative matrix (100 features, 50 samples)
r = 5  # Number of basis vectors
JL_dim = 20  # Reduced dimension via JL transform

W, H = jl_spa_nmf(X, r, JL_dim)
print("W shape:", W.shape)
print("H shape:", H.shape)

W shape: (100, 5)
H shape: (5, 50)


In [10]:
np.linalg.norm(X - W @ H, 'fro')

np.float64(37.91220921279779)

In [25]:
import numpy as np
from sklearn.random_projection import GaussianRandomProjection

# Successive Projection Algorithm (SPA) with JL Dimensionality Reduction for SNMF
def jl_spa_nmf(X, r, d):
    """
    Apply Johnson-Lindenstrauss Lemma for dimensionality reduction, then run SPA for NMF initialization.
    
    Parameters:
    X (numpy array): Input data matrix (m x n)
    r (int): Number of basis columns to select (components)
    d (int): Target reduced dimension using JL transform
    
    Returns:
    W (numpy array): Initialized basis matrix for NMF (m x r)
    H (numpy array): Initialized coefficient matrix for NMF (r x n)
    """
    # Step 1: Apply Johnson-Lindenstrauss Transform
    jl_transform = GaussianRandomProjection(n_components=d)
    X_reduced = jl_transform.fit_transform(X.T).T  # Project columns into lower dimension
    
    # Step 2: Successive Projection Algorithm (SPA)
    selected_indices = []
    residual = X_reduced.copy()
    
    for _ in range(r):
        # Select the column with the largest norm
        col_norms = np.linalg.norm(residual, axis=0)
        selected_idx = np.argmax(col_norms)
        selected_indices.append(selected_idx)
        
        # Project remaining columns onto the orthogonal complement of selected column
        selected_col = residual[:, selected_idx].reshape(-1, 1)
        projection = (selected_col @ (selected_col.T @ residual)) / (selected_col.T @ selected_col)
        residual -= projection
    
    # Step 3: Extract selected columns from original X for NMF initialization
    W_init = X[:, selected_indices]
    
    # Initialize H matrix randomly
    H_init = np.abs(np.random.rand(r, X.shape[1]))  # Random initialization for H
    
    return W_init, H_init

# Separable NMF Optimization (H update only, W fixed)
def snmf(X, W_init, H_init, max_iter=500, tol=1e-4):
    """
    Perform Separable NMF with fixed W and initialized H.
    
    Parameters:
    X (numpy array): Input data matrix (m x n)
    W_init (numpy array): Initialized basis matrix for NMF (m x r), fixed
    H_init (numpy array): Initialized coefficient matrix for NMF (r x n)
    max_iter (int): Maximum number of iterations
    tol (float): Tolerance for convergence
    
    Returns:
    H (numpy array): Learned coefficient matrix from NMF
    """
    # Initialize H with provided initialization
    H = H_init

    # List to store reconstruction errors at each iteration
    reconstruction_errors = []

    for i in range(max_iter):
        # Update H using multiplicative update rule (keeping W fixed)
        H = np.maximum(0, H * (W_init.T @ X) / (W_init.T @ W_init @ H + 1e-10))  # Avoid division by zero
        
        # Compute reconstruction error
        reconstruction_error = np.linalg.norm(X - W_init @ H, 'fro')
        reconstruction_errors.append(reconstruction_error)  # Log the error
        
        # Check for convergence
        if reconstruction_error < tol:
            print(f"Converged at iteration {i+1} with error: {reconstruction_error:.6f}")
            break
    
    # Optionally, print the reconstruction errors at each iteration
    # print("Reconstruction errors at each iteration:", reconstruction_errors)

    return H, reconstruction_errors

# Example Usage
np.random.seed(42)
X = np.abs(np.random.randn(100, 50))  # Nonnegative matrix (100 features, 50 samples)
r = 5  # Number of components (basis vectors)
JL_dim = 20  # Reduced dimension via JL transform

# Step 1: Apply JL and SPA to initialize W and H
W_init, H_init = jl_spa_nmf(X, r, JL_dim)

# Step 2: Perform Separable NMF (SNMF) by updating H only, keeping W fixed
H, reconstruction_errors = snmf(X, W_init, H_init)

# Check final shape of H and print reconstruction errors
print("H shape:", H.shape)

H shape: (5, 50)


In [28]:
import plotly.graph_objects as go

In [29]:
fig = go.Figure()

fig.add_trace(
    go.Scatter(y = reconstruction_errors)
)

fig.show()

In [31]:
import numpy as np

# Fast Johnson-Lindenstrauss Transform (FJLT)
def fjlt(X, d):
    """
    Apply Fast Johnson-Lindenstrauss Transform (FJLT) for dimensionality reduction using Hadamard matrix.
    
    Parameters:
    X (numpy array): Input data matrix (m x n)
    d (int): Target reduced dimension (d << m)

    Returns:
    X_reduced (numpy array): Reduced dimensionality matrix (d x n)
    """
    # Dimensions of input matrix X (m x n)
    m, n = X.shape
    
    # Generate a random Hadamard matrix (of size m x d)
    H = np.random.choice([-1, 1], size=(m, d))
    
    # Project X using Hadamard matrix for FJLT
    X_reduced = H.T @ X  # This is a fast projection
    
    return X_reduced

# Successive Projection Algorithm (SPA) with FJLT for NMF
def fjlt_spa_nmf(X, r, d):
    """
    Apply Fast Johnson-Lindenstrauss Transform (FJLT) for dimensionality reduction, then run SPA for NMF initialization.
    
    Parameters:
    X (numpy array): Input data matrix (m x n)
    r (int): Number of basis columns to select (components)
    d (int): Target reduced dimension using FJLT transform
    
    Returns:
    W (numpy array): Initialized basis matrix for NMF (m x r)
    H (numpy array): Initialized coefficient matrix for NMF (r x n)
    """
    # Step 1: Apply Fast Johnson-Lindenstrauss Transform (FJLT)
    X_reduced = fjlt(X, d)
    
    # Step 2: Successive Projection Algorithm (SPA)
    selected_indices = []
    residual = X_reduced.copy()
    
    for _ in range(r):
        # Select the column with the largest norm
        col_norms = np.linalg.norm(residual, axis=0)
        selected_idx = np.argmax(col_norms)
        selected_indices.append(selected_idx)
        
        # Project remaining columns onto the orthogonal complement of selected column
        selected_col = residual[:, selected_idx].reshape(-1, 1)
        projection = (selected_col @ (selected_col.T @ residual)) / (selected_col.T @ selected_col)
        residual -= projection
    
    # Step 3: Extract selected columns from original X for NMF initialization
    W_init = X[:, selected_indices]
    
    # Initialize H matrix randomly
    H_init = np.abs(np.random.rand(r, X.shape[1]))  # Random initialization for H
    
    return W_init, H_init

# Separable NMF Optimization (H update only, W fixed)
def snmf(X, W_init, H_init, max_iter=500, tol=1e-4):
    """
    Perform Separable NMF with fixed W and initialized H.
    
    Parameters:
    X (numpy array): Input data matrix (m x n)
    W_init (numpy array): Initialized basis matrix for NMF (m x r), fixed
    H_init (numpy array): Initialized coefficient matrix for NMF (r x n)
    max_iter (int): Maximum number of iterations
    tol (float): Tolerance for convergence
    
    Returns:
    H (numpy array): Learned coefficient matrix from NMF
    """
    # Initialize H with provided initialization
    H = H_init

    # List to store reconstruction errors at each iteration
    reconstruction_errors = []

    for i in range(max_iter):
        # Update H using multiplicative update rule (keeping W fixed)
        H = np.maximum(0, H * (W_init.T @ X) / (W_init.T @ W_init @ H + 1e-10))  # Avoid division by zero
        
        # Compute reconstruction error
        reconstruction_error = np.linalg.norm(X - W_init @ H, 'fro')
        reconstruction_errors.append(reconstruction_error)  # Log the error
        
        # Check for convergence
        if reconstruction_error < tol:
            print(f"Converged at iteration {i+1} with error: {reconstruction_error:.6f}")
            break
    
    # Optionally, print the reconstruction errors at each iteration
    print("Reconstruction errors at each iteration:", reconstruction_errors)

    return H, reconstruction_errors

# Example Usage
np.random.seed(42)
X = np.abs(np.random.randn(100, 50))  # Nonnegative matrix (100 features, 50 samples)
r = 5  # Number of components (basis vectors)
FJL_dim = 20  # Reduced dimension via FJLT transform

# Step 1: Apply FJLT and SPA to initialize W and H
W_init, H_init = fjlt_spa_nmf(X, r, FJL_dim)

# Step 2: Perform Separable NMF (SNMF) by updating H only, keeping W fixed
H, reconstruction_errors = snmf(X, W_init, H_init)

# Check final shape of H and print reconstruction errors
print("H shape:", H.shape)

Reconstruction errors at each iteration: [np.float64(45.072924025419105), np.float64(44.5897352585997), np.float64(44.16368101994192), np.float64(43.79614861774031), np.float64(43.48778496818529), np.float64(43.23805508876769), np.float64(43.04181891433104), np.float64(42.88975562601729), np.float64(42.771809355855765), np.float64(42.6795177065883), np.float64(42.60649826136999), np.float64(42.54810530968535), np.float64(42.50095847614004), np.float64(42.462566531564896), np.float64(42.4310655317967), np.float64(42.40504164091064), np.float64(42.38340968671164), np.float64(42.365327897650964), np.float64(42.35013679587984), np.float64(42.33731480143918), np.float64(42.326445694949), np.float64(42.317194599017775), np.float64(42.3092901026846), np.float64(42.30251082251092), np.float64(42.2966751796222), np.float64(42.29163352608282), np.float64(42.287262007999196), np.float64(42.28345773119466), np.float64(42.28013491866341), np.float64(42.27722183375622), np.float64(42.27465830157144)

In [32]:
fig = go.Figure()

fig.add_trace(
    go.Scatter(y = reconstruction_errors)
)

fig.show()

*Structured Random Compression*

In [None]:
import numpy as np
from scipy.linalg import qr

def structured_random_compression(A, r, rOV, w):
    """
    Structured random compression algorithm.
    
    Parameters:
    A (numpy array): Input matrix A (m x n)
    r (int): Target rank
    rOV (int): Oversampling parameter
    w (int): Exponent for the power of AAT
    
    Returns:
    Q (numpy array): Compression matrix (m x (r + rOV))
    """
    # Step 1: Draw a Gaussian random matrix Ω_L (n x (r + rOV))
    n = A.shape[1]
    Omega_L = np.random.randn(n, r + rOV)  # Gaussian random matrix
    
    # Step 2: Compute B = (A A^T)^w A Omega_L
    AtA = np.dot(A, A.T)  # A A^T (m x m)
    AtA_w = np.linalg.matrix_power(AtA, w)  # (A A^T)^w (m x m)
    B = np.dot(AtA_w, np.dot(A, Omega_L))  # B = (A A^T)^w A Omega_L (m x (r + rOV))
    
    # Step 3: QR decomposition of B to get Q
    Q, _ = qr(B, mode='economic')  # QR decomposition (m x (r + rOV))
    
    return Q

# Example usage
np.random.seed(42)

# Create a random matrix A (100 samples, 50 features)
A = np.abs(np.random.randn(100, 50))

# Set parameters: rank (r), oversampling (rOV), and exponent (w)
r = 10
rOV = 5
w = 2

# Apply the structured random compression
Q = structured_random_compression(A, r, rOV, w)

print("Q shape:", Q.shape)

Q shape: (100, 15)


*NMF with Structured Random Projection*

In [34]:
import numpy as np
from scipy.linalg import qr
from scipy.optimize import nnls

def structured_random_compression(A, r, rOV, w):
    """
    Structured random compression algorithm.
    
    Parameters:
    A (numpy array): Input matrix A (m x n)
    r (int): Target rank
    rOV (int): Oversampling parameter
    w (int): Exponent for the power of AAT
    
    Returns:
    Q (numpy array): Compression matrix (m x (r + rOV))
    """
    n = A.shape[1]
    Omega_L = np.random.randn(n, r + rOV)  # Gaussian random matrix
    
    # Step 2: Compute B = (A A^T)^w A Omega_L
    AtA = np.dot(A, A.T)  # A A^T (m x m)
    AtA_w = np.linalg.matrix_power(AtA, w)  # (A A^T)^w (m x m)
    B = np.dot(AtA_w, np.dot(A, Omega_L))  # B = (A A^T)^w A Omega_L (m x (r + rOV))
    
    # Step 3: QR decomposition of B to get Q
    Q, _ = qr(B, mode='economic')  # QR decomposition (m x (r + rOV))
    
    return Q

def nmf_with_compression(A, r, rOV, w, max_iter=100, tol=1e-5):
    """
    NMF using structured random compression as described in the algorithm.
    
    Parameters:
    A (numpy array): Input matrix A (m x n)
    r (int): Target rank
    rOV (int): Oversampling parameter
    w (int): Exponent for the power of AAT
    max_iter (int): Maximum number of iterations
    tol (float): Convergence tolerance
    
    Returns:
    Xk (numpy array): Nonnegative matrix Xk (m x r)
    Yk (numpy array): Nonnegative matrix Yk (r x n)
    """
    # Step 1: Compute compression matrices L and R
    L = structured_random_compression(A, r, rOV, w)
    R = L.T  # R is the transpose of L (r+rOV x m)
    
    # Step 2: Initialize Yk (r x n) with random nonnegative values
    m, n = A.shape
    Yk = np.abs(np.random.randn(r, n))  # Random initialization
    
    # Initialize Xk (m x r) as a random nonnegative matrix
    Xk = np.abs(np.random.randn(m, r))
    
    # Iterate until convergence
    for k in range(max_iter):
        # Step 5: Compute Y_k tilde = Y_k * R.T
        Yk_tilde = np.dot(Yk, R.T)
        
        # Step 6: Solve for Xk+1 using NNLS (Nonnegative Least Squares)
        Xk_plus_1 = np.zeros_like(Xk)
        for i in range(m):
            # NNLS optimization for each row i
            Xk_plus_1[i, :], _ = nnls(Yk_tilde, A[i, :])
        
        # Step 7: Compute X_hat_k+1 = L^T * Xk+1
        X_hat_k_plus_1 = np.dot(L.T, Xk_plus_1)
        
        # Step 8: Solve for Yk+1 using NNLS
        Yk_plus_1 = np.zeros_like(Yk)
        for j in range(n):
            # NNLS optimization for each column j
            Yk_plus_1[:, j], _ = nnls(X_hat_k_plus_1.T, A[:, j])
        
        # Check for convergence (Frobenius norm difference)
        diff_X = np.linalg.norm(Xk_plus_1 - Xk)
        diff_Y = np.linalg.norm(Yk_plus_1 - Yk)
        
        if diff_X < tol and diff_Y < tol:
            print(f"Convergence reached at iteration {k+1}")
            break
        
        # Update Xk and Yk for the next iteration
        Xk = Xk_plus_1
        Yk = Yk_plus_1
    
    return Xk, Yk

# Example usage
np.random.seed(42)

# Create a random matrix A (100 samples, 50 features)
A = np.abs(np.random.randn(100, 50))

# Set parameters: rank (r), oversampling (rOV), and exponent (w)
r = 10
rOV = 5
w = 2

# Apply NMF with structured random compression
Xk, Yk = nmf_with_compression(A, r, rOV, w)

print("Xk shape:", Xk.shape)
print("Yk shape:", Yk.shape)

ValueError: shapes (10,50) and (100,15) not aligned: 50 (dim 1) != 100 (dim 0)

In [35]:
import numpy as np

# Function to compute reconstruction error (Frobenius norm)
def compute_reconstruction_error(A, Xk, Yk):
    """
    Compute the reconstruction error using Frobenius norm.
    
    Parameters:
    A (numpy array): Original matrix A (m x n)
    Xk (numpy array): Factorized matrix Xk (m x r)
    Yk (numpy array): Factorized matrix Yk (r x n)
    
    Returns:
    float: Reconstruction error (Frobenius norm)
    """
    # Reconstruct the matrix A_hat
    A_hat = np.dot(Xk, Yk)
    
    # Compute the Frobenius norm of the difference between A and A_hat
    error = np.linalg.norm(A - A_hat, 'fro')
    return error

# Example usage:
np.random.seed(42)

# Create a random matrix A (100 samples, 50 features)
A = np.abs(np.random.randn(100, 50))

# Set parameters: rank (r), oversampling (rOV), and exponent (w)
r = 10
rOV = 5
w = 2

# Apply NMF with structured random compression
Xk, Yk = nmf_with_compression(A, r, rOV, w)

# Compute the reconstruction error
reconstruction_error = compute_reconstruction_error(A, Xk, Yk)
print(f"Reconstruction Error (Frobenius norm): {reconstruction_error}")


ValueError: shapes (10,50) and (100,15) not aligned: 50 (dim 1) != 100 (dim 0)

In [36]:
import numpy as np

# Function to apply structured random compression-based NMF
def nmf_with_compression(A, r, rOV, w, max_iter=100):
    m, n = A.shape
    
    # Step 1: Compute compression matrices L and R
    # Draw Gaussian random matrix Omega_L (n x (r + rOV))
    Omega_L = np.random.randn(n, r + rOV)
    # Compute the matrix B = A * A^T * w * A * Omega_L
    B = np.dot(A, A.T) ** w  # A * A^T
    B = np.dot(B, Omega_L)   # B = A * A^T * Omega_L
    # Perform QR decomposition to get Q, an orthogonal matrix
    Q, _ = np.linalg.qr(B)   # Q will be our L matrix (m x (r + rOV))
    
    # Step 2: Initialize Y_k (r x n), can be done with random values
    Yk = np.abs(np.random.randn(r, n))  # Random initialization for Y_k
    
    # Step 3: Iterative NMF using structured random compression
    Xk = np.zeros((m, r))  # Initialize X_k as (m x r)
    
    for k in range(max_iter):
        # Step 5: Compute Y_k tilde = Y_k * R.T
        R = np.random.randn(r + rOV, n)  # Generate R matrix (r + rOV, n)
        Yk_tilde = np.dot(Yk, R.T)  # Matrix multiplication (r x n) * (n x (r + rOV)) -> (r x (r + rOV))
        
        # Step 6: Solve for Xk+1 using Nonnegative Least Squares
        # This part can be handled using any NNLS solver
        # For simplicity, using random initialization and assuming a solver is used here
        Xk_plus_1 = np.abs(np.random.randn(m, r))  # Placeholder
        
        # Step 7: Update Xk (project it back using L.T)
        Xk_plus_1 = np.dot(Q.T, Xk_plus_1)  # L^T * Xk+1
        
        # Step 8: Solve for Yk+1 using Nonnegative Least Squares
        Yk_plus_1 = np.abs(np.random.randn(r, n))  # Placeholder
        
        # Step 9: Update Yk
        Yk = Yk_plus_1  # Update Y_k with new values

        # Optionally, compute the reconstruction error at each iteration
        reconstruction_error = np.linalg.norm(A - np.dot(Xk, Yk), 'fro')
        print(f"Iteration {k+1}: Reconstruction Error: {reconstruction_error}")

    return Xk, Yk

# Example usage:
np.random.seed(42)

# Create a random matrix A (100 samples, 50 features)
A = np.abs(np.random.randn(100, 50))

# Set parameters: rank (r), oversampling (rOV), and exponent (w)
r = 10
rOV = 5
w = 2

# Apply NMF with structured random compression
Xk, Yk = nmf_with_compression(A, r, rOV, w)

# Compute the reconstruction error after the final iteration
reconstruction_error = np.linalg.norm(A - np.dot(Xk, Yk), 'fro')
print(f"Final Reconstruction Error: {reconstruction_error}")

ValueError: shapes (100,100) and (50,15) not aligned: 100 (dim 1) != 50 (dim 0)

In [18]:
import numpy as np
from scipy.linalg import hadamard
from scipy.sparse import random as sparse_random
from sklearn.decomposition import NMF

def fast_jlt_transform(A, target_dim):
    """
    Apply the Fast Johnson-Lindenstrauss Transform (FJLT) to matrix A.
    
    Parameters:
    A (numpy array): Input matrix (m x n)
    target_dim (int): Target reduced dimension
    
    Returns:
    A_reduced (numpy array): Dimension-reduced matrix (target_dim x n)
    """
    m, n = A.shape

    # Step 1: Generate a Hadamard matrix (nearest power of 2)
    H_dim = 2**int(np.ceil(np.log2(m)))  # Get the nearest power of 2
    H = hadamard(H_dim)[:m, :m]  # Truncate to match dimensions

    # Step 2: Create a diagonal sign matrix D (random ±1)
    D = np.diag(np.random.choice([-1, 1], size=m))

    # Step 3: Apply Hadamard and sign-flipping
    HD = H @ D @ A  # (m x n)

    # Step 4: Generate a sparse projection matrix P (proper scaling)
    P = sparse_random(target_dim, m, density=1/target_dim, format='csr', random_state=42).toarray()
    P *= np.sqrt(1 / target_dim)  # Scale projection

    # Step 5: Compute the reduced matrix
    A_reduced = P @ HD  # (target_dim x n)

    return A_reduced

def nmf_with_fjlt_compression(A, r, rOV, max_iter=100, tol=1e-4):
    m, n = A.shape
    d = r + rOV  # Effective reduced dimension
    
    # Apply FJLT transformations
    L = fast_jlt_transform(A, d)  # (d x m)
    R = fast_jlt_transform(A.T, d)  # (d x n)

    # Compute compressed approximation
    A_hat = L.T @ A @ R  # (d x m) @ (m x n) @ (n x d) -> (d x d)

    # Step 3: Initialize NMF factors
    nmf = NMF(n_components=r, init='random', solver='mu', max_iter=500, random_state=42)
    X_k = nmf.fit_transform(A_hat)  # Initial X (d x r)
    Y_k = nmf.components_  # Initial Y (r x d)

    # Iterate until convergence
    prev_error = np.linalg.norm(A_hat - X_k @ Y_k, 'fro')
    for k in range(max_iter):
        # Step 5: Compute Y_k_tilde = Y_k @ R
        Y_k_tilde = np.dot(Y_k, R)  # (r x d) @ (d x n) -> (r x n)

        # Step 6: Solve for X_k+1 using Nonnegative Least Squares (NNLS)
        nmf = NMF(n_components=r, init='custom', solver='mu', max_iter=500)
        X_k_plus_1 = nmf.fit_transform(A, W=X_k, H=Y_k_tilde)  # (m x r)
        
        # Step 7: Compute X_k+1_tilde = L.T @ X_k+1
        X_k_plus_1_tilde = np.dot(L.T, X_k_plus_1)  # (m x d) @ (d x r) -> (m x r)

        # Step 8: Solve for Y_k+1 using NNLS
        nmf = NMF(n_components=r, init='custom', solver='mu', max_iter=500)
        Y_k_plus_1 = nmf.fit_transform(A_hat, W=X_k_plus_1_tilde, H=Y_k)  # (r x d)

        # Compute reconstruction error
        reconstruction_error = np.linalg.norm(A_hat - X_k_plus_1_tilde @ Y_k_plus_1, 'fro')
        print(f"Iteration {k+1}: Reconstruction Error = {reconstruction_error:.6f}")

        # Convergence check
        if abs(prev_error - reconstruction_error) < tol:
            break
        prev_error = reconstruction_error

        # Update X_k and Y_k
        X_k, Y_k = X_k_plus_1, Y_k_plus_1

    return X_k, Y_k

# Example Usage
np.random.seed(42)
A = np.abs(np.random.randn(100, 50))  # Nonnegative matrix (100 features, 50 samples)
r = 5   # Target rank
rOV = 5 # Oversampling parameter

Xk, Yk = nmf_with_fjlt_compression(A, r, rOV)
print("Final X shape:", Xk.shape)
print("Final Y shape:", Yk.shape)

ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 100 is different from 10)

**Structured Random Compression Fig 1**

In [19]:
import numpy as np

def randomized_compression(A, r, rOV, w):
    """
    Compute a compression matrix Q for A using a randomized algorithm.

    Parameters:
    A (numpy array): Input matrix (m x n)
    r (int): Target rank
    rOV (int): Oversampling parameter
    w (int): Exponent for the power iteration

    Returns:
    Q (numpy array): Compression matrix (m x (r + rOV))
    """
    m, n = A.shape
    d = r + rOV  # Effective reduced dimension

    # Step 1: Draw a Gaussian random matrix Omega_L
    Omega_L = np.random.randn(n, d)

    # Step 2: Compute B = (A A^T)^w A Omega_L
    B = A @ Omega_L  # Initial multiplication: A Omega_L
    for _ in range(w):
        B = A @ (A.T @ B)  # Power iteration: (A A^T) B

    # Step 3: Compute the orthogonal basis Q using QR decomposition
    Q, _ = np.linalg.qr(B)

    return Q

# Example Usage
np.random.seed(42)
A = np.random.randn(100, 50)  # Input matrix (100 x 50)
r = 5   # Target rank
rOV = 5 # Oversampling parameter
w = 2   # Exponent

Q = randomized_compression(A, r, rOV, w)
print("Compression matrix Q shape:", Q.shape)

Compression matrix Q shape: (100, 10)


**NMF with Structured Random Compression Fig 2**

In [24]:
import numpy as np
from sklearn.decomposition import NMF

def randomized_compression(A, r, rOV, w):
    """
    Compute a compression matrix Q for A using a randomized algorithm.

    Parameters:
    A (numpy array): Input matrix (m x n)
    r (int): Target rank
    rOV (int): Oversampling parameter
    w (int): Exponent for the power iteration

    Returns:
    Q (numpy array): Compression matrix (m x (r + rOV))
    """
    m, n = A.shape
    d = r + rOV  # Effective reduced dimension

    # Step 1: Draw a Gaussian random matrix Omega_L
    Omega_L = np.random.randn(n, d)

    # Step 2: Compute B = (A A^T)^w A Omega_L
    B = A @ Omega_L  # Initial multiplication: A Omega_L
    for _ in range(w):
        B = A @ (A.T @ B)  # Power iteration: (A A^T) B

    # Step 3: Compute the orthogonal basis Q using QR decomposition
    Q, _ = np.linalg.qr(B)

    return Q

def nmf_with_compression(A, r, rOV, w, max_iter=100, tol=1e-4):
    """
    Compute NMF with compression matrices L and R.

    Parameters:
    A (numpy array): Input matrix (m x n)
    r (int): Target rank
    rOV (int): Oversampling parameter
    w (int): Exponent for the power iteration
    max_iter (int): Maximum number of iterations
    tol (float): Convergence tolerance

    Returns:
    X_k (numpy array): Nonnegative matrix (m x r)
    Y_k (numpy array): Nonnegative matrix (r x n)
    """
    m, n = A.shape
    d = r + rOV  # Effective reduced dimension

    # Ensure A is nonnegative
    A = np.abs(A)

    # Step 1: Compute compression matrices L and R
    L = randomized_compression(A, r, rOV, w)  # L ∈ R^(m x d)
    R = randomized_compression(A.T, r, rOV, w).T  # R ∈ R^(d x n)

    # Step 2: Initialize Y_k with nonnegative values
    Y_k = np.abs(np.random.randn(r, n))  # Y_k ∈ R^(r x n)

    # Step 3: Compute compressed matrices
    A_check = A @ R.T  # A_check ∈ R^(m x d)
    A_hat = L.T @ A  # A_hat ∈ R^(d x n)

    # Ensure compressed matrices are nonnegative
    A_check = np.abs(A_check)
    A_hat = np.abs(A_hat)

    # Iterate until convergence
    prev_error = np.inf
    for k in range(max_iter):
        # Step 4: Compute Y_check_k = Y_k R^T
        Y_check_k = Y_k @ R.T  # Y_check_k ∈ R^(r x d)

        # Step 5: Solve for X_k+1 ≥ 0
        nmf = NMF(n_components=r, init='random', solver='mu', max_iter=500, random_state=42)
        X_k_plus_1 = nmf.fit_transform(A_check)  # X_k+1 ∈ R^(m x r)

        # Step 6: Compute X_hat_k+1 = L^T X_k+1
        X_hat_k_plus_1 = L.T @ X_k_plus_1  # X_hat_k+1 ∈ R^(d x r)

        # Step 7: Solve for Y_k+1 ≥ 0
        nmf = NMF(n_components=r, init='random', solver='mu', max_iter=500, random_state=42)
        Y_k_plus_1 = nmf.fit_transform(A_hat)  # Y_k+1 ∈ R^(r x n)

        # Compute reconstruction error
        # reconstruction_error = np.linalg.norm(A - X_k_plus_1 @ Y_k_plus_1, 'fro')
        # print(f"Iteration {k+1}: Reconstruction Error = {reconstruction_error:.6f}")

        # Convergence check
        # if abs(prev_error - reconstruction_error) < tol:
        #    break
        # prev_error = reconstruction_error

        # Update Y_k
        Y_k = Y_k_plus_1

    return X_k_plus_1, Y_k_plus_1

# Example Usage
np.random.seed(42)
A = np.abs(np.random.randn(100, 50))  # Nonnegative matrix (100 x 50)
r = 5   # Target rank
rOV = 5 # Oversampling parameter
w = 2   # Exponent

Xk, Yk = nmf_with_compression(A, r, rOV, w)
print("Final X shape:", Xk.shape)
print("Final Y shape:", Yk.shape)

ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 50 is different from 5)

In [25]:
A_hec

NameError: name 'A_hec' is not defined