**Section 3: Compression Algorithms**

*Imports*

In [1]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots

import timeit
import math

from collections import defaultdict
from random_matrix import *

**Code**

In [2]:
def randomized_power_iteration(A, omega=None, power_iter=3, random_state=None):
    """
    Randomized subspace iteration with customizable projection matrix
    
    Parameters:
    -----------
    A : ndarray
        Input matrix to approximate
    rank : int
        Target rank of approximation
    omega : ndarray, optional
        Custom projection matrix (shape n × (rank + oversampling))
        If None, uses Gaussian random matrix
    power_iter : int
        Number of power iterations
    random_state : int, optional
        Random seed for reproducibility
    """
    if random_state is not None:
        np.random.seed(random_state)
    
    Y = omega @ A
    for _ in range(power_iter):
        Y = (Y @ A.T) @ A  # Power iteration for row space
    
    Q, _ = np.linalg.qr(Y)
    return Q

*Benchmarking*

In [9]:
# Matrix Methods
methods = {
    "Gaussian": gaussian_random_matrix,
    # "SRHT": srht_matrix,
    "SRFT": srft_matrix,
    "CountSketch": countsketch_matrix,
    "Sparse JL": lambda n, k, seed: sparse_jl_matrix(n, k, s=3, seed=seed)
}

# Parameters
sizes = np.arange(1_000, 5_001, 1_000)
runs = 10
q = 2  # Number of power iterations for RPI

rows = []
for n in sizes:
    # Generate Random Matrix A to Project
    np.random.seed(1)
    m = int(0.75*n)
    for density,label in [(1.0,'dense'),(0.01,'sparse')]:
        A,_,_ = generate_synthetic_matrix(m,n,delta = density,seed = 1) 
        for method_name, method in methods.items():
            generation_times = []
            projection_times = []
            rpi_times = []
            total_times = []
            
            for i in range(runs):
                seed = i + 1

                # Time projection matrix generation
                timer_proj = timeit.Timer(lambda: method(n, m, seed=seed))
                generation_time = timer_proj.timeit(number=1)  # Single run as creation is deterministic per seed
                
                # Generate projection matrix
                sigma = method(n, m, seed=seed)
                
                # Time projection operation
                timer_mult = timeit.Timer(lambda: sigma @ A)
                mult_time = timer_mult.timeit(number=1) 
                
                # Time RPI operation
                timer_rpi = timeit.Timer(lambda: randomized_power_iteration(A = A, omega = sigma, power_iter = q))
                rpi_time = timer_rpi.timeit(number=1)
                
                generation_times.append(generation_time)
                projection_times.append(mult_time)
                rpi_times.append(rpi_time)
                total_times.append(generation_time + mult_time + rpi_time)
            
            # Store average times
            rows.append(
                {
                    'Method':method_name,
                    'Size':n,
                    'Density':label,
                    'Generation Time': np.mean(generation_times),
                    'Projection Time': np.mean(projection_times),
                    'RPI Time':np.mean(rpi_times),
                    'Total Time': np.mean(total_times)  
                }
            )
        print(f'Completed benchmarking for matrix size: {n}')

Completed benchmarking for matrix size: 1000
Completed benchmarking for matrix size: 1000
Completed benchmarking for matrix size: 2000
Completed benchmarking for matrix size: 2000
Completed benchmarking for matrix size: 3000
Completed benchmarking for matrix size: 3000
Completed benchmarking for matrix size: 4000
Completed benchmarking for matrix size: 4000
Completed benchmarking for matrix size: 5000
Completed benchmarking for matrix size: 5000


In [10]:
summary_df = pd.DataFrame(rows)

***Figure 1: Generation Cost***

In [11]:
fig = go.Figure()

_df = summary_df[summary_df['Density'] == 'dense']
for method in _df['Method'].unique():
    
    fig.add_trace(
        go.Scatter(
            x = _df[_df['Method']==method]['Size'],
            y = _df[_df['Method']==method]['Generation Time'],
            name = method
        ),
    )

fig.update_layout(
    title = 'Time (s) vs Matrix Size by Compression Algorithm',
    xaxis_title = 'Size (n)',
    yaxis_title = 'Time (s)',
    template = 'plotly_white'
)
fig.show()

***Figure 2: Projection Cost***

In [12]:
fig = make_subplots(rows = 1, cols = 2,
                    subplot_titles=['Dense','Sparse'])

_df = summary_df[summary_df['Density'] == 'dense']
for method in _df['Method'].unique():

    fig.add_trace(
        go.Scatter(
            x = _df[_df['Method'] == method]['Size'],
            y = _df[_df['Method'] == method]['Projection Time'],
            name = method
        ),
        row=1,col=1
    )

_df = summary_df[summary_df['Density'] == 'sparse']
for method in _df['Method'].unique():
    fig.add_trace(
        go.Scatter(
            x = _df[_df['Method'] == method]['Size'],
            y = _df[_df['Method'] == method]['Projection Time'],
            name = method
        ),
        row=1,col=2
    )

fig.update_layout(
    title = 'Sparse vs Dense Matrix Projection Cost by Test Matrix',
    xaxis_title = 'Size (n)',
    yaxis_title = 'Time (s)',
    template = 'plotly_white' 

)

fig.show()

***Figure 3: Randomized Power Iteration Cost***

In [13]:
fig = make_subplots(rows = 1, cols = 2,
                    subplot_titles=['Dense','Sparse'])

_df = summary_df[summary_df['Density'] == 'dense']
for method in _df['Method'].unique():

    fig.add_trace(
        go.Scatter(
            x = _df[_df['Method'] == method]['Size'],
            y = _df[_df['Method'] == method]['RPI Time'],
            name = method
        ),
        row=1,col=1
    )

_df = summary_df[summary_df['Density'] == 'sparse']
for method in _df['Method'].unique():
    fig.add_trace(
        go.Scatter(
            x = _df[_df['Method'] == method]['Size'],
            y = _df[_df['Method'] == method]['RPI Time'],
            name = method
        ),
        row=1,col=2
    )

fig.update_layout(
    title = 'Sparse vs Dense Matrix RPI Cost (q = 4) t by Test Matrix',
    xaxis_title = 'Size (n)',
    yaxis_title = 'Time (s)',
    template = 'plotly_white' 

)

fig.show()