**Section 4: Synthetic Matrix Performance**

*Imports*

In [2]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots

import timeit
import math

from archive.randomized_projected_nmf import *
from random_matrix import *
from nmf import * 
from benchmark import *
from collections import defaultdict

*Benchmark*

In [97]:
methods = {
    "MU C": nmf_compress_mu,
    'MU SC': nmf_structured_compress_mu,
    'HALS C': nmf_compress_hals,
    'HALS SC': nmf_structured_compress_hals
}
methods_baseline = {
    "MU": nmf_mu,
    'HALS': nmf_hals,
}


projection_types = [
    'gaussian',
    # 'srht',
    # 'srft',
    'sparse-jl',
    'count-sketch',
]

rows = []

# Parameters
sizes = np.arange(1_00, 10_001, 1_000)
runs = 10
r = 20
for n in sizes:
    # Generate A (n x 0.75n)
    np.random.seed(1)
    m = int(0.75*n)
    for density, label in [(0.01,'sparse'),(1.0,'dense')]:
        A, _, _= generate_synthetic_matrix(n=m,m=n,r=r,delta = density)

        # Compressed Methods
        for method_name, method in methods.items():
            for projection in projection_types:
                total_times = [] 
                total_errors = []       
                for i in range(runs):
                    # Set seed per run
                    seed = i + 1
                    
                    # Time NMF Method
                    start_time = timeit.default_timer()
                    _, _, errors = method(A, r, random_state=seed,projection_type = projection)
                    time = timeit.default_timer() - start_time

                    # Store
                    total_times.append(time)
                    total_errors.append(errors[-1])
                
                rows.append(
                    {
                        'Method':method_name,
                        'Projection Type':projection,
                        'Size':n,
                        'Density':label,
                        'Time': np.mean(total_times),
                        'Errors': np.mean(total_errors)
                    }
                )
        # Regular Methods
        for method_name,method in methods_baseline.items():
            total_times = [] 
            total_errors = []       
            for i in range(runs):
                # Set seed per run
                seed = i + 1
                
                # Time NMF Method
                start_time = timeit.default_timer()
                _, _, errors = method(A, r)
                time = timeit.default_timer() - start_time

                # Store
                total_times.append(time)
                total_errors.append(errors[-1])
            
            rows.append(
                {
                    'Method':method_name,
                    'Projection Type':'',
                    'Size':n,
                    'Density':label,
                    'Time': np.mean(total_times),
                    'Errors': np.mean(total_errors)
                }
            )


    print(f'Completed benchmarking for matrix size: {n}')

Completed benchmarking for matrix size: 100
Completed benchmarking for matrix size: 1100
Completed benchmarking for matrix size: 2100
Completed benchmarking for matrix size: 3100


KeyboardInterrupt: 

In [90]:
A

array([[7.06783079, 6.813081  , 5.38449234, ..., 5.96894479, 1.64254075,
        4.79622313],
       [3.4053049 , 2.10139065, 4.20165453, ..., 2.53430644, 2.85287984,
        3.5272496 ],
       [3.09875185, 3.34068754, 3.05058999, ..., 3.13522907, 3.20708009,
        3.42731741],
       ...,
       [5.87937796, 3.35649994, 4.07000582, ..., 4.13329593, 2.93069377,
        5.39803491],
       [3.8273481 , 2.55072551, 3.71311234, ..., 6.19717538, 1.31929683,
        1.71438176],
       [4.66388401, 6.55052092, 3.53200779, ..., 4.10573846, 3.21425655,
        2.76643839]], shape=(1000, 750))

In [91]:
stats = pd.DataFrame(rows)

In [92]:
stats['Method'].unique()

array(['MU C', 'MU SC', 'HALS C', 'HALS SC', 'MU', 'HALS'], dtype=object)

*Plot: Computation Time by Method/Projection*

*i) dense*

In [93]:
dense = stats[(stats['Density'] == 'dense') & (stats['Projection Type'] != '')]

fig = make_subplots(rows = 1,cols = 4,shared_xaxes = True,shared_yaxes=True,subplot_titles=dense['Method'].unique())

projection_colors = {
    'gaussian': 'blue',
    'sparse-jl': 'red',
    'count-sketch': 'green',
    
}


for i, method in enumerate(dense['Method'].unique()):
    for j, projection in enumerate(dense['Projection Type'].unique()):
        _df = dense[(dense['Method'] == method) & (dense['Projection Type'] == projection)]
        fig.add_trace(
            go.Scatter(
                x = _df['Size'],
                y = _df['Time'],
                name = f'{projection}',
                legendgroup=projection,  # Group by projection type
                line = dict(color=projection_colors[projection]),  # Color by projection
                showlegend = (i == 0),
            ),
            row = 1, col = i + 1,
            
        )

mu_dense = stats[(stats['Method'] == 'MU') & (stats['Density'] == "dense")]

for i in range(2):
    fig.add_trace(
        go.Scatter(
            x = mu_dense['Size'],
            y = mu_dense['Time'],
            name = 'MU',
            showlegend = (i == 0),
            line = dict(color='black'),
            legendgroup = 'MU'
        ),
        row = 1,col= i+1
    )


hals_dense = stats[(stats['Method'] == 'HALS') & (stats['Density'] == "dense")]
for i in range(2,4):
    fig.add_trace(
        go.Scatter(
            x = hals_dense['Size'],
            y = hals_dense['Time'],
            name = 'HALS',
            showlegend = (i == 2),
            line = dict(color='grey'),
            legendgroup = 'HALS'
        ),
        row = 1,col= i+1
    )

fig.update_layout(
    title = 'Computation Time by Method/Projection Type - Synthetic Dense Matrix',
    xaxis_title = 'Size (n)',
    yaxis_title = 'Time (s)',
    template = 'plotly_white'
)

fig.show()

*ii) sparse*

In [94]:
sparse = stats[(stats['Density'] == 'sparse') & (stats['Projection Type'] != '')]

fig = make_subplots(rows = 1,cols = 4,shared_xaxes = True,shared_yaxes=True,subplot_titles=sparse['Method'].unique())

projection_colors = {
    'gaussian': 'blue',
    'sparse-jl': 'red',
    'count-sketch': 'green',
}

for i, method in enumerate(sparse['Method'].unique()):
    for j, projection in enumerate(sparse['Projection Type'].unique()):
        _df = sparse[(sparse['Method'] == method) & (sparse['Projection Type'] == projection)]
        fig.add_trace(
            go.Scatter(
                x = _df['Size'],
                y = _df['Time'],
                name = f'{projection}',
                legendgroup=projection,  # Group by projection type
                line = dict(color=projection_colors[projection]),  # Color by projection
                showlegend = (i == 0),
            ),
            row = 1, col = i + 1,
            
        )
mu_dense = stats[(stats['Method'] == 'MU') & (stats['Density'] == "sparse")]

for i in range(2):
    fig.add_trace(
        go.Scatter(
            x = mu_dense['Size'],
            y = mu_dense['Time'],
            name = 'MU',
            showlegend = (i == 0),
            line = dict(color='black'),
            legendgroup = 'MU'
        ),
        row = 1,col= i+1
    )


hals_dense = stats[(stats['Method'] == 'HALS') & (stats['Density'] == "sparse")]
for i in range(2,4):
    fig.add_trace(
        go.Scatter(
            x = hals_dense['Size'],
            y = hals_dense['Time'],
            name = 'HALS',
            showlegend = (i == 2),
            line = dict(color='grey'),
            legendgroup = 'HALS'
        ),
        row = 1,col= i+1
    )


fig.update_layout(
    title = 'Computation Time by Method/Projection Type - Synthetic Sparse Matrix',
    xaxis_title = 'Size (n)',
    yaxis_title = 'Time (s)',
    template = 'plotly_white'
)
fig.show()

*Plot: Reconstruction Error by Method/Projection*

*i) dense*

In [95]:
dense = stats[(stats['Density'] == 'sparse') & (stats['Projection Type'] != '')]

fig = make_subplots(rows = 1,cols = 4,shared_xaxes = True,shared_yaxes=True,subplot_titles=dense['Method'].unique())

projection_colors = {
    'gaussian': 'blue',
    'sparse-jl': 'red',
    'count-sketch': 'green',
}

for i, method in enumerate(dense['Method'].unique()):
    for j, projection in enumerate(dense['Projection Type'].unique()):
        _df = dense[(dense['Method'] == method) & (dense['Projection Type'] == projection)]
        fig.add_trace(
            go.Scatter(
                x = _df['Size'],
                y = _df['Errors'],
                name = f'{projection}',
                legendgroup=projection,  # Group by projection type
                line = dict(color=projection_colors[projection]),  # Color by projection
                showlegend = (i == 0),
            ),
            row = 1, col = i + 1,
            
        )

mu_dense = stats[(stats['Method'] == 'MU') & (stats['Density'] == "dense")]

for i in range(2):
    fig.add_trace(
        go.Scatter(
            x = mu_dense['Size'],
            y = mu_dense['Errors'],
            name = 'MU',
            showlegend = (i == 0),
            line = dict(color='black'),
            legendgroup = 'MU'
        ),
        row = 1,col= i+1
    )


hals_dense = stats[(stats['Method'] == 'HALS') & (stats['Density'] == "dense")]
for i in range(2,4):
    fig.add_trace(
        go.Scatter(
            x = hals_dense['Size'],
            y = hals_dense['Errors'],
            name = 'HALS',
            showlegend = (i == 2),
            line = dict(color='grey'),
            legendgroup = 'HALS'
        ),
        row = 1,col= i+1
    )


fig.update_layout(
    title = 'Reconstruction Error by Method/Projection Type - Synthetic Dense Matrix',
    xaxis_title = 'Size (n)',
    yaxis_title = 'Errors',
    template = 'plotly_white'
)
fig.show()

*ii) sparse*

In [96]:
sparse = stats[stats['Density'] == 'sparse']

fig = make_subplots(rows = 1,cols = 4,shared_xaxes = True,shared_yaxes=True,subplot_titles=sparse['Method'].unique())

projection_colors = {
    'gaussian': 'blue',
    'sparse-jl': 'red',
    'count-sketch': 'green',
}

for i, method in enumerate(sparse['Method'].unique()):
    for j, projection in enumerate(sparse['Projection Type'].unique()):
        _df = sparse[(sparse['Method'] == method) & (sparse['Projection Type'] == projection)]
        fig.add_trace(
            go.Scatter(
                x = _df['Size'],
                y = _df['Errors'],
                name = f'{projection}',
                legendgroup=projection,
                line = dict(color=projection_colors[projection]),
                showlegend = (i == 0),
            ),
            row = 1, col = i + 1,
            
        )

fig.update_layout(
    title = 'Reconstruction Error by Method/Projection Type - Synthetic Sparse Matrix',
    xaxis_title = 'Size (n)',
    yaxis_title = 'Error',
    template = 'plotly_white'
)
fig.show()

KeyError: ''