**Section 5a: Empirical Performance - Olivetti Faces**

*Imports*

In [1]:
import timeit
from collections import defaultdict

import numpy as np
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from sklearn.datasets import fetch_olivetti_faces

# Project-local star imports stay last and in their original relative order.
from random_matrix import *
from nmf import *
from benchmark import *

---

*Data*

In [2]:
# Fetch the shuffled Olivetti faces dataset and keep only its `data` matrix.
X_faces = fetch_olivetti_faces(shuffle=True)["data"]

In [3]:
# Report the matrix dimensions and the fraction of entries that are exactly zero.
print(f'Shape: {X_faces.shape}')
print(f'Sparsity: {1 - np.count_nonzero(X_faces) / X_faces.size}')

Shape: (400, 4096)
Sparsity: 6.103515625133227e-07


*Plot: Sample*

In [5]:
# Number of faces to preview; the grid below gets one column per face.
n_images = 4

# Single-row grid sized from n_images rather than a hard-coded column count.
fig = make_subplots(
    rows=1, cols=n_images,
    horizontal_spacing=0.02
)

# Each previewed row of X_faces is reshaped to 64x64 and drawn as a grayscale heatmap.
for col in range(1, n_images + 1):
    z = X_faces[col-1].reshape(64, 64)
    fig.add_trace(
        go.Heatmap(z=z, colorscale='gray', showscale=False),
        row=1, col=col
    )

# Update layout
fig.update_layout(
    width=800,
    height=200,  # Reduced height for single row
    font=dict(size=8, color='black'),
    margin=dict(l=30, r=30, b=30, t=30, pad=10),
    plot_bgcolor='white',
    paper_bgcolor='white',
)

# Hide tick labels and axis titles. Without row/col arguments the update is
# applied to every subplot axis, so no per-column loop is needed.
fig.update_xaxes(showticklabels=False, title_text='')
# Reverse the y-axis so the first row of each reshaped image is drawn at the top.
fig.update_yaxes(showticklabels=False, title_text='', autorange='reversed')
fig.show()

*Method Benchmarks*

In [None]:
# Compressed NMF solvers to benchmark, keyed by the label passed along with them.
methods = {
    'MU C': nmf_compress_mu,
    'MU SC': nmf_structured_compress_mu,
    'HALS C': nmf_compress_hals,
    'HALS SC': nmf_structured_compress_hals,
}

# Projection types handed to the benchmark ('givens' is currently disabled).
projection_types = [
    'gaussian',
    'srht',
    # 'givens',
    'srft',
    'sparse-jl',
    'count-sketch',
]

# Run the benchmark with r=49 and 10 runs, then display the result.
stats = benchmark_faces(
    X_faces,
    methods,
    projection_types,
    r=49,
    runs=10,
)
stats

In [None]:
# Display the benchmark results rounded to four decimal places.
stats.round(decimals=4)

**Baseline Algorithms**

In [None]:

# Uncompressed NMF solvers timed in this cell.
methods = {
    'MU': nmf_mu,
    'HALS': nmf_hals,
}

# Rank passed to each solver and number of seeded repetitions per method.
r = 49
runs = 10

# One summary record per method; turned into a DataFrame in the next cell.
rows = []
for method_name, method in methods.items():
    total_errors = []
    total_times = []
    for i in range(runs):
        # Deterministic, distinct seed for each repetition (1..runs).
        seed = i + 1

        # Time a single fit. `elapsed` is used instead of `time` so the
        # stdlib module name is not shadowed in the notebook namespace.
        start_time = timeit.default_timer()
        _, _, errors = method(X_faces, r, random_state=seed)
        elapsed = timeit.default_timer() - start_time

        # Keep the wall-clock time and the last entry of the returned errors.
        total_times.append(elapsed)
        total_errors.append(errors[-1])

    # Store the mean time and mean final error across all runs.
    rows.append(
        {
            'Method': method_name,
            'time': np.mean(total_times),
            'errors': np.mean(total_errors),
        }
    )
    print(f"Completed {method_name}")

In [None]:
pd.DataFrame(rows).round(4)

**Multiplicative Updates**

In [4]:
# Standard MU Algorithm
X_mu,Y_mu,errors_mu = nmf_mu(X_faces,49,max_iter=100)

# Standard Compressed MU
X_c_mu,Y_c_mu,errors_s_mu = nmf_compress_mu(X_faces,49,max_iter=100,projection_type='count-sketch')

# Structured Compressed MU
X_sc_mu,Y_sc_mu,errors_sc_mu = nmf_structured_compress_mu(X_faces,49,max_iter=100,projection_type='gaussian')

# Errors 
mu_errors_df = pd.DataFrame({'MU':errors_mu,
              'C MU':errors_s_mu,
              'SC MU':errors_sc_mu})

*Plot: Reconstruction Errors*

In [5]:
fig = go.Figure()

# One line per MU variant; only y is given, so values are plotted in order.
for method in mu_errors_df.columns:
    fig.add_trace(
        go.Scatter(
            y=mu_errors_df[method],
            name=method
        )
    )

# Title names the dataset actually loaded in this notebook (Olivetti faces);
# the previous "CBCL" label was incorrect for this data.
fig.update_layout(
    title='Olivetti Faces Reconstruction Error by MU Method (Best Projection)',
    yaxis_title='Reconstruction Error',
    xaxis_title='Iteration'
)
fig.show()

*Plot: Visual Reconstruction Error by Method*

In [None]:
# Number of faces shown per row (one subplot column each).
n_images = 4

# One grid row per entry: the original data first, then each method's X @ Y product.
matrix_pairs = [
    ("Original", X_faces),
    ("MU", X_mu @ Y_mu),
    ("MU C", X_c_mu @ Y_c_mu),
    ("MU SC", X_sc_mu @ Y_sc_mu)
]

# Title shown above the first subplot of each row, in the same order as matrix_pairs.
row_titles = [
    "Original Images",
    "MU",
    "MU C (Count-Sketch)",
    "MU SC (Gaussian)",
]

# subplot_titles is a flat row-major list: label the first cell of every row
# and pad the remaining cells with ' ' placeholders.
subplot_titles = [
    cell_title
    for row_title in row_titles
    for cell_title in [row_title] + [' '] * (n_images - 1)
]

# Grid dimensions follow the data instead of a hard-coded 4x4.
fig = make_subplots(
    rows=len(matrix_pairs), cols=n_images,
    subplot_titles=subplot_titles,
    vertical_spacing=0.05,
    horizontal_spacing=0.02
)

# Draw the first n_images rows of each matrix as 64x64 grayscale heatmaps.
for row, (_label, Z) in enumerate(matrix_pairs, start=1):
    for col in range(1, n_images + 1):
        z = Z[col-1].reshape(64, 64)
        fig.add_trace(
            go.Heatmap(z=z, colorscale='gray', showscale=False),
            row=row, col=col
        )

fig.update_layout(
    width=800,
    height=600,
    font=dict(size=8, color='black'),
    margin=dict(l=30, r=30, b=30, t=30, pad=10),
    plot_bgcolor='white',
    paper_bgcolor='white',
)

# Hide tick labels and axis titles. Without row/col arguments the update is
# applied to every subplot axis, so no nested row/col loop is needed.
fig.update_xaxes(showticklabels=False, title_text='')
# Reverse the y-axis so the first row of each reshaped image is drawn at the top.
fig.update_yaxes(showticklabels=False, title_text='', autorange='reversed')

fig.update_annotations(font_size=10, font_color='black')
fig.show()

**Hierarchical Alternating Least Squares (HALS)**

In [22]:
# Standard HALS Algorithm
X_hals,Y_hals,errors_hals = nmf_hals(X_faces,49,max_iter=100)

# Standard Compressed HALS
X_c_hals,Y_c_hals,errors_s_hals = nmf_compress_hals(X_faces,49,max_iter=100,projection_type='gaussian')

# Structured Compressed HALS
X_sc_hals,Y_sc_hals,errors_sc_hals = nmf_structured_compress_hals(X_faces,49,max_iter=100,projection_type='count-sketch')

# Errors dataframe
hals_errors_df = pd.DataFrame({'HALS':errors_hals,
              'C HALS':errors_s_hals,
              'SC HALS':errors_sc_hals})

*Plot: Reconstruction Errors*

In [23]:
fig = go.Figure()

# One line per HALS variant; only y is given, so values are plotted in order.
for method in hals_errors_df.columns:
    fig.add_trace(
        go.Scatter(
            y=hals_errors_df[method],
            name=method
        )
    )

# Title names the dataset actually loaded in this notebook (Olivetti faces);
# the previous "CBCL" label was incorrect for this data.
fig.update_layout(
    title='Olivetti Faces Reconstruction Error by HALS Method',
    yaxis_title='Reconstruction Error',
    xaxis_title='Iteration'
)
fig.show()

*Plot: Visual Reconstruction Error by Method*

In [None]:
# Number of faces shown per row (one subplot column each).
n_images = 4

# One grid row per entry: the original data first, then each method's X @ Y product.
matrix_pairs = [
    ("Original", X_faces),
    ("HALS", X_hals @ Y_hals),
    ("HALS C", X_c_hals @ Y_c_hals),
    ("HALS SC", X_sc_hals @ Y_sc_hals)
]

# Title shown above the first subplot of each row, in the same order as matrix_pairs.
row_titles = [
    "Original Images",
    "HALS",
    "HALS C (Gaussian)",
    "HALS SC (Count-Sketch)",
]

# subplot_titles is a flat row-major list: label the first cell of every row
# and pad the remaining cells with ' ' placeholders.
subplot_titles = [
    cell_title
    for row_title in row_titles
    for cell_title in [row_title] + [' '] * (n_images - 1)
]

# Grid dimensions follow the data instead of a hard-coded 4x4.
fig = make_subplots(
    rows=len(matrix_pairs), cols=n_images,
    subplot_titles=subplot_titles,
    vertical_spacing=0.05,
    horizontal_spacing=0.02
)

# Draw the first n_images rows of each matrix as 64x64 grayscale heatmaps.
for row, (_label, Z) in enumerate(matrix_pairs, start=1):
    for col in range(1, n_images + 1):
        z = Z[col-1].reshape(64, 64)
        fig.add_trace(
            go.Heatmap(z=z, colorscale='gray', showscale=False),
            row=row, col=col
        )

fig.update_layout(
    width=800,
    height=600,
    font=dict(size=8, color='black'),
    margin=dict(l=30, r=30, b=30, t=30, pad=10),
    plot_bgcolor='white',
    paper_bgcolor='white',
)

# Hide tick labels and axis titles. Without row/col arguments the update is
# applied to every subplot axis, so no nested row/col loop is needed.
fig.update_xaxes(showticklabels=False, title_text='')
# Reverse the y-axis so the first row of each reshaped image is drawn at the top.
fig.update_yaxes(showticklabels=False, title_text='', autorange='reversed')

fig.update_annotations(font_size=10, font_color='black')
fig.show()

---