**Section 5a: Empirical Performance - Olivetti Faces**

*Imports*

In [1]:
import timeit
from collections import defaultdict

import numpy as np
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from sklearn.datasets import fetch_olivetti_faces

from random_matrix import *
from nmf import *
from benchmark import *

---

*Data*

In [2]:
# Olivetti faces with the sample order shuffled; only the data matrix is kept.
# Later cells treat each row as one flattened 64x64 image.
X_faces = fetch_olivetti_faces(shuffle=True)["data"]

In [None]:
# Report the matrix dimensions and the fraction of entries that are exactly zero
# (one line each).
print(
    f'Shape: {X_faces.shape}',
    f'Sparsity: {1 - len(np.nonzero(X_faces)[0]) / (X_faces.shape[0]*X_faces.shape[1])}',
    sep='\n',
)

0.9999993896484375

*Method Benchmarks*

In [5]:
# Compressed NMF solvers under test, keyed by the label that appears in the
# progress log and in the results table.
methods = dict([
    ("MU C", nmf_compress_mu),
    ("MU SC", nmf_structured_compress_mu),
    ("HALS C", nmf_compress_hals),
    ("HALS SC", nmf_structured_compress_hals),
])

# Projection schemes paired with every solver.
# ('srht' and 'givens' are deliberately left out of this run.)
projection_types = ['gaussian', 'srft', 'sparse-jl', 'count-sketch']

# r=49 and runs=10 are forwarded to the benchmark helper as-is.
stats = benchmark_faces(X_faces, methods, projection_types, r=49, runs=10)
stats

Completed MU C,gaussian
Completed MU C,srft
Completed MU C,sparse-jl
Completed MU C,count-sketch
Completed MU SC,gaussian
Completed MU SC,srft
Completed MU SC,sparse-jl
Completed MU SC,count-sketch
Completed HALS C,gaussian
Completed HALS C,srft
Completed HALS C,sparse-jl
Completed HALS C,count-sketch
Completed HALS SC,gaussian
Completed HALS SC,srft
Completed HALS SC,sparse-jl
Completed HALS SC,count-sketch


Unnamed: 0_level_0,errors,errors,errors,errors,time,time,time,time
projection,count-sketch,gaussian,sparse-jl,srft,count-sketch,gaussian,sparse-jl,srft
algorithm,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
HALS C,0.180025,0.173771,0.175253,0.203729,0.530832,0.543841,0.534766,0.561237
HALS SC,0.149175,0.149193,0.149466,0.149182,0.563104,0.59129,0.563164,0.57056
MU C,0.188077,0.190083,0.185767,0.225832,0.440946,0.468168,0.447436,0.484997
MU SC,0.180842,0.180809,0.180818,0.180832,0.496006,0.496959,0.49227,0.501356


In [6]:
# Same benchmark table, rounded to four decimal places for reporting.
stats.round(decimals=4)

Unnamed: 0_level_0,errors,errors,errors,errors,time,time,time,time
projection,count-sketch,gaussian,sparse-jl,srft,count-sketch,gaussian,sparse-jl,srft
algorithm,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
HALS C,0.18,0.1738,0.1753,0.2037,0.5308,0.5438,0.5348,0.5612
HALS SC,0.1492,0.1492,0.1495,0.1492,0.5631,0.5913,0.5632,0.5706
MU C,0.1881,0.1901,0.1858,0.2258,0.4409,0.4682,0.4474,0.485
MU SC,0.1808,0.1808,0.1808,0.1808,0.496,0.497,0.4923,0.5014


**Baseline Algorithms**

In [7]:

# Uncompressed solvers, timed as the reference point for the compressed variants.
methods = {
    'MU': nmf_mu,
    'HALS': nmf_hals,
}

r = 49     # passed as the second positional argument to every solver
runs = 10  # number of seeded repetitions averaged per solver

# One summary record per solver; turned into a DataFrame in a later cell.
rows = []
for method_name, method in methods.items():
    total_errors = []
    total_times = []
    for i in range(runs):
        # Deterministic, distinct seed for each repetition (1..runs)
        seed = i + 1

        # Wall-clock duration of one complete solver call.
        # `elapsed` (not `time`) avoids shadowing the stdlib module name.
        start_time = timeit.default_timer()
        _, _, errors = method(X_faces, r, random_state=seed)
        elapsed = timeit.default_timer() - start_time

        total_times.append(elapsed)
        # Keep only the last entry of the error history returned by the solver
        total_errors.append(errors[-1])

    # Mean runtime and mean final error across all repetitions
    rows.append(
        {
            'Method': method_name,
            'time': np.mean(total_times),
            'errors': np.mean(total_errors),
        }
    )
    print(f"Completed {method_name}")

Completed MU
Completed HALS


In [10]:
# Baseline summary (one row per solver), rounded to four decimal places.
pd.DataFrame(data=rows).round(decimals=4)

Unnamed: 0,Method,time,errors
0,MU,0.893,0.1381
1,HALS,0.7709,0.1508


**Multiplicative Updates**

In [9]:
# Fit the three MU variants on the same data (second argument 49, max_iter=100).
# NOTE(review): the benchmark table above reports a lower mean error for MU C
# with sparse-jl than with count-sketch — confirm count-sketch is the intended
# projection for the compressed run.

# Plain MU
X_mu, Y_mu, errors_mu = nmf_mu(X_faces, 49, max_iter=100)

# Compressed MU, count-sketch projection
X_c_mu, Y_c_mu, errors_s_mu = nmf_compress_mu(
    X_faces, 49, max_iter=100, projection_type='count-sketch'
)

# Structured compressed MU, Gaussian projection
X_sc_mu, Y_sc_mu, errors_sc_mu = nmf_structured_compress_mu(
    X_faces, 49, max_iter=100, projection_type='gaussian'
)

# Error histories side by side, one column per variant
mu_errors_df = pd.DataFrame(
    dict(zip(('MU', 'C MU', 'SC MU'), (errors_mu, errors_s_mu, errors_sc_mu)))
)

*Plot: Reconstruction Errors*

In [None]:
# One line per MU variant: error history plotted against its index.
fig = go.Figure()

for method in mu_errors_df.columns:
    fig.add_trace(go.Scatter(y=mu_errors_df[method], name=method))

# The data matrix is loaded with fetch_olivetti_faces, so the figure is
# labelled as Olivetti rather than CBCL.
fig.update_layout(
    title='Olivetti Faces Reconstruction Error by MU Method (Best Projection)',
    yaxis_title='Reconstruction Error',
    xaxis_title='Iteration',
)
fig.show()

*Plot: Visual Reconstruction by Method*

In [None]:
# Grid of faces: one row per method, one column per sample image.
n_images = 4            # how many leading rows of each matrix are displayed
image_shape = (64, 64)  # every matrix row is reshaped to this size for display

# (row title, matrix whose rows are flattened images)
matrix_pairs = [
    ("Original Images", X_faces),
    ("MU", X_mu @ Y_mu),
    ("Comp. MU (Count-Sketch)", X_c_mu @ Y_c_mu),
    ("Struct. Comp. MU (Gaussian)", X_sc_mu @ Y_sc_mu),
]

# Only the first panel of each row carries the title; the remaining panels get
# a blank placeholder so the titles stay aligned with the grid.
subplot_titles = [
    title if panel == 0 else ' '
    for title, _matrix in matrix_pairs
    for panel in range(n_images)
]

# Grid dimensions follow n_images and matrix_pairs, so changing either one
# keeps the figure consistent.
fig = make_subplots(
    rows=len(matrix_pairs), cols=n_images,
    subplot_titles=subplot_titles,
    vertical_spacing=0.05,
    horizontal_spacing=0.02
)

for row, (_title, Z) in enumerate(matrix_pairs, start=1):
    for col in range(1, n_images + 1):
        # Subplot columns are 1-based; matrix rows are 0-based
        z = Z[col - 1].reshape(image_shape)
        fig.add_trace(
            go.Heatmap(z=z, colorscale='gray', showscale=False),
            row=row, col=col
        )

fig.update_layout(
    title="NMF MU Reconstruction",
    width=800,
    height=600,
    font=dict(size=8),
    margin=dict(l=10, r=10, b=10, t=40, pad=0),
    plot_bgcolor='white',
    paper_bgcolor='white'
)

fig.update_annotations(font_size=10)
# Heatmaps put row 0 at the bottom by default; flip the y-axis so faces are upright
fig.update_yaxes(autorange='reversed')
fig.show()

**Hierarchical Alternating Least Squares (HALS)**

In [None]:
# Fit the three HALS variants on the same data (second argument 49, max_iter=100).

# Plain HALS
X_hals, Y_hals, errors_hals = nmf_hals(X_faces, 49, max_iter=100)

# Compressed HALS, Gaussian projection
X_c_hals, Y_c_hals, errors_s_hals = nmf_compress_hals(
    X_faces, 49, max_iter=100, projection_type='gaussian'
)

# Structured compressed HALS, count-sketch projection
X_sc_hals, Y_sc_hals, errors_sc_hals = nmf_structured_compress_hals(
    X_faces, 49, max_iter=100, projection_type='count-sketch'
)

# Error histories side by side, one column per variant
hals_errors_df = pd.DataFrame(
    dict(zip(('HALS', 'C HALS', 'SC HALS'), (errors_hals, errors_s_hals, errors_sc_hals)))
)

*Plot: Reconstruction Errors*

In [None]:
# One line per HALS variant: error history plotted against its index.
fig = go.Figure()

for method in hals_errors_df.columns:
    fig.add_trace(go.Scatter(y=hals_errors_df[method], name=method))

# The data matrix is loaded with fetch_olivetti_faces, so the figure is
# labelled as Olivetti rather than CBCL.
fig.update_layout(
    title='Olivetti Faces Reconstruction Error by HALS Method',
    yaxis_title='Reconstruction Error',
    xaxis_title='Iteration',
)
fig.show()

*Plot: Visual Reconstruction by Method*

In [None]:
# Grid of faces: one row per method, one column per sample image.
n_images = 4            # how many leading rows of each matrix are displayed
image_shape = (64, 64)  # every matrix row is reshaped to this size for display

# (row title, matrix whose rows are flattened images)
matrix_pairs = [
    ("Original Images", X_faces),
    ("HALS", X_hals @ Y_hals),
    ("Comp. HALS (Gaussian)", X_c_hals @ Y_c_hals),
    ("Struct. Comp. HALS (Count-Sketch)", X_sc_hals @ Y_sc_hals),
]

# Only the first panel of each row carries the title; the remaining panels get
# a blank placeholder so the titles stay aligned with the grid.
subplot_titles = [
    title if panel == 0 else ' '
    for title, _matrix in matrix_pairs
    for panel in range(n_images)
]

# Grid dimensions follow n_images and matrix_pairs, so changing either one
# keeps the figure consistent.
fig = make_subplots(
    rows=len(matrix_pairs), cols=n_images,
    subplot_titles=subplot_titles,
    vertical_spacing=0.05,
    horizontal_spacing=0.02
)

for row, (_title, Z) in enumerate(matrix_pairs, start=1):
    for col in range(1, n_images + 1):
        # Subplot columns are 1-based; matrix rows are 0-based
        z = Z[col - 1].reshape(image_shape)
        fig.add_trace(
            go.Heatmap(z=z, colorscale='gray', showscale=False),
            row=row, col=col
        )

fig.update_layout(
    title="NMF HALS Reconstruction",
    width=800,
    height=600,
    font=dict(size=8),
    margin=dict(l=10, r=10, b=10, t=40, pad=0),
    plot_bgcolor='white',
    paper_bgcolor='white'
)

fig.update_annotations(font_size=10)
# Heatmaps put row 0 at the bottom by default; flip the y-axis so faces are upright
fig.update_yaxes(autorange='reversed')
fig.show()

---