# Performance Benchmark: rSVD vs NumPy SVD

Here we benchmark the execution time of our **Randomized SVD** implementation against the highly optimized `numpy.linalg.svd` (LAPACK).

**Hypothesis:** rSVD should be significantly faster when the target rank $k$ is much smaller than the matrix dimensions ($k \ll m, n$).

In [None]:
import sys
import os
import time
import numpy as np
import matplotlib.pyplot as plt

sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))
from randomized_svd.core import rsvd

# Benchmark configuration and result accumulators.
# Each entry of matrix_sizes is used as N for a square N x N test matrix;
# times_rsvd / times_numpy collect one elapsed time (seconds) per size,
# in the same order as matrix_sizes.
matrix_sizes = [500, 1000, 2000, 3000]  # Increasing matrix size
target_rank = 50                        # Fixed small rank
times_rsvd = []
times_numpy = []

print(f"Benchmarking with fixed rank k={target_rank}...")

for n in matrix_sizes:
    # Generate a random n x n matrix with standard-normal entries.
    X = np.random.randn(n, n)

    # 1. Time rSVD
    # time.perf_counter() is used instead of time.time(): it is monotonic
    # and has the highest available resolution, so short intervals are not
    # distorted by system clock adjustments or coarse clock ticks.
    start = time.perf_counter()
    rsvd(X, t=target_rank)
    times_rsvd.append(time.perf_counter() - start)

    # 2. Time NumPy SVD
    # full_matrices=False requests the reduced SVD; for these square
    # matrices that still means the complete decomposition, which serves
    # as the deterministic baseline.
    # NOTE(review): the original author expected that even
    # 'compute_uv=False' would be slower than rSVD for large N; that
    # variant is not measured here.
    start = time.perf_counter()
    np.linalg.svd(X, full_matrices=False)
    times_numpy.append(time.perf_counter() - start)

    print(f"Size {n}x{n} done.")

In [None]:
# Plot execution time against matrix size for both methods, using the
# lists filled in by the benchmark cell (matrix_sizes, times_numpy,
# times_rsvd, target_rank).
plt.figure(figsize=(10, 6))
plt.plot(matrix_sizes, times_numpy, 'o--', label='NumPy SVD (Deterministic)', color='red')
plt.plot(matrix_sizes, times_rsvd, 's-', label='randomized-svd (Randomized)', color='green')

plt.title(f"Performance Comparison (Fixed Rank k={target_rank})")
plt.xlabel("Matrix Size (N x N)")
plt.ylabel("Execution Time (seconds)")
plt.grid(True, alpha=0.3)
plt.legend()

# Annotation for speedup at the largest benchmarked size.
rsvd_last = times_rsvd[-1]
numpy_last = times_numpy[-1]
if rsvd_last > 0:  # a coarse timer can report 0.0; skip rather than divide by zero
    speedup = numpy_last / rsvd_last
    # The label is offset in points (screen units) rather than data units,
    # so its placement does not depend on the magnitude of the measured
    # times or on the chosen matrix sizes.
    plt.annotate(f"{speedup:.1f}x Faster",
                 xy=(matrix_sizes[-1], rsvd_last),
                 xytext=(-120, 40),
                 textcoords='offset points',
                 arrowprops=dict(facecolor='black', shrink=0.05))

plt.show()