# Tutorial 06: Singular Value Decomposition (SVD)

Interactive visualizations for understanding SVD in ML.

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import Ellipse

plt.style.use('seaborn-v0_8-whitegrid')

## 1. Geometric Interpretation: Rotate → Scale → Rotate

In [None]:
def visualize_svd_geometry(A):
    """
    Visualize SVD as: Rotate (V^T) → Scale (Σ) → Rotate (U)

    The unit circle is pushed through the three SVD factors one at a time.
    Each intermediate curve gets its own panel, and all four panels share
    the same axis limits so the shapes can be compared by eye.

    Parameters
    ----------
    A : array_like, shape (2, 2)
        The linear map to decompose.

    Returns
    -------
    U, S, Vt : ndarray
        The factors returned by ``np.linalg.svd(A)``.

    Raises
    ------
    ValueError
        If ``A`` is not 2×2; the figure is a 2-D picture of a 2-D map.
    """
    A = np.asarray(A)
    if A.shape != (2, 2):
        raise ValueError(
            f"visualize_svd_geometry expects a 2x2 matrix, got shape {A.shape}"
        )

    U, S, Vt = np.linalg.svd(A)

    # Unit circle
    theta = np.linspace(0, 2*np.pi, 100)
    circle = np.array([np.cos(theta), np.sin(theta)])

    # Transformations step by step
    step1 = Vt @ circle           # After V^T (rotation in input space)
    step2 = np.diag(S) @ step1    # After Σ (scaling)
    step3 = U @ step2             # After U (rotation in output space)

    fig, axes = plt.subplots(1, 4, figsize=(16, 4))

    titles = [
        'Original Unit Circle',
        'After V^T (rotate/reflect)',
        'After Σ (scale)',
        'After U (rotate/reflect)\n= A @ circle'
    ]
    data = [circle, step1, step2, step3]

    # One limit for every panel: a per-panel limit would silently rescale the
    # axes between stages and hide how much Σ actually stretches the circle.
    max_val = max(3, 1.2 * max(np.abs(d).max() for d in data))

    for ax, d, title in zip(axes, data, titles):
        ax.plot(d[0], d[1], 'b-', linewidth=2)
        # Mark the image of the circle's first sample so rotations/reflections
        # are visible even when the curve itself looks unchanged.
        ax.scatter([d[0, 0]], [d[1, 0]], c='red', s=50, zorder=5)
        ax.set_aspect('equal')
        ax.axhline(y=0, color='k', linewidth=0.5)
        ax.axvline(x=0, color='k', linewidth=0.5)
        ax.set_title(title)
        ax.grid(True, alpha=0.3)
        ax.set_xlim(-max_val, max_val)
        ax.set_ylim(-max_val, max_val)

    plt.suptitle(f'SVD Decomposition: A = UΣV^T\nSingular values: σ₁={S[0]:.2f}, σ₂={S[1]:.2f}', fontsize=14)
    plt.tight_layout()
    plt.show()

    return U, S, Vt

# Example 1: Simple scaling
# Diagonal matrix: scales the first coordinate by 2, leaves the second alone.
print("Example 1: Scaling matrix")
A = np.array([[2, 0], [0, 1]])
visualize_svd_geometry(A)

# Example 2: Rotation + scaling
# Symmetric matrix with off-diagonal coupling between the two coordinates.
print("\nExample 2: General matrix")
A = np.array([[2, 1], [1, 2]])
visualize_svd_geometry(A)

# Example 3: Shear
# Upper-triangular with unit diagonal: adds the second coordinate to the first.
print("\nExample 3: Shear matrix")
A = np.array([[1, 1], [0, 1]])
visualize_svd_geometry(A)

## 2. SVD for Non-Square Matrices

In [None]:
def svd_nonsquare_demo():
    """
    Summarise the reduced SVD of one tall and one wide matrix.

    Draws a two-panel, text-only figure. Each panel lists the matrix, its
    singular values, the number of singular values above 1e-10, and the
    shapes of the factors returned with ``full_matrices=False``.
    """
    examples = (
        # Tall matrix (m > n): maps R^2 to R^3
        ('Tall (3×2)', np.array([[1, 0], [0, 1], [1, 1]])),
        # Wide matrix (m < n): maps R^3 to R^2
        ('Wide (2×3)', np.array([[1, 0, 1], [0, 1, 1]])),
    )

    fig, axes = plt.subplots(1, 2, figsize=(14, 5))

    for panel, (name, A) in zip(axes, examples):
        U, S, Vt = np.linalg.svd(A, full_matrices=False)
        numerical_rank = np.sum(S > 1e-10)

        # (vertical position, text, extra text styling), top to bottom
        rows = (
            (0.9, f'{name} Matrix', {'fontsize': 14, 'fontweight': 'bold'}),
            (0.75, f'A = \n{A}', {'fontsize': 10, 'family': 'monospace'}),
            (0.45, f'Singular values: {S}', {'fontsize': 11}),
            (0.3, f'Rank: {numerical_rank}', {'fontsize': 11}),
            (0.15,
             f'U shape: {U.shape}, Σ shape: {len(S)}, V^T shape: {Vt.shape}',
             {'fontsize': 10}),
        )
        for y_pos, message, styling in rows:
            panel.text(0.5, y_pos, message, ha='center', va='top',
                       transform=panel.transAxes, **styling)

        panel.axis('off')

    plt.tight_layout()
    plt.show()

# Run the demo: draws the tall-vs-wide SVD summary figure.
svd_nonsquare_demo()

## 3. Low-Rank Approximation

In [None]:
def visualize_low_rank_approximation():
    """
    Compare truncated-SVD reconstructions of a noisy low-rank matrix.

    A 50×40 matrix is built as a rank-5 product plus Gaussian noise, then
    reconstructed at several ranks. Top row: the matrix and each rank-k
    reconstruction with its relative Frobenius error. Bottom row: the first
    20 singular values and each residual with the fraction of squared
    singular-value mass ("energy") kept by the first k components.
    """
    np.random.seed(42)
    n_rows, n_cols = 50, 40
    true_rank = 5

    # Draw order (left factor, right factor, noise) fixes the random matrix.
    left_factor = np.random.randn(n_rows, true_rank)
    right_factor = np.random.randn(n_cols, true_rank)
    A = left_factor @ right_factor.T + 0.5 * np.random.randn(n_rows, n_cols)

    U, S, Vt = np.linalg.svd(A, full_matrices=False)

    ranks = [1, 3, 5, 10, 20]
    fig, axes = plt.subplots(2, len(ranks) + 1, figsize=(18, 8))

    # First column: the input matrix and its leading singular values.
    matrix_ax, spectrum_ax = axes[0, 0], axes[1, 0]
    matrix_ax.imshow(A, cmap='viridis', aspect='auto')
    matrix_ax.set_title('Original A')
    matrix_ax.axis('off')

    leading = S[:20]
    spectrum_ax.bar(range(len(leading)), leading)
    spectrum_ax.set_xlabel('Index')
    spectrum_ax.set_ylabel('Singular Value')
    spectrum_ax.set_title('Singular Values')

    # Remaining columns: one rank-k reconstruction and its residual each.
    norm_A = np.linalg.norm(A, 'fro')
    total_energy = np.sum(S**2)
    for k, approx_ax, residual_ax in zip(ranks, axes[0, 1:], axes[1, 1:]):
        A_k = U[:, :k] @ np.diag(S[:k]) @ Vt[:k, :]
        residual = A - A_k
        error = np.linalg.norm(residual, 'fro') / norm_A
        energy = np.sum(S[:k]**2) / total_energy

        approx_ax.imshow(A_k, cmap='viridis', aspect='auto')
        approx_ax.set_title(f'Rank {k}\nError: {error:.2%}')
        approx_ax.axis('off')

        residual_ax.imshow(residual, cmap='RdBu', aspect='auto', vmin=-2, vmax=2)
        residual_ax.set_title(f'Difference\nEnergy: {energy:.1%}')
        residual_ax.axis('off')

    plt.suptitle('Low-Rank Approximation via SVD (Eckart-Young Theorem)', fontsize=14)
    plt.tight_layout()
    plt.show()

# Run the demo: draws the rank-k reconstruction grid.
visualize_low_rank_approximation()

## 4. Image Compression

In [None]:
def image_compression_demo():
    """
    Demonstrate SVD image compression.

    Builds a synthetic 100×120 grayscale "image" (sinusoidal patterns plus a
    little Gaussian noise) and reconstructs it from its top-k singular
    triplets for several values of k.

    Top row: the original and each rank-k reconstruction, titled with the
    number of stored values k·(m + n + 1) and the resulting compression ratio.
    Bottom row: the singular value spectrum (log scale) and, for each k, the
    cumulative energy fraction after 1, 2, ..., k components.
    """
    # Create a sample image with structure
    np.random.seed(42)
    m, n = 100, 120

    # Create image with patterns
    x, y = np.meshgrid(np.linspace(0, 1, n), np.linspace(0, 1, m))
    image = (np.sin(4*np.pi*x) * np.sin(4*np.pi*y) +
             np.sin(8*np.pi*x) * 0.5 +
             np.sin(12*np.pi*y) * 0.3 +
             0.1 * np.random.randn(m, n))

    # SVD
    U, S, Vt = np.linalg.svd(image, full_matrices=False)

    # Loop-invariant quantities, computed once.
    image_norm = np.linalg.norm(image, 'fro')
    energy_fraction = np.cumsum(S**2) / np.sum(S**2)

    # Different compression levels
    k_values = [1, 5, 10, 20, 50]

    fig, axes = plt.subplots(2, len(k_values) + 1, figsize=(18, 8))

    # Original
    axes[0, 0].imshow(image, cmap='gray')
    axes[0, 0].set_title(f'Original\n{m}×{n} = {m*n} values')
    axes[0, 0].axis('off')

    # Singular value spectrum
    axes[1, 0].semilogy(S, 'b.-')
    axes[1, 0].set_xlabel('Index')
    axes[1, 0].set_ylabel('Singular Value (log)')
    axes[1, 0].set_title('Singular Value Spectrum')

    for i, k in enumerate(k_values):
        compressed = U[:, :k] @ np.diag(S[:k]) @ Vt[:k, :]
        # k left vectors (m each) + k right vectors (n each) + k singular values
        storage = k * (m + n + 1)
        ratio = (m * n) / storage
        error = np.linalg.norm(image - compressed, 'fro') / image_norm

        axes[0, i+1].imshow(compressed, cmap='gray')
        axes[0, i+1].set_title(f'k={k}\n{storage} values ({ratio:.1f}× compression)')
        axes[0, i+1].axis('off')

        # Cumulative energy, plotted against the component COUNT (1..k) so the
        # x-axis agrees with its '# Components' label.
        n_components = np.arange(1, k + 1)
        cumsum = energy_fraction[:k]
        axes[1, i+1].fill_between(n_components, cumsum, alpha=0.3)
        # Point markers keep the k=1 panel visible (a lone point has no line).
        axes[1, i+1].plot(n_components, cumsum, 'b.-')
        axes[1, i+1].axhline(y=1, color='r', linestyle='--', alpha=0.5)
        axes[1, i+1].set_ylim(0, 1.1)
        axes[1, i+1].set_xlabel('# Components')
        axes[1, i+1].set_title(f'Energy: {cumsum[-1]:.1%}\nError: {error:.1%}')

    plt.suptitle('Image Compression with SVD', fontsize=14)
    plt.tight_layout()
    plt.show()

# Run the demo: draws the compression-level comparison figure.
image_compression_demo()

## 5. SVD Components as Rank-1 Matrices

In [None]:
def visualize_svd_components():
    """
    Show SVD as sum of rank-1 matrices: A = Σ σ_i u_i v_i^T

    A smooth 30×40 test pattern is decomposed and its first five rank-1
    terms are drawn. Row 1: each term on its own symmetric colour scale.
    Row 2: the running sum of terms with its relative Frobenius error.
    Row 3: the corresponding left and right singular vectors.
    """
    # Reseeds the global RNG; nothing in this function draws from it.
    np.random.seed(42)
    m, n = 30, 40
    grid_x, grid_y = np.meshgrid(np.linspace(0, 1, n), np.linspace(0, 1, m))
    image = np.sin(2*np.pi*grid_x) * np.sin(2*np.pi*grid_y) + 0.5 * np.cos(4*np.pi*grid_x)

    U, S, Vt = np.linalg.svd(image, full_matrices=False)

    n_components = 5
    fig, axes = plt.subplots(3, n_components + 1, figsize=(18, 10))

    # First column: the original, an empty cell, and the singular values.
    top_left, mid_left, bottom_left = axes[:, 0]
    top_left.imshow(image, cmap='RdBu')
    top_left.set_title('Original A')
    top_left.axis('off')
    mid_left.axis('off')
    sigma_lines = [f'σ{j}={S[j-1]:.2f}' for j in range(1, n_components + 1)]
    bottom_left.text(0.5, 0.5, 'σ values:\n' + '\n'.join(sigma_lines),
                     ha='center', va='center', fontsize=10,
                     transform=bottom_left.transAxes)
    bottom_left.axis('off')

    image_norm = np.linalg.norm(image, 'fro')
    running_sum = np.zeros_like(image)

    for idx in range(n_components):
        number = idx + 1
        term_ax, sum_ax, vec_ax = axes[:, number]

        # Rank-1 term and the reconstruction from all terms so far
        term = S[idx] * np.outer(U[:, idx], Vt[idx, :])
        running_sum = running_sum + term

        # Floor the colour range at 0.1 so near-zero terms do not get a
        # degenerate scale.
        colour_limit = max(np.abs(term).max(), 0.1)
        term_ax.imshow(term, cmap='RdBu', vmin=-colour_limit, vmax=colour_limit)
        term_ax.set_title(f'σ{number}·u{number}·v{number}ᵀ\nσ={S[idx]:.2f}')
        term_ax.axis('off')

        sum_ax.imshow(running_sum, cmap='RdBu')
        error = np.linalg.norm(image - running_sum, 'fro') / image_norm
        sum_ax.set_title(f'Sum of 1..{number}\nError: {error:.2%}')
        sum_ax.axis('off')

        vec_ax.plot(U[:, idx], 'b-', label=f'u{number}')
        vec_ax.plot(Vt[idx, :], 'r-', label=f'v{number}')
        vec_ax.legend(fontsize=8)
        vec_ax.set_title('Singular vectors')

    plt.suptitle('SVD as Sum of Rank-1 Matrices: A = Σ σᵢ uᵢ vᵢᵀ', fontsize=14)
    plt.tight_layout()
    plt.show()

# Run the demo: draws the rank-1 component breakdown figure.
visualize_svd_components()

## 6. PCA via SVD

In [None]:
def pca_via_svd_demo():
    """
    Show the connection between PCA and SVD.

    Draws 200 correlated 2-D samples, centres them, and computes both the
    SVD of the centred data and the eigendecomposition of its sample
    covariance. Panel 1: the data with the right singular vectors drawn as
    arrows. Panel 2: the singular values, σ²/(n-1), and the covariance
    eigenvalues as text. Panel 3: the data expressed in the basis given by
    the rows of V^T.
    """
    np.random.seed(42)

    # Correlated 2-D Gaussian sample
    n_samples = 200
    X = np.random.multivariate_normal([0, 0], [[3, 2], [2, 2]], n_samples)
    X_centered = X - X.mean(axis=0)
    dof = n_samples - 1

    # Route 1: SVD of the centred data matrix
    U, S, Vt = np.linalg.svd(X_centered, full_matrices=False)

    # Route 2: eigendecomposition of the sample covariance, sorted descending
    # because eigh returns eigenvalues in ascending order
    cov_matrix = X_centered.T @ X_centered / dof
    eigenvalues, eigenvectors = np.linalg.eigh(cov_matrix)
    descending = np.argsort(eigenvalues)[::-1]
    eigenvalues, eigenvectors = eigenvalues[descending], eigenvectors[:, descending]

    fig, (data_ax, text_ax, score_ax) = plt.subplots(1, 3, figsize=(15, 5))

    # Panel 1: data with principal directions (rows of Vt)
    data_ax.scatter(X_centered[:, 0], X_centered[:, 1], alpha=0.5, s=20)
    for i, (direction, sigma) in enumerate(zip(Vt[:2], S[:2])):
        # Length = 2 × (σ / sqrt(n-1)), purely for visibility
        arrow = direction * sigma / np.sqrt(dof) * 2
        colour = f'C{i}'
        data_ax.arrow(0, 0, arrow[0], arrow[1], head_width=0.1, head_length=0.05,
                      fc=colour, ec=colour, linewidth=2, label=f'PC{i+1} (SVD)')
    data_ax.set_xlabel('x')
    data_ax.set_ylabel('y')
    data_ax.set_title('Data with Principal Components from SVD')
    data_ax.legend()
    data_ax.set_aspect('equal')
    data_ax.grid(True, alpha=0.3)

    # Panel 2: numbers from both routes, as (y position, text, styling)
    report = (
        (0.9, 'SVD of X:', {'fontsize': 12, 'fontweight': 'bold'}),
        (0.75, f'Singular values: {S}', {'fontsize': 10}),
        (0.6, f'σ²/(n-1) = {S**2 / (n_samples-1)}', {'fontsize': 10}),
        (0.45, 'Eigendecomp of XᵀX/(n-1):', {'fontsize': 12, 'fontweight': 'bold'}),
        (0.3, f'Eigenvalues: {eigenvalues}', {'fontsize': 10}),
        (0.1, 'They match! σ² = (n-1) × eigenvalue',
         {'fontsize': 11, 'color': 'green', 'fontweight': 'bold'}),
    )
    for y_pos, message, styling in report:
        text_ax.text(0.5, y_pos, message, ha='center',
                     transform=text_ax.transAxes, **styling)
    text_ax.axis('off')
    text_ax.set_title('SVD vs Eigendecomposition')

    # Panel 3: coordinates in the singular-vector basis (equals U @ diag(S))
    scores = X_centered @ Vt.T
    score_ax.scatter(scores[:, 0], scores[:, 1], alpha=0.5, s=20)
    score_ax.axhline(y=0, color='r', linestyle='--', alpha=0.5)
    score_ax.axvline(x=0, color='r', linestyle='--', alpha=0.5)
    score_ax.set_xlabel('PC1 score')
    score_ax.set_ylabel('PC2 score')
    score_ax.set_title('Data in PC Space\n(Decorrelated)')
    score_ax.set_aspect('equal')
    score_ax.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

# Run the demo: draws the PCA-vs-SVD comparison figure.
pca_via_svd_demo()

## 7. Pseudoinverse and Least Squares

In [None]:
def pseudoinverse_demo():
    """
    Show how SVD provides the pseudoinverse for least squares.

    Fits a straight line to four points by building A⁺ = V Σ⁻¹ Uᵀ from the
    reduced SVD of the 4×2 design matrix, and also solves the same system
    with ``np.linalg.lstsq`` for comparison. Panel 1: data, fitted line and
    residual segments. Panel 2: the formulas and numbers as text. Panel 3:
    the entries of Aᵀ·residual. Prints both solutions and the residual norm.
    """
    # Reseeds the global RNG; nothing in this function draws from it.
    np.random.seed(42)

    # Overdetermined system Ax = b: four equations, two unknowns.
    # Column 0 is the intercept term, column 1 holds the x values.
    A = np.array([[1, 1], [1, 2], [1, 3], [1, 4]])
    b = np.array([2.1, 3.9, 6.2, 7.8])  # Noisy linear relationship

    U, S, Vt = np.linalg.svd(A, full_matrices=False)

    # Pseudoinverse assembled from the SVD factors
    A_pinv = Vt.T @ np.diag(1/S) @ U.T

    x_svd = A_pinv @ b
    x_numpy = np.linalg.lstsq(A, b, rcond=None)[0]
    residual = b - A @ x_svd
    intercept, slope = x_svd[0], x_svd[1]

    fig, (fit_ax, text_ax, ortho_ax) = plt.subplots(1, 3, figsize=(15, 5))

    # Panel 1: data and fitted line
    x_data = A[:, 1]
    fit_ax.scatter(x_data, b, s=100, label='Data points')
    x_line = np.linspace(0, 5, 100)
    fit_ax.plot(x_line, intercept + slope * x_line, 'r-', linewidth=2,
                label=f'Fit: y = {x_svd[0]:.2f} + {x_svd[1]:.2f}x')

    # Vertical segment from each observation to the line
    fitted = intercept + slope * x_data
    for x_pt, observed, predicted in zip(x_data, b, fitted):
        fit_ax.plot([x_pt, x_pt], [observed, predicted], 'g--', alpha=0.7)

    fit_ax.set_xlabel('x')
    fit_ax.set_ylabel('y')
    fit_ax.set_title('Least Squares Fit via SVD')
    fit_ax.legend()
    fit_ax.grid(True, alpha=0.3)

    # Panel 2: formulas and numbers, as (y position, text, styling)
    notes = (
        (0.9, 'SVD Pseudoinverse:', {'fontsize': 12, 'fontweight': 'bold'}),
        (0.75, 'A = U Σ Vᵀ', {'fontsize': 11}),
        (0.6, 'A⁺ = V Σ⁻¹ Uᵀ', {'fontsize': 11}),
        (0.45, 'x = A⁺b minimizes ||Ax - b||²', {'fontsize': 11}),
        (0.25, f'Singular values: σ = {S}', {'fontsize': 10}),
        (0.1, f'Solution: x = {x_svd}', {'fontsize': 10}),
    )
    for y_pos, message, styling in notes:
        text_ax.text(0.5, y_pos, message, ha='center',
                     transform=text_ax.transAxes, **styling)
    text_ax.axis('off')
    text_ax.set_title('SVD Method')

    # Panel 3: Aᵀ·residual, which is zero when residual ⊥ column space of A
    ortho_ax.bar(['Aᵀ @ residual [0]', 'Aᵀ @ residual [1]'], A.T @ residual)
    ortho_ax.axhline(y=0, color='r', linestyle='--')
    ortho_ax.set_ylabel('Value')
    ortho_ax.set_title('Residual is ⊥ to Column Space\n(Aᵀr ≈ 0)')

    plt.tight_layout()
    plt.show()

    print(f"SVD solution: {x_svd}")
    print(f"NumPy lstsq: {x_numpy}")
    print(f"Residual norm: {np.linalg.norm(residual):.4f}")

# Run the demo: draws the least-squares figure and prints both solutions.
pseudoinverse_demo()

## 8. Matrix Completion (Recommender Systems)

In [None]:
def recommender_demo():
    """
    Simple demonstration of matrix completion for recommendations.

    Builds a 20×15 rank-3 rating matrix rescaled to the range 1-5, hides
    about 60% of it, and fills the gaps by alternating between a rank-3
    truncated SVD and re-imposing the observed entries (10 rounds).
    Panels: true ratings, observed ratings, the final rank-3 estimate, and
    the absolute error with the RMSE over the hidden entries.
    """
    np.random.seed(42)

    n_users, n_items = 20, 15
    true_rank = 3

    # Low-rank ground truth; draw order is users, items, then the mask.
    user_factors = np.random.rand(n_users, true_rank)
    item_factors = np.random.rand(n_items, true_rank)
    raw = user_factors @ item_factors.T
    lowest, highest = raw.min(), raw.max()
    R_true = (raw - lowest) / (highest - lowest) * 4 + 1  # Scale to 1-5

    # True where a rating is observed (each entry with probability 0.4)
    mask = np.random.rand(n_users, n_items) < 0.4
    R_observed = np.where(mask, R_true, np.nan)

    # Start with zeros in the gaps, then alternate: project to rank 3,
    # restore the known entries.
    R_filled = np.where(mask, R_observed, 0)
    for _ in range(10):
        U, S, Vt = np.linalg.svd(R_filled, full_matrices=False)
        R_approx = U[:, :true_rank] @ np.diag(S[:true_rank]) @ Vt[:true_rank, :]
        R_filled = np.where(mask, R_observed, R_approx)

    hidden = ~mask
    error = np.sqrt(np.nanmean((R_approx[hidden] - R_true[hidden])**2))

    fig, axes = plt.subplots(1, 4, figsize=(16, 4))

    # (image, colormap, colour range, title) for each panel, left to right
    panels = (
        (R_true, 'YlOrRd', (1, 5), 'True Ratings\n(Unknown)'),
        (np.where(mask, R_observed, np.nan), 'YlOrRd', (1, 5),
         f'Observed Ratings\n({mask.sum()} / {mask.size} = {mask.mean():.0%})'),
        (R_approx, 'YlOrRd', (1, 5), 'SVD Completion\n(Rank-3 approx)'),
        (np.abs(R_true - R_approx), 'Reds', (0, 1), f'|Error|\nRMSE: {error:.3f}'),
    )
    for ax, (picture, colormap, (low, high), heading) in zip(axes, panels):
        ax.imshow(picture, cmap=colormap, vmin=low, vmax=high)
        ax.set_title(heading)
        ax.set_xlabel('Items')
        ax.set_aspect('auto')
    axes[0].set_ylabel('Users')

    plt.suptitle('Matrix Completion for Recommendations using SVD', fontsize=14)
    plt.tight_layout()
    plt.show()

# Run the demo: draws the matrix-completion figure.
recommender_demo()

## 9. Truncated SVD for Large Matrices

In [None]:
def truncated_svd_comparison():
    """
    Compare full SVD vs truncated SVD (scipy.sparse.linalg.svds).

    Times ``np.linalg.svd`` (all singular values) against ``svds`` (only the
    top ``k``) on a 1000×800 Gaussian random matrix, then plots the two
    timings and overlays the top-k singular values from both methods.
    Prints the timings, their ratio, and the largest singular-value
    discrepancy.

    Note: ``svds`` is an iterative partial solver, not a randomized SVD.
    """
    import time
    from scipy.sparse.linalg import svds

    np.random.seed(42)

    # Create large matrix
    m, n = 1000, 800
    A = np.random.randn(m, n)

    k = 20  # Number of components

    # perf_counter is the monotonic, high-resolution clock meant for timing
    # intervals; time.time() is wall-clock and can be coarse or jump.

    # Full SVD
    start = time.perf_counter()
    U_full, S_full, Vt_full = np.linalg.svd(A, full_matrices=False)
    time_full = time.perf_counter() - start

    # Truncated SVD (scipy.sparse.linalg.svds)
    start = time.perf_counter()
    U_trunc, S_trunc, Vt_trunc = svds(A, k=k)
    time_trunc = time.perf_counter() - start

    # svds does not return singular values in descending order, so sort
    # explicitly and permute the singular vectors the same way.
    idx = np.argsort(S_trunc)[::-1]
    S_trunc = S_trunc[idx]
    U_trunc = U_trunc[:, idx]
    Vt_trunc = Vt_trunc[idx, :]

    fig, axes = plt.subplots(1, 2, figsize=(14, 5))

    # Timing comparison
    ax1 = axes[0]
    ax1.bar(['Full SVD\n(all singular values)', f'Truncated SVD\n(top {k} only)'],
            [time_full, time_trunc], color=['steelblue', 'coral'])
    ax1.set_ylabel('Time (seconds)')
    ax1.set_title(f'Computation Time\n({m}×{n} matrix)')
    for i, t in enumerate([time_full, time_trunc]):
        ax1.text(i, t + 0.01, f'{t:.3f}s', ha='center', fontsize=12)

    # Accuracy comparison
    ax2 = axes[1]
    ax2.plot(range(1, k+1), S_full[:k], 'b.-', markersize=10, label='Full SVD')
    ax2.plot(range(1, k+1), S_trunc, 'rx', markersize=10, label='Truncated SVD')
    ax2.set_xlabel('Component index')
    ax2.set_ylabel('Singular value')
    ax2.set_title(f'Top {k} Singular Values\n(Should match!)')
    ax2.legend()
    ax2.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

    print(f"Full SVD time: {time_full:.3f}s")
    print(f"Truncated SVD time: {time_trunc:.3f}s")
    print(f"Speedup: {time_full / time_trunc:.1f}x")
    print(f"Max singular value difference: {np.max(np.abs(S_full[:k] - S_trunc)):.2e}")

# Run the demo: times both solvers, draws the comparison, prints the numbers.
truncated_svd_comparison()

## 10. Summary

In [None]:
# Print a plain-text recap of the concepts covered in sections 1-9.
print("""
KEY CONCEPTS SUMMARY
====================

1. SVD DEFINITION
   - A = UΣVᵀ for ANY matrix (not just square!)
   - U, V orthogonal; Σ diagonal with σ₁ ≥ σ₂ ≥ ... ≥ 0

2. GEOMETRIC MEANING
   - Any linear transform = rotate → scale → rotate
   - σᵢ are the scaling factors along principal axes
   - Vᵀ gives input directions, U gives output directions

3. ECKART-YOUNG THEOREM
   - Truncated SVD gives BEST low-rank approximation
   - A_k = Σᵢ₌₁ᵏ σᵢ uᵢ vᵢᵀ minimizes ||A - A_k||

4. CONNECTION TO EIGENDECOMPOSITION
   - σᵢ² = eigenvalues of AᵀA (and AAᵀ)
   - V = eigenvectors of AᵀA
   - U = eigenvectors of AAᵀ

5. ML APPLICATIONS
   - PCA: X = UΣVᵀ, PCs are rows of Vᵀ
   - Image compression: Keep top k components
   - Recommender systems: Matrix completion
   - Pseudoinverse: A⁺ = VΣ⁺Uᵀ for least squares
   - Latent semantic analysis: Document-term matrices

6. PRACTICAL TIPS
   - Use truncated SVD for large matrices (scipy.sparse.linalg.svds)
   - Randomized SVD for even larger matrices
   - SVD is numerically more stable than forming AᵀA
""")