# plot and check Marchenko Pastur for different $\gamma=\frac{N}{M}$
To make sure I understood this correctly, I have to make sure everything works empirically. This tool a while with several AIs... Anyway, bottom line is that things seem to work. And for a certain $\gamma=\frac{N}{M}$ we shuold expect $s_{max}=1+
\sqrt{\gamma}$

In [86]:
import numpy as np
import matplotlib.pyplot as plt

# Parameters
N = 256
gamma_values = [128, 256]
num_Ts = 200000 

def generate_complex_matrix(N, M):
    """Generate a normalized complex Gaussian random matrix."""
    sigma = np.sqrt(2) / 2
    real_part = np.random.normal(0, sigma, size=(N, M))
    imag_part = np.random.normal(0, sigma, size=(N, M))
    A = (real_part + 1j * imag_part) / np.sqrt(M)  
    return A

def marchenko_pastur_svd_pdf(x, gamma):
    x_plus = (1 + np.sqrt(gamma)) ** 2
    x_minus = (1 - np.sqrt(gamma)) ** 2
    density = np.zeros_like(x)
    valid = (x ** 2 >= x_minus) & (x ** 2 <= x_plus)
    epsilon = 1e-15  # Small value to avoid division by zero
    density[valid] = (1 / (gamma*np.pi * np.maximum(x[valid], epsilon))) * np.sqrt((x_plus - x[valid] ** 2) * (x[valid] ** 2 - x_minus))
    
    if gamma > 1:
        density *= gamma
        
        # Add point mass at zero
        if x[0] == 0:
            density[0] = (gamma - 1) / gamma

    
    return density

fig, axs = plt.subplots(1, len(gamma_values), figsize=(18, 5))

for ax, gamma in zip(axs, gamma_values):
    M = int(N / gamma)
    singular_values = []
    
    # Generate matrices and compute singular values
    for _ in range(num_Ts):
        A = generate_complex_matrix(N, M)
        svd_vals = np.linalg.svd(A, compute_uv=False)
        singular_values.extend(svd_vals)
    
    singular_values = np.array(singular_values)
    print(f'{gamma=}')
    print(f'min={singular_values.min():.2f}, max={singular_values.max():.2f}')
    
    # Plot histogram of singular values
    ax.hist(singular_values, bins=50, density=True, alpha=0.7, label="Empirical")
    
    # Plot theoretical Marchenko-Pastur distribution for singular values
    x_vals = np.linspace(0, max(2, (1 + np.sqrt(gamma)) + 0.5), 1000)
    mp_pdf = marchenko_pastur_svd_pdf(x_vals, gamma)
    ax.plot(x_vals, mp_pdf, 'r-', label="Theoretical")
    
    min_sv = np.abs(1 - np.sqrt(gamma))
    max_sv = (1 + np.sqrt(gamma))
    
    ax.axvline(min_sv, color='g', linestyle='--', label=f'Min SV: {min_sv:.2f}')
    ax.axvline(max_sv, color='b', linestyle='--', label=f'Max SV: {max_sv:.2f}')
    
    if gamma > 1:
        ax.plot([0], [(gamma - 1) / gamma], 'ro', markersize=10, label="Point mass at 0")
    
    ax.set_title(f"γ = {gamma}")
    ax.set_xlabel("Singular Value")
    ax.set_ylabel("Density")
    ax.legend()
    fig.show()

plt.tight_layout()

gamma=128
min=9.50, max=13.30
gamma=256
min=13.81, max=18.66


In [108]:
N = 256
M = 512
A = generate_complex_matrix(N, M)
v_in = 1/np.sqrt(M)*np.ones(M)
(np.abs(A@v_in)**2).sum()

0.45002438108227266

# Expected maximal SVD value for finite size N 
This might explain the N dependance. For larger matrices we have a larger chance of hitting a large SVD value. (which is bounded from above by 4 for $N\rightarrow\infty$)

In [109]:
import numpy as np
from scipy.integrate import quad
import matplotlib.pyplot as plt

def marchenko_pastur_pdf(x, q):
    """
    PDF of the Marchenko-Pastur distribution.
    """
    a = (1 - np.sqrt(q))**2
    b = (1 + np.sqrt(q))**2
    if a <= x <= b:
        return np.sqrt((b - x) * (x - a)) / (2 * np.pi * q * x)
    else:
        return 0

def marchenko_pastur_cdf(x, q):
    """
    CDF of the Marchenko-Pastur distribution, computed by integrating the PDF.
    """
    a = (1 - np.sqrt(q))**2
    if x < a:
        return 0
    b = (1 + np.sqrt(q))**2
    if x > b:
        return 1
    result, _ = quad(lambda t: marchenko_pastur_pdf(t, q), a, x)
    return result

def expected_maximum(N, q):
    """
    Compute the expected maximum for N samples from the Marchenko-Pastur distribution.
    """
    a = (1 - np.sqrt(q))**2
    b = (1 + np.sqrt(q))**2

    def integrand(x):
        f_x = marchenko_pastur_pdf(x, q)
        F_x = marchenko_pastur_cdf(x, q)
        return x * N * (F_x**(N-1)) * f_x

    result, _ = quad(integrand, a, b)
    return result

# Parameters
q = 0.5  # Ratio M/N
N_values = 2**np.linspace(1, 20, 21)

# Compute E[max_s] for each N
expected_max_values = [expected_maximum(N, q) for N in N_values]

# Plot
plt.figure(figsize=(8, 6))
plt.plot(N_values, expected_max_values, marker="o", label=f"Marchenko-Pastur (q={q})", color="blue")
plt.xscale("log")
plt.xlabel("Number of Samples (N) [log scale]")
plt.ylabel("Expected Maximum (E[max_s])")
plt.title("Expected Maximum vs. Number of Samples for Marchenko-Pastur")
plt.grid(visible=True, which="both", linestyle="--", linewidth=0.5)
plt.legend()
plt.show()

# Print results for reference
for N, value in zip(N_values, expected_max_values):
    print(f"N={N}, E[max_s]={value:.6f}")


  If increasing the limit yields no improvement it is advised to analyze 
  the integrand in order to determine the difficulties.  If the position of a 
  local difficulty can be determined (singularity, discontinuity) one will 
  probably gain from splitting up the interval and calling the integrator 
  on the subranges.  Perhaps a special-purpose integrator should be used.
  result, _ = quad(integrand, a, b)


N=2.0, E[max_s]=1.395469
N=3.863745315699382, E[max_s]=1.771419
N=7.464263932294459, E[max_s]=2.098364
N=14.42000740177328, E[max_s]=2.353605
N=27.85761802547597, E[max_s]=2.538537
N=53.81737057623773, E[max_s]=2.666339
N=103.9683067335981, E[max_s]=2.752180
N=200.8535290615681, E[max_s]=2.808878
N=388.0234410266618, E[max_s]=2.845962
N=749.6118763241597, E[max_s]=2.870077
N=1448.1546878700494, E[max_s]=2.885705
N=2797.6504458330005, E[max_s]=2.895813
N=5404.704402525769, E[max_s]=2.902342
N=10441.200658999393, E[max_s]=2.906557
N=20171.070068243094, E[max_s]=2.909277
N=38967.93874440916, E[max_s]=2.911035
N=75281.09539308565, E[max_s]=2.912180
N=145433.48984287644, E[max_s]=2.912963
N=280958.9825631136, E[max_s]=2.913627
N=542776.9763909484, E[max_s]=2.914599
N=1048576.0, E[max_s]=2.916910
