In [None]:
import torch
import matplotlib.pyplot as plt
import time

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# ==========================================
# 1. Definitions
# ==========================================

def compute_theoretical_loss(d, n, kappa):
    """Evaluate the closed-form (EK) bias and variance terms.

    With ``P = n * d`` samples, a kernel eigenvalue ``lH = 1 / d`` and an
    effective ridge ``kappa / P``, the two terms are::

        bias = ((kappa / P) / (lH + kappa / P)) ** 2
        var  = (kappa / P) * (lH / (lH + kappa / P))

    Args:
        d: Input dimension.
        n: Samples-per-dimension ratio (``P / d``).
        kappa: Ridge parameter.

    Returns:
        Tuple ``(bias_theo, var_theo)``.
    """
    sample_count = n * d
    eigenvalue = 1.0 / d
    ridge_per_sample = kappa / sample_count
    total = eigenvalue + ridge_per_sample

    # Fraction of the signal removed by the ridge, squared.
    squared_shrinkage = (ridge_per_sample / total) ** 2
    variance_term = ridge_per_sample * (eigenvalue / total)

    return squared_shrinkage, variance_term

def kernel_fn(X1, X2, d):
    """Return the linear kernel between the rows of X1 and X2, divided by d.

    Entry ``[i, j]`` of the result is ``<X1[i], X2[j]> / d``.
    """
    inner_products = X1 @ X2.T
    return inner_products / d

def run_empirical_simulation(d, n, kappa, n_trials=30, n_test=1000):
    """Estimate bias and variance of linear-kernel ridge regression by simulation.

    A unit-norm linear teacher and a Gaussian test set are drawn once. Then,
    for each of ``n_trials`` trials, a fresh Gaussian training set of
    ``P = int(n * d)`` points is drawn, the system ``(K + kappa * I) alpha = y``
    is solved, and the resulting predictor is evaluated on the shared test set.
    All tensors are created on the module-level ``device`` (GPU or CPU).

    Args:
        d: Input dimension.
        n: Samples-per-dimension ratio; the training-set size is
            ``int(n * d)`` (truncated toward zero).
        kappa: Ridge added to the diagonal of the training kernel matrix.
        n_trials: Number of independent training sets.
        n_test: Number of test points.

    Returns:
        Tuple ``(bias_emp, var_emp)`` of Python floats: the mean squared error
        of the trial-averaged predictor against the teacher, and the variance
        of the predictions across trials averaged over the test points.
    """
    P = int(n * d)

    # 1. Teacher / ground truth (unit vector)
    w_star = torch.randn(d, 1, device=device)
    w_star = w_star / torch.norm(w_star)

    # 2. Test set, shared by every trial
    X_test = torch.randn(n_test, d, device=device)
    f_test = X_test @ w_star  # (n_test, 1)

    # One row of test predictions per trial
    predictions = torch.zeros((n_trials, n_test), device=device)

    # The ridge term depends only on P and kappa, so build it once instead of
    # allocating a fresh P x P identity matrix on every trial.
    ridge = kappa * torch.eye(P, device=device)

    for i in range(n_trials):
        # A. Training data labelled by the noiseless teacher
        X_train = torch.randn(P, d, device=device)
        y_train = X_train @ w_star  # (P, 1)

        # B. Regularized kernel matrix
        reg_matrix = kernel_fn(X_train, X_train, d) + ridge

        # C. Solve (K + kappa * I) alpha = y
        try:
            # torch.linalg has no cholesky_solve; the solver that consumes a
            # Cholesky factor is the top-level torch.cholesky_solve.
            L = torch.linalg.cholesky(reg_matrix)
            alpha = torch.cholesky_solve(y_train, L)
        except (torch.linalg.LinAlgError, RuntimeError):
            # Fall back to the general solver if the Cholesky path fails
            alpha = torch.linalg.solve(reg_matrix, y_train)

        # D. Predict on the test set
        K_test = kernel_fn(X_test, X_train, d)
        predictions[i, :] = (K_test @ alpha).squeeze()

    # 3. Metrics
    f_bar = predictions.mean(dim=0)

    # Bias: squared error of the trial-averaged predictor
    bias_emp = torch.mean((f_bar - f_test.squeeze()) ** 2).item()

    # Variance: spread of the per-trial predictors around their mean
    # (torch.var with its default estimator), averaged over test points
    var_emp = torch.mean(torch.var(predictions, dim=0)).item()

    return bias_emp, var_emp

# ==========================================
# 2. Experiment Setup
# ==========================================

# Problem configuration
d = 10
kappa = 1.0
n_trials = 100

# Ten log-spaced values of n from 1e-1 to 1e2, created with torch
n_values = torch.logspace(-1, 2, 10).to(device)

# One list per reported quantity, filled in the same order as n_values
results = dict(
    n=n_values.cpu().numpy(),
    bias_th=[], var_th=[],
    bias_emp=[], var_emp=[],
)

rule = "-" * 65
print(f"Running Scaling Experiment: d={d}, kappa={kappa}")
print(rule)
print(f"{'n':<6} | {'P':<6} | {'Bias Th':<9} | {'Bias Emp':<9} | {'Var Th':<9} | {'Var Emp':<9}")
print(rule)

start_time = time.time()

for n in n_values:
    n_val = n.item()

    # Closed-form prediction, then the simulated estimate, for this n
    b_th, v_th = compute_theoretical_loss(d, n_val, kappa)
    b_emp, v_emp = run_empirical_simulation(d, n_val, kappa, n_trials=n_trials)

    for key, value in zip(
        ('bias_th', 'var_th', 'bias_emp', 'var_emp'),
        (b_th, v_th, b_emp, v_emp),
    ):
        results[key].append(value)

    print(f"{n_val:<6.2f} | {int(n_val*d):<6} | {b_th:<9.4f} | {b_emp:<9.4f} | {v_th:<9.4f} | {v_emp:<9.4f}")

print(rule)
print(f"Total time: {time.time() - start_time:.2f} seconds")

# ==========================================
# 3. Plotting
# ==========================================

plt.figure(figsize=(12, 5))

# (title, theory key, empirical key, empirical marker style) for each panel
panels = (
    ('Bias', 'bias_th', 'bias_emp', 'ro'),
    ('Variance', 'var_th', 'var_emp', 'bo'),
)

# Theory as a black line, simulation as markers, both on log-log axes
for position, (title, theory_key, empirical_key, marker) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.loglog(results['n'], results[theory_key], 'k-', label='Theory')
    plt.loglog(results['n'], results[empirical_key], marker, label='Empirical')
    plt.title(title)
    plt.xlabel('n (P/d)')
    plt.grid(True, alpha=0.3)
    plt.legend()

plt.tight_layout()
plt.show()

Using device: cuda
Running Scaling Experiment: d=10, kappa=1.0
-----------------------------------------------------------------
n      | P      | Bias Th   | Bias Emp  | Var Th    | Var Emp  
-----------------------------------------------------------------


AttributeError: module 'torch.linalg' has no attribute 'cholesky_solve'