In [295]:
import numpy as np
import scipy.sparse as sp

In [296]:
def ensure_positive_definite(M, epsilon=1e-3, min_threshold=1e-3):
    """Ensure M is well-conditioned ; add small value to diagonal if needed"""
    min_eig = np.min(np.linalg.eigvals(M))
    
    if min_eig < min_threshold:
        print(f"Minimum eigenvalue too small ({min_eig:.2e}), adding {epsilon} to diagonal elements.")
        M += np.eye(M.shape[0]) * (abs(min_eig) + epsilon)
    
    return M

In [297]:
def make_sparse_spd_matrix(
    n_dim=10,
    alpha=0.95,
    norm_diag=True,
    smallest_coef=0.1,
    largest_coef=0.9,
    random_state=42
):
    """
    Generate a sparse symmetric positive definite (SPD) matrix.

    Parameters
    ----------
    n_dim : int, default=10
        The size of the random matrix to generate.

    alpha : float, default=0.95
        The probability that a coefficient is zero, controlling sparsity.
        Higher values mean more sparsity. Should be between 0 and 1.

    norm_diag : bool, default=False
        If True, normalizes the matrix so that the diagonal elements are all 1.

    smallest_coef : float, default=0.1
        The smallest coefficient in the randomly generated values (between 0 and 1).

    largest_coef : float, default=0.9
        The largest coefficient in the randomly generated values (between 0 and 1).

    random_state : int or None, default=None
        Seed for random number generation, ensuring reproducible results.

    Returns
    -------
    ndarray or sparse matrix
        The generated sparse SPD matrix as a dense ndarray.
    """
    rng = np.random.default_rng(random_state)

    # Start with a negative identity matrix, which will form the basis of the Cholesky factor.
    chol = -sp.eye(n_dim, format="csc")

    # Generate a random sparse lower triangular matrix to add sparsity
    aux = sp.random(
        m=n_dim, n=n_dim, density=1 - alpha,
        data_rvs=lambda x: rng.uniform(low=smallest_coef, high=largest_coef, size=x),
        format="csc"
    )
    aux = sp.tril(aux, k=-1, format="csc")

    # Randomly permute rows and columns to avoid asymmetries
    permutation = rng.permutation(n_dim)
    aux = aux[permutation].T[permutation]

    # Add the sparse auxiliary matrix to the Cholesky factor
    chol += aux

    # Form the SPD matrix by taking the product of the Cholesky factor with its transpose
    prec = chol.T @ chol

    # Optionally normalize the diagonal to 1
    if norm_diag:
        d = sp.diags(1.0 / np.sqrt(prec.diagonal()))
        prec = d @ prec @ d
    prec = ensure_positive_definite(prec.toarray())

    return prec


In [298]:
import scipy.linalg as la
from scipy.linalg import solve_sylvester, norm
import networkx as nx

# Fix random number generator for reproducibility
np.random.seed(42)

In [299]:
import numpy as np
import scipy.linalg as la

def generate_sparse_covariance(n, sample_scaling=1.0, alpha=0.8, random_state=42):
    """
    Generate a sparse inverse covariance matrix B, compute its associated covariance matrix E,
    and generate samples from a multivariate normal distribution with covariance E.

    Parameters:
    - n (int): Dimension of the matrix.
    - sample_scaling (float): Scaling factor for the number of samples (N = sample_scaling * d^2 log2(n)).
    - alpha (float): Sparsity level for the sparse SPD matrix.
    - random_state (int): Random seed for reproducibility.

    Returns:
    - B (np.ndarray): Sparse inverse covariance matrix (precision matrix).
    - y_samples (np.ndarray): Generated samples following N(0, E).
    - S (np.ndarray): Sample covariance matrix from the generated samples.
    - N (int): Computed number of samples.
    """
    np.random.seed(random_state)

    # Create sparse PSD matrix (B)
    B = make_sparse_spd_matrix(n_dim=n, alpha=alpha, norm_diag=True, random_state=random_state)

    # Compute degree d as the maximum number of nonzero entries per row in B (excluding diagonal)
    d = np.max(np.sum(B != 0, axis=1)) - 1  # Exclude diagonal elements
    print("Max degree in B", d)

    # Compute the required number of samples with log 
    N = int(sample_scaling * (d**2 * np.log(n)))
    N = max(N, n)  # Ensure N is at least n for stability

    # Compute true inverse covariance matrix (Strue)
    Strue = np.linalg.matrix_power(B, 2)

    # Compute covariance matrix (E)
    E = np.linalg.inv(Strue)

    # Generate N samples Y ~ N(0, E)
    y_samples = la.sqrtm(E).dot(np.random.randn(n, N))

    # Calculate sample covariance matrix
    S = np.cov(y_samples)

    return B, S, N


In [300]:
def newton_nare(A, B, C, D, X0, tol=1e-13, kmax=30):
    """
    Newton's method for solving the Nonlinear Algebraic Riccati Equation (NARE):
    C + XA + DX - XBX = 0
    """
    X = X0.copy()
    k = 0
    err = 1

    while err > tol and k < kmax:
        # Compute residual RX = C + XA + DX - XBX
        RX = C + X @ A + D @ X - X @ B @ X

        # Solve the Sylvester equation (D - XB)H + H(A - BX) = -RX for H
        H = solve_sylvester(D - X @ B, A - B @ X, -RX)

        # Update X
        X = X + H

        # Calculate the error
        err = norm(H, 'fro') / (1 + norm(X, 'fro'))

        # Increment iteration counter
        k += 1

    # Check if the solution converged
    if k == kmax:
        print("Warning: reached the maximum number of iterations without convergence.")

    return X

In [301]:
# Soft thresholding function
def soft_thresholding(x, threshold):
    """Applies soft-thresholding elementwise."""
    return np.sign(x) * np.maximum(np.abs(x) - threshold, 0)

In [302]:

# ADMM Algorithm for Elastic-Net Penalized Precision Matrix Estimation
def admm_precision_matrix(S, lambda_, rho=1.0, max_iter=100, tol=1e-4):
    """
    ADMM algorithm for precision matrix estimation with elastic-net penalty.
    """
    n = S.shape[0]
    Z = np.zeros((n, n))
    Lambda = np.zeros((n, n))
    I = np.eye(n)  # Identity matrix

    # Initial B (can be initialized as identity matrix)
    B = np.eye(n)

    for k in range(max_iter):
        # Step 1: Update B using Newton NARE
        # Here, we set up the matrices to solve the NARE: A3 + XA1 + A4X - XA2X = 0
        A3 = - 2 * I
        A4 = Lambda - rho * Z
        A1 = 0 * I
        A2 = - (2 * S + rho * I)
        X0 = B  # Initial guess for Newton NARE

        # Solve for the new B using Newton NARE
        B_new = newton_nare(A1, A2, A3, A4, X0)

        # Step 2: Update Z elementwise using soft-thresholding
        Z_new = soft_thresholding(rho * B_new + Lambda, lambda_)
        Z_new = Z_new / rho

        # Step 3: Update Lambda (Lagrange multiplier)
        Lambda_new = Lambda + rho * (B_new - Z_new)

        # Check convergence
        if np.linalg.norm(B_new - B, ord='fro') < tol:
            print(f"ADMM Converged after {k+1} iterations.")
            break
        elif k == max_iter-1 :
            print(f"ADMM failed to converge after {k+1} iterations.")

        # Update for the next iteration
        B, Z, Lambda = B_new, Z_new, Lambda_new      

    return B

In [303]:
#Thresholding B_estimate
def hard_threshold(B_estimate,threshold):
  return np.where(np.abs(B_estimate) > threshold, B_estimate, 0)

Experimentation

In [304]:
import matplotlib.pyplot as plt
import os
from sklearn.metrics import confusion_matrix, f1_score

In [305]:
def evaluate_metrics_vs_lambda(lambda_values, n = 10, sample_scaling = 1.0, alpha=1, rho=1.0, max_iter=100, tol=1e-4, threshold=1e-4, log_dir="experiment_logs"):    
    """
    Evaluate Frobenius norm, KL divergence, MCC, Sensitivity, and Specificity
    of the estimated precision matrix at various lambda values.
    """

    B, S, N = generate_sparse_covariance(n, sample_scaling= sample_scaling, alpha= alpha)
    ground_truth_adjacency = (B != 0).astype(int).flatten()
    logdet_B = np.log(np.linalg.det(B))

    print("Number of samples:", N)
    print("Minimum eigenvalue:", np.min(np.linalg.eigvals(B)))

    metrics = {
        "lambda": [],
        "Fnorm": [],
        "KL": [],
        "MCC": [],
        "sensitivity": [],
        "specificity": [],
        "f1": [], 
        "plot_paths": []
    }


    for lambda_ in lambda_values:
        # Compute estimated precision matrix
        B_est  = admm_precision_matrix(S, lambda_, rho, max_iter, tol)
        
        # Apply thresholding
        B_est_thresholded  = hard_threshold(B_est, threshold)

        # Plot sparsity patterns and save them
        plt.figure(figsize=(6, 12))

        plt.subplot(1, 2, 1)
        plt.spy(B)
        plt.title('B matrix', fontsize=16)

        # Placeholder for estimated matrix
        plt.subplot(1, 2, 2)
        plt.spy(B_est_thresholded)
        plt.title('B_hat (Estimated)', fontsize=16)

        # plt.subplot(2, 2, 3)
        # plt.spy(E)
        # plt.title('True covariance', fontsize=16)

        # plt.subplot(2, 2, 4)
        # plt.spy(S)
        # plt.title('Sample covariance', fontsize=16)

        plot_path = os.path.join(log_dir, f"sparsity_patterns_lambda_{lambda_:.3f}.png")
        plt.savefig(plot_path)
        plt.close()
        
        metrics["plot_paths"].append(plot_path)

        # Fnorm_val
        Fnorm_val = np.linalg.norm(B - B_est, ord='fro')

        # KL divergence
        try:
            inv_Best = np.linalg.inv(B_est)
            logdet_Best = np.log(np.linalg.det(B_est))
            trace_term = np.trace(inv_Best @ B)
            KL_val = -logdet_Best + trace_term + logdet_B - n
        except np.linalg.LinAlgError:
            # If B_est is singular, define KL as +∞ or some large number
            KL_val = np.inf
        
        predicted_adjacency = (B_est_thresholded != 0).astype(int).flatten()
        cm = confusion_matrix(ground_truth_adjacency, predicted_adjacency, labels=[0,1])
        TN, FP, FN, TP = cm.ravel()

        # MCC = (TP*TN - FP*FN) / sqrt( (TP+FP)*(TP+FN)*(TN+FP)*(TN+FN) )
        # We'll do a safe-check:
        denom = (TP+FP)*(TP+FN)*(TN+FP)*(TN+FN)
        if denom == 0:
            MCC_val = 0.0
        else:
            MCC_val = ((TP*TN) - (FP*FN)) / np.sqrt(denom)
        
        # Sensitivity = TP / (TP + FN)  [recall]
        sensitivity_val = TP / (TP + FN) if (TP+FN) > 0 else 0.0
        # Specificity = TN / (TN + FP)
        specificity_val = TN / (TN + FP) if (TN+FP) > 0 else 0.0
        # F1 score
        f1_val = f1_score(ground_truth_adjacency, predicted_adjacency)

        metrics["lambda"].append(lambda_)
        metrics["Fnorm"].append(Fnorm_val)
        metrics["KL"].append(KL_val)
        metrics["MCC"].append(MCC_val)
        metrics["sensitivity"].append(sensitivity_val)
        metrics["specificity"].append(specificity_val)
        metrics["f1"].append(f1_val)
        # metrics["plot_paths"].append(plot_path)

    return metrics


In [306]:
import numpy as np
import pandas as pd
import os
import json
import matplotlib.pyplot as plt

In [341]:
# Updated main loop
n = 200
sample_scaling = 5

for _alpha in [0.98]:
    log_dir = f"experiment_logs/n_{n}_scale_{sample_scaling}/alpha_{_alpha:.3f}"
    # Create a directory to save logs if it doesn’t exist
    os.makedirs(log_dir, exist_ok=True)

    # params
    args = {
        "lambda_values": np.linspace(0, 0.5, 10).tolist(),
        "n": n,
        "sample_scaling": sample_scaling,
        "alpha": _alpha,
        "rho": 8.0,
        "max_iter": 500,
        "tol": 1e-4,
        "threshold": 1e-2,
        "log_dir": log_dir
    }
    print(f"Evaluating metrics for {args}")

    # Save args to a JSON file (for reproducibility)
    json_path = os.path.join(log_dir, "params.json")
    with open(json_path, "w") as json_file:
        json.dump(args, json_file, indent=4)

    # Call our updated evaluate_metrics_vs_lambda
    metrics = evaluate_metrics_vs_lambda(**args)

    # Convert metrics to a DataFrame
    metrics_df = pd.DataFrame(metrics)

    # Save metrics to CSV or Excel
    csv_path = os.path.join(log_dir, "metrics.csv")
    metrics_df.to_csv(csv_path, index=False)

    # Pick best lambda by F1
    best_idx = metrics_df["f1"].idxmax()
    best_lambda = metrics_df.loc[best_idx, "lambda"]
    print(f"Best lambda by F1 = {best_lambda:.3f}")

    # print the corresponding row of metric values
    # best_row = metrics_df.loc[best_idx]
    # print("Corresponding Metrics:")
    # print(best_row.to_dict())

    # Plot all metrics vs. lambda
    plt.figure(figsize=(8, 6))
    for metric in ["Fnorm", "KL", "MCC", "sensitivity", "specificity", "f1"]:
        plt.plot(metrics_df["lambda"], metrics_df[metric], marker="o", label=metric)

    plt.xscale("log")
    plt.xlabel("Lambda")
    plt.ylabel("Metric Value")
    plt.title("Metrics vs. Lambda")
    plt.legend()
    plot_path = os.path.join(log_dir, "metrics_vs_lambda.png")
    plt.savefig(plot_path)
    plt.close()


Evaluating metrics for {'lambda_values': [0.0, 0.05555555555555555, 0.1111111111111111, 0.16666666666666666, 0.2222222222222222, 0.2777777777777778, 0.3333333333333333, 0.38888888888888884, 0.4444444444444444, 0.5], 'n': 200, 'sample_scaling': 5, 'alpha': 0.98, 'rho': 8.0, 'max_iter': 500, 'tol': 0.0001, 'threshold': 0.01, 'log_dir': 'experiment_logs/n_200_scale_5/alpha_0.980'}
Max degree in B 28
Number of samples: 20769
Minimum eigenvalue: 0.021734224482170314
ADMM Converged after 149 iterations.
ADMM Converged after 94 iterations.
ADMM Converged after 111 iterations.
ADMM Converged after 116 iterations.
ADMM Converged after 143 iterations.
ADMM Converged after 160 iterations.
ADMM Converged after 176 iterations.
ADMM Converged after 185 iterations.
ADMM Converged after 197 iterations.
ADMM Converged after 204 iterations.
Best lambda by F1 = 0.056


F1 vs sample scaling

In [320]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import f1_score
import os

def plot_f1_vs_sample_scaling(
    n=50,
    alpha=0.92,
    best_lambda=1.0,
    rho=7.0,
    max_iter=500,
    tol=1e-4,
    threshold=1e-2,
    scaling_values=None,
    log_dir="experiment_scale_logs"
):
    """
    Fix parameters (n, alpha, best_lambda, etc.) and update sample_scaling from
    1..20 to see how F1 improves with more samples.
    """
    if scaling_values is None:
        # By default, range from 1 to 20 inclusive
        scaling_values = np.arange(1, 21)

    os.makedirs(log_dir, exist_ok=True)

    f1_scores = []

    for scale in scaling_values:
        # Generate data with the current scale
        B, S, N = generate_sparse_covariance(
            n=n,
            sample_scaling=scale,
            alpha=alpha
        )

        # Estimate precision matrix using the best param config
        B_est = admm_precision_matrix(S, best_lambda, rho, max_iter, tol)
        
        # Threshold the estimate
        B_est_thresholded = hard_threshold(B_est, threshold)

        # Compute adjacency-based F1
        ground_truth = (B != 0).astype(int).ravel()
        predicted = (B_est_thresholded != 0).astype(int).ravel()
        f1_val = f1_score(ground_truth, predicted, zero_division=0)

        f1_scores.append(f1_val)

        print(f"scale={scale}, #samples={N}, F1={f1_val:.3f}")

    # Convert to DataFrame for convenience
    df = pd.DataFrame({
        "sample_scaling": scaling_values,
        "F1": f1_scores
    })

    # Save to CSV
    csv_path = os.path.join(log_dir, f"f1_vs_scaling_{n}.csv")
    df.to_csv(csv_path, index=False)

    # Plot
    plt.figure(figsize=(6, 4))
    plt.plot(scaling_values, f1_scores, marker="o")
    plt.xlabel("N/(d^2log(n))")
    plt.ylabel("F1 Score")
    plt.title(f"F1 vs. Sample Scaling (n={n}, alpha={alpha}, lambda={best_lambda})")
    plt.grid(True)
    plot_path = os.path.join(log_dir, f"f1_vs_scaling_{n}.png")
    plt.savefig(plot_path)
    plt.close()

    return df

In [340]:
plot_f1_vs_sample_scaling(
        n=200,
        alpha=0.98,
        best_lambda=0,  
        rho=7.0,
        max_iter=500,
        tol=1e-4,
        threshold=1e-2,
        scaling_values=np.linspace(1, 21, 30), 
        log_dir="f1_scale_experiment"
    )

Max degree in B 28
ADMM Converged after 137 iterations.
scale=1.0, #samples=4153, F1=0.364
Max degree in B 28
ADMM Converged after 137 iterations.
scale=1.6896551724137931, #samples=7018, F1=0.577
Max degree in B 28
ADMM Converged after 136 iterations.
scale=2.3793103448275863, #samples=9883, F1=0.747
Max degree in B 28
ADMM Converged after 135 iterations.
scale=3.0689655172413794, #samples=12748, F1=0.864
Max degree in B 28
ADMM Converged after 134 iterations.
scale=3.7586206896551726, #samples=15612, F1=0.932
Max degree in B 28
ADMM Converged after 132 iterations.
scale=4.448275862068966, #samples=18477, F1=0.961
Max degree in B 28
ADMM Converged after 134 iterations.
scale=5.137931034482759, #samples=21342, F1=0.982
Max degree in B 28
ADMM Converged after 133 iterations.
scale=5.827586206896552, #samples=24207, F1=0.993
Max degree in B 28
ADMM Converged after 133 iterations.
scale=6.517241379310345, #samples=27071, F1=0.996
Max degree in B 28
ADMM Converged after 133 iterations.
sca

Unnamed: 0,sample_scaling,F1
0,1.0,0.363705
1,1.689655,0.577026
2,2.37931,0.74721
3,3.068966,0.864021
4,3.758621,0.931862
5,4.448276,0.961424
6,5.137931,0.981781
7,5.827586,0.99285
8,6.517241,0.995897
9,7.206897,0.997432
