In [None]:
import numpy as np


# -----------------------------
# Utility functions
# -----------------------------

def f(A, S):
    """Objective: f(A) = -2 log det(A) + tr(A^T S A).

    Parameters
    ----------
    A : (d, d) ndarray
        Square matrix at which the objective is evaluated.
    S : (d, d) ndarray
        Matrix appearing in the trace term (the notebook passes a sample
        covariance).

    Returns
    -------
    float
        The objective value, or ``np.inf`` when ``det(A)`` is not strictly
        positive (or is NaN), i.e. when ``log det(A)`` is undefined.
    """
    # slogdet returns log|det| directly, so the determinant itself is never
    # formed: no overflow/underflow for larger matrices and no log-of-negative
    # RuntimeWarning.
    sign, logabsdet = np.linalg.slogdet(A)
    if not sign > 0:
        # Outside the domain of log det; +inf lets a minimiser reject the point
        # with an ordinary "<" comparison (NaN would poison comparisons).
        return np.inf
    return -2.0 * logabsdet + np.trace(A.T @ S @ A)


def delta_star(A, S, i, j, eps=1e-6):
    """
    Compute δ* = argmin_δ f(A + δ E_ij)
    following Theorem 1.

    With c = S[i, i], b = (S A)[i, j] and α = (A^{-1})[j, i], the returned step
    is a root of the quadratic

        α c δ² + (c + α b) δ + (b - α) = 0,

    namely the one equal to (-(c + α b) + sqrt(D)) / (2 α c), where D is the
    discriminant.

    Parameters
    ----------
    A : (d, d) ndarray
        Current matrix.
    S : (d, d) ndarray
        Matrix of the trace term; S[i, i] is expected to be positive.
    i, j : int
        Row and column of the entry being updated.
    eps : float, optional
        Ridge added to the diagonal of ``A`` only when ``A`` itself cannot be
        inverted (exactly singular).

    Returns
    -------
    float
        The step δ*. ``0.0`` ("no update") is returned when neither ``A`` nor
        ``A + eps * I`` is invertible, or when ``S[i, i]`` is not strictly
        positive so that no finite minimiser exists.
    """
    try:
        # The exact inverse is used whenever it exists, so results for
        # non-singular A are unaffected by eps.
        alpha = np.linalg.inv(A)[j, i]
    except np.linalg.LinAlgError:
        # A is exactly singular (e.g. a diagonal entry has just been zeroed):
        # fall back to the ridge-regularised inverse.
        try:
            alpha = np.linalg.inv(A + eps * np.eye(A.shape[0]))[j, i]
        except np.linalg.LinAlgError:
            return 0.0

    c = S[i, i]
    b = (S @ A)[i, j]

    # Without positive curvature the 1-D problem has no finite minimiser, and
    # the formulas below would divide by zero.
    if not c > 0:
        return 0.0

    # Handle α = 0 case separately: the log-det term does not depend on δ and
    # the problem is the plain quadratic c δ² + 2 b δ.
    if abs(alpha) < 1e-12:
        return -b / c

    p = c + alpha * b
    D = p ** 2 - 4 * alpha * c * (b - alpha)
    # D equals (c - α b)² + 4 α² c, which is non-negative for c > 0; the clamp
    # only protects against round-off.
    root = np.sqrt(max(D, 0.0))

    # Both expressions are the same root; pick the one whose denominator or
    # numerator adds like-signed terms so there is no cancellation.
    if p >= 0:
        return 2 * (b - alpha) / (-p - root)
    return (-p + root) / (2 * alpha * c)


def is_DAG(B, tol=1e-8):
    """
    Acyclicity check ignoring diagonal entries.
    Approximation via (I + B/n)^n method.

    The test is run on the absolute values of the off-diagonal weights. Every
    closed walk then contributes a non-negative amount to
    trace((I + |B|/n)^n), so the trace equals n exactly when the graph has no
    cycle. With signed weights, walks of opposite sign can cancel and hide a
    cycle.

    Parameters
    ----------
    B : (n, n) array_like
        Weighted adjacency matrix; the input is not modified.
    tol : float, optional
        Allowed deviation of the trace from n. Cycles whose total contribution
        is below ``tol`` are reported as acyclic.

    Returns
    -------
    bool
        True if no cycle is detected within the tolerance.
    """
    # np.array(...) copies, so zeroing the diagonal never touches the caller's data.
    W = np.abs(np.array(B, dtype=float))
    np.fill_diagonal(W, 0.0)
    n = W.shape[0]
    M = np.eye(n) + W / n
    return bool(abs(np.trace(np.linalg.matrix_power(M, n)) - n) < tol)


# def is_DAG(B):
#     """
#     Simple acyclicity check via (I + B/n)^n ≈ exp(B) test.
#     For small graphs, this brute-force method is acceptable.
#     """
#     n = B.shape[0]
#     M = np.eye(n) + B / n
#     return np.linalg.matrix_power(M, n).trace() == n  # exp(B) has trace == n if acyclic


def weight_to_adjacency(W, threshold=0.05):
    """
    Binarise a weighted matrix into an adjacency matrix.

    An entry of the result is 1 where |W| is strictly greater than
    ``threshold`` and 0 elsewhere; the diagonal is always forced to 0.

    Raises
    ------
    TypeError
        If ``W`` is not a numpy array.
    ValueError
        If the first two dimensions of ``W`` differ.
    """
    if not isinstance(W, np.ndarray):
        raise TypeError("Input W must be a numpy array.")

    n_rows, n_cols = W.shape[0], W.shape[1]
    if n_rows != n_cols:
        raise ValueError("Input W must be a square matrix.")

    # Strict comparison: a weight exactly at the threshold is not an edge.
    is_edge = np.greater(np.abs(W), threshold)
    G = is_edge.astype(int)
    np.fill_diagonal(G, 0)  # no self-loops
    return G


# -----------------------------
# Main algorithm
# -----------------------------

def dag_coordinate_descent(S, T=100, seed=0, threshold=0.05):
    """
    Simplified DAG-Constrained Coordinate Descent.
    Returns (A, G, f(A))

    Starting from the identity, each of the ``T`` iterations draws an index
    pair (i, j) with replacement, clears both A[i, j] and A[j, i], computes the
    single-entry step from ``delta_star`` for each orientation and keeps the
    orientation with the smaller objective ``f``. When i == j the diagonal
    entry A[i, i] is cleared and re-fitted.

    Parameters
    ----------
    S : (d, d) ndarray
        Matrix passed to ``f`` and ``delta_star`` (the notebook passes a sample
        covariance).
    T : int, optional
        Number of coordinate updates.
    seed : int or None, optional
        Seed of a private ``RandomState``. The draws are the same as those of
        ``np.random.seed(seed)`` followed by ``np.random.choice``, but the
        global NumPy RNG is left untouched.
    threshold : float, optional
        Forwarded to ``weight_to_adjacency`` when building ``G``.

    Returns
    -------
    A : (d, d) ndarray
        Final weight matrix.
    G : (d, d) ndarray of int
        Thresholded adjacency matrix of ``A``.
    fval : float
        ``f(A, S)`` at the returned ``A``.

    Notes
    -----
    Only two-cycles are excluded by construction (at most one of A[i, j] and
    A[j, i] is non-zero after an update); ``is_DAG`` is not consulted, so
    longer cycles are not prevented.
    """
    rng = np.random.RandomState(seed)
    d = S.shape[0]

    A = np.eye(d)

    def _objective(candidate):
        # A candidate whose objective is NaN or infinite (NaN step, determinant
        # not positive, ...) is scored +inf so that "<" can never select it.
        with np.errstate(divide="ignore", invalid="ignore"):
            value = f(candidate, S)
        return value if np.isfinite(value) else np.inf

    for _ in range(T):
        i, j = rng.choice(d, 2, replace=True)

        # Kept so the iteration can be undone if neither orientation is usable.
        A_prev = A.copy()
        A[i, j] = A[j, i] = 0.0

        A_ij = A.copy()
        A_ij[i, j] += delta_star(A, S, i, j)
        A_ji = A.copy()
        A_ji[j, i] += delta_star(A, S, j, i)

        f_ij = _objective(A_ij)
        f_ji = _objective(A_ji)

        # Choose direction that minimizes f (ties go to j -> i, as before).
        if f_ij < f_ji:
            A = A_ij
        elif np.isfinite(f_ji):
            A = A_ji
        else:
            # Both candidates are invalid: keep the last good iterate rather
            # than carrying a NaN/singular matrix forward.
            A = A_prev

    G = weight_to_adjacency(A, threshold)
    return A, G, f(A, S)

In [36]:
# test
# Test 1: build the sample covariance R_hat and the reference matrix A_true
# from data returned by generate_scm_data(3, ...).
import sys, os
import numpy as np
from numpy.linalg import LinAlgError, inv
from scipy.linalg import sqrtm

# Extend the import path with the parent of the working directory and with a
# fixed absolute directory so that SCM_data can be imported.
# NOTE(review): the absolute C:\Users\... path is machine-specific; the cell
# will not find SCM_data elsewhere unless the relative entry is enough.
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "..")))
sys.path.append(r"C:\Users\super\DAG")
# NOTE(review): presumably a workaround for duplicate OpenMP runtimes — confirm it is still needed.
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
print(os.getcwd())
from SCM_data import generate_scm_data
# Load dataset
# NOTE(review): the first argument presumably selects the graph structure and
# the second the sample count — confirm against SCM_data.
X, Y, Z, G_true, CPDAG = generate_scm_data(3,10000, seed = 42)
# This first A_true is overwritten further down before it is read.
A_true = (np.eye(3) - G_true)
# Stack the three variables as columns; np.cov(data.T) then treats each
# variable as one row and returns their covariance matrix.
data = np.array([X, Y, Z]).T
R_hat = np.cov(data.T)
print(R_hat.shape)
d = R_hat.shape[0]
Omega_true = np.eye(d)
# Theta_hat, B_init, beta and lam are not read by any cell visible in this notebook.
Theta_hat = inv(R_hat)
# A_init = np.linalg.cholesky(Theta_hat)
# A_init = A_init + 0.01 * np.random.randn(*A_init.shape)
np.random.seed(54)
B_init = np.random.randn(*R_hat.shape)
B_true = G_true
beta = 5
lam = 0.5


print("R_hat = \n",np.array2string(R_hat, formatter={'float_kind':lambda x: f"{x:.2f}"}))
# Reference matrix (I - B_true) Omega_true^{-1/2}; with Omega_true = I this is I - B_true.
A_true = (np.eye(d) - B_true) @ inv(sqrtm(Omega_true))
print("A_true = \n",A_true)

c:\Users\super\DAG\coordinate
(3, 3)
R_hat = 
 [[1.01 1.02 -0.01]
 [1.02 3.08 1.02]
 [-0.01 1.02 1.00]]
A_true = 
 [[ 1. -1.  0.]
 [ 0.  1.  0.]
 [ 0. -1.  1.]]


In [46]:
# Fit on the current sample covariance with 7 coordinate updates, then report
# the weights (3 decimals), the thresholded adjacency and the objective value.
# NOTE(review): the stored output of this cell contains per-iteration lines
# that the visible dag_coordinate_descent does not print; re-run to refresh it.
A_opt, G_opt, fval = dag_coordinate_descent(S=R_hat, T=7, threshold=0.05)

print("Optimized A:\n", A_opt.round(3))
print("\nAdjacency G:\n", G_opt)
print("\nf(A):", round(fval, 5))


Iteration 1 of 7, i, j = 2, 1
Iteration 2 of 7, i, j = 2, 0
Iteration 3 of 7, i, j = 0, 2
Iteration 4 of 7, i, j = 2, 0
Iteration 5 of 7, i, j = 2, 1
Iteration 6 of 7, i, j = 1, 0
Iteration 7 of 7, i, j = 1, 2
Optimized A:
 [[ 1.    -1.027  0.   ]
 [ 0.     1.     0.   ]
 [ 0.009 -1.027  1.   ]]

Adjacency G:
 [[0 1 0]
 [0 0 0]
 [0 1 0]]

f(A): 2.99037


In [None]:
# test 2 chain
# Same preparation as test 1, using the data set returned by generate_scm_data(4, ...).

# NOTE(review): the first argument presumably selects the graph structure and
# the second the sample count — confirm against SCM_data.
X, Y, Z, G_true, CPDAG = generate_scm_data(4,10000, seed = 42)
# This first A_true is overwritten further down before it is read.
A_true = (np.eye(3) - G_true)
# Stack the three variables as columns; np.cov(data.T) then treats each
# variable as one row and returns their covariance matrix.
data = np.array([X, Y, Z]).T
R_hat = np.cov(data.T)
print(R_hat.shape)
d = R_hat.shape[0]
Omega_true = np.eye(d)
# Theta_hat, B_init, beta and lam are not read by any cell visible in this notebook.
Theta_hat = inv(R_hat)
# A_init = np.linalg.cholesky(Theta_hat)
# A_init = A_init + 0.01 * np.random.randn(*A_init.shape)
np.random.seed(54)
B_init = np.random.randn(*R_hat.shape)
B_true = G_true
beta = 5
lam = 0.5


print("R_hat = \n",np.array2string(R_hat, formatter={'float_kind':lambda x: f"{x:.2f}"}))
# Reference matrix (I - B_true) Omega_true^{-1/2}; with Omega_true = I this is I - B_true.
A_true = (np.eye(d) - B_true) @ inv(sqrtm(Omega_true))
print("A_true = \n",A_true)

c:\Users\super\DAG\coordinate
(3, 3)
R_hat = 
 [[1.01 1.00 1.02]
 [1.00 1.99 2.04]
 [1.02 2.04 3.08]]
A_true = 
 [[ 1. -1.  0.]
 [ 0.  1. -1.]
 [ 0.  0.  1.]]


In [34]:
# Fit on the current sample covariance with 30 coordinate updates, then report
# the weights (3 decimals), the thresholded adjacency and the objective value.
A_opt, G_opt, fval = dag_coordinate_descent(S=R_hat, T=30, threshold=0.05)

print("Optimized A:\n", A_opt.round(3))
print("\nAdjacency G:\n", G_opt)
print("\nf(A):", round(fval, 5))


Optimized A:
 [[ 1.    -0.94   0.   ]
 [ 0.     1.    -1.002]
 [-0.021  0.     1.   ]]

Adjacency G:
 [[0 1 0]
 [0 0 1]
 [0 0 0]]

f(A): 2.99253


In [37]:
# test 3 triangle
# Same preparation as test 1, using the data set returned by generate_scm_data(5, ...).

# NOTE(review): the first argument presumably selects the graph structure and
# the second the sample count — confirm against SCM_data.
X, Y, Z, G_true, CPDAG = generate_scm_data(5,10000, seed = 42)
# This first A_true is overwritten further down before it is read.
A_true = (np.eye(3) - G_true)
# Stack the three variables as columns; np.cov(data.T) then treats each
# variable as one row and returns their covariance matrix.
data = np.array([X, Y, Z]).T
R_hat = np.cov(data.T)
print(R_hat.shape)
d = R_hat.shape[0]
Omega_true = np.eye(d)
# Theta_hat, B_init, beta and lam are not read by any cell visible in this notebook.
Theta_hat = inv(R_hat)
# A_init = np.linalg.cholesky(Theta_hat)
# A_init = A_init + 0.01 * np.random.randn(*A_init.shape)
np.random.seed(54)
B_init = np.random.randn(*R_hat.shape)
B_true = G_true
beta = 5
lam = 0.5


print("R_hat = \n",np.array2string(R_hat, formatter={'float_kind':lambda x: f"{x:.2f}"}))
# Reference matrix (I - B_true) Omega_true^{-1/2}; with Omega_true = I this is I - B_true.
A_true = (np.eye(d) - B_true) @ inv(sqrtm(Omega_true))
print("A_true = \n",A_true)

(3, 3)
R_hat = 
 [[1.01 1.00 5.04]
 [1.00 1.99 8.03]
 [5.04 8.03 35.34]]
A_true = 
 [[ 1. -1. -2.]
 [ 0.  1. -3.]
 [ 0.  0.  1.]]


In [38]:
# Fit on the current sample covariance with 30 coordinate updates, then report
# the weights (3 decimals), the thresholded adjacency and the objective value.
A_opt, G_opt, fval = dag_coordinate_descent(S=R_hat, T=30, threshold=0.05)

print("Optimized A:\n", A_opt.round(3))
print("\nAdjacency G:\n", G_opt)
print("\nf(A):", round(fval, 5))


Optimized A:
 [[ 1.    -0.991 -1.97 ]
 [ 0.     1.    -3.057]
 [ 0.     0.     1.   ]]

Adjacency G:
 [[0 1 1]
 [0 0 1]
 [0 0 0]]

f(A): 2.9913


In [57]:
from SCM_data import generate_scm_from_BN 
# Four-variable case: build R_hat, the reference A_true and its objective value.
# A->B->C D
# B_true[r, c] holds the weight of the edge r -> c (A->B is B_true[0, 1] = 1,
# B->C is B_true[1, 2] = 2); the fourth variable has no edges.
B_true = np.array([
    [0, 1, 0, 0],
    [0, 0, 2, 0],
    [0, 0, 0, 0],
    [0, 0, 0, 0]
])
# NOTE(review): N presumably sets per-variable noise levels in the generator —
# confirm against SCM_data. Omega_true below is the identity regardless of N.
N = [1,2,3,4]
# The generator receives the transpose of B_true.
data, G, B, Sigma = generate_scm_from_BN(B_true.T, n_samples=10000, N=N, seed=42)
R_hat = np.cov(data.T)
print("R_hat= \n",np.array2string(R_hat, formatter={'float_kind':lambda x: f"{x:.2f}"}))
d = R_hat.shape[0]
Omega_true = np.eye(d)
# Theta_hat is not read by any cell visible in this notebook.
Theta_hat = inv(R_hat)
# Reference matrix (I - B_true) Omega_true^{-1/2}.
A_true = (np.eye(d) - B_true) @ inv(sqrtm(Omega_true))
print("A_true= \n",np.array2string(A_true, formatter={'float_kind':lambda x: f"{x:.0f}"}))
# Trace term of the objective on its own.
print(np.trace(A_true.T @ R_hat @ A_true))
# Same expression as f(A_true, R_hat): -2 log det(A) + tr(A^T S A).
likelihood_true = - 2 * np.log(np.linalg.det(A_true)) + np.trace(A_true.T @ R_hat @ A_true)
print("likelihood_true = ", likelihood_true)

R_hat= 
 [[0.98 0.99 2.00 0.01]
 [0.99 3.04 6.10 -0.02]
 [2.00 6.10 15.26 -0.06]
 [0.01 -0.02 -0.06 4.15]]
A_true= 
 [[1 -1 0 0]
 [0 1 -2 0]
 [0 0 1 0]
 [0 0 0 1]]
10.168528384447105
likelihood_true =  10.168528384447105


In [58]:
# Fit on the current sample covariance with 300 coordinate updates, then report
# the weights (3 decimals), the thresholded adjacency and the objective value.
A_opt, G_opt, fval = dag_coordinate_descent(S=R_hat, T=300, threshold=0.05)

print("Optimized A:\n", A_opt.round(3))
print("\nAdjacency G:\n", G_opt)
print("\nf(A):", round(fval, 5))

Optimized A:
 [[ 1.000e+00 -1.017e+00  0.000e+00 -2.100e-02]
 [ 0.000e+00  1.000e+00 -2.009e+00  0.000e+00]
 [ 2.000e-03  0.000e+00  1.000e+00  6.000e-03]
 [ 0.000e+00  5.000e-03  0.000e+00  1.000e+00]]

Adjacency G:
 [[0 1 0 0]
 [0 0 1 0]
 [0 0 0 0]
 [0 0 0 0]]

f(A): 10.1674


In [42]:
from SCM_data import generate_scm_from_BN 
# Four-variable case with three parents of D: build R_hat, the reference
# A_true and its objective value.
# A->D B->D C->D
# B_true[r, c] holds the weight of the edge r -> c; here every edge points
# into the last variable with weight 1.
B_true = np.array([
    [0, 0, 0, 1],
    [0, 0, 0, 1],
    [0, 0, 0, 1],
    [0, 0, 0, 0]
])

# The generator receives the transpose of B_true; no N is passed in this cell,
# so the generator's own default applies.
data, G, B, Sigma = generate_scm_from_BN(B_true.T, n_samples=10000, seed=42)
R_hat = np.cov(data.T)
print("R_hat= \n",np.array2string(R_hat, formatter={'float_kind':lambda x: f"{x:.2f}"}))
d = R_hat.shape[0]
Omega_true = np.eye(d)
# Theta_hat is not read by any cell visible in this notebook.
Theta_hat = inv(R_hat)
# Reference matrix (I - B_true) Omega_true^{-1/2}.
A_true = (np.eye(d) - B_true) @ inv(sqrtm(Omega_true))
print("A_true= \n",np.array2string(A_true, formatter={'float_kind':lambda x: f"{x:.0f}"}))
# Trace term of the objective on its own.
print(np.trace(A_true.T @ R_hat @ A_true))
# Same expression as f(A_true, R_hat): -2 log det(A) + tr(A^T S A).
likelihood_true = - 2 * np.log(np.linalg.det(A_true)) + np.trace(A_true.T @ R_hat @ A_true)
print("likelihood_true = ", likelihood_true)

R_hat= 
 [[0.98 0.01 0.01 1.00]
 [0.01 1.02 0.01 1.02]
 [0.01 0.01 1.00 1.01]
 [1.00 1.02 1.01 4.06]]
A_true= 
 [[1 0 0 -1]
 [0 1 0 -1]
 [0 0 1 -1]
 [0 0 0 1]]
4.036124638121187
likelihood_true =  4.036124638121187


In [43]:
# Fit on the current sample covariance with 300 coordinate updates, then report
# the weights (3 decimals), the thresholded adjacency and the objective value.
A_opt, G_opt, fval = dag_coordinate_descent(S=R_hat, T=300, threshold=0.05)

print("Optimized A:\n", A_opt.round(3))
print("\nAdjacency G:\n", G_opt)
print("\nf(A):", round(fval, 5))

Optimized A:
 [[ 1.    -0.009 -0.007 -1.005]
 [ 0.     1.     0.    -0.989]
 [ 0.    -0.006  1.    -0.996]
 [ 0.     0.     0.     1.   ]]

Adjacency G:
 [[0 0 0 1]
 [0 0 0 1]
 [0 0 0 1]
 [0 0 0 0]]

f(A): 4.03582


In [59]:
from SCM_data import generate_scm_from_BN 
# Four-variable case with a source node A and an extra edge D->B: build R_hat,
# the reference A_true and its objective value.
# A->B A->C A->D D->B
# B_true[r, c] holds the weight of the edge r -> c (row 0 carries A's three
# outgoing edges, B_true[3, 1] = 4 is D->B).
B_true = np.array([
    [0, 1, 2, 3],
    [0, 0, 0, 0],
    [0, 0, 0, 0],
    [0, 4, 0, 0]
])
# NOTE(review): N presumably sets per-variable noise levels in the generator —
# confirm against SCM_data. Omega_true below is the identity regardless of N.
N = [1,2,3,4]

# The generator receives the transpose of B_true.
data, G, B, Sigma = generate_scm_from_BN(B_true.T, n_samples=10000, N=N, seed=42)
R_hat = np.cov(data.T)
print("R_hat= \n",np.array2string(R_hat, formatter={'float_kind':lambda x: f"{x:.2f}"}))
d = R_hat.shape[0]
Omega_true = np.eye(d)
# Theta_hat is not read by any cell visible in this notebook.
Theta_hat = inv(R_hat)
# Reference matrix (I - B_true) Omega_true^{-1/2}.
A_true = (np.eye(d) - B_true) @ inv(sqrtm(Omega_true))
print("A_true= \n",np.array2string(A_true, formatter={'float_kind':lambda x: f"{x:.0f}"}))
# Trace term of the objective on its own.
print(np.trace(A_true.T @ R_hat @ A_true))
# Same expression as f(A_true, R_hat): -2 log det(A) + tr(A^T S A).
likelihood_true = - 2 * np.log(np.linalg.det(A_true)) + np.trace(A_true.T @ R_hat @ A_true)
print("likelihood_true = ", likelihood_true)

R_hat= 
 [[0.98 12.79 1.97 2.95]
 [12.79 235.17 25.70 55.08]
 [1.97 25.70 6.97 5.92]
 [2.95 55.08 5.92 13.03]]
A_true= 
 [[1 -1 -2 -3]
 [0 1 0 0]
 [0 0 1 0]
 [0 -4 0 1]]
10.168528384447072
likelihood_true =  10.168528384447072


In [63]:
# Fit on the current sample covariance with 300 coordinate updates, then report
# the weights (3 decimals), the thresholded adjacency and the objective value.
A_opt, G_opt, fval = dag_coordinate_descent(S=R_hat, T=300, threshold=0.05)

print("Optimized A:\n", A_opt.round(3))
print("\nAdjacency G:\n", G_opt)
print("\nf(A):", round(fval, 5))

Optimized A:
 [[ 1.000e+00 -1.053e+00 -2.022e+00 -3.009e+00]
 [ 0.000e+00  1.000e+00 -0.000e+00  0.000e+00]
 [ 0.000e+00  0.000e+00  1.000e+00  0.000e+00]
 [ 0.000e+00 -3.989e+00  3.000e-03  1.000e+00]]

Adjacency G:
 [[0 1 1 1]
 [0 0 0 0]
 [0 0 0 0]
 [0 1 0 0]]

f(A): 10.16799


In [38]:
from SCM_data import generate_scm_from_BN 
# Three-variable case with a non-identity Omega_true: build R_hat, the
# reference A_true and its objective value.
# B_true[r, c] holds the weight placed at row r, column c; the generator
# receives the transpose.
B_true = np.array([
    [0, 1, 2],
    [0, 0, 0],
    [0, 3, 0]
])
# NOTE(review): N presumably sets per-variable noise levels in the generator —
# confirm against SCM_data. Omega_true below uses the same three values on its diagonal.
N = [5,4,3]

data, G, B, Sigma = generate_scm_from_BN(B_true.T, n_samples=10000, N=N, seed=42)
R_hat = np.cov(data.T)
print("R_hat= \n",np.array2string(R_hat, formatter={'float_kind':lambda x: f"{x:.2f}"}))
d = R_hat.shape[0]

Omega_true = [[5,0,0],[0,4,0],[0,0,3]]
# Omega_true^{-1/2}: the column scaling applied to (I - B_true) below.
print(inv(sqrtm(Omega_true)))
# Theta_hat is not read by any cell visible in this notebook.
Theta_hat = inv(R_hat)
# Reference matrix (I - B_true) Omega_true^{-1/2}; its diagonal is no longer 1.
A_true = (np.eye(d) - B_true) @ inv(sqrtm(Omega_true))
print("A_true= \n",np.array2string(A_true, formatter={'float_kind':lambda x: f"{x:.2f}"}))
# Trace term of the objective on its own.
print(np.trace(A_true.T @ R_hat @ A_true))
# Same expression as f(A_true, R_hat): -2 log det(A) + tr(A^T S A).
likelihood_true = - 2 * np.log(np.linalg.det(A_true)) + np.trace(A_true.T @ R_hat @ A_true)
print("likelihood_true = ", likelihood_true)

R_hat= 
 [[5.08 35.74 10.21]
 [35.74 282.69 80.86]
 [10.21 80.86 23.52]]
[[0.4472136  0.         0.        ]
 [0.         0.5        0.        ]
 [0.         0.         0.57735026]]
A_true= 
 [[0.45 -0.50 -1.15]
 [0.00 0.50 0.00]
 [0.00 -1.50 0.58]]
3.0249052422001084
likelihood_true =  7.119249864266276


In [45]:
# Fit on the current sample covariance with 3000 coordinate updates and seed 1,
# then report the weights (3 decimals), the thresholded adjacency and the
# objective value.
A_opt, G_opt, fval = dag_coordinate_descent(S=R_hat, T=3000, seed=1, threshold=0.05)

print("Optimized A:\n", A_opt.round(3))
print("\nAdjacency G:\n", G_opt)
print("\nf(A):", round(fval, 5))

Optimized A:
 [[ 0.444 -0.49  -1.155]
 [ 0.     0.5    0.   ]
 [ 0.    -1.508  0.575]]

Adjacency G:
 [[0 1 1]
 [0 0 0]
 [0 1 0]]

f(A): 7.11878
