In [25]:
import sys, os
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "..")))
sys.path.append(r"C:\Users\super\DAG")
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
print(os.getcwd())
from SCM_data import generate_scm_from_BN 
from numpy.linalg import inv
from scipy.linalg import sqrtm

c:\Users\super\DAG\coordinate


In [26]:
import numpy as np
from scipy.linalg import expm


# -----------------------------
# Utility functions
# -----------------------------

def f(A, S):
    """Objective: f(A) = -2 log det(A) + tr(A^T S A)."""
    return -2 * np.log(np.linalg.det(A)) + np.trace(A.T @ S @ A)


def delta_star(A, S, i, j,eps = 1e-6):
    """
    Compute δ* = argmin_δ f(A + δ E_ij)
    following Theorem 1.
    """

    A_reg = A + eps * np.eye(A.shape[0])
    try:
        A_inv = np.linalg.inv(A_reg)
    except np.linalg.LinAlgError:
        return 0.0  
    c = S[i, i]
    b = (S @ A)[i, j]
    alpha = np.linalg.inv(A)[j, i]

    D = (c + alpha * b) ** 2 - 4 * alpha * c * (b - alpha)

    # Handle α = 0 case separately
    if abs(alpha) < 1e-12:
        return -b / c

    # Closed-form stable expression
    delta = 2 * (b - alpha) / (-(c + alpha * b) - np.sqrt(D))
    return delta


def is_DAG(W, tol=1e-8, k=None):
    """
    Acyclicity check using NOTEARS constraint:
        h(W) = tr(exp(W ∘ W)) - d = 0
        True if W represents a DAG, False otherwise
    """
    W = W.copy()
    np.fill_diagonal(W, 0.0)
    d = W.shape[0]

    # NOTEARS acyclicity constraint
    h = np.trace(expm(W * W)) - d
    is_dag = abs(h) < tol

    # Edge count check (nonzero entries)
    if k is not None:
        edge_count = np.sum(np.abs(W) > tol)
        return is_dag and (edge_count <= k)
    else:
        return is_dag


def weight_to_adjacency(W, threshold=0.05):
    """
    Convert weighted matrix W to binary adjacency matrix G.
    Diagonal entries set to 0.
    """
    if not isinstance(W, np.ndarray):
        raise TypeError("Input W must be a numpy array.")
    if W.shape[0] != W.shape[1]:
        raise ValueError("Input W must be a square matrix.")

    G = (np.abs(W) > threshold).astype(int)
    np.fill_diagonal(G, 0)
    return G


# -----------------------------
# Main algorithm
# -----------------------------

# def dag_coordinate_descent_l0(S, T=100, seed=0, threshold=0.05, k=None, lambda_l0 = 0.2):
    """
    Simplified DAG-Constrained Coordinate Descent.
    Returns (A, G, f(A))
    """
    np.random.seed(seed)
    d = S.shape[0]
    A = np.eye(d)

    for t in range(T):
        i, j = np.random.choice(d, 2, replace=True)

        #test
        # print(f"t = {t}, (i, j) = ({i}, {j})")
        if i == j: A[i, i] = 0.3
        else: A[i, j] = A[j, i] = 0.0        
        Δ, Δ_bar = -np.inf, -np.inf 
        
        #test
        # try direction i→j
        A_ij = A.copy()
        Eij = np.eye(d)[i][:, None] * np.eye(d)[j][None, :]
        if is_DAG(A_ij + Eij,k=k):
            δ_t = delta_star(A, S, i, j)
            Δ = f(A, S) - f(A + δ_t * Eij, S) - lambda_l0
        # try direction j→i
        A_ji = A.copy()
        Eji = np.eye(d)[j][:, None] * np.eye(d)[i][None, :]
        if is_DAG(A_ji + Eji, k=k):
            δ_bar_t = delta_star(A, S, j, i)
            Δ_bar = f(A, S) - f(A + δ_bar_t * Eji, S) - lambda_l0

        if Δ == -np.inf and Δ_bar == -np.inf:
            print("DAG/k constraint, continue")
            continue

        if Δ < 0 and Δ_bar < 0:
            # print("Δ & Δ_bar < 0, continue")
            continue

        # choose better direction
        if Δ > Δ_bar:
            A = A + δ_t * Eij
        else:
            A = A + δ_bar_t * Eji
        # print("A = \n", A)
    G = weight_to_adjacency(A, threshold)
    return A, G, f(A, S)


def update_diagonal(A, S, i):
    d = S.shape[0]
    A[i, i] = 0.3
    Eii = np.eye(d)[i][:, None] * np.eye(d)[i][None, :]
    δ = delta_star(A, S, i, i)
    if f(A, S) - f(A + δ * Eii, S) < 0:
        print(f"error: i=j={i}, Δ < 0")
        return A
    return A + δ * Eii


def update_off_diagonal(A,S, i, j, lambda_l0 = 0.2):
    d = S.shape[0]
    A[i, j] = A[j, i] = 0.0
    Δ, Δ_bar = -np.inf, -np.inf
    # try direction i→j
    A_ij = A.copy()
    Eij = np.eye(d)[i][:, None] * np.eye(d)[j][None, :]
    if is_DAG(A_ij + Eij):
        δ_t = delta_star(A, S, i, j)
        Δ = f(A, S) - f(A + δ_t * Eij, S) - lambda_l0
    # try direction j→i
    A_ji = A.copy()
    Eji = np.eye(d)[j][:, None] * np.eye(d)[i][None, :]
    if is_DAG(A_ji + Eji):
        δ_bar_t = delta_star(A, S, j, i)
        Δ_bar = f(A, S) - f(A + δ_bar_t * Eji, S) - lambda_l0

    if Δ == -np.inf and Δ_bar == -np.inf:
        print("DAG/k constraint, continue")
        return A

    if Δ < 0 and Δ_bar < 0:
        # print("Δ & Δ_bar < 0, continue")
        return A

    # choose better direction
    if Δ > Δ_bar:
        A = A + δ_t * Eij
    else:
        A = A + δ_bar_t * Eji
    return A


def dag_coordinate_descent_l0(S, T=100, seed=0, threshold=0.05, lambda_l0 = 0.2):
    """
    Simplified DAG-Constrained Coordinate Descent.
    Returns (A, G, f(A))
    """
    np.random.seed(seed)
    d = S.shape[0]
    A = np.eye(d)

    for t in range(T):
        i, j = np.random.choice(d, 2, replace=True)

        if i == j:
            A = update_diagonal(A, S, i)
        else:
            A = update_off_diagonal(A, S, i, j, lambda_l0)

    G = weight_to_adjacency(A, threshold)
    return A, G, f(A, S)



In [None]:
# A->B<-C
print("d=3, A->B<-C")
B_true = np.array([
    [0, 1, 0],
    [0, 0, 0],
    [0, 2, 0]
])
N = [1,2,3]

data, G, B, Sigma = generate_scm_from_BN(B_true.T, n_samples=10000, N=N, seed=42)
R_hat = np.cov(data.T)
print("R_hat= \n",np.array2string(R_hat, formatter={'float_kind':lambda x: f"{x:.2f}"}))
d = R_hat.shape[0]

Omega_true = [[1,0,0],[0,2,0],[0,0,3]]
print(inv(sqrtm(Omega_true)))
Theta_hat = inv(R_hat)
A_true = (np.eye(d) - B_true) @ inv(sqrtm(Omega_true))
print("A_true= \n",np.array2string(A_true, formatter={'float_kind':lambda x: f"{x:.2f}"}))
# print(np.trace(A_true.T @ R_hat @ A_true))
likelihood_true = - 2 * np.log(np.linalg.det(A_true)) + np.trace(A_true.T @ R_hat @ A_true)
print("likelihood_true = ", likelihood_true)



for seed in range(10):
    A_opt, G_opt, fval = dag_coordinate_descent_l0(R_hat, T=300, seed = seed, threshold=0.05, lambda_l0=0.2)
    print("seed = ", seed)
    print("Optimized A:\n", np.round(A_opt, 3))
    print("\nAdjacency G:\n", G_opt)
    print("\nf(A):", round(fval, 5))
    print("")

In [28]:
# A->B->C
print("d=3, A->B->C")
B_true = np.array([
    [0, 1, 0],
    [0, 0, 3],
    [0, 0, 0]
])
N = [1,3,4]

data, G, B, Sigma = generate_scm_from_BN(B_true.T, n_samples=10000, N=N, seed=42)
R_hat = np.cov(data.T)
print("R_hat= \n",np.array2string(R_hat, formatter={'float_kind':lambda x: f"{x:.2f}"}))
d = R_hat.shape[0]

Omega_true = [[1,0,0],[0,3,0],[0,0,4]]
print(inv(sqrtm(Omega_true)))
Theta_hat = inv(R_hat)
A_true = (np.eye(d) - B_true) @ inv(sqrtm(Omega_true))
print("A_true= \n",np.array2string(A_true, formatter={'float_kind':lambda x: f"{x:.2f}"}))
print(np.trace(A_true.T @ R_hat @ A_true))
likelihood_true = - 2 * np.log(np.linalg.det(A_true)) + np.trace(A_true.T @ R_hat @ A_true)
print("likelihood_true = \n\n", likelihood_true)


for seed in range(10):
    A_opt, G_opt, fval = dag_coordinate_descent_l0(R_hat, T=300, seed = seed, threshold=0.05,lambda_l0=0.2)
    print("seed = ", seed)
    print("Optimized A:\n", np.round(A_opt, 3))
    print("\nAdjacency G:\n", G_opt)
    print("\nf(A):", round(fval, 5))
    print("")

d=3, A->B->C
R_hat= 
 [[1.02 1.03 3.11]
 [1.03 4.04 12.20]
 [3.11 12.20 40.81]]
[[1.         0.         0.        ]
 [0.         0.57735026 0.        ]
 [0.         0.         0.5       ]]
A_true= 
 [[1.00 -0.58 0.00]
 [0.00 0.58 -1.50]
 [0.00 0.00 0.50]]
3.0249052668015364
likelihood_true = 

 5.509811952486
seed =  0
Optimized A:
 [[ 0.992 -0.585  0.   ]
 [ 0.     0.577 -1.535]
 [ 0.     0.     0.509]]

Adjacency G:
 [[0 1 0]
 [0 0 1]
 [0 0 0]]

f(A): 5.51031

seed =  1
Optimized A:
 [[ 0.992 -0.585  0.   ]
 [ 0.     0.577 -1.529]
 [ 0.     0.     0.506]]

Adjacency G:
 [[0 1 0]
 [0 0 1]
 [0 0 0]]

f(A): 5.50988

seed =  2
Optimized A:
 [[ 0.992 -0.585  0.   ]
 [ 0.     0.577 -1.512]
 [ 0.     0.     0.501]]

Adjacency G:
 [[0 1 0]
 [0 0 1]
 [0 0 0]]

f(A): 5.50944

seed =  3
Optimized A:
 [[ 0.992 -0.585  0.   ]
 [ 0.     0.577 -1.551]
 [ 0.     0.     0.514]]

Adjacency G:
 [[0 1 0]
 [0 0 1]
 [0 0 0]]

f(A): 5.51144

seed =  4
Optimized A:
 [[ 1.133  0.     0.   ]
 [ 0.     1.576  

In [29]:
# A->B->C, A->C
print("d=3, A->B->C, A->C")

from SCM_data import generate_scm_from_BN 
B_true = np.array([
    [0, 1, 2],
    [0, 0, 3],
    [0, 0, 0]
])
N = [5,4,3]

data, G, B, Sigma = generate_scm_from_BN(B_true.T, n_samples=10000, N=N, seed=42)
R_hat = np.cov(data.T)
print("R_hat= \n",np.array2string(R_hat, formatter={'float_kind':lambda x: f"{x:.2f}"}))
d = R_hat.shape[0]

Omega_true = [[5,0,0],[0,4,0],[0,0,3]]
print(inv(sqrtm(Omega_true)))
Theta_hat = inv(R_hat)
A_true = (np.eye(d) - B_true) @ inv(sqrtm(Omega_true))
print("A_true= \n",np.array2string(A_true, formatter={'float_kind':lambda x: f"{x:.2f}"}))
print(np.trace(A_true.T @ R_hat @ A_true))
likelihood_true = - 2 * np.log(np.linalg.det(A_true)) + np.trace(A_true.T @ R_hat @ A_true)
print("likelihood_true = ", likelihood_true)



for seed in range(10):
    A_opt, G_opt, fval = dag_coordinate_descent_l0(R_hat, T=3000, seed = seed, threshold=0.05,lambda_l0=0.2)
    print("seed = ", seed)
    print("Optimized A:\n", np.round(A_opt, 3))
    print("\nAdjacency G:\n", G_opt)
    print("\nf(A):", round(fval, 5))
    print("")

d=3, A->B->C, A->C
R_hat= 
 [[5.08 5.12 25.56]
 [5.12 9.15 37.77]
 [25.56 37.77 167.78]]
[[0.4472136  0.         0.        ]
 [0.         0.5        0.        ]
 [0.         0.         0.57735026]]
A_true= 
 [[0.45 -0.50 -1.15]
 [0.00 0.50 -1.73]
 [0.00 0.00 0.58]]
3.024905242200109
likelihood_true =  7.119249864266276
seed =  0
Optimized A:
 [[ 0.444 -0.503 -1.149]
 [ 0.     0.5   -1.732]
 [ 0.     0.     0.575]]

Adjacency G:
 [[0 1 1]
 [0 0 1]
 [0 0 0]]

f(A): 7.11878

seed =  1
Optimized A:
 [[ 0.444 -0.503 -1.149]
 [ 0.     0.5   -1.732]
 [ 0.     0.     0.575]]

Adjacency G:
 [[0 1 1]
 [0 0 1]
 [0 0 0]]

f(A): 7.11878

seed =  2
Optimized A:
 [[ 0.444 -0.503 -1.149]
 [ 0.     0.5   -1.732]
 [ 0.     0.     0.575]]

Adjacency G:
 [[0 1 1]
 [0 0 1]
 [0 0 0]]

f(A): 7.11878

seed =  3
Optimized A:
 [[ 0.444 -0.503 -1.149]
 [ 0.     0.5   -1.732]
 [ 0.     0.     0.575]]

Adjacency G:
 [[0 1 1]
 [0 0 1]
 [0 0 0]]

f(A): 7.11878

seed =  4
Optimized A:
 [[ 0.917  0.     0.   ]
 [ 0.  

In [None]:
# A->B, B->C, B->D, |v| = 0
print("d=4, A->B, B->C, B->D, |v| = 0")
B_true = np.array([
    [0, 2, 0, 0],
    [0, 0, 3, 4],
    [0, 0, 0, 0],
    [0, 0, 0, 0]
])
N = [1, 4, 3, 2]

data, G, B, Sigma = generate_scm_from_BN(B_true.T, n_samples=10000, N=N, seed=42)
R_hat = np.cov(data.T)
print("R_hat= \n",np.array2string(R_hat, formatter={'float_kind':lambda x: f"{x:.2f}"}))
d = R_hat.shape[0]

Omega_true = [[1,0,0,0],[0,4,0,0],[0,0,3,0],[0,0,0,2]]
print(inv(sqrtm(Omega_true)))
Theta_hat = inv(R_hat)
A_true = (np.eye(d) - B_true) @ inv(sqrtm(Omega_true))
print("A_true= \n",np.array2string(A_true, formatter={'float_kind':lambda x: f"{x:.2f}"}))
print(np.trace(A_true.T @ R_hat @ A_true))
likelihood_true = - 2 * np.log(np.linalg.det(A_true)) + np.trace(A_true.T @ R_hat @ A_true)
print("likelihood_true = ", likelihood_true)



for seed in range(10):
    A_opt, G_opt, fval = dag_coordinate_descent_l0(R_hat, T=1000, seed = seed, threshold=0.05, lambda_l0=0.2)
    print("seed = ", seed)
    print("Optimized A:\n", np.round(A_opt, 3))
    print("\nAdjacency G:\n", G_opt)
    print("\nf(A):", round(fval, 5))
    print("")

d=4, A->B, B->C, B->D, |v| = 0
R_hat= 
 [[0.98 1.97 5.91 7.88]
 [1.97 4.97 14.94 19.89]
 [5.91 14.94 45.88 59.75]
 [7.88 19.89 59.75 80.59]]
[[1. 0. 0. 0.]
 [0. 1. 0. 0.]
 [0. 0. 1. 0.]
 [0. 0. 0. 1.]]
A_true= 
 [[1.00 -2.00 0.00 0.00]
 [0.00 1.00 -3.00 -4.00]
 [0.00 0.00 1.00 0.00]
 [0.00 0.00 0.00 1.00]]
4.036124638121439
likelihood_true =  4.036124638121439
seed =  0
Optimized A:
 [[ 1.01  -1.992  0.     0.   ]
 [ 0.     0.992 -3.004 -2.736]
 [ 0.     0.     1.    -0.208]
 [ 0.     0.     0.     0.844]]

Adjacency G:
 [[0 1 0 0]
 [0 0 1 1]
 [0 0 0 1]
 [0 0 0 0]]

f(A): 4.12197

seed =  1
Optimized A:
 [[ 2.185  0.     0.     0.   ]
 [ 0.     0.448 -3.004 -3.948]
 [ 0.     0.     1.     0.   ]
 [-0.214  0.     0.     0.987]]

Adjacency G:
 [[0 0 0 0]
 [0 0 1 1]
 [0 0 0 0]
 [1 0 0 0]]

f(A): 4.079

seed =  2
Optimized A:
 [[ 1.01  -0.615  0.    -1.937]
 [ 0.     2.971  0.     0.   ]
 [ 0.    -0.887  0.81   0.   ]
 [ 0.     0.    -0.601  0.241]]

Adjacency G:
 [[0 1 0 1]
 [0 0 0 0]
 [0

In [31]:
# A->C, A->D, B->C, B->D, |v| = 2
print("d=4, A->C, A->D, B->C, B->D, |v| = 2")
B_true = np.array([
    [0, 0, 2, 3],
    [0, 0, 3, 4],
    [0, 0, 0, 0],
    [0, 0, 0, 0]
])
N = [2, 4, 3, 5]

data, G, B, Sigma = generate_scm_from_BN(B_true.T, n_samples=10000, N=N, seed=42)
R_hat = np.cov(data.T)
print("R_hat= \n",np.array2string(R_hat, formatter={'float_kind':lambda x: f"{x:.2f}"}))
d = R_hat.shape[0]

Omega_true = [[2,0,0,0],[0,4,0,0],[0,0,3,0],[0,0,0,5]]
print(inv(sqrtm(Omega_true)))
Theta_hat = inv(R_hat)
A_true = (np.eye(d) - B_true) @ inv(sqrtm(Omega_true))
print("A_true= \n",np.array2string(A_true, formatter={'float_kind':lambda x: f"{x:.2f}"}))
print(np.trace(A_true.T @ R_hat @ A_true))
likelihood_true = - 2 * np.log(np.linalg.det(A_true)) + np.trace(A_true.T @ R_hat @ A_true)
print("likelihood_true = ", likelihood_true)



for seed in range(10):
    A_opt, G_opt, fval = dag_coordinate_descent_l0(R_hat, T=1000, seed = seed, threshold=0.05, lambda_l0=0.2)
    print("seed = ", seed)
    print("Optimized A:\n", np.round(A_opt, 3))
    print("\nAdjacency G:\n", G_opt)
    print("\nf(A):", round(fval, 5))
    print("")

d=4, A->C, A->D, B->C, B->D, |v| = 2
R_hat= 
 [[1.96 0.02 4.01 5.99]
 [0.02 4.07 12.28 16.30]
 [4.01 12.28 47.95 61.01]
 [5.99 16.30 61.01 88.22]]
[[0.70710677 0.         0.         0.        ]
 [0.         0.5        0.         0.        ]
 [0.         0.         0.57735026 0.        ]
 [0.         0.         0.         0.4472136 ]]
A_true= 
 [[0.71 0.00 -1.15 -1.34]
 [0.00 0.50 -1.73 -1.79]
 [0.00 0.00 0.58 0.00]
 [0.00 0.00 0.00 0.45]]
4.036124543795759
likelihood_true =  8.823616380650414
seed =  0
Optimized A:
 [[ 0.714  0.    -1.18  -1.315]
 [ 0.     0.496 -1.757 -1.744]
 [ 0.     0.     0.585  0.   ]
 [ 0.     0.     0.     0.437]]

Adjacency G:
 [[0 0 1 1]
 [0 0 1 1]
 [0 0 0 0]
 [0 0 0 0]]

f(A): 8.82277

seed =  1
Optimized A:
 [[ 0.714  0.    -1.165 -1.333]
 [ 0.     0.496 -1.743 -1.767]
 [ 0.     0.     0.58   0.   ]
 [ 0.     0.     0.     0.443]]

Adjacency G:
 [[0 0 1 1]
 [0 0 1 1]
 [0 0 0 0]
 [0 0 0 0]]

f(A): 8.82256

seed =  2
Optimized A:
 [[ 0.714  0.    -1.167 -1.31

In [32]:
# A->D, B->D, C->D, |v| = 3
print("d=4, A->D, B->D, C->D, |v| = 3")

B_true = np.array([
    [0, 0, 0, 1],
    [0, 0, 0, 3],
    [0, 0, 0, 5],
    [0, 0, 0, 0]
])
N = [5, 4, 3, 2]

data, G, B, Sigma = generate_scm_from_BN(B_true.T, n_samples=10000, N=N, seed=42)
R_hat = np.cov(data.T)
print("R_hat= \n",np.array2string(R_hat, formatter={'float_kind':lambda x: f"{x:.2f}"}))
d = R_hat.shape[0]

Omega_true = [[5,0,0,0],[0,4,0,0],[0,0,3,0],[0,0,0,2]]
print(inv(sqrtm(Omega_true)))
Theta_hat = inv(R_hat)
A_true = (np.eye(d) - B_true) @ inv(sqrtm(Omega_true))
print("A_true= \n",np.array2string(A_true, formatter={'float_kind':lambda x: f"{x:.2f}"}))
print(np.trace(A_true.T @ R_hat @ A_true))
likelihood_true = - 2 * np.log(np.linalg.det(A_true)) + np.trace(A_true.T @ R_hat @ A_true)
print("likelihood_true = ", likelihood_true)



for seed in range(10):
    A_opt, G_opt, fval = dag_coordinate_descent_l0(R_hat, T=1000, seed = seed, threshold=0.05, lambda_l0=0.2)
    print("seed = ", seed)
    print("Optimized A:\n", np.round(A_opt, 3))
    print("\nAdjacency G:\n", G_opt)
    print("\nf(A):", round(fval, 5))
    print("")

d=4, A->D, B->D, C->D, |v| = 3
R_hat= 
 [[4.90 0.04 0.03 5.16]
 [0.04 4.07 0.02 12.32]
 [0.03 0.02 3.00 15.08]
 [5.16 12.32 15.08 119.48]]
[[0.4472136  0.         0.         0.        ]
 [0.         0.5        0.         0.        ]
 [0.         0.         0.57735026 0.        ]
 [0.         0.         0.         0.70710677]]
A_true= 
 [[0.45 0.00 0.00 -0.71]
 [0.00 0.50 0.00 -2.12]
 [0.00 0.00 0.58 -3.54]
 [0.00 0.00 0.00 0.71]]
4.036124543201381
likelihood_true =  8.823616380056034
seed =  0
Optimized A:
 [[ 0.452  0.615  0.     0.   ]
 [ 0.     1.963  0.     0.   ]
 [ 0.     3.069  0.955  0.   ]
 [ 0.    -0.614 -0.121  0.091]]

Adjacency G:
 [[0 1 0 0]
 [0 0 0 0]
 [0 1 0 0]
 [0 1 1 0]]

f(A): 8.96305

seed =  1
Optimized A:
 [[ 0.452  0.     0.    -0.642]
 [ 0.     0.496  0.    -1.916]
 [ 0.     0.     0.577 -3.199]
 [ 0.     0.     0.     0.642]]

Adjacency G:
 [[0 0 0 1]
 [0 0 0 1]
 [0 0 0 1]
 [0 0 0 0]]

f(A): 8.83431

seed =  2
Optimized A:
 [[ 0.452  0.     0.    -0.746]
 [ 0. 

In [37]:
# d=5, e=4, |v|=0
print("d=5, e=4, |v|=0")

B_true = np.array([
    [0, 1, 0, 2, 0],
    [0, 0, 3, 0, 4],
    [0, 0, 0, 0, 0],
    [0, 0, 0, 0, 0],
    [0, 0, 0, 0, 0]
])
N = [1, 2, 3, 2, 1]

data, G, B, Sigma = generate_scm_from_BN(B_true.T, n_samples=10000, N=N, seed=42)
R_hat = np.cov(data.T)
print("R_hat= \n",np.array2string(R_hat, formatter={'float_kind':lambda x: f"{x:.2f}"}))
d = R_hat.shape[0]

Omega_true = [[1,0,0,0,0],[0,2,0,0,0],[0,0,3,0,0],[0,0,0,2,0],[0,0,0,0,1]]
print(inv(sqrtm(Omega_true)))
Theta_hat = inv(R_hat)
A_true = (np.eye(d) - B_true) @ inv(sqrtm(Omega_true))
print("A_true= \n",np.array2string(A_true, formatter={'float_kind':lambda x: f"{x:.2f}"}))
print(np.trace(A_true.T @ R_hat @ A_true))
likelihood_true = - 2 * np.log(np.linalg.det(A_true)) + np.trace(A_true.T @ R_hat @ A_true)
print("likelihood_true = ", likelihood_true)



for seed in range(10):
    A_opt, G_opt, fval = dag_coordinate_descent_l0(R_hat, T=1000, seed = seed, threshold=0.05)
    print("seed = ", seed)
    print("Optimized A:\n", np.round(A_opt, 3))
    print("\nAdjacency G:\n", G_opt)
    print("\nf(A):", round(fval, 5))
    print("")

d=5, e=4, |v|=0
R_hat= 
 [[1.00 1.03 3.05 2.02 4.13]
 [1.03 3.06 9.14 2.08 12.24]
 [3.05 9.14 30.26 6.13 36.56]
 [2.02 2.08 6.13 6.12 8.33]
 [4.13 12.24 36.56 8.33 49.94]]
[[1.         0.         0.         0.         0.        ]
 [0.         0.70710677 0.         0.         0.        ]
 [0.         0.         0.57735026 0.         0.        ]
 [0.         0.         0.         0.70710677 0.        ]
 [0.         0.         0.         0.         1.        ]]
A_true= 
 [[1.00 -0.71 0.00 -1.41 0.00]
 [0.00 0.71 -1.73 0.00 -4.00]
 [0.00 0.00 0.58 0.00 0.00]
 [0.00 0.00 0.00 0.71 0.00]
 [0.00 0.00 0.00 0.00 1.00]]
5.026304608930056
likelihood_true =  7.511211363071604
seed =  0
Optimized A:
 [[ 1.721  0.     0.     0.     0.   ]
 [ 0.     0.652 -1.751  0.    -3.901]
 [ 0.     0.     0.586  0.     0.   ]
 [-0.567 -0.221  0.     0.404  0.   ]
 [ 0.     0.     0.     0.     0.976]]

Adjacency G:
 [[0 0 0 0 0]
 [0 0 1 0 1]
 [0 0 0 0 0]
 [1 1 0 0 0]
 [0 0 0 0 0]]

f(A): 7.6731

seed =  1
Optimi

In [38]:
# d=5, e=4, |v|=1
print("d=5, e=4, |v|=1")

B_true = np.array([
    [0, 0, 1, 2, 0],
    [0, 0, 0, 2, 3],
    [0, 0, 0, 0, 0],
    [0, 0, 0, 0, 0],
    [0, 0, 0, 0, 0]
])
N = [1, 2, 3, 2, 1]

data, G, B, Sigma = generate_scm_from_BN(B_true.T, n_samples=10000, N=N, seed=42)
R_hat = np.cov(data.T)
print("R_hat= \n",np.array2string(R_hat, formatter={'float_kind':lambda x: f"{x:.2f}"}))
d = R_hat.shape[0]

Omega_true = [[1,0,0,0,0],[0,2,0,0,0],[0,0,3,0,0],[0,0,0,2,0],[0,0,0,0,1]]
print(inv(sqrtm(Omega_true)))
Theta_hat = inv(R_hat)
A_true = (np.eye(d) - B_true) @ inv(sqrtm(Omega_true))
print("A_true= \n",np.array2string(A_true, formatter={'float_kind':lambda x: f"{x:.2f}"}))
print(np.trace(A_true.T @ R_hat @ A_true))
likelihood_true = - 2 * np.log(np.linalg.det(A_true)) + np.trace(A_true.T @ R_hat @ A_true)
print("likelihood_true = ", likelihood_true)



for seed in range(10):
    A_opt, G_opt, fval = dag_coordinate_descent_l0(R_hat, T=1000, seed = seed, threshold=0.05)
    print("seed = ", seed)
    print("Optimized A:\n", np.round(A_opt, 3))
    print("\nAdjacency G:\n", G_opt)
    print("\nf(A):", round(fval, 5))
    print("")

d=5, e=4, |v|=1
R_hat= 
 [[1.00 0.03 0.97 2.07 0.10]
 [0.03 2.00 0.02 4.07 5.99]
 [0.97 0.02 3.88 1.96 0.09]
 [2.07 4.07 1.96 14.38 12.19]
 [0.10 5.99 0.09 12.19 18.92]]
[[1.         0.         0.         0.         0.        ]
 [0.         0.70710677 0.         0.         0.        ]
 [0.         0.         0.57735026 0.         0.        ]
 [0.         0.         0.         0.70710677 0.        ]
 [0.         0.         0.         0.         1.        ]]
A_true= 
 [[1.00 0.00 -0.58 -1.41 0.00]
 [0.00 0.71 0.00 -1.41 -3.00]
 [0.00 0.00 0.58 0.00 0.00]
 [0.00 0.00 0.00 0.71 0.00]
 [0.00 0.00 0.00 0.00 1.00]]
5.026304608930029
likelihood_true =  7.511211363071577
seed =  0
Optimized A:
 [[ 1.192  1.242 -0.561  0.     0.   ]
 [ 0.     1.563  0.     0.    -2.979]
 [ 0.     0.     0.582  0.     0.   ]
 [-0.172 -0.621  0.     0.264  0.   ]
 [ 0.     0.     0.     0.     0.996]]

Adjacency G:
 [[0 1 1 0 0]
 [0 0 0 0 1]
 [0 0 0 0 0]
 [1 1 0 0 0]
 [0 0 0 0 0]]

f(A): 7.50947

seed =  1
Optimiz

In [39]:
# d=5, e=4, |v|=2
print("d=5, e=4, |v|=2")

B_true = np.array([
    [0, 0, 0, 1, 0],
    [0, 0, 0, 2, 3],
    [0, 0, 0, 0, 4],
    [0, 0, 0, 0, 0],
    [0, 0, 0, 0, 0]
])
N = [1, 2, 3, 2, 1]

data, G, B, Sigma = generate_scm_from_BN(B_true.T, n_samples=10000, N=N, seed=42)
R_hat = np.cov(data.T)
print("R_hat= \n",np.array2string(R_hat, formatter={'float_kind':lambda x: f"{x:.2f}"}))
d = R_hat.shape[0]

Omega_true = [[1,0,0,0,0],[0,2,0,0,0],[0,0,3,0,0],[0,0,0,2,0],[0,0,0,0,1]]
print(inv(sqrtm(Omega_true)))
Theta_hat = inv(R_hat)
A_true = (np.eye(d) - B_true) @ inv(sqrtm(Omega_true))
print("A_true= \n",np.array2string(A_true, formatter={'float_kind':lambda x: f"{x:.2f}"}))
print(np.trace(A_true.T @ R_hat @ A_true))
likelihood_true = - 2 * np.log(np.linalg.det(A_true)) + np.trace(A_true.T @ R_hat @ A_true)
print("likelihood_true = ", likelihood_true)



for seed in range(10):
    A_opt, G_opt, fval = dag_coordinate_descent_l0(R_hat, T=1000, seed = seed, threshold=0.05)
    print("seed = ", seed)
    print("Optimized A:\n", np.round(A_opt, 3))
    print("\nAdjacency G:\n", G_opt)
    print("\nf(A):", round(fval, 5))
    print("")

d=5, e=4, |v|=2
R_hat= 
 [[1.00 0.03 -0.04 1.07 -0.05]
 [0.03 2.00 -0.00 4.04 5.98]
 [-0.04 -0.00 2.95 -0.07 11.79]
 [1.07 4.04 -0.07 11.23 11.80]
 [-0.05 5.98 11.79 11.80 66.06]]
[[1.         0.         0.         0.         0.        ]
 [0.         0.70710677 0.         0.         0.        ]
 [0.         0.         0.57735026 0.         0.        ]
 [0.         0.         0.         0.70710677 0.        ]
 [0.         0.         0.         0.         1.        ]]
A_true= 
 [[1.00 0.00 0.00 -0.71 0.00]
 [0.00 0.71 0.00 -1.41 -3.00]
 [0.00 0.00 0.58 0.00 -4.00]
 [0.00 0.00 0.00 0.71 0.00]
 [0.00 0.00 0.00 0.00 1.00]]
5.026304608930057
likelihood_true =  7.511211363071605
seed =  0
Optimized A:
 [[ 0.999  0.62   0.     0.     0.   ]
 [ 0.     1.564  0.     0.    -2.983]
 [ 0.     0.     0.582  0.    -3.992]
 [ 0.    -0.621  0.     0.298  0.   ]
 [ 0.     0.     0.     0.     0.998]]

Adjacency G:
 [[0 1 0 0 0]
 [0 0 0 0 1]
 [0 0 0 0 1]
 [0 1 0 0 0]
 [0 0 0 0 0]]

f(A): 7.61708

seed = 

In [40]:
# d=5, e=4, |v|=6
print("d=5, e=4, |v|=6")

B_true = np.array([
    [0, 0, 0, 0, 2],
    [0, 0, 0, 0, 3],
    [0, 0, 0, 0, 4],
    [0, 0, 0, 0, 5],
    [0, 0, 0, 0, 0]
])
N = [1, 2, 3, 2, 1]

data, G, B, Sigma = generate_scm_from_BN(B_true.T, n_samples=10000, N=N, seed=42)
R_hat = np.cov(data.T)
print("R_hat= \n",np.array2string(R_hat, formatter={'float_kind':lambda x: f"{x:.2f}"}))
d = R_hat.shape[0]

Omega_true = [[1,0,0,0,0],[0,2,0,0,0],[0,0,3,0,0],[0,0,0,2,0],[0,0,0,0,1]]
print(inv(sqrtm(Omega_true)))
Theta_hat = inv(R_hat)
A_true = (np.eye(d) - B_true) @ inv(sqrtm(Omega_true))
print("A_true= \n",np.array2string(A_true, formatter={'float_kind':lambda x: f"{x:.2f}"}))
print(np.trace(A_true.T @ R_hat @ A_true))
likelihood_true = - 2 * np.log(np.linalg.det(A_true)) + np.trace(A_true.T @ R_hat @ A_true)
print("likelihood_true = ", likelihood_true)



for seed in range(10):
    A_opt, G_opt, fval = dag_coordinate_descent_l0(R_hat, T=1000, seed = seed, threshold=0.05)
    print("seed = ", seed)
    print("Optimized A:\n", np.round(A_opt, 3))
    print("\nAdjacency G:\n", G_opt)
    print("\nf(A):", round(fval, 5))
    print("")

d=5, e=4, |v|=6
R_hat= 
 [[1.00 0.03 -0.04 0.01 2.02]
 [0.03 2.00 -0.00 0.01 6.06]
 [-0.04 -0.00 2.95 -0.03 11.57]
 [0.01 0.01 -0.03 2.06 10.22]
 [2.02 6.06 11.57 10.22 120.59]]
[[1.         0.         0.         0.         0.        ]
 [0.         0.70710677 0.         0.         0.        ]
 [0.         0.         0.57735026 0.         0.        ]
 [0.         0.         0.         0.70710677 0.        ]
 [0.         0.         0.         0.         1.        ]]
A_true= 
 [[1.00 0.00 0.00 0.00 -2.00]
 [0.00 0.71 0.00 0.00 -3.00]
 [0.00 0.00 0.58 0.00 -4.00]
 [0.00 0.00 0.00 0.71 -5.00]
 [0.00 0.00 0.00 0.00 1.00]]
5.026304608930009
likelihood_true =  7.511211363071557
seed =  0
Optimized A:
 [[ 0.999  0.     0.     0.    -2.006]
 [ 0.     0.706  0.     0.    -2.977]
 [ 0.     0.     0.582  0.    -3.984]
 [ 0.     0.     0.     0.697 -4.976]
 [ 0.     0.     0.     0.     0.996]]

Adjacency G:
 [[0 0 0 0 1]
 [0 0 0 0 1]
 [0 0 0 0 1]
 [0 0 0 0 1]
 [0 0 0 0 0]]

f(A): 7.51016

seed =  1