In [1]:
import sys, os
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "..")))
sys.path.append(r"C:\Users\super\DAG")
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
from MEC import is_in_markov_equiv_class
print(os.getcwd())
from SCM_data import generate_scm_from_BN 
from numpy.linalg import inv
from scipy.linalg import sqrtm

c:\Users\super\DAG\coordinate_descent


In [2]:
import numpy as np
from scipy.linalg import expm


# -----------------------------
# Utility functions
# -----------------------------

def f(A, S):
    """Objective: f(A) = -2 log det(A) + tr(A^T S A)."""
    return -2 * np.log(np.linalg.det(A)) + np.trace(A.T @ S @ A)


def delta_star(A, S, i, j,eps = 1e-6):
    """
    Compute δ* = argmin_δ f(A + δ E_ij)
    following Theorem 1.
    """

    A_reg = A + eps * np.eye(A.shape[0])
    try:
        A_inv = np.linalg.inv(A_reg)
    except np.linalg.LinAlgError:
        return 0.0  
    c = S[i, i]
    b = (S @ A)[i, j]
    alpha = np.linalg.inv(A)[j, i]

    D = (c + alpha * b) ** 2 - 4 * alpha * c * (b - alpha)

    # Handle α = 0 case separately
    if abs(alpha) < 1e-12:
        return -b / c

    # Closed-form stable expression
    delta = 2 * (b - alpha) / (-(c + alpha * b) - np.sqrt(D))
    return delta


def is_DAG(W, tol=1e-8, k=None):
    """
    Acyclicity check using NOTEARS constraint:
        h(W) = tr(exp(W ∘ W)) - d = 0
        True if W represents a DAG, False otherwise
    """
    W = W.copy()
    np.fill_diagonal(W, 0.0)
    d = W.shape[0]

    # NOTEARS acyclicity constraint
    h = np.trace(expm(W * W)) - d
    is_dag = abs(h) < tol

    # Edge count check (nonzero entries)
    if k is not None:
        edge_count = np.sum(np.abs(W) > tol)
        return is_dag and (edge_count <= k)
    else:
        return is_dag


# def is_DAG(B):
#     """
#     Simple acyclicity check via (I + B/n)^n ≈ exp(B) test.
#     For small graphs, this brute-force method is acceptable.
#     """
#     n = B.shape[0]
#     M = np.eye(n) + B / n
#     return np.linalg.matrix_power(M, n).trace() == n  # exp(B) has trace == n if acyclic


def weight_to_adjacency(W, threshold=0.05):
    """
    Convert weighted matrix W to binary adjacency matrix G.
    Diagonal entries set to 0.
    """
    if not isinstance(W, np.ndarray):
        raise TypeError("Input W must be a numpy array.")
    if W.shape[0] != W.shape[1]:
        raise ValueError("Input W must be a square matrix.")

    G = (np.abs(W) > threshold).astype(int)
    np.fill_diagonal(G, 0)
    return G


# -----------------------------
# Main algorithm
# -----------------------------

def dag_coordinate_descent(S, T=100, seed=0, threshold=0.05, k=None):
    """
    Simplified DAG-Constrained Coordinate Descent.
    Returns (A, G, f(A))
    """
    np.random.seed(seed)
    d = S.shape[0]
    A = np.eye(d)

    for t in range(T):
        i, j = np.random.choice(d, 2, replace=True)
        
        #test
        # print(f"t = {t}, (i, j) = ({i}, {j})")
        if i == j: A[i, i] = 0.3
        else: A[i, j] = A[j, i] = 0.0        
        Δ, Δ_bar = -np.inf, -np.inf 
        
        #test
        # try direction i→j
        A_ij = A.copy()
        Eij = np.eye(d)[i][:, None] * np.eye(d)[j][None, :]
        if is_DAG(A_ij + Eij,k=k):
            δ_t = delta_star(A, S, i, j)
            Δ = f(A, S) - f(A + δ_t * Eij, S)

        # try direction j→i
        A_ji = A.copy()
        Eji = np.eye(d)[j][:, None] * np.eye(d)[i][None, :]
        if is_DAG(A_ji + Eji, k=k):
            δ_bar_t = delta_star(A, S, j, i)
            Δ_bar = f(A, S) - f(A + δ_bar_t * Eji, S)

        if Δ == -np.inf and Δ_bar == -np.inf:
            # print("DAG/k constraint, continue")
            continue

        if Δ < 0 and Δ_bar < 0:
            # print("Δ & Δ_bar < 0, continue")
            continue

        # choose better direction
        if Δ > Δ_bar:
            A = A + δ_t * Eij
        else:
            A = A + δ_bar_t * Eji
        # print("A = \n", A)
    G = weight_to_adjacency(A, threshold)
    return A, G, f(A, S)


In [6]:
# A->B<-C
B_true = np.array([
    [0, 1, 0],
    [0, 0, 0],
    [0, 2, 0]
])
G_true = weight_to_adjacency(B_true, 0.05)
N = [1,2,3]

data, G, B, Sigma = generate_scm_from_BN(B_true.T, n_samples=10000, N=N, seed=42)
R_hat = np.cov(data.T)
print("R_hat= \n",np.array2string(R_hat, formatter={'float_kind':lambda x: f"{x:.2f}"}))
d = R_hat.shape[0]

Omega_true = [[1,0,0],[0,2,0],[0,0,3]]
print(inv(sqrtm(Omega_true)))
Theta_hat = inv(R_hat)
A_true = (np.eye(d) - B_true) @ inv(sqrtm(Omega_true))
print("A_true= \n",np.array2string(A_true, formatter={'float_kind':lambda x: f"{x:.2f}"}))
# print(np.trace(A_true.T @ R_hat @ A_true))
likelihood_true = - 2 * np.log(np.linalg.det(A_true)) + np.trace(A_true.T @ R_hat @ A_true)
print("likelihood_true = ", likelihood_true)


correct = 0
times = 100
for seed in range(100):
    A_opt, G_opt, fval = dag_coordinate_descent(R_hat, T=300, seed = seed, threshold=0.05)
    #print("seed = ", seed)
    #print("Optimized A:\n", np.round(A_opt, 3))
    #print("\nAdjacency G:\n", G_opt)
    if is_in_markov_equiv_class(G_true, G_opt):
    #    print("Correct")
        correct += 1
    #print("\nf(A):", round(fval, 5))
    #print("")
print("Correct rate = ", correct / times)

R_hat= 
 [[1.02 1.06 0.02]
 [1.06 15.32 6.09]
 [0.02 6.09 3.02]]
[[1.         0.         0.        ]
 [0.         0.70710677 0.        ]
 [0.         0.         0.57735026]]
A_true= 
 [[1.00 -0.71 0.00]
 [0.00 0.71 0.00]
 [0.00 -1.41 0.58]]
likelihood_true =  4.816664771649337
Correct rate =  0.79


In [48]:
# A->B->C
B_true = np.array([
    [0, 1, 0],
    [0, 0, 3],
    [0, 0, 0]
])
N = [1,3,4]

data, G, B, Sigma = generate_scm_from_BN(B_true.T, n_samples=10000, N=N, seed=42)
R_hat = np.cov(data.T)
print("R_hat= \n",np.array2string(R_hat, formatter={'float_kind':lambda x: f"{x:.2f}"}))
d = R_hat.shape[0]

Omega_true = [[1,0,0],[0,3,0],[0,0,4]]
print(inv(sqrtm(Omega_true)))
Theta_hat = inv(R_hat)
A_true = (np.eye(d) - B_true) @ inv(sqrtm(Omega_true))
print("A_true= \n",np.array2string(A_true, formatter={'float_kind':lambda x: f"{x:.2f}"}))
print(np.trace(A_true.T @ R_hat @ A_true))
likelihood_true = - 2 * np.log(np.linalg.det(A_true)) + np.trace(A_true.T @ R_hat @ A_true)
print("likelihood_true = \n\n", likelihood_true)


for seed in range(10):
    A_opt, G_opt, fval = dag_coordinate_descent(R_hat, T=300, seed = seed, threshold=0.05)
    print("seed = ", seed)
    print("Optimized A:\n", np.round(A_opt, 3))
    print("\nAdjacency G:\n", G_opt)
    print("\nf(A):", round(fval, 5))
    print("")

R_hat= 
 [[1.02 1.03 3.11]
 [1.03 4.04 12.20]
 [3.11 12.20 40.81]]
[[1.         0.         0.        ]
 [0.         0.57735026 0.        ]
 [0.         0.         0.5       ]]


A_true= 
 [[1.00 -0.58 0.00]
 [0.00 0.58 -1.50]
 [0.00 0.00 0.50]]
3.0249052668015364
likelihood_true = 

 5.509811952486
seed =  0
Optimized A:
 [[ 0.992 -0.585 -0.003]
 [ 0.     0.577 -1.535]
 [ 0.     0.     0.509]]

Adjacency G:
 [[0 1 0]
 [0 0 1]
 [0 0 0]]

f(A): 5.51032

seed =  1
Optimized A:
 [[ 0.992 -0.585  0.003]
 [ 0.     0.577 -1.53 ]
 [ 0.     0.     0.506]]

Adjacency G:
 [[0 1 0]
 [0 0 1]
 [0 0 0]]

f(A): 5.50992

seed =  2
Optimized A:
 [[ 0.992 -0.585 -0.003]
 [ 0.     0.577 -1.511]
 [ 0.     0.     0.501]]

Adjacency G:
 [[0 1 0]
 [0 0 1]
 [0 0 0]]

f(A): 5.50943

seed =  3
Optimized A:
 [[ 0.992 -0.585 -0.002]
 [ 0.     0.577 -1.551]
 [ 0.     0.     0.515]]

Adjacency G:
 [[0 1 0]
 [0 0 1]
 [0 0 0]]

f(A): 5.5115

seed =  4
Optimized A:
 [[ 0.992 -0.208 -0.547]
 [ 0.     1.6    0.   ]
 [ 0.    -0.462  0.179]]

Adjacency G:
 [[0 1 1]
 [0 0 0]
 [0 1 0]]

f(A): 5.50941

seed =  5
Optimized A:
 [[ 0.992 -0.585  0.   ]
 [ 0.     0.577 -1.517]
 [ 0.     0.     0.502]]

A

In [49]:
# A->B->C, A->C

from SCM_data import generate_scm_from_BN 
B_true = np.array([
    [0, 1, 2],
    [0, 0, 3],
    [0, 0, 0]
])
N = [5,4,3]

data, G, B, Sigma = generate_scm_from_BN(B_true.T, n_samples=10000, N=N, seed=42)
R_hat = np.cov(data.T)
print("R_hat= \n",np.array2string(R_hat, formatter={'float_kind':lambda x: f"{x:.2f}"}))
d = R_hat.shape[0]

Omega_true = [[5,0,0],[0,4,0],[0,0,3]]
print(inv(sqrtm(Omega_true)))
Theta_hat = inv(R_hat)
A_true = (np.eye(d) - B_true) @ inv(sqrtm(Omega_true))
print("A_true= \n",np.array2string(A_true, formatter={'float_kind':lambda x: f"{x:.2f}"}))
print(np.trace(A_true.T @ R_hat @ A_true))
likelihood_true = - 2 * np.log(np.linalg.det(A_true)) + np.trace(A_true.T @ R_hat @ A_true)
print("likelihood_true = ", likelihood_true)



for seed in range(10):
    A_opt, G_opt, fval = dag_coordinate_descent(R_hat, T=300, seed = seed, threshold=0.05)
    print("seed = ", seed)
    print("Optimized A:\n", np.round(A_opt, 3))
    print("\nAdjacency G:\n", G_opt)
    print("\nf(A):", round(fval, 5))
    print("")

R_hat= 
 [[5.08 5.12 25.56]
 [5.12 9.15 37.77]
 [25.56 37.77 167.78]]
[[0.4472136  0.         0.        ]
 [0.         0.5        0.        ]
 [0.         0.         0.57735026]]
A_true= 
 [[0.45 -0.50 -1.15]
 [0.00 0.50 -1.73]
 [0.00 0.00 0.58]]
3.024905242200109
likelihood_true =  7.119249864266276
seed =  0
Optimized A:
 [[ 0.444 -0.503 -1.574]
 [ 0.     0.5   -2.38 ]
 [ 0.     0.     0.79 ]]

Adjacency G:
 [[0 1 1]
 [0 0 1]
 [0 0 0]]

f(A): 7.36958

seed =  1
Optimized A:
 [[ 0.444 -0.503 -1.531]
 [ 0.     0.5   -2.37 ]
 [ 0.     0.     0.774]]

Adjacency G:
 [[0 1 1]
 [0 0 1]
 [0 0 0]]

f(A): 7.34535

seed =  2
Optimized A:
 [[ 0.444 -0.503 -1.397]
 [ 0.     0.5   -2.104]
 [ 0.     0.     0.699]]

Adjacency G:
 [[0 1 1]
 [0 0 1]
 [0 0 0]]

f(A): 7.20602

seed =  3
Optimized A:
 [[ 0.444 -0.503 -1.626]
 [ 0.     0.5   -2.414]
 [ 0.     0.     0.805]]

Adjacency G:
 [[0 1 1]
 [0 0 1]
 [0 0 0]]

f(A): 7.40635

seed =  4
Optimized A:
 [[ 1.098  0.     0.   ]
 [ 0.824  1.219  0.   ]
 [

In [51]:
# A->B, B->C, B->D, |v| = 0
B_true = np.array([
    [0, 2, 0, 0],
    [0, 0, 3, 4],
    [0, 0, 0, 0],
    [0, 0, 0, 0]
])
N = [1, 4, 3, 2]

data, G, B, Sigma = generate_scm_from_BN(B_true.T, n_samples=10000, N=N, seed=42)
R_hat = np.cov(data.T)
print("R_hat= \n",np.array2string(R_hat, formatter={'float_kind':lambda x: f"{x:.2f}"}))
d = R_hat.shape[0]

Omega_true = [[1,0,0,0],[0,4,0,0],[0,0,3,0],[0,0,0,2]]
print(inv(sqrtm(Omega_true)))
Theta_hat = inv(R_hat)
A_true = (np.eye(d) - B_true) @ inv(sqrtm(Omega_true))
print("A_true= \n",np.array2string(A_true, formatter={'float_kind':lambda x: f"{x:.2f}"}))
print(np.trace(A_true.T @ R_hat @ A_true))
likelihood_true = - 2 * np.log(np.linalg.det(A_true)) + np.trace(A_true.T @ R_hat @ A_true)
print("likelihood_true = ", likelihood_true)



for seed in range(10):
    A_opt, G_opt, fval = dag_coordinate_descent(R_hat, T=1000, seed = seed, threshold=0.05)
    print("seed = ", seed)
    print("Optimized A:\n", np.round(A_opt, 3))
    print("\nAdjacency G:\n", G_opt)
    print("\nf(A):", round(fval, 5))
    print("")

R_hat= 
 [[0.98 1.98 5.95 7.92]
 [1.98 8.06 24.22 32.22]
 [5.95 24.22 75.80 96.82]
 [7.92 32.22 96.82 130.88]]
[[1.         0.         0.         0.        ]
 [0.         0.5        0.         0.        ]
 [0.         0.         0.57735026 0.        ]
 [0.         0.         0.         0.70710677]]
A_true= 
 [[1.00 -1.00 0.00 0.00]
 [0.00 0.50 -1.73 -2.83]
 [0.00 0.00 0.58 0.00]
 [0.00 0.00 0.00 0.71]]
4.036124566679151
likelihood_true =  7.214178467152102
seed =  0
Optimized A:
 [[ 1.01  -1.     0.    -0.032]
 [ 0.     0.496 -1.83  -2.318]
 [ 0.     0.     0.609 -0.044]
 [ 0.     0.     0.     0.616]]

Adjacency G:
 [[0 1 0 0]
 [0 0 1 1]
 [0 0 0 0]
 [0 0 0 0]]

f(A): 7.25165

seed =  1
Optimized A:
 [[ 1.421  0.     0.     0.   ]
 [-0.198  0.352 -1.795 -3.223]
 [-0.009  0.     0.597  0.038]
 [-0.031  0.     0.     0.775]]

Adjacency G:
 [[0 0 0 0]
 [1 0 1 1]
 [0 0 0 0]
 [0 0 0 0]]

f(A): 7.24752

seed =  2
Optimized A:
 [[ 1.01  -0.132 -0.081 -0.987]
 [ 0.     2.82   0.     0.   ]
 [ 

In [52]:
# A->C, A->D, B->C, B->D, |v| = 2
B_true = np.array([
    [0, 0, 2, 3],
    [0, 0, 3, 4],
    [0, 0, 0, 0],
    [0, 0, 0, 0]
])
N = [2, 4, 3, 5]

data, G, B, Sigma = generate_scm_from_BN(B_true.T, n_samples=10000, N=N, seed=42)
R_hat = np.cov(data.T)
print("R_hat= \n",np.array2string(R_hat, formatter={'float_kind':lambda x: f"{x:.2f}"}))
d = R_hat.shape[0]

Omega_true = [[2,0,0,0],[0,4,0,0],[0,0,3,0],[0,0,0,5]]
print(inv(sqrtm(Omega_true)))
Theta_hat = inv(R_hat)
A_true = (np.eye(d) - B_true) @ inv(sqrtm(Omega_true))
print("A_true= \n",np.array2string(A_true, formatter={'float_kind':lambda x: f"{x:.2f}"}))
print(np.trace(A_true.T @ R_hat @ A_true))
likelihood_true = - 2 * np.log(np.linalg.det(A_true)) + np.trace(A_true.T @ R_hat @ A_true)
print("likelihood_true = ", likelihood_true)



for seed in range(10):
    A_opt, G_opt, fval = dag_coordinate_descent(R_hat, T=1000, seed = seed, threshold=0.05)
    print("seed = ", seed)
    print("Optimized A:\n", np.round(A_opt, 3))
    print("\nAdjacency G:\n", G_opt)
    print("\nf(A):", round(fval, 5))
    print("")

R_hat= 
 [[1.96 0.02 4.01 5.99]
 [0.02 4.07 12.28 16.30]
 [4.01 12.28 47.95 61.01]
 [5.99 16.30 61.01 88.22]]
[[0.70710677 0.         0.         0.        ]
 [0.         0.5        0.         0.        ]
 [0.         0.         0.57735026 0.        ]
 [0.         0.         0.         0.4472136 ]]
A_true= 
 [[0.71 0.00 -1.15 -1.34]
 [0.00 0.50 -1.73 -1.79]
 [0.00 0.00 0.58 0.00]
 [0.00 0.00 0.00 0.45]]
4.036124543795759
likelihood_true =  8.823616380650414
seed =  0
Optimized A:
 [[ 0.714 -0.006 -1.18  -1.305]
 [ 0.     0.496 -1.757 -1.728]
 [ 0.     0.     0.585 -0.003]
 [ 0.     0.     0.     0.436]]

Adjacency G:
 [[0 0 1 1]
 [0 0 1 1]
 [0 0 0 0]
 [0 0 0 0]]

f(A): 8.82284

seed =  1
Optimized A:
 [[ 0.714 -0.006 -1.165 -1.398]
 [ 0.     0.496 -1.743 -1.862]
 [ 0.     0.     0.58   0.023]
 [ 0.     0.     0.     0.449]]

Adjacency G:
 [[0 0 1 1]
 [0 0 1 1]
 [0 0 0 0]
 [0 0 0 0]]

f(A): 8.82465

seed =  2
Optimized A:
 [[ 0.714 -0.006 -1.167 -1.289]
 [ 0.     0.496 -1.748 -1.708]
 [ 

In [None]:
# A->D, B->D, C->D, |v| = 3

B_true = np.array([
    [0, 0, 0, 1],
    [0, 0, 0, 3],
    [0, 0, 0, 5],
    [0, 0, 0, 0]
])
N = [5, 4, 3, 2]

data, G, B, Sigma = generate_scm_from_BN(B_true.T, n_samples=10000, N=N, seed=42)
R_hat = np.cov(data.T)
print("R_hat= \n",np.array2string(R_hat, formatter={'float_kind':lambda x: f"{x:.2f}"}))
d = R_hat.shape[0]

Omega_true = [[5,0,0,0],[0,4,0,0],[0,0,3,0],[0,0,0,2]]
print(inv(sqrtm(Omega_true)))
Theta_hat = inv(R_hat)
A_true = (np.eye(d) - B_true) @ inv(sqrtm(Omega_true))
print("A_true= \n",np.array2string(A_true, formatter={'float_kind':lambda x: f"{x:.2f}"}))
print(np.trace(A_true.T @ R_hat @ A_true))
likelihood_true = - 2 * np.log(np.linalg.det(A_true)) + np.trace(A_true.T @ R_hat @ A_true)
print("likelihood_true = ", likelihood_true)



for seed in range(10):
    A_opt, G_opt, fval = dag_coordinate_descent(R_hat, T=1000, seed = seed, threshold=0.05)
    print("seed = ", seed)
    print("Optimized A:\n", np.round(A_opt, 3))
    print("\nAdjacency G:\n", G_opt)
    print("\nf(A):", round(fval, 5))
    print("")

R_hat= 
 [[4.90 0.04 0.03 5.16]
 [0.04 4.07 0.02 12.32]
 [0.03 0.02 3.00 15.08]
 [5.16 12.32 15.08 119.48]]
[[0.4472136  0.         0.         0.        ]
 [0.         0.5        0.         0.        ]
 [0.         0.         0.57735026 0.        ]
 [0.         0.         0.         0.70710677]]
A_true= 
 [[0.45 0.00 0.00 -0.71]
 [0.00 0.50 0.00 -2.12]
 [0.00 0.00 0.58 -3.54]
 [0.00 0.00 0.00 0.71]]
4.036124543201381
likelihood_true =  8.823616380056034
seed =  0
Optimized A:
 [[ 0.748  0.     0.     0.   ]
 [ 1.415  0.496 -0.003 -1.128]
 [ 2.341  0.     0.577 -1.88 ]
 [-0.474  0.     0.     0.376]]

Adjacency G:
 [[0 0 0 0]
 [1 0 0 1]
 [1 0 0 1]
 [1 0 0 0]]

f(A): 8.86237

seed =  1
Optimized A:
 [[ 4.520e-01 -4.000e-03 -3.000e-03 -6.420e-01]
 [ 0.000e+00  4.960e-01 -3.000e-03 -1.916e+00]
 [ 0.000e+00  0.000e+00  5.770e-01 -3.199e+00]
 [ 0.000e+00  0.000e+00  0.000e+00  6.420e-01]]

Adjacency G:
 [[0 0 0 1]
 [0 0 0 1]
 [0 0 0 1]
 [0 0 0 0]]

f(A): 8.83415

seed =  2
Optimized A:
 [[ 4

In [5]:
# d=5, e=4, |v|=0
print("d=5, e=4, |v|=0")

B_true = np.array([
    [0, 1, 0, 2, 0],
    [0, 0, 3, 0, 4],
    [0, 0, 0, 0, 0],
    [0, 0, 0, 0, 0],
    [0, 0, 0, 0, 0]
])
N = [1, 2, 3, 2, 1]
G_true = weight_to_adjacency(B_true, 0.05)

data, G, B, Sigma = generate_scm_from_BN(B_true.T, n_samples=10000, N=N, seed=42)
R_hat = np.cov(data.T)
print("R_hat= \n",np.array2string(R_hat, formatter={'float_kind':lambda x: f"{x:.2f}"}))
d = R_hat.shape[0]

Omega_true = [[1,0,0,0,0],[0,2,0,0,0],[0,0,3,0,0],[0,0,0,2,0],[0,0,0,0,1]]
print(inv(sqrtm(Omega_true)))
Theta_hat = inv(R_hat)
A_true = (np.eye(d) - B_true) @ inv(sqrtm(Omega_true))
print("A_true= \n",np.array2string(A_true, formatter={'float_kind':lambda x: f"{x:.2f}"}))
print(np.trace(A_true.T @ R_hat @ A_true))
likelihood_true = - 2 * np.log(np.linalg.det(A_true)) + np.trace(A_true.T @ R_hat @ A_true)
print("likelihood_true = ", likelihood_true)


correct = 0
times = 10
for seed in range(times):
    A_opt, G_opt, fval = dag_coordinate_descent(R_hat, T=3000, seed = seed, threshold=0.05)
    print("seed = ", seed)
    print("Optimized A:\n", np.round(A_opt, 3))
    print("\nAdjacency G:\n", G_opt)
    if is_in_markov_equiv_class(G_true, G_opt):
        print("Correct")
        correct += 1
    print("\nf(A):", round(fval, 5))
    print("")
print("Correct rate = ", correct / times)


# for seed in range(10):
#     A_opt, G_opt, fval = dag_coordinate_descent(R_hat, T=1000, seed = seed, threshold=0.05)
#     print("seed = ", seed)
#     print("Optimized A:\n", np.round(A_opt, 3))
#     print("\nAdjacency G:\n", G_opt)
#     print("\nf(A):", round(fval, 5))
#     print("")

d=5, e=4, |v|=0
R_hat= 
 [[1.00 1.03 3.05 2.02 4.13]
 [1.03 3.06 9.14 2.08 12.24]
 [3.05 9.14 30.26 6.13 36.56]
 [2.02 2.08 6.13 6.12 8.33]
 [4.13 12.24 36.56 8.33 49.94]]
[[1.         0.         0.         0.         0.        ]
 [0.         0.70710677 0.         0.         0.        ]
 [0.         0.         0.57735026 0.         0.        ]
 [0.         0.         0.         0.70710677 0.        ]
 [0.         0.         0.         0.         1.        ]]
A_true= 
 [[1.00 -0.71 0.00 -1.41 0.00]
 [0.00 0.71 -1.73 0.00 -4.00]
 [0.00 0.00 0.58 0.00 0.00]
 [0.00 0.00 0.00 0.71 0.00]
 [0.00 0.00 0.00 0.00 1.00]]
5.026304608930056
likelihood_true =  7.511211363071604
seed =  0
Optimized A:
 [[ 1.867e+00  0.000e+00  5.000e-03  0.000e+00 -2.900e-02]
 [-2.730e-01  6.520e-01 -1.756e+00  0.000e+00 -3.965e+00]
 [ 0.000e+00  0.000e+00  5.830e-01  0.000e+00  0.000e+00]
 [-5.220e-01 -2.210e-01  8.000e-03  4.040e-01  2.000e-03]
 [ 0.000e+00  0.000e+00  2.000e-03  0.000e+00  9.940e-01]]

Adjacency G

In [5]:
# d=5, e=4, |v|=1
print("d=5, e=4, |v|=1")

B_true = np.array([
    [0, 0, 1, 2, 0],
    [0, 0, 0, 2, 3],
    [0, 0, 0, 0, 0],
    [0, 0, 0, 0, 0],
    [0, 0, 0, 0, 0]
])
N = [1, 2, 3, 2, 1]

data, G, B, Sigma = generate_scm_from_BN(B_true.T, n_samples=10000, N=N, seed=42)
R_hat = np.cov(data.T)
print("R_hat= \n",np.array2string(R_hat, formatter={'float_kind':lambda x: f"{x:.2f}"}))
d = R_hat.shape[0]

Omega_true = [[1,0,0,0,0],[0,2,0,0,0],[0,0,3,0,0],[0,0,0,2,0],[0,0,0,0,1]]
print(inv(sqrtm(Omega_true)))
Theta_hat = inv(R_hat)
A_true = (np.eye(d) - B_true) @ inv(sqrtm(Omega_true))
print("A_true= \n",np.array2string(A_true, formatter={'float_kind':lambda x: f"{x:.2f}"}))
print(np.trace(A_true.T @ R_hat @ A_true))
likelihood_true = - 2 * np.log(np.linalg.det(A_true)) + np.trace(A_true.T @ R_hat @ A_true)
print("likelihood_true = ", likelihood_true)



for seed in range(10):
    A_opt, G_opt, fval = dag_coordinate_descent(R_hat, T=1000, seed = seed, threshold=0.05)
    print("seed = ", seed)
    print("Optimized A:\n", np.round(A_opt, 3))
    print("\nAdjacency G:\n", G_opt)
    print("\nf(A):", round(fval, 5))
    print("")

d=5, e=4, |v|=1
R_hat= 
 [[1.00 0.03 0.97 2.07 0.10]
 [0.03 2.00 0.02 4.07 5.99]
 [0.97 0.02 3.88 1.96 0.09]
 [2.07 4.07 1.96 14.38 12.19]
 [0.10 5.99 0.09 12.19 18.92]]
[[1.         0.         0.         0.         0.        ]
 [0.         0.70710677 0.         0.         0.        ]
 [0.         0.         0.57735026 0.         0.        ]
 [0.         0.         0.         0.70710677 0.        ]
 [0.         0.         0.         0.         1.        ]]
A_true= 
 [[1.00 0.00 -0.58 -1.41 0.00]
 [0.00 0.71 0.00 -1.41 -3.00]
 [0.00 0.00 0.58 0.00 0.00]
 [0.00 0.00 0.00 0.71 0.00]
 [0.00 0.00 0.00 0.00 1.00]]
5.026304608930029
likelihood_true =  7.511211363071577
seed =  0
Optimized A:
 [[ 1.192  1.242 -0.575  0.    -0.019]
 [ 0.     1.563  0.007  0.    -2.984]
 [ 0.     0.     0.582  0.     0.   ]
 [-0.172 -0.621  0.007  0.264  0.003]
 [ 0.     0.    -0.007  0.     0.996]]

Adjacency G:
 [[0 1 1 0 0]
 [0 0 0 0 1]
 [0 0 0 0 0]
 [1 1 0 0 0]
 [0 0 0 0 0]]

f(A): 7.50914

seed =  1
Optimiz

In [6]:
# d=5, e=4, |v|=2
print("d=5, e=4, |v|=2")

B_true = np.array([
    [0, 0, 0, 1, 0],
    [0, 0, 0, 2, 3],
    [0, 0, 0, 0, 4],
    [0, 0, 0, 0, 0],
    [0, 0, 0, 0, 0]
])
N = [1, 2, 3, 2, 1]

data, G, B, Sigma = generate_scm_from_BN(B_true.T, n_samples=10000, N=N, seed=42)
R_hat = np.cov(data.T)
print("R_hat= \n",np.array2string(R_hat, formatter={'float_kind':lambda x: f"{x:.2f}"}))
d = R_hat.shape[0]

Omega_true = [[1,0,0,0,0],[0,2,0,0,0],[0,0,3,0,0],[0,0,0,2,0],[0,0,0,0,1]]
print(inv(sqrtm(Omega_true)))
Theta_hat = inv(R_hat)
A_true = (np.eye(d) - B_true) @ inv(sqrtm(Omega_true))
print("A_true= \n",np.array2string(A_true, formatter={'float_kind':lambda x: f"{x:.2f}"}))
print(np.trace(A_true.T @ R_hat @ A_true))
likelihood_true = - 2 * np.log(np.linalg.det(A_true)) + np.trace(A_true.T @ R_hat @ A_true)
print("likelihood_true = ", likelihood_true)



for seed in range(10):
    A_opt, G_opt, fval = dag_coordinate_descent(R_hat, T=1000, seed = seed, threshold=0.05)
    print("seed = ", seed)
    print("Optimized A:\n", np.round(A_opt, 3))
    print("\nAdjacency G:\n", G_opt)
    print("\nf(A):", round(fval, 5))
    print("")

d=5, e=4, |v|=2
R_hat= 
 [[1.00 0.03 -0.04 1.07 -0.05]
 [0.03 2.00 -0.00 4.04 5.98]
 [-0.04 -0.00 2.95 -0.07 11.79]
 [1.07 4.04 -0.07 11.23 11.80]
 [-0.05 5.98 11.79 11.80 66.06]]
[[1.         0.         0.         0.         0.        ]
 [0.         0.70710677 0.         0.         0.        ]
 [0.         0.         0.57735026 0.         0.        ]
 [0.         0.         0.         0.70710677 0.        ]
 [0.         0.         0.         0.         1.        ]]
A_true= 
 [[1.00 0.00 0.00 -0.71 0.00]
 [0.00 0.71 0.00 -1.41 -3.00]
 [0.00 0.00 0.58 0.00 -4.00]
 [0.00 0.00 0.00 0.71 0.00]
 [0.00 0.00 0.00 0.00 1.00]]
5.026304608930057
likelihood_true =  7.511211363071605
seed =  0
Optimized A:
 [[ 1.054e+00  6.200e-01  1.300e-02  0.000e+00 -1.700e-02]
 [ 0.000e+00  1.564e+00 -1.600e-02  0.000e+00 -2.988e+00]
 [ 0.000e+00  0.000e+00  5.820e-01  0.000e+00 -3.992e+00]
 [-1.000e-01 -6.210e-01  8.000e-03  2.980e-01  3.000e-03]
 [ 0.000e+00  0.000e+00  0.000e+00  0.000e+00  9.980e-01]]

Adj

In [7]:
# d=5, e=4, |v|=6
print("d=5, e=4, |v|=6")

B_true = np.array([
    [0, 0, 0, 0, 2],
    [0, 0, 0, 0, 3],
    [0, 0, 0, 0, 4],
    [0, 0, 0, 0, 5],
    [0, 0, 0, 0, 0]
])
N = [1, 2, 3, 2, 1]

data, G, B, Sigma = generate_scm_from_BN(B_true.T, n_samples=10000, N=N, seed=42)
R_hat = np.cov(data.T)
print("R_hat= \n",np.array2string(R_hat, formatter={'float_kind':lambda x: f"{x:.2f}"}))
d = R_hat.shape[0]

Omega_true = [[1,0,0,0,0],[0,2,0,0,0],[0,0,3,0,0],[0,0,0,2,0],[0,0,0,0,1]]
print(inv(sqrtm(Omega_true)))
Theta_hat = inv(R_hat)
A_true = (np.eye(d) - B_true) @ inv(sqrtm(Omega_true))
print("A_true= \n",np.array2string(A_true, formatter={'float_kind':lambda x: f"{x:.2f}"}))
print(np.trace(A_true.T @ R_hat @ A_true))
likelihood_true = - 2 * np.log(np.linalg.det(A_true)) + np.trace(A_true.T @ R_hat @ A_true)
print("likelihood_true = ", likelihood_true)



for seed in range(10):
    A_opt, G_opt, fval = dag_coordinate_descent(R_hat, T=1000, seed = seed, threshold=0.05)
    print("seed = ", seed)
    print("Optimized A:\n", np.round(A_opt, 3))
    print("\nAdjacency G:\n", G_opt)
    print("\nf(A):", round(fval, 5))
    print("")

d=5, e=4, |v|=6
R_hat= 
 [[1.00 0.03 -0.04 0.01 2.02]
 [0.03 2.00 -0.00 0.01 6.06]
 [-0.04 -0.00 2.95 -0.03 11.57]
 [0.01 0.01 -0.03 2.06 10.22]
 [2.02 6.06 11.57 10.22 120.59]]
[[1.         0.         0.         0.         0.        ]
 [0.         0.70710677 0.         0.         0.        ]
 [0.         0.         0.57735026 0.         0.        ]
 [0.         0.         0.         0.70710677 0.        ]
 [0.         0.         0.         0.         1.        ]]
A_true= 
 [[1.00 0.00 0.00 0.00 -2.00]
 [0.00 0.71 0.00 0.00 -3.00]
 [0.00 0.00 0.58 0.00 -4.00]
 [0.00 0.00 0.00 0.71 -5.00]
 [0.00 0.00 0.00 0.00 1.00]]
5.026304608930009
likelihood_true =  7.511211363071557
seed =  0
Optimized A:
 [[ 9.990e-01  0.000e+00  0.000e+00  0.000e+00 -2.006e+00]
 [-1.400e-02  7.060e-01  1.000e-03  0.000e+00 -2.977e+00]
 [ 1.200e-02  0.000e+00  5.820e-01  0.000e+00 -3.984e+00]
 [-6.000e-03 -2.000e-03  8.000e-03  6.970e-01 -4.976e+00]
 [ 0.000e+00  0.000e+00  0.000e+00  0.000e+00  9.960e-01]]

Adjac