In [3]:
# Notebook setup: extend sys.path so the project-local modules imported below
# (MEC, SCM_data, coordinate_descent) can be found, then import them.
import sys, os
import numpy as np
# Make the parent of the current working directory importable.
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "..")))
# NOTE(review): hard-coded, machine-specific absolute path. The recorded output
# shows the notebook running from f:\projects\DAG\experiments, so this entry
# looks like a leftover from another machine — confirm it is still needed.
sys.path.append(r"C:\Users\super\DAG")
# OpenMP workaround: lets the process continue when more than one OpenMP
# runtime gets loaded. It hides the conflict rather than fixing it.
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
from MEC import is_in_markov_equiv_class
print(os.getcwd())
from SCM_data import generate_scm_from_BN 
# NOTE(review): inv and sqrtm are not used by any cell visible in this section.
from numpy.linalg import inv
from scipy.linalg import sqrtm
# from golemMain.src.golem import golem
from coordinate_descent.coordinate import dag_coordinate_descent, weight_to_adjacency
from coordinate_descent.coordinate0 import dag_coordinate_descent_l0
from coordinate_descent.cd_B import dag_coordinate_descent_BOmega, dag_coordinate_descent_BOmega_epoch
# Second report of the working directory (same value as the print above, with a label).
print(f"Working directory: {os.getcwd()}")

f:\projects\DAG\experiments
Working directory: f:\projects\DAG\experiments


In [6]:
# Benchmark configurations, one (name, weight-matrix rows, N values) triple per
# experiment. As the names indicate, a non-zero entry at row i, column j of the
# weight matrix encodes the edge i -> j (nodes labelled A, B, C, ... in order).
_EXPERIMENT_SPECS = [
    # Experiment 1: collider on B
    ("d=3, A→B←C",
     [[0, 1, 0],
      [0, 0, 0],
      [0, 2, 0]],
     [1, 2, 3]),
    # Experiment 2: chain
    ("d=3, A→B→C",
     [[0, 1, 0],
      [0, 0, 3],
      [0, 0, 0]],
     [1, 3, 4]),
    # Experiment 3: chain plus shortcut edge
    ("d=3, A→B→C + A→C",
     [[0, 1, 2],
      [0, 0, 3],
      [0, 0, 0]],
     [5, 4, 3]),
    # Experiment 4: B fans out to C and D
    ("d=4, A→B, B→C, B→D",
     [[0, 3, 0, 0],
      [0, 0, 3, 4],
      [0, 0, 0, 0],
      [0, 0, 0, 0]],
     [1, 3, 3, 2]),
    # Experiment 5: two sources, two sinks, fully connected between layers
    ("d=4, A→C, A→D, B→C, B→D",
     [[0, 0, 2, 3],
      [0, 0, 3, 4],
      [0, 0, 0, 0],
      [0, 0, 0, 0]],
     [2, 4, 3, 5]),
    # Experiment 6: three parents of D
    ("d=4, A→D, B→D, C→D",
     [[0, 0, 0, 1],
      [0, 0, 0, 3],
      [0, 0, 0, 5],
      [0, 0, 0, 0]],
     [5, 4, 3, 2]),
    # Experiment 7: five nodes, four edges, no v-structure
    ("d=5, e=4, |v|=0",
     [[0, 1, 0, 2, 0],
      [0, 0, 3, 0, 4],
      [0, 0, 0, 0, 0],
      [0, 0, 0, 0, 0],
      [0, 0, 0, 0, 0]],
     [1, 2, 3, 2, 1]),
    # Experiment 8: five nodes, four edges, one v-structure
    ("d=5, e=4, |v|=1",
     [[0, 0, 1, 2, 0],
      [0, 0, 0, 2, 3],
      [0, 0, 0, 0, 0],
      [0, 0, 0, 0, 0],
      [0, 0, 0, 0, 0]],
     [1, 2, 3, 2, 1]),
    # Experiment 9: five nodes, four edges, two v-structures
    ("d=5, e=4, |v|=2",
     [[0, 0, 0, 1, 0],
      [0, 0, 0, 2, 3],
      [0, 0, 0, 0, 4],
      [0, 0, 0, 0, 0],
      [0, 0, 0, 0, 0]],
     [1, 2, 3, 2, 1]),
]

# Expand each spec into the dict layout used by the cells below; "Omega_true"
# is always the diagonal matrix built from the same values as "N".
experiments = [
    {
        "name": label,
        "B_true": np.array(weight_rows),
        "N": np.array(n_values),
        "Omega_true": np.diag(n_values),
    }
    for label, weight_rows, n_values in _EXPERIMENT_SPECS
]


In [7]:
# Test CD-BOmega on Experiment 2 (A -> B -> C), using its config from the `experiments` list.
# The header print used to be fused onto the end of the comment line and never ran.
print("\n=== Testing on Experiment 2 (A -> B -> C) ===")

# 1. Retrieve configuration from 'experiments' list (index 1 corresponds to Experiment 2)
exp_config = experiments[1]
print(f"Loaded config: {exp_config['name']}")

B_true = exp_config["B_true"]
N = exp_config["N"]
Omega_true = exp_config["Omega_true"]

# 2. Setup parameters
n_samples = 5000
seed = 10
lambda_l0 = 0.0

# 3. Generate data
# B_true is transposed before being handed to the generator.
# NOTE(review): this presumes generate_scm_from_BN uses the opposite
# row/column convention from B_true — confirm against SCM_data.
data, G_true_gen, _, _ = generate_scm_from_BN(
    B_true.T,
    n_samples=n_samples,
    N=N,
    seed=seed
)
n, d = data.shape

# 4. Compute statistics
# Uncentered second-moment matrix; it equals the sample covariance only if the
# generated data is zero-mean — TODO confirm.
S = data.T @ data / n
# Initialize Omega as the diagonal matrix of reciprocals of diag(S).
Omega_init = np.diag(1.0 / np.diag(S))

# 5. Run algorithm
print(f"Running CD-BOmega with n={n_samples}...")
B_est, G_est, score, debug_info = dag_coordinate_descent_BOmega(
    S,
    Omega_init,
    T=5000,
    seed=seed,
    lambda_l0=lambda_l0,
    k=None
)

# 6. Evaluation
G_true = weight_to_adjacency(B_true, threshold=0.05)  # Re-derive G from B_true for comparison
print("\nGround Truth Adjacency (from B_true):\n", G_true)
print("\nEstimated Adjacency:\n", G_est)
print("\nEstimated B:\n", np.round(B_est, 3))

# Check whether the estimate lies in the Markov equivalence class of the truth
is_mec = is_in_markov_equiv_class(G_true, G_est)
print(f"\nIs in Markov Equivalence Class: {is_mec}")

# Disabled debug report, kept verbatim as an inert string literal.
# The trailing ';' stops Jupyter from echoing the string's repr as this cell's output.
# TODO: promote this to a helper function (or delete it) instead of keeping code in a string.
'''
# 8. Detailed Debug Info Output
import pandas as pd
import matplotlib.pyplot as plt

print("\n=== Debug Info Analysis ===")
if debug_info:
    # Convert debug info to DataFrame for easier analysis
    df_debug = pd.DataFrame(debug_info)
    
    print("\n--- Summary of Updates ---")
    print(f"Total updates recorded: {len(df_debug)}")
    
    # Filter for non-zero updates (where delta was non-zero)
    df_active = df_debug[(df_debug['delta_ij'] != 0) | (df_debug['delta_ji'] != 0)]
    print(f"Number of active updates (delta != 0): {len(df_active)}")
    
    if not df_active.empty:
        print("\n--- Sample of Active Updates (First 10) ---")
        print(df_active.head(10)[['i', 'j', 'delta_ij', 'delta_ji']].to_string())
        
        print("\n--- Last 10 Active Updates ---")
        print(df_active.tail(10)[['i', 'j', 'delta_ij', 'delta_ji']].to_string())

        # Omega Evolution
        omega_start = df_debug.iloc[0]['omega_diag']
        omega_end = df_debug.iloc[-1]['omega_diag']
        print("\n--- Omega Evolution ---")
        print(f"Initial Omega Diag (approx): {np.round(omega_start, 3)}")
        print(f"Final Omega Diag: {np.round(omega_end, 3)}")
        print(f"True Omega Diag:  {np.diag(Omega_true)}")
        
        # Plot distribution of deltas
        plt.figure(figsize=(12, 5))
        plt.subplot(1, 2, 1)
        plt.hist(df_active['delta_ij'][df_active['delta_ij'] != 0], bins=30, alpha=0.7, label='delta_ij')
        plt.title('Distribution of Non-zero delta_ij')
        plt.legend()
        
        plt.subplot(1, 2, 2)
        plt.hist(df_active['delta_ji'][df_active['delta_ji'] != 0], bins=30, alpha=0.7, color='orange', label='delta_ji')
        plt.title('Distribution of Non-zero delta_ji')
        plt.legend()
        plt.show()
    else:
        print("No active updates found. The algorithm might have converged immediately or initialized at a stationary point.")
else:
    print("No debug info collected.")
''';

Loaded config: d=3, A→B→C
Running CD-BOmega with n=5000...

Ground Truth Adjacency (from B_true):
 [[0 1 0]
 [0 0 1]
 [0 0 0]]

Estimated Adjacency:
 [[0 0 0]
 [1 0 1]
 [1 0 0]]

Estimated B:
 [[0.    0.    0.   ]
 [0.126 0.    0.291]
 [3.002 0.    0.   ]]

Is in Markov Equivalence Class: False


'\n# 8. Detailed Debug Info Output\nimport pandas as pd\nimport matplotlib.pyplot as plt\n\nprint("\n=== Debug Info Analysis ===")\nif debug_info:\n    # Convert debug info to DataFrame for easier analysis\n    df_debug = pd.DataFrame(debug_info)\n    \n    print("\n--- Summary of Updates ---")\n    print(f"Total updates recorded: {len(df_debug)}")\n    \n    # Filter for non-zero updates (where delta was non-zero)\n    df_active = df_debug[(df_debug[\'delta_ij\'] != 0) | (df_debug[\'delta_ji\'] != 0)]\n    print(f"Number of active updates (delta != 0): {len(df_active)}")\n    \n    if not df_active.empty:\n        print("\n--- Sample of Active Updates (First 10) ---")\n        print(df_active.head(10)[[\'i\', \'j\', \'delta_ij\', \'delta_ji\']].to_string())\n        \n        print("\n--- Last 10 Active Updates ---")\n        print(df_active.tail(10)[[\'i\', \'j\', \'delta_ij\', \'delta_ji\']].to_string())\n\n        # Omega Evolution\n        omega_start = df_debug.iloc[0][\'omega_di

In [8]:
def run_one_coordinate_BOmega(B_true, N, name=None,
                              n_samples=5000,
                              lambda_l0=0.0,
                              threshold=0.05,
                              T=5000,
                              n_repeats=10,
                              seed_data=42):
    """Run CD-BOmega repeatedly on one simulated dataset and report the MEC hit rate.

    One dataset is generated from ``B_true`` with seed ``seed_data``. The
    algorithm is then run ``n_repeats`` times on the same statistics, using
    algorithm seeds ``0 .. n_repeats - 1``.

    Parameters
    ----------
    B_true : numpy.ndarray
        Ground-truth weight matrix. It is thresholded into the reference
        adjacency matrix and passed, transposed, to ``generate_scm_from_BN``.
    N : array-like
        Forwarded unchanged as ``N=`` to ``generate_scm_from_BN``
        (presumably per-node noise parameters — confirm against SCM_data).
    name : str, optional
        Label printed in the banner; defaults to "Unnamed experiment".
    n_samples : int
        Number of samples requested from the data generator.
    lambda_l0 : float
        Forwarded as ``lambda_l0`` to ``dag_coordinate_descent_BOmega``.
    threshold : float
        Threshold passed to ``weight_to_adjacency`` for ``B_true``.
    T : int
        Forwarded as ``T`` to ``dag_coordinate_descent_BOmega``.
    n_repeats : int
        Number of algorithm runs; must be at least 1.
    seed_data : int
        Seed passed to the data generator.

    Returns
    -------
    float
        Fraction of runs whose estimated graph is reported by
        ``is_in_markov_equiv_class`` as equivalent to the true graph.

    Raises
    ------
    ValueError
        If ``n_repeats`` is smaller than 1.
    """
    # Fail fast: with no repeats the rate would be a division by zero, and it
    # would only surface after the data had already been generated.
    if n_repeats < 1:
        raise ValueError(f"n_repeats must be at least 1, got {n_repeats!r}")

    if name is None:
        name = "Unnamed experiment"
    print("=" * 80)
    print(f"Running CD-BOmega: {name}")

    G_true = weight_to_adjacency(B_true, threshold)

    # 1. Generate data.
    # B_true is transposed before being handed to the generator.
    # NOTE(review): this presumes generate_scm_from_BN uses the opposite
    # row/column convention from B_true — confirm against SCM_data.
    data, _, _, _ = generate_scm_from_BN(
        B_true.T, n_samples=n_samples, N=N, seed=seed_data
    )
    n = data.shape[0]

    # 2. Compute statistics.
    # Uncentered second-moment matrix; it equals the sample covariance only if
    # the generated data is zero-mean — TODO confirm.
    S = data.T @ data / n
    # Initialize Omega as the diagonal matrix of reciprocals of diag(S).
    Omega_init = np.diag(1.0 / np.diag(S))

    # 3. Repeat the algorithm with different algorithm seeds on the same statistics.
    # NOTE(review): S and Omega_init are shared across repeats; this is only safe
    # if dag_coordinate_descent_BOmega does not modify its inputs — confirm.
    correct = 0
    for seed in range(n_repeats):
        _, G_est, _, _ = dag_coordinate_descent_BOmega(
            S,
            Omega_init,
            T=T,
            seed=seed,
            lambda_l0=lambda_l0,
            k=None
        )

        if is_in_markov_equiv_class(G_true, G_est):
            correct += 1

    correct_rate = correct / n_repeats
    print("Correct rate =", correct_rate)
    return correct_rate

In [10]:
# Sweep: every experiment, 10 data seeds, no L0 penalty (lambda_l0 = 0.0).
# Rates are collected in run order: all experiments for seed 0, then seed 1, ...
# NOTE(review): the recorded output shows identical rates for data seeds 0 and 1
# — confirm that generate_scm_from_BN actually honours its seed.
print("N_samples = 10000")
correct_rate = []
for seed_data in range(10):
    print("seed of data: ", seed_data)
    for exp in experiments:
        rate = run_one_coordinate_BOmega(
            exp["B_true"],
            exp["N"],
            exp["name"],
            n_samples=10000,
            lambda_l0=0.0,
            threshold=0.05,
            T=5000,
            n_repeats=100,
            seed_data=seed_data,
        )
        correct_rate.append(rate)


N_samples = 10000
seed of data:  0
Running CD-BOmega: d=3, A→B←C
Correct rate = 0.0
Running CD-BOmega: d=3, A→B→C
Correct rate = 0.31
Running CD-BOmega: d=3, A→B→C + A→C
Correct rate = 1.0
Running CD-BOmega: d=4, A→B, B→C, B→D
Correct rate = 0.0
Running CD-BOmega: d=4, A→C, A→D, B→C, B→D
Correct rate = 0.0
Running CD-BOmega: d=4, A→D, B→D, C→D
Correct rate = 0.0
Running CD-BOmega: d=5, e=4, |v|=0
Correct rate = 0.0
Running CD-BOmega: d=5, e=4, |v|=1
Correct rate = 0.0
Running CD-BOmega: d=5, e=4, |v|=2
Correct rate = 0.0
seed of data:  1
Running CD-BOmega: d=3, A→B←C
Correct rate = 0.0
Running CD-BOmega: d=3, A→B→C
Correct rate = 0.31
Running CD-BOmega: d=3, A→B→C + A→C
Correct rate = 1.0
Running CD-BOmega: d=4, A→B, B→C, B→D
Correct rate = 0.0
Running CD-BOmega: d=4, A→C, A→D, B→C, B→D
Correct rate = 0.0
Running CD-BOmega: d=4, A→D, B→D, C→D
Correct rate = 0.0
Running CD-BOmega: d=5, e=4, |v|=0
Correct rate = 0.0
Running CD-BOmega: d=5, e=4, |v|=1
Correct rate = 0.0
Running CD-BOmega:

In [11]:
# Sweep: every experiment, 10 data seeds, L0 penalty lambda_l0 = 0.1.
# Rates are collected in run order: all experiments for seed 0, then seed 1, ...
# NOTE(review): the recorded run was interrupted (KeyboardInterrupt), so the
# stored list from that run is partial.
print("N_samples = 10000")
correct_rate = []
for seed_data in range(10):
    print("seed of data: ", seed_data)
    for exp in experiments:
        rate = run_one_coordinate_BOmega(
            exp["B_true"],
            exp["N"],
            exp["name"],
            n_samples=10000,
            lambda_l0=0.1,
            threshold=0.05,
            T=5000,
            n_repeats=100,
            seed_data=seed_data,
        )
        correct_rate.append(rate)

N_samples = 10000
seed of data:  0
Running CD-BOmega: d=3, A→B←C
Correct rate = 0.0
Running CD-BOmega: d=3, A→B→C
Correct rate = 0.31
Running CD-BOmega: d=3, A→B→C + A→C
Correct rate = 0.59
Running CD-BOmega: d=4, A→B, B→C, B→D
Correct rate = 0.0
Running CD-BOmega: d=4, A→C, A→D, B→C, B→D
Correct rate = 0.0
Running CD-BOmega: d=4, A→D, B→D, C→D
Correct rate = 0.0
Running CD-BOmega: d=5, e=4, |v|=0
Correct rate = 0.0
Running CD-BOmega: d=5, e=4, |v|=1
Correct rate = 0.0
Running CD-BOmega: d=5, e=4, |v|=2
Correct rate = 0.0
seed of data:  1
Running CD-BOmega: d=3, A→B←C
Correct rate = 0.0
Running CD-BOmega: d=3, A→B→C
Correct rate = 0.31
Running CD-BOmega: d=3, A→B→C + A→C
Correct rate = 0.59
Running CD-BOmega: d=4, A→B, B→C, B→D
Correct rate = 0.0
Running CD-BOmega: d=4, A→C, A→D, B→C, B→D
Correct rate = 0.0
Running CD-BOmega: d=4, A→D, B→D, C→D
Correct rate = 0.0
Running CD-BOmega: d=5, e=4, |v|=0
Correct rate = 0.0
Running CD-BOmega: d=5, e=4, |v|=1
Correct rate = 0.0
Running CD-BOmeg

KeyboardInterrupt: 

In [12]:
# Sweep: every experiment, 10 data seeds, L0 penalty lambda_l0 = 0.2.
# Rates are collected in run order: all experiments for seed 0, then seed 1, ...
# NOTE(review): the recorded run was interrupted (KeyboardInterrupt) during the
# first experiment, so no rates were stored from it.
print("N_samples = 10000")
correct_rate = []
for seed_data in range(10):
    print("seed of data: ", seed_data)
    for exp in experiments:
        rate = run_one_coordinate_BOmega(
            exp["B_true"],
            exp["N"],
            exp["name"],
            n_samples=10000,
            lambda_l0=0.2,
            threshold=0.05,
            T=5000,
            n_repeats=100,
            seed_data=seed_data,
        )
        correct_rate.append(rate)

N_samples = 10000
seed of data:  0
Running CD-BOmega: d=3, A→B←C


KeyboardInterrupt: 