In [1]:
import torch
import torch.nn as nn
from torch.optim import Adam, LBFGS
from nodag_gumbel_softmax import train_gumbel_sgd
from SCM_data import generate_scm_data
import numpy as np
from numpy.linalg import LinAlgError, inv
from scipy.linalg import sqrtm
import MEC
import os
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"

In [2]:
def nodag_findbest_loss(R_hat, lam=0.5, delta=1e-6, max_steps=5000, tau_start=0.2, tau_end=0.2, times=100):
    """Run ``train_gumbel_sgd`` from several random starts and keep the lowest-loss run.

    For every ``seed`` in ``range(times)`` the global NumPy RNG is seeded with
    ``seed``, a standard-normal ``B_init`` with the shape of ``R_hat`` is drawn,
    and ``train_gumbel_sgd`` is called with it. The run whose
    ``info["final_loss"]`` is strictly smallest wins; ties keep the earliest seed.

    Args:
        R_hat: Array forwarded to the trainer as ``Rhat_np``; only its ``shape``
            is used directly here.
        lam, delta, max_steps, tau_start, tau_end: Forwarded unchanged to
            ``train_gumbel_sgd``.
        times: Number of restarts; seeds ``0 .. times - 1`` are tried.

    Returns:
        Tuple ``(best_G, best_B, best_loss, best_likelihood, best_penalty,
        best_seed)`` taken from the winning run.

    Raises:
        ValueError: If ``times`` is smaller than 1.
        RuntimeError: If no run reported a loss below ``+inf`` (for example
            when every run produced NaN), so there is nothing to return.
    """
    if times < 1:
        raise ValueError(f"times must be >= 1, got {times!r}")

    best_loss = np.inf
    winner = None  # stays None until some run beats best_loss

    for seed in range(times):
        # Seeds the *global* NumPy RNG, exactly as before.
        # NOTE(review): the trainer may also draw from this global state - confirm
        # before switching to a local Generator.
        np.random.seed(seed)
        B_init = np.random.randn(*R_hat.shape)

        B_final, G_final, info = train_gumbel_sgd(
            Rhat_np=R_hat,
            lam=lam,
            delta=delta,
            max_steps=max_steps,
            tau_start=tau_start,
            tau_end=tau_end,
            B_init=B_init,
        )

        run_loss = info["final_loss"]
        # Strict '<' keeps the first of equal losses; NaN and +inf never compare
        # smaller, so such runs are skipped.
        if run_loss < best_loss:
            best_loss = run_loss
            winner = (
                G_final,
                B_final,
                run_loss,
                info["final_likelihood"],
                info["final_penalty"],
                seed,
            )

    if winner is None:
        raise RuntimeError(
            f"none of the {times} runs reported a loss below +inf "
            "(all losses were NaN or infinite)"
        )
    return winner

In [3]:
def nodag_findbest_likelihood_penalty(
    R_hat, lam=0.5, delta=1e-6, max_steps=5000,
    tau_start=0.2, tau_end=0.2, times=100, eps=1e-3
):
    """Pick the lowest-penalty run among the runs with (near-)minimal likelihood term.

    ``train_gumbel_sgd`` is run once per ``seed`` in ``range(times)``, each time
    from a standard-normal ``B_init`` drawn after seeding the global NumPy RNG
    with ``seed``. Among the runs whose ``info["final_likelihood"]`` lies
    within ``eps`` of the smallest finite value, the one with the smallest
    ``info["final_penalty"]`` is returned (earliest seed on ties).

    Runs with a NaN or infinite likelihood are ignored. Previously a NaN in the
    first run made ``min`` return NaN and the function crashed on an empty
    candidate list.

    Args:
        R_hat: Array forwarded to the trainer as ``Rhat_np``; only its ``shape``
            is used directly here.
        lam, delta, max_steps, tau_start, tau_end: Forwarded unchanged to
            ``train_gumbel_sgd``.
        times: Number of restarts; seeds ``0 .. times - 1`` are tried.
        eps: Absolute tolerance on the likelihood value for a run to count as
            tied with the best one.

    Returns:
        Tuple ``(G, B, loss, likelihood, penalty, seed)`` of the selected run.

    Raises:
        ValueError: If ``times`` is smaller than 1.
        RuntimeError: If no run produced a finite likelihood.
    """
    if times < 1:
        raise ValueError(f"times must be >= 1, got {times!r}")

    results = []
    for seed in range(times):
        # Seeds the *global* NumPy RNG, exactly as before.
        np.random.seed(seed)
        B_init = np.random.randn(*R_hat.shape)
        if seed % 100 == 0: print("t = ",seed)

        B_final, G_final, info = train_gumbel_sgd(
            Rhat_np=R_hat,
            lam=lam,
            delta=delta,
            max_steps=max_steps,
            tau_start=tau_start,
            tau_end=tau_end,
            B_init=B_init
        )

        results.append({
            "seed": seed,
            "G": G_final,
            "B": B_final,
            "loss": info["final_loss"],
            "likelihood": info["final_likelihood"],
            "penalty": info["final_penalty"],
        })

    # Non-finite likelihoods can never be selected and, if left in, make the
    # result of min() depend on where the NaN sits in the list.
    usable = [r for r in results if np.isfinite(float(r["likelihood"]))]
    if not usable:
        raise RuntimeError(
            f"none of the {times} runs produced a finite likelihood"
        )

    min_likelihood = min(r["likelihood"] for r in usable)

    # The run attaining the minimum is always a candidate, so this is non-empty.
    candidates = [
        r for r in usable
        if abs(r["likelihood"] - min_likelihood) <= eps
    ]

    best = min(candidates, key=lambda r: r["penalty"])

    return (
        best["G"], best["B"],
        best["loss"], best["likelihood"],
        best["penalty"], best["seed"]
    )

In [5]:
# ER1, find best loss
# Builds one synthetic ER1 dataset, scores the ground-truth graph, then keeps
# the random restart with the lowest total loss.

from torch.utils.data import Dataset

from synthetic_dataset import SyntheticDataset
from synthetic_dataset import dataset_based_on_B

seed = 1
# Load dataset
n, d = 10000, 4
graph_type, degree = 'ER', 1    # ER1 graph
B_scale = 1.0
noise_type = 'gaussian_ev'

# NOTE(review): this rebinds `Dataset`, shadowing the torch class imported
# above. The name is kept because other cells may read it.
Dataset = SyntheticDataset(n, d, graph_type, degree,
                        noise_type, B_scale, seed=seed)
data = Dataset.X
B_true = Dataset.B
G_true = Dataset.B_bin

# Sample covariance of the columns of `data` and its inverse.
R_hat = np.cov(data.T)
d = R_hat.shape[0]
Omega_true = np.eye(d)
Theta_hat = inv(R_hat)

A_true = np.eye(d) - B_true
# Likelihood term at the ground truth: -2*log|det A| + tr(A^T R_hat A).
# slogdet returns log|det A| directly, so a negative determinant no longer
# yields NaN and the determinant cannot under/overflow before the log.
likelihood_true = - 2 * np.linalg.slogdet(A_true)[1] + np.trace(A_true.T @ R_hat @ A_true)


best_G, best_B, best_loss, best_likelihood, best_penalty, best_seed = nodag_findbest_loss(R_hat = R_hat, times = 500)


# "random seed" is the dataset seed; the final "seed" is the winning restart.
print("random seed = ", seed)
print("likelihood_true = ", likelihood_true)
print("G_true = \n", G_true)
print("G_est = \n", best_G)
# Disabled Markov-equivalence check, kept for reference:
# print("Is in MEC: ", MEC.is_in_markov_equiv_class(G_true, best_B))
print("Final Loss = ", best_loss)
print("Final penalty = ", best_penalty)
print("Final likelihood = ", best_likelihood)
print("seed = ", best_seed)
print("")

random seed =  1
likelihood_true =  3.9974760872380175
G_true = 
 [[0. 1. 1. 1.]
 [0. 0. 0. 1.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]]
G_est = 
 [[0. 0. 0. 1.]
 [0. 0. 0. 0.]
 [1. 0. 0. 0.]
 [0. 1. 0. 0.]]
Final Loss =  5.5069874712220415
Final penalty =  1.500000000001951
Final likelihood =  4.00698747122009
seed =  494



In [4]:
# ER1, find best penalty in best likelihoods
# Builds one synthetic ER1 dataset, scores the ground-truth graph, then picks
# the lowest-penalty restart among those with near-minimal likelihood term.

from torch.utils.data import Dataset

from synthetic_dataset import SyntheticDataset
from synthetic_dataset import dataset_based_on_B

seed = 1
# Load dataset
n, d = 10000, 4
graph_type, degree = 'ER', 1    # ER1 graph
B_scale = 1.0
noise_type = 'gaussian_ev'

# NOTE(review): this rebinds `Dataset`, shadowing the torch class imported
# above. The name is kept because other cells may read it.
Dataset = SyntheticDataset(n, d, graph_type, degree,
                        noise_type, B_scale, seed=seed)
data = Dataset.X
B_true = Dataset.B
G_true = Dataset.B_bin

# Sample covariance of the columns of `data` and its inverse.
R_hat = np.cov(data.T)
d = R_hat.shape[0]
Omega_true = np.eye(d)
Theta_hat = inv(R_hat)

A_true = np.eye(d) - B_true
# Likelihood term at the ground truth: -2*log|det A| + tr(A^T R_hat A).
# slogdet returns log|det A| directly, so a negative determinant no longer
# yields NaN and the determinant cannot under/overflow before the log.
likelihood_true = - 2 * np.linalg.slogdet(A_true)[1] + np.trace(A_true.T @ R_hat @ A_true)


best_G, best_B, best_loss, best_likelihood, best_penalty, best_seed = nodag_findbest_likelihood_penalty(R_hat = R_hat, times = 500)


# "random seed" is the dataset seed; the final "seed" is the selected restart.
print("random seed = ", seed)
print("likelihood_true = ", likelihood_true)
print("G_true = \n", G_true)
print("G_est = \n", best_G)
# Disabled Markov-equivalence check, kept for reference:
# print("Is in MEC: ", MEC.is_in_markov_equiv_class(G_true, best_B))
print("Final Loss = ", best_loss)
print("Final penalty = ", best_penalty)
print("Final likelihood = ", best_likelihood)
print("seed = ", best_seed)
print("")

t =  0
t =  100
t =  200
t =  300
t =  400
random seed =  1
likelihood_true =  3.9974760872380175
G_true = 
 [[0. 1. 1. 1.]
 [0. 0. 0. 1.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]]
G_est = 
 [[0. 1. 1. 0.]
 [1. 0. 0. 0.]
 [0. 0. 0. 0.]
 [1. 0. 0. 0.]]
Final Loss =  5.997049781055185
Final penalty =  2.000000000000734
Final likelihood =  3.9970497810544505
seed =  353



In [None]:
# Find best loss, seed0-9
# Repeats the best-loss experiment for dataset seeds 0..9, printing one report
# per seed.

from torch.utils.data import Dataset

from synthetic_dataset import SyntheticDataset
from synthetic_dataset import dataset_based_on_B

for seed in range(10):
    # Load dataset
    n, d = 10000, 4
    graph_type, degree = 'ER', 1    # ER1 graph
    B_scale = 1.0
    noise_type = 'gaussian_ev'

    # NOTE(review): this rebinds `Dataset`, shadowing the torch class imported
    # above. The name is kept because other cells may read it.
    Dataset = SyntheticDataset(n, d, graph_type, degree,
                            noise_type, B_scale, seed=seed)
    data = Dataset.X
    B_true = Dataset.B
    G_true = Dataset.B_bin

    # Sample covariance of the columns of `data` and its inverse.
    R_hat = np.cov(data.T)
    d = R_hat.shape[0]
    Omega_true = np.eye(d)
    Theta_hat = inv(R_hat)

    A_true = np.eye(d) - B_true
    # Likelihood term at the ground truth: -2*log|det A| + tr(A^T R_hat A).
    # slogdet returns log|det A| directly, so a negative determinant no longer
    # yields NaN and the determinant cannot under/overflow before the log.
    likelihood_true = - 2 * np.linalg.slogdet(A_true)[1] + np.trace(A_true.T @ R_hat @ A_true)


    best_G, best_B, best_loss, best_likelihood, best_penalty, best_seed = nodag_findbest_loss(R_hat = R_hat, times = 300)

    # "random seed" is the dataset seed; the final "seed" is the winning restart.
    print("random seed = ", seed)
    print("likelihood_true = ", likelihood_true)
    print("G_true = \n", G_true)
    print("G_est = \n", best_G)
    # Disabled Markov-equivalence check, kept for reference:
    # print("Is in MEC: ", MEC.is_in_markov_equiv_class(G_true, best_B))
    print("Final Loss = ", best_loss)
    print("Final penalty = ", best_penalty)
    print("Final likelihood = ", best_likelihood)
    print("seed = ", best_seed)
    print("")

In [None]:
# Find best penalty in best likelihoods, seed0-9
# Repeats the likelihood-then-penalty selection for dataset seeds 0..9,
# printing one report per seed.

from torch.utils.data import Dataset

from synthetic_dataset import SyntheticDataset
from synthetic_dataset import dataset_based_on_B

for seed in range(10):
    # Load dataset
    n, d = 10000, 4
    graph_type, degree = 'ER', 1    # ER1 graph
    B_scale = 1.0
    noise_type = 'gaussian_ev'

    # NOTE(review): this rebinds `Dataset`, shadowing the torch class imported
    # above. The name is kept because other cells may read it.
    Dataset = SyntheticDataset(n, d, graph_type, degree,
                            noise_type, B_scale, seed=seed)
    data = Dataset.X
    B_true = Dataset.B
    G_true = Dataset.B_bin

    # Sample covariance of the columns of `data` and its inverse.
    R_hat = np.cov(data.T)
    d = R_hat.shape[0]
    Omega_true = np.eye(d)
    Theta_hat = inv(R_hat)

    A_true = np.eye(d) - B_true
    # Likelihood term at the ground truth: -2*log|det A| + tr(A^T R_hat A).
    # slogdet returns log|det A| directly, so a negative determinant no longer
    # yields NaN and the determinant cannot under/overflow before the log.
    likelihood_true = - 2 * np.linalg.slogdet(A_true)[1] + np.trace(A_true.T @ R_hat @ A_true)


    best_G, best_B, best_loss, best_likelihood, best_penalty, best_seed = nodag_findbest_likelihood_penalty(R_hat = R_hat, times = 300)

    # "random seed" is the dataset seed; the final "seed" is the selected restart.
    print("random seed = ", seed)
    print("likelihood_true = ", likelihood_true)
    print("G_true = \n", G_true)
    print("G_est = \n", best_G)
    # Disabled Markov-equivalence check, kept for reference:
    # print("Is in MEC: ", MEC.is_in_markov_equiv_class(G_true, best_B))
    print("Final Loss = ", best_loss)
    print("Final penalty = ", best_penalty)
    print("Final likelihood = ", best_likelihood)
    print("seed = ", best_seed)
    print("")

t =  0
t =  100
t =  200
random seed =  0
likelihood_true =  3.9726462866418357
G_true = 
 [[0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]]
G_est = 
 [[-1.  0.  0.  1.]
 [ 0.  0.  0.  0.]
 [ 0.  0.  0.  0.]
 [ 1.  0.  0. -1.]]
Final Loss =  4.972038338746174
Final penalty =  1.000000000002756
Final likelihood =  3.972038338743418
seed =  202

t =  0
t =  100
t =  200
random seed =  1
likelihood_true =  3.9974760872380175
G_true = 
 [[0. 1. 1. 1.]
 [0. 0. 0. 1.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]]
G_est = 
 [[ 0.  1.  1.  0.]
 [ 1.  0.  0.  1.]
 [ 0.  0.  0.  0.]
 [ 1.  0.  0. -1.]]
Final Loss =  6.497046976915103
Final penalty =  2.500000000000619
Final likelihood =  3.997046976914484
seed =  46

t =  0
t =  100
t =  200
random seed =  2
likelihood_true =  4.014033534766731
G_true = 
 [[0. 0. 0. 0.]
 [1. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 1. 0. 0.]]
G_est = 
 [[0. 0. 0. 0.]
 [1. 0. 0. 1.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]]
Final Loss =  5.51302787550704
Final penalty =  1.500000000008667