In [None]:
import sys, os
# Make modules that live one directory above the notebook importable.
# The membership check keeps the cell idempotent: re-running it does not
# append the same directory to sys.path again.
_parent_dir = os.path.abspath("..")
if _parent_dir not in sys.path:
    sys.path.append(_parent_dir)

# NOTE(review): presumably set to get past the "duplicate OpenMP runtime"
# abort that can occur when several libraries each load their own OpenMP
# copy. That error message itself describes this variable as an unsafe
# workaround -- confirm it is still needed in this environment.
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"

In [None]:
import numpy as np
from collections import Counter
from causallearn.utils.DAG2CPDAG import dag2cpdag
from causallearn.graph.Dag import Dag
from causallearn.graph.GraphNode import GraphNode
from causallearn.graph.Edge import Edge
from causallearn.graph.Endpoint import Endpoint


def adjacency_to_dag(G: np.ndarray) -> Dag:
    """Build a causallearn ``Dag`` from an adjacency matrix.

    One node named ``X0``, ``X1``, ... is created per row of ``G``. For every
    pair ``(i, j)`` with ``G[i, j] == 1`` a directed edge ``Xi -> Xj``
    (TAIL at ``Xi``, ARROW at ``Xj``) is added. Entries with any other value
    are ignored.
    """
    n_vars = G.shape[0]
    vertices = [GraphNode(f"X{k}") for k in range(n_vars)]
    graph = Dag(vertices)

    for src, tail_node in enumerate(vertices):
        for dst, head_node in enumerate(vertices):
            if G[src, dst] != 1:
                continue
            graph.add_edge(
                Edge(tail_node, head_node, Endpoint.TAIL, Endpoint.ARROW)
            )

    return graph


def nodag_findbest_cpdag(R_hat, lam=0.5, delta=1e-6, max_steps=5000,
                         tau_start=0.2, tau_end=0.2, times=100):
    """
    Run ``train_gumbel_sgd`` from ``times`` random initialisations and return
    the result whose CPDAG appears most often.

    Unlike a loss-based selection, the runs are not compared by loss: each
    run's final graph is converted to its CPDAG and the CPDAGs are counted.

    Parameters
    ----------
    R_hat : np.ndarray
        Matrix passed to ``train_gumbel_sgd`` as ``Rhat_np``; its shape also
        determines the shape of the random initial ``B``.
    lam, delta, max_steps, tau_start, tau_end
        Forwarded unchanged to ``train_gumbel_sgd``.
    times : int
        Number of restarts. Run ``t`` uses ``t`` as its NumPy seed.

    Returns
    -------
    tuple
        ``(best_G, best_B, best_loss, best_likelihood, best_penalty,
        best_seed)`` -- the same layout as ``nodag_findbest_loss``. ``best_G``
        is the adjacency matrix of the most frequent CPDAG; the remaining
        entries come from the most recent run that produced that CPDAG.

    Raises
    ------
    ValueError
        If ``times`` is smaller than 1 (there would be nothing to count).

    Notes
    -----
    * ``np.random.seed`` is called on every iteration, so the global NumPy
      random state is modified.
    * NOTE(review): ``train_gumbel_sgd`` is not defined or imported in the
      visible cells; it must already be in scope when this function runs.
    """
    # Fail early with a clear message instead of an IndexError from
    # ``most_common(1)[0]`` on an empty counter.
    if times < 1:
        raise ValueError(f"times must be at least 1, got {times}")

    cpdag_counter = Counter()
    cpdag_records = {}

    for t in range(times):
        seed = t
        np.random.seed(seed)

        B_init = np.random.randn(*R_hat.shape)
        print("t = ",t)
        B_final, G_final, info = train_gumbel_sgd(
            Rhat_np=R_hat,
            lam=lam,
            delta=delta,
            max_steps=max_steps,
            tau_start=tau_start,
            tau_end=tau_end,
            B_init=B_init
        )

        # Convert the learned graph to its CPDAG adjacency matrix.
        dag = adjacency_to_dag(G_final)
        cpdag = dag2cpdag(dag)
        cpdag_adj = cpdag.get_adjacency_matrix()

        # The flattened matrix is a hashable key for counting occurrences.
        cpdag_key = tuple(cpdag_adj.flatten())
        cpdag_counter[cpdag_key] += 1

        # Keep one full record per CPDAG; a later run with the same CPDAG
        # overwrites the earlier one on purpose.
        cpdag_records[cpdag_key] = (
            cpdag_adj, B_final,
            info["final_loss"], info["final_likelihood"],
            info["final_penalty"], seed
        )

    # Pick the CPDAG that occurred most often.
    best_cpdag_key, _ = cpdag_counter.most_common(1)[0]
    return cpdag_records[best_cpdag_key]

In [None]:
# ER1, find best loss

from torch.utils.data import Dataset

from synthetic_dataset import SyntheticDataset
from synthetic_dataset import dataset_based_on_B

seed = 1
# Load dataset
n, d = 10000, 4
graph_type, degree = 'ER', 1    # ER1 graph
B_scale = 1.0
noise_type = 'gaussian_ev'

# NOTE(review): this rebinds the name `Dataset`, shadowing the class imported
# from torch.utils.data at the top of this cell. The name is kept so that any
# later cell reading `Dataset.X` / `Dataset.B` keeps working.
Dataset = SyntheticDataset(n, d, graph_type, degree,
                        noise_type, B_scale, seed=seed)
data = Dataset.X
B_true = Dataset.B
G_true = Dataset.B_bin

# Sample covariance of the data (variables are the columns of `data`).
R_hat = np.cov(data.T)
d = R_hat.shape[0]
Omega_true = np.eye(d)
# `inv` is never imported in the visible cells, so call it through the
# already-imported numpy namespace; this cell then runs on a fresh kernel.
Theta_hat = np.linalg.inv(R_hat)

A_true = np.eye(d) - B_true
# print("A_true = \n",A_true)
# Value of -2*log(det(A)) + trace(A^T R_hat A) at the ground-truth A = I - B.
likelihood_true = - 2 * np.log(np.linalg.det(A_true)) + np.trace(A_true.T @ R_hat @ A_true)


# Select the most frequently recovered CPDAG over 5 random restarts.
best_G, best_B, best_loss, best_likelihood, best_penalty, best_seed = nodag_findbest_cpdag(R_hat = R_hat, times = 5)


print("random seed = ", seed)
print("likelihood_true = ", likelihood_true)
print("G_true = \n", G_true)
print("G_est = \n", best_G)
# print("Is in MEC: ", MEC.is_in_markov_equiv_class(G_true, best_B))
print("Final Loss = ", best_loss)
print("Final penalty = ", best_penalty)
print("Final likelihood = ", best_likelihood)
print("seed = ", best_seed)
print("")

t =  0
