From 3c4892d340eb8b21d8d65112acddea30663c917c Mon Sep 17 00:00:00 2001 From: Boyle <2018770887@qq.com> Date: Fri, 18 Aug 2023 09:50:33 +0800 Subject: [PATCH 01/84] modify the code of GeneralGraph.py and GES.py Signed-off-by: Oliver Schacht --- causallearn/graph/GeneralGraph.py | 18 ++++++++++++++++-- causallearn/search/ScoreBased/GES.py | 6 ++++-- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/causallearn/graph/GeneralGraph.py b/causallearn/graph/GeneralGraph.py index 41076590..1104bcea 100644 --- a/causallearn/graph/GeneralGraph.py +++ b/causallearn/graph/GeneralGraph.py @@ -504,17 +504,31 @@ def is_ancestor_of(self, node1: Node, node2: Node) -> bool: j = self.node_map[node2] return self.dpath[j, i] == 1 + # # Return true iff node1 is a child of node2. + # def is_child_of(self, node1: Node, node2: Node) -> bool: + # i = self.node_map[node1] + # j = self.node_map[node2] + # return self.graph[i, j] == Endpoint.TAIL.value or self.graph[i, j] == Endpoint.ARROW_AND_ARROW.value + # + # # Returns true iff node1 is a parent of node2. + # def is_parent_of(self, node1: Node, node2: Node) -> bool: + # i = self.node_map[node1] + # j = self.node_map[node2] + # return self.graph[j, i] == Endpoint.ARROW.value and self.graph[i, j] == Endpoint.TAIL.value + # Return true iff node1 is a child of node2. def is_child_of(self, node1: Node, node2: Node) -> bool: i = self.node_map[node1] j = self.node_map[node2] - return self.graph[i, j] == Endpoint.TAIL.value or self.graph[i, j] == Endpoint.ARROW_AND_ARROW.value + return (self.graph[j, i] == Endpoint.TAIL.value and self.graph[i, j] == Endpoint.ARROW.value) \ + or self.graph[j, i] == Endpoint.TAIL_AND_ARROW.value # Returns true iff node1 is a parent of node2. def is_parent_of(self, node1: Node, node2: Node) -> bool: i = self.node_map[node1] j = self.node_map[node2] - return self.graph[j, i] == Endpoint.ARROW.value and self.graph[i, j] == Endpoint.TAIL.value + return (self.graph[j, i] == Endpoint.ARROW.value and self.graph[i, j] == Endpoint.TAIL.value) \ + or self.graph[i, j] == Endpoint.TAIL_AND_ARROW.value # Returns true iff node1 is a proper ancestor of node2. 
def is_proper_ancestor_of(self, node1: Node, node2: Node) -> bool: diff --git a/causallearn/search/ScoreBased/GES.py b/causallearn/search/ScoreBased/GES.py index 1c73fbc4..5a6c91df 100644 --- a/causallearn/search/ScoreBased/GES.py +++ b/causallearn/search/ScoreBased/GES.py @@ -5,10 +5,11 @@ from causallearn.utils.DAG2CPDAG import dag2cpdag from causallearn.utils.GESUtils import * from causallearn.utils.PDAG2DAG import pdag2dag +from typing import Union def ges(X: ndarray, score_func: str = 'local_score_BIC', maxP: Optional[float] = None, - parameters: Optional[Dict[str, Any]] = None) -> Dict[str, Any]: + parameters: Optional[Dict[str, Any]] = None, node_names: Union[List[str], None] = None,) -> Dict[str, Any]: """ Perform greedy equivalence search (GES) algorithm @@ -95,7 +96,8 @@ def ges(X: ndarray, score_func: str = 'local_score_BIC', maxP: Optional[float] = raise Exception('Unknown function!') score_func = localScoreClass - node_names = [("X%d" % (i + 1)) for i in range(N)] + if node_names is None: + node_names = [("X%d" % (i + 1)) for i in range(N)] nodes = [] for name in node_names: From 7fd028fff0c7cc0ee341801f50ca9dafb22c7b4e Mon Sep 17 00:00:00 2001 From: Boyle <2018770887@qq.com> Date: Fri, 18 Aug 2023 10:09:29 +0800 Subject: [PATCH 02/84] modify the code of GeneralGraph.py and GES.py Signed-off-by: Oliver Schacht --- causallearn/graph/GeneralGraph.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/causallearn/graph/GeneralGraph.py b/causallearn/graph/GeneralGraph.py index 1104bcea..0969f44e 100644 --- a/causallearn/graph/GeneralGraph.py +++ b/causallearn/graph/GeneralGraph.py @@ -504,18 +504,6 @@ def is_ancestor_of(self, node1: Node, node2: Node) -> bool: j = self.node_map[node2] return self.dpath[j, i] == 1 - # # Return true iff node1 is a child of node2. - # def is_child_of(self, node1: Node, node2: Node) -> bool: - # i = self.node_map[node1] - # j = self.node_map[node2] - # return self.graph[i, j] == Endpoint.TAIL.value or self.graph[i, j] == Endpoint.ARROW_AND_ARROW.value - # - # # Returns true iff node1 is a parent of node2. - # def is_parent_of(self, node1: Node, node2: Node) -> bool: - # i = self.node_map[node1] - # j = self.node_map[node2] - # return self.graph[j, i] == Endpoint.ARROW.value and self.graph[i, j] == Endpoint.TAIL.value - # Return true iff node1 is a child of node2. 
def is_child_of(self, node1: Node, node2: Node) -> bool: i = self.node_map[node1] From 6ff809edf8d20025429a28d9975263820909b142 Mon Sep 17 00:00:00 2001 From: OliverSchacht <65898638+OliverSchacht@users.noreply.github.com> Date: Mon, 6 May 2024 09:19:14 +0200 Subject: [PATCH 03/84] add fast kci Signed-off-by: Oliver Schacht --- causallearn/utils/cit.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/causallearn/utils/cit.py b/causallearn/utils/cit.py index 8119dd72..d842281a 100644 --- a/causallearn/utils/cit.py +++ b/causallearn/utils/cit.py @@ -5,6 +5,7 @@ from scipy.stats import chi2, norm from causallearn.utils.KCI.KCI import KCI_CInd, KCI_UInd +from causallearn.utils.FastKCI.FastKCI import FastKCI_CInd, FastKCI_UInd from causallearn.utils.PCUtils import Helper CONST_BINCOUNT_UNIQUE_THRESHOLD = 1e5 @@ -13,6 +14,7 @@ mv_fisherz = "mv_fisherz" mc_fisherz = "mc_fisherz" kci = "kci" +fastkci = "fastkci" chisq = "chisq" gsq = "gsq" d_separation = "d_separation" @@ -32,6 +34,8 @@ def CIT(data, method='fisherz', **kwargs): return FisherZ(data, **kwargs) elif method == kci: return KCI(data, **kwargs) + elif method == fastkci: + return FastKCI(data, **kwargs) elif method in [chisq, gsq]: return Chisq_or_Gsq(data, method_name=method, **kwargs) elif method == mv_fisherz: @@ -193,6 +197,28 @@ def __call__(self, X, Y, condition_set=None): self.pvalue_cache[cache_key] = p return p +class FastKCI(CIT_Base): + def __init__(self, data, **kwargs): + super().__init__(data, **kwargs) + kci_ui_kwargs = {k: v for k, v in kwargs.items() if k in + ['kernelX', 'kernelY', 'null_ss', 'approx', 'est_width', 'polyd', 'kwidthx', 'kwidthy']} + kci_ci_kwargs = {k: v for k, v in kwargs.items() if k in + ['K', 'J', 'alpha', 'use_gp']} + self.check_cache_method_consistent( + 'kci', hashlib.md5(json.dumps(kci_ci_kwargs, sort_keys=True).encode('utf-8')).hexdigest()) + self.assert_input_data_is_valid() + self.kci_ui = KCI_UInd(**kci_ui_kwargs) + self.kci_ci = FastKCI_CInd(**kci_ci_kwargs) + + def __call__(self, X, Y, condition_set=None): + # Kernel-based conditional independence test. 
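+        # An empty conditioning set falls back to the standard KCI_UInd test;
+        # otherwise the block-partitioned FastKCI_CInd test is used.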
+        Xs, Ys, condition_set, cache_key = self.get_formatted_XYZ_and_cachekey(X, Y, condition_set)
+        if cache_key in self.pvalue_cache: return self.pvalue_cache[cache_key]
+        p = self.kci_ui.compute_pvalue(self.data[:, Xs], self.data[:, Ys])[0] if len(condition_set) == 0 else \
+            self.kci_ci.compute_pvalue(self.data[:, Xs], self.data[:, Ys], self.data[:, condition_set])[0]
+        self.pvalue_cache[cache_key] = p
+        return p
+
 class Chisq_or_Gsq(CIT_Base):
     def __init__(self, data, method_name, **kwargs):
         def _unique(column):

From 23c9e280ef23b22852e24d5cdcc754e9977d49e7 Mon Sep 17 00:00:00 2001
From: OliverSchacht <65898638+OliverSchacht@users.noreply.github.com>
Date: Mon, 6 May 2024 09:20:46 +0200
Subject: [PATCH 04/84] save null dist for debug

Signed-off-by: Oliver Schacht
---
 causallearn/utils/KCI/KCI.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/causallearn/utils/KCI/KCI.py b/causallearn/utils/KCI/KCI.py
index 40a33452..7207c14d 100644
--- a/causallearn/utils/KCI/KCI.py
+++ b/causallearn/utils/KCI/KCI.py
@@ -306,8 +306,8 @@ def compute_pvalue(self, data_x=None, data_y=None, data_z=None):
             k_appr, theta_appr = self.get_kappa(uu_prod)
             pvalue = 1 - stats.gamma.cdf(test_stat, k_appr, 0, theta_appr)
         else:
-            null_samples = self.null_sample_spectral(uu_prod, size_u, Kx.shape[0])
-            pvalue = sum(null_samples > test_stat) / float(self.nullss)
+            self.null_samples = self.null_sample_spectral(uu_prod, size_u, Kx.shape[0])
+            pvalue = sum(self.null_samples > test_stat) / float(self.nullss)
         return pvalue, test_stat

     def kernel_matrix(self, data_x, data_y, data_z):

From 671355034442f2aa1a73438a24fc7df2d56a51f1 Mon Sep 17 00:00:00 2001
From: OliverSchacht <65898638+OliverSchacht@users.noreply.github.com>
Date: Mon, 6 May 2024 09:25:17 +0200
Subject: [PATCH 05/84] add fast kci module

Signed-off-by: Oliver Schacht
---
 causallearn/utils/FastKCI/FastKCI.py  | 480 ++++++++++++++++++++++++++
 causallearn/utils/FastKCI/__init__.py |   0
 2 files changed, 480 insertions(+)
 create mode 100644 causallearn/utils/FastKCI/FastKCI.py
 create mode 100644 causallearn/utils/FastKCI/__init__.py

diff --git a/causallearn/utils/FastKCI/FastKCI.py b/causallearn/utils/FastKCI/FastKCI.py
new file mode 100644
index 00000000..d9e2d245
--- /dev/null
+++ b/causallearn/utils/FastKCI/FastKCI.py
@@ -0,0 +1,480 @@
+from causallearn.utils.KCI.KCI import GaussianKernel, Kernel
+import numpy as np
+from numpy.linalg import eigh
+import scipy.stats as stats
+from scipy.special import logsumexp
+from sklearn.preprocessing import LabelEncoder
+from joblib import Parallel, delayed
+from sklearn.gaussian_process import GaussianProcessRegressor
+from sklearn.gaussian_process.kernels import ConstantKernel, WhiteKernel, RBF
+import warnings
+
+class FastKCI_CInd(object):
+    """
+    Python implementation of a speed-up version of the Kernel-based Conditional Independence (KCI) test. Conditional version.
+
+    References
+    ----------
+    [1] K. Zhang, J. Peters, D. Janzing, and B. Schölkopf,
+    "A kernel-based conditional independence test and application in causal discovery," In UAI 2011.
+    [2] M. Zhang, and S. Williamson,
+    "Embarrassingly Parallel Inference for Gaussian Processes" In JMLR 20 (2019)
+
+    """
+    def __init__(self, K=10, J=8, alpha=500, epsilon=1e-3, eig_thresh = 1e-6, trimming_thresh = 1e-3, use_gp=False):
+        """
+        Initialize the FastKCI_CInd object.
+
+        Parameters
+        ----------
+        K: Number of Gaussians that are assumed to be in the mixture.
+        J: Number of independent repetitions.
+        alpha: Parameter for the Dirichlet distribution.
+        epsilon: Penalty for the matrix ridge regression.
+        eig_thresh: Threshold for eigenvalues.
+        trimming_thresh: Threshold for trimming the propensity weights.
+        use_gp: Whether to use Gaussian Process Regression to determine the kernel widths.
+        """
+        self.K = K
+        self.J = J
+        self.alpha = alpha
+        self.epsilon = epsilon
+        self.eig_thresh = eig_thresh
+        self.trimming_thresh = trimming_thresh
+        self.use_gp = use_gp
+        self.nullss = 5000
+        # TODO: Adjust to causal-learn API
+
+    def compute_pvalue(self, data_x=None, data_y=None, data_z=None):
+        """
+        Main function: compute the p value of H_0: X and Y are independent given Z.
+
+        Parameters
+        ----------
+        data_x: input data for x (nxd1 array)
+        data_y: input data for y (nxd2 array)
+        data_z: input data for z (nxd3 array)
+
+        Returns
+        _________
+        pvalue: p value (scalar)
+        test_stat: test statistic (scalar)
+        """
+        self.data = [data_x, data_y]
+        self.data_z = data_z
+        self.n = data_x.shape[0]
+
+        Z_proposal = Parallel(n_jobs=-1)(delayed(self.partition_data)() for i in range(self.J))
+        self.Z_proposal, self.prob_Z = zip(*Z_proposal)
+        block_res = Parallel(n_jobs=-1)(delayed(self.pvalue_onblocks)(self.Z_proposal[i]) for i in range(self.J))
+        test_stat, null_samples, log_likelihood = zip(*block_res)
+
+        log_likelihood = np.array(log_likelihood)
+        self.prob_Z += log_likelihood
+        self.prob_Z = np.around(np.exp(self.prob_Z-logsumexp(self.prob_Z)), 6) # experimental, not used
+        self.all_null_samples = np.vstack(null_samples)
+        self.all_p = np.array([np.sum(self.all_null_samples[i,] > test_stat[i]) / float(self.nullss) for i in range(self.J)])
+        self.prob_weights = np.around(np.exp(log_likelihood-logsumexp(log_likelihood)), 6)
+        self.all_test_stats = np.array(test_stat)
+        self.test_stat = np.average(np.array(test_stat), weights = self.prob_weights)
+        self.null_samples = np.average(null_samples, axis = 0, weights = self.prob_weights)
+        # experimental, not used
+        self.pvalue_alt = np.sum(np.average(null_samples, axis = 0, weights = self.prob_Z) > np.average(np.array(test_stat), weights = self.prob_Z)) / float(self.nullss)
+        self.pvalue = np.sum(self.null_samples > self.test_stat) / float(self.nullss)
+
+        return self.pvalue, self.test_stat
+
+    def partition_data(self):
+        """
+        Partitions data into K Gaussians following an expectation maximization approach on Z.
+
+        Returns
+        _________
+        Z: Latent Gaussian that was drawn for each observation (nx1 array)
+        prob_Z: Log-Likelihood of that random assignment (scalar)
+        """
+        Z_mean = self.data_z.mean(axis=0)
+        Z_sd = self.data_z.std(axis=0)
+        mu_k = np.random.normal(Z_mean, Z_sd, size = (self.K,self.data_z.shape[1]))
+        sigma_k = np.eye(self.data_z.shape[1])
+        pi_j = np.random.dirichlet([self.alpha]*self.K)
+        ll = np.tile(np.log(pi_j),(self.n,1))
+        for k in range(self.K):
+            ll[:,k] += stats.multivariate_normal.logpdf(self.data_z, mu_k[k,:], cov=sigma_k, allow_singular=True)
+        Z = np.array([ np.random.multinomial(1,np.exp(ll[n,:]-logsumexp(ll[n,:]))).argmax() for n in range(self.n)])
+        le = LabelEncoder()
+        Z = le.fit_transform(Z)
+        prob_Z = np.take_along_axis(ll, Z[:, None], axis=1).sum() # experimental, might be removed
+        return Z, prob_Z
+
+    def pvalue_onblocks(self, Z_proposal):
+        """
+        Calculate the p value on a given partition of the data.
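+        The test statistic and bootstrapped null samples are accumulated over
+        the Gaussian blocks of the given partition.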
+ + Parameters + ---------- + Z_proposal: partition of the data into K clusters (nxd1 array) + + Returns + _________ + test_stat: test statistic (scalar) + null_samples: bootstrapped sampled of the null distribution (self.nullssx1 array) + log_likelihood: estimated probability P(X,Y|Z,k) + """ + unique_Z_j = np.unique(Z_proposal) + test_stat = 0 + log_likelihood = 0 + null_samples = np.zeros((1,self.nullss)) + for k in unique_Z_j: + K_mask = (Z_proposal==k) + X_k = np.copy(self.data[0][K_mask]) + Y_k = np.copy(self.data[1][K_mask]) + Z_k = np.copy(self.data_z[K_mask]) + if (Z_k.shape[0]<6): # small blocks cause problems in GP, experimental + continue + Kx, Ky, Kzx, Kzy, epsilon_x, epsilon_y, likelihood_x, likelihood_y = self.kernel_matrix(X_k, Y_k, Z_k) + KxR, Rzx = Kernel.center_kernel_matrix_regression(Kx, Kzx, epsilon_x) + if epsilon_x != epsilon_y: + KyR, Rzy = Kernel.center_kernel_matrix_regression(Ky, Kzy, epsilon_y) + else: + KyR = Rzx.dot(Ky.dot(Rzx)) + test_stat += np.einsum('ij,ji->', KxR, KyR) + uu_prod, size_u = self.get_uuprod(KxR, KyR) + null_samples += self.null_sample_spectral(uu_prod, size_u, Kx.shape[0]) + log_likelihood += likelihood_x + likelihood_y + return test_stat, null_samples, log_likelihood + + + def kernel_matrix(self, data_x, data_y, data_z): + """ + Calculates the Gaussian Kernel for given data inputs as well as the shared kernel. + + Returns + _________ + K: Kernel matrices (n_kxn_k array) + """ + kernel_obj = GaussianKernel() + kernel_obj.set_width_empirical_kci(data_z) + + data_x = stats.zscore(data_x, ddof=1, axis=0) + data_x[np.isnan(data_x)] = 0. + + data_y = stats.zscore(data_y, ddof=1, axis=0) + data_y[np.isnan(data_y)] = 0. + + data_z = stats.zscore(data_z, ddof=1, axis=0) + data_z[np.isnan(data_z)] = 0. + + data_x = np.concatenate((data_x, 0.5 * data_z), axis=1) + + kernelX = GaussianKernel() + kernelX.set_width_empirical_kci(data_z) + kernelY = GaussianKernel() + kernelY.set_width_empirical_kci(data_z) + + Kx = kernelX.kernel(data_x) + Ky = kernelY.kernel(data_y) + + # centering kernel matrix + Kx = Kernel.center_kernel_matrix(Kx) + Ky = Kernel.center_kernel_matrix(Ky) + + if not self.use_gp: + kernelZ = GaussianKernel() + kernelZ.set_width_empirical_kci(data_z) + Kzx = kernelZ.kernel(data_z) + Kzx = Kernel.center_kernel_matrix(Kzx) + Kzy = Kzx + epsilon_x, epsilon_y = self.epsilon, self.epsilon + gpx = GaussianProcessRegressor() + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=Warning) + # P(X|Z) + gpx.fit(X = data_z, y = data_x) + likelihood_x = gpx.log_marginal_likelihood_value_ + gpy = GaussianProcessRegressor() + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=Warning) + # P(Y|X,Z) + gpy.fit(X = np.c_[data_x,data_z], y=data_y) + likelihood_y = gpy.log_marginal_likelihood_value_ + + else: + n, Dz = data_z.shape + + widthz = np.sqrt(1.0 / (kernelX.width * data_x.shape[1])) + + # Instantiate a Gaussian Process model for x + wx, vx = eigh(Kx) + topkx = int(np.max([np.min([400, np.floor(n / 4)]), np.min([n+1,8])])) + idx = np.argsort(-wx) + wx = wx[idx] + vx = vx[:, idx] + wx = wx[0:topkx] + vx = vx[:, 0:topkx] + vx = vx[:, np.abs(wx) > np.abs(wx).max() * 1e-10] + wx = wx[np.abs(wx) > np.abs(wx).max() * 1e-10] + vx = 2 * np.sqrt(n) * vx.dot(np.diag(np.sqrt(wx))) / np.sqrt(wx[0]) + kernelx = ConstantKernel(1.0, (1e-3, 1e3)) * RBF(widthz * np.ones(Dz), (1e-2, 1e2)) + WhiteKernel(0.1, (1e-10, 1e+1)) + gpx = GaussianProcessRegressor(kernel=kernelx) + # fit Gaussian process, including 
hyperparameter optimization + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=Warning) + gpx.fit(data_z, vx) + + # construct Gaussian kernels according to learned hyperparameters + Kzx = gpx.kernel_.k1(data_z, data_z) + epsilon_x = np.exp(gpx.kernel_.theta[-1]) + likelihood_x = gpx.log_marginal_likelihood_value_ + + # Instantiate a Gaussian Process model for y + wy, vy = eigh(Ky) + topky = int(np.max([np.min([400, np.floor(n / 4)]), np.min([n+1,8])])) + idy = np.argsort(-wy) + wy = wy[idy] + vy = vy[:, idy] + wy = wy[0:topky] + vy = vy[:, 0:topky] + vy = vy[:, np.abs(wy) > np.abs(wy).max() * 1e-10] + wy = wy[np.abs(wy) > np.abs(wy).max() * 1e-10] + vy = 2 * np.sqrt(n) * vy.dot(np.diag(np.sqrt(wy))) / np.sqrt(wy[0]) + kernely = ConstantKernel(1.0, (1e-3, 1e3)) * RBF(widthz * np.ones(Dz), (1e-2, 1e2)) + WhiteKernel(0.1, (1e-10, 1e+1)) + gpy = GaussianProcessRegressor(kernel=kernely) + # fit Gaussian process, including hyperparameter optimization + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=Warning) + gpy.fit(data_z, vy) + + # construct Gaussian kernels according to learned hyperparameters + Kzy = gpy.kernel_.k1(data_z, data_z) + epsilon_y = np.exp(gpy.kernel_.theta[-1]) + likelihood_y = gpy.log_marginal_likelihood_value_ + + return Kx, Ky, Kzx, Kzy, epsilon_x, epsilon_y, likelihood_x, likelihood_y + + def get_uuprod(self, Kx, Ky): + """ + Compute eigenvalues for null distribution estimation + + Parameters + ---------- + Kx: centralized kernel matrix for data_x (nxn) + Ky: centralized kernel matrix for data_y (nxn) + + Returns + _________ + uu_prod: product of the eigenvectors of Kx and Ky + size_u: number of produced eigenvectors + + """ + n_block = Kx.shape[0] + wx, vx = eigh(Kx) + wy, vy = eigh(Ky) + idx = np.argsort(-wx) + idy = np.argsort(-wy) + wx = wx[idx] + vx = vx[:, idx] + wy = wy[idy] + vy = vy[:, idy] + vx = vx[:, wx > np.max(wx) * self.eig_thresh] + wx = wx[wx > np.max(wx) * self.eig_thresh] + vy = vy[:, wy > np.max(wy) * self.eig_thresh] + wy = wy[wy > np.max(wy) * self.eig_thresh] + vx = vx.dot(np.diag(np.sqrt(wx))) + vy = vy.dot(np.diag(np.sqrt(wy))) + + # calculate their product + num_eigx = vx.shape[1] + num_eigy = vy.shape[1] + size_u = num_eigx * num_eigy + uu = np.zeros((n_block, size_u)) + for i in range(0, num_eigx): + for j in range(0, num_eigy): + uu[:, i * num_eigy + j] = vx[:, i] * vy[:, j] + + if size_u > self.n: + uu_prod = uu.dot(uu.T) + else: + uu_prod = uu.T.dot(uu) + + return uu_prod, size_u + + def get_kappa(self, mean_appr, var_appr): + """ + Get parameters for the approximated gamma distribution + Parameters + ---------- + uu_prod: product of the eigenvectors of Kx and Ky + + Returns + ---------- + k_appr, theta_appr: approximated parameters of the gamma distribution + + """ + k_appr = mean_appr ** 2 / var_appr + theta_appr = var_appr / mean_appr + return k_appr, theta_appr + + def null_sample_spectral(self, uu_prod, size_u, T): + """ + Simulate data from null distribution + + Parameters + ---------- + uu_prod: product of the eigenvectors of Kx and Ky + size_u: number of produced eigenvectors + T: sample size + + Returns + _________ + null_dstr: samples from the null distribution + + """ + eig_uu = np.linalg.eigvalsh(uu_prod) + eig_uu = -np.sort(-eig_uu) + eig_uu = eig_uu[0:np.min((T, size_u))] + eig_uu = eig_uu[eig_uu > np.max(eig_uu) * self.eig_thresh] + + f_rand = np.random.chisquare(1, (eig_uu.shape[0], self.nullss)) + null_dstr = eig_uu.T.dot(f_rand) + return null_dstr + +class 
FastKCI_UInd(object):
+    """
+    Python implementation of a speed-up version of the Kernel-based Conditional Independence (KCI) test. Unconditional version.
+
+    References
+    ----------
+    [1] K. Zhang, J. Peters, D. Janzing, and B. Schölkopf,
+    "A kernel-based conditional independence test and application in causal discovery," In UAI 2011.
+    [2] M. Zhang, and S. Williamson,
+    "Embarrassingly Parallel Inference for Gaussian Processes" In JMLR 20 (2019)
+    """
+    def __init__(self, K=10, J=8, alpha=500, trimming_thresh = 1e-3):
+        """
+        Construct the FastKCI_UInd model.
+
+        Parameters
+        ----------
+        K: Number of Gaussians that are assumed to be in the mixture
+        J: Number of independent repetitions.
+        alpha: Parameter for the Dirichlet distribution.
+        trimming_thresh: Threshold for trimming the propensity weights.
+        """
+        self.K = K
+        self.J = J
+        self.alpha = alpha
+        self.trimming_thresh = trimming_thresh
+        # TODO: Adjust to causal-learn API
+
+    def compute_pvalue(self, data_x=None, data_y=None):
+        """
+        Main function: compute the p value and return it together with the test statistic
+
+        Parameters
+        ----------
+        data_x: input data for x (nxd1 array)
+        data_y: input data for y (nxd2 array)
+
+        Returns
+        _________
+        pvalue: p value (scalar)
+        test_stat: test statistic (scalar)
+        """
+        self.data_x = data_x
+        self.data_y = data_y
+        self.n = data_x.shape[0]
+
+        Z_proposal = Parallel(n_jobs=-1)(delayed(self.partition_data)() for i in range(self.J))
+        self.Z_proposal, self.prob_Y = zip(*Z_proposal)
+        block_res = Parallel(n_jobs=-1)(delayed(self.pvalue_onblocks)(self.Z_proposal[i]) for i in range(self.J))
+        test_stat, mean_appr, var_appr, log_likelihood = zip(*block_res)
+        self.prob_Y = np.array(self.prob_Y)
+        self.prob_weights = np.around(np.exp(np.array(log_likelihood)-logsumexp(log_likelihood)), 6)
+        self.test_stat = np.average(np.array(test_stat), weights = self.prob_weights)
+        self.mean_appr = np.average(np.array(mean_appr), weights = self.prob_weights)
+        self.var_appr = np.average(np.array(var_appr), weights = self.prob_weights)
+
+        k_appr, theta_appr = self.get_kappa(self.mean_appr, self.var_appr)
+        self.pvalue = 1 - stats.gamma.cdf(self.test_stat, k_appr, 0, theta_appr)
+
+        return self.pvalue, self.test_stat
+
+    def partition_data(self):
+        """
+        Partitions data into K Gaussians
+
+        Returns
+        _________
+        Z: Latent Gaussian that was drawn for each observation (nx1 array)
+        prob_Y: Log-Likelihood of that random assignment (scalar)
+        """
+        Y_mean = self.data_y.mean(axis=0)
+        Y_sd = self.data_y.std(axis=0)
+        mu_k = np.random.normal(Y_mean, Y_sd, size = (self.K,self.data_y.shape[1]))
+        sigma_k = np.eye(self.data_y.shape[1])
+        pi_j = np.random.dirichlet([self.alpha]*self.K)
+        ll = np.tile(np.log(pi_j),(self.n,1))
+        for k in range(self.K):
+            ll[:,k] += stats.multivariate_normal.logpdf(self.data_y, mu_k[k,:], cov=sigma_k, allow_singular=True)
+        Z = np.array([ np.random.multinomial(1,np.exp(ll[n,:]-logsumexp(ll[n,:]))).argmax() for n in range(self.n)])
+        prob_Y = np.take_along_axis(ll, Z[:, None], axis=1).sum()
+        le = LabelEncoder()
+        Z = le.fit_transform(Z)
+        return (Z, prob_Y)
+
+    def pvalue_onblocks(self, Z_proposal):
+        unique_Z_j = np.unique(Z_proposal)
+        test_stat = 0
+        prob_weight = 0
+        mean_appr = 0
+        var_appr = 0
+        for k in unique_Z_j:
+            K_mask = (Z_proposal==k)
+            X_k = np.copy(self.data_x[K_mask])
+            Y_k = np.copy(self.data_y[K_mask])
+
+            Kx = self.kernel_matrix(X_k)
+            Ky = self.kernel_matrix(Y_k)
+            n_block = Kx.shape[0]
+
+            Kxc = Kernel.center_kernel_matrix(Kx)
+            Kyc = Kernel.center_kernel_matrix(Ky)
+            
test_stat += np.einsum('ij,ji->', Kxc, Kyc) + mean_appr += np.trace(Kxc) * np.trace(Kyc) / n_block + var_appr += 2 * np.sum(Kxc ** 2) * np.sum(Kyc ** 2) / n_block / n_block + var_appr /= unique_Z_j.shape[0] + return (test_stat, mean_appr, var_appr, prob_weight) + + def kernel_matrix(self, data): + """ + Calculates the Gaussian Kernel for given data inputs. + + Returns + _________ + K: Kernel matrix (n_kxn_k array) + """ + kernel_obj = GaussianKernel() + kernel_obj.set_width_empirical_hsic(data) + + data = stats.zscore(data, ddof=1, axis=0) + data[np.isnan(data)] = 0. + + K = kernel_obj.kernel(data) + + return K + + def get_kappa(self, mean_appr, var_appr): + """ + Get parameters for the approximated gamma distribution + Parameters + ---------- + Kx: kernel matrix for data_x (nxn) + Ky: kernel matrix for data_y (nxn) + + Returns + _________ + k_appr, theta_appr: approximated parameters of the gamma distribution + """ + k_appr = mean_appr ** 2 / var_appr + theta_appr = var_appr / mean_appr + return k_appr, theta_appr diff --git a/causallearn/utils/FastKCI/__init__.py b/causallearn/utils/FastKCI/__init__.py new file mode 100644 index 00000000..e69de29b From 7d140602b20973135541d281f3211e9f7d124e5d Mon Sep 17 00:00:00 2001 From: kunwuz <514397511@qq.com> Date: Wed, 30 Aug 2023 16:59:39 -0400 Subject: [PATCH 06/84] removed uncessary output for GES Signed-off-by: Oliver Schacht --- causallearn/search/ScoreBased/GES.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/causallearn/search/ScoreBased/GES.py b/causallearn/search/ScoreBased/GES.py index 5a6c91df..5f8c4a4b 100644 --- a/causallearn/search/ScoreBased/GES.py +++ b/causallearn/search/ScoreBased/GES.py @@ -182,7 +182,6 @@ def ges(X: ndarray, score_func: str = 'local_score_BIC', maxP: Optional[float] = break G = insert(G, min_desc[0], min_desc[1], min_desc[2]) update1.append([min_desc[0], min_desc[1], min_desc[2]]) - print(G.graph) G = pdag2dag(G) G = dag2cpdag(G) G_step1.append(G) @@ -192,7 +191,6 @@ def ges(X: ndarray, score_func: str = 'local_score_BIC', maxP: Optional[float] = ## -------------------------------------------------------------------- # backward greedy search - print('backward') count2 = 0 score_new = score update2 = [] @@ -246,7 +244,6 @@ def ges(X: ndarray, score_func: str = 'local_score_BIC', maxP: Optional[float] = break G = delete(G, min_desc[0], min_desc[1], min_desc[2]) update2.append([min_desc[0], min_desc[1], min_desc[2]]) - print(G.graph) G = pdag2dag(G) G = dag2cpdag(G) G_step2.append(G) From 55493bcebc631e07c24ed3fc68c35b42990ea6f8 Mon Sep 17 00:00:00 2001 From: kunwuz <514397511@qq.com> Date: Wed, 30 Aug 2023 17:00:40 -0400 Subject: [PATCH 07/84] updated setup Signed-off-by: Oliver Schacht --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 90aadea9..66781037 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,7 @@ with open('README.md', 'r') as fh: README = fh.read() -VERSION = '0.1.3.4' +VERSION = '0.1.3.5' setuptools.setup( name='causal-learn', From 29e6b587617ff2ed9e8626c7c707adaca77855ab Mon Sep 17 00:00:00 2001 From: kunwuz <514397511@qq.com> Date: Thu, 31 Aug 2023 13:20:00 -0400 Subject: [PATCH 08/84] Add a function to load real-world datasets Signed-off-by: Oliver Schacht --- causallearn/utils/Dataset.py | 41 ++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 causallearn/utils/Dataset.py diff --git a/causallearn/utils/Dataset.py b/causallearn/utils/Dataset.py new file mode 100644 index 00000000..6e62c2ee --- 
/dev/null +++ b/causallearn/utils/Dataset.py @@ -0,0 +1,41 @@ +import numpy as np +import urllib.request +from io import StringIO + +def load_dataset(dataset_name): + ''' + Load real-world datasets. Processed datasets are from https://github.com/cmu-phil/example-causal-datasets/tree/main + + Parameters + ---------- + dataset_name : str, ['sachs', 'boston_housing', 'airfoil'] + + Returns + ------- + data = np.array + labels = list + ''' + + url_mapping = { + 'sachs': 'https://raw.githubusercontent.com/cmu-phil/example-causal-datasets/main/real/sachs/data/sachs.2005.continuous.txt', + 'boston_housing': 'https://raw.githubusercontent.com/cmu-phil/example-causal-datasets/main/real/boston-housing/data/boston-housing.continuous.txt', + 'airfoil': 'https://raw.githubusercontent.com/cmu-phil/example-causal-datasets/main/real/airfoil-self-noise/data/airfoil-self-noise.continuous.txt' + } + + if dataset_name not in url_mapping: + raise ValueError("Invalid dataset name") + + url = url_mapping[dataset_name] + with urllib.request.urlopen(url) as response: + content = response.read().decode('utf-8') # Read content and decode to string + + # Use StringIO to turn the string content into a file-like object so numpy can read from it + labels_array = np.genfromtxt(StringIO(content), delimiter="\t", dtype=str, max_rows=1) + data = np.loadtxt(StringIO(content), skiprows=1) + + # Convert labels_array to a list of strings + labels_list = labels_array.tolist() + if isinstance(labels_list, str): # handle the case where there's only one label + labels_list = [labels_list] + + return data, labels_list \ No newline at end of file From c4c5096fc4ecab0021a4a564a430266d4518934c Mon Sep 17 00:00:00 2001 From: kunwuz <514397511@qq.com> Date: Sat, 14 Oct 2023 20:42:09 -0400 Subject: [PATCH 09/84] update doc Signed-off-by: kunwuz <514397511@qq.com> Signed-off-by: Oliver Schacht --- .../Constraint-based causal discovery methods/CDNOD.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/search_methods_index/Constraint-based causal discovery methods/CDNOD.rst b/docs/source/search_methods_index/Constraint-based causal discovery methods/CDNOD.rst index c8603c39..10d0b997 100644 --- a/docs/source/search_methods_index/Constraint-based causal discovery methods/CDNOD.rst +++ b/docs/source/search_methods_index/Constraint-based causal discovery methods/CDNOD.rst @@ -16,7 +16,7 @@ Usage from causallearn.search.ConstraintBased.CDNOD import cdnod # default parameters - cg = cdnod(data) + cg = cdnod(data, c_indx) # or customized parameters cg = cdnod(data, c_indx, alpha, indep_test, stable, uc_rule, uc_priority, mvcdnod, From 58f45d9c06ec7c8625e454bd06ba33ee253b0e4e Mon Sep 17 00:00:00 2001 From: kunwuz <514397511@qq.com> Date: Fri, 22 Sep 2023 01:57:52 -0400 Subject: [PATCH 10/84] updated doc Signed-off-by: Oliver Schacht --- docs/source/utilities_index/Datasets.rst | 27 ++++++++++++++++++++++++ docs/source/utilities_index/index.rst | 1 + setup.py | 2 +- 3 files changed, 29 insertions(+), 1 deletion(-) create mode 100644 docs/source/utilities_index/Datasets.rst diff --git a/docs/source/utilities_index/Datasets.rst b/docs/source/utilities_index/Datasets.rst new file mode 100644 index 00000000..fad90d9a --- /dev/null +++ b/docs/source/utilities_index/Datasets.rst @@ -0,0 +1,27 @@ +.. _datasets: + +Datasets +============================================== + + + +Usage +---------------------------- +.. 
code-block:: python + + from causallearn.utils.Dataset import load_dataset + + data, labels = load_dataset(dataset_name) + +Parameters +------------------- +**dataset_name**: str, the name of a dataset in ['sachs', 'boston_housing', 'airfoil'] + + +Returns +------------------- + +**data**: np.array, data with a shape of (number of samples, number of variables). + +**labels**: list, labels of variables in the data. + diff --git a/docs/source/utilities_index/index.rst b/docs/source/utilities_index/index.rst index c175e226..ec4dfc83 100644 --- a/docs/source/utilities_index/index.rst +++ b/docs/source/utilities_index/index.rst @@ -10,3 +10,4 @@ Contents: Graph operations/index Evaluations + Datasets diff --git a/setup.py b/setup.py index 66781037..49cdb7c0 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,7 @@ with open('README.md', 'r') as fh: README = fh.read() -VERSION = '0.1.3.5' +VERSION = '0.1.3.6' setuptools.setup( name='causal-learn', From 1879b6ac4865d81b56b5cdb7b1827c7044454195 Mon Sep 17 00:00:00 2001 From: kunwuz <514397511@qq.com> Date: Sat, 14 Oct 2023 21:07:34 -0400 Subject: [PATCH 11/84] update readthedoc config Signed-off-by: Oliver Schacht --- docs/.readthedocs.yaml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/docs/.readthedocs.yaml b/docs/.readthedocs.yaml index 670a48c7..db8757f9 100644 --- a/docs/.readthedocs.yaml +++ b/docs/.readthedocs.yaml @@ -11,3 +11,12 @@ build: tools: python: "3.11" +python: + version: 3.11 + install: + - method: pip + path: . + extra_requirements: + - docs + - method: pip + requirements: sphinx_rtd_theme \ No newline at end of file From 325f55f9583313f41d083e1eb9ad612fbd2d974e Mon Sep 17 00:00:00 2001 From: kunwuz <514397511@qq.com> Date: Sat, 14 Oct 2023 21:08:30 -0400 Subject: [PATCH 12/84] update readthedoc config Signed-off-by: Oliver Schacht --- docs/.readthedocs.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/.readthedocs.yaml b/docs/.readthedocs.yaml index db8757f9..7ce70c02 100644 --- a/docs/.readthedocs.yaml +++ b/docs/.readthedocs.yaml @@ -12,7 +12,7 @@ build: python: "3.11" python: - version: 3.11 + version: "3.11" install: - method: pip path: . From 5fe761171821d2af467175a421247385f9fe9156 Mon Sep 17 00:00:00 2001 From: kunwuz <514397511@qq.com> Date: Sat, 14 Oct 2023 21:10:07 -0400 Subject: [PATCH 13/84] update readthedoc config Signed-off-by: Oliver Schacht --- docs/.readthedocs.yaml | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/docs/.readthedocs.yaml b/docs/.readthedocs.yaml index 7ce70c02..eb12ce62 100644 --- a/docs/.readthedocs.yaml +++ b/docs/.readthedocs.yaml @@ -2,21 +2,18 @@ # Read the Docs configuration file # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details -# Required version: 2 -# Set the version of Python and other tools you might need build: os: ubuntu-22.04 - tools: - python: "3.11" python: - version: "3.11" + version: 3.11 install: - method: pip path: . 
extra_requirements: - docs - method: pip - requirements: sphinx_rtd_theme \ No newline at end of file + extra_requirements: + - sphinx_rtd_theme From c4dad3b003933319b4f1ab60b8c99c44ab9896df Mon Sep 17 00:00:00 2001 From: kunwuz <514397511@qq.com> Date: Sat, 14 Oct 2023 21:12:57 -0400 Subject: [PATCH 14/84] update readthedoc config Signed-off-by: Oliver Schacht --- docs/.readthedocs.yaml | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/docs/.readthedocs.yaml b/docs/.readthedocs.yaml index eb12ce62..ef525ca4 100644 --- a/docs/.readthedocs.yaml +++ b/docs/.readthedocs.yaml @@ -6,14 +6,5 @@ version: 2 build: os: ubuntu-22.04 - -python: - version: 3.11 - install: - - method: pip - path: . - extra_requirements: - - docs - - method: pip - extra_requirements: - - sphinx_rtd_theme + commands: + - pip install sphinx_rtd_theme From c6803180187dbd2e6f722bcd231edb9252222bd1 Mon Sep 17 00:00:00 2001 From: kunwuz <514397511@qq.com> Date: Sat, 14 Oct 2023 21:16:00 -0400 Subject: [PATCH 15/84] update readthedoc config Signed-off-by: Oliver Schacht --- docs/.readthedocs.yaml | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/docs/.readthedocs.yaml b/docs/.readthedocs.yaml index ef525ca4..2964cf61 100644 --- a/docs/.readthedocs.yaml +++ b/docs/.readthedocs.yaml @@ -4,7 +4,13 @@ version: 2 -build: - os: ubuntu-22.04 - commands: - - pip install sphinx_rtd_theme +python: + install: + - method: pip + path: . + extra_requirements: + - docs + - method: pip + extra_requirements: + - sphinx_rtd_theme + From c6e71897c17f6767605b53fb3c69fe00c821cefd Mon Sep 17 00:00:00 2001 From: kunwuz <514397511@qq.com> Date: Sat, 14 Oct 2023 21:18:08 -0400 Subject: [PATCH 16/84] update readthedoc config Signed-off-by: Oliver Schacht --- docs/.readthedocs.yaml | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/docs/.readthedocs.yaml b/docs/.readthedocs.yaml index 2964cf61..d6fa74bc 100644 --- a/docs/.readthedocs.yaml +++ b/docs/.readthedocs.yaml @@ -4,13 +4,10 @@ version: 2 -python: - install: - - method: pip - path: . - extra_requirements: - - docs - - method: pip - extra_requirements: - - sphinx_rtd_theme +build: + os: ubuntu-22.04 + tools: + python: "3.11" + commands: + - pip install sphinx_rtd_theme From a761ae2935445244ef1cd5367d1a56c5fbf64ce7 Mon Sep 17 00:00:00 2001 From: kunwuz <514397511@qq.com> Date: Sat, 14 Oct 2023 21:24:03 -0400 Subject: [PATCH 17/84] update readthedoc config Signed-off-by: Oliver Schacht --- docs/.readthedocs.yaml | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/docs/.readthedocs.yaml b/docs/.readthedocs.yaml index d6fa74bc..fba64dbc 100644 --- a/docs/.readthedocs.yaml +++ b/docs/.readthedocs.yaml @@ -8,6 +8,12 @@ build: os: ubuntu-22.04 tools: python: "3.11" - commands: - - pip install sphinx_rtd_theme + +python: + install: + - method: pip + path: . + - method: pip + extra_requirements: + - sphinx_rtd_theme From cbc3c27c5533d47c7c7c70c8f89270210a84311b Mon Sep 17 00:00:00 2001 From: kunwuz <514397511@qq.com> Date: Sat, 14 Oct 2023 21:25:53 -0400 Subject: [PATCH 18/84] update readthedoc config Signed-off-by: Oliver Schacht --- docs/.readthedocs.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/.readthedocs.yaml b/docs/.readthedocs.yaml index fba64dbc..50220262 100644 --- a/docs/.readthedocs.yaml +++ b/docs/.readthedocs.yaml @@ -13,7 +13,6 @@ python: install: - method: pip path: . 
- - method: pip extra_requirements: - sphinx_rtd_theme From 9830cd8c6218cbfe9debe969bf8e23ad903962b4 Mon Sep 17 00:00:00 2001 From: kunwuz <514397511@qq.com> Date: Sat, 14 Oct 2023 21:30:23 -0400 Subject: [PATCH 19/84] update readthedoc config Signed-off-by: Oliver Schacht --- docs/source/conf.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index 4019298f..a777b7e8 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -13,7 +13,7 @@ # import os # import sys # sys.path.insert(0, os.path.abspath('.')) - +import sphinx_rtd_theme # -- Project information ----------------------------------------------------- @@ -22,7 +22,7 @@ author = 'CLeaR' # The full version, including alpha/beta/rc tags -release = '0.1.2.8' +release = '0.1.3.6' # -- General configuration --------------------------------------------------- From 37417ddf54645d57927722e1771044ae1051f740 Mon Sep 17 00:00:00 2001 From: kunwuz <514397511@qq.com> Date: Sat, 14 Oct 2023 21:36:27 -0400 Subject: [PATCH 20/84] update readthedoc config Signed-off-by: Oliver Schacht --- docs/.readthedocs.yaml | 5 +++-- docs/requirements.txt | 1 + docs/source/conf.py | 1 - 3 files changed, 4 insertions(+), 3 deletions(-) create mode 100644 docs/requirements.txt diff --git a/docs/.readthedocs.yaml b/docs/.readthedocs.yaml index 50220262..dc8f65f2 100644 --- a/docs/.readthedocs.yaml +++ b/docs/.readthedocs.yaml @@ -13,6 +13,7 @@ python: install: - method: pip path: . - extra_requirements: - - sphinx_rtd_theme + - method: pip + requirements: requirements.txt + diff --git a/docs/requirements.txt b/docs/requirements.txt new file mode 100644 index 00000000..52b04f2e --- /dev/null +++ b/docs/requirements.txt @@ -0,0 +1 @@ +sphinx_rtd_theme \ No newline at end of file diff --git a/docs/source/conf.py b/docs/source/conf.py index a777b7e8..d649ff4c 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -13,7 +13,6 @@ # import os # import sys # sys.path.insert(0, os.path.abspath('.')) -import sphinx_rtd_theme # -- Project information ----------------------------------------------------- From b832014e4f0b46e94d689a0dc4f085bee6ec5a81 Mon Sep 17 00:00:00 2001 From: kunwuz <514397511@qq.com> Date: Sat, 14 Oct 2023 21:38:21 -0400 Subject: [PATCH 21/84] update readthedoc config Signed-off-by: Oliver Schacht --- docs/.readthedocs.yaml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/docs/.readthedocs.yaml b/docs/.readthedocs.yaml index dc8f65f2..31c7d19b 100644 --- a/docs/.readthedocs.yaml +++ b/docs/.readthedocs.yaml @@ -11,9 +11,6 @@ build: python: install: - - method: pip - path: . 
- - method: pip - requirements: requirements.txt + - requirements: requirements.txt From 7a6ff43b6ab9112a28c9dd66a808e29da93832f8 Mon Sep 17 00:00:00 2001 From: kunwuz <514397511@qq.com> Date: Sat, 14 Oct 2023 21:39:57 -0400 Subject: [PATCH 22/84] update readthedoc config Signed-off-by: Oliver Schacht --- docs/.readthedocs.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/.readthedocs.yaml b/docs/.readthedocs.yaml index 31c7d19b..41b8aba3 100644 --- a/docs/.readthedocs.yaml +++ b/docs/.readthedocs.yaml @@ -11,6 +11,6 @@ build: python: install: - - requirements: requirements.txt + - requirements: ./requirements.txt From 6ed1767d6b4dc273c051d01f7a20199a19b2a54e Mon Sep 17 00:00:00 2001 From: kunwuz <514397511@qq.com> Date: Sat, 14 Oct 2023 21:41:43 -0400 Subject: [PATCH 23/84] update readthedoc config Signed-off-by: Oliver Schacht --- docs/.readthedocs.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/.readthedocs.yaml b/docs/.readthedocs.yaml index 41b8aba3..5e8dd497 100644 --- a/docs/.readthedocs.yaml +++ b/docs/.readthedocs.yaml @@ -11,6 +11,6 @@ build: python: install: - - requirements: ./requirements.txt + - requirements: docs/requirements.txt From a55b36c67d5057cacc91fc99bf4c60c356b7fcd7 Mon Sep 17 00:00:00 2001 From: kunwuz <514397511@qq.com> Date: Sat, 14 Oct 2023 21:44:42 -0400 Subject: [PATCH 24/84] update typo in a comment Signed-off-by: Oliver Schacht --- causallearn/utils/ScoreUtils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/causallearn/utils/ScoreUtils.py b/causallearn/utils/ScoreUtils.py index e5af6ca1..6bd075aa 100644 --- a/causallearn/utils/ScoreUtils.py +++ b/causallearn/utils/ScoreUtils.py @@ -523,7 +523,7 @@ def cov_noise(logtheta=None, x=None, z=None, nargout=1): def cov_seard(loghyper=None, x=None, z=None, nargout=1): - # Squared Exponential covariance function with Automatic Relevance Detemination + # Squared Exponential covariance function with Automatic Relevance Determination # (ARD) distance measure. The covariance function is parameterized as: # # k(x^p,x^q) = sf2 * exp(-(x^p - x^q)'*inv(P)*(x^p - x^q)/2) From 48780a321bad5ccce6a16af593abd6d94ab335c0 Mon Sep 17 00:00:00 2001 From: kunwuz <514397511@qq.com> Date: Sat, 14 Oct 2023 21:53:03 -0400 Subject: [PATCH 25/84] uupdate minor descriptions Signed-off-by: Oliver Schacht --- causallearn/search/ConstraintBased/FCI.py | 2 +- .../Constraint-based causal discovery methods/FCI.rst | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/causallearn/search/ConstraintBased/FCI.py b/causallearn/search/ConstraintBased/FCI.py index 9f5af378..57d2f17a 100644 --- a/causallearn/search/ConstraintBased/FCI.py +++ b/causallearn/search/ConstraintBased/FCI.py @@ -716,7 +716,7 @@ def fci(dataset: ndarray, independence_test_method: str=fisherz, alpha: float = Returns ------- - graph : a CausalGraph object, where graph.graph[j,i]=1 and graph.graph[i,j]=-1 indicates i --> j , + graph : a GeneralGraph object, where graph.graph[j,i]=1 and graph.graph[i,j]=-1 indicates i --> j , graph.graph[i,j] = graph.graph[j,i] = -1 indicates i --- j, graph.graph[i,j] = graph.graph[j,i] = 1 indicates i <-> j, graph.graph[j,i]=1 and graph.graph[i,j]=2 indicates i o-> j. 
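For illustration, the endpoint encoding documented in the fci docstring above can be decoded from the returned adjacency matrix. A minimal sketch (not part of the patch), assuming only the public fci API shown in this series and synthetic placeholder data with the default Fisher-Z test:

# Illustrative sketch: decode the PAG endpoint encoding returned by fci.
import numpy as np
from causallearn.search.ConstraintBased.FCI import fci

np.random.seed(0)
data = np.random.randn(500, 4)  # placeholder (n_samples, n_vars) array
g, edges = fci(data)            # default: Fisher-Z test, alpha=0.05
A = g.graph
for i in range(A.shape[0]):
    for j in range(A.shape[1]):
        if A[j, i] == 1 and A[i, j] == -1:
            print(f"X{i + 1} --> X{j + 1}")  # directed edge
        elif i < j and A[i, j] == -1 and A[j, i] == -1:
            print(f"X{i + 1} --- X{j + 1}")  # undirected edge
        elif i < j and A[i, j] == 1 and A[j, i] == 1:
            print(f"X{i + 1} <-> X{j + 1}")  # bidirected edge
        elif A[j, i] == 1 and A[i, j] == 2:
            print(f"X{i + 1} o-> X{j + 1}")  # partially oriented edge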
diff --git a/docs/source/search_methods_index/Constraint-based causal discovery methods/FCI.rst b/docs/source/search_methods_index/Constraint-based causal discovery methods/FCI.rst index 5c71f1ad..2ebadb57 100644 --- a/docs/source/search_methods_index/Constraint-based causal discovery methods/FCI.rst +++ b/docs/source/search_methods_index/Constraint-based causal discovery methods/FCI.rst @@ -16,16 +16,16 @@ Usage from causallearn.search.ConstraintBased.FCI import fci # default parameters - G, edges = fci(data) + g, edges = fci(data) # or customized parameters - G, edges = fci(data, independence_test_method, alpha, depth, max_path_length, + g, edges = fci(data, independence_test_method, alpha, depth, max_path_length, verbose, background_knowledge, cache_variables_map) # visualization from causallearn.utils.GraphUtils import GraphUtils - pdy = GraphUtils.to_pydot(G) + pdy = GraphUtils.to_pydot(g) pdy.write_png('simple_test.png') Visualization using pydot is recommended. If specific label names are needed, please refer to this `usage example `_. @@ -60,7 +60,7 @@ For detailed usage, please kindly refer to its `usage example Date: Fri, 27 Oct 2023 18:36:47 +0200 Subject: [PATCH 26/84] Fixed indexing error in dag2pag() Signed-off-by: Oliver Schacht --- causallearn/utils/DAG2PAG.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/causallearn/utils/DAG2PAG.py b/causallearn/utils/DAG2PAG.py index 581f5f20..9980b96b 100644 --- a/causallearn/utils/DAG2PAG.py +++ b/causallearn/utils/DAG2PAG.py @@ -96,12 +96,13 @@ def dag2pag(dag: Dag, islatent: List[Node]) -> GeneralGraph: data = np.empty(shape=(0, len(observed_nodes))) independence_test_method = CIT(data, method=d_separation, true_dag=true_dag) - + node_map = PAG.get_node_map() + sepset_reindexed = {(node_map[nodes[i]], node_map[nodes[j]]): sepset[(i, j)] for (i, j) in sepset} while change_flag: change_flag = False change_flag = rulesR1R2cycle(PAG, None, change_flag, False) - change_flag = ruleR3(PAG, sepset, None, change_flag, False) - change_flag = ruleR4B(PAG, -1, data, independence_test_method, 0.05, sep_sets=sepset, + change_flag = ruleR3(PAG, sepset_reindexed, None, change_flag, False) + change_flag = ruleR4B(PAG, -1, data, independence_test_method, 0.05, sep_sets=sepset_reindexed, change_flag=change_flag, bk=None, verbose=False) return PAG From ec4861eb9b9b4ddd28ff9cdeeb888aee519376f7 Mon Sep 17 00:00:00 2001 From: kunwuz <514397511@qq.com> Date: Wed, 1 Nov 2023 21:10:21 -0400 Subject: [PATCH 27/84] update varlingam and 0.1.3.7 Signed-off-by: Oliver Schacht --- causallearn/search/FCMBased/lingam/var_lingam.py | 4 ++-- setup.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/causallearn/search/FCMBased/lingam/var_lingam.py b/causallearn/search/FCMBased/lingam/var_lingam.py index 1f2d931f..ecc7439a 100644 --- a/causallearn/search/FCMBased/lingam/var_lingam.py +++ b/causallearn/search/FCMBased/lingam/var_lingam.py @@ -253,14 +253,14 @@ def _estimate_var_coefs(self, X): # XXX: VAR.fit() is not searching lags correctly if self._criterion not in ['aic', 'fpe', 'hqic', 'bic']: var = VAR(X) - result = var.fit(maxlags=self._lags, trend='nc') + result = var.fit(maxlags=self._lags, trend='n') else: min_value = float('Inf') result = None for lag in range(1, self._lags + 1): var = VAR(X) - fitted = var.fit(maxlags=lag, ic=None, trend='nc') + fitted = var.fit(maxlags=lag, ic=None, trend='n') value = getattr(fitted, self._criterion) if value < min_value: diff --git a/setup.py b/setup.py index 49cdb7c0..5bf2cf08 
100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,7 @@ with open('README.md', 'r') as fh: README = fh.read() -VERSION = '0.1.3.6' +VERSION = '0.1.3.7' setuptools.setup( name='causal-learn', From ee58783a3a5a65152df4ae1bbbcf0262b9ead150 Mon Sep 17 00:00:00 2001 From: Haoyue Dai Date: Wed, 22 Nov 2023 19:59:52 -0500 Subject: [PATCH 28/84] Endpoint comparison: check only for Endpoint instances Signed-off-by: Oliver Schacht --- causallearn/graph/Endpoint.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/causallearn/graph/Endpoint.py b/causallearn/graph/Endpoint.py index df551955..83f1ece4 100644 --- a/causallearn/graph/Endpoint.py +++ b/causallearn/graph/Endpoint.py @@ -20,4 +20,4 @@ def __str__(self): return self.name def __eq__(self, other): - return self.value == other.value + return isinstance(other, Endpoint) and self.value == other.value From bb77f08c5b9023e8a3f673a199230161d24da55b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filip=20Edstr=C3=B6m?= Date: Wed, 20 Dec 2023 23:40:10 +0100 Subject: [PATCH 29/84] Update KCI.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Changed np.int (deprecated) to int according to error message in numpy. Signed-off-by: Filip Edström Signed-off-by: Oliver Schacht --- causallearn/utils/KCI/KCI.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/causallearn/utils/KCI/KCI.py b/causallearn/utils/KCI/KCI.py index 7207c14d..681d48d4 100644 --- a/causallearn/utils/KCI/KCI.py +++ b/causallearn/utils/KCI/KCI.py @@ -191,7 +191,7 @@ def null_sample_spectral(self, Kxc, Kyc): """ T = Kxc.shape[0] if T > 1000: - num_eig = np.int(np.floor(T / 2)) + num_eig = int(np.floor(T / 2)) else: num_eig = T lambdax = eigvalsh(Kxc) From 60b8cfd7886468151aa90d3c8f1e3c978a4b1e1d Mon Sep 17 00:00:00 2001 From: kunwuz <514397511@qq.com> Date: Tue, 2 Jan 2024 21:09:19 -0500 Subject: [PATCH 30/84] add reference for pc stable Signed-off-by: Oliver Schacht --- .../Constraint-based causal discovery methods/PC.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/source/search_methods_index/Constraint-based causal discovery methods/PC.rst b/docs/source/search_methods_index/Constraint-based causal discovery methods/PC.rst index 349889c7..81c70f69 100644 --- a/docs/source/search_methods_index/Constraint-based causal discovery methods/PC.rst +++ b/docs/source/search_methods_index/Constraint-based causal discovery methods/PC.rst @@ -81,7 +81,7 @@ and n_features is the number of features. - ":ref:`kci `": kernel-based conditional independence test. (As a kernel method, its complexity is cubic in the sample size, so it might be slow if the same size is not small.) - ":ref:`mv_fisherz `": Missing-value Fisher's Z conditional independence test. -**stable**: run stabilized skeleton discovery if True. Default: True. +**stable**: run stabilized skeleton discovery [4]_ if True. Default: True. **uc_rule**: how unshielded colliders are oriented. Default: 0. - 0: run uc_sepset. @@ -114,4 +114,5 @@ Returns .. [1] Spirtes, P., Glymour, C. N., Scheines, R., & Heckerman, D. (2000). Causation, prediction, and search. MIT press. .. [2] Tu, R., Zhang, C., Ackermann, P., Mohan, K., Kjellström, H., & Zhang, K. (2019, April). Causal discovery in the presence of missing data. In The 22nd International Conference on Artificial Intelligence and Statistics (pp. 1762-1770). PMLR. -.. [3] Ramsey, J. (2016). Improving accuracy and scalability of the pc algorithm by maximizing p-value. arXiv preprint arXiv:1610.00378. 
\ No newline at end of file +.. [3] Ramsey, J. (2016). Improving accuracy and scalability of the pc algorithm by maximizing p-value. arXiv preprint arXiv:1610.00378. +.. [4] Colombo, D., & Maathuis, M. H. (2014). Order-independent constraint-based causal structure learning. J. Mach. Learn. Res., 15(1), 3741-3782. \ No newline at end of file From 1ab02070bfa66e412d38cf1ca75be8395c34ab0c Mon Sep 17 00:00:00 2001 From: kunwuz <514397511@qq.com> Date: Tue, 2 Jan 2024 21:16:27 -0500 Subject: [PATCH 31/84] add reference for pc stable in cdnod Signed-off-by: Oliver Schacht --- .../Constraint-based causal discovery methods/CDNOD.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/source/search_methods_index/Constraint-based causal discovery methods/CDNOD.rst b/docs/source/search_methods_index/Constraint-based causal discovery methods/CDNOD.rst index 10d0b997..2999b3e7 100644 --- a/docs/source/search_methods_index/Constraint-based causal discovery methods/CDNOD.rst +++ b/docs/source/search_methods_index/Constraint-based causal discovery methods/CDNOD.rst @@ -51,7 +51,7 @@ and n_features is the number of features. - ":ref:`kci `": kernel-based conditional independence test. (As a kernel method, its complexity is cubic in the sample size, so it might be slow if the same size is not small.) - ":ref:`mv_fisherz `": Missing-value Fisher's Z conditional independence test. -**stable**: run stabilized skeleton discovery if True. Default: True. +**stable**: run stabilized skeleton discovery [3]_ if True. Default: True. **uc_rule**: how unshielded colliders are oriented. Default: 0. - 0: run uc_sepset. @@ -82,4 +82,5 @@ Returns **cg** : a CausalGraph object, where cg.G.graph[j,i]=1 and cg.G.graph[i,j]=-1 indicate i --> j; cg.G.graph[i,j] = cg.G.graph[j,i] = -1 indicates i --- j; cg.G.graph[i,j] = cg.G.graph[j,i] = 1 indicates i <-> j. .. [1] Huang, B., Zhang, K., Zhang, J., Ramsey, J. D., Sanchez-Romero, R., Glymour, C., & Schölkopf, B. (2020). Causal Discovery from Heterogeneous/Nonstationary Data. J. Mach. Learn. Res., 21(89), 1-53. -.. [2] Ramsey, J. (2016). Improving accuracy and scalability of the pc algorithm by maximizing p-value. arXiv preprint arXiv:1610.00378. \ No newline at end of file +.. [2] Ramsey, J. (2016). Improving accuracy and scalability of the pc algorithm by maximizing p-value. arXiv preprint arXiv:1610.00378. +.. [3] Colombo, D., & Maathuis, M. H. (2014). Order-independent constraint-based causal structure learning. J. Mach. Learn. Res., 15(1), 3741-3782. 
\ No newline at end of file From 51c4661543e2b4b97776dda340f01ea8bf384c3a Mon Sep 17 00:00:00 2001 From: ZhiyiHuang Date: Mon, 22 Jan 2024 08:05:20 +0800 Subject: [PATCH 32/84] Fixed issue#129 Signed-off-by: Oliver Schacht --- causallearn/utils/PCUtils/SkeletonDiscovery.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/causallearn/utils/PCUtils/SkeletonDiscovery.py b/causallearn/utils/PCUtils/SkeletonDiscovery.py index 6cfaa492..428d2cee 100644 --- a/causallearn/utils/PCUtils/SkeletonDiscovery.py +++ b/causallearn/utils/PCUtils/SkeletonDiscovery.py @@ -120,8 +120,9 @@ def skeleton_discovery( else: if verbose: print('%d dep %d | %s with p-value %f\n' % (x, y, S, p)) - append_value(cg.sepset, x, y, tuple(sepsets)) - append_value(cg.sepset, y, x, tuple(sepsets)) + if (x, y) in edge_removal or not cg.G.get_edge(cg.G.nodes[x], cg.G.nodes[y]): + append_value(cg.sepset, x, y, tuple(sepsets)) + append_value(cg.sepset, y, x, tuple(sepsets)) if show_progress: pbar.refresh() From dc2a154705a639e0ffe59f904f89ed3a657b344a Mon Sep 17 00:00:00 2001 From: ZhiyiHuang Date: Mon, 22 Jan 2024 08:15:45 +0800 Subject: [PATCH 33/84] Added a test for updating separated set Signed-off-by: Oliver Schacht --- tests/TestSkeletonDiscovery.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 tests/TestSkeletonDiscovery.py diff --git a/tests/TestSkeletonDiscovery.py b/tests/TestSkeletonDiscovery.py new file mode 100644 index 00000000..10b0a5a2 --- /dev/null +++ b/tests/TestSkeletonDiscovery.py @@ -0,0 +1,18 @@ +from unittest import TestCase +import numpy as np +from causallearn.search.ConstraintBased.PC import pc +import networkx as nx +from causallearn.utils.cit import chisq, fisherz, gsq, kci, mv_fisherz, d_separation + + +class TestSkeletonDiscovery(TestCase): + def test_sepset(self): + truth_DAG_directed_edges = {(0, 2), (1, 2), (2, 3), (2, 4)} + + true_dag_netx = nx.DiGraph() + true_dag_netx.add_nodes_from(list(range(5))) + true_dag_netx.add_edges_from(truth_DAG_directed_edges) + + data = np.zeros((100, len(true_dag_netx.nodes))) # just a placeholder + cg = pc(data, 0.05, d_separation, True, 0, -1, true_dag=true_dag_netx) + assert cg.sepset[0, 2] is None \ No newline at end of file From 2c5a4d118055e2481ce712292898f1340f59029a Mon Sep 17 00:00:00 2001 From: ZhiyiHuang Date: Thu, 1 Feb 2024 09:51:03 +0800 Subject: [PATCH 34/84] Fixed issue#164 Signed-off-by: ZhiyiHuang Signed-off-by: Oliver Schacht --- causallearn/search/ScoreBased/GES.py | 2 +- tests/TestGES.py | 38 ++++++++++++++-------------- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/causallearn/search/ScoreBased/GES.py b/causallearn/search/ScoreBased/GES.py index 5f8c4a4b..c9475755 100644 --- a/causallearn/search/ScoreBased/GES.py +++ b/causallearn/search/ScoreBased/GES.py @@ -139,7 +139,7 @@ def ges(X: ndarray, score_func: str = 'local_score_BIC', maxP: Optional[float] = np.where(G.graph[j, :] == Endpoint.TAIL.value)[0]) # neighbors of Xj Ti = np.union1d(np.where(G.graph[:, i] != Endpoint.NULL.value)[0], - np.where(G.graph[i, 0] != Endpoint.NULL.value)[0]) # adjacent to Xi + np.where(G.graph[i, :] != Endpoint.NULL.value)[0]) # adjacent to Xi NTi = np.setdiff1d(np.arange(N), Ti) T0 = np.intersect1d(Tj, NTi) # find the neighbours of Xj that are not adjacent to Xi diff --git a/tests/TestGES.py b/tests/TestGES.py index 137a7e9a..ba7e919f 100644 --- a/tests/TestGES.py +++ b/tests/TestGES.py @@ -10,7 +10,7 @@ ######################################### Test Notes 
########################################### -# All the benchmark results of loaded files (e.g. "./TestData/benchmark_returned_results/") # +# All the benchmark results of loaded files (e.g. "tests/TestData/benchmark_returned_results/")# # are obtained from the code of causal-learn as of commit # # https://github.com/cmu-phil/causal-learn/commit/b51d788 (07-08-2022). # # # @@ -23,13 +23,13 @@ BENCHMARK_TXTFILE_TO_MD5 = { - "./TestData/data_linear_10.txt": "95a17e15038d4cade0845140b67c05a6", - "./TestData/data_discrete_10.txt": "ccb51c6c1946d8524a8b29a49aef2cc4", - "./TestData/graph.10.txt": "4970d4ecb8be999a82a665e5f5e0825b", - "./TestData/test_ges_simulated_linear_gaussian_data.txt": "0d2490eeb9ee8ef3b18bf21d7e936e1e", - "./TestData/test_ges_simulated_linear_gaussian_CPDAG.txt": "aa0146777186b07e56421ce46ed52914", - "./TestData/benchmark_returned_results/linear_10_ges_local_score_BIC_none_none.txt": "3accb3673d2ccb4c110f3703d60fe702", - "./TestData/benchmark_returned_results/discrete_10_ges_local_score_BDeu_none_none.txt": "eebd11747c1b927b2fdd048a55c8c3a5", + "tests/TestData/data_linear_10.txt": "95a17e15038d4cade0845140b67c05a6", + "tests/TestData/data_discrete_10.txt": "ccb51c6c1946d8524a8b29a49aef2cc4", + "tests/TestData/graph.10.txt": "4970d4ecb8be999a82a665e5f5e0825b", + "tests/TestData/test_ges_simulated_linear_gaussian_data.txt": "0d2490eeb9ee8ef3b18bf21d7e936e1e", + "tests/TestData/test_ges_simulated_linear_gaussian_CPDAG.txt": "aa0146777186b07e56421ce46ed52914", + "tests/TestData/benchmark_returned_results/linear_10_ges_local_score_BIC_none_none.txt": "3accb3673d2ccb4c110f3703d60fe702", + "tests/TestData/benchmark_returned_results/discrete_10_ges_local_score_BDeu_none_none.txt": "eebd11747c1b927b2fdd048a55c8c3a5", } INCONSISTENT_RESULT_GRAPH_ERRMSG = "Returned graph is inconsistent with the benchmark. Please check your code with the commit b51d788." @@ -47,8 +47,8 @@ class TestGES(unittest.TestCase): # Load data from file "data_linear_10.txt". Run GES with local_score_BIC. def test_ges_load_linear_10_with_local_score_BIC(self): print('Now start test_ges_load_linear_10_with_local_score_BIC ...') - data_path = "./TestData/data_linear_10.txt" - truth_graph_path = "./TestData/graph.10.txt" + data_path = "tests/TestData/data_linear_10.txt" + truth_graph_path = "tests/TestData/graph.10.txt" data = np.loadtxt(data_path, skiprows=1) truth_dag = txt2generalgraph(truth_graph_path) # truth_dag is a GeneralGraph instance truth_cpdag = dag2cpdag(truth_dag) @@ -58,7 +58,7 @@ def test_ges_load_linear_10_with_local_score_BIC(self): res_map = ges(data, score_func='local_score_BIC', maxP=None, parameters=None) # Run GES and obtain the estimated graph (res_map is Dict object,which contains the updated steps, the result causal graph and the result score.) 
benchmark_returned_graph = np.loadtxt( - "./TestData/benchmark_returned_results/linear_10_ges_local_score_BIC_none_none.txt") + "tests/TestData/benchmark_returned_results/linear_10_ges_local_score_BIC_none_none.txt") assert np.all(res_map['G'].graph == benchmark_returned_graph), INCONSISTENT_RESULT_GRAPH_ERRMSG shd = SHD(truth_cpdag, res_map['G']) print(f" ges(data, score_func='local_score_BIC', maxP=None, parameters=None)\tSHD: {shd.get_shd()} of {num_edges_in_truth}") @@ -73,9 +73,9 @@ def test_ges_simulate_linear_gaussian_with_local_score_BIC(self): truth_DAG_directed_edges = {(0, 1), (0, 3), (1, 2), (1, 3), (2, 3), (2, 4), (3, 4)} truth_CPDAG_directed_edges = {(0, 3), (1, 3), (2, 3), (2, 4), (3, 4)} truth_CPDAG_undirected_edges = {(0, 1), (1, 2), (2, 1), (1, 0)} - truth_CPDAG = np.loadtxt("./TestData/test_ges_simulated_linear_gaussian_CPDAG.txt") + truth_CPDAG = np.loadtxt("tests/TestData/test_ges_simulated_linear_gaussian_CPDAG.txt") - ###### Simulation configuration: code to generate "./TestData/test_ges_simulated_linear_gaussian_data.txt" ###### + ###### Simulation configuration: code to generate "tests/TestData/test_ges_simulated_linear_gaussian_data.txt" ###### # np.random.seed(42) # linear_weight_minabs, linear_weight_maxabs, linear_weight_netative_prob = 0.5, 0.9, 0.5 # sample_size = 10000 @@ -89,10 +89,10 @@ def test_ges_simulate_linear_gaussian_with_local_score_BIC(self): # mixing_matrix = np.linalg.inv(np.eye(num_of_nodes) - adjacency_matrix) # exogenous_noise = np.random.normal(0, 1, (num_of_nodes, sample_size)) # data = (mixing_matrix @ exogenous_noise).T - # np.savetxt("./TestData/test_ges_simulated_linear_gaussian_data.txt", data) - ###### Simulation configuration: code to generate "./TestData/test_ges_simulated_linear_gaussian_data.txt" ###### + # np.savetxt("tests/TestData/test_ges_simulated_linear_gaussian_data.txt", data) + ###### Simulation configuration: code to generate "tests/TestData/test_ges_simulated_linear_gaussian_data.txt" ###### - data = np.loadtxt("./TestData/test_ges_simulated_linear_gaussian_data.txt") + data = np.loadtxt("tests/TestData/test_ges_simulated_linear_gaussian_data.txt") # Run GES with default parameters: score_func='local_score_BIC', maxP=None, parameters=None res_map = ges(data, score_func='local_score_BIC', maxP=None, parameters=None) @@ -105,8 +105,8 @@ def test_ges_simulate_linear_gaussian_with_local_score_BIC(self): # Load data from file "data_discrete_10.txt". Run GES with local_score_BDeu. def test_ges_load_discrete_10_with_local_score_BDeu(self): print('Now start test_ges_load_discrete_10_with_local_score_BDeu ...') - data_path = "./TestData/data_discrete_10.txt" - truth_graph_path = "./TestData/graph.10.txt" + data_path = "tests/TestData/data_discrete_10.txt" + truth_graph_path = "tests/TestData/graph.10.txt" data = np.loadtxt(data_path, skiprows=1) truth_dag = txt2generalgraph(truth_graph_path) # truth_dag is a GeneralGraph instance truth_cpdag = dag2cpdag(truth_dag) @@ -115,7 +115,7 @@ def test_ges_load_discrete_10_with_local_score_BDeu(self): # Run GES with local_score_BDeu. 
res_map = ges(data, score_func='local_score_BDeu', maxP=None, parameters=None)
         benchmark_returned_graph = np.loadtxt(
-            "./TestData/benchmark_returned_results/discrete_10_ges_local_score_BDeu_none_none.txt")
+            "tests/TestData/benchmark_returned_results/discrete_10_ges_local_score_BDeu_none_none.txt")
         assert np.all(res_map['G'].graph == benchmark_returned_graph), INCONSISTENT_RESULT_GRAPH_ERRMSG
         shd = SHD(truth_cpdag, res_map['G'])
         print(f"  ges(data, score_func='local_score_BDeu', maxP=None, parameters=None)\tSHD: {shd.get_shd()} of {num_edges_in_truth}")

From db180c532b7af5ac9ab27f7ead7ef28f6393a721 Mon Sep 17 00:00:00 2001
From: kunwuz <514397511@qq.com>
Date: Wed, 7 Feb 2024 16:42:00 -0500
Subject: [PATCH 35/84] release 0.1.3.8

Signed-off-by: Oliver Schacht
---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 5bf2cf08..f132a020 100644
--- a/setup.py
+++ b/setup.py
@@ -3,7 +3,7 @@
 with open('README.md', 'r') as fh:
     README = fh.read()
 
-VERSION = '0.1.3.7'
+VERSION = '0.1.3.8'
 
 setuptools.setup(
     name='causal-learn',

From ae2cd6ada1ccd1522f2c9f21709fc85b348cba2e Mon Sep 17 00:00:00 2001
From: x3zvawq
Date: Mon, 25 Mar 2024 08:45:41 +0800
Subject: [PATCH 36/84] perf: GRaSP allow disabling stdout output

Signed-off-by: x3zvawq
Signed-off-by: Oliver Schacht
---
 causallearn/search/PermutationBased/GRaSP.py | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/causallearn/search/PermutationBased/GRaSP.py b/causallearn/search/PermutationBased/GRaSP.py
index ba77b3ef..e46fb7b2 100644
--- a/causallearn/search/PermutationBased/GRaSP.py
+++ b/causallearn/search/PermutationBased/GRaSP.py
@@ -80,6 +80,7 @@ def grasp(
     depth: Optional[int] = 3,
     maxP: Optional[float] = None,
     parameters: Optional[Dict[str, Any]] = None,
+    verbose: Optional[bool] = True,
 ) -> Dict[str, Any]:
     """
     Perform a greedy relaxation of the sparsest permutation (GRaSP) algorithm
@@ -206,13 +207,15 @@ def grasp(
         order.bump_edges(len(y_parents))
 
     while dfs(depth - 1, set(), [], order, score):
-        sys.stdout.write("\rGRaSP edge count: %i " % order.get_edges())
-        sys.stdout.flush()
+        if verbose:
+            sys.stdout.write("\rGRaSP edge count: %i " % order.get_edges())
+            sys.stdout.flush()
 
     runtime = time.perf_counter() - runtime
-
-    sys.stdout.write("\nGRaSP completed in: %.2fs \n" % runtime)
-    sys.stdout.flush()
+
+    if verbose:
+        sys.stdout.write("\nGRaSP completed in: %.2fs \n" % runtime)
+        sys.stdout.flush()
 
     for y in range(p):
         for x in order.get_parents(y):

From e0d0038f5678d06deea7967a8595535885d64917 Mon Sep 17 00:00:00 2001
From: x3zvawq
Date: Mon, 25 Mar 2024 08:46:26 +0800
Subject: [PATCH 37/84] perf: GRaSP function return value type

Signed-off-by: x3zvawq
Signed-off-by: Oliver Schacht
---
 causallearn/search/PermutationBased/GRaSP.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/causallearn/search/PermutationBased/GRaSP.py b/causallearn/search/PermutationBased/GRaSP.py
index e46fb7b2..2964dc27 100644
--- a/causallearn/search/PermutationBased/GRaSP.py
+++ b/causallearn/search/PermutationBased/GRaSP.py
@@ -81,7 +81,7 @@ def grasp(
     maxP: Optional[float] = None,
     parameters: Optional[Dict[str, Any]] = None,
     verbose: Optional[bool] = True,
-) -> Dict[str, Any]:
+) -> GeneralGraph:
     """
     Perform a greedy relaxation of the sparsest permutation (GRaSP) algorithm

From deba251443aebb151665f1f9de2ae6670ac4aecb Mon Sep 17 00:00:00 2001
From: x3zvawq
Date: Mon, 25 Mar 2024 08:59:27 +0800
Subject: [PATCH 38/84] chore: GRaSP function docs append verbose

Signed-off-by:
x3zvawq Signed-off-by: Oliver Schacht --- causallearn/search/PermutationBased/GRaSP.py | 1 + 1 file changed, 1 insertion(+) diff --git a/causallearn/search/PermutationBased/GRaSP.py b/causallearn/search/PermutationBased/GRaSP.py index 2964dc27..38cdabf7 100644 --- a/causallearn/search/PermutationBased/GRaSP.py +++ b/causallearn/search/PermutationBased/GRaSP.py @@ -97,6 +97,7 @@ def grasp( parameters['lambda']: regularization parameter parameters['dlabel']: for variables with multi-dimensions, indicate which dimensions belong to the i-th variable. + verbose : whether to print the time cost and verbose output of the algorithm. Returns ------- From 8ebc854c376324bfb76a23d1b4cfa0c4aa9652cd Mon Sep 17 00:00:00 2001 From: Sven Date: Thu, 4 Apr 2024 20:05:08 +0200 Subject: [PATCH 39/84] updating pruning func for varlingam for faster processing Signed-off-by: Sven Signed-off-by: Oliver Schacht --- causallearn/search/FCMBased/lingam/var_lingam.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/causallearn/search/FCMBased/lingam/var_lingam.py b/causallearn/search/FCMBased/lingam/var_lingam.py index ecc7439a..0cb48c82 100644 --- a/causallearn/search/FCMBased/lingam/var_lingam.py +++ b/causallearn/search/FCMBased/lingam/var_lingam.py @@ -319,10 +319,14 @@ def _pruning(self, X, B_taus, causal_order): obj = np.zeros((len(blocks))) exp = np.zeros( (len(blocks), causal_order_no + n_features * self._lags)) - for j, block in enumerate(blocks): - obj[j] = block[0][i] - exp[j:] = np.concatenate( - [block[0][ancestor_indexes].flatten(), block[1:][:].flatten()], axis=0) + + # Create 3D array to hold flattened ancestor indices for each block + ancestor_indexes_flat = blocks[:, 0, ancestor_indexes].reshape(len(blocks), -1) + # Fill obj using advanced indexing + obj[:] = blocks[:, 0, i] + # Fill exp using advanced indexing + exp[:, :causal_order_no] = ancestor_indexes_flat + exp[:, causal_order_no:] = blocks[:, 1:].reshape(len(blocks), -1) # adaptive lasso gamma = 1.0 From c314875394ad6b64ab5746d3119edfb5c972582c Mon Sep 17 00:00:00 2001 From: kenneth-lee-ch Date: Thu, 15 Feb 2024 03:17:35 -0500 Subject: [PATCH 40/84] added rules 8,9,10 for FCI Signed-off-by: kenneth-lee-ch Signed-off-by: Oliver Schacht --- causallearn/search/ConstraintBased/FCI.py | 168 +++++++++++++++++++++- 1 file changed, 167 insertions(+), 1 deletion(-) diff --git a/causallearn/search/ConstraintBased/FCI.py b/causallearn/search/ConstraintBased/FCI.py index 57d2f17a..1d09fbf1 100644 --- a/causallearn/search/ConstraintBased/FCI.py +++ b/causallearn/search/ConstraintBased/FCI.py @@ -15,7 +15,7 @@ from causallearn.utils.cit import * from causallearn.utils.FAS import fas from causallearn.utils.PCUtils.BackgroundKnowledge import BackgroundKnowledge - +from itertools import combinations def traverseSemiDirected(node: Node, edge: Edge) -> Node | None: if node == edge.get_node1(): @@ -542,6 +542,142 @@ def ruleR4B(graph: Graph, maxPathLength: int, data: ndarray, independence_test_m return change_flag + +def rule8(graph: Graph, nodes: List[Node]): + nodes = graph.get_nodes() + changeFlag = False + for node_B in nodes: + adj = graph.get_adjacent_nodes(node_B) + if len(adj) < 2: + continue + + cg = ChoiceGenerator(len(adj), 2) + combination = cg.next() + + while combination is not None: + node_A = adj[combination[0]] + node_C = adj[combination[1]] + combination = cg.next() + + if(graph.get_endpoint(node_A, node_B) == Endpoint.ARROW and graph.get_endpoint(node_B, node_A) == Endpoint.TAIL and \ + graph.get_endpoint(node_B, 
node_C) == Endpoint.ARROW and graph.get_endpoint(node_C, node_B) == Endpoint.TAIL and \ + graph.is_adjacent_to(node_A, node_C) and \ + graph.get_endpoint(node_A, node_C) == Endpoint.ARROW and graph.get_endpoint(node_C, node_A)== Endpoint.CIRCLE) or \ + (graph.get_endpoint(node_A, node_B) == Endpoint.CIRCLE and graph.get_endpoint(node_B, node_A) == Endpoint.TAIL and \ + graph.get_endpoint(node_B, node_C) == Endpoint.ARROW and graph.get_endpoint(node_C, node_B) == Endpoint.TAIL and \ + graph.is_adjacent_to(node_A, node_C) and \ + graph.get_endpoint(node_A, node_C) == Endpoint.ARROW and graph.get_endpoint(node_C, node_A)== Endpoint.CIRCLE): + edge1 = graph.get_edge(node_A, node_C) + graph.remove_edge(edge1) + graph.add_edge(Edge(node_A, node_C,Endpoint.TAIL, Endpoint.ARROW)) + changeFlag = True + + return changeFlag + + + +def is_possible_parent(graph: Graph, potential_parent_node, child_node): + if graph.node_map[potential_parent_node] == graph.node_map[child_node]: + return False + if not graph.is_adjacent_to(potential_parent_node, child_node): + return False + + if graph.get_endpoint(child_node, potential_parent_node) == Endpoint.ARROW or \ + graph.get_endpoint(potential_parent_node, child_node) == Endpoint.TAIL: + return False + else: + return True + + +def find_possible_children(graph: Graph, parent_node, en_nodes=None): + if en_nodes is None: + nodes = graph.get_nodes() + en_nodes = [node for node in nodes if graph.node_map[node] != graph.node_map[parent_node]] + + potential_child_nodes = set() + for potential_node in en_nodes: + if is_possible_parent(graph, potential_parent_node=parent_node, child_node=potential_node): + potential_child_nodes.add(potential_node) + + return potential_child_nodes + +def rule9(graph: Graph, nodes: List[Node]): + changeFlag = False + nodes = graph.get_nodes() + for node_C in nodes: + intoCArrows = graph.get_nodes_into(node_C, Endpoint.ARROW) + for node_A in intoCArrows: + # we want A o--> C + if not graph.get_endpoint(node_C, node_A) == Endpoint.CIRCLE: + continue + + # look for a possibly directed uncovered path s.t. 
B and C are not connected (for the given A o--> C + a_node_idx = graph.node_map[node_A] + c_node_idx = graph.node_map[node_C] + a_adj_nodes = graph.get_adjacent_nodes(node_A) + nodes_set = [node for node in a_adj_nodes if graph.node_map[node] != a_node_idx and graph.node_map[node]!= c_node_idx] + possible_children = find_possible_children(graph, node_A, nodes_set) + for node_B in possible_children: + if graph.is_adjacent_to(node_B, node_C): + continue + if existsSemiDirectedPath(node_from=node_B, node_to=node_C, G=graph): + edge1 = graph.get_edge(node_A, node_C) + graph.remove_edge(edge1) + graph.add_edge(Edge(node_A, node_C, Endpoint.TAIL, Endpoint.ARROW)) + changeFlag = True + break #once we found it, break out since we have already oriented Ao->C to A->C, we want to find the next A + return changeFlag + + +def rule10(graph: Graph): + changeFlag = False + nodes = graph.get_nodes() + for node_C in nodes: + intoCArrows = graph.get_nodes_into(node_C, Endpoint.ARROW) + if len(intoCArrows) < 2: + continue + # get all A where A o-> C + Anodes = [node_A for node_A in intoCArrows if graph.get_endpoint(node_C, node_A) == Endpoint.CIRCLE] + if len(Anodes) == 0: + continue + + for node_A in Anodes: + A_adj_nodes = graph.get_adjacent_nodes(node_A) + en_nodes = [i for i in A_adj_nodes if i is not node_C] + A_possible_children = find_possible_children(graph, parent_node=node_A, en_nodes=en_nodes) + if len(A_possible_children) < 2: + continue + + gen = ChoiceGenerator(len(intoCArrows), 2) + choice = gen.next() + while choice is not None: + node_B = intoCArrows[choice[0]] + node_D = intoCArrows[choice[1]] + + choice = gen.next() + # we want B->C<-D + if graph.get_endpoint(node_C, node_B) != Endpoint.TAIL: + continue + + if graph.get_endpoint(node_C, node_D) != Endpoint.TAIL: + continue + + for children in combinations(A_possible_children, 2): + child_one, child_two = children + if not existsSemiDirectedPath(node_from=child_one, node_to=node_B, G=graph) or \ + not existsSemiDirectedPath(node_from=child_two, node_to=node_D, G=graph): + continue + + if not graph.is_adjacent_to(child_one, child_two): + edge1 = graph.get_edge(node_A, node_C) + graph.remove_edge(edge1) + graph.add_edge(Edge(node_A, node_C, Endpoint.TAIL, Endpoint.ARROW)) + changeFlag = True + break #once we found it, break out since we have already oriented Ao->C to A->C, we want to find the next A + + return changeFlag + + def visibleEdgeHelperVisit(graph: Graph, node_c: Node, node_a: Node, node_b: Node, path: List[Node]) -> bool: if path.__contains__(node_a): return False @@ -692,6 +828,29 @@ def _contains_all(set_a: Set[Node], set_b: Set[Node]): + + + + + + + + + + + + + + + + + + + + + + + def fci(dataset: ndarray, independence_test_method: str=fisherz, alpha: float = 0.05, depth: int = -1, max_path_length: int = -1, verbose: bool = False, background_knowledge: BackgroundKnowledge | None = None, show_progress: bool = True, **kwargs) -> Tuple[Graph, List[Edge]]: @@ -787,6 +946,13 @@ def fci(dataset: ndarray, independence_test_method: str=fisherz, alpha: float = if verbose: print("Epoch") + # rule 8 + change_flag = rule8(graph,nodes) + # rule 9 + change_flag = rule9(graph, nodes) + # rule 10 + change_flag = rule10(graph) + graph.set_pag(True) edges = get_color_edges(graph) From 08add056c3c1f41a2aaa54709484f6803794d496 Mon Sep 17 00:00:00 2001 From: kenneth-lee-ch Date: Thu, 15 Feb 2024 03:19:05 -0500 Subject: [PATCH 41/84] clean up whitespace Signed-off-by: kenneth-lee-ch Signed-off-by: Oliver Schacht --- 
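Usage sketch for the new R8-R10 orientation rules (not part of the patches above; the data below is a hypothetical stand-in): the rules run inside fci() near the end of the orientation phase, so they are exercised through the ordinary public API.

```
import numpy as np
from causallearn.search.ConstraintBased.FCI import fci

# Hypothetical data: 1000 samples over 4 variables; any numeric ndarray works.
X = np.random.randn(1000, 4)

# rule8/rule9/rule10 fire after the R1-R4 orientation loop without extra
# flags; fci() returns the estimated PAG and its list of edges.
g, edges = fci(X, independence_test_method="fisherz", alpha=0.05)
print(g)
```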
causallearn/search/ConstraintBased/FCI.py | 24 ----------------------- 1 file changed, 24 deletions(-) diff --git a/causallearn/search/ConstraintBased/FCI.py b/causallearn/search/ConstraintBased/FCI.py index 1d09fbf1..44f46f98 100644 --- a/causallearn/search/ConstraintBased/FCI.py +++ b/causallearn/search/ConstraintBased/FCI.py @@ -827,30 +827,6 @@ def _contains_all(set_a: Set[Node], set_b: Set[Node]): break - - - - - - - - - - - - - - - - - - - - - - - - def fci(dataset: ndarray, independence_test_method: str=fisherz, alpha: float = 0.05, depth: int = -1, max_path_length: int = -1, verbose: bool = False, background_knowledge: BackgroundKnowledge | None = None, show_progress: bool = True, **kwargs) -> Tuple[Graph, List[Edge]]: From 4220038099c039f948d0c652a4529e0f997f97ea Mon Sep 17 00:00:00 2001 From: kunwuz <514397511@qq.com> Date: Thu, 11 Apr 2024 12:23:14 -0400 Subject: [PATCH 42/84] updated doc Signed-off-by: Oliver Schacht --- README.md | 13 ++++++++----- docs/source/getting_started.rst | 16 +++++++++------- 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 1b976934..20686973 100644 --- a/README.md +++ b/README.md @@ -76,10 +76,13 @@ Although causal-learn provides python implementations for some causal discovery Please cite as: ``` -@article{causallearn, - title={Causal-learn: Causal Discovery in Python}, - author={Yujia Zheng and Biwei Huang and Wei Chen and Joseph Ramsey and Mingming Gong and Ruichu Cai and Shohei Shimizu and Peter Spirtes and Kun Zhang}, - journal={arXiv preprint arXiv:2307.16405}, - year={2023} +@article{zheng2024causal, + title={Causal-learn: Causal discovery in python}, + author={Zheng, Yujia and Huang, Biwei and Chen, Wei and Ramsey, Joseph and Gong, Mingming and Cai, Ruichu and Shimizu, Shohei and Spirtes, Peter and Zhang, Kun}, + journal={Journal of Machine Learning Research}, + volume={25}, + number={60}, + pages={1--8}, + year={2024} } ``` \ No newline at end of file diff --git a/docs/source/getting_started.rst b/docs/source/getting_started.rst index f86de812..c306be55 100644 --- a/docs/source/getting_started.rst +++ b/docs/source/getting_started.rst @@ -67,7 +67,7 @@ Contributors **Developers**: -- Wei Chen, Biwei Huang, Yuequn Liu, Zhiyi Huang, Feng Xie, Haoyue Dai: :ref:`PC `, :ref:`FCI `, :ref:`GES `, :ref:`GIN `, and :ref:`graph operations `. +- Wei Chen, Biwei Huang, Yuequn Liu, Zhiyi Huang, Feng Xie, Haoyue Dai, Xiaokai Huang: :ref:`PC `, :ref:`FCI `, :ref:`GES `, :ref:`GIN `, and :ref:`graph operations `. - Mingming Gong, Erdun Gao, Aoqi Zuo: :ref:`PNL `, :ref:`ANM `, :ref:`Granger causality `, and :ref:`KCI `. - Shohei Shimizu, Takashi Nicholas Maeda, Takashi Ikeuchi: :ref:`LiNGAM-based methods `. - Madelyn Glymour: several helpers. @@ -89,13 +89,15 @@ Please cite as: .. 
code-block:: none

-   @article{causallearn,
-     title={Causal-learn: Causal Discovery in Python},
-     author={Yujia Zheng and Biwei Huang and Wei Chen and Joseph Ramsey and Mingming Gong and Ruichu Cai and Shohei Shimizu and Peter Spirtes and Kun Zhang},
-     journal={arXiv preprint arXiv:2307.16405},
-     year={2023}
+   @article{zheng2024causal,
+     title={Causal-learn: Causal discovery in python},
+     author={Zheng, Yujia and Huang, Biwei and Chen, Wei and Ramsey, Joseph and Gong, Mingming and Cai, Ruichu and Shimizu, Shohei and Spirtes, Peter and Zhang, Kun},
+     journal={Journal of Machine Learning Research},
+     volume={25},
+     number={60},
+     pages={1--8},
+     year={2024}
    }
-

From c3a6ac3faa86366662fd7e8da289e9e2fbf00738 Mon Sep 17 00:00:00 2001
From: kunwuz <514397511@qq.com>
Date: Fri, 12 Apr 2024 14:07:07 -0400
Subject: [PATCH 43/84] updated readme

Signed-off-by: Oliver Schacht
---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 20686973..09ef5fd1 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 # causal-learn: Causal Discovery in Python
 
-Causal-learn is a python package for causal discovery that implements both classical and state-of-the-art causal discovery algorithms, which is a Python translation and extension of [Tetrad](https://github.com/cmu-phil/tetrad).
+Causal-learn ([documentation](https://causal-learn.readthedocs.io/en/latest/), [paper](https://jmlr.org/papers/volume25/23-0970/23-0970.pdf)) is a python package for causal discovery that implements both classical and state-of-the-art causal discovery algorithms, which is a Python translation and extension of [Tetrad](https://github.com/cmu-phil/tetrad).
 
 The package is actively being developed. Feedbacks (issues, suggestions, etc.) are highly encouraged.

From 201f1b5f2464ee7d835f975f436d16ae76defacc Mon Sep 17 00:00:00 2001
From: Sven Pieper <101557437+svenpieper@users.noreply.github.com>
Date: Mon, 8 Apr 2024 10:38:17 +0200
Subject: [PATCH 44/84] Update FCI.py for fixing isNoncollider

Editing isNoncollider() for the FCI algorithm; fixing a bug: if sep_set does not contain a certain node combination, this will lead to a KeyError. With this fix, None will automatically be set by the get func.
Also, get_node_map() will only be called once to check sep_set.

Signed-off-by: Sven Pieper <101557437+svenpieper@users.noreply.github.com>
Signed-off-by: Oliver Schacht
---
 causallearn/search/ConstraintBased/FCI.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/causallearn/search/ConstraintBased/FCI.py b/causallearn/search/ConstraintBased/FCI.py
index 44f46f98..95835411 100644
--- a/causallearn/search/ConstraintBased/FCI.py
+++ b/causallearn/search/ConstraintBased/FCI.py
@@ -320,7 +320,8 @@ def rulesR1R2cycle(graph: Graph, bk: BackgroundKnowledge | None, changeFlag: boo
 
 def isNoncollider(graph: Graph, sep_sets: Dict[Tuple[int, int], Set[int]], node_i: Node, node_j: Node,
                   node_k: Node) -> bool:
-    sep_set = sep_sets[(graph.get_node_map()[node_i], graph.get_node_map()[node_k])]
+    node_map = graph.get_node_map()
+    sep_set = sep_sets.get((node_map[node_i], node_map[node_k]))
     return sep_set is not None and sep_set.__contains__(graph.get_node_map()[node_j])
 

From 9ce03886233c29b38bdb2d11ae35a2206805bf7f Mon Sep 17 00:00:00 2001
From: Sven Pieper <101557437+svenpieper@users.noreply.github.com>
Date: Mon, 8 Apr 2024 10:43:50 +0200
Subject: [PATCH 45/84] Update FCI.py - using node_map to only get node_map func once

Signed-off-by: Sven Pieper <101557437+svenpieper@users.noreply.github.com>
Signed-off-by: Oliver Schacht
---
 causallearn/search/ConstraintBased/FCI.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/causallearn/search/ConstraintBased/FCI.py b/causallearn/search/ConstraintBased/FCI.py
index 95835411..8a936c8c 100644
--- a/causallearn/search/ConstraintBased/FCI.py
+++ b/causallearn/search/ConstraintBased/FCI.py
@@ -322,7 +322,7 @@ def isNoncollider(graph: Graph, sep_sets: Dict[Tuple[int, int], Set[int]], node_
                   node_k: Node) -> bool:
     node_map = graph.get_node_map()
     sep_set = sep_sets.get((node_map[node_i], node_map[node_k]))
-    return sep_set is not None and sep_set.__contains__(graph.get_node_map()[node_j])
+    return sep_set is not None and sep_set.__contains__(node_map[node_j])
 

From ae9b93a00e805dae6437d087b152844c520956ac Mon Sep 17 00:00:00 2001
From: OliverSchacht <65898638+OliverSchacht@users.noreply.github.com>
Date: Tue, 28 May 2024 12:01:48 +0200
Subject: [PATCH 46/84] update UInd test

Signed-off-by: Oliver Schacht
---
 causallearn/utils/FastKCI/FastKCI.py | 102 ++++++++++++++++++++-------
 causallearn/utils/cit.py             |   4 +-
 2 files changed, 80 insertions(+), 26 deletions(-)

diff --git a/causallearn/utils/FastKCI/FastKCI.py b/causallearn/utils/FastKCI/FastKCI.py
index d9e2d245..5b614dd9 100644
--- a/causallearn/utils/FastKCI/FastKCI.py
+++ b/causallearn/utils/FastKCI/FastKCI.py
@@ -364,6 +364,8 @@ def __init__(self, K=10, J=8, alpha=500, trimming_thresh = 1e-3):
         self.J = J
         self.alpha = alpha
         self.trimming_thresh = trimming_thresh
+        self.nullss = 5000
+        self.eig_thresh = 1e-5
 
     # TODO: Adjust to causal-learn API
     def compute_pvalue(self, data_x=None, data_y=None):
@@ -384,18 +386,14 @@ def compute_pvalue(self, data_x=None, data_y=None):
         self.data_y = data_y
         self.n = data_x.shape[0]
 
-        Z_proposal = Parallel(n_jobs=-1)(delayed(self.partition_data)(self.data) for i in range(self.J))
+        Z_proposal = Parallel(n_jobs=-1)(delayed(self.partition_data)() for i in range(self.J))
         self.Z_proposal, self.prob_Y = zip(*Z_proposal)
         block_res = Parallel(n_jobs=-1)(delayed(self.pvalue_onblocks)(self.Z_proposal[i]) for i in range(self.J))
-        test_stat, mean_appr,
var_appr, log_likelihood = zip(*block_res) - self.prob_Y = np.array(self.prob_Y) - self.prob_weights = np.around(np.exp(log_likelihood-logsumexp(log_likelihood))) + test_stat, null_samples, log_likelihood = zip(*block_res) + self.prob_weights = np.around(np.exp(log_likelihood-logsumexp(log_likelihood)), 6) self.test_stat = np.average(np.array(test_stat), weights = self.prob_weights) - self.mean_appr = np.average(np.array(mean_appr), weights = self.prob_weights) - self.var_appr = np.average(np.array(var_appr), weights = self.prob_weights) - - k_appr, theta_appr = self.get_kappa(self.mean_appr, self.var_appr) - self.pvalue = 1 - stats.gamma.cdf(self.test_stat, k_appr, 0, theta_appr) + self.null_samples = np.average(null_samples, axis = 0, weights = self.prob_weights) + self.pvalue = np.sum(self.null_samples > self.test_stat) / float(self.nullss) return self.pvalue, self.test_stat @@ -410,7 +408,7 @@ def partition_data(self): """ Y_mean = self.data_y.mean(axis=0) Y_sd = self.data_y.std(axis=0) - mu_k = np.random.normal(Y_mean, Y_sd, size = (self.K,self.shape[1])) + mu_k = np.random.normal(Y_mean, Y_sd, size = (self.K,self.data_y.shape[1])) sigma_k = np.eye(self.data_y.shape[1]) pi_j = np.random.dirichlet([self.alpha]*self.K) ll = np.tile(np.log(pi_j),(self.n,1)) @@ -425,25 +423,30 @@ def partition_data(self): def pvalue_onblocks(self, Z_proposal): unique_Z_j = np.unique(Z_proposal) test_stat = 0 - prob_weight = 0 - mean_appr = 0 - var_appr = 0 + log_likelihood = 0 + null_samples = np.zeros((1,self.nullss)) for k in unique_Z_j: K_mask = (Z_proposal==k) - X_k = np.copy(self.data[0][K_mask]) - Y_k = np.copy(self.data[1][K_mask]) + X_k = np.copy(self.data_x[K_mask]) + Y_k = np.copy(self.data_y[K_mask]) Kx = self.kernel_matrix(X_k) Ky = self.kernel_matrix(Y_k) - n_block = Kx.shape[0] - - Kxc = Kernel.center_kernel_matrix(Kx) - Kyc = Kernel.center_kernel_matrix(Ky) - test_stat += np.einsum('ij,ji->', Kxc, Kyc) - mean_appr += np.trace(Kxc) * np.trace(Kyc) / n_block - var_appr += 2 * np.sum(Kxc ** 2) * np.sum(Kyc ** 2) / n_block / n_block - var_appr /= unique_Z_j.shape[0] - return (test_stat, mean_appr, var_appr, prob_weight) + + v_stat, Kxc, Kyc = self.HSIC_V_statistic(Kx, Ky) + + null_samples += self.null_sample_spectral(Kxc, Kyc) + test_stat += v_stat + + gpx = GaussianProcessRegressor() + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=Warning) + # P(X|Y) + gpx.fit(X = Y_k, y = X_k) + likelihood = gpx.log_marginal_likelihood_value_ + log_likelihood += likelihood + + return test_stat, null_samples, log_likelihood def kernel_matrix(self, data): """ @@ -478,3 +481,54 @@ def get_kappa(self, mean_appr, var_appr): k_appr = mean_appr ** 2 / var_appr theta_appr = var_appr / mean_appr return k_appr, theta_appr + + def null_sample_spectral(self, Kxc, Kyc): + """ + Simulate data from null distribution + + Parameters + ---------- + Kxc: centralized kernel matrix for data_x (nxn) + Kyc: centralized kernel matrix for data_y (nxn) + + Returns + _________ + null_dstr: samples from the null distribution + + """ + T = Kxc.shape[0] + if T > 1000: + num_eig = int(np.floor(T / 2)) + else: + num_eig = T + lambdax = np.linalg.eigvalsh(Kxc) + lambday = np.linalg.eigvalsh(Kyc) + lambdax = -np.sort(-lambdax) + lambday = -np.sort(-lambday) + lambdax = lambdax[0:num_eig] + lambday = lambday[0:num_eig] + lambda_prod = np.dot(lambdax.reshape(num_eig, 1), lambday.reshape(1, num_eig)).reshape( + (num_eig ** 2, 1)) + lambda_prod = lambda_prod[lambda_prod > lambda_prod.max() * self.eig_thresh] + 
f_rand = np.random.chisquare(1, (lambda_prod.shape[0], self.nullss)) + null_dstr = lambda_prod.T.dot(f_rand) / T + return null_dstr + + def HSIC_V_statistic(self, Kx, Ky): + """ + Compute V test statistic from kernel matrices Kx and Ky + Parameters + ---------- + Kx: kernel matrix for data_x (nxn) + Ky: kernel matrix for data_y (nxn) + + Returns + _________ + Vstat: HSIC v statistics + Kxc: centralized kernel matrix for data_x (nxn) + Kyc: centralized kernel matrix for data_y (nxn) + """ + Kxc = Kernel.center_kernel_matrix(Kx) + Kyc = Kernel.center_kernel_matrix(Ky) + V_stat = np.einsum('ij,ij->', Kxc, Kyc) + return V_stat, Kxc, Kyc diff --git a/causallearn/utils/cit.py b/causallearn/utils/cit.py index d842281a..be168e40 100644 --- a/causallearn/utils/cit.py +++ b/causallearn/utils/cit.py @@ -201,13 +201,13 @@ class FastKCI(CIT_Base): def __init__(self, data, **kwargs): super().__init__(data, **kwargs) kci_ui_kwargs = {k: v for k, v in kwargs.items() if k in - ['kernelX', 'kernelY', 'null_ss', 'approx', 'est_width', 'polyd', 'kwidthx', 'kwidthy']} + ['K', 'J', 'alpha']} kci_ci_kwargs = {k: v for k, v in kwargs.items() if k in ['K', 'J', 'alpha', 'use_gp']} self.check_cache_method_consistent( 'kci', hashlib.md5(json.dumps(kci_ci_kwargs, sort_keys=True).encode('utf-8')).hexdigest()) self.assert_input_data_is_valid() - self.kci_ui = KCI_UInd(**kci_ui_kwargs) + self.kci_ui = FastKCI_UInd(**kci_ui_kwargs) self.kci_ci = FastKCI_CInd(**kci_ci_kwargs) def __call__(self, X, Y, condition_set=None): From 430f3e5a1ba0eceffefa7bc768d02abc93ef4d3f Mon Sep 17 00:00:00 2001 From: OliverSchacht <65898638+OliverSchacht@users.noreply.github.com> Date: Wed, 18 Sep 2024 13:33:33 +0200 Subject: [PATCH 47/84] add rcit Signed-off-by: Oliver Schacht --- causallearn/utils/RCIT/RCIT.py | 251 +++++++++++++++++++++++++++++ causallearn/utils/RCIT/__init__.py | 0 causallearn/utils/cit.py | 26 +++ setup.py | 3 +- 4 files changed, 279 insertions(+), 1 deletion(-) create mode 100644 causallearn/utils/RCIT/RCIT.py create mode 100644 causallearn/utils/RCIT/__init__.py diff --git a/causallearn/utils/RCIT/RCIT.py b/causallearn/utils/RCIT/RCIT.py new file mode 100644 index 00000000..c9c931cb --- /dev/null +++ b/causallearn/utils/RCIT/RCIT.py @@ -0,0 +1,251 @@ +import numpy as np +import itertools +from scipy.stats import chi2 +from scipy.linalg import pinv +from momentchi2 import hbe, sw, lpb4 +from scipy.spatial.distance import pdist, squareform + + +class RCIT(object): + def __init__(self, approx="lpd4", num_f=100, num_f2=5, rcit=True): + self.approx = approx + self.num_f = num_f + self.num_f2 = num_f2 + self.rcit = rcit + + def compute_pvalue(self, data_x, data_y, data_z): + d = data_z.shape[1] + r = data_x.shape[0] + r1 = 500 if (r > 500) else r + + data_x = (data_x - data_x.mean(axis=0)) / data_x.std(axis=0, ddof=1) + data_y = (data_y - data_y.mean(axis=0)) / data_y.std(axis=0, ddof=1) + data_z = (data_z - data_z.mean(axis=0)) / data_z.std(axis=0, ddof=1) + + if self.rcit: + data_y = np.column_stack((data_y, data_z)) + + sigma = dict() + for key, value in [("x", data_x), ("y", data_y), ("z", data_z)]: + distances = pdist(value[:r1, :], metric='euclidean') + flattened_distances = squareform(distances).ravel() + non_zero_distances = flattened_distances[flattened_distances != 0] + sigma[key] = np.median(non_zero_distances) + + four_z = self.random_fourier_features(data_z, num_f=self.num_f, sigma=sigma["z"]) + four_x = self.random_fourier_features(data_x, num_f=self.num_f2, sigma=sigma["x"]) + four_y = 
self.random_fourier_features(data_y, num_f=self.num_f2, sigma=sigma["y"]) + + f_x = (four_x - four_x.mean(axis=0)) / four_x.std(axis=0, ddof=1) + f_y = (four_y - four_y.mean(axis=0)) / four_y.std(axis=0, ddof=1) + f_z = (four_z - four_z.mean(axis=0)) / four_z.std(axis=0, ddof=1) + + Cxy = f_x.T @ f_y / (f_x.shape[0] - 1) + + Czz = np.cov(f_z, rowvar=False) + + regularized_Czz = Czz + np.eye(self.num_f) * 1e-10 + L = np.linalg.cholesky(regularized_Czz) + i_Czz = np.linalg.inv(L).T.dot(np.linalg.inv(L)) + + Cxz = f_x.T @ f_z / (f_x.shape[0] - 1) + Czy = f_z.T @ f_y / (f_z.shape[0] - 1) + + z_i_Czz = f_z @ i_Czz + e_x_z = z_i_Czz @ Cxz.T + e_y_z = z_i_Czz @ Czy + + res_x = f_x - e_x_z + res_y = f_y - e_y_z + + if self.num_f2 == 1: + self.approx = "hbe" + + if self.approx == "perm": + Cxy_z = self.matrix_cov(res_x, res_y) + sta = r * np.sum(Cxy_z**2) + + nperm = 1000 + + stas = [] + for _ in range(nperm): + perm = np.random.choice(np.arange(r), size=r, replace=False) + Cxy = self.matrix_cov(res_x[perm, ], res_y) + sta_p = r * np.sum(Cxy**2) + stas.append(sta_p) + + p = 1 - (np.sum(sta >= stas) / len(stas)) + + else: + Cxy_z = Cxy - Cxz @ i_Czz @ Czy + sta = r * np.sum(Cxy_z**2) + + d = list(itertools.product(range(f_x.shape[1]), range(f_y.shape[1]))) + res = np.array([res_x[:, idx_x] * res_y[:, idx_y] for idx_x, idx_y in d]).T + Cov = 1/r * res.T @ res + + if self.approx == "chi2": + i_Cov = pinv(Cov) + + sta = r * (np.dot(Cxy_z.flatten(), np.dot(i_Cov, Cxy_z.flatten()))) + p = 1 - chi2.cdf(sta, Cxy_z.size) + + else: + eigenvalues, eigenvectors = np.linalg.eigh(Cov) + eig_d = eigenvalues[eigenvalues > 0] + + if self.approx == "gamma": + p = 1 - sw(eig_d, sta) + + elif self.approx == "hbe": + p = 1 - hbe(eig_d, sta) + + elif self.approx == "lpd4": + try: + p = 1 - lpb4(eig_d, sta) + except Exception: + p = 1 - hbe(eig_d, sta) + if np.isnan(p): + p = 1 - hbe(eig_d, sta) + + if (p < 0): + p = 0 + + return p, sta + + def random_fourier_features(self, x, w=None, b=None, num_f=None, sigma=None): + + if num_f is None: + num_f = 25 + + r = x.shape[0] + c = x.shape[1] + + if ((sigma == 0) | (sigma is None)): + sigma = 1 + + if w is None: + w = (1/sigma) * np.random.normal(size=(num_f, c)) + b = np.tile(2*np.pi*np.random.uniform(size=(num_f, 1)), (1, r)) + + feat = np.sqrt(2) * (np.cos(w[0:num_f, 0:c] @ x.T + b[0:num_f, :])).T + + return feat + + def matrix_cov(mat_a, mat_b): + n_obs = mat_a.shape[0] + + assert mat_a.shape == mat_b.shape + mat_a = mat_a - mat_a.mean(axis=0) + mat_b = mat_b - mat_b.mean(axis=0) + + mat_cov = mat_a.T @ mat_b / (n_obs - 1) + + return mat_cov + + +class RIT(object): + def __init__(self, approx="lpd4"): + self.approx = approx + + def compute_pvalue(self, data_x, data_y): + r = data_x.shape[0] + r1 = 500 if (r > 500) else r + + data_x = (data_x - data_x.mean(axis=0)) / data_x.std(axis=0, ddof=1) + data_y = (data_y - data_y.mean(axis=0)) / data_y.std(axis=0, ddof=1) + + sigma = dict() + for key, value in [("x", data_x), ("y", data_y)]: + distances = pdist(value[:r1, :], metric='euclidean') + flattened_distances = squareform(distances).ravel() + non_zero_distances = flattened_distances[flattened_distances != 0] + sigma[key] = np.median(non_zero_distances) + + four_x = self.random_fourier_features(data_x, num_f=5, sigma=sigma["x"]) + four_y = self.random_fourier_features(data_y, num_f=5, sigma=sigma["y"]) + + f_x = (four_x - four_x.mean(axis=0)) / four_x.std(axis=0, ddof=1) + f_y = (four_y - four_y.mean(axis=0)) / four_y.std(axis=0, ddof=1) + + Cxy = self.matrix_cov(f_x, f_y) + 
sta = r * np.sum(Cxy**2) + + if self.approx == "perm": + nperm = 1000 + + stas = [] + for _ in range(nperm): + perm = np.random.choice(np.arange(r), size=r, replace=False) + Cxy = self.matrix_cov(f_x[perm, ], f_y) + sta_p = r * np.sum(Cxy**2) + stas.append(sta_p) + + p = 1 - (np.sum(sta >= stas) / len(stas)) + + else: + res_x = f_x - np.meshgrid(f_x.mean(axis=0), (r, 1)) + res_y = f_y - np.meshgrid(f_y.mean(axis=0), (r, 1)) + + d = list(itertools.product(range(f_x.shape[1]), range(f_y.shape[1]))) + res = np.array([res_x[:, idx_x] * res_y[:, idx_y] for idx_x, idx_y in d]).T + Cov = 1/r * res.T @ res + + if self.approx == "chi2": + i_Cov = pinv(Cov) + + sta = r * (np.dot(Cxy.flatten(), np.dot(i_Cov, Cxy.flatten()))) + p = 1 - chi2.cdf(sta, Cxy.size) + + else: + eigenvalues, eigenvectors = np.linalg.eigh(Cov) + eig_d = eigenvalues[eigenvalues > 0] + + if self.approx == "gamma": + p = 1 - sw(eig_d, sta) + + elif self.approx == "hbe": + p = 1 - hbe(eig_d, sta) + + elif self.approx == "lpd4": + try: + p = 1 - lpb4(eig_d, sta) + except Exception: + p = 1 - hbe(eig_d, sta) + if np.isnan(p): + p = 1 - hbe(eig_d, sta) + + if (p < 0): + p = 0 + + return p, sta + + def random_fourier_features(self, x, w=None, b=None, num_f=None, sigma=None): + + if num_f is None: + num_f = 25 + + r = x.shape[0] + c = x.shape[1] + + if ((sigma == 0) | (sigma is None)): + sigma = 1 + + if w is None: + w = (1/sigma) * np.random.normal(size=(num_f, c)) + b = np.tile(2*np.pi*np.random.uniform(size=(num_f, 1)), (1, r)) + + feat = np.sqrt(2) * (np.cos(w[0:num_f, 0:c] @ x.T + b[0:num_f, :])).T + + return feat + + def matrix_cov(mat_a, mat_b): + n_obs = mat_a.shape[0] + + assert mat_a.shape == mat_b.shape + mat_a = mat_a - mat_a.mean(axis=0) + mat_b = mat_b - mat_b.mean(axis=0) + + mat_cov = mat_a.T @ mat_b / (n_obs - 1) + + return mat_cov diff --git a/causallearn/utils/RCIT/__init__.py b/causallearn/utils/RCIT/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/causallearn/utils/cit.py b/causallearn/utils/cit.py index be168e40..c8858a0b 100644 --- a/causallearn/utils/cit.py +++ b/causallearn/utils/cit.py @@ -6,6 +6,7 @@ from causallearn.utils.KCI.KCI import KCI_CInd, KCI_UInd from causallearn.utils.FastKCI.FastKCI import FastKCI_CInd, FastKCI_UInd +from causallearn.utils.RCIT.RCIT import RCIT, RIT from causallearn.utils.PCUtils import Helper CONST_BINCOUNT_UNIQUE_THRESHOLD = 1e5 @@ -14,6 +15,7 @@ mv_fisherz = "mv_fisherz" mc_fisherz = "mc_fisherz" kci = "kci" +rcit = "rcit" fastkci = "fastkci" chisq = "chisq" gsq = "gsq" @@ -36,6 +38,8 @@ def CIT(data, method='fisherz', **kwargs): return KCI(data, **kwargs) elif method == fastkci: return FastKCI(data, **kwargs) + elif method == rcit: + return RCIT(data, **kwargs) elif method in [chisq, gsq]: return Chisq_or_Gsq(data, method_name=method, **kwargs) elif method == mv_fisherz: @@ -218,6 +222,28 @@ def __call__(self, X, Y, condition_set=None): self.kci_ci.compute_pvalue(self.data[:, Xs], self.data[:, Ys], self.data[:, condition_set])[0] self.pvalue_cache[cache_key] = p return p + +class RCIT(CIT_Base): + def __init__(self, data, **kwargs): + super().__init__(data, **kwargs) + rit_kwargs = {k: v for k, v in kwargs.items() if k in + ['approx']} + rcit_kwargs = {k: v for k, v in kwargs.items() if k in + ['approx', 'num_f', 'num_f2', 'rcit']} + self.check_cache_method_consistent( + 'kci', hashlib.md5(json.dumps(rcit_kwargs, sort_keys=True).encode('utf-8')).hexdigest()) + self.assert_input_data_is_valid() + self.rit = RIT(**rit_kwargs) + self.rcit = 
RCIT(**rcit_kwargs) + + def __call__(self, X, Y, condition_set=None): + # Kernel-based conditional independence test. + Xs, Ys, condition_set, cache_key = self.get_formatted_XYZ_and_cachekey(X, Y, condition_set) + if cache_key in self.pvalue_cache: return self.pvalue_cache[cache_key] + p = self.rit.compute_pvalue(self.data[:, Xs], self.data[:, Ys])[0] if len(condition_set) == 0 else \ + self.rcit.compute_pvalue(self.data[:, Xs], self.data[:, Ys], self.data[:, condition_set])[0] + self.pvalue_cache[cache_key] = p + return p class Chisq_or_Gsq(CIT_Base): def __init__(self, data, method_name, **kwargs): diff --git a/setup.py b/setup.py index f132a020..f40a536d 100644 --- a/setup.py +++ b/setup.py @@ -22,7 +22,8 @@ 'matplotlib', 'networkx', 'pydot', - 'tqdm' + 'tqdm', + 'momentchi2' ], url='https://github.com/py-why/causal-learn', packages=setuptools.find_packages(), From 1a700b9002746371066beaf13f6073dd3040e9d7 Mon Sep 17 00:00:00 2001 From: OliverSchacht <65898638+OliverSchacht@users.noreply.github.com> Date: Thu, 19 Sep 2024 10:15:47 +0200 Subject: [PATCH 48/84] redo naming of rcit class Signed-off-by: Oliver Schacht --- causallearn/utils/cit.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/causallearn/utils/cit.py b/causallearn/utils/cit.py index c8858a0b..2d664e9b 100644 --- a/causallearn/utils/cit.py +++ b/causallearn/utils/cit.py @@ -6,7 +6,8 @@ from causallearn.utils.KCI.KCI import KCI_CInd, KCI_UInd from causallearn.utils.FastKCI.FastKCI import FastKCI_CInd, FastKCI_UInd -from causallearn.utils.RCIT.RCIT import RCIT, RIT +from causallearn.utils.RCIT.RCIT import RCIT as RCIT_CInd +from causallearn.utils.RCIT.RCIT import RIT as RCIT_UInd from causallearn.utils.PCUtils import Helper CONST_BINCOUNT_UNIQUE_THRESHOLD = 1e5 @@ -222,7 +223,7 @@ def __call__(self, X, Y, condition_set=None): self.kci_ci.compute_pvalue(self.data[:, Xs], self.data[:, Ys], self.data[:, condition_set])[0] self.pvalue_cache[cache_key] = p return p - + class RCIT(CIT_Base): def __init__(self, data, **kwargs): super().__init__(data, **kwargs) @@ -233,8 +234,8 @@ def __init__(self, data, **kwargs): self.check_cache_method_consistent( 'kci', hashlib.md5(json.dumps(rcit_kwargs, sort_keys=True).encode('utf-8')).hexdigest()) self.assert_input_data_is_valid() - self.rit = RIT(**rit_kwargs) - self.rcit = RCIT(**rcit_kwargs) + self.rit = RCIT_UInd(**rit_kwargs) + self.rcit = RCIT_CInd(**rcit_kwargs) def __call__(self, X, Y, condition_set=None): # Kernel-based conditional independence test. 
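A minimal sketch of calling the randomized tests through the CIT factory (synthetic stand-in data; default hyperparameters assumed): with the renaming above, "rcit" dispatches to RIT for unconditional queries and to RCIT once a conditioning set is given, and kwargs such as approx, num_f and num_f2 are forwarded.

```
import numpy as np
from causallearn.utils.cit import CIT

rng = np.random.default_rng(0)
data = rng.standard_normal((500, 3))  # synthetic data with three columns

# Build the test object once; p values are cached per (X, Y, Z) query.
rcit_test = CIT(data, method="rcit", approx="lpd4", num_f=100, num_f2=5)
p_marginal = rcit_test(0, 1)           # RIT: X0 independent of X1?
p_conditional = rcit_test(0, 1, (2,))  # RCIT: X0 independent of X1 given X2?
print(p_marginal, p_conditional)
```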
From 043d915a2903480259f7e79f74a525deb3c828fa Mon Sep 17 00:00:00 2001
From: OliverSchacht <65898638+OliverSchacht@users.noreply.github.com>
Date: Thu, 19 Sep 2024 11:11:44 +0200
Subject: [PATCH 49/84] debugging

Signed-off-by: Oliver Schacht
---
 causallearn/utils/RCIT/RCIT.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/causallearn/utils/RCIT/RCIT.py b/causallearn/utils/RCIT/RCIT.py
index c9c931cb..e1bb8dbc 100644
--- a/causallearn/utils/RCIT/RCIT.py
+++ b/causallearn/utils/RCIT/RCIT.py
@@ -132,7 +132,7 @@ def random_fourier_features(self, x, w=None, b=None, num_f=None, sigma=None):
 
         return feat
 
-    def matrix_cov(mat_a, mat_b):
+    def matrix_cov(self, mat_a, mat_b):
         n_obs = mat_a.shape[0]
 
         assert mat_a.shape == mat_b.shape
@@ -184,8 +184,8 @@ def compute_pvalue(self, data_x, data_y):
             p = 1 - (np.sum(sta >= stas) / len(stas))
 
         else:
-            res_x = f_x - np.meshgrid(f_x.mean(axis=0), (r, 1))
-            res_y = f_y - np.meshgrid(f_y.mean(axis=0), (r, 1))
+            res_x = f_x - f_x.mean(axis=0)
+            res_y = f_y - f_y.mean(axis=0)
 
             d = list(itertools.product(range(f_x.shape[1]), range(f_y.shape[1])))
             res = np.array([res_x[:, idx_x] * res_y[:, idx_y] for idx_x, idx_y in d]).T
@@ -239,7 +239,7 @@ def random_fourier_features(self, x, w=None, b=None, num_f=None, sigma=None):
 
         return feat
 
-    def matrix_cov(mat_a, mat_b):
+    def matrix_cov(self, mat_a, mat_b):
         n_obs = mat_a.shape[0]
 
         assert mat_a.shape == mat_b.shape

From f172a267c0144ace3d0a52d66563985de92871f8 Mon Sep 17 00:00:00 2001
From: OliverSchacht <65898638+OliverSchacht@users.noreply.github.com>
Date: Wed, 16 Oct 2024 15:09:09 +0200
Subject: [PATCH 50/84] add docstrings

Signed-off-by: Oliver Schacht
---
 causallearn/utils/RCIT/RCIT.py | 156 ++++++++++++++++++++++++++++-
 1 file changed, 154 insertions(+), 2 deletions(-)

diff --git a/causallearn/utils/RCIT/RCIT.py b/causallearn/utils/RCIT/RCIT.py
index e1bb8dbc..11608ed0 100644
--- a/causallearn/utils/RCIT/RCIT.py
+++ b/causallearn/utils/RCIT/RCIT.py
@@ -7,13 +7,56 @@
 
 
 class RCIT(object):
+    """
+    Python implementation of the Randomized Conditional Independence Test (RCIT).
+    The original R implementation can be found at https://github.com/ericstrobl/RCIT/tree/master
+
+    References
+    ----------
+    [1] Strobl, E. V., Zhang, K., and Visweswaran, S. (2019). "Approximate kernel-based conditional
+    independence tests for fast non-parametric causal discovery." Journal of Causal Inference, 7(1), 20180017.
+    """
     def __init__(self, approx="lpd4", num_f=100, num_f2=5, rcit=True):
+        """
+        Initialize the RCIT object.
+
+        Parameters
+        ----------
+        approx : str
+            Method for approximating the null distribution.
+            - "lpd4" for the Lindsay-Pilla-Basak method
+            - "hbe" for the Hall-Buckley-Eagleson method
+            - "gamma" for the Satterthwaite-Welch method
+            - "chi2" for a normalized chi-squared statistic
+            - "perm" for permutation testing
+            Default is "lpd4".
+        num_f : int
+            Number of features for conditioning set. Default is 100.
+        num_f2 : int
+            Number of features for non-conditioning sets. Default is 5.
+        rcit : bool
+            Whether to use RCIT or RCoT. Default is True.
+        """
        self.approx = approx
        self.num_f = num_f
        self.num_f2 = num_f2
        self.rcit = rcit
 
     def compute_pvalue(self, data_x, data_y, data_z):
+        """
+        Compute the p value and return it together with the test statistic.
+
+        Parameters
+        ----------
+        data_x: input data for x (nxd1 array)
+        data_y: input data for y (nxd2 array)
+        data_z: input data for z (nxd3 array)
+
+        Returns
+        -------
+        p: p value
+        sta: test statistic
+        """
         d = data_z.shape[1]
         r = data_x.shape[0]
         r1 = 500 if (r > 500) else r
@@ -114,7 +157,27 @@ def compute_pvalue(self, data_x, data_y, data_z):
         return p, sta
 
     def random_fourier_features(self, x, w=None, b=None, num_f=None, sigma=None):
-
+        """
+        Generate random Fourier features.
+
+        Parameters
+        ----------
+        x : np.ndarray
+            Random variable x.
+        w : np.ndarray
+            Random coefficients.
+        b : np.ndarray
+            Random offsets.
+        num_f : int
+            Number of random Fourier features.
+        sigma : float
+            Smooth parameter of RBF kernel.
+
+        Returns
+        -------
+        feat : np.ndarray
+            Random Fourier features.
+        """
         if num_f is None:
             num_f = 25
 
@@ -133,6 +196,22 @@ def random_fourier_features(self, x, w=None, b=None, num_f=None, sigma=None):
         return feat
 
     def matrix_cov(self, mat_a, mat_b):
+        """
+        Compute the covariance matrix between two matrices.
+        Equivalent to ``cov()`` between two matrices in R.
+
+        Parameters
+        ----------
+        mat_a : np.ndarray
+            First data matrix.
+        mat_b : np.ndarray
+            Second data matrix.
+
+        Returns
+        -------
+        mat_cov : np.ndarray
+            Covariance matrix.
+        """
         n_obs = mat_a.shape[0]
 
         assert mat_a.shape == mat_b.shape
@@ -145,10 +224,47 @@
 
 
 class RIT(object):
+    """
+    Python implementation of the Randomized Independence Test (RIT).
+    The original R implementation can be found at https://github.com/ericstrobl/RCIT/tree/master
+
+    References
+    ----------
+    [1] Strobl, E. V., Zhang, K., and Visweswaran, S. (2019). "Approximate kernel-based conditional
+    independence tests for fast non-parametric causal discovery." Journal of Causal Inference, 7(1), 20180017.
+    """
     def __init__(self, approx="lpd4"):
+        """
+        Initialize the RIT object.
+
+        Parameters
+        ----------
+        approx : str
+            Method for approximating the null distribution.
+            - "lpd4" for the Lindsay-Pilla-Basak method
+            - "hbe" for the Hall-Buckley-Eagleson method
+            - "gamma" for the Satterthwaite-Welch method
+            - "chi2" for a normalized chi-squared statistic
+            - "perm" for permutation testing
+            Default is "lpd4".
+        """
         self.approx = approx
 
     def compute_pvalue(self, data_x, data_y):
+        """
+        Compute the p value and return it together with the test statistic.
+
+        Parameters
+        ----------
+        data_x: input data for x (nxd1 array)
+        data_y: input data for y (nxd2 array)
+
+        Returns
+        -------
+        p: p value
+        sta: test statistic
+        """
         r = data_x.shape[0]
         r1 = 500 if (r > 500) else r
 
@@ -221,7 +337,27 @@ def compute_pvalue(self, data_x, data_y):
         return p, sta
 
     def random_fourier_features(self, x, w=None, b=None, num_f=None, sigma=None):
-
+        """
+        Generate random Fourier features.
+
+        Parameters
+        ----------
+        x : np.ndarray
+            Random variable x.
+        w : np.ndarray
+            Random coefficients.
+        b : np.ndarray
+            Random offsets.
+        num_f : int
+            Number of random Fourier features.
+        sigma : float
+            Smooth parameter of RBF kernel.
+
+        Returns
+        -------
+        feat : np.ndarray
+            Random Fourier features.
+        """
         if num_f is None:
             num_f = 25
 
@@ -240,6 +376,22 @@ def random_fourier_features(self, x, w=None, b=None, num_f=None, sigma=None):
         return feat
 
     def matrix_cov(self, mat_a, mat_b):
+        """
+        Compute the covariance matrix between two matrices.
+        Equivalent to ``cov()`` between two matrices in R.
+
+        Parameters
+        ----------
+        mat_a : np.ndarray
+            First data matrix.
+        mat_b : np.ndarray
+            Second data matrix.
+
+        Returns
+        -------
+        mat_cov : np.ndarray
+            Covariance matrix.
+        """
         n_obs = mat_a.shape[0]
 
         assert mat_a.shape == mat_b.shape

From 39178867ad9d24634116b422fbaebcdad57dd90e Mon Sep 17 00:00:00 2001
From: OliverSchacht <65898638+OliverSchacht@users.noreply.github.com>
Date: Wed, 16 Oct 2024 15:30:57 +0200
Subject: [PATCH 51/84] linting

Signed-off-by: Oliver Schacht
---
 causallearn/utils/FastKCI/FastKCI.py | 110 +++++++++++++--------------
 1 file changed, 55 insertions(+), 55 deletions(-)

diff --git a/causallearn/utils/FastKCI/FastKCI.py b/causallearn/utils/FastKCI/FastKCI.py
index 5b614dd9..407977dd 100644
--- a/causallearn/utils/FastKCI/FastKCI.py
+++ b/causallearn/utils/FastKCI/FastKCI.py
@@ -9,19 +9,21 @@
 from sklearn.gaussian_process.kernels import ConstantKernel, WhiteKernel, RBF
 import warnings
 
+
 class FastKCI_CInd(object):
     """
-    Python implementation of as speed-up version of the Kernel-based Conditional Independence (KCI) test. Unconditional version.
+    Python implementation of a speed-up version of the Kernel-based Conditional Independence (KCI) test.
+    Conditional version.
 
     References
     ----------
     [1] K. Zhang, J. Peters, D. Janzing, and B. Schölkopf,
-    "A kernel-based conditional independence test and application in causal discovery," In UAI 2011.
+    "A kernel-based conditional independence test and application in causal discovery", In UAI 2011.
     [2] M. Zhang, and S. Williamson,
-    "Embarrassingly Parallel Inference for Gaussian Processes" In JMLR 20 (2019)
+    "Embarrassingly Parallel Inference for Gaussian Processes", In JMLR 20 (2019)
 
-    """
-    def __init__(self, K=10, J=8, alpha=500, epsilon=1e-3, eig_thresh = 1e-6, trimming_thresh = 1e-3, use_gp=False):
+    """
+    def __init__(self, K=10, J=8, alpha=500, epsilon=1e-3, eig_thresh=1e-6, trimming_thresh=1e-3, use_gp=False):
         """
         Initialize the FastKCI_CInd object.
 
@@ -33,7 +35,7 @@ def __init__(self, K=10, J=8, alpha=500, epsilon=1e-3, eig_thresh = 1e-6, trimmi
         epsilon: Penalty for the matrix ridge regression.
         eig_threshold: Threshold for Eigenvalues.
use_gp: Whether to use Gaussian Process Regression to determine the kernel widths - """ + """ self.K = K self.J = J self.alpha = alpha @@ -42,7 +44,6 @@ def __init__(self, K=10, J=8, alpha=500, epsilon=1e-3, eig_thresh = 1e-6, trimmi self.trimming_thresh = trimming_thresh self.use_gp = use_gp self.nullss = 5000 - # TODO: Adjust to causal-learn API def compute_pvalue(self, data_x=None, data_y=None, data_z=None): """ @@ -67,21 +68,21 @@ def compute_pvalue(self, data_x=None, data_y=None, data_z=None): self.Z_proposal, self.prob_Z = zip(*Z_proposal) block_res = Parallel(n_jobs=-1)(delayed(self.pvalue_onblocks)(self.Z_proposal[i]) for i in range(self.J)) test_stat, null_samples, log_likelihood = zip(*block_res) - + log_likelihood = np.array(log_likelihood) self.prob_Z += log_likelihood - self.prob_Z = np.around(np.exp(self.prob_Z-logsumexp(self.prob_Z)), 6) # experimental, not used + self.prob_Z = np.around(np.exp(self.prob_Z-logsumexp(self.prob_Z)), 6) # experimental, not used self.all_null_samples = np.vstack(null_samples) self.all_p = np.array([np.sum(self.all_null_samples[i,] > test_stat[i]) / float(self.nullss) for i in range(self.J)]) self.prob_weights = np.around(np.exp(log_likelihood-logsumexp(log_likelihood)), 6) self.all_test_stats = np.array(test_stat) - self.test_stat = np.average(np.array(test_stat), weights = self.prob_weights) - self.null_samples = np.average(null_samples, axis = 0, weights = self.prob_weights) + self.test_stat = np.average(np.array(test_stat), weights=self.prob_weights) + self.null_samples = np.average(null_samples, axis=0, weights=self.prob_weights) # experimental, not used - self.pvalue_alt = np.sum(np.average(null_samples, axis = 0, weights = self.prob_Z) > np.average(np.array(test_stat), weights = self.prob_Z)) / float(self.nullss) + self.pvalue_alt = np.sum(np.average(null_samples, axis=0, weights=self.prob_Z) > np.average(np.array(test_stat), weights=self.prob_Z)) / float(self.nullss) self.pvalue = np.sum(self.null_samples > self.test_stat) / float(self.nullss) - return self.pvalue, self.test_stat + return self.pvalue, self.test_stat def partition_data(self): """ @@ -94,18 +95,18 @@ def partition_data(self): """ Z_mean = self.data_z.mean(axis=0) Z_sd = self.data_z.std(axis=0) - mu_k = np.random.normal(Z_mean, Z_sd, size = (self.K,self.data_z.shape[1])) + mu_k = np.random.normal(Z_mean, Z_sd, size=(self.K, self.data_z.shape[1])) sigma_k = np.eye(self.data_z.shape[1]) pi_j = np.random.dirichlet([self.alpha]*self.K) - ll = np.tile(np.log(pi_j),(self.n,1)) + ll = np.tile(np.log(pi_j), (self.n, 1)) for k in range(self.K): - ll[:,k] += stats.multivariate_normal.logpdf(self.data_z, mu_k[k,:], cov=sigma_k, allow_singular=True) - Z = np.array([ np.random.multinomial(1,np.exp(ll[n,:]-logsumexp(ll[n,:]))).argmax() for n in range(self.n)]) + ll[:, k] += stats.multivariate_normal.logpdf(self.data_z, mu_k[k, :], cov=sigma_k, allow_singular=True) + Z = np.array([np.random.multinomial(1, np.exp(ll[n, :]-logsumexp(ll[n, :]))).argmax() for n in range(self.n)]) le = LabelEncoder() Z = le.fit_transform(Z) - prob_Z = np.take_along_axis(ll, Z[:, None], axis=1).sum() # experimental, might be removed + prob_Z = np.take_along_axis(ll, Z[:, None], axis=1).sum() # experimental, might be removed return Z, prob_Z - + def pvalue_onblocks(self, Z_proposal): """ Calculate p value on given partitions of the data. 
@@ -113,7 +114,7 @@ def pvalue_onblocks(self, Z_proposal): Parameters ---------- Z_proposal: partition of the data into K clusters (nxd1 array) - + Returns _________ test_stat: test statistic (scalar) @@ -123,13 +124,13 @@ def pvalue_onblocks(self, Z_proposal): unique_Z_j = np.unique(Z_proposal) test_stat = 0 log_likelihood = 0 - null_samples = np.zeros((1,self.nullss)) + null_samples = np.zeros((1, self.nullss)) for k in unique_Z_j: - K_mask = (Z_proposal==k) + K_mask = (Z_proposal == k) X_k = np.copy(self.data[0][K_mask]) Y_k = np.copy(self.data[1][K_mask]) Z_k = np.copy(self.data_z[K_mask]) - if (Z_k.shape[0]<6): # small blocks cause problems in GP, experimental + if (Z_k.shape[0] < 6): # small blocks cause problems in GP, experimental continue Kx, Ky, Kzx, Kzy, epsilon_x, epsilon_y, likelihood_x, likelihood_y = self.kernel_matrix(X_k, Y_k, Z_k) KxR, Rzx = Kernel.center_kernel_matrix_regression(Kx, Kzx, epsilon_x) @@ -143,7 +144,6 @@ def pvalue_onblocks(self, Z_proposal): log_likelihood += likelihood_x + likelihood_y return test_stat, null_samples, log_likelihood - def kernel_matrix(self, data_x, data_y, data_z): """ Calculates the Gaussian Kernel for given data inputs as well as the shared kernel. @@ -157,10 +157,10 @@ def kernel_matrix(self, data_x, data_y, data_z): data_x = stats.zscore(data_x, ddof=1, axis=0) data_x[np.isnan(data_x)] = 0. - + data_y = stats.zscore(data_y, ddof=1, axis=0) data_y[np.isnan(data_y)] = 0. - + data_z = stats.zscore(data_z, ddof=1, axis=0) data_z[np.isnan(data_z)] = 0. @@ -189,23 +189,23 @@ def kernel_matrix(self, data_x, data_y, data_z): with warnings.catch_warnings(): warnings.filterwarnings("ignore", category=Warning) # P(X|Z) - gpx.fit(X = data_z, y = data_x) + gpx.fit(X=data_z, y=data_x) likelihood_x = gpx.log_marginal_likelihood_value_ gpy = GaussianProcessRegressor() with warnings.catch_warnings(): warnings.filterwarnings("ignore", category=Warning) # P(Y|X,Z) - gpy.fit(X = np.c_[data_x,data_z], y=data_y) + gpy.fit(X=np.c_[data_x, data_z], y=data_y) likelihood_y = gpy.log_marginal_likelihood_value_ - else: + else: n, Dz = data_z.shape - + widthz = np.sqrt(1.0 / (kernelX.width * data_x.shape[1])) # Instantiate a Gaussian Process model for x wx, vx = eigh(Kx) - topkx = int(np.max([np.min([400, np.floor(n / 4)]), np.min([n+1,8])])) + topkx = int(np.max([np.min([400, np.floor(n / 4)]), np.min([n+1, 8])])) idx = np.argsort(-wx) wx = wx[idx] vx = vx[:, idx] @@ -228,7 +228,7 @@ def kernel_matrix(self, data_x, data_y, data_z): # Instantiate a Gaussian Process model for y wy, vy = eigh(Ky) - topky = int(np.max([np.min([400, np.floor(n / 4)]), np.min([n+1,8])])) + topky = int(np.max([np.min([400, np.floor(n / 4)]), np.min([n+1, 8])])) idy = np.argsort(-wy) wy = wy[idy] vy = vy[:, idy] @@ -297,7 +297,7 @@ def get_uuprod(self, Kx, Ky): uu_prod = uu.T.dot(uu) return uu_prod, size_u - + def get_kappa(self, mean_appr, var_appr): """ Get parameters for the approximated gamma distribution @@ -333,11 +333,12 @@ def null_sample_spectral(self, uu_prod, size_u, T): eig_uu = -np.sort(-eig_uu) eig_uu = eig_uu[0:np.min((T, size_u))] eig_uu = eig_uu[eig_uu > np.max(eig_uu) * self.eig_thresh] - + f_rand = np.random.chisquare(1, (eig_uu.shape[0], self.nullss)) null_dstr = eig_uu.T.dot(f_rand) return null_dstr + class FastKCI_UInd(object): """ Python implementation of as speed-up version of the Kernel-based Conditional Independence (KCI) test. Unconditional version. 
@@ -348,8 +349,8 @@ class FastKCI_UInd(object):
         "A kernel-based conditional independence test and application in causal discovery," In UAI 2011.
     [2] M. Zhang, and S. Williamson,
         "Embarrassingly Parallel Inference for Gaussian Processes" In JMLR 20 (2019)
-    """
-    def __init__(self, K=10, J=8, alpha=500, trimming_thresh = 1e-3):
+    """
+    def __init__(self, K=10, J=8, alpha=500, trimming_thresh=1e-3):
         """
         Construct the FastKCI_UInd model.
 
@@ -359,14 +360,13 @@ def __init__(self, K=10, J=8, alpha=500, trimming_thresh = 1e-3):
         J: Number of independent repetitions.
         alpha: Parameter for the Dirichlet distribution.
         trimming_thresh: Threshold for trimming the propensity weights.
-        """
+        """
         self.K = K
         self.J = J
         self.alpha = alpha
         self.trimming_thresh = trimming_thresh
         self.nullss = 5000
         self.eig_thresh = 1e-5
-
     # TODO: Adjust to causal-learn API
     def compute_pvalue(self, data_x=None, data_y=None):
         """
@@ -385,17 +385,17 @@ def compute_pvalue(self, data_x=None, data_y=None):
         self.data_x = data_x
         self.data_y = data_y
         self.n = data_x.shape[0]
-
+
         Z_proposal = Parallel(n_jobs=-1)(delayed(self.partition_data)() for i in range(self.J))
         self.Z_proposal, self.prob_Y = zip(*Z_proposal)
         block_res = Parallel(n_jobs=-1)(delayed(self.pvalue_onblocks)(self.Z_proposal[i]) for i in range(self.J))
         test_stat, null_samples, log_likelihood = zip(*block_res)
         self.prob_weights = np.around(np.exp(log_likelihood-logsumexp(log_likelihood)), 6)
-        self.test_stat = np.average(np.array(test_stat), weights = self.prob_weights)
-        self.null_samples = np.average(null_samples, axis = 0, weights = self.prob_weights)
+        self.test_stat = np.average(np.array(test_stat), weights=self.prob_weights)
+        self.null_samples = np.average(null_samples, axis=0, weights=self.prob_weights)
         self.pvalue = np.sum(self.null_samples > self.test_stat) / float(self.nullss)
 
-        return self.pvalue, self.test_stat
+        return self.pvalue, self.test_stat
 
     def partition_data(self):
         """
@@ -408,25 +408,25 @@ def partition_data(self):
         """
         Y_mean = self.data_y.mean(axis=0)
         Y_sd = self.data_y.std(axis=0)
-        mu_k = np.random.normal(Y_mean, Y_sd, size = (self.K,self.data_y.shape[1]))
+        mu_k = np.random.normal(Y_mean, Y_sd, size=(self.K, self.data_y.shape[1]))
         sigma_k = np.eye(self.data_y.shape[1])
         pi_j = np.random.dirichlet([self.alpha]*self.K)
-        ll = np.tile(np.log(pi_j),(self.n,1))
+        ll = np.tile(np.log(pi_j), (self.n, 1))
         for k in range(self.K):
-            ll[:,k] += stats.multivariate_normal.logpdf(self.data_y, mu_k[k,:], cov=sigma_k, allow_singular=True)
-        Z = np.array([ np.random.multinomial(1,np.exp(ll[n,:]-logsumexp(ll[n,:]))).argmax() for n in range(self.n)])
+            ll[:, k] += stats.multivariate_normal.logpdf(self.data_y, mu_k[k, :], cov=sigma_k, allow_singular=True)
+        Z = np.array([np.random.multinomial(1, np.exp(ll[n, :]-logsumexp(ll[n, :]))).argmax() for n in range(self.n)])
         prop_Y = np.take_along_axis(ll, Z[:, None], axis=1).sum()
         le = LabelEncoder()
         Z = le.fit_transform(Z)
 
         return (Z, prop_Y)
-
+
     def pvalue_onblocks(self, Z_proposal):
         unique_Z_j = np.unique(Z_proposal)
         test_stat = 0
         log_likelihood = 0
-        null_samples = np.zeros((1,self.nullss))
+        null_samples = np.zeros((1, self.nullss))
         for k in unique_Z_j:
-            K_mask = (Z_proposal==k)
+            K_mask = (Z_proposal == k)
             X_k = np.copy(self.data_x[K_mask])
             Y_k = np.copy(self.data_y[K_mask])
 
@@ -442,12 +442,12 @@ def pvalue_onblocks(self, Z_proposal):
         with warnings.catch_warnings():
             warnings.filterwarnings("ignore", category=Warning)
             # P(X|Y)
-            gpx.fit(X = Y_k, y = X_k)
+            gpx.fit(X=Y_k, y=X_k)
             likelihood =
gpx.log_marginal_likelihood_value_ log_likelihood += likelihood - + return test_stat, null_samples, log_likelihood - + def kernel_matrix(self, data): """ Calculates the Gaussian Kernel for given data inputs. @@ -460,12 +460,12 @@ def kernel_matrix(self, data): kernel_obj.set_width_empirical_hsic(data) data = stats.zscore(data, ddof=1, axis=0) - data[np.isnan(data)] = 0. + data[np.isnan(data)] = 0. K = kernel_obj.kernel(data) return K - + def get_kappa(self, mean_appr, var_appr): """ Get parameters for the approximated gamma distribution @@ -513,7 +513,7 @@ def null_sample_spectral(self, Kxc, Kyc): f_rand = np.random.chisquare(1, (lambda_prod.shape[0], self.nullss)) null_dstr = lambda_prod.T.dot(f_rand) / T return null_dstr - + def HSIC_V_statistic(self, Kx, Ky): """ Compute V test statistic from kernel matrices Kx and Ky From c92bd39fd773cad9bae547a0a82b0a3415b39255 Mon Sep 17 00:00:00 2001 From: Evie Qiu <65006489+EvieQ01@users.noreply.github.com> Date: Sun, 23 Jun 2024 20:56:15 -0400 Subject: [PATCH 52/84] Add node_names to GRaSP.py Signed-off-by: Evie Qiu <65006489+EvieQ01@users.noreply.github.com> Signed-off-by: Oliver Schacht --- causallearn/search/PermutationBased/GRaSP.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/causallearn/search/PermutationBased/GRaSP.py b/causallearn/search/PermutationBased/GRaSP.py index 38cdabf7..abcbf47c 100644 --- a/causallearn/search/PermutationBased/GRaSP.py +++ b/causallearn/search/PermutationBased/GRaSP.py @@ -81,6 +81,7 @@ def grasp( maxP: Optional[float] = None, parameters: Optional[Dict[str, Any]] = None, verbose: Optional[bool] = True, + node_names: Optional[List[str]] = None, ) -> GeneralGraph: """ Perform a greedy relaxation of the sparsest permutation (GRaSP) algorithm @@ -185,7 +186,7 @@ def grasp( raise Exception("Unknown function!") score = localScoreClass - node_names = [("x%d" % i) for i in range(p)] + node_names = [("X%d" % (i + 1)) for i in range(p)] if node_names is None else node_names nodes = [] for name in node_names: From e318aa098ff06b55b7d12813768549cf77d471ac Mon Sep 17 00:00:00 2001 From: EvieQ01 <65006489+EvieQ01@users.noreply.github.com> Date: Tue, 2 Jul 2024 15:32:22 -0400 Subject: [PATCH 53/84] Add node_names Signed-off-by: Oliver Schacht --- causallearn/search/ConstraintBased/FCI.py | 158 ++-------------------- 1 file changed, 9 insertions(+), 149 deletions(-) diff --git a/causallearn/search/ConstraintBased/FCI.py b/causallearn/search/ConstraintBased/FCI.py index 8a936c8c..b95a377d 100644 --- a/causallearn/search/ConstraintBased/FCI.py +++ b/causallearn/search/ConstraintBased/FCI.py @@ -15,7 +15,7 @@ from causallearn.utils.cit import * from causallearn.utils.FAS import fas from causallearn.utils.PCUtils.BackgroundKnowledge import BackgroundKnowledge -from itertools import combinations + def traverseSemiDirected(node: Node, edge: Edge) -> Node | None: if node == edge.get_node1(): @@ -320,9 +320,8 @@ def rulesR1R2cycle(graph: Graph, bk: BackgroundKnowledge | None, changeFlag: boo def isNoncollider(graph: Graph, sep_sets: Dict[Tuple[int, int], Set[int]], node_i: Node, node_j: Node, node_k: Node) -> bool: - node_map = graph.get_node_map() - sep_set = sep_sets.get((node_map[node_i], node_map[node_k])) - return sep_set is not None and sep_set.__contains__(node_map[node_j]) + sep_set = sep_sets[(graph.get_node_map()[node_i], graph.get_node_map()[node_k])] + return sep_set is not None and sep_set.__contains__(graph.get_node_map()[node_j]) def ruleR3(graph: Graph, sep_sets: Dict[Tuple[int, int], Set[int]], bk: 
BackgroundKnowledge | None, changeFlag: bool, @@ -543,142 +542,6 @@ def ruleR4B(graph: Graph, maxPathLength: int, data: ndarray, independence_test_m return change_flag - -def rule8(graph: Graph, nodes: List[Node]): - nodes = graph.get_nodes() - changeFlag = False - for node_B in nodes: - adj = graph.get_adjacent_nodes(node_B) - if len(adj) < 2: - continue - - cg = ChoiceGenerator(len(adj), 2) - combination = cg.next() - - while combination is not None: - node_A = adj[combination[0]] - node_C = adj[combination[1]] - combination = cg.next() - - if(graph.get_endpoint(node_A, node_B) == Endpoint.ARROW and graph.get_endpoint(node_B, node_A) == Endpoint.TAIL and \ - graph.get_endpoint(node_B, node_C) == Endpoint.ARROW and graph.get_endpoint(node_C, node_B) == Endpoint.TAIL and \ - graph.is_adjacent_to(node_A, node_C) and \ - graph.get_endpoint(node_A, node_C) == Endpoint.ARROW and graph.get_endpoint(node_C, node_A)== Endpoint.CIRCLE) or \ - (graph.get_endpoint(node_A, node_B) == Endpoint.CIRCLE and graph.get_endpoint(node_B, node_A) == Endpoint.TAIL and \ - graph.get_endpoint(node_B, node_C) == Endpoint.ARROW and graph.get_endpoint(node_C, node_B) == Endpoint.TAIL and \ - graph.is_adjacent_to(node_A, node_C) and \ - graph.get_endpoint(node_A, node_C) == Endpoint.ARROW and graph.get_endpoint(node_C, node_A)== Endpoint.CIRCLE): - edge1 = graph.get_edge(node_A, node_C) - graph.remove_edge(edge1) - graph.add_edge(Edge(node_A, node_C,Endpoint.TAIL, Endpoint.ARROW)) - changeFlag = True - - return changeFlag - - - -def is_possible_parent(graph: Graph, potential_parent_node, child_node): - if graph.node_map[potential_parent_node] == graph.node_map[child_node]: - return False - if not graph.is_adjacent_to(potential_parent_node, child_node): - return False - - if graph.get_endpoint(child_node, potential_parent_node) == Endpoint.ARROW or \ - graph.get_endpoint(potential_parent_node, child_node) == Endpoint.TAIL: - return False - else: - return True - - -def find_possible_children(graph: Graph, parent_node, en_nodes=None): - if en_nodes is None: - nodes = graph.get_nodes() - en_nodes = [node for node in nodes if graph.node_map[node] != graph.node_map[parent_node]] - - potential_child_nodes = set() - for potential_node in en_nodes: - if is_possible_parent(graph, potential_parent_node=parent_node, child_node=potential_node): - potential_child_nodes.add(potential_node) - - return potential_child_nodes - -def rule9(graph: Graph, nodes: List[Node]): - changeFlag = False - nodes = graph.get_nodes() - for node_C in nodes: - intoCArrows = graph.get_nodes_into(node_C, Endpoint.ARROW) - for node_A in intoCArrows: - # we want A o--> C - if not graph.get_endpoint(node_C, node_A) == Endpoint.CIRCLE: - continue - - # look for a possibly directed uncovered path s.t. 
B and C are not connected (for the given A o--> C - a_node_idx = graph.node_map[node_A] - c_node_idx = graph.node_map[node_C] - a_adj_nodes = graph.get_adjacent_nodes(node_A) - nodes_set = [node for node in a_adj_nodes if graph.node_map[node] != a_node_idx and graph.node_map[node]!= c_node_idx] - possible_children = find_possible_children(graph, node_A, nodes_set) - for node_B in possible_children: - if graph.is_adjacent_to(node_B, node_C): - continue - if existsSemiDirectedPath(node_from=node_B, node_to=node_C, G=graph): - edge1 = graph.get_edge(node_A, node_C) - graph.remove_edge(edge1) - graph.add_edge(Edge(node_A, node_C, Endpoint.TAIL, Endpoint.ARROW)) - changeFlag = True - break #once we found it, break out since we have already oriented Ao->C to A->C, we want to find the next A - return changeFlag - - -def rule10(graph: Graph): - changeFlag = False - nodes = graph.get_nodes() - for node_C in nodes: - intoCArrows = graph.get_nodes_into(node_C, Endpoint.ARROW) - if len(intoCArrows) < 2: - continue - # get all A where A o-> C - Anodes = [node_A for node_A in intoCArrows if graph.get_endpoint(node_C, node_A) == Endpoint.CIRCLE] - if len(Anodes) == 0: - continue - - for node_A in Anodes: - A_adj_nodes = graph.get_adjacent_nodes(node_A) - en_nodes = [i for i in A_adj_nodes if i is not node_C] - A_possible_children = find_possible_children(graph, parent_node=node_A, en_nodes=en_nodes) - if len(A_possible_children) < 2: - continue - - gen = ChoiceGenerator(len(intoCArrows), 2) - choice = gen.next() - while choice is not None: - node_B = intoCArrows[choice[0]] - node_D = intoCArrows[choice[1]] - - choice = gen.next() - # we want B->C<-D - if graph.get_endpoint(node_C, node_B) != Endpoint.TAIL: - continue - - if graph.get_endpoint(node_C, node_D) != Endpoint.TAIL: - continue - - for children in combinations(A_possible_children, 2): - child_one, child_two = children - if not existsSemiDirectedPath(node_from=child_one, node_to=node_B, G=graph) or \ - not existsSemiDirectedPath(node_from=child_two, node_to=node_D, G=graph): - continue - - if not graph.is_adjacent_to(child_one, child_two): - edge1 = graph.get_edge(node_A, node_C) - graph.remove_edge(edge1) - graph.add_edge(Edge(node_A, node_C, Endpoint.TAIL, Endpoint.ARROW)) - changeFlag = True - break #once we found it, break out since we have already oriented Ao->C to A->C, we want to find the next A - - return changeFlag - - def visibleEdgeHelperVisit(graph: Graph, node_c: Node, node_a: Node, node_b: Node, path: List[Node]) -> bool: if path.__contains__(node_a): return False @@ -828,8 +691,10 @@ def _contains_all(set_a: Set[Node], set_b: Set[Node]): break + def fci(dataset: ndarray, independence_test_method: str=fisherz, alpha: float = 0.05, depth: int = -1, - max_path_length: int = -1, verbose: bool = False, background_knowledge: BackgroundKnowledge | None = None, show_progress: bool = True, + max_path_length: int = -1, verbose: bool = False, background_knowledge: BackgroundKnowledge | None = None, + show_progress: bool = True, node_names = None, **kwargs) -> Tuple[Graph, List[Edge]]: """ Perform Fast Causal Inference (FCI) algorithm for causal discovery @@ -884,8 +749,10 @@ def fci(dataset: ndarray, independence_test_method: str=fisherz, alpha: float = nodes = [] + if node_names is None: + node_names = [f"X{i + 1}" for i in range(dataset.shape[1])] for i in range(dataset.shape[1]): - node = GraphNode(f"X{i + 1}") + node = GraphNode(node_names[i]) node.add_attribute("id", i) nodes.append(node) @@ -923,13 +790,6 @@ def fci(dataset: ndarray, 
independence_test_method: str=fisherz, alpha: float = if verbose: print("Epoch") - # rule 8 - change_flag = rule8(graph,nodes) - # rule 9 - change_flag = rule9(graph, nodes) - # rule 10 - change_flag = rule10(graph) - graph.set_pag(True) edges = get_color_edges(graph) From ed0895619d759901e9ae444ef2ccf78212fbb30a Mon Sep 17 00:00:00 2001 From: EvieQ01 <65006489+EvieQ01@users.noreply.github.com> Date: Tue, 2 Jul 2024 15:32:56 -0400 Subject: [PATCH 54/84] Allow orientation within the same tier Signed-off-by: Oliver Schacht --- causallearn/utils/PCUtils/BackgroundKnowledge.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/causallearn/utils/PCUtils/BackgroundKnowledge.py b/causallearn/utils/PCUtils/BackgroundKnowledge.py index 4c6bca0d..19248d48 100644 --- a/causallearn/utils/PCUtils/BackgroundKnowledge.py +++ b/causallearn/utils/PCUtils/BackgroundKnowledge.py @@ -165,7 +165,7 @@ def is_forbidden(self, node1: Node, node2: Node) -> bool: # then check in tier_map if self.tier_value_map.keys().__contains__(node1) and self.tier_value_map.keys().__contains__(node2): - if self.tier_value_map.get(node1) >= self.tier_value_map.get(node2): + if self.tier_value_map.get(node1) > self.tier_value_map.get(node2): # Allow orientation within the same tier return True return False From b74e1acdcab063b5d97b6326c9092ab3da8aa5d4 Mon Sep 17 00:00:00 2001 From: EvieQ01 <65006489+EvieQ01@users.noreply.github.com> Date: Wed, 3 Jul 2024 00:03:38 -0400 Subject: [PATCH 55/84] Revert "Add node_names" This reverts commit c2210d08cae59cc00aff7b94bbff817399daefc0. Signed-off-by: Oliver Schacht --- causallearn/search/ConstraintBased/FCI.py | 158 ++++++++++++++++++++-- 1 file changed, 149 insertions(+), 9 deletions(-) diff --git a/causallearn/search/ConstraintBased/FCI.py b/causallearn/search/ConstraintBased/FCI.py index b95a377d..8a936c8c 100644 --- a/causallearn/search/ConstraintBased/FCI.py +++ b/causallearn/search/ConstraintBased/FCI.py @@ -15,7 +15,7 @@ from causallearn.utils.cit import * from causallearn.utils.FAS import fas from causallearn.utils.PCUtils.BackgroundKnowledge import BackgroundKnowledge - +from itertools import combinations def traverseSemiDirected(node: Node, edge: Edge) -> Node | None: if node == edge.get_node1(): @@ -320,8 +320,9 @@ def rulesR1R2cycle(graph: Graph, bk: BackgroundKnowledge | None, changeFlag: boo def isNoncollider(graph: Graph, sep_sets: Dict[Tuple[int, int], Set[int]], node_i: Node, node_j: Node, node_k: Node) -> bool: - sep_set = sep_sets[(graph.get_node_map()[node_i], graph.get_node_map()[node_k])] - return sep_set is not None and sep_set.__contains__(graph.get_node_map()[node_j]) + node_map = graph.get_node_map() + sep_set = sep_sets.get((node_map[node_i], node_map[node_k])) + return sep_set is not None and sep_set.__contains__(node_map[node_j]) def ruleR3(graph: Graph, sep_sets: Dict[Tuple[int, int], Set[int]], bk: BackgroundKnowledge | None, changeFlag: bool, @@ -542,6 +543,142 @@ def ruleR4B(graph: Graph, maxPathLength: int, data: ndarray, independence_test_m return change_flag + +def rule8(graph: Graph, nodes: List[Node]): + nodes = graph.get_nodes() + changeFlag = False + for node_B in nodes: + adj = graph.get_adjacent_nodes(node_B) + if len(adj) < 2: + continue + + cg = ChoiceGenerator(len(adj), 2) + combination = cg.next() + + while combination is not None: + node_A = adj[combination[0]] + node_C = adj[combination[1]] + combination = cg.next() + + if(graph.get_endpoint(node_A, node_B) == Endpoint.ARROW and graph.get_endpoint(node_B, node_A) == Endpoint.TAIL 
and \ + graph.get_endpoint(node_B, node_C) == Endpoint.ARROW and graph.get_endpoint(node_C, node_B) == Endpoint.TAIL and \ + graph.is_adjacent_to(node_A, node_C) and \ + graph.get_endpoint(node_A, node_C) == Endpoint.ARROW and graph.get_endpoint(node_C, node_A)== Endpoint.CIRCLE) or \ + (graph.get_endpoint(node_A, node_B) == Endpoint.CIRCLE and graph.get_endpoint(node_B, node_A) == Endpoint.TAIL and \ + graph.get_endpoint(node_B, node_C) == Endpoint.ARROW and graph.get_endpoint(node_C, node_B) == Endpoint.TAIL and \ + graph.is_adjacent_to(node_A, node_C) and \ + graph.get_endpoint(node_A, node_C) == Endpoint.ARROW and graph.get_endpoint(node_C, node_A)== Endpoint.CIRCLE): + edge1 = graph.get_edge(node_A, node_C) + graph.remove_edge(edge1) + graph.add_edge(Edge(node_A, node_C,Endpoint.TAIL, Endpoint.ARROW)) + changeFlag = True + + return changeFlag + + + +def is_possible_parent(graph: Graph, potential_parent_node, child_node): + if graph.node_map[potential_parent_node] == graph.node_map[child_node]: + return False + if not graph.is_adjacent_to(potential_parent_node, child_node): + return False + + if graph.get_endpoint(child_node, potential_parent_node) == Endpoint.ARROW or \ + graph.get_endpoint(potential_parent_node, child_node) == Endpoint.TAIL: + return False + else: + return True + + +def find_possible_children(graph: Graph, parent_node, en_nodes=None): + if en_nodes is None: + nodes = graph.get_nodes() + en_nodes = [node for node in nodes if graph.node_map[node] != graph.node_map[parent_node]] + + potential_child_nodes = set() + for potential_node in en_nodes: + if is_possible_parent(graph, potential_parent_node=parent_node, child_node=potential_node): + potential_child_nodes.add(potential_node) + + return potential_child_nodes + +def rule9(graph: Graph, nodes: List[Node]): + changeFlag = False + nodes = graph.get_nodes() + for node_C in nodes: + intoCArrows = graph.get_nodes_into(node_C, Endpoint.ARROW) + for node_A in intoCArrows: + # we want A o--> C + if not graph.get_endpoint(node_C, node_A) == Endpoint.CIRCLE: + continue + + # look for a possibly directed uncovered path s.t. 
B and C are not connected (for the given A o--> C + a_node_idx = graph.node_map[node_A] + c_node_idx = graph.node_map[node_C] + a_adj_nodes = graph.get_adjacent_nodes(node_A) + nodes_set = [node for node in a_adj_nodes if graph.node_map[node] != a_node_idx and graph.node_map[node]!= c_node_idx] + possible_children = find_possible_children(graph, node_A, nodes_set) + for node_B in possible_children: + if graph.is_adjacent_to(node_B, node_C): + continue + if existsSemiDirectedPath(node_from=node_B, node_to=node_C, G=graph): + edge1 = graph.get_edge(node_A, node_C) + graph.remove_edge(edge1) + graph.add_edge(Edge(node_A, node_C, Endpoint.TAIL, Endpoint.ARROW)) + changeFlag = True + break #once we found it, break out since we have already oriented Ao->C to A->C, we want to find the next A + return changeFlag + + +def rule10(graph: Graph): + changeFlag = False + nodes = graph.get_nodes() + for node_C in nodes: + intoCArrows = graph.get_nodes_into(node_C, Endpoint.ARROW) + if len(intoCArrows) < 2: + continue + # get all A where A o-> C + Anodes = [node_A for node_A in intoCArrows if graph.get_endpoint(node_C, node_A) == Endpoint.CIRCLE] + if len(Anodes) == 0: + continue + + for node_A in Anodes: + A_adj_nodes = graph.get_adjacent_nodes(node_A) + en_nodes = [i for i in A_adj_nodes if i is not node_C] + A_possible_children = find_possible_children(graph, parent_node=node_A, en_nodes=en_nodes) + if len(A_possible_children) < 2: + continue + + gen = ChoiceGenerator(len(intoCArrows), 2) + choice = gen.next() + while choice is not None: + node_B = intoCArrows[choice[0]] + node_D = intoCArrows[choice[1]] + + choice = gen.next() + # we want B->C<-D + if graph.get_endpoint(node_C, node_B) != Endpoint.TAIL: + continue + + if graph.get_endpoint(node_C, node_D) != Endpoint.TAIL: + continue + + for children in combinations(A_possible_children, 2): + child_one, child_two = children + if not existsSemiDirectedPath(node_from=child_one, node_to=node_B, G=graph) or \ + not existsSemiDirectedPath(node_from=child_two, node_to=node_D, G=graph): + continue + + if not graph.is_adjacent_to(child_one, child_two): + edge1 = graph.get_edge(node_A, node_C) + graph.remove_edge(edge1) + graph.add_edge(Edge(node_A, node_C, Endpoint.TAIL, Endpoint.ARROW)) + changeFlag = True + break #once we found it, break out since we have already oriented Ao->C to A->C, we want to find the next A + + return changeFlag + + def visibleEdgeHelperVisit(graph: Graph, node_c: Node, node_a: Node, node_b: Node, path: List[Node]) -> bool: if path.__contains__(node_a): return False @@ -691,10 +828,8 @@ def _contains_all(set_a: Set[Node], set_b: Set[Node]): break - def fci(dataset: ndarray, independence_test_method: str=fisherz, alpha: float = 0.05, depth: int = -1, - max_path_length: int = -1, verbose: bool = False, background_knowledge: BackgroundKnowledge | None = None, - show_progress: bool = True, node_names = None, + max_path_length: int = -1, verbose: bool = False, background_knowledge: BackgroundKnowledge | None = None, show_progress: bool = True, **kwargs) -> Tuple[Graph, List[Edge]]: """ Perform Fast Causal Inference (FCI) algorithm for causal discovery @@ -749,10 +884,8 @@ def fci(dataset: ndarray, independence_test_method: str=fisherz, alpha: float = nodes = [] - if node_names is None: - node_names = [f"X{i + 1}" for i in range(dataset.shape[1])] for i in range(dataset.shape[1]): - node = GraphNode(node_names[i]) + node = GraphNode(f"X{i + 1}") node.add_attribute("id", i) nodes.append(node) @@ -790,6 +923,13 @@ def fci(dataset: ndarray, 
independence_test_method: str=fisherz, alpha: float = if verbose: print("Epoch") + # rule 8 + change_flag = rule8(graph,nodes) + # rule 9 + change_flag = rule9(graph, nodes) + # rule 10 + change_flag = rule10(graph) + graph.set_pag(True) edges = get_color_edges(graph) From 425bc053c90031a471807386206b6d2bcd511673 Mon Sep 17 00:00:00 2001 From: EvieQ01 <65006489+EvieQ01@users.noreply.github.com> Date: Wed, 3 Jul 2024 00:09:49 -0400 Subject: [PATCH 56/84] fixed Signed-off-by: Oliver Schacht --- causallearn/search/ConstraintBased/FCI.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/causallearn/search/ConstraintBased/FCI.py b/causallearn/search/ConstraintBased/FCI.py index 8a936c8c..cae3d4d8 100644 --- a/causallearn/search/ConstraintBased/FCI.py +++ b/causallearn/search/ConstraintBased/FCI.py @@ -829,7 +829,8 @@ def _contains_all(set_a: Set[Node], set_b: Set[Node]): def fci(dataset: ndarray, independence_test_method: str=fisherz, alpha: float = 0.05, depth: int = -1, - max_path_length: int = -1, verbose: bool = False, background_knowledge: BackgroundKnowledge | None = None, show_progress: bool = True, + max_path_length: int = -1, verbose: bool = False, background_knowledge: BackgroundKnowledge | None = None, + show_progress: bool = True, node_names = None, **kwargs) -> Tuple[Graph, List[Edge]]: """ Perform Fast Causal Inference (FCI) algorithm for causal discovery @@ -884,8 +885,10 @@ def fci(dataset: ndarray, independence_test_method: str=fisherz, alpha: float = nodes = [] + if node_names is None: + node_names = [f"X{i + 1}" for i in range(dataset.shape[1])] for i in range(dataset.shape[1]): - node = GraphNode(f"X{i + 1}") + node = GraphNode(node_names[i]) node.add_attribute("id", i) nodes.append(node) From 16210d439b9ed6ca65b03b2e1db33d76d1beba83 Mon Sep 17 00:00:00 2001 From: EvieQ01 <65006489+EvieQ01@users.noreply.github.com> Date: Fri, 30 Aug 2024 00:34:56 -0400 Subject: [PATCH 57/84] Add R5, 6, 7 for the Augmented FCI Signed-off-by: Oliver Schacht --- causallearn/search/ConstraintBased/FCI.py | 190 +++++++++++++++++++++- 1 file changed, 189 insertions(+), 1 deletion(-) diff --git a/causallearn/search/ConstraintBased/FCI.py b/causallearn/search/ConstraintBased/FCI.py index cae3d4d8..c055c02b 100644 --- a/causallearn/search/ConstraintBased/FCI.py +++ b/causallearn/search/ConstraintBased/FCI.py @@ -17,6 +17,19 @@ from causallearn.utils.PCUtils.BackgroundKnowledge import BackgroundKnowledge from itertools import combinations +def is_uncovered_path(nodes: List[Node], G: Graph) -> bool: + """ + Determines whether the given path is an uncovered path in this graph. + + A path is an uncovered path if no two nonconsecutive nodes (Vi-1 and Vi+1) in the path are + adjacent. 
+ """ + for i in range(len(nodes) - 2): + if G.is_adjacent_to(nodes[i], nodes[i + 2]): + return False + return True + + def traverseSemiDirected(node: Node, edge: Edge) -> Node | None: if node == edge.get_node1(): if edge.get_endpoint1() == Endpoint.TAIL or edge.get_endpoint1() == Endpoint.CIRCLE: @@ -26,8 +39,17 @@ def traverseSemiDirected(node: Node, edge: Edge) -> Node | None: return edge.get_node1() return None +def traverseCircle(node: Node, edge: Edge) -> Node | None: + if node == edge.get_node1(): + if edge.get_endpoint1() == Endpoint.CIRCLE and edge.get_endpoint2() == Endpoint.CIRCLE: + return edge.get_node2() + elif node == edge.get_node2(): + if edge.get_endpoint1() == Endpoint.CIRCLE or edge.get_endpoint2() == Endpoint.CIRCLE: + return edge.get_node1() + return None + -def existsSemiDirectedPath(node_from: Node, node_to: Node, G: Graph) -> bool: +def existsSemiDirectedPath(node_from: Node, node_to: Node, G: Graph) -> bool: ## TODO: Now it does not detect whether the path is an uncovered path Q = Queue() V = set() @@ -60,6 +82,41 @@ def existsSemiDirectedPath(node_from: Node, node_to: Node, G: Graph) -> bool: return False +def GetUncoveredCirclePath(node_from: Node, node_to: Node, G: Graph) -> List[Node] | None: + Q = Queue() + V = set() + + path = [node_from] + + for node_u in G.get_adjacent_nodes(node_from): + edge = G.get_edge(node_from, node_u) + node_c = traverseCircle(node_from, edge) + + if node_c is None: + continue + + if not V.__contains__(node_c): + V.add(node_c) + Q.put((node_c, path + [node_c])) + + while not Q.empty(): + node_t, path = Q.get_nowait() + if node_t == node_to and is_uncovered_path(path, G): + return path + + for node_u in G.get_adjacent_nodes(node_t): + edge = G.get_edge(node_t, node_u) + node_c = traverseCircle(node_t, edge) + + if node_c is None: + continue + + if not V.__contains__(node_c): + V.add(node_c) + Q.put((node_c, path + [node_c])) + + return None + def existOnePathWithPossibleParents(previous, node_w: Node, node_x: Node, node_b: Node, graph: Graph) -> bool: if node_w == node_x: @@ -371,6 +428,126 @@ def ruleR3(graph: Graph, sep_sets: Dict[Tuple[int, int], Set[int]], bk: Backgrou changeFlag = True return changeFlag +def ruleR5(graph: Graph, changeFlag: bool, + verbose: bool = False) -> bool: + """ + Rule R5 of the FCI algorithm. + by Jiji Zhang, 2008, "On the completeness of orientation rules for causal discovery in the presence of latent confounders and selection bias"] + + This function orients any edge that is part of an uncovered circle path between two nodes A and B, + if such a path exists. The path must start and end with a circle edge and must be uncovered, i.e. the + nodes on the path must not be adjacent to A or B. The orientation of the edges on the path is set to + double tail. + """ + nodes = graph.get_nodes() + for node_B in nodes: + intoBCircles = graph.get_nodes_into(node_B, Endpoint.CIRCLE) + + for node_A in intoBCircles: + if graph.get_endpoint(node_B, node_A) != Endpoint.CIRCLE: + continue + else: + # Check if there is an uncovered circle path between A and B (A o-o C .. D o-o B) + # s.t. 
A is not adjacent to D and B is not adjacent to C + a_node_idx = graph.node_map[node_A] + b_node_idx = graph.node_map[node_B] + a_adj_nodes = graph.get_adjacent_nodes(node_A) + b_adj_nodes = graph.get_adjacent_nodes(node_B) + + # get the adjacent nodes with circle edges of A and B + a_circle_adj_nodes_set = [node for node in a_adj_nodes if graph.node_map[node] != a_node_idx and graph.node_map[node]!= b_node_idx + and graph.get_endpoint(node, node_A) == Endpoint.CIRCLE and graph.get_endpoint(node_A, node) == Endpoint.CIRCLE] + b_circle_adj_nodes_set = [node for node in b_adj_nodes if graph.node_map[node] != a_node_idx and graph.node_map[node]!= b_node_idx + and graph.get_endpoint(node, node_B) == Endpoint.CIRCLE and graph.get_endpoint(node_B, node) == Endpoint.CIRCLE] + + # get the adjacent nodes with circle edges of A and B that is non adjacent to B and A, respectively + for node_C in a_circle_adj_nodes_set: + if graph.is_adjacent_to(node_B, node_C): + continue + for node_D in b_circle_adj_nodes_set: + if graph.is_adjacent_to(node_A, node_D): + continue + path = GetUncoveredCirclePath(node_from=node_C, node_to=node_D, G=graph) + if path is not None: + # pdb.set_trace() + changeFlag = True + # orient A - C, D - B + edge = graph.get_edge(node_A, path[0]) + graph.remove_edge(edge) + graph.add_edge(Edge(node_A, path[0], Endpoint.TAIL, Endpoint.TAIL)) + + edge = graph.get_edge(node_B, path[-1]) + graph.remove_edge(edge) + graph.add_edge(Edge(node_B, path[-1], Endpoint.TAIL, Endpoint.TAIL)) + if verbose: + print("Orienting edge (Double tail): " + graph.get_edge(node_A, path[0]).__str__()) + print("Orienting edge (Double tail): " + graph.get_edge(node_B, path[-1]).__str__()) + + # orient everything on the path to both tails + for i in range(len(path) - 1): + edge = graph.get_edge(path[i], path[i + 1]) + graph.remove_edge(edge) + graph.add_edge(Edge(path[i], path[i + 1], Endpoint.TAIL, Endpoint.TAIL)) + if verbose: + print("Orienting edge (Double tail): " + graph.get_edge(path[i], path[i + 1]).__str__()) + + # if not is_arrow_point_allowed(node_A, path[0], graph, bk): + # break + # if not is_arrow_point_allowed(node_B, path[-1], graph, bk): + # break + + continue + + return changeFlag + +def ruleR6(graph: Graph, changeFlag: bool, + verbose: bool = False) -> bool: + nodes = graph.get_nodes() + + for node_B in nodes: + # Find A - B + intoBTails = graph.get_nodes_into(node_B, Endpoint.TAIL) + exist = False + for node_A in intoBTails: + if graph.get_endpoint(node_B, node_A) == Endpoint.TAIL: + exist = True + if not exist: + continue + # Find B o-*C + intoBCircles = graph.get_nodes_into(node_B, Endpoint.CIRCLE) + for node_C in intoBCircles: + changeFlag = True + edge = graph.get_edge(node_B, node_C) + graph.remove_edge(edge) + graph.add_edge(Edge(node_B, node_C, Endpoint.TAIL, edge.get_proximal_endpoint(node_C))) + if verbose: + print("Orienting edge by rule 6): " + graph.get_edge(node_B, node_C).__str__()) + + return changeFlag + + +def ruleR7(graph: Graph, changeFlag: bool, + verbose: bool = False) -> bool: + nodes = graph.get_nodes() + + for node_B in nodes: + # Find A -o B + intoBCircles = graph.get_nodes_into(node_B, Endpoint.CIRCLE) + node_A_list = [node for node in intoBCircles if graph.get_endpoint(node_B, node) == Endpoint.TAIL] + + # Find B o-*C + for node_C in intoBCircles: + # pdb.set_trace() + for node_A in node_A_list: + # pdb.set_trace() + if not graph.is_adjacent_to(node_A, node_C): + changeFlag = True + edge = graph.get_edge(node_B, node_C) + graph.remove_edge(edge) + 
graph.add_edge(Edge(node_B, node_C, Endpoint.TAIL, edge.get_proximal_endpoint(node_C))) + if verbose: + print("Orienting edge by rule 7): " + graph.get_edge(node_B, node_C).__str__()) + return changeFlag def getPath(node_c: Node, previous) -> List[Node]: l = [] @@ -896,6 +1073,7 @@ def fci(dataset: ndarray, independence_test_method: str=fisherz, alpha: float = graph, sep_sets, test_results = fas(dataset, nodes, independence_test_method=independence_test_method, alpha=alpha, knowledge=background_knowledge, depth=depth, verbose=verbose, show_progress=show_progress) + # pdb.set_trace() reorientAllWith(graph, Endpoint.CIRCLE) rule0(graph, nodes, sep_sets, background_knowledge, verbose) @@ -926,8 +1104,18 @@ def fci(dataset: ndarray, independence_test_method: str=fisherz, alpha: float = if verbose: print("Epoch") + # rule 5 + change_flag = ruleR5(graph, change_flag, verbose) + + # rule 6 + change_flag = ruleR6(graph, change_flag, verbose) + + # rule 7 + change_flag = ruleR7(graph, change_flag, verbose) + # rule 8 change_flag = rule8(graph,nodes) + # rule 9 change_flag = rule9(graph, nodes) # rule 10 From b9f8e8f6bb2dec562934dedf803c328fd6f83f73 Mon Sep 17 00:00:00 2001 From: kenneth-lee-ch Date: Thu, 22 Aug 2024 15:44:05 -0400 Subject: [PATCH 58/84] fixed finding possible parents Signed-off-by: Oliver Schacht --- causallearn/search/ConstraintBased/FCI.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/causallearn/search/ConstraintBased/FCI.py b/causallearn/search/ConstraintBased/FCI.py index c055c02b..d1af061c 100644 --- a/causallearn/search/ConstraintBased/FCI.py +++ b/causallearn/search/ConstraintBased/FCI.py @@ -760,8 +760,7 @@ def is_possible_parent(graph: Graph, potential_parent_node, child_node): if not graph.is_adjacent_to(potential_parent_node, child_node): return False - if graph.get_endpoint(child_node, potential_parent_node) == Endpoint.ARROW or \ - graph.get_endpoint(potential_parent_node, child_node) == Endpoint.TAIL: + if graph.get_endpoint(child_node, potential_parent_node) == Endpoint.ARROW: return False else: return True From 3cbfbfe8ebdd0e57d72b94c55b0d5705f75856a7 Mon Sep 17 00:00:00 2001 From: Yujia Zheng Date: Tue, 2 Jul 2024 22:36:04 -0400 Subject: [PATCH 59/84] Update CODE_OF_CONDUCT.md Signed-off-by: Yujia Zheng Signed-off-by: Oliver Schacht --- CODE_OF_CONDUCT.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index 94df88d5..b0b6e90f 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -5,7 +5,7 @@ We as members, contributors, and leaders pledge to make participation in our community a harassment-free experience for everyone, regardless of age, body size, visible or invisible disability, ethnicity, sex characteristics, gender -identity and expression, level of experience, education, socio-economic status, +identity and expression, level of experience, education, socioeconomic status, nationality, personal appearance, race, religion, or sexual identity and orientation. 
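
For orientation, the FCI-related patches above (53 through 58) add a node_names pass-through, allow orientation within the same background-knowledge tier, and introduce rules R5, R6, and R7. A minimal usage sketch follows; it assumes the causal-learn public API as it appears in these diffs, that BackgroundKnowledge.add_node_to_tier populates the tier_value_map read by the patched is_forbidden, and that background-knowledge nodes are matched by name, as in the library's documentation examples. The toy data and the names A, B, C are illustrative, not part of the patches.

import numpy as np

from causallearn.graph.GraphNode import GraphNode
from causallearn.search.ConstraintBased.FCI import fci
from causallearn.utils.PCUtils.BackgroundKnowledge import BackgroundKnowledge

# Toy chain A -> B -> C; purely illustrative data.
rng = np.random.default_rng(0)
a = rng.normal(size=(500, 1))
b = a + 0.5 * rng.normal(size=(500, 1))
c = b + 0.5 * rng.normal(size=(500, 1))
data = np.hstack([a, b, c])

# A and B share tier 0: after patch 54, edges between them may still be
# oriented. C sits in tier 1, so arrows from C back into tier 0 are forbidden.
bk = BackgroundKnowledge()
bk.add_node_to_tier(GraphNode("A"), 0)
bk.add_node_to_tier(GraphNode("B"), 0)
bk.add_node_to_tier(GraphNode("C"), 1)

# node_names is forwarded to the GraphNode constructor per patch 56.
g, edges = fci(data, node_names=["A", "B", "C"],
               background_knowledge=bk, show_progress=False)
print(g)  # PAG over nodes named A, B, C
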
From 7ba051359866165c02fb03700d6159fde0b7a241 Mon Sep 17 00:00:00 2001
From: Bryan Andrews
Date: Sun, 14 Jul 2024 15:54:14 -0500
Subject: [PATCH 60/84] introduced a way to use the local score object without caching

Signed-off-by: Oliver Schacht
---
 causallearn/score/LocalScoreFunctionClass.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/causallearn/score/LocalScoreFunctionClass.py b/causallearn/score/LocalScoreFunctionClass.py
index 41ffb49f..8171d515 100644
--- a/causallearn/score/LocalScoreFunctionClass.py
+++ b/causallearn/score/LocalScoreFunctionClass.py
@@ -46,3 +46,9 @@ def score(self, i: int, PAi: List[int]) -> float:
                 self.score_cache[i][hash_key] = self.local_score_fun(self.data, i, PAi, self.parameters)
 
         return self.score_cache[i][hash_key]
+
+    def score_nocache(self, i: int, PAi: List[int]) -> float:
+        if self.local_score_fun == local_score_BIC_from_cov:
+            return self.local_score_fun((self.cov, self.n), i, PAi, self.parameters)
+        else:
+            return self.local_score_fun(self.data, i, PAi, self.parameters)
\ No newline at end of file

From 48d8a92d8d01d908436275a944e577eda4e83ebb Mon Sep 17 00:00:00 2001
From: Bryan Andrews
Date: Mon, 15 Jul 2024 15:31:02 -0500
Subject: [PATCH 61/84] removing maxP because it didn't do anything

Signed-off-by: Oliver Schacht
---
 causallearn/search/PermutationBased/GRaSP.py | 14 --------------
 1 file changed, 14 deletions(-)

diff --git a/causallearn/search/PermutationBased/GRaSP.py b/causallearn/search/PermutationBased/GRaSP.py
index abcbf47c..6fcf2121 100644
--- a/causallearn/search/PermutationBased/GRaSP.py
+++ b/causallearn/search/PermutationBased/GRaSP.py
@@ -78,7 +78,6 @@ def grasp(
     X: np.ndarray,
     score_func: str = "local_score_BIC",
     depth: Optional[int] = 3,
-    maxP: Optional[float] = None,
     parameters: Optional[Dict[str, Any]] = None,
     verbose: Optional[bool] = True,
     node_names: Optional[List[str]] = None,
 ) -> GeneralGraph:
@@ -92,7 +91,6 @@ def grasp(
     score_func : the string name of score function. (str(one of 'local_score_CV_general', 'local_score_marginal_general',
                     'local_score_CV_multi', 'local_score_marginal_multi', 'local_score_BIC', 'local_score_BDeu')).
depth : allowed maximum depth for DFS - maxP : allowed maximum number of parents when searching the graph parameters : when using CV likelihood, parameters['kfold']: k-fold cross validation parameters['lambda']: regularization parameter @@ -118,8 +116,6 @@ def grasp( "kfold": 10, # 10 fold cross validation "lambda": 0.01, } # regularization parameter - if maxP is None: - maxP = X.shape[1] / 2 # maximum number of parents localScoreClass = LocalScoreClass( data=X, local_score_fun=local_score_cv_general, parameters=parameters ) @@ -128,8 +124,6 @@ def grasp( score_func == "local_score_marginal_general" ): # negative marginal likelihood based on regression in RKHS parameters = {} - if maxP is None: - maxP = X.shape[1] / 2 # maximum number of parents localScoreClass = LocalScoreClass( data=X, local_score_fun=local_score_marginal_general, parameters=parameters ) @@ -146,8 +140,6 @@ def grasp( } # regularization parameter for i in range(X.shape[1]): parameters["dlabel"]["{}".format(i)] = i - if maxP is None: - maxP = len(parameters["dlabel"]) / 2 localScoreClass = LocalScoreClass( data=X, local_score_fun=local_score_cv_multi, parameters=parameters ) @@ -160,8 +152,6 @@ def grasp( parameters = {"dlabel": {}} for i in range(X.shape[1]): parameters["dlabel"]["{}".format(i)] = i - if maxP is None: - maxP = len(parameters["dlabel"]) / 2 localScoreClass = LocalScoreClass( data=X, local_score_fun=local_score_marginal_multi, parameters=parameters ) @@ -169,15 +159,11 @@ def grasp( elif score_func == "local_score_BIC": # Greedy equivalence search with BIC score parameters = {} parameters["lambda_value"] = 2 - if maxP is None: - maxP = X.shape[1] / 2 localScoreClass = LocalScoreClass( data=X, local_score_fun=local_score_BIC, parameters=parameters ) elif score_func == "local_score_BDeu": # Greedy equivalence search with BDeu score - if maxP is None: - maxP = X.shape[1] / 2 localScoreClass = LocalScoreClass( data=X, local_score_fun=local_score_BDeu, parameters=None ) From 9915f0b97f14d93bc128ba3aad7e61aa7b0cdd91 Mon Sep 17 00:00:00 2001 From: Bryan Andrews Date: Tue, 16 Jul 2024 06:50:24 -0500 Subject: [PATCH 62/84] fixed grasp unittest Signed-off-by: Oliver Schacht --- causallearn/search/PermutationBased/GRaSP.py | 11 ++++++- tests/TestGRaSP.py | 31 +++++++++----------- 2 files changed, 24 insertions(+), 18 deletions(-) diff --git a/causallearn/search/PermutationBased/GRaSP.py b/causallearn/search/PermutationBased/GRaSP.py index 6fcf2121..fcff6c0d 100644 --- a/causallearn/search/PermutationBased/GRaSP.py +++ b/causallearn/search/PermutationBased/GRaSP.py @@ -10,6 +10,7 @@ from causallearn.score.LocalScoreFunction import ( local_score_BDeu, local_score_BIC, + local_score_BIC_from_cov, local_score_cv_general, local_score_cv_multi, local_score_marginal_general, @@ -76,7 +77,7 @@ def len(self): def grasp( X: np.ndarray, - score_func: str = "local_score_BIC", + score_func: str = "local_score_BIC_from_cov", depth: Optional[int] = 3, parameters: Optional[Dict[str, Any]] = None, verbose: Optional[bool] = True, @@ -157,12 +158,20 @@ def grasp( ) elif score_func == "local_score_BIC": # Greedy equivalence search with BIC score + warnings.warn("Please use 'local_score_BIC_from_cov' instead") parameters = {} parameters["lambda_value"] = 2 localScoreClass = LocalScoreClass( data=X, local_score_fun=local_score_BIC, parameters=parameters ) + elif score_func == "local_score_BIC_from_cov": # Greedy equivalence search with BIC score + parameters = {} + parameters["lambda_value"] = 2 + localScoreClass = LocalScoreClass( + 
data=X, local_score_fun=local_score_BIC_from_cov, parameters=parameters + ) + elif score_func == "local_score_BDeu": # Greedy equivalence search with BDeu score localScoreClass = LocalScoreClass( data=X, local_score_fun=local_score_BDeu, parameters=None diff --git a/tests/TestGRaSP.py b/tests/TestGRaSP.py index 7482dcee..de44ae1e 100644 --- a/tests/TestGRaSP.py +++ b/tests/TestGRaSP.py @@ -66,7 +66,8 @@ def parameterize_dag(g): class TestGRaSP(unittest.TestCase): def test_grasp(self): - ps = [30, 60] + # ps = [30, 60] + ps = [30] ds = [0.1, 0.15] n = 1000 reps = 5 @@ -77,19 +78,15 @@ def test_grasp(self): for rep in range(1, reps + 1): g0 = random_dag(p, d) print( - "\nNodes:", - p, - "| Edges:", - np.sum(g0), - "| Avg Degree:", - 2 * np.sum(g0) / p, - "| Rep:", - rep, + "\nNodes:", p, + "| Edges:", np.sum(g0), + "| Avg Degree:", 2 * np.sum(g0) / p, + "| Rep:", rep, ) cov = parameterize_dag(g0) X = np.random.multivariate_normal(np.zeros(p), cov, n) - node_names = [("x%d" % i) for i in range(p)] + node_names = [("X%d" % (i + 1)) for i in range(p)] nodes = [] for name in node_names: @@ -105,13 +102,13 @@ def test_grasp(self): G = grasp(X) - pyd = GraphUtils.to_pydot(G) - tmp_png = pyd.create_png(f="png") - fp = io.BytesIO(tmp_png) - img = mpimg.imread(fp, format='png') - plt.axis('off') - plt.imshow(img) - plt.show() + # pyd = GraphUtils.to_pydot(G) + # tmp_png = pyd.create_png(f="png") + # fp = io.BytesIO(tmp_png) + # img = mpimg.imread(fp, format='png') + # plt.axis('off') + # plt.imshow(img) + # plt.show() AdjC = AdjacencyConfusion(G0, G) stats[0].append(AdjC.get_adj_precision()) From 1ef83e550e2119146b287fcaeaa260295c11251c Mon Sep 17 00:00:00 2001 From: Bryan Andrews Date: Tue, 16 Jul 2024 20:27:18 -0500 Subject: [PATCH 63/84] adding gsts and boss Signed-off-by: Oliver Schacht --- causallearn/search/PermutationBased/BOSS.py | 214 +++++++++++++++++++ causallearn/search/PermutationBased/GRaSP.py | 65 +++--- causallearn/search/PermutationBased/gst.py | 77 +++++++ tests/TestBOSS.py | 124 +++++++++++ tests/TestGRaSP.py | 59 ++--- 5 files changed, 476 insertions(+), 63 deletions(-) create mode 100644 causallearn/search/PermutationBased/BOSS.py create mode 100644 causallearn/search/PermutationBased/gst.py create mode 100644 tests/TestBOSS.py diff --git a/causallearn/search/PermutationBased/BOSS.py b/causallearn/search/PermutationBased/BOSS.py new file mode 100644 index 00000000..b72d8f44 --- /dev/null +++ b/causallearn/search/PermutationBased/BOSS.py @@ -0,0 +1,214 @@ +import random +import sys +import time +import warnings +from typing import Any, Dict, List, Optional + +import numpy as np +from causallearn.graph.GeneralGraph import GeneralGraph +from causallearn.graph.GraphNode import GraphNode +from causallearn.score.LocalScoreFunction import ( + local_score_BDeu, + local_score_BIC, + local_score_BIC_from_cov, + local_score_cv_general, + local_score_cv_multi, + local_score_marginal_general, + local_score_marginal_multi, +) +from causallearn.search.PermutationBased.gst import GST; +from causallearn.score.LocalScoreFunctionClass import LocalScoreClass +from causallearn.utils.DAG2CPDAG import dag2cpdag + + +def boss( + X: np.ndarray, + score_func: str = "local_score_BIC_from_cov", + parameters: Optional[Dict[str, Any]] = None, + verbose: Optional[bool] = True, + node_names: Optional[List[str]] = None, +) -> GeneralGraph: + """ + Perform a best order score search (BOSS) algorithm + + Parameters + ---------- + X : data set (numpy ndarray), shape (n_samples, n_features). 
The input data, where n_samples is the number of samples and n_features is the number of features. + score_func : the string name of score function. (str(one of 'local_score_CV_general', 'local_score_marginal_general', + 'local_score_CV_multi', 'local_score_marginal_multi', 'local_score_BIC', 'local_score_BIC_from_cov', 'local_score_BDeu')). + parameters : when using CV likelihood, + parameters['kfold']: k-fold cross validation + parameters['lambda']: regularization parameter + parameters['dlabel']: for variables with multi-dimensions, + indicate which dimensions belong to the i-th variable. + verbose : whether to print the time cost and verbose output of the algorithm. + + Returns + ------- + G : learned causal graph, where G.graph[j,i] = 1 and G.graph[i,j] = -1 indicates i --> j, G.graph[i,j] = G.graph[j,i] = -1 indicates i --- j. + """ + + n, p = X.shape + if n < p: + warnings.warn("The number of features is much larger than the sample size!") + + X = np.mat(X) + if score_func == "local_score_CV_general": + # % k-fold negative cross validated likelihood based on regression in RKHS + if parameters is None: + parameters = { + "kfold": 10, # 10 fold cross validation + "lambda": 0.01, + } # regularization parameter + localScoreClass = LocalScoreClass( + data=X, local_score_fun=local_score_cv_general, parameters=parameters + ) + + elif score_func == "local_score_marginal_general": + # negative marginal likelihood based on regression in RKHS + parameters = {} + localScoreClass = LocalScoreClass( + data=X, local_score_fun=local_score_marginal_general, parameters=parameters + ) + + elif score_func == "local_score_CV_multi": + # k-fold negative cross validated likelihood based on regression in RKHS + # for data with multi-variate dimensions + if parameters is None: + parameters = { + "kfold": 10, + "lambda": 0.01, + "dlabel": {}, + } # regularization parameter + for i in range(X.shape[1]): + parameters["dlabel"]["{}".format(i)] = i + localScoreClass = LocalScoreClass( + data=X, local_score_fun=local_score_cv_multi, parameters=parameters + ) + + elif score_func == "local_score_marginal_multi": + # negative marginal likelihood based on regression in RKHS + # for data with multi-variate dimensions + if parameters is None: + parameters = {"dlabel": {}} + for i in range(X.shape[1]): + parameters["dlabel"]["{}".format(i)] = i + localScoreClass = LocalScoreClass( + data=X, local_score_fun=local_score_marginal_multi, parameters=parameters + ) + + elif score_func == "local_score_BIC": + # SEM BIC score + warnings.warn("Please use 'local_score_BIC_from_cov' instead") + parameters = {} + parameters["lambda_value"] = 2 + localScoreClass = LocalScoreClass( + data=X, local_score_fun=local_score_BIC, parameters=parameters + ) + + elif score_func == "local_score_BIC_from_cov": + # SEM BIC score + parameters = {} + parameters["lambda_value"] = 2 + localScoreClass = LocalScoreClass( + data=X, local_score_fun=local_score_BIC_from_cov, parameters=parameters + ) + + elif score_func == "local_score_BDeu": + # BDeu score + localScoreClass = LocalScoreClass( + data=X, local_score_fun=local_score_BDeu, parameters=None + ) + + else: + raise Exception("Unknown function!") + score = localScoreClass + + gsts = [GST(i, score) for i in range(p)] + + node_names = [("X%d" % (i + 1)) for i in range(p)] if node_names is None else node_names + nodes = [] + + for name in node_names: + node = GraphNode(name) + nodes.append(node) + + G = GeneralGraph(nodes) + + runtime = time.perf_counter() + + order = [v for v in range(p)] + + gsts 
= [GST(v, score) for v in order] + parents = {v: [] for v in order} + + variables = [v for v in order] + while True: + improved = False + random.shuffle(variables) + if verbose: + for i, v in enumerate(order): + parents[v].clear() + gsts[v].trace(order[:i], parents[v]) + sys.stdout.write("\rBOSS edge count: %i " % np.sum([len(parents[v]) for v in range(p)])) + sys.stdout.flush() + + for v in variables: + improved |= better_mutation(v, order, gsts) + if not improved: break + + for i, v in enumerate(order): + parents[v].clear() + gsts[v].trace(order[:i], parents[v]) + + runtime = time.perf_counter() - runtime + + if verbose: + sys.stdout.write("\nBOSS completed in: %.2fs \n" % runtime) + sys.stdout.flush() + + for y in range(p): + for x in parents[y]: + G.add_directed_edge(nodes[x], nodes[y]) + + G = dag2cpdag(G) + + return G + + +def reversed_enumerate(iter, j): + for w in reversed(iter): + yield j, w + j -= 1 + + +def better_mutation(v, order, gsts): + i = order.index(v) + p = len(order) + scores = np.zeros(p + 1) + + prefix = [] + score = 0 + for j, w in enumerate(order): + scores[j] = gsts[v].trace(prefix) + score + if v != w: + score += gsts[w].trace(prefix) + prefix.append(w) + + scores[p] = gsts[v].trace(prefix) + score + best = p + + prefix.append(v) + score = 0 + for j, w in reversed_enumerate(order, p - 1): + if v != w: + prefix.remove(w) + score += gsts[w].trace(prefix) + scores[j] += score + if scores[j] > scores[best]: best = j + + if scores[i] + 1e-6 > scores[best]: return False + order.remove(v) + order.insert(best - int(best > i), v) + + return True \ No newline at end of file diff --git a/causallearn/search/PermutationBased/GRaSP.py b/causallearn/search/PermutationBased/GRaSP.py index fcff6c0d..09416b94 100644 --- a/causallearn/search/PermutationBased/GRaSP.py +++ b/causallearn/search/PermutationBased/GRaSP.py @@ -16,6 +16,7 @@ local_score_marginal_general, local_score_marginal_multi, ) +from causallearn.search.PermutationBased.gst import GST; from causallearn.score.LocalScoreFunctionClass import LocalScoreClass from causallearn.utils.DAG2CPDAG import dag2cpdag @@ -34,6 +35,7 @@ def __init__(self, p, score): y = self.order[i] self.parents[y] = [] self.local_scores[y] = -score.score(y, []) + # self.local_scores[y] = -score.score_nocache(y, []) def get(self, i): return self.order[i] @@ -109,9 +111,8 @@ def grasp( warnings.warn("The number of features is much larger than the sample size!") X = np.mat(X) - if ( - score_func == "local_score_CV_general" - ): # % k-fold negative cross validated likelihood based on regression in RKHS + if score_func == "local_score_CV_general": + # k-fold negative cross validated likelihood based on regression in RKHS if parameters is None: parameters = { "kfold": 10, # 10 fold cross validation @@ -121,17 +122,15 @@ def grasp( data=X, local_score_fun=local_score_cv_general, parameters=parameters ) - elif ( - score_func == "local_score_marginal_general" - ): # negative marginal likelihood based on regression in RKHS + elif score_func == "local_score_marginal_general": + # negative marginal likelihood based on regression in RKHS parameters = {} localScoreClass = LocalScoreClass( data=X, local_score_fun=local_score_marginal_general, parameters=parameters ) - elif ( - score_func == "local_score_CV_multi" - ): # k-fold negative cross validated likelihood based on regression in RKHS + elif score_func == "local_score_CV_multi": + # k-fold negative cross validated likelihood based on regression in RKHS # for data with multi-variate dimensions if parameters 
is None: parameters = { @@ -145,9 +144,8 @@ def grasp( data=X, local_score_fun=local_score_cv_multi, parameters=parameters ) - elif ( - score_func == "local_score_marginal_multi" - ): # negative marginal likelihood based on regression in RKHS + elif score_func == "local_score_marginal_multi": + # negative marginal likelihood based on regression in RKHS # for data with multi-variate dimensions if parameters is None: parameters = {"dlabel": {}} @@ -157,7 +155,8 @@ def grasp( data=X, local_score_fun=local_score_marginal_multi, parameters=parameters ) - elif score_func == "local_score_BIC": # Greedy equivalence search with BIC score + elif score_func == "local_score_BIC": + # SEM BIC score warnings.warn("Please use 'local_score_BIC_from_cov' instead") parameters = {} parameters["lambda_value"] = 2 @@ -165,14 +164,16 @@ def grasp( data=X, local_score_fun=local_score_BIC, parameters=parameters ) - elif score_func == "local_score_BIC_from_cov": # Greedy equivalence search with BIC score + elif score_func == "local_score_BIC_from_cov": + # SEM BIC score parameters = {} parameters["lambda_value"] = 2 localScoreClass = LocalScoreClass( data=X, local_score_fun=local_score_BIC_from_cov, parameters=parameters ) - elif score_func == "local_score_BDeu": # Greedy equivalence search with BDeu score + elif score_func == "local_score_BDeu": + # BDeu score localScoreClass = LocalScoreClass( data=X, local_score_fun=local_score_BDeu, parameters=None ) @@ -181,6 +182,8 @@ def grasp( raise Exception("Unknown function!") score = localScoreClass + gsts = [GST(i, score) for i in range(p)] + node_names = [("X%d" % (i + 1)) for i in range(p)] if node_names is None else node_names nodes = [] @@ -198,12 +201,15 @@ def grasp( y_parents = order.get_parents(y) candidates = [order.get(j) for j in range(0, i)] + # local_score = gsts[y].trace(candidates) + grow(y, y_parents, candidates, score) local_score = shrink(y, y_parents, score) + order.set_local_score(y, local_score) order.bump_edges(len(y_parents)) - while dfs(depth - 1, set(), [], order, score): + while dfs(depth - 1, set(), [], order, score, gsts): if verbose: sys.stdout.write("\rGRaSP edge count: %i " % order.get_edges()) sys.stdout.flush() @@ -224,7 +230,7 @@ def grasp( # performs a dfs over covered tucks -def dfs(depth: int, flipped: set, history: List[set], order, score): +def dfs(depth: int, flipped: set, history: List[set], order, score, gsts): cache = [{}, {}, {}, 0] @@ -252,7 +258,7 @@ def dfs(depth: int, flipped: set, history: List[set], order, score): cache[3] = order.get_edges() tuck(i, j, order) - edge_bump, score_bump = update(i, j, order, score) + edge_bump, score_bump = update(i, j, order, score, gsts) # because things that should be zero sometimes are not if score_bump > 1e-6: @@ -271,7 +277,7 @@ def dfs(depth: int, flipped: set, history: List[set], order, score): if len(flipped) > 0 and flipped not in history: history.append(flipped) - if depth > 0 and dfs(depth - 1, flipped, history, order, score): + if depth > 0 and dfs(depth - 1, flipped, history, order, score, gsts): return True del history[-1] @@ -286,7 +292,7 @@ def dfs(depth: int, flipped: set, history: List[set], order, score): # updates the parents and scores after a tuck -def update(i: int, j: int, order, score): +def update(i: int, j: int, order, score, gsts): edge_bump = 0 old_score = 0 @@ -299,18 +305,21 @@ def update(i: int, j: int, order, score): edge_bump -= len(z_parents) old_score += order.get_local_score(z) + z_parents.clear() candidates = [order.get(l) for l in range(0, k)] - for 
w in [w for w in z_parents if w not in candidates]: - z_parents.remove(w) - shrink(z, z_parents, score) + # for w in [w for w in z_parents if w not in candidates]: + # z_parents.remove(w) + # shrink(z, z_parents, score) - for w in z_parents: - candidates.remove(w) + # for w in z_parents: + # candidates.remove(w) - grow(z, z_parents, candidates, score) + # local_score = gsts[z].trace(candidates, z_parents) + grow(z, z_parents, candidates, score) local_score = shrink(z, z_parents, score) + order.set_local_score(z, local_score) edge_bump += len(z_parents) @@ -323,6 +332,7 @@ def update(i: int, j: int, order, score): def grow(y: int, y_parents: List[int], candidates: List[int], score): best = -score.score(y, y_parents) + # best = -score.score_nocache(y, y_parents) add = None checked = [] @@ -340,6 +350,7 @@ def grow(y: int, y_parents: List[int], candidates: List[int], score): x = candidates.pop() y_parents.append(x) current = -score.score(y, y_parents) + # current = -score.score_nocache(y, y_parents) y_parents.remove(x) checked.append(x) @@ -354,6 +365,7 @@ def grow(y: int, y_parents: List[int], candidates: List[int], score): def shrink(y: int, y_parents: List[int], score): best = -score.score(y, y_parents) + # best = -score.score_nocache(y, y_parents) remove = None checked = 0 @@ -367,6 +379,7 @@ def shrink(y: int, y_parents: List[int], score): while checked < len(y_parents): x = y_parents.pop(0) current = -score.score(y, y_parents) + # current = -score.score_nocache(y, y_parents) y_parents.append(x) checked += 1 diff --git a/causallearn/search/PermutationBased/gst.py b/causallearn/search/PermutationBased/gst.py new file mode 100644 index 00000000..25e0da6b --- /dev/null +++ b/causallearn/search/PermutationBased/gst.py @@ -0,0 +1,77 @@ +class GSTNode: + + def __init__(self, tree, add=None, score=None): + if score is None: score = -tree.score.score_nocache(tree.vertex, []) + # if score is None: score = -tree.score.score(tree.vertex, []) + self.tree = tree + self.add = add + self.grow_score = score + self.shrink_score = score + self.branches = None + self.remove = None + + def __lt__(self, other): + return self.grow_score < other.grow_score + + def grow(self, available, parents): + self.branches = [] + for add in available: + parents.append(add) + score = -self.tree.score.score_nocache(self.tree.vertex, parents) + # score = -self.tree.score.score(self.tree.vertex, parents) + parents.remove(add) + branch = GSTNode(self.tree, add, score) + if score > self.grow_score: self.branches.append(branch) + self.branches.sort() + + def shrink(self, parents): + self.remove = [] + while True: + best = None + for remove in [parent for parent in parents]: + parents.remove(remove) + score = -self.tree.score.score_nocache(self.tree.vertex, parents) + # score = -self.tree.score.score(self.tree.vertex, parents) + parents.append(remove) + if score > self.shrink_score: + self.shrink_score = score + best = remove + if best is None: break + self.remove.append(best) + parents.remove(best) + + def trace(self, prefix, available, parents): + if self.branches is None: self.grow(available, parents) + for branch in self.branches: + available.remove(branch.add) + if branch.add in prefix: + parents.append(branch.add) + return branch.trace(prefix, available, parents) + if self.remove is None: + self.shrink(parents) + return self.shrink_score + for remove in self.remove: parents.remove(remove) + return self.shrink_score + + +class GST: + + def __init__(self, vertex, score): + self.vertex = vertex + self.score = score + self.root 
= GSTNode(self) + self.forbidden = [vertex] + self.required = [] + + def trace(self, prefix, parents=None): + if parents is None: parents = [] + available = [i for i in range(self.score.data.shape[1]) if i not in self.forbidden] + return self.root.trace(prefix, available, parents) + + def set_knowledge(self, forbidden, required): + self.forbidden = forbidden # not implemented + self.required = required # not implemented + self.reset() + + def reset(self): + self.root = GSTNode(self) \ No newline at end of file diff --git a/tests/TestBOSS.py b/tests/TestBOSS.py new file mode 100644 index 00000000..1d87d235 --- /dev/null +++ b/tests/TestBOSS.py @@ -0,0 +1,124 @@ +import unittest + +import numpy as np +from causallearn.graph.AdjacencyConfusion import AdjacencyConfusion +from causallearn.graph.ArrowConfusion import ArrowConfusion +from causallearn.graph.GeneralGraph import GeneralGraph +from causallearn.graph.GraphNode import GraphNode +from causallearn.search.PermutationBased.BOSS import boss +from causallearn.utils.DAG2CPDAG import dag2cpdag +from causallearn.utils.GraphUtils import GraphUtils +import matplotlib.image as mpimg +import matplotlib.pyplot as plt +import io + + +def simulate_data(p, d, n): + """ + Randomly generates an Erdos-Renyi direct acyclic graph given an ordering + and randomly simulates data with the provided parameters. + + p = |variables| + d = |edges| / |possible edges| + n = sample size + """ + + # npe = |possible edges| + pe = int(p * (p - 1) / 2) + + # ne = |edges| + ne = int(d * pe) + + # generate edges + e = np.append(np.zeros(pe - ne), np.random.uniform(-1, 1, ne)) + np.random.shuffle(e) + B = np.zeros([p, p]) + B.T[np.triu_indices(p, 1)] = e + + # generate variance terms + O = np.diag(np.ones(p)) + + # simulate data + X = np.random.multivariate_normal(np.zeros(p), O, n) + for i in range(p): + J = np.where(B[i])[0] + for j in J: X[:, i] += B[i, j] * X[:, j] + + pi = [i for i in range(p)] + np.random.shuffle(pi) + + return (B != 0)[pi][:, pi], X[:, pi] + + +class TestBOSS(unittest.TestCase): + def test_boss(self): + ps = [30, 60] + # ps = [30] + ds = [0.1, 0.15] + n = 1000 + reps = 5 + + for p in ps: + for d in ds: + stats = [[], [], [], []] + for rep in range(1, reps + 1): + g0, X = simulate_data(p, d, n) + print( + "\nNodes:", p, + "| Edges:", np.sum(g0), + "| Avg Degree:", 2 * np.sum(g0) / p, + "| Rep:", rep, + ) + + node_names = [("X%d" % (i + 1)) for i in range(p)] + nodes = [] + + for name in node_names: + node = GraphNode(name) + nodes.append(node) + + G0 = GeneralGraph(nodes) + for y in range(p): + for x in np.where(g0[y] == 1)[0]: + G0.add_directed_edge(nodes[x], nodes[y]) + + G0 = dag2cpdag(G0) + + G = boss(X) + + # pyd = GraphUtils.to_pydot(G) + # tmp_png = pyd.create_png(f="png") + # fp = io.BytesIO(tmp_png) + # img = mpimg.imread(fp, format='png') + # plt.axis('off') + # plt.imshow(img) + # plt.show() + + AdjC = AdjacencyConfusion(G0, G) + stats[0].append(AdjC.get_adj_precision()) + stats[1].append(AdjC.get_adj_recall()) + + ArrC = ArrowConfusion(G0, G) + stats[2].append(ArrC.get_arrows_precision()) + stats[3].append(ArrC.get_arrows_recall()) + + print( + [ + ["AP", "AR", "AHP", "AHR"][i] + + ": " + + str(round(stats[i][-1], 2)) + for i in range(4) + ] + ) + + print( + "\nOverall Stats: \n", + [ + ["AP", "AR", "AHP", "AHR"][i] + + ": " + + str(round(np.sum(stats[i]) / reps, 2)) + for i in range(4) + ], + ) + + print("finish") diff --git a/tests/TestGRaSP.py b/tests/TestGRaSP.py index de44ae1e..1a8e0638 100644 --- a/tests/TestGRaSP.py +++ 
b/tests/TestGRaSP.py
@@ -11,63 +11,50 @@
 import matplotlib.image as mpimg
 import matplotlib.pyplot as plt
 import io
+import random
 
 
-def random_dag(p, d):
+def simulate_data(p, d, n):
     """
-    Randomly generates an Erdos-Renyi direct acyclic graph given an ordering.
+    Randomly generates an Erdos-Renyi direct acyclic graph given an ordering
+    and randomly simulates data with the provided parameters.
 
     p = |variables|
     d = |edges| / |possible edges|
+    n = sample size
     """
 
     # npe = |possible edges|
     pe = int(p * (p - 1) / 2)
 
-    # e = |edges|
+    # ne = |edges|
     ne = int(d * pe)
 
     # generate edges
-    e = np.append(np.zeros(pe - ne, "uint8"), np.ones(ne, "uint8"))
+    e = np.append(np.zeros(pe - ne), np.random.uniform(-1, 1, ne))
     np.random.shuffle(e)
-
-    # generate graph
-    g = np.zeros([p, p], "uint8")
-    g.T[np.triu_indices(p, 1)] = e
-
-    return g
-
-
-def parameterize_dag(g):
-    """
-    Randomly parameterize a directed acyclic graph.
-
-    g = directed acyclic graph (adjacency matrix)
-    """
-
-    # p = |variables|
-    p = g.shape[0]
-
-    # e = |edges|
-    e = np.sum(g)
+    B = np.zeros([p, p])
+    B.T[np.triu_indices(p, 1)] = e
 
     # generate variance terms
-    o = np.diag(np.ones(p))
+    O = np.diag(np.ones(p))
 
-    # generate edge weights (edge parameters uniformly sampled [-1.0, 1.0])
-    b = np.zeros([p, p])
-    b[np.where(g == 1)] = np.random.uniform(-1, 1, e)
+    # simulate data
+    X = np.random.multivariate_normal(np.zeros(p), O, n)
+    for i in range(p):
+        J = np.where(B[i])[0]
+        for j in J: X[:, i] += B[i, j] * X[:, j]
 
-    # calculate covariance
-    s = np.dot(np.dot(np.linalg.inv(np.eye(p) - b), o), np.linalg.inv(np.eye(p) - b).T)
+    pi = [i for i in range(p)]
+    np.random.shuffle(pi)
 
-    return s
+    return (B != 0)[pi][:, pi], X[:, pi]
 
 
 class TestGRaSP(unittest.TestCase):
     def test_grasp(self):
-        # ps = [30, 60]
-        ps = [30]
+        ps = [30, 60]
+        # ps = [30]
         ds = [0.1, 0.15]
         n = 1000
         reps = 5
@@ -76,15 +63,13 @@
             for d in ds:
                 stats = [[], [], [], []]
                 for rep in range(1, reps + 1):
-                    g0 = random_dag(p, d)
+                    g0, X = simulate_data(p, d, n)
                     print(
                         "\nNodes:", p,
                         "| Edges:", np.sum(g0),
                         "| Avg Degree:", 2 * np.sum(g0) / p,
                         "| Rep:", rep,
                     )
-                    cov = parameterize_dag(g0)
-                    X = np.random.multivariate_normal(np.zeros(p), cov, n)
 
                     node_names = [("X%d" % (i + 1)) for i in range(p)]
                     nodes = []
@@ -100,7 +85,7 @@
 
                     G0 = dag2cpdag(G0)
 
-                    G = grasp(X)
+                    G = grasp(X, depth=1)
 
                     # pyd = GraphUtils.to_pydot(G)
                     # tmp_png = pyd.create_png(f="png")

From 4f4334992b994cabe382f985fe7013cc3c83802f Mon Sep 17 00:00:00 2001
From: Bryan Andrews
Date: Wed, 17 Jul 2024 09:10:21 -0500
Subject: [PATCH 64/84] algorithms are working but efficiency is lackluster

Signed-off-by: Oliver Schacht
---
 causallearn/search/PermutationBased/BOSS.py | 8 ++++----
 causallearn/search/PermutationBased/GRaSP.py | 10 +++++-----
 causallearn/search/PermutationBased/gst.py | 12 ++++++------
 tests/TestBOSS.py | 6 +++---
 tests/TestGRaSP.py | 7 +++----
 5 files changed, 21 insertions(+), 22 deletions(-)

diff --git a/causallearn/search/PermutationBased/BOSS.py b/causallearn/search/PermutationBased/BOSS.py
index b72d8f44..c77a6620 100644
--- a/causallearn/search/PermutationBased/BOSS.py
+++ b/causallearn/search/PermutationBased/BOSS.py
@@ -100,16 +100,16 @@ def boss(
     elif score_func == "local_score_BIC": # SEM BIC score
         warnings.warn("Please use 'local_score_BIC_from_cov' instead")
-        parameters = {}
-        parameters["lambda_value"] = 2
+        if parameters is None:
+            parameters = {"lambda_value": 2}
         localScoreClass = LocalScoreClass(
             data=X, local_score_fun=local_score_BIC,
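# Note on the rewritten simulate_data (added to both test files): the in-place
# loop X[:, i] += B[i, j] * X[:, j] is valid because B is built strictly lower
# triangular (its transpose's upper triangle is assigned), so the left-to-right
# fill order is topological. Equivalently, the implied covariance has the
# closed form the removed parameterize_dag computed; a sketch for reference:

import numpy as np

def implied_covariance(B, O):
    # X = B X + E with E ~ N(0, O)  =>  X = (I - B)^{-1} E
    A = np.linalg.inv(np.eye(B.shape[0]) - B)
    return A @ O @ A.T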
parameters=parameters ) elif score_func == "local_score_BIC_from_cov": # SEM BIC score - parameters = {} - parameters["lambda_value"] = 2 + if parameters is None: + parameters = {"lambda_value": 2} localScoreClass = LocalScoreClass( data=X, local_score_fun=local_score_BIC_from_cov, parameters=parameters ) diff --git a/causallearn/search/PermutationBased/GRaSP.py b/causallearn/search/PermutationBased/GRaSP.py index 09416b94..62bd7632 100644 --- a/causallearn/search/PermutationBased/GRaSP.py +++ b/causallearn/search/PermutationBased/GRaSP.py @@ -92,7 +92,7 @@ def grasp( ---------- X : data set (numpy ndarray), shape (n_samples, n_features). The input data, where n_samples is the number of samples and n_features is the number of features. score_func : the string name of score function. (str(one of 'local_score_CV_general', 'local_score_marginal_general', - 'local_score_CV_multi', 'local_score_marginal_multi', 'local_score_BIC', 'local_score_BDeu')). + 'local_score_CV_multi', 'local_score_marginal_multi', 'local_score_BIC', 'local_score_BIC_from_cov', 'local_score_BDeu')). depth : allowed maximum depth for DFS parameters : when using CV likelihood, parameters['kfold']: k-fold cross validation @@ -158,16 +158,16 @@ def grasp( elif score_func == "local_score_BIC": # SEM BIC score warnings.warn("Please use 'local_score_BIC_from_cov' instead") - parameters = {} - parameters["lambda_value"] = 2 + if parameters is None: + parameters = {"lambda_value": 2} localScoreClass = LocalScoreClass( data=X, local_score_fun=local_score_BIC, parameters=parameters ) elif score_func == "local_score_BIC_from_cov": # SEM BIC score - parameters = {} - parameters["lambda_value"] = 2 + if parameters is None: + parameters = {"lambda_value": 2} localScoreClass = LocalScoreClass( data=X, local_score_fun=local_score_BIC_from_cov, parameters=parameters ) diff --git a/causallearn/search/PermutationBased/gst.py b/causallearn/search/PermutationBased/gst.py index 25e0da6b..70d07903 100644 --- a/causallearn/search/PermutationBased/gst.py +++ b/causallearn/search/PermutationBased/gst.py @@ -1,8 +1,8 @@ class GSTNode: def __init__(self, tree, add=None, score=None): - if score is None: score = -tree.score.score_nocache(tree.vertex, []) - # if score is None: score = -tree.score.score(tree.vertex, []) + # if score is None: score = -tree.score.score_nocache(tree.vertex, []) + if score is None: score = -tree.score.score(tree.vertex, []) self.tree = tree self.add = add self.grow_score = score @@ -17,8 +17,8 @@ def grow(self, available, parents): self.branches = [] for add in available: parents.append(add) - score = -self.tree.score.score_nocache(self.tree.vertex, parents) - # score = -self.tree.score.score(self.tree.vertex, parents) + # score = -self.tree.score.score_nocache(self.tree.vertex, parents) + score = -self.tree.score.score(self.tree.vertex, parents) parents.remove(add) branch = GSTNode(self.tree, add, score) if score > self.grow_score: self.branches.append(branch) @@ -30,8 +30,8 @@ def shrink(self, parents): best = None for remove in [parent for parent in parents]: parents.remove(remove) - score = -self.tree.score.score_nocache(self.tree.vertex, parents) - # score = -self.tree.score.score(self.tree.vertex, parents) + # score = -self.tree.score.score_nocache(self.tree.vertex, parents) + score = -self.tree.score.score(self.tree.vertex, parents) parents.append(remove) if score > self.shrink_score: self.shrink_score = score diff --git a/tests/TestBOSS.py b/tests/TestBOSS.py index 1d87d235..f67fa5a3 100644 --- a/tests/TestBOSS.py 
+++ b/tests/TestBOSS.py @@ -30,7 +30,7 @@ def simulate_data(p, d, n): ne = int(d * pe) # generate edges - e = np.append(np.zeros(pe - ne), np.random.uniform(-1, 1, ne)) + e = np.append(np.zeros(pe - ne), 0.5 * np.random.uniform(-1, 1, ne)) np.random.shuffle(e) B = np.zeros([p, p]) B.T[np.triu_indices(p, 1)] = e @@ -66,7 +66,7 @@ def test_boss(self): print( "\nNodes:", p, "| Edges:", np.sum(g0), - "| Avg Degree:", 2 * np.sum(g0) / p, + "| Avg Degree:", 2 * round(np.sum(g0) / p, 2), "| Rep:", rep, ) @@ -84,7 +84,7 @@ def test_boss(self): G0 = dag2cpdag(G0) - G = boss(X) + G = boss(X, parameters={'lambda_value': 2}) # pyd = GraphUtils.to_pydot(G) # tmp_png = pyd.create_png(f="png") diff --git a/tests/TestGRaSP.py b/tests/TestGRaSP.py index 1a8e0638..0ac1d81d 100644 --- a/tests/TestGRaSP.py +++ b/tests/TestGRaSP.py @@ -11,7 +11,6 @@ import matplotlib.image as mpimg import matplotlib.pyplot as plt import io -import random def simulate_data(p, d, n): @@ -31,7 +30,7 @@ def simulate_data(p, d, n): ne = int(d * pe) # generate edges - e = np.append(np.zeros(pe - ne), np.random.uniform(-1, 1, ne)) + e = np.append(np.zeros(pe - ne), 0.5 * np.random.uniform(-1, 1, ne)) np.random.shuffle(e) B = np.zeros([p, p]) B.T[np.triu_indices(p, 1)] = e @@ -67,7 +66,7 @@ def test_grasp(self): print( "\nNodes:", p, "| Edges:", np.sum(g0), - "| Avg Degree:", 2 * np.sum(g0) / p, + "| Avg Degree:", 2 * round(np.sum(g0) / p, 2), "| Rep:", rep, ) @@ -85,7 +84,7 @@ def test_grasp(self): G0 = dag2cpdag(G0) - G = grasp(X, depth=1) + G = grasp(X, depth=1, parameters={'lambda_value': 2}) # pyd = GraphUtils.to_pydot(G) # tmp_png = pyd.create_png(f="png") From 1a530550c7e0074a7d8c5bf0f329f3522ffad922 Mon Sep 17 00:00:00 2001 From: Bryan Andrews Date: Thu, 18 Jul 2024 13:01:03 -0500 Subject: [PATCH 65/84] final updates to BOSS and GRaSP Signed-off-by: Oliver Schacht --- causallearn/search/PermutationBased/BOSS.py | 11 +- causallearn/search/PermutationBased/GRaSP.py | 103 ++----------------- causallearn/search/PermutationBased/gst.py | 12 +-- tests/TestBOSS.py | 21 ++-- tests/TestGRaSP.py | 21 ++-- 5 files changed, 31 insertions(+), 137 deletions(-) diff --git a/causallearn/search/PermutationBased/BOSS.py b/causallearn/search/PermutationBased/BOSS.py index c77a6620..465d3659 100644 --- a/causallearn/search/PermutationBased/BOSS.py +++ b/causallearn/search/PermutationBased/BOSS.py @@ -48,11 +48,11 @@ def boss( G : learned causal graph, where G.graph[j,i] = 1 and G.graph[i,j] = -1 indicates i --> j, G.graph[i,j] = G.graph[j,i] = -1 indicates i --- j. 
""" + X = X.copy() n, p = X.shape if n < p: warnings.warn("The number of features is much larger than the sample size!") - X = np.mat(X) if score_func == "local_score_CV_general": # % k-fold negative cross validated likelihood based on regression in RKHS if parameters is None: @@ -63,14 +63,12 @@ def boss( localScoreClass = LocalScoreClass( data=X, local_score_fun=local_score_cv_general, parameters=parameters ) - elif score_func == "local_score_marginal_general": # negative marginal likelihood based on regression in RKHS parameters = {} localScoreClass = LocalScoreClass( data=X, local_score_fun=local_score_marginal_general, parameters=parameters ) - elif score_func == "local_score_CV_multi": # k-fold negative cross validated likelihood based on regression in RKHS # for data with multi-variate dimensions @@ -85,7 +83,6 @@ def boss( localScoreClass = LocalScoreClass( data=X, local_score_fun=local_score_cv_multi, parameters=parameters ) - elif score_func == "local_score_marginal_multi": # negative marginal likelihood based on regression in RKHS # for data with multi-variate dimensions @@ -96,7 +93,6 @@ def boss( localScoreClass = LocalScoreClass( data=X, local_score_fun=local_score_marginal_multi, parameters=parameters ) - elif score_func == "local_score_BIC": # SEM BIC score warnings.warn("Please use 'local_score_BIC_from_cov' instead") @@ -105,7 +101,6 @@ def boss( localScoreClass = LocalScoreClass( data=X, local_score_fun=local_score_BIC, parameters=parameters ) - elif score_func == "local_score_BIC_from_cov": # SEM BIC score if parameters is None: @@ -113,17 +108,15 @@ def boss( localScoreClass = LocalScoreClass( data=X, local_score_fun=local_score_BIC_from_cov, parameters=parameters ) - elif score_func == "local_score_BDeu": # BDeu score localScoreClass = LocalScoreClass( data=X, local_score_fun=local_score_BDeu, parameters=None ) - else: raise Exception("Unknown function!") + score = localScoreClass - gsts = [GST(i, score) for i in range(p)] node_names = [("X%d" % (i + 1)) for i in range(p)] if node_names is None else node_names diff --git a/causallearn/search/PermutationBased/GRaSP.py b/causallearn/search/PermutationBased/GRaSP.py index 62bd7632..f4d4aa14 100644 --- a/causallearn/search/PermutationBased/GRaSP.py +++ b/causallearn/search/PermutationBased/GRaSP.py @@ -106,11 +106,11 @@ def grasp( G : learned causal graph, where G.graph[j,i] = 1 and G.graph[i,j] = -1 indicates i --> j, G.graph[i,j] = G.graph[j,i] = -1 indicates i --- j. 
""" + X = X.copy() n, p = X.shape if n < p: warnings.warn("The number of features is much larger than the sample size!") - X = np.mat(X) if score_func == "local_score_CV_general": # k-fold negative cross validated likelihood based on regression in RKHS if parameters is None: @@ -121,14 +121,12 @@ def grasp( localScoreClass = LocalScoreClass( data=X, local_score_fun=local_score_cv_general, parameters=parameters ) - elif score_func == "local_score_marginal_general": # negative marginal likelihood based on regression in RKHS parameters = {} localScoreClass = LocalScoreClass( data=X, local_score_fun=local_score_marginal_general, parameters=parameters ) - elif score_func == "local_score_CV_multi": # k-fold negative cross validated likelihood based on regression in RKHS # for data with multi-variate dimensions @@ -143,7 +141,6 @@ def grasp( localScoreClass = LocalScoreClass( data=X, local_score_fun=local_score_cv_multi, parameters=parameters ) - elif score_func == "local_score_marginal_multi": # negative marginal likelihood based on regression in RKHS # for data with multi-variate dimensions @@ -154,7 +151,6 @@ def grasp( localScoreClass = LocalScoreClass( data=X, local_score_fun=local_score_marginal_multi, parameters=parameters ) - elif score_func == "local_score_BIC": # SEM BIC score warnings.warn("Please use 'local_score_BIC_from_cov' instead") @@ -163,7 +159,6 @@ def grasp( localScoreClass = LocalScoreClass( data=X, local_score_fun=local_score_BIC, parameters=parameters ) - elif score_func == "local_score_BIC_from_cov": # SEM BIC score if parameters is None: @@ -171,17 +166,15 @@ def grasp( localScoreClass = LocalScoreClass( data=X, local_score_fun=local_score_BIC_from_cov, parameters=parameters ) - elif score_func == "local_score_BDeu": # BDeu score localScoreClass = LocalScoreClass( data=X, local_score_fun=local_score_BDeu, parameters=None ) - else: raise Exception("Unknown function!") - score = localScoreClass + score = localScoreClass gsts = [GST(i, score) for i in range(p)] node_names = [("X%d" % (i + 1)) for i in range(p)] if node_names is None else node_names @@ -201,15 +194,11 @@ def grasp( y_parents = order.get_parents(y) candidates = [order.get(j) for j in range(0, i)] - # local_score = gsts[y].trace(candidates) - - grow(y, y_parents, candidates, score) - local_score = shrink(y, y_parents, score) - + local_score = gsts[y].trace(candidates, y_parents) order.set_local_score(y, local_score) order.bump_edges(len(y_parents)) - while dfs(depth - 1, set(), [], order, score, gsts): + while dfs(depth - 1, set(), [], order, gsts): if verbose: sys.stdout.write("\rGRaSP edge count: %i " % order.get_edges()) sys.stdout.flush() @@ -230,7 +219,7 @@ def grasp( # performs a dfs over covered tucks -def dfs(depth: int, flipped: set, history: List[set], order, score, gsts): +def dfs(depth: int, flipped: set, history: List[set], order, gsts): cache = [{}, {}, {}, 0] @@ -258,7 +247,7 @@ def dfs(depth: int, flipped: set, history: List[set], order, score, gsts): cache[3] = order.get_edges() tuck(i, j, order) - edge_bump, score_bump = update(i, j, order, score, gsts) + edge_bump, score_bump = update(i, j, order, gsts) # because things that should be zero sometimes are not if score_bump > 1e-6: @@ -277,7 +266,7 @@ def dfs(depth: int, flipped: set, history: List[set], order, score, gsts): if len(flipped) > 0 and flipped not in history: history.append(flipped) - if depth > 0 and dfs(depth - 1, flipped, history, order, score, gsts): + if depth > 0 and dfs(depth - 1, flipped, history, order, gsts): return True 
del history[-1] @@ -292,7 +281,7 @@ def dfs(depth: int, flipped: set, history: List[set], order, score, gsts): # updates the parents and scores after a tuck -def update(i: int, j: int, order, score, gsts): +def update(i: int, j: int, order, gsts): edge_bump = 0 old_score = 0 @@ -307,19 +296,7 @@ def update(i: int, j: int, order, score, gsts): z_parents.clear() candidates = [order.get(l) for l in range(0, k)] - - # for w in [w for w in z_parents if w not in candidates]: - # z_parents.remove(w) - # shrink(z, z_parents, score) - - # for w in z_parents: - # candidates.remove(w) - - # local_score = gsts[z].trace(candidates, z_parents) - - grow(z, z_parents, candidates, score) - local_score = shrink(z, z_parents, score) - + local_score = gsts[z].trace(candidates, z_parents) order.set_local_score(z, local_score) edge_bump += len(z_parents) @@ -328,68 +305,6 @@ def update(i: int, j: int, order, score, gsts): return edge_bump, new_score - old_score -# grow of grow-shrink -def grow(y: int, y_parents: List[int], candidates: List[int], score): - - best = -score.score(y, y_parents) - # best = -score.score_nocache(y, y_parents) - - add = None - checked = [] - while add is not None or len(candidates) > 0: - - if add is not None: - checked.remove(add) - y_parents.append(add) - candidates = checked - checked = [] - add = None - - while len(candidates) > 0: - - x = candidates.pop() - y_parents.append(x) - current = -score.score(y, y_parents) - # current = -score.score_nocache(y, y_parents) - y_parents.remove(x) - checked.append(x) - - if current > best: - best = current - add = x - - return best - - -# shrink of grow-shrink -def shrink(y: int, y_parents: List[int], score): - - best = -score.score(y, y_parents) - # best = -score.score_nocache(y, y_parents) - - remove = None - checked = 0 - while remove is not None or checked < len(y_parents): - - if remove is not None: - y_parents.remove(remove) - checked = 0 - remove = None - - while checked < len(y_parents): - x = y_parents.pop(0) - current = -score.score(y, y_parents) - # current = -score.score_nocache(y, y_parents) - y_parents.append(x) - checked += 1 - - if current > best: - best = current - remove = x - - return best - - # tucks the node at position i into position j def tuck(i: int, j: int, order): diff --git a/causallearn/search/PermutationBased/gst.py b/causallearn/search/PermutationBased/gst.py index 70d07903..25e0da6b 100644 --- a/causallearn/search/PermutationBased/gst.py +++ b/causallearn/search/PermutationBased/gst.py @@ -1,8 +1,8 @@ class GSTNode: def __init__(self, tree, add=None, score=None): - # if score is None: score = -tree.score.score_nocache(tree.vertex, []) - if score is None: score = -tree.score.score(tree.vertex, []) + if score is None: score = -tree.score.score_nocache(tree.vertex, []) + # if score is None: score = -tree.score.score(tree.vertex, []) self.tree = tree self.add = add self.grow_score = score @@ -17,8 +17,8 @@ def grow(self, available, parents): self.branches = [] for add in available: parents.append(add) - # score = -self.tree.score.score_nocache(self.tree.vertex, parents) - score = -self.tree.score.score(self.tree.vertex, parents) + score = -self.tree.score.score_nocache(self.tree.vertex, parents) + # score = -self.tree.score.score(self.tree.vertex, parents) parents.remove(add) branch = GSTNode(self.tree, add, score) if score > self.grow_score: self.branches.append(branch) @@ -30,8 +30,8 @@ def shrink(self, parents): best = None for remove in [parent for parent in parents]: parents.remove(remove) - # score = 
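# A toy illustration of the `tuck` move defined above ("tucks the node at
# position i into position j"). This simplified sketch acts on a plain list;
# the real method operates on the Order class, whose parent sets and local
# scores are refreshed afterwards by update().

def tuck_list(pi, i, j):
    # move the element at position i to position j, shifting the rest right
    pi.insert(j, pi.pop(i))
    return pi

print(tuck_list([0, 1, 2, 3, 4], 3, 1))   # [0, 3, 1, 2, 4]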
-self.tree.score.score_nocache(self.tree.vertex, parents) - score = -self.tree.score.score(self.tree.vertex, parents) + score = -self.tree.score.score_nocache(self.tree.vertex, parents) + # score = -self.tree.score.score(self.tree.vertex, parents) parents.append(remove) if score > self.shrink_score: self.shrink_score = score diff --git a/tests/TestBOSS.py b/tests/TestBOSS.py index f67fa5a3..ea963d47 100644 --- a/tests/TestBOSS.py +++ b/tests/TestBOSS.py @@ -7,10 +7,8 @@ from causallearn.graph.GraphNode import GraphNode from causallearn.search.PermutationBased.BOSS import boss from causallearn.utils.DAG2CPDAG import dag2cpdag -from causallearn.utils.GraphUtils import GraphUtils -import matplotlib.image as mpimg -import matplotlib.pyplot as plt -import io + +import gc def simulate_data(p, d, n): @@ -53,11 +51,13 @@ def simulate_data(p, d, n): class TestBOSS(unittest.TestCase): def test_boss(self): ps = [30, 60] - # ps = [30] ds = [0.1, 0.15] n = 1000 reps = 5 + gc.set_threshold(20000, 50, 50) + # gc.set_debug(gc.DEBUG_STATS) + for p in ps: for d in ds: stats = [[], [], [], []] @@ -84,15 +84,8 @@ def test_boss(self): G0 = dag2cpdag(G0) - G = boss(X, parameters={'lambda_value': 2}) - - # pyd = GraphUtils.to_pydot(G) - # tmp_png = pyd.create_png(f="png") - # fp = io.BytesIO(tmp_png) - # img = mpimg.imread(fp, format='png') - # plt.axis('off') - # plt.imshow(img) - # plt.show() + G = boss(X, parameters={'lambda_value': 4}) + gc.collect() AdjC = AdjacencyConfusion(G0, G) stats[0].append(AdjC.get_adj_precision()) diff --git a/tests/TestGRaSP.py b/tests/TestGRaSP.py index 0ac1d81d..f1fe0dd0 100644 --- a/tests/TestGRaSP.py +++ b/tests/TestGRaSP.py @@ -7,10 +7,8 @@ from causallearn.graph.GraphNode import GraphNode from causallearn.search.PermutationBased.GRaSP import grasp from causallearn.utils.DAG2CPDAG import dag2cpdag -from causallearn.utils.GraphUtils import GraphUtils -import matplotlib.image as mpimg -import matplotlib.pyplot as plt -import io + +import gc def simulate_data(p, d, n): @@ -53,11 +51,13 @@ def simulate_data(p, d, n): class TestGRaSP(unittest.TestCase): def test_grasp(self): ps = [30, 60] - # ps = [30] ds = [0.1, 0.15] n = 1000 reps = 5 + gc.set_threshold(20000, 50, 50) + # gc.set_debug(gc.DEBUG_STATS) + for p in ps: for d in ds: stats = [[], [], [], []] @@ -84,15 +84,8 @@ def test_grasp(self): G0 = dag2cpdag(G0) - G = grasp(X, depth=1, parameters={'lambda_value': 2}) - - # pyd = GraphUtils.to_pydot(G) - # tmp_png = pyd.create_png(f="png") - # fp = io.BytesIO(tmp_png) - # img = mpimg.imread(fp, format='png') - # plt.axis('off') - # plt.imshow(img) - # plt.show() + G = grasp(X, depth=1, parameters={'lambda_value': 4}) + gc.collect() AdjC = AdjacencyConfusion(G0, G) stats[0].append(AdjC.get_adj_precision()) From ebc80c946644962981725602916f726079525659 Mon Sep 17 00:00:00 2001 From: Bryan Andrews Date: Thu, 18 Jul 2024 13:01:54 -0500 Subject: [PATCH 66/84] final updates Signed-off-by: Oliver Schacht --- causallearn/score/LocalScoreFunction.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/causallearn/score/LocalScoreFunction.py b/causallearn/score/LocalScoreFunction.py index e02ba737..a4af5361 100644 --- a/causallearn/score/LocalScoreFunction.py +++ b/causallearn/score/LocalScoreFunction.py @@ -34,9 +34,9 @@ def local_score_BIC(Data: ndarray, i: int, PAi: List[int], parameters=None) -> f if len(PAi) == 0: return n * np.log(cov[i, i]) - yX = np.mat(cov[np.ix_([i], PAi)]) - XX = np.mat(cov[np.ix_(PAi, PAi)]) - H = np.log(cov[i, i] - yX * 
np.linalg.inv(XX) * yX.T) + yX = cov[np.ix_([i], PAi)] + XX = cov[np.ix_(PAi, PAi)] + H = np.log(cov[i, i] - yX @ np.linalg.inv(XX) @ yX.T) return n * H + np.log(n) * len(PAi) * lambda_value @@ -68,9 +68,9 @@ def local_score_BIC_from_cov( if len(PAi) == 0: return n * np.log(cov[i, i]) - yX = np.mat(cov[np.ix_([i], PAi)]) - XX = np.mat(cov[np.ix_(PAi, PAi)]) - H = np.log(cov[i, i] - yX * np.linalg.inv(XX) * yX.T) + yX = cov[np.ix_([i], PAi)] + XX = cov[np.ix_(PAi, PAi)] + H = np.log(cov[i, i] - yX @ np.linalg.inv(XX) @ yX.T) return n * H + np.log(n) * len(PAi) * lambda_value From b9b4294a743a995e107a504792c372e9e38aaa75 Mon Sep 17 00:00:00 2001 From: ZhiyiHuang Date: Sun, 25 Aug 2024 10:50:46 +0800 Subject: [PATCH 67/84] fix: fix LocalScoreFunction.py Signed-off-by: ZhiyiHuang Signed-off-by: Oliver Schacht --- causallearn/score/LocalScoreFunction.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/causallearn/score/LocalScoreFunction.py b/causallearn/score/LocalScoreFunction.py index a4af5361..488a2c45 100644 --- a/causallearn/score/LocalScoreFunction.py +++ b/causallearn/score/LocalScoreFunction.py @@ -198,7 +198,7 @@ def local_score_cv_general( PAi = list(PAi) T = Data.shape[0] - X = Data[:, Xi] + X = Data[:, [Xi]] var_lambda = parameters["lambda"] # regularization parameter k = parameters["kfold"] # k-fold cross validation n0 = math.floor(T / k) @@ -715,7 +715,7 @@ def local_score_marginal_general( """ T = Data.shape[0] - X = Data[:, Xi] + X = Data[:, [Xi]] dX = X.shape[1] # set the kernel for X From c67349579398bb2bd5a24cb6e8d2355765bb1669 Mon Sep 17 00:00:00 2001 From: EvieQ01 <65006489+EvieQ01@users.noreply.github.com> Date: Wed, 4 Sep 2024 21:14:11 -0400 Subject: [PATCH 68/84] Minor changes Signed-off-by: Oliver Schacht --- causallearn/search/ConstraintBased/FCI.py | 56 +++++++++++------------ 1 file changed, 26 insertions(+), 30 deletions(-) diff --git a/causallearn/search/ConstraintBased/FCI.py b/causallearn/search/ConstraintBased/FCI.py index d1af061c..4c8b9411 100644 --- a/causallearn/search/ConstraintBased/FCI.py +++ b/causallearn/search/ConstraintBased/FCI.py @@ -2,7 +2,7 @@ import warnings from queue import Queue -from typing import List, Set, Tuple, Dict +from typing import List, Set, Tuple, Dict, Generator from numpy import ndarray from causallearn.graph.Edge import Edge @@ -44,7 +44,7 @@ def traverseCircle(node: Node, edge: Edge) -> Node | None: if edge.get_endpoint1() == Endpoint.CIRCLE and edge.get_endpoint2() == Endpoint.CIRCLE: return edge.get_node2() elif node == edge.get_node2(): - if edge.get_endpoint1() == Endpoint.CIRCLE or edge.get_endpoint2() == Endpoint.CIRCLE: + if edge.get_endpoint1() == Endpoint.CIRCLE and edge.get_endpoint2() == Endpoint.CIRCLE: return edge.get_node1() return None @@ -82,7 +82,7 @@ def existsSemiDirectedPath(node_from: Node, node_to: Node, G: Graph) -> bool: ## return False -def GetUncoveredCirclePath(node_from: Node, node_to: Node, G: Graph) -> List[Node] | None: +def GetUncoveredCirclePath(node_from: Node, node_to: Node, G: Graph) -> Generator[Node] | None: Q = Queue() V = set() @@ -102,7 +102,7 @@ def GetUncoveredCirclePath(node_from: Node, node_to: Node, G: Graph) -> List[Nod while not Q.empty(): node_t, path = Q.get_nowait() if node_t == node_to and is_uncovered_path(path, G): - return path + yield path for node_u in G.get_adjacent_nodes(node_t): edge = G.get_edge(node_t, node_u) @@ -467,35 +467,31 @@ def ruleR5(graph: Graph, changeFlag: bool, for node_D in b_circle_adj_nodes_set: if graph.is_adjacent_to(node_A, 
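# A self-contained check of the BIC rewrite above: with plain ndarrays, `@`
# reproduces the removed np.mat products, and H remains a 1x1 array that
# np.log handles elementwise. Synthetic data, purely to exercise the formula.

import numpy as np

n, lambda_value = 1000, 2
Data = np.random.randn(n, 4)
cov = np.cov(Data.T)
i, PAi = 0, [1, 2]

yX = cov[np.ix_([i], PAi)]             # shape (1, 2)
XX = cov[np.ix_(PAi, PAi)]             # shape (2, 2)
H = np.log(cov[i, i] - yX @ np.linalg.inv(XX) @ yX.T)
score = n * H + np.log(n) * len(PAi) * lambda_value
print(score.item())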
node_D): continue - path = GetUncoveredCirclePath(node_from=node_C, node_to=node_D, G=graph) - if path is not None: - # pdb.set_trace() + paths = GetUncoveredCirclePath(node_from=node_C, node_to=node_D, G=graph) + if paths is not None: + # Mark the change if find at least one path changeFlag = True - # orient A - C, D - B - edge = graph.get_edge(node_A, path[0]) - graph.remove_edge(edge) - graph.add_edge(Edge(node_A, path[0], Endpoint.TAIL, Endpoint.TAIL)) - - edge = graph.get_edge(node_B, path[-1]) - graph.remove_edge(edge) - graph.add_edge(Edge(node_B, path[-1], Endpoint.TAIL, Endpoint.TAIL)) - if verbose: - print("Orienting edge (Double tail): " + graph.get_edge(node_A, path[0]).__str__()) - print("Orienting edge (Double tail): " + graph.get_edge(node_B, path[-1]).__str__()) - - # orient everything on the path to both tails - for i in range(len(path) - 1): - edge = graph.get_edge(path[i], path[i + 1]) + for path in paths: + # orient A - C, D - B + edge = graph.get_edge(node_A, path[0]) graph.remove_edge(edge) - graph.add_edge(Edge(path[i], path[i + 1], Endpoint.TAIL, Endpoint.TAIL)) - if verbose: - print("Orienting edge (Double tail): " + graph.get_edge(path[i], path[i + 1]).__str__()) - - # if not is_arrow_point_allowed(node_A, path[0], graph, bk): - # break - # if not is_arrow_point_allowed(node_B, path[-1], graph, bk): - # break + graph.add_edge(Edge(node_A, path[0], Endpoint.TAIL, Endpoint.TAIL)) + edge = graph.get_edge(node_B, path[-1]) + graph.remove_edge(edge) + graph.add_edge(Edge(node_B, path[-1], Endpoint.TAIL, Endpoint.TAIL)) + if verbose: + print("Orienting edge (Double tail): " + graph.get_edge(node_A, path[0]).__str__()) + print("Orienting edge (Double tail): " + graph.get_edge(node_B, path[-1]).__str__()) + + # orient everything on the path to both tails + for i in range(len(path) - 1): + edge = graph.get_edge(path[i], path[i + 1]) + graph.remove_edge(edge) + graph.add_edge(Edge(path[i], path[i + 1], Endpoint.TAIL, Endpoint.TAIL)) + if verbose: + print("Orienting edge (Double tail): " + graph.get_edge(path[i], path[i + 1]).__str__()) + continue return changeFlag From 28e6ba597d78136105f2ec8662ef9f01a84708b6 Mon Sep 17 00:00:00 2001 From: EvieQ01 <65006489+EvieQ01@users.noreply.github.com> Date: Tue, 24 Sep 2024 21:19:33 -0400 Subject: [PATCH 69/84] Bug fix in finding undirected circle paths, and allow multiple ucp Signed-off-by: Oliver Schacht --- causallearn/search/ConstraintBased/FCI.py | 69 +++++++++++++---------- 1 file changed, 38 insertions(+), 31 deletions(-) diff --git a/causallearn/search/ConstraintBased/FCI.py b/causallearn/search/ConstraintBased/FCI.py index 4c8b9411..97146b7d 100644 --- a/causallearn/search/ConstraintBased/FCI.py +++ b/causallearn/search/ConstraintBased/FCI.py @@ -82,17 +82,19 @@ def existsSemiDirectedPath(node_from: Node, node_to: Node, G: Graph) -> bool: ## return False -def GetUncoveredCirclePath(node_from: Node, node_to: Node, G: Graph) -> Generator[Node] | None: +def GetUncoveredCirclePath(node_from: Node, node_to: Node, G: Graph, exclude_node: List[Node]) -> Generator[Node] | None: Q = Queue() V = set() path = [node_from] for node_u in G.get_adjacent_nodes(node_from): + if node_u in exclude_node: + continue edge = G.get_edge(node_from, node_u) node_c = traverseCircle(node_from, edge) - if node_c is None: + if node_c is None or node_c in exclude_node: continue if not V.__contains__(node_c): @@ -108,14 +110,13 @@ def GetUncoveredCirclePath(node_from: Node, node_to: Node, G: Graph) -> Generato edge = G.get_edge(node_t, node_u) node_c = 
traverseCircle(node_t, edge) - if node_c is None: + if node_c is None or node_c in exclude_node: continue if not V.__contains__(node_c): V.add(node_c) Q.put((node_c, path + [node_c])) - return None def existOnePathWithPossibleParents(previous, node_w: Node, node_x: Node, node_b: Node, graph: Graph) -> bool: @@ -440,10 +441,32 @@ def ruleR5(graph: Graph, changeFlag: bool, double tail. """ nodes = graph.get_nodes() + def orient_on_path_helper(path, node_A, node_B): + # orient A - C, D - B + edge = graph.get_edge(node_A, path[0]) + graph.remove_edge(edge) + graph.add_edge(Edge(node_A, path[0], Endpoint.TAIL, Endpoint.TAIL)) + + edge = graph.get_edge(node_B, path[-1]) + graph.remove_edge(edge) + graph.add_edge(Edge(node_B, path[-1], Endpoint.TAIL, Endpoint.TAIL)) + if verbose: + print("Orienting edge A - C (Double tail): " + graph.get_edge(node_A, path[0]).__str__()) + print("Orienting edge B - D (Double tail): " + graph.get_edge(node_B, path[-1]).__str__()) + + # orient everything on the path to both tails + for i in range(len(path) - 1): + edge = graph.get_edge(path[i], path[i + 1]) + graph.remove_edge(edge) + graph.add_edge(Edge(path[i], path[i + 1], Endpoint.TAIL, Endpoint.TAIL)) + if verbose: + print("Orienting edge (Double tail): " + graph.get_edge(path[i], path[i + 1]).__str__()) + for node_B in nodes: intoBCircles = graph.get_nodes_into(node_B, Endpoint.CIRCLE) for node_A in intoBCircles: + found_paths_between_AB = [] if graph.get_endpoint(node_B, node_A) != Endpoint.CIRCLE: continue else: @@ -467,33 +490,17 @@ def ruleR5(graph: Graph, changeFlag: bool, for node_D in b_circle_adj_nodes_set: if graph.is_adjacent_to(node_A, node_D): continue - paths = GetUncoveredCirclePath(node_from=node_C, node_to=node_D, G=graph) - if paths is not None: - # Mark the change if find at least one path - changeFlag = True - for path in paths: - # orient A - C, D - B - edge = graph.get_edge(node_A, path[0]) - graph.remove_edge(edge) - graph.add_edge(Edge(node_A, path[0], Endpoint.TAIL, Endpoint.TAIL)) - - edge = graph.get_edge(node_B, path[-1]) - graph.remove_edge(edge) - graph.add_edge(Edge(node_B, path[-1], Endpoint.TAIL, Endpoint.TAIL)) - if verbose: - print("Orienting edge (Double tail): " + graph.get_edge(node_A, path[0]).__str__()) - print("Orienting edge (Double tail): " + graph.get_edge(node_B, path[-1]).__str__()) - - # orient everything on the path to both tails - for i in range(len(path) - 1): - edge = graph.get_edge(path[i], path[i + 1]) - graph.remove_edge(edge) - graph.add_edge(Edge(path[i], path[i + 1], Endpoint.TAIL, Endpoint.TAIL)) - if verbose: - print("Orienting edge (Double tail): " + graph.get_edge(path[i], path[i + 1]).__str__()) - - continue - + paths = GetUncoveredCirclePath(node_from=node_C, node_to=node_D, G=graph, exclude_node=[node_A, node_B]) # get the uncovered circle path between C and D, excluding A and B + found_paths_between_AB.append(paths) + + # Orient the uncovered circle path between A and B + for paths in found_paths_between_AB: + for path in paths: + changeFlag = True + if verbose: + print("Find uncovered circle path between A and B: " + graph.get_edge(node_A, node_B).__str__()) + orient_on_path_helper(path, node_A, node_B) + return changeFlag def ruleR6(graph: Graph, changeFlag: bool, From 6088178da7eccdc2cf8afea7dff7ca4949372e1b Mon Sep 17 00:00:00 2001 From: EvieQ01 <65006489+EvieQ01@users.noreply.github.com> Date: Sat, 28 Sep 2024 02:13:22 -0400 Subject: [PATCH 70/84] modify change_flag Signed-off-by: Oliver Schacht --- causallearn/search/ConstraintBased/FCI.py | 
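# For reference, the property is_uncovered_path enforces on the paths yielded
# by GetUncoveredCirclePath above: every interior node's two neighbors on the
# path must themselves be non-adjacent (Zhang's rule R5 acts on uncovered
# circle paths). A standalone sketch; `adjacent` is a hypothetical predicate
# standing in for graph.is_adjacent_to.

def is_uncovered(path, adjacent):
    return all(not adjacent(path[k - 1], path[k + 1])
               for k in range(1, len(path) - 1))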
21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/causallearn/search/ConstraintBased/FCI.py b/causallearn/search/ConstraintBased/FCI.py index 97146b7d..ec05bbae 100644 --- a/causallearn/search/ConstraintBased/FCI.py +++ b/causallearn/search/ConstraintBased/FCI.py @@ -724,9 +724,8 @@ def ruleR4B(graph: Graph, maxPathLength: int, data: ndarray, independence_test_m -def rule8(graph: Graph, nodes: List[Node]): - nodes = graph.get_nodes() - changeFlag = False +def rule8(graph: Graph, nodes: List[Node], changeFlag): + nodes = graph.get_nodes() if nodes is None else nodes for node_B in nodes: adj = graph.get_adjacent_nodes(node_B) if len(adj) < 2: @@ -781,9 +780,9 @@ def find_possible_children(graph: Graph, parent_node, en_nodes=None): return potential_child_nodes -def rule9(graph: Graph, nodes: List[Node]): - changeFlag = False - nodes = graph.get_nodes() +def rule9(graph: Graph, nodes: List[Node], changeFlag): + # changeFlag = False + nodes = graph.get_nodes() if nodes is None else nodes for node_C in nodes: intoCArrows = graph.get_nodes_into(node_C, Endpoint.ARROW) for node_A in intoCArrows: @@ -809,8 +808,8 @@ def rule9(graph: Graph, nodes: List[Node]): return changeFlag -def rule10(graph: Graph): - changeFlag = False +def rule10(graph: Graph, changeFlag): + # changeFlag = False nodes = graph.get_nodes() for node_C in nodes: intoCArrows = graph.get_nodes_into(node_C, Endpoint.ARROW) @@ -1116,12 +1115,12 @@ def fci(dataset: ndarray, independence_test_method: str=fisherz, alpha: float = change_flag = ruleR7(graph, change_flag, verbose) # rule 8 - change_flag = rule8(graph,nodes) + change_flag = rule8(graph,nodes, change_flag) # rule 9 - change_flag = rule9(graph, nodes) + change_flag = rule9(graph, nodes, change_flag) # rule 10 - change_flag = rule10(graph) + change_flag = rule10(graph, change_flag) graph.set_pag(True) From 1dd6b2fdc78c15a628a83841b90b1ec8c47187a5 Mon Sep 17 00:00:00 2001 From: EvieQ01 <65006489+EvieQ01@users.noreply.github.com> Date: Sat, 28 Sep 2024 02:14:15 -0400 Subject: [PATCH 71/84] DAG to a maximal PAG by adding rule5-10 and adding selection variables Signed-off-by: Oliver Schacht --- causallearn/utils/DAG2PAG.py | 18 +++++++++++++----- tests/TestDAG2PAG.py | 14 ++++++++++++++ 2 files changed, 27 insertions(+), 5 deletions(-) diff --git a/causallearn/utils/DAG2PAG.py b/causallearn/utils/DAG2PAG.py index 9980b96b..2a091f72 100644 --- a/causallearn/utils/DAG2PAG.py +++ b/causallearn/utils/DAG2PAG.py @@ -10,10 +10,11 @@ from causallearn.graph.Endpoint import Endpoint from causallearn.graph.GeneralGraph import GeneralGraph from causallearn.graph.Node import Node -from causallearn.search.ConstraintBased.FCI import rule0, rulesR1R2cycle, ruleR3, ruleR4B +from causallearn.search.ConstraintBased.FCI import rule0, rulesR1R2cycle, ruleR3, ruleR4B, ruleR5, ruleR6, ruleR7, rule8, rule9, rule10 from causallearn.utils.cit import CIT, d_separation -def dag2pag(dag: Dag, islatent: List[Node]) -> GeneralGraph: + +def dag2pag(dag: Dag, islatent: List[Node], isselection: List[Node] = []) -> GeneralGraph: """ Convert a DAG to its corresponding PAG Parameters @@ -27,8 +28,8 @@ def dag2pag(dag: Dag, islatent: List[Node]) -> GeneralGraph: dg = nx.DiGraph() true_dag = nx.DiGraph() nodes = dag.get_nodes() - observed_nodes = list(set(nodes) - set(islatent)) - mod_nodes = observed_nodes + islatent + observed_nodes = list(set(nodes) - set(islatent) - set(isselection)) + mod_nodes = observed_nodes + islatent + isselection nodes = dag.get_nodes() nodes_ids = 
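# The change_flag threading above (rules 8-10 now take and return the shared
# flag rather than resetting it) supports the usual fixed-point loop over
# orientation rules. Schematic only; `rules` is a hypothetical list of
# callables following the (graph, change_flag) convention used in fci().

change_flag = True
while change_flag:
    change_flag = False
    for rule in rules:
        # each rule ORs its own orientations into the shared flag
        change_flag = rule(graph, change_flag)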
{node: i for i, node in enumerate(nodes)} mod_nodeids = {node: i for i, node in enumerate(mod_nodes)} @@ -65,7 +66,7 @@ def dag2pag(dag: Dag, islatent: List[Node]) -> GeneralGraph: for Z in combinations(observed_nodes, l): if nodex in Z or nodey in Z: continue - if d_separated(dg, {nodes_ids[nodex]}, {nodes_ids[nodey]}, set(nodes_ids[z] for z in Z)): + if d_separated(dg, {nodes_ids[nodex]}, {nodes_ids[nodey]}, set(nodes_ids[z] for z in Z) | set([nodes_ids[s] for s in isselection])): if edge: PAG.remove_edge(edge) sepset[(nodes_ids[nodex], nodes_ids[nodey])] |= set(Z) @@ -105,6 +106,13 @@ def dag2pag(dag: Dag, islatent: List[Node]) -> GeneralGraph: change_flag = ruleR4B(PAG, -1, data, independence_test_method, 0.05, sep_sets=sepset_reindexed, change_flag=change_flag, bk=None, verbose=False) + change_flag = ruleR5(PAG, changeFlag=change_flag, verbose=True) + change_flag = ruleR6(PAG, changeFlag=change_flag) + change_flag = ruleR7(PAG, changeFlag=change_flag) + change_flag = rule8(PAG, nodes=observed_nodes, changeFlag=change_flag) + change_flag = rule9(PAG, nodes=observed_nodes, changeFlag=change_flag) + change_flag = rule10(PAG, changeFlag=change_flag) + return PAG diff --git a/tests/TestDAG2PAG.py b/tests/TestDAG2PAG.py index 569fc945..004ea897 100644 --- a/tests/TestDAG2PAG.py +++ b/tests/TestDAG2PAG.py @@ -72,3 +72,17 @@ def test_case3(self): print(pag) graphviz_pag = GraphUtils.to_pgv(pag) graphviz_pag.draw("pag.png", prog='dot', format='png') + + def test_case_selection(self): + nodes = [] + for i in range(5): + nodes.append(GraphNode(str(i))) + dag = Dag(nodes) + dag.add_directed_edge(nodes[0], nodes[1]) + dag.add_directed_edge(nodes[1], nodes[2]) + dag.add_directed_edge(nodes[2], nodes[3]) + # Selection nodes + dag.add_directed_edge(nodes[3], nodes[4]) + dag.add_directed_edge(nodes[0], nodes[4]) + pag = dag2pag(dag, islatent=[], isselection=[nodes[4]]) + print(pag) From 58c50caa39b478ee005dac50202c6f9c2a76c7e1 Mon Sep 17 00:00:00 2001 From: Zhiyi Huang <83860323+zhi-yi-huang@users.noreply.github.com> Date: Sun, 6 Oct 2024 15:27:40 +0800 Subject: [PATCH 72/84] fix: orient the edge between alpha and beta as an undirected edge. 
Signed-off-by: Zhiyi Huang <83860323+zhi-yi-huang@users.noreply.github.com> Signed-off-by: Oliver Schacht --- causallearn/search/ConstraintBased/FCI.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/causallearn/search/ConstraintBased/FCI.py b/causallearn/search/ConstraintBased/FCI.py index ec05bbae..ff3f92d0 100644 --- a/causallearn/search/ConstraintBased/FCI.py +++ b/causallearn/search/ConstraintBased/FCI.py @@ -499,6 +499,9 @@ def orient_on_path_helper(path, node_A, node_B): changeFlag = True if verbose: print("Find uncovered circle path between A and B: " + graph.get_edge(node_A, node_B).__str__()) + edge = graph.get_edge(node_A, node_B) + graph.remove_edge(edge) + graph.add_edge(Edge(node_A, node_B, Endpoint.TAIL, Endpoint.TAIL)) orient_on_path_helper(path, node_A, node_B) return changeFlag From d8d6a0c6245093ef84c463c967121eb3d2dcb645 Mon Sep 17 00:00:00 2001 From: ZhiyiHuang Date: Sun, 6 Oct 2024 16:21:19 +0800 Subject: [PATCH 73/84] fix: fci unittest Signed-off-by: ZhiyiHuang Signed-off-by: Oliver Schacht --- ...rn_discrete_10000_alarm_fci_chisq_0.05.txt | 8 +-- ...rn_discrete_10000_andes_fci_chisq_0.05.txt | 8 +-- ...arn_discrete_10000_asia_fci_chisq_0.05.txt | 2 +- ...n_discrete_10000_barley_fci_chisq_0.05.txt | 8 +-- ...rn_discrete_10000_child_fci_chisq_0.05.txt | 4 +- ...screte_10000_hailfinder_fci_chisq_0.05.txt | 2 +- ...n_discrete_10000_hepar2_fci_chisq_0.05.txt | 2 +- ...iscrete_10000_insurance_fci_chisq_0.05.txt | 2 +- ...rn_discrete_10000_water_fci_chisq_0.05.txt | 6 +- ...discrete_10000_win95pts_fci_chisq_0.05.txt | 6 +- .../linear_10_fci_fisherz_0.05.txt | 2 +- tests/TestFCI.py | 55 +++++++++++++------ 12 files changed, 64 insertions(+), 41 deletions(-) diff --git a/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_alarm_fci_chisq_0.05.txt b/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_alarm_fci_chisq_0.05.txt index 2430e1de..ce08eb4f 100644 --- a/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_alarm_fci_chisq_0.05.txt +++ b/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_alarm_fci_chisq_0.05.txt @@ -3,7 +3,7 @@ 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 -1 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -2 0 0 0 2 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +2 0 0 0 -1 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 @@ -12,7 +12,7 @@ 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 2 +0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 @@ -20,9 +20,9 @@ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 -1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 -0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 0 2 0 2 0 0 0 0 2 -1 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 0 -1 0 -1 0 0 0 0 2 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 diff --git a/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_andes_fci_chisq_0.05.txt b/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_andes_fci_chisq_0.05.txt index a9f9886f..934b13bd 100644 --- a/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_andes_fci_chisq_0.05.txt +++ b/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_andes_fci_chisq_0.05.txt @@ -39,8 +39,8 @@ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 2 -1 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 @@ -106,7 +106,7 @@ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 2 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 -1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 @@ -208,7 +208,7 @@ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 -0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0
diff --git a/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_asia_fci_chisq_0.05.txt b/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_asia_fci_chisq_0.05.txt
index c0467173..b3aa50bb 100644
--- a/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_asia_fci_chisq_0.05.txt
+++ b/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_asia_fci_chisq_0.05.txt
@@ -1,7 +1,7 @@
 0 0 0 0 0 0 2 0
 0 0 0 0 0 2 0 0
 0 0 0 2 2 0 0 0
-0 0 2 0 0 2 0 0
+0 0 2 0 0 -1 0 0
 0 0 2 0 0 0 0 2
 0 1 0 1 0 0 0 0
 2 0 0 0 0 0 0 0
diff --git a/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_barley_fci_chisq_0.05.txt b/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_barley_fci_chisq_0.05.txt
index b1986bd1..864b9e5f 100644
--- a/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_barley_fci_chisq_0.05.txt
+++ b/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_barley_fci_chisq_0.05.txt
@@ -1,4 +1,4 @@
-0 0 0 2 0 0 2 0 0 2 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +0 0 0 -1 0 0 -1 0 0 -1 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 1 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
@@ -14,8 +14,8 @@
 0 0 0 0 0 0 0 1 0 0 0 0 1 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 2 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 0 -1 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 0 0 0
@@ -28,7 +28,7 @@
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 0 0
diff --git a/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_child_fci_chisq_0.05.txt b/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_child_fci_chisq_0.05.txt
index bac534d8..a739c18f 100644
--- a/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_child_fci_chisq_0.05.txt
+++ b/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_child_fci_chisq_0.05.txt
@@ -12,9 +12,9 @@
 1 0 0 0 0 0 0 0 0 0 0 0 0 -1 1 -1 -1 1 -1 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 -0 0 0 0 0 0 2 0 0 0 0 2 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 2 0 0 0 0 -1 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 -1 -1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 -0 0 2 2 2 2 0 0 0 0 0 2 0 0 0 0 0 0 0 0 +0 0 -1 2 -1 -1 0 0 0 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 2 0 -1 0 0 0 0 0 0
diff --git a/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_hailfinder_fci_chisq_0.05.txt b/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_hailfinder_fci_chisq_0.05.txt
index 894ff9cd..4671059c 100644
--- a/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_hailfinder_fci_chisq_0.05.txt
+++ b/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_hailfinder_fci_chisq_0.05.txt
@@ -21,7 +21,7 @@
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
diff --git a/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_hepar2_fci_chisq_0.05.txt b/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_hepar2_fci_chisq_0.05.txt
index 9e6c7aac..0569bd1b 100644
--- a/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_hepar2_fci_chisq_0.05.txt
+++ b/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_hepar2_fci_chisq_0.05.txt
@@ -40,7 +40,7 @@
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
diff --git a/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_insurance_fci_chisq_0.05.txt b/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_insurance_fci_chisq_0.05.txt
index 80a266db..4d6a5b4a 100644
--- a/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_insurance_fci_chisq_0.05.txt
+++ b/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_insurance_fci_chisq_0.05.txt
@@ -2,7 +2,7 @@
 -1 0 1 1 0 0 0 0 0 0 0 0 1 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 1 1 0 0 0 -1 0 0 0 0 0 0 0 0 -1 1 0 0 1 0 0 0 0 0 0 1 1 0 0 0 0 0 0 -1 0 0 0 1 0 0 0 -1 -1 0 0 0 0 0 0 0 -1 -0 0 2 0 0 0 2 0 0 0 0 2 0 0 0 0 2 0 0 0 0 0 0 0 2 0 0 +0 0 2 0 0 0 -1 0 0 0 0 -1 0 0 0 0 -1 0 0 0 0 0 0 0 -1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 0 0 0 0 -1 0 0 0 1 1 0 0 0 0 0 0 0 0 0 -1 0 -1 0 0 -1 0
diff --git a/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_water_fci_chisq_0.05.txt b/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_water_fci_chisq_0.05.txt
index cee53cda..bd9c2aec 100644
--- a/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_water_fci_chisq_0.05.txt
+++ b/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_water_fci_chisq_0.05.txt
@@ -1,4 +1,4 @@
-0 0 0 0 0 0 0 0 2 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 2 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 -1 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
@@ -14,12 +14,12 @@
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 -0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 0 0 0 0 +0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 1 -0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 +0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
diff --git a/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_win95pts_fci_chisq_0.05.txt b/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_win95pts_fci_chisq_0.05.txt
index b971fb87..8bc6b692 100644
--- a/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_win95pts_fci_chisq_0.05.txt
+++ b/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_win95pts_fci_chisq_0.05.txt
@@ -8,7 +8,7 @@
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -0 0 0 2 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +0 0 0 2 0 0 0 0 0 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
@@ -34,7 +34,7 @@
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +0 0 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
@@ -50,7 +50,7 @@
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
diff --git a/tests/TestData/benchmark_returned_results/linear_10_fci_fisherz_0.05.txt b/tests/TestData/benchmark_returned_results/linear_10_fci_fisherz_0.05.txt
index 046443a3..07a248fb 100644
--- a/tests/TestData/benchmark_returned_results/linear_10_fci_fisherz_0.05.txt
+++ b/tests/TestData/benchmark_returned_results/linear_10_fci_fisherz_0.05.txt
@@ -5,7 +5,7 @@
 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 -0 0 0 0 0 0 0 0 2 0 0 0 0 2 0 0 0 2 2 0 +0 0 0 0 0 0 0 0 -1 0 0 0 0 -1 0 0 0 2 -1 0 1 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 -1 0 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0
diff --git a/tests/TestFCI.py b/tests/TestFCI.py
index 7884cbde..e2fb683c 100644
--- a/tests/TestFCI.py
+++ b/tests/TestFCI.py
@@ -10,8 +10,11 @@
 import pandas as pd
 
 from causallearn.graph.Dag import Dag
+from causallearn.graph.Edge import Edge
+from causallearn.graph.Endpoint import Endpoint
+from causallearn.graph.GeneralGraph import GeneralGraph
 from causallearn.graph.GraphNode import GraphNode
-from causallearn.search.ConstraintBased.FCI import fci
+from causallearn.search.ConstraintBased.FCI import fci, ruleR5
 from causallearn.utils.cit import chisq, fisherz, kci, d_separation
 from causallearn.utils.DAG2PAG import dag2pag
 from causallearn.utils.GraphUtils import GraphUtils
@@ -30,21 +33,21 @@
 
 ######################################### Test Notes ###########################################
 BENCHMARK_TXTFILE_TO_MD5 = {
-    "tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_asia_fci_chisq_0.05.txt": "65f54932a9d8224459e56c40129e6d8b",
-    "tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_cancer_fci_chisq_0.05.txt": "0312381641cb3b4818e0c8539f74e802",
-    "tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_earthquake_fci_chisq_0.05.txt": "a1160b92ce15a700858552f08e43b7de",
-    "tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_sachs_fci_chisq_0.05.txt": "dced4a202fc32eceb75f53159fc81f3b",
-    "tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_survey_fci_chisq_0.05.txt": "b1a28eee1e0c6ea8a64ac1624585c3f4",
-    "tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_alarm_fci_chisq_0.05.txt": "c3bbc2b8aba456a4258dd071a42085bc",
-    "tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_barley_fci_chisq_0.05.txt": "4a5000e7a582083859ee6aef15073676",
-    "tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_child_fci_chisq_0.05.txt": "6b7858589e12f04b0f489ba4589a1254",
-    "tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_insurance_fci_chisq_0.05.txt": "9975942b936aa2b1fc90c09318ca2d08",
-    "tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_water_fci_chisq_0.05.txt": "48eee804d59526187b7ecd0519556ee5",
-    "tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_hailfinder_fci_chisq_0.05.txt": "6b9a6b95b6474f8530e85c022f4e749c",
-    "tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_hepar2_fci_chisq_0.05.txt": "4aae21ff3d9aa2435515ed2ee402294c",
-    "tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_win95pts_fci_chisq_0.05.txt": "648fdf271e1440c06ca2b31b55ef1f3f",
-    "tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_andes_fci_chisq_0.05.txt": "04092ae93e54c727579f08bf5dc34c77",
-    "tests/TestData/benchmark_returned_results/linear_10_fci_fisherz_0.05.txt": "289c86f9c665bf82bbcc4c9e1dcec3e7"
+    'tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_asia_fci_chisq_0.05.txt': '4b014aa90aee456bc02dddd008d8be1f',
+    'tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_cancer_fci_chisq_0.05.txt': '0312381641cb3b4818e0c8539f74e802',
+    'tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_earthquake_fci_chisq_0.05.txt': 'a1160b92ce15a700858552f08e43b7de',
+    'tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_sachs_fci_chisq_0.05.txt': 'dced4a202fc32eceb75f53159fc81f3b',
+    'tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_survey_fci_chisq_0.05.txt': 'b1a28eee1e0c6ea8a64ac1624585c3f4',
+    'tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_alarm_fci_chisq_0.05.txt': '4b06f6ea7fefe66255a3d16cb8a3711f',
+    'tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_barley_fci_chisq_0.05.txt': 'a926e329b2bf2b397d3676e9d2020483',
+    'tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_child_fci_chisq_0.05.txt': '5eff3ea6ce2e2daa53d6b649bd9c31fe',
+    'tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_insurance_fci_chisq_0.05.txt': '55eed065b8dfc6a21610d3d6e18423c6',
+    'tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_water_fci_chisq_0.05.txt': '48917f914c8f80684ae2abde87417d5c',
+    'tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_hailfinder_fci_chisq_0.05.txt': 'bcffe793866db6ddfe9cc602081789cd',
+    'tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_hepar2_fci_chisq_0.05.txt': '3558683a485588a41493b1a2f2c1d370',
+    'tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_win95pts_fci_chisq_0.05.txt': '52596ace09c45656f61c70d0e66bdd6a',
+    'tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_andes_fci_chisq_0.05.txt': '4b7e2ecba5eb50c7266f24670d0d0ae9',
+    'tests/TestData/benchmark_returned_results/linear_10_fci_fisherz_0.05.txt': 'eaabd1fbb16bc152b45e456c30166c55'
 }
 # INCONSISTENT_RESULT_GRAPH_ERRMSG = "Returned graph is inconsistent with the benchmark. Please check your code with the commit 5918419."
@@ -211,3 +214,23 @@ def test_er_graph(self):
         print(G)
         print(f'fci(data, d_separation, 0.05):')
         self.run_simulate_data_test(pag, G)
+
+    def test_rule5(self):
+        nodes = []
+        for i in range(7):
+            nodes.append(GraphNode(str(i)))
+        g = GeneralGraph(nodes)
+        g.add_edge(Edge(nodes[0], nodes[1], Endpoint.CIRCLE, Endpoint.CIRCLE))
+        g.add_edge(Edge(nodes[0], nodes[2], Endpoint.CIRCLE, Endpoint.CIRCLE))
+        g.add_edge(Edge(nodes[0], nodes[5], Endpoint.CIRCLE, Endpoint.CIRCLE))
+        g.add_edge(Edge(nodes[0], nodes[6], Endpoint.CIRCLE, Endpoint.CIRCLE))
+        g.add_edge(Edge(nodes[1], nodes[3], Endpoint.CIRCLE, Endpoint.CIRCLE))
+        g.add_edge(Edge(nodes[2], nodes[4], Endpoint.CIRCLE, Endpoint.CIRCLE))
+        g.add_edge(Edge(nodes[3], nodes[5], Endpoint.CIRCLE, Endpoint.CIRCLE))
+        g.add_edge(Edge(nodes[4], nodes[6], Endpoint.CIRCLE, Endpoint.CIRCLE))
+
+        ruleR5(g, changeFlag=True, verbose=True)
+
+        for edge in g.get_graph_edges():
+            assert edge.get_endpoint1() == Endpoint.TAIL
+            assert edge.get_endpoint2() == Endpoint.TAIL
From 50b603072615a39c9deb7aa44cd9ac3c5bd97b2c Mon Sep 17 00:00:00 2001
From: OliverSchacht <65898638+OliverSchacht@users.noreply.github.com>
Date: Thu, 17 Oct 2024 13:59:32 +0200
Subject: [PATCH 74/84] edit docstring

Signed-off-by: Oliver Schacht
---
 causallearn/utils/cit.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/causallearn/utils/cit.py b/causallearn/utils/cit.py
index 2d664e9b..fbc95e95 100644
--- a/causallearn/utils/cit.py
+++ b/causallearn/utils/cit.py
@@ -28,8 +28,8 @@ def CIT(data, method='fisherz', **kwargs):
     Parameters
     ----------
     data: numpy.ndarray of shape (n_samples, n_features)
-    method: str, in ["fisherz", "mv_fisherz", "mc_fisherz", "kci", "chisq", "gsq"]
-    kwargs: placeholder for future arguments, or for KCI specific arguments now
+    method: str, in ["fisherz", "mv_fisherz", "mc_fisherz", "kci", "rcit", "fastkci", "chisq", "gsq"]
+    kwargs: placeholder for future arguments, or for KCI, FastKCI or RCIT specific arguments now
            TODO: utimately kwargs should be replaced by explicit named parameters.
                  check https://github.com/cmu-phil/causal-learn/pull/62#discussion_r927239028
     '''
@@ -52,6 +52,7 @@
     else:
         raise ValueError("Unknown method: {}".format(method))
 
+
 class CIT_Base(object):
     # Base class for CIT, contains basic operations for input check and caching, etc.
     def __init__(self, data, cache_path=None, **kwargs):
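The docstring above now lists the two new method strings. A minimal usage sketch of the CIT wrapper with the 'fastkci' backend (illustrative only, not part of the patch; data and variable names are arbitrary):

    import numpy as np
    from causallearn.utils.cit import CIT

    # Toy chain X -> Y -> Z.
    rng = np.random.default_rng(0)
    x = rng.standard_normal((500, 1))
    y = x + 0.5 * rng.standard_normal((500, 1))
    z = y + 0.5 * rng.standard_normal((500, 1))
    data = np.hstack([x, y, z])

    fastkci = CIT(data, 'fastkci')      # FastKCI kwargs such as K, J, use_gp are optional
    p_marginal = fastkci(0, 2)          # X _||_ Z: expect a small p-value
    p_conditional = fastkci(0, 2, {1})  # X _||_ Z | Y: expect a large p-value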
From d3ae1bc926e7629402d425bbd9358ec5058d77ac Mon Sep 17 00:00:00 2001
From: OliverSchacht <65898638+OliverSchacht@users.noreply.github.com>
Date: Thu, 17 Oct 2024 14:00:21 +0200
Subject: [PATCH 75/84] remove experimental stuff

Signed-off-by: Oliver Schacht
---
 causallearn/utils/FastKCI/FastKCI.py | 26 +++++++++++++++-----------
 1 file changed, 15 insertions(+), 11 deletions(-)

diff --git a/causallearn/utils/FastKCI/FastKCI.py b/causallearn/utils/FastKCI/FastKCI.py
index 407977dd..91b99ad8 100644
--- a/causallearn/utils/FastKCI/FastKCI.py
+++ b/causallearn/utils/FastKCI/FastKCI.py
@@ -21,6 +21,9 @@ class FastKCI_CInd(object):
        "A kernel-based conditional independence test and application in causal discovery",
        In UAI 2011.
     [2] M. Zhang, and S. Williamson,
        "Embarrassingly Parallel Inference for Gaussian Processes", In JMLR 20 (2019)
+    [3] O. Schacht, and B. Huang
+       "FastKCI: A fast Kernel-based Conditional Indepdence test with application to causal discovery",
+       Working Paper.
     """
 
     def __init__(self, K=10, J=8, alpha=500, epsilon=1e-3, eig_thresh=1e-6, trimming_thresh=1e-3, use_gp=False):
@@ -65,21 +68,17 @@ def compute_pvalue(self, data_x=None, data_y=None, data_z=None):
         self.n = data_x.shape[0]
 
         Z_proposal = Parallel(n_jobs=-1)(delayed(self.partition_data)() for i in range(self.J))
-        self.Z_proposal, self.prob_Z = zip(*Z_proposal)
+        self.Z_proposal = zip(*Z_proposal)
         block_res = Parallel(n_jobs=-1)(delayed(self.pvalue_onblocks)(self.Z_proposal[i]) for i in range(self.J))
         test_stat, null_samples, log_likelihood = zip(*block_res)
         log_likelihood = np.array(log_likelihood)
-        self.prob_Z += log_likelihood
-        self.prob_Z = np.around(np.exp(self.prob_Z-logsumexp(self.prob_Z)), 6)  # experimental, not used
         self.all_null_samples = np.vstack(null_samples)
         self.all_p = np.array([np.sum(self.all_null_samples[i,] > test_stat[i]) / float(self.nullss) for i in range(self.J)])
         self.prob_weights = np.around(np.exp(log_likelihood-logsumexp(log_likelihood)), 6)
         self.all_test_stats = np.array(test_stat)
         self.test_stat = np.average(np.array(test_stat), weights=self.prob_weights)
         self.null_samples = np.average(null_samples, axis=0, weights=self.prob_weights)
-        # experimental, not used
-        self.pvalue_alt = np.sum(np.average(null_samples, axis=0, weights=self.prob_Z) > np.average(np.array(test_stat), weights=self.prob_Z)) / float(self.nullss)
         self.pvalue = np.sum(self.null_samples > self.test_stat) / float(self.nullss)
 
         return self.pvalue, self.test_stat
@@ -104,8 +103,7 @@ def partition_data(self):
         Z = np.array([np.random.multinomial(1, np.exp(ll[n, :]-logsumexp(ll[n, :]))).argmax() for n in range(self.n)])
         le = LabelEncoder()
         Z = le.fit_transform(Z)
-        prob_Z = np.take_along_axis(ll, Z[:, None], axis=1).sum()  # experimental, might be removed
-        return Z, prob_Z
+        return Z
 
     def pvalue_onblocks(self, Z_proposal):
         """
@@ -130,7 +128,7 @@
             X_k = np.copy(self.data[0][K_mask])
             Y_k = np.copy(self.data[1][K_mask])
             Z_k = np.copy(self.data_z[K_mask])
-            if (Z_k.shape[0] < 6):  # small blocks cause problems in GP, experimental
+            if (Z_k.shape[0] < 6):  # small blocks cause problems in GP
                 continue
             Kx, Ky, Kzx, Kzy, epsilon_x, epsilon_y, likelihood_x, likelihood_y = self.kernel_matrix(X_k, Y_k, Z_k)
             KxR, Rzx = Kernel.center_kernel_matrix_regression(Kx, Kzx, epsilon_x)
@@ -214,7 +212,8 @@
             vx = vx[:, np.abs(wx) > np.abs(wx).max() * 1e-10]
             wx = wx[np.abs(wx) > np.abs(wx).max() * 1e-10]
             vx = 2 * np.sqrt(n) * vx.dot(np.diag(np.sqrt(wx))) / np.sqrt(wx[0])
-            kernelx = ConstantKernel(1.0, (1e-3, 1e3)) * RBF(widthz * np.ones(Dz), (1e-2, 1e2)) + WhiteKernel(0.1, (1e-10, 1e+1))
+            kernelx = ConstantKernel(1.0, (1e-3, 1e3)) * RBF(widthz * np.ones(Dz), (1e-2, 1e2)) \
+                + WhiteKernel(0.1, (1e-10, 1e+1))
             gpx = GaussianProcessRegressor(kernel=kernelx)
             # fit Gaussian process, including hyperparameter optimization
             with warnings.catch_warnings():
@@ -237,7 +236,8 @@
             vy = vy[:, np.abs(wy) > np.abs(wy).max() * 1e-10]
             wy = wy[np.abs(wy) > np.abs(wy).max() * 1e-10]
             vy = 2 * np.sqrt(n) * vy.dot(np.diag(np.sqrt(wy))) / np.sqrt(wy[0])
-            kernely = ConstantKernel(1.0, (1e-3, 1e3)) * RBF(widthz * np.ones(Dz), (1e-2, 1e2)) + WhiteKernel(0.1, (1e-10, 1e+1))
+            kernely = ConstantKernel(1.0, (1e-3, 1e3)) * RBF(widthz * np.ones(Dz), (1e-2, 1e2)) \
+                + WhiteKernel(0.1, (1e-10, 1e+1))
             gpy = GaussianProcessRegressor(kernel=kernely)
             # fit Gaussian process, including hyperparameter optimization
             with warnings.catch_warnings():
@@ -341,7 +341,8 @@ def null_sample_spectral(self, uu_prod, size_u, T):
 
 class FastKCI_UInd(object):
     """
-    Python implementation of as speed-up version of the Kernel-based Conditional Independence (KCI) test. Unconditional version.
+    Python implementation of as speed-up version of the Kernel-based Conditional Independence (KCI) test.
+    Unconditional version.
 
     References
     ----------
@@ -349,6 +350,9 @@ class FastKCI_UInd(object):
        "A kernel-based conditional independence test and application in causal discovery," In UAI 2011.
     [2] M. Zhang, and S. Williamson,
        "Embarrassingly Parallel Inference for Gaussian Processes" In JMLR 20 (2019)
+    [3] O. Schacht, and B. Huang
+       "FastKCI: A fast Kernel-based Conditional Indepdence test with application to causal discovery",
+       Working Paper.
     """
     def __init__(self, K=10, J=8, alpha=500, trimming_thresh=1e-3):
         """
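The surviving prob_weights line is the numerically stable idiom for turning per-block log-likelihoods into normalized mixture weights. A standalone sketch of that idiom (illustrative values only, not part of the patch):

    import numpy as np
    from scipy.special import logsumexp

    # Per-block log-likelihoods, as collected from pvalue_onblocks.
    log_likelihood = np.array([-1050.0, -1047.5, -1049.0])
    # Naive np.exp(log_likelihood) would underflow to zero for magnitudes
    # like these; subtracting logsumexp first preserves the ratios exactly.
    weights = np.exp(log_likelihood - logsumexp(log_likelihood))
    assert np.isclose(weights.sum(), 1.0)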
From f58a91aed22f29bae1e68f7af55288be839bc6ca Mon Sep 17 00:00:00 2001
From: OliverSchacht <65898638+OliverSchacht@users.noreply.github.com>
Date: Mon, 21 Oct 2024 11:13:00 +0200
Subject: [PATCH 76/84] remove propensity trimming

Signed-off-by: Oliver Schacht
---
 causallearn/utils/FastKCI/FastKCI.py | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/causallearn/utils/FastKCI/FastKCI.py b/causallearn/utils/FastKCI/FastKCI.py
index 91b99ad8..6e431568 100644
--- a/causallearn/utils/FastKCI/FastKCI.py
+++ b/causallearn/utils/FastKCI/FastKCI.py
@@ -26,7 +26,7 @@ class FastKCI_CInd(object):
        Working Paper.
     """
 
-    def __init__(self, K=10, J=8, alpha=500, epsilon=1e-3, eig_thresh=1e-6, trimming_thresh=1e-3, use_gp=False):
+    def __init__(self, K=10, J=8, alpha=500, epsilon=1e-3, eig_thresh=1e-6, use_gp=False):
         """
         Initialize the FastKCI_CInd object.
 
@@ -44,7 +44,6 @@
         self.alpha = alpha
         self.epsilon = epsilon
         self.eig_thresh = eig_thresh
-        self.trimming_thresh = trimming_thresh
         self.use_gp = use_gp
         self.nullss = 5000
 
@@ -67,8 +66,7 @@ def compute_pvalue(self, data_x=None, data_y=None, data_z=None):
         self.data_z = data_z
         self.n = data_x.shape[0]
 
-        Z_proposal = Parallel(n_jobs=-1)(delayed(self.partition_data)() for i in range(self.J))
-        self.Z_proposal = zip(*Z_proposal)
+        self.Z_proposal = Parallel(n_jobs=-1)(delayed(self.partition_data)() for i in range(self.J))
         block_res = Parallel(n_jobs=-1)(delayed(self.pvalue_onblocks)(self.Z_proposal[i]) for i in range(self.J))
         test_stat, null_samples, log_likelihood = zip(*block_res)
 
@@ -354,7 +352,7 @@ class FastKCI_UInd(object):
        "FastKCI: A fast Kernel-based Conditional Indepdence test with application to causal discovery",
        Working Paper.
     """
-    def __init__(self, K=10, J=8, alpha=500, trimming_thresh=1e-3):
+    def __init__(self, K=10, J=8, alpha=500):
         """
         Construct the FastKCI_UInd model.
 
@@ -363,12 +361,10 @@
         K: Number of Gaussians that are assumed to be in the mixture
         J: Number of independent repittitions.
         alpha: Parameter for the Dirichlet distribution.
-        trimming_thresh: Threshold for trimming the propensity weights.
         """
        self.K = K
         self.J = J
         self.alpha = alpha
-        self.trimming_thresh = trimming_thresh
         self.nullss = 5000
         self.eig_thresh = 1e-5
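compute_pvalue now takes the J partition proposals straight from joblib instead of unpacking tuples. The pattern is the standard Parallel/delayed idiom; a self-contained sketch (the partition function here is a hypothetical stand-in, not the class method):

    import numpy as np
    from joblib import Parallel, delayed

    def partition_data(seed):
        # Stand-in for FastKCI_CInd.partition_data: one random assignment
        # of 1000 samples to K=10 blocks.
        rng = np.random.default_rng(seed)
        return rng.integers(0, 10, size=1000)

    J = 8
    # One block assignment per repetition, computed across all cores.
    proposals = Parallel(n_jobs=-1)(delayed(partition_data)(j) for j in range(J))
    assert len(proposals) == J  # a plain list, indexable as proposals[i]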
""" self.K = K self.J = J self.alpha = alpha - self.trimming_thresh = trimming_thresh self.nullss = 5000 self.eig_thresh = 1e-5 From 299d7e5c50dafdec1893293dac0aa05a5aa168ed Mon Sep 17 00:00:00 2001 From: OliverSchacht <65898638+OliverSchacht@users.noreply.github.com> Date: Mon, 21 Oct 2024 11:15:40 +0200 Subject: [PATCH 77/84] add unit tests Signed-off-by: Oliver Schacht --- tests/TestCIT_FastKCI.py | 33 +++++++++++++++++++++++++++++++++ tests/TestCIT_RCIT.py | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+) create mode 100644 tests/TestCIT_FastKCI.py create mode 100644 tests/TestCIT_RCIT.py diff --git a/tests/TestCIT_FastKCI.py b/tests/TestCIT_FastKCI.py new file mode 100644 index 00000000..06a84baf --- /dev/null +++ b/tests/TestCIT_FastKCI.py @@ -0,0 +1,33 @@ +import unittest + +import numpy as np + +import causallearn.utils.cit as cit + + +class TestCIT_FastKCI(unittest.TestCase): + def test_Gaussian_dist(self): + np.random.seed(10) + X = np.random.randn(1200, 1) + X_prime = np.random.randn(1200, 1) + Y = X + 0.5 * np.random.randn(1200, 1) + Z = Y + 0.5 * np.random.randn(1200, 1) + data = np.hstack((X, X_prime, Y, Z)) + + pvalue01 = [] + pvalue03 = [] + pvalue032 = [] + for K in [3, 10]: + for J in [8, 16]: + for use_gp in [True, False]: + cit_CIT = cit.CIT(data, 'fastkci', K=K, J=J, use_gp=use_gp) + pvalue01.append(round(cit_CIT(0, 1), 4)) + pvalue03.append(round(cit_CIT(0, 3), 4)) + pvalue032.append(round(cit_CIT(0, 3, {2}), 4)) + + self.assertTrue(np.all((0.0 <= pvalue01) & (pvalue01 <= 1.0)), + "pvalue01 contains invalid values") + self.assertTrue(np.all((0.0 <= pvalue03) & (pvalue03 <= 1.0)), + "pvalue03 contains invalid values") + self.assertTrue(np.all((0.0 <= pvalue032) & (pvalue032 <= 1.0)), + "pvalue032 contains invalid values") diff --git a/tests/TestCIT_RCIT.py b/tests/TestCIT_RCIT.py new file mode 100644 index 00000000..2482f971 --- /dev/null +++ b/tests/TestCIT_RCIT.py @@ -0,0 +1,35 @@ +import unittest + +import numpy as np + +import causallearn.utils.cit as cit + + +class TestCIT_RCIT(unittest.TestCase): + def test_Gaussian_dist(self): + np.random.seed(10) + X = np.random.randn(300, 1) + X_prime = np.random.randn(300, 1) + Y = X + 0.5 * np.random.randn(300, 1) + Z = Y + 0.5 * np.random.randn(300, 1) + data = np.hstack((X, X_prime, Y, Z)) + + pvalue01 = [] + pvalue03 = [] + pvalue032 = [] + for approx in ["lpd4", "hbe", "gamma", "chi2", "perm"]: + for num_f in [50, 100]: + for num_f2 in [5, 10]: + for rcit in [True, False]: + cit_CIT = cit.CIT(data, 'rcit', approx=approx, num_f=num_f, + num_f2=num_f2, rcit=rcit) + pvalue01.append(round(cit_CIT(0, 1), 4)) + pvalue03.append(round(cit_CIT(0, 3), 4)) + pvalue032.append(round(cit_CIT(0, 3, {2}), 4)) + + self.assertTrue(np.all((0.0 <= pvalue01) & (pvalue01 <= 1.0)), + "pvalue01 contains invalid values") + self.assertTrue(np.all((0.0 <= pvalue03) & (pvalue03 <= 1.0)), + "pvalue03 contains invalid values") + self.assertTrue(np.all((0.0 <= pvalue032) & (pvalue032 <= 1.0)), + "pvalue032 contains invalid values") From b13fdda7648fb331be4be398512caae0a6382a4d Mon Sep 17 00:00:00 2001 From: OliverSchacht <65898638+OliverSchacht@users.noreply.github.com> Date: Mon, 21 Oct 2024 12:23:56 +0200 Subject: [PATCH 78/84] Create python-package.yml Signed-off-by: OliverSchacht <65898638+OliverSchacht@users.noreply.github.com> Signed-off-by: Oliver Schacht --- .github/workflows/python-package.yml | 52 ++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 .github/workflows/python-package.yml 
From b13fdda7648fb331be4be398512caae0a6382a4d Mon Sep 17 00:00:00 2001
From: OliverSchacht <65898638+OliverSchacht@users.noreply.github.com>
Date: Mon, 21 Oct 2024 12:23:56 +0200
Subject: [PATCH 78/84] Create python-package.yml

Signed-off-by: OliverSchacht <65898638+OliverSchacht@users.noreply.github.com>
Signed-off-by: Oliver Schacht
---
 .github/workflows/python-package.yml | 52 ++++++++++++++++++++++++++++
 1 file changed, 52 insertions(+)
 create mode 100644 .github/workflows/python-package.yml

diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
new file mode 100644
index 00000000..389f9cef
--- /dev/null
+++ b/.github/workflows/python-package.yml
@@ -0,0 +1,52 @@
+name: Python package
+
+on:
+  push:
+    branches: [ "main" ]
+  pull_request:
+    branches: [ "main" ]
+
+jobs:
+  build:
+
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version: ["3.9", "3.10", "3.11"]
+
+    steps:
+      - name: Checkout source code
+        uses: actions/checkout@v4
+
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v3
+        with:
+        python-version: ${{ matrix.python-version }}
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          python -m pip install flake8
+          if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
+
+      - name: Lint code with flake8
+        run: |
+          # stop the build if there are Python syntax errors or undefined names
+          flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
+          # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
+          flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
+
+      - name: Run tests with unittest
+        run: |
+          python -m unittest discover -s tests -p "test*.py"
+
+  codespell:
+    name: Check for spelling errors
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+      - name: Codespell
+        uses: codespell-project/actions-codespell@v2

From a1c65e48de8bba7dc74220b4a6bc85bb5bca57d6 Mon Sep 17 00:00:00 2001
From: OliverSchacht <65898638+OliverSchacht@users.noreply.github.com>
Date: Mon, 21 Oct 2024 12:34:32 +0200
Subject: [PATCH 79/84] Update python-package.yml

Signed-off-by: OliverSchacht <65898638+OliverSchacht@users.noreply.github.com>
Signed-off-by: Oliver Schacht
---
 .github/workflows/python-package.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
index 389f9cef..135512c8 100644
--- a/.github/workflows/python-package.yml
+++ b/.github/workflows/python-package.yml
@@ -19,10 +19,10 @@ jobs:
       - name: Checkout source code
         uses: actions/checkout@v4
 
-      - name: Set up Python ${{ matrix.python-version }}
+      - name: Set up Python
         uses: actions/setup-python@v3
         with:
-        python-version: ${{ matrix.python-version }}
+          python-version: ${{ matrix.python-version }}
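One detail worth noting before the next patch: the workflow still invokes discovery with -p "test*.py", and unittest matches that pattern against file names case-sensitively on the Linux runners, while this repository names its test modules Test*.py. A quick illustration of the mismatch (hypothetical file list, not part of any patch):

    from fnmatch import fnmatchcase

    test_files = ["TestCIT_FastKCI.py", "TestCIT_RCIT.py", "TestFCI.py"]
    print([f for f in test_files if fnmatchcase(f, "test*.py")])  # [] -- nothing collected
    print([f for f in test_files if fnmatchcase(f, "Test*.py")])  # all three modules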
From 21cb82d220db61928143fa79491941eff62104b1 Mon Sep 17 00:00:00 2001
From: OliverSchacht <65898638+OliverSchacht@users.noreply.github.com>
Date: Mon, 21 Oct 2024 12:38:04 +0200
Subject: [PATCH 80/84] Update python-package.yml

Signed-off-by: OliverSchacht <65898638+OliverSchacht@users.noreply.github.com>
Signed-off-by: Oliver Schacht
---
 .github/workflows/python-package.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
index 135512c8..06ffc116 100644
--- a/.github/workflows/python-package.yml
+++ b/.github/workflows/python-package.yml
@@ -39,7 +39,7 @@ jobs:
 
       - name: Run tests with unittest
         run: |
-          python -m unittest discover -s tests -p "test*.py"
+          python -m unittest discover -s tests -p "Test*.py"
 
   codespell:
     name: Check for spelling errors

From c517f5c78d7a9fc0a4396027cce380903f5ffcc6 Mon Sep 17 00:00:00 2001
From: OliverSchacht <65898638+OliverSchacht@users.noreply.github.com>
Date: Mon, 21 Oct 2024 13:06:09 +0200
Subject: [PATCH 81/84] spelling

Signed-off-by: Oliver Schacht
---
 causallearn/utils/FastKCI/FastKCI.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/causallearn/utils/FastKCI/FastKCI.py b/causallearn/utils/FastKCI/FastKCI.py
index 6e431568..f5fcc506 100644
--- a/causallearn/utils/FastKCI/FastKCI.py
+++ b/causallearn/utils/FastKCI/FastKCI.py
@@ -22,7 +22,7 @@ class FastKCI_CInd(object):
     [2] M. Zhang, and S. Williamson,
        "Embarrassingly Parallel Inference for Gaussian Processes", In JMLR 20 (2019)
     [3] O. Schacht, and B. Huang
-       "FastKCI: A fast Kernel-based Conditional Indepdence test with application to causal discovery",
+       "FastKCI: A fast Kernel-based Conditional Independence test with application to causal discovery",
        Working Paper.
     """
 
@@ -349,7 +349,7 @@ class FastKCI_UInd(object):
     [2] M. Zhang, and S. Williamson,
        "Embarrassingly Parallel Inference for Gaussian Processes" In JMLR 20 (2019)
     [3] O. Schacht, and B. Huang
-       "FastKCI: A fast Kernel-based Conditional Indepdence test with application to causal discovery",
+       "FastKCI: A fast Kernel-based Conditional Independence test with application to causal discovery",
        Working Paper.
     """
     def __init__(self, K=10, J=8, alpha=500):

From 59b1ea9f04365f50f49ef96a0b0eaf82b392fd69 Mon Sep 17 00:00:00 2001
From: OliverSchacht <65898638+OliverSchacht@users.noreply.github.com>
Date: Mon, 21 Oct 2024 13:06:22 +0200
Subject: [PATCH 82/84] fix: array dtype

Signed-off-by: Oliver Schacht
---
 tests/TestCIT_FastKCI.py | 3 +++
 tests/TestCIT_RCIT.py    | 3 +++
 2 files changed, 6 insertions(+)

diff --git a/tests/TestCIT_FastKCI.py b/tests/TestCIT_FastKCI.py
index 06a84baf..b8507f78 100644
--- a/tests/TestCIT_FastKCI.py
+++ b/tests/TestCIT_FastKCI.py
@@ -25,6 +25,9 @@ def test_Gaussian_dist(self):
                     pvalue03.append(round(cit_CIT(0, 3), 4))
                     pvalue032.append(round(cit_CIT(0, 3, {2}), 4))
 
+        pvalue01 = np.array(pvalue01)
+        pvalue03 = np.array(pvalue03)
+        pvalue032 = np.array(pvalue032)
         self.assertTrue(np.all((0.0 <= pvalue01) & (pvalue01 <= 1.0)),
                         "pvalue01 contains invalid values")
         self.assertTrue(np.all((0.0 <= pvalue03) & (pvalue03 <= 1.0)),
diff --git a/tests/TestCIT_RCIT.py b/tests/TestCIT_RCIT.py
index 2482f971..ef6406fd 100644
--- a/tests/TestCIT_RCIT.py
+++ b/tests/TestCIT_RCIT.py
@@ -27,6 +27,9 @@ def test_Gaussian_dist(self):
                         pvalue03.append(round(cit_CIT(0, 3), 4))
                         pvalue032.append(round(cit_CIT(0, 3, {2}), 4))
 
+        pvalue01 = np.array(pvalue01)
+        pvalue03 = np.array(pvalue03)
+        pvalue032 = np.array(pvalue032)
         self.assertTrue(np.all((0.0 <= pvalue01) & (pvalue01 <= 1.0)),
                         "pvalue01 contains invalid values")
         self.assertTrue(np.all((0.0 <= pvalue03) & (pvalue03 <= 1.0)),
                         "pvalue03 contains invalid values")
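The np.array conversions above are what make the chained bound check work: comparing a float against a plain Python list raises TypeError, so the assertions could never have evaluated elementwise. A minimal reproduction (illustrative values, not part of the patch):

    import numpy as np

    pvalues = [0.3121, 0.0004, 0.9871]     # plain Python list, as collected in the loop
    # (0.0 <= pvalues) & (pvalues <= 1.0)  # TypeError: '<=' not supported between float and list

    pvalues = np.array(pvalues)            # the conversion added by this patch
    assert np.all((0.0 <= pvalues) & (pvalues <= 1.0))  # now an elementwise check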
From 5b89aa84a75a05eab15bed7804d73f38cf6988b9 Mon Sep 17 00:00:00 2001
From: OliverSchacht <65898638+OliverSchacht@users.noreply.github.com>
Date: Mon, 21 Oct 2024 12:40:44 +0200
Subject: [PATCH 83/84] Update python-package.yml

Signed-off-by: OliverSchacht <65898638+OliverSchacht@users.noreply.github.com>
Signed-off-by: Oliver Schacht
---
 .github/workflows/python-package.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
index 06ffc116..11737e3a 100644
--- a/.github/workflows/python-package.yml
+++ b/.github/workflows/python-package.yml
@@ -28,7 +28,7 @@ jobs:
         run: |
           python -m pip install --upgrade pip
           python -m pip install flake8
-          if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
+          pip install -e .
 
       - name: Lint code with flake8
         run: |

From a742e9fdada77b3110631ca77990cf9384901510 Mon Sep 17 00:00:00 2001
From: OliverSchacht <65898638+OliverSchacht@users.noreply.github.com>
Date: Mon, 21 Oct 2024 13:19:26 +0200
Subject: [PATCH 84/84] remove my workflow

Signed-off-by: Oliver Schacht
---
 .github/workflows/python-package.yml | 52 ----------------------------
 1 file changed, 52 deletions(-)
 delete mode 100644 .github/workflows/python-package.yml

diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
deleted file mode 100644
index 11737e3a..00000000
--- a/.github/workflows/python-package.yml
+++ /dev/null
@@ -1,52 +0,0 @@
-name: Python package
-
-on:
-  push:
-    branches: [ "main" ]
-  pull_request:
-    branches: [ "main" ]
-
-jobs:
-  build:
-
-    runs-on: ubuntu-latest
-    strategy:
-      fail-fast: false
-      matrix:
-        python-version: ["3.9", "3.10", "3.11"]
-
-    steps:
-      - name: Checkout source code
-        uses: actions/checkout@v4
-
-      - name: Set up Python
-        uses: actions/setup-python@v3
-        with:
-          python-version: ${{ matrix.python-version }}
-
-      - name: Install dependencies
-        run: |
-          python -m pip install --upgrade pip
-          python -m pip install flake8
-          pip install -e .
-
-      - name: Lint code with flake8
-        run: |
-          # stop the build if there are Python syntax errors or undefined names
-          flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
-          # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
-          flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
-
-      - name: Run tests with unittest
-        run: |
-          python -m unittest discover -s tests -p "Test*.py"
-
-  codespell:
-    name: Check for spelling errors
-    runs-on: ubuntu-latest
-
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v3
-      - name: Codespell
-        uses: codespell-project/actions-codespell@v2