From 2ee93c607c37d38e78f83251a492b842dc9d28a3 Mon Sep 17 00:00:00 2001 From: cogito233 Date: Sun, 10 Jul 2022 21:08:54 +0200 Subject: [PATCH 1/8] Fix NaN problem while normalizing the data --- causallearn/utils/KCI/KCI.py | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/causallearn/utils/KCI/KCI.py b/causallearn/utils/KCI/KCI.py index baf76202..7fd0134d 100644 --- a/causallearn/utils/KCI/KCI.py +++ b/causallearn/utils/KCI/KCI.py @@ -145,8 +145,16 @@ def kernel_matrix(self, data_x, data_y): else: raise Exception('Undefined kernel function') - data_x = stats.zscore(data_x, axis=0) - data_y = stats.zscore(data_y, axis=0) + if (np.var(data_x)==0): + data_x-=np.average(data_x) + else: + data_x = stats.zscore(data_x, axis=0) + + if (np.var(data_y)==0): + data_y-=np.average(data_y) + else: + data_y = stats.zscore(data_y, axis=0) + Kx = kernelX.kernel(data_x) Ky = kernelY.kernel(data_y) return Kx, Ky @@ -322,9 +330,20 @@ def kernel_matrix(self, data_x, data_y, data_z): kzy: centering kernel matrix for data_y (nxn) """ # normalize the data - data_x = stats.zscore(data_x, axis=0) - data_y = stats.zscore(data_y, axis=0) - data_z = stats.zscore(data_z, axis=0) + if (np.var(data_x)==0): + data_x-=np.average(data_x) + else: + data_x = stats.zscore(data_x, axis=0) + + if (np.var(data_y)==0): + data_y-=np.average(data_y) + else: + data_y = stats.zscore(data_y, axis=0) + + if (np.var(data_z)==0): + data_z-=np.average(data_z) + else: + data_z = stats.zscore(data_z, axis=0) # concatenate x and z data_x = np.concatenate((data_x, 0.5 * data_z), axis=1) From 7f409372ad7362e649bc37b1331cdaf76ec70d0d Mon Sep 17 00:00:00 2001 From: ZHIHENG LYU <37894651+cogito233@users.noreply.github.com> Date: Mon, 11 Jul 2022 12:41:58 +0200 Subject: [PATCH 2/8] Update causallearn/utils/KCI/KCI.py Co-authored-by: Yewen Fan --- causallearn/utils/KCI/KCI.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/causallearn/utils/KCI/KCI.py 
b/causallearn/utils/KCI/KCI.py index 7fd0134d..61a6f3e3 100644 --- a/causallearn/utils/KCI/KCI.py +++ b/causallearn/utils/KCI/KCI.py @@ -151,7 +151,7 @@ def kernel_matrix(self, data_x, data_y): data_x = stats.zscore(data_x, axis=0) if (np.var(data_y)==0): - data_y-=np.average(data_y) + data_y -= np.average(data_y) else: data_y = stats.zscore(data_y, axis=0) From 67f6ca7325fafa355f40d1d7e0c0dfedc8b60f21 Mon Sep 17 00:00:00 2001 From: cogito233 Date: Mon, 11 Jul 2022 12:46:15 +0200 Subject: [PATCH 3/8] code style change --- causallearn/utils/KCI/KCI.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/causallearn/utils/KCI/KCI.py b/causallearn/utils/KCI/KCI.py index 61a6f3e3..7db9079d 100644 --- a/causallearn/utils/KCI/KCI.py +++ b/causallearn/utils/KCI/KCI.py @@ -145,12 +145,12 @@ def kernel_matrix(self, data_x, data_y): else: raise Exception('Undefined kernel function') - if (np.var(data_x)==0): - data_x-=np.average(data_x) + if np.var(data_x) == 0: + data_x -= np.average(data_x) else: data_x = stats.zscore(data_x, axis=0) - if (np.var(data_y)==0): + if np.var(data_y) == 0: data_y -= np.average(data_y) else: data_y = stats.zscore(data_y, axis=0) @@ -330,18 +330,18 @@ def kernel_matrix(self, data_x, data_y, data_z): kzy: centering kernel matrix for data_y (nxn) """ # normalize the data - if (np.var(data_x)==0): - data_x-=np.average(data_x) + if np.var(data_x) == 0: + data_x -= np.average(data_x) else: data_x = stats.zscore(data_x, axis=0) - if (np.var(data_y)==0): - data_y-=np.average(data_y) + if np.var(data_y) == 0: + data_y -= np.average(data_y) else: data_y = stats.zscore(data_y, axis=0) - if (np.var(data_z)==0): - data_z-=np.average(data_z) + if np.var(data_z) == 0: + data_z -= np.average(data_z) else: data_z = stats.zscore(data_z, axis=0) From ffe75f95c4003fa7e9d7d5f3bbec4ace90ed3a41 Mon Sep 17 00:00:00 2001 From: cogito233 Date: Mon, 11 Jul 2022 13:22:35 +0200 Subject: [PATCH 4/8] add multi-variable support to cit --- 
causallearn/utils/cit.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/causallearn/utils/cit.py b/causallearn/utils/cit.py index da30891d..1a7923df 100644 --- a/causallearn/utils/cit.py +++ b/causallearn/utils/cit.py @@ -59,9 +59,18 @@ def _unique(column): } def kci(self, X, Y, condition_set): + if condition_set == None: + condition_set = [] + if type(X) == int: + X = [X] + if type(Y) == int: + Y = [Y] + if type(condition_set) == int: + condition_set = [condition_set] + if len(condition_set) == 0: - return self.kci_ui.compute_pvalue(self.data[:, [X]], self.data[:, [Y]])[0] - return self.kci_ci.compute_pvalue(self.data[:, [X]], self.data[:, [Y]], self.data[:, list(condition_set)])[0] + return self.kci_ui.compute_pvalue(self.data[:, X], self.data[:, Y])[0] + return self.kci_ci.compute_pvalue(self.data[:, X], self.data[:, Y], self.data[:, list(condition_set)])[0] def fisherz(self, X, Y, condition_set): """ From 3603458de665ca41054d84dd3fb866b49e8a13c4 Mon Sep 17 00:00:00 2001 From: cogito233 Date: Mon, 11 Jul 2022 15:25:25 +0200 Subject: [PATCH 5/8] some marginal changes --- causallearn/utils/cit.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/causallearn/utils/cit.py b/causallearn/utils/cit.py index 1a7923df..ac6d45ad 100644 --- a/causallearn/utils/cit.py +++ b/causallearn/utils/cit.py @@ -63,10 +63,12 @@ def kci(self, X, Y, condition_set): condition_set = [] if type(X) == int: X = [X] + elif type(X) != list: + X = list(X) if type(Y) == int: Y = [Y] - if type(condition_set) == int: - condition_set = [condition_set] + elif type(Y) != list: + Y = list(Y) if len(condition_set) == 0: return self.kci_ui.compute_pvalue(self.data[:, X], self.data[:, Y])[0] From c58c8168a7cac000b10034fa8c8c6afaa3b3775a Mon Sep 17 00:00:00 2001 From: cogito233 Date: Mon, 11 Jul 2022 15:31:15 +0200 Subject: [PATCH 6/8] some marginal changes 2 --- causallearn/utils/cit.py | 2 -- 1 file changed, 2 deletions(-) diff --git 
a/causallearn/utils/cit.py b/causallearn/utils/cit.py index ac6d45ad..0f5af568 100644 --- a/causallearn/utils/cit.py +++ b/causallearn/utils/cit.py @@ -59,8 +59,6 @@ def _unique(column): } def kci(self, X, Y, condition_set): - if condition_set == None: - condition_set = [] if type(X) == int: X = [X] elif type(X) != list: From 709999d93a6e1b2cc67945224801971b7b0eae89 Mon Sep 17 00:00:00 2001 From: cogito233 Date: Mon, 11 Jul 2022 18:51:02 +0200 Subject: [PATCH 7/8] add multi-variate valid check --- causallearn/utils/cit.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/causallearn/utils/cit.py b/causallearn/utils/cit.py index 0f5af568..2f916b37 100644 --- a/causallearn/utils/cit.py +++ b/causallearn/utils/cit.py @@ -1,4 +1,5 @@ from math import log, sqrt +from collections.abc import Iterable import numpy as np from scipy.stats import chi2, norm @@ -335,7 +336,23 @@ def __call__(self, X, Y, condition_set=None, *args): else: assert len(args) == 2, "Arguments other than skel and prt_m are provided for mc_fisherz." if condition_set is None: condition_set = tuple() - assert X not in condition_set and Y not in condition_set, "X, Y cannot be in condition_set." + + if type(X) == int and type(Y) == int: + assert X not in condition_set and Y not in condition_set, "X, Y cannot be in condition_set." + else: + if isinstance(X, Iterable): + assert len(set(condition_set).intersection(X)) == 0, "X cannot be in condition_set." + elif isinstance(X, int): + assert X not in condition_set, "X cannot be in condition_set." + else: + raise Exception("Undefined type of X, X should be int or Iterable") + if isinstance(Y, Iterable): + assert len(set(condition_set).intersection(Y)) == 0, "Y cannot be in condition_set." + elif isinstance(Y, int): + assert Y not in condition_set, "Y cannot be in condition_set." 
+ else: + raise Exception("Undefined type of Y, Y should be int or Iterable") + i, j = (X, Y) if (X < Y) else (Y, X) cache_key = (i, j, frozenset(condition_set)) From 04e2ba5dda3d22c10babe3fb35bdc3088f767e50 Mon Sep 17 00:00:00 2001 From: cogito233 Date: Tue, 12 Jul 2022 10:46:03 +0200 Subject: [PATCH 8/8] fix multi-variable nan problem; Thanks @MarkDana. :) --- causallearn/utils/KCI/KCI.py | 36 +++++++++++++----------------------- 1 file changed, 13 insertions(+), 23 deletions(-) diff --git a/causallearn/utils/KCI/KCI.py b/causallearn/utils/KCI/KCI.py index 7db9079d..379894e6 100644 --- a/causallearn/utils/KCI/KCI.py +++ b/causallearn/utils/KCI/KCI.py @@ -145,15 +145,11 @@ def kernel_matrix(self, data_x, data_y): else: raise Exception('Undefined kernel function') - if np.var(data_x) == 0: - data_x -= np.average(data_x) - else: - data_x = stats.zscore(data_x, axis=0) - - if np.var(data_y) == 0: - data_y -= np.average(data_y) - else: - data_y = stats.zscore(data_y, axis=0) + data_x = stats.zscore(data_x, axis=0) + data_x[np.isnan(data_x)] = 0. + + data_y = stats.zscore(data_y, axis=0) + data_y[np.isnan(data_y)] = 0. Kx = kernelX.kernel(data_x) Ky = kernelY.kernel(data_y) @@ -330,20 +326,14 @@ def kernel_matrix(self, data_x, data_y, data_z): kzy: centering kernel matrix for data_y (nxn) """ # normalize the data - if np.var(data_x) == 0: - data_x -= np.average(data_x) - else: - data_x = stats.zscore(data_x, axis=0) - - if np.var(data_y) == 0: - data_y -= np.average(data_y) - else: - data_y = stats.zscore(data_y, axis=0) - - if np.var(data_z) == 0: - data_z -= np.average(data_z) - else: - data_z = stats.zscore(data_z, axis=0) + data_x = stats.zscore(data_x, axis=0) + data_x[np.isnan(data_x)] = 0. + + data_y = stats.zscore(data_y, axis=0) + data_y[np.isnan(data_y)] = 0. + + data_z = stats.zscore(data_z, axis=0) + data_z[np.isnan(data_z)] = 0. # concatenate x and z data_x = np.concatenate((data_x, 0.5 * data_z), axis=1)