diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index 94df88d5..b0b6e90f 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -5,7 +5,7 @@ We as members, contributors, and leaders pledge to make participation in our community a harassment-free experience for everyone, regardless of age, body size, visible or invisible disability, ethnicity, sex characteristics, gender -identity and expression, level of experience, education, socio-economic status, +identity and expression, level of experience, education, socioeconomic status, nationality, personal appearance, race, religion, or sexual identity and orientation. diff --git a/README.md b/README.md index 1b976934..09ef5fd1 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # causal-learn: Causal Discovery in Python -Causal-learn is a python package for causal discovery that implements both classical and state-of-the-art causal discovery algorithms, which is a Python translation and extension of [Tetrad](https://github.com/cmu-phil/tetrad). +Causal-learn ([documentation](https://causal-learn.readthedocs.io/en/latest/), [paper](https://jmlr.org/papers/volume25/23-0970/23-0970.pdf)) is a python package for causal discovery that implements both classical and state-of-the-art causal discovery algorithms, which is a Python translation and extension of [Tetrad](https://github.com/cmu-phil/tetrad). The package is actively being developed. Feedbacks (issues, suggestions, etc.) are highly encouraged. @@ -76,10 +76,13 @@ Although causal-learn provides python implementations for some causal discovery Please cite as: ``` -@article{causallearn, - title={Causal-learn: Causal Discovery in Python}, - author={Yujia Zheng and Biwei Huang and Wei Chen and Joseph Ramsey and Mingming Gong and Ruichu Cai and Shohei Shimizu and Peter Spirtes and Kun Zhang}, - journal={arXiv preprint arXiv:2307.16405}, - year={2023} +@article{zheng2024causal, + title={Causal-learn: Causal discovery in python}, + author={Zheng, Yujia and Huang, Biwei and Chen, Wei and Ramsey, Joseph and Gong, Mingming and Cai, Ruichu and Shimizu, Shohei and Spirtes, Peter and Zhang, Kun}, + journal={Journal of Machine Learning Research}, + volume={25}, + number={60}, + pages={1--8}, + year={2024} } ``` \ No newline at end of file diff --git a/causallearn/graph/Endpoint.py b/causallearn/graph/Endpoint.py index df551955..83f1ece4 100644 --- a/causallearn/graph/Endpoint.py +++ b/causallearn/graph/Endpoint.py @@ -20,4 +20,4 @@ def __str__(self): return self.name def __eq__(self, other): - return self.value == other.value + return isinstance(other, Endpoint) and self.value == other.value diff --git a/causallearn/graph/GeneralGraph.py b/causallearn/graph/GeneralGraph.py index 41076590..0969f44e 100644 --- a/causallearn/graph/GeneralGraph.py +++ b/causallearn/graph/GeneralGraph.py @@ -508,13 +508,15 @@ def is_ancestor_of(self, node1: Node, node2: Node) -> bool: def is_child_of(self, node1: Node, node2: Node) -> bool: i = self.node_map[node1] j = self.node_map[node2] - return self.graph[i, j] == Endpoint.TAIL.value or self.graph[i, j] == Endpoint.ARROW_AND_ARROW.value + return (self.graph[j, i] == Endpoint.TAIL.value and self.graph[i, j] == Endpoint.ARROW.value) \ + or self.graph[j, i] == Endpoint.TAIL_AND_ARROW.value # Returns true iff node1 is a parent of node2. 
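The hardened Endpoint.__eq__ above no longer raises AttributeError when the right-hand side is None or a non-Endpoint value, and the corrected is_child_of accounts for the combined TAIL_AND_ARROW encoding. A minimal sanity check of the equality change (a sketch, assuming causal-learn with this patch installed); the matching is_parent_of fix follows below:

```
# Sanity check for the hardened Endpoint.__eq__ (sketch; assumes this patch).
from causallearn.graph.Endpoint import Endpoint

assert Endpoint.TAIL == Endpoint.TAIL         # identical endpoints compare equal
assert not (Endpoint.TAIL == Endpoint.ARROW)  # distinct endpoints do not
assert not (Endpoint.TAIL == None)            # previously raised AttributeError
assert not (Endpoint.TAIL == "TAIL")          # cross-type comparison is now safe
```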
def is_parent_of(self, node1: Node, node2: Node) -> bool: i = self.node_map[node1] j = self.node_map[node2] - return self.graph[j, i] == Endpoint.ARROW.value and self.graph[i, j] == Endpoint.TAIL.value + return (self.graph[j, i] == Endpoint.ARROW.value and self.graph[i, j] == Endpoint.TAIL.value) \ + or self.graph[i, j] == Endpoint.TAIL_AND_ARROW.value # Returns true iff node1 is a proper ancestor of node2. def is_proper_ancestor_of(self, node1: Node, node2: Node) -> bool: diff --git a/causallearn/score/LocalScoreFunction.py b/causallearn/score/LocalScoreFunction.py index e02ba737..488a2c45 100644 --- a/causallearn/score/LocalScoreFunction.py +++ b/causallearn/score/LocalScoreFunction.py @@ -34,9 +34,9 @@ def local_score_BIC(Data: ndarray, i: int, PAi: List[int], parameters=None) -> f if len(PAi) == 0: return n * np.log(cov[i, i]) - yX = np.mat(cov[np.ix_([i], PAi)]) - XX = np.mat(cov[np.ix_(PAi, PAi)]) - H = np.log(cov[i, i] - yX * np.linalg.inv(XX) * yX.T) + yX = cov[np.ix_([i], PAi)] + XX = cov[np.ix_(PAi, PAi)] + H = np.log(cov[i, i] - yX @ np.linalg.inv(XX) @ yX.T) return n * H + np.log(n) * len(PAi) * lambda_value @@ -68,9 +68,9 @@ def local_score_BIC_from_cov( if len(PAi) == 0: return n * np.log(cov[i, i]) - yX = np.mat(cov[np.ix_([i], PAi)]) - XX = np.mat(cov[np.ix_(PAi, PAi)]) - H = np.log(cov[i, i] - yX * np.linalg.inv(XX) * yX.T) + yX = cov[np.ix_([i], PAi)] + XX = cov[np.ix_(PAi, PAi)] + H = np.log(cov[i, i] - yX @ np.linalg.inv(XX) @ yX.T) return n * H + np.log(n) * len(PAi) * lambda_value @@ -198,7 +198,7 @@ def local_score_cv_general( PAi = list(PAi) T = Data.shape[0] - X = Data[:, Xi] + X = Data[:, [Xi]] var_lambda = parameters["lambda"] # regularization parameter k = parameters["kfold"] # k-fold cross validation n0 = math.floor(T / k) @@ -715,7 +715,7 @@ def local_score_marginal_general( """ T = Data.shape[0] - X = Data[:, Xi] + X = Data[:, [Xi]] dX = X.shape[1] # set the kernel for X diff --git a/causallearn/score/LocalScoreFunctionClass.py b/causallearn/score/LocalScoreFunctionClass.py index 41ffb49f..8171d515 100644 --- a/causallearn/score/LocalScoreFunctionClass.py +++ b/causallearn/score/LocalScoreFunctionClass.py @@ -46,3 +46,9 @@ def score(self, i: int, PAi: List[int]) -> float: self.score_cache[i][hash_key] = self.local_score_fun(self.data, i, PAi, self.parameters) return self.score_cache[i][hash_key] + + def score_nocache(self, i: int, PAi: List[int]) -> float: + if self.local_score_fun == local_score_BIC_from_cov: + return self.local_score_fun((self.cov, self.n), i, PAi, self.parameters) + else: + return self.local_score_fun(self.data, i, PAi, self.parameters) \ No newline at end of file diff --git a/causallearn/search/ConstraintBased/FCI.py b/causallearn/search/ConstraintBased/FCI.py index 9f5af378..ff3f92d0 100644 --- a/causallearn/search/ConstraintBased/FCI.py +++ b/causallearn/search/ConstraintBased/FCI.py @@ -2,7 +2,7 @@ import warnings from queue import Queue -from typing import List, Set, Tuple, Dict +from typing import List, Set, Tuple, Dict, Generator from numpy import ndarray from causallearn.graph.Edge import Edge @@ -15,6 +15,19 @@ from causallearn.utils.cit import * from causallearn.utils.FAS import fas from causallearn.utils.PCUtils.BackgroundKnowledge import BackgroundKnowledge +from itertools import combinations + +def is_uncovered_path(nodes: List[Node], G: Graph) -> bool: + """ + Determines whether the given path is an uncovered path in this graph. 
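For example (a sketch, assuming this patched module is importable), in a chain X1 o-o X2 o-o X3 the path [X1, X2, X3] is uncovered exactly when X1 and X3 are not adjacent:

```
# Illustration of is_uncovered_path (sketch; the node/graph setup is hypothetical).
from causallearn.graph.Edge import Edge
from causallearn.graph.Endpoint import Endpoint
from causallearn.graph.GeneralGraph import GeneralGraph
from causallearn.graph.GraphNode import GraphNode
from causallearn.search.ConstraintBased.FCI import is_uncovered_path

x1, x2, x3 = GraphNode("X1"), GraphNode("X2"), GraphNode("X3")
g = GeneralGraph([x1, x2, x3])
g.add_edge(Edge(x1, x2, Endpoint.CIRCLE, Endpoint.CIRCLE))
g.add_edge(Edge(x2, x3, Endpoint.CIRCLE, Endpoint.CIRCLE))
print(is_uncovered_path([x1, x2, x3], g))  # True: X1 and X3 are not adjacent

g.add_edge(Edge(x1, x3, Endpoint.CIRCLE, Endpoint.CIRCLE))
print(is_uncovered_path([x1, x2, x3], g))  # False: X1 and X3 are now adjacent
```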
+ + A path is an uncovered path if no two nonconsecutive nodes (Vi-1 and Vi+1) in the path are + adjacent. + """ + for i in range(len(nodes) - 2): + if G.is_adjacent_to(nodes[i], nodes[i + 2]): + return False + return True def traverseSemiDirected(node: Node, edge: Edge) -> Node | None: @@ -26,8 +39,17 @@ def traverseSemiDirected(node: Node, edge: Edge) -> Node | None: return edge.get_node1() return None +def traverseCircle(node: Node, edge: Edge) -> Node | None: + if node == edge.get_node1(): + if edge.get_endpoint1() == Endpoint.CIRCLE and edge.get_endpoint2() == Endpoint.CIRCLE: + return edge.get_node2() + elif node == edge.get_node2(): + if edge.get_endpoint1() == Endpoint.CIRCLE and edge.get_endpoint2() == Endpoint.CIRCLE: + return edge.get_node1() + return None -def existsSemiDirectedPath(node_from: Node, node_to: Node, G: Graph) -> bool: + +def existsSemiDirectedPath(node_from: Node, node_to: Node, G: Graph) -> bool: ## TODO: Now it does not detect whether the path is an uncovered path Q = Queue() V = set() @@ -60,6 +82,42 @@ def existsSemiDirectedPath(node_from: Node, node_to: Node, G: Graph) -> bool: return False +def GetUncoveredCirclePath(node_from: Node, node_to: Node, G: Graph, exclude_node: List[Node]) -> Generator[Node] | None: + Q = Queue() + V = set() + + path = [node_from] + + for node_u in G.get_adjacent_nodes(node_from): + if node_u in exclude_node: + continue + edge = G.get_edge(node_from, node_u) + node_c = traverseCircle(node_from, edge) + + if node_c is None or node_c in exclude_node: + continue + + if not V.__contains__(node_c): + V.add(node_c) + Q.put((node_c, path + [node_c])) + + while not Q.empty(): + node_t, path = Q.get_nowait() + if node_t == node_to and is_uncovered_path(path, G): + yield path + + for node_u in G.get_adjacent_nodes(node_t): + edge = G.get_edge(node_t, node_u) + node_c = traverseCircle(node_t, edge) + + if node_c is None or node_c in exclude_node: + continue + + if not V.__contains__(node_c): + V.add(node_c) + Q.put((node_c, path + [node_c])) + + def existOnePathWithPossibleParents(previous, node_w: Node, node_x: Node, node_b: Node, graph: Graph) -> bool: if node_w == node_x: @@ -320,8 +378,9 @@ def rulesR1R2cycle(graph: Graph, bk: BackgroundKnowledge | None, changeFlag: boo def isNoncollider(graph: Graph, sep_sets: Dict[Tuple[int, int], Set[int]], node_i: Node, node_j: Node, node_k: Node) -> bool: - sep_set = sep_sets[(graph.get_node_map()[node_i], graph.get_node_map()[node_k])] - return sep_set is not None and sep_set.__contains__(graph.get_node_map()[node_j]) + node_map = graph.get_node_map() + sep_set = sep_sets.get((node_map[node_i], node_map[node_k])) + return sep_set is not None and sep_set.__contains__(node_map[node_j]) def ruleR3(graph: Graph, sep_sets: Dict[Tuple[int, int], Set[int]], bk: BackgroundKnowledge | None, changeFlag: bool, @@ -370,6 +429,131 @@ def ruleR3(graph: Graph, sep_sets: Dict[Tuple[int, int], Set[int]], bk: Backgrou changeFlag = True return changeFlag +def ruleR5(graph: Graph, changeFlag: bool, + verbose: bool = False) -> bool: + """ + Rule R5 of the FCI algorithm. + by Jiji Zhang, 2008, "On the completeness of orientation rules for causal discovery in the presence of latent confounders and selection bias"] + + This function orients any edge that is part of an uncovered circle path between two nodes A and B, + if such a path exists. The path must start and end with a circle edge and must be uncovered, i.e. the + nodes on the path must not be adjacent to A or B. 
The orientation of the edges on the path is set to + double tail. + """ + nodes = graph.get_nodes() + def orient_on_path_helper(path, node_A, node_B): + # orient A - C, D - B + edge = graph.get_edge(node_A, path[0]) + graph.remove_edge(edge) + graph.add_edge(Edge(node_A, path[0], Endpoint.TAIL, Endpoint.TAIL)) + + edge = graph.get_edge(node_B, path[-1]) + graph.remove_edge(edge) + graph.add_edge(Edge(node_B, path[-1], Endpoint.TAIL, Endpoint.TAIL)) + if verbose: + print("Orienting edge A - C (Double tail): " + graph.get_edge(node_A, path[0]).__str__()) + print("Orienting edge B - D (Double tail): " + graph.get_edge(node_B, path[-1]).__str__()) + + # orient everything on the path to both tails + for i in range(len(path) - 1): + edge = graph.get_edge(path[i], path[i + 1]) + graph.remove_edge(edge) + graph.add_edge(Edge(path[i], path[i + 1], Endpoint.TAIL, Endpoint.TAIL)) + if verbose: + print("Orienting edge (Double tail): " + graph.get_edge(path[i], path[i + 1]).__str__()) + + for node_B in nodes: + intoBCircles = graph.get_nodes_into(node_B, Endpoint.CIRCLE) + + for node_A in intoBCircles: + found_paths_between_AB = [] + if graph.get_endpoint(node_B, node_A) != Endpoint.CIRCLE: + continue + else: + # Check if there is an uncovered circle path between A and B (A o-o C .. D o-o B) + # s.t. A is not adjacent to D and B is not adjacent to C + a_node_idx = graph.node_map[node_A] + b_node_idx = graph.node_map[node_B] + a_adj_nodes = graph.get_adjacent_nodes(node_A) + b_adj_nodes = graph.get_adjacent_nodes(node_B) + + # get the adjacent nodes with circle edges of A and B + a_circle_adj_nodes_set = [node for node in a_adj_nodes if graph.node_map[node] != a_node_idx and graph.node_map[node]!= b_node_idx + and graph.get_endpoint(node, node_A) == Endpoint.CIRCLE and graph.get_endpoint(node_A, node) == Endpoint.CIRCLE] + b_circle_adj_nodes_set = [node for node in b_adj_nodes if graph.node_map[node] != a_node_idx and graph.node_map[node]!= b_node_idx + and graph.get_endpoint(node, node_B) == Endpoint.CIRCLE and graph.get_endpoint(node_B, node) == Endpoint.CIRCLE] + + # get the adjacent nodes with circle edges of A and B that is non adjacent to B and A, respectively + for node_C in a_circle_adj_nodes_set: + if graph.is_adjacent_to(node_B, node_C): + continue + for node_D in b_circle_adj_nodes_set: + if graph.is_adjacent_to(node_A, node_D): + continue + paths = GetUncoveredCirclePath(node_from=node_C, node_to=node_D, G=graph, exclude_node=[node_A, node_B]) # get the uncovered circle path between C and D, excluding A and B + found_paths_between_AB.append(paths) + + # Orient the uncovered circle path between A and B + for paths in found_paths_between_AB: + for path in paths: + changeFlag = True + if verbose: + print("Find uncovered circle path between A and B: " + graph.get_edge(node_A, node_B).__str__()) + edge = graph.get_edge(node_A, node_B) + graph.remove_edge(edge) + graph.add_edge(Edge(node_A, node_B, Endpoint.TAIL, Endpoint.TAIL)) + orient_on_path_helper(path, node_A, node_B) + + return changeFlag + +def ruleR6(graph: Graph, changeFlag: bool, + verbose: bool = False) -> bool: + nodes = graph.get_nodes() + + for node_B in nodes: + # Find A - B + intoBTails = graph.get_nodes_into(node_B, Endpoint.TAIL) + exist = False + for node_A in intoBTails: + if graph.get_endpoint(node_B, node_A) == Endpoint.TAIL: + exist = True + if not exist: + continue + # Find B o-*C + intoBCircles = graph.get_nodes_into(node_B, Endpoint.CIRCLE) + for node_C in intoBCircles: + changeFlag = True + edge = graph.get_edge(node_B, 
node_C) + graph.remove_edge(edge) + graph.add_edge(Edge(node_B, node_C, Endpoint.TAIL, edge.get_proximal_endpoint(node_C))) + if verbose: + print("Orienting edge by rule 6): " + graph.get_edge(node_B, node_C).__str__()) + + return changeFlag + + +def ruleR7(graph: Graph, changeFlag: bool, + verbose: bool = False) -> bool: + nodes = graph.get_nodes() + + for node_B in nodes: + # Find A -o B + intoBCircles = graph.get_nodes_into(node_B, Endpoint.CIRCLE) + node_A_list = [node for node in intoBCircles if graph.get_endpoint(node_B, node) == Endpoint.TAIL] + + # Find B o-*C + for node_C in intoBCircles: + # pdb.set_trace() + for node_A in node_A_list: + # pdb.set_trace() + if not graph.is_adjacent_to(node_A, node_C): + changeFlag = True + edge = graph.get_edge(node_B, node_C) + graph.remove_edge(edge) + graph.add_edge(Edge(node_B, node_C, Endpoint.TAIL, edge.get_proximal_endpoint(node_C))) + if verbose: + print("Orienting edge by rule 7): " + graph.get_edge(node_B, node_C).__str__()) + return changeFlag def getPath(node_c: Node, previous) -> List[Node]: l = [] @@ -542,6 +726,140 @@ def ruleR4B(graph: Graph, maxPathLength: int, data: ndarray, independence_test_m return change_flag + +def rule8(graph: Graph, nodes: List[Node], changeFlag): + nodes = graph.get_nodes() if nodes is None else nodes + for node_B in nodes: + adj = graph.get_adjacent_nodes(node_B) + if len(adj) < 2: + continue + + cg = ChoiceGenerator(len(adj), 2) + combination = cg.next() + + while combination is not None: + node_A = adj[combination[0]] + node_C = adj[combination[1]] + combination = cg.next() + + if(graph.get_endpoint(node_A, node_B) == Endpoint.ARROW and graph.get_endpoint(node_B, node_A) == Endpoint.TAIL and \ + graph.get_endpoint(node_B, node_C) == Endpoint.ARROW and graph.get_endpoint(node_C, node_B) == Endpoint.TAIL and \ + graph.is_adjacent_to(node_A, node_C) and \ + graph.get_endpoint(node_A, node_C) == Endpoint.ARROW and graph.get_endpoint(node_C, node_A)== Endpoint.CIRCLE) or \ + (graph.get_endpoint(node_A, node_B) == Endpoint.CIRCLE and graph.get_endpoint(node_B, node_A) == Endpoint.TAIL and \ + graph.get_endpoint(node_B, node_C) == Endpoint.ARROW and graph.get_endpoint(node_C, node_B) == Endpoint.TAIL and \ + graph.is_adjacent_to(node_A, node_C) and \ + graph.get_endpoint(node_A, node_C) == Endpoint.ARROW and graph.get_endpoint(node_C, node_A)== Endpoint.CIRCLE): + edge1 = graph.get_edge(node_A, node_C) + graph.remove_edge(edge1) + graph.add_edge(Edge(node_A, node_C,Endpoint.TAIL, Endpoint.ARROW)) + changeFlag = True + + return changeFlag + + + +def is_possible_parent(graph: Graph, potential_parent_node, child_node): + if graph.node_map[potential_parent_node] == graph.node_map[child_node]: + return False + if not graph.is_adjacent_to(potential_parent_node, child_node): + return False + + if graph.get_endpoint(child_node, potential_parent_node) == Endpoint.ARROW: + return False + else: + return True + + +def find_possible_children(graph: Graph, parent_node, en_nodes=None): + if en_nodes is None: + nodes = graph.get_nodes() + en_nodes = [node for node in nodes if graph.node_map[node] != graph.node_map[parent_node]] + + potential_child_nodes = set() + for potential_node in en_nodes: + if is_possible_parent(graph, potential_parent_node=parent_node, child_node=potential_node): + potential_child_nodes.add(potential_node) + + return potential_child_nodes + +def rule9(graph: Graph, nodes: List[Node], changeFlag): + # changeFlag = False + nodes = graph.get_nodes() if nodes is None else nodes + for node_C in nodes: + 
intoCArrows = graph.get_nodes_into(node_C, Endpoint.ARROW) + for node_A in intoCArrows: + # we want A o--> C + if not graph.get_endpoint(node_C, node_A) == Endpoint.CIRCLE: + continue + + # look for a possibly directed uncovered path s.t. B and C are not connected (for the given A o--> C + a_node_idx = graph.node_map[node_A] + c_node_idx = graph.node_map[node_C] + a_adj_nodes = graph.get_adjacent_nodes(node_A) + nodes_set = [node for node in a_adj_nodes if graph.node_map[node] != a_node_idx and graph.node_map[node]!= c_node_idx] + possible_children = find_possible_children(graph, node_A, nodes_set) + for node_B in possible_children: + if graph.is_adjacent_to(node_B, node_C): + continue + if existsSemiDirectedPath(node_from=node_B, node_to=node_C, G=graph): + edge1 = graph.get_edge(node_A, node_C) + graph.remove_edge(edge1) + graph.add_edge(Edge(node_A, node_C, Endpoint.TAIL, Endpoint.ARROW)) + changeFlag = True + break #once we found it, break out since we have already oriented Ao->C to A->C, we want to find the next A + return changeFlag + + +def rule10(graph: Graph, changeFlag): + # changeFlag = False + nodes = graph.get_nodes() + for node_C in nodes: + intoCArrows = graph.get_nodes_into(node_C, Endpoint.ARROW) + if len(intoCArrows) < 2: + continue + # get all A where A o-> C + Anodes = [node_A for node_A in intoCArrows if graph.get_endpoint(node_C, node_A) == Endpoint.CIRCLE] + if len(Anodes) == 0: + continue + + for node_A in Anodes: + A_adj_nodes = graph.get_adjacent_nodes(node_A) + en_nodes = [i for i in A_adj_nodes if i is not node_C] + A_possible_children = find_possible_children(graph, parent_node=node_A, en_nodes=en_nodes) + if len(A_possible_children) < 2: + continue + + gen = ChoiceGenerator(len(intoCArrows), 2) + choice = gen.next() + while choice is not None: + node_B = intoCArrows[choice[0]] + node_D = intoCArrows[choice[1]] + + choice = gen.next() + # we want B->C<-D + if graph.get_endpoint(node_C, node_B) != Endpoint.TAIL: + continue + + if graph.get_endpoint(node_C, node_D) != Endpoint.TAIL: + continue + + for children in combinations(A_possible_children, 2): + child_one, child_two = children + if not existsSemiDirectedPath(node_from=child_one, node_to=node_B, G=graph) or \ + not existsSemiDirectedPath(node_from=child_two, node_to=node_D, G=graph): + continue + + if not graph.is_adjacent_to(child_one, child_two): + edge1 = graph.get_edge(node_A, node_C) + graph.remove_edge(edge1) + graph.add_edge(Edge(node_A, node_C, Endpoint.TAIL, Endpoint.ARROW)) + changeFlag = True + break #once we found it, break out since we have already oriented Ao->C to A->C, we want to find the next A + + return changeFlag + + def visibleEdgeHelperVisit(graph: Graph, node_c: Node, node_a: Node, node_b: Node, path: List[Node]) -> bool: if path.__contains__(node_a): return False @@ -691,9 +1009,9 @@ def _contains_all(set_a: Set[Node], set_b: Set[Node]): break - def fci(dataset: ndarray, independence_test_method: str=fisherz, alpha: float = 0.05, depth: int = -1, - max_path_length: int = -1, verbose: bool = False, background_knowledge: BackgroundKnowledge | None = None, show_progress: bool = True, + max_path_length: int = -1, verbose: bool = False, background_knowledge: BackgroundKnowledge | None = None, + show_progress: bool = True, node_names = None, **kwargs) -> Tuple[Graph, List[Edge]]: """ Perform Fast Causal Inference (FCI) algorithm for causal discovery @@ -716,7 +1034,7 @@ def fci(dataset: ndarray, independence_test_method: str=fisherz, alpha: float = Returns ------- - graph : a CausalGraph 
object, where graph.graph[j,i]=1 and graph.graph[i,j]=-1 indicates i --> j , + graph : a GeneralGraph object, where graph.graph[j,i]=1 and graph.graph[i,j]=-1 indicates i --> j , graph.graph[i,j] = graph.graph[j,i] = -1 indicates i --- j, graph.graph[i,j] = graph.graph[j,i] = 1 indicates i <-> j, graph.graph[j,i]=1 and graph.graph[i,j]=2 indicates i o-> j. @@ -748,8 +1066,10 @@ def fci(dataset: ndarray, independence_test_method: str=fisherz, alpha: float = nodes = [] + if node_names is None: + node_names = [f"X{i + 1}" for i in range(dataset.shape[1])] for i in range(dataset.shape[1]): - node = GraphNode(f"X{i + 1}") + node = GraphNode(node_names[i]) node.add_attribute("id", i) nodes.append(node) @@ -757,6 +1077,7 @@ def fci(dataset: ndarray, independence_test_method: str=fisherz, alpha: float = graph, sep_sets, test_results = fas(dataset, nodes, independence_test_method=independence_test_method, alpha=alpha, knowledge=background_knowledge, depth=depth, verbose=verbose, show_progress=show_progress) + # pdb.set_trace() reorientAllWith(graph, Endpoint.CIRCLE) rule0(graph, nodes, sep_sets, background_knowledge, verbose) @@ -787,6 +1108,23 @@ def fci(dataset: ndarray, independence_test_method: str=fisherz, alpha: float = if verbose: print("Epoch") + # rule 5 + change_flag = ruleR5(graph, change_flag, verbose) + + # rule 6 + change_flag = ruleR6(graph, change_flag, verbose) + + # rule 7 + change_flag = ruleR7(graph, change_flag, verbose) + + # rule 8 + change_flag = rule8(graph,nodes, change_flag) + + # rule 9 + change_flag = rule9(graph, nodes, change_flag) + # rule 10 + change_flag = rule10(graph, change_flag) + graph.set_pag(True) edges = get_color_edges(graph) diff --git a/causallearn/search/FCMBased/lingam/var_lingam.py b/causallearn/search/FCMBased/lingam/var_lingam.py index 1f2d931f..0cb48c82 100644 --- a/causallearn/search/FCMBased/lingam/var_lingam.py +++ b/causallearn/search/FCMBased/lingam/var_lingam.py @@ -253,14 +253,14 @@ def _estimate_var_coefs(self, X): # XXX: VAR.fit() is not searching lags correctly if self._criterion not in ['aic', 'fpe', 'hqic', 'bic']: var = VAR(X) - result = var.fit(maxlags=self._lags, trend='nc') + result = var.fit(maxlags=self._lags, trend='n') else: min_value = float('Inf') result = None for lag in range(1, self._lags + 1): var = VAR(X) - fitted = var.fit(maxlags=lag, ic=None, trend='nc') + fitted = var.fit(maxlags=lag, ic=None, trend='n') value = getattr(fitted, self._criterion) if value < min_value: @@ -319,10 +319,14 @@ def _pruning(self, X, B_taus, causal_order): obj = np.zeros((len(blocks))) exp = np.zeros( (len(blocks), causal_order_no + n_features * self._lags)) - for j, block in enumerate(blocks): - obj[j] = block[0][i] - exp[j:] = np.concatenate( - [block[0][ancestor_indexes].flatten(), block[1:][:].flatten()], axis=0) + + # Create 3D array to hold flattened ancestor indices for each block + ancestor_indexes_flat = blocks[:, 0, ancestor_indexes].reshape(len(blocks), -1) + # Fill obj using advanced indexing + obj[:] = blocks[:, 0, i] + # Fill exp using advanced indexing + exp[:, :causal_order_no] = ancestor_indexes_flat + exp[:, causal_order_no:] = blocks[:, 1:].reshape(len(blocks), -1) # adaptive lasso gamma = 1.0 diff --git a/causallearn/search/PermutationBased/BOSS.py b/causallearn/search/PermutationBased/BOSS.py new file mode 100644 index 00000000..465d3659 --- /dev/null +++ b/causallearn/search/PermutationBased/BOSS.py @@ -0,0 +1,207 @@ +import random +import sys +import time +import warnings +from typing import Any, Dict, List, Optional + 
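Before the implementation, a minimal usage sketch for the boss() search this new file defines (the toy data are hypothetical; the signature and CPDAG encoding are as documented below):

```
# Usage sketch for boss() (assumes this patch; the toy data are hypothetical).
import numpy as np
from causallearn.search.PermutationBased.BOSS import boss

rng = np.random.default_rng(0)
a = rng.standard_normal(500)
b = 0.8 * a + rng.standard_normal(500)   # a -> b
c = 0.6 * b + rng.standard_normal(500)   # b -> c
X = np.column_stack([a, b, c])

G = boss(X, score_func="local_score_BIC_from_cov", verbose=False)
print(G.graph)  # CPDAG: G.graph[j, i] = 1 and G.graph[i, j] = -1 encodes i --> j
```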
+import numpy as np +from causallearn.graph.GeneralGraph import GeneralGraph +from causallearn.graph.GraphNode import GraphNode +from causallearn.score.LocalScoreFunction import ( + local_score_BDeu, + local_score_BIC, + local_score_BIC_from_cov, + local_score_cv_general, + local_score_cv_multi, + local_score_marginal_general, + local_score_marginal_multi, +) +from causallearn.search.PermutationBased.gst import GST; +from causallearn.score.LocalScoreFunctionClass import LocalScoreClass +from causallearn.utils.DAG2CPDAG import dag2cpdag + + +def boss( + X: np.ndarray, + score_func: str = "local_score_BIC_from_cov", + parameters: Optional[Dict[str, Any]] = None, + verbose: Optional[bool] = True, + node_names: Optional[List[str]] = None, +) -> GeneralGraph: + """ + Perform a best order score search (BOSS) algorithm + + Parameters + ---------- + X : data set (numpy ndarray), shape (n_samples, n_features). The input data, where n_samples is the number of samples and n_features is the number of features. + score_func : the string name of score function. (str(one of 'local_score_CV_general', 'local_score_marginal_general', + 'local_score_CV_multi', 'local_score_marginal_multi', 'local_score_BIC', 'local_score_BIC_from_cov', 'local_score_BDeu')). + parameters : when using CV likelihood, + parameters['kfold']: k-fold cross validation + parameters['lambda']: regularization parameter + parameters['dlabel']: for variables with multi-dimensions, + indicate which dimensions belong to the i-th variable. + verbose : whether to print the time cost and verbose output of the algorithm. + + Returns + ------- + G : learned causal graph, where G.graph[j,i] = 1 and G.graph[i,j] = -1 indicates i --> j, G.graph[i,j] = G.graph[j,i] = -1 indicates i --- j. + """ + + X = X.copy() + n, p = X.shape + if n < p: + warnings.warn("The number of features is much larger than the sample size!") + + if score_func == "local_score_CV_general": + # % k-fold negative cross validated likelihood based on regression in RKHS + if parameters is None: + parameters = { + "kfold": 10, # 10 fold cross validation + "lambda": 0.01, + } # regularization parameter + localScoreClass = LocalScoreClass( + data=X, local_score_fun=local_score_cv_general, parameters=parameters + ) + elif score_func == "local_score_marginal_general": + # negative marginal likelihood based on regression in RKHS + parameters = {} + localScoreClass = LocalScoreClass( + data=X, local_score_fun=local_score_marginal_general, parameters=parameters + ) + elif score_func == "local_score_CV_multi": + # k-fold negative cross validated likelihood based on regression in RKHS + # for data with multi-variate dimensions + if parameters is None: + parameters = { + "kfold": 10, + "lambda": 0.01, + "dlabel": {}, + } # regularization parameter + for i in range(X.shape[1]): + parameters["dlabel"]["{}".format(i)] = i + localScoreClass = LocalScoreClass( + data=X, local_score_fun=local_score_cv_multi, parameters=parameters + ) + elif score_func == "local_score_marginal_multi": + # negative marginal likelihood based on regression in RKHS + # for data with multi-variate dimensions + if parameters is None: + parameters = {"dlabel": {}} + for i in range(X.shape[1]): + parameters["dlabel"]["{}".format(i)] = i + localScoreClass = LocalScoreClass( + data=X, local_score_fun=local_score_marginal_multi, parameters=parameters + ) + elif score_func == "local_score_BIC": + # SEM BIC score + warnings.warn("Please use 'local_score_BIC_from_cov' instead") + if parameters is None: + parameters = 
{"lambda_value": 2} + localScoreClass = LocalScoreClass( + data=X, local_score_fun=local_score_BIC, parameters=parameters + ) + elif score_func == "local_score_BIC_from_cov": + # SEM BIC score + if parameters is None: + parameters = {"lambda_value": 2} + localScoreClass = LocalScoreClass( + data=X, local_score_fun=local_score_BIC_from_cov, parameters=parameters + ) + elif score_func == "local_score_BDeu": + # BDeu score + localScoreClass = LocalScoreClass( + data=X, local_score_fun=local_score_BDeu, parameters=None + ) + else: + raise Exception("Unknown function!") + + score = localScoreClass + gsts = [GST(i, score) for i in range(p)] + + node_names = [("X%d" % (i + 1)) for i in range(p)] if node_names is None else node_names + nodes = [] + + for name in node_names: + node = GraphNode(name) + nodes.append(node) + + G = GeneralGraph(nodes) + + runtime = time.perf_counter() + + order = [v for v in range(p)] + + gsts = [GST(v, score) for v in order] + parents = {v: [] for v in order} + + variables = [v for v in order] + while True: + improved = False + random.shuffle(variables) + if verbose: + for i, v in enumerate(order): + parents[v].clear() + gsts[v].trace(order[:i], parents[v]) + sys.stdout.write("\rBOSS edge count: %i " % np.sum([len(parents[v]) for v in range(p)])) + sys.stdout.flush() + + for v in variables: + improved |= better_mutation(v, order, gsts) + if not improved: break + + for i, v in enumerate(order): + parents[v].clear() + gsts[v].trace(order[:i], parents[v]) + + runtime = time.perf_counter() - runtime + + if verbose: + sys.stdout.write("\nBOSS completed in: %.2fs \n" % runtime) + sys.stdout.flush() + + for y in range(p): + for x in parents[y]: + G.add_directed_edge(nodes[x], nodes[y]) + + G = dag2cpdag(G) + + return G + + +def reversed_enumerate(iter, j): + for w in reversed(iter): + yield j, w + j -= 1 + + +def better_mutation(v, order, gsts): + i = order.index(v) + p = len(order) + scores = np.zeros(p + 1) + + prefix = [] + score = 0 + for j, w in enumerate(order): + scores[j] = gsts[v].trace(prefix) + score + if v != w: + score += gsts[w].trace(prefix) + prefix.append(w) + + scores[p] = gsts[v].trace(prefix) + score + best = p + + prefix.append(v) + score = 0 + for j, w in reversed_enumerate(order, p - 1): + if v != w: + prefix.remove(w) + score += gsts[w].trace(prefix) + scores[j] += score + if scores[j] > scores[best]: best = j + + if scores[i] + 1e-6 > scores[best]: return False + order.remove(v) + order.insert(best - int(best > i), v) + + return True \ No newline at end of file diff --git a/causallearn/search/PermutationBased/GRaSP.py b/causallearn/search/PermutationBased/GRaSP.py index ba77b3ef..f4d4aa14 100644 --- a/causallearn/search/PermutationBased/GRaSP.py +++ b/causallearn/search/PermutationBased/GRaSP.py @@ -10,11 +10,13 @@ from causallearn.score.LocalScoreFunction import ( local_score_BDeu, local_score_BIC, + local_score_BIC_from_cov, local_score_cv_general, local_score_cv_multi, local_score_marginal_general, local_score_marginal_multi, ) +from causallearn.search.PermutationBased.gst import GST; from causallearn.score.LocalScoreFunctionClass import LocalScoreClass from causallearn.utils.DAG2CPDAG import dag2cpdag @@ -33,6 +35,7 @@ def __init__(self, p, score): y = self.order[i] self.parents[y] = [] self.local_scores[y] = -score.score(y, []) + # self.local_scores[y] = -score.score_nocache(y, []) def get(self, i): return self.order[i] @@ -76,11 +79,12 @@ def len(self): def grasp( X: np.ndarray, - score_func: str = "local_score_BIC", + score_func: str = 
"local_score_BIC_from_cov", depth: Optional[int] = 3, - maxP: Optional[float] = None, parameters: Optional[Dict[str, Any]] = None, -) -> Dict[str, Any]: + verbose: Optional[bool] = True, + node_names: Optional[List[str]] = None, +) -> GeneralGraph: """ Perform a greedy relaxation of the sparsest permutation (GRaSP) algorithm @@ -88,52 +92,43 @@ def grasp( ---------- X : data set (numpy ndarray), shape (n_samples, n_features). The input data, where n_samples is the number of samples and n_features is the number of features. score_func : the string name of score function. (str(one of 'local_score_CV_general', 'local_score_marginal_general', - 'local_score_CV_multi', 'local_score_marginal_multi', 'local_score_BIC', 'local_score_BDeu')). + 'local_score_CV_multi', 'local_score_marginal_multi', 'local_score_BIC', 'local_score_BIC_from_cov', 'local_score_BDeu')). depth : allowed maximum depth for DFS - maxP : allowed maximum number of parents when searching the graph parameters : when using CV likelihood, parameters['kfold']: k-fold cross validation parameters['lambda']: regularization parameter parameters['dlabel']: for variables with multi-dimensions, indicate which dimensions belong to the i-th variable. + verbose : whether to print the time cost and verbose output of the algorithm. Returns ------- G : learned causal graph, where G.graph[j,i] = 1 and G.graph[i,j] = -1 indicates i --> j, G.graph[i,j] = G.graph[j,i] = -1 indicates i --- j. """ + X = X.copy() n, p = X.shape if n < p: warnings.warn("The number of features is much larger than the sample size!") - X = np.mat(X) - if ( - score_func == "local_score_CV_general" - ): # % k-fold negative cross validated likelihood based on regression in RKHS + if score_func == "local_score_CV_general": + # k-fold negative cross validated likelihood based on regression in RKHS if parameters is None: parameters = { "kfold": 10, # 10 fold cross validation "lambda": 0.01, } # regularization parameter - if maxP is None: - maxP = X.shape[1] / 2 # maximum number of parents localScoreClass = LocalScoreClass( data=X, local_score_fun=local_score_cv_general, parameters=parameters ) - - elif ( - score_func == "local_score_marginal_general" - ): # negative marginal likelihood based on regression in RKHS + elif score_func == "local_score_marginal_general": + # negative marginal likelihood based on regression in RKHS parameters = {} - if maxP is None: - maxP = X.shape[1] / 2 # maximum number of parents localScoreClass = LocalScoreClass( data=X, local_score_fun=local_score_marginal_general, parameters=parameters ) - - elif ( - score_func == "local_score_CV_multi" - ): # k-fold negative cross validated likelihood based on regression in RKHS + elif score_func == "local_score_CV_multi": + # k-fold negative cross validated likelihood based on regression in RKHS # for data with multi-variate dimensions if parameters is None: parameters = { @@ -143,47 +138,46 @@ def grasp( } # regularization parameter for i in range(X.shape[1]): parameters["dlabel"]["{}".format(i)] = i - if maxP is None: - maxP = len(parameters["dlabel"]) / 2 localScoreClass = LocalScoreClass( data=X, local_score_fun=local_score_cv_multi, parameters=parameters ) - - elif ( - score_func == "local_score_marginal_multi" - ): # negative marginal likelihood based on regression in RKHS + elif score_func == "local_score_marginal_multi": + # negative marginal likelihood based on regression in RKHS # for data with multi-variate dimensions if parameters is None: parameters = {"dlabel": {}} for i in range(X.shape[1]): 
parameters["dlabel"]["{}".format(i)] = i - if maxP is None: - maxP = len(parameters["dlabel"]) / 2 localScoreClass = LocalScoreClass( data=X, local_score_fun=local_score_marginal_multi, parameters=parameters ) - - elif score_func == "local_score_BIC": # Greedy equivalence search with BIC score - parameters = {} - parameters["lambda_value"] = 2 - if maxP is None: - maxP = X.shape[1] / 2 + elif score_func == "local_score_BIC": + # SEM BIC score + warnings.warn("Please use 'local_score_BIC_from_cov' instead") + if parameters is None: + parameters = {"lambda_value": 2} localScoreClass = LocalScoreClass( data=X, local_score_fun=local_score_BIC, parameters=parameters ) - - elif score_func == "local_score_BDeu": # Greedy equivalence search with BDeu score - if maxP is None: - maxP = X.shape[1] / 2 + elif score_func == "local_score_BIC_from_cov": + # SEM BIC score + if parameters is None: + parameters = {"lambda_value": 2} + localScoreClass = LocalScoreClass( + data=X, local_score_fun=local_score_BIC_from_cov, parameters=parameters + ) + elif score_func == "local_score_BDeu": + # BDeu score localScoreClass = LocalScoreClass( data=X, local_score_fun=local_score_BDeu, parameters=None ) - else: raise Exception("Unknown function!") + score = localScoreClass + gsts = [GST(i, score) for i in range(p)] - node_names = [("x%d" % i) for i in range(p)] + node_names = [("X%d" % (i + 1)) for i in range(p)] if node_names is None else node_names nodes = [] for name in node_names: @@ -200,19 +194,20 @@ def grasp( y_parents = order.get_parents(y) candidates = [order.get(j) for j in range(0, i)] - grow(y, y_parents, candidates, score) - local_score = shrink(y, y_parents, score) + local_score = gsts[y].trace(candidates, y_parents) order.set_local_score(y, local_score) order.bump_edges(len(y_parents)) - while dfs(depth - 1, set(), [], order, score): - sys.stdout.write("\rGRaSP edge count: %i " % order.get_edges()) - sys.stdout.flush() + while dfs(depth - 1, set(), [], order, gsts): + if verbose: + sys.stdout.write("\rGRaSP edge count: %i " % order.get_edges()) + sys.stdout.flush() runtime = time.perf_counter() - runtime - - sys.stdout.write("\nGRaSP completed in: %.2fs \n" % runtime) - sys.stdout.flush() + + if verbose: + sys.stdout.write("\nGRaSP completed in: %.2fs \n" % runtime) + sys.stdout.flush() for y in range(p): for x in order.get_parents(y): @@ -224,7 +219,7 @@ def grasp( # performs a dfs over covered tucks -def dfs(depth: int, flipped: set, history: List[set], order, score): +def dfs(depth: int, flipped: set, history: List[set], order, gsts): cache = [{}, {}, {}, 0] @@ -252,7 +247,7 @@ def dfs(depth: int, flipped: set, history: List[set], order, score): cache[3] = order.get_edges() tuck(i, j, order) - edge_bump, score_bump = update(i, j, order, score) + edge_bump, score_bump = update(i, j, order, gsts) # because things that should be zero sometimes are not if score_bump > 1e-6: @@ -271,7 +266,7 @@ def dfs(depth: int, flipped: set, history: List[set], order, score): if len(flipped) > 0 and flipped not in history: history.append(flipped) - if depth > 0 and dfs(depth - 1, flipped, history, order, score): + if depth > 0 and dfs(depth - 1, flipped, history, order, gsts): return True del history[-1] @@ -286,7 +281,7 @@ def dfs(depth: int, flipped: set, history: List[set], order, score): # updates the parents and scores after a tuck -def update(i: int, j: int, order, score): +def update(i: int, j: int, order, gsts): edge_bump = 0 old_score = 0 @@ -299,18 +294,9 @@ def update(i: int, j: int, order, score): 
edge_bump -= len(z_parents) old_score += order.get_local_score(z) + z_parents.clear() candidates = [order.get(l) for l in range(0, k)] - - for w in [w for w in z_parents if w not in candidates]: - z_parents.remove(w) - shrink(z, z_parents, score) - - for w in z_parents: - candidates.remove(w) - - grow(z, z_parents, candidates, score) - - local_score = shrink(z, z_parents, score) + local_score = gsts[z].trace(candidates, z_parents) order.set_local_score(z, local_score) edge_bump += len(z_parents) @@ -319,64 +305,6 @@ def update(i: int, j: int, order, score): return edge_bump, new_score - old_score -# grow of grow-shrink -def grow(y: int, y_parents: List[int], candidates: List[int], score): - - best = -score.score(y, y_parents) - - add = None - checked = [] - while add is not None or len(candidates) > 0: - - if add is not None: - checked.remove(add) - y_parents.append(add) - candidates = checked - checked = [] - add = None - - while len(candidates) > 0: - - x = candidates.pop() - y_parents.append(x) - current = -score.score(y, y_parents) - y_parents.remove(x) - checked.append(x) - - if current > best: - best = current - add = x - - return best - - -# shrink of grow-shrink -def shrink(y: int, y_parents: List[int], score): - - best = -score.score(y, y_parents) - - remove = None - checked = 0 - while remove is not None or checked < len(y_parents): - - if remove is not None: - y_parents.remove(remove) - checked = 0 - remove = None - - while checked < len(y_parents): - x = y_parents.pop(0) - current = -score.score(y, y_parents) - y_parents.append(x) - checked += 1 - - if current > best: - best = current - remove = x - - return best - - # tucks the node at position i into position j def tuck(i: int, j: int, order): diff --git a/causallearn/search/PermutationBased/gst.py b/causallearn/search/PermutationBased/gst.py new file mode 100644 index 00000000..25e0da6b --- /dev/null +++ b/causallearn/search/PermutationBased/gst.py @@ -0,0 +1,77 @@ +class GSTNode: + + def __init__(self, tree, add=None, score=None): + if score is None: score = -tree.score.score_nocache(tree.vertex, []) + # if score is None: score = -tree.score.score(tree.vertex, []) + self.tree = tree + self.add = add + self.grow_score = score + self.shrink_score = score + self.branches = None + self.remove = None + + def __lt__(self, other): + return self.grow_score < other.grow_score + + def grow(self, available, parents): + self.branches = [] + for add in available: + parents.append(add) + score = -self.tree.score.score_nocache(self.tree.vertex, parents) + # score = -self.tree.score.score(self.tree.vertex, parents) + parents.remove(add) + branch = GSTNode(self.tree, add, score) + if score > self.grow_score: self.branches.append(branch) + self.branches.sort() + + def shrink(self, parents): + self.remove = [] + while True: + best = None + for remove in [parent for parent in parents]: + parents.remove(remove) + score = -self.tree.score.score_nocache(self.tree.vertex, parents) + # score = -self.tree.score.score(self.tree.vertex, parents) + parents.append(remove) + if score > self.shrink_score: + self.shrink_score = score + best = remove + if best is None: break + self.remove.append(best) + parents.remove(best) + + def trace(self, prefix, available, parents): + if self.branches is None: self.grow(available, parents) + for branch in self.branches: + available.remove(branch.add) + if branch.add in prefix: + parents.append(branch.add) + return branch.trace(prefix, available, parents) + if self.remove is None: + self.shrink(parents) + return 
self.shrink_score + for remove in self.remove: parents.remove(remove) + return self.shrink_score + + +class GST: + + def __init__(self, vertex, score): + self.vertex = vertex + self.score = score + self.root = GSTNode(self) + self.forbidden = [vertex] + self.required = [] + + def trace(self, prefix, parents=None): + if parents is None: parents = [] + available = [i for i in range(self.score.data.shape[1]) if i not in self.forbidden] + return self.root.trace(prefix, available, parents) + + def set_knowledge(self, forbidden, required): + self.forbidden = forbidden # not implemented + self.required = required # not implemented + self.reset() + + def reset(self): + self.root = GSTNode(self) \ No newline at end of file diff --git a/causallearn/search/ScoreBased/GES.py b/causallearn/search/ScoreBased/GES.py index 1c73fbc4..c9475755 100644 --- a/causallearn/search/ScoreBased/GES.py +++ b/causallearn/search/ScoreBased/GES.py @@ -5,10 +5,11 @@ from causallearn.utils.DAG2CPDAG import dag2cpdag from causallearn.utils.GESUtils import * from causallearn.utils.PDAG2DAG import pdag2dag +from typing import Union def ges(X: ndarray, score_func: str = 'local_score_BIC', maxP: Optional[float] = None, - parameters: Optional[Dict[str, Any]] = None) -> Dict[str, Any]: + parameters: Optional[Dict[str, Any]] = None, node_names: Union[List[str], None] = None,) -> Dict[str, Any]: """ Perform greedy equivalence search (GES) algorithm @@ -95,7 +96,8 @@ def ges(X: ndarray, score_func: str = 'local_score_BIC', maxP: Optional[float] = raise Exception('Unknown function!') score_func = localScoreClass - node_names = [("X%d" % (i + 1)) for i in range(N)] + if node_names is None: + node_names = [("X%d" % (i + 1)) for i in range(N)] nodes = [] for name in node_names: @@ -137,7 +139,7 @@ def ges(X: ndarray, score_func: str = 'local_score_BIC', maxP: Optional[float] = np.where(G.graph[j, :] == Endpoint.TAIL.value)[0]) # neighbors of Xj Ti = np.union1d(np.where(G.graph[:, i] != Endpoint.NULL.value)[0], - np.where(G.graph[i, 0] != Endpoint.NULL.value)[0]) # adjacent to Xi + np.where(G.graph[i, :] != Endpoint.NULL.value)[0]) # adjacent to Xi NTi = np.setdiff1d(np.arange(N), Ti) T0 = np.intersect1d(Tj, NTi) # find the neighbours of Xj that are not adjacent to Xi @@ -180,7 +182,6 @@ def ges(X: ndarray, score_func: str = 'local_score_BIC', maxP: Optional[float] = break G = insert(G, min_desc[0], min_desc[1], min_desc[2]) update1.append([min_desc[0], min_desc[1], min_desc[2]]) - print(G.graph) G = pdag2dag(G) G = dag2cpdag(G) G_step1.append(G) @@ -190,7 +191,6 @@ def ges(X: ndarray, score_func: str = 'local_score_BIC', maxP: Optional[float] = ## -------------------------------------------------------------------- # backward greedy search - print('backward') count2 = 0 score_new = score update2 = [] @@ -244,7 +244,6 @@ def ges(X: ndarray, score_func: str = 'local_score_BIC', maxP: Optional[float] = break G = delete(G, min_desc[0], min_desc[1], min_desc[2]) update2.append([min_desc[0], min_desc[1], min_desc[2]]) - print(G.graph) G = pdag2dag(G) G = dag2cpdag(G) G_step2.append(G) diff --git a/causallearn/utils/DAG2PAG.py b/causallearn/utils/DAG2PAG.py index 581f5f20..2a091f72 100644 --- a/causallearn/utils/DAG2PAG.py +++ b/causallearn/utils/DAG2PAG.py @@ -10,10 +10,11 @@ from causallearn.graph.Endpoint import Endpoint from causallearn.graph.GeneralGraph import GeneralGraph from causallearn.graph.Node import Node -from causallearn.search.ConstraintBased.FCI import rule0, rulesR1R2cycle, ruleR3, ruleR4B +from 
causallearn.search.ConstraintBased.FCI import rule0, rulesR1R2cycle, ruleR3, ruleR4B, ruleR5, ruleR6, ruleR7, rule8, rule9, rule10 from causallearn.utils.cit import CIT, d_separation -def dag2pag(dag: Dag, islatent: List[Node]) -> GeneralGraph: + +def dag2pag(dag: Dag, islatent: List[Node], isselection: List[Node] = []) -> GeneralGraph: """ Convert a DAG to its corresponding PAG Parameters @@ -27,8 +28,8 @@ def dag2pag(dag: Dag, islatent: List[Node]) -> GeneralGraph: dg = nx.DiGraph() true_dag = nx.DiGraph() nodes = dag.get_nodes() - observed_nodes = list(set(nodes) - set(islatent)) - mod_nodes = observed_nodes + islatent + observed_nodes = list(set(nodes) - set(islatent) - set(isselection)) + mod_nodes = observed_nodes + islatent + isselection nodes = dag.get_nodes() nodes_ids = {node: i for i, node in enumerate(nodes)} mod_nodeids = {node: i for i, node in enumerate(mod_nodes)} @@ -65,7 +66,7 @@ def dag2pag(dag: Dag, islatent: List[Node]) -> GeneralGraph: for Z in combinations(observed_nodes, l): if nodex in Z or nodey in Z: continue - if d_separated(dg, {nodes_ids[nodex]}, {nodes_ids[nodey]}, set(nodes_ids[z] for z in Z)): + if d_separated(dg, {nodes_ids[nodex]}, {nodes_ids[nodey]}, set(nodes_ids[z] for z in Z) | set([nodes_ids[s] for s in isselection])): if edge: PAG.remove_edge(edge) sepset[(nodes_ids[nodex], nodes_ids[nodey])] |= set(Z) @@ -96,14 +97,22 @@ def dag2pag(dag: Dag, islatent: List[Node]) -> GeneralGraph: data = np.empty(shape=(0, len(observed_nodes))) independence_test_method = CIT(data, method=d_separation, true_dag=true_dag) - + node_map = PAG.get_node_map() + sepset_reindexed = {(node_map[nodes[i]], node_map[nodes[j]]): sepset[(i, j)] for (i, j) in sepset} while change_flag: change_flag = False change_flag = rulesR1R2cycle(PAG, None, change_flag, False) - change_flag = ruleR3(PAG, sepset, None, change_flag, False) - change_flag = ruleR4B(PAG, -1, data, independence_test_method, 0.05, sep_sets=sepset, + change_flag = ruleR3(PAG, sepset_reindexed, None, change_flag, False) + change_flag = ruleR4B(PAG, -1, data, independence_test_method, 0.05, sep_sets=sepset_reindexed, change_flag=change_flag, bk=None, verbose=False) + change_flag = ruleR5(PAG, changeFlag=change_flag, verbose=True) + change_flag = ruleR6(PAG, changeFlag=change_flag) + change_flag = ruleR7(PAG, changeFlag=change_flag) + change_flag = rule8(PAG, nodes=observed_nodes, changeFlag=change_flag) + change_flag = rule9(PAG, nodes=observed_nodes, changeFlag=change_flag) + change_flag = rule10(PAG, changeFlag=change_flag) + return PAG diff --git a/causallearn/utils/Dataset.py b/causallearn/utils/Dataset.py new file mode 100644 index 00000000..6e62c2ee --- /dev/null +++ b/causallearn/utils/Dataset.py @@ -0,0 +1,41 @@ +import numpy as np +import urllib.request +from io import StringIO + +def load_dataset(dataset_name): + ''' + Load real-world datasets. 
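A usage sketch for this helper (a sketch, assuming this patch; it fetches the processed files over the network at call time):

```
# Usage sketch for load_dataset (assumes this patch and network access).
from causallearn.utils.Dataset import load_dataset

data, labels = load_dataset('sachs')
print(data.shape)   # (n_samples, n_features) parsed from the tab-separated file
print(labels[:3])   # column names taken from the header row
```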
Processed datasets are from https://github.com/cmu-phil/example-causal-datasets/tree/main + + Parameters + ---------- + dataset_name : str, ['sachs', 'boston_housing', 'airfoil'] + + Returns + ------- + data = np.array + labels = list + ''' + + url_mapping = { + 'sachs': 'https://raw.githubusercontent.com/cmu-phil/example-causal-datasets/main/real/sachs/data/sachs.2005.continuous.txt', + 'boston_housing': 'https://raw.githubusercontent.com/cmu-phil/example-causal-datasets/main/real/boston-housing/data/boston-housing.continuous.txt', + 'airfoil': 'https://raw.githubusercontent.com/cmu-phil/example-causal-datasets/main/real/airfoil-self-noise/data/airfoil-self-noise.continuous.txt' + } + + if dataset_name not in url_mapping: + raise ValueError("Invalid dataset name") + + url = url_mapping[dataset_name] + with urllib.request.urlopen(url) as response: + content = response.read().decode('utf-8') # Read content and decode to string + + # Use StringIO to turn the string content into a file-like object so numpy can read from it + labels_array = np.genfromtxt(StringIO(content), delimiter="\t", dtype=str, max_rows=1) + data = np.loadtxt(StringIO(content), skiprows=1) + + # Convert labels_array to a list of strings + labels_list = labels_array.tolist() + if isinstance(labels_list, str): # handle the case where there's only one label + labels_list = [labels_list] + + return data, labels_list \ No newline at end of file diff --git a/causallearn/utils/FastKCI/FastKCI.py b/causallearn/utils/FastKCI/FastKCI.py new file mode 100644 index 00000000..f5fcc506 --- /dev/null +++ b/causallearn/utils/FastKCI/FastKCI.py @@ -0,0 +1,534 @@ +from causallearn.utils.KCI.KCI import GaussianKernel, Kernel +import numpy as np +from numpy.linalg import eigh +import scipy.stats as stats +from scipy.special import logsumexp +from sklearn.preprocessing import LabelEncoder +from joblib import Parallel, delayed +from sklearn.gaussian_process import GaussianProcessRegressor +from sklearn.gaussian_process.kernels import ConstantKernel, WhiteKernel, RBF +import warnings + + +class FastKCI_CInd(object): + """ + Python implementation of a speed-up version of the Kernel-based Conditional Independence (KCI) test. + Conditional version. + + References + ---------- + [1] K. Zhang, J. Peters, D. Janzing, and B. Schölkopf, + "A kernel-based conditional independence test and application in causal discovery", In UAI 2011. + [2] M. Zhang, and S. Williamson, + "Embarrassingly Parallel Inference for Gaussian Processes", In JMLR 20 (2019) + [3] O. Schacht, and B. Huang + "FastKCI: A fast Kernel-based Conditional Independence test with application to causal discovery", + Working Paper. + + """ + def __init__(self, K=10, J=8, alpha=500, epsilon=1e-3, eig_thresh=1e-6, use_gp=False): + """ + Initialize the FastKCI_CInd object. + + Parameters + ---------- + K: Number of Gaussians that are assumed to be in the mixture. + J: Number of independent repetitions. + alpha: Parameter for the Dirichlet distribution. + epsilon: Penalty for the matrix ridge regression. + eig_thresh: Threshold for eigenvalues. + use_gp: Whether to use Gaussian Process Regression to determine the kernel widths. + """ + self.K = K + self.J = J + self.alpha = alpha + self.epsilon = epsilon + self.eig_thresh = eig_thresh + self.use_gp = use_gp + self.nullss = 5000 + + def compute_pvalue(self, data_x=None, data_y=None, data_z=None): + """ + Main function: compute the p value of H_0: X is independent of Y conditional on Z.
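For orientation, a minimal end-to-end sketch of the conditional test (hypothetical data in which X is independent of Y given Z by construction):

```
# Usage sketch for FastKCI_CInd (assumes this patch; data are hypothetical).
import numpy as np
from causallearn.utils.FastKCI.FastKCI import FastKCI_CInd

rng = np.random.default_rng(0)
Z = rng.standard_normal((600, 1))
X = Z + 0.3 * rng.standard_normal((600, 1))
Y = Z + 0.3 * rng.standard_normal((600, 1))   # X and Y independent given Z

cind = FastKCI_CInd(K=10, J=8)
pvalue, test_stat = cind.compute_pvalue(X, Y, Z)
print(pvalue)  # expected to be large here, since H_0 holds by construction
```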
+ + Parameters + ---------- + data_x: input data for x (nxd1 array) + data_y: input data for y (nxd2 array) + data_z: input data for z (nxd3 array) + + Returns + _________ + pvalue: p value (scalar) + test_stat: test statistic (scalar) + """ + self.data = [data_x, data_y] + self.data_z = data_z + self.n = data_x.shape[0] + + self.Z_proposal = Parallel(n_jobs=-1)(delayed(self.partition_data)() for i in range(self.J)) + block_res = Parallel(n_jobs=-1)(delayed(self.pvalue_onblocks)(self.Z_proposal[i]) for i in range(self.J)) + test_stat, null_samples, log_likelihood = zip(*block_res) + + log_likelihood = np.array(log_likelihood) + self.all_null_samples = np.vstack(null_samples) + self.all_p = np.array([np.sum(self.all_null_samples[i,] > test_stat[i]) / float(self.nullss) for i in range(self.J)]) + self.prob_weights = np.around(np.exp(log_likelihood-logsumexp(log_likelihood)), 6) + self.all_test_stats = np.array(test_stat) + self.test_stat = np.average(np.array(test_stat), weights=self.prob_weights) + self.null_samples = np.average(null_samples, axis=0, weights=self.prob_weights) + self.pvalue = np.sum(self.null_samples > self.test_stat) / float(self.nullss) + + return self.pvalue, self.test_stat + + def partition_data(self): + """ + Partitions data into K Gaussians following an expectation maximization approach on Z. + + Returns + _________ + Z: Latent Gaussian that was drawn for each observation (nx1 array) + prob_Z: Log-Likelihood of that random assignment (scalar) + """ + Z_mean = self.data_z.mean(axis=0) + Z_sd = self.data_z.std(axis=0) + mu_k = np.random.normal(Z_mean, Z_sd, size=(self.K, self.data_z.shape[1])) + sigma_k = np.eye(self.data_z.shape[1]) + pi_j = np.random.dirichlet([self.alpha]*self.K) + ll = np.tile(np.log(pi_j), (self.n, 1)) + for k in range(self.K): + ll[:, k] += stats.multivariate_normal.logpdf(self.data_z, mu_k[k, :], cov=sigma_k, allow_singular=True) + Z = np.array([np.random.multinomial(1, np.exp(ll[n, :]-logsumexp(ll[n, :]))).argmax() for n in range(self.n)]) + le = LabelEncoder() + Z = le.fit_transform(Z) + return Z + + def pvalue_onblocks(self, Z_proposal): + """ + Calculate p value on given partitions of the data. + + Parameters + ---------- + Z_proposal: partition of the data into K clusters (nxd1 array) + + Returns + _________ + test_stat: test statistic (scalar) + null_samples: bootstrapped sampled of the null distribution (self.nullssx1 array) + log_likelihood: estimated probability P(X,Y|Z,k) + """ + unique_Z_j = np.unique(Z_proposal) + test_stat = 0 + log_likelihood = 0 + null_samples = np.zeros((1, self.nullss)) + for k in unique_Z_j: + K_mask = (Z_proposal == k) + X_k = np.copy(self.data[0][K_mask]) + Y_k = np.copy(self.data[1][K_mask]) + Z_k = np.copy(self.data_z[K_mask]) + if (Z_k.shape[0] < 6): # small blocks cause problems in GP + continue + Kx, Ky, Kzx, Kzy, epsilon_x, epsilon_y, likelihood_x, likelihood_y = self.kernel_matrix(X_k, Y_k, Z_k) + KxR, Rzx = Kernel.center_kernel_matrix_regression(Kx, Kzx, epsilon_x) + if epsilon_x != epsilon_y: + KyR, Rzy = Kernel.center_kernel_matrix_regression(Ky, Kzy, epsilon_y) + else: + KyR = Rzx.dot(Ky.dot(Rzx)) + test_stat += np.einsum('ij,ji->', KxR, KyR) + uu_prod, size_u = self.get_uuprod(KxR, KyR) + null_samples += self.null_sample_spectral(uu_prod, size_u, Kx.shape[0]) + log_likelihood += likelihood_x + likelihood_y + return test_stat, null_samples, log_likelihood + + def kernel_matrix(self, data_x, data_y, data_z): + """ + Calculates the Gaussian Kernel for given data inputs as well as the shared kernel. 
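The centering applied to Kx and Ky below is ordinary kernel double-centering; a standalone sketch of the operation (the helper name here is hypothetical, mirroring what Kernel.center_kernel_matrix is expected to compute):

```
# Standalone sketch of kernel double-centering: K_c = H K H, H = I - (1/n) 11^T.
import numpy as np

def center_kernel(K):
    n = K.shape[0]
    H = np.eye(n) - np.full((n, n), 1.0 / n)
    return H @ K @ H

A = np.random.rand(5, 3)
K = A @ A.T                              # a valid (PSD) Gram matrix
Kc = center_kernel(K)
print(np.allclose(Kc.sum(axis=0), 0.0))  # True: centered rows/columns sum to ~0
```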
+ + Returns + _________ + K: Kernel matrices (n_kxn_k array) + """ + kernel_obj = GaussianKernel() + kernel_obj.set_width_empirical_kci(data_z) + + data_x = stats.zscore(data_x, ddof=1, axis=0) + data_x[np.isnan(data_x)] = 0. + + data_y = stats.zscore(data_y, ddof=1, axis=0) + data_y[np.isnan(data_y)] = 0. + + data_z = stats.zscore(data_z, ddof=1, axis=0) + data_z[np.isnan(data_z)] = 0. + + data_x = np.concatenate((data_x, 0.5 * data_z), axis=1) + + kernelX = GaussianKernel() + kernelX.set_width_empirical_kci(data_z) + kernelY = GaussianKernel() + kernelY.set_width_empirical_kci(data_z) + + Kx = kernelX.kernel(data_x) + Ky = kernelY.kernel(data_y) + + # centering kernel matrix + Kx = Kernel.center_kernel_matrix(Kx) + Ky = Kernel.center_kernel_matrix(Ky) + + if not self.use_gp: + kernelZ = GaussianKernel() + kernelZ.set_width_empirical_kci(data_z) + Kzx = kernelZ.kernel(data_z) + Kzx = Kernel.center_kernel_matrix(Kzx) + Kzy = Kzx + epsilon_x, epsilon_y = self.epsilon, self.epsilon + gpx = GaussianProcessRegressor() + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=Warning) + # P(X|Z) + gpx.fit(X=data_z, y=data_x) + likelihood_x = gpx.log_marginal_likelihood_value_ + gpy = GaussianProcessRegressor() + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=Warning) + # P(Y|X,Z) + gpy.fit(X=np.c_[data_x, data_z], y=data_y) + likelihood_y = gpy.log_marginal_likelihood_value_ + + else: + n, Dz = data_z.shape + + widthz = np.sqrt(1.0 / (kernelX.width * data_x.shape[1])) + + # Instantiate a Gaussian Process model for x + wx, vx = eigh(Kx) + topkx = int(np.max([np.min([400, np.floor(n / 4)]), np.min([n+1, 8])])) + idx = np.argsort(-wx) + wx = wx[idx] + vx = vx[:, idx] + wx = wx[0:topkx] + vx = vx[:, 0:topkx] + vx = vx[:, np.abs(wx) > np.abs(wx).max() * 1e-10] + wx = wx[np.abs(wx) > np.abs(wx).max() * 1e-10] + vx = 2 * np.sqrt(n) * vx.dot(np.diag(np.sqrt(wx))) / np.sqrt(wx[0]) + kernelx = ConstantKernel(1.0, (1e-3, 1e3)) * RBF(widthz * np.ones(Dz), (1e-2, 1e2)) \ + + WhiteKernel(0.1, (1e-10, 1e+1)) + gpx = GaussianProcessRegressor(kernel=kernelx) + # fit Gaussian process, including hyperparameter optimization + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=Warning) + gpx.fit(data_z, vx) + + # construct Gaussian kernels according to learned hyperparameters + Kzx = gpx.kernel_.k1(data_z, data_z) + epsilon_x = np.exp(gpx.kernel_.theta[-1]) + likelihood_x = gpx.log_marginal_likelihood_value_ + + # Instantiate a Gaussian Process model for y + wy, vy = eigh(Ky) + topky = int(np.max([np.min([400, np.floor(n / 4)]), np.min([n+1, 8])])) + idy = np.argsort(-wy) + wy = wy[idy] + vy = vy[:, idy] + wy = wy[0:topky] + vy = vy[:, 0:topky] + vy = vy[:, np.abs(wy) > np.abs(wy).max() * 1e-10] + wy = wy[np.abs(wy) > np.abs(wy).max() * 1e-10] + vy = 2 * np.sqrt(n) * vy.dot(np.diag(np.sqrt(wy))) / np.sqrt(wy[0]) + kernely = ConstantKernel(1.0, (1e-3, 1e3)) * RBF(widthz * np.ones(Dz), (1e-2, 1e2)) \ + + WhiteKernel(0.1, (1e-10, 1e+1)) + gpy = GaussianProcessRegressor(kernel=kernely) + # fit Gaussian process, including hyperparameter optimization + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=Warning) + gpy.fit(data_z, vy) + + # construct Gaussian kernels according to learned hyperparameters + Kzy = gpy.kernel_.k1(data_z, data_z) + epsilon_y = np.exp(gpy.kernel_.theta[-1]) + likelihood_y = gpy.log_marginal_likelihood_value_ + + return Kx, Ky, Kzx, Kzy, epsilon_x, epsilon_y, likelihood_x, likelihood_y + + def 
+
+    def get_uuprod(self, Kx, Ky):
+        """
+        Compute the eigenvector products needed to estimate the null distribution.
+
+        Parameters
+        ----------
+        Kx: centralized kernel matrix for data_x (nxn)
+        Ky: centralized kernel matrix for data_y (nxn)
+
+        Returns
+        -------
+        uu_prod: product of the eigenvectors of Kx and Ky
+        size_u: number of produced eigenvectors
+        """
+        n_block = Kx.shape[0]
+        wx, vx = eigh(Kx)
+        wy, vy = eigh(Ky)
+        idx = np.argsort(-wx)
+        idy = np.argsort(-wy)
+        wx = wx[idx]
+        vx = vx[:, idx]
+        wy = wy[idy]
+        vy = vy[:, idy]
+        vx = vx[:, wx > np.max(wx) * self.eig_thresh]
+        wx = wx[wx > np.max(wx) * self.eig_thresh]
+        vy = vy[:, wy > np.max(wy) * self.eig_thresh]
+        wy = wy[wy > np.max(wy) * self.eig_thresh]
+        vx = vx.dot(np.diag(np.sqrt(wx)))
+        vy = vy.dot(np.diag(np.sqrt(wy)))
+
+        # calculate the product of every pair of eigenvectors
+        num_eigx = vx.shape[1]
+        num_eigy = vy.shape[1]
+        size_u = num_eigx * num_eigy
+        uu = np.zeros((n_block, size_u))
+        for i in range(0, num_eigx):
+            for j in range(0, num_eigy):
+                uu[:, i * num_eigy + j] = vx[:, i] * vy[:, j]
+
+        if size_u > self.n:
+            uu_prod = uu.dot(uu.T)
+        else:
+            uu_prod = uu.T.dot(uu)
+
+        return uu_prod, size_u
+
+    def get_kappa(self, mean_appr, var_appr):
+        """
+        Get the parameters of the approximating gamma distribution.
+
+        Parameters
+        ----------
+        mean_appr: mean of the null distribution approximation (scalar)
+        var_appr: variance of the null distribution approximation (scalar)
+
+        Returns
+        -------
+        k_appr, theta_appr: approximated parameters of the gamma distribution
+        """
+        k_appr = mean_appr ** 2 / var_appr
+        theta_appr = var_appr / mean_appr
+        return k_appr, theta_appr
+
+    def null_sample_spectral(self, uu_prod, size_u, T):
+        """
+        Simulate samples from the null distribution.
+
+        Parameters
+        ----------
+        uu_prod: product of the eigenvectors of Kx and Ky
+        size_u: number of produced eigenvectors
+        T: sample size
+
+        Returns
+        -------
+        null_dstr: samples from the null distribution
+        """
+        eig_uu = np.linalg.eigvalsh(uu_prod)
+        eig_uu = -np.sort(-eig_uu)
+        eig_uu = eig_uu[0:np.min((T, size_u))]
+        eig_uu = eig_uu[eig_uu > np.max(eig_uu) * self.eig_thresh]
+
+        f_rand = np.random.chisquare(1, (eig_uu.shape[0], self.nullss))
+        null_dstr = eig_uu.T.dot(f_rand)
+        return null_dstr
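For readers of this patch, here is a minimal usage sketch of the new conditional test on synthetic data. It is illustrative only and not part of the PR; the constructor arguments `K`, `J` mirror those exercised in `tests/TestCIT_FastKCI.py`, and the import path assumes this branch is installed.

```python
import numpy as np
from causallearn.utils.FastKCI.FastKCI import FastKCI_CInd

np.random.seed(0)
n = 600
Z = np.random.randn(n, 1)
X = Z + 0.3 * np.random.randn(n, 1)  # X and Y depend on each other only through Z
Y = Z + 0.3 * np.random.randn(n, 1)

test = FastKCI_CInd(K=5, J=4)  # 5 mixture components, 4 random partitions
pvalue, test_stat = test.compute_pvalue(X, Y, Z)
print(pvalue)  # X is independent of Y given Z by construction, so p should typically be large
```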
+
+
+class FastKCI_UInd(object):
+    """
+    Python implementation of a speed-up version of the Kernel-based Conditional Independence (KCI) test,
+    unconditional version.
+
+    References
+    ----------
+    [1] K. Zhang, J. Peters, D. Janzing, and B. Schölkopf,
+    "A kernel-based conditional independence test and application in causal discovery," In UAI 2011.
+    [2] M. Zhang, and S. Williamson,
+    "Embarrassingly Parallel Inference for Gaussian Processes" In JMLR 20 (2019)
+    [3] O. Schacht, and B. Huang
+    "FastKCI: A fast Kernel-based Conditional Independence test with application to causal discovery",
+    Working Paper.
+    """
+    def __init__(self, K=10, J=8, alpha=500):
+        """
+        Construct the FastKCI_UInd model.
+
+        Parameters
+        ----------
+        K: Number of Gaussians that are assumed to be in the mixture.
+        J: Number of independent repetitions.
+        alpha: Parameter for the Dirichlet distribution.
+        """
+        self.K = K
+        self.J = J
+        self.alpha = alpha
+        self.nullss = 5000
+        self.eig_thresh = 1e-5
+
+    def compute_pvalue(self, data_x=None, data_y=None):
+        """
+        Main function: compute the p value and return it together with the test statistic.
+
+        Parameters
+        ----------
+        data_x: input data for x (nxd1 array)
+        data_y: input data for y (nxd2 array)
+
+        Returns
+        -------
+        pvalue: p value (scalar)
+        test_stat: test statistic (scalar)
+        """
+        self.data_x = data_x
+        self.data_y = data_y
+        self.n = data_x.shape[0]
+
+        Z_proposal = Parallel(n_jobs=-1)(delayed(self.partition_data)() for i in range(self.J))
+        self.Z_proposal, self.prob_Y = zip(*Z_proposal)
+        block_res = Parallel(n_jobs=-1)(delayed(self.pvalue_onblocks)(self.Z_proposal[i]) for i in range(self.J))
+        test_stat, null_samples, log_likelihood = zip(*block_res)
+        self.prob_weights = np.around(np.exp(log_likelihood-logsumexp(log_likelihood)), 6)
+        self.test_stat = np.average(np.array(test_stat), weights=self.prob_weights)
+        self.null_samples = np.average(null_samples, axis=0, weights=self.prob_weights)
+        self.pvalue = np.sum(self.null_samples > self.test_stat) / float(self.nullss)
+
+        return self.pvalue, self.test_stat
+
+    def partition_data(self):
+        """
+        Partitions the data by drawing cluster assignments from a randomly initialized
+        Gaussian mixture on Y.
+
+        Returns
+        -------
+        Z: Latent Gaussian that was drawn for each observation (nx1 array)
+        prob_Y: log-likelihood of that random assignment (scalar)
+        """
+        Y_mean = self.data_y.mean(axis=0)
+        Y_sd = self.data_y.std(axis=0)
+        mu_k = np.random.normal(Y_mean, Y_sd, size=(self.K, self.data_y.shape[1]))
+        sigma_k = np.eye(self.data_y.shape[1])
+        pi_j = np.random.dirichlet([self.alpha]*self.K)
+        ll = np.tile(np.log(pi_j), (self.n, 1))
+        for k in range(self.K):
+            ll[:, k] += stats.multivariate_normal.logpdf(self.data_y, mu_k[k, :], cov=sigma_k, allow_singular=True)
+        Z = np.array([np.random.multinomial(1, np.exp(ll[n, :]-logsumexp(ll[n, :]))).argmax() for n in range(self.n)])
+        prob_Y = np.take_along_axis(ll, Z[:, None], axis=1).sum()
+        le = LabelEncoder()
+        Z = le.fit_transform(Z)
+        return (Z, prob_Y)
+
+    def pvalue_onblocks(self, Z_proposal):
+        """
+        Calculate the p value on a given partition of the data.
+
+        Parameters
+        ----------
+        Z_proposal: partition of the data into at most K clusters (nx1 array)
+
+        Returns
+        -------
+        test_stat: test statistic (scalar)
+        null_samples: bootstrapped samples of the null distribution (1 x self.nullss array)
+        log_likelihood: estimated log-likelihood log P(X|Y,k) (scalar)
+        """
+        unique_Z_j = np.unique(Z_proposal)
+        test_stat = 0
+        log_likelihood = 0
+        null_samples = np.zeros((1, self.nullss))
+        for k in unique_Z_j:
+            K_mask = (Z_proposal == k)
+            X_k = np.copy(self.data_x[K_mask])
+            Y_k = np.copy(self.data_y[K_mask])
+
+            Kx = self.kernel_matrix(X_k)
+            Ky = self.kernel_matrix(Y_k)
+
+            v_stat, Kxc, Kyc = self.HSIC_V_statistic(Kx, Ky)
+
+            null_samples += self.null_sample_spectral(Kxc, Kyc)
+            test_stat += v_stat
+
+            gpx = GaussianProcessRegressor()
+            with warnings.catch_warnings():
+                warnings.filterwarnings("ignore", category=Warning)
+                # P(X|Y)
+                gpx.fit(X=Y_k, y=X_k)
+            likelihood = gpx.log_marginal_likelihood_value_
+            log_likelihood += likelihood
+
+        return test_stat, null_samples, log_likelihood
+
+    def kernel_matrix(self, data):
+        """
+        Calculates the Gaussian kernel matrix for the given data input.
+
+        Returns
+        -------
+        K: kernel matrix (n_k x n_k array)
+        """
+        kernel_obj = GaussianKernel()
+        kernel_obj.set_width_empirical_hsic(data)
+
+        data = stats.zscore(data, ddof=1, axis=0)
+        data[np.isnan(data)] = 0.
+
+        K = kernel_obj.kernel(data)
+
+        return K
+
+    def get_kappa(self, mean_appr, var_appr):
+        """
+        Get the parameters of the approximating gamma distribution.
+
+        Parameters
+        ----------
+        mean_appr: mean of the null distribution approximation (scalar)
+        var_appr: variance of the null distribution approximation (scalar)
+
+        Returns
+        -------
+        k_appr, theta_appr: approximated parameters of the gamma distribution
+        """
+        k_appr = mean_appr ** 2 / var_appr
+        theta_appr = var_appr / mean_appr
+        return k_appr, theta_appr
+
+    def null_sample_spectral(self, Kxc, Kyc):
+        """
+        Simulate samples from the null distribution.
+
+        Parameters
+        ----------
+        Kxc: centralized kernel matrix for data_x (nxn)
+        Kyc: centralized kernel matrix for data_y (nxn)
+
+        Returns
+        -------
+        null_dstr: samples from the null distribution
+        """
+        T = Kxc.shape[0]
+        if T > 1000:
+            num_eig = int(np.floor(T / 2))
+        else:
+            num_eig = T
+        lambdax = np.linalg.eigvalsh(Kxc)
+        lambday = np.linalg.eigvalsh(Kyc)
+        lambdax = -np.sort(-lambdax)
+        lambday = -np.sort(-lambday)
+        lambdax = lambdax[0:num_eig]
+        lambday = lambday[0:num_eig]
+        lambda_prod = np.dot(lambdax.reshape(num_eig, 1), lambday.reshape(1, num_eig)).reshape(
+            (num_eig ** 2, 1))
+        lambda_prod = lambda_prod[lambda_prod > lambda_prod.max() * self.eig_thresh]
+        f_rand = np.random.chisquare(1, (lambda_prod.shape[0], self.nullss))
+        null_dstr = lambda_prod.T.dot(f_rand) / T
+        return null_dstr
+
+    def HSIC_V_statistic(self, Kx, Ky):
+        """
+        Compute the V test statistic from kernel matrices Kx and Ky.
+
+        Parameters
+        ----------
+        Kx: kernel matrix for data_x (nxn)
+        Ky: kernel matrix for data_y (nxn)
+
+        Returns
+        -------
+        V_stat: HSIC V statistic (scalar)
+        Kxc: centralized kernel matrix for data_x (nxn)
+        Kyc: centralized kernel matrix for data_y (nxn)
+        """
+        Kxc = Kernel.center_kernel_matrix(Kx)
+        Kyc = Kernel.center_kernel_matrix(Ky)
+        V_stat = np.einsum('ij,ij->', Kxc, Kyc)
+        return V_stat, Kxc, Kyc
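A matching sketch for the unconditional variant (illustrative only, not part of the patch; constructor arguments follow the `__init__` defaults shown above):

```python
import numpy as np
from causallearn.utils.FastKCI.FastKCI import FastKCI_UInd

np.random.seed(1)
X = np.random.randn(500, 1)
Y = X + 0.5 * np.random.randn(500, 1)  # a dependent pair

test = FastKCI_UInd(K=10, J=8)
pvalue, test_stat = test.compute_pvalue(X, Y)
print(pvalue)  # X and Y are dependent, so p should typically be close to 0
```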
diff --git a/causallearn/utils/FastKCI/__init__.py b/causallearn/utils/FastKCI/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/causallearn/utils/KCI/KCI.py b/causallearn/utils/KCI/KCI.py
index 40a33452..681d48d4 100644
--- a/causallearn/utils/KCI/KCI.py
+++ b/causallearn/utils/KCI/KCI.py
@@ -191,7 +191,7 @@ def null_sample_spectral(self, Kxc, Kyc):
         """
         T = Kxc.shape[0]
         if T > 1000:
-            num_eig = np.int(np.floor(T / 2))
+            num_eig = int(np.floor(T / 2))
         else:
             num_eig = T
         lambdax = eigvalsh(Kxc)
@@ -306,8 +306,8 @@ def compute_pvalue(self, data_x=None, data_y=None, data_z=None):
             k_appr, theta_appr = self.get_kappa(uu_prod)
             pvalue = 1 - stats.gamma.cdf(test_stat, k_appr, 0, theta_appr)
         else:
-            null_samples = self.null_sample_spectral(uu_prod, size_u, Kx.shape[0])
-            pvalue = sum(null_samples > test_stat) / float(self.nullss)
+            self.null_samples = self.null_sample_spectral(uu_prod, size_u, Kx.shape[0])
+            pvalue = sum(self.null_samples > test_stat) / float(self.nullss)
         return pvalue, test_stat

     def kernel_matrix(self, data_x, data_y, data_z):
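Both `null_sample_spectral` implementations touched here follow the same recipe: the null distribution of the statistic is approximated by a weighted sum of independent chi-square(1) draws, with kernel eigenvalues as the weights. A standalone sketch of the idea, using hypothetical eigenvalues:

```python
import numpy as np

# Spectral null sampling: draw from sum_i lambda_i * chi^2_1
# given eigenvalues lambda_i (hypothetical values for illustration).
eigenvalues = np.array([2.0, 1.0, 0.5, 0.1])
nullss = 5000
f_rand = np.random.chisquare(1, (eigenvalues.shape[0], nullss))
null_dstr = eigenvalues.dot(f_rand)   # one null draw per column
p_value = np.mean(null_dstr > 3.2)    # compare an observed statistic, e.g. 3.2
print(p_value)
```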
diff --git a/causallearn/utils/PCUtils/BackgroundKnowledge.py b/causallearn/utils/PCUtils/BackgroundKnowledge.py
index 4c6bca0d..19248d48 100644
--- a/causallearn/utils/PCUtils/BackgroundKnowledge.py
+++ b/causallearn/utils/PCUtils/BackgroundKnowledge.py
@@ -165,7 +165,7 @@ def is_forbidden(self, node1: Node, node2: Node) -> bool:

         # then check in tier_map
         if self.tier_value_map.keys().__contains__(node1) and self.tier_value_map.keys().__contains__(node2):
-            if self.tier_value_map.get(node1) >= self.tier_value_map.get(node2):
+            if self.tier_value_map.get(node1) > self.tier_value_map.get(node2):  # Allow orientation within the same tier
                 return True

         return False
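The tier change above relaxes `is_forbidden` so that only orientations from a strictly later tier into an earlier tier are forbidden. A sketch of the intended semantics, assuming the pre-existing `add_node_to_tier` API of `BackgroundKnowledge` (illustrative only, not part of the patch):

```python
from causallearn.graph.GraphNode import GraphNode
from causallearn.utils.PCUtils.BackgroundKnowledge import BackgroundKnowledge

a, b, c = GraphNode("A"), GraphNode("B"), GraphNode("C")
bk = BackgroundKnowledge()
bk.add_node_to_tier(a, 1)
bk.add_node_to_tier(b, 1)  # same tier as A
bk.add_node_to_tier(c, 2)

print(bk.is_forbidden(a, b))  # False after this fix: edges within a tier are allowed
print(bk.is_forbidden(c, a))  # True: a later tier still cannot point into an earlier one
```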
diff --git a/causallearn/utils/PCUtils/SkeletonDiscovery.py b/causallearn/utils/PCUtils/SkeletonDiscovery.py
index 6cfaa492..428d2cee 100644
--- a/causallearn/utils/PCUtils/SkeletonDiscovery.py
+++ b/causallearn/utils/PCUtils/SkeletonDiscovery.py
@@ -120,8 +120,9 @@ def skeleton_discovery(
                 else:
                     if verbose:
                         print('%d dep %d | %s with p-value %f\n' % (x, y, S, p))
-                    append_value(cg.sepset, x, y, tuple(sepsets))
-                    append_value(cg.sepset, y, x, tuple(sepsets))
+                    if (x, y) in edge_removal or not cg.G.get_edge(cg.G.nodes[x], cg.G.nodes[y]):
+                        append_value(cg.sepset, x, y, tuple(sepsets))
+                        append_value(cg.sepset, y, x, tuple(sepsets))
         if show_progress:
             pbar.refresh()
diff --git a/causallearn/utils/RCIT/RCIT.py b/causallearn/utils/RCIT/RCIT.py
new file mode 100644
index 00000000..11608ed0
--- /dev/null
+++ b/causallearn/utils/RCIT/RCIT.py
@@ -0,0 +1,403 @@
+import numpy as np
+import itertools
+from scipy.stats import chi2
+from scipy.linalg import pinv
+from momentchi2 import hbe, sw, lpb4
+from scipy.spatial.distance import pdist, squareform
+
+
+class RCIT(object):
+    """
+    Python implementation of the Randomized Conditional Independence Test (RCIT).
+    The original R implementation can be found at https://github.com/ericstrobl/RCIT/tree/master
+
+    References
+    ----------
+    [1] Strobl, E. V., Zhang, K., and Visweswaran, S. (2019). "Approximate kernel-based conditional
+    independence tests for fast non-parametric causal discovery." Journal of Causal Inference, 7(1), 20180017.
+    """
+    def __init__(self, approx="lpd4", num_f=100, num_f2=5, rcit=True):
+        """
+        Initialize the RCIT object.
+
+        Parameters
+        ----------
+        approx : str
+            Method for approximating the null distribution.
+            - "lpd4" for the Lindsay-Pilla-Basak method
+            - "hbe" for the Hall-Buckley-Eagleson method
+            - "gamma" for the Satterthwaite-Welch method
+            - "chi2" for a normalized chi-squared statistic
+            - "perm" for permutation testing
+            Default is "lpd4".
+        num_f : int
+            Number of features for the conditioning set. Default is 100.
+        num_f2 : int
+            Number of features for the non-conditioning sets. Default is 5.
+        rcit : bool
+            Whether to use RCIT or RCoT. Default is True.
+        """
+        self.approx = approx
+        self.num_f = num_f
+        self.num_f2 = num_f2
+        self.rcit = rcit
+
+    def compute_pvalue(self, data_x, data_y, data_z):
+        """
+        Compute the p value and return it together with the test statistic.
+
+        Parameters
+        ----------
+        data_x: input data for x (nxd1 array)
+        data_y: input data for y (nxd2 array)
+        data_z: input data for z (nxd3 array)
+
+        Returns
+        -------
+        p: p value
+        sta: test statistic
+        """
+        d = data_z.shape[1]
+        r = data_x.shape[0]
+        r1 = 500 if (r > 500) else r
+
+        data_x = (data_x - data_x.mean(axis=0)) / data_x.std(axis=0, ddof=1)
+        data_y = (data_y - data_y.mean(axis=0)) / data_y.std(axis=0, ddof=1)
+        data_z = (data_z - data_z.mean(axis=0)) / data_z.std(axis=0, ddof=1)
+
+        if self.rcit:
+            data_y = np.column_stack((data_y, data_z))
+
+        sigma = dict()
+        for key, value in [("x", data_x), ("y", data_y), ("z", data_z)]:
+            distances = pdist(value[:r1, :], metric='euclidean')
+            flattened_distances = squareform(distances).ravel()
+            non_zero_distances = flattened_distances[flattened_distances != 0]
+            sigma[key] = np.median(non_zero_distances)
+
+        four_z = self.random_fourier_features(data_z, num_f=self.num_f, sigma=sigma["z"])
+        four_x = self.random_fourier_features(data_x, num_f=self.num_f2, sigma=sigma["x"])
+        four_y = self.random_fourier_features(data_y, num_f=self.num_f2, sigma=sigma["y"])
+
+        f_x = (four_x - four_x.mean(axis=0)) / four_x.std(axis=0, ddof=1)
+        f_y = (four_y - four_y.mean(axis=0)) / four_y.std(axis=0, ddof=1)
+        f_z = (four_z - four_z.mean(axis=0)) / four_z.std(axis=0, ddof=1)
+
+        Cxy = f_x.T @ f_y / (f_x.shape[0] - 1)
+
+        Czz = np.cov(f_z, rowvar=False)
+
+        regularized_Czz = Czz + np.eye(self.num_f) * 1e-10
+        L = np.linalg.cholesky(regularized_Czz)
+        i_Czz = np.linalg.inv(L).T.dot(np.linalg.inv(L))
+
+        Cxz = f_x.T @ f_z / (f_x.shape[0] - 1)
+        Czy = f_z.T @ f_y / (f_z.shape[0] - 1)
+
+        z_i_Czz = f_z @ i_Czz
+        e_x_z = z_i_Czz @ Cxz.T
+        e_y_z = z_i_Czz @ Czy
+
+        res_x = f_x - e_x_z
+        res_y = f_y - e_y_z
+
+        if self.num_f2 == 1:
+            self.approx = "hbe"
+
+        if self.approx == "perm":
+            Cxy_z = self.matrix_cov(res_x, res_y)
+            sta = r * np.sum(Cxy_z**2)
+
+            nperm = 1000
+
+            stas = []
+            for _ in range(nperm):
+                perm = np.random.choice(np.arange(r), size=r, replace=False)
+                Cxy = self.matrix_cov(res_x[perm, ], res_y)
+                sta_p = r * np.sum(Cxy**2)
+                stas.append(sta_p)
+
+            p = 1 - (np.sum(sta >= stas) / len(stas))
+
+        else:
+            Cxy_z = Cxy - Cxz @ i_Czz @ Czy
+            sta = r * np.sum(Cxy_z**2)
+
+            d = list(itertools.product(range(f_x.shape[1]), range(f_y.shape[1])))
+            res = np.array([res_x[:, idx_x] * res_y[:, idx_y] for idx_x, idx_y in d]).T
+            Cov = 1/r * res.T @ res
+
+            if self.approx == "chi2":
+                i_Cov = pinv(Cov)
+
+                sta = r * (np.dot(Cxy_z.flatten(), np.dot(i_Cov, Cxy_z.flatten())))
+                p = 1 - chi2.cdf(sta, Cxy_z.size)
+
+            else:
+                eigenvalues, eigenvectors = np.linalg.eigh(Cov)
+                eig_d = eigenvalues[eigenvalues > 0]
+
+                if self.approx == "gamma":
+                    p = 1 - sw(eig_d, sta)
+
+                elif self.approx == "hbe":
+                    p = 1 - hbe(eig_d, sta)
+
+                elif self.approx == "lpd4":
+                    try:
+                        p = 1 - lpb4(eig_d, sta)
+                    except Exception:
+                        p = 1 - hbe(eig_d, sta)
+                    if np.isnan(p):
+                        p = 1 - hbe(eig_d, sta)
+
+        if (p < 0):
+            p = 0
+
+        return p, sta
+
+    def random_fourier_features(self, x, w=None, b=None, num_f=None, sigma=None):
+        """
+        Generate random Fourier features.
+
+        Parameters
+        ----------
+        x : np.ndarray
+            Random variable x.
+        w : np.ndarray
+            Random coefficients.
+        b : np.ndarray
+            Random offsets.
+        num_f : int
+            Number of random Fourier features.
+        sigma : float
+            Smoothing parameter of the RBF kernel.
+
+        Returns
+        -------
+        feat : np.ndarray
+            Random Fourier features.
+        """
+        if num_f is None:
+            num_f = 25
+
+        r = x.shape[0]
+        c = x.shape[1]
+
+        if ((sigma == 0) | (sigma is None)):
+            sigma = 1
+
+        if w is None:
+            w = (1/sigma) * np.random.normal(size=(num_f, c))
+            b = np.tile(2*np.pi*np.random.uniform(size=(num_f, 1)), (1, r))
+
+        feat = np.sqrt(2) * (np.cos(w[0:num_f, 0:c] @ x.T + b[0:num_f, :])).T
+
+        return feat
+
+    def matrix_cov(self, mat_a, mat_b):
+        """
+        Compute the covariance matrix between two matrices.
+        Equivalent to ``cov()`` between two matrices in R.
+
+        Parameters
+        ----------
+        mat_a : np.ndarray
+            First data matrix.
+        mat_b : np.ndarray
+            Second data matrix.
+
+        Returns
+        -------
+        mat_cov : np.ndarray
+            Covariance matrix.
+        """
+        n_obs = mat_a.shape[0]
+
+        assert mat_a.shape == mat_b.shape
+        mat_a = mat_a - mat_a.mean(axis=0)
+        mat_b = mat_b - mat_b.mean(axis=0)
+
+        mat_cov = mat_a.T @ mat_b / (n_obs - 1)
+
+        return mat_cov
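A minimal usage sketch of the new RCIT class on synthetic data where X is independent of Y given Z (illustrative only, not part of the patch; the constructor arguments are exactly those defined in `__init__` above):

```python
import numpy as np
from causallearn.utils.RCIT.RCIT import RCIT

np.random.seed(2)
Z = np.random.randn(400, 1)
X = Z + 0.3 * np.random.randn(400, 1)
Y = Z + 0.3 * np.random.randn(400, 1)

rcit = RCIT(approx="lpd4", num_f=100, num_f2=5, rcit=True)
p, sta = rcit.compute_pvalue(X, Y, Z)
print(p)  # X and Y are independent given Z, so p should typically be large
```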
+
+
+class RIT(object):
+    """
+    Python implementation of the Randomized Independence Test (RIT).
+    The original R implementation can be found at https://github.com/ericstrobl/RCIT/tree/master
+
+    References
+    ----------
+    [1] Strobl, E. V., Zhang, K., and Visweswaran, S. (2019). "Approximate kernel-based conditional
+    independence tests for fast non-parametric causal discovery." Journal of Causal Inference, 7(1), 20180017.
+    """
+    def __init__(self, approx="lpd4"):
+        """
+        Initialize the RIT object.
+
+        Parameters
+        ----------
+        approx : str
+            Method for approximating the null distribution.
+            - "lpd4" for the Lindsay-Pilla-Basak method
+            - "hbe" for the Hall-Buckley-Eagleson method
+            - "gamma" for the Satterthwaite-Welch method
+            - "chi2" for a normalized chi-squared statistic
+            - "perm" for permutation testing
+            Default is "lpd4".
+        """
+        self.approx = approx
+
+    def compute_pvalue(self, data_x, data_y):
+        """
+        Compute the p value and return it together with the test statistic.
+
+        Parameters
+        ----------
+        data_x: input data for x (nxd1 array)
+        data_y: input data for y (nxd2 array)
+
+        Returns
+        -------
+        p: p value
+        sta: test statistic
+        """
+        r = data_x.shape[0]
+        r1 = 500 if (r > 500) else r
+
+        data_x = (data_x - data_x.mean(axis=0)) / data_x.std(axis=0, ddof=1)
+        data_y = (data_y - data_y.mean(axis=0)) / data_y.std(axis=0, ddof=1)
+
+        sigma = dict()
+        for key, value in [("x", data_x), ("y", data_y)]:
+            distances = pdist(value[:r1, :], metric='euclidean')
+            flattened_distances = squareform(distances).ravel()
+            non_zero_distances = flattened_distances[flattened_distances != 0]
+            sigma[key] = np.median(non_zero_distances)
+
+        four_x = self.random_fourier_features(data_x, num_f=5, sigma=sigma["x"])
+        four_y = self.random_fourier_features(data_y, num_f=5, sigma=sigma["y"])
+
+        f_x = (four_x - four_x.mean(axis=0)) / four_x.std(axis=0, ddof=1)
+        f_y = (four_y - four_y.mean(axis=0)) / four_y.std(axis=0, ddof=1)
+
+        Cxy = self.matrix_cov(f_x, f_y)
+        sta = r * np.sum(Cxy**2)
+
+        if self.approx == "perm":
+            nperm = 1000
+
+            stas = []
+            for _ in range(nperm):
+                perm = np.random.choice(np.arange(r), size=r, replace=False)
+                Cxy = self.matrix_cov(f_x[perm, ], f_y)
+                sta_p = r * np.sum(Cxy**2)
+                stas.append(sta_p)
+
+            p = 1 - (np.sum(sta >= stas) / len(stas))
+
+        else:
+            res_x = f_x - f_x.mean(axis=0)
+            res_y = f_y - f_y.mean(axis=0)
+
+            d = list(itertools.product(range(f_x.shape[1]), range(f_y.shape[1])))
+            res = np.array([res_x[:, idx_x] * res_y[:, idx_y] for idx_x, idx_y in d]).T
+            Cov = 1/r * res.T @ res
+
+            if self.approx == "chi2":
+                i_Cov = pinv(Cov)
+
+                sta = r * (np.dot(Cxy.flatten(), np.dot(i_Cov, Cxy.flatten())))
+                p = 1 - chi2.cdf(sta, Cxy.size)
+
+            else:
+                eigenvalues, eigenvectors = np.linalg.eigh(Cov)
+                eig_d = eigenvalues[eigenvalues > 0]
+
+                if self.approx == "gamma":
+                    p = 1 - sw(eig_d, sta)
+
+                elif self.approx == "hbe":
+                    p = 1 - hbe(eig_d, sta)
+
+                elif self.approx == "lpd4":
+                    try:
+                        p = 1 - lpb4(eig_d, sta)
+                    except Exception:
+                        p = 1 - hbe(eig_d, sta)
+                    if np.isnan(p):
+                        p = 1 - hbe(eig_d, sta)
+
+        if (p < 0):
+            p = 0
+
+        return p, sta
+
+    def random_fourier_features(self, x, w=None, b=None, num_f=None, sigma=None):
+        """
+        Generate random Fourier features.
+
+        Parameters
+        ----------
+        x : np.ndarray
+            Random variable x.
+        w : np.ndarray
+            Random coefficients.
+        b : np.ndarray
+            Random offsets.
+        num_f : int
+            Number of random Fourier features.
+        sigma : float
+            Smoothing parameter of the RBF kernel.
+
+        Returns
+        -------
+        feat : np.ndarray
+            Random Fourier features.
+        """
+        if num_f is None:
+            num_f = 25
+
+        r = x.shape[0]
+        c = x.shape[1]
+
+        if ((sigma == 0) | (sigma is None)):
+            sigma = 1
+
+        if w is None:
+            w = (1/sigma) * np.random.normal(size=(num_f, c))
+            b = np.tile(2*np.pi*np.random.uniform(size=(num_f, 1)), (1, r))
+
+        feat = np.sqrt(2) * (np.cos(w[0:num_f, 0:c] @ x.T + b[0:num_f, :])).T
+
+        return feat
+
+    def matrix_cov(self, mat_a, mat_b):
+        """
+        Compute the covariance matrix between two matrices.
+        Equivalent to ``cov()`` between two matrices in R.
+
+        Parameters
+        ----------
+        mat_a : np.ndarray
+            First data matrix.
+        mat_b : np.ndarray
+            Second data matrix.
+
+        Returns
+        -------
+        mat_cov : np.ndarray
+            Covariance matrix.
+        """
+        n_obs = mat_a.shape[0]
+
+        assert mat_a.shape == mat_b.shape
+        mat_a = mat_a - mat_a.mean(axis=0)
+        mat_b = mat_b - mat_b.mean(axis=0)
+
+        mat_cov = mat_a.T @ mat_b / (n_obs - 1)
+
+        return mat_cov
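RIT tests unconditional independence with the same random-Fourier-feature machinery: the Gaussian kernel is approximated by cosine features, and dependence is measured through the covariance of the two feature maps. A brief usage sketch (illustrative only, not part of the patch):

```python
import numpy as np
from causallearn.utils.RCIT.RCIT import RIT

np.random.seed(3)
X = np.random.randn(400, 1)
Y = X + 0.5 * np.random.randn(400, 1)

rit = RIT(approx="lpd4")
p, sta = rit.compute_pvalue(X, Y)
print(p)  # X and Y are dependent, so p should typically be near 0
```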
diff --git a/causallearn/utils/RCIT/__init__.py b/causallearn/utils/RCIT/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/causallearn/utils/ScoreUtils.py b/causallearn/utils/ScoreUtils.py
index e5af6ca1..6bd075aa 100644
--- a/causallearn/utils/ScoreUtils.py
+++ b/causallearn/utils/ScoreUtils.py
@@ -523,7 +523,7 @@ def cov_noise(logtheta=None, x=None, z=None, nargout=1):
 
 
 def cov_seard(loghyper=None, x=None, z=None, nargout=1):
-    # Squared Exponential covariance function with Automatic Relevance Detemination
+    # Squared Exponential covariance function with Automatic Relevance Determination
     # (ARD) distance measure. The covariance function is parameterized as:
     #
     # k(x^p,x^q) = sf2 * exp(-(x^p - x^q)'*inv(P)*(x^p - x^q)/2)
diff --git a/causallearn/utils/cit.py b/causallearn/utils/cit.py
index 8119dd72..fbc95e95 100644
--- a/causallearn/utils/cit.py
+++ b/causallearn/utils/cit.py
@@ -5,6 +5,9 @@
 from scipy.stats import chi2, norm
 
 from causallearn.utils.KCI.KCI import KCI_CInd, KCI_UInd
+from causallearn.utils.FastKCI.FastKCI import FastKCI_CInd, FastKCI_UInd
+from causallearn.utils.RCIT.RCIT import RCIT as RCIT_CInd
+from causallearn.utils.RCIT.RCIT import RIT as RCIT_UInd
 from causallearn.utils.PCUtils import Helper
 
 CONST_BINCOUNT_UNIQUE_THRESHOLD = 1e5
@@ -13,6 +16,8 @@
 mv_fisherz = "mv_fisherz"
 mc_fisherz = "mc_fisherz"
 kci = "kci"
+rcit = "rcit"
+fastkci = "fastkci"
 chisq = "chisq"
 gsq = "gsq"
 d_separation = "d_separation"
@@ -23,8 +28,8 @@ def CIT(data, method='fisherz', **kwargs):
     Parameters
     ----------
     data: numpy.ndarray of shape (n_samples, n_features)
-    method: str, in ["fisherz", "mv_fisherz", "mc_fisherz", "kci", "chisq", "gsq"]
-    kwargs: placeholder for future arguments, or for KCI specific arguments now
+    method: str, in ["fisherz", "mv_fisherz", "mc_fisherz", "kci", "rcit", "fastkci", "chisq", "gsq"]
+    kwargs: placeholder for future arguments, or for KCI, FastKCI, or RCIT specific arguments now
         TODO: utimately kwargs should be replaced by explicit named parameters.
              check https://github.com/cmu-phil/causal-learn/pull/62#discussion_r927239028
    '''
@@ -32,6 +37,10 @@ def CIT(data, method='fisherz', **kwargs):
        return FisherZ(data, **kwargs)
    elif method == kci:
        return KCI(data, **kwargs)
+    elif method == fastkci:
+        return FastKCI(data, **kwargs)
+    elif method == rcit:
+        return RCIT(data, **kwargs)
    elif method in [chisq, gsq]:
        return Chisq_or_Gsq(data, method_name=method, **kwargs)
    elif method == mv_fisherz:
@@ -43,6 +52,7 @@ def CIT(data, method='fisherz', **kwargs):
    else:
        raise ValueError("Unknown method: {}".format(method))
 
+
 class CIT_Base(object):
    # Base class for CIT, contains basic operations for input check and caching, etc.
     def __init__(self, data, cache_path=None, **kwargs):
@@ -193,6 +203,50 @@ def __call__(self, X, Y, condition_set=None):
         self.pvalue_cache[cache_key] = p
         return p
 
+class FastKCI(CIT_Base):
+    def __init__(self, data, **kwargs):
+        super().__init__(data, **kwargs)
+        kci_ui_kwargs = {k: v for k, v in kwargs.items() if k in
+                         ['K', 'J', 'alpha']}
+        kci_ci_kwargs = {k: v for k, v in kwargs.items() if k in
+                         ['K', 'J', 'alpha', 'use_gp']}
+        self.check_cache_method_consistent(
+            'kci', hashlib.md5(json.dumps(kci_ci_kwargs, sort_keys=True).encode('utf-8')).hexdigest())
+        self.assert_input_data_is_valid()
+        self.kci_ui = FastKCI_UInd(**kci_ui_kwargs)
+        self.kci_ci = FastKCI_CInd(**kci_ci_kwargs)
+
+    def __call__(self, X, Y, condition_set=None):
+        # Kernel-based conditional independence test.
+        Xs, Ys, condition_set, cache_key = self.get_formatted_XYZ_and_cachekey(X, Y, condition_set)
+        if cache_key in self.pvalue_cache: return self.pvalue_cache[cache_key]
+        p = self.kci_ui.compute_pvalue(self.data[:, Xs], self.data[:, Ys])[0] if len(condition_set) == 0 else \
+            self.kci_ci.compute_pvalue(self.data[:, Xs], self.data[:, Ys], self.data[:, condition_set])[0]
+        self.pvalue_cache[cache_key] = p
+        return p
+
+class RCIT(CIT_Base):
+    def __init__(self, data, **kwargs):
+        super().__init__(data, **kwargs)
+        rit_kwargs = {k: v for k, v in kwargs.items() if k in
+                      ['approx']}
+        rcit_kwargs = {k: v for k, v in kwargs.items() if k in
+                       ['approx', 'num_f', 'num_f2', 'rcit']}
+        self.check_cache_method_consistent(
+            'kci', hashlib.md5(json.dumps(rcit_kwargs, sort_keys=True).encode('utf-8')).hexdigest())
+        self.assert_input_data_is_valid()
+        self.rit = RCIT_UInd(**rit_kwargs)
+        self.rcit = RCIT_CInd(**rcit_kwargs)
+
+    def __call__(self, X, Y, condition_set=None):
+        # Randomized conditional independence test.
+        Xs, Ys, condition_set, cache_key = self.get_formatted_XYZ_and_cachekey(X, Y, condition_set)
+        if cache_key in self.pvalue_cache: return self.pvalue_cache[cache_key]
+        p = self.rit.compute_pvalue(self.data[:, Xs], self.data[:, Ys])[0] if len(condition_set) == 0 else \
+            self.rcit.compute_pvalue(self.data[:, Xs], self.data[:, Ys], self.data[:, condition_set])[0]
+        self.pvalue_cache[cache_key] = p
+        return p
+
 class Chisq_or_Gsq(CIT_Base):
     def __init__(self, data, method_name, **kwargs):
         def _unique(column):
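With the dispatcher entries above, both new tests are reachable through the unified `CIT` interface. A short sketch mirroring the call pattern used in the new tests (illustrative only, not part of the patch):

```python
import numpy as np
import causallearn.utils.cit as cit

np.random.seed(4)
data = np.random.randn(500, 3)
data[:, 1] += data[:, 0]   # X1 depends on X0
data[:, 2] += data[:, 1]   # X2 depends on X1

fastkci = cit.CIT(data, 'fastkci', K=10, J=8)
print(fastkci(0, 2, {2}))  # conditional query: column indices as in TestCIT_FastKCI.py

rcit = cit.CIT(data, 'rcit', approx='lpd4')
print(rcit(0, 1))          # unconditional query: X0 and X1 are dependent, p near 0
```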
diff --git a/docs/.readthedocs.yaml b/docs/.readthedocs.yaml
index 670a48c7..5e8dd497 100644
--- a/docs/.readthedocs.yaml
+++ b/docs/.readthedocs.yaml
@@ -2,12 +2,15 @@
 # Read the Docs configuration file
 # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
 
-# Required
 version: 2
 
-# Set the version of Python and other tools you might need
 build:
   os: ubuntu-22.04
   tools:
     python: "3.11"
+python:
+  install:
+    - requirements: docs/requirements.txt
+
+
diff --git a/docs/requirements.txt b/docs/requirements.txt
new file mode 100644
index 00000000..52b04f2e
--- /dev/null
+++ b/docs/requirements.txt
@@ -0,0 +1 @@
+sphinx_rtd_theme
\ No newline at end of file
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 4019298f..d649ff4c 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -14,7 +14,6 @@
 # import sys
 # sys.path.insert(0, os.path.abspath('.'))
 
-
 # -- Project information -----------------------------------------------------
 
 project = 'causal-learn'
@@ -22,7 +21,7 @@
 author = 'CLeaR'
 
 # The full version, including alpha/beta/rc tags
-release = '0.1.2.8'
+release = '0.1.3.6'
 
 # -- General configuration ---------------------------------------------------
diff --git a/docs/source/getting_started.rst b/docs/source/getting_started.rst
index f86de812..c306be55 100644
--- a/docs/source/getting_started.rst
+++ b/docs/source/getting_started.rst
@@ -67,7 +67,7 @@ Contributors
 
 **Developers**:
 
-- Wei Chen, Biwei Huang, Yuequn Liu, Zhiyi Huang, Feng Xie, Haoyue Dai: :ref:`PC `, :ref:`FCI `, :ref:`GES `, :ref:`GIN `, and :ref:`graph operations `.
+- Wei Chen, Biwei Huang, Yuequn Liu, Zhiyi Huang, Feng Xie, Haoyue Dai, Xiaokai Huang: :ref:`PC `, :ref:`FCI `, :ref:`GES `, :ref:`GIN `, and :ref:`graph operations `.
 - Mingming Gong, Erdun Gao, Aoqi Zuo: :ref:`PNL `, :ref:`ANM `, :ref:`Granger causality `, and :ref:`KCI `.
 - Shohei Shimizu, Takashi Nicholas Maeda, Takashi Ikeuchi: :ref:`LiNGAM-based methods `.
 - Madelyn Glymour: several helpers.
@@ -89,13 +89,15 @@ Please cite as:
 
 ..
code-block:: none - @article{causallearn, - title={Causal-learn: Causal Discovery in Python}, - author={Yujia Zheng and Biwei Huang and Wei Chen and Joseph Ramsey and Mingming Gong and Ruichu Cai and Shohei Shimizu and Peter Spirtes and Kun Zhang}, - journal={arXiv preprint arXiv:2307.16405}, - year={2023} + @article{zheng2024causal, + title={Causal-learn: Causal discovery in python}, + author={Zheng, Yujia and Huang, Biwei and Chen, Wei and Ramsey, Joseph and Gong, Mingming and Cai, Ruichu and Shimizu, Shohei and Spirtes, Peter and Zhang, Kun}, + journal={Journal of Machine Learning Research}, + volume={25}, + number={60}, + pages={1--8}, + year={2024} } - diff --git a/docs/source/search_methods_index/Constraint-based causal discovery methods/CDNOD.rst b/docs/source/search_methods_index/Constraint-based causal discovery methods/CDNOD.rst index c8603c39..2999b3e7 100644 --- a/docs/source/search_methods_index/Constraint-based causal discovery methods/CDNOD.rst +++ b/docs/source/search_methods_index/Constraint-based causal discovery methods/CDNOD.rst @@ -16,7 +16,7 @@ Usage from causallearn.search.ConstraintBased.CDNOD import cdnod # default parameters - cg = cdnod(data) + cg = cdnod(data, c_indx) # or customized parameters cg = cdnod(data, c_indx, alpha, indep_test, stable, uc_rule, uc_priority, mvcdnod, @@ -51,7 +51,7 @@ and n_features is the number of features. - ":ref:`kci `": kernel-based conditional independence test. (As a kernel method, its complexity is cubic in the sample size, so it might be slow if the same size is not small.) - ":ref:`mv_fisherz `": Missing-value Fisher's Z conditional independence test. -**stable**: run stabilized skeleton discovery if True. Default: True. +**stable**: run stabilized skeleton discovery [3]_ if True. Default: True. **uc_rule**: how unshielded colliders are oriented. Default: 0. - 0: run uc_sepset. @@ -82,4 +82,5 @@ Returns **cg** : a CausalGraph object, where cg.G.graph[j,i]=1 and cg.G.graph[i,j]=-1 indicate i --> j; cg.G.graph[i,j] = cg.G.graph[j,i] = -1 indicates i --- j; cg.G.graph[i,j] = cg.G.graph[j,i] = 1 indicates i <-> j. .. [1] Huang, B., Zhang, K., Zhang, J., Ramsey, J. D., Sanchez-Romero, R., Glymour, C., & Schölkopf, B. (2020). Causal Discovery from Heterogeneous/Nonstationary Data. J. Mach. Learn. Res., 21(89), 1-53. -.. [2] Ramsey, J. (2016). Improving accuracy and scalability of the pc algorithm by maximizing p-value. arXiv preprint arXiv:1610.00378. \ No newline at end of file +.. [2] Ramsey, J. (2016). Improving accuracy and scalability of the pc algorithm by maximizing p-value. arXiv preprint arXiv:1610.00378. +.. [3] Colombo, D., & Maathuis, M. H. (2014). Order-independent constraint-based causal structure learning. J. Mach. Learn. Res., 15(1), 3741-3782. 
\ No newline at end of file diff --git a/docs/source/search_methods_index/Constraint-based causal discovery methods/FCI.rst b/docs/source/search_methods_index/Constraint-based causal discovery methods/FCI.rst index 5c71f1ad..2ebadb57 100644 --- a/docs/source/search_methods_index/Constraint-based causal discovery methods/FCI.rst +++ b/docs/source/search_methods_index/Constraint-based causal discovery methods/FCI.rst @@ -16,16 +16,16 @@ Usage from causallearn.search.ConstraintBased.FCI import fci # default parameters - G, edges = fci(data) + g, edges = fci(data) # or customized parameters - G, edges = fci(data, independence_test_method, alpha, depth, max_path_length, + g, edges = fci(data, independence_test_method, alpha, depth, max_path_length, verbose, background_knowledge, cache_variables_map) # visualization from causallearn.utils.GraphUtils import GraphUtils - pdy = GraphUtils.to_pydot(G) + pdy = GraphUtils.to_pydot(g) pdy.write_png('simple_test.png') Visualization using pydot is recommended. If specific label names are needed, please refer to this `usage example `_. @@ -60,7 +60,7 @@ For detailed usage, please kindly refer to its `usage example `": kernel-based conditional independence test. (As a kernel method, its complexity is cubic in the sample size, so it might be slow if the same size is not small.) - ":ref:`mv_fisherz `": Missing-value Fisher's Z conditional independence test. -**stable**: run stabilized skeleton discovery if True. Default: True. +**stable**: run stabilized skeleton discovery [4]_ if True. Default: True. **uc_rule**: how unshielded colliders are oriented. Default: 0. - 0: run uc_sepset. @@ -114,4 +114,5 @@ Returns .. [1] Spirtes, P., Glymour, C. N., Scheines, R., & Heckerman, D. (2000). Causation, prediction, and search. MIT press. .. [2] Tu, R., Zhang, C., Ackermann, P., Mohan, K., Kjellström, H., & Zhang, K. (2019, April). Causal discovery in the presence of missing data. In The 22nd International Conference on Artificial Intelligence and Statistics (pp. 1762-1770). PMLR. -.. [3] Ramsey, J. (2016). Improving accuracy and scalability of the pc algorithm by maximizing p-value. arXiv preprint arXiv:1610.00378. \ No newline at end of file +.. [3] Ramsey, J. (2016). Improving accuracy and scalability of the pc algorithm by maximizing p-value. arXiv preprint arXiv:1610.00378. +.. [4] Colombo, D., & Maathuis, M. H. (2014). Order-independent constraint-based causal structure learning. J. Mach. Learn. Res., 15(1), 3741-3782. \ No newline at end of file diff --git a/docs/source/utilities_index/Datasets.rst b/docs/source/utilities_index/Datasets.rst new file mode 100644 index 00000000..fad90d9a --- /dev/null +++ b/docs/source/utilities_index/Datasets.rst @@ -0,0 +1,27 @@ +.. _datasets: + +Datasets +============================================== + + + +Usage +---------------------------- +.. code-block:: python + + from causallearn.utils.Dataset import load_dataset + + data, labels = load_dataset(dataset_name) + +Parameters +------------------- +**dataset_name**: str, the name of a dataset in ['sachs', 'boston_housing', 'airfoil'] + + +Returns +------------------- + +**data**: np.array, data with a shape of (number of samples, number of variables). + +**labels**: list, labels of variables in the data. 
+
diff --git a/docs/source/utilities_index/index.rst b/docs/source/utilities_index/index.rst
index c175e226..ec4dfc83 100644
--- a/docs/source/utilities_index/index.rst
+++ b/docs/source/utilities_index/index.rst
@@ -10,3 +10,4 @@ Contents:
 
     Graph operations/index
     Evaluations
+    Datasets
diff --git a/setup.py b/setup.py
index 90aadea9..f40a536d 100644
--- a/setup.py
+++ b/setup.py
@@ -3,7 +3,7 @@
 with open('README.md', 'r') as fh:
     README = fh.read()
 
-VERSION = '0.1.3.4'
+VERSION = '0.1.3.8'
 
 setuptools.setup(
     name='causal-learn',
@@ -22,7 +22,8 @@
         'matplotlib',
         'networkx',
         'pydot',
-        'tqdm'
+        'tqdm',
+        'momentchi2'
     ],
     url='https://github.com/py-why/causal-learn',
     packages=setuptools.find_packages(),
diff --git a/tests/TestBOSS.py b/tests/TestBOSS.py
new file mode 100644
index 00000000..ea963d47
--- /dev/null
+++ b/tests/TestBOSS.py
@@ -0,0 +1,117 @@
+import unittest
+
+import numpy as np
+from causallearn.graph.AdjacencyConfusion import AdjacencyConfusion
+from causallearn.graph.ArrowConfusion import ArrowConfusion
+from causallearn.graph.GeneralGraph import GeneralGraph
+from causallearn.graph.GraphNode import GraphNode
+from causallearn.search.PermutationBased.BOSS import boss
+from causallearn.utils.DAG2CPDAG import dag2cpdag
+
+import gc
+
+
+def simulate_data(p, d, n):
+    """
+    Randomly generates an Erdos-Renyi directed acyclic graph given an ordering
+    and randomly simulates data with the provided parameters.
+
+    p = |variables|
+    d = |edges| / |possible edges|
+    n = sample size
+    """
+
+    # pe = |possible edges|
+    pe = int(p * (p - 1) / 2)
+
+    # ne = |edges|
+    ne = int(d * pe)
+
+    # generate edges
+    e = np.append(np.zeros(pe - ne), 0.5 * np.random.uniform(-1, 1, ne))
+    np.random.shuffle(e)
+    B = np.zeros([p, p])
+    B.T[np.triu_indices(p, 1)] = e
+
+    # generate variance terms
+    O = np.diag(np.ones(p))
+
+    # simulate data
+    X = np.random.multivariate_normal(np.zeros(p), O, n)
+    for i in range(p):
+        J = np.where(B[i])[0]
+        for j in J: X[:, i] += B[i, j] * X[:, j]
+
+    pi = [i for i in range(p)]
+    np.random.shuffle(pi)
+
+    return (B != 0)[pi][:, pi], X[:, pi]
+
+
+class TestBOSS(unittest.TestCase):
+    def test_boss(self):
+        ps = [30, 60]
+        ds = [0.1, 0.15]
+        n = 1000
+        reps = 5
+
+        gc.set_threshold(20000, 50, 50)
+        # gc.set_debug(gc.DEBUG_STATS)
+
+        for p in ps:
+            for d in ds:
+                stats = [[], [], [], []]
+                for rep in range(1, reps + 1):
+                    g0, X = simulate_data(p, d, n)
+                    print(
+                        "\nNodes:", p,
+                        "| Edges:", np.sum(g0),
+                        "| Avg Degree:", 2 * round(np.sum(g0) / p, 2),
+                        "| Rep:", rep,
+                    )
+
+                    node_names = [("X%d" % (i + 1)) for i in range(p)]
+                    nodes = []
+
+                    for name in node_names:
+                        node = GraphNode(name)
+                        nodes.append(node)
+
+                    G0 = GeneralGraph(nodes)
+                    for y in range(p):
+                        for x in np.where(g0[y] == 1)[0]:
+                            G0.add_directed_edge(nodes[x], nodes[y])
+
+                    G0 = dag2cpdag(G0)
+
+                    G = boss(X, parameters={'lambda_value': 4})
+                    gc.collect()
+
+                    AdjC = AdjacencyConfusion(G0, G)
+                    stats[0].append(AdjC.get_adj_precision())
+                    stats[1].append(AdjC.get_adj_recall())
+
+                    ArrC = ArrowConfusion(G0, G)
+                    stats[2].append(ArrC.get_arrows_precision())
+                    stats[3].append(ArrC.get_arrows_recall())
+
+                    print(
+                        [
+                            ["AP", "AR", "AHP", "AHR"][i]
+                            + ": "
+                            + str(round(stats[i][-1], 2))
+                            for i in range(4)
+                        ]
+                    )
+
+                print(
+                    "\nOverall Stats: \n",
+                    [
+                        ["AP", "AR", "AHP", "AHR"][i]
+                        + ": "
+                        + str(round(np.sum(stats[i]) / reps, 2))
+                        for i in range(4)
+                    ],
+                )
+
+        print("finish")
diff --git a/tests/TestCIT_FastKCI.py b/tests/TestCIT_FastKCI.py
new file mode 100644
index 00000000..b8507f78
--- /dev/null
+++
b/tests/TestCIT_FastKCI.py @@ -0,0 +1,36 @@ +import unittest + +import numpy as np + +import causallearn.utils.cit as cit + + +class TestCIT_FastKCI(unittest.TestCase): + def test_Gaussian_dist(self): + np.random.seed(10) + X = np.random.randn(1200, 1) + X_prime = np.random.randn(1200, 1) + Y = X + 0.5 * np.random.randn(1200, 1) + Z = Y + 0.5 * np.random.randn(1200, 1) + data = np.hstack((X, X_prime, Y, Z)) + + pvalue01 = [] + pvalue03 = [] + pvalue032 = [] + for K in [3, 10]: + for J in [8, 16]: + for use_gp in [True, False]: + cit_CIT = cit.CIT(data, 'fastkci', K=K, J=J, use_gp=use_gp) + pvalue01.append(round(cit_CIT(0, 1), 4)) + pvalue03.append(round(cit_CIT(0, 3), 4)) + pvalue032.append(round(cit_CIT(0, 3, {2}), 4)) + + pvalue01 = np.array(pvalue01) + pvalue03 = np.array(pvalue03) + pvalue032 = np.array(pvalue032) + self.assertTrue(np.all((0.0 <= pvalue01) & (pvalue01 <= 1.0)), + "pvalue01 contains invalid values") + self.assertTrue(np.all((0.0 <= pvalue03) & (pvalue03 <= 1.0)), + "pvalue03 contains invalid values") + self.assertTrue(np.all((0.0 <= pvalue032) & (pvalue032 <= 1.0)), + "pvalue032 contains invalid values") diff --git a/tests/TestCIT_RCIT.py b/tests/TestCIT_RCIT.py new file mode 100644 index 00000000..ef6406fd --- /dev/null +++ b/tests/TestCIT_RCIT.py @@ -0,0 +1,38 @@ +import unittest + +import numpy as np + +import causallearn.utils.cit as cit + + +class TestCIT_RCIT(unittest.TestCase): + def test_Gaussian_dist(self): + np.random.seed(10) + X = np.random.randn(300, 1) + X_prime = np.random.randn(300, 1) + Y = X + 0.5 * np.random.randn(300, 1) + Z = Y + 0.5 * np.random.randn(300, 1) + data = np.hstack((X, X_prime, Y, Z)) + + pvalue01 = [] + pvalue03 = [] + pvalue032 = [] + for approx in ["lpd4", "hbe", "gamma", "chi2", "perm"]: + for num_f in [50, 100]: + for num_f2 in [5, 10]: + for rcit in [True, False]: + cit_CIT = cit.CIT(data, 'rcit', approx=approx, num_f=num_f, + num_f2=num_f2, rcit=rcit) + pvalue01.append(round(cit_CIT(0, 1), 4)) + pvalue03.append(round(cit_CIT(0, 3), 4)) + pvalue032.append(round(cit_CIT(0, 3, {2}), 4)) + + pvalue01 = np.array(pvalue01) + pvalue03 = np.array(pvalue03) + pvalue032 = np.array(pvalue032) + self.assertTrue(np.all((0.0 <= pvalue01) & (pvalue01 <= 1.0)), + "pvalue01 contains invalid values") + self.assertTrue(np.all((0.0 <= pvalue03) & (pvalue03 <= 1.0)), + "pvalue03 contains invalid values") + self.assertTrue(np.all((0.0 <= pvalue032) & (pvalue032 <= 1.0)), + "pvalue032 contains invalid values") diff --git a/tests/TestDAG2PAG.py b/tests/TestDAG2PAG.py index 569fc945..004ea897 100644 --- a/tests/TestDAG2PAG.py +++ b/tests/TestDAG2PAG.py @@ -72,3 +72,17 @@ def test_case3(self): print(pag) graphviz_pag = GraphUtils.to_pgv(pag) graphviz_pag.draw("pag.png", prog='dot', format='png') + + def test_case_selection(self): + nodes = [] + for i in range(5): + nodes.append(GraphNode(str(i))) + dag = Dag(nodes) + dag.add_directed_edge(nodes[0], nodes[1]) + dag.add_directed_edge(nodes[1], nodes[2]) + dag.add_directed_edge(nodes[2], nodes[3]) + # Selection nodes + dag.add_directed_edge(nodes[3], nodes[4]) + dag.add_directed_edge(nodes[0], nodes[4]) + pag = dag2pag(dag, islatent=[], isselection=[nodes[4]]) + print(pag) diff --git a/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_alarm_fci_chisq_0.05.txt b/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_alarm_fci_chisq_0.05.txt index 2430e1de..ce08eb4f 100644 --- a/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_alarm_fci_chisq_0.05.txt +++ 
b/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_alarm_fci_chisq_0.05.txt @@ -3,7 +3,7 @@ 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 -1 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -2 0 0 0 2 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +2 0 0 0 -1 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 @@ -12,7 +12,7 @@ 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 2 +0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 @@ -20,9 +20,9 @@ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 -1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 -0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 0 2 0 2 0 0 0 0 2 -1 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 0 -1 0 -1 0 0 0 0 2 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 diff --git a/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_andes_fci_chisq_0.05.txt b/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_andes_fci_chisq_0.05.txt index a9f9886f..934b13bd 100644 --- a/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_andes_fci_chisq_0.05.txt +++ b/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_andes_fci_chisq_0.05.txt @@ -39,8 +39,8 @@ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 1 0 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 2 -1 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 @@ -106,7 +106,7 @@ 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 -1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 @@ -208,7 +208,7 @@ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 -0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 diff --git a/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_asia_fci_chisq_0.05.txt b/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_asia_fci_chisq_0.05.txt index c0467173..b3aa50bb 100644 --- a/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_asia_fci_chisq_0.05.txt +++ b/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_asia_fci_chisq_0.05.txt @@ -1,7 +1,7 @@ 0 0 0 0 0 0 2 0 0 0 0 0 0 2 0 0 0 0 0 2 2 0 0 0 -0 0 2 0 0 2 0 0 +0 0 2 0 0 -1 0 0 0 0 2 0 0 0 0 2 0 1 0 1 0 0 0 0 2 0 0 0 0 0 0 0 diff --git a/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_barley_fci_chisq_0.05.txt b/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_barley_fci_chisq_0.05.txt index b1986bd1..864b9e5f 100644 --- a/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_barley_fci_chisq_0.05.txt +++ b/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_barley_fci_chisq_0.05.txt @@ -1,4 +1,4 @@ -0 0 0 2 0 0 2 0 0 2 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +0 0 0 -1 0 0 -1 0 0 -1 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 1 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 @@ -14,8 +14,8 @@ 0 0 0 0 0 0 0 1 0 0 0 0 1 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 2 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 0 -1 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 0 0 0 @@ -28,7 +28,7 @@ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 diff --git a/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_child_fci_chisq_0.05.txt b/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_child_fci_chisq_0.05.txt index bac534d8..a739c18f 100644 --- 
a/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_child_fci_chisq_0.05.txt +++ b/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_child_fci_chisq_0.05.txt @@ -12,9 +12,9 @@ 1 0 0 0 0 0 0 0 0 0 0 0 0 -1 1 -1 -1 1 -1 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 -0 0 0 0 0 0 2 0 0 0 0 2 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 2 0 0 0 0 -1 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 -1 -1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 -0 0 2 2 2 2 0 0 0 0 0 2 0 0 0 0 0 0 0 0 +0 0 -1 2 -1 -1 0 0 0 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 2 0 -1 0 0 0 0 0 0 diff --git a/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_hailfinder_fci_chisq_0.05.txt b/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_hailfinder_fci_chisq_0.05.txt index 894ff9cd..4671059c 100644 --- a/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_hailfinder_fci_chisq_0.05.txt +++ b/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_hailfinder_fci_chisq_0.05.txt @@ -21,7 +21,7 @@ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 diff --git a/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_hepar2_fci_chisq_0.05.txt b/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_hepar2_fci_chisq_0.05.txt index 9e6c7aac..0569bd1b 100644 --- a/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_hepar2_fci_chisq_0.05.txt +++ b/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_hepar2_fci_chisq_0.05.txt @@ -40,7 +40,7 @@ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 diff --git a/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_insurance_fci_chisq_0.05.txt b/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_insurance_fci_chisq_0.05.txt index 80a266db..4d6a5b4a 100644 --- a/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_insurance_fci_chisq_0.05.txt +++ b/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_insurance_fci_chisq_0.05.txt @@ -2,7 +2,7 @@ -1 0 1 1 0 0 0 0 0 0 0 0 1 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 1 1 0 0 0 -1 0 0 0 0 0 0 0 0 -1 1 0 0 1 0 0 0 0 0 0 1 1 0 0 0 0 0 0 -1 0 0 0 1 0 0 0 -1 -1 0 0 0 0 0 0 0 -1 -0 0 2 0 0 0 2 0 0 0 0 2 0 0 0 0 2 0 0 0 0 0 0 0 2 0 0 +0 0 2 0 0 0 -1 0 0 0 0 -1 0 0 0 0 -1 0 0 0 0 0 0 0 -1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 0 0 0 0 -1 0 0 0 1 1 0 0 0 0 0 0 0 0 0 -1 0 -1 0 0 -1 0 diff --git a/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_water_fci_chisq_0.05.txt b/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_water_fci_chisq_0.05.txt index cee53cda..bd9c2aec 100644 --- a/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_water_fci_chisq_0.05.txt +++ b/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_water_fci_chisq_0.05.txt @@ -1,4 +1,4 @@ -0 0 0 0 0 0 0 0 2 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 2 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 -1 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 @@ -14,12 +14,12 @@ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 -0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 0 0 0 0 +0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 1 -0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 +0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 diff --git a/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_win95pts_fci_chisq_0.05.txt b/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_win95pts_fci_chisq_0.05.txt index b971fb87..8bc6b692 100644 --- a/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_win95pts_fci_chisq_0.05.txt +++ b/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_win95pts_fci_chisq_0.05.txt @@ -8,7 +8,7 @@ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -0 0 0 2 0 0 0 0 0 0 0 2 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +0 0 0 2 0 0 0 0 0 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 @@ -34,7 +34,7 @@ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +0 0 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 @@ -50,7 +50,7 @@ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 diff --git a/tests/TestData/benchmark_returned_results/linear_10_fci_fisherz_0.05.txt b/tests/TestData/benchmark_returned_results/linear_10_fci_fisherz_0.05.txt index 046443a3..07a248fb 100644 --- a/tests/TestData/benchmark_returned_results/linear_10_fci_fisherz_0.05.txt +++ 
b/tests/TestData/benchmark_returned_results/linear_10_fci_fisherz_0.05.txt @@ -5,7 +5,7 @@ 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 -0 0 0 0 0 0 0 0 2 0 0 0 0 2 0 0 0 2 2 0 +0 0 0 0 0 0 0 0 -1 0 0 0 0 -1 0 0 0 2 -1 0 1 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 -1 0 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 diff --git a/tests/TestFCI.py b/tests/TestFCI.py index 7884cbde..e2fb683c 100644 --- a/tests/TestFCI.py +++ b/tests/TestFCI.py @@ -10,8 +10,11 @@ import pandas as pd from causallearn.graph.Dag import Dag +from causallearn.graph.Edge import Edge +from causallearn.graph.Endpoint import Endpoint +from causallearn.graph.GeneralGraph import GeneralGraph from causallearn.graph.GraphNode import GraphNode -from causallearn.search.ConstraintBased.FCI import fci +from causallearn.search.ConstraintBased.FCI import fci, ruleR5 from causallearn.utils.cit import chisq, fisherz, kci, d_separation from causallearn.utils.DAG2PAG import dag2pag from causallearn.utils.GraphUtils import GraphUtils @@ -30,21 +33,21 @@ ######################################### Test Notes ########################################### BENCHMARK_TXTFILE_TO_MD5 = { - "tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_asia_fci_chisq_0.05.txt": "65f54932a9d8224459e56c40129e6d8b", - "tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_cancer_fci_chisq_0.05.txt": "0312381641cb3b4818e0c8539f74e802", - "tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_earthquake_fci_chisq_0.05.txt": "a1160b92ce15a700858552f08e43b7de", - "tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_sachs_fci_chisq_0.05.txt": "dced4a202fc32eceb75f53159fc81f3b", - "tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_survey_fci_chisq_0.05.txt": "b1a28eee1e0c6ea8a64ac1624585c3f4", - "tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_alarm_fci_chisq_0.05.txt": "c3bbc2b8aba456a4258dd071a42085bc", - "tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_barley_fci_chisq_0.05.txt": "4a5000e7a582083859ee6aef15073676", - "tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_child_fci_chisq_0.05.txt": "6b7858589e12f04b0f489ba4589a1254", - "tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_insurance_fci_chisq_0.05.txt": "9975942b936aa2b1fc90c09318ca2d08", - "tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_water_fci_chisq_0.05.txt": "48eee804d59526187b7ecd0519556ee5", - "tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_hailfinder_fci_chisq_0.05.txt": "6b9a6b95b6474f8530e85c022f4e749c", - "tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_hepar2_fci_chisq_0.05.txt": "4aae21ff3d9aa2435515ed2ee402294c", - "tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_win95pts_fci_chisq_0.05.txt": "648fdf271e1440c06ca2b31b55ef1f3f", - "tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_andes_fci_chisq_0.05.txt": "04092ae93e54c727579f08bf5dc34c77", - "tests/TestData/benchmark_returned_results/linear_10_fci_fisherz_0.05.txt": "289c86f9c665bf82bbcc4c9e1dcec3e7" + 'tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_asia_fci_chisq_0.05.txt': '4b014aa90aee456bc02dddd008d8be1f', + 'tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_cancer_fci_chisq_0.05.txt': '0312381641cb3b4818e0c8539f74e802', + 
'tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_earthquake_fci_chisq_0.05.txt': 'a1160b92ce15a700858552f08e43b7de', + 'tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_sachs_fci_chisq_0.05.txt': 'dced4a202fc32eceb75f53159fc81f3b', + 'tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_survey_fci_chisq_0.05.txt': 'b1a28eee1e0c6ea8a64ac1624585c3f4', + 'tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_alarm_fci_chisq_0.05.txt': '4b06f6ea7fefe66255a3d16cb8a3711f', + 'tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_barley_fci_chisq_0.05.txt': 'a926e329b2bf2b397d3676e9d2020483', + 'tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_child_fci_chisq_0.05.txt': '5eff3ea6ce2e2daa53d6b649bd9c31fe', + 'tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_insurance_fci_chisq_0.05.txt': '55eed065b8dfc6a21610d3d6e18423c6', + 'tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_water_fci_chisq_0.05.txt': '48917f914c8f80684ae2abde87417d5c', + 'tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_hailfinder_fci_chisq_0.05.txt': 'bcffe793866db6ddfe9cc602081789cd', + 'tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_hepar2_fci_chisq_0.05.txt': '3558683a485588a41493b1a2f2c1d370', + 'tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_win95pts_fci_chisq_0.05.txt': '52596ace09c45656f61c70d0e66bdd6a', + 'tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_andes_fci_chisq_0.05.txt': '4b7e2ecba5eb50c7266f24670d0d0ae9', + 'tests/TestData/benchmark_returned_results/linear_10_fci_fisherz_0.05.txt': 'eaabd1fbb16bc152b45e456c30166c55' } # INCONSISTENT_RESULT_GRAPH_ERRMSG = "Returned graph is inconsistent with the benchmark. Please check your code with the commit 5918419." @@ -211,3 +214,23 @@ def test_er_graph(self): print(G) print(f'fci(data, d_separation, 0.05):') self.run_simulate_data_test(pag, G) + + def test_rule5(self): + nodes = [] + for i in range(7): + nodes.append(GraphNode(str(i))) + g = GeneralGraph(nodes) + g.add_edge(Edge(nodes[0], nodes[1], Endpoint.CIRCLE, Endpoint.CIRCLE)) + g.add_edge(Edge(nodes[0], nodes[2], Endpoint.CIRCLE, Endpoint.CIRCLE)) + g.add_edge(Edge(nodes[0], nodes[5], Endpoint.CIRCLE, Endpoint.CIRCLE)) + g.add_edge(Edge(nodes[0], nodes[6], Endpoint.CIRCLE, Endpoint.CIRCLE)) + g.add_edge(Edge(nodes[1], nodes[3], Endpoint.CIRCLE, Endpoint.CIRCLE)) + g.add_edge(Edge(nodes[2], nodes[4], Endpoint.CIRCLE, Endpoint.CIRCLE)) + g.add_edge(Edge(nodes[3], nodes[5], Endpoint.CIRCLE, Endpoint.CIRCLE)) + g.add_edge(Edge(nodes[4], nodes[6], Endpoint.CIRCLE, Endpoint.CIRCLE)) + + ruleR5(g, changeFlag=True, verbose=True) + + for edge in g.get_graph_edges(): + assert edge.get_endpoint1() == Endpoint.TAIL + assert edge.get_endpoint2() == Endpoint.TAIL diff --git a/tests/TestGES.py b/tests/TestGES.py index 137a7e9a..ba7e919f 100644 --- a/tests/TestGES.py +++ b/tests/TestGES.py @@ -10,7 +10,7 @@ ######################################### Test Notes ########################################### -# All the benchmark results of loaded files (e.g. "./TestData/benchmark_returned_results/") # +# All the benchmark results of loaded files (e.g. "tests/TestData/benchmark_returned_results/")# # are obtained from the code of causal-learn as of commit # # https://github.com/cmu-phil/causal-learn/commit/b51d788 (07-08-2022). 
# # # @@ -23,13 +23,13 @@ BENCHMARK_TXTFILE_TO_MD5 = { - "./TestData/data_linear_10.txt": "95a17e15038d4cade0845140b67c05a6", - "./TestData/data_discrete_10.txt": "ccb51c6c1946d8524a8b29a49aef2cc4", - "./TestData/graph.10.txt": "4970d4ecb8be999a82a665e5f5e0825b", - "./TestData/test_ges_simulated_linear_gaussian_data.txt": "0d2490eeb9ee8ef3b18bf21d7e936e1e", - "./TestData/test_ges_simulated_linear_gaussian_CPDAG.txt": "aa0146777186b07e56421ce46ed52914", - "./TestData/benchmark_returned_results/linear_10_ges_local_score_BIC_none_none.txt": "3accb3673d2ccb4c110f3703d60fe702", - "./TestData/benchmark_returned_results/discrete_10_ges_local_score_BDeu_none_none.txt": "eebd11747c1b927b2fdd048a55c8c3a5", + "tests/TestData/data_linear_10.txt": "95a17e15038d4cade0845140b67c05a6", + "tests/TestData/data_discrete_10.txt": "ccb51c6c1946d8524a8b29a49aef2cc4", + "tests/TestData/graph.10.txt": "4970d4ecb8be999a82a665e5f5e0825b", + "tests/TestData/test_ges_simulated_linear_gaussian_data.txt": "0d2490eeb9ee8ef3b18bf21d7e936e1e", + "tests/TestData/test_ges_simulated_linear_gaussian_CPDAG.txt": "aa0146777186b07e56421ce46ed52914", + "tests/TestData/benchmark_returned_results/linear_10_ges_local_score_BIC_none_none.txt": "3accb3673d2ccb4c110f3703d60fe702", + "tests/TestData/benchmark_returned_results/discrete_10_ges_local_score_BDeu_none_none.txt": "eebd11747c1b927b2fdd048a55c8c3a5", } INCONSISTENT_RESULT_GRAPH_ERRMSG = "Returned graph is inconsistent with the benchmark. Please check your code with the commit b51d788." @@ -47,8 +47,8 @@ class TestGES(unittest.TestCase): # Load data from file "data_linear_10.txt". Run GES with local_score_BIC. def test_ges_load_linear_10_with_local_score_BIC(self): print('Now start test_ges_load_linear_10_with_local_score_BIC ...') - data_path = "./TestData/data_linear_10.txt" - truth_graph_path = "./TestData/graph.10.txt" + data_path = "tests/TestData/data_linear_10.txt" + truth_graph_path = "tests/TestData/graph.10.txt" data = np.loadtxt(data_path, skiprows=1) truth_dag = txt2generalgraph(truth_graph_path) # truth_dag is a GeneralGraph instance truth_cpdag = dag2cpdag(truth_dag) @@ -58,7 +58,7 @@ def test_ges_load_linear_10_with_local_score_BIC(self): res_map = ges(data, score_func='local_score_BIC', maxP=None, parameters=None) # Run GES and obtain the estimated graph (res_map is Dict object,which contains the updated steps, the result causal graph and the result score.) 
benchmark_returned_graph = np.loadtxt( - "./TestData/benchmark_returned_results/linear_10_ges_local_score_BIC_none_none.txt") + "tests/TestData/benchmark_returned_results/linear_10_ges_local_score_BIC_none_none.txt") assert np.all(res_map['G'].graph == benchmark_returned_graph), INCONSISTENT_RESULT_GRAPH_ERRMSG shd = SHD(truth_cpdag, res_map['G']) print(f" ges(data, score_func='local_score_BIC', maxP=None, parameters=None)\tSHD: {shd.get_shd()} of {num_edges_in_truth}") @@ -73,9 +73,9 @@ def test_ges_simulate_linear_gaussian_with_local_score_BIC(self): truth_DAG_directed_edges = {(0, 1), (0, 3), (1, 2), (1, 3), (2, 3), (2, 4), (3, 4)} truth_CPDAG_directed_edges = {(0, 3), (1, 3), (2, 3), (2, 4), (3, 4)} truth_CPDAG_undirected_edges = {(0, 1), (1, 2), (2, 1), (1, 0)} - truth_CPDAG = np.loadtxt("./TestData/test_ges_simulated_linear_gaussian_CPDAG.txt") + truth_CPDAG = np.loadtxt("tests/TestData/test_ges_simulated_linear_gaussian_CPDAG.txt") - ###### Simulation configuration: code to generate "./TestData/test_ges_simulated_linear_gaussian_data.txt" ###### + ###### Simulation configuration: code to generate "tests/TestData/test_ges_simulated_linear_gaussian_data.txt" ###### # np.random.seed(42) # linear_weight_minabs, linear_weight_maxabs, linear_weight_netative_prob = 0.5, 0.9, 0.5 # sample_size = 10000 @@ -89,10 +89,10 @@ def test_ges_simulate_linear_gaussian_with_local_score_BIC(self): # mixing_matrix = np.linalg.inv(np.eye(num_of_nodes) - adjacency_matrix) # exogenous_noise = np.random.normal(0, 1, (num_of_nodes, sample_size)) # data = (mixing_matrix @ exogenous_noise).T - # np.savetxt("./TestData/test_ges_simulated_linear_gaussian_data.txt", data) - ###### Simulation configuration: code to generate "./TestData/test_ges_simulated_linear_gaussian_data.txt" ###### + # np.savetxt("tests/TestData/test_ges_simulated_linear_gaussian_data.txt", data) + ###### Simulation configuration: code to generate "tests/TestData/test_ges_simulated_linear_gaussian_data.txt" ###### - data = np.loadtxt("./TestData/test_ges_simulated_linear_gaussian_data.txt") + data = np.loadtxt("tests/TestData/test_ges_simulated_linear_gaussian_data.txt") # Run GES with default parameters: score_func='local_score_BIC', maxP=None, parameters=None res_map = ges(data, score_func='local_score_BIC', maxP=None, parameters=None) @@ -105,8 +105,8 @@ def test_ges_simulate_linear_gaussian_with_local_score_BIC(self): # Load data from file "data_discrete_10.txt". Run GES with local_score_BDeu. def test_ges_load_discrete_10_with_local_score_BDeu(self): print('Now start test_ges_load_discrete_10_with_local_score_BDeu ...') - data_path = "./TestData/data_discrete_10.txt" - truth_graph_path = "./TestData/graph.10.txt" + data_path = "tests/TestData/data_discrete_10.txt" + truth_graph_path = "tests/TestData/graph.10.txt" data = np.loadtxt(data_path, skiprows=1) truth_dag = txt2generalgraph(truth_graph_path) # truth_dag is a GeneralGraph instance truth_cpdag = dag2cpdag(truth_dag) @@ -115,7 +115,7 @@ def test_ges_load_discrete_10_with_local_score_BDeu(self): # Run GES with local_score_BDeu. 
res_map = ges(data, score_func='local_score_BDeu', maxP=None, parameters=None) benchmark_returned_graph = np.loadtxt( - "./TestData/benchmark_returned_results/discrete_10_ges_local_score_BDeu_none_none.txt") + "tests/TestData/benchmark_returned_results/discrete_10_ges_local_score_BDeu_none_none.txt") assert np.all(res_map['G'].graph == benchmark_returned_graph), INCONSISTENT_RESULT_GRAPH_ERRMSG shd = SHD(truth_cpdag, res_map['G']) print(f" ges(data, score_func='local_score_BDeu', maxP=None, parameters=None)\tSHD: {shd.get_shd()} of {num_edges_in_truth}") diff --git a/tests/TestGRaSP.py b/tests/TestGRaSP.py index 7482dcee..f1fe0dd0 100644 --- a/tests/TestGRaSP.py +++ b/tests/TestGRaSP.py @@ -7,61 +7,45 @@ from causallearn.graph.GraphNode import GraphNode from causallearn.search.PermutationBased.GRaSP import grasp from causallearn.utils.DAG2CPDAG import dag2cpdag -from causallearn.utils.GraphUtils import GraphUtils -import matplotlib.image as mpimg -import matplotlib.pyplot as plt -import io +import gc -def random_dag(p, d): + +def simulate_data(p, d, n): """ - Randomly generates an Erdos-Renyi direct acyclic graph given an ordering. + Randomly generates an Erdos-Renyi directed acyclic graph given an ordering + and randomly simulates data with the provided parameters. p = |variables| d = |edges| / |possible edges| + n = sample size """ # npe = |possible edges| pe = int(p * (p - 1) / 2) - # e = |edges| + # ne = |edges| ne = int(d * pe) # generate edges - e = np.append(np.zeros(pe - ne, "uint8"), np.ones(ne, "uint8")) + e = np.append(np.zeros(pe - ne), 0.5 * np.random.uniform(-1, 1, ne)) np.random.shuffle(e) - - # generate graph - g = np.zeros([p, p], "uint8") - g.T[np.triu_indices(p, 1)] = e - - return g - - -def parameterize_dag(g): - """ - Randomly parameterize a directed acyclic graph.
- - g = directed acyclic graph (adjacency matrix) - """ - - # p = |variables| - p = g.shape[0] - - # e = |edges| - e = np.sum(g) + B = np.zeros([p, p]) + B.T[np.triu_indices(p, 1)] = e # generate variance terms - o = np.diag(np.ones(p)) + O = np.diag(np.ones(p)) - # generate edge weights (edge parameters uniformly sampled [-1.0, 1.0]) - b = np.zeros([p, p]) - b[np.where(g == 1)] = np.random.uniform(-1, 1, e) + # simulate data + X = np.random.multivariate_normal(np.zeros(p), O, n) + for i in range(p): + J = np.where(B[i])[0] + for j in J: X[:, i] += B[i, j] * X[:, j] - # calculate covariance - s = np.dot(np.dot(np.linalg.inv(np.eye(p) - b), o), np.linalg.inv(np.eye(p) - b).T) + pi = [i for i in range(p)] + np.random.shuffle(pi) - return s + return (B != 0)[pi][:, pi], X[:, pi] class TestGRaSP(unittest.TestCase): @@ -71,25 +55,22 @@ def test_grasp(self): n = 1000 reps = 5 + gc.set_threshold(20000, 50, 50) + # gc.set_debug(gc.DEBUG_STATS) + for p in ps: for d in ds: stats = [[], [], [], []] for rep in range(1, reps + 1): - g0 = random_dag(p, d) + g0, X = simulate_data(p, d, n) print( - "\nNodes:", - p, - "| Edges:", - np.sum(g0), - "| Avg Degree:", - 2 * np.sum(g0) / p, - "| Rep:", - rep, + "\nNodes:", p, + "| Edges:", np.sum(g0), + "| Avg Degree:", 2 * round(np.sum(g0) / p, 2), + "| Rep:", rep, ) - cov = parameterize_dag(g0) - X = np.random.multivariate_normal(np.zeros(p), cov, n) - node_names = [("x%d" % i) for i in range(p)] + node_names = [("X%d" % (i + 1)) for i in range(p)] nodes = [] for name in node_names: @@ -103,15 +84,8 @@ def test_grasp(self): G0 = dag2cpdag(G0) - G = grasp(X) - - pyd = GraphUtils.to_pydot(G) - tmp_png = pyd.create_png(f="png") - fp = io.BytesIO(tmp_png) - img = mpimg.imread(fp, format='png') - plt.axis('off') - plt.imshow(img) - plt.show() + G = grasp(X, depth=1, parameters={'lambda_value': 4}) + gc.collect() AdjC = AdjacencyConfusion(G0, G) stats[0].append(AdjC.get_adj_precision()) diff --git a/tests/TestSkeletonDiscovery.py b/tests/TestSkeletonDiscovery.py new file mode 100644 index 00000000..10b0a5a2 --- /dev/null +++ b/tests/TestSkeletonDiscovery.py @@ -0,0 +1,18 @@ +from unittest import TestCase +import numpy as np +from causallearn.search.ConstraintBased.PC import pc +import networkx as nx +from causallearn.utils.cit import chisq, fisherz, gsq, kci, mv_fisherz, d_separation + + +class TestSkeletonDiscovery(TestCase): + def test_sepset(self): + truth_DAG_directed_edges = {(0, 2), (1, 2), (2, 3), (2, 4)} + + true_dag_netx = nx.DiGraph() + true_dag_netx.add_nodes_from(list(range(5))) + true_dag_netx.add_edges_from(truth_DAG_directed_edges) + + data = np.zeros((100, len(true_dag_netx.nodes))) # just a placeholder + cg = pc(data, 0.05, d_separation, True, 0, -1, true_dag=true_dag_netx) + assert cg.sepset[0, 2] is None \ No newline at end of file
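Reviewer note (not part of the patch): most of the benchmark-file updates above flip endpoint codes from 2 to -1. Under the adjacency-matrix encoding causal-learn documents (graph[i, j] is the mark at node i on the i--j edge; -1 = tail, 1 = arrow, 2 = circle, 0 = no edge), those flips appear to be circle marks oriented into tails, consistent with the newly exercised orientation rules such as ruleR5, and the MD5 table in TestFCI.py is regenerated to match. What follows is a minimal Python sketch, under those assumptions, for decoding a regenerated benchmark file and checking it against the updated checksums; md5_of and list_edges are hypothetical helpers written for this note, not part of the diff.

import hashlib

import numpy as np

# Assumed endpoint codes in the saved matrices: -1 = tail, 1 = arrow, 2 = circle.
MARK = {-1: "-", 1: ">", 2: "o"}

def md5_of(path: str) -> str:
    # Hash the raw file bytes; BENCHMARK_TXTFILE_TO_MD5 presumably stores these digests.
    with open(path, "rb") as f:
        return hashlib.md5(f.read()).hexdigest()

def list_edges(path: str) -> list:
    # Render each edge as "i <mark at i>-<mark at j> j", e.g. "3 o-> 5" or "0 --- 2";
    # g[i, j] is read as the mark at node i, g[j, i] as the mark at node j.
    g = np.loadtxt(path).astype(int)
    p = g.shape[0]
    return [f"{i} {MARK[g[i, j]]}-{MARK[g[j, i]]} {j}"
            for i in range(p) for j in range(i + 1, p) if g[i, j] != 0]

# Example: verify the regenerated asia benchmark against the updated MD5 table.
path = "tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_asia_fci_chisq_0.05.txt"
assert md5_of(path) == "4b014aa90aee456bc02dddd008d8be1f"
print("\n".join(list_edges(path)))

If the assertion fails after a local rerun, the file was likely regenerated under a different commit of the orientation rules than the one this patch targets.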