diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index 94df88d5..b0b6e90f 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -5,7 +5,7 @@ We as members, contributors, and leaders pledge to make participation in our community a harassment-free experience for everyone, regardless of age, body size, visible or invisible disability, ethnicity, sex characteristics, gender -identity and expression, level of experience, education, socio-economic status, +identity and expression, level of experience, education, socioeconomic status, nationality, personal appearance, race, religion, or sexual identity and orientation. diff --git a/README.md b/README.md index 1b976934..09ef5fd1 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # causal-learn: Causal Discovery in Python -Causal-learn is a python package for causal discovery that implements both classical and state-of-the-art causal discovery algorithms, which is a Python translation and extension of [Tetrad](https://github.com/cmu-phil/tetrad). +Causal-learn ([documentation](https://causal-learn.readthedocs.io/en/latest/), [paper](https://jmlr.org/papers/volume25/23-0970/23-0970.pdf)) is a python package for causal discovery that implements both classical and state-of-the-art causal discovery algorithms, which is a Python translation and extension of [Tetrad](https://github.com/cmu-phil/tetrad). The package is actively being developed. Feedbacks (issues, suggestions, etc.) are highly encouraged. @@ -76,10 +76,13 @@ Although causal-learn provides python implementations for some causal discovery Please cite as: ``` -@article{causallearn, - title={Causal-learn: Causal Discovery in Python}, - author={Yujia Zheng and Biwei Huang and Wei Chen and Joseph Ramsey and Mingming Gong and Ruichu Cai and Shohei Shimizu and Peter Spirtes and Kun Zhang}, - journal={arXiv preprint arXiv:2307.16405}, - year={2023} +@article{zheng2024causal, + title={Causal-learn: Causal discovery in python}, + author={Zheng, Yujia and Huang, Biwei and Chen, Wei and Ramsey, Joseph and Gong, Mingming and Cai, Ruichu and Shimizu, Shohei and Spirtes, Peter and Zhang, Kun}, + journal={Journal of Machine Learning Research}, + volume={25}, + number={60}, + pages={1--8}, + year={2024} } ``` \ No newline at end of file diff --git a/causallearn/graph/Endpoint.py b/causallearn/graph/Endpoint.py index df551955..83f1ece4 100644 --- a/causallearn/graph/Endpoint.py +++ b/causallearn/graph/Endpoint.py @@ -20,4 +20,4 @@ def __str__(self): return self.name def __eq__(self, other): - return self.value == other.value + return isinstance(other, Endpoint) and self.value == other.value diff --git a/causallearn/graph/GeneralGraph.py b/causallearn/graph/GeneralGraph.py index 41076590..0969f44e 100644 --- a/causallearn/graph/GeneralGraph.py +++ b/causallearn/graph/GeneralGraph.py @@ -508,13 +508,15 @@ def is_ancestor_of(self, node1: Node, node2: Node) -> bool: def is_child_of(self, node1: Node, node2: Node) -> bool: i = self.node_map[node1] j = self.node_map[node2] - return self.graph[i, j] == Endpoint.TAIL.value or self.graph[i, j] == Endpoint.ARROW_AND_ARROW.value + return (self.graph[j, i] == Endpoint.TAIL.value and self.graph[i, j] == Endpoint.ARROW.value) \ + or self.graph[j, i] == Endpoint.TAIL_AND_ARROW.value # Returns true iff node1 is a parent of node2. 
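The hardened Endpoint.__eq__ above no longer raises AttributeError when the right-hand side is None or a non-Endpoint value, and the corrected is_child_of accounts for the combined TAIL_AND_ARROW encoding. A minimal sanity check of the equality change (a sketch, assuming causal-learn with this patch installed); the matching is_parent_of fix follows below:

```
# Sanity check for the hardened Endpoint.__eq__ (sketch; assumes this patch).
from causallearn.graph.Endpoint import Endpoint

assert Endpoint.TAIL == Endpoint.TAIL         # identical endpoints compare equal
assert not (Endpoint.TAIL == Endpoint.ARROW)  # distinct endpoints do not
assert not (Endpoint.TAIL == None)            # previously raised AttributeError
assert not (Endpoint.TAIL == "TAIL")          # cross-type comparison is now safe
```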
def is_parent_of(self, node1: Node, node2: Node) -> bool: i = self.node_map[node1] j = self.node_map[node2] - return self.graph[j, i] == Endpoint.ARROW.value and self.graph[i, j] == Endpoint.TAIL.value + return (self.graph[j, i] == Endpoint.ARROW.value and self.graph[i, j] == Endpoint.TAIL.value) \ + or self.graph[i, j] == Endpoint.TAIL_AND_ARROW.value # Returns true iff node1 is a proper ancestor of node2. def is_proper_ancestor_of(self, node1: Node, node2: Node) -> bool: diff --git a/causallearn/score/LocalScoreFunction.py b/causallearn/score/LocalScoreFunction.py index e02ba737..488a2c45 100644 --- a/causallearn/score/LocalScoreFunction.py +++ b/causallearn/score/LocalScoreFunction.py @@ -34,9 +34,9 @@ def local_score_BIC(Data: ndarray, i: int, PAi: List[int], parameters=None) -> f if len(PAi) == 0: return n * np.log(cov[i, i]) - yX = np.mat(cov[np.ix_([i], PAi)]) - XX = np.mat(cov[np.ix_(PAi, PAi)]) - H = np.log(cov[i, i] - yX * np.linalg.inv(XX) * yX.T) + yX = cov[np.ix_([i], PAi)] + XX = cov[np.ix_(PAi, PAi)] + H = np.log(cov[i, i] - yX @ np.linalg.inv(XX) @ yX.T) return n * H + np.log(n) * len(PAi) * lambda_value @@ -68,9 +68,9 @@ def local_score_BIC_from_cov( if len(PAi) == 0: return n * np.log(cov[i, i]) - yX = np.mat(cov[np.ix_([i], PAi)]) - XX = np.mat(cov[np.ix_(PAi, PAi)]) - H = np.log(cov[i, i] - yX * np.linalg.inv(XX) * yX.T) + yX = cov[np.ix_([i], PAi)] + XX = cov[np.ix_(PAi, PAi)] + H = np.log(cov[i, i] - yX @ np.linalg.inv(XX) @ yX.T) return n * H + np.log(n) * len(PAi) * lambda_value @@ -198,7 +198,7 @@ def local_score_cv_general( PAi = list(PAi) T = Data.shape[0] - X = Data[:, Xi] + X = Data[:, [Xi]] var_lambda = parameters["lambda"] # regularization parameter k = parameters["kfold"] # k-fold cross validation n0 = math.floor(T / k) @@ -715,7 +715,7 @@ def local_score_marginal_general( """ T = Data.shape[0] - X = Data[:, Xi] + X = Data[:, [Xi]] dX = X.shape[1] # set the kernel for X diff --git a/causallearn/score/LocalScoreFunctionClass.py b/causallearn/score/LocalScoreFunctionClass.py index 41ffb49f..8171d515 100644 --- a/causallearn/score/LocalScoreFunctionClass.py +++ b/causallearn/score/LocalScoreFunctionClass.py @@ -46,3 +46,9 @@ def score(self, i: int, PAi: List[int]) -> float: self.score_cache[i][hash_key] = self.local_score_fun(self.data, i, PAi, self.parameters) return self.score_cache[i][hash_key] + + def score_nocache(self, i: int, PAi: List[int]) -> float: + if self.local_score_fun == local_score_BIC_from_cov: + return self.local_score_fun((self.cov, self.n), i, PAi, self.parameters) + else: + return self.local_score_fun(self.data, i, PAi, self.parameters) \ No newline at end of file diff --git a/causallearn/search/ConstraintBased/FCI.py b/causallearn/search/ConstraintBased/FCI.py index 9f5af378..ff3f92d0 100644 --- a/causallearn/search/ConstraintBased/FCI.py +++ b/causallearn/search/ConstraintBased/FCI.py @@ -2,7 +2,7 @@ import warnings from queue import Queue -from typing import List, Set, Tuple, Dict +from typing import List, Set, Tuple, Dict, Generator from numpy import ndarray from causallearn.graph.Edge import Edge @@ -15,6 +15,19 @@ from causallearn.utils.cit import * from causallearn.utils.FAS import fas from causallearn.utils.PCUtils.BackgroundKnowledge import BackgroundKnowledge +from itertools import combinations + +def is_uncovered_path(nodes: List[Node], G: Graph) -> bool: + """ + Determines whether the given path is an uncovered path in this graph. 
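For example (a sketch, assuming this patched module is importable), in a chain X1 o-o X2 o-o X3 the path [X1, X2, X3] is uncovered exactly when X1 and X3 are not adjacent:

```
# Illustration of is_uncovered_path (sketch; the node/graph setup is hypothetical).
from causallearn.graph.Edge import Edge
from causallearn.graph.Endpoint import Endpoint
from causallearn.graph.GeneralGraph import GeneralGraph
from causallearn.graph.GraphNode import GraphNode
from causallearn.search.ConstraintBased.FCI import is_uncovered_path

x1, x2, x3 = GraphNode("X1"), GraphNode("X2"), GraphNode("X3")
g = GeneralGraph([x1, x2, x3])
g.add_edge(Edge(x1, x2, Endpoint.CIRCLE, Endpoint.CIRCLE))
g.add_edge(Edge(x2, x3, Endpoint.CIRCLE, Endpoint.CIRCLE))
print(is_uncovered_path([x1, x2, x3], g))  # True: X1 and X3 are not adjacent

g.add_edge(Edge(x1, x3, Endpoint.CIRCLE, Endpoint.CIRCLE))
print(is_uncovered_path([x1, x2, x3], g))  # False: X1 and X3 are now adjacent
```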
+ + A path is an uncovered path if no two nonconsecutive nodes (Vi-1 and Vi+1) in the path are + adjacent. + """ + for i in range(len(nodes) - 2): + if G.is_adjacent_to(nodes[i], nodes[i + 2]): + return False + return True def traverseSemiDirected(node: Node, edge: Edge) -> Node | None: @@ -26,8 +39,17 @@ def traverseSemiDirected(node: Node, edge: Edge) -> Node | None: return edge.get_node1() return None +def traverseCircle(node: Node, edge: Edge) -> Node | None: + if node == edge.get_node1(): + if edge.get_endpoint1() == Endpoint.CIRCLE and edge.get_endpoint2() == Endpoint.CIRCLE: + return edge.get_node2() + elif node == edge.get_node2(): + if edge.get_endpoint1() == Endpoint.CIRCLE and edge.get_endpoint2() == Endpoint.CIRCLE: + return edge.get_node1() + return None -def existsSemiDirectedPath(node_from: Node, node_to: Node, G: Graph) -> bool: + +def existsSemiDirectedPath(node_from: Node, node_to: Node, G: Graph) -> bool: ## TODO: Now it does not detect whether the path is an uncovered path Q = Queue() V = set() @@ -60,6 +82,42 @@ def existsSemiDirectedPath(node_from: Node, node_to: Node, G: Graph) -> bool: return False +def GetUncoveredCirclePath(node_from: Node, node_to: Node, G: Graph, exclude_node: List[Node]) -> Generator[Node] | None: + Q = Queue() + V = set() + + path = [node_from] + + for node_u in G.get_adjacent_nodes(node_from): + if node_u in exclude_node: + continue + edge = G.get_edge(node_from, node_u) + node_c = traverseCircle(node_from, edge) + + if node_c is None or node_c in exclude_node: + continue + + if not V.__contains__(node_c): + V.add(node_c) + Q.put((node_c, path + [node_c])) + + while not Q.empty(): + node_t, path = Q.get_nowait() + if node_t == node_to and is_uncovered_path(path, G): + yield path + + for node_u in G.get_adjacent_nodes(node_t): + edge = G.get_edge(node_t, node_u) + node_c = traverseCircle(node_t, edge) + + if node_c is None or node_c in exclude_node: + continue + + if not V.__contains__(node_c): + V.add(node_c) + Q.put((node_c, path + [node_c])) + + def existOnePathWithPossibleParents(previous, node_w: Node, node_x: Node, node_b: Node, graph: Graph) -> bool: if node_w == node_x: @@ -320,8 +378,9 @@ def rulesR1R2cycle(graph: Graph, bk: BackgroundKnowledge | None, changeFlag: boo def isNoncollider(graph: Graph, sep_sets: Dict[Tuple[int, int], Set[int]], node_i: Node, node_j: Node, node_k: Node) -> bool: - sep_set = sep_sets[(graph.get_node_map()[node_i], graph.get_node_map()[node_k])] - return sep_set is not None and sep_set.__contains__(graph.get_node_map()[node_j]) + node_map = graph.get_node_map() + sep_set = sep_sets.get((node_map[node_i], node_map[node_k])) + return sep_set is not None and sep_set.__contains__(node_map[node_j]) def ruleR3(graph: Graph, sep_sets: Dict[Tuple[int, int], Set[int]], bk: BackgroundKnowledge | None, changeFlag: bool, @@ -370,6 +429,131 @@ def ruleR3(graph: Graph, sep_sets: Dict[Tuple[int, int], Set[int]], bk: Backgrou changeFlag = True return changeFlag +def ruleR5(graph: Graph, changeFlag: bool, + verbose: bool = False) -> bool: + """ + Rule R5 of the FCI algorithm. + by Jiji Zhang, 2008, "On the completeness of orientation rules for causal discovery in the presence of latent confounders and selection bias"] + + This function orients any edge that is part of an uncovered circle path between two nodes A and B, + if such a path exists. The path must start and end with a circle edge and must be uncovered, i.e. the + nodes on the path must not be adjacent to A or B. 
The orientation of the edges on the path is set to + double tail. + """ + nodes = graph.get_nodes() + def orient_on_path_helper(path, node_A, node_B): + # orient A - C, D - B + edge = graph.get_edge(node_A, path[0]) + graph.remove_edge(edge) + graph.add_edge(Edge(node_A, path[0], Endpoint.TAIL, Endpoint.TAIL)) + + edge = graph.get_edge(node_B, path[-1]) + graph.remove_edge(edge) + graph.add_edge(Edge(node_B, path[-1], Endpoint.TAIL, Endpoint.TAIL)) + if verbose: + print("Orienting edge A - C (Double tail): " + graph.get_edge(node_A, path[0]).__str__()) + print("Orienting edge B - D (Double tail): " + graph.get_edge(node_B, path[-1]).__str__()) + + # orient everything on the path to both tails + for i in range(len(path) - 1): + edge = graph.get_edge(path[i], path[i + 1]) + graph.remove_edge(edge) + graph.add_edge(Edge(path[i], path[i + 1], Endpoint.TAIL, Endpoint.TAIL)) + if verbose: + print("Orienting edge (Double tail): " + graph.get_edge(path[i], path[i + 1]).__str__()) + + for node_B in nodes: + intoBCircles = graph.get_nodes_into(node_B, Endpoint.CIRCLE) + + for node_A in intoBCircles: + found_paths_between_AB = [] + if graph.get_endpoint(node_B, node_A) != Endpoint.CIRCLE: + continue + else: + # Check if there is an uncovered circle path between A and B (A o-o C .. D o-o B) + # s.t. A is not adjacent to D and B is not adjacent to C + a_node_idx = graph.node_map[node_A] + b_node_idx = graph.node_map[node_B] + a_adj_nodes = graph.get_adjacent_nodes(node_A) + b_adj_nodes = graph.get_adjacent_nodes(node_B) + + # get the adjacent nodes with circle edges of A and B + a_circle_adj_nodes_set = [node for node in a_adj_nodes if graph.node_map[node] != a_node_idx and graph.node_map[node]!= b_node_idx + and graph.get_endpoint(node, node_A) == Endpoint.CIRCLE and graph.get_endpoint(node_A, node) == Endpoint.CIRCLE] + b_circle_adj_nodes_set = [node for node in b_adj_nodes if graph.node_map[node] != a_node_idx and graph.node_map[node]!= b_node_idx + and graph.get_endpoint(node, node_B) == Endpoint.CIRCLE and graph.get_endpoint(node_B, node) == Endpoint.CIRCLE] + + # get the adjacent nodes with circle edges of A and B that is non adjacent to B and A, respectively + for node_C in a_circle_adj_nodes_set: + if graph.is_adjacent_to(node_B, node_C): + continue + for node_D in b_circle_adj_nodes_set: + if graph.is_adjacent_to(node_A, node_D): + continue + paths = GetUncoveredCirclePath(node_from=node_C, node_to=node_D, G=graph, exclude_node=[node_A, node_B]) # get the uncovered circle path between C and D, excluding A and B + found_paths_between_AB.append(paths) + + # Orient the uncovered circle path between A and B + for paths in found_paths_between_AB: + for path in paths: + changeFlag = True + if verbose: + print("Find uncovered circle path between A and B: " + graph.get_edge(node_A, node_B).__str__()) + edge = graph.get_edge(node_A, node_B) + graph.remove_edge(edge) + graph.add_edge(Edge(node_A, node_B, Endpoint.TAIL, Endpoint.TAIL)) + orient_on_path_helper(path, node_A, node_B) + + return changeFlag + +def ruleR6(graph: Graph, changeFlag: bool, + verbose: bool = False) -> bool: + nodes = graph.get_nodes() + + for node_B in nodes: + # Find A - B + intoBTails = graph.get_nodes_into(node_B, Endpoint.TAIL) + exist = False + for node_A in intoBTails: + if graph.get_endpoint(node_B, node_A) == Endpoint.TAIL: + exist = True + if not exist: + continue + # Find B o-*C + intoBCircles = graph.get_nodes_into(node_B, Endpoint.CIRCLE) + for node_C in intoBCircles: + changeFlag = True + edge = graph.get_edge(node_B, 
node_C) + graph.remove_edge(edge) + graph.add_edge(Edge(node_B, node_C, Endpoint.TAIL, edge.get_proximal_endpoint(node_C))) + if verbose: + print("Orienting edge by rule 6): " + graph.get_edge(node_B, node_C).__str__()) + + return changeFlag + + +def ruleR7(graph: Graph, changeFlag: bool, + verbose: bool = False) -> bool: + nodes = graph.get_nodes() + + for node_B in nodes: + # Find A -o B + intoBCircles = graph.get_nodes_into(node_B, Endpoint.CIRCLE) + node_A_list = [node for node in intoBCircles if graph.get_endpoint(node_B, node) == Endpoint.TAIL] + + # Find B o-*C + for node_C in intoBCircles: + # pdb.set_trace() + for node_A in node_A_list: + # pdb.set_trace() + if not graph.is_adjacent_to(node_A, node_C): + changeFlag = True + edge = graph.get_edge(node_B, node_C) + graph.remove_edge(edge) + graph.add_edge(Edge(node_B, node_C, Endpoint.TAIL, edge.get_proximal_endpoint(node_C))) + if verbose: + print("Orienting edge by rule 7): " + graph.get_edge(node_B, node_C).__str__()) + return changeFlag def getPath(node_c: Node, previous) -> List[Node]: l = [] @@ -542,6 +726,140 @@ def ruleR4B(graph: Graph, maxPathLength: int, data: ndarray, independence_test_m return change_flag + +def rule8(graph: Graph, nodes: List[Node], changeFlag): + nodes = graph.get_nodes() if nodes is None else nodes + for node_B in nodes: + adj = graph.get_adjacent_nodes(node_B) + if len(adj) < 2: + continue + + cg = ChoiceGenerator(len(adj), 2) + combination = cg.next() + + while combination is not None: + node_A = adj[combination[0]] + node_C = adj[combination[1]] + combination = cg.next() + + if(graph.get_endpoint(node_A, node_B) == Endpoint.ARROW and graph.get_endpoint(node_B, node_A) == Endpoint.TAIL and \ + graph.get_endpoint(node_B, node_C) == Endpoint.ARROW and graph.get_endpoint(node_C, node_B) == Endpoint.TAIL and \ + graph.is_adjacent_to(node_A, node_C) and \ + graph.get_endpoint(node_A, node_C) == Endpoint.ARROW and graph.get_endpoint(node_C, node_A)== Endpoint.CIRCLE) or \ + (graph.get_endpoint(node_A, node_B) == Endpoint.CIRCLE and graph.get_endpoint(node_B, node_A) == Endpoint.TAIL and \ + graph.get_endpoint(node_B, node_C) == Endpoint.ARROW and graph.get_endpoint(node_C, node_B) == Endpoint.TAIL and \ + graph.is_adjacent_to(node_A, node_C) and \ + graph.get_endpoint(node_A, node_C) == Endpoint.ARROW and graph.get_endpoint(node_C, node_A)== Endpoint.CIRCLE): + edge1 = graph.get_edge(node_A, node_C) + graph.remove_edge(edge1) + graph.add_edge(Edge(node_A, node_C,Endpoint.TAIL, Endpoint.ARROW)) + changeFlag = True + + return changeFlag + + + +def is_possible_parent(graph: Graph, potential_parent_node, child_node): + if graph.node_map[potential_parent_node] == graph.node_map[child_node]: + return False + if not graph.is_adjacent_to(potential_parent_node, child_node): + return False + + if graph.get_endpoint(child_node, potential_parent_node) == Endpoint.ARROW: + return False + else: + return True + + +def find_possible_children(graph: Graph, parent_node, en_nodes=None): + if en_nodes is None: + nodes = graph.get_nodes() + en_nodes = [node for node in nodes if graph.node_map[node] != graph.node_map[parent_node]] + + potential_child_nodes = set() + for potential_node in en_nodes: + if is_possible_parent(graph, potential_parent_node=parent_node, child_node=potential_node): + potential_child_nodes.add(potential_node) + + return potential_child_nodes + +def rule9(graph: Graph, nodes: List[Node], changeFlag): + # changeFlag = False + nodes = graph.get_nodes() if nodes is None else nodes + for node_C in nodes: + 
intoCArrows = graph.get_nodes_into(node_C, Endpoint.ARROW) + for node_A in intoCArrows: + # we want A o--> C + if not graph.get_endpoint(node_C, node_A) == Endpoint.CIRCLE: + continue + + # look for a possibly directed uncovered path s.t. B and C are not connected (for the given A o--> C + a_node_idx = graph.node_map[node_A] + c_node_idx = graph.node_map[node_C] + a_adj_nodes = graph.get_adjacent_nodes(node_A) + nodes_set = [node for node in a_adj_nodes if graph.node_map[node] != a_node_idx and graph.node_map[node]!= c_node_idx] + possible_children = find_possible_children(graph, node_A, nodes_set) + for node_B in possible_children: + if graph.is_adjacent_to(node_B, node_C): + continue + if existsSemiDirectedPath(node_from=node_B, node_to=node_C, G=graph): + edge1 = graph.get_edge(node_A, node_C) + graph.remove_edge(edge1) + graph.add_edge(Edge(node_A, node_C, Endpoint.TAIL, Endpoint.ARROW)) + changeFlag = True + break #once we found it, break out since we have already oriented Ao->C to A->C, we want to find the next A + return changeFlag + + +def rule10(graph: Graph, changeFlag): + # changeFlag = False + nodes = graph.get_nodes() + for node_C in nodes: + intoCArrows = graph.get_nodes_into(node_C, Endpoint.ARROW) + if len(intoCArrows) < 2: + continue + # get all A where A o-> C + Anodes = [node_A for node_A in intoCArrows if graph.get_endpoint(node_C, node_A) == Endpoint.CIRCLE] + if len(Anodes) == 0: + continue + + for node_A in Anodes: + A_adj_nodes = graph.get_adjacent_nodes(node_A) + en_nodes = [i for i in A_adj_nodes if i is not node_C] + A_possible_children = find_possible_children(graph, parent_node=node_A, en_nodes=en_nodes) + if len(A_possible_children) < 2: + continue + + gen = ChoiceGenerator(len(intoCArrows), 2) + choice = gen.next() + while choice is not None: + node_B = intoCArrows[choice[0]] + node_D = intoCArrows[choice[1]] + + choice = gen.next() + # we want B->C<-D + if graph.get_endpoint(node_C, node_B) != Endpoint.TAIL: + continue + + if graph.get_endpoint(node_C, node_D) != Endpoint.TAIL: + continue + + for children in combinations(A_possible_children, 2): + child_one, child_two = children + if not existsSemiDirectedPath(node_from=child_one, node_to=node_B, G=graph) or \ + not existsSemiDirectedPath(node_from=child_two, node_to=node_D, G=graph): + continue + + if not graph.is_adjacent_to(child_one, child_two): + edge1 = graph.get_edge(node_A, node_C) + graph.remove_edge(edge1) + graph.add_edge(Edge(node_A, node_C, Endpoint.TAIL, Endpoint.ARROW)) + changeFlag = True + break #once we found it, break out since we have already oriented Ao->C to A->C, we want to find the next A + + return changeFlag + + def visibleEdgeHelperVisit(graph: Graph, node_c: Node, node_a: Node, node_b: Node, path: List[Node]) -> bool: if path.__contains__(node_a): return False @@ -691,9 +1009,9 @@ def _contains_all(set_a: Set[Node], set_b: Set[Node]): break - def fci(dataset: ndarray, independence_test_method: str=fisherz, alpha: float = 0.05, depth: int = -1, - max_path_length: int = -1, verbose: bool = False, background_knowledge: BackgroundKnowledge | None = None, show_progress: bool = True, + max_path_length: int = -1, verbose: bool = False, background_knowledge: BackgroundKnowledge | None = None, + show_progress: bool = True, node_names = None, **kwargs) -> Tuple[Graph, List[Edge]]: """ Perform Fast Causal Inference (FCI) algorithm for causal discovery @@ -716,7 +1034,7 @@ def fci(dataset: ndarray, independence_test_method: str=fisherz, alpha: float = Returns ------- - graph : a CausalGraph 
object, where graph.graph[j,i]=1 and graph.graph[i,j]=-1 indicates i --> j , + graph : a GeneralGraph object, where graph.graph[j,i]=1 and graph.graph[i,j]=-1 indicates i --> j , graph.graph[i,j] = graph.graph[j,i] = -1 indicates i --- j, graph.graph[i,j] = graph.graph[j,i] = 1 indicates i <-> j, graph.graph[j,i]=1 and graph.graph[i,j]=2 indicates i o-> j. @@ -748,8 +1066,10 @@ def fci(dataset: ndarray, independence_test_method: str=fisherz, alpha: float = nodes = [] + if node_names is None: + node_names = [f"X{i + 1}" for i in range(dataset.shape[1])] for i in range(dataset.shape[1]): - node = GraphNode(f"X{i + 1}") + node = GraphNode(node_names[i]) node.add_attribute("id", i) nodes.append(node) @@ -757,6 +1077,7 @@ def fci(dataset: ndarray, independence_test_method: str=fisherz, alpha: float = graph, sep_sets, test_results = fas(dataset, nodes, independence_test_method=independence_test_method, alpha=alpha, knowledge=background_knowledge, depth=depth, verbose=verbose, show_progress=show_progress) + # pdb.set_trace() reorientAllWith(graph, Endpoint.CIRCLE) rule0(graph, nodes, sep_sets, background_knowledge, verbose) @@ -787,6 +1108,23 @@ def fci(dataset: ndarray, independence_test_method: str=fisherz, alpha: float = if verbose: print("Epoch") + # rule 5 + change_flag = ruleR5(graph, change_flag, verbose) + + # rule 6 + change_flag = ruleR6(graph, change_flag, verbose) + + # rule 7 + change_flag = ruleR7(graph, change_flag, verbose) + + # rule 8 + change_flag = rule8(graph,nodes, change_flag) + + # rule 9 + change_flag = rule9(graph, nodes, change_flag) + # rule 10 + change_flag = rule10(graph, change_flag) + graph.set_pag(True) edges = get_color_edges(graph) diff --git a/causallearn/search/FCMBased/lingam/var_lingam.py b/causallearn/search/FCMBased/lingam/var_lingam.py index 1f2d931f..0cb48c82 100644 --- a/causallearn/search/FCMBased/lingam/var_lingam.py +++ b/causallearn/search/FCMBased/lingam/var_lingam.py @@ -253,14 +253,14 @@ def _estimate_var_coefs(self, X): # XXX: VAR.fit() is not searching lags correctly if self._criterion not in ['aic', 'fpe', 'hqic', 'bic']: var = VAR(X) - result = var.fit(maxlags=self._lags, trend='nc') + result = var.fit(maxlags=self._lags, trend='n') else: min_value = float('Inf') result = None for lag in range(1, self._lags + 1): var = VAR(X) - fitted = var.fit(maxlags=lag, ic=None, trend='nc') + fitted = var.fit(maxlags=lag, ic=None, trend='n') value = getattr(fitted, self._criterion) if value < min_value: @@ -319,10 +319,14 @@ def _pruning(self, X, B_taus, causal_order): obj = np.zeros((len(blocks))) exp = np.zeros( (len(blocks), causal_order_no + n_features * self._lags)) - for j, block in enumerate(blocks): - obj[j] = block[0][i] - exp[j:] = np.concatenate( - [block[0][ancestor_indexes].flatten(), block[1:][:].flatten()], axis=0) + + # Create 3D array to hold flattened ancestor indices for each block + ancestor_indexes_flat = blocks[:, 0, ancestor_indexes].reshape(len(blocks), -1) + # Fill obj using advanced indexing + obj[:] = blocks[:, 0, i] + # Fill exp using advanced indexing + exp[:, :causal_order_no] = ancestor_indexes_flat + exp[:, causal_order_no:] = blocks[:, 1:].reshape(len(blocks), -1) # adaptive lasso gamma = 1.0 diff --git a/causallearn/search/PermutationBased/BOSS.py b/causallearn/search/PermutationBased/BOSS.py new file mode 100644 index 00000000..465d3659 --- /dev/null +++ b/causallearn/search/PermutationBased/BOSS.py @@ -0,0 +1,207 @@ +import random +import sys +import time +import warnings +from typing import Any, Dict, List, Optional + 
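Before the implementation, a minimal usage sketch for the boss() search this new file defines (the toy data are hypothetical; the signature and CPDAG encoding are as documented below):

```
# Usage sketch for boss() (assumes this patch; the toy data are hypothetical).
import numpy as np
from causallearn.search.PermutationBased.BOSS import boss

rng = np.random.default_rng(0)
a = rng.standard_normal(500)
b = 0.8 * a + rng.standard_normal(500)   # a -> b
c = 0.6 * b + rng.standard_normal(500)   # b -> c
X = np.column_stack([a, b, c])

G = boss(X, score_func="local_score_BIC_from_cov", verbose=False)
print(G.graph)  # CPDAG: G.graph[j, i] = 1 and G.graph[i, j] = -1 encodes i --> j
```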
+import numpy as np +from causallearn.graph.GeneralGraph import GeneralGraph +from causallearn.graph.GraphNode import GraphNode +from causallearn.score.LocalScoreFunction import ( + local_score_BDeu, + local_score_BIC, + local_score_BIC_from_cov, + local_score_cv_general, + local_score_cv_multi, + local_score_marginal_general, + local_score_marginal_multi, +) +from causallearn.search.PermutationBased.gst import GST; +from causallearn.score.LocalScoreFunctionClass import LocalScoreClass +from causallearn.utils.DAG2CPDAG import dag2cpdag + + +def boss( + X: np.ndarray, + score_func: str = "local_score_BIC_from_cov", + parameters: Optional[Dict[str, Any]] = None, + verbose: Optional[bool] = True, + node_names: Optional[List[str]] = None, +) -> GeneralGraph: + """ + Perform a best order score search (BOSS) algorithm + + Parameters + ---------- + X : data set (numpy ndarray), shape (n_samples, n_features). The input data, where n_samples is the number of samples and n_features is the number of features. + score_func : the string name of score function. (str(one of 'local_score_CV_general', 'local_score_marginal_general', + 'local_score_CV_multi', 'local_score_marginal_multi', 'local_score_BIC', 'local_score_BIC_from_cov', 'local_score_BDeu')). + parameters : when using CV likelihood, + parameters['kfold']: k-fold cross validation + parameters['lambda']: regularization parameter + parameters['dlabel']: for variables with multi-dimensions, + indicate which dimensions belong to the i-th variable. + verbose : whether to print the time cost and verbose output of the algorithm. + + Returns + ------- + G : learned causal graph, where G.graph[j,i] = 1 and G.graph[i,j] = -1 indicates i --> j, G.graph[i,j] = G.graph[j,i] = -1 indicates i --- j. + """ + + X = X.copy() + n, p = X.shape + if n < p: + warnings.warn("The number of features is much larger than the sample size!") + + if score_func == "local_score_CV_general": + # % k-fold negative cross validated likelihood based on regression in RKHS + if parameters is None: + parameters = { + "kfold": 10, # 10 fold cross validation + "lambda": 0.01, + } # regularization parameter + localScoreClass = LocalScoreClass( + data=X, local_score_fun=local_score_cv_general, parameters=parameters + ) + elif score_func == "local_score_marginal_general": + # negative marginal likelihood based on regression in RKHS + parameters = {} + localScoreClass = LocalScoreClass( + data=X, local_score_fun=local_score_marginal_general, parameters=parameters + ) + elif score_func == "local_score_CV_multi": + # k-fold negative cross validated likelihood based on regression in RKHS + # for data with multi-variate dimensions + if parameters is None: + parameters = { + "kfold": 10, + "lambda": 0.01, + "dlabel": {}, + } # regularization parameter + for i in range(X.shape[1]): + parameters["dlabel"]["{}".format(i)] = i + localScoreClass = LocalScoreClass( + data=X, local_score_fun=local_score_cv_multi, parameters=parameters + ) + elif score_func == "local_score_marginal_multi": + # negative marginal likelihood based on regression in RKHS + # for data with multi-variate dimensions + if parameters is None: + parameters = {"dlabel": {}} + for i in range(X.shape[1]): + parameters["dlabel"]["{}".format(i)] = i + localScoreClass = LocalScoreClass( + data=X, local_score_fun=local_score_marginal_multi, parameters=parameters + ) + elif score_func == "local_score_BIC": + # SEM BIC score + warnings.warn("Please use 'local_score_BIC_from_cov' instead") + if parameters is None: + parameters = 
{"lambda_value": 2} + localScoreClass = LocalScoreClass( + data=X, local_score_fun=local_score_BIC, parameters=parameters + ) + elif score_func == "local_score_BIC_from_cov": + # SEM BIC score + if parameters is None: + parameters = {"lambda_value": 2} + localScoreClass = LocalScoreClass( + data=X, local_score_fun=local_score_BIC_from_cov, parameters=parameters + ) + elif score_func == "local_score_BDeu": + # BDeu score + localScoreClass = LocalScoreClass( + data=X, local_score_fun=local_score_BDeu, parameters=None + ) + else: + raise Exception("Unknown function!") + + score = localScoreClass + gsts = [GST(i, score) for i in range(p)] + + node_names = [("X%d" % (i + 1)) for i in range(p)] if node_names is None else node_names + nodes = [] + + for name in node_names: + node = GraphNode(name) + nodes.append(node) + + G = GeneralGraph(nodes) + + runtime = time.perf_counter() + + order = [v for v in range(p)] + + gsts = [GST(v, score) for v in order] + parents = {v: [] for v in order} + + variables = [v for v in order] + while True: + improved = False + random.shuffle(variables) + if verbose: + for i, v in enumerate(order): + parents[v].clear() + gsts[v].trace(order[:i], parents[v]) + sys.stdout.write("\rBOSS edge count: %i " % np.sum([len(parents[v]) for v in range(p)])) + sys.stdout.flush() + + for v in variables: + improved |= better_mutation(v, order, gsts) + if not improved: break + + for i, v in enumerate(order): + parents[v].clear() + gsts[v].trace(order[:i], parents[v]) + + runtime = time.perf_counter() - runtime + + if verbose: + sys.stdout.write("\nBOSS completed in: %.2fs \n" % runtime) + sys.stdout.flush() + + for y in range(p): + for x in parents[y]: + G.add_directed_edge(nodes[x], nodes[y]) + + G = dag2cpdag(G) + + return G + + +def reversed_enumerate(iter, j): + for w in reversed(iter): + yield j, w + j -= 1 + + +def better_mutation(v, order, gsts): + i = order.index(v) + p = len(order) + scores = np.zeros(p + 1) + + prefix = [] + score = 0 + for j, w in enumerate(order): + scores[j] = gsts[v].trace(prefix) + score + if v != w: + score += gsts[w].trace(prefix) + prefix.append(w) + + scores[p] = gsts[v].trace(prefix) + score + best = p + + prefix.append(v) + score = 0 + for j, w in reversed_enumerate(order, p - 1): + if v != w: + prefix.remove(w) + score += gsts[w].trace(prefix) + scores[j] += score + if scores[j] > scores[best]: best = j + + if scores[i] + 1e-6 > scores[best]: return False + order.remove(v) + order.insert(best - int(best > i), v) + + return True \ No newline at end of file diff --git a/causallearn/search/PermutationBased/GRaSP.py b/causallearn/search/PermutationBased/GRaSP.py index ba77b3ef..f4d4aa14 100644 --- a/causallearn/search/PermutationBased/GRaSP.py +++ b/causallearn/search/PermutationBased/GRaSP.py @@ -10,11 +10,13 @@ from causallearn.score.LocalScoreFunction import ( local_score_BDeu, local_score_BIC, + local_score_BIC_from_cov, local_score_cv_general, local_score_cv_multi, local_score_marginal_general, local_score_marginal_multi, ) +from causallearn.search.PermutationBased.gst import GST; from causallearn.score.LocalScoreFunctionClass import LocalScoreClass from causallearn.utils.DAG2CPDAG import dag2cpdag @@ -33,6 +35,7 @@ def __init__(self, p, score): y = self.order[i] self.parents[y] = [] self.local_scores[y] = -score.score(y, []) + # self.local_scores[y] = -score.score_nocache(y, []) def get(self, i): return self.order[i] @@ -76,11 +79,12 @@ def len(self): def grasp( X: np.ndarray, - score_func: str = "local_score_BIC", + score_func: str = 
"local_score_BIC_from_cov", depth: Optional[int] = 3, - maxP: Optional[float] = None, parameters: Optional[Dict[str, Any]] = None, -) -> Dict[str, Any]: + verbose: Optional[bool] = True, + node_names: Optional[List[str]] = None, +) -> GeneralGraph: """ Perform a greedy relaxation of the sparsest permutation (GRaSP) algorithm @@ -88,52 +92,43 @@ def grasp( ---------- X : data set (numpy ndarray), shape (n_samples, n_features). The input data, where n_samples is the number of samples and n_features is the number of features. score_func : the string name of score function. (str(one of 'local_score_CV_general', 'local_score_marginal_general', - 'local_score_CV_multi', 'local_score_marginal_multi', 'local_score_BIC', 'local_score_BDeu')). + 'local_score_CV_multi', 'local_score_marginal_multi', 'local_score_BIC', 'local_score_BIC_from_cov', 'local_score_BDeu')). depth : allowed maximum depth for DFS - maxP : allowed maximum number of parents when searching the graph parameters : when using CV likelihood, parameters['kfold']: k-fold cross validation parameters['lambda']: regularization parameter parameters['dlabel']: for variables with multi-dimensions, indicate which dimensions belong to the i-th variable. + verbose : whether to print the time cost and verbose output of the algorithm. Returns ------- G : learned causal graph, where G.graph[j,i] = 1 and G.graph[i,j] = -1 indicates i --> j, G.graph[i,j] = G.graph[j,i] = -1 indicates i --- j. """ + X = X.copy() n, p = X.shape if n < p: warnings.warn("The number of features is much larger than the sample size!") - X = np.mat(X) - if ( - score_func == "local_score_CV_general" - ): # % k-fold negative cross validated likelihood based on regression in RKHS + if score_func == "local_score_CV_general": + # k-fold negative cross validated likelihood based on regression in RKHS if parameters is None: parameters = { "kfold": 10, # 10 fold cross validation "lambda": 0.01, } # regularization parameter - if maxP is None: - maxP = X.shape[1] / 2 # maximum number of parents localScoreClass = LocalScoreClass( data=X, local_score_fun=local_score_cv_general, parameters=parameters ) - - elif ( - score_func == "local_score_marginal_general" - ): # negative marginal likelihood based on regression in RKHS + elif score_func == "local_score_marginal_general": + # negative marginal likelihood based on regression in RKHS parameters = {} - if maxP is None: - maxP = X.shape[1] / 2 # maximum number of parents localScoreClass = LocalScoreClass( data=X, local_score_fun=local_score_marginal_general, parameters=parameters ) - - elif ( - score_func == "local_score_CV_multi" - ): # k-fold negative cross validated likelihood based on regression in RKHS + elif score_func == "local_score_CV_multi": + # k-fold negative cross validated likelihood based on regression in RKHS # for data with multi-variate dimensions if parameters is None: parameters = { @@ -143,47 +138,46 @@ def grasp( } # regularization parameter for i in range(X.shape[1]): parameters["dlabel"]["{}".format(i)] = i - if maxP is None: - maxP = len(parameters["dlabel"]) / 2 localScoreClass = LocalScoreClass( data=X, local_score_fun=local_score_cv_multi, parameters=parameters ) - - elif ( - score_func == "local_score_marginal_multi" - ): # negative marginal likelihood based on regression in RKHS + elif score_func == "local_score_marginal_multi": + # negative marginal likelihood based on regression in RKHS # for data with multi-variate dimensions if parameters is None: parameters = {"dlabel": {}} for i in range(X.shape[1]): 
parameters["dlabel"]["{}".format(i)] = i - if maxP is None: - maxP = len(parameters["dlabel"]) / 2 localScoreClass = LocalScoreClass( data=X, local_score_fun=local_score_marginal_multi, parameters=parameters ) - - elif score_func == "local_score_BIC": # Greedy equivalence search with BIC score - parameters = {} - parameters["lambda_value"] = 2 - if maxP is None: - maxP = X.shape[1] / 2 + elif score_func == "local_score_BIC": + # SEM BIC score + warnings.warn("Please use 'local_score_BIC_from_cov' instead") + if parameters is None: + parameters = {"lambda_value": 2} localScoreClass = LocalScoreClass( data=X, local_score_fun=local_score_BIC, parameters=parameters ) - - elif score_func == "local_score_BDeu": # Greedy equivalence search with BDeu score - if maxP is None: - maxP = X.shape[1] / 2 + elif score_func == "local_score_BIC_from_cov": + # SEM BIC score + if parameters is None: + parameters = {"lambda_value": 2} + localScoreClass = LocalScoreClass( + data=X, local_score_fun=local_score_BIC_from_cov, parameters=parameters + ) + elif score_func == "local_score_BDeu": + # BDeu score localScoreClass = LocalScoreClass( data=X, local_score_fun=local_score_BDeu, parameters=None ) - else: raise Exception("Unknown function!") + score = localScoreClass + gsts = [GST(i, score) for i in range(p)] - node_names = [("x%d" % i) for i in range(p)] + node_names = [("X%d" % (i + 1)) for i in range(p)] if node_names is None else node_names nodes = [] for name in node_names: @@ -200,19 +194,20 @@ def grasp( y_parents = order.get_parents(y) candidates = [order.get(j) for j in range(0, i)] - grow(y, y_parents, candidates, score) - local_score = shrink(y, y_parents, score) + local_score = gsts[y].trace(candidates, y_parents) order.set_local_score(y, local_score) order.bump_edges(len(y_parents)) - while dfs(depth - 1, set(), [], order, score): - sys.stdout.write("\rGRaSP edge count: %i " % order.get_edges()) - sys.stdout.flush() + while dfs(depth - 1, set(), [], order, gsts): + if verbose: + sys.stdout.write("\rGRaSP edge count: %i " % order.get_edges()) + sys.stdout.flush() runtime = time.perf_counter() - runtime - - sys.stdout.write("\nGRaSP completed in: %.2fs \n" % runtime) - sys.stdout.flush() + + if verbose: + sys.stdout.write("\nGRaSP completed in: %.2fs \n" % runtime) + sys.stdout.flush() for y in range(p): for x in order.get_parents(y): @@ -224,7 +219,7 @@ def grasp( # performs a dfs over covered tucks -def dfs(depth: int, flipped: set, history: List[set], order, score): +def dfs(depth: int, flipped: set, history: List[set], order, gsts): cache = [{}, {}, {}, 0] @@ -252,7 +247,7 @@ def dfs(depth: int, flipped: set, history: List[set], order, score): cache[3] = order.get_edges() tuck(i, j, order) - edge_bump, score_bump = update(i, j, order, score) + edge_bump, score_bump = update(i, j, order, gsts) # because things that should be zero sometimes are not if score_bump > 1e-6: @@ -271,7 +266,7 @@ def dfs(depth: int, flipped: set, history: List[set], order, score): if len(flipped) > 0 and flipped not in history: history.append(flipped) - if depth > 0 and dfs(depth - 1, flipped, history, order, score): + if depth > 0 and dfs(depth - 1, flipped, history, order, gsts): return True del history[-1] @@ -286,7 +281,7 @@ def dfs(depth: int, flipped: set, history: List[set], order, score): # updates the parents and scores after a tuck -def update(i: int, j: int, order, score): +def update(i: int, j: int, order, gsts): edge_bump = 0 old_score = 0 @@ -299,18 +294,9 @@ def update(i: int, j: int, order, score): 
edge_bump -= len(z_parents) old_score += order.get_local_score(z) + z_parents.clear() candidates = [order.get(l) for l in range(0, k)] - - for w in [w for w in z_parents if w not in candidates]: - z_parents.remove(w) - shrink(z, z_parents, score) - - for w in z_parents: - candidates.remove(w) - - grow(z, z_parents, candidates, score) - - local_score = shrink(z, z_parents, score) + local_score = gsts[z].trace(candidates, z_parents) order.set_local_score(z, local_score) edge_bump += len(z_parents) @@ -319,64 +305,6 @@ def update(i: int, j: int, order, score): return edge_bump, new_score - old_score -# grow of grow-shrink -def grow(y: int, y_parents: List[int], candidates: List[int], score): - - best = -score.score(y, y_parents) - - add = None - checked = [] - while add is not None or len(candidates) > 0: - - if add is not None: - checked.remove(add) - y_parents.append(add) - candidates = checked - checked = [] - add = None - - while len(candidates) > 0: - - x = candidates.pop() - y_parents.append(x) - current = -score.score(y, y_parents) - y_parents.remove(x) - checked.append(x) - - if current > best: - best = current - add = x - - return best - - -# shrink of grow-shrink -def shrink(y: int, y_parents: List[int], score): - - best = -score.score(y, y_parents) - - remove = None - checked = 0 - while remove is not None or checked < len(y_parents): - - if remove is not None: - y_parents.remove(remove) - checked = 0 - remove = None - - while checked < len(y_parents): - x = y_parents.pop(0) - current = -score.score(y, y_parents) - y_parents.append(x) - checked += 1 - - if current > best: - best = current - remove = x - - return best - - # tucks the node at position i into position j def tuck(i: int, j: int, order): diff --git a/causallearn/search/PermutationBased/gst.py b/causallearn/search/PermutationBased/gst.py new file mode 100644 index 00000000..25e0da6b --- /dev/null +++ b/causallearn/search/PermutationBased/gst.py @@ -0,0 +1,77 @@ +class GSTNode: + + def __init__(self, tree, add=None, score=None): + if score is None: score = -tree.score.score_nocache(tree.vertex, []) + # if score is None: score = -tree.score.score(tree.vertex, []) + self.tree = tree + self.add = add + self.grow_score = score + self.shrink_score = score + self.branches = None + self.remove = None + + def __lt__(self, other): + return self.grow_score < other.grow_score + + def grow(self, available, parents): + self.branches = [] + for add in available: + parents.append(add) + score = -self.tree.score.score_nocache(self.tree.vertex, parents) + # score = -self.tree.score.score(self.tree.vertex, parents) + parents.remove(add) + branch = GSTNode(self.tree, add, score) + if score > self.grow_score: self.branches.append(branch) + self.branches.sort() + + def shrink(self, parents): + self.remove = [] + while True: + best = None + for remove in [parent for parent in parents]: + parents.remove(remove) + score = -self.tree.score.score_nocache(self.tree.vertex, parents) + # score = -self.tree.score.score(self.tree.vertex, parents) + parents.append(remove) + if score > self.shrink_score: + self.shrink_score = score + best = remove + if best is None: break + self.remove.append(best) + parents.remove(best) + + def trace(self, prefix, available, parents): + if self.branches is None: self.grow(available, parents) + for branch in self.branches: + available.remove(branch.add) + if branch.add in prefix: + parents.append(branch.add) + return branch.trace(prefix, available, parents) + if self.remove is None: + self.shrink(parents) + return 
self.shrink_score + for remove in self.remove: parents.remove(remove) + return self.shrink_score + + +class GST: + + def __init__(self, vertex, score): + self.vertex = vertex + self.score = score + self.root = GSTNode(self) + self.forbidden = [vertex] + self.required = [] + + def trace(self, prefix, parents=None): + if parents is None: parents = [] + available = [i for i in range(self.score.data.shape[1]) if i not in self.forbidden] + return self.root.trace(prefix, available, parents) + + def set_knowledge(self, forbidden, required): + self.forbidden = forbidden # not implemented + self.required = required # not implemented + self.reset() + + def reset(self): + self.root = GSTNode(self) \ No newline at end of file diff --git a/causallearn/search/ScoreBased/GES.py b/causallearn/search/ScoreBased/GES.py index 1c73fbc4..c9475755 100644 --- a/causallearn/search/ScoreBased/GES.py +++ b/causallearn/search/ScoreBased/GES.py @@ -5,10 +5,11 @@ from causallearn.utils.DAG2CPDAG import dag2cpdag from causallearn.utils.GESUtils import * from causallearn.utils.PDAG2DAG import pdag2dag +from typing import Union def ges(X: ndarray, score_func: str = 'local_score_BIC', maxP: Optional[float] = None, - parameters: Optional[Dict[str, Any]] = None) -> Dict[str, Any]: + parameters: Optional[Dict[str, Any]] = None, node_names: Union[List[str], None] = None,) -> Dict[str, Any]: """ Perform greedy equivalence search (GES) algorithm @@ -95,7 +96,8 @@ def ges(X: ndarray, score_func: str = 'local_score_BIC', maxP: Optional[float] = raise Exception('Unknown function!') score_func = localScoreClass - node_names = [("X%d" % (i + 1)) for i in range(N)] + if node_names is None: + node_names = [("X%d" % (i + 1)) for i in range(N)] nodes = [] for name in node_names: @@ -137,7 +139,7 @@ def ges(X: ndarray, score_func: str = 'local_score_BIC', maxP: Optional[float] = np.where(G.graph[j, :] == Endpoint.TAIL.value)[0]) # neighbors of Xj Ti = np.union1d(np.where(G.graph[:, i] != Endpoint.NULL.value)[0], - np.where(G.graph[i, 0] != Endpoint.NULL.value)[0]) # adjacent to Xi + np.where(G.graph[i, :] != Endpoint.NULL.value)[0]) # adjacent to Xi NTi = np.setdiff1d(np.arange(N), Ti) T0 = np.intersect1d(Tj, NTi) # find the neighbours of Xj that are not adjacent to Xi @@ -180,7 +182,6 @@ def ges(X: ndarray, score_func: str = 'local_score_BIC', maxP: Optional[float] = break G = insert(G, min_desc[0], min_desc[1], min_desc[2]) update1.append([min_desc[0], min_desc[1], min_desc[2]]) - print(G.graph) G = pdag2dag(G) G = dag2cpdag(G) G_step1.append(G) @@ -190,7 +191,6 @@ def ges(X: ndarray, score_func: str = 'local_score_BIC', maxP: Optional[float] = ## -------------------------------------------------------------------- # backward greedy search - print('backward') count2 = 0 score_new = score update2 = [] @@ -244,7 +244,6 @@ def ges(X: ndarray, score_func: str = 'local_score_BIC', maxP: Optional[float] = break G = delete(G, min_desc[0], min_desc[1], min_desc[2]) update2.append([min_desc[0], min_desc[1], min_desc[2]]) - print(G.graph) G = pdag2dag(G) G = dag2cpdag(G) G_step2.append(G) diff --git a/causallearn/utils/DAG2PAG.py b/causallearn/utils/DAG2PAG.py index 581f5f20..2a091f72 100644 --- a/causallearn/utils/DAG2PAG.py +++ b/causallearn/utils/DAG2PAG.py @@ -10,10 +10,11 @@ from causallearn.graph.Endpoint import Endpoint from causallearn.graph.GeneralGraph import GeneralGraph from causallearn.graph.Node import Node -from causallearn.search.ConstraintBased.FCI import rule0, rulesR1R2cycle, ruleR3, ruleR4B +from 
causallearn.search.ConstraintBased.FCI import rule0, rulesR1R2cycle, ruleR3, ruleR4B, ruleR5, ruleR6, ruleR7, rule8, rule9, rule10 from causallearn.utils.cit import CIT, d_separation -def dag2pag(dag: Dag, islatent: List[Node]) -> GeneralGraph: + +def dag2pag(dag: Dag, islatent: List[Node], isselection: List[Node] = []) -> GeneralGraph: """ Convert a DAG to its corresponding PAG Parameters @@ -27,8 +28,8 @@ def dag2pag(dag: Dag, islatent: List[Node]) -> GeneralGraph: dg = nx.DiGraph() true_dag = nx.DiGraph() nodes = dag.get_nodes() - observed_nodes = list(set(nodes) - set(islatent)) - mod_nodes = observed_nodes + islatent + observed_nodes = list(set(nodes) - set(islatent) - set(isselection)) + mod_nodes = observed_nodes + islatent + isselection nodes = dag.get_nodes() nodes_ids = {node: i for i, node in enumerate(nodes)} mod_nodeids = {node: i for i, node in enumerate(mod_nodes)} @@ -65,7 +66,7 @@ def dag2pag(dag: Dag, islatent: List[Node]) -> GeneralGraph: for Z in combinations(observed_nodes, l): if nodex in Z or nodey in Z: continue - if d_separated(dg, {nodes_ids[nodex]}, {nodes_ids[nodey]}, set(nodes_ids[z] for z in Z)): + if d_separated(dg, {nodes_ids[nodex]}, {nodes_ids[nodey]}, set(nodes_ids[z] for z in Z) | set([nodes_ids[s] for s in isselection])): if edge: PAG.remove_edge(edge) sepset[(nodes_ids[nodex], nodes_ids[nodey])] |= set(Z) @@ -96,14 +97,22 @@ def dag2pag(dag: Dag, islatent: List[Node]) -> GeneralGraph: data = np.empty(shape=(0, len(observed_nodes))) independence_test_method = CIT(data, method=d_separation, true_dag=true_dag) - + node_map = PAG.get_node_map() + sepset_reindexed = {(node_map[nodes[i]], node_map[nodes[j]]): sepset[(i, j)] for (i, j) in sepset} while change_flag: change_flag = False change_flag = rulesR1R2cycle(PAG, None, change_flag, False) - change_flag = ruleR3(PAG, sepset, None, change_flag, False) - change_flag = ruleR4B(PAG, -1, data, independence_test_method, 0.05, sep_sets=sepset, + change_flag = ruleR3(PAG, sepset_reindexed, None, change_flag, False) + change_flag = ruleR4B(PAG, -1, data, independence_test_method, 0.05, sep_sets=sepset_reindexed, change_flag=change_flag, bk=None, verbose=False) + change_flag = ruleR5(PAG, changeFlag=change_flag, verbose=True) + change_flag = ruleR6(PAG, changeFlag=change_flag) + change_flag = ruleR7(PAG, changeFlag=change_flag) + change_flag = rule8(PAG, nodes=observed_nodes, changeFlag=change_flag) + change_flag = rule9(PAG, nodes=observed_nodes, changeFlag=change_flag) + change_flag = rule10(PAG, changeFlag=change_flag) + return PAG diff --git a/causallearn/utils/Dataset.py b/causallearn/utils/Dataset.py new file mode 100644 index 00000000..6e62c2ee --- /dev/null +++ b/causallearn/utils/Dataset.py @@ -0,0 +1,41 @@ +import numpy as np +import urllib.request +from io import StringIO + +def load_dataset(dataset_name): + ''' + Load real-world datasets. 
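A usage sketch for this helper (a sketch, assuming this patch; it fetches the processed files over the network at call time):

```
# Usage sketch for load_dataset (assumes this patch and network access).
from causallearn.utils.Dataset import load_dataset

data, labels = load_dataset('sachs')
print(data.shape)   # (n_samples, n_features) parsed from the tab-separated file
print(labels[:3])   # column names taken from the header row
```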
Processed datasets are from https://github.com/cmu-phil/example-causal-datasets/tree/main + + Parameters + ---------- + dataset_name : str, ['sachs', 'boston_housing', 'airfoil'] + + Returns + ------- + data = np.array + labels = list + ''' + + url_mapping = { + 'sachs': 'https://raw.githubusercontent.com/cmu-phil/example-causal-datasets/main/real/sachs/data/sachs.2005.continuous.txt', + 'boston_housing': 'https://raw.githubusercontent.com/cmu-phil/example-causal-datasets/main/real/boston-housing/data/boston-housing.continuous.txt', + 'airfoil': 'https://raw.githubusercontent.com/cmu-phil/example-causal-datasets/main/real/airfoil-self-noise/data/airfoil-self-noise.continuous.txt' + } + + if dataset_name not in url_mapping: + raise ValueError("Invalid dataset name") + + url = url_mapping[dataset_name] + with urllib.request.urlopen(url) as response: + content = response.read().decode('utf-8') # Read content and decode to string + + # Use StringIO to turn the string content into a file-like object so numpy can read from it + labels_array = np.genfromtxt(StringIO(content), delimiter="\t", dtype=str, max_rows=1) + data = np.loadtxt(StringIO(content), skiprows=1) + + # Convert labels_array to a list of strings + labels_list = labels_array.tolist() + if isinstance(labels_list, str): # handle the case where there's only one label + labels_list = [labels_list] + + return data, labels_list \ No newline at end of file diff --git a/causallearn/utils/FastKCI/FastKCI.py b/causallearn/utils/FastKCI/FastKCI.py new file mode 100644 index 00000000..f5fcc506 --- /dev/null +++ b/causallearn/utils/FastKCI/FastKCI.py @@ -0,0 +1,534 @@ +from causallearn.utils.KCI.KCI import GaussianKernel, Kernel +import numpy as np +from numpy.linalg import eigh +import scipy.stats as stats +from scipy.special import logsumexp +from sklearn.preprocessing import LabelEncoder +from joblib import Parallel, delayed +from sklearn.gaussian_process import GaussianProcessRegressor +from sklearn.gaussian_process.kernels import ConstantKernel, WhiteKernel, RBF +import warnings + + +class FastKCI_CInd(object): + """ + Python implementation of a speed-up version of the Kernel-based Conditional Independence (KCI) test. + Conditional version. + + References + ---------- + [1] K. Zhang, J. Peters, D. Janzing, and B. Schölkopf, + "A kernel-based conditional independence test and application in causal discovery", In UAI 2011. + [2] M. Zhang, and S. Williamson, + "Embarrassingly Parallel Inference for Gaussian Processes", In JMLR 20 (2019) + [3] O. Schacht, and B. Huang + "FastKCI: A fast Kernel-based Conditional Independence test with application to causal discovery", + Working Paper. + + """ + def __init__(self, K=10, J=8, alpha=500, epsilon=1e-3, eig_thresh=1e-6, use_gp=False): + """ + Initialize the FastKCI_CInd object. + + Parameters + ---------- + K: Number of Gaussians that are assumed to be in the mixture. + J: Number of independent repetitions. + alpha: Parameter for the Dirichlet distribution. + epsilon: Penalty for the matrix ridge regression. + eig_thresh: Threshold for eigenvalues. + use_gp: Whether to use Gaussian Process Regression to determine the kernel widths. + """ + self.K = K + self.J = J + self.alpha = alpha + self.epsilon = epsilon + self.eig_thresh = eig_thresh + self.use_gp = use_gp + self.nullss = 5000 + + def compute_pvalue(self, data_x=None, data_y=None, data_z=None): + """ + Main function: compute the p value of H_0: X is independent of Y conditional on Z.
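For orientation, a minimal end-to-end sketch of the conditional test (hypothetical data in which X is independent of Y given Z by construction):

```
# Usage sketch for FastKCI_CInd (assumes this patch; data are hypothetical).
import numpy as np
from causallearn.utils.FastKCI.FastKCI import FastKCI_CInd

rng = np.random.default_rng(0)
Z = rng.standard_normal((600, 1))
X = Z + 0.3 * rng.standard_normal((600, 1))
Y = Z + 0.3 * rng.standard_normal((600, 1))   # X and Y independent given Z

cind = FastKCI_CInd(K=10, J=8)
pvalue, test_stat = cind.compute_pvalue(X, Y, Z)
print(pvalue)  # expected to be large here, since H_0 holds by construction
```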
+ + Parameters + ---------- + data_x: input data for x (nxd1 array) + data_y: input data for y (nxd2 array) + data_z: input data for z (nxd3 array) + + Returns + _________ + pvalue: p value (scalar) + test_stat: test statistic (scalar) + """ + self.data = [data_x, data_y] + self.data_z = data_z + self.n = data_x.shape[0] + + self.Z_proposal = Parallel(n_jobs=-1)(delayed(self.partition_data)() for i in range(self.J)) + block_res = Parallel(n_jobs=-1)(delayed(self.pvalue_onblocks)(self.Z_proposal[i]) for i in range(self.J)) + test_stat, null_samples, log_likelihood = zip(*block_res) + + log_likelihood = np.array(log_likelihood) + self.all_null_samples = np.vstack(null_samples) + self.all_p = np.array([np.sum(self.all_null_samples[i,] > test_stat[i]) / float(self.nullss) for i in range(self.J)]) + self.prob_weights = np.around(np.exp(log_likelihood-logsumexp(log_likelihood)), 6) + self.all_test_stats = np.array(test_stat) + self.test_stat = np.average(np.array(test_stat), weights=self.prob_weights) + self.null_samples = np.average(null_samples, axis=0, weights=self.prob_weights) + self.pvalue = np.sum(self.null_samples > self.test_stat) / float(self.nullss) + + return self.pvalue, self.test_stat + + def partition_data(self): + """ + Partitions data into K Gaussians following an expectation maximization approach on Z. + + Returns + _________ + Z: Latent Gaussian that was drawn for each observation (nx1 array) + prob_Z: Log-Likelihood of that random assignment (scalar) + """ + Z_mean = self.data_z.mean(axis=0) + Z_sd = self.data_z.std(axis=0) + mu_k = np.random.normal(Z_mean, Z_sd, size=(self.K, self.data_z.shape[1])) + sigma_k = np.eye(self.data_z.shape[1]) + pi_j = np.random.dirichlet([self.alpha]*self.K) + ll = np.tile(np.log(pi_j), (self.n, 1)) + for k in range(self.K): + ll[:, k] += stats.multivariate_normal.logpdf(self.data_z, mu_k[k, :], cov=sigma_k, allow_singular=True) + Z = np.array([np.random.multinomial(1, np.exp(ll[n, :]-logsumexp(ll[n, :]))).argmax() for n in range(self.n)]) + le = LabelEncoder() + Z = le.fit_transform(Z) + return Z + + def pvalue_onblocks(self, Z_proposal): + """ + Calculate p value on given partitions of the data. + + Parameters + ---------- + Z_proposal: partition of the data into K clusters (nxd1 array) + + Returns + _________ + test_stat: test statistic (scalar) + null_samples: bootstrapped sampled of the null distribution (self.nullssx1 array) + log_likelihood: estimated probability P(X,Y|Z,k) + """ + unique_Z_j = np.unique(Z_proposal) + test_stat = 0 + log_likelihood = 0 + null_samples = np.zeros((1, self.nullss)) + for k in unique_Z_j: + K_mask = (Z_proposal == k) + X_k = np.copy(self.data[0][K_mask]) + Y_k = np.copy(self.data[1][K_mask]) + Z_k = np.copy(self.data_z[K_mask]) + if (Z_k.shape[0] < 6): # small blocks cause problems in GP + continue + Kx, Ky, Kzx, Kzy, epsilon_x, epsilon_y, likelihood_x, likelihood_y = self.kernel_matrix(X_k, Y_k, Z_k) + KxR, Rzx = Kernel.center_kernel_matrix_regression(Kx, Kzx, epsilon_x) + if epsilon_x != epsilon_y: + KyR, Rzy = Kernel.center_kernel_matrix_regression(Ky, Kzy, epsilon_y) + else: + KyR = Rzx.dot(Ky.dot(Rzx)) + test_stat += np.einsum('ij,ji->', KxR, KyR) + uu_prod, size_u = self.get_uuprod(KxR, KyR) + null_samples += self.null_sample_spectral(uu_prod, size_u, Kx.shape[0]) + log_likelihood += likelihood_x + likelihood_y + return test_stat, null_samples, log_likelihood + + def kernel_matrix(self, data_x, data_y, data_z): + """ + Calculates the Gaussian Kernel for given data inputs as well as the shared kernel. 
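The centering applied to Kx and Ky below is ordinary kernel double-centering; a standalone sketch of the operation (the helper name here is hypothetical, mirroring what Kernel.center_kernel_matrix is expected to compute):

```
# Standalone sketch of kernel double-centering: K_c = H K H, H = I - (1/n) 11^T.
import numpy as np

def center_kernel(K):
    n = K.shape[0]
    H = np.eye(n) - np.full((n, n), 1.0 / n)
    return H @ K @ H

A = np.random.rand(5, 3)
K = A @ A.T                              # a valid (PSD) Gram matrix
Kc = center_kernel(K)
print(np.allclose(Kc.sum(axis=0), 0.0))  # True: centered rows/columns sum to ~0
```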
+ + Returns + _________ + K: Kernel matrices (n_kxn_k array) + """ + kernel_obj = GaussianKernel() + kernel_obj.set_width_empirical_kci(data_z) + + data_x = stats.zscore(data_x, ddof=1, axis=0) + data_x[np.isnan(data_x)] = 0. + + data_y = stats.zscore(data_y, ddof=1, axis=0) + data_y[np.isnan(data_y)] = 0. + + data_z = stats.zscore(data_z, ddof=1, axis=0) + data_z[np.isnan(data_z)] = 0. + + data_x = np.concatenate((data_x, 0.5 * data_z), axis=1) + + kernelX = GaussianKernel() + kernelX.set_width_empirical_kci(data_z) + kernelY = GaussianKernel() + kernelY.set_width_empirical_kci(data_z) + + Kx = kernelX.kernel(data_x) + Ky = kernelY.kernel(data_y) + + # centering kernel matrix + Kx = Kernel.center_kernel_matrix(Kx) + Ky = Kernel.center_kernel_matrix(Ky) + + if not self.use_gp: + kernelZ = GaussianKernel() + kernelZ.set_width_empirical_kci(data_z) + Kzx = kernelZ.kernel(data_z) + Kzx = Kernel.center_kernel_matrix(Kzx) + Kzy = Kzx + epsilon_x, epsilon_y = self.epsilon, self.epsilon + gpx = GaussianProcessRegressor() + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=Warning) + # P(X|Z) + gpx.fit(X=data_z, y=data_x) + likelihood_x = gpx.log_marginal_likelihood_value_ + gpy = GaussianProcessRegressor() + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=Warning) + # P(Y|X,Z) + gpy.fit(X=np.c_[data_x, data_z], y=data_y) + likelihood_y = gpy.log_marginal_likelihood_value_ + + else: + n, Dz = data_z.shape + + widthz = np.sqrt(1.0 / (kernelX.width * data_x.shape[1])) + + # Instantiate a Gaussian Process model for x + wx, vx = eigh(Kx) + topkx = int(np.max([np.min([400, np.floor(n / 4)]), np.min([n+1, 8])])) + idx = np.argsort(-wx) + wx = wx[idx] + vx = vx[:, idx] + wx = wx[0:topkx] + vx = vx[:, 0:topkx] + vx = vx[:, np.abs(wx) > np.abs(wx).max() * 1e-10] + wx = wx[np.abs(wx) > np.abs(wx).max() * 1e-10] + vx = 2 * np.sqrt(n) * vx.dot(np.diag(np.sqrt(wx))) / np.sqrt(wx[0]) + kernelx = ConstantKernel(1.0, (1e-3, 1e3)) * RBF(widthz * np.ones(Dz), (1e-2, 1e2)) \ + + WhiteKernel(0.1, (1e-10, 1e+1)) + gpx = GaussianProcessRegressor(kernel=kernelx) + # fit Gaussian process, including hyperparameter optimization + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=Warning) + gpx.fit(data_z, vx) + + # construct Gaussian kernels according to learned hyperparameters + Kzx = gpx.kernel_.k1(data_z, data_z) + epsilon_x = np.exp(gpx.kernel_.theta[-1]) + likelihood_x = gpx.log_marginal_likelihood_value_ + + # Instantiate a Gaussian Process model for y + wy, vy = eigh(Ky) + topky = int(np.max([np.min([400, np.floor(n / 4)]), np.min([n+1, 8])])) + idy = np.argsort(-wy) + wy = wy[idy] + vy = vy[:, idy] + wy = wy[0:topky] + vy = vy[:, 0:topky] + vy = vy[:, np.abs(wy) > np.abs(wy).max() * 1e-10] + wy = wy[np.abs(wy) > np.abs(wy).max() * 1e-10] + vy = 2 * np.sqrt(n) * vy.dot(np.diag(np.sqrt(wy))) / np.sqrt(wy[0]) + kernely = ConstantKernel(1.0, (1e-3, 1e3)) * RBF(widthz * np.ones(Dz), (1e-2, 1e2)) \ + + WhiteKernel(0.1, (1e-10, 1e+1)) + gpy = GaussianProcessRegressor(kernel=kernely) + # fit Gaussian process, including hyperparameter optimization + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=Warning) + gpy.fit(data_z, vy) + + # construct Gaussian kernels according to learned hyperparameters + Kzy = gpy.kernel_.k1(data_z, data_z) + epsilon_y = np.exp(gpy.kernel_.theta[-1]) + likelihood_y = gpy.log_marginal_likelihood_value_ + + return Kx, Ky, Kzx, Kzy, epsilon_x, epsilon_y, likelihood_x, likelihood_y + + def 
+
+    def get_uuprod(self, Kx, Ky):
+        """
+        Compute the eigenvector products needed to estimate the null distribution.
+
+        Parameters
+        ----------
+        Kx: centralized kernel matrix for data_x (nxn)
+        Ky: centralized kernel matrix for data_y (nxn)
+
+        Returns
+        -------
+        uu_prod: product of the eigenvectors of Kx and Ky
+        size_u: number of produced eigenvectors
+        """
+        n_block = Kx.shape[0]
+        wx, vx = eigh(Kx)
+        wy, vy = eigh(Ky)
+        idx = np.argsort(-wx)
+        idy = np.argsort(-wy)
+        wx = wx[idx]
+        vx = vx[:, idx]
+        wy = wy[idy]
+        vy = vy[:, idy]
+        vx = vx[:, wx > np.max(wx) * self.eig_thresh]
+        wx = wx[wx > np.max(wx) * self.eig_thresh]
+        vy = vy[:, wy > np.max(wy) * self.eig_thresh]
+        wy = wy[wy > np.max(wy) * self.eig_thresh]
+        vx = vx.dot(np.diag(np.sqrt(wx)))
+        vy = vy.dot(np.diag(np.sqrt(wy)))
+
+        # calculate the product of every pair of eigenvectors
+        num_eigx = vx.shape[1]
+        num_eigy = vy.shape[1]
+        size_u = num_eigx * num_eigy
+        uu = np.zeros((n_block, size_u))
+        for i in range(0, num_eigx):
+            for j in range(0, num_eigy):
+                uu[:, i * num_eigy + j] = vx[:, i] * vy[:, j]
+
+        if size_u > self.n:
+            uu_prod = uu.dot(uu.T)
+        else:
+            uu_prod = uu.T.dot(uu)
+
+        return uu_prod, size_u
+
+    def get_kappa(self, mean_appr, var_appr):
+        """
+        Get the parameters of the approximating gamma distribution.
+
+        Parameters
+        ----------
+        mean_appr: mean of the null distribution approximation (scalar)
+        var_appr: variance of the null distribution approximation (scalar)
+
+        Returns
+        -------
+        k_appr, theta_appr: approximated parameters of the gamma distribution
+        """
+        k_appr = mean_appr ** 2 / var_appr
+        theta_appr = var_appr / mean_appr
+        return k_appr, theta_appr
+
+    def null_sample_spectral(self, uu_prod, size_u, T):
+        """
+        Simulate samples from the null distribution.
+
+        Parameters
+        ----------
+        uu_prod: product of the eigenvectors of Kx and Ky
+        size_u: number of produced eigenvectors
+        T: sample size
+
+        Returns
+        -------
+        null_dstr: samples from the null distribution
+        """
+        eig_uu = np.linalg.eigvalsh(uu_prod)
+        eig_uu = -np.sort(-eig_uu)
+        eig_uu = eig_uu[0:np.min((T, size_u))]
+        eig_uu = eig_uu[eig_uu > np.max(eig_uu) * self.eig_thresh]
+
+        f_rand = np.random.chisquare(1, (eig_uu.shape[0], self.nullss))
+        null_dstr = eig_uu.T.dot(f_rand)
+        return null_dstr
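For readers of this patch, here is a minimal usage sketch of the new conditional test on synthetic data. It is illustrative only and not part of the PR; the constructor arguments `K`, `J` mirror those exercised in `tests/TestCIT_FastKCI.py`, and the import path assumes this branch is installed.

```python
import numpy as np
from causallearn.utils.FastKCI.FastKCI import FastKCI_CInd

np.random.seed(0)
n = 600
Z = np.random.randn(n, 1)
X = Z + 0.3 * np.random.randn(n, 1)  # X and Y depend on each other only through Z
Y = Z + 0.3 * np.random.randn(n, 1)

test = FastKCI_CInd(K=5, J=4)  # 5 mixture components, 4 random partitions
pvalue, test_stat = test.compute_pvalue(X, Y, Z)
print(pvalue)  # X is independent of Y given Z by construction, so p should typically be large
```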
+
+
+class FastKCI_UInd(object):
+    """
+    Python implementation of a speed-up version of the Kernel-based Conditional Independence (KCI) test,
+    unconditional version.
+
+    References
+    ----------
+    [1] K. Zhang, J. Peters, D. Janzing, and B. Schölkopf,
+    "A kernel-based conditional independence test and application in causal discovery," In UAI 2011.
+    [2] M. Zhang, and S. Williamson,
+    "Embarrassingly Parallel Inference for Gaussian Processes" In JMLR 20 (2019)
+    [3] O. Schacht, and B. Huang
+    "FastKCI: A fast Kernel-based Conditional Independence test with application to causal discovery",
+    Working Paper.
+    """
+    def __init__(self, K=10, J=8, alpha=500):
+        """
+        Construct the FastKCI_UInd model.
+
+        Parameters
+        ----------
+        K: Number of Gaussians that are assumed to be in the mixture.
+        J: Number of independent repetitions.
+        alpha: Parameter for the Dirichlet distribution.
+        """
+        self.K = K
+        self.J = J
+        self.alpha = alpha
+        self.nullss = 5000
+        self.eig_thresh = 1e-5
+
+    def compute_pvalue(self, data_x=None, data_y=None):
+        """
+        Main function: compute the p value and return it together with the test statistic.
+
+        Parameters
+        ----------
+        data_x: input data for x (nxd1 array)
+        data_y: input data for y (nxd2 array)
+
+        Returns
+        -------
+        pvalue: p value (scalar)
+        test_stat: test statistic (scalar)
+        """
+        self.data_x = data_x
+        self.data_y = data_y
+        self.n = data_x.shape[0]
+
+        Z_proposal = Parallel(n_jobs=-1)(delayed(self.partition_data)() for i in range(self.J))
+        self.Z_proposal, self.prob_Y = zip(*Z_proposal)
+        block_res = Parallel(n_jobs=-1)(delayed(self.pvalue_onblocks)(self.Z_proposal[i]) for i in range(self.J))
+        test_stat, null_samples, log_likelihood = zip(*block_res)
+        self.prob_weights = np.around(np.exp(log_likelihood-logsumexp(log_likelihood)), 6)
+        self.test_stat = np.average(np.array(test_stat), weights=self.prob_weights)
+        self.null_samples = np.average(null_samples, axis=0, weights=self.prob_weights)
+        self.pvalue = np.sum(self.null_samples > self.test_stat) / float(self.nullss)
+
+        return self.pvalue, self.test_stat
+
+    def partition_data(self):
+        """
+        Partitions the data by drawing cluster assignments from a randomly initialized
+        Gaussian mixture on Y.
+
+        Returns
+        -------
+        Z: Latent Gaussian that was drawn for each observation (nx1 array)
+        prob_Y: log-likelihood of that random assignment (scalar)
+        """
+        Y_mean = self.data_y.mean(axis=0)
+        Y_sd = self.data_y.std(axis=0)
+        mu_k = np.random.normal(Y_mean, Y_sd, size=(self.K, self.data_y.shape[1]))
+        sigma_k = np.eye(self.data_y.shape[1])
+        pi_j = np.random.dirichlet([self.alpha]*self.K)
+        ll = np.tile(np.log(pi_j), (self.n, 1))
+        for k in range(self.K):
+            ll[:, k] += stats.multivariate_normal.logpdf(self.data_y, mu_k[k, :], cov=sigma_k, allow_singular=True)
+        Z = np.array([np.random.multinomial(1, np.exp(ll[n, :]-logsumexp(ll[n, :]))).argmax() for n in range(self.n)])
+        prob_Y = np.take_along_axis(ll, Z[:, None], axis=1).sum()
+        le = LabelEncoder()
+        Z = le.fit_transform(Z)
+        return (Z, prob_Y)
+
+    def pvalue_onblocks(self, Z_proposal):
+        """
+        Calculate the p value on a given partition of the data.
+
+        Parameters
+        ----------
+        Z_proposal: partition of the data into at most K clusters (nx1 array)
+
+        Returns
+        -------
+        test_stat: test statistic (scalar)
+        null_samples: bootstrapped samples of the null distribution (1 x self.nullss array)
+        log_likelihood: estimated log-likelihood log P(X|Y,k) (scalar)
+        """
+        unique_Z_j = np.unique(Z_proposal)
+        test_stat = 0
+        log_likelihood = 0
+        null_samples = np.zeros((1, self.nullss))
+        for k in unique_Z_j:
+            K_mask = (Z_proposal == k)
+            X_k = np.copy(self.data_x[K_mask])
+            Y_k = np.copy(self.data_y[K_mask])
+
+            Kx = self.kernel_matrix(X_k)
+            Ky = self.kernel_matrix(Y_k)
+
+            v_stat, Kxc, Kyc = self.HSIC_V_statistic(Kx, Ky)
+
+            null_samples += self.null_sample_spectral(Kxc, Kyc)
+            test_stat += v_stat
+
+            gpx = GaussianProcessRegressor()
+            with warnings.catch_warnings():
+                warnings.filterwarnings("ignore", category=Warning)
+                # P(X|Y)
+                gpx.fit(X=Y_k, y=X_k)
+            likelihood = gpx.log_marginal_likelihood_value_
+            log_likelihood += likelihood
+
+        return test_stat, null_samples, log_likelihood
+
+    def kernel_matrix(self, data):
+        """
+        Calculates the Gaussian kernel matrix for the given data input.
+
+        Returns
+        -------
+        K: kernel matrix (n_k x n_k array)
+        """
+        kernel_obj = GaussianKernel()
+        kernel_obj.set_width_empirical_hsic(data)
+
+        data = stats.zscore(data, ddof=1, axis=0)
+        data[np.isnan(data)] = 0.
+
+        K = kernel_obj.kernel(data)
+
+        return K
+
+    def get_kappa(self, mean_appr, var_appr):
+        """
+        Get the parameters of the approximating gamma distribution.
+
+        Parameters
+        ----------
+        mean_appr: mean of the null distribution approximation (scalar)
+        var_appr: variance of the null distribution approximation (scalar)
+
+        Returns
+        -------
+        k_appr, theta_appr: approximated parameters of the gamma distribution
+        """
+        k_appr = mean_appr ** 2 / var_appr
+        theta_appr = var_appr / mean_appr
+        return k_appr, theta_appr
+
+    def null_sample_spectral(self, Kxc, Kyc):
+        """
+        Simulate samples from the null distribution.
+
+        Parameters
+        ----------
+        Kxc: centralized kernel matrix for data_x (nxn)
+        Kyc: centralized kernel matrix for data_y (nxn)
+
+        Returns
+        -------
+        null_dstr: samples from the null distribution
+        """
+        T = Kxc.shape[0]
+        if T > 1000:
+            num_eig = int(np.floor(T / 2))
+        else:
+            num_eig = T
+        lambdax = np.linalg.eigvalsh(Kxc)
+        lambday = np.linalg.eigvalsh(Kyc)
+        lambdax = -np.sort(-lambdax)
+        lambday = -np.sort(-lambday)
+        lambdax = lambdax[0:num_eig]
+        lambday = lambday[0:num_eig]
+        lambda_prod = np.dot(lambdax.reshape(num_eig, 1), lambday.reshape(1, num_eig)).reshape(
+            (num_eig ** 2, 1))
+        lambda_prod = lambda_prod[lambda_prod > lambda_prod.max() * self.eig_thresh]
+        f_rand = np.random.chisquare(1, (lambda_prod.shape[0], self.nullss))
+        null_dstr = lambda_prod.T.dot(f_rand) / T
+        return null_dstr
+
+    def HSIC_V_statistic(self, Kx, Ky):
+        """
+        Compute the V test statistic from kernel matrices Kx and Ky.
+
+        Parameters
+        ----------
+        Kx: kernel matrix for data_x (nxn)
+        Ky: kernel matrix for data_y (nxn)
+
+        Returns
+        -------
+        V_stat: HSIC V statistic (scalar)
+        Kxc: centralized kernel matrix for data_x (nxn)
+        Kyc: centralized kernel matrix for data_y (nxn)
+        """
+        Kxc = Kernel.center_kernel_matrix(Kx)
+        Kyc = Kernel.center_kernel_matrix(Ky)
+        V_stat = np.einsum('ij,ij->', Kxc, Kyc)
+        return V_stat, Kxc, Kyc
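A matching sketch for the unconditional variant (illustrative only, not part of the patch; constructor arguments follow the `__init__` defaults shown above):

```python
import numpy as np
from causallearn.utils.FastKCI.FastKCI import FastKCI_UInd

np.random.seed(1)
X = np.random.randn(500, 1)
Y = X + 0.5 * np.random.randn(500, 1)  # a dependent pair

test = FastKCI_UInd(K=10, J=8)
pvalue, test_stat = test.compute_pvalue(X, Y)
print(pvalue)  # X and Y are dependent, so p should typically be close to 0
```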
diff --git a/causallearn/utils/FastKCI/__init__.py b/causallearn/utils/FastKCI/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/causallearn/utils/KCI/KCI.py b/causallearn/utils/KCI/KCI.py
index 40a33452..681d48d4 100644
--- a/causallearn/utils/KCI/KCI.py
+++ b/causallearn/utils/KCI/KCI.py
@@ -191,7 +191,7 @@ def null_sample_spectral(self, Kxc, Kyc):
         """
         T = Kxc.shape[0]
         if T > 1000:
-            num_eig = np.int(np.floor(T / 2))
+            num_eig = int(np.floor(T / 2))
         else:
             num_eig = T
         lambdax = eigvalsh(Kxc)
@@ -306,8 +306,8 @@ def compute_pvalue(self, data_x=None, data_y=None, data_z=None):
             k_appr, theta_appr = self.get_kappa(uu_prod)
             pvalue = 1 - stats.gamma.cdf(test_stat, k_appr, 0, theta_appr)
         else:
-            null_samples = self.null_sample_spectral(uu_prod, size_u, Kx.shape[0])
-            pvalue = sum(null_samples > test_stat) / float(self.nullss)
+            self.null_samples = self.null_sample_spectral(uu_prod, size_u, Kx.shape[0])
+            pvalue = sum(self.null_samples > test_stat) / float(self.nullss)
         return pvalue, test_stat

     def kernel_matrix(self, data_x, data_y, data_z):
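Both `null_sample_spectral` implementations touched here follow the same recipe: the null distribution of the statistic is approximated by a weighted sum of independent chi-square(1) draws, with kernel eigenvalues as the weights. A standalone sketch of the idea, using hypothetical eigenvalues:

```python
import numpy as np

# Spectral null sampling: draw from sum_i lambda_i * chi^2_1
# given eigenvalues lambda_i (hypothetical values for illustration).
eigenvalues = np.array([2.0, 1.0, 0.5, 0.1])
nullss = 5000
f_rand = np.random.chisquare(1, (eigenvalues.shape[0], nullss))
null_dstr = eigenvalues.dot(f_rand)   # one null draw per column
p_value = np.mean(null_dstr > 3.2)    # compare an observed statistic, e.g. 3.2
print(p_value)
```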
diff --git a/causallearn/utils/PCUtils/BackgroundKnowledge.py b/causallearn/utils/PCUtils/BackgroundKnowledge.py
index 4c6bca0d..19248d48 100644
--- a/causallearn/utils/PCUtils/BackgroundKnowledge.py
+++ b/causallearn/utils/PCUtils/BackgroundKnowledge.py
@@ -165,7 +165,7 @@ def is_forbidden(self, node1: Node, node2: Node) -> bool:

         # then check in tier_map
         if self.tier_value_map.keys().__contains__(node1) and self.tier_value_map.keys().__contains__(node2):
-            if self.tier_value_map.get(node1) >= self.tier_value_map.get(node2):
+            if self.tier_value_map.get(node1) > self.tier_value_map.get(node2):  # Allow orientation within the same tier
                 return True

         return False
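The tier change above relaxes `is_forbidden` so that only orientations from a strictly later tier into an earlier tier are forbidden. A sketch of the intended semantics, assuming the pre-existing `add_node_to_tier` API of `BackgroundKnowledge` (illustrative only, not part of the patch):

```python
from causallearn.graph.GraphNode import GraphNode
from causallearn.utils.PCUtils.BackgroundKnowledge import BackgroundKnowledge

a, b, c = GraphNode("A"), GraphNode("B"), GraphNode("C")
bk = BackgroundKnowledge()
bk.add_node_to_tier(a, 1)
bk.add_node_to_tier(b, 1)  # same tier as A
bk.add_node_to_tier(c, 2)

print(bk.is_forbidden(a, b))  # False after this fix: edges within a tier are allowed
print(bk.is_forbidden(c, a))  # True: a later tier still cannot point into an earlier one
```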
diff --git a/causallearn/utils/PCUtils/SkeletonDiscovery.py b/causallearn/utils/PCUtils/SkeletonDiscovery.py
index 6cfaa492..428d2cee 100644
--- a/causallearn/utils/PCUtils/SkeletonDiscovery.py
+++ b/causallearn/utils/PCUtils/SkeletonDiscovery.py
@@ -120,8 +120,9 @@ def skeleton_discovery(
                 else:
                     if verbose:
                         print('%d dep %d | %s with p-value %f\n' % (x, y, S, p))
-                    append_value(cg.sepset, x, y, tuple(sepsets))
-                    append_value(cg.sepset, y, x, tuple(sepsets))
+                    if (x, y) in edge_removal or not cg.G.get_edge(cg.G.nodes[x], cg.G.nodes[y]):
+                        append_value(cg.sepset, x, y, tuple(sepsets))
+                        append_value(cg.sepset, y, x, tuple(sepsets))
         if show_progress:
             pbar.refresh()
diff --git a/causallearn/utils/RCIT/RCIT.py b/causallearn/utils/RCIT/RCIT.py
new file mode 100644
index 00000000..11608ed0
--- /dev/null
+++ b/causallearn/utils/RCIT/RCIT.py
@@ -0,0 +1,403 @@
+import numpy as np
+import itertools
+from scipy.stats import chi2
+from scipy.linalg import pinv
+from momentchi2 import hbe, sw, lpb4
+from scipy.spatial.distance import pdist, squareform
+
+
+class RCIT(object):
+    """
+    Python implementation of the Randomized Conditional Independence Test (RCIT).
+    The original R implementation can be found at https://github.com/ericstrobl/RCIT/tree/master
+
+    References
+    ----------
+    [1] Strobl, E. V., Zhang, K., and Visweswaran, S. (2019). "Approximate kernel-based conditional
+    independence tests for fast non-parametric causal discovery." Journal of Causal Inference, 7(1), 20180017.
+    """
+    def __init__(self, approx="lpd4", num_f=100, num_f2=5, rcit=True):
+        """
+        Initialize the RCIT object.
+
+        Parameters
+        ----------
+        approx : str
+            Method for approximating the null distribution.
+            - "lpd4" for the Lindsay-Pilla-Basak method
+            - "hbe" for the Hall-Buckley-Eagleson method
+            - "gamma" for the Satterthwaite-Welch method
+            - "chi2" for a normalized chi-squared statistic
+            - "perm" for permutation testing
+            Default is "lpd4".
+        num_f : int
+            Number of features for the conditioning set. Default is 100.
+        num_f2 : int
+            Number of features for the non-conditioning sets. Default is 5.
+        rcit : bool
+            Whether to use RCIT or RCoT. Default is True.
+        """
+        self.approx = approx
+        self.num_f = num_f
+        self.num_f2 = num_f2
+        self.rcit = rcit
+
+    def compute_pvalue(self, data_x, data_y, data_z):
+        """
+        Compute the p value and return it together with the test statistic.
+
+        Parameters
+        ----------
+        data_x: input data for x (nxd1 array)
+        data_y: input data for y (nxd2 array)
+        data_z: input data for z (nxd3 array)
+
+        Returns
+        -------
+        p: p value
+        sta: test statistic
+        """
+        d = data_z.shape[1]
+        r = data_x.shape[0]
+        r1 = 500 if (r > 500) else r
+
+        data_x = (data_x - data_x.mean(axis=0)) / data_x.std(axis=0, ddof=1)
+        data_y = (data_y - data_y.mean(axis=0)) / data_y.std(axis=0, ddof=1)
+        data_z = (data_z - data_z.mean(axis=0)) / data_z.std(axis=0, ddof=1)
+
+        if self.rcit:
+            data_y = np.column_stack((data_y, data_z))
+
+        sigma = dict()
+        for key, value in [("x", data_x), ("y", data_y), ("z", data_z)]:
+            distances = pdist(value[:r1, :], metric='euclidean')
+            flattened_distances = squareform(distances).ravel()
+            non_zero_distances = flattened_distances[flattened_distances != 0]
+            sigma[key] = np.median(non_zero_distances)
+
+        four_z = self.random_fourier_features(data_z, num_f=self.num_f, sigma=sigma["z"])
+        four_x = self.random_fourier_features(data_x, num_f=self.num_f2, sigma=sigma["x"])
+        four_y = self.random_fourier_features(data_y, num_f=self.num_f2, sigma=sigma["y"])
+
+        f_x = (four_x - four_x.mean(axis=0)) / four_x.std(axis=0, ddof=1)
+        f_y = (four_y - four_y.mean(axis=0)) / four_y.std(axis=0, ddof=1)
+        f_z = (four_z - four_z.mean(axis=0)) / four_z.std(axis=0, ddof=1)
+
+        Cxy = f_x.T @ f_y / (f_x.shape[0] - 1)
+
+        Czz = np.cov(f_z, rowvar=False)
+
+        regularized_Czz = Czz + np.eye(self.num_f) * 1e-10
+        L = np.linalg.cholesky(regularized_Czz)
+        i_Czz = np.linalg.inv(L).T.dot(np.linalg.inv(L))
+
+        Cxz = f_x.T @ f_z / (f_x.shape[0] - 1)
+        Czy = f_z.T @ f_y / (f_z.shape[0] - 1)
+
+        z_i_Czz = f_z @ i_Czz
+        e_x_z = z_i_Czz @ Cxz.T
+        e_y_z = z_i_Czz @ Czy
+
+        res_x = f_x - e_x_z
+        res_y = f_y - e_y_z
+
+        if self.num_f2 == 1:
+            self.approx = "hbe"
+
+        if self.approx == "perm":
+            Cxy_z = self.matrix_cov(res_x, res_y)
+            sta = r * np.sum(Cxy_z**2)
+
+            nperm = 1000
+
+            stas = []
+            for _ in range(nperm):
+                perm = np.random.choice(np.arange(r), size=r, replace=False)
+                Cxy = self.matrix_cov(res_x[perm, ], res_y)
+                sta_p = r * np.sum(Cxy**2)
+                stas.append(sta_p)
+
+            p = 1 - (np.sum(sta >= stas) / len(stas))
+
+        else:
+            Cxy_z = Cxy - Cxz @ i_Czz @ Czy
+            sta = r * np.sum(Cxy_z**2)
+
+            d = list(itertools.product(range(f_x.shape[1]), range(f_y.shape[1])))
+            res = np.array([res_x[:, idx_x] * res_y[:, idx_y] for idx_x, idx_y in d]).T
+            Cov = 1/r * res.T @ res
+
+            if self.approx == "chi2":
+                i_Cov = pinv(Cov)
+
+                sta = r * (np.dot(Cxy_z.flatten(), np.dot(i_Cov, Cxy_z.flatten())))
+                p = 1 - chi2.cdf(sta, Cxy_z.size)
+
+            else:
+                eigenvalues, eigenvectors = np.linalg.eigh(Cov)
+                eig_d = eigenvalues[eigenvalues > 0]
+
+                if self.approx == "gamma":
+                    p = 1 - sw(eig_d, sta)
+
+                elif self.approx == "hbe":
+                    p = 1 - hbe(eig_d, sta)
+
+                elif self.approx == "lpd4":
+                    try:
+                        p = 1 - lpb4(eig_d, sta)
+                    except Exception:
+                        p = 1 - hbe(eig_d, sta)
+                    if np.isnan(p):
+                        p = 1 - hbe(eig_d, sta)
+
+        if (p < 0):
+            p = 0
+
+        return p, sta
+
+    def random_fourier_features(self, x, w=None, b=None, num_f=None, sigma=None):
+        """
+        Generate random Fourier features.
+
+        Parameters
+        ----------
+        x : np.ndarray
+            Random variable x.
+        w : np.ndarray
+            Random coefficients.
+        b : np.ndarray
+            Random offsets.
+        num_f : int
+            Number of random Fourier features.
+        sigma : float
+            Smoothing parameter of the RBF kernel.
+
+        Returns
+        -------
+        feat : np.ndarray
+            Random Fourier features.
+        """
+        if num_f is None:
+            num_f = 25
+
+        r = x.shape[0]
+        c = x.shape[1]
+
+        if ((sigma == 0) | (sigma is None)):
+            sigma = 1
+
+        if w is None:
+            w = (1/sigma) * np.random.normal(size=(num_f, c))
+            b = np.tile(2*np.pi*np.random.uniform(size=(num_f, 1)), (1, r))
+
+        feat = np.sqrt(2) * (np.cos(w[0:num_f, 0:c] @ x.T + b[0:num_f, :])).T
+
+        return feat
+
+    def matrix_cov(self, mat_a, mat_b):
+        """
+        Compute the covariance matrix between two matrices.
+        Equivalent to ``cov()`` between two matrices in R.
+
+        Parameters
+        ----------
+        mat_a : np.ndarray
+            First data matrix.
+        mat_b : np.ndarray
+            Second data matrix.
+
+        Returns
+        -------
+        mat_cov : np.ndarray
+            Covariance matrix.
+        """
+        n_obs = mat_a.shape[0]
+
+        assert mat_a.shape == mat_b.shape
+        mat_a = mat_a - mat_a.mean(axis=0)
+        mat_b = mat_b - mat_b.mean(axis=0)
+
+        mat_cov = mat_a.T @ mat_b / (n_obs - 1)
+
+        return mat_cov
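A minimal usage sketch of the new RCIT class on synthetic data where X is independent of Y given Z (illustrative only, not part of the patch; the constructor arguments are exactly those defined in `__init__` above):

```python
import numpy as np
from causallearn.utils.RCIT.RCIT import RCIT

np.random.seed(2)
Z = np.random.randn(400, 1)
X = Z + 0.3 * np.random.randn(400, 1)
Y = Z + 0.3 * np.random.randn(400, 1)

rcit = RCIT(approx="lpd4", num_f=100, num_f2=5, rcit=True)
p, sta = rcit.compute_pvalue(X, Y, Z)
print(p)  # X and Y are independent given Z, so p should typically be large
```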
+
+
+class RIT(object):
+    """
+    Python implementation of the Randomized Independence Test (RIT).
+    The original R implementation can be found at https://github.com/ericstrobl/RCIT/tree/master
+
+    References
+    ----------
+    [1] Strobl, E. V., Zhang, K., and Visweswaran, S. (2019). "Approximate kernel-based conditional
+    independence tests for fast non-parametric causal discovery." Journal of Causal Inference, 7(1), 20180017.
+    """
+    def __init__(self, approx="lpd4"):
+        """
+        Initialize the RIT object.
+
+        Parameters
+        ----------
+        approx : str
+            Method for approximating the null distribution.
+            - "lpd4" for the Lindsay-Pilla-Basak method
+            - "hbe" for the Hall-Buckley-Eagleson method
+            - "gamma" for the Satterthwaite-Welch method
+            - "chi2" for a normalized chi-squared statistic
+            - "perm" for permutation testing
+            Default is "lpd4".
+        """
+        self.approx = approx
+
+    def compute_pvalue(self, data_x, data_y):
+        """
+        Compute the p value and return it together with the test statistic.
+
+        Parameters
+        ----------
+        data_x: input data for x (nxd1 array)
+        data_y: input data for y (nxd2 array)
+
+        Returns
+        -------
+        p: p value
+        sta: test statistic
+        """
+        r = data_x.shape[0]
+        r1 = 500 if (r > 500) else r
+
+        data_x = (data_x - data_x.mean(axis=0)) / data_x.std(axis=0, ddof=1)
+        data_y = (data_y - data_y.mean(axis=0)) / data_y.std(axis=0, ddof=1)
+
+        sigma = dict()
+        for key, value in [("x", data_x), ("y", data_y)]:
+            distances = pdist(value[:r1, :], metric='euclidean')
+            flattened_distances = squareform(distances).ravel()
+            non_zero_distances = flattened_distances[flattened_distances != 0]
+            sigma[key] = np.median(non_zero_distances)
+
+        four_x = self.random_fourier_features(data_x, num_f=5, sigma=sigma["x"])
+        four_y = self.random_fourier_features(data_y, num_f=5, sigma=sigma["y"])
+
+        f_x = (four_x - four_x.mean(axis=0)) / four_x.std(axis=0, ddof=1)
+        f_y = (four_y - four_y.mean(axis=0)) / four_y.std(axis=0, ddof=1)
+
+        Cxy = self.matrix_cov(f_x, f_y)
+        sta = r * np.sum(Cxy**2)
+
+        if self.approx == "perm":
+            nperm = 1000
+
+            stas = []
+            for _ in range(nperm):
+                perm = np.random.choice(np.arange(r), size=r, replace=False)
+                Cxy = self.matrix_cov(f_x[perm, ], f_y)
+                sta_p = r * np.sum(Cxy**2)
+                stas.append(sta_p)
+
+            p = 1 - (np.sum(sta >= stas) / len(stas))
+
+        else:
+            res_x = f_x - f_x.mean(axis=0)
+            res_y = f_y - f_y.mean(axis=0)
+
+            d = list(itertools.product(range(f_x.shape[1]), range(f_y.shape[1])))
+            res = np.array([res_x[:, idx_x] * res_y[:, idx_y] for idx_x, idx_y in d]).T
+            Cov = 1/r * res.T @ res
+
+            if self.approx == "chi2":
+                i_Cov = pinv(Cov)
+
+                sta = r * (np.dot(Cxy.flatten(), np.dot(i_Cov, Cxy.flatten())))
+                p = 1 - chi2.cdf(sta, Cxy.size)
+
+            else:
+                eigenvalues, eigenvectors = np.linalg.eigh(Cov)
+                eig_d = eigenvalues[eigenvalues > 0]
+
+                if self.approx == "gamma":
+                    p = 1 - sw(eig_d, sta)
+
+                elif self.approx == "hbe":
+                    p = 1 - hbe(eig_d, sta)
+
+                elif self.approx == "lpd4":
+                    try:
+                        p = 1 - lpb4(eig_d, sta)
+                    except Exception:
+                        p = 1 - hbe(eig_d, sta)
+                    if np.isnan(p):
+                        p = 1 - hbe(eig_d, sta)
+
+        if (p < 0):
+            p = 0
+
+        return p, sta
+
+    def random_fourier_features(self, x, w=None, b=None, num_f=None, sigma=None):
+        """
+        Generate random Fourier features.
+
+        Parameters
+        ----------
+        x : np.ndarray
+            Random variable x.
+        w : np.ndarray
+            Random coefficients.
+        b : np.ndarray
+            Random offsets.
+        num_f : int
+            Number of random Fourier features.
+        sigma : float
+            Smoothing parameter of the RBF kernel.
+
+        Returns
+        -------
+        feat : np.ndarray
+            Random Fourier features.
+        """
+        if num_f is None:
+            num_f = 25
+
+        r = x.shape[0]
+        c = x.shape[1]
+
+        if ((sigma == 0) | (sigma is None)):
+            sigma = 1
+
+        if w is None:
+            w = (1/sigma) * np.random.normal(size=(num_f, c))
+            b = np.tile(2*np.pi*np.random.uniform(size=(num_f, 1)), (1, r))
+
+        feat = np.sqrt(2) * (np.cos(w[0:num_f, 0:c] @ x.T + b[0:num_f, :])).T
+
+        return feat
+
+    def matrix_cov(self, mat_a, mat_b):
+        """
+        Compute the covariance matrix between two matrices.
+        Equivalent to ``cov()`` between two matrices in R.
+
+        Parameters
+        ----------
+        mat_a : np.ndarray
+            First data matrix.
+        mat_b : np.ndarray
+            Second data matrix.
+
+        Returns
+        -------
+        mat_cov : np.ndarray
+            Covariance matrix.
+        """
+        n_obs = mat_a.shape[0]
+
+        assert mat_a.shape == mat_b.shape
+        mat_a = mat_a - mat_a.mean(axis=0)
+        mat_b = mat_b - mat_b.mean(axis=0)
+
+        mat_cov = mat_a.T @ mat_b / (n_obs - 1)
+
+        return mat_cov
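RIT tests unconditional independence with the same random-Fourier-feature machinery: the Gaussian kernel is approximated by cosine features, and dependence is measured through the covariance of the two feature maps. A brief usage sketch (illustrative only, not part of the patch):

```python
import numpy as np
from causallearn.utils.RCIT.RCIT import RIT

np.random.seed(3)
X = np.random.randn(400, 1)
Y = X + 0.5 * np.random.randn(400, 1)

rit = RIT(approx="lpd4")
p, sta = rit.compute_pvalue(X, Y)
print(p)  # X and Y are dependent, so p should typically be near 0
```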
diff --git a/causallearn/utils/RCIT/__init__.py b/causallearn/utils/RCIT/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/causallearn/utils/ScoreUtils.py b/causallearn/utils/ScoreUtils.py
index e5af6ca1..6bd075aa 100644
--- a/causallearn/utils/ScoreUtils.py
+++ b/causallearn/utils/ScoreUtils.py
@@ -523,7 +523,7 @@ def cov_noise(logtheta=None, x=None, z=None, nargout=1):
 
 
 def cov_seard(loghyper=None, x=None, z=None, nargout=1):
-    # Squared Exponential covariance function with Automatic Relevance Detemination
+    # Squared Exponential covariance function with Automatic Relevance Determination
     # (ARD) distance measure. The covariance function is parameterized as:
     #
     # k(x^p,x^q) = sf2 * exp(-(x^p - x^q)'*inv(P)*(x^p - x^q)/2)
diff --git a/causallearn/utils/cit.py b/causallearn/utils/cit.py
index 8119dd72..fbc95e95 100644
--- a/causallearn/utils/cit.py
+++ b/causallearn/utils/cit.py
@@ -5,6 +5,9 @@
 from scipy.stats import chi2, norm
 
 from causallearn.utils.KCI.KCI import KCI_CInd, KCI_UInd
+from causallearn.utils.FastKCI.FastKCI import FastKCI_CInd, FastKCI_UInd
+from causallearn.utils.RCIT.RCIT import RCIT as RCIT_CInd
+from causallearn.utils.RCIT.RCIT import RIT as RCIT_UInd
 from causallearn.utils.PCUtils import Helper
 
 CONST_BINCOUNT_UNIQUE_THRESHOLD = 1e5
@@ -13,6 +16,8 @@
 mv_fisherz = "mv_fisherz"
 mc_fisherz = "mc_fisherz"
 kci = "kci"
+rcit = "rcit"
+fastkci = "fastkci"
 chisq = "chisq"
 gsq = "gsq"
 d_separation = "d_separation"
@@ -23,8 +28,8 @@ def CIT(data, method='fisherz', **kwargs):
     Parameters
     ----------
     data: numpy.ndarray of shape (n_samples, n_features)
-    method: str, in ["fisherz", "mv_fisherz", "mc_fisherz", "kci", "chisq", "gsq"]
-    kwargs: placeholder for future arguments, or for KCI specific arguments now
+    method: str, in ["fisherz", "mv_fisherz", "mc_fisherz", "kci", "rcit", "fastkci", "chisq", "gsq"]
+    kwargs: placeholder for future arguments, or for KCI, FastKCI, or RCIT specific arguments now
         TODO: utimately kwargs should be replaced by explicit named parameters.
              check https://github.com/cmu-phil/causal-learn/pull/62#discussion_r927239028
    '''
@@ -32,6 +37,10 @@ def CIT(data, method='fisherz', **kwargs):
        return FisherZ(data, **kwargs)
    elif method == kci:
        return KCI(data, **kwargs)
+    elif method == fastkci:
+        return FastKCI(data, **kwargs)
+    elif method == rcit:
+        return RCIT(data, **kwargs)
    elif method in [chisq, gsq]:
        return Chisq_or_Gsq(data, method_name=method, **kwargs)
    elif method == mv_fisherz:
@@ -43,6 +52,7 @@ def CIT(data, method='fisherz', **kwargs):
    else:
        raise ValueError("Unknown method: {}".format(method))
 
+
 class CIT_Base(object):
    # Base class for CIT, contains basic operations for input check and caching, etc.
     def __init__(self, data, cache_path=None, **kwargs):
@@ -193,6 +203,50 @@ def __call__(self, X, Y, condition_set=None):
         self.pvalue_cache[cache_key] = p
         return p
 
+class FastKCI(CIT_Base):
+    def __init__(self, data, **kwargs):
+        super().__init__(data, **kwargs)
+        kci_ui_kwargs = {k: v for k, v in kwargs.items() if k in
+                         ['K', 'J', 'alpha']}
+        kci_ci_kwargs = {k: v for k, v in kwargs.items() if k in
+                         ['K', 'J', 'alpha', 'use_gp']}
+        self.check_cache_method_consistent(
+            'kci', hashlib.md5(json.dumps(kci_ci_kwargs, sort_keys=True).encode('utf-8')).hexdigest())
+        self.assert_input_data_is_valid()
+        self.kci_ui = FastKCI_UInd(**kci_ui_kwargs)
+        self.kci_ci = FastKCI_CInd(**kci_ci_kwargs)
+
+    def __call__(self, X, Y, condition_set=None):
+        # Kernel-based conditional independence test.
+        Xs, Ys, condition_set, cache_key = self.get_formatted_XYZ_and_cachekey(X, Y, condition_set)
+        if cache_key in self.pvalue_cache: return self.pvalue_cache[cache_key]
+        p = self.kci_ui.compute_pvalue(self.data[:, Xs], self.data[:, Ys])[0] if len(condition_set) == 0 else \
+            self.kci_ci.compute_pvalue(self.data[:, Xs], self.data[:, Ys], self.data[:, condition_set])[0]
+        self.pvalue_cache[cache_key] = p
+        return p
+
+class RCIT(CIT_Base):
+    def __init__(self, data, **kwargs):
+        super().__init__(data, **kwargs)
+        rit_kwargs = {k: v for k, v in kwargs.items() if k in
+                      ['approx']}
+        rcit_kwargs = {k: v for k, v in kwargs.items() if k in
+                       ['approx', 'num_f', 'num_f2', 'rcit']}
+        self.check_cache_method_consistent(
+            'kci', hashlib.md5(json.dumps(rcit_kwargs, sort_keys=True).encode('utf-8')).hexdigest())
+        self.assert_input_data_is_valid()
+        self.rit = RCIT_UInd(**rit_kwargs)
+        self.rcit = RCIT_CInd(**rcit_kwargs)
+
+    def __call__(self, X, Y, condition_set=None):
+        # Randomized conditional independence test.
+        Xs, Ys, condition_set, cache_key = self.get_formatted_XYZ_and_cachekey(X, Y, condition_set)
+        if cache_key in self.pvalue_cache: return self.pvalue_cache[cache_key]
+        p = self.rit.compute_pvalue(self.data[:, Xs], self.data[:, Ys])[0] if len(condition_set) == 0 else \
+            self.rcit.compute_pvalue(self.data[:, Xs], self.data[:, Ys], self.data[:, condition_set])[0]
+        self.pvalue_cache[cache_key] = p
+        return p
+
 class Chisq_or_Gsq(CIT_Base):
     def __init__(self, data, method_name, **kwargs):
         def _unique(column):
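With the dispatcher entries above, both new tests are reachable through the unified `CIT` interface. A short sketch mirroring the call pattern used in the new tests (illustrative only, not part of the patch):

```python
import numpy as np
import causallearn.utils.cit as cit

np.random.seed(4)
data = np.random.randn(500, 3)
data[:, 1] += data[:, 0]   # X1 depends on X0
data[:, 2] += data[:, 1]   # X2 depends on X1

fastkci = cit.CIT(data, 'fastkci', K=10, J=8)
print(fastkci(0, 2, {2}))  # conditional query: column indices as in TestCIT_FastKCI.py

rcit = cit.CIT(data, 'rcit', approx='lpd4')
print(rcit(0, 1))          # unconditional query: X0 and X1 are dependent, p near 0
```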
diff --git a/docs/.readthedocs.yaml b/docs/.readthedocs.yaml
index 670a48c7..5e8dd497 100644
--- a/docs/.readthedocs.yaml
+++ b/docs/.readthedocs.yaml
@@ -2,12 +2,15 @@
 # Read the Docs configuration file
 # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
 
-# Required
 version: 2
 
-# Set the version of Python and other tools you might need
 build:
   os: ubuntu-22.04
   tools:
     python: "3.11"
+python:
+  install:
+    - requirements: docs/requirements.txt
+
+
diff --git a/docs/requirements.txt b/docs/requirements.txt
new file mode 100644
index 00000000..52b04f2e
--- /dev/null
+++ b/docs/requirements.txt
@@ -0,0 +1 @@
+sphinx_rtd_theme
\ No newline at end of file
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 4019298f..d649ff4c 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -14,7 +14,6 @@
 # import sys
 # sys.path.insert(0, os.path.abspath('.'))
 
-
 # -- Project information -----------------------------------------------------
 
 project = 'causal-learn'
@@ -22,7 +21,7 @@
 author = 'CLeaR'
 
 # The full version, including alpha/beta/rc tags
-release = '0.1.2.8'
+release = '0.1.3.6'
 
 # -- General configuration ---------------------------------------------------
diff --git a/docs/source/getting_started.rst b/docs/source/getting_started.rst
index f86de812..c306be55 100644
--- a/docs/source/getting_started.rst
+++ b/docs/source/getting_started.rst
@@ -67,7 +67,7 @@ Contributors
 
 **Developers**:
 
-- Wei Chen, Biwei Huang, Yuequn Liu, Zhiyi Huang, Feng Xie, Haoyue Dai: :ref:`PC `, :ref:`FCI `, :ref:`GES `, :ref:`GIN `, and :ref:`graph operations `.
+- Wei Chen, Biwei Huang, Yuequn Liu, Zhiyi Huang, Feng Xie, Haoyue Dai, Xiaokai Huang: :ref:`PC `, :ref:`FCI `, :ref:`GES `, :ref:`GIN `, and :ref:`graph operations `.
 - Mingming Gong, Erdun Gao, Aoqi Zuo: :ref:`PNL `, :ref:`ANM `, :ref:`Granger causality `, and :ref:`KCI `.
 - Shohei Shimizu, Takashi Nicholas Maeda, Takashi Ikeuchi: :ref:`LiNGAM-based methods `.
 - Madelyn Glymour: several helpers.
@@ -89,13 +89,15 @@ Please cite as:
 
 ..
code-block:: none - @article{causallearn, - title={Causal-learn: Causal Discovery in Python}, - author={Yujia Zheng and Biwei Huang and Wei Chen and Joseph Ramsey and Mingming Gong and Ruichu Cai and Shohei Shimizu and Peter Spirtes and Kun Zhang}, - journal={arXiv preprint arXiv:2307.16405}, - year={2023} + @article{zheng2024causal, + title={Causal-learn: Causal discovery in python}, + author={Zheng, Yujia and Huang, Biwei and Chen, Wei and Ramsey, Joseph and Gong, Mingming and Cai, Ruichu and Shimizu, Shohei and Spirtes, Peter and Zhang, Kun}, + journal={Journal of Machine Learning Research}, + volume={25}, + number={60}, + pages={1--8}, + year={2024} } - diff --git a/docs/source/search_methods_index/Constraint-based causal discovery methods/CDNOD.rst b/docs/source/search_methods_index/Constraint-based causal discovery methods/CDNOD.rst index c8603c39..2999b3e7 100644 --- a/docs/source/search_methods_index/Constraint-based causal discovery methods/CDNOD.rst +++ b/docs/source/search_methods_index/Constraint-based causal discovery methods/CDNOD.rst @@ -16,7 +16,7 @@ Usage from causallearn.search.ConstraintBased.CDNOD import cdnod # default parameters - cg = cdnod(data) + cg = cdnod(data, c_indx) # or customized parameters cg = cdnod(data, c_indx, alpha, indep_test, stable, uc_rule, uc_priority, mvcdnod, @@ -51,7 +51,7 @@ and n_features is the number of features. - ":ref:`kci `": kernel-based conditional independence test. (As a kernel method, its complexity is cubic in the sample size, so it might be slow if the same size is not small.) - ":ref:`mv_fisherz `": Missing-value Fisher's Z conditional independence test. -**stable**: run stabilized skeleton discovery if True. Default: True. +**stable**: run stabilized skeleton discovery [3]_ if True. Default: True. **uc_rule**: how unshielded colliders are oriented. Default: 0. - 0: run uc_sepset. @@ -82,4 +82,5 @@ Returns **cg** : a CausalGraph object, where cg.G.graph[j,i]=1 and cg.G.graph[i,j]=-1 indicate i --> j; cg.G.graph[i,j] = cg.G.graph[j,i] = -1 indicates i --- j; cg.G.graph[i,j] = cg.G.graph[j,i] = 1 indicates i <-> j. .. [1] Huang, B., Zhang, K., Zhang, J., Ramsey, J. D., Sanchez-Romero, R., Glymour, C., & Schölkopf, B. (2020). Causal Discovery from Heterogeneous/Nonstationary Data. J. Mach. Learn. Res., 21(89), 1-53. -.. [2] Ramsey, J. (2016). Improving accuracy and scalability of the pc algorithm by maximizing p-value. arXiv preprint arXiv:1610.00378. \ No newline at end of file +.. [2] Ramsey, J. (2016). Improving accuracy and scalability of the pc algorithm by maximizing p-value. arXiv preprint arXiv:1610.00378. +.. [3] Colombo, D., & Maathuis, M. H. (2014). Order-independent constraint-based causal structure learning. J. Mach. Learn. Res., 15(1), 3741-3782. 
\ No newline at end of file diff --git a/docs/source/search_methods_index/Constraint-based causal discovery methods/FCI.rst b/docs/source/search_methods_index/Constraint-based causal discovery methods/FCI.rst index 5c71f1ad..2ebadb57 100644 --- a/docs/source/search_methods_index/Constraint-based causal discovery methods/FCI.rst +++ b/docs/source/search_methods_index/Constraint-based causal discovery methods/FCI.rst @@ -16,16 +16,16 @@ Usage from causallearn.search.ConstraintBased.FCI import fci # default parameters - G, edges = fci(data) + g, edges = fci(data) # or customized parameters - G, edges = fci(data, independence_test_method, alpha, depth, max_path_length, + g, edges = fci(data, independence_test_method, alpha, depth, max_path_length, verbose, background_knowledge, cache_variables_map) # visualization from causallearn.utils.GraphUtils import GraphUtils - pdy = GraphUtils.to_pydot(G) + pdy = GraphUtils.to_pydot(g) pdy.write_png('simple_test.png') Visualization using pydot is recommended. If specific label names are needed, please refer to this `usage example `_. @@ -60,7 +60,7 @@ For detailed usage, please kindly refer to its `usage example `": kernel-based conditional independence test. (As a kernel method, its complexity is cubic in the sample size, so it might be slow if the same size is not small.) - ":ref:`mv_fisherz `": Missing-value Fisher's Z conditional independence test. -**stable**: run stabilized skeleton discovery if True. Default: True. +**stable**: run stabilized skeleton discovery [4]_ if True. Default: True. **uc_rule**: how unshielded colliders are oriented. Default: 0. - 0: run uc_sepset. @@ -114,4 +114,5 @@ Returns .. [1] Spirtes, P., Glymour, C. N., Scheines, R., & Heckerman, D. (2000). Causation, prediction, and search. MIT press. .. [2] Tu, R., Zhang, C., Ackermann, P., Mohan, K., Kjellström, H., & Zhang, K. (2019, April). Causal discovery in the presence of missing data. In The 22nd International Conference on Artificial Intelligence and Statistics (pp. 1762-1770). PMLR. -.. [3] Ramsey, J. (2016). Improving accuracy and scalability of the pc algorithm by maximizing p-value. arXiv preprint arXiv:1610.00378. \ No newline at end of file +.. [3] Ramsey, J. (2016). Improving accuracy and scalability of the pc algorithm by maximizing p-value. arXiv preprint arXiv:1610.00378. +.. [4] Colombo, D., & Maathuis, M. H. (2014). Order-independent constraint-based causal structure learning. J. Mach. Learn. Res., 15(1), 3741-3782. \ No newline at end of file diff --git a/docs/source/utilities_index/Datasets.rst b/docs/source/utilities_index/Datasets.rst new file mode 100644 index 00000000..fad90d9a --- /dev/null +++ b/docs/source/utilities_index/Datasets.rst @@ -0,0 +1,27 @@ +.. _datasets: + +Datasets +============================================== + + + +Usage +---------------------------- +.. code-block:: python + + from causallearn.utils.Dataset import load_dataset + + data, labels = load_dataset(dataset_name) + +Parameters +------------------- +**dataset_name**: str, the name of a dataset in ['sachs', 'boston_housing', 'airfoil'] + + +Returns +------------------- + +**data**: np.array, data with a shape of (number of samples, number of variables). + +**labels**: list, labels of variables in the data. 
+
diff --git a/docs/source/utilities_index/index.rst b/docs/source/utilities_index/index.rst
index c175e226..ec4dfc83 100644
--- a/docs/source/utilities_index/index.rst
+++ b/docs/source/utilities_index/index.rst
@@ -10,3 +10,4 @@ Contents:
 
     Graph operations/index
     Evaluations
+    Datasets
diff --git a/setup.py b/setup.py
index 90aadea9..f40a536d 100644
--- a/setup.py
+++ b/setup.py
@@ -3,7 +3,7 @@
 with open('README.md', 'r') as fh:
     README = fh.read()
 
-VERSION = '0.1.3.4'
+VERSION = '0.1.3.8'
 
 setuptools.setup(
     name='causal-learn',
@@ -22,7 +22,8 @@
         'matplotlib',
         'networkx',
         'pydot',
-        'tqdm'
+        'tqdm',
+        'momentchi2'
     ],
     url='https://github.com/py-why/causal-learn',
     packages=setuptools.find_packages(),
diff --git a/tests/TestBOSS.py b/tests/TestBOSS.py
new file mode 100644
index 00000000..ea963d47
--- /dev/null
+++ b/tests/TestBOSS.py
@@ -0,0 +1,117 @@
+import unittest
+
+import numpy as np
+from causallearn.graph.AdjacencyConfusion import AdjacencyConfusion
+from causallearn.graph.ArrowConfusion import ArrowConfusion
+from causallearn.graph.GeneralGraph import GeneralGraph
+from causallearn.graph.GraphNode import GraphNode
+from causallearn.search.PermutationBased.BOSS import boss
+from causallearn.utils.DAG2CPDAG import dag2cpdag
+
+import gc
+
+
+def simulate_data(p, d, n):
+    """
+    Randomly generates an Erdos-Renyi directed acyclic graph given an ordering
+    and randomly simulates data with the provided parameters.
+
+    p = |variables|
+    d = |edges| / |possible edges|
+    n = sample size
+    """
+
+    # pe = |possible edges|
+    pe = int(p * (p - 1) / 2)
+
+    # ne = |edges|
+    ne = int(d * pe)
+
+    # generate edges
+    e = np.append(np.zeros(pe - ne), 0.5 * np.random.uniform(-1, 1, ne))
+    np.random.shuffle(e)
+    B = np.zeros([p, p])
+    B.T[np.triu_indices(p, 1)] = e
+
+    # generate variance terms
+    O = np.diag(np.ones(p))
+
+    # simulate data
+    X = np.random.multivariate_normal(np.zeros(p), O, n)
+    for i in range(p):
+        J = np.where(B[i])[0]
+        for j in J: X[:, i] += B[i, j] * X[:, j]
+
+    pi = [i for i in range(p)]
+    np.random.shuffle(pi)
+
+    return (B != 0)[pi][:, pi], X[:, pi]
+
+
+class TestBOSS(unittest.TestCase):
+    def test_boss(self):
+        ps = [30, 60]
+        ds = [0.1, 0.15]
+        n = 1000
+        reps = 5
+
+        gc.set_threshold(20000, 50, 50)
+        # gc.set_debug(gc.DEBUG_STATS)
+
+        for p in ps:
+            for d in ds:
+                stats = [[], [], [], []]
+                for rep in range(1, reps + 1):
+                    g0, X = simulate_data(p, d, n)
+                    print(
+                        "\nNodes:", p,
+                        "| Edges:", np.sum(g0),
+                        "| Avg Degree:", 2 * round(np.sum(g0) / p, 2),
+                        "| Rep:", rep,
+                    )
+
+                    node_names = [("X%d" % (i + 1)) for i in range(p)]
+                    nodes = []
+
+                    for name in node_names:
+                        node = GraphNode(name)
+                        nodes.append(node)
+
+                    G0 = GeneralGraph(nodes)
+                    for y in range(p):
+                        for x in np.where(g0[y] == 1)[0]:
+                            G0.add_directed_edge(nodes[x], nodes[y])
+
+                    G0 = dag2cpdag(G0)
+
+                    G = boss(X, parameters={'lambda_value': 4})
+                    gc.collect()
+
+                    AdjC = AdjacencyConfusion(G0, G)
+                    stats[0].append(AdjC.get_adj_precision())
+                    stats[1].append(AdjC.get_adj_recall())
+
+                    ArrC = ArrowConfusion(G0, G)
+                    stats[2].append(ArrC.get_arrows_precision())
+                    stats[3].append(ArrC.get_arrows_recall())
+
+                    print(
+                        [
+                            ["AP", "AR", "AHP", "AHR"][i]
+                            + ": "
+                            + str(round(stats[i][-1], 2))
+                            for i in range(4)
+                        ]
+                    )
+
+                print(
+                    "\nOverall Stats: \n",
+                    [
+                        ["AP", "AR", "AHP", "AHR"][i]
+                        + ": "
+                        + str(round(np.sum(stats[i]) / reps, 2))
+                        for i in range(4)
+                    ],
+                )
+
+        print("finish")
diff --git a/tests/TestCIT_FastKCI.py b/tests/TestCIT_FastKCI.py
new file mode 100644
index 00000000..b8507f78
--- /dev/null
+++
b/tests/TestCIT_FastKCI.py @@ -0,0 +1,36 @@ +import unittest + +import numpy as np + +import causallearn.utils.cit as cit + + +class TestCIT_FastKCI(unittest.TestCase): + def test_Gaussian_dist(self): + np.random.seed(10) + X = np.random.randn(1200, 1) + X_prime = np.random.randn(1200, 1) + Y = X + 0.5 * np.random.randn(1200, 1) + Z = Y + 0.5 * np.random.randn(1200, 1) + data = np.hstack((X, X_prime, Y, Z)) + + pvalue01 = [] + pvalue03 = [] + pvalue032 = [] + for K in [3, 10]: + for J in [8, 16]: + for use_gp in [True, False]: + cit_CIT = cit.CIT(data, 'fastkci', K=K, J=J, use_gp=use_gp) + pvalue01.append(round(cit_CIT(0, 1), 4)) + pvalue03.append(round(cit_CIT(0, 3), 4)) + pvalue032.append(round(cit_CIT(0, 3, {2}), 4)) + + pvalue01 = np.array(pvalue01) + pvalue03 = np.array(pvalue03) + pvalue032 = np.array(pvalue032) + self.assertTrue(np.all((0.0 <= pvalue01) & (pvalue01 <= 1.0)), + "pvalue01 contains invalid values") + self.assertTrue(np.all((0.0 <= pvalue03) & (pvalue03 <= 1.0)), + "pvalue03 contains invalid values") + self.assertTrue(np.all((0.0 <= pvalue032) & (pvalue032 <= 1.0)), + "pvalue032 contains invalid values") diff --git a/tests/TestCIT_RCIT.py b/tests/TestCIT_RCIT.py new file mode 100644 index 00000000..ef6406fd --- /dev/null +++ b/tests/TestCIT_RCIT.py @@ -0,0 +1,38 @@ +import unittest + +import numpy as np + +import causallearn.utils.cit as cit + + +class TestCIT_RCIT(unittest.TestCase): + def test_Gaussian_dist(self): + np.random.seed(10) + X = np.random.randn(300, 1) + X_prime = np.random.randn(300, 1) + Y = X + 0.5 * np.random.randn(300, 1) + Z = Y + 0.5 * np.random.randn(300, 1) + data = np.hstack((X, X_prime, Y, Z)) + + pvalue01 = [] + pvalue03 = [] + pvalue032 = [] + for approx in ["lpd4", "hbe", "gamma", "chi2", "perm"]: + for num_f in [50, 100]: + for num_f2 in [5, 10]: + for rcit in [True, False]: + cit_CIT = cit.CIT(data, 'rcit', approx=approx, num_f=num_f, + num_f2=num_f2, rcit=rcit) + pvalue01.append(round(cit_CIT(0, 1), 4)) + pvalue03.append(round(cit_CIT(0, 3), 4)) + pvalue032.append(round(cit_CIT(0, 3, {2}), 4)) + + pvalue01 = np.array(pvalue01) + pvalue03 = np.array(pvalue03) + pvalue032 = np.array(pvalue032) + self.assertTrue(np.all((0.0 <= pvalue01) & (pvalue01 <= 1.0)), + "pvalue01 contains invalid values") + self.assertTrue(np.all((0.0 <= pvalue03) & (pvalue03 <= 1.0)), + "pvalue03 contains invalid values") + self.assertTrue(np.all((0.0 <= pvalue032) & (pvalue032 <= 1.0)), + "pvalue032 contains invalid values") diff --git a/tests/TestDAG2PAG.py b/tests/TestDAG2PAG.py index 569fc945..004ea897 100644 --- a/tests/TestDAG2PAG.py +++ b/tests/TestDAG2PAG.py @@ -72,3 +72,17 @@ def test_case3(self): print(pag) graphviz_pag = GraphUtils.to_pgv(pag) graphviz_pag.draw("pag.png", prog='dot', format='png') + + def test_case_selection(self): + nodes = [] + for i in range(5): + nodes.append(GraphNode(str(i))) + dag = Dag(nodes) + dag.add_directed_edge(nodes[0], nodes[1]) + dag.add_directed_edge(nodes[1], nodes[2]) + dag.add_directed_edge(nodes[2], nodes[3]) + # Selection nodes + dag.add_directed_edge(nodes[3], nodes[4]) + dag.add_directed_edge(nodes[0], nodes[4]) + pag = dag2pag(dag, islatent=[], isselection=[nodes[4]]) + print(pag) diff --git a/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_alarm_fci_chisq_0.05.txt b/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_alarm_fci_chisq_0.05.txt index 2430e1de..ce08eb4f 100644 --- a/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_alarm_fci_chisq_0.05.txt +++ 
b/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_alarm_fci_chisq_0.05.txt @@ -3,7 +3,7 @@ 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 -1 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -2 0 0 0 2 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +2 0 0 0 -1 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 @@ -12,7 +12,7 @@ 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 2 +0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 @@ -20,9 +20,9 @@ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 -1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 -0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 0 2 0 2 0 0 0 0 2 -1 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 0 -1 0 -1 0 0 0 0 2 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 diff --git a/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_andes_fci_chisq_0.05.txt b/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_andes_fci_chisq_0.05.txt index a9f9886f..934b13bd 100644 --- a/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_andes_fci_chisq_0.05.txt +++ b/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_andes_fci_chisq_0.05.txt @@ -39,8 +39,8 @@ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 1 0 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 2 2 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 2 -1 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 @@ -106,7 +106,7 @@ 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 -1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 @@ -208,7 +208,7 @@ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 -0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 diff --git a/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_asia_fci_chisq_0.05.txt b/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_asia_fci_chisq_0.05.txt index c0467173..b3aa50bb 100644 --- a/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_asia_fci_chisq_0.05.txt +++ b/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_asia_fci_chisq_0.05.txt @@ -1,7 +1,7 @@ 0 0 0 0 0 0 2 0 0 0 0 0 0 2 0 0 0 0 0 2 2 0 0 0 -0 0 2 0 0 2 0 0 +0 0 2 0 0 -1 0 0 0 0 2 0 0 0 0 2 0 1 0 1 0 0 0 0 2 0 0 0 0 0 0 0 diff --git a/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_barley_fci_chisq_0.05.txt b/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_barley_fci_chisq_0.05.txt index b1986bd1..864b9e5f 100644 --- a/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_barley_fci_chisq_0.05.txt +++ b/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_barley_fci_chisq_0.05.txt @@ -1,4 +1,4 @@ -0 0 0 2 0 0 2 0 0 2 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +0 0 0 -1 0 0 -1 0 0 -1 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 1 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 @@ -14,8 +14,8 @@ 0 0 0 0 0 0 0 1 0 0 0 0 1 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 2 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 0 -1 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 0 0 0 @@ -28,7 +28,7 @@ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 diff --git a/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_child_fci_chisq_0.05.txt b/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_child_fci_chisq_0.05.txt index bac534d8..a739c18f 100644 --- 
a/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_child_fci_chisq_0.05.txt +++ b/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_child_fci_chisq_0.05.txt @@ -12,9 +12,9 @@ 1 0 0 0 0 0 0 0 0 0 0 0 0 -1 1 -1 -1 1 -1 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1 -0 0 0 0 0 0 2 0 0 0 0 2 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 2 0 0 0 0 -1 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 -1 -1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 -0 0 2 2 2 2 0 0 0 0 0 2 0 0 0 0 0 0 0 0 +0 0 -1 2 -1 -1 0 0 0 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 2 0 -1 0 0 0 0 0 0 diff --git a/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_hailfinder_fci_chisq_0.05.txt b/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_hailfinder_fci_chisq_0.05.txt index 894ff9cd..4671059c 100644 --- a/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_hailfinder_fci_chisq_0.05.txt +++ b/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_hailfinder_fci_chisq_0.05.txt @@ -21,7 +21,7 @@ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 diff --git a/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_hepar2_fci_chisq_0.05.txt b/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_hepar2_fci_chisq_0.05.txt index 9e6c7aac..0569bd1b 100644 --- a/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_hepar2_fci_chisq_0.05.txt +++ b/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_hepar2_fci_chisq_0.05.txt @@ -40,7 +40,7 @@ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 diff --git a/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_insurance_fci_chisq_0.05.txt b/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_insurance_fci_chisq_0.05.txt index 80a266db..4d6a5b4a 100644 --- a/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_insurance_fci_chisq_0.05.txt +++ b/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_insurance_fci_chisq_0.05.txt @@ -2,7 +2,7 @@ -1 0 1 1 0 0 0 0 0 0 0 0 1 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 1 1 0 0 0 -1 0 0 0 0 0 0 0 0 -1 1 0 0 1 0 0 0 0 0 0 1 1 0 0 0 0 0 0 -1 0 0 0 1 0 0 0 -1 -1 0 0 0 0 0 0 0 -1 -0 0 2 0 0 0 2 0 0 0 0 2 0 0 0 0 2 0 0 0 0 0 0 0 2 0 0 +0 0 2 0 0 0 -1 0 0 0 0 -1 0 0 0 0 -1 0 0 0 0 0 0 0 -1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 0 0 0 0 -1 0 0 0 1 1 0 0 0 0 0 0 0 0 0 -1 0 -1 0 0 -1 0 diff --git a/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_water_fci_chisq_0.05.txt b/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_water_fci_chisq_0.05.txt index cee53cda..bd9c2aec 100644 --- a/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_water_fci_chisq_0.05.txt +++ b/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_water_fci_chisq_0.05.txt @@ -1,4 +1,4 @@ -0 0 0 0 0 0 0 0 2 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 2 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 -1 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 @@ -14,12 +14,12 @@ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 -0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 2 0 0 0 0 0 +0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 1 -0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 +0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 diff --git a/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_win95pts_fci_chisq_0.05.txt b/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_win95pts_fci_chisq_0.05.txt index b971fb87..8bc6b692 100644 --- a/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_win95pts_fci_chisq_0.05.txt +++ b/tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_win95pts_fci_chisq_0.05.txt @@ -8,7 +8,7 @@ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -0 0 0 2 0 0 0 0 0 0 0 2 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +0 0 0 2 0 0 0 0 0 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 @@ -34,7 +34,7 @@ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +0 0 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 @@ -50,7 +50,7 @@ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 diff --git a/tests/TestData/benchmark_returned_results/linear_10_fci_fisherz_0.05.txt b/tests/TestData/benchmark_returned_results/linear_10_fci_fisherz_0.05.txt index 046443a3..07a248fb 100644 --- a/tests/TestData/benchmark_returned_results/linear_10_fci_fisherz_0.05.txt +++ 
b/tests/TestData/benchmark_returned_results/linear_10_fci_fisherz_0.05.txt @@ -5,7 +5,7 @@ 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 -0 0 0 0 0 0 0 0 2 0 0 0 0 2 0 0 0 2 2 0 +0 0 0 0 0 0 0 0 -1 0 0 0 0 -1 0 0 0 2 -1 0 1 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 -1 0 0 0 -1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2 2 0 0 0 0 0 diff --git a/tests/TestFCI.py b/tests/TestFCI.py index 7884cbde..e2fb683c 100644 --- a/tests/TestFCI.py +++ b/tests/TestFCI.py @@ -10,8 +10,11 @@ import pandas as pd from causallearn.graph.Dag import Dag +from causallearn.graph.Edge import Edge +from causallearn.graph.Endpoint import Endpoint +from causallearn.graph.GeneralGraph import GeneralGraph from causallearn.graph.GraphNode import GraphNode -from causallearn.search.ConstraintBased.FCI import fci +from causallearn.search.ConstraintBased.FCI import fci, ruleR5 from causallearn.utils.cit import chisq, fisherz, kci, d_separation from causallearn.utils.DAG2PAG import dag2pag from causallearn.utils.GraphUtils import GraphUtils @@ -30,21 +33,21 @@ ######################################### Test Notes ########################################### BENCHMARK_TXTFILE_TO_MD5 = { - "tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_asia_fci_chisq_0.05.txt": "65f54932a9d8224459e56c40129e6d8b", - "tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_cancer_fci_chisq_0.05.txt": "0312381641cb3b4818e0c8539f74e802", - "tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_earthquake_fci_chisq_0.05.txt": "a1160b92ce15a700858552f08e43b7de", - "tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_sachs_fci_chisq_0.05.txt": "dced4a202fc32eceb75f53159fc81f3b", - "tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_survey_fci_chisq_0.05.txt": "b1a28eee1e0c6ea8a64ac1624585c3f4", - "tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_alarm_fci_chisq_0.05.txt": "c3bbc2b8aba456a4258dd071a42085bc", - "tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_barley_fci_chisq_0.05.txt": "4a5000e7a582083859ee6aef15073676", - "tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_child_fci_chisq_0.05.txt": "6b7858589e12f04b0f489ba4589a1254", - "tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_insurance_fci_chisq_0.05.txt": "9975942b936aa2b1fc90c09318ca2d08", - "tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_water_fci_chisq_0.05.txt": "48eee804d59526187b7ecd0519556ee5", - "tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_hailfinder_fci_chisq_0.05.txt": "6b9a6b95b6474f8530e85c022f4e749c", - "tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_hepar2_fci_chisq_0.05.txt": "4aae21ff3d9aa2435515ed2ee402294c", - "tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_win95pts_fci_chisq_0.05.txt": "648fdf271e1440c06ca2b31b55ef1f3f", - "tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_andes_fci_chisq_0.05.txt": "04092ae93e54c727579f08bf5dc34c77", - "tests/TestData/benchmark_returned_results/linear_10_fci_fisherz_0.05.txt": "289c86f9c665bf82bbcc4c9e1dcec3e7" + 'tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_asia_fci_chisq_0.05.txt': '4b014aa90aee456bc02dddd008d8be1f', + 'tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_cancer_fci_chisq_0.05.txt': '0312381641cb3b4818e0c8539f74e802', + 
'tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_earthquake_fci_chisq_0.05.txt': 'a1160b92ce15a700858552f08e43b7de', + 'tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_sachs_fci_chisq_0.05.txt': 'dced4a202fc32eceb75f53159fc81f3b', + 'tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_survey_fci_chisq_0.05.txt': 'b1a28eee1e0c6ea8a64ac1624585c3f4', + 'tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_alarm_fci_chisq_0.05.txt': '4b06f6ea7fefe66255a3d16cb8a3711f', + 'tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_barley_fci_chisq_0.05.txt': 'a926e329b2bf2b397d3676e9d2020483', + 'tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_child_fci_chisq_0.05.txt': '5eff3ea6ce2e2daa53d6b649bd9c31fe', + 'tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_insurance_fci_chisq_0.05.txt': '55eed065b8dfc6a21610d3d6e18423c6', + 'tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_water_fci_chisq_0.05.txt': '48917f914c8f80684ae2abde87417d5c', + 'tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_hailfinder_fci_chisq_0.05.txt': 'bcffe793866db6ddfe9cc602081789cd', + 'tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_hepar2_fci_chisq_0.05.txt': '3558683a485588a41493b1a2f2c1d370', + 'tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_win95pts_fci_chisq_0.05.txt': '52596ace09c45656f61c70d0e66bdd6a', + 'tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_andes_fci_chisq_0.05.txt': '4b7e2ecba5eb50c7266f24670d0d0ae9', + 'tests/TestData/benchmark_returned_results/linear_10_fci_fisherz_0.05.txt': 'eaabd1fbb16bc152b45e456c30166c55' } # INCONSISTENT_RESULT_GRAPH_ERRMSG = "Returned graph is inconsistent with the benchmark. Please check your code with the commit 5918419." @@ -211,3 +214,23 @@ def test_er_graph(self): print(G) print(f'fci(data, d_separation, 0.05):') self.run_simulate_data_test(pag, G) + + def test_rule5(self): + nodes = [] + for i in range(7): + nodes.append(GraphNode(str(i))) + g = GeneralGraph(nodes) + g.add_edge(Edge(nodes[0], nodes[1], Endpoint.CIRCLE, Endpoint.CIRCLE)) + g.add_edge(Edge(nodes[0], nodes[2], Endpoint.CIRCLE, Endpoint.CIRCLE)) + g.add_edge(Edge(nodes[0], nodes[5], Endpoint.CIRCLE, Endpoint.CIRCLE)) + g.add_edge(Edge(nodes[0], nodes[6], Endpoint.CIRCLE, Endpoint.CIRCLE)) + g.add_edge(Edge(nodes[1], nodes[3], Endpoint.CIRCLE, Endpoint.CIRCLE)) + g.add_edge(Edge(nodes[2], nodes[4], Endpoint.CIRCLE, Endpoint.CIRCLE)) + g.add_edge(Edge(nodes[3], nodes[5], Endpoint.CIRCLE, Endpoint.CIRCLE)) + g.add_edge(Edge(nodes[4], nodes[6], Endpoint.CIRCLE, Endpoint.CIRCLE)) + + ruleR5(g, changeFlag=True, verbose=True) + + for edge in g.get_graph_edges(): + assert edge.get_endpoint1() == Endpoint.TAIL + assert edge.get_endpoint2() == Endpoint.TAIL diff --git a/tests/TestGES.py b/tests/TestGES.py index 137a7e9a..ba7e919f 100644 --- a/tests/TestGES.py +++ b/tests/TestGES.py @@ -10,7 +10,7 @@ ######################################### Test Notes ########################################### -# All the benchmark results of loaded files (e.g. "./TestData/benchmark_returned_results/") # +# All the benchmark results of loaded files (e.g. "tests/TestData/benchmark_returned_results/")# # are obtained from the code of causal-learn as of commit # # https://github.com/cmu-phil/causal-learn/commit/b51d788 (07-08-2022). 
# # # @@ -23,13 +23,13 @@ BENCHMARK_TXTFILE_TO_MD5 = { - "./TestData/data_linear_10.txt": "95a17e15038d4cade0845140b67c05a6", - "./TestData/data_discrete_10.txt": "ccb51c6c1946d8524a8b29a49aef2cc4", - "./TestData/graph.10.txt": "4970d4ecb8be999a82a665e5f5e0825b", - "./TestData/test_ges_simulated_linear_gaussian_data.txt": "0d2490eeb9ee8ef3b18bf21d7e936e1e", - "./TestData/test_ges_simulated_linear_gaussian_CPDAG.txt": "aa0146777186b07e56421ce46ed52914", - "./TestData/benchmark_returned_results/linear_10_ges_local_score_BIC_none_none.txt": "3accb3673d2ccb4c110f3703d60fe702", - "./TestData/benchmark_returned_results/discrete_10_ges_local_score_BDeu_none_none.txt": "eebd11747c1b927b2fdd048a55c8c3a5", + "tests/TestData/data_linear_10.txt": "95a17e15038d4cade0845140b67c05a6", + "tests/TestData/data_discrete_10.txt": "ccb51c6c1946d8524a8b29a49aef2cc4", + "tests/TestData/graph.10.txt": "4970d4ecb8be999a82a665e5f5e0825b", + "tests/TestData/test_ges_simulated_linear_gaussian_data.txt": "0d2490eeb9ee8ef3b18bf21d7e936e1e", + "tests/TestData/test_ges_simulated_linear_gaussian_CPDAG.txt": "aa0146777186b07e56421ce46ed52914", + "tests/TestData/benchmark_returned_results/linear_10_ges_local_score_BIC_none_none.txt": "3accb3673d2ccb4c110f3703d60fe702", + "tests/TestData/benchmark_returned_results/discrete_10_ges_local_score_BDeu_none_none.txt": "eebd11747c1b927b2fdd048a55c8c3a5", } INCONSISTENT_RESULT_GRAPH_ERRMSG = "Returned graph is inconsistent with the benchmark. Please check your code with the commit b51d788." @@ -47,8 +47,8 @@ class TestGES(unittest.TestCase): # Load data from file "data_linear_10.txt". Run GES with local_score_BIC. def test_ges_load_linear_10_with_local_score_BIC(self): print('Now start test_ges_load_linear_10_with_local_score_BIC ...') - data_path = "./TestData/data_linear_10.txt" - truth_graph_path = "./TestData/graph.10.txt" + data_path = "tests/TestData/data_linear_10.txt" + truth_graph_path = "tests/TestData/graph.10.txt" data = np.loadtxt(data_path, skiprows=1) truth_dag = txt2generalgraph(truth_graph_path) # truth_dag is a GeneralGraph instance truth_cpdag = dag2cpdag(truth_dag) @@ -58,7 +58,7 @@ def test_ges_load_linear_10_with_local_score_BIC(self): res_map = ges(data, score_func='local_score_BIC', maxP=None, parameters=None) # Run GES and obtain the estimated graph (res_map is Dict object,which contains the updated steps, the result causal graph and the result score.) 
benchmark_returned_graph = np.loadtxt( - "./TestData/benchmark_returned_results/linear_10_ges_local_score_BIC_none_none.txt") + "tests/TestData/benchmark_returned_results/linear_10_ges_local_score_BIC_none_none.txt") assert np.all(res_map['G'].graph == benchmark_returned_graph), INCONSISTENT_RESULT_GRAPH_ERRMSG shd = SHD(truth_cpdag, res_map['G']) print(f" ges(data, score_func='local_score_BIC', maxP=None, parameters=None)\tSHD: {shd.get_shd()} of {num_edges_in_truth}") @@ -73,9 +73,9 @@ def test_ges_simulate_linear_gaussian_with_local_score_BIC(self): truth_DAG_directed_edges = {(0, 1), (0, 3), (1, 2), (1, 3), (2, 3), (2, 4), (3, 4)} truth_CPDAG_directed_edges = {(0, 3), (1, 3), (2, 3), (2, 4), (3, 4)} truth_CPDAG_undirected_edges = {(0, 1), (1, 2), (2, 1), (1, 0)} - truth_CPDAG = np.loadtxt("./TestData/test_ges_simulated_linear_gaussian_CPDAG.txt") + truth_CPDAG = np.loadtxt("tests/TestData/test_ges_simulated_linear_gaussian_CPDAG.txt") - ###### Simulation configuration: code to generate "./TestData/test_ges_simulated_linear_gaussian_data.txt" ###### + ###### Simulation configuration: code to generate "tests/TestData/test_ges_simulated_linear_gaussian_data.txt" ###### # np.random.seed(42) # linear_weight_minabs, linear_weight_maxabs, linear_weight_netative_prob = 0.5, 0.9, 0.5 # sample_size = 10000 @@ -89,10 +89,10 @@ def test_ges_simulate_linear_gaussian_with_local_score_BIC(self): # mixing_matrix = np.linalg.inv(np.eye(num_of_nodes) - adjacency_matrix) # exogenous_noise = np.random.normal(0, 1, (num_of_nodes, sample_size)) # data = (mixing_matrix @ exogenous_noise).T - # np.savetxt("./TestData/test_ges_simulated_linear_gaussian_data.txt", data) - ###### Simulation configuration: code to generate "./TestData/test_ges_simulated_linear_gaussian_data.txt" ###### + # np.savetxt("tests/TestData/test_ges_simulated_linear_gaussian_data.txt", data) + ###### Simulation configuration: code to generate "tests/TestData/test_ges_simulated_linear_gaussian_data.txt" ###### - data = np.loadtxt("./TestData/test_ges_simulated_linear_gaussian_data.txt") + data = np.loadtxt("tests/TestData/test_ges_simulated_linear_gaussian_data.txt") # Run GES with default parameters: score_func='local_score_BIC', maxP=None, parameters=None res_map = ges(data, score_func='local_score_BIC', maxP=None, parameters=None) @@ -105,8 +105,8 @@ def test_ges_simulate_linear_gaussian_with_local_score_BIC(self): # Load data from file "data_discrete_10.txt". Run GES with local_score_BDeu. def test_ges_load_discrete_10_with_local_score_BDeu(self): print('Now start test_ges_load_discrete_10_with_local_score_BDeu ...') - data_path = "./TestData/data_discrete_10.txt" - truth_graph_path = "./TestData/graph.10.txt" + data_path = "tests/TestData/data_discrete_10.txt" + truth_graph_path = "tests/TestData/graph.10.txt" data = np.loadtxt(data_path, skiprows=1) truth_dag = txt2generalgraph(truth_graph_path) # truth_dag is a GeneralGraph instance truth_cpdag = dag2cpdag(truth_dag) @@ -115,7 +115,7 @@ def test_ges_load_discrete_10_with_local_score_BDeu(self): # Run GES with local_score_BDeu. 
res_map = ges(data, score_func='local_score_BDeu', maxP=None, parameters=None) benchmark_returned_graph = np.loadtxt( - "./TestData/benchmark_returned_results/discrete_10_ges_local_score_BDeu_none_none.txt") + "tests/TestData/benchmark_returned_results/discrete_10_ges_local_score_BDeu_none_none.txt") assert np.all(res_map['G'].graph == benchmark_returned_graph), INCONSISTENT_RESULT_GRAPH_ERRMSG shd = SHD(truth_cpdag, res_map['G']) print(f" ges(data, score_func='local_score_BDeu', maxP=None, parameters=None)\tSHD: {shd.get_shd()} of {num_edges_in_truth}") diff --git a/tests/TestGRaSP.py b/tests/TestGRaSP.py index 7482dcee..f1fe0dd0 100644 --- a/tests/TestGRaSP.py +++ b/tests/TestGRaSP.py @@ -7,61 +7,45 @@ from causallearn.graph.GraphNode import GraphNode from causallearn.search.PermutationBased.GRaSP import grasp from causallearn.utils.DAG2CPDAG import dag2cpdag -from causallearn.utils.GraphUtils import GraphUtils -import matplotlib.image as mpimg -import matplotlib.pyplot as plt -import io +import gc -def random_dag(p, d): + +def simulate_data(p, d, n): """ - Randomly generates an Erdos-Renyi direct acyclic graph given an ordering. + Randomly generates an Erdos-Renyi directed acyclic graph given an ordering + and randomly simulates data with the provided parameters. p = |variables| d = |edges| / |possible edges| + n = sample size """ # npe = |possible edges| pe = int(p * (p - 1) / 2) - # e = |edges| + # ne = |edges| ne = int(d * pe) # generate edges - e = np.append(np.zeros(pe - ne, "uint8"), np.ones(ne, "uint8")) + e = np.append(np.zeros(pe - ne), 0.5 * np.random.uniform(-1, 1, ne)) np.random.shuffle(e) - - # generate graph - g = np.zeros([p, p], "uint8") - g.T[np.triu_indices(p, 1)] = e - - return g - - -def parameterize_dag(g): - """ - Randomly parameterize a directed acyclic graph.
- - g = directed acyclic graph (adjacency matrix) - """ - - # p = |variables| - p = g.shape[0] - - # e = |edges| - e = np.sum(g) + B = np.zeros([p, p]) + B.T[np.triu_indices(p, 1)] = e # generate variance terms - o = np.diag(np.ones(p)) + O = np.diag(np.ones(p)) - # generate edge weights (edge parameters uniformly sampled [-1.0, 1.0]) - b = np.zeros([p, p]) - b[np.where(g == 1)] = np.random.uniform(-1, 1, e) + # simulate data + X = np.random.multivariate_normal(np.zeros(p), O, n) + for i in range(p): + J = np.where(B[i])[0] + for j in J: X[:, i] += B[i, j] * X[:, j] - # calculate covariance - s = np.dot(np.dot(np.linalg.inv(np.eye(p) - b), o), np.linalg.inv(np.eye(p) - b).T) + pi = [i for i in range(p)] + np.random.shuffle(pi) - return s + return (B != 0)[pi][:, pi], X[:, pi] class TestGRaSP(unittest.TestCase): @@ -71,25 +55,22 @@ def test_grasp(self): n = 1000 reps = 5 + gc.set_threshold(20000, 50, 50) + # gc.set_debug(gc.DEBUG_STATS) + for p in ps: for d in ds: stats = [[], [], [], []] for rep in range(1, reps + 1): - g0 = random_dag(p, d) + g0, X = simulate_data(p, d, n) print( - "\nNodes:", - p, - "| Edges:", - np.sum(g0), - "| Avg Degree:", - 2 * np.sum(g0) / p, - "| Rep:", - rep, + "\nNodes:", p, + "| Edges:", np.sum(g0), + "| Avg Degree:", 2 * round(np.sum(g0) / p, 2), + "| Rep:", rep, ) - cov = parameterize_dag(g0) - X = np.random.multivariate_normal(np.zeros(p), cov, n) - node_names = [("x%d" % i) for i in range(p)] + node_names = [("X%d" % (i + 1)) for i in range(p)] nodes = [] for name in node_names: @@ -103,15 +84,8 @@ def test_grasp(self): G0 = dag2cpdag(G0) - G = grasp(X) - - pyd = GraphUtils.to_pydot(G) - tmp_png = pyd.create_png(f="png") - fp = io.BytesIO(tmp_png) - img = mpimg.imread(fp, format='png') - plt.axis('off') - plt.imshow(img) - plt.show() + G = grasp(X, depth=1, parameters={'lambda_value': 4}) + gc.collect() AdjC = AdjacencyConfusion(G0, G) stats[0].append(AdjC.get_adj_precision()) diff --git a/tests/TestSkeletonDiscovery.py b/tests/TestSkeletonDiscovery.py new file mode 100644 index 00000000..10b0a5a2 --- /dev/null +++ b/tests/TestSkeletonDiscovery.py @@ -0,0 +1,18 @@ +from unittest import TestCase +import numpy as np +from causallearn.search.ConstraintBased.PC import pc +import networkx as nx +from causallearn.utils.cit import chisq, fisherz, gsq, kci, mv_fisherz, d_separation + + +class TestSkeletonDiscovery(TestCase): + def test_sepset(self): + truth_DAG_directed_edges = {(0, 2), (1, 2), (2, 3), (2, 4)} + + true_dag_netx = nx.DiGraph() + true_dag_netx.add_nodes_from(list(range(5))) + true_dag_netx.add_edges_from(truth_DAG_directed_edges) + + data = np.zeros((100, len(true_dag_netx.nodes))) # just a placeholder + cg = pc(data, 0.05, d_separation, True, 0, -1, true_dag=true_dag_netx) + assert cg.sepset[0, 2] is None \ No newline at end of file
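Reviewer note (not part of the patch): most of the benchmark-file updates above flip endpoint codes from 2 to -1. Under the adjacency-matrix encoding causal-learn documents (graph[i, j] is the mark at node i on the i--j edge; -1 = tail, 1 = arrow, 2 = circle, 0 = no edge), those flips appear to be circle marks oriented into tails, consistent with the newly exercised orientation rules such as ruleR5, and the MD5 table in TestFCI.py is regenerated to match. What follows is a minimal Python sketch, under those assumptions, for decoding a regenerated benchmark file and checking it against the updated checksums; md5_of and list_edges are hypothetical helpers written for this note, not part of the diff.

import hashlib

import numpy as np

# Assumed endpoint codes in the saved matrices: -1 = tail, 1 = arrow, 2 = circle.
MARK = {-1: "-", 1: ">", 2: "o"}

def md5_of(path: str) -> str:
    # Hash the raw file bytes; BENCHMARK_TXTFILE_TO_MD5 presumably stores these digests.
    with open(path, "rb") as f:
        return hashlib.md5(f.read()).hexdigest()

def list_edges(path: str) -> list:
    # Render each edge as "i <mark at i>-<mark at j> j", e.g. "3 o-> 5" or "0 --- 2";
    # g[i, j] is read as the mark at node i, g[j, i] as the mark at node j.
    g = np.loadtxt(path).astype(int)
    p = g.shape[0]
    return [f"{i} {MARK[g[i, j]]}-{MARK[g[j, i]]} {j}"
            for i in range(p) for j in range(i + 1, p) if g[i, j] != 0]

# Example: verify the regenerated asia benchmark against the updated MD5 table.
path = "tests/TestData/benchmark_returned_results/bnlearn_discrete_10000_asia_fci_chisq_0.05.txt"
assert md5_of(path) == "4b014aa90aee456bc02dddd008d8be1f"
print("\n".join(list_edges(path)))

If the assertion fails after a local rerun, the file was likely regenerated under a different commit of the orientation rules than the one this patch targets.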