Skip to content
24 changes: 16 additions & 8 deletions causallearn/graph/GraphClass.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@ def __init__(self, no_of_var):
self.labels = {}
self.prt_m = {} # store the parents of missingness indicators
self.mvpc = None
self.cardinalities = None # only works when self.data is discrete, i.e. self.test is chisq or gsq
self.is_discrete = False
self.citest_cache = dict()

def set_ind_test(self, indep_test, mvpc=False):
"""Set the conditional independence test that will be used"""
Expand All @@ -51,23 +54,28 @@ def set_ind_test(self, indep_test, mvpc=False):

def ci_test(self, i, j, S):
"""Define the conditional independence test"""
# assert i != j and not i in S and not j in S
if self.mvpc:
return self.test(self.data, self.nx_skel, self.prt_m, i, j, S, self.data.shape[0])
return self.test(self.data, i, j, S)

i, j = (i, j) if (i < j) else (j, i)
ijS_key = (i, j, frozenset(S))
if ijS_key in self.citest_cache:
return self.citest_cache[ijS_key]
# if discrete, assert self.test is chisq or gsq, pass into cardinalities
pValue = self.test(self.data, i, j, S, self.cardinalities) if self.is_discrete \
else self.test(self.data, i, j, S)
self.citest_cache[ijS_key] = pValue
return pValue


def neighbors(self, i):
"""Find the neighbors of node i in adjmat"""
return np.where(self.G.graph[i, :] != 0)[0]

def max_degree(self):
"""Return the maximum number of edges connected to a node in adjmat"""
nodes = range(len(self.G.graph))
max_degree = 0
for i in nodes:
len_neigh_i = len(self.neighbors(i))
if len_neigh_i > max_degree:
max_degree = len_neigh_i
return max_degree
return max(np.sum(self.G.graph != 0, axis=1))

def find_arrow_heads(self):
"""Return the list of i o-> j in adjmat as (i, j)"""
Expand Down
41 changes: 37 additions & 4 deletions causallearn/search/ConstraintBased/FCI.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
citest_cache = dict() # added by haoyue@12/18/2021

from queue import Queue
from causallearn.graph.Edge import Edge
from causallearn.graph.GraphNode import GraphNode
from causallearn.utils.PCUtils.BackgroundKnowledge import BackgroundKnowledge
from causallearn.utils.cit import fisherz
from causallearn.utils.cit import fisherz, chisq, gsq
from causallearn.utils.Fas import fas
from causallearn.graph.Endpoint import Endpoint
from causallearn.utils.ChoiceGenerator import ChoiceGenerator
import numpy as np


class SepsetsPossibleDsep():
Expand Down Expand Up @@ -187,7 +190,15 @@ def get_cond_set(self, node_1, node_2, max_path_length):
condSet = [self.graph.node_map[possParents[index]] for index in choice]
choice = cg.next()

p_value = self.independence_test(self.data, self.graph.node_map[node_1], self.graph.node_map[node_2], tuple(condSet))
X, Y = self.graph.node_map[node_1], self.graph.node_map[node_2]
X, Y = (X, Y) if (X < Y) else (Y, X)
XYS_key = (X, Y, frozenset(condSet))
if XYS_key in citest_cache:
p_value = citest_cache[XYS_key]
else:
p_value = self.independence_test(self.data, X, Y, tuple(condSet))
citest_cache[XYS_key] = p_value

independent = p_value > self.alpha

if independent and noEdgeRequired:
Expand Down Expand Up @@ -427,13 +438,30 @@ def doDdpOrientation(node_d, node_a, node_b, node_c, previous, graph, data, inde
raise Exception("illegal argument!")
path = getPath(node_d, previous)

p_value = independence_test_method(data, graph.node_map[node_d], graph.node_map[node_c], tuple([graph.node_map[nn] for nn in path]))
X, Y = graph.node_map[node_d], graph.node_map[node_c]
X, Y = (X, Y) if (X < Y) else (Y, X)
condSet = tuple([graph.node_map[nn] for nn in path])
XYS_key = (X, Y, frozenset(condSet))
if XYS_key in citest_cache:
p_value = citest_cache[XYS_key]
else:
p_value = independence_test_method(data, X, Y, condSet)
citest_cache[XYS_key] = p_value
ind = p_value > alpha

path2 = list(path)
path2.remove(node_b)

p_value2 = independence_test_method(data, graph.node_map[node_d], graph.node_map[node_c], tuple([graph.node_map[nn2] for nn2 in path2]))
X, Y = graph.node_map[node_d], graph.node_map[node_c]
X, Y = (X, Y) if (X < Y) else (Y, X)
condSet = tuple([graph.node_map[nn2] for nn2 in path2])
XYS_key = (X, Y, frozenset(condSet))
if XYS_key in citest_cache:
p_value2 = citest_cache[XYS_key]
else:
p_value2 = independence_test_method(data, X, Y, condSet)
citest_cache[XYS_key] = p_value2

ind2 = p_value2 > alpha

if not ind and not ind2:
Expand Down Expand Up @@ -571,6 +599,11 @@ def fci(dataset, independence_test_method = fisherz, alpha=0.05, depth=-1, max_p
-------
graph : Causal graph
'''
def _unique(column):
return np.unique(column, return_inverse=True)[1]

if independence_test_method == chisq or independence_test_method == gsq:
dataset = np.apply_along_axis(_unique, 0, dataset).astype(np.int64)


## ------- check parameters ------------
Expand Down
14 changes: 7 additions & 7 deletions causallearn/search/ConstraintBased/PC.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,16 @@


def pc(data, alpha, indep_test, stable, uc_rule, uc_priority, mvpc=False, correction_name='MV_Crtn_Fisher_Z',
background_knowledge=None):
background_knowledge=None, verbose=False):
if mvpc:
return mvpc_alg(data=data, alpha=alpha, indep_test=indep_test, correction_name=correction_name, stable=stable,
uc_rule=uc_rule, uc_priority=uc_priority)
uc_rule=uc_rule, uc_priority=uc_priority, verbose=verbose)
else:
return pc_alg(data=data, alpha=alpha, indep_test=indep_test, stable=stable, uc_rule=uc_rule,
uc_priority=uc_priority, background_knowledge=background_knowledge)
uc_priority=uc_priority, background_knowledge=background_knowledge, verbose=verbose)


def pc_alg(data, alpha, indep_test, stable, uc_rule, uc_priority, background_knowledge=None):
def pc_alg(data, alpha, indep_test, stable, uc_rule, uc_priority, background_knowledge=None, verbose=False):
'''
Perform Peter-Clark algorithm for causal discovery

Expand Down Expand Up @@ -56,7 +56,7 @@ def pc_alg(data, alpha, indep_test, stable, uc_rule, uc_priority, background_kno

start = time.time()
cg_1 = SkeletonDiscovery.skeleton_discovery(data, alpha, indep_test, stable,
background_knowledge=background_knowledge)
background_knowledge=background_knowledge, verbose=verbose)

if background_knowledge is not None:
orient_by_background_knowledge(cg_1, background_knowledge)
Expand Down Expand Up @@ -89,7 +89,7 @@ def pc_alg(data, alpha, indep_test, stable, uc_rule, uc_priority, background_kno
return cg


def mvpc_alg(data, alpha, indep_test, correction_name, stable, uc_rule, uc_priority):
def mvpc_alg(data, alpha, indep_test, correction_name, stable, uc_rule, uc_priority, verbose):
"""
:param data: data set (numpy ndarray)
:param alpha: desired significance level (float) in (0, 1)
Expand Down Expand Up @@ -125,7 +125,7 @@ def mvpc_alg(data, alpha, indep_test, correction_name, stable, uc_rule, uc_prior

## Step 2:
## a) Run PC algorithm with the 1st step skeleton;
cg_pre = SkeletonDiscovery.skeleton_discovery(data, alpha, indep_test, stable)
cg_pre = SkeletonDiscovery.skeleton_discovery(data, alpha, indep_test, stable, verbose=verbose)
cg_pre.to_nx_skeleton()
# print('Finish skeleton search with test-wise deletion.')

Expand Down
18 changes: 16 additions & 2 deletions causallearn/utils/Fas.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from causallearn.graph.Edges import Edges
from causallearn.utils.ChoiceGenerator import ChoiceGenerator
from copy import deepcopy
from causallearn.search.ConstraintBased.FCI import citest_cache


def possible_parents(node_x, adjx, knowledge=None):
Expand Down Expand Up @@ -47,7 +48,12 @@ def searchAtDepth0(data, nodes, adjacencies, sep_sets, independence_test_method=
print(nodes[i + 1].get_name())

for j in range(i+1, len(nodes)):
p_value = independence_test_method(data, i, j, tuple(empty))
ijS_key = (i, j, frozenset())
if ijS_key in citest_cache:
p_value = citest_cache[ijS_key]
else:
p_value = independence_test_method(data, i, j, tuple(empty))
citest_cache[ijS_key] = p_value
independent = p_value > alpha
no_edge_required = True if knowledge is None else \
((not knowledge.is_required(nodes[i], nodes[j])) or knowledge.is_required(nodes[j], nodes[i]))
Expand Down Expand Up @@ -80,7 +86,15 @@ def edge(adjx, i, adjacencies_completed_edge):
cond_set = [nodes.index(ppx[index]) for index in choice]
choice = cg.next()

p_value = independence_test_method(data, i, nodes.index(adjx[j]), tuple(cond_set))
Y = nodes.index(adjx[j])
X, Y = (i, Y) if (i < Y) else (Y, i)
XYS_key = (X, Y, frozenset(cond_set))
if XYS_key in citest_cache:
p_value = citest_cache[XYS_key]
else:
p_value = independence_test_method(data, X, Y, tuple(cond_set))
citest_cache[XYS_key] = p_value

independent = p_value > alpha

no_edge_required = True if knowledge is None else \
Expand Down
24 changes: 19 additions & 5 deletions causallearn/utils/PCUtils/SkeletonDiscovery.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@

from causallearn.graph.GraphClass import CausalGraph
from causallearn.utils.PCUtils.Helper import append_value
from causallearn.utils.cit import chisq, gsq


def skeleton_discovery(data, alpha, indep_test, stable=True, background_knowledge=None):
def skeleton_discovery(data, alpha, indep_test, stable=True, background_knowledge=None, verbose=False):
'''
Perform skeleton discovery

Expand All @@ -31,8 +32,21 @@ def skeleton_discovery(data, alpha, indep_test, stable=True, background_knowledg

no_of_var = data.shape[1]
cg = CausalGraph(no_of_var)
cg.data = data
cg.set_ind_test(indep_test)
if indep_test == chisq or indep_test == gsq:
# if dealing with discrete data, data is numpy.ndarray with n rows m columns,
# for each column, translate the discrete values to int indexs starting from 0,
# e.g. [45, 45, 6, 7, 6, 7] -> [2, 2, 0, 1, 0, 1]
# ['apple', 'apple', 'pear', 'peach', 'pear'] -> [0, 0, 2, 1, 2]
# in old code, its presumed that discrete `data` is already indexed,
# but here we make sure it's in indexed form, so allow more user input e.g. 'apple' ..
def _unique(column):
return np.unique(column, return_inverse=True)[1]
cg.is_discrete = True
cg.data = np.apply_along_axis(_unique, 0, data).astype(np.int64)
cg.cardinalities = np.max(cg.data, axis=0) + 1
else:
cg.data = data

depth = -1
while cg.max_degree() - 1 > depth:
Expand All @@ -48,7 +62,7 @@ def skeleton_discovery(data, alpha, indep_test, stable=True, background_knowledg
if background_knowledge is not None and (
background_knowledge.is_forbidden(cg.G.nodes[x], cg.G.nodes[y])
and background_knowledge.is_forbidden(cg.G.nodes[y], cg.G.nodes[x])):
print('%d ind %d | %s with background background_knowledge\n' % (x, y, S))
if verbose: print('%d ind %d | %s with background background_knowledge\n' % (x, y, S))
if not stable:
edge1 = cg.G.get_edge(cg.G.nodes[x], cg.G.nodes[y])
if edge1 is not None:
Expand All @@ -65,7 +79,7 @@ def skeleton_discovery(data, alpha, indep_test, stable=True, background_knowledg
else:
p = cg.ci_test(x, y, S)
if p > alpha:
print('%d ind %d | %s with p-value %f\n' % (x, y, S, p))
if verbose: print('%d ind %d | %s with p-value %f\n' % (x, y, S, p))
if not stable:
edge1 = cg.G.get_edge(cg.G.nodes[x], cg.G.nodes[y])
if edge1 is not None:
Expand All @@ -80,7 +94,7 @@ def skeleton_discovery(data, alpha, indep_test, stable=True, background_knowledg
append_value(cg.sepset, y, x, S)
break
else:
print('%d dep %d | %s with p-value %f\n' % (x, y, S, p))
if verbose: print('%d dep %d | %s with p-value %f\n' % (x, y, S, p))

for (x, y) in list(set(edge_removal)):
edge1 = cg.G.get_edge(cg.G.nodes[x], cg.G.nodes[y])
Expand Down
Loading