1 change: 1 addition & 0 deletions .gitignore
@@ -140,4 +140,5 @@ dmypy.json

# Developer's local tests
*localtest.py
data-localtest/

5 changes: 1 addition & 4 deletions causallearn/graph/GraphClass.py
@@ -44,16 +44,13 @@ def __init__(self, no_of_var):
self.cardinalities = None # only works when self.data is discrete, i.e. self.test is chisq or gsq
self.is_discrete = False
self.citest_cache = dict()
self.data_hash_key = None
self.ci_test_hash_key = None

def set_ind_test(self, indep_test, mvpc=False):
"""Set the conditional independence test that will be used"""
# assert name_of_test in ["Fisher_Z", "Chi_sq", "G_sq"]
if mvpc:
self.mvpc = True
self.test = indep_test
self.ci_test_hash_key = hash(indep_test)

def ci_test(self, i, j, S):
"""Define the conditional independence test"""
@@ -62,7 +59,7 @@ def ci_test(self, i, j, S):
return self.test(self.data, self.nx_skel, self.prt_m, i, j, S, self.data.shape[0])

i, j = (i, j) if (i < j) else (j, i)
ijS_key = (i, j, frozenset(S), self.data_hash_key, self.ci_test_hash_key)
ijS_key = (i, j, frozenset(S))
if ijS_key in self.citest_cache:
return self.citest_cache[ijS_key]
# if discrete, assert self.test is chisq or gsq, pass into cardinalities
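A minimal sketch (toy class, not causallearn's API) of why the simplified per-instance key is sufficient: every CausalGraph owns its own citest_cache and holds exactly one dataset and one test, so the dropped data/test hash components could never disambiguate anything within a single instance.

```python
class TinyGraph:
    """Toy stand-in for a per-instance CI-test cache keyed only by (i, j, S)."""

    def __init__(self, data, test):
        self.data, self.test = data, test
        self.citest_cache = {}          # one cache per (data, test) pair by construction

    def ci_test(self, i, j, S):
        i, j = (i, j) if i < j else (j, i)      # canonical ordering of the pair
        key = (i, j, frozenset(S))              # no data/test hash needed inside one instance
        if key not in self.citest_cache:
            self.citest_cache[key] = self.test(self.data, i, j, S)
        return self.citest_cache[key]


g = TinyGraph(data=None, test=lambda d, i, j, S: 0.3)
assert g.ci_test(2, 0, [1]) == g.ci_test(0, 2, (1,)) == 0.3   # second call is a cache hit
```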
41 changes: 14 additions & 27 deletions causallearn/search/ConstraintBased/FCI.py
@@ -4,7 +4,7 @@
from causallearn.graph.GraphNode import GraphNode
from causallearn.utils.PCUtils.BackgroundKnowledge import BackgroundKnowledge
from causallearn.utils.cit import *
from causallearn.utils.Fas import fas, citest_cache
from causallearn.utils import Fas
from causallearn.graph.Endpoint import Endpoint
from causallearn.utils.ChoiceGenerator import ChoiceGenerator
import numpy as np
@@ -20,8 +20,6 @@ def __init__(self, data, graph, independence_test, alpha, knowledge, depth, maxP
self.depth = depth
self.maxPathLength = maxPathLength
self.verbose = verbose
self.data_hash_key = hash(self.data.tobytes())
self.ci_test_hash_key = hash(self.independence_test)


def traverseSemiDirected(self, node, edge):
@@ -193,13 +191,8 @@ def get_cond_set(self, node_1, node_2, max_path_length):
choice = cg.next()

X, Y = self.graph.node_map[node_1], self.graph.node_map[node_2]
X, Y = (X, Y) if (X < Y) else (Y, X)
XYS_key = (X, Y, frozenset(condSet), self.data_hash_key, self.ci_test_hash_key)
if XYS_key in citest_cache:
p_value = citest_cache[XYS_key]
else:
p_value = self.independence_test(self.data, X, Y, tuple(condSet))
citest_cache[XYS_key] = p_value
p_value = Fas.ci_test(self.independence_test, self.data,
X, Y, tuple(condSet))
independent = p_value > self.alpha

if independent and noEdgeRequired:
@@ -440,30 +433,18 @@ def doDdpOrientation(node_d, node_a, node_b, node_c, previous, graph, data, inde
path = getPath(node_d, previous)

X, Y = graph.node_map[node_d], graph.node_map[node_c]
X, Y = (X, Y) if (X < Y) else (Y, X)
condSet = tuple([graph.node_map[nn] for nn in path])
data_hash_key = hash(data.tobytes())
ci_test_hash_key = hash(independence_test_method)
XYS_key = (X, Y, frozenset(condSet), data_hash_key, ci_test_hash_key)
if XYS_key in citest_cache:
p_value = citest_cache[XYS_key]
else:
p_value = independence_test_method(data, X, Y, condSet)
citest_cache[XYS_key] = p_value
p_value = Fas.ci_test(independence_test_method, data,
X, Y, tuple(condSet))
ind = p_value > alpha

path2 = list(path)
path2.remove(node_b)

X, Y = graph.node_map[node_d], graph.node_map[node_c]
X, Y = (X, Y) if (X < Y) else (Y, X)
condSet = tuple([graph.node_map[nn2] for nn2 in path2])
XYS_key = (X, Y, frozenset(condSet), data_hash_key, ci_test_hash_key)
if XYS_key in citest_cache:
p_value2 = citest_cache[XYS_key]
else:
p_value2 = independence_test_method(data, X, Y, condSet)
citest_cache[XYS_key] = p_value2
p_value2 = Fas.ci_test(independence_test_method, data,
X, Y, condSet)
ind2 = p_value2 > alpha

if not ind and not ind2:
@@ -613,11 +594,17 @@ def fci(dataset, independence_test_method = fisherz, alpha=0.05, depth=-1, max_p
if dataset.shape[0] < dataset.shape[1]:
warnings.warn("The number of features is much larger than the sample size!")

Fas.citest_cache = dict() # DEBUG@2021/12/23, must refresh cache every time at initialization
Fas.cardinalities = None
Fas.is_discrete = False

def _unique(column):
return np.unique(column, return_inverse=True)[1]

if independence_test_method == chisq or independence_test_method == gsq:
dataset = np.apply_along_axis(_unique, 0, dataset).astype(np.int64)
Fas.is_discrete = True
Fas.cardinalities = np.max(dataset, axis=0) + 1


## ------- check parameters ------------
@@ -636,7 +623,7 @@ def _unique(column):
nodes.append(node)

# FAS (“Fast Adjacency Search”) is the adjacency search of the PC algorithm, used as a first step for the FCI algorithm.
graph, sep_sets = fas(dataset, nodes, independence_test_method=independence_test_method, alpha=alpha, knowledge=background_knowledge, depth=depth, verbose=verbose)
graph, sep_sets = Fas.fas(dataset, nodes, independence_test_method=independence_test_method, alpha=alpha, knowledge=background_knowledge, depth=depth, verbose=verbose)

# reorient all edges with CIRCLE Endpoint
ori_edges = graph.get_graph_edges()
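One cost the removed code paid: doDdpOrientation recomputed hash(data.tobytes()) on every call just to build a cache key. A rough, hedged illustration of that overhead (hypothetical array size; timings vary by machine):

```python
import time

import numpy as np

# 5000 x 200 float64 ~= 8 MB; tobytes() copies the whole buffer before hashing it.
data = np.random.randn(5000, 200)

start = time.perf_counter()
for _ in range(100):
    _ = hash(data.tobytes())     # what the old per-call cache key had to recompute
elapsed = time.perf_counter() - start
print(f"100 buffer hashes took {elapsed:.3f}s")
```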
3 changes: 2 additions & 1 deletion causallearn/search/ConstraintBased/PC.py
@@ -4,7 +4,7 @@

import networkx as nx
import numpy as np

from causallearn.utils import Fas
from causallearn.graph.GraphClass import CausalGraph
from causallearn.utils.PCUtils import SkeletonDiscovery, UCSepset, Meek, Helper
from causallearn.utils.PCUtils.BackgroundKnowledgeOrientUtils import orient_by_background_knowledge
@@ -71,6 +71,7 @@ def pc_alg(data, alpha, indep_test, stable, uc_rule, uc_priority, background_kno
cg_1 = SkeletonDiscovery.skeleton_discovery_using_fas(data, alpha, indep_test, stable,
background_knowledge=background_knowledge, verbose=verbose,
show_progress=show_progress)
cg_1.citest_cache = Fas.citest_cache # for citests in further UCSepset.uc_sepset

if background_knowledge is not None:
orient_by_background_knowledge(cg_1, background_knowledge)
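A toy sketch (not causallearn's API) of the contract the added line relies on: the module-level Fas cache and cg_1.citest_cache become the same dict object, and both sides build the identical key (min(i, j), max(i, j), frozenset(S)), so p-values computed during FAS are reused verbatim by the later UCSepset phase.

```python
fas_cache = {}

def key(i, j, S):
    # same canonical key shape used by Fas.ci_test and CausalGraph.ci_test after this PR
    i, j = (i, j) if i < j else (j, i)
    return (i, j, frozenset(S))

fas_cache[key(3, 1, (0, 2))] = 0.07          # entry written during skeleton search
cg_cache = fas_cache                          # the PR's cg_1.citest_cache = Fas.citest_cache
assert cg_cache[key(1, 3, [2, 0])] == 0.07    # later lookup hits the same entry, no re-test
```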
50 changes: 19 additions & 31 deletions causallearn/utils/Fas.py
@@ -7,6 +7,20 @@
from tqdm.auto import tqdm

citest_cache = dict()
cardinalities = None # only works for discrete data
is_discrete = False


def ci_test(independence_test, data, i, j, S):
i, j = (i, j) if (i < j) else (j, i)
ijS_key = (i, j, frozenset(S))
if ijS_key in citest_cache:
return citest_cache[ijS_key]
# if discrete, the independence test is assumed to be chisq or gsq, which take cardinalities
pValue = independence_test(data, i, j, S, cardinalities) if is_discrete \
else independence_test(data, i, j, S)
citest_cache[ijS_key] = pValue
return pValue


def possible_parents(node_x, adjx, knowledge=None):
@@ -45,8 +59,6 @@ def forbiddenEdge(node_x, node_y, knowledge):
def searchAtDepth0(data, nodes, adjacencies, sep_sets, independence_test_method=fisherz, alpha=0.05,
verbose=False, knowledge=None, pbar=None):
empty = []
data_hash_key = hash(data.tobytes())
ci_test_hash_key = hash(independence_test_method)
show_progress = not pbar is None
if show_progress: pbar.reset()
for i in range(len(nodes)):
@@ -56,12 +68,7 @@ def searchAtDepth0(data, nodes, adjacencies, sep_sets, independence_test_method=
print(nodes[i + 1].get_name())

for j in range(i+1, len(nodes)):
ijS_key = (i, j, frozenset(), data_hash_key, ci_test_hash_key)
if ijS_key in citest_cache:
p_value = citest_cache[ijS_key]
else:
p_value = independence_test_method(data, i, j, tuple(empty))
citest_cache[ijS_key] = p_value
p_value = ci_test(independence_test_method, data, i, j, tuple(empty))
independent = p_value > alpha
no_edge_required = True if knowledge is None else \
((not knowledge.is_required(nodes[i], nodes[j])) or knowledge.is_required(nodes[j], nodes[i]))
@@ -80,9 +87,6 @@ def searchAtDepth(data, depth, nodes, adjacencies, sep_sets, independence_test_method=fisherz, alpha=0.05,
def searchAtDepth(data, depth, nodes, adjacencies, sep_sets, independence_test_method=fisherz, alpha=0.05,
verbose=False, knowledge=None, pbar=None):

data_hash_key = hash(data.tobytes())
ci_test_hash_key = hash(independence_test_method)

def edge(adjx, i, adjacencies_completed_edge):
for j in range(len(adjx)):
node_y = adjx[j]
@@ -98,14 +102,8 @@ def edge(adjx, i, adjacencies_completed_edge):
cond_set = [nodes.index(ppx[index]) for index in choice]
choice = cg.next()

Y = nodes.index(adjx[j])
X, Y = (i, Y) if (i < Y) else (Y, i)
XYS_key = (X, Y, frozenset(cond_set), data_hash_key, ci_test_hash_key)
if XYS_key in citest_cache:
p_value = citest_cache[XYS_key]
else:
p_value = independence_test_method(data, X, Y, tuple(cond_set))
citest_cache[XYS_key] = p_value
p_value = ci_test(independence_test_method, data,
i, nodes.index(adjx[j]), tuple(cond_set))

independent = p_value > alpha

@@ -166,10 +164,6 @@ def edge(adjx, i, adjacencies_completed_edge):

def searchAtDepth_not_stable(data, depth, nodes, adjacencies, sep_sets, independence_test_method=fisherz, alpha=0.05,
verbose=False, knowledge=None, pbar=None):

data_hash_key = hash(data.tobytes())
ci_test_hash_key = hash(independence_test_method)

def edge(adjx, i, adjacencies_completed_edge):
for j in range(len(adjx)):
node_y = adjx[j]
@@ -185,14 +179,8 @@ def edge(adjx, i, adjacencies_completed_edge):
cond_set = [nodes.index(ppx[index]) for index in choice]
choice = cg.next()

Y = nodes.index(adjx[j])
X, Y = (i, Y) if (i < Y) else (Y, i)
XYS_key = (X, Y, frozenset(cond_set), data_hash_key, ci_test_hash_key)
if XYS_key in citest_cache:
p_value = citest_cache[XYS_key]
else:
p_value = independence_test_method(data, X, Y, tuple(cond_set))
citest_cache[XYS_key] = p_value
p_value = ci_test(independence_test_method, data,
i, nodes.index(adjx[j]), tuple(cond_set))

independent = p_value > alpha

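A hedged usage sketch of the new module-level helper (assuming the causallearn package from this branch is importable; the counting test is a stand-in, not a real CI test). It shows the symmetric key making (i, j, S) and (j, i, S) share one cache entry, and mirrors the cache reset that fci and skeleton_discovery_using_fas now perform, which is needed because the key no longer embeds a data hash.

```python
import numpy as np

from causallearn.utils import Fas

calls = {"n": 0}

def counting_test(data, i, j, S):
    # stand-in CI test that only counts how often it is actually evaluated
    calls["n"] += 1
    return 0.5  # pretend p-value

# fresh module-level state, as the entry points (fci, skeleton_discovery_using_fas) now do
Fas.citest_cache = dict()
Fas.is_discrete = False
Fas.cardinalities = None

data = np.random.randn(100, 3)
p1 = Fas.ci_test(counting_test, data, 0, 2, (1,))
p2 = Fas.ci_test(counting_test, data, 2, 0, (1,))   # same pair, reversed order
assert p1 == p2 and calls["n"] == 1                 # second call served from the cache
```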
10 changes: 7 additions & 3 deletions causallearn/utils/PCUtils/SkeletonDiscovery.py
@@ -1,5 +1,5 @@
from itertools import combinations
from causallearn.utils.Fas import fas
from causallearn.utils import Fas
import numpy as np
from causallearn.graph.GraphClass import CausalGraph
from causallearn.utils.PCUtils.Helper import append_value
@@ -51,7 +51,6 @@ def _unique(column):
cg.cardinalities = np.max(cg.data, axis=0) + 1
else:
cg.data = data
cg.data_hash_key = hash(data.tobytes())

depth = -1
pbar = tqdm(total=no_of_var) if show_progress else None
@@ -143,6 +142,9 @@ def skeleton_discovery_using_fas(data, alpha, indep_test, stable=True, backgroun

assert type(data) == np.ndarray
assert 0 < alpha < 1
Fas.citest_cache = dict() # DEBUG@2021/12/23, must refresh cache every time at initialization
Fas.cardinalities = None
Fas.is_discrete = False

no_of_var = data.shape[1]
cg = CausalGraph(no_of_var)
@@ -160,11 +162,13 @@ def _unique(column):
cg.is_discrete = True
cg.data = np.apply_along_axis(_unique, 0, data).astype(np.int64)
cg.cardinalities = np.max(cg.data, axis=0) + 1
Fas.cardinalities = cg.cardinalities # DEBUG@2021/12/23, no repeat calculating cardinalities at every chisq/gsq.
Fas.is_discrete = True
else:
cg.data = data


graph, sep_sets = fas(cg.data, cg.G.nodes, independence_test_method=indep_test, alpha=alpha,
graph, sep_sets = Fas.fas(cg.data, cg.G.nodes, independence_test_method=indep_test, alpha=alpha,
knowledge=background_knowledge, depth=-1, verbose=verbose, stable=stable, show_progress=show_progress)

for (x, y) in sep_sets.keys():
6 changes: 2 additions & 4 deletions causallearn/utils/cit.py
@@ -176,15 +176,13 @@ def fisherz(data, X, Y, condition_set, correlation_matrix=None):
return p


def chisq(data, X, Y, conditioning_set, cardinalities=None):
def chisq(data, X, Y, conditioning_set, cardinalities):
# though cardinalities can be computed from data, we pass them as an argument
# to avoid recomputing each variable's cardinality
if cardinalities is None: cardinalities = np.max(data, axis=0) + 1
indexs = list(conditioning_set) + [X, Y]
return chisq_or_gsq_test(data[:, indexs].T, cardinalities[indexs])

def gsq(data, X, Y, conditioning_set, cardinalities=None):
if cardinalities is None: cardinalities = np.max(data, axis=0) + 1
def gsq(data, X, Y, conditioning_set, cardinalities):
indexs = list(conditioning_set) + [X, Y]
return chisq_or_gsq_test(data[:, indexs].T, cardinalities[indexs], G_sq=True)

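Since chisq and gsq now require cardinalities, callers must precompute them once. A short sketch of the expected preparation, mirroring the _unique / np.max pattern added in FCI.py and SkeletonDiscovery.py (raw is a hypothetical discrete dataset):

```python
import numpy as np

def _unique(column):
    # map arbitrary category labels to consecutive integers 0..k-1
    return np.unique(column, return_inverse=True)[1]

raw = np.array([[1, 5], [3, 5], [1, 7], [3, 7]])              # hypothetical discrete data
data = np.apply_along_axis(_unique, 0, raw).astype(np.int64)  # recoded column by column
cardinalities = np.max(data, axis=0) + 1                      # number of levels per variable

# chisq(data, X, Y, S, cardinalities) / gsq(data, X, Y, S, cardinalities) would then be
# called with this precomputed array instead of recomputing it inside every test.
```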