diff --git a/causalnex/structure/data_generators/wrappers.py b/causalnex/structure/data_generators/wrappers.py index f8abac1..0d9774c 100644 --- a/causalnex/structure/data_generators/wrappers.py +++ b/causalnex/structure/data_generators/wrappers.py @@ -533,10 +533,12 @@ def generate_dataframe_dynamic( # pylint: disable=R0914 ValueError: if sem_type isn't linear-gauss/linear_exp/linear-gumbel """ s_types = ("linear-gauss", "linear-exp", "linear-gumbel") + if sem_type not in s_types: raise ValueError(f"unknown sem type {sem_type}. Available types are: {s_types}") - intra_nodes = sorted(el for el in g.nodes if "_lag0" in el) - inter_nodes = sorted(el for el in g.nodes if "_lag0" not in el) + + intra_nodes = sorted([el for el in g.nodes if "_lag0" in el], key=lambda t: t.split('_lag')[1]) + inter_nodes = sorted([el for el in g.nodes if "_lag0" not in el], key=lambda t: t.split('_lag')[1]) w_mat = nx.to_numpy_array(g, nodelist=intra_nodes) a_mat = nx.to_numpy_array(g, nodelist=intra_nodes + inter_nodes)[ len(intra_nodes) :, : len(intra_nodes) @@ -612,7 +614,7 @@ def gen_stationary_dyn_net_and_df( # pylint: disable=R0913, R0914 - full: constructs a fully-connected graph - degree has no effect graph_type_inter: - erdos-renyi: constructs a graph such that the probability of any given edge is degree / (num_nodes - 1) - - full: connect all past nodes to all present nodesw_min_intra: + - full: connect all past nodes to all present nodes w_min_intra: minimum weight on intra-slice adjacency matrix w_max_intra: maximum weight on intra-slice adjacency matrix w_min_inter: minimum weight on inter-slice adjacency matrix diff --git a/causalnex/structure/dynotears.py b/causalnex/structure/dynotears.py index 42faf29..5163b14 100644 --- a/causalnex/structure/dynotears.py +++ b/causalnex/structure/dynotears.py @@ -49,7 +49,8 @@ def from_pandas_dynamic( # pylint: disable=too-many-arguments lambda_a: float = 0.1, max_iter: int = 100, h_tol: float = 1e-8, - w_threshold: float = 0.0, + tau_w: float = 0.0, + tau_a: float = 0.0, tabu_edges: List[Tuple[int, int, int]] = None, tabu_parent_nodes: List[int] = None, tabu_child_nodes: List[int] = None, @@ -116,7 +117,8 @@ def from_pandas_dynamic( # pylint: disable=too-many-arguments lambda_a, max_iter, h_tol, - w_threshold, + tau_w, + tau_a, tabu_edges, tabu_parent_nodes, tabu_child_nodes, @@ -162,7 +164,8 @@ def from_numpy_dynamic( # pylint: disable=too-many-arguments lambda_a: float = 0.1, max_iter: int = 100, h_tol: float = 1e-8, - w_threshold: float = 0.0, + tau_w: float = 0.0, + tau_a: float = 0.0, tabu_edges: List[Tuple[int, int, int]] = None, tabu_parent_nodes: List[int] = None, tabu_child_nodes: List[int] = None, @@ -246,8 +249,8 @@ def from_numpy_dynamic( # pylint: disable=too-many-arguments X, Xlags, bnds, lambda_w, lambda_a, max_iter, h_tol ) - w_est[np.abs(w_est) < w_threshold] = 0 - a_est[np.abs(a_est) < w_threshold] = 0 + w_est[np.abs(w_est) < tau_w] = 0 + a_est[np.abs(a_est) < tau_a] = 0 sm = _matrices_to_structure_model(w_est, a_est) return sm diff --git a/tests/structure/test_dynotears.py b/tests/structure/test_dynotears.py index 31094e6..687eb05 100644 --- a/tests/structure/test_dynotears.py +++ b/tests/structure/test_dynotears.py @@ -116,7 +116,7 @@ def test_expected_structure_learned_p1(self, data_dynotears_p1): """ sm = from_numpy_dynamic( - data_dynotears_p1["X"], data_dynotears_p1["Y"], w_threshold=0.2 + data_dynotears_p1["X"], data_dynotears_p1["Y"], tau_w=0.2 ) w_edges = [ (f"{i}_lag0", f"{j}_lag0") @@ -145,7 +145,7 @@ def test_expected_structure_learned_p2(self, data_dynotears_p2): """ sm = from_numpy_dynamic( - data_dynotears_p2["X"], data_dynotears_p2["Y"], w_threshold=0.25 + data_dynotears_p2["X"], data_dynotears_p2["Y"], tau_w=0.25 ) w_edges = [ (f"{i}_lag0", f"{j}_lag0") @@ -250,7 +250,7 @@ def test_all_columns_in_structure(self, data_dynotears_p2): def test_isolated_nodes_exist(self, data_dynotears_p2): """Isolated nodes should still be in the learned structure""" sm = from_numpy_dynamic( - data_dynotears_p2["X"], data_dynotears_p2["Y"], w_threshold=1 + data_dynotears_p2["X"], data_dynotears_p2["Y"], tau_w=1 ) assert len(sm.edges) == 2 assert len(sm.nodes) == 15 @@ -273,7 +273,7 @@ def test_certain_relationships_get_near_certain_weight(self): [[np.sqrt(el), np.sqrt(el)] for el in np.random.choice(100, size=500)], columns=["a", "b"], ) - sm = from_numpy_dynamic(data.values[1:], data.values[:-1], w_threshold=0.1) + sm = from_numpy_dynamic(data.values[1:], data.values[:-1], tau_w=0.1) edge = ( sm.get_edge_data("1_lag0", "0_lag0") or sm.get_edge_data("0_lag0", "1_lag0") )["weight"] @@ -287,7 +287,7 @@ def test_inverse_relationships_get_negative_weight(self): data = pd.DataFrame( [[el, -el] for el in np.random.choice(100, size=500)], columns=["a", "b"] ) - sm = from_numpy_dynamic(data.values[1:], data.values[:-1], w_threshold=0.1) + sm = from_numpy_dynamic(data.values[1:], data.values[:-1], tau_w=0.1) edge = ( sm.get_edge_data("1_lag0", "0_lag0") or sm.get_edge_data("0_lag0", "1_lag0") )["weight"] @@ -299,20 +299,20 @@ def test_no_cycles(self, data_dynotears_p2): """ sm = from_numpy_dynamic( - data_dynotears_p2["X"], data_dynotears_p2["Y"], w_threshold=0.05 + data_dynotears_p2["X"], data_dynotears_p2["Y"], tau_w=0.05 ) assert nx.algorithms.is_directed_acyclic_graph(sm) def test_tabu_edges_on_non_existing_edges_do_nothing(self, data_dynotears_p2): """If tabu edges do not exist in the original unconstrained network then nothing changes""" sm = from_numpy_dynamic( - data_dynotears_p2["X"], data_dynotears_p2["Y"], w_threshold=0.2 + data_dynotears_p2["X"], data_dynotears_p2["Y"], tau_w=0.2 ) sm_2 = from_numpy_dynamic( data_dynotears_p2["X"], data_dynotears_p2["Y"], - w_threshold=0.2, + tau_w=0.2, tabu_edges=[(0, 0, 0), (0, 0, 1), (0, 0, 2), (0, 0, 3)], ) assert set(sm_2.edges) == set(sm.edges) @@ -391,7 +391,7 @@ def test_expected_structure_learned_p1(self, data_dynotears_p1): sm = from_pandas_dynamic( df, p=1, - w_threshold=0.2, + tau_w=0.2, ) map_ = dict(zip(range(5), ["a", "b", "c", "d", "e"])) w_edges = [ @@ -430,7 +430,7 @@ def test_expected_structure_learned_p2(self, data_dynotears_p2): sm = from_pandas_dynamic( df, p=2, - w_threshold=0.25, + tau_w=0.25, ) map_ = dict(zip(range(5), ["a", "b", "c", "d", "e"])) w_edges = [ @@ -532,7 +532,7 @@ def test_all_columns_in_structure(self, data_dynotears_p2): sm = from_pandas_dynamic( pd.DataFrame(data_dynotears_p2["X"], columns=["a", "b", "c", "d", "e"]), p=2, - w_threshold=0.4, + tau_w=0.4, ) assert sorted(sm.nodes) == [ f"{var}_lag{l_val}" @@ -547,7 +547,7 @@ def test_isolated_nodes_exist(self, data_dynotears_p2): df.loc[-2, :] = data_dynotears_p2["Y"][0, 5:10] df = df.sort_index() - sm = from_pandas_dynamic(df, p=2, w_threshold=1) + sm = from_pandas_dynamic(df, p=2, tau_w=1) assert len(sm.edges) == 2 assert len(sm.nodes) == 15 @@ -572,7 +572,7 @@ def test_certain_relationships_get_near_certain_weight(self): [[np.sqrt(el), np.sqrt(el)] for el in np.random.choice(100, size=500)], columns=["a", "b"], ) - sm = from_pandas_dynamic(data, p=1, w_threshold=0.1) + sm = from_pandas_dynamic(data, p=1, tau_w=0.1) edge = ( sm.get_edge_data("b_lag0", "a_lag0") or sm.get_edge_data("a_lag0", "b_lag0") )["weight"] @@ -586,7 +586,7 @@ def test_inverse_relationships_get_negative_weight(self): data = pd.DataFrame( [[el, -el] for el in np.random.choice(100, size=500)], columns=["a", "b"] ) - sm = from_pandas_dynamic(data, p=1, w_threshold=0.1) + sm = from_pandas_dynamic(data, p=1, tau_w=0.1) edge = ( sm.get_edge_data("b_lag0", "a_lag0") or sm.get_edge_data("a_lag0", "b_lag0") )["weight"] @@ -599,7 +599,7 @@ def test_no_cycles(self, data_dynotears_p2): sm = from_pandas_dynamic( pd.DataFrame(data_dynotears_p2["X"], columns=["a", "b", "c", "d", "e"]), p=2, - w_threshold=0.05, + tau_w=0.05, ) assert nx.algorithms.is_directed_acyclic_graph(sm) @@ -613,12 +613,12 @@ def test_tabu_edges_on_non_existing_edges_do_nothing(self, data_dynotears_p2): sm = from_pandas_dynamic( df, p=2, - w_threshold=0.2, + tau_w=0.2, ) sm_2 = from_pandas_dynamic( df, p=2, - w_threshold=0.2, + tau_w=0.2, tabu_edges=[(0, "a", "a"), (0, "a", "b"), (0, "a", "c"), (0, "a", "d")], ) assert set(sm_2.edges) == set(sm.edges) @@ -636,9 +636,9 @@ def test_list_of_dfs_as_input(self, data_dynotears_p2): df_ = df.copy() df_.index = range(100, 152) df = pd.concat([df, df_]) - sm = from_pandas_dynamic(df, p=2, w_threshold=0.05) - sm_1 = from_pandas_dynamic([df], p=2, w_threshold=0.05) - sm_2 = from_pandas_dynamic([df, df], p=2, w_threshold=0.05) + sm = from_pandas_dynamic(df, p=2, tau_w=0.05) + sm_1 = from_pandas_dynamic([df], p=2, tau_w=0.05) + sm_2 = from_pandas_dynamic([df, df], p=2, tau_w=0.05) assert list(sm_2.edges) == list(sm_1.edges) assert list(sm.edges) == list(sm_1.edges) @@ -664,8 +664,8 @@ def test_discondinuity(self): index=np.arange(200, 300), ) - sm = from_pandas_dynamic(pd.concat([df, df_2], axis=0), p=2, w_threshold=0.05) - sm_1 = from_pandas_dynamic([df, df_2], p=2, w_threshold=0.05) + sm = from_pandas_dynamic(pd.concat([df, df_2], axis=0), p=2, tau_w=0.05) + sm_1 = from_pandas_dynamic([df, df_2], p=2, tau_w=0.05) assert [(u, v, round(w, 3)) for u, v, w in sm_1.edges(data="weight")] == [ (u, v, round(w, 3)) for u, v, w in sm.edges(data="weight")