Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
112 changes: 77 additions & 35 deletions tests/TestAstar.py
Original file line number Diff line number Diff line change
@@ -1,42 +1,84 @@
import sys

import itertools
import unittest
import hashlib
import numpy as np
from causallearn.graph.Dag import Dag
from causallearn.graph.GraphNode import GraphNode
from causallearn.search.ScoreBased.ExactSearch import bic_exact_search
from causallearn.utils.DAG2CPDAG import dag2cpdag

sys.path.append("")
import unittest
from pickle import load

import numpy as np

######################################### Test Notes ###########################################
# All the benchmark results of loaded files (e.g. "./TestData/benchmark_returned_results/") #
# are obtained from the code of causal-learn as of commit #
# https://github.com/cmu-phil/causal-learn/commit/8badb41 (07-12-2022). #
# #
# We are not sure whether the results are completely "correct" (reflect the ground truth graph). #
# So if you find that your tests fail, it means that your modified code is logically inconsistent #
# with the code as of 8badb41, but it does not necessarily mean that your code is "wrong". #
# If you are sure that your modification is "correct" (e.g. it fixes some bugs in 8badb41), #
# please report it to us. We will then modify these benchmark results accordingly. Thanks :) #
######################################### Test Notes ###########################################


# Expected MD5 hex digest of every benchmark text file read by the tests in this module.
BENCHMARK_TXTFILE_TO_MD5 = {
    "tests/TestData/test_exact_search_simulated_linear_gaussian_data.txt":
        "1ec70464e4fc68c312adfb7143bd240b",
    "tests/TestData/test_exact_search_simulated_linear_gaussian_CPDAG.txt":
        "52a6d3c5db269d5e212edcbb8283aca9",
}

# Verify file integrity at import time, before any test case runs: a file whose
# digest differs from the recorded one would make the benchmark comparison meaningless.
for file_path, expected_MD5 in BENCHMARK_TXTFILE_TO_MD5.items():
    with open(file_path, 'rb') as fin:
        actual_MD5 = hashlib.md5(fin.read()).hexdigest()
    assert actual_MD5 == expected_MD5, \
        f'{file_path} is corrupted. Please download it again from https://github.com/cmu-phil/causal-learn/blob/8badb41/tests/TestData'


class TestAstar(unittest.TestCase):
    """Tests for ``bic_exact_search`` run with ``search_method='astar'``."""

    @staticmethod
    def _standardize_head(X, num_rows=50):
        """Standardize the columns of ``X`` and return its first ``num_rows`` rows.

        The column means and standard deviations are computed over *all* rows
        of ``X``; the truncation happens only afterwards.
        """
        X = X - np.tile(np.mean(X, axis=0), (X.shape[0], 1))
        X = np.dot(X, np.diag(1 / np.std(X, axis=0)))
        return X[:num_rows, :]

    # Load data and run Astar with default parameters.
    def test_astar_simulate_linear_gaussian_with_local_score_BIC(self):
        """Check that A* recovers the benchmark CPDAG for every heuristic configuration."""
        # The data and ground truth loaded in this test case are generated by the function
        # simulate_linear_gaussian_data_for_exact_search commented below
        print('Now start test_astar_simulate_linear_gaussian_with_local_score_BIC ...')
        truth_CPDAG_matrix = np.loadtxt("tests/TestData/test_exact_search_simulated_linear_gaussian_CPDAG.txt")
        data = np.loadtxt("tests/TestData/test_exact_search_simulated_linear_gaussian_data.txt")
        # unittest assertions (unlike bare ``assert``) are still executed under ``python -O``.
        self.assertEqual(truth_CPDAG_matrix.shape[0], truth_CPDAG_matrix.shape[1],
                         "Should be a square numpy matrix")
        num_of_nodes = len(truth_CPDAG_matrix)
        self.assertEqual(data.shape[1], num_of_nodes,
                         "The second dimension of data should be same as number of nodes")
        data = data - data.mean(axis=0, keepdims=True)  # Center the data
        # Iterate over different configurations of path extension and k-cycle heuristic
        # to make sure they are working fine
        for use_path_extension, use_k_cycle_heuristic in itertools.product([False, True], repeat=2):
            DAG_matrix, _ = bic_exact_search(data, search_method='astar', use_path_extension=use_path_extension,
                                             use_k_cycle_heuristic=use_k_cycle_heuristic, k=3)
            # Convert DAG adjacency matrix to Dag object:
            # every entry DAG_matrix[i, j] == 1 is added as a directed edge from node i to node j.
            nodes = [GraphNode(str(i)) for i in range(num_of_nodes)]
            DAG = Dag(nodes)
            for i, j in zip(*np.where(DAG_matrix == 1)):
                DAG.add_directed_edge(nodes[i], nodes[j])
            CPDAG = dag2cpdag(DAG)  # Convert DAG to CPDAG
            # array_equal also fails cleanly on a shape mismatch instead of relying on broadcasting.
            self.assertTrue(
                np.array_equal(CPDAG.graph, truth_CPDAG_matrix),
                f'CPDAG mismatch with use_path_extension={use_path_extension}, '
                f'use_k_cycle_heuristic={use_k_cycle_heuristic}')
        print('test_astar_simulate_linear_gaussian_with_local_score_BIC passed!\n')

    # example1
    # for data with single-variate dimensions, astar.
    def test_single_astar(self):
        """Run A* on the first 50 standardized rows of ``example_data1.pk`` and print the result."""
        # NOTE: pickle.load can execute arbitrary code; only use trusted fixture files here.
        with open("example_data1.pk", 'rb') as fin:
            example_data1 = load(fin)
        X = self._standardize_head(example_data1['X'])
        dag_est, search_stats = bic_exact_search(X, search_method='astar')
        print(dag_est)
        print(search_stats)

    # example2
    # for data with multi-variate dimensions, astar.
    def test_multi_astar(self):
        """Run A* on the first 50 standardized rows of trial 0 of ``example_data2.pk`` and print the result."""
        # NOTE: pickle.load can execute arbitrary code; only use trusted fixture files here.
        with open("example_data2.pk", 'rb') as fin:
            example_data = load(fin)
        Data_save = example_data['Data_save']
        trial = 0
        X = self._standardize_head(Data_save[trial])
        dag_est, search_stats = bic_exact_search(X, search_method='astar')
        print(dag_est)
        print(search_stats)
# def simulate_linear_gaussian_data_for_exact_search():
# import pandas as pd
# import random
# random.seed(1) # Reproducibility
# np.random.seed(1) # Reproducibility
# num_of_samples = 100000
# # Generate linear Gaussian data
# X0 = np.random.normal(scale=1.0, size=num_of_samples)
# X1 = 0.5 * X0 + np.random.normal(scale=2.0, size=num_of_samples)
# X3 = np.random.normal(scale=0.5, size=num_of_samples)
# X2 = 0.4 * X1 + 0.7 * X3 + np.random.normal(scale=1.5, size=num_of_samples)
# data_df = pd.DataFrame(data={'X0': X0, 'X1': X1, 'X2': X2, 'X3': X3})
# # Ground truth DAG: X0 -> X1 -> X2 <- X3
# # Ground truth CPDAG: X0 -- X1 -> X2 <- X3
# truth_CPDAG_matrix = np.array([[ 0, -1, 0, 0],
# [-1, 0, -1, 0],
# [ 0, 1, 0, 1],
# [ 0, 0, -1, 0]])
# truth_CPDAG_df = pd.DataFrame(data=truth_CPDAG_matrix)
# # Save data and ground truth
# truth_CPDAG_df.to_csv('./TestData/test_exact_search_simulated_linear_gaussian_CPDAG.txt', sep=' ', index=False, header=False)
# data_df.to_csv('./TestData/test_exact_search_simulated_linear_gaussian_data.txt', sep=' ', index=False, header=False)
110 changes: 75 additions & 35 deletions tests/TestDP.py
Original file line number Diff line number Diff line change
@@ -1,41 +1,81 @@
import sys

import unittest
import hashlib
import numpy as np
from causallearn.graph.Dag import Dag
from causallearn.graph.GraphNode import GraphNode
from causallearn.search.ScoreBased.ExactSearch import bic_exact_search
from causallearn.utils.DAG2CPDAG import dag2cpdag

sys.path.append("")
import unittest
from pickle import load

import numpy as np

######################################### Test Notes ###########################################
# All the benchmark results of loaded files (e.g. "./TestData/benchmark_returned_results/") #
# are obtained from the code of causal-learn as of commit #
# https://github.com/cmu-phil/causal-learn/commit/8badb41 (07-12-2022). #
# #
# We are not sure whether the results are completely "correct" (reflect the ground truth graph). #
# So if you find that your tests fail, it means that your modified code is logically inconsistent #
# with the code as of 8badb41, but it does not necessarily mean that your code is "wrong". #
# If you are sure that your modification is "correct" (e.g. it fixes some bugs in 8badb41), #
# please report it to us. We will then modify these benchmark results accordingly. Thanks :) #
######################################### Test Notes ###########################################


# Expected MD5 hex digest of every benchmark text file read by the tests in this module.
BENCHMARK_TXTFILE_TO_MD5 = {
    "tests/TestData/test_exact_search_simulated_linear_gaussian_data.txt":
        "1ec70464e4fc68c312adfb7143bd240b",
    "tests/TestData/test_exact_search_simulated_linear_gaussian_CPDAG.txt":
        "52a6d3c5db269d5e212edcbb8283aca9",
}

# Verify file integrity at import time, before any test case runs: a file whose
# digest differs from the recorded one would make the benchmark comparison meaningless.
for file_path, expected_MD5 in BENCHMARK_TXTFILE_TO_MD5.items():
    with open(file_path, 'rb') as fin:
        actual_MD5 = hashlib.md5(fin.read()).hexdigest()
    assert actual_MD5 == expected_MD5, \
        f'{file_path} is corrupted. Please download it again from https://github.com/cmu-phil/causal-learn/blob/8badb41/tests/TestData'


class TestDP(unittest.TestCase):
    """Tests for ``bic_exact_search`` run with ``search_method='dp'``."""

    @staticmethod
    def _standardize_head(X, num_rows=50):
        """Standardize the columns of ``X`` and return its first ``num_rows`` rows.

        The column means and standard deviations are computed over *all* rows
        of ``X``; the truncation happens only afterwards.
        """
        X = X - np.tile(np.mean(X, axis=0), (X.shape[0], 1))
        X = np.dot(X, np.diag(1 / np.std(X, axis=0)))
        return X[:num_rows, :]

    # example3
    # for data with single-variate dimensions, dp.
    def test_single_dp(self):
        """Run DP on the first 50 standardized rows of ``example_data1.pk`` and print the result."""
        # NOTE: pickle.load can execute arbitrary code; only use trusted fixture files here.
        with open("example_data1.pk", 'rb') as fin:
            example_data1 = load(fin)
        X = self._standardize_head(example_data1['X'])
        dag_est, search_stats = bic_exact_search(X, search_method='dp')
        print(dag_est)
        print(search_stats)

    # example4
    # for data with multi-variate dimensions, dp.
    def test_multi_dp(self):
        """Run DP on the first 50 standardized rows of trial 0 of ``example_data2.pk`` and print the result."""
        # NOTE: pickle.load can execute arbitrary code; only use trusted fixture files here.
        with open("example_data2.pk", 'rb') as fin:
            example_data = load(fin)
        Data_save = example_data['Data_save']
        trial = 0
        X = self._standardize_head(Data_save[trial])
        dag_est, search_stats = bic_exact_search(X, search_method='dp')
        print(dag_est)
        print(search_stats)

    # Load data and run DP with default parameters.
    def test_dp_simulate_linear_gaussian_with_local_score_BIC(self):
        """Check that DP recovers the benchmark CPDAG with and without path extension."""
        # The data and ground truth loaded in this test case are generated by the function
        # simulate_linear_gaussian_data_for_exact_search commented below
        print('Now start test_dp_simulate_linear_gaussian_with_local_score_BIC ...')
        truth_CPDAG_matrix = np.loadtxt("tests/TestData/test_exact_search_simulated_linear_gaussian_CPDAG.txt")
        data = np.loadtxt("tests/TestData/test_exact_search_simulated_linear_gaussian_data.txt")
        # unittest assertions (unlike bare ``assert``) are still executed under ``python -O``.
        self.assertEqual(truth_CPDAG_matrix.shape[0], truth_CPDAG_matrix.shape[1],
                         "Should be a square numpy matrix")
        num_of_nodes = len(truth_CPDAG_matrix)
        self.assertEqual(data.shape[1], num_of_nodes,
                         "The second dimension of data should be same as number of nodes")
        data = data - data.mean(axis=0, keepdims=True)  # Center the data
        # Iterate over different configurations of path extension to make sure they are working fine
        for use_path_extension in [False, True]:
            DAG_matrix, _ = bic_exact_search(data, search_method='dp', use_path_extension=use_path_extension)
            # Convert DAG adjacency matrix to Dag object:
            # every entry DAG_matrix[i, j] == 1 is added as a directed edge from node i to node j.
            nodes = [GraphNode(str(i)) for i in range(num_of_nodes)]
            DAG = Dag(nodes)
            for i, j in zip(*np.where(DAG_matrix == 1)):
                DAG.add_directed_edge(nodes[i], nodes[j])
            CPDAG = dag2cpdag(DAG)  # Convert DAG to CPDAG
            # array_equal also fails cleanly on a shape mismatch instead of relying on broadcasting.
            self.assertTrue(
                np.array_equal(CPDAG.graph, truth_CPDAG_matrix),
                f'CPDAG mismatch with use_path_extension={use_path_extension}')
        print('test_dp_simulate_linear_gaussian_with_local_score_BIC passed!\n')


# def simulate_linear_gaussian_data_for_exact_search():
# import pandas as pd
# import random
# random.seed(1) # Reproducibility
# np.random.seed(1) # Reproducibility
# num_of_samples = 100000
# # Generate linear Gaussian data
# X0 = np.random.normal(scale=1.0, size=num_of_samples)
# X1 = 0.5 * X0 + np.random.normal(scale=2.0, size=num_of_samples)
# X3 = np.random.normal(scale=0.5, size=num_of_samples)
# X2 = 0.4 * X1 + 0.7 * X3 + np.random.normal(scale=1.5, size=num_of_samples)
# data_df = pd.DataFrame(data={'X0': X0, 'X1': X1, 'X2': X2, 'X3': X3})
# # Ground truth DAG: X0 -> X1 -> X2 <- X3
# # Ground truth CPDAG: X0 -- X1 -> X2 <- X3
# truth_CPDAG_matrix = np.array([[ 0, -1, 0, 0],
# [-1, 0, -1, 0],
# [ 0, 1, 0, 1],
# [ 0, 0, -1, 0]])
# truth_CPDAG_df = pd.DataFrame(data=truth_CPDAG_matrix)
# # Save data and ground truth
# truth_CPDAG_df.to_csv('./TestData/test_exact_search_simulated_linear_gaussian_CPDAG.txt', sep=' ', index=False, header=False)
# data_df.to_csv('./TestData/test_exact_search_simulated_linear_gaussian_data.txt', sep=' ', index=False, header=False)
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
0 -1 0 0
-1 0 -1 0
0 1 0 1
0 0 -1 0
Loading