In [21]:
import pandas as pd
import numpy as np
from causallearn.graph.Edge import Edge
from causallearn.search.ConstraintBased.FCI import fci
from causallearn.utils.GraphUtils import GraphUtils
from causallearn.utils.Dataset import load_dataset

In [3]:
# Load the "boston_housing" dataset shipped with causal-learn.
# `data` is the numeric sample array fed to FCI below; `labels` is the second
# return value (presumably the column names -- TODO confirm against the library).
data, labels = load_dataset("boston_housing")

In [4]:
# Show the loaded array as the cell output (a quick sanity check of shape and values).
data

array([[6.3000e-03, 1.8000e+01, 2.3100e+00, ..., 3.9690e+02, 4.9800e+00,
        2.4000e+01],
       [2.7300e-02, 0.0000e+00, 7.0700e+00, ..., 3.9690e+02, 9.1400e+00,
        2.1600e+01],
       [2.7300e-02, 0.0000e+00, 7.0700e+00, ..., 3.9283e+02, 4.0300e+00,
        3.4700e+01],
       ...,
       [6.0800e-02, 0.0000e+00, 1.1930e+01, ..., 3.9690e+02, 5.6400e+00,
        2.3900e+01],
       [1.0960e-01, 0.0000e+00, 1.1930e+01, ..., 3.9345e+02, 6.4800e+00,
        2.2000e+01],
       [4.7400e-02, 0.0000e+00, 1.1930e+01, ..., 3.9690e+02, 7.8800e+00,
        1.1900e+01]], shape=(506, 14))

In [5]:
# Run FCI on the raw data array with the library's default arguments.
# `g` is the resulting graph object; `edges` is the edge collection that the
# cells below iterate over (each item exposes node1/node2 and numeric endpoints).
g, edges = fci(data)
# Convert the graph to a pydot object; only needed for the optional PNG export below.
pdy = GraphUtils.to_pydot(g)
# pdy.write_png('boston_housing.png')

Depth=0, working on node 13: 100%|██████████| 14/14 [00:00<00:00, 327.18it/s]


X9 --> X1
X1 --> X12
X5 --> X3
X3 --> X10
X7 --> X5
X14 --> X6
X9 --> X10
X14 --> X11
X13 --> X14


In [26]:
# Map a numeric edge-endpoint code to the character used when drawing the edge.
def get_endpoint_type(endpoint: int, isFirst: bool):
    """Return the one-character symbol for a numeric edge endpoint.

    Recognised codes (these appear to match causal-learn's ``Endpoint`` enum
    values -- confirm against the installed version):

    * ``-1`` -> ``"-"`` (tail)
    * ``1``  -> ``"<"`` when the endpoint is on the first node of the edge,
      ``">"`` when it is on the second node (arrowhead)
    * ``2``  -> ``"o"`` (circle)

    Parameters
    ----------
    endpoint : int
        Numeric endpoint code.
    isFirst : bool
        True if this endpoint sits on the left-hand (first) node of the edge;
        only affects which way an arrowhead is drawn.

    Returns
    -------
    str
        The single-character endpoint symbol.

    Raises
    ------
    ValueError
        If ``endpoint`` is not one of the recognised codes. Previously the
        function fell through and returned ``None``, which callers would
        silently format into strings such as ``"None-None"``.
    """
    if endpoint == -1:
        return "-"
    if endpoint == 1:
        return "<" if isFirst else ">"
    if endpoint == 2:
        return "o"
    raise ValueError(f"Unsupported edge endpoint code: {endpoint!r}")

def get_edge(edge: Edge):
    """Render an edge as a three-character string such as ``-->`` or ``o-o``.

    The first character is the symbol for the endpoint at the edge's first
    node, the middle character is always ``-``, and the last character is the
    symbol for the endpoint at the second node.
    """
    first_code, second_code = edge.numerical_endpoint_1, edge.numerical_endpoint_2
    return "{}-{}".format(
        get_endpoint_type(first_code, True),
        get_endpoint_type(second_code, False),
    )


In [27]:
sat_clauses = []
# Flatten every edge into a (first node name, second node name, edge symbol) triple.
formatted_edges = [
    (edge.node1.name, edge.node2.name, get_edge(edge)) for edge in edges
]

print(formatted_edges)

[('X9', 'X1', '-->'), ('X1', 'X12', '-->'), ('X2', 'X8', 'o-o'), ('X2', 'X11', '-->'), ('X5', 'X3', '-->'), ('X8', 'X3', '-->'), ('X3', 'X9', '<->'), ('X3', 'X10', '-->'), ('X4', 'X14', 'o->'), ('X7', 'X5', '-->'), ('X8', 'X5', '-->'), ('X6', 'X13', '<->'), ('X14', 'X6', '-->'), ('X8', 'X7', '-->'), ('X7', 'X13', '<->'), ('X9', 'X10', '-->'), ('X9', 'X11', '<->'), ('X14', 'X11', '-->'), ('X13', 'X14', '-->')]


In [28]:
def get_unique_nodes(edges):
    """Collect the distinct node names that appear in a list of edges.

    Each item of ``edges`` is indexed at positions 0 and 1 (the two endpoints);
    any further elements are ignored. Returns a ``set`` of those endpoints.
    """
    return {edge[position] for edge in edges for position in (0, 1)}

In [29]:
# Distinct node names taken from both endpoints of every formatted edge (a set).
nodes = get_unique_nodes(formatted_edges)

In [30]:
# Display the set of node names collected from the edges.
nodes

{'X1',
 'X10',
 'X11',
 'X12',
 'X13',
 'X14',
 'X2',
 'X3',
 'X4',
 'X5',
 'X6',
 'X7',
 'X8',
 'X9'}

In [31]:
# Module-level lookup shared with the constraint-building code:
# (node_a, node_b, "direct" | "latent") -> positive integer variable ID.
var_mapping = {}


def create_variable_mapping(nodes):
    """(Re)build the global ``var_mapping`` for every ordered pair of nodes.

    For each ordered pair ``(n1, n2)`` -- including ``n1 == n2`` -- two
    variables are allocated, one for ``"direct"`` and one for ``"latent"``.
    IDs are consecutive integers starting at 1, assigned in the iteration
    order of ``nodes``; passing a ``set`` therefore yields a numbering that
    may differ between kernel sessions. Pass a sorted list if stable IDs matter.

    The mapping is rebuilt from scratch on every call. Previously the function
    assigned ``len(var_mapping) + 1`` into the already-populated global, so
    running it a second time overwrote every key with the same ID, and
    duplicate entries in ``nodes`` produced colliding IDs.

    Parameters
    ----------
    nodes : iterable of hashable
        Node identifiers. Duplicates are ignored (first occurrence wins).

    Returns
    -------
    dict
        The module-level ``var_mapping`` dict, updated in place.
    """
    # Materialise once (de-duplicated, order-preserving) so that one-shot
    # iterators survive the nested loops and repeated nodes get a single ID.
    unique_nodes = list(dict.fromkeys(nodes))

    keys = [
        (n1, n2, edge_type)
        for n1 in unique_nodes
        for n2 in unique_nodes
        for edge_type in ("direct", "latent")
    ]

    # Mutate the existing dict rather than rebinding it, so any code holding a
    # reference to ``var_mapping`` sees the fresh contents.
    var_mapping.clear()
    var_mapping.update((key, var_id) for var_id, key in enumerate(keys, start=1))
    return var_mapping

In [32]:
# Allocate one integer variable per (node, node, "direct" | "latent") combination.
var_mapping = create_variable_mapping(nodes)

In [33]:
# Display the full (node, node, kind) -> variable ID table.
var_mapping

{('X14', 'X14', 'direct'): 1,
 ('X14', 'X14', 'latent'): 2,
 ('X14', 'X13', 'direct'): 3,
 ('X14', 'X13', 'latent'): 4,
 ('X14', 'X4', 'direct'): 5,
 ('X14', 'X4', 'latent'): 6,
 ('X14', 'X5', 'direct'): 7,
 ('X14', 'X5', 'latent'): 8,
 ('X14', 'X8', 'direct'): 9,
 ('X14', 'X8', 'latent'): 10,
 ('X14', 'X9', 'direct'): 11,
 ('X14', 'X9', 'latent'): 12,
 ('X14', 'X10', 'direct'): 13,
 ('X14', 'X10', 'latent'): 14,
 ('X14', 'X11', 'direct'): 15,
 ('X14', 'X11', 'latent'): 16,
 ('X14', 'X12', 'direct'): 17,
 ('X14', 'X12', 'latent'): 18,
 ('X14', 'X3', 'direct'): 19,
 ('X14', 'X3', 'latent'): 20,
 ('X14', 'X1', 'direct'): 21,
 ('X14', 'X1', 'latent'): 22,
 ('X14', 'X7', 'direct'): 23,
 ('X14', 'X7', 'latent'): 24,
 ('X14', 'X2', 'direct'): 25,
 ('X14', 'X2', 'latent'): 26,
 ('X14', 'X6', 'direct'): 27,
 ('X14', 'X6', 'latent'): 28,
 ('X13', 'X14', 'direct'): 29,
 ('X13', 'X14', 'latent'): 30,
 ('X13', 'X13', 'direct'): 31,
 ('X13', 'X13', 'latent'): 32,
 ('X13', 'X4', 'direct'): 33,
 ('X1

In [34]:
def add_edge_constraints(edges, mapping=None):
    """Translate formatted edges into CNF clauses over the edge variables.

    Each clause is a list of integers: a positive integer asserts the variable
    with that ID, a negative integer asserts its negation.

    Clauses emitted per edge type (in this order):

    * ``'-->'``: ``direct(n1, n2)`` is true; ``latent(n1, n2)`` is false.
    * ``'o->'``: ``direct(n2, n1)`` is false.
    * ``'o-o'``: ``direct(n1, n2) OR latent(n1, n2)``.
    * ``'<->'``: ``latent(n1, n2)`` is true; ``direct(n2, n1)`` and
      ``direct(n1, n2)`` are both false.

    Any other ``edge_type`` string contributes no clauses and is skipped
    silently.

    NOTE(review): ``latent`` is only looked up in the ``(n1, n2)`` orientation;
    the ``(n2, n1, 'latent')`` variable is never constrained here -- confirm
    that this asymmetry is intended or handled elsewhere.

    Parameters
    ----------
    edges : iterable of (n1, n2, edge_type)
        Edge triples as produced by the edge-formatting cell.
    mapping : dict, optional
        Lookup from ``(node_a, node_b, 'direct' | 'latent')`` to a variable ID.
        Defaults to the module-level ``var_mapping``; pass it explicitly to
        avoid depending on notebook-global state.

    Returns
    -------
    list of list of int
        The generated clauses, in edge order.

    Raises
    ------
    KeyError
        If a required ``(node, node, kind)`` key is missing from the mapping.
    """
    if mapping is None:
        # Backward-compatible default: fall back to the notebook-level table.
        mapping = var_mapping

    cnf = []
    for n1, n2, edge_type in edges:
        if edge_type == '-->':
            # Direct causation must be true
            cnf.append([mapping[(n1, n2, 'direct')]])
            # No latent common cause
            cnf.append([-mapping[(n1, n2, 'latent')]])

        elif edge_type == 'o->':
            # n2 cannot be ancestor of n1
            cnf.append([-mapping[(n2, n1, 'direct')]])

        elif edge_type == 'o-o':
            # Either direct causation or latent common cause must exist
            cnf.append([
                mapping[(n1, n2, 'direct')],
                mapping[(n1, n2, 'latent')],
            ])

        elif edge_type == '<->':
            # Must have latent common cause
            cnf.append([mapping[(n1, n2, 'latent')]])
            # No direct causation in either direction
            cnf.append([-mapping[(n2, n1, 'direct')]])
            cnf.append([-mapping[(n1, n2, 'direct')]])

    return cnf

In [35]:
# Build the clause list that encodes the orientation of every FCI edge.
cnf = add_edge_constraints(formatted_edges)

In [36]:
# Display the clauses: each inner list is one clause; a negative number negates that variable ID.
cnf

[[161],
 [-162],
 [297],
 [-298],
 [345, 346],
 [351],
 [-352],
 [103],
 [-104],
 [131],
 [-132],
 [264],
 [-159],
 [-263],
 [265],
 [-266],
 [-5],
 [315],
 [-316],
 [119],
 [-120],
 [368],
 [-55],
 [-367],
 [27],
 [-28],
 [135],
 [-136],
 [312],
 [-51],
 [-311],
 [153],
 [-154],
 [156],
 [-207],
 [-155],
 [15],
 [-16],
 [29],
 [-30]]