In [3]:
%load_ext autoreload
%autoreload 2
    
import os, sys
sys.path.insert(0, os.path.abspath(".."))
# or sys.path.insert(0, os.path.abspath("/workspace"))  
    
import networkx as nx
import numpy as np
import pandas as pd 

from src.exp.exp_change.gen.generate import DagType, IvMode, gen_data_type
from src.exp.exp_change.algos import CD
from src.exp.exp_change.gen.generate import GSType

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Causal DAG Discovery from Multiple Contexts (Chapter 3)

### 1. Example Synthetic Datasets

In [10]:
# Load the example synthetic multi-context dataset from the demo folder:
#   * one sample matrix per context (context indices 0..4),
#   * one flattened `changes` array per observed node,
#   * the ground-truth graph, stored on disk as an adjacency matrix.
multicontext_X = {
    c_i: np.array(pd.read_csv(f'../demo/datasets/chapter3_dag/example_synthetic_context_{c_i}.tsv', sep='\t'))
    for c_i in range(5)
}

# every context matrix has one column per node, so context 0 gives the node count
n_nodes = multicontext_X[0].shape[1]
changes_per_node = {
    n_i: np.array(
        pd.read_csv(f'../demo/datasets/chapter3_dag/example_synthetic_changes_{n_i}.tsv', sep='\t')
    ).reshape(1, -1)[0]
    for n_i in range(n_nodes)
}

# rebuild the directed ground-truth graph from its adjacency matrix
true_adjacency = np.array(pd.read_csv('../demo/datasets/chapter3_dag/example_synthetic_g.tsv', sep='\t'))
true_g = nx.from_numpy_array(true_adjacency, create_using=nx.DiGraph)

In [11]:
# collect the `.value` of every member of the CD enum and show them
available_methods = [member.value for member in CD]
print("Available Causal Discovery Methods:", available_methods)

Available Causal Discovery Methods: ['skip', 'topic', 'topic-contexts-gp', 'topic-contexts-rff', 'causal-mixtures', 'causal-mixtures-ges', 'pc-partial-correl', 'pc-kci-partial-correl', 'fci', 'fci-kci', 'ges', 'cam', 'lingam', 'score', 'das', 'nogam', 'cam-uv', 'r2sort', 'randsort', 'varsort', 'mix-utigsp', 'cdnod-pc', 'cdnod-kci', 'jci-pc-pc', 'jci-pc-kci', 'jci-fci-pc', 'jci-fci-kci', 'utigsp']


####  Causal DAG search with TOPIC




In [8]:
# Run the TopicContextsGP method on the loaded contexts, then score the
# discovered graph against the ground-truth graph `true_g`.
method_name = CD.TopicContextsGP
method = method_name.get_method()
method.fit(multicontext_X)

graph_metrics = method.get_graph_metrics(true_g)
print(f"Evaluation ({method_name}): {graph_metrics}")

DataMode.CONTEXTS
Evaluation (topic-contexts-gp): {'sc': 0.4166666666666667, 'shd': 0.3333333333333333, 'sd': 0.0, 'anc-aid': 0.0, 'parent-aid': 0.0, 'sym-sd': 0.16666666666666666, 'sym-anc-aid': 0.041666666666666664, 'sym-pa-aid': 0.08333333333333333, 'shd-nm': 0.08333333333333333, 'f1': 0.6666666666666666, 'tp': 2, 'fp': 2, 'fn': 0, 'tn': 12, 'tpr': 1.0, 'fpr': 0.14285714285714285, 'pr': 0.25, 'tnr': 0.8571428571428572, 'fnr': 0.0, 'mcc': np.float64(0.6546536707079772)}


In [13]:
# NOTE(review): these imports sit mid-notebook; consider moving them to the first import cell.
from src.causalchange.causal_change_topo import CausalChangeTopological
from src.causalchange.mixing.regression import DataMode


# Use the CausalChangeTopological class directly (rather than via the CD enum),
# with the data mode set to CONTEXTS.
model = CausalChangeTopological(data_mode=DataMode.CONTEXTS)
# fit() returns the discovered graph; its edges are printed in the next cell
dag = model.fit(multicontext_X)


DataMode.CONTEXTS


In [14]:
# Compare the edges recovered by CausalChangeTopological with the ground truth.
# `true_g` is the ground-truth graph built in the dataset-loading cell; using it
# keeps this cell runnable on a fresh kernel (Restart & Run All).
print(dag.edges, true_g.edges)

[(1, 2), (3, 1), (3, 2)] [(1, 2), (2, 3)]


####  Other baselines

- JCI: Joint Causal Inference Framework

Reference: Mooij, J., Magliacane, S., and Claassen, T. Joint causal inference from multiple
contexts. *J. Mach. Learn. Res.*, 2020.

In [None]:
# Fit each JCI variant on the same contexts; report its graph metrics and the
# `targets` attribute it exposes after fitting.
jci_variants = (CD.JCI_FCI_KCI, CD.JCI_FCI_PC, CD.JCI_PC_PC, CD.JCI_PC_KCI)
for method_name in jci_variants:
    method = method_name.get_method()
    method.fit(multicontext_X)
    graph_metrics = method.get_graph_metrics(true_g)
    print(f"Evaluation ({method_name}): {graph_metrics}")
    print(f"Targets ({method_name}): {method.targets}")


kci
Starting BK Orientation.
Orienting edge (Knowledge): C0 --> X0
Orienting edge (Knowledge): C3 --> X0
Orienting edge (Knowledge): C2 --> X1
Orienting edge (Knowledge): C0 --> X2
Orienting edge (Knowledge): C2 --> X3
Orienting edge (Knowledge): C3 --> X3
Finishing BK Orientation.
Starting BK Orientation.
Orienting edge (Knowledge): C0 --> X0
Orienting edge (Knowledge): C3 --> X0
Orienting edge (Knowledge): C2 --> X1
Orienting edge (Knowledge): C0 --> X2
Orienting edge (Knowledge): C2 --> X3
Orienting edge (Knowledge): C3 --> X3
Finishing BK Orientation.
X1 --> X2
Evaluation (jci-fci-kci): {'sc': 0.24999999999999997, 'shd': 0.16666666666666666, 'sd': 0.3333333333333333, 'anc-aid': 0.16666666666666666, 'parent-aid': 0.16666666666666666, 'sym-sd': 0.16666666666666666, 'sym-anc-aid': 0.08333333333333333, 'sym-pa-aid': 0.08333333333333333, 'shd-nm': 0.041666666666666664, 'f1': 0.6666666666666666, 'tp': 1, 'fp': 0, 'fn': 1, 'tn': 14, 'tpr': 0.5, 'fpr': 0.0, 'pr': 0.0625, 'tnr': 1.0, 'fnr':

- CD-NOD: Causal discovery from heterogeneous/nonstationary data

Reference: Huang, B., Zhang, K., Zhang, J., Ramsey, J., Sanchez-Romero, R., Glymour, C.,
and Schölkopf, B. Causal discovery from heterogeneous/nonstationary data. *J.
Mach. Learn. Res.*, 2020.

In [19]:
# Run both CD-NOD variants (PC first, then KCI) through the same
# fit -> evaluate -> report-targets sequence.
for method_name in (CD.CDNOD_PC, CD.CDNOD_KCI):
    method = method_name.get_method()
    method.fit(multicontext_X)
    graph_metrics = method.get_graph_metrics(true_g)
    print(f"Evaluation ({method_name}): {graph_metrics}")
    print(f"Targets ({method_name}): {method.targets}")


Depth=2, working on node 4: 100%|██████████| 5/5 [00:00<00:00, 52.15it/s]  


Evaluation (cdnod-pc): {'sc': 0.24999999999999997, 'shd': 0.5, 'sd': 0.16666666666666666, 'anc-aid': 0.3333333333333333, 'parent-aid': 0.4166666666666667, 'sym-sd': 0.08333333333333333, 'sym-anc-aid': 0.25, 'sym-pa-aid': 0.2916666666666667, 'shd-nm': 0.125, 'f1': 0.0, 'tp': 0, 'fp': 0, 'fn': 2, 'tn': 14, 'tpr': 0.0, 'fpr': 0.0, 'pr': 0.0, 'tnr': 1.0, 'fnr': 1.0, 'mcc': 1}
Targets (cdnod-pc): [0, 2, 3]


Depth=2, working on node 4: 100%|██████████| 5/5 [00:00<00:00, 56.79it/s]  

Evaluation (cdnod-kci): {'sc': 0.24999999999999997, 'shd': 0.5, 'sd': 0.16666666666666666, 'anc-aid': 0.3333333333333333, 'parent-aid': 0.4166666666666667, 'sym-sd': 0.08333333333333333, 'sym-anc-aid': 0.25, 'sym-pa-aid': 0.2916666666666667, 'shd-nm': 0.125, 'f1': 0.0, 'tp': 0, 'fp': 0, 'fn': 2, 'tn': 14, 'tpr': 0.0, 'fpr': 0.0, 'pr': 0.0, 'tnr': 1.0, 'fnr': 1.0, 'mcc': 1}
Targets (cdnod-kci): [0, 2, 3]





- UT-IGSP: Unknown-Target Interventional GSP

Reference: Squires, C., Wang, Y., and Uhler, C. Permutation-based causal structure learning
with unknown intervention targets. *Conference on Uncertainty in Artificial
Intelligence (UAI)*, 2020.

In [20]:
# Fit UT-IGSP on the contexts and report the `targets` attribute it exposes
# after fitting (no graph metrics are printed for this baseline).
method_name = CD.UTIGSP
method = method_name.get_method()
method.fit(multicontext_X)

found_targets = method.targets
print(f"Targets ({method_name}): {found_targets} ")

[{np.int64(2)}, {np.int64(1), np.int64(2), np.int64(3)}, {np.int64(1), np.int64(2)}, {np.int64(0), np.int64(2)}]
Targets (utigsp): [0, 1, 2, 3] 


### 2. Generating synthetic data

In [165]:
# Generate a fresh synthetic multi-context dataset and write it to the demo
# folder as TSV files: one per context, one per node, and one for the graph.
seed = 42
params = dict(
    N=4,      # number of observed nodes in G
    S=200,    # total number of samples; each context dataset gets S/C samples
    P=0.4,    # DAG edge density
    K=3,      # number of causal mechanism changes per node, if the node changes (see 'PC')
    C=4,      # number of contexts (datasets)
    PC=1,     # probability that a node changes (with 'K' mechanisms over 'C' contexts; otherwise 1 mechanism)
    IVM=IvMode.MULTI_CONTEXT,  # tells the generator to produce multi-context data
    GS=GSType.GRAPH, DG=DagType.ERDOS,  # DAG structure
    # 'NS': NoiseType.MIX, 'F': FunType.MIX, 'IVT': IvType.COEF  -> no effect: the function and
    # noise type are always picked at random, and interventions are soft
    # (i.e. causal mechanism changes)
)

multicontext_X, truths = gen_data_type(params, seed)
true_g = truths["true_g"]

# one sample matrix per context
for c_i in multicontext_X:
    context_frame = pd.DataFrame(multicontext_X[c_i])
    context_frame.to_csv(f'../demo/datasets/chapter3_dag/generated_synthetic_context_{c_i}.tsv', sep='\t', index=False)

# one mechanism-partition table per node (node count = columns of context 0)
n_nodes = multicontext_X[0].shape[1]
for n_i in range(n_nodes):
    partition_frame = pd.DataFrame(truths["mechanism_partitions"][n_i])
    partition_frame.to_csv(f'../demo/datasets/chapter3_dag/generated_synthetic_changes_{n_i}.tsv', sep='\t', index=False)

# ground-truth graph, stored as its adjacency matrix
adjacency_frame = pd.DataFrame(nx.to_numpy_array(true_g))
adjacency_frame.to_csv('../demo/datasets/chapter3_dag/generated_synthetic_g.tsv', sep='\t', index=False)


In [166]:
# Run TopicContextsGP on the generated contexts. The list of available methods
# also contains a 'topic-contexts-rff' entry that could be tried here instead.
method_name = CD.TopicContextsGP
method = method_name.get_method()
method.fit(multicontext_X)

metrics_after_fit = method.get_graph_metrics(true_g)
print(f"Graph eval ({method_name}): {metrics_after_fit}")

# per-node result stored on the fitted method as `changes`
detected_changes = method.changes
print(f"Causal mechanism shifts: {detected_changes}" )

# NOTE(review): this repeats the evaluation printed above — confirm whether
# the second report is intentional.
metrics_repeat = method.get_graph_metrics(true_g)
print(f"Graph eval: {metrics_repeat}")

DataMode.CONTEXTS
Graph eval (topic-contexts-gp): {'sc': 0.0, 'shd': 0.0, 'sd': 0.0, 'anc-aid': 0.0, 'parent-aid': 0.0, 'sym-sd': 0.0, 'sym-anc-aid': 0.0, 'sym-pa-aid': 0.0, 'shd-nm': 0.0, 'f1': 1.0, 'tp': 2, 'fp': 0, 'fn': 0, 'tn': 14, 'tpr': 1.0, 'fpr': 0.0, 'pr': 0.125, 'tnr': 1.0, 'fnr': 0.0, 'mcc': np.float64(1.0)}
Causal mechanism shifts: {0: {'groups': [[0, 1, 2, 3]], 'partition': {0: 0, 1: 0, 2: 0, 3: 0}}, 1: {'groups': [[0, 1, 2, 3]], 'partition': {0: 0, 1: 0, 2: 0, 3: 0}}, 2: {'groups': [[0, 1, 2, 3]], 'partition': {0: 0, 1: 0, 2: 0, 3: 0}}, 3: {'groups': [[0, 1], [2, 3]], 'partition': {0: 0, 1: 0, 2: 1, 3: 1}}}
Graph eval: {'sc': 0.0, 'shd': 0.0, 'sd': 0.0, 'anc-aid': 0.0, 'parent-aid': 0.0, 'sym-sd': 0.0, 'sym-anc-aid': 0.0, 'sym-pa-aid': 0.0, 'shd-nm': 0.0, 'f1': 1.0, 'tp': 2, 'fp': 0, 'fn': 0, 'tn': 14, 'tpr': 1.0, 'fpr': 0.0, 'pr': 0.125, 'tnr': 1.0, 'fnr': 0.0, 'mcc': np.float64(1.0)}
