In [1]:
from dowhy import CausalModel 
import dowhy.datasets
from utils.neo4j_connector import Neo4jConnector
from utils.nx_neo4j_adapter import neo4j_to_nx 
import networkx as nx
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import statsmodels.api as sm
import networkx as nx
import io
from sklearn.linear_model import LinearRegression, LogisticRegression
import time

In [2]:
def generate_data(num_samples, config):
    
    
    data = dowhy.datasets.linear_dataset(10, 
                                        num_common_causes=config['num_common_causes'], 
                                        num_samples=num_samples,
                                        num_instruments=config['num_instruments'], 
                                        num_effect_modifiers=config['num_effect_modifiers'],
                                        num_treatments=config['num_treatments'],
                                        num_frontdoor_variables=config['num_frontdoor_variables'],
                                        treatment_is_binary=False,
                                        outcome_is_binary=False)
    return data

In [289]:
data = generate_data(1000)

In [3]:
dag = data["gml_graph"]  # Get DAG in GML format
model = nx.parse_gml(dag)  # Parse GML to networkx graph

# Draw the graph
plt.figure(figsize=(6, 4))
pos = nx.spring_layout(model)  # Positions for nodes
nx.draw(model, pos, with_labels=True, node_color='lightblue', edge_color='gray', node_size=3000, font_size=12)
plt.title("Causal DAG")
plt.show()

NameError: name 'data' is not defined

In [291]:
df = data["df"]
print(df.head())

          FD0         FD1         FD2         FD3         FD4        X0  \
0   46.703625   79.261018   35.235795   88.011539   83.491488 -0.513396   
1   93.042131   81.989401   71.367850  136.930723   90.830291  0.498215   
2  293.250641  283.904629  237.934152  391.187318  352.526042 -0.173385   
3  -47.889363  -44.892826  -40.393096  -51.065180  -57.779603  1.001225   
4  184.630780  188.953227  146.405332  251.447548  229.836791  1.317250   

         X1        X2        X3        X4  ...        W1        W2        W3  \
0  0.277021 -1.344686  1.051081 -0.701712  ... -1.641741  1.573732 -2.437798   
1  0.655950 -0.863396 -0.572251 -0.324069  ... -1.353549 -0.760344  0.270836   
2  2.845904 -1.225743  1.551034 -0.884633  ...  0.120778  0.592307  0.031958   
3 -0.864010 -0.552103  1.166176  2.108267  ... -0.896505  1.271769 -0.020280   
4 -0.374693 -0.582884 -0.285806 -0.057130  ... -0.030849  0.312291 -0.919708   

         W4         v0         v1         v2         v3         v4  

In [None]:
def dag_to_neo4j(G):
    
    # Convert to Cypher statements
    cypher_statements = []
    for node in G.nodes(data=True):
        cypher_statements.append(f"CREATE (n:CausalVariable {{name: '{node[0]}'}});")

    for edge in G.edges(data=True):
        cypher_statements.append(f"MATCH (a:CausalVariable {{name: '{edge[0]}'}}), (b:CausalVariable {{name: '{edge[1]}'}}) CREATE (a)-[:CAUSALLY_LINKED]->(b);")

    # Save to a file
    with open("import_dag.cypher", "w") as f:
        f.write("\n".join(cypher_statements))
        
    try:
        os.system('/Users/amedeo/Downloads/neo4j-community-5.12.0/bin/cypher-shell -u neo4j -p neo4j -f ./import_dag.cypher')
    except Exception as e:
        print(e)

    print("Cypher export completed!")


In [316]:
dag_to_neo4j(model)

Cypher export completed!


In [None]:
def data_to_neo4j(df, G):
    cypher_statements = []
    columns = data['df'].columns.tolist()
    for index, row in data['df'].iterrows():
        for col in columns:
            cypher_statements.append(f"CREATE (n:{col} {{id: '{col+'_'+str(index)}', value: {row[col]}}});")

    

    for index, row in data['df'].iterrows():
        for edge in G.edges():
    
            cypher_statements.append(f"MATCH (a: {edge[0]} {{id:'{edge[0]+'_'+str(index)}'}}), (b: {edge[1]} {{id:'{edge[1]+'_'+str(index)}'}}) CREATE (a)-[:REL]->(b);")
    
    # Save to a file
    with open("import_data.cypher", "w") as f:
        f.write("\n".join(cypher_statements))

    try:
        os.system('/Users/amedeo/Downloads/neo4j-community-5.12.0/bin/cypher-shell -u neo4j -p neo4j -f ./import_data.cypher')
    except Exception as e:
        print(e)
    print("Cypher export completed!")


In [318]:
data_to_neo4j(df, model)

Cypher export completed!


In [None]:
def materialize_views_from_data(G):
    cypher_statements=[]
    for node in G.nodes(data=True):
        
        cypher_statements.append(f"MATCH (a:{node[0]}), (cv:CausalVariable {{name: '{node[0]}'}}) CREATE (a)-[:BELONGS]->(cv);")
    
    # Save to a file
    with open("merge.cypher", "w") as f:
        f.write("\n".join(cypher_statements))
        
    try:
        os.system('/Users/amedeo/Downloads/neo4j-community-5.12.0/bin/cypher-shell -u neo4j -p neo4j -f ./merge.cypher')
    except Exception as e:
        print(e)
    print("Cypher export completed!")

In [371]:
t0 = time.time()
materialize_views_from_data(model)
t1 = time.time()
print(t1-t0)

Cypher export completed!
1.548238754272461


In [7]:
def mediator_analysis(neo4jConnector):
    mediators = neo4jConnector.query("MATCH (x:CausalVariable)-[:CAUSALLY_LINKED]->(m:CausalVariable)-[:CAUSALLY_LINKED]->(y:CausalVariable) WHERE NOT EXISTS { (x)-[:CAUSALLY_LINKED]->(y) WHERE x <> m } RETURN DISTINCT m AS Mediator, COLLECT([x, y]) AS Paths_Involved")
    return mediators
    

In [8]:
neo4jConnector = Neo4jConnector("")


In [9]:
neo4jConnector.clearNeo4j()

In [370]:
t0 = time.time()
mediators = mediator_analysis(neo4jConnector)
t1 = time.time()
print(t1-t0)
for record in mediators:
    mediator = record['Mediator']
    paths = record['Paths_Involved']
    print(f"Mediator: {mediator['name']}")
    print("Paths involved:")
    for path in paths:
        print(f"{path[0]['name']} -> {mediator['name']} -> {path[1]['name']}")

0.009908914566040039
Mediator: FD0
Paths involved:
v0 -> FD0 -> y
v1 -> FD0 -> y
v2 -> FD0 -> y
v3 -> FD0 -> y
v4 -> FD0 -> y
Mediator: FD1
Paths involved:
v0 -> FD1 -> y
v1 -> FD1 -> y
v2 -> FD1 -> y
v3 -> FD1 -> y
v4 -> FD1 -> y
Mediator: FD2
Paths involved:
v0 -> FD2 -> y
v1 -> FD2 -> y
v2 -> FD2 -> y
v3 -> FD2 -> y
v4 -> FD2 -> y
Mediator: FD3
Paths involved:
v0 -> FD3 -> y
v1 -> FD3 -> y
v2 -> FD3 -> y
v3 -> FD3 -> y
v4 -> FD3 -> y
Mediator: FD4
Paths involved:
v0 -> FD4 -> y
v1 -> FD4 -> y
v2 -> FD4 -> y
v3 -> FD4 -> y
v4 -> FD4 -> y
Mediator: v0
Paths involved:
Z0 -> v0 -> FD0
Z1 -> v0 -> FD0
Z0 -> v0 -> FD1
Z1 -> v0 -> FD1
Z0 -> v0 -> FD2
Z1 -> v0 -> FD2
Z0 -> v0 -> FD3
Z1 -> v0 -> FD3
Z0 -> v0 -> FD4
Z1 -> v0 -> FD4
Mediator: v1
Paths involved:
Z0 -> v1 -> FD0
Z1 -> v1 -> FD0
Z0 -> v1 -> FD1
Z1 -> v1 -> FD1
Z0 -> v1 -> FD2
Z1 -> v1 -> FD2
Z0 -> v1 -> FD3
Z1 -> v1 -> FD3
Z0 -> v1 -> FD4
Z1 -> v1 -> FD4
Mediator: v2
Paths involved:
Z0 -> v2 -> FD0
Z1 -> v2 -> FD0
Z0 -> v2 -> FD1

In [11]:
def strict_confounders_analysis(neo4jConnector):
    confounders = neo4jConnector.query("""
            MATCH (u:CausalVariable)-[:CAUSALLY_LINKED]->(x:CausalVariable), (u:CausalVariable)-[:CAUSALLY_LINKED]->(y:CausalVariable)
            WHERE EXISTS { (x)-[:CAUSALLY_LINKED]->(y) }  // Ensure X affects Y
            AND NOT EXISTS { 
                MATCH path = (x)-[:CAUSALLY_LINKED*1..]->(y)  // Find paths from X to Y
                WHERE u IN nodes(path)  // Ensure U is NOT in any path from X to Y (not a mediator)
                RETURN path
            }
            AND NOT EXISTS { (x)-[:CAUSALLY_LINKED]->(u) }  // Ensure X does NOT affect U
            RETURN DISTINCT u AS Confounder, 
            COLLECT(DISTINCT [x, y]) AS Confounded_Paths
    """)
    return confounders

In [369]:
t0 = time.time()
confounders_not_mediators = strict_confounders_analysis(neo4jConnector)
t1 = time.time()
print(t1-t0)
for record in confounders_not_mediators:
    confounder = record['Confounder']
    paths = record['Confounded_Paths']
    print(f"Confounder: {confounder['name']}")
    print("Paths involved:")
    for path in paths:
        print(f"{path[0]['name']} -> {path[1]['name']}")
        

0.020210981369018555
Confounder: W0
Paths involved:
v0 -> FD0
v0 -> FD1
v0 -> FD2
v0 -> FD3
v0 -> FD4
v1 -> FD0
v1 -> FD1
v1 -> FD2
v1 -> FD3
v1 -> FD4
v2 -> FD0
v2 -> FD1
v2 -> FD2
v2 -> FD3
v2 -> FD4
v3 -> FD0
v3 -> FD1
v3 -> FD2
v3 -> FD3
v3 -> FD4
v4 -> FD0
v4 -> FD1
v4 -> FD2
v4 -> FD3
v4 -> FD4
FD0 -> y
FD1 -> y
FD2 -> y
FD3 -> y
FD4 -> y
Confounder: W1
Paths involved:
v0 -> FD0
v0 -> FD1
v0 -> FD2
v0 -> FD3
v0 -> FD4
v1 -> FD0
v1 -> FD1
v1 -> FD2
v1 -> FD3
v1 -> FD4
v2 -> FD0
v2 -> FD1
v2 -> FD2
v2 -> FD3
v2 -> FD4
v3 -> FD0
v3 -> FD1
v3 -> FD2
v3 -> FD3
v3 -> FD4
v4 -> FD0
v4 -> FD1
v4 -> FD2
v4 -> FD3
v4 -> FD4
FD0 -> y
FD1 -> y
FD2 -> y
FD3 -> y
FD4 -> y
Confounder: W2
Paths involved:
v0 -> FD0
v0 -> FD1
v0 -> FD2
v0 -> FD3
v0 -> FD4
v1 -> FD0
v1 -> FD1
v1 -> FD2
v1 -> FD3
v1 -> FD4
v2 -> FD0
v2 -> FD1
v2 -> FD2
v2 -> FD3
v2 -> FD4
v3 -> FD0
v3 -> FD1
v3 -> FD2
v3 -> FD3
v3 -> FD4
v4 -> FD0
v4 -> FD1
v4 -> FD2
v4 -> FD3
v4 -> FD4
FD0 -> y
FD1 -> y
FD2 -> y
FD3 -> y
FD4 -> y
C

In [12]:
def confounder_analysis(neo4jConnector):
    confounders = neo4jConnector.query("""
            MATCH (u)-[:CAUSALLY_LINKED]->(x), (u)-[:CAUSALLY_LINKED]->(y)
            WHERE EXISTS { (x)-[:CAUSALLY_LINKED]->(y) }
            RETURN DISTINCT u AS Confounder, 
                COLLECT(DISTINCT [x, y]) AS Confounded_Paths

    """)
    return confounders

In [10]:
t0 = time.time()
confounders = strict_confounders_analysis(neo4jConnector)
t1 = time.time()
print(t1-t0)
for record in confounders:
    confounder = record['Confounder']
    paths = record['Confounded_Paths']
    print(f"Confounder: {confounder['name']}")
    print("Paths involved:")
    for path in paths:
        print(f"{path[0]['name']} -> {path[1]['name']}")
        

NameError: name 'neo4jConnector' is not defined

In [13]:
def collider_analysis(neo4jConnector):
    confounders = neo4jConnector.query("""
            MATCH (x)-[:CAUSALLY_LINKED]->(c)<-[:CAUSALLY_LINKED]-(y)
            WHERE NOT EXISTS { (x)-[:CAUSALLY_LINKED]->(y) }  // Ensure X and Y are NOT directly connected
            RETURN DISTINCT c AS Collider, 
            COLLECT(DISTINCT [x, y]) AS Collider_Paths
    """)
    return confounders

In [372]:
t0 = time.time()
confounders = collider_analysis(neo4jConnector)
t1 = time.time()
print(t1-t0)
for record in confounders:
    confounder = record['Collider']
    paths = record['Collider_Paths']
    print(f"Collider: {confounder['name']}")
    print("Paths involved:")
    for path in paths:
        print(f"{path[0]['name']} -> {path[1]['name']}")
        

0.03978705406188965
Collider: v0
Paths involved:
W1 -> W0
W2 -> W0
W3 -> W0
W4 -> W0
Z0 -> W0
Z1 -> W0
W0 -> W1
W2 -> W1
W3 -> W1
W4 -> W1
Z0 -> W1
Z1 -> W1
W0 -> W2
W1 -> W2
W3 -> W2
W4 -> W2
Z0 -> W2
Z1 -> W2
W0 -> W3
W1 -> W3
W2 -> W3
W4 -> W3
Z0 -> W3
Z1 -> W3
W0 -> W4
W1 -> W4
W2 -> W4
W3 -> W4
Z0 -> W4
Z1 -> W4
W0 -> Z0
W1 -> Z0
W2 -> Z0
W3 -> Z0
W4 -> Z0
Z1 -> Z0
W0 -> Z1
W1 -> Z1
W2 -> Z1
W3 -> Z1
W4 -> Z1
Z0 -> Z1
Collider: v1
Paths involved:
W1 -> W0
W2 -> W0
W3 -> W0
W4 -> W0
Z0 -> W0
Z1 -> W0
W0 -> W1
W2 -> W1
W3 -> W1
W4 -> W1
Z0 -> W1
Z1 -> W1
W0 -> W2
W1 -> W2
W3 -> W2
W4 -> W2
Z0 -> W2
Z1 -> W2
W0 -> W3
W1 -> W3
W2 -> W3
W4 -> W3
Z0 -> W3
Z1 -> W3
W0 -> W4
W1 -> W4
W2 -> W4
W3 -> W4
Z0 -> W4
Z1 -> W4
W0 -> Z0
W1 -> Z0
W2 -> Z0
W3 -> Z0
W4 -> Z0
Z1 -> Z0
W0 -> Z1
W1 -> Z1
W2 -> Z1
W3 -> Z1
W4 -> Z1
Z0 -> Z1
Collider: v2
Paths involved:
W1 -> W0
W2 -> W0
W3 -> W0
W4 -> W0
Z0 -> W0
Z1 -> W0
W0 -> W1
W2 -> W1
W3 -> W1
W4 -> W1
Z0 -> W1
Z1 -> W1
W0 -> W2
W1 -> W2
W3 -> W2
W4

In [14]:
def estimate_structural_equations(G, df):
    equations = {}

    for node in G.nodes:
        parents = list(G.predecessors(node))  # Get parent nodes (causes)
        
        if parents:  # Only fit a model if the node has causes
            X = df[parents]
            y = df[node]

            # Fit a linear regression model
            model = LinearRegression().fit(X, y)
            coeffs = model.coef_
            intercept = model.intercept_

            # Construct equation as a string
            equation = f"{node} = {intercept:.3f} "
            for i, parent in enumerate(parents):
                equation += f"+ ({coeffs[i]:.3f} * {parent}) "

            equations[node] = {}
            
            equations[node]['equation'] = equation.strip()
            equations[node]['intercept'] = intercept
            equations[node]['coefficients'] = coeffs
            equations[node]['parents'] = parents
             
    return equations


In [323]:
equations = estimate_structural_equations(model, df)

# Print all structural equations
for node, equation in equations.items():
    print(f"Equation for {node}: {equation}")

Equation for y: {'equation': 'y = 7425467.850 + (-2745095.676 * W0) + (740510.748 * W1) + (-2089585.355 * W2) + (4145033.822 * W3) + (-1523192.425 * W4) + (-147399.552 * FD0) + (400362.308 * FD1) + (-144406.464 * FD2) + (-169550.125 * FD3) + (820.632 * FD4) + (2941946.068 * X0) + (3337124.613 * X1) + (4500421.754 * X2) + (1386938.415 * X3) + (2286167.948 * X4)', 'intercept': np.float64(7425467.8500168715), 'coefficients': array([-2.74509568e+06,  7.40510748e+05, -2.08958535e+06,  4.14503382e+06,
       -1.52319242e+06, -1.47399552e+05,  4.00362308e+05, -1.44406464e+05,
       -1.69550125e+05,  8.20631511e+02,  2.94194607e+06,  3.33712461e+06,
        4.50042175e+06,  1.38693841e+06,  2.28616795e+06]), 'parents': ['W0', 'W1', 'W2', 'W3', 'W4', 'FD0', 'FD1', 'FD2', 'FD3', 'FD4', 'X0', 'X1', 'X2', 'X3', 'X4']}
Equation for FD0: {'equation': 'FD0 = 0.060 + (0.205 * W0) + (-0.089 * W1) + (0.399 * W2) + (-0.106 * W3) + (0.452 * W4) + (2.487 * v0) + (1.834 * v1) + (1.734 * v2) + (4.069 * v3) 

In [None]:
def set_structural_equations(equations):
    cypher_statements = []
    for node, equation in equations.items():
        for idx,parent in enumerate(equation['parents']):
            cypher_statements.append(f"MATCH (cv1:CausalVariable {{name: '{parent}'}})-[r:CAUSALLY_LINKED]->(cv2:CausalVariable {{name: '{node}'}}) SET r.parameter = {equation['coefficients'][idx]};")
    for node, equation in equations.items():
        cypher_statements.append(f"MATCH (cv1:CausalVariable {{name: '{node}'}}) SET cv1.intercept = {equation['intercept']};")
    # Save to a file
    with open("structural_equations.cypher", "w") as f:
        f.write("\n".join(cypher_statements))
        
    try:
        os.system('/Users/amedeo/Downloads/neo4j-community-5.12.0/bin/cypher-shell -u neo4j -p neo4j -f ./structural_equations.cypher')
    except Exception as e:
        print(e)
    print("Cypher export completed!")

In [327]:
set_structural_equations(equations)

Cypher export completed!


In [17]:
def do_intervention(target_variabel, value,neo4jConnector, output_variabel):
    
    neo4jConnector.merge_query(f"MATCH (a:{target_variabel}) SET a.old_value=a.value, a.value= {value};")
    
    intervention_distribution = neo4jConnector.query(f"MATCH (b)-[:BELONGS]->(cv:CausalVariable {{name:'{output_variabel}'}})<-[r:CAUSALLY_LINKED]-(parent:CausalVariable)<-[:BELONGS]-(a)-->(b) WITH distinct b, cv, SUM(r.parameter * a.value) AS sum_a RETURN b, sum_a + cv.intercept AS adjustedSum")
    
    neo4jConnector.merge_query(f"MATCH (a:{target_variabel}) SET a.value= a.old_value;")
    
    return intervention_distribution

In [373]:
t0=time.time()
intervention_distribution = do_intervention('W0', 0.5, neo4jConnector, 'y')
t1=time.time()
print(t1-t0)
post_intervention_values = []

for id in intervention_distribution:
    post_intervention_values.append(id['adjustedSum'])
    
print(np.mean(post_intervention_values))
print(np.std(post_intervention_values))


0.12688112258911133
-139336974.35297316
114143306.87011951


In [18]:
def do_counterfactual(target_instance, target_variabel, value,neo4jConnector,output_variabel):
    
    neo4jConnector.merge_query(f"MATCH (a:{target_variabel}) SET a.old_value=a.value, a.value= {value};")
    
    intervention_distribution = neo4jConnector.query(f"MATCH (b:{output_variabel} {{id:'{target_instance}'}})-[:BELONGS]->(cv:CausalVariable {{name:'{output_variabel}'}}) MATCH (cv)<-[r:CAUSALLY_LINKED]-(parent:CausalVariable)<-[:BELONGS]-(a)-->(b) WITH distinct b, cv, SUM(r.parameter * a.value) AS sum_a RETURN b, sum_a + cv.intercept AS adjustedSum")
    
    neo4jConnector.merge_query(f"MATCH (a:{target_variabel}) SET a.value= a.old_value;")
    
    return intervention_distribution

In [374]:
t0 = time.time()
counterfactual = do_counterfactual('y_1','W0', 0.5, neo4jConnector, 'y')
t1 = time.time()
print(t1-t0)

0.037010908126831055


In [355]:
counterfactual

[{'b': <Node element_id='4:d5297e9e-0b37-4e68-ab6e-014fbdbed984:295892' labels=frozenset({'y'}) properties={'id': 'y_1', 'value': 19939.848084971505}>,
  'adjustedSum': -52374055.358440384}]

In [19]:
config1 = {
            'num_common_causes':1, 
            'num_instruments':1, 
            'num_effect_modifiers':1,
            'num_treatments':1,
            'num_frontdoor_variables':1,
            }
num_samples = 1000

In [20]:
merge_times = []
mediator_times = []
strict_confounder_times = []
confounder_times = []
collider_times = []
intervention_times = []
counterfactual_times = []
neo4jConnector.clearNeo4j()
data = generate_data(num_samples, config1)
dag = data["gml_graph"]  # Get DAG in GML format
model = nx.parse_gml(dag)  # Parse GML to networkx graph
df = data["df"]

dag_to_neo4j(model)
data_to_neo4j(df, model)

equations = estimate_structural_equations(model, df)
set_structural_equations(equations)

Cypher export completed!
Cypher export completed!
Cypher export completed!


In [28]:
mediators = mediator_analysis(neo4jConnector)

In [29]:
mediators

[{'Mediator': <Node element_id='4:d5297e9e-0b37-4e68-ab6e-014fbdbed984:310178' labels=frozenset({'CausalVariable'}) properties={'intercept': 0.009206882180876619, 'name': 'FD0'}>,
  'Paths_Involved': [[<Node element_id='4:d5297e9e-0b37-4e68-ab6e-014fbdbed984:310179' labels=frozenset({'CausalVariable'}) properties={'intercept': 0.021373439232440283, 'name': 'v0'}>,
    <Node element_id='4:d5297e9e-0b37-4e68-ab6e-014fbdbed984:310175' labels=frozenset({'CausalVariable'}) properties={'intercept': 10.30653343833064, 'name': 'y'}>]]},
 {'Mediator': <Node element_id='4:d5297e9e-0b37-4e68-ab6e-014fbdbed984:310179' labels=frozenset({'CausalVariable'}) properties={'intercept': 0.021373439232440283, 'name': 'v0'}>,
  'Paths_Involved': [[<Node element_id='4:d5297e9e-0b37-4e68-ab6e-014fbdbed984:310177' labels=frozenset({'CausalVariable'}) properties={'name': 'Z0'}>,
    <Node element_id='4:d5297e9e-0b37-4e68-ab6e-014fbdbed984:310178' labels=frozenset({'CausalVariable'}) properties={'intercept': 0.0

In [21]:
 



for i in range(10):
    t0 = time.time()
    materialize_views_from_data(model)
    t1 = time.time()
    
    merge_times.append(t1-t0)
    
    
    t0 = time.time()
    mediators = mediator_analysis(neo4jConnector)
    t1 = time.time()
    
    mediator_times.append(t1-t0)
            
    t0 = time.time()
    confounders_not_mediators = strict_confounders_analysis(neo4jConnector)
    t1 = time.time()
    strict_confounder_times.append(t1-t0)
            
    t0 = time.time()
    confounders = strict_confounders_analysis(neo4jConnector)
    t1 = time.time()
    confounder_times.append(t1-t0)
            
    t0 = time.time()
    confounders = collider_analysis(neo4jConnector)
    t1 = time.time()
    collider_times.append(t1-t0)

    t0=time.time()
    intervention_distribution = do_intervention('W0', 0.5, neo4jConnector, 'y')
    t1=time.time()
    intervention_times.append(t1-t0)

    t0 = time.time()
    counterfactual = do_counterfactual('y_1','W0', 0.5, neo4jConnector, 'y')
    t1 = time.time()
    counterfactual_times.append(t1-t0)
    
print(f"Number of samples: {num_samples}")
print(f"Merge times: {np.mean(merge_times)}")
print(f"Mediator times: {np.mean(mediator_times)}")
print(f"Strict confounder times: {np.mean(strict_confounder_times)}")
print(f"Confounder times: {np.mean(confounder_times)}")
print(f"Collider times: {np.mean(collider_times)}")
print(f"Intervention times: {np.mean(intervention_times)}")
print(f"Counterfactual times: {np.mean(counterfactual_times)}")

Cypher export completed!
Cypher export completed!
Cypher export completed!
Cypher export completed!
Cypher export completed!
Cypher export completed!
Cypher export completed!
Cypher export completed!
Cypher export completed!
Cypher export completed!
Number of samples: 1000
Merge times: 1.0085125207901
Mediator times: 0.024452996253967286
Strict confounder times: 0.020829105377197267
Confounder times: 0.0026035547256469727
Collider times: 0.006811380386352539
Intervention times: 0.121703839302063
Counterfactual times: 0.033784341812133786


In [22]:
num_samples = 10000

In [23]:
merge_times = []
mediator_times = []
strict_confounder_times = []
confounder_times = []
collider_times = []
intervention_times = []
counterfactual_times = []
neo4jConnector.clearNeo4j()
data = generate_data(num_samples, config1)
dag = data["gml_graph"]  # Get DAG in GML format
model = nx.parse_gml(dag)  # Parse GML to networkx graph
df = data["df"]

dag_to_neo4j(model)
data_to_neo4j(df, model)

equations = estimate_structural_equations(model, df)
set_structural_equations(equations)

Cypher export completed!
Cypher export completed!
Cypher export completed!


In [24]:
 



for i in range(10):
    t0 = time.time()
    materialize_views_from_data(model)
    t1 = time.time()
    
    merge_times.append(t1-t0)
    
    
    t0 = time.time()
    mediators = mediator_analysis(neo4jConnector)
    t1 = time.time()
    
    mediator_times.append(t1-t0)
            
    t0 = time.time()
    confounders_not_mediators = strict_confounders_analysis(neo4jConnector)
    t1 = time.time()
    strict_confounder_times.append(t1-t0)
            
    t0 = time.time()
    confounders = strict_confounders_analysis(neo4jConnector)
    t1 = time.time()
    confounder_times.append(t1-t0)
            
    t0 = time.time()
    confounders = collider_analysis(neo4jConnector)
    t1 = time.time()
    collider_times.append(t1-t0)

    t0=time.time()
    intervention_distribution = do_intervention('W0', 0.5, neo4jConnector, 'y')
    t1=time.time()
    intervention_times.append(t1-t0)

    t0 = time.time()
    counterfactual = do_counterfactual('y_1','W0', 0.5, neo4jConnector, 'y')
    t1 = time.time()
    counterfactual_times.append(t1-t0)
    
print(f"Number of samples: {num_samples}")
print(f"Merge times: {np.mean(merge_times)}")
print(f"Mediator times: {np.mean(mediator_times)}")
print(f"Strict confounder times: {np.mean(strict_confounder_times)}")
print(f"Confounder times: {np.mean(confounder_times)}")
print(f"Collider times: {np.mean(collider_times)}")
print(f"Intervention times: {np.mean(intervention_times)}")
print(f"Counterfactual times: {np.mean(counterfactual_times)}")

Cypher export completed!
Cypher export completed!
Cypher export completed!
Cypher export completed!
Cypher export completed!
Cypher export completed!
Cypher export completed!
Cypher export completed!
Cypher export completed!
Cypher export completed!
Number of samples: 10000
Merge times: 1.1795682907104492
Mediator times: 0.012315702438354493
Strict confounder times: 0.011739826202392578
Confounder times: 0.003736138343811035
Collider times: 0.005746197700500488
Intervention times: 0.8576841115951538
Counterfactual times: 0.05885138511657715


In [25]:
num_samples = 100000

In [None]:
merge_times = []
mediator_times = []
strict_confounder_times = []
confounder_times = []
collider_times = []
intervention_times = []
counterfactual_times = []
neo4jConnector.clearNeo4j()
data = generate_data(num_samples, config1)*
dag = data["gml_graph"]  # Get DAG in GML format
model = nx.parse_gml(dag)  # Parse GML to networkx graph
df = data["df"]

dag_to_neo4j(model)
data_to_neo4j(df, model)

equations = estimate_structural_equations(model, df)
set_structural_equations(equations)

Cypher export completed!
Cypher export completed!
Cypher export completed!


In [27]:
 



for i in range(10):
    t0 = time.time()
    materialize_views_from_data(model)
    t1 = time.time()
    
    merge_times.append(t1-t0)
    
    
    t0 = time.time()
    mediators = mediator_analysis(neo4jConnector)
    t1 = time.time()
    
    mediator_times.append(t1-t0)
            
    t0 = time.time()
    confounders_not_mediators = strict_confounders_analysis(neo4jConnector)
    t1 = time.time()
    strict_confounder_times.append(t1-t0)
            
    t0 = time.time()
    confounders = strict_confounders_analysis(neo4jConnector)
    t1 = time.time()
    confounder_times.append(t1-t0)
            
    t0 = time.time()
    confounders = collider_analysis(neo4jConnector)
    t1 = time.time()
    collider_times.append(t1-t0)

    t0=time.time()
    intervention_distribution = do_intervention('W0', 0.5, neo4jConnector, 'y')
    t1=time.time()
    intervention_times.append(t1-t0)

    t0 = time.time()
    counterfactual = do_counterfactual('y_1','W0', 0.5, neo4jConnector, 'y')
    t1 = time.time()
    counterfactual_times.append(t1-t0)
    
print(f"Number of samples: {num_samples}")
print(f"Merge times: {np.mean(merge_times)}")
print(f"Mediator times: {np.mean(mediator_times)}")
print(f"Strict confounder times: {np.mean(strict_confounder_times)}")
print(f"Confounder times: {np.mean(confounder_times)}")
print(f"Collider times: {np.mean(collider_times)}")
print(f"Intervention times: {np.mean(intervention_times)}")
print(f"Counterfactual times: {np.mean(counterfactual_times)}")

Cypher export completed!
Cypher export completed!
Cypher export completed!
Cypher export completed!
Cypher export completed!
Cypher export completed!
Cypher export completed!
Cypher export completed!
Cypher export completed!
Cypher export completed!
Number of samples: 100000
Merge times: 3.9701653718948364
Mediator times: 0.008602595329284668
Strict confounder times: 0.007948613166809082
Confounder times: 0.0027294397354125977
Collider times: 0.003624367713928223
Intervention times: 10.033546209335327
Counterfactual times: 0.9108143806457519


In [50]:
t0 = time.time()
ntervention_distribution = do_intervention('W0', 0.5, neo4jConnector, 'y')
t1 = time.time()

mediator_times.append(t1-t0)
print(f"Mediator times: {np.mean(mediator_times)}")

Mediator times: 1.1920271570032293


In [37]:
config3 = {
            'num_common_causes':10, 
            'num_instruments':2, 
            'num_effect_modifiers':10,
            'num_treatments':18,
            'num_frontdoor_variables':10,
            }

In [38]:
num_samples = 1000

In [39]:
merge_times = []
mediator_times = []
strict_confounder_times = []
confounder_times = []
collider_times = []
intervention_times = []
counterfactual_times = []
neo4jConnector.clearNeo4j()
data = generate_data(num_samples, config1)
dag = data["gml_graph"]  # Get DAG in GML format
model = nx.parse_gml(dag)  # Parse GML to networkx graph
df = data["df"]

dag_to_neo4j(model)
data_to_neo4j(df, model)

equations = estimate_structural_equations(model, df)
set_structural_equations(equations)

Cypher export completed!
Cypher export completed!
Cypher export completed!


In [40]:
 



for i in range(10):
    t0 = time.time()
    materialize_views_from_data(model)
    t1 = time.time()
    
    merge_times.append(t1-t0)
    
    
    t0 = time.time()
    mediators = mediator_analysis(neo4jConnector)
    t1 = time.time()
    
    mediator_times.append(t1-t0)
            
    t0 = time.time()
    confounders_not_mediators = strict_confounders_analysis(neo4jConnector)
    t1 = time.time()
    strict_confounder_times.append(t1-t0)
            
    t0 = time.time()
    confounders = strict_confounders_analysis(neo4jConnector)
    t1 = time.time()
    confounder_times.append(t1-t0)
            
    t0 = time.time()
    confounders = collider_analysis(neo4jConnector)
    t1 = time.time()
    collider_times.append(t1-t0)

    t0=time.time()
    intervention_distribution = do_intervention('W0', 0.5, neo4jConnector, 'y')
    t1=time.time()
    intervention_times.append(t1-t0)

    t0 = time.time()
    counterfactual = do_counterfactual('y_1','W0', 0.5, neo4jConnector, 'y')
    t1 = time.time()
    counterfactual_times.append(t1-t0)
    
print(f"Number of samples: {num_samples}")
print(f"Merge times: {np.mean(merge_times)}")
print(f"Mediator times: {np.mean(mediator_times)}")
print(f"Strict confounder times: {np.mean(strict_confounder_times)}")
print(f"Confounder times: {np.mean(confounder_times)}")
print(f"Collider times: {np.mean(collider_times)}")
print(f"Intervention times: {np.mean(intervention_times)}")
print(f"Counterfactual times: {np.mean(counterfactual_times)}")

Cypher export completed!
Cypher export completed!
Cypher export completed!
Cypher export completed!
Cypher export completed!
Cypher export completed!
Cypher export completed!
Cypher export completed!
Cypher export completed!
Cypher export completed!
Number of samples: 1000
Merge times: 1.0288001537322997
Mediator times: 0.0075377941131591795
Strict confounder times: 0.007444953918457032
Confounder times: 0.0018110990524291993
Collider times: 0.0036610126495361327
Intervention times: 0.10612542629241943
Counterfactual times: 0.02019820213317871


In [41]:
num_samples = 10000

In [42]:
merge_times = []
mediator_times = []
strict_confounder_times = []
confounder_times = []
collider_times = []
intervention_times = []
counterfactual_times = []
neo4jConnector.clearNeo4j()
data = generate_data(num_samples, config1)
dag = data["gml_graph"]  # Get DAG in GML format
model = nx.parse_gml(dag)  # Parse GML to networkx graph
df = data["df"]

dag_to_neo4j(model)
data_to_neo4j(df, model)

equations = estimate_structural_equations(model, df)
set_structural_equations(equations)

Cypher export completed!
Cypher export completed!
Cypher export completed!


In [43]:
 



for i in range(10):
    t0 = time.time()
    materialize_views_from_data(model)
    t1 = time.time()
    
    merge_times.append(t1-t0)
    
    
    t0 = time.time()
    mediators = mediator_analysis(neo4jConnector)
    t1 = time.time()
    
    mediator_times.append(t1-t0)
            
    t0 = time.time()
    confounders_not_mediators = strict_confounders_analysis(neo4jConnector)
    t1 = time.time()
    strict_confounder_times.append(t1-t0)
            
    t0 = time.time()
    confounders = strict_confounders_analysis(neo4jConnector)
    t1 = time.time()
    confounder_times.append(t1-t0)
            
    t0 = time.time()
    confounders = collider_analysis(neo4jConnector)
    t1 = time.time()
    collider_times.append(t1-t0)

    t0=time.time()
    intervention_distribution = do_intervention('W0', 0.5, neo4jConnector, 'y')
    t1=time.time()
    intervention_times.append(t1-t0)

    t0 = time.time()
    counterfactual = do_counterfactual('y_1','W0', 0.5, neo4jConnector, 'y')
    t1 = time.time()
    counterfactual_times.append(t1-t0)
    
print(f"Number of samples: {num_samples}")
print(f"Merge times: {np.mean(merge_times)}")
print(f"Mediator times: {np.mean(mediator_times)}")
print(f"Strict confounder times: {np.mean(strict_confounder_times)}")
print(f"Confounder times: {np.mean(confounder_times)}")
print(f"Collider times: {np.mean(collider_times)}")
print(f"Intervention times: {np.mean(intervention_times)}")
print(f"Counterfactual times: {np.mean(counterfactual_times)}")

Cypher export completed!
Cypher export completed!
Cypher export completed!
Cypher export completed!
Cypher export completed!
Cypher export completed!
Cypher export completed!
Cypher export completed!
Cypher export completed!
Cypher export completed!
Number of samples: 10000
Merge times: 1.20693359375
Mediator times: 0.006110072135925293
Strict confounder times: 0.00545353889465332
Confounder times: 0.0015691757202148438
Collider times: 0.002817106246948242
Intervention times: 0.834347128868103
Counterfactual times: 0.052653384208679196


In [44]:
num_samples = 100000

In [45]:
merge_times = []
mediator_times = []
strict_confounder_times = []
confounder_times = []
collider_times = []
intervention_times = []
counterfactual_times = []
neo4jConnector.clearNeo4j()
data = generate_data(num_samples, config1)
dag = data["gml_graph"]  # Get DAG in GML format
model = nx.parse_gml(dag)  # Parse GML to networkx graph
df = data["df"]

dag_to_neo4j(model)
data_to_neo4j(df, model)

equations = estimate_structural_equations(model, df)
set_structural_equations(equations)

Cypher export completed!
Cypher export completed!
Cypher export completed!


In [46]:
 



for i in range(10):
    t0 = time.time()
    materialize_views_from_data(model)
    t1 = time.time()
    
    merge_times.append(t1-t0)
    
    
    t0 = time.time()
    mediators = mediator_analysis(neo4jConnector)
    t1 = time.time()
    
    mediator_times.append(t1-t0)
            
    t0 = time.time()
    confounders_not_mediators = strict_confounders_analysis(neo4jConnector)
    t1 = time.time()
    strict_confounder_times.append(t1-t0)
            
    t0 = time.time()
    confounders = strict_confounders_analysis(neo4jConnector)
    t1 = time.time()
    confounder_times.append(t1-t0)
            
    t0 = time.time()
    confounders = collider_analysis(neo4jConnector)
    t1 = time.time()
    collider_times.append(t1-t0)

    t0=time.time()
    intervention_distribution = do_intervention('W0', 0.5, neo4jConnector, 'y')
    t1=time.time()
    intervention_times.append(t1-t0)

    t0 = time.time()
    counterfactual = do_counterfactual('y_1','W0', 0.5, neo4jConnector, 'y')
    t1 = time.time()
    counterfactual_times.append(t1-t0)
    
print(f"Number of samples: {num_samples}")
print(f"Merge times: {np.mean(merge_times)}")
print(f"Mediator times: {np.mean(mediator_times)}")
print(f"Strict confounder times: {np.mean(strict_confounder_times)}")
print(f"Confounder times: {np.mean(confounder_times)}")
print(f"Collider times: {np.mean(collider_times)}")
print(f"Intervention times: {np.mean(intervention_times)}")
print(f"Counterfactual times: {np.mean(counterfactual_times)}")

Cypher export completed!
Cypher export completed!
Cypher export completed!
Cypher export completed!
Cypher export completed!
Cypher export completed!
Cypher export completed!
Cypher export completed!
Cypher export completed!
Cypher export completed!
Number of samples: 100000
Merge times: 3.9890846729278566
Mediator times: 0.007110857963562011
Strict confounder times: 0.004676198959350586
Confounder times: 0.002042555809020996
Collider times: 0.0036124706268310545
Intervention times: 9.830963897705079
Counterfactual times: 0.8922810554504395


In [49]:
config2 = {
            'num_common_causes':5, 
            'num_instruments':2, 
            'num_effect_modifiers':5,
            'num_treatments':8,
            'num_frontdoor_variables':5,
            }

In [50]:
num_samples = 1000

In [51]:
merge_times = []
mediator_times = []
strict_confounder_times = []
confounder_times = []
collider_times = []
intervention_times = []
counterfactual_times = []
neo4jConnector.clearNeo4j()
data = generate_data(num_samples, config1)
dag = data["gml_graph"]  # Get DAG in GML format
model = nx.parse_gml(dag)  # Parse GML to networkx graph
df = data["df"]

dag_to_neo4j(model)
data_to_neo4j(df, model)

equations = estimate_structural_equations(model, df)
set_structural_equations(equations)

Cypher export completed!
Cypher export completed!
Cypher export completed!


In [52]:
 



for i in range(10):
    t0 = time.time()
    materialize_views_from_data(model)
    t1 = time.time()
    
    merge_times.append(t1-t0)
    
    
    t0 = time.time()
    mediators = mediator_analysis(neo4jConnector)
    t1 = time.time()
    
    mediator_times.append(t1-t0)
            
    t0 = time.time()
    confounders_not_mediators = strict_confounders_analysis(neo4jConnector)
    t1 = time.time()
    strict_confounder_times.append(t1-t0)
            
    t0 = time.time()
    confounders = strict_confounders_analysis(neo4jConnector)
    t1 = time.time()
    confounder_times.append(t1-t0)
            
    t0 = time.time()
    confounders = collider_analysis(neo4jConnector)
    t1 = time.time()
    collider_times.append(t1-t0)

    t0=time.time()
    intervention_distribution = do_intervention('W0', 0.5, neo4jConnector, 'y')
    t1=time.time()
    intervention_times.append(t1-t0)

    t0 = time.time()
    counterfactual = do_counterfactual('y_1','W0', 0.5, neo4jConnector, 'y')
    t1 = time.time()
    counterfactual_times.append(t1-t0)
    
print(f"Number of samples: {num_samples}")
print(f"Merge times: {np.mean(merge_times)}")
print(f"Mediator times: {np.mean(mediator_times)}")
print(f"Strict confounder times: {np.mean(strict_confounder_times)}")
print(f"Confounder times: {np.mean(confounder_times)}")
print(f"Collider times: {np.mean(collider_times)}")
print(f"Intervention times: {np.mean(intervention_times)}")
print(f"Counterfactual times: {np.mean(counterfactual_times)}")

Cypher export completed!
Cypher export completed!
Cypher export completed!
Cypher export completed!
Cypher export completed!
Cypher export completed!
Cypher export completed!
Cypher export completed!
Cypher export completed!
Cypher export completed!
Number of samples: 1000
Merge times: 1.0320783853530884
Mediator times: 0.009514141082763671
Strict confounder times: 0.007951617240905762
Confounder times: 0.0020543813705444338
Collider times: 0.0041799783706665036
Intervention times: 0.10910227298736572
Counterfactual times: 0.02249436378479004


In [53]:
num_samples = 10000

In [54]:
merge_times = []
mediator_times = []
strict_confounder_times = []
confounder_times = []
collider_times = []
intervention_times = []
counterfactual_times = []
neo4jConnector.clearNeo4j()
data = generate_data(num_samples, config1)
dag = data["gml_graph"]  # Get DAG in GML format
model = nx.parse_gml(dag)  # Parse GML to networkx graph
df = data["df"]

dag_to_neo4j(model)
data_to_neo4j(df, model)

equations = estimate_structural_equations(model, df)
set_structural_equations(equations)

Cypher export completed!
Cypher export completed!
Cypher export completed!


In [55]:
 



for i in range(10):
    t0 = time.time()
    materialize_views_from_data(model)
    t1 = time.time()
    
    merge_times.append(t1-t0)
    
    
    t0 = time.time()
    mediators = mediator_analysis(neo4jConnector)
    t1 = time.time()
    
    mediator_times.append(t1-t0)
            
    t0 = time.time()
    confounders_not_mediators = strict_confounders_analysis(neo4jConnector)
    t1 = time.time()
    strict_confounder_times.append(t1-t0)
            
    t0 = time.time()
    confounders = strict_confounders_analysis(neo4jConnector)
    t1 = time.time()
    confounder_times.append(t1-t0)
            
    t0 = time.time()
    confounders = collider_analysis(neo4jConnector)
    t1 = time.time()
    collider_times.append(t1-t0)

    t0=time.time()
    intervention_distribution = do_intervention('W0', 0.5, neo4jConnector, 'y')
    t1=time.time()
    intervention_times.append(t1-t0)

    t0 = time.time()
    counterfactual = do_counterfactual('y_1','W0', 0.5, neo4jConnector, 'y')
    t1 = time.time()
    counterfactual_times.append(t1-t0)
    
print(f"Number of samples: {num_samples}")
print(f"Merge times: {np.mean(merge_times)}")
print(f"Mediator times: {np.mean(mediator_times)}")
print(f"Strict confounder times: {np.mean(strict_confounder_times)}")
print(f"Confounder times: {np.mean(confounder_times)}")
print(f"Collider times: {np.mean(collider_times)}")
print(f"Intervention times: {np.mean(intervention_times)}")
print(f"Counterfactual times: {np.mean(counterfactual_times)}")

Cypher export completed!
Cypher export completed!
Cypher export completed!
Cypher export completed!
Cypher export completed!
Cypher export completed!
Cypher export completed!
Cypher export completed!
Cypher export completed!
Cypher export completed!
Number of samples: 10000
Merge times: 1.276483702659607
Mediator times: 0.009201359748840333
Strict confounder times: 0.008504080772399902
Confounder times: 0.0022101640701293946
Collider times: 0.01230635643005371
Intervention times: 0.8381062030792237
Counterfactual times: 0.05692293643951416


In [56]:
num_samples = 100000

In [57]:
merge_times = []
mediator_times = []
strict_confounder_times = []
confounder_times = []
collider_times = []
intervention_times = []
counterfactual_times = []
neo4jConnector.clearNeo4j()
data = generate_data(num_samples, config1)
dag = data["gml_graph"]  # Get DAG in GML format
model = nx.parse_gml(dag)  # Parse GML to networkx graph
df = data["df"]

dag_to_neo4j(model)
data_to_neo4j(df, model)

equations = estimate_structural_equations(model, df)
set_structural_equations(equations)

Cypher export completed!
Cypher export completed!
Cypher export completed!


In [58]:
 



for i in range(10):
    t0 = time.time()
    materialize_views_from_data(model)
    t1 = time.time()
    
    merge_times.append(t1-t0)
    
    
    t0 = time.time()
    mediators = mediator_analysis(neo4jConnector)
    t1 = time.time()
    
    mediator_times.append(t1-t0)
            
    t0 = time.time()
    confounders_not_mediators = strict_confounders_analysis(neo4jConnector)
    t1 = time.time()
    strict_confounder_times.append(t1-t0)
            
    t0 = time.time()
    confounders = strict_confounders_analysis(neo4jConnector)
    t1 = time.time()
    confounder_times.append(t1-t0)
            
    t0 = time.time()
    confounders = collider_analysis(neo4jConnector)
    t1 = time.time()
    collider_times.append(t1-t0)

    t0=time.time()
    intervention_distribution = do_intervention('W0', 0.5, neo4jConnector, 'y')
    t1=time.time()
    intervention_times.append(t1-t0)

    t0 = time.time()
    counterfactual = do_counterfactual('y_1','W0', 0.5, neo4jConnector, 'y')
    t1 = time.time()
    counterfactual_times.append(t1-t0)
    
print(f"Number of samples: {num_samples}")
print(f"Merge times: {np.mean(merge_times)}")
print(f"Mediator times: {np.mean(mediator_times)}")
print(f"Strict confounder times: {np.mean(strict_confounder_times)}")
print(f"Confounder times: {np.mean(confounder_times)}")
print(f"Collider times: {np.mean(collider_times)}")
print(f"Intervention times: {np.mean(intervention_times)}")
print(f"Counterfactual times: {np.mean(counterfactual_times)}")

Cypher export completed!
Cypher export completed!
Cypher export completed!
Cypher export completed!
Cypher export completed!
Cypher export completed!
Cypher export completed!
Cypher export completed!
Cypher export completed!
Cypher export completed!
Number of samples: 100000
Merge times: 3.973950147628784
Mediator times: 0.011180806159973144
Strict confounder times: 0.010750627517700196
Confounder times: 0.0027007102966308595
Collider times: 0.0035307645797729493
Intervention times: 9.951258182525635
Counterfactual times: 0.935277271270752
