In [6]:
import os
import sys
import json
# sys.path.append('/home/lzan/Bureau/Dynamic causal graph/root-cause-analysis/EasyRCA/TestEasyRCA_AISTATS2023/')
from tqdm import tqdm
from generate_data import generate_data_with_parametric_intervention # , generate_data_with_structural_intervention

import networkx as nx
import numpy as np
import pandas as pd

In [11]:
def dict_to_graph(graph_dict, inter_nodes=()):
    """Build a ``networkx.DiGraph`` from an adjacency mapping.

    Parameters
    ----------
    graph_dict : dict
        Maps every parent node to its children. The children may be given as
        a dict keyed by child (the values are ignored), as any other iterable
        of child nodes (list, tuple, set), or as ``None`` for "no children".
    inter_nodes : container, optional
        Nodes whose incoming edges must be left out. Such a node is still
        added to the graph, but no ``parent -> node`` edge is created.
        Defaults to an empty tuple (keep every edge).

    Returns
    -------
    networkx.DiGraph
        Graph containing every parent and child as a node, plus one directed
        edge per (parent, child) pair whose child is not in ``inter_nodes``.
    """
    graph = nx.DiGraph()

    for parent, children in graph_dict.items():
        # Register the parent even when it has no children at all.
        graph.add_node(parent)

        # Iterating directly yields the keys of a dict and the items of a
        # list/tuple/set, so both JSON layouts are accepted.
        child_nodes = () if children is None else children
        for child in child_nodes:
            # The child is always a node of the graph, even if its incoming
            # edge is dropped below.
            graph.add_node(child)
            if child not in inter_nodes:
                graph.add_edge(parent, child)
    return graph


def _pick_unconnected_nodes(graph):
    """Draw two distinct nodes with no directed path between them, in either direction."""
    nodes = list(graph.nodes)

    def connected(u, v):
        # A simple path u -> v exists exactly when v is reachable from u, so
        # has_path answers the question without enumerating every path.
        return nx.has_path(graph, u, v) or nx.has_path(graph, v, u)

    # The rejection sampling below can only terminate if a valid pair exists;
    # check that up front instead of looping forever.
    pair_exists = any(
        not connected(u, v)
        for position, u in enumerate(nodes)
        for v in nodes[position + 1:]
    )
    if not pair_exists:
        raise ValueError("the graph has no pair of nodes that are unconnected in both directions")

    while True:
        pair = np.random.choice(nodes, replace=False, size=2)
        if not connected(pair[0], pair[1]):
            return pair


def generate_historical_data_by_folder_PC(graphs_path, data_path, info_path, n, anomaly_length):
    """Generate one intervened dataset and one info file per graph file in a folder.

    For every regular file in ``graphs_path`` the JSON adjacency mapping is
    loaded and converted with ``dict_to_graph``. Two nodes that are not linked
    by a directed path (in either direction) are drawn at random and passed to
    ``generate_data_with_parametric_intervention`` as ``secondInterventionNode``.

    Parameters
    ----------
    graphs_path : str
        Folder containing the graph JSON files.
    data_path : str
        Output folder for the generated CSV files (created if missing).
    info_path : str
        Output folder for the JSON info files (created if missing).
    n : int
        Forwarded to the generator as ``n``; presumably the number of
        samples -- TODO confirm against ``generate_data``.
    anomaly_length : int
        The generator receives ``rootStartIntervention=n - anomaly_length``
        and ``rootEndIntervention=n - 1``.

    Raises
    ------
    ValueError
        If a graph has no pair of nodes that are unconnected in both
        directions (the original rejection loop would never terminate).

    Notes
    -----
    Output names are derived from the input file name: ``graph`` becomes
    ``data`` (and ``json`` becomes ``csv``) for the data file, and ``graph``
    becomes ``info`` for the info file.
    """
    os.makedirs(data_path, exist_ok=True)
    os.makedirs(info_path, exist_ok=True)

    # Sorted so that files are processed in a deterministic order
    # (os.listdir gives no ordering guarantee).
    graph_files = sorted(
        os.path.join(graphs_path, f)
        for f in os.listdir(graphs_path)
        if os.path.isfile(os.path.join(graphs_path, f))
    )

    for json_file_path in tqdm(graph_files):
        with open(json_file_path, 'r') as json_file:
            json_graph = json.load(json_file)

        # Convert the loaded JSON data into a NetworkX graph
        graph = dict_to_graph(graph_dict=json_graph, inter_nodes=[])

        try:
            intervention_nodes = _pick_unconnected_nodes(graph)
        except ValueError as err:
            raise ValueError(f"{json_file_path}: {err}") from err

        # The generator presumably returns a pandas DataFrame (it is saved
        # with .to_csv below) -- TODO confirm.
        data = generate_data_with_parametric_intervention(
            DAG=graph,
            n=n,
            secondInterventionNode=intervention_nodes,
            rootStartIntervention=n - anomaly_length,
            rootEndIntervention=n - 1,
        )

        # .tolist() turns NumPy scalars into plain Python objects so that
        # json.dump also works for non-string node labels.
        info = {'intervention_nodes': intervention_nodes.tolist(), 'anomaly_length': anomaly_length}

        # basename works for nested/absolute graph folders and for any path
        # separator, unlike splitting on '/' and taking the second item.
        file_name = os.path.basename(json_file_path)
        data_file_name = file_name.replace('graph', 'data').replace('json', 'csv')
        info_file_name = file_name.replace('graph', 'info')

        data.to_csv(os.path.join(data_path, data_file_name), index=False)

        # Save the dictionary as a JSON file
        with open(os.path.join(info_path, info_file_name), 'w') as json_file:
            json.dump(info, json_file)

In [12]:
# Folder holding the input graph JSON files.
graphs_path = 'graphs'

# Output folders for the parametric-intervention run. For the structural
# variant the sub-folder previously used here was 'Structual' instead of
# 'Parametric_2'.
data_path, info_path = (
    os.path.join('EasyRCA', 'Parametric_2', leaf) for leaf in ('data', 'data_info')
)

# n is forwarded to the generator; the intervention window it receives spans
# n - anomaly_length .. n - 1.
n, anomaly_length = 22000, 2000

generate_historical_data_by_folder_PC(graphs_path, data_path, info_path, n, anomaly_length)

100%|██████████| 50/50 [00:00<00:00, 770.96it/s]


In [38]:
# Exploratory cell: nothing is written to disk here, so the output folders
# are not created. For each graph, one non-root node is drawn at random and
# we count how often it is reachable from the first root node.

n = 22000

graph_files = [
    os.path.join(graphs_path, f)
    for f in os.listdir(graphs_path)
    if os.path.isfile(os.path.join(graphs_path, f))
]
# Number of graphs whose drawn node is reachable from the first root.
index = 0

for json_file_path in tqdm(graph_files):
    with open(json_file_path, 'r') as json_file:
        json_graph = json.load(json_file)

    # Build the NetworkX graph described by the JSON adjacency mapping.
    graph = dict_to_graph(graph_dict=json_graph, inter_nodes=[])

    # Root nodes are the ones without any incoming edge.
    root = [node for node in graph.nodes if graph.in_degree(node) == 0]
    non_root_nodes = [node for node in graph.nodes if node not in root]

    intervention_node = np.random.choice(non_root_nodes, size=1)[0]

    # Only the first root is checked against the drawn node.
    if nx.has_path(graph, root[0], intervention_node):
        print(root[0] + ' and ' + intervention_node + " are in the same path.")
        index += 1

    data = generate_data_with_parametric_intervention(
        DAG=graph,
        n=n,
        secondInterventionNode=intervention_node,
        rootStartIntervention=n - 2000,
        rootEndIntervention=n - 1,
    )

  0%|          | 0/50 [00:00<?, ?it/s]

a and f are in the same path.


  2%|▏         | 1/50 [00:28<23:24, 28.67s/it]

a and b are in the same path.


  4%|▍         | 2/50 [00:53<20:54, 26.14s/it]

a and d are in the same path.


  6%|▌         | 3/50 [01:17<19:53, 25.39s/it]

a and c are in the same path.


  6%|▌         | 3/50 [01:27<22:56, 29.28s/it]


KeyboardInterrupt: 