# In[1]:
# modify the inputs from the subgraph-counting project
# to make them suitable for the SubgraphMatching project

import os

def data_graph_processing(data_graph_input_path, data_graph_output_path):
    """Rewrite an edge-list data graph in the ``t``/``v``/``e`` line format.

    The input file starts with a ``<num_vertices> <num_edges>`` header line,
    followed by one ``<from> <to>`` edge per line. Fields may be separated by
    any run of whitespace, and blank lines after the header are ignored.

    The output file contains, in order:
      * ``t <num_vertices> <num_edges>`` (both values copied from the input
        header; the edge count is not recomputed),
      * one ``v <vertex_id> 0 <degree>`` line per vertex id in
        ``range(num_vertices)``, where the degree counts every edge endpoint
        that references the vertex,
      * one ``e <from> <to>`` line per edge, in input order.

    Args:
        data_graph_input_path: Path of the edge-list file to read.
        data_graph_output_path: Path of the file to write; it is overwritten
            if it already exists.

    Raises:
        ValueError: If the header or an edge line does not consist of exactly
            two integers.
        IndexError: If an edge endpoint is not a valid index into the
            ``num_vertices``-long degree table.
    """
    # read the data graph
    edges = []  # list of tuples (from, to)
    with open(data_graph_input_path, 'r') as f:
        # split() without arguments tolerates tabs and repeated spaces.
        num_vertices, num_edges = map(int, f.readline().split())
        # Stream the file line by line instead of materialising it.
        for line in f:
            fields = line.split()
            if not fields:
                # Blank lines (e.g. a trailing newline) carry no edge.
                continue
            from_vertex, to_vertex = map(int, fields)
            edges.append((from_vertex, to_vertex))

    # calculate the degree of each vertex
    vertex_degree = [0] * num_vertices
    for from_vertex, to_vertex in edges:
        vertex_degree[from_vertex] += 1
        vertex_degree[to_vertex] += 1

    # write the data graph
    with open(data_graph_output_path, 'w') as f:
        f.write(f't {num_vertices} {num_edges}\n')
        f.writelines(
            f'v {vertex_id} 0 {degree}\n'
            for vertex_id, degree in enumerate(vertex_degree)
        )
        f.writelines(
            f'e {from_vertex} {to_vertex}\n'
            for from_vertex, to_vertex in edges
        )

def pattern_graph_processing(pattern_graph_input_dirs, pattern_graph_output_dirs):
    """Convert every pattern file in each input directory to a ``.graph`` file.

    The directory at position ``i`` of ``pattern_graph_input_dirs`` is paired
    with the directory at position ``i`` of ``pattern_graph_output_dirs``.
    Output directories are created when they do not exist yet. Entries of an
    input directory that are not regular files are skipped.

    Each pattern file is read as:
      * a ``<num_vertices> <num_edges>`` header line,
      * exactly ``num_edges`` lines of ``<from> <to>``,
      * one further line whose second whitespace-separated field is copied
        verbatim into the output header (stored here as ``orbit``).

    Each output file keeps the input file's base name with the extension
    replaced by ``.graph`` and contains:
      * ``t <num_vertices> <num_edges> <orbit>``,
      * one ``v <vertex_id> 0 <degree>`` line per vertex id in
        ``range(num_vertices)``,
      * one ``e <from> <to>`` line per edge, in input order.

    Args:
        pattern_graph_input_dirs: Sequence of directories holding pattern files.
        pattern_graph_output_dirs: Sequence of directories receiving the
            converted files; must be at least as long as the input sequence.

    Raises:
        ValueError: If a header or edge line does not consist of exactly two
            integers.
        IndexError: If the line after the edges has fewer than two fields, if
            an edge endpoint is not a valid index into the degree table, or if
            there are fewer output directories than input directories.
    """
    # Index the output list explicitly so a too-short list still raises
    # IndexError rather than being silently truncated by zip().
    for dir_idx, input_dir in enumerate(pattern_graph_input_dirs):
        output_dir = pattern_graph_output_dirs[dir_idx]
        # The output tree is usually new; create it instead of failing on open().
        os.makedirs(output_dir, exist_ok=True)

        for pattern_file in os.listdir(input_dir):
            input_path = os.path.join(input_dir, pattern_file)
            if not os.path.isfile(input_path):
                # Sub-directories and other non-file entries cannot be parsed.
                continue

            # read the pattern graph
            edges = []
            with open(input_path, 'r') as f:
                # split() without arguments tolerates tabs and repeated spaces.
                num_vertices, num_edges = map(int, f.readline().split())
                for _ in range(num_edges):
                    from_vertex, to_vertex = map(int, f.readline().split())
                    edges.append((from_vertex, to_vertex))
                # Second field of the line following the edge list.
                orbit = f.readline().split()[1]

            # calculate the degree of each vertex
            vertex_degree = [0] * num_vertices
            for from_vertex, to_vertex in edges:
                vertex_degree[from_vertex] += 1
                vertex_degree[to_vertex] += 1

            # write the pattern
            output_name = os.path.splitext(pattern_file)[0] + ".graph"
            with open(os.path.join(output_dir, output_name), 'w') as f:
                f.write(f't {num_vertices} {num_edges} {orbit}\n')
                f.writelines(
                    f'v {vertex_id} 0 {degree}\n'
                    for vertex_id, degree in enumerate(vertex_degree)
                )
                f.writelines(
                    f'e {from_vertex} {to_vertex}\n'
                    for from_vertex, to_vertex in edges
                )

if __name__ == '__main__':
    # Script entry point: converts the pattern graphs listed below.
    # All paths are relative to the current working directory.

    # data graph path
    data_graph_input_path = './test/data_graph/web-spam.txt'

    # pattern graph directory
    pattern_graph_input_dir = [
        './test/query_graph/pattern_graph/5voc',
        './test/query_graph/pattern_graph/6voc',
        './test/query_graph/pattern_graph/7voc',
    ]

    # data graph output path
    data_graph_output_path = "./test/data_graph/web-spam-new.graph"

    # pattern graph output directory (paired by position with the inputs)
    pattern_graph_output_dir = [
        './test/query_graph/pattern_graph_new/5voc',
        './test/query_graph/pattern_graph_new/6voc',
        './test/query_graph/pattern_graph_new/7voc',
    ]

    if len(pattern_graph_input_dir) != len(pattern_graph_output_dir):
        # Raise SystemExit directly rather than calling exit(): exit() is
        # installed by the site module for interactive sessions and is not
        # guaranteed to exist in every interpreter. A string argument is
        # printed to stderr and the process exits with status 1.
        raise SystemExit('The number of input pattern graph directories should be the same as the number of output pattern graph directories.')

    # data graph processing (currently disabled; uncomment to convert the data graph)
    # data_graph_processing(data_graph_input_path, data_graph_output_path)

    # pattern graph processing
    pattern_graph_processing(pattern_graph_input_dir, pattern_graph_output_dir)