In [None]:
#!pip install dill

In [None]:
import dill
import json
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import os
import random
import sys

In [None]:
def visualize(graph):
    """Draw ``graph`` in a fresh 10x10 figure, labelling each node by letter.

    Args:
        graph: graph whose nodes all carry a ``'letter'`` attribute; that
            attribute is used as the text drawn on the node.

    The currently active matplotlib figure is closed first, then a new
    figure is created, drawn into and shown.
    """
    plt.close()  # close the current figure before opening a new one
    plt.figure(figsize=(10, 10))
    letters_by_node = {
        node_id: graph.nodes[node_id]['letter'] for node_id in graph.nodes()
    }
    nx.draw(graph, labels=letters_by_node, with_labels=True)
    plt.show()


def get_random_connected_subgraph(graph, edges):
    """Sample a connected subgraph by truncating a BFS from a random node.

    A start node is picked with ``random.choice`` and the first ``edges``
    edges of ``nx.bfs_edges`` from that node are copied into a new graph.
    Because every BFS edge attaches a newly discovered node to one that was
    already reached, any prefix of that edge list is connected.

    Args:
        graph: source graph. Every node that ends up in the sample must carry
            the ``'letter'`` and ``'representations'`` attributes.
        edges: number of BFS edges to keep (applied as a list slice). Fewer
            edges are used when the BFS from the start node yields fewer.

    Returns:
        A new ``nx.Graph`` holding the selected edges. Only the ``'letter'``
        and ``'representations'`` node attributes are copied; edge attributes
        are not. The graph is empty when no BFS edge is selected (for example
        ``edges == 0`` or an isolated start node).

    Raises:
        IndexError: if ``graph`` has no nodes (``random.choice`` on an empty
            list).
        KeyError: if a selected node lacks one of the copied attributes.
    """
    random_node = random.choice(list(graph.nodes()))
    random_subgraph = nx.Graph()
    for edge in list(nx.bfs_edges(graph, random_node))[:edges]:
        for node in edge:
            # Test membership on the graph itself instead of rebuilding a
            # list of all nodes for every endpoint.
            if node not in random_subgraph:
                source_attributes = graph.nodes[node]
                random_subgraph.add_node(
                    node,
                    letter=source_attributes['letter'],
                    representations=source_attributes['representations'],
                )
        random_subgraph.add_edge(*edge)
    return random_subgraph


def get_isomorphic_subgraphs(subgraph, all_graphs):
    """Find every occurrence of ``subgraph`` inside each graph of ``all_graphs``.

    Nodes are considered compatible when their ``'letter'`` attributes are
    equal (``-1`` is the default used for nodes without that attribute).

    Args:
        subgraph: pattern graph to search for.
        all_graphs: iterable of ``(file_name_without_extension, graph)`` pairs.

    Returns:
        A list of ``(file_name_without_extension, matched_subgraph)`` tuples,
        one per mapping yielded by the matcher, in search order.
        ``matched_subgraph`` is ``graph.subgraph(...)`` over the matched nodes
        of the searched graph.

    NOTE(review): networkx documents ``subgraph_isomorphisms_iter`` as
    matching node-induced subgraphs; if ``subgraph`` is not induced in its
    source graph it may not be found there. Confirm that induced matching
    (rather than ``subgraph_monomorphisms_iter``) is what is intended.
    NOTE(review): the same node set can presumably be yielded more than once
    when the pattern has label-preserving symmetries; no de-duplication is
    done here.
    """
    isomorphism = nx.algorithms.isomorphism
    same_letter = isomorphism.categorical_node_match('letter', -1)
    occurrences = []
    for template_name, candidate in all_graphs:
        matcher = isomorphism.GraphMatcher(
            candidate, subgraph, node_match=same_letter
        )
        occurrences.extend(
            (template_name, candidate.subgraph(list(mapping.keys())))
            for mapping in matcher.subgraph_isomorphisms_iter()
        )
    return occurrences


def get_subgraph_representations(config, subgraph, template_file_name=None):
    """Expand ``subgraph`` into one graph per representation index.

    For every index ``i`` in ``range(config['number_of_representations'])`` a
    new ``nx.Graph`` is built with the edges of ``subgraph``; each endpoint
    gets a ``'representation'`` attribute taken from position ``i`` of that
    node's ``'representations'`` attribute in ``subgraph``. Nodes of
    ``subgraph`` that have no edge are not carried over.

    Args:
        config: mapping providing ``'number_of_representations'``.
        subgraph: graph whose nodes carry an indexable ``'representations'``
            attribute.
        template_file_name: optional base name. When given, each result is
            paired with the file name ``'<template_file_name>_<i>.json'``.

    Returns:
        A list of graphs, or a list of ``(graph_file_name, graph)`` tuples
        when ``template_file_name`` is not ``None``.
    """
    attach_file_names = template_file_name is not None
    collected = []
    for index in range(config['number_of_representations']):
        rendition = nx.Graph()
        for u, v in subgraph.edges():
            rendition.add_edge(u, v)
            for endpoint in (u, v):
                rendition.nodes[endpoint]['representation'] = (
                    subgraph.nodes[endpoint]['representations'][index]
                )
        if attach_file_names:
            collected.append(
                ('%s_%d.json' % (template_file_name, index), rendition)
            )
        else:
            collected.append(rendition)
    return collected


def _load_template_graphs(templates_dir):
    """Load every ``*.json`` node-link graph found directly in ``templates_dir``.

    Returns a list of ``(file_name_without_extension, graph)`` pairs ordered
    by file name.
    """
    template_graphs = []
    # os.listdir() returns entries in arbitrary order; sorting keeps the
    # seeded random sampling reproducible across machines and file systems.
    for file_name in sorted(os.listdir(templates_dir)):
        # splitext keeps dotted stems intact ('a.v2.json' -> 'a.v2') and only
        # accepts a real '.json' extension, not any name ending in 'json'.
        stem, extension = os.path.splitext(file_name)
        if extension != '.json':
            continue
        with open(os.path.join(templates_dir, file_name), 'r') as graph_file:
            graph = nx.node_link_graph(json.load(graph_file))
        template_graphs.append((stem, graph))
    return template_graphs


def create_benchmark(config):
    """Build the subgraph-matching benchmark and dump it with dill.

    For every template graph, ``config['number_of_subgraphs']`` random
    connected subgraphs are sampled. Each sample is searched for in all
    template graphs, and every representation of the sample is stored
    together with the representations of all its matches.

    Args:
        config: mapping with the keys
            ``'random_seed'``: seed for both ``random`` and ``numpy.random``;
            ``'templates_dir'``: directory holding node-link ``.json`` graphs;
            ``'number_of_subgraphs'``: samples drawn per template graph;
            ``'random_subgraph_range'``: ``(low, high)`` passed to
            ``random.randint`` to choose the edge count of each sample;
            ``'visualize'``: when true, each sample is drawn with
            ``visualize``;
            ``'output_path'``: file the benchmark is written to.
            The mapping is also forwarded to ``get_subgraph_representations``.

    Side effects:
        Writes a ``current/total`` progress counter to stdout and writes the
        benchmark, a list of ``(sample_representation,
        isomorphic_subgraph_representations)`` tuples, to
        ``config['output_path']``.
    """
    random.seed(config['random_seed'])
    np.random.seed(config['random_seed'])
    all_graphs = _load_template_graphs(config['templates_dir'])
    benchmark = []
    for progress, (_template_name, graph) in enumerate(all_graphs):
        sys.stdout.write('\r%d/%d' % (progress + 1, len(all_graphs)))
        sys.stdout.flush()
        for _ in range(config['number_of_subgraphs']):
            random_subgraph_edges = random.randint(*config['random_subgraph_range'])
            random_subgraph = get_random_connected_subgraph(graph, random_subgraph_edges)
            if config['visualize']:
                visualize(random_subgraph)
            isomorphic_subgraphs = get_isomorphic_subgraphs(random_subgraph, all_graphs)
            isomorphic_subgraph_representations = []
            for file_name_without_extension, isomorphic_subgraph in isomorphic_subgraphs:
                isomorphic_subgraph_representations += get_subgraph_representations(
                    config,
                    isomorphic_subgraph,
                    file_name_without_extension
                )
            # Every representation of this sample is paired with the same
            # list object of match representations.
            for random_subgraph_representation in get_subgraph_representations(config, random_subgraph):
                benchmark_entry = (
                    random_subgraph_representation, isomorphic_subgraph_representations
                )
                benchmark.append(benchmark_entry)
    print()  # newline after the '\r'-style progress counter
    with open(config['output_path'], 'wb') as output_file:
        dill.dump(benchmark, output_file)

In [None]:
# Benchmark settings consumed by create_benchmark().
config = dict(
    templates_dir='templates',      # directory scanned for template graph JSON files
    number_of_subgraphs=10,         # random subgraphs sampled per template graph
    number_of_representations=10,   # representations emitted per subgraph
    random_subgraph_range=(3, 7),   # edges; unpacked into random.randint(low, high)
    random_seed=42,                 # seeds both `random` and `numpy.random`
    visualize=False,                # True draws every sampled subgraph
    output_path='benchmark.pkl',    # file the benchmark is dill-dumped to
)
create_benchmark(config)