In [3]:
import json
import math
import random

import numpy as np

In [4]:
# Visualization techniques, in the order used by every score row below.
_TECHNIQUES = (
    'nodelink topology driven',
    'nodelink attribute driven faceting',
    'nodelink attribute driven positioning',
    'adjacency matrix',
    'quilts',
    'biofabric',
    'sunburst',
    'treemap',
)


def _by_technique(levels, rows):
    """Pair each technique with a ``{level: score}`` dict.

    ``rows`` holds one tuple of scores per technique (same order as
    ``_TECHNIQUES``); each tuple lists the scores in the order of ``levels``.
    """
    return {
        technique: dict(zip(levels, row))
        for technique, row in zip(_TECHNIQUES, rows)
    }


# Suitability scores, indexed as nobre_scores[criterion][technique][level].
nobre_scores = {
    'size': _by_technique(('small', 'medium', 'large'), [
        (3, 2, 1),  # topology driven
        (3, 1, 1),  # attribute driven faceting
        (3, 1, 1),  # attribute driven positioning
        (3, 1, 1),  # adjacency matrix
        (3, 1, 1),  # quilts
        (3, 1, 1),  # biofabric
        (3, 2, 1),  # sunburst
        (3, 2, 2),  # treemap
    ]),
    'type': _by_technique(('sparse', 'dense', 'k-partite', 'tree'), [
        (3, 1, 3, 3),  # topology driven
        (3, 1, 3, 1),  # attribute driven faceting
        (3, 1, 1, 1),  # attribute driven positioning
        (2, 3, 2, 1),  # adjacency matrix
        (3, 1, 3, 3),  # quilts
        (3, 3, 2, 1),  # biofabric
        (0, 0, 0, 3),  # sunburst
        (0, 0, 0, 3),  # treemap
    ]),
    'node_types': _by_technique(('homogeneous', 'heterogeneous'), [
        (3, 2),  # topology driven
        (3, 3),  # attribute driven faceting
        (3, 1),  # attribute driven positioning
        (3, 2),  # adjacency matrix
        (3, 3),  # quilts
        (3, 3),  # biofabric
        (3, 1),  # sunburst
        (3, 1),  # treemap
    ]),
    'node_attributes': _by_technique(('few', 'several'), [
        (2, 1),  # topology driven
        (3, 1),  # attribute driven faceting
        (3, 1),  # attribute driven positioning
        (2, 3),  # adjacency matrix
        (3, 3),  # quilts
        (3, 3),  # biofabric
        (3, 1),  # sunburst
        (3, 1),  # treemap
    ]),
    'edge_types': _by_technique(('homogeneous', 'heterogeneous'), [
        (3, 1),  # topology driven
        (2, 1),  # attribute driven faceting
        (2, 1),  # attribute driven positioning
        (3, 2),  # adjacency matrix
        (3, 2),  # quilts
        (3, 3),  # biofabric
        (0, 0),  # sunburst
        (0, 0),  # treemap
    ]),
    'edge_attributes': _by_technique(('few', 'several'), [
        (2, 1),  # topology driven
        (2, 1),  # attribute driven faceting
        (2, 1),  # attribute driven positioning
        (3, 2),  # adjacency matrix
        (3, 3),  # quilts
        (3, 3),  # biofabric
        (0, 0),  # sunburst
        (0, 0),  # treemap
    ]),
}

In [5]:
def _categorize_graph(statistics, rng):
    """Translate raw graph statistics into the level labels used by the score table.

    Returns a dict keyed by criterion name ('size', 'type', 'node_types', ...)
    whose values are the level labels for this graph.
    """
    size = statistics["size"]
    max_edges = size * (size - 1) // 2
    density = statistics["volume"] / max_edges

    if size < 100:
        size_category = 'small'
    elif size < 1000:
        size_category = 'medium'
    else:
        size_category = 'large'

    if density < 0.1:
        graph_type_category = 'sparse'
    elif density > 0.5:
        graph_type_category = 'dense'
    else:
        # Density alone does not distinguish these two, so the label is drawn at random.
        graph_type_category = rng.choice(['tree', 'k-partite'])

    return {
        'size': size_category,
        'type': graph_type_category,
        'node_types': 'homogeneous' if statistics["node_types"] == 1 else 'heterogeneous',
        'node_attributes': 'few' if statistics["node_attributes"] < 5 else 'several',
        'edge_types': 'homogeneous' if statistics["edge_types"] == 1 else 'heterogeneous',
        # NOTE(review): this threshold (3) differs from the node-attribute one (5) — confirm intended.
        'edge_attributes': 'few' if statistics["edge_attributes"] < 3 else 'several',
    }


def _best_visualization(categories, table):
    """Return the technique with the highest summed score; ties go to the first one listed."""
    totals = {
        vis: sum(table[criterion][vis][level] for criterion, level in categories.items())
        for vis in table['size']
    }
    return max(totals, key=totals.get)


def generate_samples(n_samples, seed=None, score_table=None, min_size=10, max_size=10000):
    """Generate synthetic graph descriptions labelled with their best-scoring visualization.

    Each sample draws random graph statistics, maps them to categories and picks
    the technique whose summed score over all criteria is highest.

    Compared with the earlier sampling, every category is now reachable:
    sizes can reach 1000 and above ('large'), density can exceed 0.5 ('dense'),
    and ``edge_types`` can be 2 ('heterogeneous'). ``volume`` is now an integer
    edge count in ``[0, size * (size - 1) // 2]``.

    Args:
        n_samples: Number of samples to generate; values <= 0 yield an empty list.
        seed: Optional seed for a private generator. When ``None`` the global
            ``random`` module state is used, so ``random.seed(...)`` still applies.
        score_table: Optional table shaped like ``nobre_scores``
            (``table[criterion][technique][level]``). Defaults to ``nobre_scores``.
        min_size: Smallest node count to draw (inclusive, must be >= 2).
        max_size: Largest node count to draw (inclusive).

    Returns:
        A list of JSON-serializable dicts with the keys ``domain``,
        ``statistics``, ``visualization`` and ``user_feedback``.

    Raises:
        ValueError: If ``min_size`` < 2 or ``max_size`` < ``min_size``.
    """
    if min_size < 2 or max_size < min_size:
        raise ValueError("expected 2 <= min_size <= max_size")

    # The module itself exposes the same randint/uniform/choice API as random.Random.
    rng = random if seed is None else random.Random(seed)
    table = nobre_scores if score_table is None else score_table
    log_low, log_high = math.log(min_size), math.log(max_size)

    nobre_samples = []
    for _ in range(n_samples):
        # Log-uniform draw: the size thresholds are a decade apart, so this keeps
        # small / medium / large roughly equally represented.
        size = round(math.exp(rng.uniform(log_low, log_high)))
        size = min(max(size, min_size), max_size)  # guard against float rounding at the edges
        max_edges = size * (size - 1) // 2

        statistics = {
            "size": size,
            "volume": rng.randint(0, max_edges),
            "node_types": rng.randint(1, 2),
            "node_attributes": rng.randint(0, 9),
            "edge_types": rng.randint(1, 2),
            "edge_attributes": rng.randint(0, 9),
        }
        categories = _categorize_graph(statistics, rng)

        nobre_samples.append({
            "domain": rng.choice(['healthcare', 'asset performance management', 'cyber security', 'e-commerce']),
            "statistics": statistics,
            "visualization": _best_visualization(categories, table),
            "user_feedback": 5,
        })

    return nobre_samples

In [6]:
# Seed both module-level generators before sampling so that a fresh
# "Restart & Run All" reproduces exactly the same datasets.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

# The two calls consume the random stream one after the other, so the training
# and testing sets are distinct draws.
training = generate_samples(20000)
testing = generate_samples(5000)

In [7]:
# Write each split to its own pretty-printed JSON file under ../data.
for path, samples in (
    ('../data/training.json', training),
    ('../data/testing.json', testing),
):
    with open(path, 'w') as file:
        json.dump(samples, file, indent=4)