In [1]:
import numpy as np
import pandas as pd
import time
import pickle as pkl

from vigor import graph_data, Predicate, generate_predicate, test_predicate_proportions

  from pandas.core import (


In [2]:
# Hand-written predicates, one per visualization type.
# Every clause pairs a graph-statistic name with a two-element list
# (presumably [lower, upper] bounds -- TODO confirm against vigor.Predicate).
# The raw clause tables are spelled out first and wrapped in Predicate
# objects by the comprehension, preserving the declaration order.
predicates = {
    vistype: Predicate(clauses=clauses)
    for vistype, clauses in {
        "NODELINK": {
            "density": [0, 0.1],
            "is_directed_int": [0.5, 1],
            "n_self_loops": [0, 50],
            "n_components": [1, 5],
            "avg_degree": [1, 3],
            "clustering_coefficient": [0.1, 0.4],
            "node_types": [1, 3],
            "edge_types": [1, 2],
            "eccentricity_avg": [0, 5],
        },
        "MATRIX": {
            "density": [0.1, 1],
            "avg_degree": [10, 50],
            "modularity": [0.3, 0.7],
            "avg_betweenness_centrality": [0.2, 0.5],
            "avg_eigenvector_centrality": [0.2, 0.8],
            "node_attributes": [2, 10],
            "edge_attributes": [1, 5],
        },
        "NODETRIX": {
            "communities": [4, 10],
            "clustering_coefficient": [0.5, 1],
            "density": [0.1, 0.5],
            "node_types": [2, 5],
            "modularity": [0.3, 0.8],
            "avg_degree": [5, 15],
            "node_attributes": [3, 10],
            "edge_types": [1, 3],
        },
        "NODELINK_MAP": {
            "has_spatial_attributes": [0.5, 1],
            "is_directed_int": [0, 1],
            "avg_degree": [1, 5],
            "n_components": [1, 5],
            "assortativity": [-0.5, 0.5],
        },
        "PAOHVIS": {
            "n_nodes": [50, 500],
            "node_types": [3, 6],
            "edge_types": [2, 5],
            "density": [0.05, 0.2],
            "avg_degree": [5, 10],
            "transitivity": [0.2, 0.6],
        },
        "CHORD_DIAGRAM": {
            "n_nodes": [0, 6],
            "edge_types": [1, 3],
            "clustering_coefficient": [0.3, 0.7],
            "n_components": [1, 2],
            "avg_degree": [2, 4],
            "n_parallel_edges": [0, 5],
        },
        "TREEMAP": {
            "graph_type_1": [0.5, 1],
            "modularity": [0.5, 1],
            "n_nodes": [50, 200],
            "node_attributes": [5, 20],
            "edge_attributes": [0, 2],
            "n_components": [1, 1],
            "has_spatial_attributes": [0, 1],
        },
    }.items()
}

In [3]:
# Second hand-written predicate set (stored under the 'nobre' key of
# all_predicates). Same layout as the first set: visualization type ->
# Predicate whose clauses map a graph attribute to a two-element list
# (presumably [lower, upper] bounds -- TODO confirm against vigor.Predicate).
# Each list is written out literally so no two clauses share a list object.
predicates_nobre = {
    vistype: Predicate(clauses=clauses)
    for vistype, clauses in {
        "NODELINK": {
            "n_nodes": [0, 100],
            "graph_type_1": [0.5, 1],
            "graph_type_3": [0.5, 1],
            "graph_type_4": [0.5, 1],
            "node_types": [1, 1],
            "edge_types": [1, 1],
        },
        "NODELINK_POSITIONING": {
            "n_nodes": [0, 100],
            "graph_type_1": [0.5, 1],
            "graph_type_3": [0.5, 1],
            "node_attributes": [0, 5],
            "node_types": [1, 5],
        },
        "NODELINK_FACETING": {
            "n_nodes": [0, 100],
            "graph_type_1": [0.5, 1],
            "node_attributes": [0, 5],
            "node_types": [1, 1],
        },
        "MATRIX": {
            "n_nodes": [0, 100],
            "graph_type_2": [0.5, 1],
            "node_attributes": [5, 10],
            "node_types": [1, 1],
            "edge_attributes": [0, 3],
            "edge_types": [1, 1],
        },
        "QUILTS": {
            "n_nodes": [0, 100],
            "graph_type_1": [0.5, 1],
            "graph_type_3": [0.5, 1],
            "graph_type_4": [0.5, 1],
            "node_attributes": [0, 10],
            "node_types": [1, 5],
            "edge_attributes": [0, 10],
            "edge_types": [1, 1],
        },
        "BIOFABRIC": {
            "n_nodes": [0, 100],
            "graph_type_1": [0.5, 1],
            "graph_type_2": [0.5, 1],
            "node_attributes": [0, 10],
            "node_types": [1, 5],
            "edge_attributes": [0, 10],
            "edge_types": [1, 5],
        },
        "TREEMAP": {
            "graph_type_1": [0.5, 1],
            "graph_type_4": [0.5, 1],
            "node_attributes": [0, 5],
            "node_types": [1, 1],
        },
        "SUNBURST": {
            "graph_type_1": [0.5, 1],
            "graph_type_4": [0.5, 1],
            "node_attributes": [0, 5],
            "node_types": [1, 1],
        },
    }.items()
}

In [4]:
# Seed NumPy's legacy global RNG so the random draws below repeat on re-run.
np.random.seed(10)

vistypes = [
    "NODELINK",
    "MATRIX",
    "NODETRIX",
    "NODELINK_MAP",
    "PAOHVIS",
    "CHORD_DIAGRAM",
    "TREEMAP",
]

# One generated predicate per visualization type. For each type a Poisson
# draw (NumPy default rate) plus one fixes how many entries of
# graph_data.numeric are sampled, so at least one is always passed on.
# NOTE(review): np.random.choice samples with replacement by default, so the
# same attribute may be drawn more than once -- confirm that is intended.
# The meaning of the 0.5 argument is defined by vigor.generate_predicate.
predicates_rand = dict(
    (
        vistype,
        generate_predicate(
            graph_data,
            0.5,
            np.random.choice(graph_data.numeric, size=1 + np.random.poisson()),
        ),
    )
    for vistype in vistypes
)

In [5]:
# Registry of every predicate set under a short label; insertion order
# (hand-written, nobre, random) matches the original literal.
all_predicates = {}
all_predicates['predicates'] = predicates
all_predicates['nobre'] = predicates_nobre
all_predicates['rand'] = predicates_rand

In [6]:
# CSV inputs keyed by a short dataset label; paths are relative to the
# notebook's working directory.
all_data_paths = {
    'data': '../data/data_1733093714.csv',
    'rand': '../data/rand_1733093714.csv',
}

# Load each CSV into a DataFrame under the same label.
all_data = {
    label: pd.read_csv(csv_path)
    for label, csv_path in all_data_paths.items()
}

In [7]:
# Evaluate every predicate set against every loaded dataset.
# Judging by the output captured further down, the result has one row per
# (vistype, predicate set) with a 'predicates' label column and one numeric
# column per dataset key ('data', 'rand'); the values look like matching
# fractions in [0, 1] -- TODO confirm against vigor.test_predicate_proportions.
proportions = test_predicate_proportions(all_predicates, all_data)

In [8]:
# Dump the proportions table one predicate set at a time.
for name, p in proportions.groupby('predicates'):
    # Label, the group's rows, then an empty line separating the groups.
    print(name, p, '', sep='\n')

nobre
                 vistype predicates      data  rand
7               NODELINK      nobre  0.000000   0.0
8   NODELINK_POSITIONING      nobre  0.000000   0.0
9      NODELINK_FACETING      nobre  0.003284   0.0
10                MATRIX      nobre  0.000000   0.0
11                QUILTS      nobre  0.000000   0.0
12             BIOFABRIC      nobre  0.000000   0.0
13               TREEMAP      nobre  0.000000   0.0
14              SUNBURST      nobre  0.000000   0.0

predicates
         vistype  predicates      data   rand
0       NODELINK  predicates  0.000000  0.000
1         MATRIX  predicates  0.000000  0.001
2       NODETRIX  predicates  0.000000  0.000
3   NODELINK_MAP  predicates  0.022989  0.004
4        PAOHVIS  predicates  0.000000  0.000
5  CHORD_DIAGRAM  predicates  0.000000  0.000
6        TREEMAP  predicates  0.000000  0.000

rand
          vistype predicates      data   rand
15       NODELINK       rand  0.385057  0.479
16         MATRIX       rand  0.390805  0.466
17

In [9]:
# Opt-in export: set to True to pickle all_predicates into the current
# working directory.
save_predicates = False

if save_predicates:
    # Whole-second Unix timestamp in the file name, so a later dump does not
    # overwrite an earlier one taken at a different second.
    t = int(time.time())
    with open(f'predicates_{t}.pkl', mode='wb') as pickle_file:
        pkl.dump(all_predicates, pickle_file)