# Simulation of User Preferences

In [1]:
import os
import pandas as pd
from vigor import generate_graphs, label_graphs, predicates, learn_predicates, compute_metrics

## Generate graphs

1. Generate graphs using the fast_gnp_random_graph function from networkx
2. Calculate statistics for each graph

In [2]:
# Cache location (without extension) for the generated graph statistics.
file_path = '../data/generated_graphs_example'
csv_path = f'{file_path}.csv'

if os.path.exists(csv_path):
    # Reuse the cached table so a re-run does not regenerate the graphs.
    print('Loading graph data')
    graphs = pd.read_csv(csv_path)
else:
    print('Generating graph data')
    # NOTE(review): arguments are presumably (number of graphs, min nodes,
    # max nodes) -- confirm against vigor.generate_graphs.
    graphs = generate_graphs(100, 2, 200)
    # DataFrame.to_csv does not create missing parent directories, so make
    # sure the target directory exists before writing the cache.
    os.makedirs(os.path.dirname(csv_path), exist_ok=True)
    graphs.to_csv(csv_path, index=False)

Generating graph data
Generated statistics for graph 0 {'graph_type': 4, 'is_directed_int': 1, 'has_spatial_attributes': 0, 'has_temporal_attributes': 1, 'is_bipartite': 0, 'n_components': 2, 'avg_betweenness_centrality': 0.0068434104865375545, 'avg_closeness_centrality': 0.5044364373957784, 'avg_eigenvector_centrality': 0.08059175681583537, 'avg_degree': 17.356164383561644, 'std_degree': 3.7264011038245455, 'clustering_coefficient': 0.1149081813747267, 'transitivity': 0.11707317073170732, 'modularity': -3.114703956731663e-05, 'communities': 2, 'avg_shortest_path_length': 1.9854511100614076, 'radius': 3, 'diameter': 3, 'assortativity': 0.0018919467066478804, 'vertex_connectivity': 8, 'eccentricity_avg': 3.0, 'n_nodes': 146, 'node_types': 3, 'node_attributes': 6, 'number_of_isolates': 0, 'density': 0.1196976854038734, 'edge_types': 5, 'edge_attributes': 2, 'n_parallel_edges': 2, 'n_self_loops': 5}
Generated statistics for graph 1 {'graph_type': 4, 'is_directed_int': 0, 'has_spatial_attr

In [3]:
# Preview the first five rows of the per-graph statistics table.
graphs.head(n=5)

Unnamed: 0,graph_type,is_directed_int,has_spatial_attributes,has_temporal_attributes,is_bipartite,n_components,avg_betweenness_centrality,avg_closeness_centrality,avg_eigenvector_centrality,avg_degree,...,eccentricity_avg,n_nodes,node_types,node_attributes,number_of_isolates,density,edge_types,edge_attributes,n_parallel_edges,n_self_loops
0,4,1,0,1,0,2,0.006843,0.504436,0.080592,17.356164,...,3.0,146,3,6,0,0.119698,5,2,2,5
1,4,0,0,0,0,7,0.011514,0.554609,0.114805,15.666667,...,2.736111,72,1,2,0,0.220657,4,12,0,4
2,4,1,0,1,0,9,0.034914,0.568343,0.198937,6.916667,...,2.666667,24,4,7,0,0.300725,5,15,3,1
3,4,0,0,0,1,1,0.007891,0.561783,0.097868,22.376238,...,2.356436,101,1,4,0,0.223762,4,1,0,0
4,4,0,1,1,1,5,0.004791,0.659691,0.094859,52.6,...,2.0,110,1,0,0,0.482569,3,0,0,4


## Sample designers

We evaluate the ability of VIGOR to recover the rules that were used to represent simulated users. We design 3 versions of this simulated user: bob_informed, who follows the rules 100\% of the time; bob_semi_informed, who follows the rules 75\% of the time and chooses other visualizations randomly the other 25\% of the time; and bob_uninformed, who follows the rules 50\% of the time and chooses other visualizations randomly the other 50\% of the time.

In [4]:
def _label_with(conformance):
    """Label `graphs` with `predicates` at the given conformance value."""
    return label_graphs(graphs, predicates, conformance=conformance)


# One labeling per simulated user: conformance 1, 0.75 and 0.5.
informed = _label_with(1)
semi_informed = _label_with(0.75)
uninformed = _label_with(0.5)

In [5]:
# Display the labels produced with conformance=1.
informed

0    NODETRIX
1      MATRIX
2     PAOHVIS
dtype: object

## Learning predicates from labeled data

In [6]:
def _learn_from(labels):
    """Run learn_predicates on `graphs` for one set of labels.

    NOTE(review): the third argument (1000) is passed through unchanged;
    its meaning is not visible here -- confirm against vigor.learn_predicates.
    """
    return learn_predicates(graphs, labels, 1000)


# Learn one predicate set per simulated user, in the same order as before.
learned_predicates_informed = _learn_from(informed)
learned_predicates_semi_informed = _learn_from(semi_informed)
learned_predicates_uninformed = _learn_from(uninformed)

Learning predicates for NODETRIX
get_predicates [[9.99999851e-01 0.00000000e+00 9.99999956e-01 0.00000000e+00
  0.00000000e+00 0.00000000e+00 0.00000000e+00 9.99999953e-01
  9.99999974e-01 9.99999990e-01 0.00000000e+00 0.00000000e+00
  9.99999967e-01 0.00000000e+00 0.00000000e+00]
 [0.00000000e+00 9.99999951e-01 1.37112361e-01 8.19862128e-01
  3.79497414e-01 9.99999968e-01 9.99999971e-01 0.00000000e+00
  0.00000000e+00 0.00000000e+00 9.99999912e-01 9.45454545e-01
  0.00000000e+00 4.96350365e-01 9.99999971e-01]
 [1.51612230e-02 1.38378997e-01 0.00000000e+00 1.00000000e+00
  9.99999998e-01 3.32499795e-02 1.30800071e-01 9.48352955e-04
  7.85660391e-01 0.00000000e+00 9.01631057e-01 1.00000000e+00
  0.00000000e+00 1.00000000e+00 1.11824292e-01]] [[ True False False]] Index(['avg_betweenness_centrality', 'avg_closeness_centrality',
       'avg_eigenvector_centrality', 'avg_degree', 'std_degree',
       'clustering_coefficient', 'transitivity', 'modularity',
       'avg_shortest_path_length',

  selection_std = torch.stack([x[sel_t].std(0) for sel_t in selected], 0)


RuntimeError: all elements of input should be between 0 and 1

## Comparing learned predicates to initial predicates

### Informed User

In [None]:
# Compare the predicates learned from the conformance=1 labels with the
# initial predicates.
informed_scores = compute_metrics(predicates, learned_predicates_informed)
metrics, mean_iou, mean_deviation, inclusion_ratio = informed_scores

# Report the three summary values with two decimal places.
print(f"Mean IoU: {mean_iou:.2f}")
print(f"Mean Deviation: {mean_deviation:.2f}")
print(f"Inclusion Ratio: {inclusion_ratio:.2f}")

### Semi-Informed User

In [None]:
# Compare the predicates learned from the conformance=0.75 labels with the
# initial predicates.
semi_informed_scores = compute_metrics(predicates, learned_predicates_semi_informed)
metrics, mean_iou, mean_deviation, inclusion_ratio = semi_informed_scores

# Report the three summary values with two decimal places.
print(f"Mean IoU: {mean_iou:.2f}")
print(f"Mean Deviation: {mean_deviation:.2f}")
print(f"Inclusion Ratio: {inclusion_ratio:.2f}")

### Uninformed User

In [None]:
# Compare the predicates learned from the conformance=0.5 labels with the
# initial predicates.
uninformed_scores = compute_metrics(predicates, learned_predicates_uninformed)
metrics, mean_iou, mean_deviation, inclusion_ratio = uninformed_scores

# Report the three summary values with two decimal places.
print(f"Mean IoU: {mean_iou:.2f}")
print(f"Mean Deviation: {mean_deviation:.2f}")
print(f"Inclusion Ratio: {inclusion_ratio:.2f}")