# Simulation of User Preferences

In [1]:
import os
import altair as alt
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from sklearn.metrics import precision_recall_fscore_support

from vigor import generate_graphs, nobre_predicates, predicates, compute_metrics, Predicate, VIGOR, label_graphs, learn_predicates

  from pandas.core import (


## Generate graphs

1. Generate graphs using the fast_gnp_random_graph function from networkx
2. Calculate statistics for each graph

In [2]:
file_path = '../data/generated_graphs_example.csv'
# graphs = generate_graphs(1000, 2, 200, file_path=file_path)
all_graphs = pd.read_csv(f'{file_path}')

In [3]:
graphs = all_graphs.sample(frac=.8, random_state=0)
test_graphs = all_graphs.loc[~all_graphs.index.isin(graphs.index)]

In [4]:
graphs.head()

Unnamed: 0,graph_type,is_directed_int,has_spatial_attributes,has_temporal_attributes,is_bipartite,n_components,avg_betweenness_centrality,avg_closeness_centrality,avg_eigenvector_centrality,avg_degree,...,2.129032258064516,62,3.1,10.1,0.2,0.28239026969857217,3.2,1.2,1.3,4
1144,4,0,1,0,1,6,0.003506,0.613409,0.073731,66.769231,...,,,,,,,,,,
990,4,0,1,0,0,2,0.004779,0.60125,0.083575,47.007092,...,,,,,,,,,,
478,2,0,0,1,0,8,0.227273,0.305556,0.288675,2.0,...,,,,,,,,,,
161,4,0,0,1,0,4,0.018649,0.49649,0.125289,9.192982,...,,,,,,,,,,
1153,4,0,0,0,0,1,0.004753,0.560658,0.076448,35.856287,...,,,,,,,,,,


In [5]:
graphs = graphs[['graph_type', 'n_nodes', 'density']]

In [6]:
graphs

Unnamed: 0,graph_type,n_nodes,density
1144,4,182,0.368891
990,4,141,0.335765
478,2,12,0.181818
161,4,57,0.164160
1153,4,167,0.216002
...,...,...,...
414,4,11,0.272727
662,4,98,0.364191
1206,3,161,0.066149
1187,4,109,0.494903


## Sample designers

We evaluate the ability of VIGOR to recover the rules that were used to represent simulated users. We design 3 versions of this simulated user: bob_informed, who follows the rules 100\% of the time; bob_semi_informed, who follows the rules 75\% of the time and chooses other visualizations randomly the other 25\% of the time; and bob_uninformed who follows the rules 50\% of the time and chooses other visualizations randomly the other 50\% of the time.

In [7]:
informed = label_graphs(graphs, predicates, conformance=1)
semi_informed = label_graphs(graphs, predicates, conformance=0.75)
uninformed = label_graphs(graphs, predicates, conformance=0.5)

In [8]:
informed

1144      MATRIX
990       MATRIX
478       MATRIX
161      PAOHVIS
1153      MATRIX
          ...   
414       MATRIX
662       MATRIX
1206     PAOHVIS
1187      MATRIX
1027    NODELINK
Length: 974, dtype: object

### Learning predicates from labeled data

In [9]:
graphs['graph_type'].value_counts()

graph_type
4    657
3    160
2     83
1     74
Name: count, dtype: int64

In [10]:
graph_types = graphs['graph_type'].unique()
graphs['graph_type_' + pd.Series(graph_types).astype(str)] = (graphs['graph_type'].values[None] == graph_types[:,None]).astype(int).T
graphs = graphs.drop('graph_type', axis=1)

In [11]:
learned_predicates_informed = learn_predicates(graphs, informed, 10)
learned_predicates_semi_informed = learn_predicates(graphs, semi_informed, 10)
learned_predicates_uninformed = learn_predicates(graphs, uninformed, 10)

Learning predicates for MATRIX
[   0] loss 5.365243434906006
[   1] loss 5.203358173370361
[   2] loss 5.184918403625488
[   3] loss 5.162360191345215
[   4] loss 5.142056941986084
[   5] loss 5.12255334854126
[   6] loss 5.103957653045654
[   7] loss 5.086215019226074
[   8] loss 5.069209575653076
[   9] loss 5.052829265594482
[   0] loss 6.84258508682251
[   1] loss 6.830089092254639
[   2] loss 6.808320999145508
[   3] loss 6.776451110839844
[   4] loss 6.767061710357666
[   5] loss 6.757322311401367
[   6] loss 6.747422218322754
[   7] loss 6.737519264221191
[   8] loss 6.727634906768799
[   9] loss 6.717746257781982
Learning predicates for PAOHVIS
[   0] loss 3.276097297668457
[   1] loss 3.255321979522705
[   2] loss 3.2396798133850098
[   3] loss 3.223752737045288
[   4] loss 3.199697971343994
[   5] loss 3.1922619342803955
[   6] loss 3.185206651687622
[   7] loss 3.178217649459839
[   8] loss 3.171217203140259
[   9] loss 3.164182424545288
[   0] loss 9.380393981933594
[   1] 

### Comparing learned predicates to initial predicates

#### Informed User

In [12]:
test_informed = label_graphs(test_graphs, predicates, conformance=1)
evaluation_informed = compute_metrics(predicates, learned_predicates_informed, graphs, test_graphs, informed, test_informed)
print(evaluation_informed)

{'MATRIX': {'exact': {'density': {'iou': 0.013176651541917753, 'deviation': 0.444070506806137, 'inclusion': 1}}, 'describe': {'precsion': 0.0, 'recall': 0.0, 'f1': 0.0, 'accuracy': 0.36960985626283366}, 'generalize': {'precsion': 0.0, 'recall': 0.0, 'f1': 0.0, 'accuracy': 0.75}}, 'PAOHVIS': {'exact': {'n_nodes': {'iou': 0.08425370964522048, 'deviation': 206.0429153298254, 'inclusion': 1}, 'density': {'iou': 0.0, 'deviation': 0.22365594415019044, 'inclusion': 0}}, 'describe': {'precsion': 0.0, 'recall': 0.0, 'f1': 0.0, 'accuracy': 0.7885010266940452}, 'generalize': {'precsion': 0.0, 'recall': 0.0, 'f1': 0.0, 'accuracy': 0.75}}, 'NODELINK': {'exact': {'density': {'iou': 0.0, 'deviation': 0.3066536950120411, 'inclusion': 0}}, 'describe': {'precsion': 0.0, 'recall': 0.0, 'f1': 0.0, 'accuracy': 0.8501026694045175}, 'generalize': {'precsion': 1.0, 'recall': 0.012195121951219513, 'f1': 0.024096385542168676, 'accuracy': 0.6680327868852459}}, 'CHORD_DIAGRAM': {'exact': {'n_nodes': {'iou': 0.0, 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


#### Semi-Informed User

In [13]:
test_semi_informed = label_graphs(test_graphs, predicates, conformance=0.75)
evaluation_semi_informed = compute_metrics(predicates, learned_predicates_semi_informed, graphs, test_graphs, semi_informed, test_semi_informed)
print(evaluation_semi_informed)

{'MATRIX': {'exact': {'density': {'iou': 0.03404856766520673, 'deviation': 0.43467814455065695, 'inclusion': 1}}, 'describe': {'precsion': 0.0, 'recall': 0.0, 'f1': 0.0, 'accuracy': 0.48459958932238195}, 'generalize': {'precsion': 0.0, 'recall': 0.0, 'f1': 0.0, 'accuracy': 0.7663934426229508}}, 'PAOHVIS': {'exact': {'n_nodes': {'iou': 0.04794846899331743, 'deviation': 214.2115944765036, 'inclusion': 1}, 'density': {'iou': 0.0, 'deviation': 0.22434213986951648, 'inclusion': 0}}, 'describe': {'precsion': 0.0, 'recall': 0.0, 'f1': 0.0, 'accuracy': 0.8090349075975359}, 'generalize': {'precsion': 0.0, 'recall': 0.0, 'f1': 0.0, 'accuracy': 0.7827868852459017}}, 'CHORD_DIAGRAM': {'exact': {'n_nodes': {'iou': 0.0, 'deviation': 123.4697713458671, 'inclusion': 0}}, 'describe': {'precsion': 0.0, 'recall': 0.0, 'f1': 0.0, 'accuracy': 0.946611909650924}, 'generalize': {'precsion': 0.0, 'recall': 0.0, 'f1': 0.0, 'accuracy': 0.9467213114754098}}, 'NODELINK': {'exact': {'density': {'iou': 0.0, 'deviat

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


#### Unformed User

In [14]:
test_uninformed = label_graphs(test_graphs, predicates, conformance=0.5)
evaluation_uninformed = compute_metrics(predicates, learned_predicates_uninformed, graphs, test_graphs, uninformed, test_uninformed)
print(evaluation_uninformed)

{'MATRIX': {'exact': {'density': {'iou': 0.03680648124853544, 'deviation': 0.433437083438159, 'inclusion': 1}}, 'describe': {'precsion': 0.0, 'recall': 0.0, 'f1': 0.0, 'accuracy': 0.5852156057494866}, 'generalize': {'precsion': 0.0, 'recall': 0.0, 'f1': 0.0, 'accuracy': 0.8032786885245902}}, 'NODELINK': {'exact': {'density': {'iou': 0.0, 'deviation': 0.3001891007912256, 'inclusion': 0}}, 'describe': {'precsion': 0.0, 'recall': 0.0, 'f1': 0.0, 'accuracy': 0.8316221765913757}, 'generalize': {'precsion': 0.0, 'recall': 0.0, 'f1': 0.0, 'accuracy': 0.7131147540983607}}, 'PAOHVIS': {'exact': {'n_nodes': {'iou': 0.0033695634114585954, 'deviation': 224.24184823242183, 'inclusion': 1}, 'density': {'iou': 0.0, 'deviation': 0.22516402889554585, 'inclusion': 0}}, 'describe': {'precsion': 0.0, 'recall': 0.0, 'f1': 0.0, 'accuracy': 0.8162217659137577}, 'generalize': {'precsion': 0.0, 'recall': 0.0, 'f1': 0.0, 'accuracy': 0.8442622950819673}}, 'TREEMAP': {'exact': {'graph_type_1': {'iou': 0.0, 'devia

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [16]:
data = [evaluation_informed, evaluation_semi_informed, evaluation_uninformed]

visualizations = set(vis for d in data for vis in d.keys())

avg_deviation = {vis: [] for vis in visualizations}

for vis in visualizations:
    for var in data:
        deviations = [attr['deviation'] for attr in var.get(vis, {}).values() if 'deviation' in attr]
        if deviations:
            avg_deviation[vis].append(np.mean(deviations))
        else:
            avg_deviation[vis].append(0)

avg_deviation

{'MATRIX': [0, 0, 0],
 'PAOHVIS': [0, 0, 0],
 'NODETRIX': [0, 0, 0],
 'CHORD_DIAGRAM': [0, 0, 0],
 'TREEMAP': [0, 0, 0],
 'NODELINK': [0, 0, 0]}

In [17]:
# Prepare data for plotting
x_labels = ["Informed", "Semi-Informed", "Uninformed"]
plot_data = []

for i, vis in enumerate(visualizations):
    for j, var in enumerate(x_labels):
        plot_data.append({
            'variable': var,
            'deviation': avg_deviation[vis][j],
            'visualization': vis
        })

# Convert to DataFrame
df = pd.DataFrame(plot_data)

# Create the Altair chart
chart = alt.Chart(df).mark_bar().encode(
    x=alt.X('variable:N', axis=alt.Axis(title='Variables')),
    y=alt.Y('deviation:Q', axis=alt.Axis(title='Average Deviation')),
    color='visualization:N',
    column='visualization:N',
    tooltip=['variable:N', 'deviation:Q', 'visualization:N']
).properties(
    title='Average Deviations per Variable for Each Visualization',
    width=150,
    height=300
)

# Configure chart appearance
chart.configure_view(
    stroke='transparent'
).configure_axis(
    labelFontSize=12,
    titleFontSize=14
).configure_title(
    fontSize=16,
    anchor='middle'
)

# Display the chart
chart.show()