# Simulation of User Preferences

In [None]:
import os
import altair as alt
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from vigor import generate_graphs, nobre_predicates, predicates, compute_metrics, Predicate, VIGOR, label_graphs, learn_predicates

## Generate graphs

1. Generate graphs using the fast_gnp_random_graph function from networkx
2. Calculate statistics for each graph

In [2]:
file_path = '../data/generated_graphs_example.csv'
# graphs = generate_graphs(1000, 2, 200, file_path=file_path)
graphs = pd.read_csv(f'{file_path}')

In [3]:
graphs.head()

Unnamed: 0,graph_type,is_directed_int,has_spatial_attributes,has_temporal_attributes,is_bipartite,n_components,avg_betweenness_centrality,avg_closeness_centrality,avg_eigenvector_centrality,avg_degree,...,2.129032258064516,62,3.1,10.1,0.2,0.28239026969857217,3.2,1.2,1.3,4
0,4,1,1,0,0,6,0.004762,0.596901,0.08276,46.305556,...,,,,,,,,,,
1,4,0,0,0,0,3,0.008252,0.574299,0.102541,23.608696,...,,,,,,,,,,
2,4,1,0,1,0,2,0.016667,0.155556,0.284518,0.666667,...,,,,,,,,,,
3,4,1,0,0,1,2,0.00602,0.582964,0.089947,34.049587,...,,,,,,,,,,
4,2,1,0,0,0,0,0.166667,0.75,0.5,2.0,...,,,,,,,,,,


In [4]:
graphs = graphs[['graph_type', 'n_nodes', 'density']]

In [5]:
graphs

Unnamed: 0,graph_type,n_nodes,density
0,4,144,0.323815
1,4,92,0.259436
2,4,6,0.133333
3,4,121,0.283747
4,2,4,0.666667
...,...,...,...
1213,4,121,0.133058
1214,2,18,0.117647
1215,2,10,0.222222
1216,4,88,0.298589


## Sample designers

We evaluate the ability of VIGOR to recover the rules that were used to represent simulated users. We design 3 versions of this simulated user: bob_informed, who follows the rules 100\% of the time; bob_semi_informed, who follows the rules 75\% of the time and chooses other visualizations randomly the other 25\% of the time; and bob_uninformed who follows the rules 50\% of the time and chooses other visualizations randomly the other 50\% of the time.

In [6]:
informed = label_graphs(graphs, predicates, conformance=1)
semi_informed = label_graphs(graphs, predicates, conformance=0.75)
uninformed = label_graphs(graphs, predicates, conformance=0.5)

In [7]:
informed

0        MATRIX
1        MATRIX
2        MATRIX
3        MATRIX
4        MATRIX
         ...   
1213    PAOHVIS
1214     MATRIX
1215     MATRIX
1216     MATRIX
1217     MATRIX
Length: 1218, dtype: object

### Learning predicates from labeled data

In [8]:
graphs['graph_type'].value_counts()

graph_type
4    819
3    199
2    112
1     88
Name: count, dtype: int64

In [9]:
graph_types = graphs['graph_type'].unique()
graphs['graph_type_' + pd.Series(graph_types).astype(str)] = (graphs['graph_type'].values[None] == graph_types[:,None]).astype(int).T
graphs = graphs.drop('graph_type', axis=1)

In [10]:
learned_predicates_informed = learn_predicates(graphs, informed, 1000)
learned_predicates_semi_informed = learn_predicates(graphs, semi_informed, 1000)
learned_predicates_uninformed = learn_predicates(graphs, uninformed, 1000)

Learning predicates for MATRIX
[   0] loss 4.202094554901123
[ 100] loss 3.1024680137634277
[ 200] loss 2.1733908653259277
[ 300] loss 1.8013806343078613
[ 400] loss 1.6272622346878052
[ 500] loss 1.4763075113296509
[ 600] loss 1.3297314643859863
[ 700] loss 1.1822682619094849
[ 800] loss 1.031112551689148
[ 900] loss 0.8715153932571411
[   0] loss 6.465519428253174
[ 100] loss 5.479440212249756
[ 200] loss 4.81907844543457
[ 300] loss 4.417529106140137
[ 400] loss 4.041301250457764
[ 500] loss 3.653075695037842
[ 600] loss 3.249213218688965
[ 700] loss 2.826672315597534
[ 800] loss 2.373044490814209
[ 900] loss 1.8492554426193237
Learning predicates for PAOHVIS
[   0] loss 2.8115358352661133
[ 100] loss 2.430608034133911
[ 200] loss 1.6142946481704712
[ 300] loss 1.500534176826477
[ 400] loss 1.3977253437042236
[ 500] loss 1.2870960235595703
[ 600] loss 1.16446053981781
[ 700] loss 1.032278060913086
[ 800] loss 0.8913918137550354
[ 900] loss 0.7388415932655334
[   0] loss 8.2419061660

### Comparing learned predicates to initial predicates

#### Informed User

In [13]:
evaluation_informed = compute_metrics(predicates, learned_predicates_informed)
print(evaluation_informed)

{'MATRIX': {'density': {'iou': 0.07153206357029356, 'deviation': 0.4178105713933679, 'inclusion': 1}}, 'PAOHVIS': {'density': {'iou': 0.0, 'deviation': 0.12168643422443977, 'inclusion': 1}, 'n_nodes': {'iou': 0.11555555555549635, 'deviation': 199.00000000001333, 'inclusion': 1}}, 'NODELINK': {}, 'CHORD_DIAGRAM': {'n_nodes': {'iou': 0.0, 'deviation': 114.9999999999406, 'inclusion': 0}}}


#### Semi-Informed User

In [14]:
evaluation_semi_informed = compute_metrics(predicates, learned_predicates_semi_informed)
print(evaluation_semi_informed)

{'MATRIX': {'density': {'iou': 0.07153206357029356, 'deviation': 0.4178105713933679, 'inclusion': 1}}, np.str_('CHORD_DIAGRAM'): {'n_nodes': {'iou': 0.0, 'deviation': 114.9999999999406, 'inclusion': 0}}, np.str_('NODETRIX'): {}, 'PAOHVIS': {'density': {'iou': 0.0, 'deviation': 0.16662564197098345, 'inclusion': 0}, 'n_nodes': {'iou': 0.11555555555549635, 'deviation': 199.00000000001333, 'inclusion': 1}}, np.str_('NODELINK'): {}, np.str_('TREEMAP'): {'graph_type_1': {'iou': 0.0, 'deviation': 0.6898366659879684, 'inclusion': 0}, 'n_nodes': {'iou': 0.34666666666648904, 'deviation': 49.00000000001332, 'inclusion': 1}}}


#### Unformed User

In [None]:
evaluation_uninformed = compute_metrics(predicates, learned_predicates_uninformed)
print(evaluation_uninformed)

{'MATRIX': {}, np.str_('PAOHVIS'): {'n_nodes': {'iou': 0.11555555555549635, 'deviation': 199.00000000001333, 'inclusion': 1}}, np.str_('CHORD_DIAGRAM'): {'n_nodes': {'iou': 0.0, 'deviation': 114.9999999999406, 'inclusion': 0}}, np.str_('NODETRIX'): {}, np.str_('TREEMAP'): {'graph_type_1': {'iou': 0.0, 'deviation': 0.75, 'inclusion': 0}, 'n_nodes': {'iou': 0.34666666666648904, 'deviation': 49.00000000001332, 'inclusion': 1}}, 'NODELINK': {}}


In [20]:
data = [evaluation_informed, evaluation_semi_informed, evaluation_uninformed]

visualizations = set(vis for d in data for vis in d.keys())

avg_deviation = {vis: [] for vis in visualizations}

for vis in visualizations:
    for var in data:
        deviations = [attr['deviation'] for attr in var.get(vis, {}).values() if 'deviation' in attr]
        if deviations:
            avg_deviation[vis].append(np.mean(deviations))
        else:
            avg_deviation[vis].append(0)

avg_deviation

{np.str_('NODETRIX'): [0, 0, 0],
 'PAOHVIS': [np.float64(99.56084321711889),
  np.float64(99.58331282099216),
  np.float64(199.00000000001333)],
 'NODELINK': [0, 0, 0],
 np.str_('TREEMAP'): [0,
  np.float64(24.844918333000646),
  np.float64(24.87500000000666)],
 'MATRIX': [np.float64(0.4178105713933679), np.float64(0.4178105713933679), 0],
 'CHORD_DIAGRAM': [np.float64(114.9999999999406),
  np.float64(114.9999999999406),
  np.float64(114.9999999999406)]}

In [None]:
# Prepare data for plotting
x_labels = ["Informed", "Semi-Informed", "Uninformed"]
plot_data = []

for i, vis in enumerate(visualizations):
    for j, var in enumerate(x_labels):
        plot_data.append({
            'variable': var,
            'deviation': avg_deviation[vis][j],
            'visualization': vis
        })

# Convert to DataFrame
df = pd.DataFrame(plot_data)

# Create the Altair chart
chart = alt.Chart(df).mark_bar().encode(
    x=alt.X('variable:N', axis=alt.Axis(title='Variables')),
    y=alt.Y('deviation:Q', axis=alt.Axis(title='Average Deviation')),
    color='visualization:N',
    column='visualization:N',
    tooltip=['variable:N', 'deviation:Q', 'visualization:N']
).properties(
    title='Average Deviations per Variable for Each Visualization',
    width=150,
    height=300
)

# Configure chart appearance
chart.configure_view(
    stroke='transparent'
).configure_axis(
    labelFontSize=12,
    titleFontSize=14
).configure_title(
    fontSize=16,
    anchor='middle'
)

# Display the chart
chart.show()