# Simulation of User Preferences

In [1]:
import os
import pandas as pd
import numpy as np
from vigor import generate_graphs, predicates, compute_metrics, Predicate, VIGOR
# from vigor import label_graphs, learn_predicates

  from pandas.core import (


## Generate graphs

1. Generate graphs using the fast_gnp_random_graph function from networkx
2. Calculate statistics for each graph

In [2]:
# Reuse the cached CSV when it exists; otherwise generate the graphs once
# and cache them so later runs can skip the generation step.
file_path = '../data/generated_graphs_example'
csv_path = f'{file_path}.csv'

if os.path.exists(csv_path):
    print('Loading graph data')
    graphs = pd.read_csv(csv_path)
else:
    print('Generating graph data')
    graphs = generate_graphs(100, 2, 200)
    graphs.to_csv(csv_path, index=False)

Loading graph data


In [3]:
# Preview the first few rows of the graph table.
graphs.head()

Unnamed: 0,graph_type,is_directed_int,has_spatial_attributes,has_temporal_attributes,is_bipartite,n_components,avg_betweenness_centrality,avg_closeness_centrality,avg_eigenvector_centrality,avg_degree,...,eccentricity_avg,n_nodes,node_types,node_attributes,number_of_isolates,density,edge_types,edge_attributes,n_parallel_edges,n_self_loops
0,4,1,0,1,0,2,0.006843,0.504436,0.080592,17.356164,...,3.0,146,3,6,0,0.119698,5,2,2,5
1,4,0,0,0,0,7,0.011514,0.554609,0.114805,15.666667,...,2.736111,72,1,2,0,0.220657,4,12,0,4
2,4,1,0,1,0,9,0.034914,0.568343,0.198937,6.916667,...,2.666667,24,4,7,0,0.300725,5,15,3,1
3,4,0,0,0,1,1,0.007891,0.561783,0.097868,22.376238,...,2.356436,101,1,4,0,0.223762,4,1,0,0
4,4,0,1,1,1,5,0.004791,0.659691,0.094859,52.6,...,2.0,110,1,0,0,0.482569,3,0,0,4


## Sample designers

We evaluate the ability of VIGOR to recover the rules that were used to represent simulated users. We design 3 versions of this simulated user: bob_informed, who follows the rules 100\% of the time; bob_semi_informed, who follows the rules 75\% of the time and picks a visualization uniformly at random the other 25\% of the time; and bob_uninformed, who follows the rules 50\% of the time and picks a visualization uniformly at random the other 50\% of the time. A random pick is drawn from all visualization types, so it can coincide with the rule-based choice.

In [4]:
def label_graphs(df, predicates, conformance=1, random_state=None):
    """
    Label each graph with the visualization whose predicates it satisfies most.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per graph, one column per graph attribute.
    predicates : iterable of (vistype, attr, minval, maxval)
        Range rules. ``vistype`` must expose a ``.name``; rules whose ``attr``
        is not a column of ``df`` are skipped.
    conformance : float, default 1
        Probability that a row keeps its rule-based label. Otherwise a label
        is drawn at random from every label that has at least one usable rule
        (so the draw may return the rule-based label again).
    random_state : int or None, default None
        Seed for the random deviations. ``None`` keeps drawing from NumPy's
        global random state, exactly as before this parameter existed.

    Returns
    -------
    pandas.Series
        One label per row of ``df``. If ``predicates`` is empty, a message is
        printed and ``df`` itself is returned unchanged.
    """
    if not predicates:
        print("No predicates provided")
        return df

    # Fit one single-attribute predicate per rule and group them by label.
    vistype_predicates = {}
    for vistype, attr, minval, maxval in predicates:
        if attr not in df.columns:
            continue
        predicate = Predicate(clauses={attr: [minval, maxval]})
        predicate.fit(df)
        vistype_predicates.setdefault(vistype.name, []).append(predicate)

    # A label's score is the number of its predicates a row satisfies.
    # Masks are keyed by position, not by attribute name, so two rules on the
    # same attribute are both counted instead of the later one silently
    # overwriting the earlier one.
    scores = pd.DataFrame({
        name: pd.DataFrame({i: p.mask for i, p in enumerate(preds)}).sum(axis=1)
        for name, preds in vistype_predicates.items()
    })
    predicted_labels = scores.idxmax(axis=1)
    unique_labels = list(scores.columns)

    # With no seed, fall back to the module-level generator so the default
    # behaviour (and its draw order) is unchanged.
    rng = np.random if random_state is None else np.random.RandomState(random_state)

    def _maybe_deviate(pred):
        # Draw order matters for reproducibility: first the conformance test,
        # then (only on a miss) the replacement label.
        if rng.random_sample() <= conformance:
            return pred
        return rng.choice(unique_labels)

    return predicted_labels.apply(_maybe_deviate)

def get_predicates(vigor, X, y, n_iter=1000):
    """
    Run the predicate search of ``vigor`` for one target vector.

    ``X`` is passed as a plain array together with its column names, and
    ``y`` is given a leading axis (``y[None]``) before the call. The call
    returns a ``(failed, predicates)`` pair. When ``failed`` is truthy the
    ``predicates`` value is returned untouched; otherwise the first entry is
    wrapped in a ``Predicate``, fitted on ``X`` and returned.
    """
    failed, predicates = vigor.compute_predicate_sequence(
        X.values, y[None], attribute_names=X.columns, n_iter=n_iter
    )

    # Guard clause: hand the raw result back when the search reports failure.
    if failed:
        return predicates

    fitted = Predicate(predicates[0])
    fitted.fit(X)
    return fitted

def learn_predicates(df, labels, n_iter=1000):
    """
    Learn a (positive, negative) predicate pair for every distinct label.

    Columns with a single distinct value are dropped, the remaining columns
    are min-max scaled, and for each value in ``labels`` the predicate search
    is run twice: once targeting the rows that carry the label and once
    targeting the rows that do not.

    Returns a dict mapping each label to ``(pred_pos, pred_neg)``.
    """
    varying = df.loc[:, df.nunique() > 1]

    # Min-max scaling; epsilon keeps the denominator strictly positive.
    epsilon = 1e-8
    col_min = varying.min()
    col_range = varying.max() - col_min + epsilon
    graphs_normalized = (varying - col_min) / col_range

    vigor = VIGOR()
    pred_list = {}

    for visualization in labels.unique():
        # One-vs-rest targets: rows with this label, then all other rows.
        ypos = (labels == visualization).astype(int).values
        yneg = (labels != visualization).astype(int).values
        print(f"Learning predicates for {visualization}")
        pred_list[visualization] = (
            get_predicates(vigor, graphs_normalized, ypos, n_iter=n_iter),
            get_predicates(vigor, graphs_normalized, yneg, n_iter=n_iter),
        )

    return pred_list

In [5]:
# Display the graph table (pandas truncates the middle rows and columns).
graphs

Unnamed: 0,graph_type,is_directed_int,has_spatial_attributes,has_temporal_attributes,is_bipartite,n_components,avg_betweenness_centrality,avg_closeness_centrality,avg_eigenvector_centrality,avg_degree,...,eccentricity_avg,n_nodes,node_types,node_attributes,number_of_isolates,density,edge_types,edge_attributes,n_parallel_edges,n_self_loops
0,4,1,0,1,0,2,0.006843,0.504436,0.080592,17.356164,...,3.000000,146,3,6,0,0.119698,5,2,2,5
1,4,0,0,0,0,7,0.011514,0.554609,0.114805,15.666667,...,2.736111,72,1,2,0,0.220657,4,12,0,4
2,4,1,0,1,0,9,0.034914,0.568343,0.198937,6.916667,...,2.666667,24,4,7,0,0.300725,5,15,3,1
3,4,0,0,0,1,1,0.007891,0.561783,0.097868,22.376238,...,2.356436,101,1,4,0,0.223762,4,1,0,0
4,4,0,1,1,1,5,0.004791,0.659691,0.094859,52.600000,...,2.000000,110,1,0,0,0.482569,3,0,0,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
94,4,1,0,1,0,9,0.003983,0.561744,0.070493,43.222222,...,2.000000,198,1,5,0,0.219402,1,1,0,0
95,4,1,0,1,1,1,0.009444,0.651135,0.129059,26.779661,...,2.000000,59,2,14,0,0.461718,1,0,3,3
96,4,0,1,0,0,2,0.008620,0.611387,0.113113,27.157895,...,2.000000,76,3,3,0,0.362105,2,1,3,2
97,4,0,1,0,0,9,0.051082,0.506539,0.190811,5.090909,...,3.136364,22,5,9,0,0.242424,4,8,3,0


In [6]:
# One label series per simulated user; only the conformance level differs.
informed = label_graphs(df=graphs, predicates=predicates, conformance=1)
semi_informed = label_graphs(df=graphs, predicates=predicates, conformance=0.75)
uninformed = label_graphs(df=graphs, predicates=predicates, conformance=0.5)

In [7]:
# Inspect the labels produced with conformance=1.
informed

0       MATRIX
1      PAOHVIS
2     NODETRIX
3       MATRIX
4      PAOHVIS
        ...   
94      MATRIX
95      MATRIX
96      MATRIX
97     PAOHVIS
98      MATRIX
Length: 99, dtype: object

## Learning predicates from labeled data

In [8]:
# Count how many graphs carry each graph_type value.
graphs['graph_type'].value_counts()

graph_type
4    84
3    15
Name: count, dtype: int64

In [9]:
# One-hot encode `graph_type` into 0/1 columns named graph_type_<value>, in
# order of first appearance, then drop the original column.
# The guard makes the cell safe to re-run: once `graph_type` has been dropped
# there is nothing left to encode, and indexing it again would raise KeyError.
if 'graph_type' in graphs.columns:
    graph_types = graphs['graph_type'].unique()
    indicator_columns = [f'graph_type_{graph_type}' for graph_type in graph_types]
    # Broadcast rows against the distinct values: shape (n_rows, n_types).
    indicators = (graphs['graph_type'].values[:, None] == graph_types[None, :]).astype(int)
    graphs[indicator_columns] = indicators
    graphs = graphs.drop('graph_type', axis=1)

In [10]:
# Learn predicates separately from each simulated user's labels,
# using the same number of iterations for all three.
learned_predicates_informed = learn_predicates(df=graphs, labels=informed, n_iter=1000)
learned_predicates_semi_informed = learn_predicates(df=graphs, labels=semi_informed, n_iter=1000)
learned_predicates_uninformed = learn_predicates(df=graphs, labels=uninformed, n_iter=1000)

Learning predicates for MATRIX
[   0] loss 7.505303382873535
[ 100] loss 6.861670970916748
[ 200] loss 6.2498860359191895
[ 300] loss 5.644992351531982
[ 400] loss 5.0304670333862305
[ 500] loss 4.399466037750244
[ 600] loss 3.766981363296509
[ 700] loss 3.1179075241088867
[ 800] loss 2.3517205715179443
[ 900] loss 1.415010690689087
[   0] loss 14.083810806274414
[ 100] loss 12.9830961227417
[ 200] loss 11.89760684967041
[ 300] loss 10.810150146484375
[ 400] loss 9.669657707214355
[ 500] loss 8.293329238891602
[ 600] loss 6.881173610687256
[ 700] loss 5.684715747833252
[ 800] loss 4.440169334411621
[ 900] loss 3.0027098655700684
Learning predicates for PAOHVIS
[   0] loss 7.596381187438965
[ 100] loss 6.969029903411865
[ 200] loss 6.365597724914551
[ 300] loss 5.783999443054199
[ 400] loss 5.212491035461426
[ 500] loss 4.632430076599121
[ 600] loss 4.033594608306885
[ 700] loss 3.388295888900757
[ 800] loss 2.6352384090423584
[ 900] loss 1.776832938194275
[   0] loss 14.09640121459961


[ 600] loss 8.193832397460938
[ 700] loss 6.740833759307861
[ 800] loss 5.146392822265625
[ 900] loss 3.14768123626709
Learning predicates for NODELINK
[   0] loss 3.61960768699646
[ 100] loss 3.177506923675537
[ 200] loss 2.8215370178222656
[ 300] loss 2.4952478408813477
[ 400] loss 2.236063241958618
[ 500] loss 2.004283905029297
[ 600] loss 1.7726693153381348
[ 700] loss 1.5325545072555542
[ 800] loss 1.2787092924118042
[ 900] loss 1.007798194885254
[   0] loss 20.266042709350586
[ 100] loss 17.888221740722656
[ 200] loss 16.078277587890625
[ 300] loss 14.448676109313965
[ 400] loss 12.821538925170898
[ 500] loss 11.181073188781738
[ 600] loss 9.5039701461792
[ 700] loss 7.733638763427734
[ 800] loss 5.703004360198975
[ 900] loss 2.8422367572784424
Learning predicates for PAOHVIS
[   0] loss 5.7230072021484375
[ 100] loss 5.1406941413879395
[ 200] loss 4.64283561706543
[ 300] loss 4.1399126052856445
[ 400] loss 3.646500825881958
[ 500] loss 3.1713414192199707
[ 600] loss 2.7070424556

In [12]:
# Inspect what was learned from the conformance=1 labels:
# learn_predicates stores one (positive, negative) pair per label.
learned_predicates_informed

{'MATRIX': ({'avg_betweenness_centrality': [0.016246764191961045, 0.03653128984298412], 'modularity': [0.008555203939161789, 0.00876282200377636], 'communities': [0.0, 0.0], 'diameter': [0.4444444439506172, 0.4444444439506172], 'assortativity': [0.767468497017307, 0.7753257749190122], 'vertex_connectivity': [0.13114754096210696, 0.13114754096210696], 'number_of_isolates': [0.0, 0.0], 'graph_type_4': [0.9999999900000002, 0.9999999900000002], 'graph_type_3': [0.0, 0.0]},
  {'has_spatial_attributes': [0.0335392951965332, 0.0], 'avg_betweenness_centrality': [0.016246764191961045, 0.03653128984298412], 'modularity': [0.008555203939161789, 0.00876282200377636], 'communities': [0.0, 0.0], 'diameter': [0.4444444439506172, 0.4444444439506172], 'assortativity': [0.767468497017307, 0.7753257749190122], 'vertex_connectivity': [0.13114754096210696, 0.13114754096210696], 'number_of_isolates': [0.0, 0.0], 'graph_type_4': [0.9999999900000002, 0.9999999900000002], 'graph_type_3': [0.0, 0.0]}),
 'PAOHVI

## Comparing learned predicates to initial predicates

### Informed User

In [11]:
# Compare the predicates learned from the informed user with the original rules.
# NOTE(review): the recorded run of this cell ended in
# "TypeError: tuple indices must be integers or slices, not str".
# learn_predicates stores a (positive, negative) tuple per label; presumably
# compute_metrics expects a different structure. Confirm against its signature.
metrics, mean_iou, mean_deviation, inclusion_ratio = compute_metrics(
    predicates, learned_predicates_informed
)

print(
    f"Mean IoU: {mean_iou:.2f}",
    f"Mean Deviation: {mean_deviation:.2f}",
    f"Inclusion Ratio: {inclusion_ratio:.2f}",
    sep="\n",
)

TypeError: tuple indices must be integers or slices, not str

### Semi-Informed User

In [None]:
# Compare the predicates learned from the semi-informed user with the original rules.
metrics, mean_iou, mean_deviation, inclusion_ratio = compute_metrics(
    predicates, learned_predicates_semi_informed
)

print(
    f"Mean IoU: {mean_iou:.2f}",
    f"Mean Deviation: {mean_deviation:.2f}",
    f"Inclusion Ratio: {inclusion_ratio:.2f}",
    sep="\n",
)

### Uninformed User

In [None]:
# Compare the predicates learned from the uninformed user with the original rules.
metrics, mean_iou, mean_deviation, inclusion_ratio = compute_metrics(
    predicates, learned_predicates_uninformed
)

print(
    f"Mean IoU: {mean_iou:.2f}",
    f"Mean Deviation: {mean_deviation:.2f}",
    f"Inclusion Ratio: {inclusion_ratio:.2f}",
    sep="\n",
)