# Evaluation with sample users

In [7]:
import json
from scipy.stats import spearmanr, kendalltau
import altair as alt
import pandas as pd
from vigor import VIGOR, Visualization, predicates, Predicate
from vigor.visualization_types import VisualizationType

In [16]:
def calculate_correlations(file_path, label, *, max_interactions=5, n_recommendations=8):
    """Replay recorded interactions through a fresh VIGOR model and score its rankings.

    A new ``VIGOR`` instance is populated from the module-level ``predicates``
    table. For each interaction read from ``file_path`` the model's
    recommendations are requested, the model is updated with feedback derived
    from the recorded ranking, and the rank correlation between the
    recommended order and the recorded order is stored.

    Args:
        file_path: Path to a JSON file holding a list of interactions. Each
            interaction is read via its ``'statistics'`` and
            ``'sorted_visualizations'`` keys; the latter is presumably a list
            of upper-case ``VisualizationType`` names, best first (verify
            against the data generator).
        label: Value copied into the ``'label'`` field of every result row.
        max_interactions: Only the first ``max_interactions`` interactions are
            replayed; ``None`` replays all of them. Defaults to 5.
        n_recommendations: Number of recommendations requested from
            ``vigor.recommend_n`` per interaction. Defaults to 8.

    Returns:
        A list with one dict per replayed interaction, with the keys
        ``'interaction_index'``, ``'correlation'`` (Spearman), ``'kendall'``
        (Kendall tau) and ``'label'``.

    Raises:
        ValueError: If a name in ``sorted_visualizations`` (within the compared
            prefix) does not match any ``VisualizationType`` member name.
    """
    vigor = VIGOR()

    # NOTE: the bounds must not be called `min`/`max`; doing so shadows the
    # builtins for the whole function and breaks the `min(...)` call below.
    visualization_preds = {}
    for vis, score, stat, min_value, max_value in predicates:
        if vis not in visualization_preds:
            visualization_preds[vis] = Visualization(vis)
        visualization_preds[vis].add_predicate(Predicate(stat, min_value, max_value, score))

    for vis in visualization_preds.values():
        vigor.add_visualization(vis)

    with open(file_path, 'r', encoding='utf-8') as file:
        data = json.load(file)

    # Enum declaration order is the common index space used to compare the two
    # rankings; it does not change between interactions, so build it once.
    type_order = list(VisualizationType)
    type_names = [vis_type.name.upper() for vis_type in type_order]

    correlation_data = []

    for interaction_index, interaction in enumerate(data[:max_interactions]):
        statistics = interaction['statistics']
        sorted_visualizations = interaction['sorted_visualizations']
        recommendations = vigor.recommend_n(statistics, n_recommendations)

        for recommendation in recommendations:
            # Upper-case the enum name to match the format in sorted_visualizations
            recommendation_str = recommendation.name.upper()

            if recommendation_str in sorted_visualizations:
                position = sorted_visualizations.index(recommendation_str)
                feedback = 1 / (position + 1)  # Higher feedback for better positions
            else:
                feedback = 0  # Not ranked by the user at all

            for vis in vigor.visualizations:
                if vis.visualization_type == recommendation:
                    vis.update(feedback, statistics)  # Feed the reward back into the model
                    break

        # Both sequences must have equal length for the rank correlations
        min_len = min(len(recommendations), len(sorted_visualizations))
        recommendations_truncated = recommendations[:min_len]
        sorted_visualizations_truncated = sorted_visualizations[:min_len]

        recommendations_indices = [type_order.index(rec) for rec in recommendations_truncated]
        sorted_visualizations_indices = [
            type_names.index(visualization)
            for visualization in sorted_visualizations_truncated
        ]

        # Calculate metrics
        spearman_corr = spearmanr(recommendations_indices, sorted_visualizations_indices).statistic
        kendall_corr = kendalltau(recommendations_indices, sorted_visualizations_indices).statistic

        # Store the results
        correlation_data.append({
            'interaction_index': interaction_index,
            'correlation': spearman_corr,
            'kendall': kendall_corr,
            'label': label
        })

    return correlation_data

In [17]:
# Run the evaluation once per simulated user profile (uninformed, informed,
# and the 50/50 mix); each result is kept under its own name for later cells.
correlation_data_1 = calculate_correlations(
    file_path='../data/evaluation/uninformed_user.json',
    label='Uninformed User',
)
correlation_data_2 = calculate_correlations(
    file_path='../data/evaluation/informed_user.json',
    label='Informed User',
)
correlation_data_3 = calculate_correlations(
    file_path='../data/evaluation/fifty_fifty_user.json',
    label='Balanced User',
)

size
size
size
density
density
node_attributes
node_attributes
node_types
node_types
edge_attributes
edge_attributes
edge_types
edge_types
size
size
size
density
density
node_attributes
node_attributes
node_types
node_types
edge_attributes
edge_attributes
edge_types
edge_types
size
size
size
density
density
node_attributes
node_attributes
node_types
node_types
edge_attributes
edge_attributes
edge_types
edge_types
size
size
size
density
density
node_attributes
node_attributes
node_types
node_types
edge_attributes
edge_attributes
edge_types
edge_types
size
size
size
density
density
node_attributes
node_attributes
node_types
node_types
edge_attributes
edge_attributes
edge_types
edge_types
size
size
size
density
density
node_attributes
node_attributes
node_types
node_types
edge_attributes
edge_attributes
edge_types
edge_types
size
size
size
density
density
node_attributes
node_attributes
node_types
node_types
edge_attributes
edge_attributes
edge_types
edge_types
size
size
size
density
dens

RuntimeError: The size of tensor a (14) must match the size of tensor b (13) at non-singleton dimension 1

In [13]:
# Merge the three per-user result lists into one flat list of records
all_correlation_data = [
    *correlation_data_1,
    *correlation_data_2,
    *correlation_data_3,
]

# One row per record, so Altair can consume the results as a table
correlation_df = pd.DataFrame(all_correlation_data)

# Channel mapping: interaction index on x, Spearman correlation on y,
# and one colored series per user label
chart_encodings = {
    'x': 'interaction_index:Q',
    'y': 'correlation:Q',
    'color': 'label:N',
    'tooltip': ['interaction_index:Q', 'correlation:Q', 'label:N'],
}
chart_properties = {
    'title': 'Spearman Rank Correlation Over Time',
    'width': 600,
    'height': 400,
}

# Line chart with point markers built from the mappings above
chart = (
    alt.Chart(correlation_df)
    .mark_line(point=True)
    .encode(**chart_encodings)
    .properties(**chart_properties)
)

# Render the chart in the notebook
chart.display()