# Mean&Std Matrices

This experiment aims to find the most efficient encoding technique for visualizing the mean and standard deviation values of links when adjacency matrices are used as the layout.

We will test 1 factor (cell encoding) using 5 different levels (mark, mark with rotation, cell size, cell lightness and bars).


## Configuration and Tasks


In [87]:
import revisitpy as rvt

# Ordering modes passed to rvt.sequence for the study tasks and the training.
tasks_order, training_order = "random", "fixed"

# Markdown description file for each cell encoding, keyed by encoding name.
# The tuple below fixes the insertion order of the resulting dict.
encoding_descriptions = {
    encoding_name: f"matrices/assets/{encoding_name}_description.md"
    for encoding_name in ("markRotation", "size", "lightness", "bars", "mark")
}

# Dataset file names, keyed by dataset label, for the training stimuli ...
training_datasets = dict(test="training_test.txt")

# ... and for the study-task stimuli.
task_datasets = dict(test="task_test.txt")

#### Training Tasks


In [88]:
# Training steps, keyed by component name (the key is passed to rvt.component
# as component_name__ by get_training). Each entry provides:
#   "instruction"   - text shown to the participant (required)
#   "response"      - list of rvt.response objects (optional)
#   "correctAnswer" - list of rvt.answer objects (optional)
#   "extraParams"   - extra stimulus parameters merged into the defaults (optional)
#
# Disabled entries are kept as real `#` comments. They must NOT be wrapped in
# triple-quoted strings: inside a dict literal such a string is implicitly
# concatenated with the next key literal and silently corrupts that key.
training_configs = {
    ################ INTRO ################
    "training_intro": {
        "instruction": "Before exploring the matrix functionality, let's start with a quick recap."
        "<br><br>"
        "You are looking at an adjacency matrix of flight connections. Each cell shows the **mean** flight price between two states, along with the **variation**."
        " Note that the states are ordered alphabetically along both axes."
        "<br><br>"
        "On the right side, you'll find a legend explaining how the mean and variation are encoded."
    },
    ################ INTERACTION ################
    "training_interaction": {
        "instruction": "<b>Move your mouse over the cells:</b> this will highlight the corresponding row and column."
        "<br>"
        "<br>"
        "<b>Click on the top labels (states at the top): </b>"
        "the left axis will reorder itself."
        " States that have flights connected to it will come first."
        # A stray, unmatched "</li>" closing tag was removed from this sentence.
        " The ones without direct flights will move to the bottom."
        " Click again on the top label to undo the ordering."
        "<br>"
        "<br>"
        "<b>Click on the left-side labels (states written horizontally): </b>"
        "to answer some questions you’ll need to select states by clicking on the names on the left side."
        " Click once to select a state. Click again to unselect it, or use the Clear States Selection button if you want to start fresh."
        "<br>"
        "<br>"
        "<b>Click on the cells to mark a connection: </b>"
        "this is not an answer but it can help you in certain tasks. Click again to undo cell selection or use the Clear Flights Selection button if you want to start fresh."
    },
    ################ SELECT NODES ################
    "training_selection": {
        "instruction": "Lets try a simple task."
        "<br> Select the following states: Texas, Florida and Nevada",
        "response": [
            rvt.response(
                id="answerNodes",
                prompt="Selected States:",
                location="sidebar",
                type="reactive",
            ),
        ],
        "correctAnswer": [
            rvt.answer(id="answerNodes", answer=["Texas", "Florida", "Nevada"])
        ],
    },
    ################ SELECT NODES ADJACENCY ################
    # Disabled training step, kept for reference.
    # "training_adjacency": {
    #     "instruction": "Select all states connected to South Carolina.",
    #     "response": [
    #         rvt.response(
    #             id="answerNodes",
    #             prompt="Selected States:",
    #             secondaryText="Tip: Clicking on the vertical labels will reorder the nodes, placing the connected ones first, followed by the disconnected ones in alphabetical order.",
    #             location="sidebar",
    #             type="reactive",
    #         ),
    #     ],
    #     "correctAnswer": [
    #         rvt.answer(
    #             id="answerNodes",
    #             answer=[
    #                 "Florida",
    #                 "New York",
    #                 "Texas",
    #                 "Pennsylvania",
    #             ],
    #         )
    #     ],
    # },
    ################ SELECT NODES MEAN ATTR ################
    "training_mean_adjacency": {
        "instruction": "Select all states connected to Kentucky with a mean price below $100.",
        "response": [
            rvt.response(
                id="answerNodes",
                prompt="Selected States:",
                secondaryText="Tip: Clicking on the vertical labels will reorder the nodes, placing the connected ones first, followed by the disconnected ones in alphabetical order.",
                location="sidebar",
                type="reactive",
            ),
        ],
        "correctAnswer": [
            rvt.answer(
                id="answerNodes",
                answer=["Florida", "Texas", "California", "Nevada"],
            )
        ],
    },
    ################ SELECT NODES DEV ATTR ################
    "training_std_adjacency": {
        "instruction": "Select all states connected to Pennsylvania with a price variation greater than $90.",
        "response": [
            rvt.response(
                id="answerNodes",
                prompt="Selected States:",
                secondaryText="Tip: Clicking on the vertical labels will reorder the nodes, placing the connected ones first, followed by the disconnected ones in alphabetical order.",
                location="sidebar",
                type="reactive",
            ),
        ],
        "correctAnswer": [
            rvt.answer(
                id="answerNodes",
                answer=["Texas", "Missouri", "Ohio"],
            )
        ],
    },
    ################ ESTIMATION ################
    # Disabled training step, kept for reference.
    # "training_range": {
    #     "instruction": "What is the price range for flights connected to Ohio? Range as Mean ± Variation",
    #     "response": [
    #         rvt.response(
    #             id="min",
    #             prompt="Price range minimum value:",
    #             location="sidebar",
    #             type="slider",
    #             step=10,
    #             options=[
    #                 {"label": "0", "value": 0},
    #                 {"label": "600", "value": 600},
    #             ],
    #         ),
    #         rvt.response(
    #             id="max",
    #             prompt="Price range maximum value:",
    #             location="sidebar",
    #             type="slider",
    #             step=10,
    #             options=[{"label": "0", "value": 0}, {"label": "600", "value": 600}],
    #         ),
    #     ],
    #     "correctAnswer": [
    #         rvt.answer(
    #             id="min",
    #             answer=0,
    #         ),
    #         rvt.answer(
    #             id="max",
    #             answer=400,
    #         ),
    #     ],
    # },
    ################ CLUSTERS ################
    "training_cluster": {
        "instruction": "Finally, clusters are groups of nodes that are highly interconnected. In this context, it means you can travel between any of the states within a cluster. Which of the highlighted clusters has the highest average mean price?",
        "response": [
            rvt.response(
                id="training_cluster",
                prompt="Select a cluster:",
                location="sidebar",
                type="radio",
                options=[
                    {"label": "A", "value": "a"},
                    {"label": "B", "value": "b"},
                    {"label": "C", "value": "c"},
                ],
            ),
        ],
        "correctAnswer": [rvt.answer(id="training_cluster", answer="a")],
        "extraParams": {
            "isClusterTask": True,
            "clusterMarks": [
                {"option": "A", "origin": "Texas", "destination": "Pennsylvania"},
                {"option": "B", "origin": "Nevada", "destination": "Nevada"},
                {"option": "C", "origin": "California", "destination": "Ohio"},
            ],
            "clusterMode": "optimal",
            "clusterVar": "mean",
        },
    },
}

#### Study Tasks


In [89]:
# Study tasks, keyed by component name (the key is passed to rvt.component as
# component_name__ by get_tasks). Each entry provides:
#   "instruction"   - question shown to the participant (required)
#   "response"      - list of rvt.response objects
#   "correctAnswer" - list of rvt.answer objects; each id must match the id of
#                     a response in the same entry
#   "extraParams"   - extra stimulus parameters merged into the defaults (optional)
task_configs = {
    ################ ADJACENCY TASK ################ 1
    "adjacency": {
        "instruction": "Which state(s) are connected to South Carolina? (Ordering by state is deactivated on this task)",
        "response": [
            rvt.response(
                id="answerNodes",
                prompt="Selected States:",
                location="sidebar",
                type="reactive",
            ),
        ],
        "correctAnswer": [
            rvt.answer(
                id="answerNodes",
                answer=[
                    "Florida",
                    "Georgia",
                    "Illinois",
                    "Maryland",
                    "Massachusetts",
                    "New Jersey",
                    "Michigan",
                    "New York",
                    "Ohio",
                    "Pennsylvania",
                    "Tennessee",
                    "Texas",
                    "Washington",
                ],
            )
        ],
        "extraParams": {"nodeOrderingDisabled": True},
    },
    ################ ATTRIBUTE TASK ################ 2
    "attr_std": {
        "instruction": "Which state(s) are connected to Georgia with a price variation higher than $90?",
        "response": [
            rvt.response(
                id="answerNodes",
                prompt="Selected States:",
                location="sidebar",
                type="reactive",
            ),
        ],
        # NOTE(review): the expected answer lists Georgia itself; confirm that
        # the dataset contains a Georgia-to-Georgia connection.
        "correctAnswer": [
            rvt.answer(
                id="answerNodes", answer=["Georgia", "Oregon", "South Carolina", "Utah"]
            )
        ],
    },
    ################ ATTRIBUTE COMBINATION TASK ################ 3
    "attr_comb": {
        "instruction": "Which state(s) are connected to Utah with flight prices approximately between $100 and $200?",
        "response": [
            rvt.response(
                id="answerNodes",
                prompt="Selected States:",
                location="sidebar",
                type="reactive",
            ),
        ],
        "correctAnswer": [
            rvt.answer(id="answerNodes", answer=["New York", "Texas", "Washington"])
        ],
    },
    ################ EXTREME ATTRIBUTE TASK ################ 4
    "max_attr_std": {
        "instruction": "Which state(s) connected to Virginia show the highest price variation?",
        "response": [
            rvt.response(
                id="answerNodes",
                prompt="Selected States:",
                location="sidebar",
                type="reactive",
            ),
        ],
        "correctAnswer": [rvt.answer(id="answerNodes", answer=["Oregon"])],
    },
    ################ PATH ESTIMATION TASK ################ 5
    "path": {
        "instruction": "On which of the following routes would you spend less money.",
        "response": [
            rvt.response(
                id="path",
                prompt="Select a route:",
                secondaryText="Remember that you can highlight cells by clicking on them.",
                location="sidebar",
                type="radio",
                options=[
                    "Utah → Michigan → New York",
                    "Utah → Washington → New York",
                    "Utah → Colorado → New York",
                    "Utah → Nevada → New York",
                ],
            ),
        ],
        "correctAnswer": [
            rvt.answer(id="path", answer="Utah → Colorado → New York"),
        ],
    },
    ################ RANGE ESTIMATION TASK ################ 6
    "range": {
        "instruction": "What is the price range of flying from Arizona",
        "response": [
            rvt.response(
                id="min",
                prompt="Price range minimum value:",
                location="sidebar",
                type="slider",
                step=10,
                options=[{"label": "0", "value": 0}, {"label": "600", "value": 600}],
            ),
            rvt.response(
                id="max",
                prompt="Price range maximum value:",
                location="sidebar",
                type="slider",
                step=10,
                options=[{"label": "0", "value": 0}, {"label": "600", "value": 600}],
            ),
        ],
        "correctAnswer": [
            rvt.answer(id="min", answer=0),
            rvt.answer(id="max", answer=350),
        ],
        "extraParams": {"isRangeTask": True},
    },
    ################ TREND ESTIMATION TASK ################ 7
    "trend": {
        "instruction": "Flight prices on Oregon are ______ in comparison to Florida?",
        "response": [
            rvt.response(
                id="stability",
                prompt="Select an option:",
                location="sidebar",
                type="radio",
                options=[
                    "very stable",
                    "stable",
                    "similar",
                    "unstable",
                    "very unstable",
                ],
            )
        ],
        # The answer must reference this task's response id ("stability") and
        # use the option text exactly as listed above (lowercase "unstable").
        # It previously pointed at "answerNodes" with "Unstable", which matches
        # no response and no option of this task.
        "correctAnswer": [rvt.answer(id="stability", answer="unstable")],
    },
    ################ CLUSTER MEAN TASK ################ 8
    "cluster_avg": {
        "instruction": "Which cluster has the highest average mean price?",
        "response": [
            rvt.response(
                id="cluster",
                prompt="Select a cluster:",
                location="sidebar",
                type="radio",
                options=["A", "B", "C", "D"],
            )
        ],
        "correctAnswer": [rvt.answer(id="cluster", answer="C")],
        "extraParams": {
            "isClusterTask": True,
            "clusterMarks": [
                {"option": "A", "origin": "Virginia", "destination": "Arizona"},
                {"option": "B", "origin": "Massachusetts", "destination": "Utah"},
                {"option": "C", "origin": "Washington", "destination": "Massachusetts"},
                {"option": "D", "origin": "Utah", "destination": "Tennessee"},
            ],
            "clusterMode": "optimal",
            "clusterVar": "mean",
        },
    },
    ################ CLUSTER STABILITY TASK ################ 9
    "cluster_std": {
        "instruction": "Which cluster prices vary the most?",
        "response": [
            rvt.response(
                id="cluster",
                prompt="Select a cluster:",
                location="sidebar",
                type="radio",
                options=["A", "B", "C", "D"],
            )
        ],
        "correctAnswer": [rvt.answer(id="cluster", answer="C")],
        "extraParams": {
            "isClusterTask": True,
            "clusterMarks": [
                {"option": "A", "origin": "Missouri", "destination": "Texas"},
                {"option": "B", "origin": "Maryland", "destination": "New York"},
                {"option": "C", "origin": "Georgia", "destination": "Minnesota"},
                {
                    "option": "D",
                    "origin": "Pennsylvania",
                    "destination": "North Carolina",
                },
            ],
            "clusterMode": "optimal",
            "clusterVar": "std",
        },
    },
}

# Disabled draft of an alternative task_configs ("reactive_not_working").
# This is a bare triple-quoted string expression: it is never assigned and does
# not replace the task_configs dict defined earlier. As the last expression of
# its notebook cell it is only echoed back as the cell output.
""" task_configs = {
    "reactive_not_working": {
        "instruction": "QUESTION. On which of the following routes would you spend less money. Select the route and estimate the final price range.",
        "response": [
            rvt.response(
                id="range",
                prompt="Selected range:",
                location="sidebar",
                type="reactive",
                required=True,
            ),
            rvt.response(
                id="path",
                prompt="Selected path:",
                location="sidebar",
                type="reactive",
                required=False,
            ),
        ],
        "correctAnswer": [],
        "extraParams": {
            "paths": [
                {"option": "A", "path": "Utah → Michigan → New York"},
                {"option": "B", "path": "Utah → Texas → New York"},
            ],
        },
    },
} """

' task_configs = {\n    "reactive_not_working": {\n        "instruction": "QUESTION. On which of the following routes would you spend less money. Select the route and estimate the final price range.",\n        "response": [\n            rvt.response(\n                id="range",\n                prompt="Selected range:",\n                location="sidebar",\n                type="reactive",\n                required=True,\n            ),\n            rvt.response(\n                id="path",\n                prompt="Selected path:",\n                location="sidebar",\n                type="reactive",\n                required=False,\n            ),\n        ],\n        "correctAnswer": [],\n        "extraParams": {\n            "paths": [\n                {"option": "A", "path": "Utah → Michigan → New York"},\n                {"option": "B", "path": "Utah → Texas → New York"},\n            ],\n        },\n    },\n} '

## Auxiliary functions

To generate sequences for:

- Introduction
- Training
- Tasks


#### Introduction Sequence


In [90]:
import revisitpy as rvt


def get_introduction(encoding):
    """Build the fixed-order introduction sequence for one encoding.

    The sequence contains, in order: the introduction page (asks for the
    Prolific ID), the consent page, the data description, two mean±variation
    image quizzes, the connectivity description, two matrix/graph image
    quizzes, and the description page of the selected encoding.

    Args:
        encoding: Key of ``encoding_descriptions`` selecting which encoding
            description page closes the introduction.

    Returns:
        The ``rvt.sequence`` (order ``"fixed"``) holding the components above.

    Raises:
        KeyError: If ``encoding`` is not a key of ``encoding_descriptions``.
    """

    def markdown_page(name, path, **extra):
        # Markdown component; ``extra`` carries the optional response list.
        return rvt.component(
            type="markdown",
            path=path,
            component_name__=name,
            **extra,
        )

    def image_quiz(name, instruction, max_width, image_path, choices, correct):
        # Single radio question about an image, answered in the sidebar.
        radio = rvt.response(
            id="option",
            type="radio",
            location="sidebar",
            prompt="Select an option:",
            options=[{"label": label, "value": value} for label, value in choices],
        )
        return rvt.component(
            instruction=instruction,
            type="image",
            style={"maxWidth": max_width},
            path=image_path,
            component_name__=name,
            response=[radio],
            correctAnswer=[rvt.answer(id="option", answer=correct)],
            nextButtonLocation="sidebar",
            provideFeedback=True,
            allowFailedTraining=False,
            trainingAttempts=2,
        )

    # (label, value) pairs offered by the two kinds of quizzes.
    letter_choices = (("A", "a"), ("B", "b"), ("C", "c"), ("D", "d"))
    numbered_choices = (("1", "1"), ("2", "2"), ("3", "3"), ("None", "4"))

    pages = []

    prolific_id = rvt.response(
        id="signature",
        prompt="Please enter your Prolific ID",
        location="belowStimulus",
        type="shortText",
        placeholder="Prolific ID",
    )
    pages.append(
        markdown_page(
            "introduction",
            "matrices/assets/introduction.md",
            response=[prolific_id],
        )
    )

    # requiredValue="yes" is set on the consent question.
    consent_choice = rvt.response(
        id="accept",
        prompt="Do you consent to the study and wish to continue?",
        location="belowStimulus",
        requiredValue="yes",
        type="radio",
        options=[
            {"label": "Decline", "value": "no"},
            {"label": "Accept", "value": "yes"},
        ],
    )
    pages.append(
        markdown_page(
            "consent",
            "matrices/assets/consent.md",
            response=[consent_choice],
        )
    )

    pages.append(
        markdown_page("data_description", "matrices/assets/data_description.md")
    )

    pages.append(
        image_quiz(
            "list2num",
            "Which mean±variation fits better the given list of numbers?",
            "80%",
            "matrices/assets/images/list2num.svg",
            letter_choices,
            "b",
        )
    )
    pages.append(
        image_quiz(
            "num2list",
            "Which list of numbers  fits better the given mean±variation?",
            "80%",
            "matrices/assets/images/num2list.svg",
            letter_choices,
            "c",
        )
    )

    pages.append(
        markdown_page(
            "connectivity_description",
            "matrices/assets/connectivity_description.md",
        )
    )

    pages.append(
        image_quiz(
            "matrix2graph",
            "Which graph corresponds to the matrix?",
            "100%",
            "matrices/assets/images/matrix2graph_.svg",
            numbered_choices,
            "2",
        )
    )
    pages.append(
        image_quiz(
            "graph2matrix",
            "Which matrix corresponds to the graph?",
            "80%",
            "matrices/assets/images/graph2matrix.svg",
            numbered_choices,
            "3",
        )
    )

    # The encoding-specific description is always the last page.
    pages.append(
        markdown_page("encoding_description", encoding_descriptions[encoding])
    )

    return rvt.sequence(order="fixed", components=pages)

#### Training Sequence


In [91]:
def get_training(encoding):
    """Build the training sequence for one encoding.

    One ``react-component`` is created per entry of ``training_configs``, in
    dict order, followed by the ``training_ends`` markdown page.

    Args:
        encoding: Encoding name forwarded to every stimulus as the
            ``"encoding"`` parameter.

    Returns:
        An ``rvt.sequence`` ordered according to ``training_order``.
    """
    base_parameters = {"dataset": training_datasets["test"], "encoding": encoding}

    def build_step(step_name, step):
        # Per-step extras are layered over the shared dataset/encoding pair.
        step_parameters = {**base_parameters, **step.get("extraParams", {})}
        return rvt.component(
            type="react-component",
            path="matrices/Stimuli.tsx",
            component_name__=step_name,
            parameters=step_parameters,
            instruction=step["instruction"],
            response=step.get("response", ""),
            correctAnswer=step.get("correctAnswer", ""),
            nextButtonLocation="sidebar",
            provideFeedback=True,
            allowFailedTraining=False,
            trainingAttempts=4,
        )

    steps = [build_step(key, value) for key, value in training_configs.items()]

    # Closing page appended after all training steps.
    steps.append(
        rvt.component(
            type="markdown",
            path="matrices/assets/training_ends.md",
            component_name__="training_ends",
        )
    )

    return rvt.sequence(order=training_order, components=steps)

#### Tasks Sequence


In [92]:
def get_tasks(encoding):
    """Build the study-task sequence for one encoding.

    One ``react-component`` is created per entry of ``task_configs``. The
    parameters of every task are printed as they are built.

    Args:
        encoding: Encoding name forwarded to every stimulus as the
            ``"encoding"`` parameter.

    Returns:
        An ``rvt.sequence`` ordered according to ``tasks_order``.
    """
    shared = {"dataset": task_datasets["test"], "encoding": encoding}
    study_tasks = []

    for task_name, spec in task_configs.items():
        # Start from the shared defaults and overlay the task's own extras.
        merged = dict(shared)
        merged.update(spec.get("extraParams", {}))

        print(merged)

        study_tasks.append(
            rvt.component(
                type="react-component",
                path="matrices/Stimuli.tsx",
                component_name__=task_name,
                parameters=merged,
                instruction=spec["instruction"],
                secondaryText=spec.get("secondaryText", ""),
                response=spec.get("response", ""),
                correctAnswer=spec.get("correctAnswer", ""),
                nextButtonLocation="sidebar",
            )
        )

    return rvt.sequence(order=tasks_order, components=study_tasks)

## Generate the studies

For now we generate five studies, one for each encoding.


In [93]:
def generate_study(encoding):
    """Assemble the complete study configuration for one encoding.

    The study runs the introduction, then the training, then the tasks. The
    encoding's description file is also used as the UI help text.

    Args:
        encoding: Key of ``encoding_descriptions`` naming the cell encoding.

    Returns:
        The ``rvt.studyConfig`` object describing the study.

    Raises:
        KeyError: If ``encoding`` is not a key of ``encoding_descriptions``.
    """
    metadata = rvt.studyMetadata(
        authors=["Jorge Acosta"],
        organizations=["Visualization Design Lab"],
        title="Mean&Std Encoding on Adjacency Matrices",
        description="This is an study to...",
        date="2025-01-13",
        version="1.0",
    )

    ui = rvt.uiConfig(
        contactEmail="jorge.acosta@upm.es",
        logoPath="revisitAssets/revisitLogoSquare.svg",
        helpTextPath=encoding_descriptions[encoding],
        sidebar=True,
        withProgressBar=True,
    )

    # Built in this order (training first), concatenated in presentation order.
    training = get_training(encoding)
    introduction = get_introduction(encoding)
    tasks = get_tasks(encoding)
    full_sequence = introduction + training + tasks

    # Disabled: sequence imported from the "previs" library.
    # previs_sequence = rvt.sequence(order="fixed", components="$previs.se.4dimensions")

    return rvt.studyConfig(
        schema="https://raw.githubusercontent.com/revisit-studies/study/main/src/parser/StudyConfigSchema.json",
        uiConfig=ui,
        studyMetadata=metadata,
        sequence=full_sequence,
        importedLibraries=["previs", "beauvis"],
    )


# Build a study configuration for every encoding, in declaration order.
# NOTE(review): study_config is rebound on each iteration and the file is
# written after the loop, so config.json only receives the configuration of
# the last encoding in encoding_descriptions. Confirm whether one file per
# encoding was intended.
for encoding, path in encoding_descriptions.items():
    study_config = generate_study(encoding)


# The task text contains non-ASCII characters (e.g. "→", "±"), so the file is
# written as UTF-8 explicitly instead of relying on the platform default.
with open("config.json", "w", encoding="utf-8") as f:
    f.write(str(study_config))

{'dataset': 'task_test.txt', 'encoding': 'markRotation', 'nodeOrderingDisabled': True}
{'dataset': 'task_test.txt', 'encoding': 'markRotation'}
{'dataset': 'task_test.txt', 'encoding': 'markRotation'}
{'dataset': 'task_test.txt', 'encoding': 'markRotation'}
{'dataset': 'task_test.txt', 'encoding': 'markRotation'}
{'dataset': 'task_test.txt', 'encoding': 'markRotation', 'isRangeTask': True}
{'dataset': 'task_test.txt', 'encoding': 'markRotation'}
{'dataset': 'task_test.txt', 'encoding': 'markRotation', 'isClusterTask': True, 'clusterMarks': [{'option': 'A', 'origin': 'Virginia', 'destination': 'Arizona'}, {'option': 'B', 'origin': 'Massachusetts', 'destination': 'Utah'}, {'option': 'C', 'origin': 'Washington', 'destination': 'Massachusetts'}, {'option': 'D', 'origin': 'Utah', 'destination': 'Tennessee'}], 'clusterMode': 'optimal', 'clusterVar': 'mean'}
{'dataset': 'task_test.txt', 'encoding': 'markRotation', 'isClusterTask': True, 'clusterMarks': [{'option': 'A', 'origin': 'Missouri', '