# 08. User Study (Results)

TODO: Cleanup Codes
TODO: Add Docs


## Setup

In [1]:
from datetime import timedelta
from pathlib import Path
from typing import Optional

import numpy as np
import pandas as pd
import seaborn as sns

import matplotlib.pyplot as plt

from wordcloud import WordCloud


In [2]:
sns.set()


In [3]:
DATA_DIR: Path = Path("../data/")


In [4]:
def get_status(task: dict) -> str:
    if not task["failed"]:
        return "success"

    last_event = task["events"][-1]

    if last_event["event"] == "__init__":
        return "skip"

    if "error" not in last_event["message"]:
        return "interruption"

    if last_event["message"]["error"] == "timeout":
        return "timeout"

    return "interruption"


def get_task_completion_time(task: dict) -> Optional[int]:
    if task["failed"]:
        return None

    first_event = task["events"][0]
    last_event = task["events"][-1]

    if last_event["timestamp"] is None:
        return None

    return last_event["timestamp"] - first_event["timestamp"]


def get_n_trials(task: dict) -> Optional[int]:
    return len(
        [
            event
            for event in task["events"]
            if event["event"] == "__call__" and "error" not in event["message"]
        ]
    )


def get_last_no_error_event(task: dict) -> Optional[dict]:
    try:
        return next(
            event
            for event in task["events"][::-1]
            if event["event"] == "__call__" and "error" not in event["message"]
        )
    except StopIteration:
        return None


def get_last_args(task: dict) -> dict:
    event = get_last_no_error_event(task)

    if event is None:
        return None

    return event["message"]["args"]


def get_last_kwargs(task: dict) -> dict:
    event = get_last_no_error_event(task)

    if event is None:
        return None

    return event["message"]["kwargs"]


def preprocess(df: pd.DataFrame) -> pd.DataFrame:
    user_group = df.at[0, "user_group"]

    if user_group == "A":
        a, b = "nli", "xnli"
    elif user_group == "B":
        a, b = "xnli", "nli"
    else:
        raise ValueError(f"user_group must be 'A' or 'B': not '{user_group}'")

    df[f"{a}_n_trials"] = df["task_a"].apply(get_n_trials)
    df[f"{b}_n_trials"] = df["task_b"].apply(get_n_trials)

    df[f"{a}_status"] = df["task_a"].apply(get_status)
    df[f"{b}_status"] = df["task_b"].apply(get_status)

    df[f"{a}_time_secs"] = df["task_a"].apply(get_task_completion_time)
    df[f"{b}_time_secs"] = df["task_b"].apply(get_task_completion_time)

    df[f"{a}_last_args"] = df["task_a"].apply(get_last_args)
    df[f"{a}_last_kwargs"] = df["task_a"].apply(get_last_kwargs)

    df[f"{b}_last_args"] = df["task_b"].apply(get_last_args)
    df[f"{b}_last_kwargs"] = df["task_b"].apply(get_last_kwargs)

    df["is_exercise"] = [True, True, True, False, False, False, False, False]

    df = df.rename(columns={"task_a": f"{a}_raw_data", "task_b": f"{b}_raw_data"})
    df = df.drop(columns=["nli_last_kwargs"])

    df.insert(3, "is_exercise", df.pop("is_exercise"))

    return df


In [5]:
USER_IDS = [1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]

tasks_df = pd.concat(
    [
        preprocess(
            pd.read_json(DATA_DIR.joinpath(f"results/user-study/tasks/{i}.json"))
        )
        for i in USER_IDS
    ]
)

tasks_df.drop(columns=["xnli_raw_data", "nli_raw_data"])


Unnamed: 0,user_id,user_group,is_exercise,xnli_n_trials,nli_n_trials,xnli_status,nli_status,xnli_time_secs,nli_time_secs,xnli_last_args,xnli_last_kwargs,nli_last_args
0,1,B,True,1,1,success,success,95.887676,46.478651,[],"{'x': 'occupation', 'y': 'count of records'}","[create bar charts. the input is occupation, and the output is Count of Records. All bars should be blue.]"
1,1,B,True,2,1,success,success,46.227883,28.480804,[],"{'x': 'start_from', 'y': 'emploee_ID', 'graph_type': 'line'}","[create line charts. input is start_from, and output is employee. ]"
2,1,B,True,1,2,success,success,58.119295,73.931726,[],"{'graph_type': 'pie chart', 'input': 'transaction_type_code'}",[pie charts. show the proportion of Mean of amount_of_transaction. Input should be colored by thier transaction_type_code.]
3,1,B,False,1,1,success,success,26.164192,25.219796,[],"{'graph_type': 'bar chart', 'input': 'location', 'output': 'Sum of number_of_platforms'}","[create bar charts. input is location, and output if sum of number_of_platforms]"
4,1,B,False,5,8,success,interruption,99.767576,,[],"{'graph_type': 'line chart', 'input': 'season', 'output': 'Count of Records', 'order': 'Reverse'}","[create line charts. input is season (from new to old), and output if count of records. The direction of input should be reversed.]"
...,...,...,...,...,...,...,...,...,...,...,...,...
3,16,A,False,3,3,success,timeout,79.712267,,[],"{'graph': 'bar', 'group': 'location', 'sum': 'number_of_platforms'}",[bar chart sum number_of_Plattforms group by location]
4,16,A,False,5,8,success,success,178.074770,156.574884,[],"{'graph': 'line', 'group': 'season', 'order': 'desc'}",[line graph group by season order by season desc]
5,16,A,False,6,7,success,timeout,207.611814,,[],"{'graph': 'bar', 'group': 'sex', 'count': 'Rank AsstProf'}",[bar chart where Rank is AsstProf group by Sex ]
6,16,A,False,5,5,success,timeout,113.393069,,[],"{'graph': 'pie', 'bar': 'stu_fname', 'order': 'STU_GPA', 'top': '5'}",[pie chart stu_fname]


In [6]:
pre_experiment_qa_df = pd.read_csv(
    DATA_DIR.joinpath("results/user-study/questionnaires/pre-experiment.csv")
)
pre_experiment_qa_df = pre_experiment_qa_df[
    pre_experiment_qa_df["user_id"].isin(USER_IDS)
]
pre_experiment_qa_df = pre_experiment_qa_df.sort_values("user_id")
# pre_experiment_qa_df = pre_experiment_qa_df.drop(columns=["timestamp"])
pre_experiment_qa_df = pre_experiment_qa_df.reset_index(drop=True)

pre_experiment_qa_df


Unnamed: 0,timestamp,user_id,Gender,Age,English Level,Visualization Tools You Have Used,Experience of Visualization Tools,Experience of Natural Language Interface for Data Visualization Tools like Tableau Ask Data,Experience of Programming Language Python,Experience of Jupyter Notebook
0,2022-12-26 10:31:00,1,Male,23,3,"Python Module for Visualization (Altair, Matplotlib, Plotly, etc);Spreadsheet (Excel, Google Spreadsheet, etc)",4,1,4,2
1,2022-12-25 11:22:33,2,Male,24,3,"Python Module for Visualization (Altair, Matplotlib, Plotly, etc);Spreadsheet (Excel, Google Spreadsheet, etc)",2,1,4,3
2,2022-12-26 04:57:41,3,Male,22,4,"Python Module for Visualization (Altair, Matplotlib, Plotly, etc);Spreadsheet (Excel, Google Spreadsheet, etc)",5,1,5,5
3,2023-01-05 03:06:40,5,Male,31,5,"Python Module for Visualization (Altair, Matplotlib, Plotly, etc);Spreadsheet (Excel, Google Spreadsheet, etc)",2,1,2,4
4,2022-12-28 05:33:05,6,Male,22,3,"Python Module for Visualization (Altair, Matplotlib, Plotly, etc);Spreadsheet (Excel, Google Spreadsheet, etc)",3,1,4,3
5,2022-12-26 03:12:22,7,Male,23,3,"Spreadsheet (Excel, Google Spreadsheet, etc)",2,1,5,5
6,2022-12-28 01:54:47,8,Male,24,3,"Spreadsheet (Excel, Google Spreadsheet, etc)",2,1,4,4
7,2022-12-28 11:51:39,9,Male,23,4,"Python Module for Visualization (Altair, Matplotlib, Plotly, etc);Spreadsheet (Excel, Google Spreadsheet, etc)",3,1,5,5
8,2022-12-29 11:44:53,10,Male,25,2,"Python Module for Visualization (Altair, Matplotlib, Plotly, etc);Spreadsheet (Excel, Google Spreadsheet, etc)",4,1,5,4
9,2023-01-02 02:41:27,11,Male,30,2,"Spreadsheet (Excel, Google Spreadsheet, etc)",1,1,5,2


In [None]:
post_experiment_qa_df = pd.read_csv(
    DATA_DIR.joinpath("results/user-study/questionnaires/post-experiment.csv")
)
post_experiment_qa_df = post_experiment_qa_df[
    post_experiment_qa_df["user_id"].isin(USER_IDS)
]
post_experiment_qa_df = post_experiment_qa_df.sort_values("user_id")
post_experiment_qa_df = post_experiment_qa_df.reset_index(drop=True)

post_experiment_qa_df


In [None]:
vxnli_qa_df = pd.read_csv(
    DATA_DIR.joinpath("results/user-study/questionnaires/v-xnli.csv")
)
vxnli_qa_df = vxnli_qa_df[vxnli_qa_df["user_id"].isin(USER_IDS)]

vnli_qa_df = pd.read_csv(
    DATA_DIR.joinpath("results/user-study/questionnaires/v-nli.csv")
)
vnli_qa_df = vnli_qa_df[vnli_qa_df["user_id"].isin(USER_IDS)]


vxnli_qa_df.columns = vnli_qa_df.columns = [
    "timestamp",
    "user_id",
    "01_score",
    "01_reason__easy_to_use_interface_?",
    "02_score",
    "02_reason__easy_to_fix_mistakes_?",
    "03_score",
    "03_reason__many_interpretation_mistakes_?",
    "04_score",
    "04_reason__can_not_understand_requests_?",
    "05_good_points_and_bad_points",
    "06_additional_comments",
]

vxnli_qa_df["is_vxnli"] = True
vnli_qa_df["is_vxnli"] = False


vxnli_vnli_qa_df = pd.concat(
    [
        vxnli_qa_df,
        vnli_qa_df,
    ]
)

vxnli_vnli_qa_df = vxnli_vnli_qa_df.sort_values(["user_id", "is_vxnli"])
vxnli_vnli_qa_df = vxnli_vnli_qa_df.drop(columns=["timestamp"])
vxnli_vnli_qa_df.insert(1, "is_vxnli", vxnli_vnli_qa_df.pop("is_vxnli"))
vxnli_vnli_qa_df = vxnli_vnli_qa_df.reset_index(drop=True)
vxnli_vnli_qa_df


## Outliers

We shared the link to Google Colab with the participants and let them solve tasks whenever possible.
It's low-effort and convenient both for us and them.
However, some participants didn't follow the procedure.
Or they solved tasks in halves.

We decided to cherry-pick the results below to understand the results correctly.


### P04

I asked him to participate in this user study.
However, he infected covid-19...
I can't wait for him due to this paper's deadline (for my graduation :)), so I decided to omit his result.
After submitting this paper, I'm going to ask him again, and summarize the results again to submit this paper to some conference.


### P08

P08 didn't read the explanation and used V-NLI instead of V-XNLI in task B.
Of course, it's ok if he read the description and decided to use the NL query (P04 used NL query only and it's a valid result).
However, if you check the logs, he solved the tasks (both task A and task B) in halves.

At first, he gave up the tasks with one or two interactions.
Secondly, he skipped task 5 without solving it.
Thirdly, he seemed to use copy & paste in some tasks (We anticipated this possibility, but we expected to use structured inputs, and no one did that).
Finally, I asked if he had read the task B explanation, and he admitted he didn't.

We appreciate his participation in this experiment.
However, we have to remove all of his results, including the task results and the questionnaire answers.


In [10]:
pre_experiment_qa_df[pre_experiment_qa_df["user_id"] == 8]


Unnamed: 0,timestamp,user_id,Gender,Age,English Level,Visualization Tools You Have Used,Experience of Visualization Tools,Experience of Natural Language Interface for Data Visualization Tools like Tableau Ask Data,Experience of Programming Language Python,Experience of Jupyter Notebook
6,2022-12-28 01:54:47,8,Male,24,3,"Spreadsheet (Excel, Google Spreadsheet, etc)",2,1,4,4


In [11]:
tasks_df[tasks_df["user_id"] == 8].drop(columns=["xnli_raw_data", "nli_raw_data"])


Unnamed: 0,user_id,user_group,is_exercise,xnli_n_trials,nli_n_trials,xnli_status,nli_status,xnli_time_secs,nli_time_secs,xnli_last_args,xnli_last_kwargs,nli_last_args
0,8,A,True,2,0,success,timeout,193.274827,,"[make Histogram, x is occupation, y is count of occupation]",{},
1,8,A,True,2,2,success,success,96.639131,63.504093,"[make Folding Line Chart, x is start_from, y is employee_id]",{},"[make Folding Line Chart graph, x is the start_from, y is employee_id]"
2,8,A,True,1,1,success,success,27.629354,196.898112,"[calculate mean of amount_of_transaction, then generate a pie chart based on transaction_type_code]",{},"[calculate mean of amount_of_transaction, then generate a pie chart based on transaction_type_code]"
3,8,A,False,1,1,success,success,113.262611,97.473858,"[make Histogram, x is location, y is sum of number_of_platforms]",{},"[make Histogram graph, x is location, y is sum of number_of_platforms.]"
4,8,A,False,2,1,interruption,interruption,,,"[make Folding Line Chart, x is season, y is sum of the Score of Home_team]",{},"[make Folding Line Chart, x is season, y is Count of higest score]"
5,8,A,False,1,2,success,success,29.466767,140.730327,"[make Histogram, x is sex, y is the number of AsstProf]",{},"[make Histogram, x is sex, y is the number of AsstProf]"
6,8,A,False,2,1,interruption,timeout,,,"[make pie chart, only top 5 from STU_GPA, show sut_fname]",{},[make pie chart of Top 5 STU_GPA]
7,8,A,False,0,0,skip,skip,,,,,


### P12

In [12]:
pre_experiment_qa_df[pre_experiment_qa_df["user_id"] == 12]


Unnamed: 0,timestamp,user_id,Gender,Age,English Level,Visualization Tools You Have Used,Experience of Visualization Tools,Experience of Natural Language Interface for Data Visualization Tools like Tableau Ask Data,Experience of Programming Language Python,Experience of Jupyter Notebook
10,2022-12-30 04:59:56,12,Male,36,2,"Tableau;Python Module for Visualization (Altair, Matplotlib, Plotly, etc)",3,1,5,3


In [13]:
tasks_df[tasks_df["user_id"] == 12].drop(columns=["xnli_raw_data", "nli_raw_data"])


Unnamed: 0,user_id,user_group,is_exercise,xnli_n_trials,nli_n_trials,xnli_status,nli_status,xnli_time_secs,nli_time_secs,xnli_last_args,xnli_last_kwargs,nli_last_args
0,12,A,True,1,1,success,success,72.884276,173.392007,"[... bar chart, x=occupation, y=count of records, ]",{},"[... bar chart, x=occupation, y=count of records ...]"
1,12,A,True,1,1,success,success,68.715492,88.236934,"[... line chart, x=start_from, y=employee_id ...]",{},"[... line chart, x=start_from, y=employee_id ...]"
2,12,A,True,3,2,interruption,success,,280.456651,"[... pie chart, x=mean of amount_of_transaction, pur is blue, sale is orange ...]",{},"[... pie chart, x=mean of amount_of_transaction, label is transaction_type_code, pur is blue, sale is orange ...]"
3,12,A,False,1,1,success,success,39.908754,103.551189,"[... bar chart, x=location, y=sum of number_of_platforms ...]",{},"[... bar chart, x = location, y=sum of number_of_platforms ...]"
4,12,A,False,2,1,success,success,90.409683,110.037726,"[... line chart, x=season, y=count of records, season is in descending order ...]",{},"[... line chart, x = season, y = count of records, season is in descending order ...]"
5,12,A,False,2,2,success,timeout,71.967275,,"[... bar chart, x=sex, y=count of records, The value of Rank must be 'AsstProf' ...]",{},"[... bar chart, x=sex, y=count of records, group by sex ...]"
6,12,A,False,3,2,timeout,timeout,,,"[... pie chart, x=stu_fname, top 5 stu_gpa ...]",{},"[... pie chart, x=stu_fname...]"
7,12,A,False,4,1,success,timeout,123.319146,,"[... stacked bar chart, x=headquarters, y=count of records, color coded by industry, order by count of records ...]",{},"[... stacked bar chart, x=headquarters, y=count of records, color coded by industry ...]"


In [14]:
tasks_df[tasks_df["user_id"] == 15].drop(columns=["xnli_raw_data", "nli_raw_data"])


Unnamed: 0,user_id,user_group,is_exercise,xnli_n_trials,nli_n_trials,xnli_status,nli_status,xnli_time_secs,nli_time_secs,xnli_last_args,xnli_last_kwargs,nli_last_args
0,15,B,True,2,0,success,skip,271.636818,,"[{'figure': 'bar'}, True, occupation]",{},
1,15,B,True,3,0,success,skip,149.861379,,"[{'figure': 'line'}, True, {'x': 'start_from'}, {'y': 'employee_id'}, {'order by': 'start_from'}]",{},
2,15,B,True,0,0,success,skip,,,,,
3,15,B,False,3,0,success,timeout,115.321336,,"[{'figure': 'bar'}, True, location, {'summary': 'sum of number_of_platforms'}]",{},
4,15,B,False,2,3,success,success,287.109709,126.395757,"[{'figure': 'line'}, True, {'x': 'season'}, {'summary': 'count of records'}, {'order by': 'season'}]",{},[... (figure is line chart.x is season and y is count of records. x is descending order) ...]
5,15,B,False,2,5,timeout,timeout,,,"[{'figure': 'bar'}, True, {'x': 'sex'}, {'summary': 'count of records'}, {'conditions': 'Rank=AsstProf'}]",{},[... (figure is bar chart.x is sex and y is count of records only by 'AsstProf') ...]
6,15,B,False,2,4,timeout,timeout,,,"[{'figure': 'pie chart'}, True, {'x': 'stu_fname'}, {'order by': 'STU_GPA'}]",{},[(figure is pie chart.x is stu_fname whitch are top 5 of STU_GPA]
7,15,B,False,2,2,timeout,success,,106.813744,"[{'figure': 'stacked bar chart'}, True, {'x': 'headquarters'}, {'summary': 'count of records'}, {'feature by': 'Industry'}]",{},[(figure is stacked bar chart.x is headquarters and y is count of records.x is stacked feature by industry.x is order by count of recored]


### P14

P14 didn't answer the questionnaire about V-XNLI.
We include his tasks results.
However, we remove the questionnaire answers.


### P15

P15 didn't read the task B explanation and used V-XNLI instead of V-NLI in task B exercises and task B1.

We put "vnli(   )" in the cell beforehand, but he removed it and used "vxnli" instead.
If he could use the vnli method, it raises an error ("Use single NL query!"), and he could notice his mistake soon.
As a result, it always returned awful results.
In task B2, he noticed his mistake.
If he noticed it during task B1, we could include his results.

We really appreciate his participation because he did all tasks and answered all questionnaire seriously. 
However, we decided to remove his results.

## Analyze Each Participant

### P01

In [15]:
pre_experiment_qa_df[pre_experiment_qa_df["user_id"] == 1]


Unnamed: 0,timestamp,user_id,Gender,Age,English Level,Visualization Tools You Have Used,Experience of Visualization Tools,Experience of Natural Language Interface for Data Visualization Tools like Tableau Ask Data,Experience of Programming Language Python,Experience of Jupyter Notebook
0,2022-12-26 10:31:00,1,Male,23,3,"Python Module for Visualization (Altair, Matplotlib, Plotly, etc);Spreadsheet (Excel, Google Spreadsheet, etc)",4,1,4,2


In [16]:
p01_tasks_df = tasks_df[tasks_df["user_id"] == 1]
p01_tasks_df.drop(columns=["user_id", "user_group", "nli_raw_data", "xnli_raw_data"])


Unnamed: 0,is_exercise,xnli_n_trials,nli_n_trials,xnli_status,nli_status,xnli_time_secs,nli_time_secs,xnli_last_args,xnli_last_kwargs,nli_last_args
0,True,1,1,success,success,95.887676,46.478651,[],"{'x': 'occupation', 'y': 'count of records'}","[create bar charts. the input is occupation, and the output is Count of Records. All bars should be blue.]"
1,True,2,1,success,success,46.227883,28.480804,[],"{'x': 'start_from', 'y': 'emploee_ID', 'graph_type': 'line'}","[create line charts. input is start_from, and output is employee. ]"
2,True,1,2,success,success,58.119295,73.931726,[],"{'graph_type': 'pie chart', 'input': 'transaction_type_code'}",[pie charts. show the proportion of Mean of amount_of_transaction. Input should be colored by thier transaction_type_code.]
3,False,1,1,success,success,26.164192,25.219796,[],"{'graph_type': 'bar chart', 'input': 'location', 'output': 'Sum of number_of_platforms'}","[create bar charts. input is location, and output if sum of number_of_platforms]"
4,False,5,8,success,interruption,99.767576,,[],"{'graph_type': 'line chart', 'input': 'season', 'output': 'Count of Records', 'order': 'Reverse'}","[create line charts. input is season (from new to old), and output if count of records. The direction of input should be reversed.]"
5,False,3,1,success,success,135.835367,37.560903,[],"{'graph_type': 'bar charts', 'input': 'sex', 'output': 'Count of Records', 'value_of_Rank': 'AsstProf'}","[create bar charts. input is sex, and output if count of records. note that the value of rank must be AsstProf]"
6,False,8,6,interruption,interruption,,,[],"{'graph_type': 'pie chart', 'input': 'stu_fname', 'output': 'STU_GPA', 'limit': 'Top 5'}",[create pie charts. show Top-5 STU_GPA. larger value must have larget area in the charts.]
7,False,8,7,interruption,interruption,,,[],"{'graph_type': 'bar charts', 'input': 'headquarters', 'output': 'Count of Records', 'color': 'industry', 'order': 'few from many'}","[create bar charts. input is headquaters, and output is count of records. bars should be colored by industry]"


In [None]:
vxnli_vnli_qa_df[vxnli_vnli_qa_df["user_id"] == 1]


### P02


In [18]:
pre_experiment_qa_df[pre_experiment_qa_df["user_id"] == 2]


Unnamed: 0,timestamp,user_id,Gender,Age,English Level,Visualization Tools You Have Used,Experience of Visualization Tools,Experience of Natural Language Interface for Data Visualization Tools like Tableau Ask Data,Experience of Programming Language Python,Experience of Jupyter Notebook
1,2022-12-25 11:22:33,2,Male,24,3,"Python Module for Visualization (Altair, Matplotlib, Plotly, etc);Spreadsheet (Excel, Google Spreadsheet, etc)",2,1,4,3


In [19]:
p02_tasks_df = tasks_df[tasks_df["user_id"] == 2]
p02_tasks_df.drop(columns=["user_id", "user_group", "nli_raw_data", "xnli_raw_data"])


Unnamed: 0,is_exercise,xnli_n_trials,nli_n_trials,xnli_status,nli_status,xnli_time_secs,nli_time_secs,xnli_last_args,xnli_last_kwargs,nli_last_args
0,True,1,1,success,success,27.181318,108.208524,[cout of records for each occupation],{},[count the number of records for each occupation]
1,True,1,2,success,success,43.602573,157.019645,[a line graph where x-axis is start_from and y-axis is employee_id],{},[a line graph where x-axis is start_from y-axis is employee_id]
2,True,1,2,success,success,36.852808,111.703771,[The proportion of mean of amout_of_transaction for transaction_type_code],{},[The propotion of mean of amount_of_transaction for transaction_type_code]
3,False,1,1,success,success,35.979354,50.663591,[a graph where x-axis is location and y-axis is sum of number_of_platforms],{},[a graph x-axis is location and y-axis is sum of number_of_platforms]
4,False,4,6,success,timeout,91.625079,,[A line graph where x-axis is season and y-axis is count of records],{'x_reversed': True},"[A line graph where x-axis is season, y-axis is count of records. Then flip x]"
5,False,4,3,success,success,64.641389,121.762268,[A graph y-axis is he cout of records for sex where Rank is AsstProf],{},"[a graph x-axis is sex and y-axis is count of records, where rank = AsstProf]"
6,False,3,1,success,timeout,108.977788,,[STU_FNAMES wher STU_FNAES in Top 5 STU_GPA people],{},[top 5 STU_GPA]
7,False,8,5,timeout,timeout,,,[The graph where x-axis is headquarters and y-axis is count of records],"{'group_by': 'Industry', 'order_by': 'cout of records ascending'}",[The count of records for headquarters in ascending order showing breakdown for Industry]


In [None]:
vxnli_vnli_qa_df[vxnli_vnli_qa_df["user_id"] == 2]


### P03


In [258]:
pre_experiment_qa_df[pre_experiment_qa_df["user_id"] == 3]


Unnamed: 0,timestamp,user_id,Gender,Age,English Level,Visualization Tools You Have Used,Experience of Visualization Tools,Experience of Natural Language Interface for Data Visualization Tools like Tableau Ask Data,Experience of Programming Language Python,Experience of Jupyter Notebook
2,2022-12-26 04:57:41,3,Male,22,4,"Python Module for Visualization (Altair, Matplotlib, Plotly, etc);Spreadsheet (Excel, Google Spreadsheet, etc)",5,1,5,5


In [261]:
tasks_df[tasks_df["user_id"] == 3].drop(
    columns=["user_id", "user_group", "nli_raw_data", "xnli_raw_data"]
)


Unnamed: 0,is_exercise,xnli_n_trials,nli_n_trials,xnli_status,nli_status,xnli_time_secs,nli_time_secs,xnli_last_args,xnli_last_kwargs,nli_last_args
0,True,4,2,interruption,success,,72.415835,"[the label of y is Count of Records, There is two students]",{},[the labe of x axis is occupation and the label of y is Count of Records]
1,True,5,4,success,timeout,149.848582,,"[the labe of x axis is start_from and the label of y is employee_id, line graph]",{},[the labe of x axis is headquarters and the label of y is Count of Records]
2,True,7,7,interruption,interruption,,,"[Pie chart, 66% of them are pur and the rest are sale]",{},[pie chart. 66% pur. 33% sale]
3,False,1,1,success,success,49.112643,16.158546,[the labe of x axis is location and the label of y is sum of number_of_platforms],{},[the labe of x axis is location and the label of y is sum of number_of_platforms]
4,False,6,1,success,success,166.098499,74.029132,"[the labe of x axis is season and the label of y is Count of Records, line graphthe order of x axis values is reversed]",{},[the labe of x axis is season and the label of y is Count of Records. line graph and the order of x axis values is reversed]
5,False,9,2,timeout,interruption,,,"[the labe of x axis is sex and the label of y is Count of Records, the range of y axis is from zero to twelve]",{},[the labe of x axis is sex and the label of y is Count of Records. The female is 3 and the male is 12. The order of x axis is reversed]
6,False,11,4,timeout,interruption,,,[pie chart],{},"[Pie chart. stu_fname are anne, gerald, john, raphael, and walter]"
7,False,4,4,timeout,interruption,,,[the labe of x axis is headquarters and the label of y is Count of Records],{},"[the labe of x axis is headquarters and the label of y is Count of Records. The more to right, the greater the y value is. The colors represents the industry]"


In [None]:
vxnli_vnli_qa_df[vxnli_vnli_qa_df["user_id"] == 3]


### P05

P5 is a native English speaker.
He used NL query in V-XNLI, not only in V-NLI.
His first task is V-XNLI, so we think he read the explanation and decided to use single NL query instead of keyword arguments.


In [24]:
pre_experiment_qa_df[pre_experiment_qa_df["user_id"] == 5]


Unnamed: 0,timestamp,user_id,Gender,Age,English Level,Visualization Tools You Have Used,Experience of Visualization Tools,Experience of Natural Language Interface for Data Visualization Tools like Tableau Ask Data,Experience of Programming Language Python,Experience of Jupyter Notebook
3,2023-01-05 03:06:40,5,Male,31,5,"Python Module for Visualization (Altair, Matplotlib, Plotly, etc);Spreadsheet (Excel, Google Spreadsheet, etc)",2,1,2,4


In [25]:
p05_tasks_df = tasks_df[tasks_df["user_id"] == 5]
p05_tasks_df.drop(columns=["user_id", "user_group", "nli_raw_data", "xnli_raw_data"])


Unnamed: 0,is_exercise,xnli_n_trials,nli_n_trials,xnli_status,nli_status,xnli_time_secs,nli_time_secs,xnli_last_args,xnli_last_kwargs,nli_last_args
0,True,1,1,success,success,137.088312,42.78002,[show count of records as a function of occupation in a bar chart],{},[show count of records as afunction of occupation as a bar chart]
1,True,2,1,success,success,53.787033,35.658147,[plot employee_id as a function of start_from in a line chart],{},[plot employee_id as a function of start_from in ascending order as a line chart]
2,True,2,2,success,success,66.313174,45.97519,[calculate mean of amount_of_transaction for each transaction_type_code in a pie chart],{},[pie chart of the mean of amount_of_transaction for transaction_type_code]
3,False,1,1,success,success,30.517214,35.567909,[show sum of number of platforms as a function of location in a bar chart],{},[bar chart of sum of number_of_platforms as a function of location]
4,False,4,2,success,success,101.43995,39.251971,[plot count of records as a function of seasons in descending order in a line chart],{},[line chart of count of records as a function of season in descending order]
5,False,1,1,success,success,63.114725,29.751417,[show count of records for sex where rank is AsstProf as a bar chart],{},[bar chart of count of records for sex where rank is asstprof]
6,False,4,5,timeout,success,,146.821552,"[show stu_fname with the highest stu_gpa as a pie chart, limit to five]",{},[pie chart of five stu_fname with highest stu_gpa limit to 5]
7,False,6,7,timeout,success,,146.151248,[show count of records as a function industry grouped by headquarters as a bar chart seperate by industry],{},"[colored bar chart of count of records of industry for every headquarters, colorize by industry, order by count]"


In [None]:
vxnli_vnli_qa_df[vxnli_vnli_qa_df["user_id"] == 5]


### P06


In [27]:
pre_experiment_qa_df[pre_experiment_qa_df["user_id"] == 6]


Unnamed: 0,timestamp,user_id,Gender,Age,English Level,Visualization Tools You Have Used,Experience of Visualization Tools,Experience of Natural Language Interface for Data Visualization Tools like Tableau Ask Data,Experience of Programming Language Python,Experience of Jupyter Notebook
4,2022-12-28 05:33:05,6,Male,22,3,"Python Module for Visualization (Altair, Matplotlib, Plotly, etc);Spreadsheet (Excel, Google Spreadsheet, etc)",3,1,4,3


In [28]:
p06_tasks_df = tasks_df[tasks_df["user_id"] == 6]
p06_tasks_df.drop(columns=["user_id", "user_group", "nli_raw_data", "xnli_raw_data"])


Unnamed: 0,is_exercise,xnli_n_trials,nli_n_trials,xnli_status,nli_status,xnli_time_secs,nli_time_secs,xnli_last_args,xnli_last_kwargs,nli_last_args
0,True,1,1,success,success,71.656503,163.748085,[],"{'figure': 'bar', 'xlabel': 'occupation', 'ylabel': 'count'}",[create a bar graph of the number of each person's occupation]
1,True,1,1,success,success,50.63235,113.557228,[],"{'figure': 'line', 'xlabel': 'start_from', 'ylabel': 'employee_id'}",[create a line graph of the relationship between start_from and employee_id in the order of start_from]
2,True,2,3,success,success,70.290268,269.027821,[],"{'figure': 'circle', 'value': 'mean of amount_of_transaction', 'label': 'transaction_type_code'}",[create a circular graph of the mean of amount_of_transaction of each transaction_type_code]
3,False,1,2,success,success,43.947267,93.215345,[],"{'figure': 'bar', 'xlabel': 'location', 'ylabel': 'sum of number_of_platforms'}",[create a bar graph of the sum of number_of_platforms of each location]
4,False,8,3,interruption,interruption,,,[],"{'figure': 'line', 'xlabel': 'season', 'ylabel': 'count', 'order': 'season inverse'}",[create a line graph of the count of records of each season]
5,False,4,2,interruption,success,,88.230017,[],"{'figure': 'bar', 'xlabel': 'sex', 'ylabel': 'count', 'filter': 'Rank is AsstProf'}",[create a bar graph of the number of each sex whose rank is AsstProf]
6,False,1,4,success,interruption,49.753891,,[graph of top 5 STU_GPA],"{'figure': 'circle', 'label': 'stu_fname'}",[create a circular graph of the biggest 5 STU_GPA]
7,False,2,6,success,interruption,77.834495,,[],"{'figure': 'bar', 'xlabel': 'headquarters', 'ylabel': 'count', 'color': 'industry', 'ascending': True}","[create a bar graph of the number of each headquarters, colored by industry, in the ascending order of headquarters]"


In [None]:
vxnli_vnli_qa_df[vxnli_vnli_qa_df["user_id"] == 6]


### P07

P07 is only a person who clearly prefers to V-NLI, not V-XNLI.


In [30]:
pre_experiment_qa_df[pre_experiment_qa_df["user_id"] == 7]


Unnamed: 0,timestamp,user_id,Gender,Age,English Level,Visualization Tools You Have Used,Experience of Visualization Tools,Experience of Natural Language Interface for Data Visualization Tools like Tableau Ask Data,Experience of Programming Language Python,Experience of Jupyter Notebook
5,2022-12-26 03:12:22,7,Male,23,3,"Spreadsheet (Excel, Google Spreadsheet, etc)",2,1,5,5


In [31]:
p07_tasks_df = tasks_df[tasks_df["user_id"] == 7]
p07_tasks_df.drop(columns=["user_id", "user_group", "nli_raw_data", "xnli_raw_data"])


Unnamed: 0,is_exercise,xnli_n_trials,nli_n_trials,xnli_status,nli_status,xnli_time_secs,nli_time_secs,xnli_last_args,xnli_last_kwargs,nli_last_args
0,True,0,1,timeout,success,,31.559044,,,[bar graph of count of records per occupation]
1,True,4,1,timeout,success,,20.376275,[],{'line': True},[line graph of employ_id and start_from]
2,True,4,8,success,interruption,159.973599,,[],"{'plottype': 'circle_graph', 'category': 'transaction_type_code', 'value': 'mean of amount_of_transaction'}",[circular graph of mean of amount_of_transaction and transaction_type_code]
3,False,1,1,success,success,52.241676,29.220113,[],"{'plot': 'bar', 'vertical': 'sum of number_of_platforms', 'horizontal': 'location'}",[sum of number_of_platforms between location]
4,False,5,5,timeout,success,,126.650261,[],"{'line': True, 'vertical': 'Count of Records', 'horizontal': 'Season'}",[line graph. vertical axis is count of records. horizontal axis is season in reversed order]
5,False,3,1,success,success,96.8593,36.866914,[],"{'vertical': 'Count of records', 'horizontal': 'sex', 'filter': 'rank is AsstProf'}",[count of records per sex. rank is asstprof.]
6,False,5,2,timeout,success,,78.243871,[],"{'plot': 'circle', 'entry': 'stu_fname', 'value': 'stu_gpa', 'count': 5, 'sort': 'descend'}",[pie chart for stu_gpa. legend is stu_fname. show only top 5]
7,False,7,1,success,success,180.03021,43.979917,[],"{'vertical': 'Count of records', 'horizontal': 'headquarters', 'color': 'industry', 'sort': 'by total Count of records'}",[count of records per headquaters. colored by industry. sort by count of records.]


In [None]:
vxnli_vnli_qa_df[vxnli_vnli_qa_df["user_id"] == 7]


### P09

In [33]:
pre_experiment_qa_df[pre_experiment_qa_df["user_id"] == 9]


Unnamed: 0,timestamp,user_id,Gender,Age,English Level,Visualization Tools You Have Used,Experience of Visualization Tools,Experience of Natural Language Interface for Data Visualization Tools like Tableau Ask Data,Experience of Programming Language Python,Experience of Jupyter Notebook
7,2022-12-28 11:51:39,9,Male,23,4,"Python Module for Visualization (Altair, Matplotlib, Plotly, etc);Spreadsheet (Excel, Google Spreadsheet, etc)",3,1,5,5


In [264]:
tasks_df[tasks_df["user_id"] == 9].drop(
    columns=["user_id", "user_group", "nli_raw_data", "xnli_raw_data"]
)


Unnamed: 0,is_exercise,xnli_n_trials,nli_n_trials,xnli_status,nli_status,xnli_time_secs,nli_time_secs,xnli_last_args,xnli_last_kwargs,nli_last_args
0,True,2,1,success,success,155.691051,44.333086,[],"{'x': 'occupation in lower case, alphabetical order', 'y': 'number of records'}",[Count of Records aggregated by occupation]
1,True,3,1,success,success,103.124895,30.497684,[line],"{'x': 'Start_from', 'y': 'Employee_ID'}",[line graph of employee_id by start_from]
2,True,2,1,success,success,89.816715,35.955742,"[pie graph of average amount_of_transaction, aggregated by transaction_type_code]",{},[pie graph of average amount_of_transaction per transaction_type_code]
3,False,1,1,success,success,46.810705,25.605043,[],"{'x': 'location', 'y': 'sum of number_of_platforms'}",[Bar graph of sum of number_of_platforms by location]
4,False,2,2,success,success,55.759731,34.648614,[line],"{'x': 'season, desc', 'y': 'Count of Records'}","[line graph of count of records aggregated by season, desc order]"
5,False,9,2,timeout,success,,66.253821,[],"{'x': 'sex', 'y': 'Count of Records', 'rank': 'AsstProf'}","[bar graph of count of records filtered by rank AsstProf, for each sex]"
6,False,2,5,success,timeout,59.610234,,"[pie graph of STU_GPA, only top five, labeled by STU_FNAME]",{},"[pie graph of STU_GPA, label STU_FNAME]"
7,False,8,5,interruption,success,,99.946604,[bar],"{'x': 'Headquaters', 'y': 'Count of Records', 'color': 'Industry', 'sort_by': 'count of records'}","[bar graph of count of records, aggregated by headquaters sorted by count of records, colored by industry ]"


In [None]:
vxnli_vnli_qa_df[vxnli_vnli_qa_df["user_id"] == 9]


### P10

In [36]:
pre_experiment_qa_df[pre_experiment_qa_df["user_id"] == 10]


Unnamed: 0,timestamp,user_id,Gender,Age,English Level,Visualization Tools You Have Used,Experience of Visualization Tools,Experience of Natural Language Interface for Data Visualization Tools like Tableau Ask Data,Experience of Programming Language Python,Experience of Jupyter Notebook
8,2022-12-29 11:44:53,10,Male,25,2,"Python Module for Visualization (Altair, Matplotlib, Plotly, etc);Spreadsheet (Excel, Google Spreadsheet, etc)",4,1,5,4


In [37]:
p10_tasks_df = tasks_df[tasks_df["user_id"] == 10]
p10_tasks_df.drop(columns=["user_id", "user_group", "nli_raw_data", "xnli_raw_data"])


Unnamed: 0,is_exercise,xnli_n_trials,nli_n_trials,xnli_status,nli_status,xnli_time_secs,nli_time_secs,xnli_last_args,xnli_last_kwargs,nli_last_args
0,True,1,0,success,interruption,74.183105,,[create bar graph],"{'xlabel': 'occupation', 'ylabel': 'Count of Records'}",
1,True,1,2,success,success,59.722137,89.233025,[create line graph],"{'xlabel': 'start_from', 'ylabel': 'employee_id'}","[plot line graph, xlabel = start_from, ylabel = employee_id]"
2,True,2,2,success,success,80.696528,109.646931,"[create pie chart, mean of amount_of_transaction]",{'category': 'transaction_type_code'},"[create pie chart, mean of amount_of_transaction, category = transaction_type_code]"
3,False,1,1,success,success,51.691643,67.818852,[create bar graph],"{'x': 'location', 'y': 'Sum of number_of_platforms'}","[create bar graph, xlabel = location, ylabel = Sum of number_of_platforms]"
4,False,1,1,success,interruption,54.423123,,[create line graph],"{'x': 'season', 'xorder': 'reverse', 'y': 'Count of Records'}","[create bar graph, xlabel = location, ylabel = Sum of number_of_platforms]"
5,False,6,4,interruption,interruption,,,[bar graph for AsstProf Rank],"{'x': 'sex', 'y': 'Count of Records'}","[create bar graph, xlabel = sex, y = number of AsstProf in Rank]"
6,False,2,4,success,interruption,190.533816,,"[pie chart, STU_GPA]",{},"[create bar graph, xlabel = sex, y = number of AsstProf in Rank]"
7,False,5,4,success,interruption,139.069655,,[stacked graph],"{'x': 'headquarters', 'yorder': 'ascending', 'y': 'Count of Records', 'stack': 'industry'}","[create stacked graph, xlabel = headquarters, ylabel = Count of Records for each industry]"


In [None]:
vxnli_vnli_qa_df[vxnli_vnli_qa_df["user_id"] == 10]


### P11

In [39]:
pre_experiment_qa_df[pre_experiment_qa_df["user_id"] == 11]


Unnamed: 0,timestamp,user_id,Gender,Age,English Level,Visualization Tools You Have Used,Experience of Visualization Tools,Experience of Natural Language Interface for Data Visualization Tools like Tableau Ask Data,Experience of Programming Language Python,Experience of Jupyter Notebook
9,2023-01-02 02:41:27,11,Male,30,2,"Spreadsheet (Excel, Google Spreadsheet, etc)",1,1,5,2


In [40]:
p11_tasks_df = tasks_df[tasks_df["user_id"] == 11]
p11_tasks_df.drop(columns=["user_id", "user_group", "nli_raw_data", "xnli_raw_data"])


Unnamed: 0,is_exercise,xnli_n_trials,nli_n_trials,xnli_status,nli_status,xnli_time_secs,nli_time_secs,xnli_last_args,xnli_last_kwargs,nli_last_args
0,True,3,3,success,success,181.58769,68.93311,[],"{'use_bar_graph': True, 'x': 'occupation', 'y': 'Count of Records'}",[横軸がoccupationで、縦軸がレコードの総数の棒グラフ]
1,True,1,10,success,success,32.432917,275.387718,[],"{'use_line': True, 'x': 'Start_from', 'y': 'employee_id'}","[show line graph with Y-axis of employee_id, group by start_from]"
2,True,2,9,success,success,135.770765,192.921439,[mean of amount_of_transaction group by transaction_type_code],{'use_pie': True},"[show a pie chart with mean of amount_of_transaction, grouped by transaction_type_code]"
3,False,1,7,success,timeout,60.632594,,[show sum of number_of_platforms per location],{},[show a bar graph that X-axis is location and Y-axis is count_of_records]
4,False,3,3,success,success,96.7247,134.095464,[],"{'x': 'season', 'y': 'count of records', 'line': True, 'desc': True}","[show a line graph that x is season, y is count of records, descending sorted by season]"
5,False,3,6,success,success,160.43633,218.395113,[show count of records per sex that only ranks is AsstProf with bar graph],{},"[show bar graph that X-Axis is sex, Y-Axis is count of sex]"
6,False,2,8,success,timeout,97.519329,,[],"{'graph': 'pie', 'order_by': 'STU_GPA', 'limit': 5, 'label': 'stu_fname'}",[show pie chart ]
7,False,4,2,timeout,interruption,,,[],"{'graph': 'bar', 'x': 'head_quaters', 'y': 'count of records', 'separate': 'industry'}","[show bar graph, X is head quaters, Y is count of records, separated by industry]"


In [None]:
vxnli_vnli_qa_df[vxnli_vnli_qa_df["user_id"] == 11]


In [42]:
pre_experiment_qa_df[pre_experiment_qa_df["user_id"] == 12]


Unnamed: 0,timestamp,user_id,Gender,Age,English Level,Visualization Tools You Have Used,Experience of Visualization Tools,Experience of Natural Language Interface for Data Visualization Tools like Tableau Ask Data,Experience of Programming Language Python,Experience of Jupyter Notebook
10,2022-12-30 04:59:56,12,Male,36,2,"Tableau;Python Module for Visualization (Altair, Matplotlib, Plotly, etc)",3,1,5,3


In [43]:
p12_tasks_df = tasks_df[tasks_df["user_id"] == 12]
p12_tasks_df.drop(columns=["user_id", "user_group", "nli_raw_data", "xnli_raw_data"])


Unnamed: 0,is_exercise,xnli_n_trials,nli_n_trials,xnli_status,nli_status,xnli_time_secs,nli_time_secs,xnli_last_args,xnli_last_kwargs,nli_last_args
0,True,1,1,success,success,72.884276,173.392007,"[... bar chart, x=occupation, y=count of records, ]",{},"[... bar chart, x=occupation, y=count of records ...]"
1,True,1,1,success,success,68.715492,88.236934,"[... line chart, x=start_from, y=employee_id ...]",{},"[... line chart, x=start_from, y=employee_id ...]"
2,True,3,2,interruption,success,,280.456651,"[... pie chart, x=mean of amount_of_transaction, pur is blue, sale is orange ...]",{},"[... pie chart, x=mean of amount_of_transaction, label is transaction_type_code, pur is blue, sale is orange ...]"
3,False,1,1,success,success,39.908754,103.551189,"[... bar chart, x=location, y=sum of number_of_platforms ...]",{},"[... bar chart, x = location, y=sum of number_of_platforms ...]"
4,False,2,1,success,success,90.409683,110.037726,"[... line chart, x=season, y=count of records, season is in descending order ...]",{},"[... line chart, x = season, y = count of records, season is in descending order ...]"
5,False,2,2,success,timeout,71.967275,,"[... bar chart, x=sex, y=count of records, The value of Rank must be 'AsstProf' ...]",{},"[... bar chart, x=sex, y=count of records, group by sex ...]"
6,False,3,2,timeout,timeout,,,"[... pie chart, x=stu_fname, top 5 stu_gpa ...]",{},"[... pie chart, x=stu_fname...]"
7,False,4,1,success,timeout,123.319146,,"[... stacked bar chart, x=headquarters, y=count of records, color coded by industry, order by count of records ...]",{},"[... stacked bar chart, x=headquarters, y=count of records, color coded by industry ...]"


In [None]:
vxnli_vnli_qa_df[vxnli_vnli_qa_df["user_id"] == 12]


### P13

In [45]:
pre_experiment_qa_df[pre_experiment_qa_df["user_id"] == 13]


Unnamed: 0,timestamp,user_id,Gender,Age,English Level,Visualization Tools You Have Used,Experience of Visualization Tools,Experience of Natural Language Interface for Data Visualization Tools like Tableau Ask Data,Experience of Programming Language Python,Experience of Jupyter Notebook
11,2022-12-27 03:30:41,13,Male,24,1,"Spreadsheet (Excel, Google Spreadsheet, etc)",1,1,4,2


In [46]:
p13_tasks_df = tasks_df[tasks_df["user_id"] == 13]
p13_tasks_df.drop(columns=["user_id", "user_group", "nli_raw_data", "xnli_raw_data"])


Unnamed: 0,is_exercise,xnli_n_trials,nli_n_trials,xnli_status,nli_status,xnli_time_secs,nli_time_secs,xnli_last_args,xnli_last_kwargs,nli_last_args
0,True,2,1,success,success,214.633254,238.6286,"[bar_graph, occupiration]",{},"[Horizontal axis is occupations, vertical axis is number of records, bar chart]"
1,True,1,1,success,success,36.972023,82.68882,[line graph],{},"[Horizontal axis is start_from, vertical axis is employee_id, line chart]"
2,True,4,5,success,success,234.268641,212.023408,"[pie chart by transaction_type_code, group by amount_of_transaction]",{},"[Pie chart by transaction_type_code, amount_of_transaction]"
3,False,2,2,success,success,156.284104,82.73493,"[bar graph by location, vertical axis is sum of Number_of_Platforms]",{},"[bar graph by location, sum of Number_of_Platforms]"
4,False,3,5,success,success,162.483924,141.408427,"[line graph by season, desc, vertical axis is count of records]",{},"[Horizontal axis is season, vertical axis is 'count of records', line chart, desc]"
5,False,3,3,timeout,interruption,,,"[bar graph by sex, minor scale is 2]",{},"[bar graph by sex, Vertical axis unit is 2]"
6,False,7,4,timeout,interruption,,,"[pie chart by stu_fname, anne, gerald, john, raphael, walter]",{},"[pie graph by stu_fname, Excluding 'william' and 'juliette']"
7,False,3,2,timeout,timeout,,,[bar graph by headquartersorder by count of records],{},"[bar graph by Headquarters, order by 'count of records']"


In [None]:
vxnli_vnli_qa_df[vxnli_vnli_qa_df["user_id"] == 13]


### P14

As we described above, P14 skipped the questionnaire about V-XNLI.
Some logs are broken due to errors.

NameError: name 'vnli' is not defined


We put examples like below.

```
vnli("... scatterplot ...")

vnli("... (not directly, but semantically specify a scatter graph) ...")
```


In [48]:
pre_experiment_qa_df[pre_experiment_qa_df["user_id"] == 14]


Unnamed: 0,timestamp,user_id,Gender,Age,English Level,Visualization Tools You Have Used,Experience of Visualization Tools,Experience of Natural Language Interface for Data Visualization Tools like Tableau Ask Data,Experience of Programming Language Python,Experience of Jupyter Notebook
12,2023-01-02 10:10:59,14,Male,27,1,No Experience,1,1,3,3


In [49]:
p14_tasks_df = tasks_df[tasks_df["user_id"] == 14]
p14_tasks_df.drop(columns=["user_id", "user_group", "nli_raw_data", "xnli_raw_data"])


Unnamed: 0,is_exercise,xnli_n_trials,nli_n_trials,xnli_status,nli_status,xnli_time_secs,nli_time_secs,xnli_last_args,xnli_last_kwargs,nli_last_args
0,True,0,1,success,timeout,,,,,[table]
1,True,0,12,interruption,timeout,,,,,[... Create line graphs with employee id ...]
2,True,0,0,interruption,interruption,,,,,
3,False,5,2,success,success,134.02018,14.468997,[create graph],"{'target': 'location', 'sum': 'number_of_platforms'}",[Create graph with sum of Number_of_platforms for each location ]
4,False,1,10,success,success,5.542552,160.110209,[create a line graph order by desc year],"{'target': 'season', 'count': 'records'}",[Create line graph with count of Records by season ]
5,False,3,1,success,interruption,36.306234,,[create a graph ],"{'target': 'sex', 'count': 'recours'}","[Create graph with count of records for each sex, and the value of Rank must be AssetProf]"
6,False,5,3,success,timeout,128.68536,,[create pie chart],"{'target': 'stu_fname', 'STU_GPA': 'TOP 5'}",[Create a pie chart for each stu_fname and display the TOP of STU_GPA]
7,False,10,3,timeout,interruption,,,[create graph sort by desc],"{'target': 'headeauaters', 'industryColor': ['automotive:blue']}","[Create a graph with the number of records by headquarters, sorted and displayed in order of largest to smallest.]"


In [None]:
vxnli_vnli_qa_df[vxnli_vnli_qa_df["user_id"] == 14]


### P15

In [266]:
pre_experiment_qa_df[pre_experiment_qa_df["user_id"] == 15]


Unnamed: 0,timestamp,user_id,Gender,Age,English Level,Visualization Tools You Have Used,Experience of Visualization Tools,Experience of Natural Language Interface for Data Visualization Tools like Tableau Ask Data,Experience of Programming Language Python,Experience of Jupyter Notebook
13,2023-01-05 09:59:18,15,Male,43,2,"Tableau;Python Module for Visualization (Altair, Matplotlib, Plotly, etc);Spreadsheet (Excel, Google Spreadsheet, etc)",5,2,5,5


In [267]:
tasks_df[tasks_df["user_id"] == 15].drop(
    columns=["user_id", "user_group", "nli_raw_data", "xnli_raw_data"]
)


Unnamed: 0,is_exercise,xnli_n_trials,nli_n_trials,xnli_status,nli_status,xnli_time_secs,nli_time_secs,xnli_last_args,xnli_last_kwargs,nli_last_args
0,True,2,0,success,skip,271.636818,,"[{'figure': 'bar'}, True, occupation]",{},
1,True,3,0,success,skip,149.861379,,"[{'figure': 'line'}, True, {'x': 'start_from'}, {'y': 'employee_id'}, {'order by': 'start_from'}]",{},
2,True,0,0,success,skip,,,,,
3,False,3,0,success,timeout,115.321336,,"[{'figure': 'bar'}, True, location, {'summary': 'sum of number_of_platforms'}]",{},
4,False,2,3,success,success,287.109709,126.395757,"[{'figure': 'line'}, True, {'x': 'season'}, {'summary': 'count of records'}, {'order by': 'season'}]",{},[... (figure is line chart.x is season and y is count of records. x is descending order) ...]
5,False,2,5,timeout,timeout,,,"[{'figure': 'bar'}, True, {'x': 'sex'}, {'summary': 'count of records'}, {'conditions': 'Rank=AsstProf'}]",{},[... (figure is bar chart.x is sex and y is count of records only by 'AsstProf') ...]
6,False,2,4,timeout,timeout,,,"[{'figure': 'pie chart'}, True, {'x': 'stu_fname'}, {'order by': 'STU_GPA'}]",{},[(figure is pie chart.x is stu_fname whitch are top 5 of STU_GPA]
7,False,2,2,timeout,success,,106.813744,"[{'figure': 'stacked bar chart'}, True, {'x': 'headquarters'}, {'summary': 'count of records'}, {'feature by': 'Industry'}]",{},[(figure is stacked bar chart.x is headquarters and y is count of records.x is stacked feature by industry.x is order by count of recored]


### P16

In [51]:
pre_experiment_qa_df[pre_experiment_qa_df["user_id"] == 16]


Unnamed: 0,timestamp,user_id,Gender,Age,English Level,Visualization Tools You Have Used,Experience of Visualization Tools,Experience of Natural Language Interface for Data Visualization Tools like Tableau Ask Data,Experience of Programming Language Python,Experience of Jupyter Notebook
14,2023-01-04 10:31:17,16,Male,28,1,No Experience,1,1,1,1


In [52]:
p16_tasks_df = tasks_df[tasks_df["user_id"] == 16]
p16_tasks_df.drop(columns=["user_id", "user_group", "nli_raw_data", "xnli_raw_data"])


Unnamed: 0,is_exercise,xnli_n_trials,nli_n_trials,xnli_status,nli_status,xnli_time_secs,nli_time_secs,xnli_last_args,xnli_last_kwargs,nli_last_args
0,True,0,2,interruption,success,,259.505052,,,[bar chart group by Occupation]
1,True,1,1,success,success,6.438019,35.224272,[],{'graph': 'line'},[line graph]
2,True,6,7,success,timeout,276.27237,,[],"{'graph': 'pie', 'group': 'transaction_type_code', 'sum': 'amount_of_transaction'}",[pie chart sum amount_of_transaction group by transaction_type_code]
3,False,3,3,success,timeout,79.712267,,[],"{'graph': 'bar', 'group': 'location', 'sum': 'number_of_platforms'}",[bar chart sum number_of_Plattforms group by location]
4,False,5,8,success,success,178.07477,156.574884,[],"{'graph': 'line', 'group': 'season', 'order': 'desc'}",[line graph group by season order by season desc]
5,False,6,7,success,timeout,207.611814,,[],"{'graph': 'bar', 'group': 'sex', 'count': 'Rank AsstProf'}",[bar chart where Rank is AsstProf group by Sex ]
6,False,5,5,success,timeout,113.393069,,[],"{'graph': 'pie', 'bar': 'stu_fname', 'order': 'STU_GPA', 'top': '5'}",[pie chart stu_fname]
7,False,0,3,success,timeout,,,,,[bar chart headquarters every industry]


In [53]:
vxnli_vnli_qa_df[vxnli_vnli_qa_df["user_id"] == 16]


Unnamed: 0,user_id,is_vxnli,01_score,01_reason__easy_to_use_interface_?,02_score,02_reason__easy_to_fix_mistakes_?,03_score,03_reason__many_interpretation_mistakes_?,04_score,04_reason__can_not_understand_requests_?,05_good_points_and_bad_points,06_additional_comments
27,16,False,2,,2,,3,,3,,,
28,16,True,4,,4,,2,,2,,,


In [None]:
post_experiment_qa_df[post_experiment_qa_df["user_id"] == 16]


## Analyze All Participants

In [None]:
tasks_df = tasks_df[""]


In [55]:
tasks_df[(tasks_df["user_group"] == "A") & ~tasks_df["is_exercise"]].groupby(
    "user_id"
).apply(lambda x: len(x[x["nli_status"] == "success"]) / 5).mean()


0.3428571428571429

In [56]:
tasks_df[(tasks_df["user_group"] == "B") & ~tasks_df["is_exercise"]].groupby(
    "user_id"
).apply(lambda x: len(x[x["xnli_status"] == "success"]) / 5).mean()


0.55

### Break

In [133]:
for i in range(2, 17, 2):
    if i == 4:
        continue

    task_b_start_time = tasks_df[tasks_df["user_id"] == i].iloc[0]["xnli_raw_data"][
        "events"
    ][0]["timestamp"]
    task_a_end_time = tasks_df[tasks_df["user_id"] == i].iloc[7]["nli_raw_data"][
        "events"
    ][-1]["timestamp"]

    print(f"{i:2d}: {(task_b_start_time - task_a_end_time) / 60:.3f} min")


 2: 11.342 min
 6: 179.690 min
 8: 802.213 min
10: 25.364 min
12: 11.420 min
14: 640.074 min
16: 10.671 min


In [134]:
for i in range(1, 17, 2):
    task_b_start_time = tasks_df[tasks_df["user_id"] == i].iloc[0]["nli_raw_data"][
        "events"
    ][0]["timestamp"]
    task_a_end_time = tasks_df[tasks_df["user_id"] == i].iloc[7]["xnli_raw_data"][
        "events"
    ][-1]["timestamp"]

    print(f"{i:2d}: {(task_b_start_time - task_a_end_time) / 60:.3f} min")


 1: 4.884 min
 3: 4.705 min
 5: 6.302 min
 7: 48.686 min
 9: 9.128 min
11: 17.651 min
13: 17.298 min
15: 21.864 min


### Total Time

In [105]:
total_time_df = tasks_df
total_time_df = total_time_df[~total_time_df["user_id"].isin([8, 12])]

total_time = 0
total_time += (
    total_time_df[total_time_df["is_exercise"]].fillna(300)["nli_time_secs"].sum()
)
total_time += (
    total_time_df[~total_time_df["is_exercise"]].fillna(180)["nli_time_secs"].sum()
)
total_time += (
    total_time_df[total_time_df["is_exercise"]].fillna(300)["xnli_time_secs"].sum()
)
total_time += (
    total_time_df[~total_time_df["is_exercise"]].fillna(180)["xnli_time_secs"].sum()
)

# minutes
total_time / 13 / 60


37.40100302115465

In [138]:
total_time_df = pre_experiment_qa_df[["timestamp", "user_id"]].join(
    post_experiment_qa_df[["timestamp", "user_id"]], rsuffix="_post"
)

total_time_df


Unnamed: 0,timestamp,user_id,timestamp_post,user_id_post
0,2022-12-26 10:31:00,1,2022-12-26 11:14:54,1
1,2022-12-25 11:22:33,2,2022-12-26 12:26:16,2
2,2022-12-26 04:57:41,3,2022-12-26 05:50:56,3
3,2023-01-05 03:06:40,5,2023-01-05 03:49:47,5
4,2022-12-28 05:33:05,6,2022-12-28 10:13:56,6
5,2022-12-26 03:12:22,7,2022-12-26 05:11:10,7
6,2022-12-28 01:54:47,8,2022-12-28 04:13:13,8
7,2022-12-28 11:51:39,9,2022-12-28 12:50:41,9
8,2022-12-29 11:44:53,10,2022-12-29 01:30:27,10
9,2023-01-02 02:41:27,11,2023-01-02 03:56:34,11


In [139]:
total_time_df = total_time_df[~total_time_df["user_id"].isin([8, 12])]
total_time_df = pd.to_datetime(total_time_df["timestamp_post"]) - pd.to_datetime(
    total_time_df["timestamp"]
)
total_time_df


0      0 days 00:43:54
1      1 days 01:03:43
2      0 days 00:53:15
3      0 days 00:43:07
4      0 days 04:40:51
5      0 days 01:58:48
7      0 days 00:59:02
8    -1 days +13:45:34
9      0 days 01:15:07
11     0 days 01:53:45
12     1 days 00:09:37
13     0 days 02:01:11
14     1 days 01:45:23
dtype: timedelta64[ns]

In [140]:
total_time_df = total_time_df[
    (timedelta(minutes=1) < total_time_df) & (total_time_df < timedelta(hours=2))
]
total_time_df


0    0 days 00:43:54
2    0 days 00:53:15
3    0 days 00:43:07
5    0 days 01:58:48
7    0 days 00:59:02
9    0 days 01:15:07
11   0 days 01:53:45
dtype: timedelta64[ns]

In [104]:
total_time_df.mean()


Timedelta('0 days 01:18:31.125000')

### The number of trials

In [59]:
df = tasks_df
df = df[~df["is_exercise"]]
df = df[["xnli_n_trials", "nli_n_trials"]]

df.apply(pd.Series.value_counts)


Unnamed: 0,xnli_n_trials,nli_n_trials
0,2,2.0
1,16,21.0
2,14,15.0
3,12,9.0
4,8,8.0
5,8,8.0
6,4,4.0
7,2,4.0
8,5,3.0
9,2,


### Task Completion Rate A

#### All

In [170]:
task_completion_df = tasks_df
task_completion_df = task_completion_df[~task_completion_df["is_exercise"]]
task_completion_df = task_completion_df[["xnli_status", "nli_status"]]
task_completion_df = task_completion_df.apply(pd.Series.value_counts)

display(task_completion_df)

task_completion_df.loc["success"] / task_completion_df.sum()


Unnamed: 0,xnli_status,nli_status
interruption,8,19
skip,1,1
success,48,36
timeout,18,19


xnli_status    0.64
nli_status     0.48
dtype: float64

In [171]:
task_completion_df = tasks_df[~tasks_df["user_id"].isin([8, 12, 15])]
task_completion_df = task_completion_df[~task_completion_df["is_exercise"]]
task_completion_df = task_completion_df[["xnli_status", "nli_status"]]
task_completion_df = task_completion_df.apply(pd.Series.value_counts)

display(task_completion_df)

task_completion_df.loc["success"] / task_completion_df.sum()


Unnamed: 0,xnli_status,nli_status
interruption,6,18
success,40,30
timeout,14,12


xnli_status    0.666667
nli_status     0.500000
dtype: float64

In [172]:
task_completion_df = tasks_df[~tasks_df["user_id"].isin([8, 12])]
task_completion_df = task_completion_df[~task_completion_df["is_exercise"]]
task_completion_df = task_completion_df[["xnli_status", "nli_status"]]
task_completion_df = task_completion_df.apply(pd.Series.value_counts)

display(task_completion_df)

task_completion_df.loc["success"] / task_completion_df.sum()


Unnamed: 0,xnli_status,nli_status
interruption,6,18
success,42,32
timeout,17,15


xnli_status    0.646154
nli_status     0.492308
dtype: float64

#### Group A

In [173]:
task_completion_df = tasks_df
task_completion_df = task_completion_df[task_completion_df["user_group"] == "A"]
task_completion_df = task_completion_df[~task_completion_df["is_exercise"]]
task_completion_df = task_completion_df[["xnli_status", "nli_status"]]
task_completion_df = task_completion_df.apply(pd.Series.value_counts)

display(task_completion_df)

task_completion_df.loc["success"] / task_completion_df.sum()


Unnamed: 0,xnli_status,nli_status
interruption,5,10
skip,1,1
success,26,12
timeout,3,12


xnli_status    0.742857
nli_status     0.342857
dtype: float64

In [174]:
task_completion_df = tasks_df[~tasks_df["user_id"].isin([8, 12])]
task_completion_df = task_completion_df[task_completion_df["user_group"] == "A"]
task_completion_df = task_completion_df[~task_completion_df["is_exercise"]]
task_completion_df = task_completion_df[["xnli_status", "nli_status"]]
task_completion_df = task_completion_df.apply(pd.Series.value_counts)

display(task_completion_df)

task_completion_df.loc["success"] / task_completion_df.sum()


Unnamed: 0,xnli_status,nli_status
interruption,3,9
success,20,8
timeout,2,8


xnli_status    0.80
nli_status     0.32
dtype: float64

#### Group B

In [175]:
task_completion_df = tasks_df
task_completion_df = task_completion_df[task_completion_df["user_group"] == "B"]
task_completion_df = task_completion_df[~task_completion_df["is_exercise"]]
task_completion_df = task_completion_df[["xnli_status", "nli_status"]]
task_completion_df = task_completion_df.apply(pd.Series.value_counts)

display(task_completion_df)

task_completion_df.loc["success"] / task_completion_df.sum()


Unnamed: 0,xnli_status,nli_status
interruption,3,9
success,22,24
timeout,15,7


xnli_status    0.55
nli_status     0.60
dtype: float64

In [176]:
task_completion_df = tasks_df[~tasks_df["user_id"].isin([15])]
task_completion_df = task_completion_df[task_completion_df["user_group"] == "B"]
task_completion_df = task_completion_df[~task_completion_df["is_exercise"]]
task_completion_df = task_completion_df[["xnli_status", "nli_status"]]
task_completion_df = task_completion_df.apply(pd.Series.value_counts)

display(task_completion_df)

task_completion_df.loc["success"] / task_completion_df.sum()


Unnamed: 0,xnli_status,nli_status
interruption,3,9
success,20,22
timeout,12,4


xnli_status    0.571429
nli_status     0.628571
dtype: float64

#### P1 to P10

In [177]:
task_completion_df = tasks_df[~tasks_df["user_id"].isin([8])]
task_completion_df = task_completion_df[task_completion_df["user_id"] <= 10]
task_completion_df = task_completion_df[~task_completion_df["is_exercise"]]
task_completion_df = task_completion_df[["xnli_status", "nli_status"]]
task_completion_df = task_completion_df.apply(pd.Series.value_counts)

display(task_completion_df)

task_completion_df.loc["success"] / task_completion_df.sum()


Unnamed: 0,xnli_status,nli_status
interruption,6,13
success,25,23
timeout,9,4


xnli_status    0.625
nli_status     0.575
dtype: float64

In [64]:
task_completion_df = tasks_df[~tasks_df["user_id"].isin([8])]
task_completion_df = task_completion_df[task_completion_df["user_id"] <= 10]
task_completion_df = task_completion_df[~task_completion_df["is_exercise"]]
task_completion_df = task_completion_df[["xnli_status", "nli_status"]]
task_completion_df = task_completion_df.apply(pd.Series.value_counts)

display(task_completion_df)

task_completion_df.loc["success"] / task_completion_df.sum()


Unnamed: 0,xnli_status,nli_status
interruption,6,13
success,25,23
timeout,9,4


xnli_status    0.625
nli_status     0.575
dtype: float64

#### P1 to P10 (Group A)

In [65]:
task_completion_df = tasks_df[~tasks_df["user_id"].isin([8])]
task_completion_df = task_completion_df[task_completion_df["user_group"] == "A"]
task_completion_df = task_completion_df[task_completion_df["user_id"] <= 10]
task_completion_df = task_completion_df[~task_completion_df["is_exercise"]]
task_completion_df = task_completion_df[["xnli_status", "nli_status"]]
task_completion_df = task_completion_df.apply(pd.Series.value_counts)

display(task_completion_df)

task_completion_df.loc["success"] / task_completion_df.sum()


Unnamed: 0,xnli_status,nli_status
interruption,3,7
success,11,5
timeout,1,3


xnli_status    0.733333
nli_status     0.333333
dtype: float64

In [180]:
task_completion_df = tasks_df
task_completion_df = task_completion_df[task_completion_df["user_group"] == "A"]
task_completion_df = task_completion_df[task_completion_df["user_id"] <= 10]
task_completion_df = task_completion_df[~task_completion_df["is_exercise"]]
task_completion_df = task_completion_df[["xnli_status", "nli_status"]]
task_completion_df = task_completion_df.apply(pd.Series.value_counts)

display(task_completion_df)

task_completion_df.loc["success"] / task_completion_df.sum()


Unnamed: 0,xnli_status,nli_status
interruption,5,8
skip,1,1
success,13,7
timeout,1,4


xnli_status    0.65
nli_status     0.35
dtype: float64

#### P1 to P10 (Group B)

In [182]:
task_completion_df = tasks_df
task_completion_df = task_completion_df[task_completion_df["user_group"] == "B"]
task_completion_df = task_completion_df[task_completion_df["user_id"] <= 10]
task_completion_df = task_completion_df[~task_completion_df["is_exercise"]]
task_completion_df = task_completion_df[["xnli_status", "nli_status"]]
task_completion_df = task_completion_df.apply(pd.Series.value_counts)

display(task_completion_df)

task_completion_df.loc["success"] / task_completion_df.sum()


Unnamed: 0,xnli_status,nli_status
interruption,3,6
success,14,18
timeout,8,1


xnli_status    0.56
nli_status     0.72
dtype: float64

#### P11 to P16

In [183]:
task_completion_df = tasks_df
task_completion_df = task_completion_df[task_completion_df["user_id"] > 10]
task_completion_df = task_completion_df[~task_completion_df["is_exercise"]]
task_completion_df = task_completion_df[["xnli_status", "nli_status"]]
task_completion_df = task_completion_df.apply(pd.Series.value_counts)

display(task_completion_df)

task_completion_df.loc["success"] / task_completion_df.sum()


Unnamed: 0,xnli_status,nli_status
interruption,,5
success,21.0,11
timeout,9.0,14


xnli_status    0.700000
nli_status     0.366667
dtype: float64

In [184]:
task_completion_df = tasks_df
task_completion_df = tasks_df[~tasks_df["user_id"].isin([12])]
task_completion_df = task_completion_df[task_completion_df["user_id"] > 10]
task_completion_df = task_completion_df[~task_completion_df["is_exercise"]]
task_completion_df = task_completion_df[["xnli_status", "nli_status"]]
task_completion_df = task_completion_df.apply(pd.Series.value_counts)

display(task_completion_df)

task_completion_df.loc["success"] / task_completion_df.sum()


Unnamed: 0,xnli_status,nli_status
interruption,,5
success,17.0,9
timeout,8.0,11


xnli_status    0.68
nli_status     0.36
dtype: float64

In [185]:
task_completion_df = tasks_df
task_completion_df = tasks_df[~tasks_df["user_id"].isin([12, 15])]
task_completion_df = task_completion_df[task_completion_df["user_id"] > 10]
task_completion_df = task_completion_df[~task_completion_df["is_exercise"]]
task_completion_df = task_completion_df[["xnli_status", "nli_status"]]
task_completion_df = task_completion_df.apply(pd.Series.value_counts)

display(task_completion_df)

task_completion_df.loc["success"] / task_completion_df.sum()


Unnamed: 0,xnli_status,nli_status
interruption,,5
success,15.0,7
timeout,5.0,8


xnli_status    0.75
nli_status     0.35
dtype: float64

#### P11 to P16 (Group A)

In [188]:
task_completion_df = tasks_df
task_completion_df = task_completion_df[task_completion_df["user_group"] == "A"]
task_completion_df = task_completion_df[task_completion_df["user_id"] > 10]
task_completion_df = task_completion_df[~task_completion_df["is_exercise"]]
task_completion_df = task_completion_df[["xnli_status", "nli_status"]]
task_completion_df = task_completion_df.apply(pd.Series.value_counts)

display(task_completion_df)

task_completion_df.loc["success"] / task_completion_df.sum()


Unnamed: 0,xnli_status,nli_status
interruption,,2
success,13.0,5
timeout,2.0,8


xnli_status    0.866667
nli_status     0.333333
dtype: float64

In [192]:
task_completion_df = tasks_df
task_completion_df = task_completion_df[task_completion_df["user_group"] == "A"]
task_completion_df = task_completion_df[~task_completion_df["user_id"].isin([12])]
task_completion_df = task_completion_df[task_completion_df["user_id"] > 10]
task_completion_df = task_completion_df[~task_completion_df["is_exercise"]]
task_completion_df = task_completion_df[["xnli_status", "nli_status"]]
task_completion_df = task_completion_df.apply(pd.Series.value_counts)

display(task_completion_df)

task_completion_df.loc["success"] / task_completion_df.sum()


Unnamed: 0,xnli_status,nli_status
interruption,,2
success,9.0,3
timeout,1.0,5


xnli_status    0.9
nli_status     0.3
dtype: float64

#### P11 to P16 (Group B)

In [193]:
task_completion_df = tasks_df
task_completion_df = task_completion_df[task_completion_df["user_group"] == "B"]
task_completion_df = task_completion_df[task_completion_df["user_id"] > 10]
task_completion_df = task_completion_df[~task_completion_df["is_exercise"]]
task_completion_df = task_completion_df[["xnli_status", "nli_status"]]
task_completion_df = task_completion_df.apply(pd.Series.value_counts)

display(task_completion_df)

task_completion_df.loc["success"] / task_completion_df.sum()


Unnamed: 0,xnli_status,nli_status
interruption,,3
success,8.0,6
timeout,7.0,6


xnli_status    0.533333
nli_status     0.400000
dtype: float64

In [194]:
task_completion_df = tasks_df
task_completion_df = task_completion_df[task_completion_df["user_group"] == "B"]
task_completion_df = task_completion_df[~task_completion_df["user_id"].isin([15])]
task_completion_df = task_completion_df[task_completion_df["user_id"] > 10]
task_completion_df = task_completion_df[~task_completion_df["is_exercise"]]
task_completion_df = task_completion_df[["xnli_status", "nli_status"]]
task_completion_df = task_completion_df.apply(pd.Series.value_counts)

display(task_completion_df)

task_completion_df.loc["success"] / task_completion_df.sum()


Unnamed: 0,xnli_status,nli_status
interruption,,3
success,6.0,4
timeout,4.0,3


xnli_status    0.6
nli_status     0.4
dtype: float64

### Task Completion Rate B

#### Success Count Diff

In [204]:
task_completion_df = tasks_df
task_completion_df = task_completion_df[~task_completion_df["is_exercise"]]
task_completion_df = task_completion_df.groupby("user_id", group_keys=True).apply(
    lambda df: sum(df["xnli_status"] == "success") - sum(df["nli_status"] == "success")
)

task_completion_df


user_id
1     1
2     2
3     0
5    -2
6     1
7    -2
8     0
9    -1
10    3
11    2
12    2
13    0
14    2
15    0
16    4
dtype: int64

#### Within Subjects

In [222]:
task_completion_df = tasks_df
task_completion_df = task_completion_df[~task_completion_df.isin([8, 12, 15])]
task_completion_df = task_completion_df[task_completion_df["user_group"] == "A"]
task_completion_df = task_completion_df[~task_completion_df["is_exercise"]]
task_completion_df = task_completion_df.groupby("user_id", group_keys=True).apply(
    lambda df: (
        sum(df["xnli_status"] == "success") - sum(df["nli_status"] == "success")
    )
    / 5
)

display(task_completion_df)

task_completion_df.mean()


user_id
2.0     0.4
6.0     0.2
10.0    0.6
14.0    0.4
16.0    0.8
dtype: float64

0.4800000000000001

In [223]:
task_completion_df = tasks_df
task_completion_df = task_completion_df[~task_completion_df.isin([8, 12, 15])]
task_completion_df = task_completion_df[task_completion_df["user_group"] == "B"]
task_completion_df = task_completion_df[~task_completion_df["is_exercise"]]
task_completion_df = task_completion_df.groupby("user_id", group_keys=True).apply(
    lambda df: (
        sum(df["xnli_status"] == "success") - sum(df["nli_status"] == "success")
    )
    / 5
)

display(task_completion_df)

task_completion_df.mean()


user_id
1.0     0.2
3.0     0.0
5.0    -0.4
7.0    -0.4
9.0    -0.2
11.0    0.4
13.0    0.0
dtype: float64

-0.05714285714285715

In [221]:
(0.4800000000000001 - 0.05714285714285715) / 2


0.21142857142857147

In [224]:
task_completion_df = tasks_df
# task_completion_df = task_completion_df[~task_completion_df.isin([8, 12, 15])]
task_completion_df = task_completion_df[task_completion_df["user_group"] == "A"]
task_completion_df = task_completion_df[~task_completion_df["is_exercise"]]
task_completion_df = task_completion_df.groupby("user_id", group_keys=True).apply(
    lambda df: (
        sum(df["xnli_status"] == "success") - sum(df["nli_status"] == "success")
    )
    / 5
)

display(task_completion_df)

task_completion_df.mean()


user_id
2     0.4
6     0.2
8     0.0
10    0.6
12    0.4
14    0.4
16    0.8
dtype: float64

0.39999999999999997

In [225]:
task_completion_df = tasks_df
# task_completion_df = task_completion_df[~task_completion_df.isin([8, 12, 15])]
task_completion_df = task_completion_df[task_completion_df["user_group"] == "B"]
task_completion_df = task_completion_df[~task_completion_df["is_exercise"]]
task_completion_df = task_completion_df.groupby("user_id", group_keys=True).apply(
    lambda df: (
        sum(df["xnli_status"] == "success") - sum(df["nli_status"] == "success")
    )
    / 5
)

display(task_completion_df)

task_completion_df.mean()


user_id
1     0.2
3     0.0
5    -0.4
7    -0.4
9    -0.2
11    0.4
13    0.0
15    0.0
dtype: float64

-0.05000000000000001

#### Between Subjects

In [208]:
task_completion_df = tasks_df
task_completion_df = task_completion_df[~task_completion_df.isin([8, 12])]
task_completion_df = task_completion_df[task_completion_df["user_group"] == "A"]
task_completion_df = task_completion_df[~task_completion_df["is_exercise"]]
task_completion_df = task_completion_df.groupby("user_id", group_keys=True).apply(
    lambda df: sum(df["nli_status"] == "success")
)

display(task_completion_df)

task_completion_df.mean() / 5


user_id
2.0     2
6.0     2
10.0    1
14.0    2
16.0    1
dtype: int64

0.32

In [209]:
task_completion_df = tasks_df
task_completion_df = task_completion_df[~task_completion_df.isin([8, 12])]
task_completion_df = task_completion_df[task_completion_df["user_group"] == "B"]
task_completion_df = task_completion_df[~task_completion_df["is_exercise"]]
task_completion_df = task_completion_df.groupby("user_id", group_keys=True).apply(
    lambda df: sum(df["xnli_status"] == "success")
)

display(task_completion_df)

task_completion_df.mean() / 5


user_id
1.0     3
3.0     2
5.0     3
7.0     3
9.0     3
11.0    4
13.0    2
15.0    2
dtype: int64

0.55

In [227]:
task_completion_df = tasks_df
task_completion_df = task_completion_df[~task_completion_df.isin([8, 12, 15])]
task_completion_df = task_completion_df[task_completion_df["user_group"] == "B"]
task_completion_df = task_completion_df[~task_completion_df["is_exercise"]]
task_completion_df = task_completion_df.groupby("user_id", group_keys=True).apply(
    lambda df: sum(df["xnli_status"] == "success")
)

display(task_completion_df)

task_completion_df.mean() / 5


user_id
1.0     3
3.0     2
5.0     3
7.0     3
9.0     3
11.0    4
13.0    2
dtype: int64

0.5714285714285714

#### Between Subjects (Back 5 Tasks)

In [230]:
task_completion_df = tasks_df
# task_completion_df = task_completion_df[~task_completion_df.isin([8, 12])]
task_completion_df = task_completion_df[task_completion_df["user_group"] == "A"]
task_completion_df = task_completion_df[~task_completion_df["is_exercise"]]
task_completion_df = task_completion_df.groupby("user_id", group_keys=True).apply(
    lambda df: sum(df["xnli_status"] == "success")
)

display(task_completion_df)

task_completion_df.mean() / 5


user_id
2     4
6     3
8     2
10    4
12    4
14    4
16    5
dtype: int64

0.7428571428571429

In [231]:
task_completion_df = tasks_df
# task_completion_df = task_completion_df[~task_completion_df.isin([8, 12, 15])]
task_completion_df = task_completion_df[task_completion_df["user_group"] == "B"]
task_completion_df = task_completion_df[~task_completion_df["is_exercise"]]
task_completion_df = task_completion_df.groupby("user_id", group_keys=True).apply(
    lambda df: sum(df["nli_status"] == "success")
)

display(task_completion_df)

task_completion_df.mean() / 5


user_id
1     2
3     2
5     5
7     5
9     4
11    2
13    2
15    2
dtype: int64

0.6

#### Between Subjects (P1 - P10)

In [210]:
task_completion_df = tasks_df
task_completion_df = task_completion_df[task_completion_df["user_id"] <= 10]
task_completion_df = task_completion_df[~task_completion_df.isin([8, 12])]
task_completion_df = task_completion_df[task_completion_df["user_group"] == "A"]
task_completion_df = task_completion_df[~task_completion_df["is_exercise"]]
task_completion_df = task_completion_df.groupby("user_id", group_keys=True).apply(
    lambda df: sum(df["nli_status"] == "success")
)

display(task_completion_df)

task_completion_df.mean() / 5


user_id
2.0     2
6.0     2
10.0    1
dtype: int64

0.33333333333333337

In [212]:
task_completion_df = tasks_df
task_completion_df = task_completion_df[task_completion_df["user_id"] <= 10]
task_completion_df = task_completion_df[~task_completion_df.isin([8, 12])]
task_completion_df = task_completion_df[task_completion_df["user_group"] == "B"]
task_completion_df = task_completion_df[~task_completion_df["is_exercise"]]
task_completion_df = task_completion_df.groupby("user_id", group_keys=True).apply(
    lambda df: sum(df["xnli_status"] == "success")
)

display(task_completion_df)

task_completion_df.mean() / 5


user_id
1.0    3
3.0    2
5.0    3
7.0    3
9.0    3
dtype: int64

0.5599999999999999

#### Between Subjects (P11 - P16)

In [213]:
task_completion_df = tasks_df
task_completion_df = task_completion_df[task_completion_df["user_id"] > 10]
task_completion_df = task_completion_df[~task_completion_df.isin([8, 12])]
task_completion_df = task_completion_df[task_completion_df["user_group"] == "A"]
task_completion_df = task_completion_df[~task_completion_df["is_exercise"]]
task_completion_df = task_completion_df.groupby("user_id", group_keys=True).apply(
    lambda df: sum(df["nli_status"] == "success")
)

display(task_completion_df)

task_completion_df.mean() / 5


user_id
14.0    2
16.0    1
dtype: int64

0.3

In [214]:
task_completion_df = tasks_df
task_completion_df = task_completion_df[task_completion_df["user_id"] > 10]
task_completion_df = task_completion_df[~task_completion_df.isin([8, 12])]
task_completion_df = task_completion_df[task_completion_df["user_group"] == "B"]
task_completion_df = task_completion_df[~task_completion_df["is_exercise"]]
task_completion_df = task_completion_df.groupby("user_id", group_keys=True).apply(
    lambda df: sum(df["xnli_status"] == "success")
)

display(task_completion_df)

task_completion_df.mean() / 5


user_id
11.0    4
13.0    2
15.0    2
dtype: int64

0.5333333333333333

#### (I forgot the purpose of this section)

In [66]:
tasks_df[
    ~tasks_df["is_exercise"]
    & (tasks_df["user_id"] <= 10)
    & (tasks_df["user_group"] == "B")
][["xnli_status", "nli_status"]].apply(pd.Series.value_counts)


Unnamed: 0,xnli_status,nli_status
interruption,3,6
success,14,18
timeout,8,1


In [67]:
# 76%

tasks_df[~tasks_df["is_exercise"] & (tasks_df["user_id"] > 10)][
    ["xnli_status", "nli_status"]
].apply(pd.Series.value_counts)


Unnamed: 0,xnli_status,nli_status
interruption,,5
success,21.0,11
timeout,9.0,14


In [68]:
df[~df["is_exercise"]].groupby("user_id", group_keys=True).apply(
    lambda df: df[df["xnli_status"] == "success"].count()
    > df[df["nli_status"] == "success"].count()
)


Unnamed: 0_level_0,user_id,user_group,xnli_raw_data,is_exercise,nli_raw_data,xnli_n_trials,nli_n_trials,xnli_status,nli_status,xnli_time_secs,nli_time_secs,xnli_last_args,xnli_last_kwargs,nli_last_args
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1,True,True,True,True,True,True,True,True,True,True,False,True,True,True
2,True,True,True,True,True,True,True,True,True,True,False,True,True,True
3,False,False,False,False,False,False,False,False,False,False,False,False,False,False
5,False,False,False,False,False,False,False,False,False,False,False,False,False,False
6,True,True,True,True,True,True,True,True,True,True,False,True,True,True
7,False,False,False,False,False,False,False,False,False,False,False,False,False,False
8,False,False,False,False,False,False,False,False,False,False,False,False,False,False
9,False,False,False,False,False,False,False,False,False,True,False,False,False,False
10,True,True,True,True,True,True,True,True,True,True,False,True,True,True
11,True,True,True,True,True,True,True,True,True,True,False,True,True,True


###  Task Completion Rate (Per Task)

In [16]:
task_completion_df = tasks_df.copy()
task_completion_df = task_completion_df[
    ~task_completion_df["user_id"].isin([8, 12, 15])
]
task_completion_df = task_completion_df[~task_completion_df["is_exercise"]]
task_completion_df = task_completion_df[
    ["user_id", "user_group", "xnli_status", "nli_status"]
]
task_completion_df["task_id"] = [i % 5 + 1 for i in range(len(task_completion_df))]

# display(task_completion_df)

task_completion_df.groupby("task_id").apply(
    lambda df: (df["xnli_status"] == "success").mean()
)


task_id
1    1.000000
2    0.833333
3    0.583333
4    0.583333
5    0.333333
dtype: float64

In [17]:
task_completion_df = tasks_df.copy()
task_completion_df = task_completion_df[~task_completion_df["user_id"].isin([8, 12])]
task_completion_df = task_completion_df[task_completion_df["user_group"] == "A"]
task_completion_df = task_completion_df[~task_completion_df["is_exercise"]]
task_completion_df = task_completion_df[
    ["user_id", "user_group", "xnli_status", "nli_status"]
]
task_completion_df["task_id"] = [i % 5 + 1 for i in range(len(task_completion_df))]

# display(task_completion_df)

task_completion_df.groupby("task_id").apply(
    lambda df: (df["nli_status"] == "success").mean()
)


task_id
1    0.8
2    0.4
3    0.4
4    0.0
5    0.0
dtype: float64

In [18]:
task_completion_df = tasks_df.copy()
task_completion_df = task_completion_df[~task_completion_df["user_id"].isin([8, 12])]
task_completion_df = task_completion_df[task_completion_df["user_group"] == "B"]
task_completion_df = task_completion_df[~task_completion_df["is_exercise"]]
task_completion_df = task_completion_df[
    ["user_id", "user_group", "xnli_status", "nli_status"]
]
task_completion_df["task_id"] = [i % 5 + 1 for i in range(len(task_completion_df))]

# display(task_completion_df)

task_completion_df.groupby("task_id").apply(
    lambda df: (df["xnli_status"] == "success").mean()
)


task_id
1    1.000
2    0.875
3    0.500
4    0.250
5    0.125
dtype: float64

In [19]:
task_completion_df = tasks_df.copy()
task_completion_df = task_completion_df[
    ~task_completion_df["user_id"].isin([8, 12, 15])
]
task_completion_df = task_completion_df[task_completion_df["user_group"] == "B"]
task_completion_df = task_completion_df[~task_completion_df["is_exercise"]]
task_completion_df = task_completion_df[
    ["user_id", "user_group", "xnli_status", "nli_status"]
]
task_completion_df["task_id"] = [i % 5 + 1 for i in range(len(task_completion_df))]

# display(task_completion_df)

print("V-NLI")
print(
    task_completion_df.groupby("task_id")
    .apply(lambda df: (df["nli_status"] == "success").mean() * 100)
    .values.tolist()
)

print("V-XNLI")
print(
    task_completion_df.groupby("task_id")
    .apply(lambda df: (df["xnli_status"] == "success").mean() * 100)
    .values.tolist()
)


V-NLI
[85.71428571428571, 85.71428571428571, 71.42857142857143, 28.57142857142857, 42.857142857142854]
V-XNLI
[100.0, 85.71428571428571, 57.14285714285714, 28.57142857142857, 14.285714285714285]


In [9]:
# Between Subjects (Top 5 Tasks)

between_top5_vnli_df = pd.DataFrame(
    {
        "Task": ["Task 1", "Task 2", "Task 3", "Task 4", "Task 5"],
        "Task Completion [%]": [80, 40, 40, 0, 0],
    }
)

between_top5_vnli_df["UI"] = "V-NLI"
between_top5_vnli_df["Method"] = "Between Subjects (Top 5 Tasks)"

between_top5_vxnli_df = pd.DataFrame(
    {
        "Task": ["Task 1", "Task 2", "Task 3", "Task 4", "Task 5"],
        "Task Completion [%]": [100, 87.5, 50, 25, 12.5],
    }
)

between_top5_vxnli_df["UI"] = "V-XNLI"
between_top5_vxnli_df["Method"] = "Between Subjects (Top 5 Tasks)"

between_top5_df = pd.concat([between_top5_vnli_df, between_top5_vxnli_df])


# Between Subjects (Bottom 5 Tasks)

between_bottom5_vnli_df = pd.DataFrame(
    {
        "Task": ["Task 1", "Task 2", "Task 3", "Task 4", "Task 5"],
        "Task Completion [%]": [75.0, 87.5, 62.5, 25.0, 50.0],
    }
)

between_bottom5_vnli_df["UI"] = "V-NLI"
between_bottom5_vnli_df["Method"] = "Between Subjects (Bottom 5 Tasks)"

between_bottom5_vxnli_df = pd.DataFrame(
    {
        "Task": ["Task 1", "Task 2", "Task 3", "Task 4", "Task 5"],
        "Task Completion [%]": [100.0, 80.0, 60.0, 100.0, 60.0],
    }
)

between_bottom5_vxnli_df["UI"] = "V-XNLI"
between_bottom5_vxnli_df["Method"] = "Between Subjects (Bottom 5 Tasks)"

between_bottom5_df = pd.concat([between_bottom5_vnli_df, between_bottom5_vxnli_df])


# Within Subjects (User Group A)

within_a_vnli_df = pd.DataFrame(
    {
        "Task": ["Task 1", "Task 2", "Task 3", "Task 4", "Task 5"],
        "Task Completion [%]": [80.0, 40.0, 40.0, 0.0, 0.0],
    }
)

within_a_vnli_df["UI"] = "V-NLI"
within_a_vnli_df["Method"] = "Within Subjects (User Group A)"

within_a_vxnli_df = pd.DataFrame(
    {
        "Task": ["Task 1", "Task 2", "Task 3", "Task 4", "Task 5"],
        "Task Completion [%]": [100.0, 80.0, 60.0, 100.0, 60.0],
    }
)

within_a_vxnli_df["UI"] = "V-XNLI"
within_a_vxnli_df["Method"] = "Within Subjects (User Group A)"

within_a_df = pd.concat([within_a_vnli_df, within_a_vxnli_df])


# Within Subjects (User Group B)

within_b_vnli_df = pd.DataFrame(
    {
        "Task": ["Task 1", "Task 2", "Task 3", "Task 4", "Task 5"],
        "Task Completion [%]": [
            85.71428571428571,
            85.71428571428571,
            71.42857142857143,
            28.57142857142857,
            42.857142857142854,
        ],
    }
)

within_b_vnli_df["UI"] = "V-NLI"
within_b_vnli_df["Method"] = "Within Subjects (User Group A)"

within_b_vxnli_df = pd.DataFrame(
    {
        "Task": ["Task 1", "Task 2", "Task 3", "Task 4", "Task 5"],
        "Task Completion [%]": [
            100.0,
            85.71428571428571,
            57.14285714285714,
            28.57142857142857,
            14.285714285714285,
        ],
    }
)

within_b_vxnli_df["UI"] = "V-XNLI"
within_b_vxnli_df["Method"] = "Within Subjects (User Group A)"

within_b_df = pd.concat([within_b_vnli_df, within_b_vxnli_df])


In [None]:
fig, ax = plt.subplots(1, 2, figsize=(5.5 * 2, 4.8))

# gridspec_kw={"width_ratios": [5.5, 5.5, 5.5, 0.3]},


foo = sns.barplot(
    between_top5_df,
    y="Task Completion [%]",
    x="Task",
    hue="UI",
    ax=ax[0],
)

foo.legend_.remove()


sns.barplot(
    between_bottom5_df,
    y="Task Completion [%]",
    x="Task",
    hue="UI",
    ax=ax[1],
)

ax[0].set_xlabel("Top 5 Tasks")
# ax[0].set_ylabel('')
ax[1].set_xlabel("Bottom 5 Tasks")
ax[1].set_ylabel("")


In [None]:
ax = sns.barplot(
    between_top5_df,
    y="Task Completion [%]",
    x="Task",
    hue="UI",
)

ax.set_xlabel("Top 5 Tasks")


In [None]:
fig, ax = plt.subplots(1, 2, figsize=(5.5 * 2, 4.8))

# gridspec_kw={"width_ratios": [5.5, 5.5, 5.5, 0.3]},


foo = sns.barplot(
    within_a_df,
    y="Task Completion [%]",
    x="Task",
    hue="UI",
    ax=ax[0],
)

foo.legend_.remove()

sns.barplot(
    within_b_df,
    y="Task Completion [%]",
    x="Task",
    hue="UI",
    ax=ax[1],
)


ax[0].set_xlabel("User Group A\n(Task Order: V-NLI -> V-XNLI)")
# ax[0].set_ylabel('')
ax[1].set_xlabel("User Group B\n(Task Order: V-XNLI -> V-NLI)")
ax[1].set_ylabel("")


### Input Patterns

In [23]:
patterns_df = tasks_df
patterns_df = patterns_df[~patterns_df["is_exercise"]]
patterns_df = patterns_df[~patterns_df.isin([8, 12])]
patterns_df["n_xnli_last_args"] = patterns_df["xnli_last_args"].apply(
    lambda x: 0 if x is None else len(x)
)
patterns_df["n_xnli_last_kwargs"] = patterns_df["xnli_last_kwargs"].apply(
    lambda x: 0 if x is None else len(x)
)
patterns_df = patterns_df[["user_id", "n_xnli_last_args", "n_xnli_last_kwargs"]]

patterns_df.groupby("user_id").apply(
    lambda df: df[["n_xnli_last_args", "n_xnli_last_kwargs"]].T
)


Unnamed: 0_level_0,Unnamed: 1_level_0,3,4,5,6,7
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1.0,n_xnli_last_args,0,0,0,0,0
1.0,n_xnli_last_kwargs,3,4,4,4,5
2.0,n_xnli_last_args,1,1,1,1,1
2.0,n_xnli_last_kwargs,0,1,0,0,2
3.0,n_xnli_last_args,1,2,2,1,1
3.0,n_xnli_last_kwargs,0,0,0,0,0
5.0,n_xnli_last_args,1,1,1,1,1
5.0,n_xnli_last_kwargs,0,0,0,0,0
6.0,n_xnli_last_args,0,0,0,1,0
6.0,n_xnli_last_kwargs,3,4,4,2,5


In [265]:
patterns_df[~patterns_df["user_id"].isin([1, 2, 3, 5, 6, 7, 9, 10, 16])].groupby(
    "user_id"
).apply(lambda df: df[["n_xnli_last_args", "n_xnli_last_kwargs"]].T)


Unnamed: 0_level_0,Unnamed: 1_level_0,3,4,5,6,7
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
11.0,n_xnli_last_args,1,0,1,0,0
11.0,n_xnli_last_kwargs,0,4,0,4,4
13.0,n_xnli_last_args,2,3,2,6,1
13.0,n_xnli_last_kwargs,0,0,0,0,0
14.0,n_xnli_last_args,1,1,1,1,1
14.0,n_xnli_last_kwargs,2,2,2,2,2
15.0,n_xnli_last_args,4,5,5,4,5
15.0,n_xnli_last_kwargs,0,0,0,0,0


### Task TIme

In [9]:
time_df = tasks_df
time_df = time_df[~time_df["user_id"].isin([8, 12])]
time_df = time_df[~time_df["is_exercise"]]
time_df = time_df[["user_id", "xnli_time_secs", "nli_time_secs"]]
time_df = time_df.groupby("user_id").apply(
    lambda df: df[["xnli_time_secs", "nli_time_secs"]].T
)
time_df


Unnamed: 0_level_0,Unnamed: 1_level_0,3,4,5,6,7
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,xnli_time_secs,26.164192,99.767576,135.835367,,
1,nli_time_secs,25.219796,,37.560903,,
2,xnli_time_secs,35.979354,91.625079,64.641389,108.977788,
2,nli_time_secs,50.663591,,121.762268,,
3,xnli_time_secs,49.112643,166.098499,,,
3,nli_time_secs,16.158546,74.029132,,,
5,xnli_time_secs,30.517214,101.43995,63.114725,,
5,nli_time_secs,35.567909,39.251971,29.751417,146.821552,146.151248
6,xnli_time_secs,43.947267,,,49.753891,77.834495
6,nli_time_secs,93.215345,,88.230017,,


In [17]:
time_df = tasks_df
time_df = time_df[~time_df["user_id"].isin([8, 12])]
time_df = time_df[~time_df["is_exercise"]]
time_df = time_df[["user_id", "xnli_time_secs", "nli_time_secs"]]
# time_df = time_df.groupby("user_id").apply(lambda df: df[["xnli_time_secs", "nli_time_secs"]].T)
time_df


vnli_time_secs = time_df[time_df["user_id"] % 2 == 0]["xnli_time_secs"].values
vnli_time_secs


array([ 35.97935438,  91.62507915,  64.64138865, 108.97778773,
                nan,  43.94726729,          nan,          nan,
        49.75389147,  77.83449459,  51.69164348,  54.42312336,
                nan, 190.53381562, 139.06965542, 134.02017951,
         5.54255152,  36.30623364, 128.68535972,          nan,
        79.71226716, 178.07476997, 207.61181426, 113.39306903,
                nan])

#### Between Subjects

In [9]:
time_df = tasks_df
time_df = time_df[~time_df["user_id"].isin([8, 12])]
time_df = time_df[~time_df["is_exercise"]]
time_df = time_df[time_df["user_group"] == "A"]
time_df = time_df[["user_id", "nli_time_secs"]]
time_df["task_id"] = [i % 5 + 1 for i in range(len(time_df))]
time_df = time_df.dropna()
# time_df["time_rate"] = time_df["xnli_time_secs"] / time_df["nli_time_secs"]
# time_df = time_df[time_df["task_id"] == 3]
time_df = time_df.reset_index(drop=True)
time_df = time_df.rename(columns={"nli_time_secs": "time_secs"})
time_df["is_vxnli"] = False

a_time_df = time_df

a_time_df.sort_values(by="task_id")


Unnamed: 0,user_id,time_secs,task_id,is_vxnli
0,2,50.663591,1,False
2,6,93.215345,1,False
4,10,67.818852,1,False
5,14,14.468997,1,False
6,14,160.110209,2,False
7,16,156.574884,2,False
1,2,121.762268,3,False
3,6,88.230017,3,False


In [10]:
time_df = tasks_df
time_df = time_df[~time_df["user_id"].isin([8, 12])]
time_df = time_df[~time_df["is_exercise"]]
time_df = time_df[time_df["user_group"] == "B"]
time_df = time_df[["user_id", "xnli_time_secs"]]
time_df["task_id"] = [i % 5 + 1 for i in range(len(time_df))]
time_df = time_df.dropna()
# time_df["time_rate"] = time_df["xnli_time_secs"] / time_df["nli_time_secs"]
# time_df = time_df[time_df["task_id"] == 3]
time_df = time_df.reset_index(drop=True)
time_df = time_df.rename(columns={"xnli_time_secs": "time_secs"})
time_df["is_vxnli"] = True

b_time_df = time_df

b_time_df.sort_values(by="task_id")


Unnamed: 0,user_id,time_secs,task_id,is_vxnli
0,1,26.164192,1,True
18,13,156.284104,1,True
3,3,49.112643,1,True
5,5,30.517214,1,True
14,11,60.632594,1,True
8,7,52.241676,1,True
11,9,46.810705,1,True
20,15,115.321336,1,True
19,13,162.483924,2,True
15,11,96.7247,2,True


In [11]:
foo_df = pd.concat(
    [
        a_time_df,
        b_time_df,
    ]
)

foo_df = foo_df.drop(columns=["user_id"])
foo_df["UI"] = "V-NLI"
foo_df = foo_df.copy()
foo_df.loc[foo_df["is_vxnli"], ["UI"]] = "V-XNLI"
foo_df = foo_df.rename(columns={"time_secs": "Task Time [secs]", "task_id": "Task"})
foo_df


Unnamed: 0,Task Time [secs],Task,is_vxnli,UI
0,50.663591,1,False,V-NLI
1,121.762268,3,False,V-NLI
2,93.215345,1,False,V-NLI
3,88.230017,3,False,V-NLI
4,67.818852,1,False,V-NLI
5,14.468997,1,False,V-NLI
6,160.110209,2,False,V-NLI
7,156.574884,2,False,V-NLI
0,26.164192,1,True,V-XNLI
1,99.767576,2,True,V-XNLI


In [None]:
sns.barplot(data=foo_df, x="Task", y="Task Time [secs]", hue="UI")


In [14]:
def stderr(x):
    return np.std(x, ddof=1) / np.sqrt(len(x))


print(
    np.mean(
        foo_df[(foo_df["Task"] == 3) & ~foo_df["is_vxnli"]]["Task Time [secs]"].values
    )
)
stderr(foo_df[(foo_df["Task"] == 3) & ~foo_df["is_vxnli"]]["Task Time [secs]"].values)


104.99614250659943


16.766125082969666

In [17]:
def calc_error(m1, e1, m2, e2):
    return m1 / m2, np.sqrt((e1 / m2) ** 2 + (m1 * e2 / (m2**2) ** 2))


# VXNLI
[
    (67.13555797934532, 15.97482940548424),
    (138.48344118254525, 28.82067922757257),
    (114.0614303946495, 21.440257490694954),
]

# VNLI
[
    (56.541696429252625, 16.5247160817734),
    (158.3425463438034, 1.7676624059677124),
    (104.99614250659943, 16.766125082969666),
]

calc_error(
    114.0614303946495, 21.440257490694954, 104.99614250659943, 16.766125082969666
)


(1.086339247058341, 0.20423895610369835)

In [18]:
[
    (1.187363701818678, 0.28272386659517784),
    (0.8745813704540357, 0.18201582123528715),
    (1.086339247058341, 0.20423895610369835),
]

np.mean([1.187363701818678, 0.8745813704540357, 1.086339247058341])


1.049428106443685

In [19]:
(0.28272386659517784**2 + 0.18201582123528715**2 + 0.20423895610369835**2) ** (
    0.5
)


0.39341593144255405

In [17]:
(
    np.array(
        [
            67.135558,
            138.483441,
            114.061430,
        ]
    )
    / np.array(
        [
            56.541696,
            158.342546,
            104.996143,
        ]
    )
).mean()


1.0494281068643359

In [12]:
time_df = pd.concat([a_time_df, b_time_df])
time_df = time_df.pivot_table(
    index="task_id", columns="is_vxnli", values="time_secs", aggfunc=np.mean
)
time_df


is_vxnli,False,True
task_id,Unnamed: 1_level_1,Unnamed: 2_level_1
1,56.541696,67.135558
2,158.342546,138.483441
3,104.996143,114.06143
4,,78.564782
5,,180.03021


In [None]:
sns.barplot(
    pd.DataFrame(
        {
            "Task": [1, 2, 3, 4, 5] * 2,
            "Task Time [secs]": [
                56.541696,
                158.342546,
                104.996143,
                0,
                0,
                67.135558,
                138.483441,
                114.061430,
                78.564782,
                180.030210,
            ],
            "Model": ["V-NLI"] * 5 + ["V-XNLI"] * 5,
        }
    ),
    x="Task",
    y="Task Time [secs]",
    hue="Model",
)


#### Within Subjects

##### All

In [349]:
time_df = tasks_df
time_df = time_df[~time_df["user_id"].isin([8, 12])]
time_df = time_df[time_df["user_id"] <= 10]
time_df = time_df[~time_df["is_exercise"]]
time_df = time_df[["user_id", "xnli_time_secs", "nli_time_secs"]]
time_df["task_id"] = [i % 5 + 1 for i in range(len(time_df))]
time_df = time_df.reset_index(drop=True)
time_df = time_df.dropna()
# time_df = time_df[time_df["task_id"] == 1]
time_df["time_ratio"] = time_df["xnli_time_secs"] / time_df["nli_time_secs"]

display(time_df)

time_df["time_ratio"].mean()


Unnamed: 0,user_id,xnli_time_secs,nli_time_secs,task_id,time_ratio
0,1,26.164192,25.219796,1,1.037447
2,1,135.835367,37.560903,3,3.616403
5,2,35.979354,50.663591,1,0.710162
7,2,64.641389,121.762268,3,0.530882
10,3,49.112643,16.158546,1,3.039422
11,3,166.098499,74.029132,2,2.243691
15,5,30.517214,35.567909,1,0.857999
16,5,101.43995,39.251971,2,2.584328
17,5,63.114725,29.751417,3,2.121402
20,6,43.947267,93.215345,1,0.47146


1.8700919712268984

##### Group A vs Group B

In [375]:
time_df = tasks_df
# time_df = time_df[~time_df["user_id"].isin([8, 12])]
# time_df = time_df[time_df["user_id"] <= 10]
time_df = time_df[~time_df["is_exercise"]]
time_df = time_df[time_df["user_group"] == "A"]
time_df = time_df[["user_id", "xnli_time_secs", "nli_time_secs"]]
time_df["task_id"] = [i % 5 + 1 for i in range(len(time_df))]
time_df = time_df.reset_index(drop=True)
time_df = time_df.dropna()
# time_df = time_df[time_df["task_id"] == 1]
time_df["time_ratio"] = time_df["xnli_time_secs"] / time_df["nli_time_secs"]
# time_df = time_df[time_df["time_ratio"] < 7]

display(time_df)


time_df["time_ratio"].mean()


Unnamed: 0,user_id,xnli_time_secs,nli_time_secs,task_id,time_ratio
0,2,35.979354,50.663591,1,0.710162
2,2,64.641389,121.762268,3,0.530882
5,6,43.947267,93.215345,1,0.47146
10,8,113.262611,97.473858,1,1.161979
12,8,29.466767,140.730327,3,0.209385
15,10,51.691643,67.818852,1,0.762202
20,12,39.908754,103.551189,1,0.385401
21,12,90.409683,110.037726,2,0.821624
26,14,5.542552,160.110209,2,0.034617
31,16,178.07477,156.574884,2,1.137314


0.6225025640625453

In [348]:
time_df = tasks_df
time_df = time_df[~time_df["user_id"].isin([8, 12, 15])]
time_df = time_df[time_df["user_id"] <= 10]
time_df = time_df[~time_df["is_exercise"]]
time_df = time_df[time_df["user_group"] == "B"]
time_df = time_df[["user_id", "xnli_time_secs", "nli_time_secs"]]
time_df["task_id"] = [i % 5 + 1 for i in range(len(time_df))]
time_df = time_df.reset_index(drop=True)
time_df = time_df.dropna()
# time_df = time_df[time_df["task_id"] == 1]
time_df["time_ratio"] = time_df["xnli_time_secs"] / time_df["nli_time_secs"]

display(time_df)

time_df["time_ratio"].mean()


Unnamed: 0,user_id,xnli_time_secs,nli_time_secs,task_id,time_ratio
0,1,26.164192,25.219796,1,1.037447
2,1,135.835367,37.560903,3,3.616403
5,3,49.112643,16.158546,1,3.039422
6,3,166.098499,74.029132,2,2.243691
10,5,30.517214,35.567909,1,0.857999
11,5,101.43995,39.251971,2,2.584328
12,5,63.114725,29.751417,3,2.121402
15,7,52.241676,29.220113,1,1.787867
17,7,96.8593,36.866914,3,2.627269
19,7,180.03021,43.979917,5,4.093464


2.287230532695228

##### Group A vs Group B (Task 1)

In [368]:
time_df = tasks_df
time_df = time_df[~time_df["user_id"].isin([8, 12, 14])]
# time_df = time_df[time_df["user_id"] <= 10]
time_df = time_df[~time_df["is_exercise"]]
time_df = time_df[time_df["user_group"] == "A"]
time_df = time_df[["user_id", "xnli_time_secs", "nli_time_secs"]]
time_df["task_id"] = [i % 5 + 1 for i in range(len(time_df))]
time_df = time_df.reset_index(drop=True)
time_df = time_df.dropna()
time_df = time_df[time_df["task_id"] == 1]
time_df["time_ratio"] = time_df["xnli_time_secs"] / time_df["nli_time_secs"]

display(time_df)

time_df["time_ratio"].mean()


Unnamed: 0,user_id,xnli_time_secs,nli_time_secs,task_id,time_ratio
0,2,35.979354,50.663591,1,0.710162
5,6,43.947267,93.215345,1,0.47146
10,10,51.691643,67.818852,1,0.762202


0.6479410695942295

In [367]:
time_df = tasks_df
time_df = time_df[~time_df["user_id"].isin([8, 12])]
# time_df = time_df[time_df["user_id"] <= 10]
time_df = time_df[~time_df["is_exercise"]]
time_df = time_df[time_df["user_group"] == "B"]
time_df = time_df[["user_id", "xnli_time_secs", "nli_time_secs"]]
time_df["task_id"] = [i % 5 + 1 for i in range(len(time_df))]
time_df = time_df.reset_index(drop=True)
time_df = time_df.dropna()
time_df = time_df[time_df["task_id"] == 1]
time_df["time_ratio"] = time_df["xnli_time_secs"] / time_df["nli_time_secs"]

display(time_df)

time_df["time_ratio"].mean()


Unnamed: 0,user_id,xnli_time_secs,nli_time_secs,task_id,time_ratio
0,1,26.164192,25.219796,1,1.037447
5,3,49.112643,16.158546,1,3.039422
10,5,30.517214,35.567909,1,0.857999
15,7,52.241676,29.220113,1,1.787867
20,9,46.810705,25.605043,1,1.828183
30,13,156.284104,82.73493,1,1.888974


1.739981844762575

##### Group A vs Group B (Task 2)

In [354]:
time_df = tasks_df
time_df = time_df[~time_df["user_id"].isin([8, 12])]
# time_df = time_df[time_df["user_id"] <= 10]
time_df = time_df[~time_df["is_exercise"]]
time_df = time_df[time_df["user_group"] == "A"]
time_df = time_df[["user_id", "xnli_time_secs", "nli_time_secs"]]
time_df["task_id"] = [i % 5 + 1 for i in range(len(time_df))]
time_df = time_df.reset_index(drop=True)
time_df = time_df.dropna()
time_df = time_df[time_df["task_id"] == 2]
time_df["time_ratio"] = time_df["xnli_time_secs"] / time_df["nli_time_secs"]

display(time_df)

time_df["time_ratio"].mean()


Unnamed: 0,user_id,xnli_time_secs,nli_time_secs,task_id,time_ratio
16,14,5.542552,160.110209,2,0.034617
21,16,178.07477,156.574884,2,1.137314


0.5859654312836329

In [353]:
time_df = tasks_df
time_df = time_df[~time_df["user_id"].isin([8, 12])]
time_df = time_df[time_df["user_id"] <= 10]
time_df = time_df[~time_df["is_exercise"]]
time_df = time_df[time_df["user_group"] == "B"]
time_df = time_df[["user_id", "xnli_time_secs", "nli_time_secs"]]
time_df["task_id"] = [i % 5 + 1 for i in range(len(time_df))]
time_df = time_df.reset_index(drop=True)
time_df = time_df.dropna()
time_df = time_df[time_df["task_id"] == 2]
time_df["time_ratio"] = time_df["xnli_time_secs"] / time_df["nli_time_secs"]

display(time_df)

time_df["time_ratio"].mean()


Unnamed: 0,user_id,xnli_time_secs,nli_time_secs,task_id,time_ratio
6,3,166.098499,74.029132,2,2.243691
11,5,101.43995,39.251971,2,2.584328
21,9,55.759731,34.648614,2,1.609292


2.1457701432736145

### Input Length

In [33]:
len_df = tasks_df
len_df["task_id"] = [i % 5 + 1 for i in range(len(len_df))]
len_df = len_df[~len_df["user_id"].isin([8, 12])]
# len_df = len_df[len_df["user_id"].isin([1, 6, 7, 10, 16])]
len_df = len_df[
    (len_df["nli_status"] == "success") & (len_df["xnli_status"] == "success")
]
len_df = len_df[~len_df["is_exercise"]]
len_df["nli_input_len"] = len_df["nli_last_args"].apply(
    lambda x: len(x[0]) + 2 if x is not None else None
)
len_df["xnli_args_input_len"] = len_df["xnli_last_args"].apply(
    lambda x: len(str(x)) - 2 if x is not None else None
)
len_df["xnli_kwargs_input_len"] = len_df["xnli_last_kwargs"].apply(
    lambda x: len(str(x)) - 2 - 3 * len(x) if x is not None else None
)
len_df = len_df[
    [
        "task_id",
        "user_id",
        "nli_input_len",
        "xnli_args_input_len",
        "xnli_kwargs_input_len",
    ]
]
len_df["xnli_input_len"] = (
    len_df["xnli_args_input_len"] + len_df["xnli_kwargs_input_len"]
)

len_df = len_df.dropna()

len_df["rate"] = len_df["xnli_input_len"] / len_df["nli_input_len"]
display(len_df)

len_df["rate"].mean()


Unnamed: 0,task_id,user_id,nli_input_len,xnli_args_input_len,xnli_kwargs_input_len,xnli_input_len,rate
3,4,1,80,0,77,77,0.9625
5,1,1,111,0,89,89,0.801802
3,2,2,69,75,0,75,1.086957
5,4,2,77,69,0,69,0.896104
3,5,3,81,81,0,81,1.0
4,1,3,124,121,0,121,0.975806
3,3,5,67,74,0,74,1.104478
4,4,5,76,84,0,84,1.105263
5,5,5,62,69,0,69,1.112903
3,1,6,71,0,68,68,0.957746


1.0441830027111638

In [38]:
len_df[len_df["task_id"] == 5]["rate"].mean()


0.9588498993398568

### Input Word Length

In [41]:
len_df = tasks_df
len_df["task_id"] = [i % 5 + 1 for i in range(len(len_df))]
len_df = len_df[~len_df["user_id"].isin([8, 12])]
# len_df = len_df[len_df["user_id"].isin([1, 6, 7, 10, 16])]
len_df = len_df[
    (len_df["nli_status"] == "success") & (len_df["xnli_status"] == "success")
]
len_df = len_df[~len_df["is_exercise"]]
len_df["nli_input_len"] = len_df["nli_last_args"].apply(
    lambda x: len(set(x[0].replace("_", " ").split())) if x is not None else None
)

len_df["xnli_args_input_len"] = len_df["xnli_last_args"].apply(
    lambda x: len(set(str(x).replace("_", " ").split())) if x is not None else None
)

len_df["xnli_kwargs_input_len"] = len_df["xnli_last_kwargs"].apply(
    lambda x: len(set(str(x).replace("_", " ").split())) if x is not None else None
)
len_df = len_df[
    [
        "task_id",
        "user_id",
        "nli_input_len",
        "xnli_args_input_len",
        "xnli_kwargs_input_len",
    ]
]
len_df["xnli_input_len"] = (
    len_df["xnli_args_input_len"] + len_df["xnli_kwargs_input_len"]
)

len_df = len_df.dropna()

len_df["rate"] = len_df["xnli_input_len"] / len_df["nli_input_len"]
display(len_df)

len_df["rate"].mean()


Unnamed: 0,task_id,user_id,nli_input_len,xnli_args_input_len,xnli_kwargs_input_len,xnli_input_len,rate
3,4,1,13,1,11,12,0.923077
5,1,1,20,1,13,14,0.7
3,2,2,11,12,1,13,1.181818
5,4,2,14,13,1,14,1.0
3,5,3,13,14,1,15,1.153846
4,1,3,17,18,1,19,1.117647
3,3,5,10,12,1,13,1.3
4,4,5,12,13,1,14,1.166667
5,5,5,11,14,1,15,1.363636
3,1,6,11,1,9,10,0.909091


1.1112704187152715

In [42]:
len_df[len_df["task_id"] == 5]["rate"].mean()


1.0793706293706293

### WordCloud

In [None]:
wordcloud_df = tasks_df
wordcloud_df = wordcloud_df[~wordcloud_df["user_id"].isin([8, 12])]
wordcloud_df = wordcloud_df[~wordcloud_df["is_exercise"]]

wordcloud = WordCloud(background_color="white").generate(
    " ".join(
        [v[0] if v is not None else "" for v in wordcloud_df["nli_last_args"].values]
    )
)

plt.figure(figsize=(30, 15))
plt.axis("off")
plt.imshow(wordcloud, interpolation="bilinear")


In [None]:
wordcloud = WordCloud(background_color="white").generate(
    " ".join(
        [str(vv) for v in wordcloud_df["xnli_last_args"].values for vv in (v or [])]
        + [
            str(vvv)
            for v in wordcloud_df["xnli_last_kwargs"].values
            for k, vv in (v or {}).items()
            for vvv in (k, vv)
        ]
    )
)

plt.figure(figsize=(30, 15))
plt.axis("off")
plt.imshow(wordcloud, interpolation="bilinear")


In [None]:
wordcloud = WordCloud(background_color="white").generate(
    " ".join(
        [
            str(vvv)
            for v in tasks_df[~tasks_df["is_exercise"]]["xnli_last_kwargs"].values
            for k, vv in (v or {}).items()
            for vvv in (k, vv)
        ]
    )
)

plt.figure(figsize=(30, 15))
plt.axis("off")
plt.imshow(wordcloud, interpolation="bilinear")


In [None]:
wordcloud = WordCloud(background_color="white").generate(
    " ".join(
        [
            str(vvv)
            for v in tasks_df[~tasks_df["is_exercise"]]["xnli_last_kwargs"].values
            for k, vv in (v or {}).items()
            for vvv in (k, vv)
        ]
    )
)

plt.figure(figsize=(30, 15))
plt.axis("off")
plt.imshow(wordcloud, interpolation="bilinear")


### Questionnaire

In [None]:
vxnli_vnli_qa_df


In [66]:
eval_qa_df = vxnli_vnli_qa_df
eval_qa_df = eval_qa_df[~eval_qa_df["user_id"].isin([8, 12])]

print("V-NLI - All")
print()

for i in range(4):
    print(f"0{i + 1}:", eval_qa_df[~eval_qa_df["is_vxnli"]][f"0{i + 1}_score"].mean())

print()
print("V-XNLI - All")
print()

for i in range(4):
    print(f"0{i + 1}:", eval_qa_df[eval_qa_df["is_vxnli"]][f"0{i + 1}_score"].mean())


V-NLI - All

01: 3.769230769230769
02: 2.6153846153846154
03: 2.923076923076923
04: 3.230769230769231

V-XNLI - All

01: 4.083333333333333
02: 3.4166666666666665
03: 2.25
04: 2.4166666666666665


In [59]:
df = tasks_df
df = df[~df["is_exercise"]]

df["task_id"] = [i % 5 + 1 for i in range(len(df))]

df = df[df["task_id"] == 3]

df[["user_id", "xnli_last_kwargs"]]


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["task_id"] = [i % 5 + 1 for i in range(len(df))]


Unnamed: 0,user_id,xnli_last_kwargs
5,1,"{'graph_type': 'bar charts', 'input': 'sex', 'output': 'Count of Records', 'value_of_Rank': 'AsstProf'}"
5,2,{}
5,3,{}
5,5,{}
5,6,"{'figure': 'bar', 'xlabel': 'sex', 'ylabel': 'count', 'filter': 'Rank is AsstProf'}"
5,7,"{'vertical': 'Count of records', 'horizontal': 'sex', 'filter': 'rank is AsstProf'}"
5,8,{}
5,9,"{'x': 'sex', 'y': 'Count of Records', 'rank': 'AsstProf'}"
5,10,"{'x': 'sex', 'y': 'Count of Records'}"
5,11,{}


#### Violin

In [None]:
fig, ax = plt.subplots(1, figsize=(6.4 * 2.0, 4.8))

# ax.ravel()

qa_violin_df = vxnli_vnli_qa_df
qa_violin_df = qa_violin_df[
    ["is_vxnli", "01_score", "02_score", "03_score", "04_score"]
]

qa_violin_01_df = qa_violin_df[["is_vxnli", "01_score"]].copy()
qa_violin_01_df["Question"] = "I found V-NLI / V-XNLI\neasy to use (↑)"
qa_violin_01_df = qa_violin_01_df.rename(columns={"01_score": "Score"})

qa_violin_02_df = qa_violin_df[["is_vxnli", "02_score"]].copy()
qa_violin_02_df["Question"] = "It was easy to fix\ninterpretation mistakes (↑)"
qa_violin_02_df = qa_violin_02_df.rename(columns={"02_score": "Score"})

qa_violin_03_df = qa_violin_df[["is_vxnli", "03_score"]].copy()
qa_violin_03_df["Question"] = "There were many\ninterpretation mistakes (↓)"
qa_violin_03_df = qa_violin_03_df.rename(columns={"03_score": "Score"})

qa_violin_04_df = qa_violin_df[["is_vxnli", "04_score"]].copy()
qa_violin_04_df[
    "Question"
] = "V-NLI / V-XNLI didn't understand\nmy questions requests (↓)"
qa_violin_04_df = qa_violin_04_df.rename(columns={"04_score": "Score"})

qa_violin_df = pd.concat(
    [
        qa_violin_01_df,
        qa_violin_02_df,
        qa_violin_03_df,
        qa_violin_04_df,
    ],
    axis=0,
)

qa_violin_df["is_vxnli"] = qa_violin_df["is_vxnli"].apply(
    lambda x: "V-XNLI" if x else "V-NLI"
)
qa_violin_df = qa_violin_df.rename(columns={"is_vxnli": "UI"})

# ax.set_xlabel('')
# ax[0].set_ylabel('')

a = sns.violinplot(
    data=qa_violin_df,
    x="Question",
    y="Score",
    hue="UI",
    split=True,
    gridsize=150,
    ax=ax,
)

a.set(xlabel=None)

a


In [None]:
# Adopt This

fig, ax = plt.subplots(1, figsize=(6.4 * 2.0, 4.8))

# ax.ravel()

qa_violin_df = vxnli_vnli_qa_df
qa_violin_df = qa_violin_df[~qa_violin_df.isin([8, 12])]
qa_violin_df = qa_violin_df[
    ((qa_violin_df["user_id"] % 2 == 0) & ~qa_violin_df["is_vxnli"])
    | ((qa_violin_df["user_id"] % 2 == 1) & qa_violin_df["is_vxnli"])
]
qa_violin_df = qa_violin_df[
    ["is_vxnli", "01_score", "02_score", "03_score", "04_score"]
]

qa_violin_01_df = qa_violin_df[["is_vxnli", "01_score"]].copy()
qa_violin_01_df["Question"] = "I found V-NLI / V-XNLI\neasy to use (↑)"
qa_violin_01_df = qa_violin_01_df.rename(columns={"01_score": "Score"})

qa_violin_02_df = qa_violin_df[["is_vxnli", "02_score"]].copy()
qa_violin_02_df["Question"] = "It was easy to fix\ninterpretation mistakes (↑)"
qa_violin_02_df = qa_violin_02_df.rename(columns={"02_score": "Score"})

qa_violin_03_df = qa_violin_df[["is_vxnli", "03_score"]].copy()
qa_violin_03_df["Question"] = "There were many\ninterpretation mistakes (↓)"
qa_violin_03_df = qa_violin_03_df.rename(columns={"03_score": "Score"})

qa_violin_04_df = qa_violin_df[["is_vxnli", "04_score"]].copy()
qa_violin_04_df[
    "Question"
] = "V-NLI / V-XNLI didn't understand\nmy questions requests (↓)"
qa_violin_04_df = qa_violin_04_df.rename(columns={"04_score": "Score"})

qa_violin_df = pd.concat(
    [
        qa_violin_01_df,
        qa_violin_02_df,
        qa_violin_03_df,
        qa_violin_04_df,
    ],
    axis=0,
)

qa_violin_df["is_vxnli"] = qa_violin_df["is_vxnli"].apply(
    lambda x: "V-XNLI" if x else "V-NLI"
)
qa_violin_df = qa_violin_df.rename(columns={"is_vxnli": "UI"})

# ax.set_xlabel('')
# ax[0].set_ylabel('')

a = sns.violinplot(
    data=qa_violin_df,
    x="Question",
    y="Score",
    hue="UI",
    hue_order=["V-NLI", "V-XNLI"],
    split=True,
    gridsize=150,
    ax=ax,
    cut=0.0,
)

a.set(xlabel=None)
a.set(xlabel=None, yticks=[1, 2, 3, 4, 5])

a


In [9]:
qa_violin_df = vxnli_vnli_qa_df
qa_violin_df = qa_violin_df[~qa_violin_df.isin([8, 12])]
qa_violin_df = qa_violin_df[
    ((qa_violin_df["user_id"] % 2 == 0) & ~qa_violin_df["is_vxnli"])
    | ((qa_violin_df["user_id"] % 1 == 0) & qa_violin_df["is_vxnli"])
]

qa_violin_df = qa_violin_df[
    ["is_vxnli", "01_score", "02_score", "03_score", "04_score"]
]

display(qa_violin_df[qa_violin_df["is_vxnli"]].mean())

display(qa_violin_df[~qa_violin_df["is_vxnli"]].mean())


is_vxnli    1.000000
01_score    4.083333
02_score    3.416667
03_score    2.250000
04_score    2.416667
dtype: float64

is_vxnli    0.0
01_score    3.0
02_score    2.0
03_score    3.0
04_score    3.6
dtype: float64

In [12]:
def stderr(x):
    return np.std(x, ddof=1) / np.sqrt(len(x))


In [17]:
qa_violin_df[~qa_violin_df["is_vxnli"]]["01_score"].mean(), stderr(
    qa_violin_df[~qa_violin_df["is_vxnli"]]["01_score"].values
)


(3.0, 0.4472135954999579)

In [None]:
# Worst Case

fig, ax = plt.subplots(1, figsize=(6.4 * 2.0, 4.8))

# ax.ravel()

qa_violin_df = vxnli_vnli_qa_df
qa_violin_df = qa_violin_df[~qa_violin_df.isin([8, 12])]
qa_violin_df = qa_violin_df[
    ((qa_violin_df["user_id"] % 2 == 0) & qa_violin_df["is_vxnli"])
    | ((qa_violin_df["user_id"] % 1 == 0) & ~qa_violin_df["is_vxnli"])
]
qa_violin_df = qa_violin_df[
    ["is_vxnli", "01_score", "02_score", "03_score", "04_score"]
]

qa_violin_01_df = qa_violin_df[["is_vxnli", "01_score"]].copy()
qa_violin_01_df["Question"] = "I found V-NLI / V-XNLI\neasy to use (↑)"
qa_violin_01_df = qa_violin_01_df.rename(columns={"01_score": "Score"})

qa_violin_02_df = qa_violin_df[["is_vxnli", "02_score"]].copy()
qa_violin_02_df["Question"] = "It was easy to fix\ninterpretation mistakes (↑)"
qa_violin_02_df = qa_violin_02_df.rename(columns={"02_score": "Score"})

qa_violin_03_df = qa_violin_df[["is_vxnli", "03_score"]].copy()
qa_violin_03_df["Question"] = "There were many\ninterpretation mistakes (↓)"
qa_violin_03_df = qa_violin_03_df.rename(columns={"03_score": "Score"})

qa_violin_04_df = qa_violin_df[["is_vxnli", "04_score"]].copy()
qa_violin_04_df[
    "Question"
] = "V-NLI / V-XNLI didn't understand\nmy questions requests (↓)"
qa_violin_04_df = qa_violin_04_df.rename(columns={"04_score": "Score"})

qa_violin_df = pd.concat(
    [
        qa_violin_01_df,
        qa_violin_02_df,
        qa_violin_03_df,
        qa_violin_04_df,
    ],
    axis=0,
)

qa_violin_df["is_vxnli"] = qa_violin_df["is_vxnli"].apply(
    lambda x: "V-XNLI" if x else "V-NLI"
)
qa_violin_df = qa_violin_df.rename(columns={"is_vxnli": "UI"})

# ax.set_xlabel('')
# ax[0].set_ylabel('')

a = sns.violinplot(
    data=qa_violin_df,
    x="Question",
    y="Score",
    hue="UI",
    hue_order=["V-NLI", "V-XNLI"],
    split=True,
    gridsize=150,
    ax=ax,
    cut=0,
)

a.set(xlabel=None, yticks=[1, 2, 3, 4, 5])
# a.set_xticklabels()
a
