In [72]:
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt

from matplotlib import patches

from scipy.stats import shapiro, rankdata, friedmanchisquare, ttest_rel, wilcoxon
from statsmodels.stats.anova import AnovaRM
from scikit_posthocs import posthoc_ttest, posthoc_wilcoxon, posthoc_tukey_hsd, posthoc_tukey
import pingouin as pg

import numpy as np
import scipy
import config as c

In [67]:
clicks_df = pd.DataFrame()
latency_conditions = [-40, -20, 0, 20, 40]
balanced_latin_square = [[0,1,4,3,2],
                        [4,2,0,3,1],
                        [3,2,1,4,0],
                        [1,0,3,4,2],
                        [4,0,2,1,3],
                        [2,3,4,1,0],
                        [1,3,0,2,4],
                        [0,4,1,2,3],
                        [2,4,3,0,1],
                        [3,1,2,0,4]]
for id in range(c.NUM_PARTICIPANTS):
    for trial in range(c.NUM_TRIALS):
        clicks_raw = pd.read_csv(
            c.FOLDER_PATH
            + str(id + 1)
            + f"/clicks_participant_{id + 1}_trial_{trial}.csv"
        )
        clicks_raw["level_of_latency"] = latency_conditions[balanced_latin_square[id][trial]]
        clicks_df = pd.concat([clicks_df, clicks_raw])
    
clicks_df.head()

Unnamed: 0,id,timestamp_ms,participant_id,trial,level_of_latency,target_number,target_width,target_amplitude,target_x,target_y,cursor_x,cursor_y,success,completion_time
0,0,1710166191603,1,0,-40,0,30,450,959,103,957,107,1,1.787344
1,1,1710166193111,1,0,-40,1,30,450,1113,975,1117,968,1,1.508388
2,2,1710166194311,1,0,-40,2,30,450,670,208,672,211,1,1.199971
3,3,1710166196228,1,0,-40,3,30,450,1349,778,1353,787,1,1.916836
4,4,1710166197495,1,0,-40,4,30,450,516,474,519,467,1,1.267135


In [68]:
# drop irrelevant columns
clicks_df = clicks_df.drop(
    ["timestamp_ms", "target_number"], axis=1
)
# drop rows of first click in round (no fitts law task)
clicks_df = clicks_df.drop(clicks_df.loc[clicks_df["id"] % 9 == 0].index, axis=0)
clicks_df.reset_index(inplace=True, drop=True)
clicks_df.head()

Unnamed: 0,id,participant_id,trial,level_of_latency,target_width,target_amplitude,target_x,target_y,cursor_x,cursor_y,success,completion_time
0,1,1,0,-40,30,450,1113,975,1117,968,1,1.508388
1,2,1,0,-40,30,450,670,208,672,211,1,1.199971
2,3,1,0,-40,30,450,1349,778,1353,787,1,1.916836
3,4,1,0,-40,30,450,516,474,519,467,1,1.267135
4,5,1,0,-40,30,450,1403,474,1409,482,1,1.045496


In [69]:
# calculation of id
clicks_df["ID"] = np.log2(clicks_df["target_amplitude"] / clicks_df["target_width"] + 1)

In [121]:
# drop rows with unsuccessful fitts law tasks
error_indices = clicks_df.loc[clicks_df["success"]==0].index
task_after_error_indices = clicks_df.loc[clicks_df["success"]==0].index + 1
clicks_error_df = clicks_df.drop(error_indices, axis=0)
clicks_error_df = clicks_df.drop(task_after_error_indices, axis=0)

clicks_error_df.reset_index(inplace=True, drop=True)
clicks_error_df

Unnamed: 0,id,participant_id,trial,level_of_latency,target_width,target_amplitude,target_x,target_y,cursor_x,cursor_y,success,completion_time,ID,distance_from_target
0,1,1,0,-40,30,450,1113,975,1117,968,1,1.508388,4.000000,8.062258
1,2,1,0,-40,30,450,670,208,672,211,1,1.199971,4.000000,3.605551
2,3,1,0,-40,30,450,1349,778,1353,787,1,1.916836,4.000000,9.848858
3,4,1,0,-40,30,450,516,474,519,467,1,1.267135,4.000000,7.615773
4,5,1,0,-40,30,450,1403,474,1409,482,1,1.045496,4.000000,10.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8123,318,6,4,-40,30,200,1133,653,1137,650,1,0.978993,2.938599,5.000000
8124,319,6,4,-40,30,200,763,518,758,510,1,0.821035,2.938599,9.433981
8125,320,6,4,-40,30,200,1156,518,1144,510,0,0.883963,2.938599,14.422205
8126,322,6,4,-40,30,200,1088,399,1085,392,1,1.191457,2.938599,7.615773


In [70]:
# calculation of effective id
num_ids = len(clicks_df["ID"].unique())

clicks_df["distance_from_target"] = np.sqrt(
    (clicks_df["target_x"] - clicks_df["cursor_x"])**2 +
    (clicks_df["target_y"] - clicks_df["cursor_y"])**2
)

effective_id_df = pd.DataFrame({"ID": clicks_df["ID"].unique(), 
                                "sigma": np.zeros(num_ids),
                                "amplitude": np.zeros(num_ids),
                                "width": np.zeros(num_ids)})
for _, entry in clicks_df.iterrows():
    for _, row in effective_id_df.iterrows():
        if entry["ID"] == row["ID"]:
            row["amplitude"] = entry["target_amplitude"]
            row["width"] = entry["target_width"]
            row["sigma"] += entry["distance_from_target"]

effective_id_df["sigma"] /= len(clicks_df)/9
effective_id_df["We"] = 4.133 * effective_id_df["sigma"]
effective_id_df["IDe"] = np.log2(
    effective_id_df["amplitude"] / effective_id_df["We"] + 1
)
effective_id_df.sort_values(by=["ID"])

Unnamed: 0,ID,sigma,amplitude,width,We,IDe
5,1.0,32.198907,200.0,200.0,133.078082,1.323587
3,1.392317,44.917568,325.0,200.0,185.644308,1.459778
8,1.415037,26.273462,200.0,120.0,108.588218,1.506815
6,1.70044,45.467754,450.0,200.0,187.918226,1.763267
2,1.890771,31.662447,325.0,120.0,130.860892,1.80056
7,2.247928,28.532752,450.0,120.0,117.925862,2.267822
4,2.938599,9.226566,200.0,30.0,38.133397,2.642643
1,3.564785,9.324391,325.0,30.0,38.537707,3.237762
0,4.0,10.124243,450.0,30.0,41.843497,3.555124


In [122]:
# calculation of throughput
throughput_df = pd.DataFrame({"IDe": np.zeros(clicks_error_df.shape[0]),
                              "completion_time": clicks_error_df["completion_time"]})

for index, entry in clicks_error_df.iterrows():
    for _, row in effective_id_df.iterrows():
        if entry["ID"] == row["ID"]:
            throughput_df.loc[index,"IDe"] = round(row["IDe"], 2)

throughput_df["throughput"] = throughput_df["IDe"] / throughput_df["completion_time"]
throughput_df["level_of_latency"] = clicks_error_df["level_of_latency"]
throughput_df["participant_id"] = clicks_error_df["participant_id"]
throughput_df

Unnamed: 0,IDe,completion_time,throughput,level_of_latency,participant_id
0,3.56,1.508388,2.360135,-40,1
1,3.56,1.199971,2.966738,-40,1
2,3.56,1.916836,1.857227,-40,1
3,3.56,1.267135,2.809488,-40,1
4,3.56,1.045496,3.405082,-40,1
...,...,...,...,...,...
8123,2.64,0.978993,2.696648,-40,6
8124,2.64,0.821035,3.215454,-40,6
8125,2.64,0.883963,2.986550,-40,6
8126,2.64,1.191457,2.215774,-40,6


In [123]:
# calculation of throughput
error_df = pd.DataFrame({"IDe": np.zeros(clicks_df.shape[0]),
                              "error_rate": 1 - clicks_df["success"]})

for index, entry in clicks_df.iterrows():
    for _, row in effective_id_df.iterrows():
        if entry["ID"] == row["ID"]:
            error_df.loc[index,"IDe"] = round(row["IDe"], 2)

error_df["level_of_latency"] = clicks_df["level_of_latency"]
error_df["participant_id"] = clicks_df["participant_id"]
error_df

Unnamed: 0,IDe,error_rate,level_of_latency,participant_id
0,3.56,0,-40,1
1,3.56,0,-40,1
2,3.56,0,-40,1
3,3.56,0,-40,1
4,3.56,0,-40,1
...,...,...,...,...
8635,2.64,0,-40,6
8636,2.64,1,-40,6
8637,2.64,0,-40,6
8638,2.64,0,-40,6


In [147]:
grouped_error_df = error_df.groupby(["level_of_latency", "participant_id"], as_index=False).mean()
grouped_error_df

Unnamed: 0,level_of_latency,participant_id,IDe,error_rate
0,-40,1,2.173333,0.038194
1,-40,2,2.173333,0.027778
2,-40,3,2.173333,0.15625
3,-40,4,2.173333,0.173611
4,-40,5,2.173333,0.006944
5,-40,6,2.173333,0.059028
6,-20,1,2.173333,0.024306
7,-20,2,2.173333,0.020833
8,-20,3,2.173333,0.09375
9,-20,4,2.173333,0.097222


# Inferential Analysis
- one-way repeated-measures ANOVA (+correction??) for latency throughput
- one-way repeated-measures ANOVA for latency -> movement time
- one-way repeated-measures ANOVA for latency -> error rate
- one-way repeated-measures ANOVA for latency -> nasa tlx

### ANOVA: latency -> throughput 

In [136]:
# test for normal distribution
pg.normality(data=throughput_df, dv='throughput', group='level_of_latency')

Unnamed: 0_level_0,W,pval,normal
level_of_latency,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
-40,0.981868,2.572967e-13,False
-20,0.995651,0.0001130047,False
40,0.926579,1.7622360000000002e-27,False
20,0.962982,4.8663129999999996e-20,False
0,0.936382,8.194631e-26,False


In [125]:
aov = pg.rm_anova(data=throughput_df, dv='throughput', within='level_of_latency', subject="participant_id", correction=True)
# Sphericity assumption not met -> use mauchly test chisquare with greenhouse geisser corrected p-value of the anova

aov

Unnamed: 0,Source,ddof1,ddof2,F,p-unc,p-GG-corr,ng2,eps,sphericity,W-spher,p-spher
0,level_of_latency,4,20,1.085464,0.390288,0.369895,0.034451,0.438356,True,0.04286,0.346962


In [79]:
pg.pairwise_tests(data=throughput_df, dv='throughput', within='level_of_latency', subject="participant_id", return_desc=True, padjust='holm')

Unnamed: 0,Contrast,A,B,mean(A),std(A),mean(B),std(B),Paired,Parametric,T,dof,alternative,p-unc,p-corr,p-adjust,BF10,hedges
0,level_of_latency,-40,-20,3.360815,0.716993,3.524187,0.570553,True,True,-1.135485,5.0,two-sided,0.307652,1.0,holm,0.603,-0.232753
1,level_of_latency,-40,0,3.360815,0.716993,3.755098,1.096529,True,True,-1.532183,5.0,two-sided,0.186048,1.0,holm,0.83,-0.392867
2,level_of_latency,-40,20,3.360815,0.716993,3.717435,0.720463,True,True,-1.643348,5.0,two-sided,0.161232,1.0,holm,0.914,-0.458014
3,level_of_latency,-40,40,3.360815,0.716993,3.577879,0.932179,True,True,-1.225748,5.0,two-sided,0.274873,1.0,holm,0.646,-0.240948
4,level_of_latency,-20,0,3.524187,0.570553,3.755098,1.096529,True,True,-0.72188,5.0,two-sided,0.502705,1.0,holm,0.46,-0.243865
5,level_of_latency,-20,20,3.524187,0.570553,3.717435,0.720463,True,True,-0.883836,5.0,two-sided,0.417251,1.0,holm,0.506,-0.2745
6,level_of_latency,-20,40,3.524187,0.570553,3.577879,0.932179,True,True,-0.23035,5.0,two-sided,0.826948,1.0,holm,0.382,-0.06413
7,level_of_latency,0,20,3.755098,1.096529,3.717435,0.720463,True,True,0.215798,5.0,two-sided,0.837672,1.0,holm,0.381,0.037473
8,level_of_latency,0,40,3.755098,1.096529,3.577879,0.932179,True,True,1.386998,5.0,two-sided,0.224093,1.0,holm,0.735,0.160746
9,level_of_latency,20,40,3.717435,0.720463,3.577879,0.932179,True,True,0.84812,5.0,two-sided,0.435081,1.0,holm,0.495,0.154634


### ANOVA: latency -> error rate

In [140]:
pg.normality(data=error_df, dv='error_rate', group='level_of_latency')

Unnamed: 0_level_0,W,pval,normal
level_of_latency,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
-40,0.29331,7.316422e-63,False
-20,0.22486,1.4146699999999998e-64,False
40,0.250475,5.973812e-64,False
20,0.226528,1.551812e-64,False
0,0.247391,5.0115679999999996e-64,False


In [148]:
aov = pg.rm_anova(data=grouped_error_df, dv='error_rate', within="level_of_latency", subject="participant_id", correction=True)
# Sphericity assumption not met -> use mauchly test chisquare with greenhouse geisser corrected p-value of the anova

aov

Unnamed: 0,Source,ddof1,ddof2,F,p-unc,p-GG-corr,ng2,eps,sphericity,W-spher,p-spher
0,level_of_latency,4,20,1.564706,0.222288,0.254816,0.034909,0.523395,True,196724200000.0,1.0


In [118]:
pg.pairwise_tests(data=error_df, dv='error_rate', within=['level_of_latency'], subject="participant_id", return_desc=True, padjust='holm').to_csv('anova.csv')

### ANOVA: latency -> completion time

In [129]:
pg.normality(data=throughput_df, dv='completion_time', group='level_of_latency')

Unnamed: 0_level_0,W,pval,normal
level_of_latency,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
-40,0.932763,3.239626e-26,False
-20,0.913666,1.396886e-29,False
40,0.931822,1.327423e-26,False
20,0.9286,2.945293e-27,False
0,0.935318,5.268153999999999e-26,False


In [149]:
aov = pg.rm_anova(data=throughput_df, dv='completion_time', within=['level_of_latency'], subject="participant_id", correction=True)
# Sphericity assumption not met -> use mauchly test chisquare with greenhouse geisser corrected p-value of the anova

aov

Unnamed: 0,Source,ddof1,ddof2,F,p-unc,p-GG-corr,ng2,eps,sphericity,W-spher,p-spher
0,level_of_latency,4,20,1.685337,0.192808,0.220713,0.041719,0.655582,True,507.302463,1.0


In [120]:
pg.pairwise_tests(data=throughput_df, dv='completion_time', within=['level_of_latency'], subject="participant_id", return_desc=True, padjust='holm').to_csv('anova_completion_time.csv')

## Inferential analysis of NASA-TLX

In [99]:
tlx_df = pd.DataFrame()
for id in range(c.NUM_PARTICIPANTS):
    tlx_raw = pd.read_csv(
        c.FOLDER_PATH
        + str(id + 1)
        + f"/tlx_participant_{id + 1}.csv"
    )
    tlx_raw["level_of_latency"] = balanced_latin_square[id]
    tlx_df = pd.concat([tlx_df, tlx_raw])

tlx_df.drop(["latency", "trial"], axis=1, inplace=True)

tlx_df.head()

Unnamed: 0,participant_id,mental_demand,physical_demand,temporal_demand,performance,effort,frustration,level_of_latency
0,1,8.0,10.0,16.2,2.0,13.0,4.8,0
1,1,14.0,10.8,14.2,8.4,12.2,11.2,1
2,1,16.4,15.8,8.6,12.2,14.0,8.6,4
3,1,10.8,13.0,4.6,6.6,11.0,4.0,3
4,1,8.2,8.2,12.2,6.2,7.0,4.4,2


In [169]:
tlx_df["score"] = np.zeros(len(tlx_df))
for row, entry in tlx_df.iterrows():
    tlx_df.loc[row, "score"] = sum(entry.drop(["participant_id"]))/6
    print(sum(entry.drop(["participant_id"]))/6)

tlx_df.head() 

9.0
11.966666666666667
13.266666666666666
8.833333333333334
8.033333333333333
10.2
6.7
8.433333333333332
7.033333333333334
6.833333333333333
6.3
4.766666666666667
6.3
5.900000000000001
3.033333333333333
11.566666666666665
8.566666666666666
10.233333333333334
8.633333333333335
10.133333333333333
6.1000000000000005
3.866666666666667
3.8666666666666667
3.6333333333333333
4.0
3.266666666666667
3.8333333333333335
4.633333333333334
6.133333333333333
7.266666666666666


Unnamed: 0,participant_id,mental_demand,physical_demand,temporal_demand,performance,effort,frustration,level_of_latency,score
0,1,8.0,10.0,16.2,2.0,13.0,4.8,0,3.266667
1,1,14.0,10.8,14.2,8.4,12.2,11.2,1,3.833333
2,1,16.4,15.8,8.6,12.2,14.0,8.6,4,4.633333
3,1,10.8,13.0,4.6,6.6,11.0,4.0,3,6.133333
4,1,8.2,8.2,12.2,6.2,7.0,4.4,2,7.266667


In [167]:
(8+10+16.2+2+13+4.8)/6

9.0

In [152]:
tlx_df.groupby(["level_of_latency"], as_index=False).mean()

Unnamed: 0,level_of_latency,participant_id,mental_demand,physical_demand,temporal_demand,performance,effort,frustration
0,0,3.5,7.6,5.933333,4.9,7.033333,7.333333,7.366667
1,1,3.5,9.3,6.833333,4.633333,7.366667,8.666667,8.633333
2,2,3.5,6.233333,5.266667,3.833333,6.3,6.766667,6.366667
3,3,3.5,6.933333,5.5,4.966667,6.0,6.5,7.333333
4,4,3.5,8.666667,6.5,4.766667,9.433333,7.366667,8.0


In [96]:
# test for normal distribution
pg.normality(data=tlx_df, dv='mental_demand', group='level_of_latency')

Unnamed: 0_level_0,W,pval,normal
level_of_latency,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.928375,0.567616,True
1,0.946385,0.710936,True
4,0.85738,0.180357,True
3,0.890205,0.319263,True
2,0.944663,0.69695,True


In [100]:
aov = pg.rm_anova(data=tlx_df, dv='mental_demand', within='level_of_latency', subject="participant_id", correction=True)

aov

Unnamed: 0,Source,ddof1,ddof2,F,p-unc,p-GG-corr,ng2,eps,sphericity,W-spher,p-spher
0,level_of_latency,4,20,2.089804,0.120142,0.159345,0.041889,0.620332,True,0.081934,0.532607


In [101]:
aov = pg.rm_anova(data=tlx_df, dv='temporal_demand', within='level_of_latency', subject="participant_id", correction=True)

aov

Unnamed: 0,Source,ddof1,ddof2,F,p-unc,p-GG-corr,ng2,eps,sphericity,W-spher,p-spher
0,level_of_latency,4,20,0.09288,0.983603,0.903659,0.007434,0.474498,False,0.001627,0.016697


In [102]:
aov = pg.rm_anova(data=tlx_df, dv='physical_demand', within='level_of_latency', subject="participant_id", correction=True)

aov

Unnamed: 0,Source,ddof1,ddof2,F,p-unc,p-GG-corr,ng2,eps,sphericity,W-spher,p-spher
0,level_of_latency,4,20,0.646919,0.63553,0.541827,0.013767,0.490626,True,0.06331,0.453677


In [103]:
aov = pg.rm_anova(data=tlx_df, dv='performance', within='level_of_latency', subject="participant_id", correction=True)

aov

Unnamed: 0,Source,ddof1,ddof2,F,p-unc,p-GG-corr,ng2,eps,sphericity,W-spher,p-spher
0,level_of_latency,4,20,1.585408,0.216922,0.241481,0.087945,0.653701,True,0.176734,0.780207


In [104]:
aov = pg.rm_anova(data=tlx_df, dv='effort', within='level_of_latency', subject="participant_id", correction=True)

aov

Unnamed: 0,Source,ddof1,ddof2,F,p-unc,p-GG-corr,ng2,eps,sphericity,W-spher,p-spher
0,level_of_latency,4,20,0.815794,0.530046,0.474351,0.032468,0.527428,True,0.102169,0.603713


In [105]:
aov = pg.rm_anova(data=tlx_df, dv='frustration', within='level_of_latency', subject="participant_id", correction=True)

aov

Unnamed: 0,Source,ddof1,ddof2,F,p-unc,p-GG-corr,ng2,eps,sphericity,W-spher,p-spher
0,level_of_latency,4,20,0.686836,0.609461,0.557531,0.029553,0.654908,True,0.257451,0.883762
