In [1]:
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
from scipy import stats
import sys
plt.rcParams['figure.dpi'] = 300

## Time data

In [2]:
# Load the raw timing measurements and preview the first five rows.
study_data = pd.read_csv(filepath_or_buffer="data/study_data.csv")
study_data.head(n=5)

Unnamed: 0,uid,rep,tool,dataset,time
0,128524,1,stringcharter,easy,27
1,128524,1,timetable,easy,31
2,128524,2,stringcharter,easy,29
3,128524,2,timetable,easy,30
4,128524,3,stringcharter,easy,69


In [3]:
# Distinct participant IDs that occur in the timing data.
uids = pd.unique(study_data["uid"])
uids

array([128524, 129485, 535623, 912837], dtype=int64)

In [4]:
# Mean completion time for every (tool, dataset) combination.
means = study_data.groupby(by=["tool", "dataset"])["time"].mean()
means

tool           dataset
stringcharter  easy       33.833333
               hard       34.250000
timetable      easy       32.000000
               hard       35.250000
Name: time, dtype: float64

In [5]:
# Scatter plot of completion times, one horizontal slot per tool x dataset group.
# The four group frames defined here are reused by later cells.
easy_tt = study_data[(study_data["tool"]=="timetable")&(study_data["dataset"]=="easy")]
easy_sc = study_data[(study_data["tool"]=="stringcharter")&(study_data["dataset"]=="easy")]
hard_tt = study_data[(study_data["tool"]=="timetable")&(study_data["dataset"]=="hard")]
hard_sc = study_data[(study_data["tool"]=="stringcharter")&(study_data["dataset"]=="hard")]

# (frame, colour, marker, legend label) in left-to-right plotting order
plot_groups = [
    (easy_tt, 'b', 'o', "Timetable (easy)"),
    (easy_sc, 'b', 'x', "StringCharter (easy)"),
    (hard_tt, 'r', 'o', "Timetable (hard)"),
    (hard_sc, 'r', 'x', "StringCharter (hard)"),
]

# Slot boundaries come from the actual group sizes instead of a hard-coded
# row count, so the figure stays correct if the number of attempts changes.
sizes = [len(frame) for frame, _, _, _ in plot_groups]
starts = [sum(sizes[:i]) for i in range(len(sizes))]
total = max(sum(sizes), 1)  # avoid a zero-width axis when there is no data

fig, ax = plt.subplots()

# Plot data
for (frame, color, marker, label), start, size in zip(plot_groups, starts, sizes):
    ax.scatter(range(start, start + size), frame["time"],
               color=color, marker=marker, label=label)

# Separate the groups with a dashed line after each slot
for start, size in zip(starts, sizes):
    ax.axvline(start + size - 0.5, color='k', ls='--', lw=0.7)

# Mean lines: axhline takes xmin/xmax as fractions of the axes width, which
# map onto the slots because the x-limits below span exactly `total` units.
for i, ((frame, _, _, _), start, size) in enumerate(zip(plot_groups, starts, sizes)):
    ax.axhline(frame["time"].mean(),
               xmin=start / total, xmax=(start + size) / total,
               color='k',
               label="group mean" if i == 0 else "_nolegend_")  # one legend entry only
ax.set_xlim(-0.5, total - 0.5)

ax.set_ylabel("Time (s)")
ax.legend()
ax.set_xticks([])
fig.savefig("figs/"+'time_scatter_groups.png')
plt.close(fig)  # the figure is only needed on disk

<Figure size 1800x1200 with 0 Axes>

In [6]:
# Scatter plot of completion times, timetable on the left and StringCharter on the right.
# `sc` and `tt` are reused by later cells.
sc = study_data[(study_data["tool"]=="stringcharter")]
tt = study_data[(study_data["tool"]=="timetable")]

# Slot widths come from the actual group sizes instead of a hard-coded row count.
n_tt, n_sc = len(tt), len(sc)
n_total = max(n_tt + n_sc, 1)  # avoid a zero-width axis when there is no data

fig, ax = plt.subplots()

# Plot data
ax.scatter(range(n_tt), tt["time"], color='b', marker='o', label="Timetable")
ax.scatter(range(n_tt, n_tt + n_sc), sc["time"], color='r', marker='x', label="StringCharter")

# Separate the two groups
ax.axvline(n_tt - 0.5, color='k', ls='--', lw=0.7)

# Mean lines: xmin/xmax are fractions of the axes width, which line up with
# the two slots because the x-limits below span exactly `n_total` units.
ax.axhline(tt["time"].mean(), xmin=0, xmax=n_tt / n_total, color='k', label="group mean")
ax.axhline(sc["time"].mean(), xmin=n_tt / n_total, xmax=1, color='k')
ax.set_xlim(-0.5, n_total - 0.5)

ax.set_ylabel("Time (s)")
ax.legend()
ax.set_xticks([])
fig.savefig("figs/"+'time_scatter.png')
plt.close(fig)  # the figure is only needed on disk

<Figure size 1800x1200 with 0 Axes>

In [7]:
# OVERLAP: both tools are drawn over the same x positions so the point clouds
# and their means can be compared directly.
uids = range(1,5)
x_labels = [str(uid) for _ in range(6) for uid in uids]

sc = study_data[study_data["tool"] == "stringcharter"]
tt = study_data[study_data["tool"] == "timetable"]

# Both tools are plotted against the same index range (half of all rows).
span = len(study_data) // 2
attempt_positions = range(0, span)

# (frame, colour, marker, scatter label, mean-line label)
overlap_series = [
    (tt, 'b', 'o', "Timetable", "Timetable mean"),
    (sc, 'r', 'x', "StringCharter", "Stringcharter mean"),
]

# Plot data
for frame, colour, marker, points_label, _ in overlap_series:
    plt.scatter(attempt_positions, frame.time, color=colour, marker=marker, label=points_label)

# Mean lines, one per tool, in the tool's colour across the full width
for frame, colour, _, _, mean_label in overlap_series:
    plt.axhline(frame.time.mean(), color=colour, label=mean_label)

plt.xlabel("Attempt")
plt.ylabel("Time (s)")
plt.legend()
plt.xticks([])
plt.savefig("figs/"+'time_scatter_overlap.png')
plt.clf()

<Figure size 1800x1200 with 0 Axes>

In [8]:
# Box plot of completion times per tool (relies on `tt` and `sc` from the cell above).
box_series = [tt.time, sc.time]
box_names = ["Timetable", "StringCharter"]
plt.boxplot(box_series, labels=box_names)
plt.xlabel("Group")
plt.ylabel("Time (s)")
plt.savefig("figs/"+"time_box.png")
plt.clf()

<Figure size 1800x1200 with 0 Axes>

In [9]:
def _mean_std_row(name, frame):
    """Return ``[name, mean, std]`` for the ``time`` column of ``frame``."""
    times = frame["time"]
    return [name, times.mean(), times.std()]


# Per-tool frames (all datasets pooled)
sc = study_data[study_data["tool"] == "stringcharter"]
tt = study_data[study_data["tool"] == "timetable"]

# One summary row per tool x dataset group, per tool, and for the whole study
easy_tt_stats = _mean_std_row('easy_tt', easy_tt)
easy_sc_stats = _mean_std_row('easy_sc', easy_sc)
hard_tt_stats = _mean_std_row('hard_tt', hard_tt)
hard_sc_stats = _mean_std_row('hard_sc', hard_sc)
sc_stats = _mean_std_row('sc', sc)
tt_stats = _mean_std_row('tt', tt)
total_stats = _mean_std_row("all", study_data)

# Collect the rows into a table and write it out
df = pd.DataFrame(
    [easy_tt_stats, easy_sc_stats, hard_tt_stats, hard_sc_stats, sc_stats, tt_stats, total_stats],
    columns=['data', 'mean', 'std'],
)
df.to_csv("stats/time_stats.csv")

In [10]:
# Shapiro-Wilk normality test for every group, then Welch's t-test between the
# two tools. The results are reported so the interpretation below can be
# checked against actual numbers.
normality_groups = [
    ("easy_tt", easy_tt),
    ("easy_sc", easy_sc),
    ("hard_tt", hard_tt),
    ("hard_sc", hard_sc),
    ("sc", sc),
    ("tt", tt),
    ("all", study_data),
]

shapiro_rows = []
for name, group in normality_groups:
    w, shapiro_p = stats.shapiro(group.time)
    # p < 0.05 means the normality assumption is rejected for this group
    shapiro_rows.append([name, w, shapiro_p, shapiro_p < 0.05])
shapiro_results = pd.DataFrame(
    shapiro_rows, columns=["data", "W", "p", "rejects normality (p < 0.05)"]
)

# t-test (equal_var=False selects Welch's variant, which does not assume equal variances)
t, p = stats.ttest_ind(sc.time, tt.time, equal_var = False)
print(f"Welch t-test (stringcharter vs timetable): t = {t:.3f}, p = {p:.3f}")

shapiro_results


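p > 0.05, so we fail to reject the null hypothesis that there is no difference in completion time between the timetable and StringCharter.
Although the overall mean time for StringCharter is slightly higher, the difference is not statistically significant, so we cannot conclude that the timetable performs better than StringCharter.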

## Form data (before)

In [11]:
# Load the pre-study questionnaire answers and display the full table.
# NOTE(review): the output shows raw participant answers (age, sex, alcohol use);
# confirm it is acceptable to display them before sharing the notebook.
form_bef_data = pd.read_csv(filepath_or_buffer="data/StringCharter study - before.csv")
form_bef_data

Unnamed: 0,Timestamp,Please enter your ID number,Please state your age,Please state your sex,How often do you usually ride on a public transportation?,Did you take any medications prior this study (24 hours),"If your answer is ""Yes"" on previous question, please write which one","Do you have some visual impairments (color blindness, poor eyesight etc.)?","If your answer is ""Yes"" on previous question, please write which one.1","In last 24 hours, did you consume any alcoholic beverages?","If your answer is ""Yes"" on previous question, please write when and what amount","In last 24 hours, did you consume any drugs?","If your answer is ""Yes"" on previous question, please write which one, when and what amount"
0,2021/01/12 9:40:54 PM GMT+1,128524,23,Female,Once per week,No,,No,,No,,No,
1,2021/01/12 9:41:25 PM GMT+1,129485,25,Male,Up to 4 times per week,No,,No,,Yes,Beer,No,
2,2021/01/12 9:41:47 PM GMT+1,535623,25,Female,I don't ride on a public transportation,No,,No,,No,,No,


## Form data (after)

In [19]:
# Load the post-task questionnaire ratings (one row per submitted form) and display them.
form_aft_data = pd.read_csv(filepath_or_buffer="data/StringCharter study - after.csv")
form_aft_data

Unnamed: 0,Timestamp,Please enter your ID number,Which tool did you use?,Which of this test did you just do?,mental,physical,time,effort,performance,frustration
0,2021/01/12 9:50:01 PM GMT+1,128524,String Charter,Graz - Wien Hbf - Graz,9,7,2,7,10,3
1,2021/01/12 9:50:21 PM GMT+1,128524,Timetable,Graz - Wien Hbf - Graz,4,2,6,4,10,5
2,2021/01/12 9:51:01 PM GMT+1,128524,String Charter,Graz - Innsbruck - Graz,8,2,4,4,8,5
3,2021/01/12 9:51:44 PM GMT+1,128524,Timetable,Graz - Innsbruck - Graz,7,1,6,3,8,4
4,2021/01/12 9:52:01 PM GMT+1,129485,String Charter,Graz - Wien Hbf - Graz,7,7,7,7,7,7
5,2021/01/12 9:52:27 PM GMT+1,129485,Timetable,Graz - Wien Hbf - Graz,9,3,3,3,8,2
6,2021/01/12 9:52:48 PM GMT+1,129485,String Charter,Graz - Innsbruck - Graz,9,1,3,7,10,1
7,2021/01/12 9:53:08 PM GMT+1,129485,Timetable,Graz - Innsbruck - Graz,9,1,2,9,7,5
8,2021/01/12 9:53:36 PM GMT+1,535623,String Charter,Graz - Wien Hbf - Graz,7,3,8,6,10,1
9,2021/01/12 9:53:50 PM GMT+1,535623,Timetable,Graz - Wien Hbf - Graz,9,2,7,4,6,4


In [15]:
# Usability plots: one scatter figure per questionnaire attribute, timetable
# ratings on the left and StringCharter ratings on the right.
# `attributes`, `sc_u` and `tt_u` are reused by the statistics cell below.
attributes = ["mental", "physical", "time", "effort", "performance", "frustration"]
sc_u = form_aft_data[form_aft_data["Which tool did you use?"]=="String Charter"]
tt_u = form_aft_data[form_aft_data["Which tool did you use?"]=="Timetable"]

# Each tool gets a slot as wide as its own number of answers.
n_tt, n_sc = len(tt_u), len(sc_u)
n_total = max(n_tt + n_sc, 1)  # avoid a zero-width axis when there is no data

for att in attributes:
    fig, ax = plt.subplots()

    # Plot data
    ax.scatter(range(n_tt), tt_u[att], color='b', marker='o', label="Timetable")
    ax.scatter(range(n_tt, n_tt + n_sc), sc_u[att], color='r', marker='x', label="StringCharter")

    # Separate: the dashed line sits on the boundary between the two groups
    # (it was previously drawn at the right edge of the axes).
    ax.axvline(n_tt - 0.5, color='k', ls='--', lw=0.7)

    # Mean lines: xmin/xmax are fractions of the axes width, which line up with
    # the two slots because the x-limits below span exactly `n_total` units.
    ax.axhline(tt_u[att].mean(), xmin=0, xmax=n_tt / n_total, color='k', label="group mean")
    ax.axhline(sc_u[att].mean(), xmin=n_tt / n_total, xmax=1, color='k')
    ax.set_xlim(-0.5, n_total - 0.5)

    # Fixed y-range with integer ticks 1-10 so every attribute uses the same scale
    ax.set_ylim(0.5, 10.5)
    ax.set_yticks(range(1, 11))
    ax.set_ylabel(att.capitalize())
    ax.legend()
    ax.set_xticks([])
    fig.savefig("figs/"+att+'.png')
    plt.close(fig)  # release the figure; several are created in this loop

<Figure size 1800x1200 with 0 Axes>

In [18]:
# Per-attribute summary: mean, std and Shapiro-Wilk p-value for each tool and
# for both tools pooled, plus the Welch t-test p-value between the two tools.
# One CSV file is written per attribute.
stat_columns = ['mean', 'std', "shapiro test (p)", "t-test"]

for att in attributes:
    sc_scores = sc_u[att]
    tt_scores = tt_u[att]
    all_scores = form_aft_data[att]

    # t-test between the tools; only the p-value (second element) is kept
    t_test_p = stats.ttest_ind(sc_scores, tt_scores, equal_var = False)[1]

    # (row label, scores, value for the t-test column); only the pooled row
    # carries the t-test p-value, the per-tool rows leave that cell empty
    row_specs = [
        ('stringcharter', sc_scores, ""),
        ('timetable', tt_scores, ""),
        ("both", all_scores, t_test_p),
    ]

    rows = []
    for row_label, scores, t_test_cell in row_specs:
        shapiro_p = stats.shapiro(scores)[1]  # test of normality, p-value only
        rows.append([row_label, scores.mean(), scores.std(), shapiro_p, t_test_cell])

    df = pd.DataFrame(rows, columns=['data-'+att] + stat_columns)
    df.to_csv("stats/"+att+".csv")