In [21]:
import pandas as pd
import re
import numpy as np
import scipy.stats as stats
from scipy.stats import wilcoxon
from constants import diffMappingToScore, questions, labelsToElements
from functools import reduce
#from utils import fixationProportionThresholdAnalysis, phaseDetection, dwellRegressionOnRelevantElements, periodCalculation, scanPathPrecision, averageFixationDuration, averageSaccadeAmplitudeForPhases, addQuestionInfo

In [22]:
def dwellRegressionOnRelevantElements(dwells,grouper):
    """Count visits and revisits of the relevant elements.

    Only dwells whose ``element_`` is contained in that row's
    ``Relevant elements names`` are kept. The remaining dwells are counted
    (via the ``id`` column) per participant, question, tab, element and the
    additional ``grouper`` columns.

    Parameters
    ----------
    dwells : pandas.DataFrame
        Must provide the columns ``participant``, ``currentQuestion``,
        ``tabName``, ``element_``, ``id``, ``Relevant elements names`` and
        every column listed in ``grouper``.
    grouper : list of str
        Extra column names appended to the grouping keys.

    Returns
    -------
    pandas.DataFrame
        The grouping columns plus ``visits`` (number of non-null ``id``
        values in the group) and ``revisits`` (``visits - 1``).
    """
    # row-wise membership test: is the dwelled element relevant for its question?
    onRelevant = dwells.apply(lambda row: row["element_"] in row["Relevant elements names"], axis=1)
    relevantDwells = dwells[onRelevant]

    # number of dwells per group
    groupKeys = ['participant','currentQuestion','tabName','element_'] + grouper
    visitCounts = relevantDwells.groupby(groupKeys, as_index=False).agg(visits=('id','count'))

    # every dwell after the first one on the same element counts as a revisit
    visitCounts["revisits"] = visitCounts["visits"] - 1

    return visitCounts

def timeInterval(x):
    """Return the last value of ``x`` minus its first value (by position).

    ``x`` is accessed with ``.iloc``, so it is expected to be a pandas Series
    (or another object offering positional ``.iloc`` access). The values are
    not sorted here; the result is the elapsed time only if ``x`` is already
    in chronological order.
    """
    first, last = x.iloc[0], x.iloc[-1]
    return last - first

def periodCalculation(allData):
    """Compute the time interval per question, participant and phase.

    The rows are grouped by ``currentQuestion``, ``participant``, ``Type1``,
    ``Type2``, ``Type3`` and ``Phase``; for every group ``timeInterval`` is
    the last ``Timestamp`` minus the first one (see ``timeInterval()``).

    Parameters
    ----------
    allData : pandas.DataFrame
        Must provide the grouping columns above and ``Timestamp``.

    Returns
    -------
    pandas.DataFrame
        One row per group with the grouping columns and ``timeInterval``.
    """
    # Type1/Type2/Type3 are part of the key only to carry them into the result
    # for later grouping; currentQuestion is already the most fine-grained
    # question level.
    groupKeys = ['currentQuestion','participant','Type1','Type2','Type3','Phase']
    return allData.groupby(groupKeys, as_index=False).agg(timeInterval=('Timestamp', timeInterval))

def scanPathPrecision(allData,grouper):
    """Compute the scan-path precision per participant and question.

    Scan-path precision is the share of rows (fixations) whose ``element``
    is contained in that row's ``Relevant elements names``.

    The input frame is not modified: the 0/1 ``relevant`` label is added to
    a copy only.

    Parameters
    ----------
    allData : pandas.DataFrame
        Must provide ``participant``, ``currentQuestion``, ``element``,
        ``Relevant elements names``, ``Timestamp``, ``timestamp_formatted``
        and every column listed in ``grouper``.
    grouper : list of str
        Extra column names appended to the grouping keys.

    Returns
    -------
    pandas.DataFrame
        The grouping columns plus ``scan_path_precision`` (mean of the 0/1
        label), ``timeInterval`` (last minus first ``Timestamp``) and
        ``timestamp`` (first ``timestamp_formatted`` of the group).
    """
    # label fixations on relevant elements; a plain comprehension avoids the
    # slow row-wise apply and also yields a valid (empty) label for an empty frame
    relevantFlags = [
        1 if element in relevantNames else 0
        for element, relevantNames in zip(allData["element"], allData["Relevant elements names"])
    ]
    labelled = allData.assign(relevant=relevantFlags)

    return labelled.groupby(['participant','currentQuestion']+grouper, as_index=False).agg(
        scan_path_precision=('relevant','mean'),
        timeInterval=('Timestamp', timeInterval),
        timestamp=('timestamp_formatted', 'first'),
    )


def averageFixationDuration(fixationData,grouper):
    """Average the fixation duration per participant and question.

    Parameters
    ----------
    fixationData : pandas.DataFrame
        Must provide ``participant``, ``currentQuestion``,
        ``Fixation Duration``, ``timestamp_formatted`` and every column
        listed in ``grouper``.
    grouper : list of str
        Extra column names appended to the grouping keys.

    Returns
    -------
    pandas.DataFrame
        The grouping columns plus ``Average_Fixation_Duration`` (mean of
        ``Fixation Duration``) and ``timestamp`` (first
        ``timestamp_formatted`` of the group).
    """
    groupKeys = ['participant','currentQuestion'] + grouper
    perGroup = fixationData.groupby(groupKeys, as_index=False)
    return perGroup.agg(
        Average_Fixation_Duration=('Fixation Duration','mean'),
        timestamp=('timestamp_formatted', 'first'),
    )

def averageSaccadeAmplitudeForPhases(phases,data,grouper):
    """Average saccade amplitude within the time span of every group of ``phases``.

    For each group, ``asaIntimeInterval()`` receives the group's
    "participant;currentQuestion;Timestamp" keys and averages the
    ``Saccade Amplitude`` values of ``data`` that fall between the first and
    the last timestamp of the group.

    The helper key column is built on a copy, so ``phases`` itself is not
    modified.

    Parameters
    ----------
    phases : pandas.DataFrame
        Must provide ``participant``, ``currentQuestion``, ``Timestamp``,
        ``timestamp_formatted`` and every column listed in ``grouper``.
    data : pandas.DataFrame
        Saccade data handed to ``asaIntimeInterval()``.
    grouper : list of str
        Complete list of grouping columns (nothing is added to it here).
        NOTE(review): only the first row of a group determines the
        participant and question used for the lookup, so ``grouper`` should
        presumably contain ``participant`` and ``currentQuestion``.

    Returns
    -------
    pandas.DataFrame
        The grouping columns plus ``avSaccadeAmplitude`` and ``timestamp``
        (first ``timestamp_formatted`` of the group).
    """
    # pack the attributes needed for the lookup into one string column, because
    # a named aggregation only hands a single column to the aggregation function
    mergeKey = phases['participant'].astype(str) +";"+phases['currentQuestion'].astype(str) +";"+phases['Timestamp'].astype(str)
    keyed = phases.assign(RelAttrMerge=mergeKey)

    return keyed.groupby(grouper,as_index=False).agg(
        avSaccadeAmplitude=('RelAttrMerge', (lambda x: asaIntimeInterval(x,data))),
        timestamp=('timestamp_formatted', 'first'),
    )



def asaIntimeInterval(x,data):
    """Average saccade amplitude of one participant/question within a time window.

    Parameters
    ----------
    x : pandas.Series of str
        Keys of the form "participant;currentQuestion;Timestamp". The first
        entry supplies participant, question and window start, the last
        entry supplies the window end.
    data : pandas.DataFrame
        Must provide ``participant``, ``currentQuestion``, ``Timestamp`` and
        ``Saccade Amplitude``.

    Returns
    -------
    float
        Mean ``Saccade Amplitude`` of the rows of ``data`` that match the
        participant and question and have ``start <= Timestamp < end``;
        NaN when no row matches.
    """
    # unpack the attributes encoded in the first key
    firstKey = x.iloc[0].split(";")
    participant = firstKey[0]
    currentQuestion = int(firstKey[1])
    startTime = float(firstKey[2])

    # the window ends at the timestamp of the last key (exclusive)
    endTime = float(x.iloc[-1].split(";")[2])

    # saccades of this participant and question inside [startTime, endTime)
    inWindow = (
        (data['participant']==participant)
        & (data['currentQuestion']==currentQuestion)
        & (data['Timestamp']>=startTime)
        & (data['Timestamp']<endTime)
    )

    return data.loc[inWindow, 'Saccade Amplitude'].mean()





def fixationProportionThresholdAnalysis(allData,grouper,shortThreshold=250,longThreshold=500):
    """Proportion of short and long fixations per participant and question.

    Parameters
    ----------
    allData : pandas.DataFrame
        Must provide ``participant``, ``currentQuestion``,
        ``Fixation Duration``, ``timestamp_formatted`` and every column
        listed in ``grouper``.
    grouper : list of str
        Extra column names appended to the grouping keys.
    shortThreshold : number, default 250
        A fixation is "short" when its duration is ``<= shortThreshold``
        (same unit as ``Fixation Duration`` - presumably milliseconds,
        TODO confirm).
    longThreshold : number, default 500
        A fixation is "long" when its duration is ``> longThreshold``.

    Returns
    -------
    pandas.DataFrame
        The grouping columns plus ``shortFixationsProp``,
        ``longFixationsProp`` (both relative to the number of non-null
        durations in the group) and ``timestamp`` (first
        ``timestamp_formatted`` of the group).
    """
    def shortProportion(durations):
        # share of fixations lasting at most shortThreshold
        return ((durations <= shortThreshold)*1).sum()/durations.count()

    def longProportion(durations):
        # share of fixations lasting longer than longThreshold
        return ((durations > longThreshold)*1).sum()/durations.count()

    #groupby participant, currentQuestion and grouper
    fixationData = allData.groupby(['participant','currentQuestion']+grouper, as_index=False).agg(
        shortFixationsProp=('Fixation Duration', shortProportion),
        longFixationsProp=('Fixation Duration', longProportion),
        timestamp=('timestamp_formatted', 'first')
    )

    return fixationData


#Test: OK
def phaseDetection(fixationData,questions):
    """Label every fixation with the phase it belongs to.

    For each participant and question the fixations are scanned in row order
    until every element listed in the question's ``Relevant elements names``
    has been fixated at least once. Fixations before that point are labelled
    ``"search"``; the fixation that completes the set and all later ones are
    labelled ``"inference"``. If the participant never fixates all relevant
    elements of a question, all fixations of that participant/question are
    labelled ``"N/A"``.

    The input frame is not modified; participant/question combinations
    without any fixation are skipped.

    Parameters
    ----------
    fixationData : pandas.DataFrame
        Must provide ``participant``, ``currentQuestion``, ``element`` and
        ``Relevant elements names`` (a collection of element names per row).
    questions : list of dict
        Every dict needs an ``id`` (matched against ``currentQuestion``) and
        ``Relevant elements names``.

    Returns
    -------
    pandas.DataFrame
        The input rows, ordered by participant and then question (both in
        order of first appearance), with a fresh 0..n-1 index and two added
        columns: ``relevant`` (1 if the fixated element is relevant, else 0)
        and ``Phase``.

    Raises
    ------
    KeyError
        If a ``currentQuestion`` value has no entry in ``questions``.
    """
    # work on a copy so the caller's frame does not silently gain a column
    fixationData = fixationData.copy()

    #label relevant elements
    fixationData["relevant"] = [
        1 if element in relevantNames else 0
        for element, relevantNames in zip(fixationData["element"], fixationData["Relevant elements names"])
    ]

    # question lookup by id, built once instead of once per participant/question
    questionsById = {item['id']: item for item in questions}

    labelled = []
    for participant in fixationData["participant"].unique():
        for task in fixationData["currentQuestion"].unique():

            #select participant and task data
            rData = fixationData.loc[(fixationData['participant']==participant) & (fixationData['currentQuestion']==task)]
            if rData.empty:
                # this participant has no fixations for this question
                continue
            #reset index so that the index equals the position within the question
            rData = rData.reset_index(drop=True)

            # relevant elements the participant has not fixated yet
            pending = set(questionsById[task]["Relevant elements names"])

            # position of the fixation that completes the set of relevant
            # elements; -1 while (or if) not all of them have been fixated
            endOfPhase2 = -1
            for position, element in rData.loc[rData['relevant'] == 1, 'element'].items():
                pending.discard(element)
                if not pending:
                    endOfPhase2 = position
                    break

            #the phases are reduced to search and inference but could be more detailed
            if endOfPhase2 == -1:
                rData["Phase"] = "N/A"
            else:
                rData["Phase"] = [
                    "search" if position < endOfPhase2 else "inference"
                    for position in range(len(rData))
                ]

            labelled.append(rData)

    if not labelled:
        # no fixations at all: return the (empty) frame with the expected columns
        return fixationData.assign(Phase="N/A").reset_index(drop=True)

    # concatenate once; concatenating inside the loop is quadratic
    return pd.concat(labelled, axis=0).reset_index(drop=True)


def addQuestionInfo(allData,questions):
    """Attach the question attributes to every row of ``allData``.

    ``currentQuestion`` is cast to integer and used to join the rows with
    ``pd.DataFrame(questions)`` on its ``id`` column (default inner merge,
    so rows without a matching question are dropped).

    The cast is applied to a copy; the caller's frame keeps its original
    ``currentQuestion`` dtype.

    Parameters
    ----------
    allData : pandas.DataFrame
        Must provide ``currentQuestion`` with values convertible to int.
    questions : list of dict
        Question records; every record needs an ``id``.

    Returns
    -------
    pandas.DataFrame
        ``allData`` extended by the columns of the question records.
    """
    #change the type of the question id to integer (on a copy)
    withIntIds = allData.assign(currentQuestion=allData['currentQuestion'].astype('int'))

    #extend the columns with those in DataFrame(questions) based on the common question ID
    return withIntIds.merge(pd.DataFrame(questions), left_on=['currentQuestion'], right_on=['id'])


# def fixationThresholdAnalysis(allData,grouper,threshold_min,threshold_max ):


#     #Keep only fixations with a duration within a specified time interval
#     fixationData = allData.loc[(allData['Fixation Duration']>=threshold_min) & (allData['Fixation Duration']<threshold_max) ]

#     #groupby participant, currentQuestion and grouper
#     fixationData = fixationData.groupby(['participant','currentQuestion']+grouper, as_index=False).agg(Fixations_in_range=('Fixation Duration','count'))
   

#     return fixationData

In [23]:
#load the events data from the CSV file; all later cells derive their frames from `data`
data = pd.read_csv("eventsDataWithAois.csv")

In [24]:
#enrich questions with relevant elements
# the labels are the double-quoted substrings of the question text
questions = [ {**question,**{'Relevant elements labels': re.findall('"(.+?)"', question["question"])}}  for question in questions ]

# a label of the form "[...]" belongs to the label right before it: both are joined into one label.
# The merged list is built separately, because removing items from a list while iterating over it
# skips elements (and list.remove() deletes the first equal item, not necessarily the current one).
bracketLabel = re.compile(r"\[(.+?)\]")
for question in questions:
    mergedLabels = []
    for label in question["Relevant elements labels"]:
        if mergedLabels and bracketLabel.match(label):
            mergedLabels[-1] = f'{mergedLabels[-1]} {label}'
        else:
            # regular label, or a bracket label with no label before it
            mergedLabels.append(label)
    question["Relevant elements labels"] = mergedLabels

questions = [ {**question,**{'Relevant elements count': len(question["Relevant elements labels"])}}  for question in questions ]

In [25]:
#translate every relevant label into its element name (lookup in labelsToElements)
questions = [
    {
        **question,
        'Relevant elements names': [labelsToElements[label] for label in question["Relevant elements labels"]],
    }
    for question in questions
]

In [26]:
#################
#
# Phase detection
#
#################

In [27]:
#keep only the rows that have both a FixID and a currentQuestion
hasFixation = data['FixID'].notna()
hasQuestion = data['currentQuestion'].notna()
fixationData = data.loc[hasFixation & hasQuestion].copy(deep=True)
#add question info
fixationData = addQuestionInfo(fixationData,questions)

"""Q13 (local) and Q25 (global) need to be removed for SP11 due to low data quality"""
sp11Exclusiveness = (fixationData['participant'] == 'SP11-no') & (fixationData['Type3'] == 'Exclusiveness')
fixationData = fixationData.drop(fixationData[sp11Exclusiveness].index)

In [28]:
#detect phases per participant and question: "search" lasts until all relevant elements have been
#fixated, "inference" covers the rest; "N/A" if not all relevant elements were ever fixated
phDectFix = phaseDetection(fixationData,questions)

In [29]:
#add timestamp_formatted column: 'Fixation Start' converted to datetime, interpreting the values as milliseconds
phDectFix["timestamp_formatted"] = pd.to_datetime(phDectFix['Fixation Start'], unit='ms')

In [30]:
#######################
#
# Phase duration
#
#######################

In [31]:
"""
2 rows are removed for participant SP11. This concerns the questions local and global (control-flow) Exclusiveness, since the participant 
skipped the answer for the local Exclusiveness question by mistake. Hence the duration and accuracy were biased!
"""
# NOTE(review): the same participant/Type3 filter is already applied to fixationData before
# phaseDetection is called, so this drop presumably removes no rows here - confirm and deduplicate.
resTimeData = phDectFix.drop(phDectFix[(phDectFix['participant'] == 'SP11-no') & (phDectFix['Type3'] == 'Exclusiveness')].index)
# sanity check: (number of rows, number of columns)
print(resTimeData.shape)

(174701, 37)


In [32]:
#Example query: time interval (last minus first Timestamp) per question, participant and phase
periodCalculation(resTimeData)

Unnamed: 0,currentQuestion,participant,Type1,Type2,Type3,Phase,timeInterval
0,7,KP1-no,Local,Control-flow,Ordering,,75912.797
1,7,KP10-no,Local,Control-flow,Ordering,inference,15914.914
2,7,KP10-no,Local,Control-flow,Ordering,search,13181.871
3,7,KP11-no,Local,Control-flow,Ordering,inference,55194.007
4,7,KP11-no,Local,Control-flow,Ordering,search,29780.136
...,...,...,...,...,...,...,...
1028,46,SP6-no,Global,Data-flow,Data-based Routing,search,74863.285
1029,46,SP7-no,Global,Data-flow,Data-based Routing,inference,73650.420
1030,46,SP7-no,Global,Data-flow,Data-based Routing,search,51252.836
1031,46,SP8-no,Global,Data-flow,Data-based Routing,,80591.306


In [33]:
#phase durations, without the participant/question pairs for which no phases were detected ("N/A")
periods = periodCalculation(resTimeData)
phaseDetected = periods["Phase"].ne("N/A")
periods = periods.loc[phaseDetected].copy(deep=True)
periods

Unnamed: 0,currentQuestion,participant,Type1,Type2,Type3,Phase,timeInterval
1,7,KP10-no,Local,Control-flow,Ordering,inference,15914.914
2,7,KP10-no,Local,Control-flow,Ordering,search,13181.871
3,7,KP11-no,Local,Control-flow,Ordering,inference,55194.007
4,7,KP11-no,Local,Control-flow,Ordering,search,29780.136
5,7,KP12-no,Local,Control-flow,Ordering,inference,24174.483
...,...,...,...,...,...,...,...
1026,46,SP5-no,Global,Data-flow,Data-based Routing,search,58585.158
1027,46,SP6-no,Global,Data-flow,Data-based Routing,inference,85947.712
1028,46,SP6-no,Global,Data-flow,Data-based Routing,search,74863.285
1029,46,SP7-no,Global,Data-flow,Data-based Routing,inference,73650.420


In [59]:
#Check, per participant, whether the mean search duration is larger for global than for local tasks (control-flow only)
searchControlFlow = (periods['Phase'] == 'search') & (periods['Type2']=='Control-flow')
p_search = periods.loc[searchControlFlow]

#mean duration per participant and task type
p_searchCompare = p_search.groupby(['participant','Type1'],as_index=False).agg({'timeInterval':'mean'})

#global (suffix _x) next to local (suffix _y), one row per participant
p_searchCompare_G = p_searchCompare.loc[p_searchCompare['Type1'] == 'Global']
p_searchCompare_L = p_searchCompare.loc[p_searchCompare['Type1'] == 'Local']
p_searchCompare_G_L = p_searchCompare_G.merge(p_searchCompare_L, on='participant')

#True when global > local, otherwise the (global, local) pair so that the exception can be inspected
p_searchCompare_G_L['Glo > Loc'] = p_searchCompare_G_L.apply(
    lambda row: True if row['timeInterval_x'] > row['timeInterval_y'] else (row['timeInterval_x'],row['timeInterval_y']),
    axis=1,
)
p_searchCompare_G_L

Unnamed: 0,participant,Type1_x,timeInterval_x,Type1_y,timeInterval_y,Glo > Loc
0,KP1-no,Global,42251.0185,Local,42206.864,True
1,KP10-no,Global,50091.713,Local,18083.46025,True
2,KP11-no,Global,33654.67,Local,81439.0865,"(33654.669999999925, 81439.08650000021)"
3,KP12-no,Global,40184.511,Local,19123.3955,True
4,KP13-no,Global,68074.893333,Local,40992.693,True
5,KP14-no,Global,44766.74875,Local,22882.0245,True
6,KP15-no,Global,61602.22625,Local,32858.284,True
7,KP16-no,Global,54189.1335,Local,30080.709333,True
8,KP17-no,Global,44192.972,Local,28253.09375,True
9,KP18-no,Global,48389.57525,Local,38474.8545,True


In [60]:
#Check, per participant, whether the mean inference duration is larger for global than for local tasks (control-flow only)
#NOTE: the p_search* variable names are reused from the search comparison; in this cell they hold inference data
inferenceControlFlow = (periods['Phase'] == 'inference') & (periods['Type2']=='Control-flow')
p_search = periods.loc[inferenceControlFlow]

#mean duration per participant and task type
p_searchCompare = p_search.groupby(['participant','Type1'],as_index=False).agg({'timeInterval':'mean'})

#global (suffix _x) next to local (suffix _y), one row per participant
p_searchCompare_G = p_searchCompare.loc[p_searchCompare['Type1'] == 'Global']
p_searchCompare_L = p_searchCompare.loc[p_searchCompare['Type1'] == 'Local']
p_searchCompare_G_L = p_searchCompare_G.merge(p_searchCompare_L, on='participant')

#True when global > local, otherwise the (global, local) pair so that the exception can be inspected
p_searchCompare_G_L['Glo > Loc'] = p_searchCompare_G_L.apply(
    lambda row: True if row['timeInterval_x'] > row['timeInterval_y'] else (row['timeInterval_x'],row['timeInterval_y']),
    axis=1,
)
p_searchCompare_G_L

Unnamed: 0,participant,Type1_x,timeInterval_x,Type1_y,timeInterval_y,Glo > Loc
0,KP1-no,Global,31047.9605,Local,37907.025,"(31047.960500000045, 37907.02499999991)"
1,KP10-no,Global,85948.98675,Local,19431.21725,True
2,KP11-no,Global,30819.56275,Local,19366.6525,True
3,KP12-no,Global,91206.079,Local,11401.9755,True
4,KP13-no,Global,119775.250667,Local,34282.69675,True
5,KP14-no,Global,150743.75325,Local,49949.78975,True
6,KP15-no,Global,158643.91575,Local,36028.73225,True
7,KP16-no,Global,64291.691,Local,12579.024,True
8,KP17-no,Global,134673.68,Local,43676.3715,True
9,KP18-no,Global,103209.30125,Local,24376.41475,True


In [34]:
# keep only control-flow questions

In [35]:
#from here on, `periods` contains control-flow questions only
periods = periods[periods['Type2']=='Control-flow']

In [36]:
#Descriptives (Local/Global)

In [37]:
#horizontal analysis: mean phase duration (timeInterval) per task type (Type1) and phase
periods.groupby(['Type1','Phase']).agg({"timeInterval":"mean"})

Unnamed: 0_level_0,Unnamed: 1_level_0,timeInterval
Type1,Phase,Unnamed: 2_level_1
Global,inference,77837.355705
Global,search,45084.496859
Local,inference,24856.041461
Local,search,28074.683844


In [52]:
#horizontal analysis: standard deviation of the phase duration (timeInterval) per task type (Type1) and phase
periods.groupby(['Type1','Phase']).agg({"timeInterval":"std"})

Unnamed: 0_level_0,Unnamed: 1_level_0,timeInterval
Type1,Phase,Unnamed: 2_level_1
Global,inference,59235.966705
Global,search,24982.531932
Local,inference,19636.423453
Local,search,21918.277284


In [39]:
#Inferentials

In [40]:
#horizental analysis
types = ['Local','Global']
measures = ['timeInterval']

for typeQ in types:
    print(f'----{typeQ}')
    for measure in measures:
        print(f'--{measure}')
        
        #get one measure per participant/Type1/Phase
        periods_part = periods.groupby(['participant','Type1','Phase'], as_index=False).agg({measure:"mean"})
       
        measure_a = periods_part.loc[(periods_part["Type1"]==typeQ) & (periods_part["Phase"]=='inference')][['participant',measure]]
        measure_b = periods_part.loc[(periods_part["Type1"]==typeQ) & (periods_part["Phase"]=='search')][['participant',measure]]
        print(len(measure_a), len(measure_b))
        measure_merge = measure_a.merge(measure_b, on=['participant'], suffixes=('_a', '_b'), how='inner')
        print(len(measure_merge))
        print(stats.wilcoxon(measure_merge[f'{measure}_a'], measure_merge[f'{measure}_b']))




----Local
--timeInterval
44 44
44
WilcoxonResult(statistic=338.0, pvalue=0.06747004004739665)
----Global
--timeInterval
44 44
44
WilcoxonResult(statistic=119.0, pvalue=2.4714264554859255e-06)


In [41]:
#vertical analysis
phases = ['search','inference']
measures = ['timeInterval']

for phaseQ in phases:
    print(f'----{phaseQ}')
    for measure in measures:
        print(f'--{measure}')
        
        periods_part = periods.groupby(['participant','Type1','Phase'], as_index=False).agg({measure:"mean"})
       
        measure_a = periods_part.loc[(periods_part["Phase"]==phaseQ) & (periods_part["Type1"]=='Local')][['participant',measure]]
        measure_b = periods_part.loc[(periods_part["Phase"]==phaseQ) & (periods_part["Type1"]=='Global')][['participant',measure]]
        print(len(measure_a), len(measure_b))
        measure_merge = measure_a.merge(measure_b, on=['participant'], suffixes=('_a', '_b'), how='inner')
        print(len(measure_merge))
        print(stats.wilcoxon(measure_merge[f'{measure}_a'], measure_merge[f'{measure}_b']))

----search
--timeInterval
44 44
44
WilcoxonResult(statistic=52.0, pvalue=4.570324563246686e-09)
----inference
--timeInterval
44 44
44
WilcoxonResult(statistic=1.0, pvalue=2.2737367544323206e-13)


In [42]:
#Interpretation
# The duration of the phases differs between local and global tasks:
#    1. Local tasks (easy, according to the BPM paper): the search phase is slightly longer than the inference phase (28 sec vs 25 sec), but the difference is not significant at the 0.05 level (p = 0.067) -> balanced search and inference
#    2. Global tasks (difficult, according to the BPM paper): the inference phase is significantly longer than the search phase (78 sec vs 45 sec, p < 0.001) -> more need for inference than for search

# In local tasks, search and inference take more or less the same time.
# In global tasks, inference takes far more time than search, which suggests that the inference process is more time-consuming in global tasks.



In [43]:
#######################
#
# comparison between search and inference at the level of local and global tasks 
#
#######################

In [44]:
#######################
#
# Average fixation duration
#
#######################
avFDPT = averageFixationDuration(phDectFix,['Type1','Type2','Type3','Phase'])
#keep only control-flow rows whose phase was detected (i.e. drop "N/A")
avFDPT = avFDPT.loc[(avFDPT["Phase"]!="N/A") & (avFDPT["Type2"]=="Control-flow")].copy(deep=True)
#sorting (extra)
avFDPT = avFDPT.sort_values(by=['participant','currentQuestion','timestamp'])

In [45]:
#######################
#
# Fixation threshold proportion analysis
#
#######################
fxThresholdsData = fixationProportionThresholdAnalysis(phDectFix,['Type1','Type2','Type3','Phase'])
#keep only control-flow rows whose phase was detected (i.e. drop "N/A")
fxThresholdsData = fxThresholdsData.loc[(fxThresholdsData["Phase"]!="N/A") & (fxThresholdsData["Type2"]=="Control-flow")].copy(deep=True)
#sorting (extra)
fxThresholdsData = fxThresholdsData.sort_values(by=['participant','currentQuestion','timestamp'])

In [46]:
####################
#
# Scan-path precision
#
####################
scanPathPrecisionData = scanPathPrecision(phDectFix,['Type1','Type2','Type3','Phase'])
#keep only control-flow rows whose phase was detected (i.e. drop "N/A")
scanPathPrecisionData = scanPathPrecisionData.loc[(scanPathPrecisionData["Phase"]!="N/A") & (scanPathPrecisionData["Type2"]=="Control-flow")].copy(deep=True)
#sorting (extra)
scanPathPrecisionData = scanPathPrecisionData.sort_values(by=['participant','currentQuestion','timestamp'])

In [47]:
#merge all dataframes (computed previously)
dfs = [avFDPT,scanPathPrecisionData,fxThresholdsData]
#fold the frames into one, left to right, with inner merges on the shared key columns
mergeKeys = ['participant', 'currentQuestion', 'Type1', 'Type2', 'Type3', 'Phase','timestamp']
all_measures = dfs[0]
for nextMeasures in dfs[1:]:
    all_measures = pd.merge(all_measures, nextMeasures, on=mergeKeys, how='inner')
all_measures.columns

Index(['participant', 'currentQuestion', 'Type1', 'Type2', 'Type3', 'Phase',
       'Average_Fixation_Duration', 'timestamp', 'scan_path_precision',
       'timeInterval', 'shortFixationsProp', 'longFixationsProp'],
      dtype='object')

In [48]:
#horizontal analysis: mean of every measure per task type (Type1) and phase
#note: as_index is an option of groupby(), not of agg(), so it is no longer passed to agg();
#the group keys stay in the index, as in the displayed table
allstats = all_measures.groupby(['Type1','Phase']).agg({'Average_Fixation_Duration':'mean',
'scan_path_precision':'mean', 
'shortFixationsProp':'mean', 
'longFixationsProp':'mean'})
allstats.round(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Average_Fixation_Duration,scan_path_precision,shortFixationsProp,longFixationsProp
Type1,Phase,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Global,inference,212.211,0.108,0.761,0.056
Global,search,188.246,0.039,0.805,0.025
Local,inference,211.806,0.222,0.763,0.054
Local,search,184.461,0.037,0.813,0.02


In [49]:
#vertical analysis: mean of every measure per phase and task type (Type1)
#note: as_index is an option of groupby(), not of agg(), so it is no longer passed to agg();
#the group keys stay in the index, as in the displayed table
allstats = all_measures.groupby(['Phase','Type1']).agg({'Average_Fixation_Duration':'mean',
'scan_path_precision':'mean', 
'shortFixationsProp':'mean', 
'longFixationsProp':'mean'})
allstats.round(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Average_Fixation_Duration,scan_path_precision,shortFixationsProp,longFixationsProp
Phase,Type1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
inference,Global,212.211,0.108,0.761,0.056
inference,Local,211.806,0.222,0.763,0.054
search,Global,188.246,0.039,0.805,0.025
search,Local,184.461,0.037,0.813,0.02


In [50]:
#horizontal analysis: for every measure and task type, compare inference against search
#with a paired Wilcoxon signed-rank test (samples paired by participant)
measures = ['Average_Fixation_Duration', 
       'scan_path_precision', 'shortFixationsProp', 
       'longFixationsProp']

# list collecting the p-values, one per (measure, task type) pair in loop order
values = []


for measure in measures:
    for typee in ['Local','Global']:
        print(f'--{measure}, {typee}')
    
        #get one value of the measure per participant/phase (within the current task type)
        all_measures_forType =  all_measures[all_measures['Type1']==typee]
        all_measures_part = all_measures_forType.groupby(['participant','Phase'], as_index=False).agg({measure:"mean"})


        # a = inference, b = search
        measure_a = all_measures_part.loc[(all_measures_part["Phase"]=='inference')][['participant',measure]]
        measure_b = all_measures_part.loc[(all_measures_part["Phase"]=='search')][['participant',measure]]
        print(len(measure_a), len(measure_b))
        # inner merge keeps only participants present in both samples; the printed lengths make losses visible
        measure_merge = measure_a.merge(measure_b, on=['participant'], suffixes=('_a', '_b'), how='inner')
        print(len(measure_merge))
        pvalue = stats.wilcoxon(measure_merge[f'{measure}_a'], measure_merge[f'{measure}_b']).pvalue
        print(pvalue)
        values.append(pvalue)

--Average_Fixation_Duration, Local
44 44
44
7.50560502638109e-10
--Average_Fixation_Duration, Global
44 44
44
7.275957614183426e-11
--scan_path_precision, Local
44 44
44
1.1368683772161603e-13
--scan_path_precision, Global
44 44
44
5.684341886080801e-13
--shortFixationsProp, Local
44 44
44
1.1377991313565872e-06
--shortFixationsProp, Global
44 44
44
1.6860146843100665e-06
--longFixationsProp, Local
44 44
44
1.921307557495311e-11
--longFixationsProp, Global
44 44
44
1.1368683772161603e-12


In [51]:
#vertical analysis: for every measure and phase, compare local against global tasks
#with a paired Wilcoxon signed-rank test (samples paired by participant)
measures = ['Average_Fixation_Duration', 
       'scan_path_precision', 'shortFixationsProp', 
       'longFixationsProp']

# list collecting the p-values, one per (measure, phase) pair in loop order
values = []


for measure in measures:
    for phasee in ['search','inference']:
        print(f'--{measure}, {phasee}')
    
        #get one value of the measure per participant/task type (within the current phase);
        #note: despite its name, all_measures_forType holds the rows of one phase here
        all_measures_forType =  all_measures[all_measures['Phase']==phasee]
        all_measures_part = all_measures_forType.groupby(['participant','Type1'], as_index=False).agg({measure:"mean"})


        # a = local, b = global
        measure_a = all_measures_part.loc[(all_measures_part["Type1"]=='Local')][['participant',measure]]
        measure_b = all_measures_part.loc[(all_measures_part["Type1"]=='Global')][['participant',measure]]
        print(len(measure_a), len(measure_b))
        # inner merge keeps only participants present in both samples; the printed lengths make losses visible
        measure_merge = measure_a.merge(measure_b, on=['participant'], suffixes=('_a', '_b'), how='inner')
        print(len(measure_merge))
        pvalue = stats.wilcoxon(measure_merge[f'{measure}_a'], measure_merge[f'{measure}_b']).pvalue
        print(pvalue)
        values.append(pvalue)

--Average_Fixation_Duration, search
44 44
44
0.016498111235137003
--Average_Fixation_Duration, inference
44 44
44
0.8217495229185943
--scan_path_precision, search
44 44
44
0.3217145050847421
--scan_path_precision, inference
44 44
44
1.1368683772161603e-13
--shortFixationsProp, search
44 44
44
0.07692739918388725
--shortFixationsProp, inference
44 44
44
0.4346026045950566
--longFixationsProp, search
44 44
44
0.04587502898289131
--longFixationsProp, inference
44 44
44
0.19928661531321268
