In [2]:
%matplotlib
#%matplotlib inline
import os
import csv
import fnmatch
import numpy as np
import datetime
import re 
import pandas as pd
import matplotlib.pyplot as plt
import math
import pywt
import itertools
from scipy import stats

pd.options.mode.use_inf_as_na = True

Using matplotlib backend: TkAgg


In [3]:
TimeDwellOrig = 800
TimeFixation = 300

In [4]:
pupilTotal_Difficult = list()
pupilTotal_Medium = list()
pupilTotal_Easy = list()

In [5]:
def FixUserKeys(UserKeys_Old):
    # Fix the situation where comma has divided decimals into separate columns
    
    Column_beforeDecimal = [item[2] for item in UserKeys_Old]
    Column_afterDecimal = [item[3] if len(item)>3 else '00' for item in UserKeys_Old]
    
    UserKeys_ProgressPercent = [float(Column_beforeDecimal[i]+'.'+ Column_afterDecimal[i]) for i in 
                                range(0, len(Column_beforeDecimal))]
    UserKeys_Times = [item[0] for item in UserKeys_Old]
    UserKeys_Keys = [item[1] for item in UserKeys_Old]
    
    UserKeys_New = [[UserKeys_Times[ind], UserKeys_Keys[ind], UserKeys_ProgressPercent[ind]] for ind in 
                    range(0, len(UserKeys_ProgressPercent))]
    
    #UserKeys_New = np.concatenate((UserKeys_Times, UserKeys_Keys, UserKeys_ProgressPercent), axis = 0)
    
    
    return UserKeys_New
        

In [6]:
def ComputeDwellTime(userKeys):
    # modify userKeys to include a column of time instead of progress pct, which is dependent on the then dwell time
    
    timeDwell = TimeDwellOrig
    nKey = -1
    for key in userKeys:
        nKey = nKey + 1
        #print(key[1])
        if key[1] == 'IncreaseDwellTime':
            if float(key[2]) == 1:
                timeDwell = timeDwell + 100
        elif key[1] == 'DecreaseDwellTime':
            #print(key[2])
            if float(key[2]) == 1:
                timeDwell = timeDwell - 100
        else:
            userKeys[nKey].append(str(float(key[2])*timeDwell))
    
    return userKeys

In [7]:
def OptiKeyTypingTime(UserKeys):
    
    TimeTyping = dict()
    
    time1, t1, t2 = UserKeys[0][0].partition('+')
    startTime = datetime.datetime.strptime(re.sub('[:.T]','-',time1[:-1]), "%Y-%m-%d-%H-%M-%S-%f")
    
    time2, t1, t2 = UserKeys[-1][0].partition('+')
    endTime = datetime.datetime.strptime(re.sub('[:.T]','-',time2[:-1]), "%Y-%m-%d-%H-%M-%S-%f")
    
    TimeTyping['startTime'] = startTime
    TimeTyping['endTime'] = endTime
    
    return TimeTyping

In [8]:
def FindTrialEndTimes(KeysSelected, timeTyping):
    # function to find start and end of tasks in experiments
    timeStartEnd = list() # format of this list will be: [startTime1, endTime1/startTime2, endTime2/startTime3, ..., endTimeN]
    
    timeStartEnd.append(timeTyping['startTime'])
    
    nTrial = 1
    
    for keys in KeysSelected:
        
        if keys[1] == 'NextPhrase':
            time1, t1, t2 = keys[0].partition('+')
            endTimeTrial = datetime.datetime.strptime(re.sub('[:.T]','-',time1[:-1]), "%Y-%m-%d-%H-%M-%S-%f")
            timeStartEnd.append(endTimeTrial)
    
    
    timeStartEnd.append(timeTyping['endTime'])
    
    
    return timeStartEnd

In [9]:
# function to convert list of date and time into datetime format list
def timeConversion(timeStrList):
    timeList = list()
    for time in timeStrList:
        time1, t1, t2 = time.partition('+')
        timeList.append(datetime.datetime.strptime(re.sub('[:.T]','-',time1[:-1]), "%Y-%m-%d-%H-%M-%S-%f"))
    return timeList

In [10]:
# This function will return the datetime in items which is the closest to the date pivot
def nearestTimePoint(dates, date):
    
    for d in dates:
        if d < date:
            nearestTP = d
        else:
            continue
    try: 
        nearestTP
        nearestTPind = dates.index(nearestTP)
    except:
        nearestTP = 0
        nearestTPind = -1
        
    return nearestTP, nearestTPind

In [11]:
def CreateTimeEpochsOfTrials(TimeStartEndMixed, UserKeys):
    # function to use list of mixed start and end times of trials and keys looked at by user to create trial epochs
    
    TimeEpochTrial = dict()
    TimeEpochTrial['Start'] = list()
    TimeEpochTrial['End'] = list()
    
    # Create list of times in userKeys to be able to use function 'nearestTimePoint'
    UserKeysStrTimes = [item3[0] for item3 in UserKeys]
    UserKeysTimes = timeConversion(UserKeysStrTimes)
    
    Flag_FoundSleepKey = 0 # Flag to indicate finding sleep key
    
    n = -1
    for time in TimeStartEndMixed:
        n = n + 1
        Flag_FoundSleepKey = 0
        
        if n == 0: # first time is only start time for the first trial
            TimeEpochTrial['Start'].append(time)
            continue
        elif n == len(TimeStartEndMixed)-1: # last time is only the end time for last trial
            
            TimeEpochTrial['End'].append(time)
            
        else: # the middle elements need to be divided into start and end
            TimeEpochTrial['End'].append(time)
            
            timeCheck = time
            
            # find the time in userkeys. Keep going to the previous element till you reach start of selection of
            # nextPhrase key
            while Flag_FoundSleepKey < 1:
                
                nearestToTrialStartTime, nearestToTrialStartInd = nearestTimePoint(UserKeysTimes, timeCheck)
                indCheck = nearestToTrialStartInd
                
                if 'NextPhrase' not in UserKeys[indCheck][1]:
                    TimeEpochTrial['Start'].append(nearestToTrialStartTime)
                    Flag_FoundSleepKey = 1
                    break
                else:
                    indCheck = indCheck - 2 # 2 added instead of 1, to allow nearestTimePoint to find the one before this
                    timeCheck = UserKeysTimes[indCheck]
                    
                
    return TimeEpochTrial      
            

In [12]:
def DwellTimeForBaseline(UserKeys_wDwellTime):
    
    DwellTime = list()
    
    for key in UserKeys_wDwellTime:
        if key[1] == 'NextPhrase':
            #print('NextPhrase found at ', key[2])
            if key[2] == 1:
                DwellTime.append(key[3])
                
    return DwellTime

In [13]:
def Combine2ColumnsTo1GazeLog(GazeLog, Column_1, Column_2):
    
    JoinedList = list()
    
    Column_beforeDecimal = [item4[Column_1] if 'Invalid' not in item4 else 'nan' for item4 in GazeLog]
    Column_afterDecimal = [item4[Column_2] if 'Invalid' not in item4 else 'nan' for item4 in GazeLog]
    
    
    for i in range(0, len(Column_beforeDecimal)):
        if 'Valid' not in Column_beforeDecimal[i] and 'Valid' not in Column_afterDecimal[i]:
            if 'nan' not in Column_beforeDecimal[i] and 'nan' not in Column_afterDecimal[i]:
                JoinedList.append(float(Column_beforeDecimal[i]+'.'+ Column_afterDecimal[i]))
            else:
                JoinedList.append(np.nan)
        else:
            # Rarely, the pupil size is a whole number
            JoinedList.append(np.nan) # we will ignore the row, since there is no way of automatically knowing which - 
            # right or left eye has whole number pupil size
            
    return JoinedList

In [14]:
def Convert2ColumnsToFormPupilSizes(GazeLog):
    # function to convert pupilsizes from 2 columns for every pupil due to comma use instead of decimal, 
    # to proper pupil sizes
    
    PupilLogL = Combine2ColumnsTo1GazeLog(GazeLog, -5, -4)
    PupilLogR = Combine2ColumnsTo1GazeLog(GazeLog, -2, -1)
            
    # if one of the pupils are nan, the other one is converted too
    nPupil = -1
    for pupilL in PupilLogL:
        nPupil = nPupil + 1
        if np.isnan(pupilL):
            if nPupil < len(PupilLogR):
                if not np.isnan(PupilLogR[nPupil]):
                    PupilLogR[nPupil] = np.nan
            else:
                del PupilLogL[len(PupilLogR):]
                
    nPupil = -1
    for pupilR in PupilLogR:
        nPupil = nPupil + 1
        if np.isnan(pupilR):
            if nPupil < len(PupilLogL):
                if not np.isnan(PupilLogL[nPupil]):
                    PupilLogL[nPupil] = np.nan
            else:
                del PupilLogR[len(PupilLogL):]
                
    #print(len(PupilLogL), len(PupilLogR))
    
    return PupilLogL, PupilLogR

In [15]:
def PupilSizeFromTrialTimes(TimeTrial, TimeGazeLog, TimeInternalGazeLog, PupilSizeLogL, PupilSizeLogR):
    # find pupil sizes from the start and end time given
    
    # find start and end time in gazeLog
    timeStart, timeStartInd = nearestTimePoint(TimeGazeLog, TimeTrial[0])
    timeEnd, timeEndInd = nearestTimePoint(TimeGazeLog, TimeTrial[1])
    
    pupilSize_TrialL = PupilSizeLogL[timeStartInd: timeEndInd]
    pupilSize_TrialR = PupilSizeLogR[timeStartInd: timeEndInd]
    
    TimeInternal_Trial = TimeInternalGazeLog[timeStartInd: timeEndInd]
    
    TimeGaze_Trial = TimeGazeLog[timeStartInd: timeEndInd]
    
    return pupilSize_TrialL, pupilSize_TrialR, TimeGaze_Trial, TimeInternal_Trial
    

In [16]:
def filterBlinks(pupilData, timeListComplete):
    # filter any blinks and nan values lasting around 250ms (on average)
    # http://faculty.washington.edu/chudler/facts.html
   
    # blink is every nan value in the range of 100-400ms 
    # 250 ms (23 samples) before and after the blink will also be removed
    extraBlinkSamples = 23    
    
    # remove single missing data, that are due to hardware error
    missingVal_Single = np.argwhere(np.isnan(pupilData))
    missingVal_Single = list(itertools.chain.from_iterable(missingVal_Single)) # flatten the list
    missingVal_SingleDifference = [t - s for s, t in zip(missingVal_Single, missingVal_Single[1:])] # find difference 
    # between consecutive elements
    missingVal_SingleDifference.insert(0, missingVal_Single[0]) # insert the first blink index in the beginning of list
    
    # the list missingVal_SingleDifference contains the index of the first blink, followed by the difference in the index to 
    # the next nan value
    
    # first remove the single nan values, which are missing data
    eyeTracker_missingData = list() # list with index of single missing data  
    valInd = -1

    for val in missingVal_SingleDifference:
        valInd = valInd + 1
        if valInd == 0:
            continue
        if val != 1:
            if missingVal_SingleDifference[valInd-1] !=1: # if there are 2 consecutive missing values (denoted by 2 consecutive
                # non 1 numbers, they are added to the list of eyeTracker_missingData)
                eyeTracker_missingData.append(sum(missingVal_SingleDifference[:valInd]))
                
    # remove single missing values from pupil data
    pupilData_woSingleMissingData0 = [pupilData[ind] if ind not in eyeTracker_missingData else [] 
                                     for ind in range(0, len(pupilData))]
    pupilData_woSingleMissingData = [x for x in pupilData_woSingleMissingData0 if x]
    
    # remove the times for single missing values in pupil data
    timeList_woSingleMissingData0 = [timeListComplete[ind] if ind not in eyeTracker_missingData else [] 
                                     for ind in range(0, len(timeListComplete))]
    timeList_woSingleMissingData = [x for x in timeList_woSingleMissingData0 if x]
    
#     print(len(timeList_woSingleMissingData))
    
    
    
    
    # find the nan values again from pupilData_woSingleMissingData
    missingVal_Rest = np.argwhere(np.isnan(pupilData_woSingleMissingData))
    missingVal_Rest = list(itertools.chain.from_iterable(missingVal_Rest))
    missingVal_RestDifference = [t - s for s, t in zip(missingVal_Rest, missingVal_Rest[1:])]
    missingVal_RestDifference.insert(0, missingVal_Rest[0])
    
    
    # compile and create list of start and end of blinks
    blink_missingData = dict()
    blink_missingData['Start'] = list()
    blink_missingData['End'] = list()
    
    valInd = -1
    for val in missingVal_RestDifference:
        valInd = valInd + 1
        if val > 1:
            #print('value', val)
            # instead of appending the actual index of blink start, since 250ms before and after the blink need to be
            # removed, it is also appended here.
            
            # just make sure that the additional samples do not make the index of blink go in negative
            if sum(missingVal_RestDifference[:valInd+1])-extraBlinkSamples > 0:
                
                blink_missingData['Start'].append(sum(missingVal_RestDifference[:valInd+1])-extraBlinkSamples)
            else:
                blink_missingData['Start'].append(0)
            
            if valInd == 0:
                continue
                
            # make sure that the additional samples do not increase the index to more than the length of the pupilData
            if sum(missingVal_RestDifference[:valInd])+extraBlinkSamples < len(pupilData_woSingleMissingData):
                blink_missingData['End'].append(sum(missingVal_RestDifference[:valInd])+extraBlinkSamples)
            else:
                blink_missingData['End'].append(len(pupilData_woSingleMissingData)-1)
            #print('end', sum(missingVal_RestDifference[:valInd]))
      
    # add the last blink index
    # make sure that the additional samples do not increase the index to more than the length of the pupilData
    if sum(missingVal_RestDifference)+extraBlinkSamples < len(pupilData_woSingleMissingData):
        blink_missingData['End'].append(sum(missingVal_RestDifference)+extraBlinkSamples)
    else:
        blink_missingData['End'].append(len(pupilData_woSingleMissingData)-1)
      
    
    # print start and end values
    #for ind in range(0,len(blink_missingData['Start'])):
    #    print(blink_missingData['Start'][ind]+23, blink_missingData['End'][ind]-23)
        
        
        
    # need to create a list containing indexes that are to be removed
    blinkIndexList = list()
    
    #print(len(blink_missingData['Start']), len(blink_missingData['End']))
    
    
    # remove blinks and additional data from pupil data to get filtered data
    for indInd in range(0, len(blink_missingData['Start'])):
        blinkIndexList.append(range(blink_missingData['Start'][indInd], blink_missingData['End'][indInd]+1))
    # flatten the list
    blinkIndexList = list(itertools.chain.from_iterable(blinkIndexList))
    
    
    ##print(len(pupilData_woSingleMissingData))
    
    pupilData_woRestMissingData0 = [pupilData_woSingleMissingData[ind] if ind not in blinkIndexList else [] 
                                     for ind in range(0, len(pupilData_woSingleMissingData))]
    #for i in enumerate(pupilData_woRestMissingData0):
    #    print(i)
    pupilData_filter = [x for x in pupilData_woRestMissingData0 if x]
    
    #for i in enumerate(pupilData_filter):
    #    print(i)
        
    # remove the times for single missing values in pupil data
    timeList_woRestMissingData0 = [timeList_woSingleMissingData[ind] if ind not in blinkIndexList else [] 
                                     for ind in range(0, len(timeList_woSingleMissingData))]
    time_filter = [x for x in timeList_woRestMissingData0 if x]
    
    #print(len(pupilData_filter))
        
    if np.nan in pupilData_filter:
        print('nan values still present in pupil data')
        #for i in enumerate(pupilData_woSingleMissingData):
        #    print(i)
        
    
    return pupilData_filter, time_filter, blink_missingData

In [17]:
def hampel(vals_orig, k, sd):
    '''
    vals: pandas series of values from which to remove outliers
    k: size of window (including the sample; 7 is equal to 3 on either side of value)
    '''
    # Obtained from: https://stackoverflow.com/questions/46819260/filtering-outliers-how-to-make-median-based-
    # hampel-function-faster
    
    #plt.plot(vals_orig)
    
    #Make copy so original not edited
    vals = pd.DataFrame(vals_orig)      
    #print(vals.isnull().any())
    vals0 = vals.replace([np.inf, -np.inf], np.nan)
    #vals = vals0.astype(float).fillna(method = 'backfill') # linear interpolation instead 
    #print(vals)
    vals = vals0.astype(float).interpolate('linear', limit_direction = 'both') # linear interpolation instead of 
    # simply copying the previous value --\ linear interpolation than cubic to not add any patterns in the data, limit direction
    # set to both, to interpolate the nan values occuring from the start of the series
    
    L= 1.4826
    rolling_median = vals.rolling(window=k, min_periods=1, center=True).median()
    
    #print(rolling_median)
    difference = np.abs(rolling_median-vals)
    median_abs_deviation = difference.rolling(k).median()
    threshold = sd * L * median_abs_deviation
    outlier_idx = difference>threshold
    vals[outlier_idx] = rolling_median[outlier_idx]
    #print(vals)
    #print('datatype', vals.dtypes)
    #print(vals.isnull().any())
    #vals.plot()
    return(vals)

In [18]:
def plotPupilSize(pupilData, timeData, TrialNumber, scoreDifficulty):
    
    dataLenEqualizer = min(min(len(pupilData['Left']), len(pupilData['Right'])), len(timeData))
    
    fig = plt.figure()
    ax = fig.add_subplot(1,1,1)
    ax.plot(timeData[0:dataLenEqualizer], pupilData['Left'][0:dataLenEqualizer], 'b')
    ax.plot(timeData[0:dataLenEqualizer], pupilData['Right'][0:dataLenEqualizer], 'r')
    
    ax.set_ylabel('Relative pupil size [in mm]')
    ax.set_ylim([0.8, 1.2])
    ax.set_title(scoreDifficulty)


In [19]:
def FindAndPlotPupilSizeForEpoch(GazeLog, TimeEpochTrial, DwellTimes_ForBaseline, scoresDifficulty):
    # function that uses the list of start and end trial times to find the pupil sizes for those trials and plots them
    
    # first create a list of times in gaze log
    timeStrGazeLog = [item3[0] for item3 in GazeLog]
    # convert the list of strings to datetime formats
    timeGazeLog = timeConversion(timeStrGazeLog)
    
    # internal time, to depict seconds
    timeInternalGazeLog = [float(item3[1]) for item3 in GazeLog]
    
    # extract pupil sizes in decimals from the strange 2 columns for every pupil
    pupilLogL, pupilLogR = Convert2ColumnsToFormPupilSizes(GazeLog)
    
    pupilL_avgRelative = list()
    pupilR_avgRelative = list()
    pupilL_avgAbsolute = list()
    pupilR_avgAbsolute = list()

    timeOfGaze_TrialList = list()
    
    # for every epoch, plot the pupil size
    for trialNr in range(0, len(timeEpochTrial['Start'])):
        if trialNr == 0:
            continue
        #print(trialNr)
        # find pupil sizes for the trial
        pupilSizeL_Trial, pupilSizeR_Trial, timeGaze_Trial, timeInternal_Trial = PupilSizeFromTrialTimes(
            [TimeEpochTrial['Start'][trialNr], TimeEpochTrial['End'][trialNr]], timeGazeLog, 
                                timeInternalGazeLog, pupilLogL, pupilLogR)
        
        pupilSize_Trial = dict()
        pupilSize_Filter = dict()
        pupilSize_woBlink = dict()
        
        # find difference in consecutive elements of internal time
        timeInternalDifference = [t - s for s, t in zip(timeInternal_Trial, timeInternal_Trial[1:])]
        # divide by 1000 to make it s
        timeOfGaze_Trial = [sum(timeInternalDifference[:i])/1000000 for i in range(1,len(timeInternalDifference))]

        # some trials were skipped, because the sentence was written before. If the time of trial is less than
        # 10s, the trial is skipped
        if timeOfGaze_Trial[-1] < 20:
            print('trial number ', trialNr+1, 'with', timeOfGaze_Trial[-1], 's will be skipped')
            continue
        
        pupilSize_Trial['Left'] = pupilSizeL_Trial
        pupilSize_Trial['Right'] = pupilSizeR_Trial
        
        #if trialNr == 4:
        #    for i in range(0, len(pupilSizeL_Trial)):
        #        print(pupilSizeL_Trial[i], pupilSizeR_Trial[i])
            
        #print('Trial', len(pupilSizeL_Trial), len(pupilSizeR_Trial))
        
        # filter the blinks
        pupilSizeL_woBlink, time_filter, missingPupilData = filterBlinks(
            pupilSizeL_Trial, timeGaze_Trial)
        
        pupilSizeR_woBlink, time_filter, missingPupilData = filterBlinks(
            pupilSizeR_Trial, timeGaze_Trial)
        

        
        #print(index_blinkEndL)
        #print(index_blinkEndR)
        pupilSize_woBlink['Left'] = pupilSizeL_woBlink
        pupilSize_woBlink['Right'] = pupilSizeR_woBlink
        
        #print('After blink', len(pupilSizeL_woBlink), len(pupilSizeR_woBlink))
        # Hampel filter to remove the outliers
        winSize = 25
        pupilSizeL_filter = hampel(pupilSizeL_woBlink, winSize, 3)
        pupilSizeR_filter = hampel(pupilSizeR_woBlink, winSize, 3)

        pupilSize_Filter['Left'] = pupilSizeL_filter.values.tolist()
        pupilSize_Filter['Right'] = pupilSizeR_filter.values.tolist()
        
        pupilSizeL_filterList = [i[0] for i in pupilSizeL_filter.values]
        pupilSizeR_filterList = [i[0] for i in pupilSizeR_filter.values]
        
        #print('filter', len(pupilSizeL_filterList), len(pupilSizeR_filterList))
        RLCorrelation = np.corrcoef(pupilSizeL_filterList, pupilSizeR_filterList)
        
        if RLCorrelation[0][1] < 0.8:
            print(RLCorrelation[0][1])
            print('CORRELATION BETWEEN RIGHT AND LEFT IS NOT GOOD. TRIAL MUST BE REMOVED')
        
        # Relative Pupil Size Calculation 
        # First find baseline pupil size, which is the time when looking at NextPhrase key
        Samples_ForBaseline = int((int(DwellTimes_ForBaseline[trialNr-1][:-2])*90)/1000) # Number of samples of looking at key depend on
        
        #print(DwellTimes_ForBaseline[trialNr-1])
        
        # dwell time
        pupilL_baseline = np.mean(pupilSizeL_filterList[0:Samples_ForBaseline])
        pupilR_baseline = np.mean(pupilSizeR_filterList[0:Samples_ForBaseline])
        
        pupilL_Relative = [pupil/pupilL_baseline for pupil in pupilSizeL_filterList]
        pupilR_Relative = [pupil/pupilR_baseline for pupil in pupilSizeR_filterList]
        
        # average of whole trial
        pupilL_avgRelative.append(np.mean(pupilL_Relative))
        pupilR_avgRelative.append(np.mean(pupilR_Relative))
        
        # average of whole trial
        pupilL_avgAbsolute.append(np.mean(pupilSizeL_filterList))
        pupilR_avgAbsolute.append(np.mean(pupilSizeR_filterList))
        
        #print(np.mean(pupilL_Relative), np.mean(pupilR_Relative))
        
        pupilSize_Relative = dict()
        pupilSize_Relative['Left'] = pupilL_Relative
        pupilSize_Relative['Right'] = pupilR_Relative
        
        #plotPupilSize(pupilSize_Relative, timeOfGaze_Trial, trialNr, scoresDifficulty.values[trialNr-1])
        
        
    return pupilL_avgRelative, pupilR_avgRelative, pupilL_avgAbsolute, pupilR_avgAbsolute

In [20]:
subjName = r'C:\DTU\Data\201812_ExptToCheckMovementEffect\Data'
j = 0
flagFirstSubj = 0
pupilData = dict()
pupilData['RLCorrelation'] = []
pupilAvgRelativeList = list()
pupilAvgAbsoluteList = list()
scoreDifferenceList = list()

# extract self-reported scores list and LIX score of given sentence
file_name = r'C:/DTU/Data/201812_ExptToCheckMovementEffect/Data/Scores.xlsx'


for root, dirs, subfolder in os.walk(subjName):
    if not dirs:
        
        if 'tb' in root or 'trial' in root:
            continue
            
        userKeys = None
        gazeLog = None
        keysSelected = None
        
        for file in subfolder:
            if fnmatch.fnmatch(file, 'user_looks*'):
                try:
                    
                    fUserKey = open(root + '\\' + file, encoding='utf-8')
                    readerUserKey = csv.reader(fUserKey)
                    userKeys = list(readerUserKey)
                    
                    userKeys.remove(userKeys[0])
                except:
                    if fUserKey is not None:
                        
                        fUserKey.close()
                    else:
                        print('error in opening the user looks at log file')
            
            elif fnmatch.fnmatch(file, 'KeySelection*'):
                try:
                    
                    fKeysSelected = open(root + '\\' + file, encoding='utf-8')
                    readerKeysSelected = csv.reader(fKeysSelected)
                    keysSelected = list(readerKeysSelected)
                    
                    keysSelected.remove(keysSelected[0])
                except:
                    if fKeysSelected is not None:
                        
                        fKeysSelected.close()
                    else:
                        print('error in opening the KeySelection log file')
            
            elif fnmatch.fnmatch(file, 'tobiiGazeLog*'):
                try:
                    fGazeLog = open(root + '\\' + file, encoding='utf-8')
                    readerGazeLog = csv.reader(fGazeLog)
                    gazeLog = list(readerGazeLog)
                    
                    gazeLog.remove(gazeLog[0]) # would not matter much even if the first row was not labels
                    gazeLog.remove(gazeLog[-1])

                except:
                    if fGazeLog is not None:
                        fGazeLog.close()
                    else:
                        print('error in opening the gaze log file')
            else:
                continue
            
                # if all these lists exist
            if userKeys is None or keysSelected is None or gazeLog is None:
                continue
            else:
                
                a = re.compile('(?<=ExptToCheckMovementEffect\\\\Data\\\\)(.*)(?=\\\\2018-1)')
                subjName = a.findall(root)[0]
                print(subjName)
                
                if subjName == 'sa\Test_woChinRest\p2':
                    userKeys = userKeys[:-1]
                
                testNr_re = re.compile('(?<=\\\Test)(.*)')
                testNr = 'Test' + testNr_re.findall(subjName)[0]
                if 'rh\Test_wChinRest' in subjName or 'sa\Test_woChinRest' in subjName:
                    testNr = 'Test' + testNr_re.findall(subjName)[0][:-3]
                
                sheet_to_df_map = pd.read_excel(file_name, sheet_name=subjName[0:2])
                columnName = testNr + '_SumOfScores'
                
                # find total of SumOfScores score
                scoresSumOfScores = sheet_to_df_map[columnName]
                scoresSumOfScores = scoresSumOfScores[1:]
                
                columnName1 = testNr + '_Difficulty'
                
                # find total of SumOfScores score
                scoresDifficulty = sheet_to_df_map[columnName1]
                scoresDifficulty = scoresDifficulty[1:]
                
                columnName2 = testNr + '_Complexity'
                
                # find total of SumOfScores score
                scoresComplexity = sheet_to_df_map[columnName2]
                scoresComplexity = scoresComplexity[1:]
                
                scoreDifference = scoresDifficulty - scoresComplexity
                
                #print(scoresSumOfScores)
                
                # fix userKeys due to comma related file changes
                userKeys_new = FixUserKeys(userKeys)
                
                # find dwell time of typing
                userKeys_wDwellTime = ComputeDwellTime(userKeys_new)
                
                # find start time of typing
                timeTyping = OptiKeyTypingTime(userKeys_wDwellTime)
                
                # for some of the subjects, the data was not completely collected
                if subjName == 'sa\Test_woChinRest\p1' or subjName == 'rh\Test_wChinRest\p1':
                    del keysSelected[-1]
                
                # divide complete data into epochs of phrases
                timeStartEndMixed = FindTrialEndTimes(keysSelected, timeTyping)
                
                # create trial time epoch using the list of start/end times of trial and userKeys, to make sure that 
                # Sleep is completely there in every trial, to allow for baseline
                timeEpochTrial = CreateTimeEpochsOfTrials(timeStartEndMixed, userKeys_wDwellTime)
                #print(timeEpochTrial)
                
                dwellTimes_ForBaseline = DwellTimeForBaseline(userKeys_wDwellTime)
                
                # find and plot pupil size for every trial
                pupilL_AvgRelative, pupilR_AvgRelative, pupilL_AvgAbsolute, pupilR_AvgAbsolute = FindAndPlotPupilSizeForEpoch(gazeLog, timeEpochTrial, dwellTimes_ForBaseline, scoresSumOfScores)
                
                if 'sa\Test_woChinRest' in subjName or 'rh\Test_wChinRest' in subjName:
                    if 'p1' in root:
                        pupilL_Avg1Relative = pupilL_AvgRelative
                        pupilR_Avg1Relative = pupilR_AvgRelative
                        pupilL_Avg1Absolute = pupilL_AvgAbsolute
                        pupilR_Avg1Absolute = pupilR_AvgAbsolute
                        continue
                        
                    else:
                        if 'sa\Test_woChinRest' in subjName:
                            pupilL_Avg2Relative = pupilL_AvgRelative[1:]
                            pupilR_Avg2Relative = pupilR_AvgRelative[1:]
                            pupilL_Avg2Absolute = pupilL_AvgAbsolute[1:]
                            pupilR_Avg2Absolute = pupilR_AvgAbsolute[1:]


                        else:
                            pupilL_Avg2Relative = pupilL_AvgRelative
                            pupilR_Avg2Relative = pupilR_AvgRelative
                   
                            pupilL_Avg2Absolute = pupilL_AvgAbsolute
                            pupilR_Avg2Absolute = pupilR_AvgAbsolute
                   
                if subjName == 'sa\Test_woChinRest\p2' or subjName == 'rh\Test_wChinRest\p2':
                    print('Addition of lists')
                    pupilL_AvgRelative = pupilL_Avg1Relative + pupilL_Avg2Relative
                    pupilR_AvgRelative = pupilR_Avg1Relative + pupilR_Avg2Relative
    
                    pupilL_AvgAbsolute = pupilL_Avg1Absolute + pupilL_Avg2Absolute
                    pupilR_AvgAbsolute = pupilR_Avg1Absolute + pupilR_Avg2Absolute
    
    
                pupilAvgRelative = [(pupilR_AvgRelative[i] + pupilL_AvgRelative[i])/2 for i in range(0, len(pupilR_AvgRelative))]
                pupilAvgAbsolute = [(pupilR_AvgAbsolute[i] + pupilL_AvgAbsolute[i])/2 for i in range(0, len(pupilR_AvgAbsolute))]
                
                
                pupilAvgRelativeList.append(pupilAvgRelative)
                pupilAvgAbsoluteList.append(pupilAvgAbsolute)
                scoreDifferenceList.append(scoreDifference)
                
                #print('pupilAvg len', len(pupilAvg), len(scoresSumOfScores))
                #pupil_Difficult = list()
                #pupil_Medium = list()
                #pupil_Easy = list()


                '''for ind in range(0, len(scoresSumOfScores)):
                    #print(scoresSumOfScores.values[ind])
                    if scoresSumOfScores.values[ind] > 12:
                        pupil_Difficult.append(pupilAvg[ind])
                        pupilTotal_Difficult.append(pupilAvg[ind])
        
                    elif scoresSumOfScores.values[ind] > 7:
                        pupil_Medium.append(pupilAvg[ind])
                        pupilTotal_Medium.append(pupilAvg[ind])
        
                    else:
                        pupil_Easy.append(pupilAvg[ind])
                        pupilTotal_Easy.append(pupilAvg[ind])
        
                fig = plt.figure()
                ax = fig.add_subplot(1,1,1)

                ax.plot(range(1,len(pupil_Difficult)+1), pupil_Difficult, 'r', marker = 'o', linestyle = '-', label = 'difficult')
                ax.plot(range(1,len(pupil_Medium)+1), pupil_Medium, 'b', marker = 's', linestyle = '-', label = 'medium')
                ax.plot(range(1,len(pupil_Easy)+1), pupil_Easy, 'c', marker = 'd', linestyle = '-', label = 'easy')


                #ax.plot(range(1,len(scoresSumOfScores)+1), scoresSumOfScores/100, 'y', label = 'SumOfScores')
                
                ax.set_title(subjName)
                
                ax.legend()
   
                print(np.corrcoef(pupilAvg, scoresSumOfScores)) '''
                

bh\Test_wChinRest
trial number  11 with 4.596693 s will be skipped
bh\Test_woChinRest
trial number  11 with 7.339166 s will be skipped
ph\Test_wChinRest
trial number  11 with 18.00399 s will be skipped
ph\Test_woChinRest
0.7896666866081681
CORRELATION BETWEEN RIGHT AND LEFT IS NOT GOOD. TRIAL MUST BE REMOVED
pt\Test_wChinRest
pt\Test_woChinRest
trial number  11 with 17.47867 s will be skipped
rh\Test_wChinRest\p1
rh\Test_wChinRest\p2
trial number  4 with 10.770031 s will be skipped
trial number  10 with 14.989218 s will be skipped
Addition of lists
rh\Test_woChinRest
0.7990423296331524
CORRELATION BETWEEN RIGHT AND LEFT IS NOT GOOD. TRIAL MUST BE REMOVED
trial number  11 with 10.476645 s will be skipped
sa\Test_wChinRest
sa\Test_woChinRest\p1
sa\Test_woChinRest\p2
trial number  3 with 8.449478 s will be skipped
trial number  5 with 8.926912 s will be skipped
Addition of lists


In [21]:
# correlation between difference and pupil size - absolute

for i in range(0,len(pupilAvgAbsoluteList)):
    #print(len(pupilAvgAbsoluteList[i]), len(scoreDifferenceList[i]))
    corr = np.corrcoef(pupilAvgAbsoluteList[i], scoreDifferenceList[i])[0][1]
    t = corr*(7/(1-corr**2))**0.5
    if t > 1.8 or t < -1.895:
        print(i, t, np.corrcoef(pupilAvgAbsoluteList[i], scoreDifferenceList[i])[0][1])


3 1.8951342203224073 0.5823184458086326
7 -2.010155085015549 -0.6049657650817704


In [50]:
for i in range(0,len(pupilAvgRelativeList)):
    #print(len(pupilAvgAbsoluteList[i]), len(scoreDifferenceList[i]))
    corr = np.corrcoef(pupilAvgRelativeList[i], scoreDifferenceList[i])[0][1]
    t = corr*(7/(1-corr**2))**0.5
    if t > 1.8 or t < -1.895:
        print(i, t, np.corrcoef(pupilAvgRelativeList[i], scoreDifferenceList[i])[0][1])

7 -2.2712182287746776 -0.6513584862523096


In [38]:
for i in range(0,len(scoreDifferenceList)):
    print('next')
    #print(len(pupilAvgAbsoluteList[i]), len(scoreDifferenceList[i]))
    for j in range(0, len(pupilAvgAbsoluteList[i])):
        #print(j)
        print(scoreDifferenceList[i+1][j+1], pupilAvgAbsoluteList[i][j], pupilAvgRelativeList[i][j])

next
0 3.879899357232969 1.143184649233656
0 3.3328143263722874 0.9478769125231028
0 3.344146590029346 0.9735491160420577
0 3.3019845487119523 0.9372152525388828
-1 3.4050725676242592 0.8430801169070509
0 3.058319572937809 0.7532101800801009
0 3.1644719761045823 1.0659444588764526
1 3.308093106688024 0.9918971981923403
1 3.128348127203816 0.7972553369918807
next
1 3.9863971678815675 1.0497059738870673
1 3.7906962101143398 0.9425973190710009
2 3.6126248008050266 0.986757055356076
0 3.4966795529410364 0.9988016682610905
0 3.563689811096336 1.0538477257074996
1 3.6774033838732176 1.0539727072624974
1 3.6290117547801812 0.953062806897634
0 3.5410160938616664 1.0351501298057715
1 3.4751802686416844 0.9960071245811647
next
1 5.845128008027968 1.0300512660580534
1 5.868312786802586 1.0603164591491656
1 5.709254012675594 1.0338052426374902
0 5.529005563417131 1.0062390175053606
0 5.448188108585139 1.0488970639765958
1 5.445421924726524 1.0176514725674246
0 5.3971751248691975 1.0815922008044505

IndexError: list index out of range

In [None]:
if 'sa\Test_woChinRest' in subjName or 'rh\Test_wChinRest' in subjName:
    print('Addition of lists')
    pupilL_Avg = pupilL_Avg1 + pupilL_Avg2
    pupilR_Avg = pupilR_Avg1 + pupilR_Avg2
    

pupilAvg = [(pupilR_Avg[i] + pupilL_Avg[i])/2 for i in range(0, len(pupilR_Avg))]

pupil_Difficult = list()
pupil_Medium = list()
pupil_Easy = list()


for ind in range(0, len(scoresDifficulty)):
    #print(scoresDifficulty.values[ind])
    if scoresDifficulty.values[ind] > 12:
        pupil_Difficult.append(pupilAvg[ind])
        pupilTotal_Difficult.append(pupilAvg[ind])
        
    elif scoresDifficulty.values[ind] > 7:
        pupil_Medium.append(pupilAvg[ind])
        pupilTotal_Medium.append(pupilAvg[ind])
        
    else:
        pupil_Easy.append(pupilAvg[ind])
        pupilTotal_Easy.append(pupilAvg[ind])
        

fig = plt.figure()
ax = fig.add_subplot(1,1,1)
ax.plot(pupilAvg)
#ax.plot(range(1,len(pupil_Difficult)+1), pupil_Difficult, 'r', marker = 'o', linestyle = '-', label = 'difficult')
#ax.plot(range(1,len(pupil_Medium)+1), pupil_Medium, 'b', marker = 's', linestyle = '-', label = 'medium')
#ax.plot(range(1,len(pupil_Easy)+1), pupil_Easy, 'c', marker = 'd', linestyle = '-', label = 'easy')


#ax.plot(range(1,len(scoresDifficulty)+1), scoresDifficulty/100, 'b', label = 'difficulty')
#ax.plot(range(1, len(timeOfTrialList)+1), timeOfTrialListPlot, 'c', label = 'time' )
ax.set_title(subjName)
#ax.legend()
             
#print(ipaList)
#print(scoresDifficulty)


In [None]:
print(stats.ttest_ind(pupilTotal_Difficult,pupilTotal_Medium))
print(stats.ttest_ind(pupilTotal_Difficult,pupilTotal_Easy))
print(stats.ttest_ind(pupilTotal_Easy,pupilTotal_Medium))

In [None]:
pupilMean = [ np.mean(pupilTotal_Easy), np.mean(pupilTotal_Medium) , np.mean(pupilTotal_Difficult)]
pupilStd = [ np.std(pupilTotal_Easy), np.std(pupilTotal_Medium) , np.std(pupilTotal_Difficult)]

fig = plt.figure()
ax = fig.add_subplot(1,1,1)
ax.errorbar([1, 2, 3], pupilMean, pupilStd, marker = 'o', capsize=4)
plt.xticks([1,2,3], ['Easy', 'Medium', 'Difficult'])

In [None]:
np.corrcoef(pupilAvg, scoresDifficulty)

In [None]:
scoresDifficultyPlot = [i/100 for i in scoresDifficulty]
fig = plt.figure()
ax = fig.add_subplot(1,1,1)

ax.plot(range(1,len(pupilAvg)+1), pupilR_Avg, 'b')

ax.plot(range(1, len(scoresDifficulty)+1), scoresDifficultyPlot, 'r')

In [None]:
ipaMean = [ np.mean(ipaEasy), np.mean(ipaMedium) , np.mean(ipaDifficult)]
ipaStd = [ np.std(ipaEasy), np.std(ipaMedium) , np.std(ipaDifficult)]

In [None]:
fig = plt.figure()
ax = fig.add_subplot(1,1,1)
ax.errorbar([1, 2, 3], ipaMean, ipaStd, marker = 'o', capsize=4)
ax.set_title(subjName)
plt.xticks([1,2,3], ['Easy', 'Medium', 'Difficult'])

In [None]:
print(len(ipaEasy), len(ipaMedium), len(ipaDifficult))

In [23]:
pupilTotal_Difficult = list()
pupilTotal_Medium = list()
pupilTotal_Easy = list()

In [None]:
print('Difficult', np.mean(ipaDifficult), np.std(ipaDifficult))
print('Medium', np.mean(ipaMedium), np.std(ipaMedium))
print('Easy', np.mean(ipaEasy), np.std(ipaEasy))

In [None]:
fig = plt.figure()
ax = fig.add_subplot(1,1,1)
ax.errorbar([1, 2, 3], ipaMean, ipaStd, marker = 'o', capsize=4)
plt.xticks([1,2,3], ['Easy', 'Medium', 'Difficult'])

In [None]:
stats.pearsonr(scoreDifference, sco)