In [1]:
%matplotlib
#%matplotlib inline
import os
import csv
import fnmatch
import numpy as np
import datetime
import re 
import pandas as pd
import matplotlib.pyplot as plt
import math
import pywt
import itertools
from mpl_toolkits.mplot3d import Axes3D

pd.options.mode.use_inf_as_na = True

Using matplotlib backend: TkAgg


In [2]:
def FixUserKeys(UserKeys_Old):
    """Rebuild the progress-percent value that a decimal comma split into two columns.

    UserKeys_Old: rows of the form [time, key, whole_part, fraction_part]; when a row
        has no fourth column, '00' is used as the fraction part.

    Returns a new list of [time, key, progress_percent] rows, where progress_percent is
    float(whole_part + '.' + fraction_part).
    """
    repaired_rows = []
    for row in UserKeys_Old:
        fraction_part = row[3] if len(row) > 3 else '00'
        progress_percent = float(row[2] + '.' + fraction_part)
        repaired_rows.append([row[0], row[1], progress_percent])

    return repaired_rows
        

In [3]:
def OptiKeyTypingTime(UserKeys):
    """Return the timestamps of the first and the last row of UserKeys.

    UserKeys: rows whose first element is a timestamp string. The text before the
        first '+' is taken, its last character is dropped, ':', '.' and 'T' are
        replaced by '-', and the result is parsed with "%Y-%m-%d-%H-%M-%S-%f".

    Returns a dict with the keys 'startTime' and 'endTime' (datetime objects).
    """
    def _to_datetime(stamp):
        # keep only the part in front of the '+' and drop its last character
        local_part = stamp.partition('+')[0]
        return datetime.datetime.strptime(re.sub('[:.T]','-',local_part[:-1]), "%Y-%m-%d-%H-%M-%S-%f")

    return {
        'startTime': _to_datetime(UserKeys[0][0]),
        'endTime': _to_datetime(UserKeys[-1][0]),
    }

In [4]:
def FindTrialEndTimes(KeysSelected, timeTyping):
    """Collect the time boundaries between consecutive trials.

    KeysSelected: rows of [timestamp string, key name, ...]; every row whose key name
        equals 'NextPhrase' marks the end of one trial (and the start of the next).
    timeTyping: dict providing 'startTime' and 'endTime' of the whole typing session.

    Returns a list of the form
    [startTime1, endTime1/startTime2, endTime2/startTime3, ..., endTimeN].
    """
    boundaries = [timeTyping['startTime']]

    boundaries.extend(
        datetime.datetime.strptime(re.sub('[:.T]','-',row[0].partition('+')[0][:-1]), "%Y-%m-%d-%H-%M-%S-%f")
        for row in KeysSelected
        if row[1] == 'NextPhrase'
    )

    boundaries.append(timeTyping['endTime'])
    return boundaries

In [5]:
# function to convert a list of date-and-time strings into a list of datetime objects
def timeConversion(timeStrList):
    """Parse every timestamp string in timeStrList into a datetime object.

    For each string, the text before the first '+' is taken, its last character is
    dropped, ':', '.' and 'T' are replaced by '-', and the result is parsed with the
    format "%Y-%m-%d-%H-%M-%S-%f". The returned list keeps the input order.
    """
    return [
        datetime.datetime.strptime(re.sub('[:.T]','-',stamp.partition('+')[0][:-1]), "%Y-%m-%d-%H-%M-%S-%f")
        for stamp in timeStrList
    ]

In [6]:
# Return the last entry of `dates` that lies strictly before `date`, together with its index
def nearestTimePoint(dates, date):
    """Find the last element of `dates` that is strictly smaller than `date`.

    The whole sequence is scanned in iteration order and the last element satisfying
    ``d < date`` wins. This is the closest preceding time point only when `dates` is
    sorted in ascending order.

    dates: sequence supporting iteration and ``.index`` (e.g. a list of datetimes)
    date: pivot value that the elements are compared against

    Returns (value, index). The index is ``dates.index(value)``, i.e. the position of
    the first element equal to the value found. When no element is smaller than
    `date` (including an empty `dates`), (0, -1) is returned.
    """
    # explicit "nothing found" marker; replaces probing an unbound local inside a bare except
    notFound = object()
    nearestTP = notFound

    for d in dates:
        if d < date:
            nearestTP = d

    if nearestTP is notFound:
        return 0, -1

    return nearestTP, dates.index(nearestTP)

In [7]:
def CreateTimeEpochsOfTrials(TimeStartEndMixed, UserKeys):
    """Split the mixed list of trial boundaries into separate start and end times.

    TimeStartEndMixed: [startTime1, endTime1/startTime2, ..., endTimeN]
    UserKeys: rows of [timestamp string, key name, ...] for the keys looked at by the user

    The first boundary is only a start and the last boundary is only an end. Every
    boundary in between is stored as an end; the start of the following trial is then
    searched in UserKeys: the entry found by nearestTimePoint for the boundary is
    used unless its key name contains 'NextPhrase', in which case the search is
    repeated from the time of the entry two positions earlier.

    Returns a dict with the lists 'Start' and 'End'.
    """
    epochs = {'Start': [], 'End': []}

    # datetime version of the UserKeys time column, needed by nearestTimePoint
    keyTimes = timeConversion([row[0] for row in UserKeys])

    lastPosition = len(TimeStartEndMixed) - 1

    for position, boundary in enumerate(TimeStartEndMixed):

        if position == 0:
            # first time is only the start time of the first trial
            epochs['Start'].append(boundary)
            continue

        # every later boundary closes a trial
        epochs['End'].append(boundary)

        if position == lastPosition:
            # last time is only the end time of the last trial
            continue

        # step backwards through the user keys until an entry that is not part of the
        # NextPhrase selection is reached
        probeTime = boundary
        while True:
            candidateTime, candidateInd = nearestTimePoint(keyTimes, probeTime)

            if 'NextPhrase' not in UserKeys[candidateInd][1]:
                epochs['Start'].append(candidateTime)
                break

            # go back 2 entries (not 1) before asking nearestTimePoint again
            probeTime = keyTimes[candidateInd - 2]

    return epochs
            

In [8]:
def Convert2ColumnSizesTo1(GazeLog):
    """Rebuild left/right pupil sizes that a decimal comma split into two columns each.

    GazeLog: rows of the gaze log. For every row, the left pupil size is read from
        the columns [-5] (before decimal) and [-4] (after decimal), and the right
        pupil size from [-2] and [-1].

    A sample becomes NaN for BOTH eyes when
      * the row contains the element 'Invalid', or
      * 'Valid' shows up in one of the pupil columns (a whole-number pupil size
        shifted the columns, so the value cannot be reconstructed reliably), or
      * one of the pupil columns contains 'nan'.

    Returns (PupilLogL, PupilLogR): two lists of floats, each with exactly one entry
    per row of GazeLog, so they stay aligned with the gaze-log timestamps.
    """
    def _joinDecimal(beforeDecimal, afterDecimal):
        # 'Valid' in a pupil column means the columns are shifted; 'nan' marks missing data
        if 'Valid' in beforeDecimal or 'Valid' in afterDecimal:
            return np.nan
        if 'nan' in beforeDecimal or 'nan' in afterDecimal:
            return np.nan
        return float(beforeDecimal + '.' + afterDecimal)

    PupilLogL = list()
    PupilLogR = list()

    for row in GazeLog:
        if 'Invalid' in row:
            pupilL = np.nan
            pupilR = np.nan
        else:
            pupilL = _joinDecimal(row[-5], row[-4])
            pupilR = _joinDecimal(row[-2], row[-1])

        # if one of the pupils is nan, the other one is converted too
        if np.isnan(pupilL) or np.isnan(pupilR):
            pupilL = np.nan
            pupilR = np.nan

        # exactly one value per eye and row: the original appended the right-eye NaN
        # to the left list, which shortened and shifted the right trace
        PupilLogL.append(pupilL)
        PupilLogR.append(pupilR)

    return PupilLogL, PupilLogR

In [9]:
def PupilSizeFromTrialTimes(TimeTrial, TimeGazeLog, TimeInternalGazeLog, PupilSizeLogL, PupilSizeLogR):
    """Cut the gaze-log columns down to the samples belonging to one trial.

    TimeTrial: [start time, end time] of the trial
    TimeGazeLog: timestamps of all gaze samples (searched with nearestTimePoint)
    TimeInternalGazeLog, PupilSizeLogL, PupilSizeLogR: per-sample columns that are
        sliced with the same index range as TimeGazeLog

    Returns (left pupil sizes, right pupil sizes, gaze timestamps, internal times)
    for the index range [start index, end index).
    """
    # positions in the gaze log that nearestTimePoint reports for the trial start and end
    _, firstInd = nearestTimePoint(TimeGazeLog, TimeTrial[0])
    _, stopInd = nearestTimePoint(TimeGazeLog, TimeTrial[1])

    trialWindow = slice(firstInd, stopInd)

    return (PupilSizeLogL[trialWindow], PupilSizeLogR[trialWindow],
            TimeGazeLog[trialWindow], TimeInternalGazeLog[trialWindow])
    

In [10]:
def filterBlinks(pupilData, timeInDatetime_trial, timeInS_Trial):
    """Remove isolated missing samples and blink periods from one pupil-size trace.

    The function works in two passes over the NaN positions of the data:
      1. NaN samples that are isolated (not adjacent to another NaN) are treated as
         eye-tracker dropouts and removed together with their timestamps.
      2. The remaining NaN runs are treated as blinks; each run, padded by
         extraBlinkSamples on both sides, is removed from the data and timestamps.

    pupilData: pupil sizes of one eye for one trial, NaN where data is missing
    timeInDatetime_trial: per-sample timestamps, indexed in parallel with pupilData
    timeInS_Trial: sequence whose last element is used as the trial duration in seconds

    Returns a 7-tuple:
      pupilData_filter      - pupil data after both removal passes
      time_filter           - timestamps after both removal passes
      blink_missingData     - dict with 'Start'/'End' index lists of the removed ranges
                              (indexes refer to the data after pass 1)
      blinkDurationList     - durations (NaN-sample count / 90) of runs counted as blinks
      blinkFrequency        - blinkCount / timeInS_Trial_filter
      blinkTimeList         - timestamps recorded when a blink is counted
      timeInS_Trial_filter  - trial duration after the corrections below

    Raises IndexError when pupilData contains no NaN at all (or none is left after
    pass 1), because the first NaN index is read unconditionally.

    NOTE(review): nonBlinkCount and nonBlinkTimeList are collected but never returned.
    """
    # filter any blinks and nan values lasting around 250ms (on average)
    # http://faculty.washington.edu/chudler/facts.html
   
    # recording extra blink information - duration and frequency
    blinkDurationList = list()
    blinkTimeList = list()
    blinkCount = 0
    nonBlinkCount = 0
    nonBlinkTimeList = list()
    timeRemove = 0
    
    # blink is every nan value in the range of 100-400ms 
    # 250 ms (23 samples) before and after the blink will also be removed
    extraBlinkSamples = 23    
    
    # remove single missing data, that are due to hardware error
    # indexes of all NaN samples in pupilData
    missingVal_Single = np.argwhere(np.isnan(pupilData))
    missingVal_Single = list(itertools.chain.from_iterable(missingVal_Single)) # flatten the list
    missingVal_SingleDifference = [t - s for s, t in zip(missingVal_Single, missingVal_Single[1:])] # find difference 
    # between consecutive elements
    missingVal_SingleDifference.insert(0, missingVal_Single[0]) # insert the first blink index in the beginning of list
    
    # the list missingVal_SingleDifference contains the index of the first blink, followed by the difference in the index to 
    # the next nan value
    # (so the sum of its first k entries is the index of the k-th NaN sample)
    
    # first remove the single nan values, which are missing data
    eyeTracker_missingData = list() # list with index of single missing data  
    valInd = -1

    for val in missingVal_SingleDifference:
        valInd = valInd + 1
        if valInd == 0:
            continue
        if val != 1:
            if missingVal_SingleDifference[valInd-1] !=1: # the NaN before the current one has a gap != 1 on both sides,
                # i.e. it is an isolated missing sample; its index is sum(...[:valInd]).
                # NOTE(review): for valInd == 1 the "gap before" is the index of the first NaN itself, and the
                # very last NaN is never tested because no entry follows it - confirm this is intended
                eyeTracker_missingData.append(sum(missingVal_SingleDifference[:valInd]))
                
    # remove single missing values from pupil data
    # (removed samples are replaced by [] and then dropped by the truthiness filter)
    # NOTE(review): `if x` also drops any other falsy sample such as 0.0, which would leave
    # the data shorter than the time list below - confirm zeros cannot occur
    pupilData_woSingleMissingData0 = [pupilData[ind] if ind not in eyeTracker_missingData else [] 
                                     for ind in range(0, len(pupilData))]
    pupilData_woSingleMissingData = [x for x in pupilData_woSingleMissingData0 if x]
    
    # remove the times for single missing values in pupil data
    timeList_woSingleMissingData0 = [timeInDatetime_trial[ind] if ind not in eyeTracker_missingData else [] 
                                     for ind in range(0, len(timeInDatetime_trial))]
    timeList_woSingleMissingData = [x for x in timeList_woSingleMissingData0 if x]
    
#     print(len(timeList_woSingleMissingData))
    
    
    # trial duration corrected by the number of removed samples, at 90 samples per second
    # NOTE(review): len(filtered) - len(original) is <= 0, so this ADDS time to the trial
    # duration instead of subtracting it - check whether the sign is intended
    timeInS_woSingleMissingData = timeInS_Trial[-1]-(len(timeList_woSingleMissingData)-len(timeInDatetime_trial))/90
    #print(timeInS_woSingleMissingData, timeInS_Trial[-1])
    
    # find the nan values again from pupilData_woSingleMissingData
    missingVal_Rest = np.argwhere(np.isnan(pupilData_woSingleMissingData))
    missingVal_Rest = list(itertools.chain.from_iterable(missingVal_Rest))
    missingVal_RestDifference = [t - s for s, t in zip(missingVal_Rest, missingVal_Rest[1:])]
    missingVal_RestDifference.insert(0, missingVal_Rest[0])
    #print(missingVal_RestDifference)
    
    # compile and create list of start and end of blinks
    blink_missingData = dict()
    blink_missingData['Start'] = list()
    blink_missingData['End'] = list()
    
    valInd = -1
    for val in missingVal_RestDifference:
        valInd = valInd + 1
        # a gap > 1 means the current NaN opens a new run of missing samples
        if val > 1:
            
            
            #print('value', val)
            # instead of appending the actual index of blink start, since 250ms before and after the blink need to be
            # removed, it is also appended here.
            # (sum(...[:valInd+1]) is the index of the current NaN, i.e. the start of the new run)
            
            # just make sure that the additional samples do not make the index of blink go in negative
            if sum(missingVal_RestDifference[:valInd+1])-extraBlinkSamples > 0:
                
                blink_missingData['Start'].append(sum(missingVal_RestDifference[:valInd+1])-extraBlinkSamples)
            else:
                blink_missingData['Start'].append(0)
            
            # NOTE(review): lastBlinkStart is only initialised here, and only when the first NaN
            # index is > 1; if the data starts with a NaN at index 0 or 1, the subtraction below
            # raises UnboundLocalError and 'Start'/'End' no longer pair up - confirm against real data
            if valInd == 0:
                lastBlinkStart = valInd
                continue
                
            # append blink duration list
            # number of NaN samples in the run that started at lastBlinkStart (the run preceding this one)
            blinkDurationCurrent = valInd-lastBlinkStart
            # if blink duration is greater than 1s, it is not considered to be blink anymore
            if blinkDurationCurrent < 90: # since tobii sampling frequency is 90Hz
                blinkCount = blinkCount + 1
                blinkDurationList.append(blinkDurationCurrent/90)
                blinkTimeList.append(timeList_woSingleMissingData[sum(missingVal_RestDifference[:valInd+1])])
                lastBlinkStart = valInd
            else:
                # collect the time of non-blinks, that will need to be removed from trial time, to calculate 
                # blink frequency
                #print('current blink duration', valInd, lastBlinkStart, blinkDurationCurrent)
                timeRemove = timeRemove + blinkDurationCurrent
                nonBlinkCount = nonBlinkCount + 1
                nonBlinkTimeList.append(timeList_woSingleMissingData[sum(missingVal_RestDifference[:valInd+1])])
                lastBlinkStart = valInd
            
            # sum(...[:valInd]) is the index of the previous NaN, i.e. the end of the preceding run
            # make sure that the additional samples do not increase the index to more than the length of the pupilData
            if sum(missingVal_RestDifference[:valInd])+extraBlinkSamples < len(pupilData_woSingleMissingData):
                blink_missingData['End'].append(sum(missingVal_RestDifference[:valInd])+extraBlinkSamples)
            else:
                blink_missingData['End'].append(len(pupilData_woSingleMissingData)-1)
#         else:
#             # val is 1
#             if valInd-2 > 0 and valInd+3 < len(missingVal_RestDifference):
#                 if missingVal_RestDifference[valInd-1] > 1:
#                     if missingVal_RestDifference[valInd+1] == 1:
#                         if missingVal_RestDifference[valInd+2] > 1:
#                             print(timeList_woSingleMissingData[sum(missingVal_RestDifference[:valInd+1])], 
#                                 missingVal_RestDifference[valInd-2:valInd+3])
#                             if missingVal_RestDifference[valInd+2] > missingVal_RestDifference[valInd-1]:
#                                 if valInd-6>0:
#                                     print(missingVal_RestDifference[valInd-6:valInd+3])
#                     elif missingVal_RestDifference[valInd+1] > 1:
#                         print(timeList_woSingleMissingData[sum(missingVal_RestDifference[:valInd+1])], 
#                               missingVal_RestDifference[valInd-2:valInd+3])
#                         if missingVal_RestDifference[valInd+2] > missingVal_RestDifference[valInd-1]:
#                                 if valInd-6>0:
#                                     print(missingVal_RestDifference[valInd-6:valInd+3])
                        
                        
    # add the last blink index
    # (sum of all differences is the index of the last NaN, i.e. the end of the last run)
    # make sure that the additional samples do not increase the index to more than the length of the pupilData
    if sum(missingVal_RestDifference)+extraBlinkSamples < len(pupilData_woSingleMissingData):
        blink_missingData['End'].append(sum(missingVal_RestDifference)+extraBlinkSamples)
    else:
        blink_missingData['End'].append(len(pupilData_woSingleMissingData)-1)
                
    # need to create a list containing indexes that are to be removed
    blinkIndexList = list()
    
#     print(len(blink_missingData['Start']), len(blink_missingData['End']))
    
    # remove blinks and additional data from pupil data to get filtered data
    # (every Start[i]..End[i] range, both ends included, is marked for removal)
    for indInd in range(0, len(blink_missingData['Start'])):
        blinkIndexList.append(range(blink_missingData['Start'][indInd], blink_missingData['End'][indInd]+1))
    # flatten the list
    blinkIndexList = list(itertools.chain.from_iterable(blinkIndexList))
    
    pupilData_woRestMissingData0 = [pupilData_woSingleMissingData[ind] if ind not in blinkIndexList else [] 
                                     for ind in range(0, len(pupilData_woSingleMissingData))]
    pupilData_filter = [x for x in pupilData_woRestMissingData0 if x]
    
    # remove the times of the blink ranges from the time list as well
    timeList_woRestMissingData0 = [timeList_woSingleMissingData[ind] if ind not in blinkIndexList else [] 
                                     for ind in range(0, len(timeList_woSingleMissingData))]
    time_filter = [x for x in timeList_woRestMissingData0 if x]
    
    # subtract the duration of the long (non-blink) gaps, converted from samples to seconds
    timeInS_Trial_filter = timeInS_woSingleMissingData-timeRemove/90
    
    blinkFrequency = blinkCount/timeInS_Trial_filter
    #print('freq', blinkFrequency, timeInS_woSingleMissingData, timeRemove)
    #print('time difference', len(timeInDatetime_trial), len(time_filter))
    # NOTE(review): list membership finds NaN only by object identity (NaN != NaN), so this
    # detects leftover entries that are the np.nan object itself, not arbitrary NaN floats
    if np.nan in pupilData_filter:
        print('nan values in filtered data')
#         for i in enumerate(pupilData_filter):
#             print(i)
        
    #print(nonBlinkCount, blinkCount, nonBlinkTimeList)
    return pupilData_filter, time_filter, blink_missingData, blinkDurationList, blinkFrequency, blinkTimeList, timeInS_Trial_filter

In [11]:
def plotPupilSize(pupilData, timeData, TrialNumber):
    """Plot the left (blue) and right (red) pupil trace of one trial in a new figure.

    pupilData: dict with the sequences 'Left' and 'Right'
    timeData: x-axis values for the samples
    TrialNumber: used as the title of the axes

    All three sequences are cut to the length of the shortest one before plotting.
    """
    commonLength = min(len(pupilData['Left']), len(pupilData['Right']), len(timeData))
    timeAxis = timeData[0:commonLength]

    fig = plt.figure()
    ax = fig.add_subplot(1,1,1)
    for eye, lineStyle in (('Left', 'b'), ('Right', 'r')):
        ax.plot(timeAxis, pupilData[eye][0:commonLength], lineStyle)

    ax.set_ylabel('Absolute pupil size [in mm]')
    ax.set_title(TrialNumber)


In [12]:
def hampel(vals_orig, k, sd):
    '''
    Median-based (Hampel-style) outlier replacement.

    vals_orig: values from which to remove outliers; they are wrapped in a DataFrame
    k: size of window (including the sample; 7 is equal to 3 on either side of value)
    sd: factor applied to the scaled rolling median of the absolute deviations to get
        the outlier threshold

    Returns a DataFrame in which every outlier is replaced by the rolling median.
    '''
    # Obtained from: https://stackoverflow.com/questions/46819260/filtering-outliers-how-to-make-median-based-
    # hampel-function-faster

    scaleFactor = 1.4826

    # Infinite values are treated as missing. Missing values are filled by linear
    # interpolation (rather than cubic, to not add any patterns to the data), in both
    # directions so that gaps at the start of the series are filled as well.
    cleaned = (
        pd.DataFrame(vals_orig)
        .replace([np.inf, -np.inf], np.nan)
        .astype(float)
        .interpolate('linear', limit_direction = 'both')
    )

    centredMedian = cleaned.rolling(window=k, min_periods=1, center=True).median()
    deviation = np.abs(centredMedian-cleaned)

    # threshold from the rolling median of the absolute deviations
    limit = sd * scaleFactor * deviation.rolling(k).median()

    isOutlier = deviation>limit
    cleaned[isOutlier] = centredMedian[isOutlier]

    return cleaned

In [60]:
def FindAndPlotPupilSizeForEpoch(GazeLog, TimeEpochTrial, ScoresDifficulty):
    """Compute blink statistics for every trial epoch of one recording.

    GazeLog: rows of the gaze log; row[0] is a timestamp string (parsed with
        timeConversion), row[1] is the tracker's internal time (converted to float,
        presumably microseconds since it is divided by 1000000 to obtain seconds),
        and the pupil columns are decoded by Convert2ColumnSizesTo1.
    TimeEpochTrial: dict with the index-aligned lists 'Start' and 'End' holding the
        trial boundaries. The first entry (index 0) is always skipped.
    ScoresDifficulty: not used by the active code (only by commented-out plotting);
        kept so that existing callers keep working.

    Trials with fewer than three gaze samples, or lasting less than 20 s, are skipped
    with a printed message.

    Returns four lists with one entry per processed trial:
    (blinkDurationList, blinkDurationAverageList, blinkFrequencyList, timeInS_List).
    The blink values are the ones reported by filterBlinks for the RIGHT eye, because
    the right-eye call is the last one made.
    """
    # timestamps of the gaze log as datetime objects
    timeGazeLog = timeConversion([row[0] for row in GazeLog])

    # internal time of every sample
    timeInternalGazeLog = [float(row[1]) for row in GazeLog]

    # extract pupil sizes in decimals from the 2 columns stored for every pupil
    pupilLogL, pupilLogR = Convert2ColumnSizesTo1(GazeLog)

    blinkDurationList = list()
    blinkDurationAverageList = list()
    blinkFrequencyList = list()
    timeInS_List = list()

    minTrialDurationInS = 20 # shorter trials were skipped sentences and are ignored
    winSize = 25 # window of the Hampel filter

    # Iterate over the epochs passed in as argument. The original read the notebook
    # global `timeEpochTrial` here, which only worked through leftover kernel state.
    for trialNr in range(1, len(TimeEpochTrial['Start'])):

        # find pupil sizes for the trial
        pupilSizeL_Trial, pupilSizeR_Trial, timeGaze_Trial, timeInternal_Trial = PupilSizeFromTrialTimes(
            [TimeEpochTrial['Start'][trialNr], TimeEpochTrial['End'][trialNr]], timeGazeLog,
            timeInternalGazeLog, pupilLogL, pupilLogR)

        # Elapsed time in seconds for the 2nd up to the second-to-last sample. This is
        # the running sum of the consecutive differences the original built with
        # repeated sum() calls, computed directly as the offset to the first sample.
        timeOfGaze_Trial = [(t - timeInternal_Trial[0])/1000000 for t in timeInternal_Trial[1:-1]]

        if not timeOfGaze_Trial:
            # fewer than three samples: there is no duration to evaluate
            print('trial number ', trialNr+1, 'has too few gaze samples and will be skipped')
            continue

        # some trials were skipped, because the sentence was written before. If the
        # trial is shorter than minTrialDurationInS, it is skipped
        if timeOfGaze_Trial[-1] < minTrialDurationInS:
            print('trial number ', trialNr+1, 'with', timeOfGaze_Trial[-1], 's will be skipped')
            continue

        # filter the blinks; only the filtered trace is kept from the left-eye call
        pupilSizeL_woBlink = filterBlinks(pupilSizeL_Trial, timeGaze_Trial, timeOfGaze_Trial)[0]
        (pupilSizeR_woBlink, _, _, blinkDuration, blinkFrequency, _,
         timeInS_filter) = filterBlinks(pupilSizeR_Trial, timeGaze_Trial, timeOfGaze_Trial)

        # time of trial
        timeInS_List.append(timeInS_filter)

        # Hampel filter to remove the outliers
        pupilSizeL_filter = hampel(pupilSizeL_woBlink, winSize, 3)
        pupilSizeR_filter = hampel(pupilSizeR_woBlink, winSize, 3)

        pupilSizeL_filterList = [i[0] for i in pupilSizeL_filter.values]
        pupilSizeR_filterList = [i[0] for i in pupilSizeR_filter.values]

        # NOTE(review): the left/right correlation is computed but not returned or stored
        RLCorrelation = np.corrcoef(pupilSizeL_filterList, pupilSizeR_filterList)

        blinkDurationList.append(blinkDuration)
        blinkFrequencyList.append(blinkFrequency)

        if len(blinkDuration)>0:
            blinkDurationAverageList.append(np.mean(blinkDuration))
        else:
            blinkDurationAverageList.append(0)

    return blinkDurationList, blinkDurationAverageList, blinkFrequencyList, timeInS_List
        

In [83]:
# Accumulators filled while the recordings are processed; each one starts out as its
# own empty list.

# blink measures
blinkTotalFrequency, blinkTotalDurationAverage = [], []
timeTotalTrial, blinkTotalDuration = [], []

# scores
scoreTotalLIX, scoreTotalComplexity = [], []
scoreTotalDifficulty, scoreTotalSumOfScores = [], []

# correlation results
corrDurationList, corrFrequencyList = [], []

In [76]:
# Root directory that is walked with os.walk below.
# NOTE(review): the same name is reassigned to the subject identifier inside the loop;
# the path is an absolute local path, consider a configurable data directory.
subjName = r'C:\DTU\Data\201812_ExptToCheckMovementEffect\Data'
# NOTE(review): j and flagFirstSubj are not used in the visible part of this cell - confirm they are still needed
j = 0
flagFirstSubj = 0
# container for pupil results; only the empty 'RLCorrelation' list is created here
pupilData = dict()
pupilData['RLCorrelation'] = []

# extract self-reported scores list and LIX score of given sentence
# (Excel workbook that is read with pd.read_excel, one sheet per subject)
file_name = r'C:/DTU/Data/201812_ExptToCheckMovementEffect/Data/Scores.xlsx'

for root, dirs, subfolder in os.walk(subjName):
    if not dirs:
        
        if 'tb' in root or 'trial' in root:
            continue
            
        userKeys = None
        gazeLog = None
        keysSelected = None
        
        for file in subfolder:
            if fnmatch.fnmatch(file, 'user_looks*'):
                try:
                    
                    fUserKey = open(root + '\\' + file, encoding='utf-8')
                    readerUserKey = csv.reader(fUserKey)
                    userKeys = list(readerUserKey)
                    
                    userKeys.remove(userKeys[0])
                except:
                    if fUserKey is not None:
                        
                        fUserKey.close()
                    else:
                        print('error in opening the user looks at log file')
            
            elif fnmatch.fnmatch(file, 'KeySelection*'):
                try:
                    
                    fKeysSelected = open(root + '\\' + file, encoding='utf-8')
                    readerKeysSelected = csv.reader(fKeysSelected)
                    keysSelected = list(readerKeysSelected)
                    
                    keysSelected.remove(keysSelected[0])
                except:
                    if fKeysSelected is not None:
                        
                        fKeysSelected.close()
                    else:
                        print('error in opening the KeySelection log file')
            
            elif fnmatch.fnmatch(file, 'tobiiGazeLog*'):
                try:
                    fGazeLog = open(root + '\\' + file, encoding='utf-8')
                    readerGazeLog = csv.reader(fGazeLog)
                    gazeLog = list(readerGazeLog)
                    
                    gazeLog.remove(gazeLog[0]) # would not matter much even if the first row was not labels
                    gazeLog.remove(gazeLog[-1])

                except:
                    if fGazeLog is not None:
                        fGazeLog.close()
                    else:
                        print('error in opening the gaze log file')
            else:
                continue
            
                # if all these lists exist
            if userKeys is None or keysSelected is None or gazeLog is None:
                continue
            else:
                
                a = re.compile('(?<=ExptToCheckMovementEffect\\\\Data\\\\)(.*)(?=\\\\2018-1)')
                subjName = a.findall(root)[0]
                print(subjName)
                
                if subjName == 'sa\Test_woChinRest\p2':
                    userKeys = userKeys[:-1]
                  
                
                    
                testNr_re = re.compile('(?<=\\\Test)(.*)')
                testNr = 'Test' + testNr_re.findall(subjName)[0]
                if 'rh\Test_wChinRest' in subjName or 'sa\Test_woChinRest' in subjName:
                    testNr = 'Test' + testNr_re.findall(subjName)[0][:-3]
                
                sheet_to_df_map = pd.read_excel(file_name, sheet_name=subjName[0:2])
                
                columnName1 = testNr + '_SumOfScores'
                # find total of SumOfScores score
                scoresSumOfScores = sheet_to_df_map[columnName1]
                scoresSumOfScores = scoresSumOfScores[1:]
                
                
                columnName2 = testNr + '_LIX'
                # find total of SumOfScores score
                scoresLIX = sheet_to_df_map[columnName2]
                scoresLIX = scoresLIX[1:]
                
                columnName3 = testNr + '_Complexity'
                # find total of SumOfScores score
                scoresComplexity = sheet_to_df_map[columnName3]
                scoresComplexity = scoresComplexity[1:]
                
                columnName4 = testNr + '_Difficulty'
                # find total of SumOfScores score
                scoresDifficulty = sheet_to_df_map[columnName4]
                scoresDifficulty = scoresDifficulty[1:]
                
                
                
                # fix userKeys due to comma related file changes
                userKeys_new = FixUserKeys(userKeys)
                
                # find start time of typing
                timeTyping = OptiKeyTypingTime(userKeys_new)
                
                # for some of the subjects, the data was not completely collected
                if subjName == 'sa\Test1\p1' or subjName == 'rh\Test1\p1':
                    del keysSelected[-1]
                
                # for some of the subjects, the data was not completely collected
                if subjName == 'sa\Test_woChinRest\p1' or subjName == 'rh\Test_wChinRest\p1':
                    del keysSelected[-1]
                    
                # divide complete data into epochs of phrases
                timeStartEndMixed = FindTrialEndTimes(keysSelected, timeTyping)
                
                # create trial time epoch using the list of start/end times of trial and userKeys, to make sure that 
                # Sleep is completely there in every trial, to allow for baseline
                timeEpochTrial = CreateTimeEpochsOfTrials(timeStartEndMixed, userKeys_new)
                #print(timeEpochTrial)
                #if 'rh\Test_woChinRest' in root:
                #    timeEpochTrial['Start']
                
                # find and plot pupil size for every trial
                blinkDuration, blinkDurationAverage, blinkFrequency, time_trialList = FindAndPlotPupilSizeForEpoch(gazeLog, timeEpochTrial, 
                                                                                                   scoresSumOfScores)
                
                
#                 print(blinkFrequency)
                if 'rh\Test_wChinRest' in subjName or 'sa\Test_woChinRest' in subjName:
                    if 'p1' in subjName:
                        blinkDurationAverage1 = blinkDurationAverage
                        blinkFrequency1 = blinkFrequency
                        time1 = time_trialList
                        blinkDuration1 = blinkDuration
                        
                        continue
                    elif 'p2' in subjName:
                        if subjName == 'sa\Test_woChinRest\p2':
                            blinkDurationAverage2 = blinkDurationAverage[1:]
                            blinkFrequency2 = blinkFrequency[1:]
                            time2 = time_trialList[1:]
                            blinkDuration2 = blinkDuration[1:]
                        else:
                            blinkDurationAverage2 = blinkDurationAverage
                            blinkFrequency2 = blinkFrequency
                            time2 = time_trialList
                            blinkDuration2 = blinkDuration
                        
                        
                            
                        blinkFrequency = blinkFrequency1 + blinkFrequency2
                        blinkDurationAverage = blinkDurationAverage1 + blinkDurationAverage2
                        time_trialList = time1 + time2
                        blinkDuration = blinkDuration1 + blinkDuration2
                        
                #blinkFrequencyNew = blinkFrequency
#                 blinkDurationAverageNew = blinkDurationAverage
#                 scoresSumOfScoresNew = scoresSumOfScores
                
                blinkTotalFrequency.append(blinkFrequency)
                blinkTotalDurationAverage.append(blinkDurationAverage)
                timeTotalTrial.append(time_trialList)
                blinkTotalDuration.append(blinkDuration)
                
                
                scoreTotalLIX.append(scoresLIX)
                scoreTotalComplexity.append(scoresComplexity)
                scoreTotalDifficulty.append(scoresDifficulty)
                scoreTotalSumOfScores.append(scoresSumOfScores)
                
                #print(len(blinkDuration), len(blinkFrequency), len(blinkDurationAverage), len(time_trialList))
                
#                 ind = -1
#                 for score in scoresSumOfScores:
#                     ind = ind + 1
#                     if score > 12:
#                         blinkFrequencyDifficult.append(blinkFrequency[ind])
#                         blinkDurationDifficult.append(blinkDurationAverage[ind])
#                     elif score > 7:
#                         blinkFrequencyMedium.append(blinkFrequency[ind])
#                         blinkDurationMedium.append(blinkDurationAverage[ind])
#                     else:
#                         blinkFrequencyEasy.append(blinkFrequency[ind])
#                         blinkDurationEasy.append(blinkDurationAverage[ind])
                        
#                 blinkFrequencyNew0 = [blinkFrequency[ind] if ind not in removeData else [] 
#                                      for ind in range(0, len(blinkFrequency))]
#                 blinkFrequencyNew1 = [x for x in blinkFrequencyNew0 if x]
                
                
#                 blinkDurationAverageNew0 = [blinkDurationAverage[ind] if ind not in removeData else [] 
#                                      for ind in range(0, len(blinkDurationAverage))]
#                 blinkDurationAverageNew1 = [x for x in blinkDurationAverageNew0 if x]
                
#                 scoresSumOfScoresNew0 = [scoresSumOfScores.values[ind] if ind not in removeData else [] 
#                                      for ind in range(0, len(scoresSumOfScores.values))]
#                 scoresSumOfScoresNew1 = [x for x in scoresSumOfScoresNew0 if x]
    
                
#                 print(blinkDurationAverageNew1,blinkFrequencyNew1, scoresSumOfScoresNew1)
# #                 print(blinkFrequency)
                
                
                

bh\Test_wChinRest
trial number  11 with 4.596693 s will be skipped
bh\Test_woChinRest
trial number  11 with 7.339166 s will be skipped
ph\Test_wChinRest
trial number  11 with 18.00399 s will be skipped
ph\Test_woChinRest
pt\Test_wChinRest
pt\Test_woChinRest
trial number  11 with 17.47867 s will be skipped
rh\Test_wChinRest\p1
rh\Test_wChinRest\p2
trial number  4 with 10.770031 s will be skipped
trial number  10 with 14.989218 s will be skipped
rh\Test_woChinRest
trial number  11 with 10.476645 s will be skipped
sa\Test_wChinRest
sa\Test_woChinRest\p1
sa\Test_woChinRest\p2
trial number  3 with 8.449478 s will be skipped
trial number  5 with 8.926912 s will be skipped


In [99]:
# Start every per-category list (difficult / easy / medium) empty, once for
# blink frequency and once for blink duration.
blinkFrequencyDifficult, blinkFrequencyEasy, blinkFrequencyMedium = [], [], []

blinkDurationDifficult, blinkDurationEasy, blinkDurationMedium = [], [], []

In [78]:
# Collapse each list-of-lists into a single flat list, keeping the order of the
# outer entries and the order of the items inside each entry.
# Note: flat_score and flat_scoreDifficulty are both built from
# scoreTotalDifficulty.
flat_score = list(itertools.chain.from_iterable(scoreTotalDifficulty))
flat_duration = list(itertools.chain.from_iterable(blinkTotalDurationAverage))
flat_frequency = list(itertools.chain.from_iterable(blinkTotalFrequency))
flat_time = list(itertools.chain.from_iterable(timeTotalTrial))
flat_scoreDifficulty = list(itertools.chain.from_iterable(scoreTotalDifficulty))
flat_scoreComplexity = list(itertools.chain.from_iterable(scoreTotalComplexity))
flat_scoreLIX = list(itertools.chain.from_iterable(scoreTotalLIX))
flat_scoreSumOfScores = list(itertools.chain.from_iterable(scoreTotalSumOfScores))


In [68]:
# Sanity check: the four collected lists should have the same number of entries.
print(*map(len, (blinkTotalDuration, blinkTotalFrequency, blinkTotalDurationAverage, timeTotalTrial)))

10 10 10 10


In [None]:
# Print the correlation-coefficient matrix for each pair of collected measures.
# NOTE(review): these inputs are the nested (one entry per loop iteration) lists,
# not the flat_* lists - confirm np.corrcoef accepts them as intended.
for firstMeasure, secondMeasure in (
    (blinkTotalDurationAverage, scoreTotalDifficulty),
    (blinkTotalFrequency, scoreTotalDifficulty),
    (blinkTotalDurationAverage, blinkTotalFrequency),
):
    print(np.corrcoef(firstMeasure, secondMeasure))

In [70]:
# differentiate in easy and difficult


# Bucket every trial by its summed score: > 12 is difficult, > 7 is medium,
# everything else is easy.
#
# The six bucket lists are rebuilt here so the cell is idempotent: re-running
# it, or running it after another bucketing cell, no longer appends the same
# trials a second time under a different criterion.
blinkFrequencyDifficult, blinkFrequencyMedium, blinkFrequencyEasy = [], [], []
blinkDurationDifficult, blinkDurationMedium, blinkDurationEasy = [], [], []

# flat_frequency / flat_duration are indexed with the position of the score, so
# a score list longer than the blink lists still fails with an IndexError.
for ind, score in enumerate(flat_scoreSumOfScores):
    if score > 12:
        blinkFrequencyDifficult.append(flat_frequency[ind])
        blinkDurationDifficult.append(flat_duration[ind])
    elif score > 7:
        blinkFrequencyMedium.append(flat_frequency[ind])
        blinkDurationMedium.append(flat_duration[ind])
    else:
        blinkFrequencyEasy.append(flat_frequency[ind])
        blinkDurationEasy.append(flat_duration[ind])


In [85]:
# Bucket every trial by its LIX score: > 2 is difficult, > 1 is medium,
# everything else is easy.
#
# The six bucket lists are rebuilt here so the cell is idempotent: re-running
# it, or running it after another bucketing cell, no longer appends the same
# trials a second time under a different criterion.
blinkFrequencyDifficult, blinkFrequencyMedium, blinkFrequencyEasy = [], [], []
blinkDurationDifficult, blinkDurationMedium, blinkDurationEasy = [], [], []

# flat_frequency / flat_duration are indexed with the position of the score, so
# a score list longer than the blink lists still fails with an IndexError.
for ind, score in enumerate(flat_scoreLIX):
    if score > 2:
        blinkFrequencyDifficult.append(flat_frequency[ind])
        blinkDurationDifficult.append(flat_duration[ind])
    elif score > 1:
        blinkFrequencyMedium.append(flat_frequency[ind])
        blinkDurationMedium.append(flat_duration[ind])
    else:
        blinkFrequencyEasy.append(flat_frequency[ind])
        blinkDurationEasy.append(flat_duration[ind])
        

In [100]:
# Bucket every trial by its complexity score: > 6 is difficult, > 2 is medium,
# everything else is easy.
#
# The six bucket lists are rebuilt here so the cell is idempotent: re-running
# it, or running it after another bucketing cell, no longer appends the same
# trials a second time under a different criterion.
blinkFrequencyDifficult, blinkFrequencyMedium, blinkFrequencyEasy = [], [], []
blinkDurationDifficult, blinkDurationMedium, blinkDurationEasy = [], [], []

# flat_frequency / flat_duration are indexed with the position of the score, so
# a score list longer than the blink lists still fails with an IndexError.
for ind, score in enumerate(flat_scoreComplexity):
    if score > 6:
        blinkFrequencyDifficult.append(flat_frequency[ind])
        blinkDurationDifficult.append(flat_duration[ind])
    elif score > 2:
        blinkFrequencyMedium.append(flat_frequency[ind])
        blinkDurationMedium.append(flat_duration[ind])
    else:
        blinkFrequencyEasy.append(flat_frequency[ind])
        blinkDurationEasy.append(flat_duration[ind])
        

In [97]:
# Bucket every trial by its difficulty score: > 6 is difficult, > 2 is medium,
# everything else is easy.
#
# The six bucket lists are rebuilt here so the cell is idempotent: re-running
# it, or running it after another bucketing cell, no longer appends the same
# trials a second time under a different criterion.
blinkFrequencyDifficult, blinkFrequencyMedium, blinkFrequencyEasy = [], [], []
blinkDurationDifficult, blinkDurationMedium, blinkDurationEasy = [], [], []

# flat_frequency / flat_duration are indexed with the position of the score, so
# a score list longer than the blink lists still fails with an IndexError.
for ind, score in enumerate(flat_scoreDifficulty):
    if score > 6:
        blinkFrequencyDifficult.append(flat_frequency[ind])
        blinkDurationDifficult.append(flat_duration[ind])
    elif score > 2:
        blinkFrequencyMedium.append(flat_frequency[ind])
        blinkDurationMedium.append(flat_duration[ind])
    else:
        blinkFrequencyEasy.append(flat_frequency[ind])
        blinkDurationEasy.append(flat_duration[ind])
        

In [101]:
# plot blink duration, frequency and scores (easy, difficult)

# Blink duration against blink frequency, one marker style per category.
fig, ax = plt.subplots()

for frequencies, durations, markerFormat, categoryName in (
    (blinkFrequencyEasy, blinkDurationEasy, 'co', 'easy'),
    # (blinkFrequencyMedium, blinkDurationMedium, 'bd', 'medium'),  # medium bucket is not drawn
    (blinkFrequencyDifficult, blinkDurationDifficult, 'rv', 'difficult'),
):
    ax.plot(frequencies, durations, markerFormat, label = categoryName)

ax.set_xlabel('Blink Frequency')
ax.set_ylabel('Blink duration')
ax.legend()


<matplotlib.legend.Legend at 0x1a6af66e6a0>

In [29]:
# Blink duration of each category, drawn against the position inside its bucket.
fig, ax = plt.subplots()

for durations, lineColour, categoryName in (
    (blinkDurationEasy, 'c', 'easy'),
    (blinkDurationDifficult, 'r', 'difficult'),
    (blinkDurationMedium, 'b', 'medium'),
):
    ax.plot(durations, lineColour, marker = 'v', label = categoryName)

ax.set_ylabel('Blink Duration')
ax.legend()


<matplotlib.legend.Legend at 0x1a68cfe7828>

In [None]:
# Blink frequency of each category, drawn against the position inside its bucket.
fig, ax = plt.subplots()

for frequencies, lineColour, categoryName in (
    (blinkFrequencyEasy, 'c', 'easy'),
    (blinkFrequencyDifficult, 'r', 'difficult'),
    (blinkFrequencyMedium, 'b', 'medium'),
):
    ax.plot(frequencies, lineColour, marker = 'v', label = categoryName)

ax.set_ylabel('Blink Frequency')
ax.legend()

In [None]:
# Number of trials that ended up in the difficult, medium and easy buckets.
print(*map(len, (blinkDurationDifficult, blinkDurationMedium, blinkDurationEasy)))

In [None]:
# Overlaid histograms of blink frequency; each category gets its own
# transparency so the overlapping bars stay visible.
fig, ax = plt.subplots()
for frequencies, opacity, categoryName in (
    (blinkFrequencyDifficult, .60, 'difficult'),
    (blinkFrequencyMedium, .50, 'medium'),
    (blinkFrequencyEasy, .40, 'easy'),
):
    ax.hist(frequencies, alpha = opacity, label = categoryName)
ax.legend()

In [None]:
# Draw the extracted duration and frequency correlation values on one axes.
# NOTE(review): corrDurationListNew / corrFrequencyListNew are only assigned in
# cells that appear further down in the notebook, so this cell fails on a fresh
# top-to-bottom run.
fig, ax = plt.subplots()
for correlationValues in (corrDurationListNew, corrFrequencyListNew):
    ax.plot(correlationValues)

In [None]:
# Keep only element [0][1] of every entry in corrDurationList.
# NOTE(review): presumably each entry is a 2x2 np.corrcoef result - confirm where
# corrDurationList is filled.
corrDurationListNew = [entry[0][1] for entry in corrDurationList]

In [None]:
# Keep only element [0][1] of every entry in corrFrequencyList.
# NOTE(review): presumably each entry is a 2x2 np.corrcoef result - confirm where
# corrFrequencyList is filled.
corrFrequencyListNew = [entry[0][1] for entry in corrFrequencyList]

In [None]:
# Split the trials stored at one index (subjNr) of the collected lists into
# difficult / medium / easy buckets, keeping blink frequency, average blink
# duration and trial time for every bucket.
blinkFrequencyDifficult1, blinkFrequencyEasy1, blinkFrequencyMedium1 = [], [], []

blinkDurationDifficult1, blinkDurationEasy1, blinkDurationMedium1 = [], [], []

timeDifficult1, timeEasy1, timeMedium1 = [], [], []


subjNr = 6

# NOTE(review): the score is read at ind+1 while the blink/time lists are read at
# ind. Presumably the scores are a 1-based pandas Series; with a plain list the
# last iteration would raise IndexError - TODO confirm.
# NOTE(review): the 12 / 7 thresholds match the sum-of-scores cell, whereas the
# flat difficulty cell uses 6 / 2 - TODO confirm which score was intended.
subjScores = scoreTotalDifficulty[subjNr]
for ind in range(len(subjScores)):
    score = subjScores[ind+1]

    # pick the three target lists first, then append in the fixed order
    # frequency -> duration -> time
    if score > 12:
        frequencyBucket, durationBucket, timeBucket = (
            blinkFrequencyDifficult1, blinkDurationDifficult1, timeDifficult1)
    elif score > 7:
        frequencyBucket, durationBucket, timeBucket = (
            blinkFrequencyMedium1, blinkDurationMedium1, timeMedium1)
    else:
        frequencyBucket, durationBucket, timeBucket = (
            blinkFrequencyEasy1, blinkDurationEasy1, timeEasy1)

    frequencyBucket.append(blinkTotalFrequency[subjNr][ind])
    durationBucket.append(blinkTotalDurationAverage[subjNr][ind])
    timeBucket.append(timeTotalTrial[subjNr][ind])
        

In [None]:
# plot blink duration, frequency and scores (easy, difficult)

# 3D view of blink frequency, blink duration and task duration for the
# single-subject buckets, one colour per category.
fig, ax = plt.subplots(subplot_kw = {'projection': '3d'})

for frequencies, durations, times, markerFormat, categoryName in (
    (blinkFrequencyEasy1, blinkDurationEasy1, timeEasy1, 'co', 'easy'),
    (blinkFrequencyMedium1, blinkDurationMedium1, timeMedium1, 'bo', 'medium'),
    (blinkFrequencyDifficult1, blinkDurationDifficult1, timeDifficult1, 'ro', 'difficult'),
):
    ax.plot(frequencies, durations, times, markerFormat, label = categoryName)

ax.set_xlabel('Blink Frequency')
ax.set_ylabel('Blink duration')
ax.set_zlabel('Task duration')

ax.set_xlim([0, 1])
ax.set_ylim([0, 0.25])
ax.legend()
# NOTE(review): the title is hard-coded; confirm it names the recording that
# subjNr selected when the buckets were built.
ax.set_title('pt_woChinRest')

In [None]:
# Number of trial-time entries stored at position 6 of timeTotalTrial
# (the same index that subjNr is set to in the single-subject cell).
len(timeTotalTrial[6])

In [24]:
# plot blink duration, frequency and scores (easy, difficult)

# Average blink duration against blink frequency for all flattened trials.
fig = plt.figure()
ax = fig.add_subplot(111)

ax.plot(flat_frequency, flat_duration, marker = 'v')

ax.set_xlabel('Blink Frequency')
ax.set_ylabel('Blink duration')

#ax.set_xlim([0, 1])
#ax.set_ylim([0, 0.25])
# No ax.legend() here: the only line drawn carries no label, so the call added
# nothing to the figure and only emitted
# "No handles with labels found to put in legend."
ax.set_title('All subjects')

No handles with labels found to put in legend.


Text(0.5,1,'All subjects')

In [22]:
# Sanity check: the flattened lists are indexed in parallel, so their lengths
# are expected to agree.
print(*map(len, (flat_frequency, flat_duration, flat_time)))

189 189 184


In [38]:
# Number of entries that have been appended to blinkTotalFrequency so far.
print(len(blinkTotalFrequency))

21
