In [1]:
%matplotlib
#%matplotlib inline
import os
import csv
import fnmatch
import numpy as np
import datetime
import re 
import pandas as pd
import matplotlib.pyplot as plt
import math

# Ask pandas to treat +/-inf as missing values in its NA handling.
# NOTE(review): this option is reported as deprecated in recent pandas releases -
# confirm against the installed version before upgrading.
pd.options.mode.use_inf_as_na = True

Using matplotlib backend: Qt5Agg


In [2]:
# Key names (column 1 of the user-look rows) that ComputeDwellTime treats as dwell-time changes.
KeyInclude = ['IncreaseTimeDwell', 'DecreaseTimeDwell']
# Dwell time returned by ComputeDwellTime when no dwell-time change is logged;
# computeAggregateAverage scales it by the key progress and uses it as milliseconds.
TimeDwellOrig = 800
# Fixation time, in milliseconds, subtracted from each epoch start in computeAggregateAverage.
TimeFixation = 300

In [3]:
def ComputeDwellTime(userKeys, keyInclude=None, timeDwellOrig=None, step=100):
    """Return the dwell time after applying the logged dwell-time changes.

    Parameters
    ----------
    userKeys : list of rows
        Rows of the user-look log. Column 1 is read as the key name and
        column 2 as the key progress (presumably a 0..1 fraction - confirm
        against the log format).
    keyInclude : list of str, optional
        Key names counted as dwell-time changes. Defaults to the module-level
        ``KeyInclude``.
    timeDwellOrig : number, optional
        Starting dwell time. Defaults to the module-level ``TimeDwellOrig``.
    step : number, optional
        Amount added or subtracted per completed change (100 by default).

    Returns
    -------
    number
        The starting dwell time adjusted by ``step`` for every change key whose
        progress equals 1; the starting dwell time itself when there is none.
    """
    if keyInclude is None:
        keyInclude = KeyInclude
    if timeDwellOrig is None:
        timeDwellOrig = TimeDwellOrig

    # Start from the original dwell time so the adjustments below always have
    # a defined value to work on.
    TimeDwellNew = timeDwellOrig

    for key in userKeys:
        if key[1] not in keyInclude:
            continue

        # Rows read with csv.reader hold strings, so compare numerically.
        try:
            completed = float(key[2]) == 1
        except (TypeError, ValueError):
            completed = False
        if not completed:
            continue

        # Decide the direction from the key name prefix so that it always
        # agrees with whatever spelling keyInclude uses.
        if key[1].startswith('Increase'):
            TimeDwellNew = TimeDwellNew + step
        else:
            TimeDwellNew = TimeDwellNew - step

    return TimeDwellNew

In [4]:
# Build the list of scratchpad "looks": one entry per epoch of consecutive scratchpad rows

def _progressDropped(previous, current):
    """Return True when the progress value fell from one row to the next.

    Values are compared numerically when both parse as numbers (rows read with
    csv.reader hold strings); otherwise the raw values are compared.
    """
    try:
        return float(previous) > float(current)
    except (TypeError, ValueError):
        return previous > current


def UserLookedAtScratchPad(keysLookedAt):
    """Return the first row of every scratchpad epoch.

    Rows whose column 1 contains 'ScratchPad' are kept. A new epoch starts
    whenever the progress value (column 2) is lower than in the previous
    scratchpad row. The first row of each epoch is returned, including the
    epoch that is still open when the rows run out.

    Parameters
    ----------
    keysLookedAt : list of rows
        Rows of the user-look log; column 1 is the key name and column 2 the
        progress value.

    Returns
    -------
    list of rows
        One row per epoch, in log order; empty when no scratchpad row exists.
    """
    scratchPadKeyTime = [key for key in keysLookedAt if 'ScratchPad' in key[1]]

    epochList = []
    epochStart = None
    previousRow = None

    for row in scratchPadKeyTime:
        if previousRow is None:
            epochStart = row
        elif _progressDropped(previousRow[2], row[2]):
            # progress restarted: close the running epoch and open a new one
            epochList.append(epochStart)
            epochStart = row
        previousRow = row

    # Flush the epoch that was still open at the end of the log.
    if epochStart is not None:
        epochList.append(epochStart)

    return epochList

In [5]:
# Turn a list of timestamp strings into a list of datetime objects

def timeConversion(timeStrList):
    """Parse every timestamp string in ``timeStrList`` into a datetime.

    For each string, only the text before the first '+' is used and its last
    character is dropped. Every ':', '.' and 'T' is then replaced by '-' and
    the result is parsed as year-month-day-hour-minute-second-fraction.

    Returns a list of ``datetime.datetime`` objects in the input order.
    """
    def _parse(stamp):
        beforeOffset = stamp.partition('+')[0]
        dashed = re.sub('[:.T]', '-', beforeOffset[:-1])
        return datetime.datetime.strptime(dashed, "%Y-%m-%d-%H-%M-%S-%f")

    return [_parse(stamp) for stamp in timeStrList]

In [6]:
# Return the last entry of `dates` that lies strictly before `date`, together with its index
def nearestTimePoint(dates, date):
    """Find the last element of ``dates`` that is strictly smaller than ``date``.

    Parameters
    ----------
    dates : iterable
        Values comparable with ``date``. For the result to be the closest
        earlier value, the sequence is expected to be in ascending order.
    date :
        The pivot value.

    Returns
    -------
    tuple
        ``(value, index)`` where ``value`` is the last element with
        ``element < date`` and ``index`` is the position of the first element
        equal to it. ``(0, -1)`` when no element is smaller than ``date``.
    """
    # Materialise other iterables so that .index() is available and the data
    # can be scanned twice.
    if not isinstance(dates, (list, tuple)):
        dates = list(dates)

    found = False
    nearestTP = 0
    for d in dates:
        if d < date:
            nearestTP = d
            found = True

    if not found:
        return 0, -1

    return nearestTP, dates.index(nearestTP)

In [7]:
# Drop every gaze-log row that is flagged as invalid
def cleanGazeLog(gazeLog):
    """Return a new list holding only the rows that do not contain 'Invalid'."""
    return [row for row in gazeLog if 'Invalid' not in row]

In [42]:
def hampel(vals_orig, k, sd):
    '''
    Replace outliers in a series by the local median (Hampel-style filter).

    vals_orig: values to filter (list, pandas Series or single-column frame);
               the input is not modified
    k: size of window (including the sample; 7 is equal to 3 on either side of value)
    sd: number of scaled median absolute deviations a sample may differ from
        the rolling median before it is treated as an outlier

    Returns a pandas DataFrame of floats with the same length as the input in
    which +/-inf and NaN have been linearly interpolated and outliers replaced
    by the rolling median.
    '''
    # Obtained from: https://stackoverflow.com/questions/46819260/filtering-outliers-how-to-make-median-based-
    # hampel-function-faster

    # Work on a float copy; infinities are treated like missing samples.
    vals = pd.DataFrame(vals_orig).replace([np.inf, -np.inf], np.nan).astype(float)

    # Linear interpolation (rather than copying a neighbour or a cubic fit) so
    # no artificial pattern is added; limit_direction='both' also fills NaNs
    # at the very start of the series.
    vals = vals.interpolate('linear', limit_direction='both')

    L = 1.4826  # scale factor applied to the median absolute deviation
    rolling_median = vals.rolling(window=k, min_periods=1, center=True).median()
    difference = (rolling_median - vals).abs()

    # Use the same centred window (with min_periods=1) as the median above.
    # A trailing window with the default min_periods yields NaN thresholds for
    # the first k-1 samples, which would therefore never be filtered.
    median_abs_deviation = difference.rolling(window=k, min_periods=1, center=True).median()
    threshold = sd * L * median_abs_deviation
    outlier_idx = difference > threshold

    # Keep regular samples, substitute the rolling median for outliers.
    return vals.where(~outlier_idx, rolling_median)

In [45]:
def _accumulatePupilEpoch(pupilData, label, epochLeft, epochRight):
    """Add one epoch's left/right traces to the running sums stored under ``label``.

    ``label`` is 'Correct' or 'Incorrect'. The first epoch of a class is stored
    as is; later epochs are added sample by sample and the running sum is
    truncated to the shorter of the two lengths. The '<label>Number' counter
    is incremented in every case.
    """
    firstKey = label + 'First'
    leftKey = label + 'Left'
    rightKey = label + 'Right'
    countKey = label + 'Number'

    if pupilData[firstKey] == 0:
        pupilData[firstKey] = 1
        pupilData[leftKey] = list(epochLeft)
        pupilData[rightKey] = list(epochRight)
    else:
        # zip() stops at the shorter sequence, i.e. sums over the common length
        pupilData[leftKey] = [total + sample for total, sample in zip(pupilData[leftKey], epochLeft)]
        pupilData[rightKey] = [total + sample for total, sample in zip(pupilData[rightKey], epochRight)]

    pupilData[countKey] = pupilData[countKey] + 1


def computeAggregateAverage(scratchPadLookedAtEpoch, scratchPadList, phraseList, GazeLog, pupilData, TimeDwell, subjName):
    """Accumulate filtered pupil traces for every scratchpad look of one subject.

    For each look, the gaze-log samples between the start of the look (look
    time minus progress * TimeDwell ms minus TimeFixation ms) and 5 seconds
    after the look are extracted for both eyes, passed through ``hampel`` and
    a centred moving mean, and added to the 'Correct' or 'Incorrect' running
    sums in ``pupilData``. A look counts as correct when the most recent
    scratchpad text is contained in the most recent phrase to be typed.

    Parameters
    ----------
    scratchPadLookedAtEpoch : list of rows
        One row per look; column 0 is the timestamp string and column 2 the
        progress value.
    scratchPadList : list of rows
        Scratchpad log; column 0 is the timestamp string, column 1 the text.
    phraseList : list of rows
        Phrase log; column 0 is the timestamp string, column 1 the phrase.
    GazeLog : list of rows
        Gaze log; column 0 is the timestamp string, columns 29 and 31 are read
        as the left and right pupil values. Rows containing 'Invalid'
        contribute NaN.
    pupilData : dict
        Running totals with keys '<Correct|Incorrect>First', '...Number' and,
        once a class has data, '...Left' and '...Right'. Updated in place.
    TimeDwell : number
        Dwell time, used as milliseconds.
    subjName : str
        Subject identifier; not used by the computation.

    Returns
    -------
    dict
        The same ``pupilData`` object.
    """
    winSize = 25  # window length shared by the outlier filter and the moving mean

    timeUserLooked = timeConversion([item[0] for item in scratchPadLookedAtEpoch])
    timeScratchAll = timeConversion([item[0] for item in scratchPadList])
    timeToBeScratchedAll = timeConversion([item[0] for item in phraseList])
    timeGazeLog = timeConversion([item[0] for item in GazeLog])

    # Pupil values per gaze sample; invalid rows become NaN and are
    # interpolated later inside hampel().
    pupilLogL = [float(row[29]) if 'Invalid' not in row else np.nan for row in GazeLog]
    pupilLogR = [float(row[31]) if 'Invalid' not in row else np.nan for row in GazeLog]

    # NOTE: the inter-pupil distance (gaze-log columns 3-5 and 10-12) does not
    # contribute to the aggregate, so it is not computed here.

    for epochRow, timeLooked in zip(scratchPadLookedAtEpoch, timeUserLooked):
        # Start of the look: remove the elapsed dwell and the fixation time.
        dwellElapsed = datetime.timedelta(milliseconds=float(epochRow[2]) * TimeDwell)
        timeScratchPadActive = timeLooked - dwellElapsed - datetime.timedelta(milliseconds=TimeFixation)
        timeWindowEnd = timeLooked + datetime.timedelta(seconds=5)

        _, GazeLogStartInd = nearestTimePoint(timeGazeLog, timeScratchPadActive)
        _, GazeLogEndInd = nearestTimePoint(timeGazeLog, timeWindowEnd)

        # Skip looks without a usable gaze window: no sample before the start
        # (index -1 would otherwise wrap around to the end of the log) or an
        # empty sample range.
        if GazeLogStartInd < 0 or GazeLogStartInd >= GazeLogEndInd:
            print('one')
            continue

        _, phraseScratchedInd = nearestTimePoint(timeScratchAll, timeLooked)
        _, phraseToBeScratchedInd = nearestTimePoint(timeToBeScratchedAll, timeLooked)

        # No phrase had been presented before this look, so it cannot be
        # classified (index -1 would otherwise select the last phrase).
        if phraseToBeScratchedInd < 0:
            continue

        if phraseScratchedInd < 0:
            phraseScratched = ''
        else:
            phraseScratched = scratchPadList[phraseScratchedInd][1]
        phraseToBeScratched = phraseList[phraseToBeScratchedInd][1]

        if phraseScratched in phraseToBeScratched:
            if 'THE EXPERIMENT IS NOW DONE' in phraseToBeScratched:
                continue
            label = 'Correct'
        else:
            label = 'Incorrect'

        # Remove outliers, then smooth with a centred moving mean.
        pupilWoOutlierL = hampel(pupilLogL[GazeLogStartInd:GazeLogEndInd], winSize, 3)
        pupilWoOutlierR = hampel(pupilLogR[GazeLogStartInd:GazeLogEndInd], winSize, 3)
        pupilMeanL = pupilWoOutlierL.rolling(window=winSize, min_periods=1, center=True).mean()
        pupilMeanR = pupilWoOutlierR.rolling(window=winSize, min_periods=1, center=True).mean()

        _accumulatePupilEpoch(pupilData, label, pupilMeanL.values[:, 0], pupilMeanR.values[:, 0])

    return pupilData

In [46]:
def _readLogRows(path, dropLastRow=False):
    """Read a CSV log file and return its rows without the first (header) row.

    When ``dropLastRow`` is True the final row is removed as well. The file is
    always closed. Returns None (after printing the reason) when the file
    cannot be read.
    """
    try:
        with open(path, encoding='utf-8') as logFile:
            rows = list(csv.reader(logFile))
    except (OSError, UnicodeDecodeError, csv.Error) as err:
        print('error in opening the log file', path, err)
        return None

    rows = rows[1:]
    if dropLastRow:
        rows = rows[:-1]
    return rows


# NOTE(review): machine-specific absolute path - consider moving it to a config cell.
subjName = r'C:\DTU\Data\201805_HealthnRehab\TypingData'
# j and flagFirstSubj are not used in this cell; kept in case other cells read them.
j = 0
flagFirstSubj = 0
pupilData = dict()
pupilData['CorrectFirst'] = 0
pupilData['IncorrectFirst'] = 0
pupilData['CorrectNumber'] = 0
pupilData['IncorrectNumber'] = 0

# (file name pattern, log name, drop the last row as well?) - the first matching pattern wins
logFilePatterns = (
    ('user_look*', 'userKeys', False),
    ('ScratchPad*', 'scratchPad', False),
    ('PhraseLog*', 'stimPhrase', False),
    ('GazeLog*', 'gazeLog', True),
)

# Subject folder name: the path component(s) between "TypingData\MayNN\" and "\OptiKey"
subjNamePattern = re.compile(r'(?<=TypingData\\May[0-9]{2}\\)(.*)(?=\\OptiKey)')

for root, dirs, subfolder in os.walk(subjName):
    LetterLookedAtList = list()
    LetterLookedAt = list()

    # Only leaf folders whose path contains 'hc' are analysed.
    if dirs or 'hc' not in root:
        continue
    # Some subjects do not have a gaze log and have been marked as notInclude.
    if 'notCompleted' in root or 'notInclude' in root:
        continue
    # NOTE(review): these are substring tests on the whole path - confirm they
    # cannot match an unrelated folder name.
    if 'tb' in root or 'joha' in root or 'ae' in root:
        continue

    logs = {logName: None for _, logName, _ in logFilePatterns}

    for file in subfolder:
        for pattern, logName, dropLastRow in logFilePatterns:
            if fnmatch.fnmatch(file, pattern):
                rows = _readLogRows(os.path.join(root, file), dropLastRow)
                if rows is not None:
                    logs[logName] = rows
                break

    userKeys = logs['userKeys']
    scratchPad = logs['scratchPad']
    stimPhrase = logs['stimPhrase']
    gazeLog = logs['gazeLog']

    # All four logs are required; process each subject folder exactly once,
    # after every file in it has been read.
    if userKeys is None or scratchPad is None or stimPhrase is None or gazeLog is None:
        continue

    # Compute dwell time
    TimeDwell = ComputeDwellTime(userKeys)

    # Find the times at which the scratchpad was looked at
    scratchPadKeyTime = UserLookedAtScratchPad(userKeys)

    subjMatch = subjNamePattern.search(root)
    subjName = subjMatch.group(1) if subjMatch else root
    print(subjName)

    # For every look: find what was typed and what should have been typed just
    # before it, and add the filtered pupil data of the epoch to the totals.
    pupilData = computeAggregateAverage(scratchPadKeyTime, scratchPad, stimPhrase, gazeLog, pupilData, TimeDwell, subjName)

# Plot the across-epoch average pupil trace of correct vs. incorrect looks, one panel per eye.
traceKeys = ('CorrectLeft', 'CorrectRight', 'IncorrectLeft', 'IncorrectRight')

# The traces only exist once at least one epoch of each class has been aggregated.
if all(key in pupilData for key in traceKeys):
    # The correct and incorrect sums can have different lengths: plot the common part.
    pupilSizeMin = min(len(pupilData[key]) for key in traceKeys)
    fig, (axL, axR) = plt.subplots(2, 1)
    # Sample index divided by 90 (presumably the eye tracker's samples per
    # second - confirm). An integer arange guarantees exactly pupilSizeMin
    # points, which a float step does not.
    xAxis = np.arange(pupilSizeMin) / 90

    pupilPlot = dict()
    for label in ('Correct', 'Incorrect'):
        for side in ('Left', 'Right'):
            # running sum -> mean over the epochs of this class
            pupilPlot[label + side] = [x / pupilData[label + 'Number'] for x in pupilData[label + side]]

    for ax, side in ((axL, 'Left'), (axR, 'Right')):
        ax.plot(xAxis, pupilPlot['Correct' + side][0:pupilSizeMin], 'bo', label='Correct')
        ax.plot(xAxis, pupilPlot['Incorrect' + side][0:pupilSizeMin], 'ro', label='Incorrect')
        ax.set_title(side)
        ax.set_ylabel('Mean pupil size')
        ax.legend()
    axR.set_xlabel('Time (s)')  # assumes 90 samples per second - confirm
else:
    print('nothing to plot: no correct and/or no incorrect epochs were aggregated')
                

hc_MS
NaN removed for this datatype: <class 'numpy.float64'> <class 'numpy.float64'>
NaN removed for this datatype: <class 'numpy.float64'> <class 'numpy.float64'>
NaN removed for this datatype: <class 'numpy.float64'> <class 'numpy.float64'>
NaN removed for this datatype: <class 'numpy.float64'> <class 'numpy.float64'>
NaN removed for this datatype: <class 'numpy.float64'> <class 'numpy.float64'>
NaN removed for this datatype: <class 'numpy.float64'> <class 'numpy.float64'>
NaN removed for this datatype: <class 'numpy.float64'> <class 'numpy.float64'>
NaN removed for this datatype: <class 'numpy.float64'> <class 'numpy.float64'>
NaN removed for this datatype: <class 'numpy.float64'> <class 'numpy.float64'>
NaN removed for this datatype: <class 'numpy.float64'> <class 'numpy.float64'>


In [29]:
# plot again, if required

# Average pupil trace of correct vs. incorrect looks, one panel per eye.
replotKeys = ('CorrectLeft', 'CorrectRight', 'IncorrectLeft', 'IncorrectRight')

if all(key in pupilData for key in replotKeys):
    # The correct and incorrect sums can have different lengths: plot the common part.
    pupilSizeMin = min(len(pupilData[key]) for key in replotKeys)
    fig, (axL, axR) = plt.subplots(2, 1)
    # Sample index divided by 90 (presumably samples per second - confirm).
    # An integer arange guarantees exactly pupilSizeMin points, which a float
    # step does not.
    xAxis = np.arange(pupilSizeMin) / 90

    pupilPlot = dict()
    pupilPlot['CorrectLeft'] = [x / pupilData['CorrectNumber'] for x in pupilData['CorrectLeft']]
    pupilPlot['CorrectRight'] = [x / pupilData['CorrectNumber'] for x in pupilData['CorrectRight']]
    pupilPlot['IncorrectLeft'] = [x / pupilData['IncorrectNumber'] for x in pupilData['IncorrectLeft']]
    pupilPlot['IncorrectRight'] = [x / pupilData['IncorrectNumber'] for x in pupilData['IncorrectRight']]

    axL.plot(xAxis, pupilPlot['CorrectLeft'][0:pupilSizeMin], 'bo', label='Correct')
    axL.plot(xAxis, pupilPlot['IncorrectLeft'][0:pupilSizeMin], 'ro', label='Incorrect')
    axL.set_title('Left')
    axL.set_ylabel('Mean pupil size')
    axL.legend()

    axR.plot(xAxis, pupilPlot['CorrectRight'][0:pupilSizeMin], 'bo', label='Correct')
    axR.plot(xAxis, pupilPlot['IncorrectRight'][0:pupilSizeMin], 'ro', label='Incorrect')
    axR.set_title('Right')
    axR.set_ylabel('Mean pupil size')
    axR.set_xlabel('Time (s)')  # assumes 90 samples per second - confirm
    axR.legend()
else:
    print('nothing to plot: no correct and/or no incorrect epochs were aggregated')

Text(0.5,1,'Right')

In [41]:
# Scratch check of DataFrame.interpolate('linear') with default arguments:
# as the output below shows, leading NaNs stay unfilled while trailing NaNs
# are filled with the last valid value.
n = pd.DataFrame([np.nan, np.nan, np.nan, 4, 5, np.nan, np.nan])
n.astype(float).interpolate('linear')

Unnamed: 0,0
0,
1,
2,
3,4.0
4,5.0
5,5.0
6,5.0
