In [1]:
%matplotlib
#%matplotlib inline
import os
import csv
import fnmatch
import numpy as np
import datetime
import re 
import pandas as pd
import matplotlib.pyplot as plt
import math
import pywt
import itertools
from scipy import stats

pd.options.mode.use_inf_as_na = True

Using matplotlib backend: TkAgg


In [2]:
TimeDwellOrig = 100
TimeFixation = 300

In [3]:
pupilTotal_Difficult = list()
pupilTotal_Medium = list()
pupilTotal_Easy = list()

In [4]:
def FixUserKeys(UserKeys_Old):
    # Fix the situation where comma has divided decimals into separate columns
    
    Column_beforeDecimal = [item[2] for item in UserKeys_Old]
    Column_afterDecimal = [item[3] if len(item)>3 else '00' for item in UserKeys_Old]
    
    UserKeys_ProgressPercent = [float(Column_beforeDecimal[i]+'.'+ Column_afterDecimal[i]) for i in 
                                range(0, len(Column_beforeDecimal))]
    UserKeys_Times = [item[0] for item in UserKeys_Old]
    UserKeys_Keys = [item[1] for item in UserKeys_Old]
    
    UserKeys_New = [[UserKeys_Times[ind], UserKeys_Keys[ind], UserKeys_ProgressPercent[ind]] for ind in 
                    range(0, len(UserKeys_ProgressPercent))]
    
    #UserKeys_New = np.concatenate((UserKeys_Times, UserKeys_Keys, UserKeys_ProgressPercent), axis = 0)
    
    
    return UserKeys_New
        

In [5]:
def ComputeDwellTime(userKeys):
    # modify userKeys to include a column of time instead of progress pct, which is dependent on the then dwell time
    
    timeDwell = TimeDwellOrig
    nKey = -1
    for key in userKeys:
        nKey = nKey + 1
        #print(key[1])
        if key[1] == 'IncreaseDwellTime':
            if float(key[2]) == 1:
                timeDwell = timeDwell + 100
        elif key[1] == 'DecreaseDwellTime':
            #print(key[2])
            if float(key[2]) == 1:
                timeDwell = timeDwell - 100
        else:
            userKeys[nKey].append(str(float(key[2])*timeDwell))
    
    return userKeys

In [6]:
def OptiKeyTypingTime(UserKeys):
    
    TimeTyping = dict()
    
    time1, t1, t2 = UserKeys[0][0].partition('+')
    startTime = datetime.datetime.strptime(re.sub('[:.T]','-',time1[:-1]), "%Y-%m-%d-%H-%M-%S-%f")
    
    time2, t1, t2 = UserKeys[-1][0].partition('+')
    endTime = datetime.datetime.strptime(re.sub('[:.T]','-',time2[:-1]), "%Y-%m-%d-%H-%M-%S-%f")
    
    TimeTyping['startTime'] = startTime
    TimeTyping['endTime'] = endTime
    
    return TimeTyping

In [7]:
def FindTrialEndTimes(KeysSelected, timeTyping):
    # function to find start and end of tasks in experiments
    timeStartEnd = list() # format of this list will be: [startTime1, endTime1/startTime2, endTime2/startTime3, ..., endTimeN]
    
    timeStartEnd.append(timeTyping['startTime'])
    
    nTrial = 1
    
    for keys in KeysSelected:
        
        if keys[1] == 'NextPhrase':
            time1, t1, t2 = keys[0].partition('+')
            endTimeTrial = datetime.datetime.strptime(re.sub('[:.T]','-',time1[:-1]), "%Y-%m-%d-%H-%M-%S-%f")
            timeStartEnd.append(endTimeTrial)
    
    
    timeStartEnd.append(timeTyping['endTime'])
    
    
    return timeStartEnd

In [8]:
# function to convert list of date and time into datetime format list
def timeConversion(timeStrList):
    timeList = list()
    for time in timeStrList:
        time1, t1, t2 = time.partition('+')
        timeList.append(datetime.datetime.strptime(re.sub('[:.T]','-',time1[:-1]), "%Y-%m-%d-%H-%M-%S-%f"))
    return timeList

In [9]:
# This function will return the datetime in items which is the closest to the date pivot
def nearestTimePoint(dates, date):
    
    for d in dates:
        if d < date:
            nearestTP = d
        else:
            continue
    try: 
        nearestTP
        nearestTPind = dates.index(nearestTP)
    except:
        nearestTP = 0
        nearestTPind = -1
        
    return nearestTP, nearestTPind

In [10]:
def CreateTimeEpochsOfTrials(TimeStartEndMixed, UserKeys):
    # function to use list of mixed start and end times of trials and keys looked at by user to create trial epochs
    
    TimeEpochTrial = dict()
    TimeEpochTrial['Start'] = list()
    TimeEpochTrial['End'] = list()
    
    # Create list of times in userKeys to be able to use function 'nearestTimePoint'
    UserKeysStrTimes = [item3[0] for item3 in UserKeys]
    UserKeysTimes = timeConversion(UserKeysStrTimes)
    
    Flag_FoundSleepKey = 0 # Flag to indicate finding sleep key
    
    n = -1
    for time in TimeStartEndMixed:
        n = n + 1
        Flag_FoundSleepKey = 0
        
        if n == 0: # first time is only start time for the first trial
            TimeEpochTrial['Start'].append(time)
            continue
        elif n == len(TimeStartEndMixed)-1: # last time is only the end time for last trial
            
            TimeEpochTrial['End'].append(time)
            
        else: # the middle elements need to be divided into start and end
            TimeEpochTrial['End'].append(time)
            
            # start time of trial is the time when 'Sleep' key is started in userKeys, after NextPhrase               
            nearestToTrialStartTime, nearestToTrialStartInd = nearestTimePoint(UserKeysTimes, time)
            indCheck = nearestToTrialStartInd + 2
            TimeEpochTrial['Start'].append(UserKeysTimes[indCheck])
            
    return TimeEpochTrial      
            

In [11]:
def DwellTimeForBaseline(UserKeys_wDwellTime):
    
    DwellTime = list()
    
    for key in UserKeys_wDwellTime:
        if key[1] == 'NextPhrase':
            #print('NextPhrase found at ', key[2])
            if key[2] == 1:
                DwellTime.append(key[3])
                
    return DwellTime

In [12]:
def Combine2ColumnsTo1GazeLog(GazeLog, Column_1, Column_2):
    
    JoinedList = list()
    
    Column_beforeDecimal = [item4[Column_1] if 'Invalid' not in item4 else 'nan' for item4 in GazeLog]
    Column_afterDecimal = [item4[Column_2] if 'Invalid' not in item4 else 'nan' for item4 in GazeLog]
    
    
    for i in range(0, len(Column_beforeDecimal)):
        if 'Valid' not in Column_beforeDecimal[i] and 'Valid' not in Column_afterDecimal[i]:
            if 'nan' not in Column_beforeDecimal[i] and 'nan' not in Column_afterDecimal[i]:
                JoinedList.append(float(Column_beforeDecimal[i]+'.'+ Column_afterDecimal[i]))
            else:
                JoinedList.append(np.nan)
        else:
            # Rarely, the pupil size is a whole number
            JoinedList.append(np.nan) # we will ignore the row, since there is no way of automatically knowing which - 
            # right or left eye has whole number pupil size
            
    return JoinedList

In [13]:
def Convert2ColumnsToFormPupilSizes(GazeLog):
    # function to convert pupilsizes from 2 columns for every pupil due to comma use instead of decimal, 
    # to proper pupil sizes
    
    PupilLogL = Combine2ColumnsTo1GazeLog(GazeLog, -5, -4)
    PupilLogR = Combine2ColumnsTo1GazeLog(GazeLog, -2, -1)
            
    # if one of the pupils are nan, the other one is converted too
    nPupil = -1
    for pupilL in PupilLogL:
        nPupil = nPupil + 1
        if np.isnan(pupilL):
            if nPupil < len(PupilLogR):
                if not np.isnan(PupilLogR[nPupil]):
                    PupilLogR[nPupil] = np.nan
            else:
                del PupilLogL[len(PupilLogR):]
                
    nPupil = -1
    for pupilR in PupilLogR:
        nPupil = nPupil + 1
        if np.isnan(pupilR):
            if nPupil < len(PupilLogL):
                if not np.isnan(PupilLogL[nPupil]):
                    PupilLogL[nPupil] = np.nan
            else:
                del PupilLogR[len(PupilLogL):]
                
    #print(len(PupilLogL), len(PupilLogR))
    
    return PupilLogL, PupilLogR

In [14]:
def PupilSizeFromTrialTimes(TimeTrial, TimeGazeLog, TimeInternalGazeLog, PupilSizeLogL, PupilSizeLogR):
    # find pupil sizes from the start and end time given
    
    # find start and end time in gazeLog
    timeStart, timeStartInd = nearestTimePoint(TimeGazeLog, TimeTrial[0])
    timeEnd, timeEndInd = nearestTimePoint(TimeGazeLog, TimeTrial[1])
    
    pupilSize_TrialL = PupilSizeLogL[timeStartInd: timeEndInd]
    pupilSize_TrialR = PupilSizeLogR[timeStartInd: timeEndInd]
    
    TimeInternal_Trial = TimeInternalGazeLog[timeStartInd: timeEndInd]
    
    TimeGaze_Trial = TimeGazeLog[timeStartInd: timeEndInd]
    
    return pupilSize_TrialL, pupilSize_TrialR, TimeGaze_Trial, TimeInternal_Trial
    

In [60]:
def filterBlinks(pupilData, timeListComplete):
    # filter any blinks and nan values lasting around 250ms (on average)
    # http://faculty.washington.edu/chudler/facts.html
   
    # blink is every nan value in the range of 100-400ms 
    # 250 ms (23 samples) before and after the blink will also be removed
    extraBlinkSamples = 23    
    
    # remove single missing data, that are due to hardware error
    missingVal_Single = np.argwhere(np.isnan(pupilData))
    missingVal_Single = list(itertools.chain.from_iterable(missingVal_Single)) # flatten the list
    missingVal_SingleDifference = [t - s for s, t in zip(missingVal_Single, missingVal_Single[1:])] # find difference 
    # between consecutive elements
    
    if len(missingVal_Single) > 0:
        missingVal_SingleDifference.insert(0, missingVal_Single[0]) # insert the first blink index in the beginning of list
    else:
        return pupilData, timeListComplete, []
    # the list missingVal_SingleDifference contains the index of the first blink, followed by the difference in the index to 
    # the next nan value
    
    # first remove the single nan values, which are missing data
    eyeTracker_missingData = list() # list with index of single missing data  
    valInd = -1

    for val in missingVal_SingleDifference:
        valInd = valInd + 1
        if valInd == 0:
            continue
        if val != 1:
            if missingVal_SingleDifference[valInd-1] !=1: # if there are 2 consecutive missing values (denoted by 2 consecutive
                # non 1 numbers, they are added to the list of eyeTracker_missingData)
                eyeTracker_missingData.append(sum(missingVal_SingleDifference[:valInd]))
                
    # remove single missing values from pupil data
    pupilData_woSingleMissingData0 = [pupilData[ind] if ind not in eyeTracker_missingData else [] 
                                     for ind in range(0, len(pupilData))]
    pupilData_woSingleMissingData = [x for x in pupilData_woSingleMissingData0 if x]
    
    # remove the times for single missing values in pupil data
    timeList_woSingleMissingData0 = [timeListComplete[ind] if ind not in eyeTracker_missingData else [] 
                                     for ind in range(0, len(timeListComplete))]
    timeList_woSingleMissingData = [x for x in timeList_woSingleMissingData0 if x]
    
#     print(len(timeList_woSingleMissingData))
    
    
    
    
    # find the nan values again from pupilData_woSingleMissingData
    missingVal_Rest = np.argwhere(np.isnan(pupilData_woSingleMissingData))
    missingVal_Rest = list(itertools.chain.from_iterable(missingVal_Rest))
    missingVal_RestDifference = [t - s for s, t in zip(missingVal_Rest, missingVal_Rest[1:])]
    missingVal_RestDifference.insert(0, missingVal_Rest[0])
    
    
    # compile and create list of start and end of blinks
    blink_missingData = dict()
    blink_missingData['Start'] = list()
    blink_missingData['End'] = list()
    
    valInd = -1
    for val in missingVal_RestDifference:
        valInd = valInd + 1
        if val > 1:
            #print('value', val)
            # instead of appending the actual index of blink start, since 250ms before and after the blink need to be
            # removed, it is also appended here.
            
            # just make sure that the additional samples do not make the index of blink go in negative
            if sum(missingVal_RestDifference[:valInd+1])-extraBlinkSamples > 0:
                
                blink_missingData['Start'].append(sum(missingVal_RestDifference[:valInd+1])-extraBlinkSamples)
            else:
                blink_missingData['Start'].append(0)
            
            if valInd == 0:
                continue
                
            # make sure that the additional samples do not increase the index to more than the length of the pupilData
            if sum(missingVal_RestDifference[:valInd])+extraBlinkSamples < len(pupilData_woSingleMissingData):
                blink_missingData['End'].append(sum(missingVal_RestDifference[:valInd])+extraBlinkSamples)
            else:
                blink_missingData['End'].append(len(pupilData_woSingleMissingData)-1)
            #print('end', sum(missingVal_RestDifference[:valInd]))
      
    # add the last blink index
    # make sure that the additional samples do not increase the index to more than the length of the pupilData
    if sum(missingVal_RestDifference)+extraBlinkSamples < len(pupilData_woSingleMissingData):
        blink_missingData['End'].append(sum(missingVal_RestDifference)+extraBlinkSamples)
    else:
        blink_missingData['End'].append(len(pupilData_woSingleMissingData)-1)
      
    
    # print start and end values
    #for ind in range(0,len(blink_missingData['Start'])):
    #    print(blink_missingData['Start'][ind]+23, blink_missingData['End'][ind]-23)
        
        
        
    # need to create a list containing indexes that are to be removed
    blinkIndexList = list()
    
    #print(len(blink_missingData['Start']), len(blink_missingData['End']))
    
    
    # remove blinks and additional data from pupil data to get filtered data
    for indInd in range(0, len(blink_missingData['Start'])):
        blinkIndexList.append(range(blink_missingData['Start'][indInd], blink_missingData['End'][indInd]+1))
    # flatten the list
    blinkIndexList = list(itertools.chain.from_iterable(blinkIndexList))
    
    
    ##print(len(pupilData_woSingleMissingData))
    
    pupilData_woRestMissingData0 = [pupilData_woSingleMissingData[ind] if ind not in blinkIndexList else [] 
                                     for ind in range(0, len(pupilData_woSingleMissingData))]
    #for i in enumerate(pupilData_woRestMissingData0):
    #    print(i)
    pupilData_filter = [x for x in pupilData_woRestMissingData0 if x]
    
    #for i in enumerate(pupilData_filter):
    #    print(i)
        
    # remove the times for single missing values in pupil data
    timeList_woRestMissingData0 = [timeList_woSingleMissingData[ind] if ind not in blinkIndexList else [] 
                                     for ind in range(0, len(timeList_woSingleMissingData))]
    time_filter = [x for x in timeList_woRestMissingData0 if x]
    
    #print(len(pupilData_filter))
        
    if np.nan in pupilData_filter:
        print('nan values still present in pupil data')
        #for i in enumerate(pupilData_woSingleMissingData):
        #    print(i)
        
    
    return pupilData_filter, time_filter, blink_missingData

In [61]:
def hampel(vals_orig, k, sd):
    '''
    vals: pandas series of values from which to remove outliers
    k: size of window (including the sample; 7 is equal to 3 on either side of value)
    '''
    # Obtained from: https://stackoverflow.com/questions/46819260/filtering-outliers-how-to-make-median-based-
    # hampel-function-faster
    
    #plt.plot(vals_orig)
    
    #Make copy so original not edited
    vals = pd.DataFrame(vals_orig)      
    #print(vals.isnull().any())
    vals0 = vals.replace([np.inf, -np.inf], np.nan)
    #vals = vals0.astype(float).fillna(method = 'backfill') # linear interpolation instead 
    #print(vals)
    vals = vals0.astype(float).interpolate('linear', limit_direction = 'both') # linear interpolation instead of 
    # simply copying the previous value --\ linear interpolation than cubic to not add any patterns in the data, limit direction
    # set to both, to interpolate the nan values occuring from the start of the series
    
    L= 1.4826
    rolling_median = vals.rolling(window=k, min_periods=1, center=True).median()
    
    #print(rolling_median)
    difference = np.abs(rolling_median-vals)
    median_abs_deviation = difference.rolling(k).median()
    threshold = sd * L * median_abs_deviation
    outlier_idx = difference>threshold
    vals[outlier_idx] = rolling_median[outlier_idx]
    #print(vals)
    #print('datatype', vals.dtypes)
    #print(vals.isnull().any())
    #vals.plot()
    return(vals)

In [62]:
def plotPupilSize(pupilData, timeData, TrialNumber, scoreDifficulty):
    
    dataLenEqualizer = min(min(len(pupilData['Left']), len(pupilData['Right'])), len(timeData))
    
    fig = plt.figure()
    ax = fig.add_subplot(1,1,1)
    ax.plot(timeData[0:dataLenEqualizer], pupilData['Left'][0:dataLenEqualizer], 'b')
    ax.plot(timeData[0:dataLenEqualizer], pupilData['Right'][0:dataLenEqualizer], 'r')
    
    ax.set_ylabel('Relative pupil size [in mm]')
    ax.set_ylim([0.8, 1.2])
    ax.set_title(scoreDifficulty)


In [63]:
def FindAndPlotPupilSizeForEpoch(GazeLog, TimeEpochTrial, DwellTimes_ForBaseline):
    # function that uses the list of start and end trial times to find the pupil sizes for those trials and plots them
    
    # first create a list of times in gaze log
    timeStrGazeLog = [item3[0] for item3 in GazeLog]
    # convert the list of strings to datetime formats
    timeGazeLog = timeConversion(timeStrGazeLog)
    
    # internal time, to depict seconds
    timeInternalGazeLog = [float(item3[1]) for item3 in GazeLog]
    
    # extract pupil sizes in decimals from the strange 2 columns for every pupil
    pupilLogL, pupilLogR = Convert2ColumnsToFormPupilSizes(GazeLog)
    
    pupilRelative_avg = list()
    pupilAbsolute_avg = list()
    timeOfGaze_TrialList = list()
    
    #trialsToIgnore = [1, 2, 4, 6, 8, 10, 12]
    trialsForBaseline = [2, 4, 6, 8, 10]
    # for every epoch, plot the pupil size
    for trialNr in range(0, len(timeEpochTrial['Start'])):
        #if trialNr in trialsToIgnore:
        #    continue
        if trialNr == 1 or trialNr == 0:
            continue
            
        
        # find pupil sizes for the trial
        pupilSizeL_Trial, pupilSizeR_Trial, timeGaze_Trial, timeInternal_Trial = PupilSizeFromTrialTimes(
            [TimeEpochTrial['Start'][trialNr], TimeEpochTrial['End'][trialNr]], timeGazeLog, 
                                timeInternalGazeLog, pupilLogL, pupilLogR)
        
        pupilSize_Trial = dict()
        pupilSize_Filter = dict()
        pupilSize_woBlink = dict()
        
        # find difference in consecutive elements of internal time
        timeInternalDifference = [t - s for s, t in zip(timeInternal_Trial, timeInternal_Trial[1:])]
        # divide by 1000 to make it s
        timeOfGaze_Trial = [sum(timeInternalDifference[:i])/1000000 for i in range(1,len(timeInternalDifference))]

        # some trials were skipped, because the sentence was written before. If the time of trial is less than
        # 10s, the trial is skipped
        if timeOfGaze_Trial[-1] < 20:
            if trialNr != 3:
                print(trialNr)
                if trialNr not in trialsForBaseline:
                    print('trial number ', trialNr, 'with', timeOfGaze_Trial[-1], 's will be skipped')
                    continue
        
        pupilSize_Trial['Left'] = pupilSizeL_Trial
        pupilSize_Trial['Right'] = pupilSizeR_Trial
        
        #if trialNr == 4:
        #    for i in range(0, len(pupilSizeL_Trial)):
        #        print(pupilSizeL_Trial[i], pupilSizeR_Trial[i])
            
        #print('Trial', len(pupilSizeL_Trial), len(pupilSizeR_Trial))
        
        # filter the blinks
        pupilSizeL_woBlink, time_filter, missingPupilData = filterBlinks(
            pupilSizeL_Trial, timeGaze_Trial)
        
        pupilSizeR_woBlink, time_filter, missingPupilData = filterBlinks(
            pupilSizeR_Trial, timeGaze_Trial)
        

        
        #print(index_blinkEndL)
        #print(index_blinkEndR)
        pupilSize_woBlink['Left'] = pupilSizeL_woBlink
        pupilSize_woBlink['Right'] = pupilSizeR_woBlink
        
        #print('After blink', len(pupilSizeL_woBlink), len(pupilSizeR_woBlink))
        # Hampel filter to remove the outliers
        winSize = 25
        pupilSizeL_filter = hampel(pupilSizeL_woBlink, winSize, 3)
        pupilSizeR_filter = hampel(pupilSizeR_woBlink, winSize, 3)

        pupilSize_Filter['Left'] = pupilSizeL_filter.values.tolist()
        pupilSize_Filter['Right'] = pupilSizeR_filter.values.tolist()
        
        pupilSizeL_filterList = [i[0] for i in pupilSizeL_filter.values]
        pupilSizeR_filterList = [i[0] for i in pupilSizeR_filter.values]
        
        #print('filter', len(pupilSizeL_filterList), len(pupilSizeR_filterList))
        RLCorrelation = np.corrcoef(pupilSizeL_filterList, pupilSizeR_filterList)
        
        if RLCorrelation[0][1] < 0.8:
            print(RLCorrelation[0][1])
            print('CORRELATION BETWEEN RIGHT AND LEFT IS NOT GOOD. TRIAL MUST BE REMOVED')
        
        if trialNr in trialsForBaseline:
            print(trialNr)
            # baseline trial
            # First find baseline pupil size, which is the time when looking at NextPhrase key
            Samples_ForBaseline = int((int(DwellTimes_ForBaseline[trialNr][:-2])*90)/1000) # Number of samples of looking at key depend on
        
            # dwell time
            pupilL_baseline = np.mean(pupilSizeL_filterList[0:Samples_ForBaseline])
            pupilR_baseline = np.mean(pupilSizeR_filterList[0:Samples_ForBaseline])
            
            continue
        
        
        # Relative Pupil Size Calculation 
        
        #print(DwellTimes_ForBaseline[trialNr-1])
        
        pupilL_Relative = [pupil/pupilL_baseline for pupil in pupilSizeL_filterList]
        pupilR_Relative = [pupil/pupilR_baseline for pupil in pupilSizeR_filterList]
        
        # average of whole trial
        pupilRelative_avg.append((np.mean(pupilL_Relative)+np.mean(pupilR_Relative))/2)
        pupilAbsolute_avg.append((np.mean(pupilSizeL_filterList)+np.mean(pupilSizeR_filterList))/2)
        
        #print(np.mean(pupilL_Relative), np.mean(pupilR_Relative))
        
        
    return pupilAbsolute_avg, pupilRelative_avg

In [64]:
inputFolderName = r'\\ait-pdfs.win.dtu.dk\users\homedir\taba\Documents\Courses\Credits\2019January\Principles of BCI\Group Project\EEG Data Grp Project\Data_GazeTyping'

pupilAbsolute_avgList = list()
pupilRelative_avgList = list()

for root, dirs, subfolder in os.walk(inputFolderName):
    if not dirs:
        print(root)
        if 'notCounting' in root:
            continue
            
        userKeys = None
        gazeLog = None
        keysSelected = None
        
        for file in subfolder:
            if fnmatch.fnmatch(file, 'user_looks*'):
                try:
                    
                    fUserKey = open(root + '\\' + file, encoding='utf-8')
                    readerUserKey = csv.reader(fUserKey)
                    userKeys = list(readerUserKey)
                    
                    userKeys.remove(userKeys[0])
                except:
                    if fUserKey is not None:
                        
                        fUserKey.close()
                    else:
                        print('error in opening the user looks at log file')
            
            elif fnmatch.fnmatch(file, 'KeySelection*'):
                try:
                    
                    fKeysSelected = open(root + '\\' + file, encoding='utf-8')
                    readerKeysSelected = csv.reader(fKeysSelected)
                    keysSelected = list(readerKeysSelected)
                    
                    keysSelected.remove(keysSelected[0])
                except:
                    if fKeysSelected is not None:
                        
                        fKeysSelected.close()
                    else:
                        print('error in opening the KeySelection log file')
            
            elif fnmatch.fnmatch(file, 'tobiiGazeLog*'):
                try:
                    fGazeLog = open(root + '\\' + file, encoding='utf-8')
                    readerGazeLog = csv.reader(fGazeLog)
                    gazeLog = list(readerGazeLog)
                    
                    gazeLog.remove(gazeLog[0]) # would not matter much even if the first row was not labels
                    gazeLog.remove(gazeLog[-1])

                except:
                    if fGazeLog is not None:
                        fGazeLog.close()
                    else:
                        print('error in opening the gaze log file')
            else:
                continue
            
                # if all these lists exist
            if userKeys is None or keysSelected is None or gazeLog is None:
                continue
            else:
                
                a = re.compile('(?<=Data_GazeTyping\\\\)(.*)(?=\\\\2019-1)')
                subjName = a.findall(root)[0]
                print(subjName)
                
                # fix userKeys due to comma related file changes
                userKeys_new = FixUserKeys(userKeys)
                
                # find dwell time of typing
                userKeys_wDwellTime = ComputeDwellTime(userKeys_new)
                
                # find start time of typing
                timeTyping = OptiKeyTypingTime(userKeys_wDwellTime)
                
                # divide complete data into epochs of phrases
                timeStartEndMixed = FindTrialEndTimes(keysSelected, timeTyping)
                #print(timeStartEndMixed)
                
                # create trial time epoch using the list of start/end times of trial and userKeys, to make sure that 
                # Sleep is completely there in every trial, to allow for baseline
                timeEpochTrial = CreateTimeEpochsOfTrials(timeStartEndMixed, userKeys_wDwellTime)
                #print(timeEpochTrial)
                
                dwellTimes_ForBaseline = DwellTimeForBaseline(userKeys_wDwellTime)
                
                # find and plot pupil size for every trial
                pupilAbsolute, pupilRelative = FindAndPlotPupilSizeForEpoch(gazeLog, timeEpochTrial, dwellTimes_ForBaseline)
                
                
                
                if 'easy' in root:
                    pupilAbsolute[0] = np.nan
                    pupilRelative[0] = np.nan
                    
                
                pupilRelative_avgList.append(pupilRelative)
                pupilAbsolute_avgList.append(pupilAbsolute)

\\ait-pdfs.win.dtu.dk\users\homedir\taba\Documents\Courses\Credits\2019January\Principles of BCI\Group Project\EEG Data Grp Project\Data_GazeTyping\tb1_difficult_notCounting
\\ait-pdfs.win.dtu.dk\users\homedir\taba\Documents\Courses\Credits\2019January\Principles of BCI\Group Project\EEG Data Grp Project\Data_GazeTyping\tb1_easy\2019-1-17-13-12-54
tb1_easy
2
0.7265572540426227
CORRELATION BETWEEN RIGHT AND LEFT IS NOT GOOD. TRIAL MUST BE REMOVED
2
4
6
0.5710067552040098
CORRELATION BETWEEN RIGHT AND LEFT IS NOT GOOD. TRIAL MUST BE REMOVED
6
8
8
10
0.7347133191395835
CORRELATION BETWEEN RIGHT AND LEFT IS NOT GOOD. TRIAL MUST BE REMOVED
10
12
trial number  12 with 2.342759 s will be skipped
13
trial number  13 with 1.232447 s will be skipped
\\ait-pdfs.win.dtu.dk\users\homedir\taba\Documents\Courses\Credits\2019January\Principles of BCI\Group Project\EEG Data Grp Project\Data_GazeTyping\tb2_difficult\2019-1-17-13-26-43
tb2_difficult
2
2
4
4
6
6
8
8
10
10
12
trial number  12 with 1.4212 s

In [None]:
plt.plot(pupilAbsolute_avgList)
plt.plot(pupilAbsolute_avgList[1])

In [87]:
pupilRelative_avgList

[[nan,
  0.8244980417457839,
  0.9297555049607051,
  1.0552341189310543,
  0.8759705994982582],
 [0.9165068388896602,
  0.9775671053395556,
  0.9227354512938861,
  0.952379423266789,
  0.9019541813131609]]

In [74]:
plt.plot(flat_list, 'o')
plt.xticks(np.arange(10), ('', 'Easy_trial2', 'Easy_trial3', 'Easy_trial4', 'Easy_trial5', 'Difficult_trial1', 'Difficult_trial2', 'Difficult_trial3', 'Difficult_trial4', 'Difficult_trial5'))

([<matplotlib.axis.XTick at 0x19c3cd28940>,
  <matplotlib.axis.XTick at 0x19c3cd28278>,
  <matplotlib.axis.XTick at 0x19c3cd28160>,
  <matplotlib.axis.XTick at 0x19c3cd9ebe0>,
  <matplotlib.axis.XTick at 0x19c39f90160>,
  <matplotlib.axis.XTick at 0x19c39f905c0>,
  <matplotlib.axis.XTick at 0x19c39f90a90>,
  <matplotlib.axis.XTick at 0x19c39f90f60>,
  <matplotlib.axis.XTick at 0x19c39fb3470>,
  <matplotlib.axis.XTick at 0x19c39f90a58>],
 <a list of 10 Text xticklabel objects>)

In [70]:
print(np.mean(pupilRelative_avgList[0][1:]), np.mean(pupilRelative_avgList[1]))

0.9213645662839504 0.9342286000206104


In [92]:
pupilRelative_avgList[1]

[0.9165068388896602,
 0.9775671053395556,
 0.9227354512938861,
 0.952379423266789,
 0.9019541813131609]

In [93]:
pupilRelative_boxplot = [pupilRelative_avgList[0][1:], pupilRelative_avgList[1]]

In [165]:
# box plot of pupil size

#boxprops = dict(linewidth=3)
#flierprops = dict(marker='o', markerfacecolor='green', markersize=12,
 #                 linestyle='none')
#medianprops = dict(linewidth=2.5)
#meanpointprops = dict(marker='s', markeredgecolor='black',
#                      markerfacecolor='green', markersize = 8)

boxprops = dict(markeredgecolor= 'c', color='blue')
flierprops = dict(marker='o', markerfacecolor = 'white', markersize=12,linestyle='none', color='blue', markeredgecolor= 'blue')
medianprops = dict(linewidth=2.5, color = 'red')
meanpointprops = dict(marker='s', markerfacecolor = 'white', markeredgecolor = 'black', markersize = 8)
whiskerprops = dict(linestyle = '--')

#meanlineprops = dict(linestyle='--', linewidth=2.5, color='purple')
plt.tick_params(axis='both', which='major', labelsize=14)

plt.boxplot(pupilRelative_boxplot, showmeans=True, notch=True, whiskerprops=whiskerprops, boxprops=boxprops, flierprops=flierprops, medianprops=medianprops, meanprops=meanpointprops)
plt.xticks(np.arange(1, 3), ('Easy', 'Difficult'))
plt.ylabel('Relative pupil size (mm)', fontsize = 14)
plt.tight_layout()
plt.xlim([0.5,2.5])
plt.ylim([0.8, 1.1])
plt.savefig('pupilSize1.png', format='png', dpi=300)

In [66]:
plt.plot(pupilRelative_avgList[0], '*')
plt.plot(pupilRelative_avgList[1], 'o')

[<matplotlib.lines.Line2D at 0x19c53d6fd30>]

In [88]:
import csv

resultFile = inputFolderName + '\pupilData1.csv' 

with open(resultFile, 'w+') as myfile:
    wr = csv.writer(myfile)
    wr.writerow(pupilRelative_avgList[0])
    wr.writerow(pupilRelative_avgList[1])

In [76]:
# Function to compute the MSD, with cost of 2 for substitution and 1 for insertion and deletion
costSub = 2
costIns = 1
costDel = 1

def levenshteinDist(phraseIn, phraseOut):
    
    lenStim = len(phraseIn)
    lenUser = len(phraseOut)
    costMatrix = np.zeros((lenStim+1, lenUser+1), dtype=int)
    MSDoperation = np.empty([lenStim+1, lenUser+1], dtype="U4")
    costMatrix[0,0:] = range(0, lenUser+1)
    costMatrix[0:,0] = range(0, lenStim+1)
    MSDoperation[0,0:] = 'I'
    MSDoperation[0:,0] = 'D'
    
    for i in range(1, len(phraseIn)+1):
        iP = i - 1
        for j in range(1, len(phraseOut)+1):
            jP = j - 1
            if phraseIn[iP].lower() == phraseOut[jP].lower():
                # Define the possible cost array
                costOptionArray = [costMatrix[i,j-1]+costDel, costMatrix[i-1,j]+costIns, costMatrix[i-1,j-1]] 
                flagSame = 1
            else:
                costOptionArray = [costMatrix[i,j-1]+costDel, costMatrix[i-1,j]+costIns, costMatrix[i-1,j-1]+costSub]
                flagSame = 0
            costMatrix[i,j], MSDoperation[i][j] = minValnInd(costOptionArray, flagSame)
    #print(costMatrix)
    return costMatrix[-1,-1]

In [78]:
# find minimum cost and the operations that give rise to it
def minValnInd(costOptions, flagSame):
    operator = list()
    unique_entries = set(costOptions)
    valInd = { value : [ i for i, v in enumerate(costOptions) if v == value ] for value in unique_entries }
    keyVal = list(valInd.keys())
    min_value = min(keyVal)
    
    if 0 in valInd[min_value]:
        operator.append('D')
    if 1 in valInd[min_value]:
        operator.append('I')
    if 2 in valInd[min_value]:
        if flagSame == 0:
            operator.append('S')
        else:
            operator.append('N')   
    flagSame = None    
    return min_value, ''.join(operator)

In [85]:
# error rate for easy session:
phraseLog_reduced = ['He wants humans to come to him freely,  rather than by coercion.', 'If in doubt about which level of course you think is appropriate,  please do not hesitate to ask.', 'This site uses frames - please update your browser.', 'He learned to rise early and work late at all times and in all weathers.', ]
phraseUserEnd = ['he wants humans to come to him freely, rather than by  coercion.', 'if in doubt about which course is appropriate, do not hesitate to ask.', 'this site uses frames - please update your browser.', 'he learned to rise early and work late at all times and all weathers.']
for n in range(0,len(phraseLog_reduced)):
    #print(levenshteinDist(phraseLog_reduced[n], phraseUserEnd[n]), max(len(phraseLog_reduced[n]),len(phraseUserEnd[n])))
    
    print(levenshteinDist(phraseLog_reduced[n], phraseUserEnd[n])/max(len(phraseLog_reduced[n]),len(phraseUserEnd[n])))

0.03125
0.27835051546391754
0.0
0.041666666666666664


In [178]:
# error rate for difficult session:
phraseLog_reduced = ['Here, a group of thespians (and I use the term loosely): the director, producer, and a few crew members,head for an isolated island where they plan to make a film utilising an abandoned school.', 'Except for the Beginners tournaments ungraded players will be limited to 50kr of prize money unless the Congress organisers are able to satisfy themselves as to the player''s strength.', 'Further combinations may be possible and interested students are encouraged to contact the other Departments involved.', 'Environment Concern for Batley was established in 1986 as a voluntary organisation, registered as a company limited by guarantee in 1997 and granted registered charitable status in 1998.', 'The Blues were missing three key players with David Logan, Dave Goodchild and Keeper Stuart Dawson all suspended after last season''s bad tempered Presidents Cup Final against Trafford.']
phraseUserEnd = ['a group of thespians (and i us the word loosely): the director, producer and a few crew members head to an isolated island where they plan to make a film in an abandoned school.', 'except the beginners tournament ungraded players will be limited to 50kr unless the Congress organisers are able to satisfy themselves so as to the player''s strength.', 'further combinations may be possible and interested students are asked to contact the other departments involved.', 'environmental concern for Batley was established in 1986 as a voluntary organisation, registered as a company with limited guarantee in 1997 and granted the registered charitable company in 1998.','the blues were missing three key players David Logan,Dave Goodchild and keeper Stuart Dawson all suspended after last season''s bad-tempered Presidents cup finals against trawthorn.']
for n in range(0,len(phraseLog_reduced)):
    #print(levenshteinDist(phraseLog_reduced[n], phraseUserEnd[n]), max(len(phraseLog_reduced[n]),len(phraseUserEnd[n])))
    
    print(levenshteinDist(phraseLog_reduced[n], phraseUserEnd[n])/max(len(phraseLog_reduced[n]),len(phraseUserEnd[n])))

0.13471502590673576
0.12637362637362637
0.07627118644067797
0.1282051282051282
0.08743169398907104


In [167]:
np.std([6,7,5,7,7])

0.7999999999999999

In [173]:
scoreDifficulty = [[2,3,4,2], [6,7,5,7,7]]
plt.boxplot(scoreDifficulty, showmeans=True, notch=True, whiskerprops=whiskerprops, boxprops=boxprops, flierprops=flierprops, medianprops=medianprops, meanprops=meanpointprops)
plt.xticks(np.arange(1, 3), ('Easy', 'Difficult'))
plt.ylabel('Subjective Difficulty Score', fontsize = 18)
plt.tick_params(axis='both', which='major', labelsize=18)
plt.tight_layout()
plt.xlim([0.5,2.5])
plt.ylim([1, 8])
plt.savefig('scoreDifficulty.png', format='png', dpi=300)

In [177]:
errorRate = [[3.12, 27.84, 0.0, 4.16], [13.47, 13.26, 7.62, 12.82, 8.74]]
plt.boxplot(errorRate, showmeans=True, notch=True, whiskerprops=whiskerprops, boxprops=boxprops, flierprops=flierprops, medianprops=medianprops, meanprops=meanpointprops)
plt.xticks(np.arange(1, 3), ('Easy', 'Difficult'))
plt.ylabel('Subjective Difficulty Score', fontsize = 18)
plt.tick_params(axis='both', which='major', labelsize=18)
plt.tight_layout()
plt.xlim([0.5,2.5])
#plt.ylim([1, 8])
plt.savefig('errorRate.png', format='png', dpi=300)