In [1]:
%matplotlib

import math 
import pywt 
import numpy as np
import os
import csv
import fnmatch
import datetime
import re 
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats

Using matplotlib backend: Qt5Agg


In [2]:
def filterBlinks(pupilData, timeList):
    # filter any blinks and nan values lasting around 250ms (on average)
    # http://faculty.washington.edu/chudler/facts.html
    
    # blink is every nan value
    # 200 ms (18 samples) before and after the blink will also be removed
    addedBlinkSamples = 18
    
    missingPupilData = dict()
    missingPupilData['Start'] = list()
    missingPupilData['End'] = list()
    
    pupilData_filter = list()
    time_filter = list()
    index_blinkEnd = list()
    
    missingBeginning = 0
    missingEnd = 0

    # First, find the missing values
    pupilInd = -1
    for pupilD in pupilData:
        
        pupilInd = pupilInd + 1
        #print(pupilInd, pupilD)
        #if pupilInd > 6800 and pupilInd < 6900:
        #    print(pupilD)
        #print(pupilInd, 'NEXTTTTTTTTTTTT')
        
        # check if val is nan
        if np.isnan(pupilD):
            # check if first element of list
            if pupilInd == 0:
                # first value is nan
                #print(pupilD, 'Very Start')
                missingBeginning = 1
                missingPupilData['Start'].append(pupilInd)
            
            # check if previous val was nan
            elif np.isnan(pupilData[pupilInd-1]):
                
                # check if val is not last element
                if pupilInd < len(pupilData)-1:
                    # check if next val is not nan
                    #print(pupilInd, len(pupilData))
                    if not np.isnan(pupilData[pupilInd+1]):
                        # note the index for end of blink
                        #print(pupilD, 'End')
                        missingPupilData['End'].append(pupilInd)
                else:
                    # last value is nan
                    #print(pupilD, 'Very End')
                    missingEnd = 1
                    missingPupilData['End'].append(pupilInd)
                    
            # if new nan, note the index
            else:
                #print(pupilD, 'Start')
                missingPupilData['Start'].append(pupilInd)
                
                # for single nan values
                # check if val is not last element
                if pupilInd < len(pupilData)-1:
                    # check if next val is not nan
                    if not np.isnan(pupilData[pupilInd+1]):
                        # note the index for end of blink
                        #print(pupilD, 'End')
                        missingPupilData['End'].append(pupilInd)
                else:
                    # last value is nan
                    #print(pupilD, 'Very End')
                    missingEnd = 1
                    missingPupilData['End'].append(pupilInd)
                
    # print(len(missingPupilData['Start']), len(missingPupilData['End']))
    #for i in range(0, len(missingPupilData['Start'])):
    #     print(missingPupilData['Start'][i], missingPupilData['End'][i])
    
    # Second, create a list that does not contain missing values or 100ms before and after that
    
    # if nan in first 11 samples, missingBeginning should be 1
    if missingPupilData['Start'][0] <= addedBlinkSamples:
        missingBeginning = 1
    
    # similarly, if nan in last 11 samples, missingEnd should be 1
    if len(pupilData) - missingPupilData['End'][-1] <= addedBlinkSamples:
        missingEnd = 1
    
    if missingBeginning:
        
        # append first data
        for valInd in range(missingPupilData['End'][0] + addedBlinkSamples, missingPupilData['Start'][1] - addedBlinkSamples+1):
            pupilData_filter.append(pupilData[valInd])
            time_filter.append(timeList[valInd])
        index_blinkEnd.append(len(pupilData_filter)-1)
        startAppend = 1
            
    else:
        # append first data
        for valInd in range(0, missingPupilData['Start'][0] - addedBlinkSamples+1):
            #print(valInd)
            pupilData_filter.append(pupilData[valInd])
            time_filter.append(timeList[valInd])
        index_blinkEnd.append(len(pupilData_filter)-1)
        startAppend = 0
    
    if missingEnd:
        endAppend = len(missingPupilData['End']) - 3
    else:
        endAppend = len(missingPupilData['End']) - 2

    notPossibleToAdd = 0
    i = -1
    for i in range(startAppend, endAppend):

        if missingPupilData['End'][i]+addedBlinkSamples > missingPupilData['Start'][i+1]-addedBlinkSamples+1:
            notPossibleToAdd = notPossibleToAdd + missingPupilData['Start'][i+1] - missingPupilData['End'][i] - 2
            #print('not possible to add', missingPupilData['End'][i], missingPupilData['End'][i]+addedBlinkSamples, missingPupilData['Start'][i+1], missingPupilData['Start'][i+1]-addedBlinkSamples+1)
        else:
            for valInd in range(missingPupilData['End'][i]+addedBlinkSamples, missingPupilData['Start'][i+1]-addedBlinkSamples+1):
                #print(valInd)
                pupilData_filter.append(pupilData[valInd])
                time_filter.append(timeList[valInd])
            index_blinkEnd.append(len(pupilData_filter)-1)
                
    if missingEnd:
        if missingPupilData['End'][-2]+addedBlinkSamples > missingPupilData['Start'][-1]-addedBlinkSamples+1:
            notPossibleToAdd = notPossibleToAdd + missingPupilData['Start'][-1] - missingPupilData['End'][-2] - 2
        else:
            for valInd in range(missingPupilData['End'][-2]+addedBlinkSamples, missingPupilData['Start'][-1]-addedBlinkSamples+1):
                pupilData_filter.append(pupilData[valInd])
                time_filter.append(timeList[valInd])
            index_blinkEnd.append(len(pupilData_filter)-1)
    else:
        #print(missingPupilData['End'][-1]+addedBlinkSamples, len(pupilData))
        if missingPupilData['End'][-1]+addedBlinkSamples > len(pupilData):
            notPossibleToAdd = notPossibleToAdd + len(pupilData) - missingPupilData['End'][-1] - 2
        else:                
            for valInd in range(missingPupilData['End'][-1]+addedBlinkSamples, len(pupilData)):
                pupilData_filter.append(pupilData[valInd])
                time_filter.append(timeList[valInd])
            index_blinkEnd.append(len(pupilData_filter)-1)

            
            
    #nMissing = 0
    # count total nan values in data:
#     for i in pupilData:
#         if np.isnan(i):
#             nMissing = nMissing + 1
    
    #print(len(pupilData_filter))
    return pupilData_filter, time_filter, index_blinkEnd

In [3]:
# function to convert list of date and time into datetime format list

def timeConversion(timeStrList):
    timeList = list()
    for time in timeStrList:
        time1, t1, t2 = time.partition('+')
        timeList.append(datetime.datetime.strptime(re.sub('[:.T]','-',time1[:-1]), "%Y-%m-%d-%H-%M-%S-%f"))
    return timeList

In [4]:
def modmax(d):
    # modulus maxima detection
    
    # compute signal modulus
    m = [0.0]*len(d)
    for i in range(0, len(d)):
        m[i] = math.fabs(d[i])
    
    # if value is larger than both neighbours , and strictly
    # larger than either , then it is a local maximum
    t = [0.0]*len(d)
    for i in range(0, len(d)):
        ll = m[i-1] if i >= 1 else m[i]
        oo = m[i]
        rr = m[i+1] if i < len(d)-2 else m[i]
        if (ll <= oo and oo >= rr) and (ll < oo or oo > rr):
            # compute magnitude
            t[i] = math.sqrt(d[i]**2)
        else:
            t[i] = 0.0
    #print(len(t))
    return t

In [5]:
def ipa(d):
    # compute ipa value of pupil diameter
    IPA = list()
    #print(len(d.pupildata.values))
    # obtain 2-level DWT of pupil diameter signal d
    try:
        (cA2,cD2,cD1) = pywt.wavedec(d.pupildata.values,'sym16','per',level=2)
    except ValueError:
        print('value error in wavedec')
        return
    
    # get signal duration (in seconds)
    tt = ((d.timestamp.values[-1] - d.timestamp.values[0]).item())/1000000000
    
    #print(tt)
    
    # normalize by 1=2j , j = 2 for 2-level DWT
    cA2[:] = [x / math.sqrt(4.0) for x in cA2]
    cD1[:] = [x / math.sqrt(2.0) for x in cD1]
    cD2[:] = [x / math.sqrt(4.0) for x in cD2]
    
    # detect modulus maxima , see Listing 2
    cD2m = modmax(cD2)
    #print(len(cD2m))
    
    # threshold using universal threshold l_univ = s*sqrt(2logn)
    # where s is the standard deviation of the noise
    luniv = np.std(cD2m) * math.sqrt(2.0*np.log2(len(cD2m)))
    cD2t = pywt.threshold(cD2m ,luniv,mode="hard")
    
    # compute IPA
    ctr = 0
    for i in range(0, len(cD2t)):
        if math.fabs(cD2t[i]) > 0: ctr += 1
        #IPA = float(ctr)/tt
        # maybe each pupil data has an IPA?
    IPA = (float(ctr)/tt)
    
    return IPA, cD2m, cD2t, cD2, cD1, cA2

In [6]:
def plotCoeff(coeff, ipa, fig_label, title, position, sampling_time):
    
    
    ax = fig.add_subplot(position)
                
    ylim = [min(coeff)-0.05, max(coeff) + 0.05]

    xaxis = np.arange(0, len(coeff), 1)

    ax.plot(coeff)
    xTicks = ax.get_xticks()
    xTickLabels = [np.round(i*(sampling_time), 2) for i in xTicks[1:]]
    ax.set_xticks(xTicks[1:-1])
    ax.set_xticklabels(xTickLabels)
    ax.set_ylim(ylim)
    ax.set_title('{0}.  subject:  {1},  {2},  IPA  {3}'.format(fig_label, subjName, title, np.round(ipa,4)))
    ax.set_xlabel('Time [in s]')
                
    

In [7]:
subjName = r'C:\DTU\Data\201805_HealthnRehab\TypingData'
j = 0
flagFirstSubj = 0
ipaList = list()
blinkFreq = list()

for root, dirs, subfolder in os.walk(subjName):
    LetterLookedAtList = list()
    LetterLookedAt = list()
    
    if not dirs:
        
        if 'notCompleted' in root or 'notInclude' in root: # Some subjects do not have gaze log and have been marked as 
            #notInclude
            continue
        if 'tb' in root or 'joha' in root:
            continue
            
        userKeys = None
        scratchPad = None
        gazeLog = None
        stimPhrase = None
        
        for file in subfolder:
            
            
            if fnmatch.fnmatch(file, 'GazeLog*'):
                try:
                    fGazeLog = open(root + '\\' + file, encoding='utf-8')
                    readerGazeLog = csv.reader(fGazeLog)
                    gazeLog = list(readerGazeLog)
                    gazeLog.remove(gazeLog[0]) # would not matter much even if the first row was not labels
                    gazeLog.remove(gazeLog[-1])
                except:
                    if fGazeLog is not None:
                        fGazeLog.close()
                    else:
                        print('error in opening the gaze log file')
            else:
                continue
            
                # if all these lists exist
            if gazeLog is None:
                continue
            else:
                
                a = re.compile('(?<=TypingData\\\May[0-9]{2}\\\)(.*)(?=\\\OptiKey)')
                subjName = a.findall(root)[0]
                typing_mechanism = subjName[-2:]
                print(subjName)
                
                # obtain right and left pupil data
                # Create list of pupil sizes from gazelog
                pupilLogL = [float(item4[29]) if 'Invalid' not in item4 else np.nan for item4 in gazeLog]
                pupilLogR = [float(item5[31]) if 'Invalid' not in item5 else np.nan for item5 in gazeLog]
                timeList = [item[0] if 'Invalid' not in item else np.nan for item in gazeLog]
                
                # filter the blinks
                pupilLogL_filter, time_filter, index_blinkEnd = filterBlinks(pupilLogL, timeList)
                pupilLogR_filter, time_filter, index_blinkEnd = filterBlinks(pupilLogR, timeList)
                
                blinkFreq.append(len(index_blinkEnd))
                
                time_filterFormatted = timeConversion(time_filter)
                pupilLog_avg = [(pupilLogL_filter[i] + pupilLogR_filter[i])/2 for i in range(0, len(pupilLogL_filter))]
                
#                 fig=plt.figure()
#                 ax = fig.add_subplot(111)
#                 ax.plot(pupilLog_avg)
#                 ylim =  [min(pupilLog_avg)-0.05, max(pupilLog_avg) + 0.05]
#                 for xPoint in index_blinkEnd:
#                     ax.plot([xPoint, xPoint], ylim, '--', 'k', alpha = 0.3)
#                 ax.set_ylim(ylim)
                
                # for avg of pupils
                pupilLog_filter_wTime_Tuple = list(zip(time_filterFormatted, pupilLog_avg))
                pupilLog_pd =  pd.DataFrame(pupilLog_filter_wTime_Tuple, columns=['timestamp','pupildata'])
                
                ipaVal, coeff_modmax, coeff_hard, coeff_D2, coeff_D1, coeff_A = ipa(pupilLog_pd)
                
                #ax.set_title('subject: {0},   ipa: {1}'.format(subjName, ipaVal))
                
                #print(len(coeff_modmax), len(coeff_hard), len(coeff_D2), len(coeff_D1), len(coeff_A))
                
                print(ipaVal)
                #fig = plt.figure()
                # plot coefficients and set as title the subject name and the ipa value
                
                #plotCoeff(coeff_A, ipaVal, 'a', 'Coefficient of Approximation', 311, 4/90)
                #plotCoeff(coeff_D1, ipaVal, 'b', 'Coefficient of Detail 1', 312, 2/90)
                #plotCoeff(coeff_hard, ipaVal, 'c', 'Hard thesholded Coefficient of Detail 2', 313, 4/90)
                #plotCoeff(coeff_D2, ipaVal, 'Coefficient of D2', 314)
                
                #plt.tight_layout()
                
                # save ipa value
                ipaList.append([subjName, ipaVal])
                
                # for right and left pupils separately                              
#                 pupilLogL_filter_wTime_Tuple = list(zip(time_filterFormatted, pupilLogL_filter))
#                 pupilLogR_filter_wTime_Tuple = list(zip(time_filterFormatted, pupilLogR_filter))
                
#                 pupilLogL_pd =  pd.DataFrame(pupilLogL_filter_wTime_Tuple, columns=['timestamp','pupildata'])
#                 pupilLogR_pd =  pd.DataFrame(pupilLogR_filter_wTime_Tuple, columns=['timestamp','pupildata'])
                
#                 # perform ipa
#                 ipa_left, coeffL_modmax, coeffL_hard, coeffL_D2, coeffL_D1, coeffL_A = ipa(pupilLogL_pd)
#                 ipa_right, coeffR_modmax, coeffR_hard, coeffR_D2, coeffR_D1, coeffR_A = ipa(pupilLogR_pd)
                
#                 print(ipa_left, ipa_right)
                
#                 # plot coefficients and set as title the subject name and the ipa value
#                 plotCoeff(coeffL_A, coeffR_A, 'A')
#                 plotCoeff(coeffL_D1, coeffR_D1, 'D1')
#                 plotCoeff(coeffL_hard, coeffR_hard, 'hard thesholded')
#                 #plotCoeff(coeffL_D2, coeffR_D2, 'D2')



akt_MS
0.11376504137858293
jl_DT
0.0381041291554801
KEA_MS
0.10719267110844512
lone_DT
0.12444028644615406
mcc_MS
0.08439868670241976
MK_DT
0.05480556465410968
ok_MS
0.04800682052974846
pt_DT
0.08125394467587915
sc_MS
0.07005413940347345
sh_MS
0.08388261614539724
ae_DT
0.030889731029142904
hc_MS
0.14635227929209324
ib_MS
0.16521734395049356
km_DT
0.12364628225534893
ma_DT
0.10104659841048556
pgba_DT
0.16616461856024553
smn_DT
0.12106014654683393
eo_DT
0.07277775208184034
jek_MS
0.07394315102273895
jg_DT
0.13119227490706986
lg_MS
0.15232298229231334
lr_MS
0.10874631292713174
mm_MS
0.08136143848220037
snk_DT
0.06336023670654524


In [73]:
ipa_DT = [key[1] for key in ipaList if 'DT' in key[0]]
ipa_MS = [key[1] for key in ipaList if 'MS' in key[0]]

ipaPlot = [ipa_DT, ipa_MS]

print('Dwell time: ', np.mean(ipa_DT), np.std(ipa_DT))
print('Multi-key selection: ', np.mean(ipa_MS), np.std(ipa_MS))


Dwell time:  0.09239513045242796 0.040148275593047254
Multi-key selection:  0.10293695693625317 0.03474106860509688


In [8]:
ipaOnly = [key[1] for key in ipaList]

np.corrcoef(ipaOnly, blinkFreq)

array([[1.        , 0.08682381],
       [0.08682381, 1.        ]])

In [23]:
# sort list first
blinkFreqSortedInd = [i for (v, i) in sorted((v, i) for (i, v) in enumerate(blinkFreq))]
blinkFreqSortedVal = [v for (v, i) in sorted((v, i) for (i, v) in enumerate(blinkFreq))]

ipaValList = [ipaOnly[i] for i in blinkFreqSortedInd]

fig = plt.figure()

ax1 = fig.add_subplot(1,1,1)
ax2 = ax1.twinx()

ax1.plot(ipaValList, 'r')
ax1.set_ylabel('IPA values [Hz/s]')
ax1.set_xlabel('Subject number')

ax2.plot(blinkFreqSortedVal)
ax2.set_ylabel('Blink frequency')


Text(0,0.5,'Blink frequency')

In [22]:
plt.plot(ipaOnly)

[<matplotlib.lines.Line2D at 0x21c9afe4278>]

In [76]:
fig = plt.figure()
ax = fig.add_subplot(1,1,1) 

ax.boxplot([ipa_DT, ipa_MS], positions= [1, 2])
xTicks = ax.get_xticks()
print(xTicks)
ax.set_xticklabels(['Dwell time', 'Multi-key selection'])

ax.set_title('Dwell time- mean: {0}, std: {1}                      Multi-key selection- mean:  {2},  std:  {3}'.format(np.round(np.mean(ipa_DT), 3), np.round(np.std(ipa_DT),3), np.round(np.mean(ipa_MS),3), np.round(np.std(ipa_MS),3)))
ax.set_ylim([0, 0.20])

#print(scipy.stats.ttest_ind(ipa_DT, ipa_MS))
#scipy.stats.ranksums(ipa_DT, ipa_MS)


[1 2]


(0, 0.2)

In [None]:
fig = plt.figure()
axL = fig.add_subplot(2,1,1)
axR = fig.add_subplot(2,1,2)
ylim = [-0.02, 0.2]

xaxis = np.arange(0, len(coeff_left), 1/90)

axL.plot(coeff_left)
xTick = axL.get_xticks()
#axL.set_xticklabels(np.round(xTick*(1/90), 2))
axL.set_ylim(ylim)
index_blinkEndAdjust = [i/4 for i in index_blinkEnd]
#for xPoint in index_blinkEndAdjust:
#    axL.plot([xPoint, xPoint], ylim, '--', 'k', alpha = 0.3)

axR.plot(coeff_right)
axR.set_ylim(ylim)

In [None]:
a = np.arange(0, len(coeff_left)*(4/90), 4/90)

In [None]:
def plotCoeff_RL(coeffL, coeffR, title):
    
    fig = plt.figure()
    axL = fig.add_subplot(2,1,1)
    axR = fig.add_subplot(2,1,2)
                
    ylim = [np.min([min(coeffL), min(coeffR)])-0.02, np.max([max(coeffL), max(coeffR)]) + 0.02]

    xaxis = np.arange(0, len(coeffL), 1/90)

    axL.plot(coeffL)
    xTicks = axL.get_xticks()
    xTickLabels = [np.round(i*(4/90), 2) for i in xTicks[1:]]
    axL.set_xticks(xTicks[1:-1])
    axL.set_xticklabels(xTickLabels)
    axL.set_ylim(ylim)
    axL.set_title('subject:  {0},  coefficient for {1},  Left IPA  {2}'.format(subjName, title, np.round(ipa_left,4)))
    axL.set_xlabel('Time [in s]')
                
    axR.plot(coeffR)
    axR.set_ylim(ylim)
    axR.set_xticks(xTicks[1:-1])
    axR.set_xticklabels(xTickLabels)
    axR.set_title('Right IPA  {0}'.format(np.round(ipa_right, 4)))
    axR.set_xlabel('Time [in s]')
                
    plt.tight_layout()

In [None]:
a = axL.get_xticks()

In [None]:
a*(4/90)

In [None]:
a