In [1]:
%matplotlib
#%matplotlib inline
import os
import csv
import fnmatch
import numpy as np
import datetime
import re 
import pandas as pd
import matplotlib.pyplot as plt
import math
import pywt
import itertools
from scipy import stats

pd.options.mode.use_inf_as_na = True

Using matplotlib backend: Qt5Agg


In [2]:
TimeDwellOrig = 800
TimeFixation = 300

In [3]:
pupilTotal_Difficult = list()
pupilTotal_Medium = list()
pupilTotal_Easy = list()

In [4]:
# function to convert list of date and time into datetime format list
def timeConversion(timeStrList):
    timeList = list()
    for time in timeStrList:
        time1, t1, t2 = time.partition('+')
        timeList.append(datetime.datetime.strptime(re.sub('[:.T]','-',time1[:-1]), "%Y-%m-%d-%H-%M-%S-%f"))
    return timeList

In [5]:
# This function will return the datetime in items which is the closest to the date pivot
def nearestTimePoint(dates, date):
    
    for d in dates:
        if d < date:
            nearestTP = d
        else:
            continue
    try: 
        nearestTP
        nearestTPind = dates.index(nearestTP)
    except:
        nearestTP = 0
        nearestTPind = -1
        
    return nearestTP, nearestTPind

In [6]:
def Combine2ColumnsTo1GazeLog(GazeLog, Column_1, Column_2):
    
    JoinedList = list()
    
    Column_beforeDecimal = [item4[Column_1] if 'Invalid' not in item4 else 'nan' for item4 in GazeLog]
    Column_afterDecimal = [item4[Column_2] if 'Invalid' not in item4 else 'nan' for item4 in GazeLog]
    
    
    for i in range(0, len(Column_beforeDecimal)):
        if 'Valid' not in Column_beforeDecimal[i] and 'Valid' not in Column_afterDecimal[i]:
            if 'nan' not in Column_beforeDecimal[i] and 'nan' not in Column_afterDecimal[i]:
                JoinedList.append(float(Column_beforeDecimal[i]+'.'+ Column_afterDecimal[i]))
            else:
                JoinedList.append(np.nan)
        else:
            # Rarely, the pupil size is a whole number
            JoinedList.append(np.nan) # we will ignore the row, since there is no way of automatically knowing which - 
            # right or left eye has whole number pupil size
            
    return JoinedList

In [7]:
def Convert2ColumnsToFormPupilSizes(GazeLog):
    # function to convert pupilsizes from 2 columns for every pupil due to comma use instead of decimal, 
    # to proper pupil sizes
    
    PupilLogL = Combine2ColumnsTo1GazeLog(GazeLog, -5, -4)
    PupilLogR = Combine2ColumnsTo1GazeLog(GazeLog, -2, -1)
            
    # if one of the pupils are nan, the other one is converted too
    nPupil = -1
    for pupilL in PupilLogL:
        nPupil = nPupil + 1
        if np.isnan(pupilL):
            if nPupil < len(PupilLogR):
                if not np.isnan(PupilLogR[nPupil]):
                    PupilLogR[nPupil] = np.nan
            else:
                del PupilLogL[len(PupilLogR):]
                
    nPupil = -1
    for pupilR in PupilLogR:
        nPupil = nPupil + 1
        if np.isnan(pupilR):
            if nPupil < len(PupilLogL):
                if not np.isnan(PupilLogL[nPupil]):
                    PupilLogL[nPupil] = np.nan
            else:
                del PupilLogR[len(PupilLogL):]
                
    #print(len(PupilLogL), len(PupilLogR))
    
    return PupilLogL, PupilLogR

In [53]:
def filterBlinks(pupilData, timeListComplete):
    # filter any blinks and nan values lasting around 250ms (on average)
    # http://faculty.washington.edu/chudler/facts.html
   
    # blink is every nan value in the range of 100-400ms 
    # 250 ms (23 samples) before and after the blink will also be removed
    extraBlinkSamples = 23    
    
    # remove single missing data, that are due to hardware error
    missingVal_Single = np.argwhere(np.isnan(pupilData))
    missingVal_Single = list(itertools.chain.from_iterable(missingVal_Single)) # flatten the list
    missingVal_SingleDifference = [t - s for s, t in zip(missingVal_Single, missingVal_Single[1:])] # find difference 
    # between consecutive elements
    missingVal_SingleDifference.insert(0, missingVal_Single[0]) # insert the first blink index in the beginning of list
    
    # the list missingVal_SingleDifference contains the index of the first blink, followed by the difference in the index to 
    # the next nan value
    
    # first remove the single nan values, which are missing data
    eyeTracker_missingData = list() # list with index of single missing data  
    valInd = -1

    for val in missingVal_SingleDifference:
        valInd = valInd + 1
        if valInd == 0:
            continue
        if val != 1:
            if missingVal_SingleDifference[valInd-1] !=1: # if there are 2 consecutive missing values (denoted by 2 consecutive
                # non 1 numbers, they are added to the list of eyeTracker_missingData)
                eyeTracker_missingData.append(sum(missingVal_SingleDifference[:valInd]))
                
    # remove single missing values from pupil data
    pupilData_woSingleMissingData0 = [pupilData[ind] if ind not in eyeTracker_missingData else [] 
                                     for ind in range(0, len(pupilData))]
    pupilData_woSingleMissingData = [x for x in pupilData_woSingleMissingData0 if x]
    
    # remove the times for single missing values in pupil data
    timeList_woSingleMissingData0 = [timeListComplete[ind] if ind not in eyeTracker_missingData else [] 
                                     for ind in range(0, len(timeListComplete))]
    timeList_woSingleMissingData = [x for x in timeList_woSingleMissingData0 if x]
    
#     print(len(timeList_woSingleMissingData))
    
    
    
    
    # find the nan values again from pupilData_woSingleMissingData
    missingVal_Rest = np.argwhere(np.isnan(pupilData_woSingleMissingData))
    missingVal_Rest = list(itertools.chain.from_iterable(missingVal_Rest))
    missingVal_RestDifference = [t - s for s, t in zip(missingVal_Rest, missingVal_Rest[1:])]
    missingVal_RestDifference.insert(0, missingVal_Rest[0])
    
    
    # compile and create list of start and end of blinks
    blink_missingData = dict()
    blink_missingData['Start'] = list()
    blink_missingData['End'] = list()
    
    valInd = -1
    for val in missingVal_RestDifference:
        valInd = valInd + 1
        if val > 1:
            #print('value', val)
            # instead of appending the actual index of blink start, since 250ms before and after the blink need to be
            # removed, it is also appended here.
            
            # just make sure that the additional samples do not make the index of blink go in negative
            if sum(missingVal_RestDifference[:valInd+1])-extraBlinkSamples > 0:
                
                blink_missingData['Start'].append(sum(missingVal_RestDifference[:valInd+1])-extraBlinkSamples)
            else:
                blink_missingData['Start'].append(0)
            
            if valInd == 0:
                continue
                
            # make sure that the additional samples do not increase the index to more than the length of the pupilData
            if sum(missingVal_RestDifference[:valInd])+extraBlinkSamples < len(pupilData_woSingleMissingData):
                blink_missingData['End'].append(sum(missingVal_RestDifference[:valInd])+extraBlinkSamples)
            else:
                blink_missingData['End'].append(len(pupilData_woSingleMissingData)-1)
            #print('end', sum(missingVal_RestDifference[:valInd]))
      
    # add the last blink index
    # make sure that the additional samples do not increase the index to more than the length of the pupilData
    if sum(missingVal_RestDifference)+extraBlinkSamples < len(pupilData_woSingleMissingData):
        blink_missingData['End'].append(sum(missingVal_RestDifference)+extraBlinkSamples)
    else:
        blink_missingData['End'].append(len(pupilData_woSingleMissingData)-1)
      
    
    # print start and end values
    #for ind in range(0,len(blink_missingData['Start'])):
    #    print(blink_missingData['Start'][ind]+23, blink_missingData['End'][ind]-23)
        
        
        
    # need to create a list containing indexes that are to be removed
    blinkIndexList = list()
    
    #print(len(blink_missingData['Start']), len(blink_missingData['End']))
    
    
    # remove blinks and additional data from pupil data to get filtered data
    for indInd in range(0, len(blink_missingData['Start'])):
        blinkIndexList.append(range(blink_missingData['Start'][indInd], blink_missingData['End'][indInd]+1))
    # flatten the list
    blinkIndexList = list(itertools.chain.from_iterable(blinkIndexList))
    
    
    ##print(len(pupilData_woSingleMissingData))
    
    pupilData_woRestMissingData0 = [pupilData_woSingleMissingData[ind] if ind not in blinkIndexList else [] 
                                     for ind in range(0, len(pupilData_woSingleMissingData))]
    #for i in enumerate(pupilData_woRestMissingData0):
    #    print(i)
    pupilData_filter = [x for x in pupilData_woRestMissingData0 if x]
    
    #for i in enumerate(pupilData_filter):
    #    print(i)
        
    # remove the times for single missing values in pupil data
    timeList_woRestMissingData0 = [timeList_woSingleMissingData[ind] if ind not in blinkIndexList else [] 
                                     for ind in range(0, len(timeList_woSingleMissingData))]
    time_filter = [x for x in timeList_woRestMissingData0 if x]
    
    #print(len(pupilData_filter))
        
    if np.nan in pupilData_filter:
        print('nan values still present in pupil data')
        #for i in enumerate(pupilData_woSingleMissingData):
        #    print(i)
        
    
    return pupilData_filter, time_filter, blink_missingData

In [54]:
def hampel(vals_orig, k, sd):
    '''
    vals: pandas series of values from which to remove outliers
    k: size of window (including the sample; 7 is equal to 3 on either side of value)
    '''
    # Obtained from: https://stackoverflow.com/questions/46819260/filtering-outliers-how-to-make-median-based-
    # hampel-function-faster
    
    #plt.plot(vals_orig)
    
    #Make copy so original not edited
    vals = pd.DataFrame(vals_orig)      
    #print(vals.isnull().any())
    vals0 = vals.replace([np.inf, -np.inf], np.nan)
    #vals = vals0.astype(float).fillna(method = 'backfill') # linear interpolation instead 
    #print(vals)
    vals = vals0.astype(float).interpolate('linear', limit_direction = 'both') # linear interpolation instead of 
    # simply copying the previous value --\ linear interpolation than cubic to not add any patterns in the data, limit direction
    # set to both, to interpolate the nan values occuring from the start of the series
    
    L= 1.4826
    rolling_median = vals.rolling(window=k, min_periods=1, center=True).median()
    
    #print(rolling_median)
    difference = np.abs(rolling_median-vals)
    median_abs_deviation = difference.rolling(k).median()
    threshold = sd * L * median_abs_deviation
    outlier_idx = difference>threshold
    vals[outlier_idx] = rolling_median[outlier_idx]
    #print(vals)
    #print('datatype', vals.dtypes)
    #print(vals.isnull().any())
    #vals.plot()
    return(vals)

In [95]:
def plotPupilSize(pupilData, timeData, TrialNumber, Complexity, Difficulty, writeStart):
    
    dataLenEqualizer = min(min(len(pupilData['Left']), len(pupilData['Right'])), len(timeData))
    
    timesOfTrials = [0.01, 2.2, 54.08, 61.19, 167.95, 176.16, 260.46, 303.12, 418.08, 421.13, 487.20, 513.66]
    
    
    fig = plt.figure()
    ax = fig.add_subplot(1,1,1)
    ax.plot(timeData[0:dataLenEqualizer], pupilData['Left'][0:dataLenEqualizer], 'b')
    #ax.plot(timeData[0:dataLenEqualizer], pupilData['Right'][0:dataLenEqualizer], 'r')
    
    
    ax.vlines(writeStart, ymin=0.5, ymax=5, linestyle = '-.', color = 'r')
        
    
    #for i_start in range(0, len(timesOfTrials), 2):
    #    xPoint = timesOfTrials[i_start]
    #    xPoint2 = timesOfTrials[i_start+1]
         
    #    ax.vlines(xPoint, ymin=0.5, ymax=5, linestyle = '-.', color = 'r')
    #    ax.vlines(xPoint2, ymin = 0.5, ymax = 5,  linestyle = '-.', color = 'g')
    
    ax.set_ylabel('Relative pupil size [in mm]')
    ax.set_xlabel('Time [in s]')
    ax.set_title('trial: %d, complexity: %d, difficulty: %d' %(TrialNumber, Complexity, Difficulty))


In [96]:
def FindAndPlotPupilSizeForEpoch(GazeLog):
    # function that uses the list of start and end trial times to find the pupil sizes for those trials and plots them
    
    # first create a list of times in gaze log
    timeStrGazeLog = [item3[0] for item3 in GazeLog]
    # convert the list of strings to datetime formats
    timeGazeLog = timeConversion(timeStrGazeLog)
    
    # internal time, to depict seconds
    timeInternalGazeLog = [float(item3[1]) for item3 in GazeLog]
    
    # extract pupil sizes in decimals from the strange 2 columns for every pupil
    pupilLogL, pupilLogR = Convert2ColumnsToFormPupilSizes(GazeLog)
    
    pupilL_avg = list()
    pupilR_avg = list()
    timeOfGaze_TrialList = list()
    pupil_AvgPartsList = list()
    
    # find pupil sizes
    pupilSizeL_Trial = pupilLogL
    pupilSizeR_Trial = pupilLogR
    timeInternal_Trial = timeInternalGazeLog
    timeGaze_Trial = timeGazeLog
    
        
    pupilSize_Trial = dict()
    pupilSize_Filter = dict()
    pupilSize_woBlink = dict()
        
    # find difference in consecutive elements of internal time
    timeInternalDifference = [t - s for s, t in zip(timeInternal_Trial, timeInternal_Trial[1:])]
    # divide by 1000 to make it s
    timeOfGaze_Trial = [sum(timeInternalDifference[:i])/1000000 for i in range(1,len(timeInternalDifference))]

    pupilSize_Trial['Left'] = pupilSizeL_Trial
    pupilSize_Trial['Right'] = pupilSizeR_Trial
    
    RLCorrelation = np.corrcoef(pupilSizeL_Trial, pupilSizeR_Trial)
        
    if RLCorrelation[0][1] < 0.8:
        print(RLCorrelation[0][1])
        print('CORRELATION BETWEEN RIGHT AND LEFT IS NOT GOOD. TRIAL MUST BE REMOVED')
        
    timesOfTrials = [0.01, 2.2, 54.08, 61.19, 167.95, 176.16, 260.46, 303.12, 418.08, 421.13, 487.20, 513.66]
    
    # divide pupil size into trials
    
    n = -1
    ComplexityList = [0, 3, 2, 4, 1, 3]
    DifficultyList = [0, 4, 3, 4, 2, 3]
    
    for i_start in range(0, len(timesOfTrials), 2):
        n = n + 1
        
        complexity = ComplexityList[n]
        difficulty = DifficultyList[n]
        xPoint = timesOfTrials[i_start]
        xPoint2 = timesOfTrials[i_start+1]
        
        readTime, readTimeInd = nearestTimePoint(timeOfGaze_Trial, xPoint)
        writeTime, writeTimeInd = nearestTimePoint(timeOfGaze_Trial, xPoint2)
        
        if i_start < len(timesOfTrials) - 2:
            xPoint3 = timesOfTrials[i_start+2]
            readTime3, readTimeInd3 = nearestTimePoint(timeOfGaze_Trial, xPoint3)
        else:
            readTime3 = timeOfGaze_Trial[-1]
            readTimeInd3 = len(timeOfGaze_Trial)-1

        if i_start == 0:
            readTimeInd = 0
            
        
        pupilL = pupilSizeL_Trial[readTimeInd:readTimeInd3]
        timeTrial = timeOfGaze_Trial[readTimeInd:readTimeInd3]
        
        writeTimeTrial = writeTime - readTime
        
        pupilSize_Trial['Left'] = pupilL
            
        print(readTime, readTimeInd, readTime3, readTimeInd3)
            
        plotPupilSize(pupilSize_Trial, timeTrial, n, complexity, difficulty, writeTime)
    
    
    #plotPupilSize(pupilSize_Trial, time_filter, 1)
        
    #for i in range(0, len(timeOfGaze_Trial)):
    #    print(timeOfGaze_Trial[i], timeGaze_Trial[i])
        
    return pupil_AvgPartsList

In [98]:
subjName = r'C:\DTU\Data\20190213_CheckEffectOfClosingEyesAndLookingAtLight'
j = 0
flagFirstSubj = 0
pupilData = dict()
pupilData['RLCorrelation'] = []

pupilAvgTotal_AvgPartsList = list()
scoreLIX_Total = list()
scoreLen_Total = list()

for root, dirs, subfolder in os.walk(subjName):
    if not dirs:
        
        gazeLog = None
        
        for file in subfolder:
            if fnmatch.fnmatch(file, 'tobiiGazeLog*'):
                try:
                    fGazeLog = open(root + '\\' + file, encoding='utf-8')
                    readerGazeLog = csv.reader(fGazeLog)
                    gazeLog = list(readerGazeLog)
                    
                    gazeLog.remove(gazeLog[0]) # would not matter much even if the first row was not labels
                    gazeLog.remove(gazeLog[-1])

                except:
                    if fGazeLog is not None:
                        fGazeLog.close()
                    else:
                        print('error in opening the gaze log file')
            else:
                continue
            
                # if all these lists exist
            if gazeLog is None:
                continue
            else:
                
                # find and plot pupil size for every trial
                pupilAvg_AvgPartsList = FindAndPlotPupilSizeForEpoch(gazeLog)
                
                

0 0 54.075033 4867
54.075033 4867 167.939646 15120
167.939646 15120 260.456813 23446
260.456813 23446 418.073298 37633
418.073298 37633 487.194273 43854
487.194273 43854 650.584988 58552


[5, 5, 5, 5, 5, 5, 5, 5, 5, 5]