In [2]:
# !pip install fsspec

In [4]:
#Libraries
import pandas as pd
import numpy as np
#from datetime import dt
import math
import os

# Import code developed and functions
from feature_extraction import preprocess, flatten, label, select_group

# Ignore warnings of appending dataframes
import warnings
warnings.simplefilter('ignore')

In [5]:
path = 'C:\\Users\\mverd\\Desktop\\IMD\\ESSEX\\PredictingEmpathy\\rawdata\\'
fname = 'test\\Participant0059.tsv'  # ANA: it's better not to call a  variable "name", as it's one of the protected words in python
# path = ''
# fname = 'C://Users/Ana/Desktop/Participant0001.tsv'

In [6]:
df_table = pd.read_table(path + fname, sep='\t',low_memory=False)

# Remove calibration points in recording
startPoints = df_table[df_table['Event']=='ImageStimulusStart'].index.values.astype(int)
endPoints = df_table[df_table['Event']=='ImageStimulusEnd'].index.values.astype(int)

# Store only image stimulus
df = pd.DataFrame()

for i in range(len(startPoints)):
    start = startPoints[i]
    end = endPoints[i]

    trial = df_table.iloc[start:end+1]
    df = pd.concat([df,trial])

# Columns we are keeping
df_col = ['Recording timestamp','Participant name',
            'Recording name','Recording duration',
            'Pupil diameter left','Pupil diameter right',
            'Gaze point X (MCSnorm)','Gaze point Y (MCSnorm)',
            'Eye movement type','Gaze event duration',
            'Fixation point X (MCSnorm)','Fixation point Y (MCSnorm)']

# Removing columns
df_features = df[df_col]

# Columns that need to be changed from object to float
objColumns = ['Pupil diameter left','Pupil diameter right','Gaze point X (MCSnorm)',
                'Gaze point Y (MCSnorm)','Fixation point X (MCSnorm)','Fixation point Y (MCSnorm)']

# Change (commas) to (decimals) and convert object to float64
for feature in objColumns:
    df_features[feature] = df_features[feature].str.replace(',','.').astype(float)

df_features

Unnamed: 0,Recording timestamp,Participant name,Recording name,Recording duration,Pupil diameter left,Pupil diameter right,Gaze point X (MCSnorm),Gaze point Y (MCSnorm),Eye movement type,Gaze event duration,Fixation point X (MCSnorm),Fixation point Y (MCSnorm)
1610,12532530,Participant0059,Recording1,88301,,,,,EyesNotFound,75.0,,
1611,12533294,Participant0059,Recording1,88301,,,,,EyesNotFound,75.0,,
1612,12541623,Participant0059,Recording1,88301,,,,,EyesNotFound,75.0,,
1613,12549985,Participant0059,Recording1,88301,,,,,EyesNotFound,75.0,,
1614,12558306,Participant0059,Recording1,88301,,,,,EyesNotFound,75.0,,
...,...,...,...,...,...,...,...,...,...,...,...,...
102967,63277840,Participant0059,Recording8,63470,,,,,Unclassified,17.0,,
102968,63277840,Participant0059,Recording8,63470,,,,,Unclassified,17.0,,
102969,63282267,Participant0059,Recording8,63470,,,,,EyesNotFound,8.0,,
102970,63290558,Participant0059,Recording8,63470,,,0.4547,0.4617,Unclassified,8.0,,


In [30]:
df_features['Time'] = pd.to_datetime(df_features['Recording timestamp']).astype(np.int64)/int(1e6)  # seconds
df_features['Time'].diff()  # if we've done it right, this should be approx 1/sfreq = 1/120 = 0.00833  (if sfreq=120Hz)

3083          NaN
3084     0.006805
3085     0.008329
3086     0.008333
3087     0.008337
           ...   
82494    0.008336
82495    0.008343
82496    0.009366
82497    0.000000
82498    0.038590
Name: Time, Length: 71206, dtype: float64

In [31]:
df_features['Distance'] = np.sqrt(df_features['Gaze point X (MCSnorm)']**2 + df_features['Gaze point Y (MCSnorm)']**2)
df_features['Speed'] = df_features['Distance'].diff() / df_features['Time'].diff() # Delta_position / Delta_time
df_features  # MCSnorm/sec

Unnamed: 0,Recording timestamp,Participant name,Recording name,Recording duration,Pupil diameter left,Pupil diameter right,Gaze point X (MCSnorm),Gaze point Y (MCSnorm),Eye movement type,Gaze event duration,Fixation point X (MCSnorm),Fixation point Y (MCSnorm),Time,Delta Time,Distance,Speed
3083,23419315,Participant0001,Recording1,83579,,,,,Fixation,117.0,0.5073,0.316,23.419315,,,
3084,23426120,Participant0001,Recording1,83579,,,0.5011,0.3203,Fixation,117.0,0.5073,0.316,23.426120,0.006805,0.594721,
3085,23434449,Participant0001,Recording1,83579,,,0.5016,0.3244,Fixation,117.0,0.5073,0.316,23.434449,0.008329,0.597359,0.316716
3086,23442782,Participant0001,Recording1,83579,3.21,3.14,0.5016,0.3202,Fixation,117.0,0.5073,0.316,23.442782,0.008333,0.595089,-0.272458
3087,23451119,Participant0001,Recording1,83579,,,0.5079,0.3145,Fixation,117.0,0.5073,0.316,23.451119,0.008337,0.597388,0.275815
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
82494,43375993,Participant0001,Recording8,43548,,,,,EyesNotFound,922.0,,,43.375993,0.008336,,
82495,43384336,Participant0001,Recording8,43548,,,,,EyesNotFound,922.0,,,43.384336,0.008343,,
82496,43393702,Participant0001,Recording8,43548,,,,,,,,,43.393702,0.009366,,
82497,43393702,Participant0001,Recording8,43548,,,,,,,,,43.393702,0.000000,,


In [32]:
df_features.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 71206 entries, 3083 to 82498
Data columns (total 16 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   Recording timestamp         71206 non-null  int64  
 1   Participant name            71206 non-null  object 
 2   Recording name              71206 non-null  object 
 3   Recording duration          71206 non-null  int64  
 4   Pupil diameter left         22320 non-null  float64
 5   Pupil diameter right        22221 non-null  float64
 6   Gaze point X (MCSnorm)      50429 non-null  float64
 7   Gaze point Y (MCSnorm)      50429 non-null  float64
 8   Eye movement type           71189 non-null  object 
 9   Gaze event duration         71189 non-null  float64
 10  Fixation point X (MCSnorm)  27653 non-null  float64
 11  Fixation point Y (MCSnorm)  27653 non-null  float64
 12  Time                        71206 non-null  float64
 13  Delta Time                  