### Cross-validation

In [None]:
import pandas as pd
import os
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler

%run ./EM_BI_Functions.ipynb

In [None]:
# Segment layout: every analysed point is cut into `segment_number`
# segments of `segment_length` samples each
segment_length = 100
segment_number = 3
data_part = segment_length * segment_number

# Name of the column which contains the x coordinate
col = 'x'

# Number of jumping points to analyze
lp = 50

# Session settings
# Number of participants
class_nb = 14

# Number of sessions
lb_sessions = 9

# Number of columns in the data series set:
# user, session, trial + 7 data features
col_number = 10

# Number of columns in the statistical features set:
# user, session, trial + 16 data features
flat_col_number = 19

In [None]:
# Locations of the input data and of the generated results

# Expected layout: the GazeBase recordings are extracted into a folder named
# Random_Saccades_<sessions_number>, and the data of every round live in
# their own subfolder Round_<round_number>,
# e.g. Random_Saccades_9/Round_1

# Input data folder
root_folder = "../data/Random_Saccades_" + str(lb_sessions)

# Results folder: receives CSV files with accuracy, F1 score and
# Cohen's Kappa for each LOSO iteration
output_dir = "../results/"

In [None]:
# Data loading and features calculation
#
# Walks root_folder and, for every recording whose trial tag is 'S1', takes up
# to `lp` target positions (xT, yT). For each accepted position the first
# data_part+4 samples of the 'x' signal are turned into derived signals by the
# helpers loaded from EM_BI_Functions.ipynb, min-max scaled and handed to
# point_feature_set. The results are gathered in two frames:
#   f_series_all - feature series, consumed by the time-series experiment
#   f_values_all - statistical features, consumed by the flat-network experiment
label_columns = ['session', 'label', 'trial']
series_columns = label_columns + ['v', 'a', 'j', 'vc', 'ac', 'jc', 'f']
value_columns = label_columns + ['minv', 'maxv', 'avgv', 'stdv',
                                 'mina', 'maxa', 'avga', 'stda',
                                 'minj', 'maxj', 'avgj', 'stdj',
                                 'minf', 'maxf', 'avgf', 'stdf']

# Defaults kept when no recording qualifies
f_values_all = pd.DataFrame()
f_series_all = pd.DataFrame()

# Per-point frames are collected in lists and concatenated once at the end:
# growing a DataFrame with pd.concat inside the loop re-copies every row on
# each iteration and relies on concatenating with an empty frame
series_parts = []
value_parts = []

for dirpath, dirnames, filenames in os.walk(root_folder):

    for file in filenames:
        print(file)
        file_name = file.split('.')[0]
        name_parts = file_name.split('_')
        # Second token: first character -> session, next three -> label;
        # third token -> trial
        session, label, trial = name_parts[1][0], name_parts[1][1:4], name_parts[2]

        # Only 'S1' trials contribute features, so any other file is skipped
        # before it is read
        if trial != 'S1':
            continue

        eye_data = pd.read_csv(os.path.join(dirpath, file), header=0, sep=',')
        xyT_df = eye_data.iloc[:, [4, 5]]

        xTu_df = xyT_df['xT'].unique()
        yTu_df = xyT_df['yT'].unique()

        # The identification columns are the same for every point of this
        # file, so they are built once per file
        id_row = [[session, int(label), trial]]
        series_ids = pd.DataFrame(id_row * segment_number * segment_length, columns=label_columns)
        value_ids = pd.DataFrame(id_row * segment_number, columns=label_columns)

        l_points = 0

        # Same visiting order as two nested loops (xT outer, yT inner)
        for x_target, y_target in [(x_t, y_t) for x_t in xTu_df for y_t in yTu_df]:
            # so that all participants have the same number of points
            if l_points >= lp:
                break

            # Rows recorded for this target position (filter evaluated once)
            eye_point_all = eye_data[(eye_data['xT'] == x_target) & (eye_data['yT'] == y_target)]

            # Skip combinations that never occur and points with missing x samples
            if eye_point_all.shape[0] == 0 or eye_point_all['x'].isna().any():
                continue

            l_points += 1

            eye_subset = eye_point_all.iloc[0:data_part + 4, [1, 4, 5]]

            # feature extraction for entire data segments
            # NOTE(review): the length argument shrinks by one per helper call -
            # presumably each derivation step shortens the signal by one
            # sample; confirm in EM_BI_Functions.ipynb
            # NOTE(review): the labels 'a' (jerk), 'va' and 'vj' look like
            # copy-paste slips; they are left untouched because how the
            # helpers treat column names is not visible here
            eye_v = pd.DataFrame(eye_v_f(eye_subset.iloc[:, 0], data_part + 4), columns=['v' + str(col)]).reset_index(drop=True)
            eye_a = pd.DataFrame(eye_a_f(eye_v, data_part + 3), columns=['a' + str(col)]).reset_index(drop=True)
            eye_jerk = pd.DataFrame(eye_j_f(eye_a, data_part + 2), columns=['a' + str(col)]).reset_index(drop=True)
            eye_vc = pd.DataFrame(eye_s_c(eye_v, data_part + 3), columns=['vc' + str(col)]).reset_index(drop=True)
            eye_ac = pd.DataFrame(eye_s_c(eye_a, data_part + 2), columns=['va' + str(col)]).reset_index(drop=True)
            eye_jc = pd.DataFrame(eye_s_c(eye_jerk, data_part + 1), columns=['vj' + str(col)]).reset_index(drop=True)

            # data scaling - normalization
            # fit_transform re-fits the scaler on every call, so each signal
            # is scaled to its own range
            scaler = MinMaxScaler()
            eye_v_norm = scaler.fit_transform(eye_v)
            eye_a_norm = scaler.fit_transform(eye_a)
            eye_jerk_norm = scaler.fit_transform(eye_jerk)
            eye_vc_norm = scaler.fit_transform(eye_vc)
            eye_ac_norm = scaler.fit_transform(eye_ac)
            eye_jc_norm = scaler.fit_transform(eye_jc)

            # feature calculation for data segments
            f_series, f_value = point_feature_set(segment_length, eye_v_norm, eye_a_norm, eye_jerk_norm, eye_vc_norm, eye_ac_norm, eye_jc_norm)

            # feature series for the time-series model
            point_feature_series = pd.concat([series_ids, f_series.reset_index(drop=True)], axis=1)
            point_feature_series.columns = series_columns

            # statistical features for the flat model
            point_feature_values = pd.concat([value_ids, f_value.reset_index(drop=True)], axis=1)
            point_feature_values.columns = value_columns

            series_parts.append(point_feature_series)
            value_parts.append(point_feature_values)

# all sessions in one frame (pd.concat rejects an empty list, hence the guard)
if series_parts:
    f_series_all = pd.concat(series_parts, axis=0)
    f_values_all = pd.concat(value_parts, axis=0)

print('completed')


**Time series experiments**

In [None]:
# LSTM network classification
#
# Leave-one-session-out evaluation on the feature series: every session is
# held out once as the test set, a fresh model is trained on the remaining
# sessions, and the metrics returned by calc_metrics are collected per fold
# and appended to a CSV file in output_dir.

EPOCHS = 150
BATCH_SIZE = 64

ACC, F1, CK = [], [], []

# Create the results folder up front so that a missing folder cannot make
# to_csv fail only after all folds have been trained
os.makedirs(output_dir, exist_ok=True)

# Cross-validation
for session in range(1, lb_sessions + 1):
    # A new model is built for every fold; release the global Keras state
    # left behind by the previous one so memory does not grow fold by fold
    tf.keras.backend.clear_session()

    # The 'session' column holds strings, hence the str() on the fold number
    held_out = f_series_all['session'] == str(session)
    f_series_train = f_series_all[~held_out]
    f_series_test = f_series_all[held_out]

    trainSamples_LSTM, trainLabels_LSTM = train_test_set(f_series_train, segment_length, col_number)
    testSamples_LSTM, testLabels_LSTM = train_test_set(f_series_test, segment_length, col_number)

    train_tensor = tf.convert_to_tensor(trainSamples_LSTM, dtype=tf.float32)
    test_tensor = tf.convert_to_tensor(testSamples_LSTM, dtype=tf.float32)

    lstm_model = model_LSTM(class_nb, col_number)

    # The held-out session is passed as validation data; no callbacks are
    # given to fit, so it is only recorded in the history H
    H = lstm_model.fit(train_tensor, trainLabels_LSTM, epochs=EPOCHS, verbose=0, batch_size=BATCH_SIZE, validation_data=(test_tensor, testLabels_LSTM))

    # Predict on the same float32 tensor that was used for validation,
    # not on the raw samples wrapped in a list
    modelResults = lstm_model.predict(test_tensor)
    A, F, C = calc_metrics(modelResults, testLabels_LSTM)
    ACC.append(A)
    F1.append(F)
    CK.append(C)

# One row per fold with ACC / F1 / CK.
# NOTE(review): the settings frame [lp, session, EPOCHS] has a single row, so
# only the first CSV row carries these values, and `session` here is the last
# value of the loop variable - confirm this layout is intended
metrics_df = pd.concat(
    [
        pd.DataFrame([[lp, session, EPOCHS]]).reset_index(drop=True),
        pd.DataFrame(ACC, columns=['ACC']).reset_index(drop=True),
        pd.DataFrame(F1, columns=['F1']).reset_index(drop=True),
        pd.DataFrame(CK, columns=['CK']).reset_index(drop=True),
    ],
    axis=1,
)
# mode='a' appends to an existing file; the header is written on every run
metrics_df.to_csv(output_dir + 'BE_' + str(lb_sessions) + '_' + str(lp) + '_metrics.csv', sep=';', mode='a', header=True, index=False)
  

**Statistical features experiments**

In [None]:
# Flat network classification
#
# Leave-one-session-out evaluation on the statistical features: every session
# is held out once as the test set, a fresh model is trained on the remaining
# sessions, and the metrics returned by calc_metrics are collected per fold
# and appended to a CSV file in output_dir.
# EPOCHS and BATCH_SIZE are not defined in this cell; they come from the
# LSTM cell, which therefore has to be executed first.

# Number of layers
layers = 3

ACC_flat, F1_flat, CK_flat = [], [], []

# Create the results folder up front so that a missing folder cannot make
# to_csv fail only after all folds have been trained
os.makedirs(output_dir, exist_ok=True)

# Cross-validation
for session in range(1, lb_sessions + 1):
    # A new model is built for every fold; release the global Keras state
    # left behind by the previous one so memory does not grow fold by fold
    tf.keras.backend.clear_session()

    # The 'session' column holds strings, hence the str() on the fold number
    held_out = f_values_all['session'] == str(session)
    f_values_train = f_values_all[~held_out]
    f_values_test = f_values_all[held_out]

    trainSamples_flat, trainLabels_flat = train_test_set_flat(f_values_train, flat_col_number)
    testSamples_flat, testLabels_flat = train_test_set_flat(f_values_test, flat_col_number)

    flat_model = model_flat(class_nb, flat_col_number, layers)

    H_flat = flat_model.fit(tf.convert_to_tensor(trainSamples_flat, dtype=tf.float32), trainLabels_flat, epochs=EPOCHS, verbose=0, batch_size=BATCH_SIZE)

    modelResults_flat = flat_model.predict(tf.convert_to_tensor(testSamples_flat, dtype=tf.float32))

    A, F, C = calc_metrics(modelResults_flat, testLabels_flat)
    ACC_flat.append(A)
    F1_flat.append(F)
    CK_flat.append(C)


# One row per fold with ACC / F1 / CK.
# NOTE(review): the settings frame [lp, session, EPOCHS] has a single row, so
# only the first CSV row carries these values, and `session` here is the last
# value of the loop variable - confirm this layout is intended
metrics_flat_df = pd.concat(
    [
        pd.DataFrame([[lp, session, EPOCHS]]).reset_index(drop=True),
        pd.DataFrame(ACC_flat, columns=['ACC']).reset_index(drop=True),
        pd.DataFrame(F1_flat, columns=['F1']).reset_index(drop=True),
        pd.DataFrame(CK_flat, columns=['CK']).reset_index(drop=True),
    ],
    axis=1,
)
# mode='a' appends to an existing file; the header is written on every run
metrics_flat_df.to_csv(output_dir + 'BE_Flat_' + str(layers) + 'L_' + str(lb_sessions) + '_' + str(lp) + '_metrics.csv', sep=';', mode='a', header=True, index=False)
