In [25]:
import copy
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler

In [26]:
# Read the table of per-feature statistics (the `*_avg` / `*_std` columns)
# and preview its first rows.
stats_csv_path = '../../data/allParticipant_feature_stats.csv'
stat_features_df = pd.read_csv(stats_csv_path)
stat_features_df.head()

Unnamed: 0,participant_id,class,video,confidence_avg,confidence_std,success_avg,success_std,gaze_0_x_avg,gaze_0_x_std,gaze_0_y_avg,...,AU23_c_avg,AU23_c_std,AU25_c_avg,AU25_c_std,AU26_c_avg,AU26_c_std,AU28_c_avg,AU28_c_std,AU45_c_avg,AU45_c_std
0,1499,ch,ch1_1,0.98,3.330669e-16,1.0,0.0,-0.100496,0.075156,0.147423,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.068646,0.252851
1,1499,ch,ch2_1,0.98,2.220446e-16,1.0,0.0,-0.028505,0.038915,0.181629,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1499,ch,ch3_1,0.98,3.330669e-16,1.0,0.0,0.109447,0.098708,0.119623,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.084615,0.278309
3,1499,ch,ch4_1,0.98,4.440892e-16,1.0,0.0,0.142628,0.02594,0.020894,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.316017,0.46492
4,1499,ch,ch5_1,0.98,2.220446e-16,1.0,0.0,-0.011453,0.024152,0.173759,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.023256,0.150715


### Match all the required features to their calculated statistical columns

For example, if we require the feature `gaze_0_x`, we select the columns that hold statistics calculated on `gaze_0_x`, such as the average (`avg`) and the standard deviation (`std`).

Therefore, the matching columns for `gaze_0_x` are `gaze_0_x_avg` and `gaze_0_x_std`.

In [27]:
# Base feature names whose statistic columns should be fed to the model.
required_features = [
    # gaze_* features
    'gaze_0_x', 'gaze_0_y', 'gaze_0_z',
    'gaze_1_x', 'gaze_1_y', 'gaze_1_z',
    'gaze_angle_x', 'gaze_angle_y',
    # pose_* features
    'pose_Tx', 'pose_Ty', 'pose_Tz',
    'pose_Rx', 'pose_Ry', 'pose_Rz',
    # AU*_r features
    'AU01_r', 'AU02_r', 'AU04_r', 'AU05_r', 'AU06_r', 'AU07_r', 'AU09_r',
    'AU10_r', 'AU12_r', 'AU14_r', 'AU15_r', 'AU17_r', 'AU20_r', 'AU23_r',
    'AU25_r', 'AU26_r', 'AU45_r',
    # AU*_c features
    'AU01_c', 'AU02_c', 'AU04_c', 'AU05_c', 'AU06_c', 'AU07_c', 'AU09_c',
    'AU10_c', 'AU12_c', 'AU14_c', 'AU15_c', 'AU17_c', 'AU20_c', 'AU23_c',
    'AU25_c', 'AU26_c', 'AU28_c', 'AU45_c',
]

# Some initial information regarding the participant and their corresponding
# responseVideo information to be included
final_features = ['participant_id', 'class', 'video']

# For every required feature, keep each statistics column whose name contains
# that feature name (e.g. 'gaze_0_x' -> 'gaze_0_x_avg', 'gaze_0_x_std').
# Columns are appended in required-feature order, then in file column order.
for req_col in required_features:
    final_features.extend(
        stat_col for stat_col in stat_features_df.columns if req_col in stat_col
    )

# Create a new df from stat_features_df retaining only the columns in
# final_features. The explicit .copy() makes it an independent frame: later
# cells assign into it, and writing into a bare column selection triggers
# pandas' SettingWithCopyWarning.
model_features_df = stat_features_df[final_features].copy()

In [28]:
# Display the frame restricted to the identifier and matched statistic columns.
model_features_df

Unnamed: 0,participant_id,class,video,gaze_0_x_avg,gaze_0_x_std,gaze_0_y_avg,gaze_0_y_std,gaze_0_z_avg,gaze_0_z_std,gaze_1_x_avg,...,AU23_c_avg,AU23_c_std,AU25_c_avg,AU25_c_std,AU26_c_avg,AU26_c_std,AU28_c_avg,AU28_c_std,AU45_c_avg,AU45_c_std
0,1499,ch,ch1_1,-0.100496,0.075156,0.147423,0.081668,-0.977575,0.014022,-0.194417,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.068646,0.252851
1,1499,ch,ch2_1,-0.028505,0.038915,0.181629,0.028025,-0.981762,0.006542,-0.188262,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.000000,0.000000
2,1499,ch,ch3_1,0.109447,0.098708,0.119623,0.085425,-0.977946,0.017138,-0.161373,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.084615,0.278309
3,1499,ch,ch4_1,0.142628,0.025940,0.020894,0.038029,-0.988476,0.004009,-0.285068,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.316017,0.464920
4,1499,ch,ch5_1,-0.011453,0.024152,0.173759,0.024943,-0.984099,0.004532,-0.171837,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.023256,0.150715
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
858,9214,fr,fr5_1,0.000735,0.045928,0.142032,0.031226,-0.988284,0.006082,0.009775,...,0.955645,0.205882,0.070565,0.256096,0.056452,0.230792,0.0,0.0,0.147177,0.354283
859,9214,fr,fr6_1,0.017454,0.046117,0.092820,0.032214,-0.993926,0.005157,0.029900,...,0.787832,0.408844,0.127925,0.334006,0.057722,0.233218,0.0,0.0,0.084243,0.277752
860,9214,fr,fr7_1,-0.006322,0.037577,0.110335,0.073321,-0.990447,0.003712,0.024218,...,0.779070,0.414874,0.341085,0.474074,0.325581,0.468592,0.0,0.0,0.120155,0.325143
861,9214,fr,fr8_1,0.029679,0.040779,0.109212,0.022427,-0.992479,0.003418,0.029769,...,0.454545,0.497930,0.473684,0.499307,0.459330,0.498343,0.0,0.0,0.129187,0.335406


The code below relabels the `class` of each stimulus video in `model_features_df` as `control`, `failure_robot`, or `failure_human`, because we want to predict which type of failure occurs, if any.

In [29]:
# Mapping from the short class codes found in the `class` column to the
# descriptive target labels; both 'ch' and 'cr' collapse into 'control'.
class_types = {
    'ch': 'control',
    'cr': 'control',
    'fh': 'failure_human',
    'fr': 'failure_robot'
}

# Relabel the whole column in one vectorised pass instead of iterating rows.
# Values that are not keys of class_types are passed through unchanged, so
# re-running this cell is a no-op. Rebinding to a copy guarantees the
# assignment never writes into a slice of another frame.
model_features_df = model_features_df.copy()
model_features_df['class'] = model_features_df['class'].map(
    lambda code: class_types.get(code, code)
)

In [30]:
# Display the frame to check the relabelled `class` column.
model_features_df

Unnamed: 0,participant_id,class,video,gaze_0_x_avg,gaze_0_x_std,gaze_0_y_avg,gaze_0_y_std,gaze_0_z_avg,gaze_0_z_std,gaze_1_x_avg,...,AU23_c_avg,AU23_c_std,AU25_c_avg,AU25_c_std,AU26_c_avg,AU26_c_std,AU28_c_avg,AU28_c_std,AU45_c_avg,AU45_c_std
0,1499,control,ch1_1,-0.100496,0.075156,0.147423,0.081668,-0.977575,0.014022,-0.194417,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.068646,0.252851
1,1499,control,ch2_1,-0.028505,0.038915,0.181629,0.028025,-0.981762,0.006542,-0.188262,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.000000,0.000000
2,1499,control,ch3_1,0.109447,0.098708,0.119623,0.085425,-0.977946,0.017138,-0.161373,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.084615,0.278309
3,1499,control,ch4_1,0.142628,0.025940,0.020894,0.038029,-0.988476,0.004009,-0.285068,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.316017,0.464920
4,1499,control,ch5_1,-0.011453,0.024152,0.173759,0.024943,-0.984099,0.004532,-0.171837,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.023256,0.150715
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
858,9214,failure_robot,fr5_1,0.000735,0.045928,0.142032,0.031226,-0.988284,0.006082,0.009775,...,0.955645,0.205882,0.070565,0.256096,0.056452,0.230792,0.0,0.0,0.147177,0.354283
859,9214,failure_robot,fr6_1,0.017454,0.046117,0.092820,0.032214,-0.993926,0.005157,0.029900,...,0.787832,0.408844,0.127925,0.334006,0.057722,0.233218,0.0,0.0,0.084243,0.277752
860,9214,failure_robot,fr7_1,-0.006322,0.037577,0.110335,0.073321,-0.990447,0.003712,0.024218,...,0.779070,0.414874,0.341085,0.474074,0.325581,0.468592,0.0,0.0,0.120155,0.325143
861,9214,failure_robot,fr8_1,0.029679,0.040779,0.109212,0.022427,-0.992479,0.003418,0.029769,...,0.454545,0.497930,0.473684,0.499307,0.459330,0.498343,0.0,0.0,0.129187,0.335406


In [31]:
# Save the current model_features_df to Excel, omitting the row index.
model_features_df.to_excel(
    '../../data/model_stat_features.xlsx',
    index=False,
)

### Integer (label) encoding for the target_class

In [32]:
# Integer code assigned to each target label.
# NOTE: this rebinds `class_types`, which an earlier cell used for the
# code -> label mapping.
class_types = {
    'control': 0,
    'failure_robot': 1,
    'failure_human': 2
}

# Encode the whole column in one vectorised pass instead of iterating rows.
# Values that are not keys of class_types are passed through unchanged, so
# re-running this cell is a no-op. When every label is mapped the column ends
# up with an integer dtype rather than object. Rebinding to a copy guarantees
# the assignment never writes into a slice of another frame.
model_features_df = model_features_df.copy()
model_features_df['class'] = model_features_df['class'].map(
    lambda label: class_types.get(label, label)
)

In [33]:
# Display the frame to check the integer-encoded `class` column.
model_features_df

Unnamed: 0,participant_id,class,video,gaze_0_x_avg,gaze_0_x_std,gaze_0_y_avg,gaze_0_y_std,gaze_0_z_avg,gaze_0_z_std,gaze_1_x_avg,...,AU23_c_avg,AU23_c_std,AU25_c_avg,AU25_c_std,AU26_c_avg,AU26_c_std,AU28_c_avg,AU28_c_std,AU45_c_avg,AU45_c_std
0,1499,0,ch1_1,-0.100496,0.075156,0.147423,0.081668,-0.977575,0.014022,-0.194417,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.068646,0.252851
1,1499,0,ch2_1,-0.028505,0.038915,0.181629,0.028025,-0.981762,0.006542,-0.188262,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.000000,0.000000
2,1499,0,ch3_1,0.109447,0.098708,0.119623,0.085425,-0.977946,0.017138,-0.161373,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.084615,0.278309
3,1499,0,ch4_1,0.142628,0.025940,0.020894,0.038029,-0.988476,0.004009,-0.285068,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.316017,0.464920
4,1499,0,ch5_1,-0.011453,0.024152,0.173759,0.024943,-0.984099,0.004532,-0.171837,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.023256,0.150715
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
858,9214,1,fr5_1,0.000735,0.045928,0.142032,0.031226,-0.988284,0.006082,0.009775,...,0.955645,0.205882,0.070565,0.256096,0.056452,0.230792,0.0,0.0,0.147177,0.354283
859,9214,1,fr6_1,0.017454,0.046117,0.092820,0.032214,-0.993926,0.005157,0.029900,...,0.787832,0.408844,0.127925,0.334006,0.057722,0.233218,0.0,0.0,0.084243,0.277752
860,9214,1,fr7_1,-0.006322,0.037577,0.110335,0.073321,-0.990447,0.003712,0.024218,...,0.779070,0.414874,0.341085,0.474074,0.325581,0.468592,0.0,0.0,0.120155,0.325143
861,9214,1,fr8_1,0.029679,0.040779,0.109212,0.022427,-0.992479,0.003418,0.029769,...,0.454545,0.497930,0.473684,0.499307,0.459330,0.498343,0.0,0.0,0.129187,0.335406


In [34]:
# Save the current model_features_df to Excel, omitting the row index.
model_features_df.to_excel(
    '../../data/model_stat_features_ohe.xlsx',
    index=False,
)

### Normalise the features

In [35]:
# Scale an independent deep copy so that model_features_df keeps its
# original (unscaled) values.
model_features_df_norm = model_features_df.copy(deep=True)

# Every column from position 3 onward is standardised; the first three
# columns are left as they are.
columns_to_normalize = model_features_df_norm.columns[3:]
scaler = StandardScaler()
scaled_values = scaler.fit_transform(model_features_df_norm[columns_to_normalize])
model_features_df_norm[columns_to_normalize] = scaled_values

model_features_df_norm

Unnamed: 0,participant_id,class,video,gaze_0_x_avg,gaze_0_x_std,gaze_0_y_avg,gaze_0_y_std,gaze_0_z_avg,gaze_0_z_std,gaze_1_x_avg,...,AU23_c_avg,AU23_c_std,AU25_c_avg,AU25_c_std,AU26_c_avg,AU26_c_std,AU28_c_avg,AU28_c_std,AU45_c_avg,AU45_c_std
0,1499,0,ch1_1,-1.848976,1.832604,0.486999,2.091978,-0.013749,0.051782,-1.280947,...,-0.796167,-1.016020,-0.744197,-1.050672,-0.575468,-0.757102,-0.127239,-0.189371,-0.648536,-0.399438
1,1499,0,ch2_1,-0.998615,0.053947,0.733473,-0.384329,-0.111539,-0.175017,-1.201650,...,-0.796167,-1.016020,-0.744197,-1.050672,-0.575468,-0.757102,-0.127239,-0.189371,-1.341785,-2.487180
2,1499,0,ch3_1,0.630893,2.988496,0.286679,2.265382,-0.022411,0.146258,-0.855233,...,-0.796167,-1.016020,-0.744197,-1.050672,-0.575468,-0.757102,-0.127239,-0.189371,-0.487258,-0.189236
3,1499,0,ch4_1,1.022831,-0.582850,-0.424730,0.077492,-0.268368,-0.251832,-2.448838,...,-0.796167,-1.016020,-0.744197,-1.050672,-0.575468,-0.757102,-0.127239,-0.189371,1.849659,1.351579
4,1499,0,ch5_1,-0.797190,-0.670601,0.676771,-0.526596,-0.166126,-0.235961,-0.990040,...,-0.796167,-1.016020,-0.744197,-1.050672,-0.575468,-0.757102,-0.127239,-0.189371,-1.106926,-1.242754
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
858,9214,1,fr5_1,-0.653220,0.398168,0.448151,-0.236552,-0.263871,-0.188972,1.349746,...,1.770707,0.093749,-0.188711,0.358751,0.013142,0.639736,-0.127239,-0.189371,0.144553,0.438069
859,9214,1,fr6_1,-0.455734,0.407437,0.093545,-0.190948,-0.395646,-0.217012,1.609016,...,1.319957,1.187774,0.262834,0.787531,0.026391,0.654418,-0.127239,-0.189371,-0.491015,-0.193827
860,9214,1,fr7_1,-0.736579,-0.011720,0.219754,1.706656,-0.314383,-0.260818,1.535812,...,1.296423,1.220278,1.940840,1.558392,2.819306,2.078990,-0.127239,-0.189371,-0.128345,0.197466
861,9214,1,fr8_1,-0.311335,0.145468,0.211662,-0.642755,-0.361850,-0.269734,1.607338,...,0.424747,1.667976,2.984664,1.697262,4.213879,2.259057,-0.127239,-0.189371,-0.037135,0.282210


In [36]:
# Save the *normalised* frame. The original cell exported model_features_df,
# so this file held the same unscaled data as model_stat_features_ohe.xlsx
# and the scaled values were never written out.
model_features_df_norm.to_excel('../../data/model_stat_features_ohe_norm.xlsx', index = False)