In [162]:
import pandas as pd
import numpy as np
import itertools

from sklearn import preprocessing
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier

from helpers import collect_results

In [163]:
# Lift pandas' row limit so that displayed frames are rendered in full.
pd.set_option('display.max_rows', None)

In [164]:
# Experiment switch that is forwarded unchanged to every `collect_results`
# call further down in this notebook.
# Change to True to run the experiments with the sex labels.
run_experiment_with_sex_labels = False

In [None]:
# Read the annotation table (`Annotations.csv`) from the dataset directory.
# Later cells use its `truth`, `sex`, `usernum` and `gaze` columns.
data_directory = 'BagOfLies'
df = pd.read_csv(f'{data_directory}/Annotations.csv')
# Render the loaded table in the notebook output.
display(df)

In [166]:
# Lie target: 1 where the `truth` column equals 0, otherwise 0.
y_lie = ((df['truth'] == 0) * 1).to_numpy()

# Sex target: 1 where the `sex` column equals 'F', otherwise 0.
y_sex = ((df['sex'] == 'F') * 1).to_numpy()

# `usernum` of every row (presumably the subject id used for grouping
# inside `collect_results` -- confirm against helpers).
subjects = df['usernum'].to_numpy()

In [167]:
# Class balance of the lie target (1 = lie, 0 = truth).
print('Lie:', np.count_nonzero(y_lie == 1))
print('Truth:', np.count_nonzero(y_lie == 0), end='\n\n')

# Class balance of the sex target (1 = female, 0 = male).
print('Females:', np.count_nonzero(y_sex == 1))
print('Males:', np.count_nonzero(y_sex == 0))

Lie: 162
Truth: 163

Females: 94
Males: 231


In [168]:
def gaze_vector(gaze_file, N=20):
    '''Construct the fixed-length feature vector for one gaze recording.

    Parameters
    ----------
    gaze_file : str or path-like
        CSV file readable by ``pd.read_csv``. The columns ``LPV``, ``RPV``,
        ``LPD``, ``RPD``, ``FPOGV``, ``FPOGID``, ``FPOGD``, ``FPOGX`` and
        ``FPOGY`` are read.
    N : int, default 20
        Number of fixation slots in the output.

    Returns
    -------
    list
        ``3 + 3 * N`` numbers, laid out as:

        * mean and standard deviation of the pooled ``LPD``/``RPD`` values,
          taken only from rows where both ``LPV`` and ``RPV`` equal 1
          (pandas yields NaN for these when too few such rows exist);
        * the number of distinct ``FPOGID`` values among rows with
          ``FPOGV == 1``;
        * for up to ``N`` fixations, ordered by decreasing ``FPOGD``, the
          triple ``(FPOGD, FPOGX, FPOGY)`` where each value is the maximum
          over the rows sharing that ``FPOGID``;
        * ``(0, 0.5, 0.5)`` padding triples for every unused slot.
    '''
    fixation_columns = ['FPOGD', 'FPOGX', 'FPOGY']
    # Filler for missing fixations: zero duration at (0.5, 0.5).
    # NOTE(review): 0.5 is presumably the screen centre in normalised
    # coordinates -- confirm against the eye-tracker documentation.
    padding_triple = [0, 0.5, 0.5]

    df_gaze = pd.read_csv(gaze_file)

    # pupils: only samples flagged valid for both eyes, both eyes pooled
    df_pupil = df_gaze.query("LPV == 1 & RPV == 1")
    pupil_size = pd.concat([df_pupil['LPD'], df_pupil['RPD']])
    result = [pupil_size.mean(), pupil_size.std()]

    # fixations: one row per FPOGID, longest first.
    # The native groupby `max` replaces `agg({...: max})` with the Python
    # builtin, which pandas >= 2.1 deprecates (FutureWarning on every call).
    df_fixations_sorted = (
        df_gaze.query("FPOGV == 1")
        .groupby('FPOGID')[fixation_columns]
        .max()
        .sort_values('FPOGD', ascending=False)
    )
    fixation_count = df_fixations_sorted.shape[0]
    result.append(fixation_count)

    # Flatten the top-N rows into FPOGD, FPOGX, FPOGY, FPOGD, FPOGX, ...
    top_fixations = df_fixations_sorted.head(N)
    result.extend(itertools.chain.from_iterable(top_fixations.to_records(index=False)))

    # Pad so that every recording yields a vector of the same length.
    result.extend(padding_triple * (N - len(top_fixations)))

    return result

In [169]:
# One feature vector per annotated recording, in the row order of `df`.
features = []
for gaze_file in df['gaze']:
    # The first two characters of the stored path are dropped before it is
    # joined to the dataset directory (presumably a leading './' -- verify).
    features.append(gaze_vector(data_directory + '/' + gaze_file[2:]))

In [170]:
# Standardise every feature column, then append a constant column of ones.
# NOTE(review): the scaler is fit on all rows before `collect_results` runs;
# if that helper splits the data for validation, held-out rows influence the
# scaling statistics -- confirm whether this is acceptable.
x_unnormalized = np.array(features)
x = preprocessing.StandardScaler().fit_transform(x_unnormalized)
x = np.hstack([x, np.ones((x.shape[0], 1))])

In [None]:
# Random Forest: evaluate every (max_depth, max_features) combination.
# `collect_results` receives the shared map together with the combination
# tuple (presumably it records its results there under that key -- see helpers).
rf_hyperparameter_map = {}
for max_depth, max_features in itertools.product([3, 8, 13, 21, 30, 45, 60], ['sqrt', 'log2']):
    clf = RandomForestClassifier(
        n_estimators=1000,
        max_depth=max_depth,
        max_features=max_features,
    )
    collect_results(
        x, y_lie, y_sex, subjects, run_experiment_with_sex_labels,
        (max_depth, max_features), clf, rf_hyperparameter_map,
        folds=3, iterations=5,
    )

In [None]:
# MLP: evaluate every (hidden_layer_size, alpha) combination.
# `collect_results` receives the shared map together with the combination
# tuple (presumably it records its results there under that key -- see helpers).
mlp_hyperparameter_map = {}
for hidden_layer_size, alpha in itertools.product(
    [30, 50, 80, 100, 120],
    [1e-4, 1e-3, 5e-3, 1e-2, 5e-2, 1e-1, 5e-1, 1],
):
    clf = MLPClassifier(
        hidden_layer_sizes=hidden_layer_size,
        alpha=alpha,
        max_iter=10000,
    )
    collect_results(
        x, y_lie, y_sex, subjects, run_experiment_with_sex_labels,
        (hidden_layer_size, alpha), clf, mlp_hyperparameter_map,
        folds=3, iterations=5,
    )