In [268]:
import os
import tensorflow as tf
print(tf.__version__)

import numpy as np, pandas as pd, matplotlib.pyplot as plt
import os, sys, glob, csv, keras
from os import walk, path
from keras import models, layers, optimizers, preprocessing as KRSpreps, utils as KRSutils
from __future__ import absolute_import, division, print_function
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score
from sklearn.impute import SimpleImputer

2.16.1


### Create Dictionary and store paths for all different Modalities(Micro-expression and Gaze)

In [291]:
data_path = {}
data_path['gazedata_path'] = "Gaze_Features/"
data_path['mexpdata_path'] = "Mexp_Features/"

In [292]:
os.chdir("/Users/jingweiong/Downloads")

### Checking No. of files in each of Micro-expression & Gaze Folders && Shape of the Dataframes

In [293]:
data_shape_all = pd.DataFrame()
for key in data_path.keys():
    count = 0
    data_shape, file_names = [], []
    for filepath in glob(path.join(data_path[key], '*.csv')):
        file_shape = pd.read_csv(filepath).shape
        filename = path.basename(filepath)
        for reps in (("Gaze_", ""), ("Mexp_", "")):
            filename = filename.replace(*reps)
        if filename not in ['Annotation_mexp_features.csv', 'Annotation_gaze_features.csv', 'Youtube_splitsteal_deception_user5_3.csv']:
            data_shape.append([file_shape[0], file_shape[1]])
            file_names.append(filename)
            count+=1
    data_shape = pd.DataFrame(data_shape)
    data_shape.columns = [key + str(0), key +str(1)]
    data_shape.index = pd.Series(file_names)
    data_shape_all = pd.concat([data_shape_all, data_shape], axis = 1, sort=True)
    print(f"No. of file in {key}: ", count)
#data_shape_all

No. of file in gazedata_path:  473
No. of file in mexpdata_path:  473


## Creating Dictionaries of Micro-expression & Gaze
Remove Initials and Make the Keys Same for the Same data

In [294]:
gaze_dict, mexp_dict = {}, {}
listofdicts = [gaze_dict, mexp_dict]
for key, data_dict_indiv in zip(data_path.keys(), listofdicts):
    for filepath in glob(path.join(data_path[key], '*.csv')):
        data = pd.read_csv(filepath)
        filename = path.basename(filepath)
        for reps in (("Gaze_", ""), ("Mexp_", "")):
            filename = filename.replace(*reps)
        data_dict_indiv[filename] = data

### Checking If the Labels are Same for Same Keys in Each Dcitionaries & Separating Labels from Training Data

In [295]:
filename_dictkeys = list(gaze_dict)
label_dict = {}
for key in filename_dictkeys:
    # print(key)
    gazedata, mexpdata = gaze_dict[key], mexp_dict[key]
    label_gaze = gazedata.loc[:, "label"].unique()[0]
    label_mexp = mexpdata.loc[:, "label"].unique()[0]
    label_set = set([label_gaze, label_mexp])
    if len(label_set) > 1:
        print(key)
    else:
        label_dict[key] = list(label_set)[0]
print("No. of files with same label: ", len(label_dict))

No. of files with same label:  473


### Dropping Indexing Columns & Labels from Training Data && Reindexing with TIme && Upsampling

In [297]:
from scipy.signal import resample

filename_dictkeys = list(gaze_dict)
gaze_dict_upsampled, mexp_dict_upsampled = {}, {}
for key in filename_dictkeys:
    gaze_data = gaze_dict[key]
    if "Unnamed: 0" in gaze_data.columns:
        gaze_data = gaze_data.drop("Unnamed: 0", axis=1)
    else:
        print(f"Warning: 'Unnamed: 0' column not found in '{key}'")
    
    # Drop other columns like "frame" and "label"
    gaze_data = gaze_data.drop(["frame", "label", "face_id", "timestamp", "confidence", "success"], axis=1, errors='ignore')
    gaze_data = np.array(gaze_data.drop_duplicates())
    gaze_dict_upsampled[key] = resample(gaze_data, 300)

    mexp_data = mexp_dict[key]
    if "Unnamed: 0" in mexp_data.columns:
        mexp_data = mexp_data.drop("Unnamed: 0", axis=1)
    else:
        print(f"Warning: 'Unnamed: 0' column not found in '{key}'")
    
    # Drop other columns like "frame" and "label"
    mexp_data = mexp_data.drop(["frame", "label", "face_id", "timestamp", "confidence", "success"], axis=1, errors='ignore')
    mexp_data = np.array(mexp_data.drop_duplicates())
    mexp_dict_upsampled[key] = resample(mexp_data, 300)

In [298]:
import random
random.seed(3986)
train_split_keys = random.sample(list(gaze_dict_upsampled), int(0.75*len(list(gaze_dict_upsampled))))
test_split_keys = list(set(list(gaze_dict_upsampled)) - set(train_split_keys))

In [299]:
dict_split_train = lambda dict_data: {key: value for key, value in dict_data.items() if key in train_split_keys}
dict_split_test = lambda dict_data: {key: value for key, value in dict_data.items() if key in test_split_keys}

mapped_train = list(map(dict_split_train, [gaze_dict_upsampled, mexp_dict_upsampled, label_dict]))
mapped_test = list(map(dict_split_test, [gaze_dict_upsampled, mexp_dict_upsampled, label_dict]))

train_data = {'gaze_train': mapped_train[0], 'mexp_train': mapped_train[1], 'y_train': mapped_train[2]}
test_data = {'gaze_test': mapped_test[0], 'mexp_test': mapped_test[1], 'y_test': mapped_test[2]}

In [300]:
conv2arr = lambda x: np.array(x)

train_data['gaze_train'] = conv2arr(list(train_data['gaze_train'].values()))
test_data['gaze_test'] = conv2arr(list(test_data['gaze_test'].values()))

train_data['mexp_train'] = conv2arr(list(train_data['mexp_train'].values()))
test_data['mexp_test'] = conv2arr(list(test_data['mexp_test'].values()))

train_data['y_train'] = conv2arr(list(train_data['y_train'].values()))
test_data['y_test'] = conv2arr(list(test_data['y_test'].values()))

In [182]:

# # Initialize separate scalers for gaze and microexpression data
# gaze_scaler = StandardScaler()
# mexp_scaler = StandardScaler()

# # Fit the scalers on their respective training data
# # Assuming train_gaze_data and train_mexp_data are your training data arrays for gaze and microexpression

# # Flatten the training data for fitting
# train_gaze_flat = train_data['gaze_train'].reshape(-1, train_data['gaze_train'].shape[-1])
# train_mexp_flat = train_data['mexp_train'].reshape(-1, train_data['mexp_train'].shape[-1])

# gaze_scaler.fit(train_gaze_flat)
# mexp_scaler.fit(train_mexp_flat)

# # Transform the training and testing data using their respective fitted scalers
# for key1 in train_data.keys():
#     if 'gaze' in key1:
#         scaler = gaze_scaler
#     elif 'mexp' in key1:
#         scaler = mexp_scaler
#     else:
#         continue  # Skip non-feature data like labels

#     data = train_data[key1]
#     data_flat = data.reshape(-1, data.shape[-1])
#     data_scaled = scaler.transform(data_flat)
#     train_data[key1] = data_scaled.reshape(data.shape)

# for key2 in test_data.keys():
#     if 'gaze' in key2:
#         scaler = gaze_scaler
#     elif 'mexp' in key2:
#         scaler = mexp_scaler
#     else:
#         continue  # Skip non-feature data like labels

#     data = test_data[key2]
#     data_flat = data.reshape(-1, data.shape[-1])
#     data_scaled = scaler.transform(data_flat)
#     test_data[key2] = data_scaled.reshape(data.shape)

# # Now, prepare your model. You could either concatenate the features or use separate input channels.
# # Let's assume concatenation for simplicity in a traditional machine learning model:

# # Example of preparing combined features for SVM training
# train_combined_features = np.concatenate([
#     train_data['gaze_train'].reshape(train_data['gaze_train'].shape[0], -1), 
#     train_data['mexp_train'].reshape(train_data['mexp_train'].shape[0], -1)
# ], axis=1)

# test_combined_features = np.concatenate([
#     test_data['gaze_test'].reshape(test_data['gaze_test'].shape[0], -1), 
#     test_data['mexp_test'].reshape(test_data['mexp_test'].shape[0], -1)
# ], axis=1)

In [158]:
# # Check and handle NaNs
# train_combined_features = np.nan_to_num(train_combined_features, nan=np.nanmean(train_combined_features))
# test_combined_features = np.nan_to_num(test_combined_features, nan=np.nanmean(test_combined_features))

# # Ensure the labels are integers
# y_train = np.array([0 if value == 'Truthful' else 1 for value in train_data['y_train']])
# y_test = np.array([0 if value == 'Truthful' else 1 for value in test_data['y_test']])

# # Train an SVM classifier
# svm_classifier = SVC(kernel='linear')
# svm_classifier.fit(train_combined_features, y_train)

In [301]:
train_key, test_key = list(train_data.keys()), list(test_data.keys())
train_key.remove('y_train')
test_key.remove('y_test')

for key1, key2 in zip(train_key, test_key):
    scaler = StandardScaler()
    data1, data2 = train_data[key1], test_data[key2]

    s0, s1, s2 = data1.shape[0], data1.shape[1], data1.shape[2]
    print(data1.shape)
    data1 = data1.reshape(s0*s1, s2)
    data1 = scaler.fit_transform(data1)
    print(data1.shape)
    train_data[key1] = data1.reshape(s0, s1, s2)

    s0, s1, s2 = data2.shape[0], data2.shape[1], data2.shape[2]
    print(data2.shape)
    data2 = data2.reshape(s0*s1, s2)
    data2 = scaler.transform(data2)
    print(data2.shape)
    test_data[key2] = data2.reshape(s0, s1, s2)
    print(data1.shape, data2.shape)

(354, 300, 292)
(106200, 292)
(119, 300, 292)
(35700, 292)
(106200, 292) (35700, 292)
(354, 300, 45)
(106200, 45)
(119, 300, 45)
(35700, 45)
(106200, 45) (35700, 45)


In [302]:
gaze_features = train_data['gaze_train'].copy()
test_gaze_features = test_data['gaze_test'].copy()

mexp_features = train_data['mexp_train'].copy()
test_mexp_features = test_data['mexp_test'].copy()

gaze_features = np.clip(gaze_features, -5, 5)
mexp_features = np.clip(mexp_features, -5, 5)

test_gaze_features = np.clip(test_gaze_features, -5, 5)
test_mexp_features = np.clip(test_mexp_features, -5, 5)

print('Gaze train shape:', gaze_features.shape)
print('Mexp train shape:', mexp_features.shape)

print('Gaze test shape:', test_gaze_features.shape)
print('mexp test shape:', test_mexp_features.shape)

Gaze train shape: (354, 300, 292)
Mexp train shape: (354, 300, 45)
Gaze test shape: (119, 300, 292)
mexp test shape: (119, 300, 45)


In [303]:
y_train = pd.Series(train_data['y_train']).apply(lambda value: 0 if value == 'Truthful' else 1)
y_test = pd.Series(test_data['y_test']).apply(lambda value: 0 if value == 'Truthful' else 1)

In [312]:
# Concatenate gaze and microexpression features for training and testing
X_train = np.concatenate((gaze_features, mexp_features), axis=2)
X_test = np.concatenate((test_gaze_features, test_mexp_features), axis=2)

# Reshape the features to 2D for SVM
X_train = X_train.reshape(X_train.shape[0], -1)
X_test = X_test.reshape(X_test.shape[0], -1)

# Handle missing values using SimpleImputer
imputer = SimpleImputer(strategy='mean')
X_train = imputer.fit_transform(X_train)
X_test = imputer.transform(X_test)

# Train labels
y_train = train_data['y_train']
y_test = test_data['y_test']

# Initialize and train the SVM model
svm_model = SVC(kernel='linear') #, C=1.0, random_state=42
svm_model.fit(X_train, y_train)



# Make predictions
y_pred_train = svm_model.predict(X_train)
y_pred_test = svm_model.predict(X_test)

# Evaluate the model

print("Testing Classification Report:")
print(classification_report(y_test, y_pred_test))
print("Testing Accuracy:", accuracy_score(y_test, y_pred_test))
print("Training Accuracy:", accuracy_score(y_train, y_pred_train))



Testing Classification Report:
              precision    recall  f1-score   support

   Deceptive       0.57      0.41      0.48        58
    Truthful       0.56      0.70      0.62        61

    accuracy                           0.56       119
   macro avg       0.56      0.56      0.55       119
weighted avg       0.56      0.56      0.55       119

Testing Accuracy: 0.5630252100840336
Training Accuracy: 1.0


In [311]:
# Initialize and train the SVM model
svm_model = SVC(kernel='poly') #, C=1.0, random_state=42
svm_model.fit(X_train, y_train)

# Make predictions
y_pred_train = svm_model.predict(X_train)
y_pred_test = svm_model.predict(X_test)

# Evaluate the model

print("Testing Classification Report:")
print(classification_report(y_test, y_pred_test))
print("Testing Accuracy:", accuracy_score(y_test, y_pred_test))

Testing Classification Report:
              precision    recall  f1-score   support

   Deceptive       0.81      0.29      0.43        58
    Truthful       0.58      0.93      0.72        61

    accuracy                           0.62       119
   macro avg       0.70      0.61      0.57       119
weighted avg       0.69      0.62      0.58       119

Testing Accuracy: 0.6218487394957983


In [306]:
# Initialize and train the SVM model
svm_model = SVC(kernel='rbf') #, C=1.0, random_state=42
svm_model.fit(X_train, y_train)

# Make predictions
y_pred_train = svm_model.predict(X_train)
y_pred_test = svm_model.predict(X_test)

# Evaluate the model

print("Testing Classification Report:")
print(classification_report(y_test, y_pred_test))
print("Testing Accuracy:", accuracy_score(y_test, y_pred_test))

Testing Classification Report:
              precision    recall  f1-score   support

   Deceptive       0.81      0.29      0.43        58
    Truthful       0.58      0.93      0.72        61

    accuracy                           0.62       119
   macro avg       0.70      0.61      0.57       119
weighted avg       0.69      0.62      0.58       119

Testing Accuracy: 0.6218487394957983


In [307]:
# Initialize and train the SVM model
svm_model = SVC(kernel='sigmoid') #, C=1.0, random_state=42
svm_model.fit(X_train, y_train)

# Make predictions
y_pred_train = svm_model.predict(X_train)
y_pred_test = svm_model.predict(X_test)

# Evaluate the model

print("Testing Classification Report:")
print(classification_report(y_test, y_pred_test))
print("Testing Accuracy:", accuracy_score(y_test, y_pred_test))

Testing Classification Report:
              precision    recall  f1-score   support

   Deceptive       0.64      0.40      0.49        58
    Truthful       0.58      0.79      0.67        61

    accuracy                           0.60       119
   macro avg       0.61      0.59      0.58       119
weighted avg       0.61      0.60      0.58       119

Testing Accuracy: 0.5966386554621849


In [308]:
import joblib
joblib.dump(svm_model, 'multimodal_mexp_and_gaze.pkl')

['multimodal_mexp_and_gaze.pkl']

In [310]:
from sklearn.decomposition import PCA

def preprocess_data_with_pca(filepath, n_samples, expected_features):
    data = pd.read_csv(filepath)
    data = data.drop(columns=["Unnamed: 0", "frame", "label", "face_id", "timestamp", "confidence", "success"], errors='ignore')
    data = data.drop_duplicates()

    # Resample to a fixed number of samples
    if len(data) > n_samples:
        data = resample(data, n_samples)
    elif len(data) < n_samples:
        repeat_factor = n_samples // len(data) + 1
        data = pd.DataFrame(np.tile(data, (repeat_factor, 1)), columns=data.columns)[:n_samples]

    # PCA for dimensionality reduction if the number of features is more than expected
    if data.shape[1] > expected_features:
        pca = PCA(n_components=expected_features)
        data = pca.fit_transform(data)
    elif data.shape[1] < expected_features:
        raise ValueError(f"Data has fewer features ({data.shape[1]}) than expected ({expected_features}).")

    return data

def predict_deception(gaze_filepath, mexp_filepath, svm_model, gaze_features=288, mexp_features=41):
    # Preprocess gaze data with PCA
    gaze_data = preprocess_data_with_pca(gaze_filepath, n_samples=300, expected_features=gaze_features)

    # Preprocess microexpression data with PCA
    mexp_data = preprocess_data_with_pca(mexp_filepath, n_samples=300, expected_features=mexp_features)

    # Concatenate gaze and microexpression features
    features = np.concatenate((gaze_data, mexp_data), axis=1).reshape(1, -1)

    # Use a pre-trained SimpleImputer or ensure it is fitted with the training data
    imputer = SimpleImputer(strategy='mean')
    features = imputer.fit_transform(features)  # It's better to fit this with training data only

    # Predict using the SVM model
    prediction = svm_model.predict(features)
    print(prediction)

    # Return the result
    return 'Deceptive' if prediction == 'Deceptive' else 'truthful'

# Example usage
gaze_file = "/Users/jingweiong/Downloads/Gaze_Features/Gaze_reallifedeception_trial_lie_043.csv"
mexp_file = "/Users/jingweiong/Downloads/Mexp_Features/Mexp_reallifedeception_trial_lie_043.csv"

result = predict_deception(gaze_file, mexp_file, svm_model, gaze_features=292, mexp_features=45)
print(result)


['Deceptive']
Deceptive
