In [None]:
import os
import tensorflow as tf
print(tf.__version__)

import numpy as np, pandas as pd, matplotlib.pyplot as plt
import os, sys, glob, csv, keras
from sklearn import model_selection, preprocessing
from os import walk, path
from keras import models, layers, optimizers, preprocessing as KRSpreps, utils as KRSutils
#from tslearn import preprocessing as TSpreps, utils as TSutils
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.callbacks import ModelCheckpoint

from __future__ import absolute_import, division, print_function

from tensorflow.keras.layers import Lambda, Input, Dense, Flatten, Dropout, LSTM
from tensorflow.keras.models import Model
from tensorflow.keras.losses import mse, binary_crossentropy
from tensorflow.keras.utils import plot_model
from tensorflow.keras import backend as K

from tensorflow import keras
import tempfile

import matplotlib as mpl
import seaborn as sns

import sklearn
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from glob import glob
import datetime

### Create a dictionary storing the paths for each modality (micro-expression and gaze)

In [None]:
# Relative folder name for each modality's feature files.
# Insertion order (gaze first, then micro-expression) matters to cells that zip over the keys.
data_path = {
    'gazedata_path': "Gaze_Features/",
    'mexpdata_path': "Mexp_Features/",
}

In [None]:
# Switch the working directory to the project checkout.
# The default is the original author's local path; set the DECEPTION_PROJECT_DIR
# environment variable to run the notebook on another machine without editing this cell.
os.chdir(os.environ.get("DECEPTION_PROJECT_DIR", "/Users/jingweiong/Downloads/Deception-Detection-master"))

### Checking the number of files in each of the micro-expression and gaze folders, and the shape of each dataframe

In [None]:
# Folder holding the extracted feature CSVs. Defaults to the original local path;
# set the DECEPTION_FEATURES_DIR environment variable to point somewhere else.
# NOTE: `dir` shadows the builtin, but the name is kept because a later cell reads it.
dir = os.environ.get(
    "DECEPTION_FEATURES_DIR",
    "/Users/jingweiong/Downloads/Deception_detection_output_mexp_gaze",
)

# Filename tag that marks which modality a CSV belongs to (the same tags are
# stripped from the names below so both modalities share one key per recording).
modality_tags = {'gazedata_path': "Gaze_", 'mexpdata_path': "Mexp_"}
annotation_files = ['Annotation_mexp_features.csv', 'Annotation_gaze_features.csv']

# Sorted so the result does not depend on filesystem listing order.
csv_paths = sorted(glob(path.join(dir, '*.csv')))

# Previously every modality scanned *all* CSVs, so the per-modality counts and
# shapes were mixed together. Split by tag whenever the folder uses the tags;
# if no file carries one, fall back to counting every CSV for every modality.
files_are_tagged = any(
    tag in path.basename(p) for p in csv_paths for tag in modality_tags.values()
)

shape_frames = []
for key in data_path.keys():
    tag = modality_tags.get(key)
    count = 0
    data_shape, file_names = [], []
    for filepath in csv_paths:
        filename = path.basename(filepath)
        if files_are_tagged and tag is not None and tag not in filename:
            continue  # file belongs to the other modality
        for reps in (("Mexp_", ""), ("Gaze_", "")):
            filename = filename.replace(*reps)
        if filename in annotation_files:
            continue  # annotation tables are not feature files
        file_shape = pd.read_csv(filepath).shape
        data_shape.append([file_shape[0], file_shape[1]])
        file_names.append(filename)
        count += 1

    # Passing columns/index to the constructor also works when no file matched
    # (assigning `.columns` on an empty frame raises a length-mismatch error).
    data_shape = pd.DataFrame(
        data_shape,
        columns=[key + str(0), key + str(1)],
        index=file_names,
    )

    # Ensure index values are unique before concatenating
    data_shape = data_shape[~data_shape.index.duplicated(keep='first')]

    shape_frames.append(data_shape)
    print(f"No. of file in {key}: ", count)

# One row per recording, two columns (rows, cols) per modality.
data_shape_all = pd.concat(shape_frames, axis=1, sort=True) if shape_frames else pd.DataFrame()

## Creating Dictionaries of Micro-expression & Gaze
Remove the modality prefixes (`Gaze_`, `Mexp_`) from the file names so that both dictionaries use the same key for the same recording

In [None]:
gaze_dict, mexp_dict = {}, {}
listofdicts = [gaze_dict, mexp_dict]

# Filename tag that marks which modality a CSV belongs to.
modality_tags = {'gazedata_path': "Gaze_", 'mexpdata_path': "Mexp_"}

# Sorted so dictionary order (and therefore sample order downstream) is
# reproducible instead of depending on filesystem listing order.
csv_paths = sorted(glob(path.join(dir, '*.csv')))

# Previously both dictionaries were filled from *every* CSV; once the tags were
# stripped, the gaze and micro-expression files of a recording overwrote each
# other and both dictionaries ended up with identical content. Load only the
# files of the matching modality whenever the folder uses the tags; if no file
# carries one, fall back to loading every CSV into both dictionaries.
files_are_tagged = any(
    tag in path.basename(p) for p in csv_paths for tag in modality_tags.values()
)

for key, data_dict_indiv in zip(data_path.keys(), listofdicts):
    tag = modality_tags.get(key)
    for filepath in csv_paths:
        filename = path.basename(filepath)
        if files_are_tagged and tag is not None and tag not in filename:
            continue  # file belongs to the other modality
        data = pd.read_csv(filepath)
        # Strip the tags so the same recording gets the same key in both dictionaries.
        for reps in (("Gaze_", ""), ("Mexp_", "")):
            filename = filename.replace(*reps)
        data_dict_indiv[filename] = data

# Later cells index both dictionaries with the same keys, so keep only the
# recordings that are present in both modalities.
unmatched = sorted(set(gaze_dict) ^ set(mexp_dict))
if unmatched:
    print("Skipping files without a counterpart in the other modality: ", unmatched)
    for name in unmatched:
        gaze_dict.pop(name, None)
        mexp_dict.pop(name, None)

### Checking If the Labels are the Same for the Same Keys in Each Dictionary & Separating Labels from Training Data

In [None]:
# Build `label_dict` (file key -> label) from the files whose gaze and
# micro-expression tables agree on the label; keys that disagree are printed
# and left out of the dictionary.
label_dict = {}
filename_dictkeys = list(gaze_dict)
for key in filename_dictkeys:
    # The first distinct value of the "label" column is taken as the file's label.
    label_gaze = gaze_dict[key].loc[:, "label"].unique()[0]
    label_mexp = mexp_dict[key].loc[:, "label"].unique()[0]
    label_set = {label_gaze, label_mexp}
    if len(label_set) == 1:
        label_dict[key] = next(iter(label_set))
    else:
        print(key)
print("No. of files with same label: ", len(label_dict))

### Dropping Indexing Columns & Labels from Training Data && Reindexing with Time && Upsampling

In [None]:
from scipy.signal import resample

# Columns removed before resampling (frame/index bookkeeping, label and tracker status).
non_feature_columns = ["frame", "Unnamed: 0", "label", "face_id", "timestamp", "confidence", "success"]
# Every recording is resampled to this many rows.
target_rows = 300


def _to_fixed_length(frame):
    """Drop the non-feature columns and duplicated rows of `frame`, then resample it to `target_rows` rows."""
    features = frame.drop(non_feature_columns, axis = 1).drop_duplicates()
    return resample(np.array(features), target_rows)


filename_dictkeys = list(gaze_dict)
gaze_dict_upsampled = {key: _to_fixed_length(gaze_dict[key]) for key in filename_dictkeys}
mexp_dict_upsampled = {key: _to_fixed_length(mexp_dict[key]) for key in filename_dictkeys}

## Combine Gaze and Microexpression Data

In [None]:
# For every recording, place the gaze columns and the micro-expression columns
# side by side in one array (both were resampled to the same number of rows).
combined_data = {
    key: np.hstack([gaze_block, mexp_dict_upsampled[key]])
    for key, gaze_block in gaze_dict_upsampled.items()
}

In [None]:
# Column count of the widest combined array; narrower arrays are padded up to this width later.
column_counts = [features.shape[1] for features in combined_data.values()]
max_columns = max(column_counts)
print("Maximum columns required:", max_columns)

In [None]:
import numpy as np


def _pad_to_width(matrix, width):
    """Return `matrix` with NaN columns appended on the right until it has `width` columns.

    A matrix that already has at least `width` columns is returned as-is (same object).
    """
    missing = width - matrix.shape[1]
    if missing <= 0:
        return matrix
    # NaN marks the padded cells as "no data" (any other placeholder could be used).
    filler = np.full((matrix.shape[0], missing), np.nan)
    return np.hstack([matrix, filler])


# Same keys as `combined_data`, every array widened to `max_columns` columns.
adjusted_combined_data = {
    key: _pad_to_width(data, max_columns) for key, data in combined_data.items()
}



In [None]:
# Files whose gaze and micro-expression labels disagreed were never added to
# `label_dict`; looking them up would raise KeyError, so only labelled samples are used.
labelled_keys = [key for key in adjusted_combined_data if key in label_dict]
skipped = len(adjusted_combined_data) - len(labelled_keys)
if skipped:
    print("Samples skipped because they have no agreed label: ", skipped)

# Flatten each array into a single vector per sample
X = np.array([adjusted_combined_data[key].flatten() for key in labelled_keys])
y = np.array([label_dict[key] for key in labelled_keys])

# Now split the data: 20% held out for testing, fixed seed for a repeatable split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# Keep only the training samples (rows) in which every feature is a number (no NaN).
valid_indices = (~np.isnan(X_train)).all(axis=1)
X_train_clean, y_train_clean = X_train[valid_indices], y_train[valid_indices]

# Apply the same filter to the held-out samples.
valid_indices_test = (~np.isnan(X_test)).all(axis=1)
X_test_clean, y_test_clean = X_test[valid_indices_test], y_test[valid_indices_test]

# Standardise the features, then fit a linear-kernel SVM on the cleaned training set.
model = make_pipeline(StandardScaler(), SVC(kernel='linear'))
model.fit(X_train_clean, y_train_clean)

In [None]:
from sklearn.metrics import accuracy_score, classification_report

# Score the currently fitted pipeline on the cleaned held-out samples.
predictions = model.predict(X_test_clean)
accuracy = accuracy_score(y_test_clean, predictions)
# zero_division=0: classes that are never predicted get a score of 0 instead of a warning.
report = classification_report(y_test_clean, predictions, zero_division=0)

for heading, value in (("Accuracy:", accuracy), ("Classification Report:\n", report)):
    print(heading, value)

In [None]:
from sklearn.metrics import accuracy_score, classification_report

# Standardise the features, then fit an SVM with a polynomial kernel
# (default degree; pass `degree=` to SVC to change it).
poly_svc = SVC(kernel='poly')
model = make_pipeline(StandardScaler(), poly_svc)
model.fit(X_train_clean, y_train_clean)

# Score the fitted pipeline on the cleaned held-out samples.
predictions = model.predict(X_test_clean)
accuracy = accuracy_score(y_test_clean, predictions)
report = classification_report(y_test_clean, predictions, zero_division=0)

for heading, value in (("Accuracy:", accuracy), ("Classification Report:\n", report)):
    print(heading, value)

In [None]:
from sklearn.metrics import accuracy_score, classification_report

# Standardise the features, then fit an SVM with an RBF kernel
# (`degree` has no effect here; it only applies to the polynomial kernel).
rbf_svc = SVC(kernel='rbf')
model = make_pipeline(StandardScaler(), rbf_svc)
model.fit(X_train_clean, y_train_clean)

# Score the fitted pipeline on the cleaned held-out samples.
predictions = model.predict(X_test_clean)
accuracy = accuracy_score(y_test_clean, predictions)
report = classification_report(y_test_clean, predictions, zero_division=0)

for heading, value in (("Accuracy:", accuracy), ("Classification Report:\n", report)):
    print(heading, value)

In [None]:
from sklearn.metrics import accuracy_score, classification_report

# Standardise the features, then fit an SVM with a sigmoid kernel
# (`degree` has no effect here; it only applies to the polynomial kernel).
sigmoid_svc = SVC(kernel='sigmoid')
model = make_pipeline(StandardScaler(), sigmoid_svc)
model.fit(X_train_clean, y_train_clean)

# Score the fitted pipeline on the cleaned held-out samples.
predictions = model.predict(X_test_clean)
accuracy = accuracy_score(y_test_clean, predictions)
report = classification_report(y_test_clean, predictions, zero_division=0)

for heading, value in (("Accuracy:", accuracy), ("Classification Report:\n", report)):
    print(heading, value)