In [15]:
# import pandas as pd
# import numpy as np
# import matplotlib.pyplot as plt
# import seaborn as sns
# import sklearn.metrics as sk
# import tensorflow_decision_forests as tfdf

featuresToKeep = ["gaze_0_x","gaze_0_y","gaze_0_z","gaze_angle_x", "gaze_angle_y",
                  "AU01_r","AU04_r","AU10_r","AU12_r","AU45_r", 
                  "pose_Tx", "pose_Ty", "pose_Tz", "pose_Ry", 
                  "Result", "confidence", "Person"]

def getLSTMBlocks(inputLst, dataLength, blockSize = 10, start = 0):
  inputLst.sort()
  
  listOfRanges = []
  
  inputLst.append(dataLength)

  while inputLst:
    if (start + blockSize - 1) < inputLst[0]:
      listOfRanges.append([start, start + blockSize - 1])
      start += 1
    else:
      start = inputLst[0] + 1
      inputLst.pop(0)

  return listOfRanges

def shuffleByPerson(df, ratio = 0.2, lst = []):

    if lst == []:
        df = df.sort_values(by=['Person']) # Sort by person

        index = int(df.shape[0] * (1 - ratio)) # Get the index of the last person to be in the training set
        tempnum = df["Person"].iloc[index] # Get the person number of the last person to be in the training set

        temp = index
        while temp < df.shape[0]:
            temp += 1
            if df["Person"].iloc[temp] != tempnum:
                index = temp - 1
                break

        print(f"Persons 0 to {tempnum} are in the training set, and {tempnum + 1} to {df['Person'].iloc[-1]} are in the testing set")
        
        return filterColumn(df.iloc[:index]), filterColumn(df.iloc[index:])
    else:
        
        Test = df.loc[~df['Person'].isin(lst)]
        Train = df.loc[df['Person'].isin(lst)]

        return filterColumn(Train), filterColumn(Test)
        
def displayHeatmap(df):
    plt.figure(figsize=(16, 6))
    sns.heatmap(df.corr(), vmin=-1, vmax=1, annot=True, cmap='BrBG')

def displayConfusion(actual, predicted):
    sk.ConfusionMatrixDisplay(sk.confusion_matrix(actual, predicted)).plot()
    print("Accuracy is ", round(sk.accuracy_score(actual, predicted) * 100, 2), "%")

def filterColumn(df, colList = featuresToKeep):
    currdf = df
    for col in currdf.columns:
        if (str(col) not in colList):
            currdf = currdf.drop(columns = [str(col)])

    return currdf

def filterConfidence(df, colList = featuresToKeep):
    currdf = df
    currdf = filterColumn(currdf, colList)

    currdf = currdf.query("confidence >= 0.9")
    return currdf.drop(columns = ["confidence"]).dropna()

def veticalMerge(df1, df2, shuffle = False):
    df = pd.concat([df1, df2]).reset_index()
    if shuffle:
        df = df.sample(frac=1)
    return df

def addTFLabel(df, TrueOrFalse):
    if TrueOrFalse:
        df["Result"] = 1
    elif not TrueOrFalse:
        df["Result"] = 0

def shuffleDF(df):
    return df.sample(frac=1)

def addGazeDelta(currCSV):
  for j in range(10, currCSV.shape[0]):
      if currCSV.iloc[[j - 10]]["confidence"].iloc[0] >= 0.8:
        currCSV.at[j, 'dgaze_0_x'] = abs(currCSV.at[j - 10, 'gaze_0_x'] - currCSV.at[j, 'gaze_0_x'])
        currCSV.at[j, 'dgaze_0_y'] = abs(currCSV.at[j - 10, 'gaze_0_y'] - currCSV.at[j, 'gaze_0_y'])
        currCSV.at[j, 'dgaze_0_z'] = abs(currCSV.at[j - 10, 'gaze_0_z'] - currCSV.at[j, 'gaze_0_z'])
        currCSV.at[j, 'dgaze_angle_x'] = abs(currCSV.at[j - 10, 'gaze_angle_x'] - currCSV.at[j, 'gaze_angle_x'])
        currCSV.at[j, 'dgaze_angle_y'] = abs(currCSV.at[j - 10, 'gaze_angle_y'] - currCSV.at[j, 'gaze_angle_y'])

  return currCSV

def predictRF(df, modelName, modelObj):
    
    counterLie, counterTrue = 0, 0

    if modelName == 'tf':
        dataSet = tfdf.keras.pd_dataframe_to_tf_dataset(df)
        res = pd.DataFrame(modelObj.predict(dataSet))

        for i in range(res.shape[0]):
            if res.iloc[i,0] > 0.5: 
                res.iloc[i] = 1 
            else:
                res.iloc[i] = 0

    elif modelName == "sk":
        res = modelObj.predict(df)
        temp = res.shape[0]
        res = pd.DataFrame(np.reshape(res, (temp, 1)))

    for i in range(res.shape[0]):
        if res.iloc[i][0] > 0.5:
            counterTrue = counterTrue + 1
        else:
            counterLie = counterLie + 1
            
    print("Lie Possibility: ", round(counterLie/res.shape[0] * 100, 2), "%")
    print("Truth Possibility: ", round(counterTrue/res.shape[0]* 100, 2), "%")

def predictLSTM(df, modelObj):
    return modelObj.predict(filterConfidence(df))

In [65]:
import os
import glob
import random
# import pandas as pd
# import numpy as np
from tqdm import tqdm

# import tensorflow_decision_forests as tfdf
# from sklearn.model_selection import train_test_split

truthPath = './processed_truth/'
liePath = './processed_lie/'

featuresToKeep = featuresToKeep

newFeaturesToKeep = ["gaze_0_x","gaze_0_y","gaze_0_z","gaze_angle_x", "gaze_angle_y", "AU01_r","AU04_r","AU10_r","AU12_r","AU45_r"]
#["pose_Tx","pose_Ty", "pose_Tz", "pose_Ry"]

# create a single dataset from a specified patha (must be all truth or all lie)
def createDatasetSingle(path, truth):
  df = pd.concat(map(pd.read_csv, glob.glob(os.path.join(path+"*.csv")))).reset_index()
  addGazeDelta(df)
  addTFLabel(df, truth)
  df = filterColumn(df)

  return df

# input a truthpath and a liepath, create a dual dataset and create a train
# test split based on the testRatio
# outputs total train, train with x, train with y, test with x, and test with y
def createDatasetRF(truthPath, liePath, testRatio, byPerson = False, personlst = []):
  dfT = createDatasetSingle(truthPath, True)
  dfL = createDatasetSingle(liePath, False)
  
  dfTotal = veticalMerge(dfT, dfL, shuffle=True)
  
  if byPerson:
    Train, Test = shuffleByPerson(dfTotal, testRatio, personlst)
  else:
    Train, Test = train_test_split(dfTotal, test_size=testRatio, shuffle=False)

  Xtrain, Ytrain = Train.reset_index().drop(columns = ["Result", "Person", "index", "level_0"]), Train["Result"]
  Xtest, Ytest = Test.reset_index().drop(columns = ["Result", "Person", "index", "level_0"]), Test["Result"]
  Train = Train.reset_index().drop(columns = ["index", "Person", "level_0"])

  return Train, Xtrain, Ytrain, Xtest, Ytest

def createDatasetLSTM(truthPath, liePath, testRatio, numFrames=10, minConfidence=0.9, byPerson=False, personlst = []):
  dfT = createDatasetSingle(truthPath, True)
  dfL = createDatasetSingle(liePath, False)

  dfMap = {1:dfT, 0:dfL}

  Xtrain, Ytrain, Xtest, Ytest = [], [], [], []

  idxTotext = {0:"Lie", 1:"Truth"}

  for idx in dfMap:
    print(f'Processing {idxTotext[idx]}')
    
    if byPerson:
      Train, Test = shuffleByPerson(dfMap[idx], lst = personlst)
    elif not byPerson:
      Train, Test = shuffleByPerson(dfMap[idx], ratio = testRatio)
    
    print(f'Processing Train')
    trainGroups = Train.groupby("Person")

    for i in trainGroups.groups:
      currData = trainGroups.get_group(i).sort_index()
      bad_frames = np.where(currData["confidence"] < minConfidence)[0]
      # print(f'Processing Person {i}, shape of data is {currData.shape}')
      
      blocksLst = getLSTMBlocks(bad_frames.tolist(), currData.shape[0], blockSize=numFrames, start=0)

      for i, j in tqdm(blocksLst):
        Xtrain.append(currData.iloc[i:j+1].reset_index().drop(columns = ["index", "confidence", "Result", "Person"]).to_numpy())
        Ytrain.append(idx)
      
    print(f'Processing Test')
    testGroups = Test.groupby("Person")
    for i in testGroups.groups:
      currData = testGroups.get_group(i).sort_index()
      bad_frames = np.where(currData["confidence"] < minConfidence)[0]
      # print(f'Processing Person {i}, shape of data is {currData.shape}')
      
      blocksLst = getLSTMBlocks(bad_frames.tolist(), currData.shape[0], blockSize=numFrames, start=0)

      for i, j in tqdm(blocksLst):
        Xtest.append(currData.iloc[i:j+1].reset_index().drop(columns = ["index", "confidence", "Result", "Person"]).to_numpy())
        Ytest.append(idx)

  Xtrain = np.array(Xtrain)
  Ytrain = np.array(Ytrain)
  Xtest = np.array(Xtest)
  Ytest = np.array(Ytest)
  print(f'Processing Person {i}, currData is {currData.shape}')

  random.seed(random.randint(1, 100))

  # Create an array of indices, then shuffle it
  indices = np.arange(len(Xtrain)).astype(int)
  np.random.shuffle(indices)

  # Same order of indices for both X and Y
  Xtrain  = Xtrain[indices]
  Ytrain = Ytrain[indices]

  random.seed(random.randint(1, 100))

  # Create an array of indices, then shuffle it
  indices = np.arange(len(Xtest)).astype(int)
  np.random.shuffle(indices)

  # Same order of indices for both X and Y
  Xtest  = Xtest[indices]
  Ytest = Ytest[indices]

  return Xtrain, Ytrain, Xtest, Ytest

def preprocessing(truthPath, liePath, additionalPath=None, minConfidence = 0.9, numOfFrames = 10, byPerson = False):

  data = []
  label = []

  if not byPerson:

    for file in sorted(os.listdir(truthPath)):
      if file.endswith(".csv"):
        df = pd.read_csv(truthPath + file)
        
        bad_frame = set(np.where(df["confidence"] < minConfidence)[0])
        df = filterColumn(df, colList=newFeaturesToKeep)

        index = numOfFrames
        next_index = numOfFrames
        
        while index < len(df):
          if index not in bad_frame and index >= next_index:
            data.append((df.iloc[index-numOfFrames:index]).to_numpy())
            label.append(1)
          elif index in bad_frame:
            next_index = index + numOfFrames
          index += 1

    for file in sorted(os.listdir(liePath)):
      if file.endswith(".csv"):
        df = pd.read_csv(liePath + file)

        bad_frame = set(np.where(df["confidence"] < minConfidence)[0])
        df= filterColumn(df, colList=newFeaturesToKeep)

        index = numOfFrames
        next_index = numOfFrames
        
        while index < len(df):
          if index not in bad_frame and index >= next_index:
            data.append((df.iloc[index-numOfFrames:index]).to_numpy())
            label.append(0)
          elif index in bad_frame:
            next_index = index + numOfFrames
          index += 1

    if additionalPath:
      for file in sorted(os.listdir(additionalPath)):
        if file.endswith(".csv"):
          df = pd.read_csv(additionalPath + file)
          
          bad_frame = set(np.where(df["confidence"] < minConfidence)[0])
          df= filterColumn(df, colList=newFeaturesToKeep)

          index = numOfFrames
          next_index = numOfFrames
          
          while index < len(df):
            if index not in bad_frame and index >= next_index:
              data.append((df.iloc[index-numOfFrames:index]).to_numpy())
              if file.endswith("T.csv"):
                label.append(1)
              elif file.endswith("L.csv"):
                label.append(0)
            elif index in bad_frame:
              next_index = index + numOfFrames
            index += 1

    data = np.array(data)
    label = np.array(label)
    random.seed(random.randint(1, 100))

    # Create an array of indices, then shuffle it
    indices = np.arange(len(data)).astype(int)
    np.random.shuffle(indices)

    # Same order of indices for both X and Y
    data  = data[indices]
    label = label[indices]

  return data, label

In [6]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

# import dataset
# import test_lstm

  from .autonotebook import tqdm as notebook_tqdm


In [156]:
#Models - LSTM and Transformer
class classifierLSTM(nn.Module):
  
  def __init__(self, input_size, hidden_size, frame_count, device, dropout = 0.3, output_size = 2):

    super().__init__()
    
    self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
    self.dropout = nn.Dropout(dropout)
    self.fc = nn.Linear((hidden_size * frame_count), output_size)
    self.device = device

  def forward(self, x):
    # x = x.unsqueeze(0)
    pred, _ = self.lstm(x)
    dropped = self.dropout(pred)
    data = dropped.reshape((dropped.shape[0], -1))
    #reshape to [1, num_frames * hidden_size]
    data = self.fc(data)
    data = nn.functional.softmax(data, dim = 1).to(self.device)
    return data

In [136]:
#data prep

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

lie_trial_path = './processed_lie/' #60 entries
truth_trial_path = './processed_truth/' #61 entries
MU3D_path = './processed/' # 300 entries

# no split by person
numOfFrames = 10

In [66]:
TEST_RATIO = 0.2

xTrain, yTrain, xTest, yTest = createDatasetLSTM(truth_trial_path, lie_trial_path, TEST_RATIO, numFrames=numOfFrames)

yTrain_temp, yTest_temp = [], []

for i in range(yTrain.shape[0]):
    yTrain_temp.append([1,0]) if yTrain[i] == 0 else yTrain_temp.append([0,1])

for i in range(yTest.shape[0]):
    yTest_temp.append([1,0]) if yTest[i] == 0 else yTest_temp.append([0,1])

y_Train = torch.tensor(yTrain_temp, dtype=torch.float32).to(device)
y_Test = torch.tensor(yTest_temp, dtype=torch.float32).to(device)

x_Train = torch.tensor(xTrain, dtype=torch.float32).to(device)
x_Test = torch.tensor(xTest, dtype=torch.float32).to(device)

Processing Truth
Persons 0 to 29 are in the training set, and 30 to 36 are in the testing set
Processing Train


100%|███████████████████████████████████████| 395/395 [00:00<00:00, 2559.81it/s]
100%|███████████████████████████████████████| 563/563 [00:00<00:00, 2555.51it/s]
100%|█████████████████████████████████████| 6559/6559 [00:02<00:00, 2605.10it/s]
100%|█████████████████████████████████████| 8145/8145 [00:03<00:00, 2609.44it/s]
100%|█████████████████████████████████████████| 35/35 [00:00<00:00, 2374.65it/s]
100%|███████████████████████████████████████| 490/490 [00:00<00:00, 2611.48it/s]
100%|███████████████████████████████████████| 152/152 [00:00<00:00, 2521.77it/s]
100%|███████████████████████████████████████| 239/239 [00:00<00:00, 2572.32it/s]
100%|███████████████████████████████████████| 802/802 [00:00<00:00, 2588.00it/s]
100%|███████████████████████████████████████| 666/666 [00:00<00:00, 2644.61it/s]
100%|█████████████████████████████████████| 1225/1225 [00:00<00:00, 2577.13it/s]
100%|███████████████████████████████████████| 721/721 [00:00<00:00, 2593.79it/s]
100%|███████████████████████

Processing Test


0it [00:00, ?it/s]
100%|███████████████████████████████████████| 221/221 [00:00<00:00, 2608.07it/s]
100%|███████████████████████████████████████| 186/186 [00:00<00:00, 2606.61it/s]
100%|███████████████████████████████████████| 848/848 [00:00<00:00, 2549.17it/s]
100%|███████████████████████████████████████████| 5/5 [00:00<00:00, 2077.21it/s]
100%|█████████████████████████████████████| 1056/1056 [00:00<00:00, 2542.58it/s]
100%|█████████████████████████████████████| 2600/2600 [00:01<00:00, 2564.32it/s]
100%|█████████████████████████████████████| 2428/2428 [00:00<00:00, 2565.98it/s]


Processing Lie
Persons 0 to 46 are in the training set, and 47 to 58 are in the testing set
Processing Train


100%|█████████████████████████████████████| 4393/4393 [00:01<00:00, 2615.91it/s]
100%|█████████████████████████████████████| 4428/4428 [00:01<00:00, 2610.77it/s]
100%|█████████████████████████████████████| 2617/2617 [00:01<00:00, 2547.29it/s]
100%|█████████████████████████████████████| 4462/4462 [00:01<00:00, 2563.81it/s]
100%|███████████████████████████████████████| 690/690 [00:00<00:00, 2547.85it/s]
100%|███████████████████████████████████████| 429/429 [00:00<00:00, 2560.58it/s]
100%|███████████████████████████████████████| 677/677 [00:00<00:00, 2540.29it/s]
100%|███████████████████████████████████████| 168/168 [00:00<00:00, 2526.36it/s]
100%|███████████████████████████████████████| 263/263 [00:00<00:00, 2516.10it/s]
100%|███████████████████████████████████████| 503/503 [00:00<00:00, 2524.32it/s]
100%|███████████████████████████████████████| 495/495 [00:00<00:00, 2569.22it/s]
100%|███████████████████████████████████████| 301/301 [00:00<00:00, 2551.98it/s]
100%|███████████████████████

Processing Test


0it [00:00, ?it/s]
100%|█████████████████████████████████████████| 70/70 [00:00<00:00, 2526.99it/s]
100%|█████████████████████████████████████████| 75/75 [00:00<00:00, 2389.14it/s]
100%|█████████████████████████████████████| 1148/1148 [00:00<00:00, 2552.40it/s]
100%|█████████████████████████████████████████| 29/29 [00:00<00:00, 2339.04it/s]
100%|███████████████████████████████████████| 339/339 [00:00<00:00, 2488.68it/s]
100%|███████████████████████████████████████| 447/447 [00:00<00:00, 2576.92it/s]
100%|███████████████████████████████████████| 576/576 [00:00<00:00, 2545.66it/s]
100%|███████████████████████████████████████| 476/476 [00:00<00:00, 2546.20it/s]
100%|███████████████████████████████████████| 569/569 [00:00<00:00, 2531.91it/s]
100%|█████████████████████████████████████████| 81/81 [00:00<00:00, 2443.51it/s]
100%|███████████████████████████████████████| 199/199 [00:00<00:00, 2459.31it/s]
100%|███████████████████████████████████████| 246/246 [00:00<00:00, 2534.96it/s]


Processing Person 827, currData is (870, 17)


In [64]:
print(x_Train.shape)

torch.Size([50182, 10, 14])


In [None]:
import warnings
warnings.filterwarnings("ignore")

#model prep
featCount = 14
num_frames = 10
encoder_layers = 2
LSTM_hidden = 256

LSTM = classifierLSTM(featCount, LSTM_hidden, num_frames, device)

# training
def train(model, xTrain, yTrain, xTest, yTest, epochs = 100, lr = 0.005, batch_size = 10):
    """ Train a model on a dataset """
    
    # create a data loader to handle batching
    xTrain_loader =  torch.utils.data.DataLoader(xTrain, batch_size=batch_size, shuffle=False)
    xTest_loader = torch.utils.data.DataLoader(xTest, batch_size=batch_size, shuffle=False)

    # create a loss function and optimizer
    loss_fn = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    # train the model
    for epoch in range(epochs):

        # train

        idx = 0
        model.train()

        tot_loss = 0
        tot_acc = 0
        for batch in xTrain_loader:

            # get data
            x_train = batch.to(device).float()
            y_train = torch.tensor(yTrain[idx:min(idx+batch_size,len(yTrain))]).float().clone().detach().to(device)
            
            if x_train.shape == torch.Size([10, 10, 14]):
            
                optimizer.zero_grad()

                # forward pass
                y_pred = model(x_train)

                actual_batch = torch.argmax(y_train, dim=1).long()
                my_pred_batch = torch.argmax(y_pred, dim=1).long()
                tot_acc += ((actual_batch == my_pred_batch).sum().item() / len(actual_batch))
                #print("actual for batch ", idx, " is ", torch.argmax(y_train, dim=1).long())
                #print("my prediction for batch ", idx, " is ", torch.argmax(y_pred, dim=1).long())

                # compute loss
                loss = loss_fn(y_pred,torch.argmax(y_train, dim=1).long())

                tot_loss += loss.item()

                # backward pass
                loss.backward()

                # update weights
                optimizer.step()

                idx += batch_size
            
        total_loss = tot_loss / len(xTrain_loader)
        total_acc = tot_acc / len(xTrain_loader)
        print(f'Epoch {epoch+1}/{epochs}, Loss: {total_loss:.4f}, Accuracy: {total_acc:.4f}')

        # evaluate
        model.eval()

        if epoch % 10 == 0:

            with torch.no_grad():
            
                idx_test = 0
                test_acc = 0
                for batch in xTest_loader:
                    xTest = batch.to(device).float()
                    y_test = torch.tensor(yTest[idx_test:min(idx_test+batch_size,len(yTest))]).float().clone().detach().to(device)
                    y_pred = model(xTest)

                    actual_batch = torch.argmax(y_test, dim=1).long()
                    my_pred_batch = torch.argmax(y_pred, dim=1).long()

                    #compute test accuracy
                    test_acc += (actual_batch == my_pred_batch).float().mean().item()
                    idx_test += batch_size

                test_acc /= len(xTest_loader)
                print(f'Epoch {epoch+1}/{epochs}, Test Accuracy: {test_acc:.4f}')  

train(LSTM, x_Train, y_Train, x_Test, y_Test)