In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import helpers
import os, random

In [2]:
#Model - LSTM
class classifierLSTM(nn.Module):
  
  def __init__(self, input_size, hidden_size, frame_count, device, dropout = 0.3, output_size = 2):
    super().__init__()
    
    self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
    self.dropout = nn.Dropout(dropout)
    self.fc = nn.Linear((hidden_size * frame_count), output_size)
    self.device = device

  def forward(self, x):
    # x = x.unsqueeze(0)
    pred, _ = self.lstm(x)
    dropped = self.dropout(pred)
    data = dropped.reshape((dropped.shape[0], -1))
    data = self.fc(data)
    return data

DATA PREP

In [3]:
lie_trial_path = './data/OpenFace/trial/lie/' #60 entries
truth_trial_path = './data/OpenFace/trial/truth/' #61 entries

lie_MU3D_path = './data/OpenFace/MU3D/lie/' 
truth_MU3D_path = './data/OpenFace/MU3D/truth/'

lie_BOL_path = './data/OpenFace/BOL/lie/' 
truth_BOL_path = './data/OpenFace/BOL/truth/' 

OFTruth = [truth_trial_path, truth_MU3D_path, truth_BOL_path]
OFLie = [lie_trial_path, lie_MU3D_path, lie_BOL_path]

lie_trial_path = './data/TransFormer/trial/lie/' #60 entries
truth_trial_path = './data/TransFormer/trial/truth/' #61 entries

lie_MU3D_path = './data/TransFormer/MU3D/lie/'
truth_MU3D_path = './data/TransFormer/MU3D/truth/'

lie_BOL_path = './data/TransFormer/BOL/lie/'
truth_BOL_path = './data/TransFormer/BOL/truth/'

TTruth = [truth_trial_path, truth_MU3D_path, truth_BOL_path]
TLie = [lie_trial_path, lie_MU3D_path, lie_BOL_path]

features = ["gaze_0_x","gaze_0_y","gaze_0_z","gaze_angle_x", "gaze_angle_y", "AU01_r","AU04_r","AU10_r","AU12_r","AU45_r"]

# 0 = Openface
# 1 = Openface + TransFormer
# 2 = TransFormer

mode = 0

def processingOF(truthPath, liePath, minConfidence = 0.9, numOfFrames = 10):

    data = []
    label = []

    #truthPath is going to be a list of paths
    for path in truthPath:
        for file in sorted(os.listdir(path)):
            if file.endswith(".csv"):
                df = pd.read_csv(path + file)
            
                truth_bad_frame = set(np.where(df["confidence"] < minConfidence)[0])
                df = helpers.filterColumn(df, colList=features)

                index = numOfFrames
                next_index = numOfFrames
                
                while index < len(df):
                    if index not in truth_bad_frame and index >= next_index:
                        data.append((df.iloc[index-numOfFrames:index]).to_numpy())
                        label.append(1)
                    elif index in truth_bad_frame:
                        next_index = index + numOfFrames
                    index += 1

    for path in liePath:
        for file in sorted(os.listdir(path)):
            if file.endswith(".csv"):
                df = pd.read_csv(path + file)
            
                lie_bad_frame = set(np.where(df["confidence"] < minConfidence)[0])
                df = helpers.filterColumn(df, colList=features)

                index = numOfFrames
                next_index = numOfFrames
                
                while index < len(df):
                    if index not in lie_bad_frame and index >= next_index:
                        data.append((df.iloc[index-numOfFrames:index]).to_numpy())
                        label.append(0)
                    elif index in lie_bad_frame:
                        next_index = index + numOfFrames
                    index += 1

    data = np.array(data)
    label = np.array(label)
    random.seed(random.randint(1, 100))

    # Create an array of indices, then shuffle it
    indices = np.arange(len(data)).astype(int)
    np.random.shuffle(indices)

    # Same order of indices for both X and Y
    data  = data[indices]
    label = label[indices]

    return data, label, truth_bad_frame, lie_bad_frame

def processingTF(truthPath, liePath, numOfFrames = 10, combine = False, truth_bad_frame = None, lie_bad_frame = None):
                
        data = []
        label = []
    
        #truthPath is going to be a list of paths
        for path in truthPath:
            for file in sorted(os.listdir(path)):
                if file.endswith(".csv"):
                    df = pd.read_csv(path + file)
    
                    index = numOfFrames
                    next_index = numOfFrames
                    
                    while index < len(df):

                        if combine:
                            if index not in truth_bad_frame and index >= next_index:
                                data.append((df.iloc[index-numOfFrames:index]).to_numpy())
                                label.append(1)
                            else:
                                next_index = index + numOfFrames
                        else:
                            data.append((df.iloc[index-numOfFrames:index]).to_numpy())
                            label.append(1)
                        
                        index += 1
    
        for path in liePath:
            for file in sorted(os.listdir(path)):
                if file.endswith(".csv"):
                    df = pd.read_csv(path + file)
    
                    index = numOfFrames
                    next_index = numOfFrames
                    
                    if combine:
                        if index not in lie_bad_frame and index >= next_index:
                            data.append((df.iloc[index-numOfFrames:index]).to_numpy())
                            label.append(0)
                        else:
                            next_index = index + numOfFrames
                    else:
                        data.append((df.iloc[index-numOfFrames:index]).to_numpy())
                        label.append(0)
                    
                    index += 1
    
        data = np.array(data)
        label = np.array(label)
        random.seed(random.randint(1, 100))
    
        # Create an array of indices, then shuffle it
        indices = np.arange(len(data)).astype(int)
        np.random.shuffle(indices)
    
        # Same order of indices for both X and Y
        data  = data[indices]
        label = label[indices]
    
        if combine:
            data = np.reshape(data, (data.shape[0], data.shape[1] * data.shape[2]))
    
        return data, label

if mode == 0:
    X, Y, _, _ = processingOF(OFTruth, OFLie)
elif mode == 1:
    X1, Y, TB, LB = processingOF(OFTruth, OFLie)
    X2, _ = processingTF(TTruth, TLie, combine = True, truth_bad_frame = TB, lie_bad_frame = LB)
    X = np.concatenate((X1, X2), axis = 1)
elif mode == 2:
    X, Y = processingTF(TTruth, TLie, combine = False)

print(f'There are {X.shape[0]} batches, each with {X.shape[1]} frames and {X.shape[2]} features. Together there are {X.shape[0] * X.shape[1] * X.shape[2]} data points and {X.shape[0] * X.shape[1]} frames.')

There are 475920 batches, each with 10 frames and 10 features. Together there are 47592000 data points and 4759200 frames.


In [4]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# no split by person
numOfFrames = 10

TEST_RATIO = 0.2

xTrain, xTest = train_test_split(X, test_size=TEST_RATIO, shuffle=False)
yTrain, yTest = train_test_split(Y, test_size=TEST_RATIO, shuffle=False)

yTrain_temp, yTest_temp = [], []

for i in range(yTrain.shape[0]):
    yTrain_temp.append([1,0]) if yTrain[i] == 0 else yTrain_temp.append([0,1])

for i in range(yTest.shape[0]):
    yTest_temp.append([1,0]) if yTest[i] == 0 else yTest_temp.append([0,1])

y_Train = torch.tensor(yTrain_temp).to(device)
y_Test = torch.tensor(yTest_temp).to(device)

x_Train = torch.tensor(xTrain, dtype=torch.float32).to(device)
x_Test = torch.tensor(xTest, dtype=torch.float32).to(device)

In [5]:
print(x_Train.shape)

torch.Size([380736, 10, 10])


TRAINING

In [6]:
import warnings
warnings.filterwarnings("ignore")

#model prep
featCount = 10
num_frames = 10
encoder_layers = 2
LSTM_hidden = 256

LSTM = classifierLSTM(featCount, LSTM_hidden, num_frames, device)

# training
def train(model, xTrain, yTrain, xTest, yTest, epochs = 100, lr = 0.005, batch_size = 10):
    """ Train a model on a dataset """
    
    # create a data loader to handle batching
    xTrain_loader =  torch.utils.data.DataLoader(xTrain, batch_size=batch_size, shuffle=False)
    xTest_loader = torch.utils.data.DataLoader(xTest, batch_size=batch_size, shuffle=False)

    # create a loss function and optimizer
    loss_fn = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    # train the model
    for epoch in range(epochs):

        # train

        idx = 0
        model.train()

        tot_loss = 0
        tot_acc = 0
        for batch in xTrain_loader:

            # get data
            x_train = batch.to(device).float()
            y_train = torch.tensor(yTrain[idx:min(idx+batch_size,len(yTrain))]).float().clone().detach().to(device)
            
            optimizer.zero_grad()

            # forward pass
            y_pred = model(x_train)

            actual_batch = torch.argmax(y_train, dim=1).long()
            my_pred_batch = torch.argmax(y_pred, dim=1).long()
            tot_acc += ((actual_batch == my_pred_batch).sum().item() / len(actual_batch))
            #print("actual for batch ", idx, " is ", torch.argmax(y_train, dim=1).long())
            #print("my prediction for batch ", idx, " is ", torch.argmax(y_pred, dim=1).long())

            # compute loss
            loss = loss_fn(y_pred,torch.argmax(y_train, dim=1).long())

            tot_loss += loss.item()

            # backward pass
            loss.backward()

            # update weights
            optimizer.step()

            idx += batch_size
            
        total_loss = tot_loss / len(xTrain_loader)
        total_acc = tot_acc / len(xTrain_loader)
        print(f'Epoch {epoch+1}/{epochs}, Loss: {total_loss:.4f}, Accuracy: {total_acc:.4f}')

        # evaluate
        model.eval()

        if epoch % 10 == 0:

            with torch.no_grad():
            
                idx_test = 0
                test_acc = 0
                for batch in xTest_loader:
                    xTest = batch.to(device).float()
                    y_test = torch.tensor(yTest[idx_test:min(idx_test+batch_size,len(yTest))]).float().clone().detach().to(device)
                    y_pred = model(xTest)

                    actual_batch = torch.argmax(y_test, dim=1).long()
                    my_pred_batch = torch.argmax(y_pred, dim=1).long()

                    #compute test accuracy
                    test_acc += (actual_batch == my_pred_batch).float().mean().item()
                    idx_test += batch_size

                test_acc /= len(xTest_loader)
                print(f'Epoch {epoch+1}/{epochs}, Test Accuracy: {test_acc:.4f}')  

train(LSTM, x_Train, y_Train, x_Test, y_Test)

Epoch 1/100, Loss: 0.6638, Accuracy: 0.5615
Epoch 1/100, Test Accuracy: 0.5754


KeyboardInterrupt: 

In [7]:
sweep_config = {
    'method': 'random'
    }
# we can choose from random, grid, and bayes

## The followings are necessary for bayes method
metric = {
    'name': 'loss',
    'goal': 'minimize'   
    }

sweep_config['metric'] = metric

parameters_dict = {
    'fc_layer_size': {
        'values': [128, 256, 512]
        },
    }

sweep_config['parameters'] = parameters_dict

# Parameters we don't want to vary
parameters_dict.update({
    'epochs': {'value': 100}
    })

parameters_dict.update({
    'learning_rate': {
        # a flat distribution between 0 and 0.1
        'distribution': 'uniform',
        'min': 0,
        'max': 0.1
      },
    'batch_size': {
        # integers between 32 and 256
        # with evenly-distributed logarithms 
        'distribution': 'q_log_uniform_values',
        'q': 8,
        'min': 32,
        'max': 256,
      }
    })

import pprint

pprint.pprint(sweep_config)

{'method': 'random',
 'metric': {'goal': 'minimize', 'name': 'loss'},
 'parameters': {'batch_size': {'distribution': 'q_log_uniform_values',
                               'max': 256,
                               'min': 32,
                               'q': 8},
                'epochs': {'value': 100},
                'fc_layer_size': {'values': [128, 256, 512]},
                'learning_rate': {'distribution': 'uniform',
                                  'max': 0.1,
                                  'min': 0}}}


In [8]:
import wandb
wandb.login()
print("file exists?", os.path.exists('Classifier-LSTM.ipynb'))
os.environ["WANDB_NOTEBOOK_NAME"] = "Classifier-LSTM.ipynb"

def sweep(config=None):
    global counter
    counter+=1
    with wandb.init(config=config, name = f"Experiment{counter}"):
        # If called by wandb.agent, as below,
        # this config will be set by Sweep Controller
        config = wandb.config
        print(config)
        train(LSTM, x_Train, y_Train, x_Test, y_Test, epochs = config.epochs, lr = config.learning_rate, batch_size = config.batch_size)
        wandb.finish()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mellali2023[0m ([33mdeception_449[0m). Use [1m`wandb login --relogin`[0m to force relogin


file exists? True


In [9]:
counter=0
sweep_id = wandb.sweep(sweep_config, project="Classifier_LSTM")
wandb.agent(sweep_id, sweep, count=10)
counter=0

Create sweep with ID: wut1tkkz
Sweep URL: https://wandb.ai/deception_449/Classifier_LSTM/sweeps/wut1tkkz


[34m[1mwandb[0m: Agent Starting Run: 8vw1i5uy with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	epochs: 100
[34m[1mwandb[0m: 	fc_layer_size: 512
[34m[1mwandb[0m: 	learning_rate: 0.05244019160850472


{'batch_size': 64, 'epochs': 100, 'fc_layer_size': 512, 'learning_rate': 0.05244019160850472}
Epoch 1/100, Loss: 0.6728, Accuracy: 0.5462
Epoch 1/100, Test Accuracy: 0.5547
Epoch 2/100, Loss: 0.6786, Accuracy: 0.5386
