In [None]:
# FACT HRC (FACT-support): example BLSTM model implementation
# Attention-BLSTM emotion classification

# import the required modules
from __future__ import print_function
import pandas as pd
import numpy as np
import csv
import datetime
import statistics

import tensorflow as tf
# Restrict TensorFlow to the first GPU and let it allocate memory on demand.
# Guarded so the notebook still runs on a machine where no GPU is listed
# (indexing gpus[0] on an empty list would raise IndexError).
gpus = tf.config.experimental.list_physical_devices(device_type='GPU')
if gpus:
    tf.config.experimental.set_visible_devices(devices=gpus[0], device_type='GPU')
    tf.config.experimental.set_memory_growth(device=gpus[0], enable=True)
else:
    print('No GPU detected; TensorFlow device configuration skipped.')

import keras
from keras import backend as K
from keras.models import *
from keras.layers import *
from keras.callbacks import EarlyStopping
from keras.optimizers import Adamax
from keras import initializers

from sklearn.metrics import confusion_matrix,f1_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import normalize,LabelEncoder
from sklearn.utils import class_weight

# Attention implemented by https://github.com/CyberZHG/keras-self-attention
from keras_self_attention import SeqSelfAttention

# turn off the warnings; be careful when using this
#import warnings
#warnings.filterwarnings("ignore")

In [None]:
# Training configuration for the attention-BLSTM

# number of output classes per prediction target
# (these names are looked up by the training loop as 'nb_class_' + c_mode)
nb_class_base = 4 # {'STATIONARY', 'TO OPERATOR', 'ROTATING', 'TO PARTICIPANT'}
nb_class_arm = 3 # {'STATIONARY', 'REACHING', 'TUCKING'}
nb_class_otp = 3 # {'LEFT', 'MIDDLE', 'RIGHT'} (listed in the index order used by one_hot)

# number of features
# nb_feat_time = 3 # ['time (s)', 'episode', 'step']
# nb_feat_kp_cor = 75 # (x,y,z) of the 25 facial and upper body keypoints
# nb_feat_kp_con = 25 # confidence of the facial and upper body keypoints
# nb_feat_task = 1 # task progress
# nb_feat_emo = 20 # categorical and arousal-valence for both cameras
# nb_feat_rw = 3 # reward values: operator's rating, emotional reward, combined
# nb_feat_all = nb_feat_time + nb_feat_kp_cor + nb_feat_kp_con + nb_feat_task + nb_feat_emo + nb_feat_rw

# maximum number of epochs to train each model
nb_epoch = 20
# mini-batch size used for every gradient estimate
batch_size = 32

# optimizer shared by all runs
# NOTE(review): recent Keras releases spell the `lr` argument `learning_rate`;
# confirm against the installed version before renaming.
opt_func = Adamax(lr=0.0005, beta_1=0.9, beta_2=0.999, epsilon=1e-08)

# stop a run early once the monitored quantity has not improved for 5 epochs;
# note that this watches the training loss ('loss'), not a validation metric
early_stopping = EarlyStopping(patience=5, monitor='loss')

In [None]:
# reshape pandas.DataFrame to Keras style: (batch_size, time_step, nb_features)
def reshape_data(data, n_prev):
    """Cut `data` into one window of `n_prev` consecutive rows per input row.

    Window i starts at row i. Once a window would run past the end of
    `data`, the start is clamped so that every remaining row receives the
    final full window (rows len(data)-n_prev .. len(data)-1).

    Args:
        data: sliceable sequence of rows (DataFrame, ndarray, list, ...).
        n_prev: number of rows in each window (the time-step length).

    Returns:
        np.ndarray built from the list of windows, one window per row of `data`.
    """
    n_rows = len(data)
    # start index of the last full window; every later row re-uses it
    last_start = n_rows - n_prev
    windows = [
        data[min(row, last_start):min(row, last_start) + n_prev]
        for row in range(n_rows)
    ]
    return np.array(windows)

# one-hot encoding of the class labels
def one_hot(labels, c_mode):
    """Convert string class labels into one-hot rows.

    Args:
        labels: iterable of label strings for the chosen class type.
        c_mode: which label set to use: 'arm', 'base' or 'otp'.

    Returns:
        np.ndarray with one row per label; the position of the 1 is the
        label's index in the ordered class list for `c_mode`.

    Raises:
        ValueError: if `c_mode` is not a known class type, or a label is not
            part of that class type. (Previously an unknown label silently
            re-used the encoding of the preceding row.)
    """
    # class names in encoding order: index in the tuple == index of the 1
    class_names_by_mode = {
        'arm': ('STATIONARY', 'REACHING', 'TUCKING'),
        'base': ('STATIONARY', 'TO OPERATOR', 'ROTATING', 'TO PARTICIPANT'),
        'otp': ('LEFT', 'MIDDLE', 'RIGHT'),
    }
    if c_mode not in class_names_by_mode:
        raise ValueError("unknown c_mode %r; expected one of %s"
                         % (c_mode, sorted(class_names_by_mode)))

    class_names = class_names_by_mode[c_mode]
    position = {name: idx for idx, name in enumerate(class_names)}

    labels_converted = []
    for label in labels:
        if label not in position:
            raise ValueError("unknown label %r for c_mode %r; expected one of %s"
                             % (label, c_mode, list(class_names)))
        row = [0] * len(class_names)
        row[position[label]] = 1
        labels_converted.append(row)
    return np.asarray(labels_converted)

# construct feature sets
def feature_ab(df_file, c_mode):
    """Load one CSV and build the feature subsets used for the ablation study.

    Feature subsets are taken by column position, so the CSV must follow the
    column layout the slices below assume.
    NOTE(review): the slice boundaries look like time (0-2), emotion (3-22),
    keypoints (23-122), task progress (123), rewards (124-126) -- confirm
    against the data files.

    Args:
        df_file: path of the CSV file to read (first row is the header).
        c_mode: label set to encode: 'arm', 'base' or 'otp'.

    Returns:
        Tuple (x_all, x_no_time, x_no_rw, x_no_emo, x_no_tp, x_no_conf,
        x_no_S1, y, y_no_S1). The x_* entries are DataFrames; y and y_no_S1
        are one-hot label arrays for all rows and for rows with episode > 3.

    Raises:
        ValueError: if `c_mode` is not one of the known label sets.
    """
    # column holding the labels for each class type
    status_columns = {
        'arm': 'arm status',
        'base': 'base status',
        'otp': 'handover status',
    }
    if c_mode not in status_columns:
        raise ValueError("unknown c_mode %r; expected one of %s"
                         % (c_mode, sorted(status_columns)))

    # read in data
    data = pd.read_csv(df_file, header=0)

    # full feature set
    x_all = data.iloc[:, :127]
    # drop S1 handover episodes in training set: episodes [0,1,2,3]
    no_S1_data = data[data.episode > 3]
    # fixed: this was assigned to `X_no_S1`, leaving the returned `x_no_S1` undefined
    x_no_S1 = no_S1_data.iloc[:, :127]

    # ablation sets; each slice is narrower than the previous one, so the
    # removals are cumulative (e.g. x_no_rw also excludes the first 3 columns)
    x_no_time = data.iloc[:, 3:127]
    x_no_rw = data.iloc[:, 3:124]
    x_no_emo = data.iloc[:, 23:124]
    x_no_tp = data.iloc[:, 23:123]
    # remove every column whose name contains 'confidence' before slicing;
    # kept in a separate frame so `data` still refers to the full table
    data_no_conf = data[data.columns.drop(list(data.filter(regex='(confidence)')))]
    x_no_conf = data_no_conf.iloc[:, 23:98]

    # creating one-hot encoded label arrays
    label_column = status_columns[c_mode]
    y = one_hot(data[label_column], c_mode)
    y_no_S1 = one_hot(no_S1_data[label_column], c_mode)

    return x_all, x_no_time, x_no_rw, x_no_emo, x_no_tp, x_no_conf, x_no_S1, y, y_no_S1

In [None]:
# define the BLSTM model with attention
def attBLSTM(lstm_size, attention_width, nb_class, opt_func):
    """Assemble the (uncompiled) attention-BLSTM classifier.

    Layer stack: three bidirectional LSTM layers that all return full
    sequences, a SeqSelfAttention layer, and a softmax Dense output layer.

    Args:
        lstm_size: indexable with at least three entries; entries 0, 1 and 2
            give the unit counts of BLSTM layers 1, 2 and 3.
        attention_width: passed to SeqSelfAttention as `attention_width`.
        nb_class: number of units of the softmax output layer.
        opt_func: accepted for interface compatibility; not used in this
            function (the model is returned without being compiled).

    Returns:
        The assembled Sequential model.
    """
    net = Sequential()
    # BLSTM layers 1-3
    for depth in range(3):
        recurrent = LSTM(units=lstm_size[depth], return_sequences=True)
        net.add(Bidirectional(recurrent))
    # attention layer
    attention = SeqSelfAttention(attention_width=attention_width,
                                 attention_activation='sigmoid')
    net.add(attention)
    # output layer: class probabilities via softmax
    net.add(Dense(units=nb_class, activation='softmax'))
    return net

# evaluate model performance and print results
def model_eval(X_tst, Y_tst, log_f, batch_size=32, model=None):
    """Evaluate a trained model on a test set, then print and log the results.

    Class indices are taken as the argmax of the first time step of each
    sample (`t[0]`), for both the reference labels and the predictions.

    Args:
        X_tst: test inputs passed to `model.evaluate` / `model.predict`.
        Y_tst: one-hot test targets, indexed per sample as `t[0]`.
        log_f: path of the log file; results are appended to it.
        batch_size: batch size used for `model.evaluate`.
        model: model to evaluate. When omitted, the notebook-level global
            named `model` is used, which keeps existing calls working.

    Returns:
        (weighted F1, macro/unweighted F1) on the test set.

    Raises:
        NameError: if `model` is omitted and no global `model` exists.
    """
    if model is None:
        # fall back to the notebook-level model (the original implicit behaviour)
        if 'model' not in globals():
            raise NameError("name 'model' is not defined; pass model=... explicitly")
        model = globals()['model']

    model.evaluate(X_tst, Y_tst, batch_size=batch_size)
    tst_pred = model.predict(X_tst)
    y_tst_non_category = [np.argmax(t[0]) for t in Y_tst]
    y_tst_predict_non_category = [np.argmax(t[0]) for t in tst_pred]

    # compute the confusion matrix once and reuse it for console and log output
    conf_mat = confusion_matrix(y_tst_non_category, y_tst_predict_non_category)
    tst_f1 = f1_score(y_tst_non_category, y_tst_predict_non_category, average='weighted')
    tst_f1_unweighted = f1_score(y_tst_non_category, y_tst_predict_non_category, average='macro')

    print('\nConfusion Matrix on test set')
    print(conf_mat)
    print('\nWeighted F1-score on test set:', tst_f1)
    print('\nUnweighted F1-score on test set:', tst_f1_unweighted)
    with open(log_f, 'a') as logfile:
        logfile.write('\nConfusion Matrix on test set\n')
        np.savetxt(logfile, conf_mat)
        logfile.write('\nWeighted F1-score on test set: %s' % tst_f1)
        # fixed typo in the log message ("Uneighted")
        logfile.write('\nUnweighted F1-score on test set: %s' % tst_f1_unweighted)

    return tst_f1, tst_f1_unweighted

In [None]:
# Run configuration: which classes, feature sets and CV folds to iterate over
# time stamp of this run, used to name the log files
time_stamp = '{:%Y%m%d_%H%M%S}'.format(datetime.datetime.now())
# class types to predict
c_mode_list = ['arm', 'base', 'otp']
# feature sets of the ablation study
f_mode_list = ['all'] + ['no_' + removed for removed in ('time', 'rw', 'emo', 'tp', 'conf')]
# f_mode_list = ['all', 'no_time', 'no_rw', 'no_emo', 'no_tp', 'no_conf', 'no_S1'] # test removing S1 data
# file-name suffixes of the 5 cross-validation folds
cv_list = ['cv_%d.csv' % fold for fold in range(1, 6)]

In [None]:
# ablation study with the best model parameters
# Trains and evaluates one model per (class, feature set, CV fold) and logs
# the per-fold results plus the CV-averaged F1 scores.

# best parameter combo resulted from grid search on dev set,
# stored as [time_step, [h1, h2, h3], attention_width]
best_para = {
    'arm': [1, [16, 8, 4], 2],
    'base': [5, [64, 32, 16], 1],
    'otp': [10, [64, 32, 16], 1],
}
# number of output classes per class type
nb_class_by_mode = {'arm': nb_class_arm, 'base': nb_class_base, 'otp': nb_class_otp}
# names of the feature sets, in the order feature_ab returns them
feature_set_names = ('all', 'no_time', 'no_rw', 'no_emo', 'no_tp', 'no_conf', 'no_S1')

# loop by each class
for c_mode in c_mode_list:
    file_log = 'exp_cv/logs/LSTM_' + c_mode + '_log_' + time_stamp + '.txt'
    para = best_para[c_mode]

    # loop by feature set
    for f_mode in f_mode_list:
        model_dir = 'exp_cv/models/LSTM/' + c_mode + '/' + f_mode + '/'
        # per-fold scores, averaged after the fold loop
        f1_weighted = []
        f1_unweighted = []

        # loop by cross-validation fold
        for cv in cv_list:
            # input data files
            df_file_trn = 'data/ML/combined_ML_trn_' + cv
            df_file_tst = 'data/ML/combined_ML_tst_' + cv

            # information about the run
            run_info = ('\nclass = %s, features = %s, fold = %s\ntime_step = %s, [h1,h2,h3] = %s, attention = %s\n'
                        % (c_mode, f_mode, cv, para[0], para[1], para[2]))
            print('\n====================================')
            print(run_info)
            print('------------------\n')
            with open(file_log, 'a') as outfile:
                outfile.write('\n====================================')
                outfile.write(run_info)
                outfile.write('------------------\n')

            # read in data
            # (fixed: the original split these assignments after '=' without a
            # line continuation, which is a SyntaxError)
            trn_sets = feature_ab(df_file_trn, c_mode)
            tst_sets = feature_ab(df_file_tst, c_mode)
            # first 7 entries are the feature sets, then y and y_no_S1
            x_trn_by_mode = dict(zip(feature_set_names, trn_sets[:7]))
            x_tst_by_mode = dict(zip(feature_set_names, tst_sets[:7]))
            y_trn, y_no_S1_trn = trn_sets[7], trn_sets[8]
            y_tst = tst_sets[7]

            # time step padding
            X_trn = reshape_data(x_trn_by_mode[f_mode], para[0])
            X_tst = reshape_data(x_tst_by_mode[f_mode], para[0])
            if f_mode == 'no_S1':
                Y_trn = reshape_data(y_no_S1_trn, para[0])
                # Y_tst = reshape_data(tst_sets[8], para[0]) # drop S1 in test set too
                # NOTE(review): in this mode X_tst comes from the S1-filtered
                # test features while Y_tst uses all test labels, so their
                # lengths can differ -- decide which side should be filtered.
                Y_tst = reshape_data(y_tst, para[0])
            else:
                Y_trn = reshape_data(y_trn, para[0])
                Y_tst = reshape_data(y_tst, para[0])

            # training
            # build a fresh optimizer from the shared configuration so that
            # optimizer state is not carried over from one model to the next
            optimizer = type(opt_func).from_config(opt_func.get_config())
            model = attBLSTM(para[1], para[2], nb_class_by_mode[c_mode], optimizer)
            model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['categorical_accuracy'])
            model.fit(X_trn, Y_trn, batch_size=batch_size, epochs=nb_epoch,
                      validation_split=0.05, callbacks=[early_stopping], verbose=2)

            # evaluation
            tst_f1, tst_f1_unweighted = model_eval(X_tst, Y_tst, file_log, batch_size=batch_size)
            f1_weighted.append(tst_f1)
            f1_unweighted.append(tst_f1_unweighted)
            # NOTE(review): every fold saves to the same model_dir, so each
            # fold's save targets the path used by the previous fold.
            model.save(model_dir)

        # calculate CV average
        f1_weighted_average = statistics.mean(f1_weighted)
        f1_unweighted_average = statistics.mean(f1_unweighted)
        cv_summary = ('class = %s, features = %s, F1 (weighted) = %s, F1 (unweighted) = %s\n'
                      % (c_mode, f_mode, f1_weighted_average, f1_unweighted_average))
        print('\n****** CV Summary ******\n')
        print(cv_summary)
        print('====================================\n')
        with open(file_log, 'a') as outfile:
            outfile.write('\n****** CV Summary ******\n')
            outfile.write(cv_summary)
            outfile.write('====================================\n')


In [None]:
# para_list = []
# tst_pred_list = []
# f1_list = []
# count = 1

# # parameters to be investigated in grid search
# time_steps = [1,5,10]  # input history to include: [1,5,10]
# lstm_sizes = [[16,8,4],[32,16,8],[64,32,16]] # number of neurons in the BLSTM layers: [[16,8,4],[32,16,8],[64,32,16]]
# attention_widths = [1,2,4] # width of the local context for the attention layer: [1,2,4]

# grid_search = False

# # Grid search on dev set
# if grid_search:
#     for time_step in time_steps:
#         # pad data
#         X_trn = reshape_data(globals()['x_'+str(f_mode)+'_trn'], time_step)
#         X_dev = reshape_data(globals()['x_'+str(f_mode)+'_dev'], time_step)
#         Y_trn = reshape_data(globals()['y_'+str(c_mode)+'_trn'], time_step)
#         Y_dev = reshape_data(globals()['y_'+str(c_mode)+'_dev'], time_step)
#         for lstm_size in lstm_sizes:
#             for attention_width in attention_widths:
#                 para_list.append([time_step, lstm_size, attention_width]) # save parameter set
#                 print('\n================================ No. %s of 27 ========================================' % count)
#                 print('\nParameters: time_step = %s, [h1, h2, h3] = %s, attention_width = %s\n' 
#                       % (time_step, lstm_size, attention_width))
#                 # build model with given parameters
#                 model = attBLSTM(lstm_size, attention_width, globals()['nb_class_'+str(c_mode)], opt_func)
#                 # compile the model
#                 model.compile(loss='categorical_crossentropy', optimizer=opt_func, metrics=['categorical_accuracy'])
#                 # training the model
#                 model.fit(X_trn, Y_trn, batch_size=batch_size, epochs=nb_epoch, 
#                           validation_split=0.05, callbacks=[early_stopping], verbose=2)
#                 # evaluation
#                 model.evaluate(X_dev, Y_dev, batch_size=batch_size)
#                 # save model
#                 model_f = model_dir + str(count)
#                 model.save(model_f)

#                 # save predictions
#                 tst_pred = model.predict(X_dev)
#                 tst_pred_list.append(tst_pred) # save predictions

#                 # print confusion matrix
#                 y_test_non_category = [ np.argmax(t[0]) for t in Y_dev ]
#                 y_predict_non_category = [ np.argmax(t[0]) for t in tst_pred ]
#                 print('Confusion Matrix on dev set')
#                 print(confusion_matrix(y_test_non_category, y_predict_non_category))
#                 tst_f1 = f1_score(y_test_non_category, y_predict_non_category, average='weighted')
#                 f1_list.append(tst_f1) # save f1 score
#                 print('Weighted F1-score on dev set:', tst_f1)
#                 # print grid search log
#                 with open(file_log, 'a') as logfile:
#                     logfile.write('\n================================ No. %s of 27 ========================================\n' % count)
#                     logfile.write('F1 = %s; Parameters: time_step = %s, [h1, h2, h3] = %s, attention_width = %s\n' 
#                                   % (tst_f1, time_step, lstm_size, attention_width))
#                     logfile.write('Confusion Matrix on dev set\n')
#                     np.savetxt(logfile, confusion_matrix(y_test_non_category, y_predict_non_category))          
#                 count = count + 1

In [None]:
# # get the best parameter set
# best = f1_list.index(max(f1_list)) # find the highest F1 score
# best_count = best + 1
# result = f1_list[best]
# para = para_list[best]
# # prediction = tst_pred_list[best]
# print('Best Run at No.%s; F1 = %s; Parameters: time_step = %s, [h1,h2,h3] = %s, attention_width = %s\n' 
#       % (best_count, result, para[0], para[1], para[2]))

# with open(file_out, 'a') as outfile:
#     outfile.write('Best Run at No.%s; F1 = %s; Parameters: time_step = %s, [h1,h2,h3] = %s, attention_width = %s\n' 
#                    % (best_count, result, para[0], para[1], para[2]))

# # save predictions in case of significance test
# # with open(file_pred, 'a') as predfile:
# #     for pred in prediction:
# #         indi_pred = []
# #         indi_pred = pred[0] # reform the seq prediction to individual samples
# #         row = ', '.join(map(str, indi_pred))
# #         predfile.write('%s\n' % row)