## Referenced from https://github.com/amaiya/ktrain

### Example notebook for BERT text classification using the ktrain package.
Example task: predict deception ("Straightforward" or "Cassandra") using input text.

### Notes:
- BERT models take an extremely long time to train; even the supposedly faster variant (DistilBERT, which is used in this notebook) takes quite some time.
- However, validation accuracy looks promising. If we use these BERT methods for the individual models, we need to save the trained models and load them each time.

In [53]:
import os
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID";
os.environ["CUDA_VISIBLE_DEVICES"]="0"
import ktrain
from ktrain import text

from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, f1_score, precision_recall_fscore_support

from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import LSTM, Input, InputLayer, Dropout, Dense, Flatten, Embedding, Add, Concatenate
from tensorflow.keras import Sequential
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping
import tensorflow as tf

import pandas as pd
import numpy as np

import import_ipynb
import metadata_options
import models_nn

In [10]:
# ktrain model checkpoint passed to every text.Transformer below.
# Options: distilbert-base-uncased, bert-base-uncased,
#          albert-base-v2, roberta-base
k_train_model = "distilbert-base-uncased"

In [26]:
# Load the data set (with Throughput & WorkTime) and drop every row that
# contains a missing value.
df = pd.read_csv('./data/kokil dec 6 reprepare/conf_pc_worker_sem_test.csv').dropna()

In [12]:
# --- Weighted one-hot-encoding (OHE) options --------------------------------
# One option code per metadata source; all of them are forwarded unchanged to
# metadata_options.set_OHE_pipeline_options.
throughput_option = 'TP1'
worktime_option = 'WT1'
pc_agreement_option = 'PC1'
textlength_option = 'TL1'
special_option = 'SP1'
k_option_for_tp = 3

(
    df_throughput,
    df_worktime,
    df_agreement,
    df_textlength,
    df_special,
) = metadata_options.set_OHE_pipeline_options(
    df,
    throughput_option,
    worktime_option,
    pc_agreement_option,
    textlength_option,
    special_option,
    k_option_for_tp,
)

TP1: weighted by 1 average per set of OHE, i.e. (a, b, c, d) -> (w*a, w*b, w*c, w*d)
WT1: weighted by 1 average per set of OHE, i.e. (a, b, c, d) -> (w*a, w*b, w*c, w*d)
PC1: weighted by 1 average per set of OHE, i.e. (a, b, c, d) -> (w*a, w*b, w*c, w*d)
TL1: weighted by 1 normalised number of characters per set of OHE, i.e. (a, b, c, d) -> (w*a, w*b, w*c, w*d)
SP1: weighted by average of TP1 and TP2 per set of OHE, i.e. (a, b, c, d) -> (w*a, w*b, w*c, w*d)


In [27]:
# Train/test split using a single stratified shuffled split (80/20),
# stratified on the deception label.
y = df["Input.deception_quadrant"].copy()
X = df.drop(columns=["Input.deception_quadrant"])
sss = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=0)
splits_generator = sss.split(X, y)

# n_splits=1, so the generator yields exactly one (train, test) index pair.
train_idx, test_idx = next(splits_generator)
indices_train, indices_test = train_idx, test_idx

# The indices are positional, hence iloc.
train = df.iloc[indices_train]
test = df.iloc[indices_test]

In [28]:
# Label columns, in the order they are unpacked below:
# deception, rapport, share information, reasoning, game move.
_label_columns = [
    "Input.deception_quadrant",
    'Answer.3rapport.yes_label',
    'Answer.4shareinformation.yes_label',
    'Answer.2reasoning.yes_label',
    'Answer.1gamemove.yes_label',
]

# Training labels as plain Python lists.
(
    y_train_deception,
    y_train_rapport,
    y_train_share_information,
    y_train_reasoning,
    y_train_gamemove,
) = (train[column].tolist() for column in _label_columns)

# Test labels as plain Python lists.
(
    y_test_deception,
    y_test_rapport,
    y_test_share_information,
    y_test_reasoning,
    y_test_gamemove,
) = (test[column].tolist() for column in _label_columns)

In [29]:
# Raw input texts for both splits, as plain Python lists.
x_train, x_test = (
    split['Input.full_text'].tolist() for split in (train, test)
)

### Individual Models

In [30]:
# Game move classifier: distinct training labels become the class list of
# the transformer preprocessor.
t_gamemove_label = list(set(y_train_gamemove))
t_gamemove = text.Transformer(
    k_train_model,
    maxlen=500,
    classes=t_gamemove_label,
)



In [31]:
# Preprocess the train / validation texts for the game-move classifier.
trn_gamemove = t_gamemove.preprocess_train(x_train, y_train_gamemove)
val_gamemove = t_gamemove.preprocess_test(x_test, y_test_gamemove)

# Build the classifier and wrap it in a ktrain learner.
gamemove_model = t_gamemove.get_classifier()
learner_gamemove = ktrain.get_learner(
    gamemove_model,
    train_data=trn_gamemove,
    val_data=val_gamemove,
    batch_size=6,
)

# One epoch with the one-cycle policy (max LR 3e-5), then show the
# validation report.
learner_gamemove.fit_onecycle(3e-5, 1)
learner_gamemove.validate(class_names=t_gamemove.get_classes())

preprocessing train...
language: en
train sequence lengths:
	mean : 18
	95percentile : 29
	99percentile : 31


Is Multi-Label? False
preprocessing test...
language: en
test sequence lengths:
	mean : 18
	95percentile : 19
	99percentile : 19






begin training using onecycle policy with max lr of 3e-05...
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         0
           1       1.00      0.50      0.67         2

    accuracy                           0.50         2
   macro avg       0.50      0.25      0.33         2
weighted avg       1.00      0.50      0.67         2



  _warn_prf(average, modifier, msg_start, len(result))


array([[0, 0],
       [1, 1]], dtype=int64)

In [32]:
# Wrap the trained game-move model in a predictor and label both splits
# (test first, then train).
pred_gamemove = ktrain.get_predictor(gamemove_model, preproc=t_gamemove)
y_pred_test_gamemove, y_pred_train_gamemove = (
    pred_gamemove.predict(texts) for texts in (x_test, x_train)
)



In [33]:
# Reasoning classifier: distinct training labels become the class list.
t_reasoning_label = list(set(y_train_reasoning))
t_reasoning = text.Transformer(
    k_train_model,
    maxlen=500,
    classes=t_reasoning_label,
)

# Preprocess the train / validation texts.
trn_reasoning = t_reasoning.preprocess_train(x_train, y_train_reasoning)
val_reasoning = t_reasoning.preprocess_test(x_test, y_test_reasoning)

# Build the classifier and wrap it in a ktrain learner.
reasoning_model = t_reasoning.get_classifier()
learner_reasoning = ktrain.get_learner(
    reasoning_model,
    train_data=trn_reasoning,
    val_data=val_reasoning,
    batch_size=6,
)

# One epoch with the one-cycle policy (max LR 3e-5), then show the
# validation report.
learner_reasoning.fit_onecycle(3e-5, 1)
learner_reasoning.validate(class_names=t_reasoning.get_classes())



preprocessing train...
language: en
train sequence lengths:
	mean : 18
	95percentile : 29
	99percentile : 31


Is Multi-Label? False
preprocessing test...
language: en
test sequence lengths:
	mean : 18
	95percentile : 19
	99percentile : 19




begin training using onecycle policy with max lr of 3e-05...
              precision    recall  f1-score   support

           0       0.50      1.00      0.67         1
           1       0.00      0.00      0.00         1

    accuracy                           0.50         2
   macro avg       0.25      0.50      0.33         2
weighted avg       0.25      0.50      0.33         2



  _warn_prf(average, modifier, msg_start, len(result))


array([[1, 0],
       [1, 0]], dtype=int64)

In [None]:
# Wrap the trained reasoning model in a predictor and label both splits
# (test first, then train).
pred_reasoning = ktrain.get_predictor(reasoning_model, preproc=t_reasoning)
y_pred_test_reasoning, y_pred_train_reasoning = (
    pred_reasoning.predict(texts) for texts in (x_test, x_train)
)

In [None]:
# Share info classifier
# The class list must come from the share-information labels themselves
# (previously the reasoning label list was passed by mistake).
t_share_information_label = list(set(y_train_share_information))
t_share_information = text.Transformer(
    k_train_model,
    maxlen=500,
    classes=t_share_information_label,
)

# Preprocess the train / validation texts.
trn_share_information = t_share_information.preprocess_train(x_train, y_train_share_information)
val_share_information = t_share_information.preprocess_test(x_test, y_test_share_information)

# Build the classifier and wrap it in a ktrain learner.
share_information_model = t_share_information.get_classifier()
learner_share_information = ktrain.get_learner(
    share_information_model,
    train_data=trn_share_information,
    val_data=val_share_information,
    batch_size=6,
)

# One epoch with the one-cycle policy (max LR 3e-5), then show the
# validation report.
learner_share_information.fit_onecycle(3e-5, 1)
learner_share_information.validate(class_names=t_share_information.get_classes())

In [36]:
# Wrap the trained share-information model in a predictor and label both
# splits (test first, then train).
pred_share_information = ktrain.get_predictor(
    share_information_model,
    preproc=t_share_information,
)
y_pred_test_share_information, y_pred_train_share_information = (
    pred_share_information.predict(texts) for texts in (x_test, x_train)
)



In [37]:
# Rapport classifier: distinct training labels become the class list.
t_rapport_label = list(set(y_train_rapport))
t_rapport = text.Transformer(
    k_train_model,
    maxlen=500,
    classes=t_rapport_label,
)

# Preprocess the train / validation texts.
trn_rapport = t_rapport.preprocess_train(x_train, y_train_rapport)
val_rapport = t_rapport.preprocess_test(x_test, y_test_rapport)

# Build the classifier and wrap it in a ktrain learner.
rapport_model = t_rapport.get_classifier()
learner_rapport = ktrain.get_learner(
    rapport_model,
    train_data=trn_rapport,
    val_data=val_rapport,
    batch_size=6,
)

# One epoch with the one-cycle policy (max LR 3e-5), then show the
# validation report.
learner_rapport.fit_onecycle(3e-5, 1)
learner_rapport.validate(class_names=t_rapport.get_classes())



preprocessing train...
language: en
train sequence lengths:
	mean : 18
	95percentile : 29
	99percentile : 31


Is Multi-Label? False
preprocessing test...
language: en
test sequence lengths:
	mean : 18
	95percentile : 19
	99percentile : 19




begin training using onecycle policy with max lr of 3e-05...
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         1
           1       0.50      1.00      0.67         1

    accuracy                           0.50         2
   macro avg       0.25      0.50      0.33         2
weighted avg       0.25      0.50      0.33         2



  _warn_prf(average, modifier, msg_start, len(result))


array([[0, 1],
       [0, 1]], dtype=int64)

In [38]:
# Wrap the trained rapport model in a predictor and label both splits
# (test first, then train).
pred_rapport = ktrain.get_predictor(rapport_model, preproc=t_rapport)
y_pred_test_rapport, y_pred_train_rapport = (
    pred_rapport.predict(texts) for texts in (x_test, x_train)
)



In [39]:
# Deception classifier
# The deception labels are strings, so sort them to get a class order that
# does not depend on set iteration order between runs.
t_deception_label = sorted(set(y_train_deception))
t_deception = text.Transformer(
    k_train_model,
    maxlen=500,
    classes=t_deception_label,
)

# Preprocess the train / validation texts.
trn_deception = t_deception.preprocess_train(x_train, y_train_deception)
val_deception = t_deception.preprocess_test(x_test, y_test_deception)

# Build the classifier from the deception preprocessor (previously it was
# built from the rapport preprocessor by mistake).
deception_model = t_deception.get_classifier()
learner_deception = ktrain.get_learner(
    deception_model,
    train_data=trn_deception,
    val_data=val_deception,
    batch_size=6,
)

# One epoch with the one-cycle policy (max LR 3e-5), then show the
# validation report.
learner_deception.fit_onecycle(3e-5, 1)
learner_deception.validate(class_names=t_deception.get_classes())



preprocessing train...
language: en
train sequence lengths:
	mean : 18
	95percentile : 29
	99percentile : 31




Is Multi-Label? False
preprocessing test...
language: en
test sequence lengths:
	mean : 18
	95percentile : 19
	99percentile : 19




begin training using onecycle policy with max lr of 3e-05...
                 precision    recall  f1-score   support

      Cassandra       0.50      1.00      0.67         1
Straightforward       0.00      0.00      0.00         1

       accuracy                           0.50         2
      macro avg       0.25      0.50      0.33         2
   weighted avg       0.25      0.50      0.33         2



  _warn_prf(average, modifier, msg_start, len(result))


array([[1, 0],
       [1, 0]], dtype=int64)

In [40]:
# Wrap the trained deception model in a predictor and label both splits
# (test first, then train).
pred_deception = ktrain.get_predictor(deception_model, preproc=t_deception)
y_pred_test_deception, y_pred_train_deception = (
    pred_deception.predict(texts) for texts in (x_test, x_train)
)



### One hot encodings

In [41]:
# Train encodings
# One record per training sample holding the individual classifiers'
# predictions; the "full" variant additionally carries the rapport prediction.
pred_df_arr = [
    {
        'gamemove': y_pred_train_gamemove[row],
        'reasoning': y_pred_train_reasoning[row],
        'shareinfo': y_pred_train_share_information[row],
    }
    for row in range(len(y_pred_train_reasoning))
]
pred_df_arr_full = [
    {**record, 'rapport': y_pred_train_rapport[row]}
    for row, record in enumerate(pred_df_arr)
]

pred_df_full = pd.DataFrame(pred_df_arr_full)
pred_df = pd.DataFrame(pred_df_arr)

In [43]:
# Test encodings
# One record per test sample holding the individual classifiers'
# predictions; the "full" variant additionally carries the rapport prediction.
pred_test_df_arr = [
    {
        'gamemove': y_pred_test_gamemove[row],
        'reasoning': y_pred_test_reasoning[row],
        'shareinfo': y_pred_test_share_information[row],
    }
    for row in range(len(y_pred_test_reasoning))
]
pred_test_df_arr_full = [
    {**record, 'rapport': y_pred_test_rapport[row]}
    for row, record in enumerate(pred_test_df_arr)
]

pred_test_df_full = pd.DataFrame(pred_test_df_arr_full)
pred_test_df = pd.DataFrame(pred_test_df_arr)

### Joint model with one hot encoding 

In [50]:
# Binary-encode the deception label for the joint models:
# "Straightforward" -> 1, anything else -> 0.
# The encoding is computed directly from the column. The previous version
# stored the encoded Series inside a single label of a copied Series and read
# it back, which only works by accident and is pandas-version dependent.
new_deception_test = test["Input.deception_quadrant"].apply(
    lambda x: 1 if x == "Straightforward" else 0
)
new_deception_train = train["Input.deception_quadrant"].apply(
    lambda x: 1 if x == "Straightforward" else 0
)

# Targets as NumPy arrays, ready to be passed to model.fit / sklearn metrics.
y_test_deception = np.asarray(new_deception_test.tolist())
y_train_deception = np.asarray(new_deception_train.tolist())

y_test_rapport = np.asarray(y_test_rapport)
y_train_rapport = np.asarray(y_train_rapport)

In [51]:
print('Joint full model with one hot encoding, predicting deception')

# Build the joint model on the full encodings and show its layers.
joint_full_model = models_nn.create_joint_model(pred_df_full)
joint_full_model.summary()

# Train on the train encodings, validating on the test encodings.
history = joint_full_model.fit(
    x=pred_df_full,
    y=y_train_deception,
    epochs=32,
    batch_size=64,
    validation_data=(pred_test_df_full, y_test_deception),
)

Joint full model with one hot encoding, predicting deception
Model: "functional_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(None, 4)]               0         
_________________________________________________________________
dense_5 (Dense)              (None, 2)                 10        
_________________________________________________________________
dense_6 (Dense)              (None, 4)                 12        
_________________________________________________________________
dense_7 (Dense)              (None, 1)                 5         
Total params: 27
Trainable params: 27
Non-trainable params: 0
_________________________________________________________________
Epoch 1/32
Epoch 2/32
Epoch 3/32
Epoch 4/32
Epoch 5/32
Epoch 6/32
Epoch 7/32
Epoch 8/32
Epoch 9/32
Epoch 10/32
Epoch 11/32
Epoch 12/32
Epoch 13/32
Epoch 14/32
Epoch 15/32
Epoch 16/32
Epoch 17/32


Epoch 30/32
Epoch 31/32
Epoch 32/32


In [54]:
# Score the test encodings, round the outputs to hard labels, and display
# macro-averaged precision / recall / F1.
joint_predict = joint_full_model.predict(pred_test_df_full)
joint_predict_round = np.round(joint_predict)
precision_recall_fscore_support(
    y_test_deception,
    np.array(joint_predict_round),
    average='macro',
)

  _warn_prf(average, modifier, msg_start, len(result))


(0.25, 0.5, 0.3333333333333333, None)

In [None]:
print('Joint full model with one hot encoding, predicting rapport')

# Build the joint model on the encodings without rapport and show its layers.
joint_full_model = models_nn.create_joint_model(pred_df)
joint_full_model.summary()

# Train on the train encodings, validating on the test encodings.
history = joint_full_model.fit(
    x=pred_df,
    y=y_train_rapport,
    epochs=32,
    batch_size=64,
    validation_data=(pred_test_df, y_test_rapport),
)

# Round the outputs to hard labels and print macro precision / recall / F1.
joint_predict = joint_full_model.predict(pred_test_df)
joint_predict_round = np.round(joint_predict)
print(
    precision_recall_fscore_support(
        y_test_rapport,
        np.array(joint_predict_round),
        average='macro',
    )
)

### Weighted against Throughput, WorkTime, PC Agreement & Text Length

In [55]:
# Train weighted encodings: one (full, reduced) pair of frames per weighting
# source, built from the training indices and the train encodings.
(
    pred_df_full_throughput,
    pred_df_throughput,
    pred_df_full_worktime,
    pred_df_worktime,
    pred_df_full_agreement,
    pred_df_agreement,
    pred_df_full_textlength,
    pred_df_textlength,
    pred_df_full_special,
    pred_df_special,
) = metadata_options.construct_weighted_dataframe(
    indices_train,
    df_throughput,
    df_worktime,
    df_agreement,
    df_textlength,
    df_special,
    pred_df,
    pred_df_full,
)

In [56]:
# Test weighted encodings: one (full, reduced) pair of frames per weighting
# source, built from the test indices and the test encodings.
(
    pred_df_full_throughput_test,
    pred_df_throughput_test,
    pred_df_full_worktime_test,
    pred_df_worktime_test,
    pred_df_full_agreement_test,
    pred_df_agreement_test,
    pred_df_full_textlength_test,
    pred_df_textlength_test,
    pred_df_full_special_test,
    pred_df_special_test,
) = metadata_options.construct_weighted_dataframe(
    indices_test,
    df_throughput,
    df_worktime,
    df_agreement,
    df_textlength,
    df_special,
    pred_test_df,
    pred_test_df_full,
)

## Throughput only

In [58]:
print('Joint full model with one hot encoding, predicting deception, weighted by throughput')

# Build the joint model on the throughput-weighted full encodings.
joint_full_model = models_nn.create_joint_model(pred_df_full_throughput)
joint_full_model.summary()

# Train on the train encodings, validating on the test encodings.
history = joint_full_model.fit(
    x=pred_df_full_throughput,
    y=y_train_deception,
    epochs=32,
    batch_size=64,
    validation_data=(pred_df_full_throughput_test, y_test_deception),
)

# Round the outputs to hard labels and print macro precision / recall / F1.
joint_predict = joint_full_model.predict(pred_df_full_throughput_test)
joint_predict_round = np.round(joint_predict)
print(
    precision_recall_fscore_support(
        y_test_deception,
        np.array(joint_predict_round),
        average='macro',
    )
)

Joint full model with one hot encoding, predicting deception, weighted by throughput
Model: "functional_9"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_4 (InputLayer)         [(None, 4)]               0         
_________________________________________________________________
dense_11 (Dense)             (None, 2)                 10        
_________________________________________________________________
dense_12 (Dense)             (None, 4)                 12        
_________________________________________________________________
dense_13 (Dense)             (None, 1)                 5         
Total params: 27
Trainable params: 27
Non-trainable params: 0
_________________________________________________________________
Epoch 1/32
Epoch 2/32
Epoch 3/32
Epoch 4/32
Epoch 5/32
Epoch 6/32
Epoch 7/32
Epoch 8/32
Epoch 9/32
Epoch 10/32
Epoch 11/32
Epoch 12/32
Epoch 13/32
Epoch 14/32
Epoch 15/32


Epoch 30/32
Epoch 31/32
Epoch 32/32
(0.25, 0.5, 0.3333333333333333, None)


  _warn_prf(average, modifier, msg_start, len(result))


In [59]:
print('Joint full model with one hot encoding, predicting rapport, weighted by throughput')

# Build the joint model on the throughput-weighted encodings without rapport.
joint_full_model = models_nn.create_joint_model(pred_df_throughput)
joint_full_model.summary()

# Train on the train encodings, validating on the test encodings.
history = joint_full_model.fit(
    x=pred_df_throughput,
    y=y_train_rapport,
    epochs=32,
    batch_size=64,
    validation_data=(pred_df_throughput_test, y_test_rapport),
)

# Round the outputs to hard labels and print macro precision / recall / F1.
joint_predict = joint_full_model.predict(pred_df_throughput_test)
joint_predict_round = np.round(joint_predict)
print(
    precision_recall_fscore_support(
        y_test_rapport,
        np.array(joint_predict_round),
        average='macro',
    )
)

Joint full model with one hot encoding, predicting rapport, weighted by throughput
Model: "functional_11"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_5 (InputLayer)         [(None, 3)]               0         
_________________________________________________________________
dense_14 (Dense)             (None, 2)                 8         
_________________________________________________________________
dense_15 (Dense)             (None, 4)                 12        
_________________________________________________________________
dense_16 (Dense)             (None, 1)                 5         
Total params: 25
Trainable params: 25
Non-trainable params: 0
_________________________________________________________________
Epoch 1/32
Epoch 2/32
Epoch 3/32
Epoch 4/32
Epoch 5/32
Epoch 6/32
Epoch 7/32
Epoch 8/32
Epoch 9/32
Epoch 10/32
Epoch 11/32
Epoch 12/32
Epoch 13/32
Epoch 14/32
Epoch 15/32
E

Epoch 29/32
Epoch 30/32
Epoch 31/32
Epoch 32/32
(0.25, 0.5, 0.3333333333333333, None)


  _warn_prf(average, modifier, msg_start, len(result))


## WorkTime only

In [60]:
print('Joint full model with one hot encoding, predicting deception, weighted by worktime')

# Build the joint model on the worktime-weighted full encodings.
joint_full_model = models_nn.create_joint_model(pred_df_full_worktime)
joint_full_model.summary()

# Train on the train encodings, validating on the test encodings.
history = joint_full_model.fit(
    x=pred_df_full_worktime,
    y=y_train_deception,
    epochs=32,
    batch_size=64,
    validation_data=(pred_df_full_worktime_test, y_test_deception),
)

# Round the outputs to hard labels and display macro precision / recall / F1.
joint_predict = joint_full_model.predict(pred_df_full_worktime_test)
joint_predict_round = np.round(joint_predict)
precision_recall_fscore_support(
    y_test_deception,
    np.array(joint_predict_round),
    average='macro',
)

Joint full model with one hot encoding, predicting deception, weighted by worktime
Model: "functional_13"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_6 (InputLayer)         [(None, 4)]               0         
_________________________________________________________________
dense_17 (Dense)             (None, 2)                 10        
_________________________________________________________________
dense_18 (Dense)             (None, 4)                 12        
_________________________________________________________________
dense_19 (Dense)             (None, 1)                 5         
Total params: 27
Trainable params: 27
Non-trainable params: 0
_________________________________________________________________
Epoch 1/32
Epoch 2/32
Epoch 3/32
Epoch 4/32
Epoch 5/32
Epoch 6/32
Epoch 7/32
Epoch 8/32
Epoch 9/32
Epoch 10/32
Epoch 11/32
Epoch 12/32
Epoch 13/32
Epoch 14/32
Epoch 15/32
E

Epoch 30/32
Epoch 31/32
Epoch 32/32


  _warn_prf(average, modifier, msg_start, len(result))


(0.25, 0.5, 0.3333333333333333, None)

In [61]:
print('Joint full model with one hot encoding, predicting rapport, weighted by worktime')

# Build the joint model on the worktime-weighted encodings without rapport.
joint_full_model = models_nn.create_joint_model(pred_df_worktime)
joint_full_model.summary()

# Train on the train encodings, validating on the test encodings.
history = joint_full_model.fit(
    x=pred_df_worktime,
    y=y_train_rapport,
    epochs=32,
    batch_size=64,
    validation_data=(pred_df_worktime_test, y_test_rapport),
)

# Round the outputs to hard labels and display macro precision / recall / F1.
joint_predict = joint_full_model.predict(pred_df_worktime_test)
joint_predict_round = np.round(joint_predict)
precision_recall_fscore_support(
    y_test_rapport,
    np.array(joint_predict_round),
    average='macro',
)

Joint full model with one hot encoding, predicting rapport, weighted by worktime
Model: "functional_15"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_7 (InputLayer)         [(None, 3)]               0         
_________________________________________________________________
dense_20 (Dense)             (None, 2)                 8         
_________________________________________________________________
dense_21 (Dense)             (None, 4)                 12        
_________________________________________________________________
dense_22 (Dense)             (None, 1)                 5         
Total params: 25
Trainable params: 25
Non-trainable params: 0
_________________________________________________________________
Epoch 1/32
Epoch 2/32
Epoch 3/32
Epoch 4/32
Epoch 5/32
Epoch 6/32
Epoch 7/32
Epoch 8/32
Epoch 9/32
Epoch 10/32
Epoch 11/32
Epoch 12/32
Epoch 13/32
Epoch 14/32
Epoch 15/32
Epo

Epoch 30/32
Epoch 31/32
Epoch 32/32


  _warn_prf(average, modifier, msg_start, len(result))


(0.25, 0.5, 0.3333333333333333, None)

## PC Agreement only

In [62]:
print('Joint full model with one hot encoding, predicting deception, weighted by PC Agreement')

# Build the joint model on the PC-agreement-weighted full encodings.
joint_full_model = models_nn.create_joint_model(pred_df_full_agreement)
joint_full_model.summary()

# Train on the train encodings, validating on the test encodings.
history = joint_full_model.fit(
    x=pred_df_full_agreement,
    y=y_train_deception,
    epochs=32,
    batch_size=64,
    validation_data=(pred_df_full_agreement_test, y_test_deception),
)

# Evaluate on the same features and target the model was trained for.
# Previously this predicted on the throughput-weighted test frame and scored
# against the rapport labels, so the reported metrics did not describe this
# model.
joint_predict = joint_full_model.predict(pred_df_full_agreement_test)
joint_predict_round = joint_predict.round()
precision_recall_fscore_support(
    y_test_deception,
    np.array(joint_predict_round),
    average='macro',
)

Joint full model with one hot encoding, predicting deception, weighted by PC Agreement
Model: "functional_17"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_8 (InputLayer)         [(None, 4)]               0         
_________________________________________________________________
dense_23 (Dense)             (None, 2)                 10        
_________________________________________________________________
dense_24 (Dense)             (None, 4)                 12        
_________________________________________________________________
dense_25 (Dense)             (None, 1)                 5         
Total params: 27
Trainable params: 27
Non-trainable params: 0
_________________________________________________________________
Epoch 1/32
Epoch 2/32
Epoch 3/32
Epoch 4/32
Epoch 5/32
Epoch 6/32
Epoch 7/32
Epoch 8/32
Epoch 9/32
Epoch 10/32
Epoch 11/32
Epoch 12/32
Epoch 13/32
Epoch 14/32
Epoch 15/

Epoch 30/32
Epoch 31/32
Epoch 32/32


In [63]:
print('Joint full model with one hot encoding, predicting rapport, weighted by PC Agreement')

# Build the joint model on the PC-agreement-weighted encodings without rapport.
joint_full_model = models_nn.create_joint_model(pred_df_agreement)
joint_full_model.summary()

# Train on the train encodings, validating on the test encodings.
history = joint_full_model.fit(
    x=pred_df_agreement,
    y=y_train_rapport,
    epochs=32,
    batch_size=64,
    validation_data=(pred_df_agreement_test, y_test_rapport),
)

# Evaluate on the PC-agreement-weighted test frame. Previously this predicted
# on the throughput-weighted test frame, so the metrics did not match the
# features the model was trained on.
joint_predict = joint_full_model.predict(pred_df_agreement_test)
joint_predict_round = joint_predict.round()
precision_recall_fscore_support(
    y_test_rapport,
    np.array(joint_predict_round),
    average='macro',
)

Joint full model with one hot encoding, predicting rapport, weighted by PC Agreement
Model: "functional_19"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_9 (InputLayer)         [(None, 3)]               0         
_________________________________________________________________
dense_26 (Dense)             (None, 2)                 8         
_________________________________________________________________
dense_27 (Dense)             (None, 4)                 12        
_________________________________________________________________
dense_28 (Dense)             (None, 1)                 5         
Total params: 25
Trainable params: 25
Non-trainable params: 0
_________________________________________________________________
Epoch 1/32
Epoch 2/32
Epoch 3/32
Epoch 4/32
Epoch 5/32
Epoch 6/32
Epoch 7/32
Epoch 8/32
Epoch 9/32
Epoch 10/32
Epoch 11/32
Epoch 12/32
Epoch 13/32
Epoch 14/32
Epoch 15/32

Epoch 30/32
Epoch 31/32
Epoch 32/32


  _warn_prf(average, modifier, msg_start, len(result))


(0.25, 0.5, 0.3333333333333333, None)

## Text Length only

In [64]:
print('Joint full model with one hot encoding, predicting deception, weighted by text length')

# Build the joint model on the text-length-weighted full encodings.
joint_full_model = models_nn.create_joint_model(pred_df_full_textlength)
joint_full_model.summary()

# Train on the train encodings, validating on the test encodings.
history = joint_full_model.fit(
    x=pred_df_full_textlength,
    y=y_train_deception,
    epochs=32,
    batch_size=64,
    validation_data=(pred_df_full_textlength_test, y_test_deception),
)

# Round the outputs to hard labels and display macro precision / recall / F1.
joint_predict = joint_full_model.predict(pred_df_full_textlength_test)
joint_predict_round = np.round(joint_predict)
precision_recall_fscore_support(
    y_test_deception,
    np.array(joint_predict_round),
    average='macro',
)

Joint full model with one hot encoding, predicting deception, weighted by text length
Model: "functional_21"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_10 (InputLayer)        [(None, 4)]               0         
_________________________________________________________________
dense_29 (Dense)             (None, 2)                 10        
_________________________________________________________________
dense_30 (Dense)             (None, 4)                 12        
_________________________________________________________________
dense_31 (Dense)             (None, 1)                 5         
Total params: 27
Trainable params: 27
Non-trainable params: 0
_________________________________________________________________
Epoch 1/32
Epoch 2/32
Epoch 3/32
Epoch 4/32
Epoch 5/32
Epoch 6/32
Epoch 7/32
Epoch 8/32
Epoch 9/32
Epoch 10/32
Epoch 11/32
Epoch 12/32
Epoch 13/32
Epoch 14/32
Epoch 15/3

Epoch 28/32
Epoch 29/32
Epoch 30/32
Epoch 31/32
Epoch 32/32


  _warn_prf(average, modifier, msg_start, len(result))


(0.25, 0.5, 0.3333333333333333, None)

In [65]:
print('Joint full model with one hot encoding, predicting rapport, weighted by text length')

# Build the joint model on the text-length-weighted encodings without rapport.
joint_full_model = models_nn.create_joint_model(pred_df_textlength)
joint_full_model.summary()

# Train on the train encodings, validating on the test encodings.
history = joint_full_model.fit(
    x=pred_df_textlength,
    y=y_train_rapport,
    epochs=32,
    batch_size=64,
    validation_data=(pred_df_textlength_test, y_test_rapport),
)

# Round the outputs to hard labels and display macro precision / recall / F1.
joint_predict = joint_full_model.predict(pred_df_textlength_test)
joint_predict_round = np.round(joint_predict)
precision_recall_fscore_support(
    y_test_rapport,
    np.array(joint_predict_round),
    average='macro',
)

Joint full model with one hot encoding, predicting rapport, weighted by text length
Model: "functional_23"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_11 (InputLayer)        [(None, 3)]               0         
_________________________________________________________________
dense_32 (Dense)             (None, 2)                 8         
_________________________________________________________________
dense_33 (Dense)             (None, 4)                 12        
_________________________________________________________________
dense_34 (Dense)             (None, 1)                 5         
Total params: 25
Trainable params: 25
Non-trainable params: 0
_________________________________________________________________
Epoch 1/32
Epoch 2/32
Epoch 3/32
Epoch 4/32
Epoch 5/32
Epoch 6/32
Epoch 7/32
Epoch 8/32
Epoch 9/32
Epoch 10/32
Epoch 11/32
Epoch 12/32
Epoch 13/32
Epoch 14/32
Epoch 15/32


Epoch 30/32
Epoch 31/32
Epoch 32/32


  _warn_prf(average, modifier, msg_start, len(result))


(0.25, 0.5, 0.3333333333333333, None)

### Other special options (either SP1, SP2, SP3, RAND_UNI, or RAND_NORM)

In [66]:
print('Joint full model with one hot encoding, predicting deception, weighted by special option')

# Build the joint model on the special-option-weighted full encodings.
joint_full_model = models_nn.create_joint_model(pred_df_full_special)
joint_full_model.summary()

# Train on the train encodings, validating on the test encodings.
history = joint_full_model.fit(
    x=pred_df_full_special,
    y=y_train_deception,
    epochs=32,
    batch_size=64,
    validation_data=(pred_df_full_special_test, y_test_deception),
)

# Round the outputs to hard labels and display macro precision / recall / F1.
joint_predict = joint_full_model.predict(pred_df_full_special_test)
joint_predict_round = np.round(joint_predict)
precision_recall_fscore_support(
    y_test_deception,
    np.array(joint_predict_round),
    average='macro',
)

Joint full model with one hot encoding, predicting deception, weighted by special option
Model: "functional_25"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_12 (InputLayer)        [(None, 4)]               0         
_________________________________________________________________
dense_35 (Dense)             (None, 2)                 10        
_________________________________________________________________
dense_36 (Dense)             (None, 4)                 12        
_________________________________________________________________
dense_37 (Dense)             (None, 1)                 5         
Total params: 27
Trainable params: 27
Non-trainable params: 0
_________________________________________________________________
Epoch 1/32
Epoch 2/32
Epoch 3/32
Epoch 4/32
Epoch 5/32
Epoch 6/32
Epoch 7/32
Epoch 8/32
Epoch 9/32
Epoch 10/32
Epoch 11/32
Epoch 12/32
Epoch 13/32
Epoch 14/32
Epoch 1

Epoch 30/32
Epoch 31/32
Epoch 32/32


  _warn_prf(average, modifier, msg_start, len(result))


(0.25, 0.5, 0.3333333333333333, None)

In [None]:
print('Joint full model with one hot encoding, predicting rapport, weighted by special option')

# Build the joint model on the special-option-weighted encodings without rapport.
joint_full_model = models_nn.create_joint_model(pred_df_special)
joint_full_model.summary()

# Train on the train encodings, validating on the test encodings.
history = joint_full_model.fit(
    x=pred_df_special,
    y=y_train_rapport,
    epochs=32,
    batch_size=64,
    validation_data=(pred_df_special_test, y_test_rapport),
)

# Round the outputs to hard labels and display macro precision / recall / F1.
joint_predict = joint_full_model.predict(pred_df_special_test)
joint_predict_round = np.round(joint_predict)
precision_recall_fscore_support(
    y_test_rapport,
    np.array(joint_predict_round),
    average='macro',
)