In [1]:
import pandas as pd
import numpy as np
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import Input, InputLayer, Dropout, Dense, Flatten, Embedding
from tensorflow.keras import Sequential
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.initializers import TruncatedNormal
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.metrics import CategoricalAccuracy
from tensorflow.keras.utils import to_categorical
import tensorflow as tf

from tensorflow.keras.layers import concatenate
from sklearn.model_selection import train_test_split

In [2]:
full_df = pd.read_csv('politeness_strategies_merge.csv')

In [30]:
train, test = train_test_split(full_df, test_size=0.2)

In [3]:
X = full_df['Input.full_text'].to_list()
y = full_df['affcon_rapport'].tolist()

X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, test_size=0.2)

In [4]:
max_length = 100

from transformers import AutoTokenizer, TFAutoModel, AutoConfig, TFAutoModelForPreTraining 

model_name = 'bert-base-uncased'
config = AutoConfig.from_pretrained(model_name)
#"microsoft/deberta-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)

auto_model = TFAutoModelForPreTraining.from_pretrained(model_name, config=config)

All model checkpoint layers were used when initializing TFBertForPreTraining.

All the layers of TFBertForPreTraining were initialized from the model checkpoint at bert-base-uncased.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertForPreTraining for predictions without further training.


In [35]:
Y_train_class = to_categorical(train['affcon_rapport'].to_list())
X_train_text = tokenizer(
    text=train['Input.full_text'].to_list(),
    add_special_tokens=True,
    max_length=max_length,
    truncation=True,
    padding=True, 
    return_tensors='tf',
    return_token_type_ids = False,
    return_attention_mask = False,
    verbose = True)

In [36]:
Y_test_class = to_categorical(test['affcon_rapport'].to_list())
X_test_text = tokenizer(
    text=test['Input.full_text'].to_list(),
    add_special_tokens=True,
    max_length=max_length,
    truncation=True,
    padding=True, 
    return_tensors='tf',
    return_token_type_ids = False,
    return_attention_mask = False,
    verbose = True)

In [7]:
from keras import backend as K

def recall_m(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    recall = true_positives / (possible_positives + K.epsilon())
    return recall

def precision_m(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    precision = true_positives / (predicted_positives + K.epsilon())
    return precision

def f1_m(y_true, y_pred):
    precision = precision_m(y_true, y_pred)
    recall = recall_m(y_true, y_pred)
    return 2*((precision*recall)/(precision+recall+K.epsilon()))

callback = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', min_delta=0, patience=0, verbose=0, mode='auto',
    baseline=None, restore_best_weights=False
)

Using TensorFlow backend.


In [8]:
# LSTM

input_ids_in = tf.keras.layers.Input(shape=(100,), name='input_token', dtype='int32')
input_masks_in = tf.keras.layers.Input(shape=(100,), name='masked_token', dtype='int32') 

embedding_layer = auto_model(input_ids_in)[0]
#cls_token = embedding_layer[:,0,:]
X = tf.keras.layers.BatchNormalization()(embedding_layer)
X = tf.keras.layers.LSTM(64, return_sequences=True)(X)
X = tf.keras.layers.LSTM(32, return_sequences=True)(X)
X = tf.keras.layers.LSTM(16, return_sequences=True)(X)
X = tf.keras.layers.LSTM(16)(X)
X = tf.keras.layers.Dense(24, activation='relu')(X)
X = tf.keras.layers.Dropout(0.2)(X)
X = tf.keras.layers.Dense(24, activation='relu')(X)
X = tf.keras.layers.Dense(2, activation='sigmoid')(X)
text_model = tf.keras.Model(inputs=input_ids_in, outputs = X)

In [37]:
# Numerical model
numerical_train_x = train.drop(columns=['affcon_rapport', 'Input.full_text', 'Unnamed: 0',
                                    'msg_id', 'Input.convo_id', 'Input.train_test_val',
                                     'Input.msg_id', 'Input.timestamp', 'Input.full_text',
                                     'affcon_gamemove', 'affcon_reasoning', 'affcon_rapport',
                                     'affcon_shareinformation', 'Input.speaker', 'Input.reply_to',
                                     'Input.speaker_intention', 'Input.reciever_perception',
                                     'Input.reciever', 'Input.absolute_message_index', 
                                     'Input.relative_message_index', 'Input.year', 'Input.game_score_speaker',
                                     'Input.game_score_receiver', 'Input.game_score_delta',
                                     'Input.deception_quadrant', 'Input.num_words', 
                                     'Input.num_characters', 'Input.sno', 'Input.sno1'
                                    ])
numerical_train_y = to_categorical(train['affcon_rapport'].to_list())

numerical_test_x = test.drop(columns=['affcon_rapport', 'Input.full_text', 'Unnamed: 0',
                                    'msg_id', 'Input.convo_id', 'Input.train_test_val',
                                     'Input.msg_id', 'Input.timestamp', 'Input.full_text',
                                     'affcon_gamemove', 'affcon_reasoning', 'affcon_rapport',
                                     'affcon_shareinformation', 'Input.speaker', 'Input.reply_to',
                                     'Input.speaker_intention', 'Input.reciever_perception',
                                     'Input.reciever', 'Input.absolute_message_index', 
                                     'Input.relative_message_index', 'Input.year', 'Input.game_score_speaker',
                                     'Input.game_score_receiver', 'Input.game_score_delta',
                                     'Input.deception_quadrant', 'Input.num_words', 
                                     'Input.num_characters', 'Input.sno', 'Input.sno1'
                                    ])
numerical_test_y = to_categorical(test['affcon_rapport'].to_list())

In [38]:
inputB = Input(shape=(numerical_df.shape[1],))
c = Dense(len(full_df['affcon_rapport'].value_counts()), activation='relu')(inputB)
c = Dense(4, activation='relu')(c)
c = Dense(len(full_df['affcon_rapport'].value_counts()), activation='linear')(c)
numeric_model = Model(inputs=inputB, outputs=c)

In [44]:
numeric_model.compile(loss='categorical_crossentropy', optimizer=optimizer, 
                      metrics=['acc',f1_m,precision_m, recall_m])
history = numeric_model.fit(x=numerical_train_x, y=numerical_train_y, epochs=32, 
                    batch_size=64, 
                    validation_split=0.3, callbacks=[callback])

Epoch 1/32
Epoch 2/32


In [45]:
combined = concatenate([text_model.output, numeric_model.output])

In [46]:
z = Dense(len(full_df['affcon_rapport'].value_counts()), activation="relu")(combined)
z = Dense(1, activation="linear")(z)

In [47]:
model = Model(inputs=[text_model.input, numeric_model.input], outputs=z)

In [55]:
optimizer = Adam(
    learning_rate=5e-05,
    epsilon=1e-08,
    decay=0.01,
    clipnorm=1.0)

model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['acc',f1_m,precision_m, recall_m])

model.summary()

Model: "functional_13"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_token (InputLayer)        [(None, 100)]        0                                            
__________________________________________________________________________________________________
tf_bert_for_pre_training (TFBer ((None, 100, 30522), 110106428   input_token[0][0]                
__________________________________________________________________________________________________
batch_normalization (BatchNorma (None, 100, 30522)   122088      tf_bert_for_pre_training[0][0]   
__________________________________________________________________________________________________
lstm (LSTM)                     (None, 100, 64)      7830272     batch_normalization[0][0]        
______________________________________________________________________________________

In [52]:
numerical_train_x.shape

(12590, 114)

In [53]:
X_train_text['input_ids'].shape

TensorShape([12590, 100])

In [54]:
Y_train_class.shape

(12590, 2)

In [58]:
history = model.fit(x=[X_train_text['input_ids'], numerical_train_x], y=Y_train_class, epochs=32, 
                    batch_size=16, validation_split=0.3, callbacks=[callback])
loss, accuracy, f1_score, precision, recall = model.evaluate(
                x=[X_test_text['input_ids'], numerical_test_x], y=Y_test_class, verbose=0)
print(precision, recall, f1_score)

Epoch 1/32
Epoch 2/32
0.0 0.0 0.0
