In [1]:
import pandas as pd
import numpy as np
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import Input, InputLayer, Dropout, Dense, Flatten, Embedding
from tensorflow.keras import Sequential
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.initializers import TruncatedNormal
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.metrics import CategoricalAccuracy
from tensorflow.keras.utils import to_categorical
import tensorflow as tf

from tensorflow.keras.layers import concatenate
from sklearn.model_selection import train_test_split

In [2]:
full_df = pd.read_csv('data/affcon_final.csv')

In [3]:
X = full_df['Input.full_text'].to_list()
y = full_df['affcon_rapport'].tolist()

X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, test_size=0.2)

In [4]:
max_length = 100

from transformers import AutoTokenizer, TFAutoModel, AutoConfig, TFAutoModelForPreTraining 

model_name = 'bert-base-uncased'
config = AutoConfig.from_pretrained(model_name)
#"microsoft/deberta-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)

auto_model = TFAutoModelForPreTraining.from_pretrained(model_name, config=config)

All model checkpoint layers were used when initializing TFBertForPreTraining.

All the layers of TFBertForPreTraining were initialized from the model checkpoint at bert-base-uncased.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertForPreTraining for predictions without further training.


In [15]:
tokenizer.pad_token = tokenizer.eos_token
#tokenizer.pad_token = "[PAD]"

Using eos_token, but it is not set yet.


In [5]:
Y_train_class = to_categorical(y_train)
X_train_text = tokenizer(
    text=X_train,
    add_special_tokens=True,
    max_length=max_length,
    padding='max_length',
    truncation=True,
    return_tensors='tf',
    return_token_type_ids = False,
    return_attention_mask = False,
    verbose = True)

In [13]:
Y_test_class = to_categorical(y_test)
#tokenizer.pad_token = tokenizer.eos_token
X_test_text = tokenizer(
    text=X_test,
    add_special_tokens=True,
    max_length=max_length,
    padding='max_length',
    truncation=True,
    return_tensors='tf',
    return_token_type_ids = False,
    return_attention_mask = False,
    verbose = True)

Using eos_token, but it is not set yet.
Using pad_token, but it is not set yet.


ValueError: Asking to pad but the tokenizer does not have a padding token. Please select a token to use as `pad_token` `(tokenizer.pad_token = tokenizer.eos_token e.g.)` or add a new pad token via `tokenizer.add_special_tokens({'pad_token': '[PAD]'})`.

In [None]:
from keras import backend as K

def recall_m(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    recall = true_positives / (possible_positives + K.epsilon())
    return recall

def precision_m(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    precision = true_positives / (predicted_positives + K.epsilon())
    return precision

def f1_m(y_true, y_pred):
    precision = precision_m(y_true, y_pred)
    recall = recall_m(y_true, y_pred)
    return 2*((precision*recall)/(precision+recall+K.epsilon()))

callback = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', min_delta=0, patience=0, verbose=0, mode='auto',
    baseline=None, restore_best_weights=False
)

In [16]:
# Pure Transformers

input_ids_in = tf.keras.layers.Input(shape=(128,), name='input_token', dtype='int32')
input_masks_in = tf.keras.layers.Input(shape=(128,), name='masked_token', dtype='int32') 

embedding_layer = auto_model(input_ids_in)[0]
cls_token = embedding_layer[:,0,:]
X = tf.keras.layers.BatchNormalization()(cls_token)
X = tf.keras.layers.Dense(192, activation='relu')(X)
X = tf.keras.layers.Dropout(0.2)(X)
X = tf.keras.layers.Dense(2, activation='softmax')(X)
model = tf.keras.Model(inputs=input_ids_in, outputs = X)

for layer in model.layers[:3]:
  layer.trainable = False
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc',f1_m,precision_m, recall_m])
history = model.fit(X_train_text['input_ids'], Y_train_class, epochs=32, batch_size=64, 
                    validation_split=0.3, callbacks=[callback])
loss, accuracy, f1_score, precision, recall = model.evaluate(X_test_text['input_ids'], Y_test_class, verbose=0)
print(precision, recall, f1_score)

Epoch 1/32
Epoch 2/32
0.39572811126708984 0.39572811126708984 0.39572805166244507


In [11]:
# CNN 

input_ids_in = tf.keras.layers.Input(shape=(100,), name='input_token', dtype='int32')

embedding_layer = auto_model(input_ids_in)[0]
X = tf.keras.layers.BatchNormalization()(embedding_layer)
X = tf.keras.layers.Lambda(lambda x: tf.keras.backend.expand_dims(x, 3))(X)
X = tf.keras.layers.Conv2D(32, (2, 2), activation='relu')(X)
X = tf.keras.layers.Conv2D(32, (2, 2), activation='relu')(X)
X = tf.keras.layers.Conv2D(32, (2, 2), activation='relu')(X)
X = tf.keras.layers.MaxPooling2D(pool_size=(2,2))(X)
X = tf.keras.layers.Flatten()(X)
X = tf.keras.layers.Dropout(0.2)(X)
X = tf.keras.layers.Dense(len(np.unique(Y_train_class)), activation='sigmoid')(X)

model = tf.keras.Model(inputs=input_ids_in, outputs = X)
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc',f1_m,precision_m, recall_m])

model.summary()

history = model.fit(X_train_text['input_ids'], Y_train_class, epochs=32, batch_size=16, 
                    validation_split=0.2, callbacks=[callback])
loss, accuracy, f1_score, precision, recall = model.evaluate(X_test_text['input_ids'], Y_test_class, verbose=0)
print(precision, recall, f1_score)

Model: "functional_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_token (InputLayer)     [(None, 100)]             0         
_________________________________________________________________
tf_bert_for_pre_training (TF ((None, 100, 30522), (Non 110106428 
_________________________________________________________________
batch_normalization_2 (Batch (None, 100, 30522)        122088    
_________________________________________________________________
lambda_2 (Lambda)            (None, 100, 30522, 1)     0         
_________________________________________________________________
conv2d_6 (Conv2D)            (None, 99, 30521, 32)     160       
_________________________________________________________________
conv2d_7 (Conv2D)            (None, 98, 30520, 32)     4128      
_________________________________________________________________
conv2d_8 (Conv2D)            (None, 97, 30519, 32)    

ResourceExhaustedError: 2 root error(s) found.
  (0) Resource exhausted:  OOM when allocating tensor with shape[16,32,99,30521] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[node functional_5/conv2d_6/Conv2D (defined at <ipython-input-11-fff9a0153623>:22) ]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.

	 [[gradient_tape/functional_5/tf_bert_for_pre_training/bert/embeddings/position_embeddings/embedding_lookup/Reshape/_558]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.

  (1) Resource exhausted:  OOM when allocating tensor with shape[16,32,99,30521] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[node functional_5/conv2d_6/Conv2D (defined at <ipython-input-11-fff9a0153623>:22) ]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.

0 successful operations.
0 derived errors ignored. [Op:__inference_train_function_82449]

Errors may have originated from an input operation.
Input Source operations connected to node functional_5/conv2d_6/Conv2D:
 functional_5/lambda_2/ExpandDims (defined at <ipython-input-11-fff9a0153623>:7)

Input Source operations connected to node functional_5/conv2d_6/Conv2D:
 functional_5/lambda_2/ExpandDims (defined at <ipython-input-11-fff9a0153623>:7)

Function call stack:
train_function -> train_function


In [24]:
## CNN with LSTM 

input_ids_in = tf.keras.layers.Input(shape=(100,), name='input_token', dtype='int32')

embedding_layer = auto_model(input_ids_in)[0]
X = tf.keras.layers.BatchNormalization()(embedding_layer)
X = tf.keras.layers.Lambda(lambda x: tf.keras.backend.expand_dims(x, 3))(X)
X = tf.keras.layers.Conv2D(32, (2, 2), activation='relu')(X)
X = tf.keras.layers.Conv2D(32, (2, 2), activation='relu')(X)
X = tf.keras.layers.MaxPooling2D(pool_size=(2,2))(X)
X = tf.keras.layers.Flatten()(X)
X = tf.keras.layers.Dense(32, activation='relu')(X)
X = tf.keras.layers.Lambda(lambda x: tf.keras.backend.expand_dims(x, 2))(X)
X = tf.keras.layers.LSTM(128, return_sequences=True, activation='relu')(X)
X = tf.keras.layers.LSTM(256)(X)
X = tf.keras.layers.Dropout(0.2)(X)
X = tf.keras.layers.Dense(len(np.unique(Y_train_class)), activation='sigmoid')(X)

model = tf.keras.Model(inputs=input_ids_in, outputs = X)
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc',f1_m,precision_m, recall_m])

model.summary()

history = model.fit(X_train_text['input_ids'], Y_train_class, epochs=32, batch_size=8, 
                    validation_split=0.3, callbacks=[callback])
loss, accuracy, f1_score, precision, recall = model.evaluate(X_test_text['input_ids'], Y_test_class, verbose=0)
print(precision, recall, f1_score)

ResourceExhaustedError: OOM when allocating tensor with shape[23927680,32] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc [Op:RandomUniform]

In [8]:
# LSTM

input_ids_in = tf.keras.layers.Input(shape=(100,), name='input_token', dtype='int32')
input_masks_in = tf.keras.layers.Input(shape=(100,), name='masked_token', dtype='int32') 

embedding_layer = auto_model(input_ids_in)[0]
#cls_token = embedding_layer[:,0,:]
X = tf.keras.layers.BatchNormalization()(embedding_layer)
X = tf.keras.layers.LSTM(64, return_sequences=True)(X)
X = tf.keras.layers.LSTM(32, return_sequences=True)(X)
X = tf.keras.layers.LSTM(16, return_sequences=True)(X)
X = tf.keras.layers.LSTM(16)(X)
X = tf.keras.layers.Dense(24, activation='relu')(X)
X = tf.keras.layers.Dropout(0.2)(X)
X = tf.keras.layers.Dense(24, activation='relu')(X)
X = tf.keras.layers.Dense(2, activation='sigmoid')(X)
model = tf.keras.Model(inputs=input_ids_in, outputs = X)

for layer in model.layers[:3]:
  layer.trainable = False

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc',f1_m,precision_m, recall_m])

model.summary()

history = model.fit(X_train_text['input_ids'], Y_train_class, epochs=32, batch_size=64, 
                    validation_split=0.3, callbacks=[callback])
loss, accuracy, f1_score, precision, recall = model.evaluate(X_test_text['input_ids'], Y_test_class, verbose=0)
print(precision, recall, f1_score)

Model: "functional_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_token (InputLayer)     [(None, 100)]             0         
_________________________________________________________________
tf_bert_for_pre_training (TF ((None, 100, 30522), (Non 110106428 
_________________________________________________________________
batch_normalization_1 (Batch (None, 100, 30522)        122088    
_________________________________________________________________
lstm_4 (LSTM)                (None, 100, 64)           7830272   
_________________________________________________________________
lstm_5 (LSTM)                (None, 100, 32)           12416     
_________________________________________________________________
lstm_6 (LSTM)                (None, 100, 16)           3136      
_________________________________________________________________
lstm_7 (LSTM)                (None, 16)               

NameError: name 'X_train_text' is not defined

In [14]:
# 3x Bi-LSTM
input_ids_in = tf.keras.layers.Input(shape=(100,), name='input_token', dtype='int32')
input_masks_in = tf.keras.layers.Input(shape=(100,), name='masked_token', dtype='int32') 

embedding_layer = auto_model(input_ids_in)[0]
#cls_token = embedding_layer[:,0,:]
X = tf.keras.layers.BatchNormalization()(embedding_layer)
X = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64, return_sequences=True))(X)
X = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32, return_sequences=True))(X)
X = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(16, return_sequences=True))(X)
X = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(16))(X)
X = tf.keras.layers.Dense(24, activation='relu')(X)
X = tf.keras.layers.Dropout(0.2)(X)
X = tf.keras.layers.Dense(24, activation='relu')(X)
X = tf.keras.layers.Dense(2, activation='sigmoid')(X)
model = tf.keras.Model(inputs=input_ids_in, outputs = X)

for layer in model.layers[:3]:
  layer.trainable = False

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc',f1_m,precision_m, recall_m])

model.summary()

Model: "functional_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_token (InputLayer)     [(None, 100)]             0         
_________________________________________________________________
tf_bert_for_pre_training (TF ((None, 100, 30522), (Non 110106428 
_________________________________________________________________
batch_normalization_2 (Batch (None, 100, 30522)        122088    
_________________________________________________________________
bidirectional_8 (Bidirection (None, 100, 128)          15660544  
_________________________________________________________________
bidirectional_9 (Bidirection (None, 100, 64)           41216     
_________________________________________________________________
bidirectional_10 (Bidirectio (None, 100, 32)           10368     
_________________________________________________________________
bidirectional_11 (Bidirectio (None, 32)               

In [18]:
history = model.fit(X_train_text['input_ids'], Y_train_class, epochs=32, batch_size=64, 
                    validation_split=0.3, callbacks=[callback])
loss, accuracy, f1_score, precision, recall = model.evaluate(X_test_text['input_ids'], Y_test_class, verbose=0)
print(precision, recall, f1_score)

Epoch 1/32
Epoch 2/32
Epoch 3/32
Epoch 4/32
0.5063555240631104 0.9148778915405273 0.6517022848129272
