## Imports

In [1]:
import pandas as pd
import numpy as np

import tensorflow as tf
# import tensorflow_addons as tfa
import transformers
from transformers import AutoTokenizer,TFRobertaModel
# from transformers import AutoTokenizer,AutoModel
from tensorflow.keras import backend as K

## Functions and Constants

In [2]:
def to_arrays(df):
  """Split a dataset frame into NumPy arrays of texts and labels.

  Args:
    df: DataFrame with a 'comment_text' column and a 'toxicity_binary' column.

  Returns:
    Tuple ``(X, y)`` holding the 'comment_text' values and the
    'toxicity_binary' values, each converted with ``Series.to_numpy()``.
  """
  texts, labels = (df[column].to_numpy()
                   for column in ('comment_text', 'toxicity_binary'))
  return texts, labels

def load_data(group):
  """Load the train/val/test CSV splits for one identity group.

  Reads ``data/<group>-dataset-{train,val,test}.csv`` and converts each
  split to ``(X, y)`` arrays via ``to_arrays``.

  Args:
    group: Name used as the file prefix (e.g. 'disability', 'gender').

  Returns:
    ``X_train, y_train, X_test, y_test, X_val, y_val`` -- note that the
    test split comes BEFORE the validation split in the returned tuple.
  """
  splits = {}
  for split_name in ('train', 'val', 'test'):
    frame = pd.read_csv('data/' + group + '-dataset-' + split_name + '.csv')
    splits[split_name] = to_arrays(frame)

  # Order matters: callers unpack as train, test, val.
  return (*splits['train'], *splits['test'], *splits['val'])

In [3]:
# Fixed token length per comment: used as the tokenizer's max_length
# (with truncation and 'max_length' padding) and as the default width of
# the Keras input layers in create_bert_cls_model.
MAX_SEQUENCE_LENGTH = 128

In [4]:
def tokenizing_pipeline(X, tokenizer):
  """Tokenize raw texts into the three input tensors the classifier expects.

  Every text is truncated or padded to MAX_SEQUENCE_LENGTH tokens and the
  result is requested as TensorFlow tensors.

  Args:
    X: Iterable of text strings (converted to a list before tokenizing).
    tokenizer: Callable tokenizer whose result exposes ``input_ids``,
      ``token_type_ids`` and ``attention_mask`` attributes.

  Returns:
    List ``[input_ids, token_type_ids, attention_mask]`` in that order.
  """
  encoded = tokenizer(
      list(X),
      max_length=MAX_SEQUENCE_LENGTH,
      truncation=True,
      padding='max_length',
      return_tensors='tf',
  )
  # Order must match the Input layers of the downstream Keras model.
  return [encoded.input_ids, encoded.token_type_ids, encoded.attention_mask]

## Load Data

In [5]:
# load_data returns the splits in train, test, val order.
(X_train_disability, y_train_disability,
 X_test_disability, y_test_disability,
 X_val_disability, y_val_disability) = load_data('disability')

In [6]:
# Peek at the first disability training comment as a sanity check.
X_train_disability[0]

'It\'s too bad you don\'t enough to actually respond to my post. Now go apply for more "disability" payments turdflake.'

In [7]:
# load_data returns the splits in train, test, val order.
(X_train_gender, y_train_gender,
 X_test_gender, y_test_gender,
 X_val_gender, y_val_gender) = load_data('gender')

In [8]:
# Peek at the first gender training comment as a sanity check.
X_train_gender[0]

'Most "norms" don\'t follow the minutiae of what goes on in Rome and church politics...to them the pope is a nice old man who lives in Rome who smiles, waves, and kisses babies but has little impact on their day to day lives as Catholics. If you stood outside of Mass that was getting out and asked 10 people what Amoris Laetitia is, how many would even know what you are talking about? The fact that we are even discussing an apostolic exhortation puts us outside the "norm".'

## Disability BERTweet Model

In [9]:
# For transformers v4.x+:
# Load the BERTweet tokenizer (use_fast=False is passed explicitly) and the
# pre-trained encoder. TFRobertaModel loads the encoder only; the load
# warning below reports that the checkpoint's 'lm_head' layer is not used.
# Both objects are module-level globals shared by every model built later.
bertweet_tokenizer = AutoTokenizer.from_pretrained("vinai/bertweet-base", use_fast=False)
bertweet_model = TFRobertaModel.from_pretrained("vinai/bertweet-base")

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Metal device set to: Apple M1 Pro


Some layers from the model checkpoint at vinai/bertweet-base were not used when initializing TFRobertaModel: ['lm_head']
- This IS expected if you are initializing TFRobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFRobertaModel were initialized from the model checkpoint at vinai/bertweet-base.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFRobertaModel for predictions without further training.


### BERT Tokenization of Training, Test, and Validation Data

In [10]:
# Tokenize each disability split with the shared BERTweet tokenizer.
(bert_train_inputs_disability,
 bert_test_inputs_disability,
 bert_val_inputs_disability) = (
    tokenizing_pipeline(split_texts, bertweet_tokenizer)
    for split_texts in (X_train_disability, X_test_disability, X_val_disability)
)

The custom `f1_score` metric below is adapted from: https://neptune.ai/blog/implementing-the-macro-f1-score-in-keras

In [11]:
### Define F1 measures: F1 = 2 * (precision * recall) / (precision + recall)

def f1_score(y_true, y_pred):
    """Keras-backend F1 metric: 2 * (precision * recall) / (precision + recall).

    Inputs are clipped to [0, 1] and rounded before counting, so predicted
    probabilities are effectively thresholded at 0.5. ``K.epsilon()`` is
    added to every denominator to avoid division by zero.

    Args:
        y_true: Tensor of ground-truth labels.
        y_pred: Tensor of predicted scores.

    Returns:
        Scalar tensor with the F1 score of the tensors passed in.
    """
    eps = K.epsilon()

    def _count_positives(values):
        # Number of entries that round to 1 after clipping to [0, 1].
        return K.sum(K.round(K.clip(values, 0, 1)))

    true_positives = _count_positives(y_true * y_pred)
    actual_positives = _count_positives(y_true)
    predicted_positives = _count_positives(y_pred)

    precision = true_positives / (predicted_positives + eps)
    recall = true_positives / (actual_positives + eps)

    return 2 * ((precision * recall) / (precision + recall + eps))


In [12]:
def create_bert_cls_model(max_sequence_length=MAX_SEQUENCE_LENGTH,
                          hidden_size=100,
                          dropout=0.3,
                          learning_rate=0.0001,
                          num_train_layers=0):
    """Build and compile a binary classifier on top of the shared BERTweet encoder.

    The model feeds the first-position (CLS-style) vector of the encoder's
    first output through Dense(relu) -> Dropout -> Dense(1, sigmoid).

    NOTE: the encoder is the module-level ``bertweet_model``; every model
    returned by this function shares (and mutates the trainable state of)
    that single encoder instance.

    Args:
        max_sequence_length: Token length of each of the three input tensors.
        hidden_size: Units in the hidden Dense layer.
        dropout: Dropout rate applied after the hidden layer.
        learning_rate: Adam learning rate.
        num_train_layers: 0 freezes the whole encoder; N > 0 leaves the
            weights of the last N encoder layers trainable and freezes
            every other encoder weight.

    Returns:
        A compiled ``tf.keras.Model`` taking
        ``[input_ids, token_type_ids, attention_mask]`` and emitting one
        sigmoid probability per example.
    """
    if num_train_layers == 0:
        # Feature-extraction mode: freeze the entire pre-trained encoder.
        bertweet_model.trainable = False
    else:
        # A previous call with num_train_layers=0 leaves the shared encoder
        # frozen at the layer level; re-enable it before freezing selectively.
        bertweet_model.trainable = True

        # Weight names carry the encoder layer index as a '_<n>' suffix;
        # collect the suffixes of the last `num_train_layers` layers.
        last_layer_index = bertweet_model.config.num_hidden_layers - 1
        retrain_layers = ['_' + str(last_layer_index - layer_num)
                          for layer_num in range(num_train_layers)]

        # Fixed: the original iterated `bert_model.weights`, an undefined
        # name, so this branch always raised NameError.
        for w in bertweet_model.weights:
            if not any(x in w.name for x in retrain_layers):
                # NOTE(review): relies on the private `_trainable` attribute
                # of TF variables -- confirm the installed Keras honours it.
                w._trainable = False

    input_ids = tf.keras.layers.Input(shape=(max_sequence_length,), dtype=tf.int64, name='input_ids_layer')
    token_type_ids = tf.keras.layers.Input(shape=(max_sequence_length,), dtype=tf.int64, name='token_type_ids_layer')
    attention_mask = tf.keras.layers.Input(shape=(max_sequence_length,), dtype=tf.int64, name='attention_mask_layer')

    bert_inputs = {'input_ids': input_ids,
                   'token_type_ids': token_type_ids,
                   'attention_mask': attention_mask}

    bert_out = bertweet_model(bert_inputs)

    # First output, first sequence position: the vector used as the
    # sentence-level representation.
    cls_token = bert_out[0][:, 0, :]

    hidden = tf.keras.layers.Dense(hidden_size, activation='relu', name='hidden_layer')(cls_token)
    hidden = tf.keras.layers.Dropout(dropout)(hidden)

    classification = tf.keras.layers.Dense(1, activation='sigmoid', name='classification_layer')(hidden)

    classification_model = tf.keras.Model(inputs=[input_ids, token_type_ids, attention_mask],
                                          outputs=[classification])

    # The output is already a sigmoid probability, hence from_logits=False.
    classification_model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
                                 loss=tf.keras.losses.BinaryCrossentropy(from_logits=False),
                                 metrics=[tf.keras.metrics.BinaryAccuracy(),
                                          tf.keras.metrics.Precision(),
                                          tf.keras.metrics.Recall(),
                                          f1_score])

    return classification_model

In [13]:
# np.bincount on the binary labels yields [count of 0s, count of 1s].
neg, pos = np.bincount(y_train_disability)
total = neg + pos
print(
    'Examples:\n    Total: {}\n    Positive: {} ({:.2f}% of total)\n'.format(
        total, pos, 100 * pos / total
    )
)

# Inverse-frequency weights. Scaling by total/2 helps keep the loss to a
# similar magnitude: the sum of the weights of all examples stays the same.
weight_for_0, weight_for_1 = (
    (1 / class_count) * (total / 2.0) for class_count in (neg, pos)
)
class_weight = {0: weight_for_0, 1: weight_for_1}

print('Weight for class 0: {:.2f}'.format(weight_for_0))
print('Weight for class 1: {:.2f}'.format(weight_for_1))

Examples:
    Total: 13438
    Positive: 2831 (21.07% of total)

Weight for class 0: 0.63
Weight for class 1: 2.37


## class_weight = (1 / class_count) * (total / 2.0) <-- BEST f1 scores

In [14]:
# Frozen-encoder baseline (create_bert_cls_model defaults to
# num_train_layers=0), trained with batch size 50 for up to 20 epochs using
# the class_weight dict currently defined in the kernel.
disability_model = create_bert_cls_model()
disability_history = disability_model.fit(
    bert_train_inputs_disability,
    y_train_disability,
    validation_data=(bert_val_inputs_disability, y_val_disability),
    batch_size=50,
    epochs=20,
    class_weight=class_weight,
)



Epoch 1/20


2023-04-09 03:47:37.508853: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20

KeyboardInterrupt: 

In [16]:
# Same frozen-encoder setup, shorter run: batch size 32, 5 epochs, using the
# class_weight dict currently defined in the kernel.
disability_model = create_bert_cls_model()
disability_history = disability_model.fit(
    bert_train_inputs_disability,
    y_train_disability,
    validation_data=(bert_val_inputs_disability, y_val_disability),
    batch_size=32,
    epochs=5,
    class_weight=class_weight,
)



Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [None]:
# Held-out test metrics for the most recently trained disability model
# (loss followed by the metrics the model was compiled with).
disability_model.evaluate(bert_test_inputs_disability, y_test_disability)

In [None]:
# Training vs. validation loss per epoch for the most recent fit.
# Drawn through pandas plotting because `plt` is never imported in this
# notebook, so the original plt.* calls raised NameError.
history = pd.DataFrame(disability_history.history)
ax = history['loss'].plot(label="training", marker='o')
history['val_loss'].plot(ax=ax, label="validation", marker='o')
ax.set_ylabel('Loss')
ax.set_xlabel('Epoch')
# One tick per epoch. The original `len(history['loss'] + 1)` added 1 to
# every loss value instead of to the length; the tick count was len() anyway.
ax.set_xticks(range(len(history)))
ax.legend();

In [14]:
# Re-run of the frozen-encoder model: batch size 32, 5 epochs, using the
# class_weight dict currently defined in the kernel.
disability_model = create_bert_cls_model()
disability_history = disability_model.fit(
    bert_train_inputs_disability,
    y_train_disability,
    validation_data=(bert_val_inputs_disability, y_val_disability),
    batch_size=32,
    epochs=5,
    class_weight=class_weight,
)



Epoch 1/5


2023-04-09 02:54:23.049835: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz




KeyboardInterrupt: 

In [14]:
# Alternative weighting: plain inverse class counts (1 / n_class).
# These weights are not rescaled, so they are tiny in absolute terms
# (see the printed values) and overwrite the earlier `class_weight`.
counts = np.bincount(y_train_disability)
print(
    "Number of positive samples in training data: {} ({:.2f}% of total)".format(
        counts[1],
        100 * float(counts[1]) / len(y_train_disability),
    )
)

weight_for_0, weight_for_1 = (1.0 / counts[label] for label in (0, 1))
class_weight = {0: weight_for_0, 1: weight_for_1}

print('Weight for class 0: {:.10f}'.format(weight_for_0))
print('Weight for class 1: {:.10f}'.format(weight_for_1))

Number of positive samples in training data: 2831 (21.07% of total)
Weight for class 0: 0.0000942774
Weight for class 1: 0.0003532321


## class_weight = 1/counts

In [15]:
# Frozen-encoder model trained with the class_weight dict currently in the
# kernel (in top-to-bottom order: the 1/counts weights computed just above).
disability_model = create_bert_cls_model()
disability_history = disability_model.fit(
    bert_train_inputs_disability,
    y_train_disability,
    validation_data=(bert_val_inputs_disability, y_val_disability),
    batch_size=32,
    epochs=5,
    class_weight=class_weight,
)



Epoch 1/5


2023-04-09 02:50:48.212405: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz




KeyboardInterrupt: 

## Low f1 score for class_weight={0:0.3, 1:0.7}

In [15]:
# Frozen-encoder model trained with hand-picked class weights.
# NOTE: this assignment overwrites the global `class_weight` for every
# cell executed afterwards.
class_weight = {0: 0.3, 1: 0.7}
disability_model = create_bert_cls_model()
disability_history = disability_model.fit(
    bert_train_inputs_disability,
    y_train_disability,
    validation_data=(bert_val_inputs_disability, y_val_disability),
    batch_size=32,
    epochs=5,
    class_weight=class_weight,
)



Epoch 1/5

KeyboardInterrupt: 

In [22]:
# The manual override `class_weight={0:0.3, 1:0.7}` is commented out here,
# so this run uses whichever `class_weight` was assigned most recently in
# the kernel -- verify which one before comparing results across runs.
disability_model = create_bert_cls_model()
disability_history = disability_model.fit(
    bert_train_inputs_disability,
    y_train_disability,
    validation_data=(bert_val_inputs_disability, y_val_disability),
    batch_size=32,
    epochs=5,
    class_weight=class_weight,
)



Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [None]:
# Held-out test metrics for the most recently trained disability model
# (loss followed by the metrics the model was compiled with).
disability_model.evaluate(bert_test_inputs_disability, y_test_disability)

In [None]:
# Training vs. validation loss per epoch for the most recent fit.
# Drawn through pandas plotting because `plt` is never imported in this
# notebook, so the original plt.* calls raised NameError.
history = pd.DataFrame(disability_history.history)
ax = history['loss'].plot(label="training", marker='o')
history['val_loss'].plot(ax=ax, label="validation", marker='o')
ax.set_ylabel('Loss')
ax.set_xlabel('Epoch')
# One tick per epoch. The original `len(history['loss'] + 1)` added 1 to
# every loss value instead of to the length; the tick count was len() anyway.
ax.set_xticks(range(len(history)))
ax.legend();

### Fine-tuning

In [None]:
# Unfreeze the whole BERTweet encoder and continue training the already
# fitted classifier end-to-end. The model is recompiled after changing
# `trainable`, with a very low learning rate.
bertweet_model.trainable = True

# Fixed: the original referenced a bare `keras` name that is never imported
# (only `tf` is), which raised NameError. f1_score is included so the
# fine-tuned metrics line up with those of the initial compile.
disability_model.compile(optimizer=tf.keras.optimizers.Adam(1e-5),  # Very low learning rate
                         loss=tf.keras.losses.BinaryCrossentropy(from_logits=False),
                         metrics=[tf.keras.metrics.BinaryAccuracy(),
                                  tf.keras.metrics.Precision(),
                                  tf.keras.metrics.Recall(),
                                  f1_score])

# Uses whichever `class_weight` dict is currently defined in the kernel.
disability_history_ft = disability_model.fit(bert_train_inputs_disability,
                                             y_train_disability,
                                             validation_data=(bert_val_inputs_disability, y_val_disability),
                                             batch_size=32,
                                             epochs=5,
                                             class_weight=class_weight)

In [None]:
# Held-out test metrics for the disability model after fine-tuning.
disability_model.evaluate(bert_test_inputs_disability, y_test_disability)