## Imports

In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_addons as tfa
import transformers
from transformers import AutoTokenizer,TFRobertaModel
# from transformers import AutoTokenizer,AutoModel
# from tensorflow.keras import backend as K

In [None]:
# Detect the available hardware and pick the matching distribution strategy.
# TPUClusterResolver raises ValueError when no TPU can be found; in that case
# we fall back to TensorFlow's default strategy.
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    print('Running on TPU ', tpu.master())
except ValueError:
    tpu = None

if tpu:
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    # tf.distribute.TPUStrategy is the stable replacement for the deprecated
    # tf.distribute.experimental.TPUStrategy alias.
    strategy = tf.distribute.TPUStrategy(tpu)
else:
    # Default distribution strategy in Tensorflow. Works on CPU and single GPU.
    strategy = tf.distribute.get_strategy()

# NOTE(review): no cell visible in this notebook enters `strategy.scope()`;
# confirm whether model construction is meant to happen inside it.
print("REPLICAS: ", strategy.num_replicas_in_sync)

In [None]:
# Report the runtime environment (platform, library versions, GPU visibility)
# so results can be tied back to a specific setup.
import sys
import platform
import sklearn as sk

for line in (
    f"Python Platform: {platform.platform()}",
    f"Tensor Flow Version: {tf.__version__}",
    f"Keras Version: {tf.keras.__version__}",
    "",
    f"Python {sys.version}",
    f"Pandas {pd.__version__}",
    f"Scikit-Learn {sk.__version__}",
):
    print(line)

# True when TensorFlow can see at least one physical GPU device.
gpu = bool(tf.config.list_physical_devices('GPU'))
gpu_status = "available" if gpu else "NOT AVAILABLE"
print("GPU is", gpu_status)

## Functions and Constants

In [None]:
def to_arrays(df):
  """Split a dataset frame into parallel NumPy arrays of texts and labels.

  Args:
    df: DataFrame containing 'comment_text' and 'toxicity_binary' columns.

  Returns:
    Tuple (X, y): the 'comment_text' values and the 'toxicity_binary' values,
    each converted with `to_numpy()`.
  """
  columns = ('comment_text', 'toxicity_binary')
  texts, labels = (df[name].to_numpy() for name in columns)
  return texts, labels

def load_data(group):
  """Read the train/val/test CSV splits for one identity group.

  Files are read from 'data/<group>-dataset-{train,val,test}.csv' and each
  frame is converted to (texts, labels) arrays with `to_arrays`.

  Args:
    group: Identity-group prefix used in the CSV file names.

  Returns:
    Tuple (X_train, y_train, X_test, y_test, X_val, y_val). Note that the
    test split comes before the validation split in the returned tuple.
  """
  suffixes = ('-dataset-train.csv', '-dataset-val.csv', '-dataset-test.csv')

  # Read every file first, then convert, so I/O errors surface before any
  # column lookups happen.
  frames = [pd.read_csv('data/' + group + suffix) for suffix in suffixes]
  (X_train, y_train), (X_val, y_val), (X_test, y_test) = map(to_arrays, frames)

  return X_train, y_train, X_test, y_test, X_val, y_val

In [None]:
# Token length that tokenizing_pipeline pads/truncates every comment to, and the
# default input width used by build_bertweet_cls_model.
MAX_SEQUENCE_LENGTH = 128

In [None]:
def tokenizing_pipeline(X, tokenizer):
  """Tokenize a collection of texts into fixed-length model inputs.

  Every text is truncated or padded to MAX_SEQUENCE_LENGTH tokens and the
  tokenizer is asked for TensorFlow tensors.

  Args:
    X: Iterable of raw text strings.
    tokenizer: Callable tokenizer whose result exposes `input_ids`,
      `token_type_ids` and `attention_mask` attributes.

  Returns:
    List [input_ids, token_type_ids, attention_mask], in that order.
  """
  encoded = tokenizer(
      list(X),
      max_length=MAX_SEQUENCE_LENGTH,
      truncation=True,
      padding='max_length',
      return_tensors='tf',
  )
  fields = ('input_ids', 'token_type_ids', 'attention_mask')
  return [getattr(encoded, field) for field in fields]

In [None]:
def build_bertweet_cls_model(max_sequence_length=MAX_SEQUENCE_LENGTH,
                          hidden_size=100,
                          dropout=0.3,
                          learning_rate=0.0001,
                          num_train_layers=0):
    """Build and compile a binary classifier on top of the shared BERTweet encoder.

    The global `bertweet_model` instance is used directly (not copied), so every
    model built by this function shares the same encoder object, and the
    freezing applied here mutates that shared object.

    Args:
        max_sequence_length: Length of each of the three integer input tensors.
        hidden_size: Units in the ReLU hidden layer placed on the first-token
            representation.
        dropout: Dropout rate applied after the hidden layer.
        learning_rate: Learning rate for the legacy Adam optimizer.
        num_train_layers: 0 freezes the whole encoder. A positive value leaves
            only weights whose name contains one of the codes '_11', '_10', ...
            (counting down `num_train_layers` times) untouched and marks every
            other encoder weight as non-trainable.

    Returns:
        A compiled `tf.keras.Model` taking [input_ids, token_type_ids,
        attention_mask] and producing a single sigmoid probability.
    """
    if num_train_layers == 0:
        # Freeze all pre-trained BERTweet layers.
        bertweet_model.trainable = False
    else:
        # Partially freeze: keep only the top `num_train_layers` encoder blocks.
        # NOTE(review): 11 is hard-coded as the index of the last encoder
        # block -- assumes a 12-block encoder; confirm for other checkpoints.
        retrain_layers = ['_' + str(11 - layer_num)
                          for layer_num in range(num_train_layers)]

        # Fixed: the original iterated `bert_model.weights`, a name that is
        # never defined; the encoder is the global `bertweet_model`.
        for w in bertweet_model.weights:
            if not any(code in w.name for code in retrain_layers):
                w._trainable = False

    input_ids = tf.keras.layers.Input(shape=(max_sequence_length,), dtype=tf.int64, name='input_ids_layer')
    token_type_ids = tf.keras.layers.Input(shape=(max_sequence_length,), dtype=tf.int64, name='token_type_ids_layer')
    attention_mask = tf.keras.layers.Input(shape=(max_sequence_length,), dtype=tf.int64, name='attention_mask_layer')

    bert_inputs = {'input_ids': input_ids,
                   'token_type_ids': token_type_ids,
                   'attention_mask': attention_mask}

    # Use the same bertweet model instance for every classifier.
    bert_out = bertweet_model(bert_inputs)

    # First output of the encoder, sliced at sequence position 0.
    cls_token = bert_out[0][:, 0, :]

    hidden = tf.keras.layers.Dense(hidden_size, activation='relu', name='hidden_layer')(cls_token)
    hidden = tf.keras.layers.Dropout(dropout)(hidden)

    classification = tf.keras.layers.Dense(1, activation='sigmoid', name='classification_layer')(hidden)

    classification_model = tf.keras.Model(inputs=[input_ids, token_type_ids, attention_mask],
                                          outputs=[classification])

    f1_score = tfa.metrics.F1Score(1, threshold = 0.5)

    classification_model.compile(optimizer=tf.keras.optimizers.legacy.Adam(learning_rate=learning_rate),
                                 loss=tf.keras.losses.BinaryCrossentropy(from_logits=False),
                                 metrics=[tf.keras.metrics.BinaryAccuracy(),
                                          tf.keras.metrics.Precision(),
                                          tf.keras.metrics.Recall(),
                                          f1_score])

    return classification_model

## Load Data

In [None]:
# Load the disability splits. load_data returns the test pair before the
# validation pair, so the unpacking order below must match.
(
    X_train_disability, y_train_disability,
    X_test_disability, y_test_disability,
    X_val_disability, y_val_disability,
) = load_data('disability')

In [None]:
# Display the first disability training comment as a quick sanity check.
X_train_disability[0]

In [None]:
# Display the second disability training comment as a quick sanity check.
X_train_disability[1]

In [None]:
# Load the gender splits. load_data returns the test pair before the
# validation pair, so the unpacking order below must match.
(
    X_train_gender, y_train_gender,
    X_test_gender, y_test_gender,
    X_val_gender, y_val_gender,
) = load_data('gender')

In [None]:
# Display the first gender training comment as a quick sanity check.
X_train_gender[0]

In [None]:
# Display the second gender training comment as a quick sanity check.
X_train_gender[1]

## Load BERTweet Model from_pretrained()

In [None]:
# For transformers v4.x+:
bertweet_tokenizer = AutoTokenizer.from_pretrained("vinai/bertweet-base", use_fast=False)
bertweet_model = TFRobertaModel.from_pretrained("vinai/bertweet-base")

### Tokenize Data Splits

In [None]:
# Tokenize every split of both identity groups with the BERTweet tokenizer.
# The targets on the left line up one-to-one with the text arrays on the right.
(
    bertweet_train_inputs_disability,
    bertweet_test_inputs_disability,
    bertweet_val_inputs_disability,
    bertweet_train_inputs_gender,
    bertweet_test_inputs_gender,
    bertweet_val_inputs_gender,
) = (
    tokenizing_pipeline(texts, bertweet_tokenizer)
    for texts in (
        X_train_disability, X_test_disability, X_val_disability,
        X_train_gender, X_test_gender, X_val_gender,
    )
)

# Calculate Class Weights for Each Identity Group

Get class weights for disability train set:

In [None]:
# Class balance of the disability training labels: bincount index 0 holds the
# negatives, index 1 the positives.
neg, pos = np.bincount(y_train_disability)
total = neg + pos
positive_pct = 100 * pos / total
print('Disability Examples:\n    Total: {}\n    Positive: {} ({:.2f}% of total)\n'.format(
    total, pos, positive_pct))

# Inverse-frequency weights. Scaling by total/2 keeps the loss at a similar
# magnitude: the sum of the weights over all examples stays the same.
half_total = total / 2.0
weight_for_0 = (1 / neg) * half_total
weight_for_1 = (1 / pos) * half_total

disability_class_weight = {0: weight_for_0, 1: weight_for_1}

print('Disability Weight for class 0: {:.2f}'.format(weight_for_0))
print('Disability Weight for class 1: {:.2f}'.format(weight_for_1))

Get class weights for gender train set:

In [None]:
# Class balance of the gender training labels: bincount index 0 holds the
# negatives, index 1 the positives.
neg, pos = np.bincount(y_train_gender)
total = neg + pos
positive_pct = 100 * pos / total
print('Gender Examples:\n    Total: {}\n    Positive: {} ({:.2f}% of total)\n'.format(
    total, pos, positive_pct))

# Inverse-frequency weights. Scaling by total/2 keeps the loss at a similar
# magnitude: the sum of the weights over all examples stays the same.
half_total = total / 2.0
weight_for_0 = (1 / neg) * half_total
weight_for_1 = (1 / pos) * half_total

gender_class_weight = {0: weight_for_0, 1: weight_for_1}

print('Gender Weight for class 0: {:.2f}'.format(weight_for_0))
print('Gender Weight for class 1: {:.2f}'.format(weight_for_1))

# Build Disability Model

In [None]:
# Disability classifier: the encoder is partially frozen (num_train_layers=6)
# and the model is compiled with learning rate 0.0001.
disability_model = build_bertweet_cls_model(
    learning_rate=0.0001,
    num_train_layers=6,
)

# Build Gender Model

In [None]:
# Gender classifier: the encoder is partially frozen (num_train_layers=6)
# and the model is compiled with learning rate 1e-5.
gender_model = build_bertweet_cls_model(
    learning_rate=1e-5,
    num_train_layers=6,
)

# Round 1: Train Disability Model for 3 epochs

In [None]:
# Round 1: three epochs on the disability training split with class weighting.
# Fixed: the tokenized inputs are named bertweet_*_inputs_disability; the
# previous bert_*_inputs_disability names are not defined in this notebook.
disability_round1_history = disability_model.fit(
    bertweet_train_inputs_disability,
    y_train_disability,
    validation_data=(bertweet_val_inputs_disability, y_val_disability),
    batch_size=64,
    epochs=3,
    class_weight=disability_class_weight,
)

In [None]:
# Round 1 training vs. validation loss, one point per epoch.
history = pd.DataFrame(disability_round1_history.history)
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.title('Round 1: Disability Train vs Val Loss for Half-Frozen Bertweet')
# One tick per recorded epoch index (0..n-1). The original wrote
# len(series + 1), which added 1 to every value before taking the length.
plt.xticks(range(len(history['loss'])))
plt.plot(history['loss'], label="training", marker='o')
plt.plot(history['val_loss'], label="validation", marker='o')
plt.legend()
plt.show()

In [None]:
# Round 1 training vs. validation F1 score, one point per epoch.
history = pd.DataFrame(disability_round1_history.history)
plt.ylabel('F1 Score')
plt.xlabel('Epoch')
plt.title('Round 1: Disability Train vs Val F1 Score for Half-Frozen Bertweet')
# One tick per recorded epoch index (0..n-1). The original wrote
# len(series + 1), which added 1 to every value before taking the length.
plt.xticks(range(len(history['f1_score'])))
plt.plot(history['f1_score'], label="training", marker='o')
plt.plot(history['val_f1_score'], label="validation", marker='o')
plt.legend()
plt.show()

In [None]:
# Round 1 training vs. validation binary accuracy, one point per epoch.
history = pd.DataFrame(disability_round1_history.history)
plt.ylabel('Binary Accuracy')
plt.xlabel('Epoch')
plt.title('Round 1: Disability Train vs Val Binary Accuracy for Half-Frozen Bertweet')
# One tick per recorded epoch index (0..n-1). The original wrote
# len(series + 1), which added 1 to every value before taking the length.
plt.xticks(range(len(history['binary_accuracy'])))
plt.plot(history['binary_accuracy'], label="training", marker='o')
plt.plot(history['val_binary_accuracy'], label="validation", marker='o')
plt.legend()
plt.show()

### Change learning rate for disability_model to something much smaller for future training steps and recompile:

In [None]:
# Recompile disability_model with a new Adam instance at learning rate 1e-5,
# keeping the same loss and the same set of metrics as the initial compile.
f1_score = tfa.metrics.F1Score(1, threshold = 0.5)

low_lr_optimizer = tf.keras.optimizers.legacy.Adam(learning_rate=1e-5)
bce_loss = tf.keras.losses.BinaryCrossentropy(from_logits=False)
round_metrics = [
    tf.keras.metrics.BinaryAccuracy(),
    tf.keras.metrics.Precision(),
    tf.keras.metrics.Recall(),
    f1_score,
]

disability_model.compile(
    optimizer=low_lr_optimizer,
    loss=bce_loss,
    metrics=round_metrics,
)

# Round 2: Train Gender Model for 1 epoch

In [None]:
# Round 2: one epoch on the gender training split with class weighting.
# Fixed: the tokenized inputs are named bertweet_*_inputs_gender; the
# previous bert_*_inputs_gender names are not defined in this notebook.
gender_round2_history = gender_model.fit(
    bertweet_train_inputs_gender,
    y_train_gender,
    validation_data=(bertweet_val_inputs_gender, y_val_gender),
    batch_size=32,
    epochs=1,
    class_weight=gender_class_weight,
)

# Round 3: Train Disability Model for 1 epoch

In [None]:
# Round 3: one epoch on the disability training split with class weighting.
# Fixed: the tokenized inputs are named bertweet_*_inputs_disability; the
# previous bert_*_inputs_disability names are not defined in this notebook.
disability_round3_history = disability_model.fit(
    bertweet_train_inputs_disability,
    y_train_disability,
    validation_data=(bertweet_val_inputs_disability, y_val_disability),
    batch_size=32,
    epochs=1,
    class_weight=disability_class_weight,
)

# Round 4: Train Gender Model for 1 epoch

In [None]:
# Round 4: one epoch on the gender training split with class weighting.
# Fixed: the tokenized inputs are named bertweet_*_inputs_gender; the
# previous bert_*_inputs_gender names are not defined in this notebook.
gender_round4_history = gender_model.fit(
    bertweet_train_inputs_gender,
    y_train_gender,
    validation_data=(bertweet_val_inputs_gender, y_val_gender),
    batch_size=32,
    epochs=1,
    class_weight=gender_class_weight,
)

# Round 4: Train Disability Model for 1 epoch

In [None]:
# One more epoch on the disability training split with class weighting.
# Fixed: the tokenized inputs are named bertweet_*_inputs_disability; the
# previous bert_*_inputs_disability names are not defined in this notebook.
disability_round4_history = disability_model.fit(
    bertweet_train_inputs_disability,
    y_train_disability,
    validation_data=(bertweet_val_inputs_disability, y_val_disability),
    batch_size=32,
    epochs=1,
    class_weight=disability_class_weight,
)

# Round 5: Train Gender Model for 1 epoch

In [None]:
# Round 5: one epoch on the gender training split with class weighting.
# Fixed: the tokenized inputs are named bertweet_*_inputs_gender; the
# previous bert_*_inputs_gender names are not defined in this notebook.
gender_round5_history = gender_model.fit(
    bertweet_train_inputs_gender,
    y_train_gender,
    validation_data=(bertweet_val_inputs_gender, y_val_gender),
    batch_size=32,
    epochs=1,
    class_weight=gender_class_weight,
)

# Round 6: Train Disability Model for 1 epoch

In [None]:
# Round 6: one epoch on the disability training split with class weighting.
# Fixed: the tokenized inputs are named bertweet_*_inputs_disability; the
# previous bert_*_inputs_disability names are not defined in this notebook.
disability_round6_history = disability_model.fit(
    bertweet_train_inputs_disability,
    y_train_disability,
    validation_data=(bertweet_val_inputs_disability, y_val_disability),
    batch_size=32,
    epochs=1,
    class_weight=disability_class_weight,
)

# Evaluate disability_model on Disability Test Set (MAIN)

In [None]:
# Evaluate the disability model on the held-out disability test split; the
# returned loss/metric values are shown as the cell output.
disability_model.evaluate(
    x=bertweet_test_inputs_disability,
    y=y_test_disability,
)

# Evaluate gender_model on Gender Test Set (supporting)
This supporting check shows whether toxicity is easier to predict for gender-related comments than for disability-related comments.

In [None]:
# Evaluate the gender model on the held-out gender test split; the returned
# loss/metric values are shown as the cell output.
gender_model.evaluate(
    x=bertweet_test_inputs_gender,
    y=y_test_gender,
)

# Save Model Weights

In [None]:
# Persist the weights of both models as HDF5 files.
# Make sure the output directory exists first so the saves do not fail on a
# fresh checkout after all training has already been done.
os.makedirs('saved_models', exist_ok=True)

disability_model.save_weights('saved_models/disability_interwoven_half_frozen_weights.h5')
gender_model.save_weights('saved_models/gender_interwoven_half_frozen_weights.h5')