In [28]:
# General
import os
import shutil
from collections import Counter
from tempfile import mkdtemp
from shutil import rmtree
from tqdm.notebook import tqdm

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Sklearn
from sklearn.feature_extraction.text import (
    CountVectorizer, TfidfVectorizer)
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import (
    SelectKBest, VarianceThreshold, f_classif)
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import f1_score, balanced_accuracy_score, make_scorer
from sklearn.preprocessing import LabelBinarizer

# Custom
from data_io import read_data
from utils import label_map, normalize

# Tensorflow BERT
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_text as text
from official.nlp import optimization  # to create AdamW optimizer

# Weights & Biases
import wandb
from wandb.keras import WandbCallback

In [29]:
# print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

Num GPUs Available:  4


### Load W&B
Use Weights and Biases (http://wandb.ai) for monitoring (account required)

In [1]:
# wandb.login()
# os.environ["WANDB_NOTEBOOK_NAME"] = "bert.ipynb"
# wandb.init(project='mlhc-bert', entity='burgerm')

In [2]:
# sweep_config = {
#   "name" : "mlhc-bert-sweep-trainable-test",
#   "project" : "mlhc-bert",
#   "method" : "grid",
#   "entity": "mlhc-bert",
#   "parameters" : {
#     "epochs" : {
#       "values" : [1, 5]
#     },
#     "trainable" :{
#       "values" : [True]
#     }
#   }
# }

# wandb_sweep_id = wandb.sweep(sweep_config, project='mlhc-bert', entity='burgerm')

### Load data

In [None]:
# Training split: raw texts plus their labels, each label translated through
# `label_map` and collected into a NumPy array.
texts_train, labels_train = read_data(mode='train')
y_train_full = np.asarray(
    [label_map[label_name] for label_name in labels_train]
)

In [33]:
# Validation split: raw texts plus their labels, each label translated through
# `label_map` and collected into a NumPy array.
texts_val, labels_val = read_data(mode='val')
y_val_full = np.asarray(
    [label_map[label_name] for label_name in labels_val]
)

In [34]:
# Test split: raw texts plus their labels, each label translated through
# `label_map` and collected into a NumPy array.
texts_test, labels_test = read_data(mode='test')
y_test_full = np.asarray(
    [label_map[label_name] for label_name in labels_test]
)

#### Class Weights

In [35]:
# Inverse-frequency class weights: each class is weighted by the reciprocal of
# its share of the training labels, so rarer classes get larger weights.
N = len(y_train_full)
count = Counter(y_train_full)
class_weights = {
    cl: 1 / (n_in_class / N)
    for cl, n_in_class in count.items()
}
print(class_weights)

{0: 11.245473819074784, 2: 3.061034949473142, 1: 6.510950387679048, 4: 2.8865257852639603, 3: 11.853425222801594}


### BERT
Use a pretrained BERT language model from TensorFlow Hub, specifically the expert model pretrained on the PubMed dataset.

In [4]:
# Name of the encoder to use. The small general-purpose BERT below is kept as a
# commented-out alternative; it has no entry in the lookup tables that follow.
# bert_model_name = 'small_bert/bert_en_uncased_L-2_H-128_A-2'
bert_model_name = 'experts_pubmed'

# Model name -> TF Hub URL of the encoder.
map_name_to_handle = {
    'experts_pubmed': 'https://tfhub.dev/google/experts/bert/pubmed/2',
}

# Model name -> TF Hub URL of the preprocessing model paired with that encoder.
map_model_to_preprocess = {
    'experts_pubmed': 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',
}

# Resolve both handles for the selected model (KeyError if the name is unknown).
tfhub_handle_preprocess = map_model_to_preprocess[bert_model_name]
tfhub_handle_encoder = map_name_to_handle[bert_model_name]

print(f'BERT model selected           : {tfhub_handle_encoder}')
print(f'Preprocess model auto-selected: {tfhub_handle_preprocess}')

BERT model selected           : https://tfhub.dev/google/experts/bert/pubmed/2
Preprocess model auto-selected: https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3


In [None]:
# Load the BERT *preprocessing* model (not the encoder) from TensorFlow Hub as a
# Keras layer.
# NOTE(review): `bert_preprocess_model` is not referenced by the other cells
# visible here; `build_classifier_model` creates its own preprocessing layer
# from `tfhub_handle_preprocess`. Confirm whether this standalone load is needed.
bert_preprocess_model = hub.KerasLayer(tfhub_handle_preprocess)

### Preprocessing

#### One-Hot encode labels

In [38]:
# Binarizer used to one-hot encode the class ids of every split.
labelencoder = LabelBinarizer()

In [39]:
# Learn the class -> column mapping from the training labels only, then reuse
# that fitted mapping for validation and test. Refitting on each split would
# derive the columns from whichever classes happen to occur in that split, so a
# split missing a class would get a different (narrower) one-hot layout than
# the one the model is trained on.
y_train_oh = labelencoder.fit_transform(y_train_full)
y_val_oh = labelencoder.transform(y_val_full)
y_test_oh = labelencoder.transform(y_test_full)

In [41]:
# The datasets carry the raw input sentences; tokenisation is left to the
# TensorFlow BERT preprocessor inside the model.
def _to_text_label_dataset(texts, onehot_labels):
    """Return an unbatched tf.data.Dataset of (string text, int32 one-hot label) pairs."""
    text_tensor = tf.convert_to_tensor(texts, dtype=tf.string)
    label_tensor = tf.convert_to_tensor(onehot_labels, dtype=tf.int32)
    return tf.data.Dataset.from_tensor_slices((text_tensor, label_tensor))


train_ds = _to_text_label_dataset(texts_train, y_train_oh)
val_ds = _to_text_label_dataset(texts_val, y_val_oh)
test_ds = _to_text_label_dataset(texts_test, y_test_oh)

In [5]:
# Check data
# for text_batch, label_batch in train_ds.take(2):
#     print(f'Review: {text_batch.numpy()}')
#     label = label_batch.numpy()
#     print(f'Label : {label}')
#     print()

### Load model

In [6]:
# Load the pretrained BERT encoder from TensorFlow Hub as a Keras layer.
# NOTE(review): `bert_model` is not referenced by the other cells visible here;
# `build_classifier_model` instantiates its own encoder layer from
# `tfhub_handle_encoder`. Confirm whether this standalone load is still needed.
bert_model = hub.KerasLayer(tfhub_handle_encoder)

In [44]:
# Build a pipeline with preprocessor, BERT language model, dense classification head
def build_classifier_model(train_transformer=False, num_classes=5,
                           hidden_units=(128, 128), dropout_rate=0.2,
                           hidden_activation=None):
    """Assemble the end-to-end text classifier as a Keras functional model.

    The model takes raw strings, runs them through the TF Hub preprocessing
    layer (`tfhub_handle_preprocess`) and the TF Hub BERT encoder
    (`tfhub_handle_encoder`), and feeds the encoder's `pooled_output` into a
    dropout layer, a stack of dense layers and a softmax output layer.

    Args:
        train_transformer: Passed as `trainable` to the encoder layer; when
            False the encoder weights are frozen and only the head is trained.
        num_classes: Width of the softmax output layer.
        hidden_units: Iterable with the width of each hidden dense layer, in
            order. An empty iterable attaches the softmax layer directly after
            the dropout.
        dropout_rate: Dropout rate applied to the pooled encoder output.
        hidden_activation: Activation of the hidden dense layers. The default
            `None` keeps them linear, so with the defaults the hidden layers
            add no non-linearity; pass e.g. "relu" for a non-linear head.

    Returns:
        An uncompiled `tf.keras.Model` mapping a batch of strings to
        `num_classes` class probabilities.
    """
    # Scalar string input: one raw sentence per example.
    text_input = tf.keras.layers.Input(shape=(), dtype=tf.string, name='text')

    # Preprocessing with the TF Hub preprocessor paired with the encoder.
    preprocessing_layer = hub.KerasLayer(tfhub_handle_preprocess, name='preprocessing')
    encoder_inputs = preprocessing_layer(text_input)

    # Pass through the pretrained BERT model.
    encoder = hub.KerasLayer(tfhub_handle_encoder, trainable=train_transformer, name='BERT_encoder')
    outputs = encoder(encoder_inputs)

    # Attach the dense classification head to the pooled output of the language model.
    net = outputs['pooled_output']
    net = tf.keras.layers.Dropout(dropout_rate)(net)
    for units in hidden_units:
        net = tf.keras.layers.Dense(units, activation=hidden_activation)(net)
    net = tf.keras.layers.Dense(num_classes, activation="softmax", name='classifier')(net)

    return tf.keras.Model(text_input, net)

In [45]:
# Check model structure
# tf.keras.utils.plot_model(classifier_model, show_shapes=True)

## Training

#### W&B Sweep
Use W&B Sweeps to tune hyperparameters and directly store the best models

In [46]:
def _log_split_f1(model, dataset, y_true, log_key, display_name, run_params,
                  eval_batch_size=64):
    """Predict on one split, print its micro-averaged F1 and log it to W&B.

    Args:
        model: Model whose `predict` returns per-class scores for each example.
        dataset: Unbatched dataset for the split. It must not be shuffled,
            because predictions are matched to `y_true` by position.
        y_true: Integer class ids of the split, in dataset order.
        log_key: Metric name used in the W&B log entry.
        display_name: Split name used in the printed message.
        run_params: Extra key/value pairs logged alongside the score.
        eval_batch_size: Batch size used for prediction.

    Returns:
        The micro-averaged F1 score.
    """
    y_proba = model.predict(dataset.batch(eval_batch_size))
    # The highest-scoring output column is taken as the predicted class id.
    y_pred = np.argmax(y_proba, axis=1)
    score = f1_score(y_true, y_pred, average="micro")
    print(f"F1 Score on {display_name}: {score:.4f}")
    wandb.log({log_key: score, **run_params})
    return score


def training():
    """Run one W&B-configured training run and report F1 on all three splits.

    Reads `epochs` and `trainable` from `wandb.config`, trains the classifier
    built by `build_classifier_model` on `train_ds` with `class_weights`,
    validates on `val_ds`, checkpoints the best-validation-loss model under
    `./model/`, and finally logs micro-averaged F1 for train, validation and
    test. Takes no arguments so it can be passed as the `function` of a W&B
    sweep agent.
    """
    # Initialize W&B
    with wandb.init():

        # Load W&B configuration for current run
        config = wandb.config
        epochs = config["epochs"]
        trainable = config["trainable"]

        # Loss function (labels are one-hot encoded)
        loss = tf.keras.losses.CategoricalCrossentropy()

        # Hyperparameter configuration
        batch_size = 128
        steps_per_epoch = tf.data.experimental.cardinality(train_ds.batch(batch_size)).numpy()
        num_train_steps = steps_per_epoch * epochs
        num_warmup_steps = int(0.1*num_train_steps)
        init_lr = 3e-5

        # `fit` does not reorder a tf.data.Dataset by itself, so shuffle the
        # training examples explicitly (whole dataset as buffer, new order
        # every epoch) before batching. `train_ds` itself stays unshuffled so
        # the evaluation below can align predictions with `y_train_full`.
        num_train_examples = tf.data.experimental.cardinality(train_ds).numpy()
        train_batches = train_ds.shuffle(
            buffer_size=num_train_examples, reshuffle_each_iteration=True
        ).batch(batch_size)

        # Setup multi-GPU training. With no explicit device list the strategy
        # mirrors across whatever GPUs are visible instead of requiring
        # exactly GPU:0..GPU:3 to exist.
        strategy = tf.distribute.MirroredStrategy()
        with strategy.scope():

            # Optimizer, Adam with weight decay
            optimizer = optimization.create_optimizer(
                init_lr=init_lr,
                num_train_steps=num_train_steps,
                num_warmup_steps=num_warmup_steps,
                optimizer_type='adamw'
            )

            # Load model
            classifier_model = build_classifier_model(train_transformer=trainable)

            # Compile model
            classifier_model.compile(
                optimizer=optimizer,
                loss=loss,
                metrics=['accuracy']
            )

        # Store best loss model locally
        save_best_cb = tf.keras.callbacks.ModelCheckpoint(
            f'./model/bert_trainable_epochs{config["epochs"]}_best_chkpt', monitor='val_loss', verbose=1, save_best_only=True,
            save_weights_only=False, mode='min', save_freq='epoch'
        )

        # Run training
        # The W&B callback reports the training metrics to the online dashboard.
        history = classifier_model.fit(
            x=train_batches,
            validation_data=val_ds.batch(batch_size),
            epochs=epochs,
            class_weight=class_weights,
            callbacks=[WandbCallback(), save_best_cb]
        )

        # Score every split and log it to W&B. Note that `classifier_model`
        # holds the weights from the end of training here; the best checkpoint
        # written by `save_best_cb` is not reloaded.
        run_params = {"trainable": trainable, "epochs": epochs}
        _log_split_f1(classifier_model, train_ds, y_train_full, "train_f1", "train", run_params)
        _log_split_f1(classifier_model, val_ds, y_val_full, "val_f1", "val", run_params)
        _log_split_f1(classifier_model, test_ds, y_test_full, "test_f1", "Test", run_params)


#### Run W&B Sweep

In [7]:
# Run W&B Sweep
# wandb.agent(wandb_sweep_id, function=training)