In [None]:
import bert
from bert import run_classifier
from bert import optimization
import tensorflow as tf
import tensorflow_hub as hub
from datetime import datetime
from sklearn import metrics
# Grab TensorFlow's logger and stop its records from being passed on to
# ancestor loggers (presumably to avoid duplicated log lines in the
# notebook output -- confirm).
logger = tf.get_logger()
logger.propagate = False

## Pretrained and fine-tuned weights
For the baseline model, the pretrained weights are taken directly from tfhub.dev. The final model instead re-uses weights obtained by further pre-training the tfhub model on the fine-foods dataset (in-task pre-training).

In [None]:
# TF-Hub handle of the pretrained BERT encoder; read by create_model().
bert_model_hub = "https://tfhub.dev/google/small_bert/bert_uncased_L-4_H-512_A-8/1"

# Checkpoint produced by in-task pre-training; only used when warm-starting.
start_checkpoint_in_task_pretraining = "finetuned_weights/in_task_pretraining/model.ckpt-12500"

# Output locations for fine-tuned weights (baseline run and final model).
baseline_output_dir = "finetuned_weights/baseline"
model_output_dir = "finetuned_weights/bert_small"

# Create both output directories up front.
for _weights_dir in (model_output_dir, baseline_output_dir):
    tf.gfile.MakeDirs(_weights_dir)

# BERT + Classifier layer
Take the pretrained weights from BERT and add a single classifier layer on top.

In [None]:
def create_model(is_predicting, input_ids, input_mask, segment_ids, num_labels):
    """Build the BERT encoder plus a single linear classification layer.

    The encoder is loaded from the TF-Hub handle stored in the module-level
    `bert_model_hub`; its "pooled_output" is passed through dropout and a
    dense projection to `num_labels` logits.

    Args:
        is_predicting: When False, the hub module is loaded as trainable and
            dropout (rate 0.1) is applied to the pooled output. When True,
            the module is frozen and dropout is disabled.
        input_ids: Fed to the hub module's "tokens" signature.
        input_mask: Fed to the hub module's "tokens" signature.
        segment_ids: Fed to the hub module's "tokens" signature.
        num_labels: Number of classes, i.e. the width of the logits.

    Returns:
        A `(predictions, logits)` tuple where `predictions` is the int32
        argmax of `logits` over the last axis.

    NOTE(review): the dropout/trainable switch is driven solely by
    `is_predicting`, so a caller that passes `is_predicting=False` for
    evaluation gets dropout applied during evaluation as well.
    """
    fine_tuning = not is_predicting

    encoder = hub.Module(bert_model_hub, trainable=fine_tuning)
    encoder_outputs = encoder(
        inputs={
            "input_ids": input_ids,
            "input_mask": input_mask,
            "segment_ids": segment_ids,
        },
        signature="tokens",
        as_dict=True,
    )

    # "pooled_output" is the whole-sequence representation used for
    # classification tasks on an entire sentence.
    pooled = encoder_outputs["pooled_output"]
    hidden_size = pooled.shape[-1].value

    # Parameters of the classification head.
    output_weights = tf.get_variable(
        "output_weights",
        [hidden_size, num_labels],
        initializer=tf.truncated_normal_initializer(stddev=0.02),
    )
    output_bias = tf.get_variable(
        "output_bias",
        [num_labels],
        initializer=tf.zeros_initializer(),
    )

    regularised = tf.keras.layers.Dropout(rate=0.1)(pooled, training=fine_tuning)
    logits = tf.nn.xw_plus_b(regularised, output_weights, output_bias)

    # The argmax op is created under the "loss" scope, as before.
    with tf.variable_scope("loss"):
        predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)

    return predictions, logits

It is convenient to wrap this model in a TensorFlow Estimator, which automates the training loop for us.

In [None]:
def model_fn_builder(num_labels, learning_rate, num_train_steps, num_warmup_steps):
    """Return a `model_fn` closure suitable for `tf.estimator.Estimator`.

    Args:
        num_labels: Number of classes; forwarded to `create_model`.
        learning_rate: Forwarded to `bert.optimization.create_optimizer`.
        num_train_steps: Forwarded to `bert.optimization.create_optimizer`.
        num_warmup_steps: Forwarded to `bert.optimization.create_optimizer`.

    Returns:
        A function `model_fn(features, labels, mode, params)` that builds the
        graph for the requested mode and returns an `EstimatorSpec`.
    """
    def model_fn(features, labels, mode, params):
        """Build the graph for TRAIN, EVAL or PREDICT.

        `features` must contain "input_ids", "input_mask", "segment_ids" and
        "label_ids". The `labels` and `params` arguments are not read here;
        the label ids are taken from `features`.
        """
        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        label_ids = features["label_ids"]

        is_predicting = (mode == tf.estimator.ModeKeys.PREDICT)

        # The network itself is identical for every mode, so build it once.
        predictions, logits = create_model(is_predicting, input_ids, input_mask, segment_ids, num_labels)

        # PREDICT: only the outputs are needed.
        if is_predicting:
            probs = tf.nn.softmax(logits, axis=-1)
            outputs = {'probabilities': probs, 'predictions': predictions, 'labels': label_ids}
            return tf.estimator.EstimatorSpec(mode, predictions=outputs)

        # TRAIN and EVAL both need the loss.
        loss = tf.keras.losses.sparse_categorical_crossentropy(label_ids, logits, from_logits=True)
        loss = tf.reduce_mean(loss)

        # EVAL: report loss and streaming accuracy. No optimizer is built
        # here because nothing in evaluation consumes a train_op.
        if mode == tf.estimator.ModeKeys.EVAL:
            eval_metrics = {"accuracy": tf.metrics.accuracy(label_ids, predictions)}
            return tf.estimator.EstimatorSpec(mode=mode, loss=loss, eval_metric_ops=eval_metrics)

        # TRAIN
        train_op = bert.optimization.create_optimizer(loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu=False)

        # Summaries
        tf.summary.scalar("cross_entropy_loss", loss)
        accuracy_value, accuracy_op = tf.metrics.accuracy(label_ids, predictions)
        with tf.control_dependencies([accuracy_op]):
            tf.summary.scalar("accuracy", accuracy_value)

        # Look up the learning-rate schedule by op name so it can be logged.
        # Presumably this op is created inside create_optimizer -- confirm.
        # If it is absent, skip the summary instead of failing on an
        # unbound variable.
        lr_op = next(
            (op for op in tf.get_default_graph().get_operations() if op.name == "PolynomialDecay"),
            None,
        )
        if lr_op is not None:
            tf.summary.scalar("learning_rate", lr_op.values()[0])
        else:
            tf.get_logger().warning(
                "No 'PolynomialDecay' op found; learning_rate summary skipped")

        return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)

    # Return the actual model function in the closure
    return model_fn


# Training the final model
This can optionally warm-start from an in-task pretraining checkpoint.

In [None]:
# Compute train and warmup steps from batch size
# These hyperparameters are copied from this colab notebook (https://colab.sandbox.google.com/github/tensorflow/tpu/blob/master/tools/colab/bert_finetuning_with_cloud_tpus.ipynb)
batch_size = 64
max_seq_len = 128
learning_rate = 5e-5
# NOTE(review): 500000 is presumably the number of training examples, which
# would make this roughly one epoch -- confirm.
num_train_steps = 500000//batch_size
num_warmup_steps = 0
num_labels = 5 # i.e. num_categories

# Select whether to warm start from a specific checkpoint, such as an in-task
# pretraining checkpoint. Off by default; set the flag to True to enable it.
use_in_task_pretraining = False
if use_in_task_pretraining:
    warm_start_from = tf.estimator.WarmStartSettings(start_checkpoint_in_task_pretraining,
                                                     vars_to_warm_start=".*bert.*")
else:
    warm_start_from = None

# Specify output directory and number of checkpoint steps to save
run_config = tf.estimator.RunConfig(model_dir=model_output_dir, save_summary_steps=10,
                                    save_checkpoints_steps=500, keep_checkpoint_max=2)

model_fn = model_fn_builder(num_labels, learning_rate=learning_rate, num_train_steps=num_train_steps,
                            num_warmup_steps=num_warmup_steps)

estimator = tf.estimator.Estimator(model_fn=model_fn, config=run_config,
                                   params={"batch_size": batch_size},
                                   warm_start_from=warm_start_from)

train_input_fn = bert.run_classifier.file_based_input_fn_builder("datasets/training2", max_seq_len,
                                                                 is_training=True, drop_remainder=True)
print(f"Training for {num_train_steps} steps")
current_time = datetime.now()
estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)
print("Training took time ", datetime.now() - current_time)

# Run (dev) evaluation

In [None]:
# Input pipeline over the dev split: no training-mode behaviour and the final
# partial batch is kept.
dev_input_fn = bert.run_classifier.file_based_input_fn_builder(
    "datasets/dev2",
    max_seq_len,
    is_training=False,
    drop_remainder=False,
)

# steps=None lets evaluation run until the input function is exhausted;
# the returned metrics are displayed as the cell output.
estimator.evaluate(input_fn=dev_input_fn, steps=None)

# Classification report on test set

In [None]:
# NOTE(review): despite its name, `dev_input_fn` is rebound here to read the
# *test* split ("datasets/test2"); the name is kept so existing notebook
# state is unchanged.
dev_input_fn = bert.run_classifier.file_based_input_fn_builder(
    "datasets/test2",
    max_seq_len,
    is_training=False,
    drop_remainder=False,
)

# estimator.predict yields lazily; materialise every per-example result dict.
predictions = [example for example in estimator.predict(input_fn=dev_input_fn)]

In [None]:
# Each prediction dict carries the ground-truth label under "labels" and the
# model's argmax class under "predictions" (see the PREDICT branch of
# model_fn). classification_report expects (y_true, y_pred) in that order;
# swapping them would transpose precision and recall.
y_true = [e["labels"] for e in predictions]
y_pred = [e["predictions"] for e in predictions]
report = metrics.classification_report(y_true, y_pred, target_names=[f"{i} star review" for i in range(1,6)])
print(report)