From d75844bbae056fbca3cfd2c8643a9b20581159da Mon Sep 17 00:00:00 2001
From: Chen Qian
Date: Tue, 10 May 2022 14:15:59 -0700
Subject: [PATCH 1/2] Fix the finetuning script

---
 examples/bert/run_glue_finetuning.py | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/examples/bert/run_glue_finetuning.py b/examples/bert/run_glue_finetuning.py
index 156bc6011f..d46aa52962 100644
--- a/examples/bert/run_glue_finetuning.py
+++ b/examples/bert/run_glue_finetuning.py
@@ -18,7 +18,7 @@
 import datasets
 import keras_tuner
 import tensorflow as tf
-import tensorflow_text as tftext
+import tensorflow_text as tf_text
 from absl import app
 from absl import flags
 from tensorflow import keras
@@ -81,20 +81,20 @@ def pack_inputs(
 ):
     # In case inputs weren't truncated (as they should have been),
     # fall back to some ad-hoc truncation.
-    trimmed_segments = tftext.RoundRobinTrimmer(
+    trimmed_segments = tf_text.RoundRobinTrimmer(
         seq_length - len(inputs) - 1
     ).trim(inputs)
     # Combine segments.
-    segments_combined, segment_ids = tftext.combine_segments(
+    segments_combined, segment_ids = tf_text.combine_segments(
         trimmed_segments,
         start_of_sequence_id=start_of_sequence_id,
         end_of_segment_id=end_of_segment_id,
     )
     # Pad to dense Tensors.
-    input_word_ids, _ = tftext.pad_model_inputs(
+    input_word_ids, _ = tf_text.pad_model_inputs(
         segments_combined, seq_length, pad_value=padding_id
     )
-    input_type_ids, input_mask = tftext.pad_model_inputs(
+    input_type_ids, input_mask = tf_text.pad_model_inputs(
         segment_ids, seq_length, pad_value=0
     )
     # Assemble nest of input tensors as expected by BERT model.
@@ -153,9 +153,10 @@ def __init__(self, bert_model, hidden_size, num_classes, **kwargs):
             activation="tanh",
             name="pooler",
         )
-        self._logit_layer = tf.keras.layers.Dense(
+        self._probability_layer = tf.keras.layers.Dense(
             num_classes,
-            name="logits",
+            name="probability",
+            activation="softmax",
         )
 
     def call(self, inputs):
@@ -163,7 +164,7 @@ def call(self, inputs):
         # Get the first [CLS] token from each output.
         outputs = outputs[:, 0, :]
         outputs = self._pooler_layer(outputs)
-        return self._logit_layer(outputs)
+        return self._probability_layer(outputs)
 
 
 class BertHyperModel(keras_tuner.HyperModel):
@@ -184,8 +185,8 @@ def build(self, hp):
             optimizer=keras.optimizers.Adam(
                 learning_rate=hp.Choice("lr", [5e-5, 4e-5, 3e-5, 2e-5])
            ),
-            loss="sparse_categorical_crossentropy",
-            metrics=["accuracy"],
+            loss=keras.losses.SparseCategoricalCrossentropy(),
+            metrics=[keras.metrics.SparseCategoricalAccuracy()],
         )
         return finetuning_model
 
@@ -197,7 +198,7 @@ def main(_):
     with open(FLAGS.vocab_file, "r") as vocab_file:
         for line in vocab_file:
             vocab.append(line.strip())
-    tokenizer = tftext.BertTokenizer(
+    tokenizer = tf_text.BertTokenizer(
         FLAGS.vocab_file,
         lower_case=FLAGS.do_lower_case,
         token_out_type=tf.int32,

From 2bfef5d1ddd8c5acc9b96be6c6db26b38b45e405 Mon Sep 17 00:00:00 2001
From: Chen Qian
Date: Wed, 11 May 2022 10:14:14 -0700
Subject: [PATCH 2/2] change softmax layer to return logits

---
 examples/bert/run_glue_finetuning.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/examples/bert/run_glue_finetuning.py b/examples/bert/run_glue_finetuning.py
index d46aa52962..e18e5029f9 100644
--- a/examples/bert/run_glue_finetuning.py
+++ b/examples/bert/run_glue_finetuning.py
@@ -153,10 +153,9 @@ def __init__(self, bert_model, hidden_size, num_classes, **kwargs):
             activation="tanh",
             name="pooler",
         )
-        self._probability_layer = tf.keras.layers.Dense(
+        self._logit_layer = tf.keras.layers.Dense(
             num_classes,
-            name="probability",
-            activation="softmax",
+            name="logits",
         )
 
     def call(self, inputs):
@@ -164,7 +163,7 @@ def call(self, inputs):
         # Get the first [CLS] token from each output.
         outputs = outputs[:, 0, :]
         outputs = self._pooler_layer(outputs)
-        return self._probability_layer(outputs)
+        return self._logit_layer(outputs)
 
 
 class BertHyperModel(keras_tuner.HyperModel):
@@ -185,7 +184,7 @@ def build(self, hp):
             optimizer=keras.optimizers.Adam(
                 learning_rate=hp.Choice("lr", [5e-5, 4e-5, 3e-5, 2e-5])
             ),
-            loss=keras.losses.SparseCategoricalCrossentropy(),
+            loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
             metrics=[keras.metrics.SparseCategoricalAccuracy()],
         )
         return finetuning_model
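
Note (illustrative sketch, not part of the patch above): the end state pairs a Dense "logits" layer with no activation and SparseCategoricalCrossentropy(from_logits=True), so Keras applies the softmax inside the loss, which is more numerically stable than a separate softmax layer. A minimal standalone example of that pairing, using a hypothetical 768-dimensional pooled-feature input in place of the BERT backbone:

    from tensorflow import keras

    num_classes = 3  # illustrative; the script derives this from the GLUE task
    # Stand-in for the pooled [CLS] features the finetuner produces.
    pooled = keras.Input(shape=(768,), name="pooled_features")
    # No activation here, so the layer returns raw logits.
    logits = keras.layers.Dense(num_classes, name="logits")(pooled)
    model = keras.Model(pooled, logits)

    # from_logits=True tells the loss to apply softmax internally;
    # SparseCategoricalAccuracy works on logits since argmax is unchanged.
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=5e-5),
        loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        metrics=[keras.metrics.SparseCategoricalAccuracy()],
    )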