In [1]:
#pip install transformers



In [2]:
from tensorflow.keras import layers

In [4]:
from transformers import BertTokenizer,TFBertModel,TFBertForSequenceClassification,modeling_tf_utils
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers

class BERT_Classification():
  """Tokenizes text with the `bert-base-uncased` tokenizer and builds/trains BERT classifiers.

  The constructor tokenizes the training and validation sentences eagerly and
  stores the resulting arrays on the instance; the `create_model_*` methods
  build compiled Keras models, and `train` fits one of them on the stored data.
  """

  def __init__(self, x_train, y_train, x_val, y_val, nums_category, batch_size, max_length=512):
    """Load the tokenizer and tokenize both data splits.

    Args:
      x_train: iterable of training sentences (strings).
      y_train: training labels, one per sentence. They are fed to
        SparseCategoricalCrossentropy, so integer class ids are expected.
      x_val: iterable of validation sentences (strings).
      y_val: validation labels, one per sentence.
      nums_category: number of output classes of the classification head.
      batch_size: batch size passed to `model.fit` in `train`.
      max_length: length every sentence is padded/truncated to, also used as
        the model input width. Defaults to 512, the previously hard-coded value.
    """
    self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)
    self.batch_size = batch_size
    self.max_length = max_length
    self.nums_category = nums_category
    print('Start Tokenization')
    self.input_ids_train, self.attention_masks_train, self.labels_train = self.tokenization(x_train, y_train, self.tokenizer)
    self.input_ids_val, self.attention_masks_val, self.labels_val = self.tokenization(x_val, y_val, self.tokenizer)

  def tokenization(self, sentences, labels, tokenizer=None):
    """Encode sentences into fixed-length id and attention-mask arrays.

    Args:
      sentences: iterable of strings to encode.
      labels: labels matching `sentences`; converted with `np.array`.
      tokenizer: object exposing `encode_plus`; falls back to `self.tokenizer`
        when None.

    Returns:
      Tuple `(input_ids, attention_masks, labels)`. The first two are arrays
      with one row per sentence and `self.max_length` columns; for an empty
      `sentences` they are empty int32 arrays of shape `(0, self.max_length)`.
    """
    if tokenizer is None:
      tokenizer = self.tokenizer

    # input_ids: indices of the input tokens in the vocabulary.
    # attention_masks: mask that keeps attention off the padding positions.
    input_ids = []
    attention_masks = []

    for sent in sentences:
      encoded_dict = tokenizer.encode_plus(
          sent,
          add_special_tokens=True,       # Add '[CLS]' and '[SEP]'.
          max_length=self.max_length,
          padding='max_length',          # Pad short sentences up to max_length.
          truncation=True,               # Explicitly cut long sentences (silences the warning).
          return_attention_mask=True,
          return_tensors='tf',           # Return TensorFlow tensors.
      )
      input_ids.append(encoded_dict['input_ids'])
      attention_masks.append(encoded_dict['attention_mask'])

    labels = np.array(labels)
    if not input_ids:
      # np.concatenate rejects an empty list, so build the empty result directly.
      empty = np.zeros((0, self.max_length), dtype=np.int32)
      return empty, empty.copy(), labels

    # Each encoding has a leading batch axis of 1; stack them along it.
    input_ids = np.concatenate(input_ids, axis=0)
    attention_masks = np.concatenate(attention_masks, axis=0)
    return input_ids, attention_masks, labels

  def _compile(self, model):
    """Compile `model` with the optimizer, loss and metric shared by all variants."""
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=2e-5, epsilon=1e-08),
        loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        metrics=["acc"],
    )

  def _build_classifier(self, use_mean_pooling, trainable):
    """Build and compile encoder -> dropout -> dense logits, differing only in pooling."""
    encoder = TFBertModel.from_pretrained("bert-base-uncased")
    encoder.trainable = trainable

    input_ids = layers.Input(shape=(self.max_length,), dtype=tf.int32)
    attention_mask = layers.Input(shape=(self.max_length,), dtype=tf.int32)

    embed = encoder(
        input_ids, attention_mask=attention_mask
    )
    if use_mean_pooling:
      # NOTE(review): this averages over every position, padding included;
      # a mask-weighted mean may be preferable -- confirm before changing.
      features = tf.reduce_mean(embed[0], axis=1)
    else:
      features = embed[1]
    dropout = layers.Dropout(0.1)(features)
    out = layers.Dense(self.nums_category, kernel_initializer=modeling_tf_utils.get_initializer(0.02))(dropout)

    model = tf.keras.Model(
        inputs=[input_ids, attention_mask],
        outputs=[out],
    )
    self._compile(model)
    model.summary()
    return model

  def create_model_sequence_output(self, trainable=True):
    """Build a classifier on the mean over axis 1 of the encoder's first output.

    Args:
      trainable: whether the BERT encoder weights are updated during training.

    Returns:
      A compiled `tf.keras.Model` taking `[input_ids, attention_mask]`.
    """
    return self._build_classifier(use_mean_pooling=True, trainable=trainable)

  def create_model_cls_output(self, trainable=True):
    """Build a classifier on the encoder's second output (`embed[1]`).

    Args:
      trainable: whether the BERT encoder weights are updated during training.

    Returns:
      A compiled `tf.keras.Model` taking `[input_ids, attention_mask]`.
    """
    return self._build_classifier(use_mean_pooling=False, trainable=trainable)

  def create_model_2(self):
    """Build the stock `TFBertForSequenceClassification` model, compiled like the others.

    The head is sized with `self.nums_category` instead of the library default.
    """
    model = TFBertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=self.nums_category)
    self._compile(model)
    model.summary()
    return model

  def train(self, model, epochs):
    """Fit `model` on the stored training arrays, validating on the stored validation arrays.

    Args:
      model: a compiled Keras model accepting `[input_ids, attention_mask]`.
      epochs: number of epochs passed to `model.fit`.

    Returns:
      The object returned by `model.fit`.
    """
    return model.fit(
        [self.input_ids_train, self.attention_masks_train],
        self.labels_train,
        epochs=epochs,
        batch_size=self.batch_size,
        validation_data=([self.input_ids_val, self.attention_masks_val], self.labels_val),
    )



In [7]:
def index_to_word(x_train, y_train, index_word_dict, num_data):
  """Decode the first `num_data` index sequences into space-joined strings.

  Args:
    x_train: sequence of sequences of word indices.
    y_train: labels aligned with `x_train`.
    index_word_dict: mapping from index to word; each index is shifted
      by -3 before lookup and unknown keys decode to ''.
    num_data: how many leading examples to keep. Values larger than the data
      are clipped by slicing, for both sentences and labels.

  Returns:
    Tuple `(data, labels)`: the decoded sentences and `y_train[:num_data]`.
  """
  # Look words up in the mapping that was passed in, not in a notebook global.
  data = [
      ' '.join(index_word_dict.get(index - 3, '') for index in sequence)
      for sequence in x_train[:num_data]
  ]
  return data, y_train[:num_data]

# Load the IMDB reviews as sequences of word indices (maxlen=512 is forwarded to Keras).
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.imdb.load_data(maxlen=512)

# Invert the word -> index vocabulary so indices can be decoded back into words.
imdb_dict = tf.keras.datasets.imdb.get_word_index(path="imdb_word_index.json")
index_word = {value: key for key, value in imdb_dict.items()}

# Training set: the first 10000 decoded training reviews.
x_train_words, train_label = index_to_word(x_train, y_train, index_word, num_data=10000)
# Validation set: the first 1000 decoded reviews of the Keras test split.
x_test_words, test_label = index_to_word(x_test, y_test, index_word, num_data=1000)

# Two output classes, batch size 32; tokenization runs inside the constructor.
bert_model = BERT_Classification(
    x_train=x_train_words,
    y_train=train_label,
    x_val=x_test_words,
    y_val=test_label,
    nums_category=2,
    batch_size=32,
)

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


Start Tokenization




In [8]:
use_tpu = True

if use_tpu:
    # Connect to and initialize the TPU system, then create its distribution strategy.
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.experimental.TPUStrategy(tpu)
else:
    # The default strategy lets the model be built through the same code path.
    strategy = tf.distribute.get_strategy()

# Build the model once, inside whichever strategy scope was selected.
with strategy.scope():
    fine_tune_model = bert_model.create_model_sequence_output(trainable=True)

# Fine-tune for 2 epochs.
bert_model.train(fine_tune_model, 2)





INFO:tensorflow:Initializing the TPU system: grpc://10.45.234.218:8470


INFO:tensorflow:Initializing the TPU system: grpc://10.45.234.218:8470


INFO:tensorflow:Clearing out eager caches


INFO:tensorflow:Clearing out eager caches


INFO:tensorflow:Finished initializing TPU system.


INFO:tensorflow:Finished initializing TPU system.


INFO:tensorflow:Found TPU system:


INFO:tensorflow:Found TPU system:


INFO:tensorflow:*** Num TPU Cores: 8


INFO:tensorflow:*** Num TPU Cores: 8


INFO:tensorflow:*** Num TPU Workers: 1


INFO:tensorflow:*** Num TPU Workers: 1


INFO:tensorflow:*** Num TPU Cores Per Worker: 8


INFO:tensorflow:*** Num TPU Cores Per Worker: 8


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:0, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:0, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:1, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:1, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:2, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:2, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:3, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:3, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:4, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:4, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:5, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:5, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:6, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:6, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:7, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:7, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU_SYSTEM:0, TPU_SYSTEM, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU_SYSTEM:0, TPU_SYSTEM, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 0, 0)
Some layers from the model checkpoint at bert-base-uncased were not used when initializing TFBertModel: ['nsp___cls', 'mlm___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertModel were initialized from the model checkpoint at bert-base-uncased.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.


Model: "functional_3"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_3 (InputLayer)            [(None, 512)]        0                                            
__________________________________________________________________________________________________
input_4 (InputLayer)            [(None, 512)]        0                                            
__________________________________________________________________________________________________
tf_bert_model_1 (TFBertModel)   TFBaseModelOutputWit 109482240   input_3[0][0]                    
                                                                 input_4[0][0]                    
__________________________________________________________________________________________________
tf_op_layer_Mean_1 (TensorFlowO [(None, 768)]        0           tf_bert_model_1[0][0] 





















KeyboardInterrupt: ignored