In [1]:
import transformers
import tensorflow as tf
import json

In [2]:
# Name -> class lookup for non-built-in Keras objects. It is handed to
# tf.keras.models.load_model in the next cell so that saved models which
# contain a TFDistilBertModel can be deserialized.
custom_objects = dict(TFDistilBertModel=transformers.TFDistilBertModel)

2022-11-29 12:03:16.940737: I tensorflow/core/common_runtime/process_util.cc:146] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.


In [3]:
def _load_trait_model(trait, input_dir='../input'):
    """Load the saved Keras model for one scoring trait.

    Args:
        trait: Trait name such as ``'cohesion'``. It is used both as the
            sub-directory name and as the ``.h5`` file stem.
        input_dir: Directory holding one sub-directory per trait.

    Returns:
        Whatever ``tf.keras.models.load_model`` returns for
        ``<input_dir>/<trait>/<trait>.h5``.
    """
    # The path is built from a single `trait` value so the directory and file
    # name cannot drift apart; `custom_objects` (defined in the previous cell)
    # is passed by keyword to make its role explicit.
    return tf.keras.models.load_model(
        f'{input_dir}/{trait}/{trait}.h5',
        custom_objects=custom_objects,
    )


# One independently saved model per trait.
cohesion_model = _load_trait_model('cohesion')
syntax_model = _load_trait_model('syntax')
vocabulary_model = _load_trait_model('vocabulary')
phraseology_model = _load_trait_model('phraseology')
grammar_model = _load_trait_model('grammar')
conventions_model = _load_trait_model('conventions')

2022-11-29 12:03:17.081686: W tensorflow/python/util/util.cc:348] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.


In [4]:
def _suffix_layer_names(trait_model, suffix):
    """Append ``suffix`` to the name of every layer in ``trait_model``.

    The rename is done through the private ``_name`` attribute, exactly as the
    original cell did. Layers whose name already ends with ``suffix`` are left
    alone, so re-running this cell does not stack the suffix a second time.

    Args:
        trait_model: Model whose ``layers`` are renamed in place.
        suffix: Text appended to each layer name, e.g. ``'_cohesion'``.

    Returns:
        None. ``trait_model``'s layers are mutated in place.
    """
    for layer in trait_model.layers:
        if not layer._name.endswith(suffix):
            layer._name = layer._name + suffix


# Give each trait model's layers a trait-specific suffix. Presumably this is
# needed because the separately saved models reuse the same layer names, and
# layers from several of them are combined into one Keras model further down.
_suffix_layer_names(cohesion_model, '_cohesion')
_suffix_layer_names(syntax_model, '_syntax')
_suffix_layer_names(vocabulary_model, '_vocabulary')
_suffix_layer_names(phraseology_model, '_phraseology')
_suffix_layer_names(grammar_model, '_grammar')
_suffix_layer_names(conventions_model, '_conventions')

In [5]:
# Length of each integer input sequence accepted by the combined model.
MAX_SEQ_LEN = 512
# Local copy of the stock DistilBERT checkpoint used as the shared encoder.
PRETRAINED_DIR = '../input/huggingface-bert-variants/distilbert-base-uncased/distilbert-base-uncased'
# Positions, inside each loaded trait model, of the two layers that are
# re-attached to the shared encoder.
POOLING_LAYER_INDEX = 3
OUTPUT_LAYER_INDEX = 4


def _attach_trait_head(trait_model, hidden_state):
    """Re-apply one trait model's head layers to a shared hidden state.

    Picks ``trait_model.layers[POOLING_LAYER_INDEX]`` and
    ``trait_model.layers[OUTPUT_LAYER_INDEX]`` and calls them in that order,
    feeding the first layer's result into the second.

    Args:
        trait_model: One of the loaded per-trait Keras models.
        hidden_state: Tensor the first selected layer is called on.

    Returns:
        The tensor produced by the second selected layer.

    Raises:
        IndexError: If ``trait_model`` has fewer than five layers.
    """
    # NOTE(review): the fixed indices assume every trait model has the same
    # layout (pooling at 3, output layer at 4) -- confirm for all six models.
    pooling_layer = trait_model.layers[POOLING_LAYER_INDEX]
    output_layer = trait_model.layers[OUTPUT_LAYER_INDEX]
    return output_layer(pooling_layer(hidden_state))


input_ids_layer = tf.keras.Input(shape=(MAX_SEQ_LEN,), dtype=tf.int32)
attention_mask_layer = tf.keras.Input(shape=(MAX_SEQ_LEN,), dtype=tf.int32)

# NOTE(review): the shared encoder is loaded from the stock checkpoint, not
# taken from the trait models. The re-attached heads only see the features
# they were trained on if the trait models' encoders still hold these same
# pretrained weights (e.g. they were frozen during training) -- confirm.
base_model = transformers.TFDistilBertModel.from_pretrained(PRETRAINED_DIR)
base_model_output = base_model(input_ids=input_ids_layer, attention_mask=attention_mask_layer)
shared_hidden_state = base_model_output.last_hidden_state

# One head per trait, attached in the same order as the model outputs below.
cohesion_output_output = _attach_trait_head(cohesion_model, shared_hidden_state)
syntax_output_output = _attach_trait_head(syntax_model, shared_hidden_state)
vocabulary_output_output = _attach_trait_head(vocabulary_model, shared_hidden_state)
phraseology_output_output = _attach_trait_head(phraseology_model, shared_hidden_state)
grammar_output_output = _attach_trait_head(grammar_model, shared_hidden_state)
conventions_output_output = _attach_trait_head(conventions_model, shared_hidden_state)

# Single model: two inputs, six outputs (one per trait, in this fixed order).
model = tf.keras.Model(
    inputs=[input_ids_layer, attention_mask_layer],
    outputs=[
        cohesion_output_output,
        syntax_output_output,
        vocabulary_output_output,
        phraseology_output_output,
        grammar_output_output,
        conventions_output_output,
    ],
)
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.003),
    loss='mse',
)

Some layers from the model checkpoint at ../input/huggingface-bert-variants/distilbert-base-uncased/distilbert-base-uncased were not used when initializing TFDistilBertModel: ['vocab_transform', 'vocab_projector', 'activation_13', 'vocab_layer_norm']
- This IS expected if you are initializing TFDistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFDistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFDistilBertModel were initialized from the model checkpoint at ../input/huggingface-bert-variants/distilbert-base-uncased/distilbert-base-uncased.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFDistilBertMo

In [6]:
# Print the layer-by-layer summary of the combined model built in the previous
# cell (layer names, output shapes, parameter counts and connections).
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 512)]        0                                            
__________________________________________________________________________________________________
input_2 (InputLayer)            [(None, 512)]        0                                            
__________________________________________________________________________________________________
tf_distil_bert_model_6 (TFDisti TFBaseModelOutput(la 66362880    input_1[0][0]                    
                                                                 input_2[0][0]                    
__________________________________________________________________________________________________
global_average_pooling1d_cohesi (None, 768)          0           tf_distil_bert_model_6[0][0] 

In [7]:
# Write the combined model plus the DistilBERT config and tokenizer files
# into the current working directory.
pretrained_dir = '../input/huggingface-bert-variants/distilbert-base-uncased/distilbert-base-uncased'
output_dir = './'

model.save('model.h5')

config = transformers.DistilBertConfig.from_pretrained(pretrained_dir)
config.save_pretrained(output_dir)

tokenizer = transformers.DistilBertTokenizerFast.from_pretrained(pretrained_dir)
# Kept as the last expression so the cell still displays the tuple of files
# returned by save_pretrained.
tokenizer.save_pretrained(output_dir)

('./tokenizer_config.json',
 './special_tokens_map.json',
 './vocab.txt',
 './added_tokens.json',
 './tokenizer.json')