# Model built with raw TensorFlow

This notebook is an attempt to drop the HuggingFace `transformers` library and use pure TensorFlow code instead.

The goal is to have a single source of truth and to use Google's models directly.

Adapted from: <https://www.tensorflow.org/text/tutorials/classify_text_with_bert>

In [60]:
# Base
import os
import shutil
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Tensorflow
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_text as text
import tensorflowjs as tfjs
#from official.nlp import optimization  # to create AdamW optimizer

# Data
from datasets import load_dataset

# Custom
from helper_functions import load_sst_dataset, plot_model_history, save_ts_model

## Configuration

In [61]:
# Only surface TensorFlow log records at ERROR level
tf.get_logger().setLevel('ERROR')

# Device string: '/CPU:0' for the CPU, '/GPU:0' for the GPU.
# Meant as a fallback to the CPU when the GPU runs out of memory.
# NOTE(review): `device` is not referenced by any other cell visible here,
# so changing it currently has no effect - confirm intended usage.
device = '/CPU:0'

# TF Hub handles: the encoder handle is derived from the model name so the
# two cannot drift apart; the preprocessing model must match the encoder vocab.
bert_model_name         = 'small_bert/bert_en_uncased_L-2_H-128_A-2'
tfhub_handle_encoder    = f'https://tfhub.dev/tensorflow/{bert_model_name}/2'
tfhub_handle_preprocess = 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3'


# Tokenizing parameters
# NOTE(review): `max_length` is not passed to the preprocessing layer in the
# cells visible here - confirm whether it is still needed.
max_length = 60    # Max length of an input

# Training parameters
epochs = 1  # a single epoch is enough to smoke-test the code

## Load the dataset

In [62]:
# Train/test texts plus two targets per split: Y1 feeds the 'regression'
# head and Y2 the 'classifier' head in the cells below.
(text_train, Y1, Y2,
 text_test, Y1_test, Y2_test) = load_sst_dataset()

100%|██████████| 3/3 [00:00<00:00, 1000.23it/s]


## Load Bert

In [63]:
# Stand-alone copies of the two TF Hub layers, handy for inspecting them.
# NOTE(review): build_model() instantiates its own hub layers and does not
# use these two objects.

# Text preprocessing (tokenization + packing)
bert_preprocess_model = hub.KerasLayer(handle=tfhub_handle_preprocess)

# BERT encoder
bert_model = hub.KerasLayer(handle=tfhub_handle_encoder)

## Build the model

In [64]:
def build_model():
  """Assemble the two-headed BERT model used in this notebook.

  A string input named 'text' goes through the TF Hub preprocessing layer
  and then the trainable TF Hub BERT encoder. Dropout (rate 0.1) is applied
  to the encoder's 'pooled_output', which feeds two single-unit Dense heads:
  'regression' (no activation) and 'classifier' (sigmoid).

  Returns:
    A tf.keras.Model mapping the text input to a dict with the keys
    'regression' and 'classifier'.
  """
  # Raw sentences come in as a batch of scalar strings
  sentences = tf.keras.layers.Input(shape=(), dtype=tf.string, name='text')

  # Strings -> encoder inputs
  encoder_inputs = hub.KerasLayer(
      tfhub_handle_preprocess, name='preprocessing')(sentences)

  # Fine-tuned encoder (trainable=True)
  encoder_outputs = hub.KerasLayer(
      tfhub_handle_encoder, trainable=True, name='BERT_encoder')(encoder_inputs)

  # Sentence-level representation shared by both heads
  pooled = tf.keras.layers.Dropout(0.1)(encoder_outputs['pooled_output'])

  # One Dense unit per task, keyed by the name used for the targets
  heads = {
      'regression': tf.keras.layers.Dense(
          1, name='regression', activation=None)(pooled),
      'classifier': tf.keras.layers.Dense(
          1, name='classifier', activation='sigmoid')(pooled),
  }

  return tf.keras.Model(sentences, heads)

In [65]:
# Instantiate the two-headed model; this creates fresh preprocessing and
# encoder hub layers inside build_model().
model = build_model()

In [66]:
# Loss functions, one per output head.
# The model returns a dict with two heads, and a single loss object passed to
# compile() would be applied to both. Binary cross-entropy with
# from_logits=False expects probabilities, which only the sigmoid
# 'classifier' head produces; the 'regression' head is linear
# (activation=None), so it gets a mean-squared-error loss instead.
# NOTE(review): assumes Y1 is a continuous score - confirm against
# load_sst_dataset().
loss = {
    'regression': tf.keras.losses.MeanSquaredError(),
    'classifier': tf.keras.losses.BinaryCrossentropy(from_logits=False),
}

# Metrics for results evaluation, one per output head.
# Binary accuracy is only meaningful for the classifier; the regression head
# is tracked with the mean absolute error.
metrics = {
    'regression': tf.metrics.MeanAbsoluteError(),
    'classifier': tf.metrics.BinaryAccuracy(),
}

In [67]:
# Define the optimizer
# The learning rate starts at 5e-05 and follows lr / (1 + 0.01 * step).
# This is the same inverse-time decay the legacy `decay=0.01` keyword
# produced, written as an explicit schedule because `decay` is rejected by
# the Keras optimizers shipped with TF >= 2.11.
# Note: this is learning-rate decay, not weight decay.
optimizer = tf.keras.optimizers.Adam(
        learning_rate=tf.keras.optimizers.schedules.InverseTimeDecay(
            initial_learning_rate=5e-05,
            decay_steps=1,
            decay_rate=0.01),
        epsilon=1e-08,
        clipnorm=1.0)  # clip each gradient tensor to an L2 norm of 1.0

In [68]:
# Attach the loss, optimizer and metrics defined above to the model
model.compile(loss=loss, optimizer=optimizer, metrics=metrics)

In [69]:
# Features: keyed by the name of the model's Input layer ('text')
x = dict(text=tf.convert_to_tensor(text_train))

# Targets: keyed by the names of the two output heads
y = dict(classifier=Y2, regression=Y1)

In [70]:
# Train the model; 20% of the training data is held out for validation.
# API reference: https://www.tensorflow.org/api_docs/python/tf/keras/Model#fit
history = model.fit(x, y, batch_size=64, epochs=epochs, validation_split=0.2)



In [71]:
# Held-out features, keyed by the model's input name
x_test = dict(text=tf.convert_to_tensor(text_test))

# Held-out targets, keyed by the output head names
y_test = dict(classifier=Y2_test, regression=Y1_test)

# Compute the compiled loss and metrics on the test split
model_eval = model.evaluate(x_test, y_test)



## Save the model with model.save

Documentation: <https://www.tensorflow.org/api_docs/python/tf/keras/Model#save>

```save_format```

- tf: Tensorflow SavedModel
- h5: HDF5

In [72]:
# Export as a TensorFlow SavedModel directory (read by the converter cells below)
model.save(filepath='./formats/tf_savedmodel', save_format='tf')



In [73]:
# Export as a single Keras HDF5 file (read by the converter cells below)
model.save(filepath='./formats/tf_hdf5/model.h5', save_format='h5')

## Convert the model with tensorflowjs_converter

Documentation: <https://github.com/tensorflow/tfjs/tree/master/tfjs-converter>


```--input_format```

- tf_saved_model: SavedModel
- tfjs_layers_model: TensorFlow.js JSON format
- keras: Keras HDF5

```--output_format```

- tfjs_layers_model
- tfjs_graph_model
- keras

In [74]:
# Keras HDF5 --> tfjs_layers_model
# Reads ./formats/tf_hdf5/model.h5 and writes the converted model to
# ./formats/tfjs_layers_model_from_keras_hdf5.
!tensorflowjs_converter --input_format keras --output_format tfjs_layers_model ./formats/tf_hdf5/model.h5 ./formats/tfjs_layers_model_from_keras_hdf5

In [75]:
# Keras HDF5 --> tfjs_graph_model
# Reads ./formats/tf_hdf5/model.h5 and targets
# ./formats/tfjs_graph_model_from_keras_hdf5.
# NOTE: in the recorded run this command ended with a Python traceback inside
# the converter (the saved output is truncated before the error message).
!tensorflowjs_converter --input_format keras --output_format tfjs_graph_model ./formats/tf_hdf5/model.h5 ./formats/tfjs_graph_model_from_keras_hdf5

Traceback (most recent call last):
  File "C:\Users\thiba\anaconda3\envs\bert\lib\runpy.py", line 197, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "C:\Users\thiba\anaconda3\envs\bert\lib\runpy.py", line 87, in _run_code
    exec(code, run_globals)
  File "C:\Users\thiba\anaconda3\envs\bert\Scripts\tensorflowjs_converter.exe\__main__.py", line 7, in <module>
  File "C:\Users\thiba\anaconda3\envs\bert\lib\site-packages\tensorflowjs\converters\converter.py", line 813, in pip_main
    main([' '.join(sys.argv[1:])])
  File "C:\Users\thiba\anaconda3\envs\bert\lib\site-packages\tensorflowjs\converters\converter.py", line 817, in main
    convert(argv[0].split(' '))
  File "C:\Users\thiba\anaconda3\envs\bert\lib\site-packages\tensorflowjs\converters\converter.py", line 803, in convert
    _dispatch_converter(input_format, output_format, args, quantization_dtype_map,
  File "C:\Users\thiba\anaconda3\envs\bert\lib\site-packages\tensorflowjs\converters\converter.p

In [76]:
# tf_saved_model --> tfjs_layers_model
# Reads ./formats/tf_savedmodel and targets
# ./formats/tfjs_layers_model_from_tf_saved_model.
# NOTE: in the recorded run this command ended with a Python traceback inside
# the converter (the saved output is truncated before the error message).
# NOTE(review): check the tfjs-converter README to confirm that this
# input/output format pair is supported at all.
!tensorflowjs_converter --input_format tf_saved_model --output_format=tfjs_layers_model ./formats/tf_savedmodel ./formats/tfjs_layers_model_from_tf_saved_model

Traceback (most recent call last):
  File "C:\Users\thiba\anaconda3\envs\bert\lib\runpy.py", line 197, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "C:\Users\thiba\anaconda3\envs\bert\lib\runpy.py", line 87, in _run_code
    exec(code, run_globals)
  File "C:\Users\thiba\anaconda3\envs\bert\Scripts\tensorflowjs_converter.exe\__main__.py", line 7, in <module>
  File "C:\Users\thiba\anaconda3\envs\bert\lib\site-packages\tensorflowjs\converters\converter.py", line 813, in pip_main
    main([' '.join(sys.argv[1:])])
  File "C:\Users\thiba\anaconda3\envs\bert\lib\site-packages\tensorflowjs\converters\converter.py", line 817, in main
    convert(argv[0].split(' '))
  File "C:\Users\thiba\anaconda3\envs\bert\lib\site-packages\tensorflowjs\converters\converter.py", line 803, in convert
    _dispatch_converter(input_format, output_format, args, quantization_dtype_map,
  File "C:\Users\thiba\anaconda3\envs\bert\lib\site-packages\tensorflowjs\converters\converter.p

In [77]:
# tf_saved_model --> tfjs_graph_model
# Reads ./formats/tf_savedmodel and targets
# ./formats/tfjs_graph_model_from_tf_saved_model.
# NOTE: in the recorded run this command failed with KeyError: 'CaseFoldUTF8'
# while looking up an op definition.
# NOTE(review): presumably that op belongs to the text preprocessing embedded
# in the SavedModel and is unknown to the converter process - confirm.
!tensorflowjs_converter --input_format tf_saved_model --output_format=tfjs_graph_model ./formats/tf_savedmodel ./formats/tfjs_graph_model_from_tf_saved_model

2021-10-17 21:13:22.786947: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX AVX2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2021-10-17 21:13:23.483860: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1510] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 1789 MB memory:  -> device: 0, name: NVIDIA GeForce GTX 1050, pci bus id: 0000:01:00.0, compute capability: 6.1
Traceback (most recent call last):
  File "C:\Users\thiba\anaconda3\envs\bert\lib\site-packages\tensorflow\python\framework\ops.py", line 3962, in _get_op_def
    return self._op_def_cache[type]
KeyError: 'CaseFoldUTF8'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "C:\Users\thiba\anaconda3\envs\bert\lib\site-packages\tensorflow

## Export the model directly with the tfjs converters

As suggested in: <https://www.tensorflow.org/js/tutorials/conversion/import_keras>

**TODO:** locate the API documentation for `tfjs.converters`.

In our tests, `save_keras_model` writes the model in the `tfjs_layers_model` format by default.

In [78]:
# Export the in-memory Keras model straight to TensorFlow.js artifacts,
# without going through an intermediate file written by model.save
tfjs.converters.save_keras_model(
    model,
    './formats/tfjs-direct',
)