*This notebook is adapted from [here](https://github.com/dipanjanS/deep_transfer_learning_nlp_dhs2019/blob/master/notebooks/6%20-%20Transformers%20-%20DistilBERT.ipynb). The purpose of this notebook is to show how to convert a custom DistilBERT-based model to TensorFlow Lite.*

In [1]:
# Reference - https://www.kaggle.com/docs/tpu
# Ask the Kaggle helper for a Google Cloud Storage path via get_gcs_path().
# NOTE(review): GCS_DS_PATH is not referenced again in the cells visible here —
# confirm it is still needed.
from kaggle_datasets import KaggleDatasets
GCS_DS_PATH = KaggleDatasets().get_gcs_path()

In [2]:
import tensorflow as tf
# Report the TensorFlow version this notebook is running against.
print(tf.__version__)

2.2.0


In [3]:
# Configure TPU
# detect and init the TPU: resolve the cluster, connect to it, then initialise
# the TPU system (the three calls are issued in exactly this order).
tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
tf.config.experimental_connect_to_cluster(tpu)
tf.tpu.experimental.initialize_tpu_system(tpu)


# instantiate a distribution strategy
tpu_strategy = tf.distribute.experimental.TPUStrategy(tpu)

# Batch size used by the tf.data pipelines: 32 multiplied by the number of
# replicas the strategy keeps in sync.
BATCH_SIZE = 32 * tpu_strategy.num_replicas_in_sync

In [4]:
import os
import numpy as np
import pandas as pd
import transformers
import matplotlib.pyplot as plt

%matplotlib inline

# fix random seed for reproducibility
seed = 42
np.random.seed(seed)      # seed NumPy's global random state
tf.random.set_seed(seed)  # seed TensorFlow's global random state

# Report the TensorFlow version and whether eager execution is active.
print("TF Version: ", tf.__version__)
print("Eager mode: ", tf.executing_eagerly())



TF Version:  2.2.0
Eager mode:  True


In [5]:
# Download the SST-2 zip archive with Keras' get_file helper and extract it.
data_dir = tf.keras.utils.get_file(
      fname='SST-2.zip',
      origin='https://firebasestorage.googleapis.com/v0/b/mtl-sentence-representations.appspot.com/o/data%2FSST-2.zip?alt=media&token=aabc5f6b-e466-44a2-b9b4-cf6337f84ac8',
      extract=True)
# Re-point data_dir at the 'SST-2' folder located next to the downloaded file;
# the TSV splits are read from there.
data_dir = os.path.join(os.path.dirname(data_dir), 'SST-2')

Downloading data from https://firebasestorage.googleapis.com/v0/b/mtl-sentence-representations.appspot.com/o/data%2FSST-2.zip?alt=media&token=aabc5f6b-e466-44a2-b9b4-cf6337f84ac8


In [6]:
# Paths of the three TSV splits inside the extracted SST-2 directory.
train = os.path.join(data_dir, "train.tsv")
valid = os.path.join(data_dir, "dev.tsv")
test = os.path.join(data_dir, "test.tsv")

In [7]:
# Load each tab-separated split into its own DataFrame.
train_dataset = pd.read_csv(train, sep='\t')
valid_dataset = pd.read_csv(valid, sep='\t')
test_dataset = pd.read_csv(test, sep='\t')

In [8]:
# Summarise the training split: row count, columns, dtypes and null counts.
train_dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 67349 entries, 0 to 67348
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   sentence  67349 non-null  object
 1   label     67349 non-null  int64 
dtypes: int64(1), object(1)
memory usage: 1.0+ MB


In [9]:
# Summarise the validation split: row count, columns, dtypes and null counts.
valid_dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 872 entries, 0 to 871
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   sentence  872 non-null    object
 1   label     872 non-null    int64 
dtypes: int64(1), object(1)
memory usage: 13.8+ KB


In [10]:
# Peek at the first rows of the training split (sentence text and label).
train_dataset.head()

Unnamed: 0,sentence,label
0,hide new secretions from the parental units,0
1,"contains no wit , only labored gags",0
2,that loves its characters and communicates som...,1
3,remains utterly satisfied to remain the same t...,0
4,on the worst revenge-of-the-nerds clichés the ...,0


In [11]:
# Pull the raw sentences and their labels out of the DataFrames as NumPy arrays.
train_reviews = train_dataset['sentence'].values
train_sentiments = train_dataset['label'].values

valid_reviews = valid_dataset['sentence'].values
valid_sentiments = valid_dataset['label'].values

# Only the sentences are extracted from the test split; no label column is read.
test_reviews = test_dataset['sentence'].values

# Display the number of examples in each split.
train_reviews.shape, valid_reviews.shape, test_reviews.shape

((67349,), (872,), (1821,))

In [12]:
# Load the pretrained tokenizer for the 'distilbert-base-uncased' checkpoint,
# the same checkpoint name the model cell loads its weights from.
tokenizer = transformers.DistilBertTokenizer.from_pretrained('distilbert-base-uncased')

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=231508.0, style=ProgressStyle(descripti…




In [13]:
import tqdm

def create_bert_input_features(tokenizer, docs, max_seq_length):
    """Convert raw documents into fixed-length token-id and attention-mask rows.

    Each document is tokenized, truncated so that the two special tokens still
    fit, wrapped as ``'[CLS]' ... '[SEP]'``, mapped to ids and right-padded
    with ``0`` up to ``max_seq_length``. The mask row holds ``1`` for every
    real token (special tokens included) and ``0`` for padding.

    Args:
        tokenizer: Object exposing ``tokenize(text)`` (returning a list of
            tokens) and ``convert_tokens_to_ids(tokens)``.
        docs: Iterable of text documents.
        max_seq_length: Length of every output row. Must be at least 2 so the
            '[CLS]' and '[SEP]' tokens fit.

    Returns:
        A NumPy array of shape ``(2, num_docs, max_seq_length)``; index 0 along
        the first axis holds the token ids, index 1 the attention masks, so the
        result can be unpacked as ``ids, masks = create_bert_input_features(...)``.

    Raises:
        ValueError: If ``max_seq_length`` is smaller than 2.
    """
    if max_seq_length < 2:
        # With fewer than two slots the truncation bound below would be
        # negative and rows would come out longer than max_seq_length.
        raise ValueError(
            "max_seq_length must be at least 2 to hold '[CLS]' and '[SEP]', "
            "got {}".format(max_seq_length))

    max_tokens = max_seq_length - 2  # room left once the special tokens are added
    all_ids, all_masks = [], []
    for doc in tqdm.tqdm(docs, desc="Converting docs to features"):
        # Slicing is a no-op for documents that are already short enough.
        tokens = ['[CLS]'] + tokenizer.tokenize(doc)[:max_tokens] + ['[SEP]']
        ids = list(tokenizer.convert_tokens_to_ids(tokens))
        pad_len = max_seq_length - len(ids)
        # Zero-pad up to the sequence length.
        all_ids.append(ids + [0] * pad_len)
        all_masks.append([1] * len(ids) + [0] * pad_len)

    if not all_ids:
        # np.array([[], []]) would be a float64 array of shape (2, 0); return an
        # integer array with the documented rank instead.
        return np.zeros((2, 0, max_seq_length), dtype=np.int_)
    return np.array([all_ids, all_masks])

In [14]:
# Fixed row length passed to create_bert_input_features; it is also used as the
# input length of the Keras model defined in a later cell.
MAX_SEQ_LENGTH = 500

# The helper returns one array that unpacks along its first axis into
# (token ids, attention masks).
train_features_ids, train_features_masks = create_bert_input_features(tokenizer, train_reviews, 
                                                                      max_seq_length=MAX_SEQ_LENGTH)
val_features_ids, val_features_masks = create_bert_input_features(tokenizer, valid_reviews, 
                                                                  max_seq_length=MAX_SEQ_LENGTH)
# Test-set features are not computed (the call below is left disabled).
#test_features = create_bert_input_features(tokenizer, test_reviews, max_seq_length=MAX_SEQ_LENGTH)
print('Train Features:', train_features_ids.shape, train_features_masks.shape)
print('Val Features:', val_features_ids.shape, val_features_masks.shape)

Converting docs to features: 100%|██████████| 67349/67349 [00:39<00:00, 1684.12it/s]
Converting docs to features: 100%|██████████| 872/872 [00:00<00:00, 1045.51it/s]

Train Features: (67349, 500) (67349, 500)
Val Features: (872, 500) (872, 500)





In [15]:
# Create TensorFlow datasets for better performance
train_ds = (
    tf.data.Dataset
    .from_tensor_slices(((train_features_ids, train_features_masks), train_sentiments))
    .shuffle(2048)
    .batch(BATCH_SIZE)
    .prefetch(tf.data.experimental.AUTOTUNE)
)
    
valid_ds = (
    tf.data.Dataset
    .from_tensor_slices(((val_features_ids, val_features_masks), valid_sentiments))
    .batch(BATCH_SIZE)
    .prefetch(tf.data.experimental.AUTOTUNE)
)

In [16]:
def get_training_model():
    """Build and compile the DistilBERT-based binary classifier.

    The model takes two int32 inputs of length ``MAX_SEQ_LENGTH`` (token ids
    and attention masks), runs them through the pretrained
    'distilbert-base-uncased' encoder, keeps the hidden state at sequence
    position 0, and feeds it through two Dense(256, relu) + Dropout(0.25)
    stages followed by a single sigmoid unit.

    Returns:
        A ``tf.keras.Model`` compiled with Adam (learning rate 2e-5,
        epsilon 1e-08), binary cross-entropy loss and an accuracy metric.
    """
    input_ids = tf.keras.layers.Input(
        shape=(MAX_SEQ_LENGTH,), dtype='int32', name="bert_input_ids")
    input_masks = tf.keras.layers.Input(
        shape=(MAX_SEQ_LENGTH,), dtype='int32', name="bert_input_masks")
    model_inputs = [input_ids, input_masks]

    # The first element of the encoder output is the per-token hidden state.
    encoder = transformers.TFDistilBertModel.from_pretrained('distilbert-base-uncased')
    sequence_output = encoder(model_inputs)[0]

    # Classification head: start from the hidden state at position 0.
    features = sequence_output[:, 0]
    for _ in range(2):
        features = tf.keras.layers.Dense(256, activation='relu')(features)
        features = tf.keras.layers.Dropout(0.25)(features)
    probability = tf.keras.layers.Dense(1, activation='sigmoid')(features)

    classifier = tf.keras.Model(inputs=model_inputs, outputs=probability)
    adam = tf.optimizers.Adam(learning_rate=2e-5, epsilon=1e-08)
    classifier.compile(optimizer=adam,
                       loss='binary_crossentropy',
                       metrics=['accuracy'])
    return classifier

In [17]:
# Compile the model with TPU Strategy
with tpu_strategy.scope():
    model = get_training_model()
    
# Train the model
model.fit(train_ds, 
    validation_data=valid_ds,
    epochs=3)

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=442.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=363423424.0, style=ProgressStyle(descri…


Epoch 1/3
Epoch 2/3
Epoch 3/3


<tensorflow.python.keras.callbacks.History at 0x7f0ca0d25890>

The model slightly overfits; more careful hyperparameter tuning may prevent this.

In [18]:
# Persist the fine-tuned weights to an HDF5 file in the working directory.
model.save_weights('distillbert_ft_wts.h5')

Reference - https://github.com/huggingface/tflite-android-transformers/blob/master/models_generation/distilbert.py

### Dynamic-range

In [19]:
# Convert the trained Keras model to TensorFlow Lite with the default
# optimizations (the dynamic-range variant described by this section).
converter = tf.lite.TFLiteConverter.from_keras_model(model)
# Allow both TFLite builtin ops and selected TensorFlow ops in the converted model.
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, 
                                       tf.lite.OpsSet.SELECT_TF_OPS]
converter.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_model = converter.convert()

# Write through a context manager so the file handle is closed (and the data
# flushed) deterministically instead of being left to garbage collection.
with open("distilbert_sst_tflite.tflite", "wb") as tflite_file:
    bytes_written = tflite_file.write(tflite_model)
# Keep the byte count as the cell's displayed value, as before.
bytes_written

68234624

In [20]:
# Show the on-disk size of the converted model in human-readable units.
!ls -lh distilbert_sst_tflite.tflite

-rw-r--r-- 1 root root 66M Sep 28 05:35 distilbert_sst_tflite.tflite


### Float16


In [21]:
# Convert the trained Keras model to TensorFlow Lite, this time additionally
# requesting float16 as a supported type.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
# Allow both TFLite builtin ops and selected TensorFlow ops in the converted model.
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, 
                                       tf.lite.OpsSet.SELECT_TF_OPS]
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.target_spec.supported_types = [tf.float16]
tflite_model = converter.convert()

# Write through a context manager so the file handle is closed (and the data
# flushed) deterministically instead of being left to garbage collection.
with open("distilbert_sst_tflite_fp16.tflite", "wb") as tflite_file:
    bytes_written = tflite_file.write(tflite_model)
# Keep the byte count as the cell's displayed value, as before.
bytes_written

133530416

In [22]:
# Show the on-disk size of the float16 model in human-readable units.
!ls -lh distilbert_sst_tflite_fp16.tflite

-rw-r--r-- 1 root root 128M Sep 28 05:36 distilbert_sst_tflite_fp16.tflite


### Integer

In [23]:
# This whole cell is intentionally left commented out: it sketches an integer
# quantization attempt using a representative dataset, which the notebook
# reports as not working for this model.
# NOTE(review): the representative samples below are taken unbatched from
# arrays of shape (num_examples, MAX_SEQ_LENGTH), so each yielded tensor has
# shape (MAX_SEQ_LENGTH,) with no leading batch dimension — confirm whether the
# converter expects batched inputs before re-enabling.
# # ==============Representative dataset====================
# train_features_ids = train_features_ids.astype(np.int32)
# train_features_masks = train_features_masks.astype(np.int32)
# train_tf_dataset = tf.data.Dataset.from_tensor_slices((train_features_ids, 
#     train_features_masks))

# def representative_dataset_gen():
#     for feature_id, feature_mask in train_tf_dataset.take(10):
#         yield [feature_id, feature_mask]

# # ==============Conversion====================
# converter = tf.lite.TFLiteConverter.from_keras_model(model)
# converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, 
#                                        tf.lite.OpsSet.SELECT_TF_OPS]
# converter.representative_dataset = representative_dataset_gen
# converter.optimizations = [tf.lite.Optimize.DEFAULT]
# tflite_model = converter.convert()
# open("distilbert_sst_tflite_int.tflite", "wb").write(tflite_model)

***Integer quantization is not currently supported for this model.***