In [1]:
import os
# Set CUDA_VISIBLE_DEVICES to an empty string before TensorFlow is imported
# (the import happens in a later cell) so that no GPU is exposed to this process.
os.environ['CUDA_VISIBLE_DEVICES'] = ''

In [2]:
# Remote paths (relative to the download base URL) of each similarity model,
# keyed by model name. Every entry has three keys:
#   'model'     - the frozen graph (.pb) file
#   'vocab'     - the vocabulary file
#   'tokenizer' - the tokenizer model file
# Only the 'model' entries are read in this notebook (by the download cell).
S3_PATH_SIMILARITY = {
    'bert': {
        'model': 'v36/similarity/bert-base-similarity.pb',
        'vocab': 'tokenizer/sp10m.cased.bert.vocab',
        'tokenizer': 'tokenizer/sp10m.cased.bert.model',
    },
    'tiny-bert': {
        'model': 'v36/similarity/tiny-bert-similarity.pb',
        'vocab': 'tokenizer/sp10m.cased.bert.vocab',
        'tokenizer': 'tokenizer/sp10m.cased.bert.model',
    },
    'albert': {
        'model': 'v36/similarity/albert-base-similarity.pb',
        'vocab': 'tokenizer/sp10m.cased.v10.vocab',
        'tokenizer': 'tokenizer/sp10m.cased.v10.model',
    },
    'tiny-albert': {
        'model': 'v36/similarity/albert-tiny-similarity.pb',
        'vocab': 'tokenizer/sp10m.cased.v10.vocab',
        'tokenizer': 'tokenizer/sp10m.cased.v10.model',
    },
    'xlnet': {
        'model': 'v36/similarity/xlnet-base-similarity.pb',
        'vocab': 'tokenizer/sp10m.cased.v9.vocab',
        'tokenizer': 'tokenizer/sp10m.cased.v9.model',
    },
    'alxlnet': {
        'model': 'v36/similarity/alxlnet-base-similarity.pb',
        'vocab': 'tokenizer/sp10m.cased.v9.vocab',
        'tokenizer': 'tokenizer/sp10m.cased.v9.model',
    },
}

In [3]:
# Download every frozen similarity graph listed in S3_PATH_SIMILARITY into the
# current working directory (wget names the file after the last URL segment).
for k, paths in S3_PATH_SIMILARITY.items():
    if k == 'multinomial':
        # Guard kept from the original cell; S3_PATH_SIMILARITY as defined in
        # this notebook has no such key.
        continue
    print(k)
    filename = os.path.basename(paths['model'])
    if os.path.exists(filename):
        # Already present: skip, so re-running the cell neither re-downloads
        # nor leaves wget's numbered duplicates next to the original file.
        # NOTE: a partially downloaded file is skipped too; delete it to retry.
        continue
    status = os.system(f"wget https://f000.backblazeb2.com/file/malaya-model/{paths['model']}")
    if status != 0:
        # os.system only reports the exit status; surface the failure here
        # instead of letting a later cell fail on a missing .pb file.
        raise RuntimeError(f'wget failed for {k!r} (exit status {status})')

bert
tiny-bert
albert
tiny-albert
xlnet
alxlnet


In [4]:
import tensorflow as tf
from tensorflow.tools.graph_transforms import TransformGraph
from glob import glob
# Seed TensorFlow's random ops with a fixed value.
tf.set_random_seed(0)

In [13]:
# with tf.gfile.GFile('tiny-bert-similarity.pb', "rb") as f:
#     graph_def = tf.GraphDef()
#     graph_def.ParseFromString(f.read())

# with tf.Graph().as_default() as graph:
#     tf.import_graph_def(graph_def)

# op = graph.get_operations()
# x = []
# for i in op:
#     try:
#         if i.values()[0].shape[-1] == 312:
#         #if 'import/bert/encoder/layer_11/output/LayerNorm/batchnorm/add' in i.values()[0].name:
#             x.append(i.values())
#     except Exception as e:
#         pass
    
# x[-100:]

In [14]:
# For each frozen graph file: the name of one extra tensor of interest, written
# as it appears after tf.import_graph_def (leading 'import/' scope, trailing
# ':0' output index). The quantization loop strips both affixes and keeps the
# node as an additional graph output; a later cell fetches the tensor by name.
# NOTE(review): judging by the names these look like the last encoder layer's
# LayerNorm output for each architecture - confirm against the graphs.
mapping = {'albert-base-similarity.pb': 'import/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/add_1:0',
          'albert-tiny-similarity.pb': 'import/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/batchnorm/add_1:0',
          'bert-base-similarity.pb': 'import/bert/encoder/layer_11/output/LayerNorm/batchnorm/add_1:0',
          'tiny-bert-similarity.pb': 'import/bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1:0',
          'xlnet-base-similarity.pb': 'import/model/transformer/layer_11/ff/LayerNorm/batchnorm/add_1:0',
          'alxlnet-base-similarity.pb': 'import/model/transformer/layer_shared_11/ff/LayerNorm/batchnorm/add_1:0'}

In [15]:
# Collect every frozen graph (*.pb) in the current working directory. The
# quantization loop looks each name up in `mapping`, so every file found here
# is expected to be one of its keys.
pbs = glob('*.pb')
pbs

['albert-tiny-similarity.pb',
 'xlnet-base-similarity.pb',
 'albert-base-similarity.pb',
 'bert-base-similarity.pb',
 'alxlnet-base-similarity.pb',
 'tiny-bert-similarity.pb']

In [16]:
# Graph Transform Tool passes applied, in this order, to every frozen graph.
transforms = ['add_default_attributes',
              'remove_nodes(op=Identity, op=CheckNumerics, op=Dropout)',
              'fold_batch_norms',
              'fold_old_batch_norms',
              'quantize_weights(fallback_min=-10, fallback_max=10)',
              'strip_unused_nodes',
              'sort_by_execution_order']

# Every similarity graph is fed through the same three placeholders, so the
# list is built once instead of on every iteration.
inputs = ['Placeholder', 'Placeholder_1', 'Placeholder_2']

for pb in pbs:
    if pb not in mapping:
        # glob('*.pb') picks up any .pb in the directory; fail with a clear
        # message rather than a bare KeyError further down.
        raise KeyError(f'{pb} has no entry in `mapping`; cannot pick its extra output node')

    input_graph_def = tf.GraphDef()
    # tf.gfile.GFile replaces the deprecated tf.gfile.FastGFile.
    with tf.gfile.GFile(pb, 'rb') as f:
        input_graph_def.ParseFromString(f.read())

    # Choose the output nodes by architecture. 'xlnet' is tested first so it
    # still takes precedence over 'bert', as in the original pair of `if`s.
    # An unknown file name is an error: previously `outputs` would have been
    # undefined or silently carried over from the previous iteration.
    if 'xlnet' in pb:
        # 'sequnece_summary' (sic): spelling kept exactly, since the transform
        # ran with this node name; presumably it matches the graph's own scope.
        outputs = ['logits', 'model_1/sequnece_summary/summary/BiasAdd']
    elif 'bert' in pb:
        outputs = ['logits', 'bert/pooler/dense/BiasAdd']
    else:
        raise ValueError(f'cannot infer output nodes for {pb}: expected "bert" or "xlnet" in the file name')

    # `mapping` stores tensor names of an imported graph; TransformGraph wants
    # plain node names, so drop the 'import/' scope and the ':0' output index.
    extra_outputs = [mapping[pb].replace('import/','').replace(':0','')]

    print(pb, inputs)

    transformed_graph_def = TransformGraph(input_graph_def,
                                           inputs,
                                           outputs + extra_outputs, transforms)

    # Write the result next to the source file as '<name>.pb.quantized'.
    with tf.gfile.GFile(f'{pb}.quantized', 'wb') as f:
        f.write(transformed_graph_def.SerializeToString())

Instructions for updating:
Use tf.gfile.GFile.
albert-tiny-similarity.pb ['Placeholder', 'Placeholder_1', 'Placeholder_2']
xlnet-base-similarity.pb ['Placeholder', 'Placeholder_1', 'Placeholder_2']
albert-base-similarity.pb ['Placeholder', 'Placeholder_1', 'Placeholder_2']
bert-base-similarity.pb ['Placeholder', 'Placeholder_1', 'Placeholder_2']
alxlnet-base-similarity.pb ['Placeholder', 'Placeholder_1', 'Placeholder_2']
tiny-bert-similarity.pb ['Placeholder', 'Placeholder_1', 'Placeholder_2']


In [17]:
def load_graph(frozen_graph_filename, **kwargs):
    """Load a frozen GraphDef file into a new ``tf.Graph``.

    Before importing, a few stateful ops are rewritten in place to stateless
    equivalents (RefSwitch -> Switch, AssignSub -> Sub, AssignAdd -> Add,
    Assign -> Identity) following the workaround linked below.

    Args:
        frozen_graph_filename: path of the serialized GraphDef (.pb) file.
        **kwargs: accepted for call compatibility; not used by this function.

    Returns:
        A new ``tf.Graph`` holding the imported nodes. ``tf.import_graph_def``
        is called without a ``name``, so node names get its default prefix
        (the other cells address tensors as ``'import/...'``).
    """
    with tf.gfile.GFile(frozen_graph_filename, 'rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())

    # https://github.com/onnx/tensorflow-onnx/issues/77#issuecomment-445066091
    # to fix import T5
    for node in graph_def.node:
        if node.op == 'RefSwitch':
            node.op = 'Switch'
            # `range`, not the Python-2-only `xrange`: the notebook runs on
            # Python 3 (it uses f-strings), where `xrange` is a NameError.
            for index in range(len(node.input)):
                if 'moving_' in node.input[index]:
                    # Point at the variable's '/read' output instead of the ref.
                    node.input[index] = node.input[index] + '/read'
        elif node.op == 'AssignSub':
            node.op = 'Sub'
            if 'use_locking' in node.attr:
                del node.attr['use_locking']
        elif node.op == 'AssignAdd':
            node.op = 'Add'
            if 'use_locking' in node.attr:
                del node.attr['use_locking']
        elif node.op == 'Assign':
            node.op = 'Identity'
            # Remove the two attributes this rewrite strips from Assign nodes.
            for attr_name in ('use_locking', 'validate_shape'):
                if attr_name in node.attr:
                    del node.attr[attr_name]
            if len(node.input) == 2:
                # Keep only the assigned value as the single Identity input.
                node.input[0] = node.input[1]
                del node.input[1]

    with tf.Graph().as_default() as graph:
        tf.import_graph_def(graph_def)
    return graph

In [18]:
# Import the unquantized tiny-bert graph and grab the tensors needed to run it.
g = load_graph('tiny-bert-similarity.pb')

# The three input placeholders, looked up in the order the cells below feed them.
x, segment_ids, input_masks = map(
    g.get_tensor_by_name,
    ('import/Placeholder:0', 'import/Placeholder_1:0', 'import/Placeholder_2:0'),
)

# Despite its name, `logits` is the tensor named in `mapping` for this file,
# not the graph's 'logits' node.
logits = g.get_tensor_by_name(mapping['tiny-bert-similarity.pb'])

In [19]:
# Display the fetched tensor to check its name, static shape and dtype.
logits

<tf.Tensor 'import/bert/encoder/layer_3/output/LayerNorm/batchnorm/add_1:0' shape=(?, 312) dtype=float32>

In [20]:
# Open an interactive session on the imported tiny-bert graph `g`.
# NOTE(review): the session is never closed in the cells visible here.
test_sess = tf.InteractiveSession(graph = g)

In [21]:
import malaya

# Only `model._tokenizer` is used afterwards (as input to xlnet_tokenization).
# NOTE(review): this loads the 'alxlnet' model while the session was built
# from 'tiny-bert-similarity.pb'. S3_PATH_SIMILARITY lists different tokenizer
# files for these two models, so the ids fed to the graph may not match its
# vocabulary - confirm this is only meant as a shape/speed smoke test.
model = malaya.similarity.transformer(model = 'alxlnet')




In [27]:
from malaya.text.bpe import xlnet_tokenization
import numpy as np

# Tokenize four sample strings with the loaded model's tokenizer.
r = xlnet_tokenization(
    model._tokenizer,
    ['benci', 'suka', 'hodoh la', 'sakai bodoh la la la la'],
)

# Result layout as consumed here: [0] token ids, [1] masks, [2] segment ids.
batch_x, batch_mask = r[0], r[1]
# Segment ids become an ndarray so they can be patched with boolean indexing.
batch_segment = np.array(r[2])

In [28]:
# Overwrite, in place, every segment id equal to 0 with 1, then display the array.
# NOTE(review): the reason for remapping 0 -> 1 is not stated in the notebook.
np.putmask(batch_segment, batch_segment == 0, 1)
batch_segment

array([[1, 1, 2, 4, 4, 4, 4, 4, 4],
       [1, 1, 2, 4, 4, 4, 4, 4, 4],
       [1, 1, 1, 2, 4, 4, 4, 4, 4],
       [1, 1, 1, 1, 1, 1, 1, 1, 2]])

In [29]:
# Shape of the token-id batch: (number of sentences, padded sequence length).
np.array(batch_x).shape

(4, 9)

In [30]:
%%time
# Run the graph once on the tokenized batch and show the result's shape.
# `logits` is the tensor taken from `mapping`; the recorded result is 2-D,
# with 36 rows for the (4, 9) input batch.
l = test_sess.run(logits, feed_dict = {x: batch_x,
                                  segment_ids: batch_segment,
                                  input_masks: batch_mask})
l.shape

CPU times: user 70 ms, sys: 4.86 ms, total: 74.9 ms
Wall time: 15.9 ms


(36, 312)

In [33]:
# Number of dimensions of the fetched result.
len(l.shape)

2

In [36]:
# Restore the leading (batch, sequence) axes of the flat 2-D result by
# reshaping to (*batch_x.shape, -1).
# NOTE(review): the recorded output of this cell is a shape tuple, which
# suggests it was executed with a trailing `.shape`; as written it displays
# the whole array.
l.reshape((*np.array(batch_x).shape,-1))

(4, 9, 312)

In [None]:
%%time
# Same run as the earlier timed cell, repeated; this copy has no execution
# count or recorded output.
l = test_sess.run(logits, feed_dict = {x: batch_x,
                                  segment_ids: batch_segment,
                                  input_masks: batch_mask})
l.shape

In [37]:
# x, x_len, logits

In [38]:
# %%time
# test_sess.run(logits, feed_dict = {x: [[1,2,3,3,4]], x_len: [[1,1,1,1,1]]})

In [39]:
# %%time
# test_sess.run(logits, feed_dict = {x: [[1,2,3,3,4]], x_len: [[1,1,1,1,1]]})

In [40]:
# List the quantized graphs written by the quantization loop
# ('<name>.pb.quantized' files in the current working directory).
quantized = glob('*.pb.quantized')
quantized

['albert-base-similarity.pb.quantized',
 'albert-tiny-similarity.pb.quantized',
 'bert-base-similarity.pb.quantized',
 'xlnet-base-similarity.pb.quantized',
 'tiny-bert-similarity.pb.quantized',
 'alxlnet-base-similarity.pb.quantized']

In [43]:
rm *.pb*

In [None]:
# converter = tf.compat.v1.lite.TFLiteConverter.from_frozen_graph(
#     graph_def_file='test.pb',
#     input_arrays=['Placeholder', 'Placeholder_1'],
#     input_shapes={'Placeholder' : [None, 512], 'Placeholder_1': [None, 512]},
#     output_arrays=['logits'],
# )
# # converter.allow_custom_ops=True

In [None]:
# converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, tf.lite.OpsSet.SELECT_TF_OPS]
# converter.target_spec.supported_types = [tf.float16]
# converter.optimizations = [tf.lite.Optimize.DEFAULT]
# converter.experimental_new_converter = True
# tflite_model = converter.convert()

In [None]:
# converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, 
#                                        tf.lite.OpsSet.SELECT_TF_OPS]
# converter.target_spec.supported_types = [tf.float16]
# converter.optimizations = [tf.lite.Optimize.DEFAULT]
# tflite_model = converter.convert()

# with open('tiny-bert-sentiment-float16.tflite', 'wb') as f:
#     f.write(tflite_model)

In [None]:
# converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, 
#                                        tf.lite.OpsSet.SELECT_TF_OPS]
# converter.optimizations = [tf.lite.Optimize.OPTIMIZE_FOR_SIZE]
# tflite_model = converter.convert()

# with open('tiny-bert-sentiment-hybrid.tflite', 'wb') as f:
#     f.write(tflite_model)

In [None]:
# interpreter = tf.lite.Interpreter(model_path='tiny-bert-sentiment-hybrid.tflite')
# interpreter.allocate_tensors()