In [1]:
import os

# Hide every CUDA device from this process. This is set before TensorFlow is
# imported in the next cell, so the graph is built and exported without GPUs.
os.environ['CUDA_VISIBLE_DEVICES'] = ''

In [2]:
from malaya_speech.train.model import best_rq, ctc
from malaya_speech.train.model.conformer.model import Model as ConformerModel
import malaya_speech
import tensorflow as tf
import numpy as np
import json
from glob import glob
import string

In [3]:
# Character vocabulary: empty string at index 0, then a-z, then 0-9, and a
# single space as the last entry.
unique_vocab = ['', *string.ascii_lowercase, *string.digits, ' ']
len(unique_vocab)

38

In [4]:
# Graph inputs. The node names are load-bearing: the frozen graph is later
# addressed through 'import/X_placeholder:0' and 'import/X_len_placeholder:0'.
# X is float32 with two dynamic dimensions; the smoke test below feeds the
# padded batch of loaded audio into it.
X = tf.compat.v1.placeholder(tf.float32, [None, None], name = 'X_placeholder')
# X_len is int32 with one dynamic dimension; it is fed the per-item lengths
# returned together with the padded batch.
X_len = tf.compat.v1.placeholder(tf.int32, [None], name = 'X_len_placeholder')

In [5]:
# Module-level flag; nothing in the visible cells reads it.
training = True


class Encoder:
    """Thin adapter around a Conformer model.

    The instance is handed to `best_rq.Model` as its encoder. It accepts an
    `input_mask` argument in its call signature but only forwards the
    features and the `training` flag to the wrapped model.
    """

    def __init__(self, config):
        """Keep `config` and build the wrapped Conformer from its entries."""
        self.config = config
        self.encoder = ConformerModel(**config)

    def __call__(self, x, input_mask, training = True):
        """Run the wrapped encoder on `x`; `input_mask` is accepted but unused."""
        encoded = self.encoder(x, training = training)
        return encoded

In [6]:
# Build the inference graph: Conformer-tiny encoder inside a BEST-RQ model,
# followed by a dense projection that produces the per-frame logits.
#
# NOTE(review): `config_conformer` is the mapping object exposed by
# malaya_speech.config itself, so the two assignments below mutate the
# library-level config in place for the rest of this kernel session.
config_conformer = malaya_speech.config.conformer_tiny_encoder_config
# Turn off the Conformer's own subsampling step
# (presumably because best_rq.Model handles framing itself — TODO confirm).
config_conformer['subsampling']['type'] = 'none'
# Every dropout knob is set to zero here and in the BEST-RQ config below.
config_conformer['dropout'] = 0.0
encoder = Encoder(config_conformer)
cfg = best_rq.Best_RQConfig(dropout=0.0,
                    attention_dropout=0.0,
                    encoder_layerdrop=0.0,
                    dropout_input=0.0,
                    dropout_features=0.0)
model = best_rq.Model(cfg, encoder)
# Called with features_only = True and mask = False; the returned mapping is
# read below through its 'x' and 'padding_mask' entries.
r = model(X, padding_mask = X_len, features_only = True, mask = False)
# Projection to len(unique_vocab) + 1 units (the extra unit is presumably the
# CTC blank — TODO confirm). The layer is created without an explicit name and
# shows up as 'dense/kernel' / 'dense/bias' in the node listing further down.
logits = tf.layers.dense(r['x'], len(unique_vocab) + 1)
# Per-row count of positions where padding_mask is False, summed over axis 1.
seq_lens = tf.reduce_sum(
    tf.cast(tf.logical_not(r['padding_mask']), tf.int32), axis = 1
)


Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Instructions for updating:
Use keras.layers.Dense instead.
Instructions for updating:
Please use `layer.__call__` method instead.


In [7]:
# Swap the first two axes of the logits
# (presumably batch-major -> time-major for CTC decoding — TODO confirm).
logits = tf.transpose(logits, [1, 0, 2])
# tf.identity gives both outputs fixed node names; the frozen graph is later
# queried through 'import/logits:0' and 'import/seq_lens:0'.
logits = tf.identity(logits, name = 'logits')
seq_lens = tf.identity(seq_lens, name = 'seq_lens')

In [8]:
# Open a session on the default graph and load the trained weights.
sess = tf.Session()
# Initialise all variables first; the restore below then loads the checkpoint
# values for everything in the GLOBAL_VARIABLES collection.
sess.run(tf.global_variables_initializer())
var_list = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
saver = tf.train.Saver(var_list = var_list)
saver.restore(sess, 'best-rq-conformer-tiny-ctc-char/model.ckpt-2000000')

INFO:tensorflow:Restoring parameters from best-rq-conformer-tiny-ctc-char/model.ckpt-2000000


In [9]:
# Re-save the restored weights into the export directory. This rebinds `saver`
# to a fresh Saver created without an explicit var_list. The checkpoint and
# .meta files written here are what freeze_graph() reads later.
saver = tf.train.Saver()
saver.save(sess, 'output-best-rq-conformer-tiny-ctc/model.ckpt')

'output-best-rq-conformer-tiny-ctc/model.ckpt'

In [10]:
# Comma-separated names of the graph nodes to keep when freezing.
# A node is selected when its op type mentions 'Variable' or (case-insensitive)
# 'gather', or its name mentions one of the placeholder/output tags; it is then
# rejected if its name contains any of the excluded fragments.
strings = ','.join(
    node.name
    for node in tf.get_default_graph().as_graph_def().node
    if (
        'Variable' in node.op
        or 'gather' in node.op.lower()
        or any(tag in node.name for tag in ('placeholder', 'logits', 'seq_lens'))
    )
    and not any(
        fragment in node.name
        for fragment in (
            'adam',
            'global_step',
            'Assign',
            'ReadVariableOp',
            'Gather',
        )
    )
)
strings.split(',')

['X_placeholder',
 'X_len_placeholder',
 'embeddings_vqvae',
 'projection_vqvae',
 'mask_emb',
 'dense/kernel',
 'dense/bias',
 'logits',
 'seq_lens']

In [11]:
def freeze_graph(model_dir, output_node_names):
    """Freeze the checkpoint recorded in `model_dir` into `frozen_model.pb`.

    The checkpoint's meta graph is imported into a fresh graph, its variables
    are restored and converted to constants, and the resulting GraphDef is
    written next to the checkpoint as `frozen_model.pb`. The number of ops in
    the frozen graph is printed.

    Args:
        model_dir: directory holding the checkpoint state file, the checkpoint
            data and the matching `.meta` graph.
        output_node_names: comma-separated node names that must be kept in
            the frozen graph.

    Raises:
        AssertionError: if `model_dir` does not exist, or if it contains no
            usable checkpoint state.
    """
    if not tf.gfile.Exists(model_dir):
        raise AssertionError(
            "Export directory doesn't exists. Please specify an export "
            'directory: %s' % model_dir
        )

    checkpoint = tf.train.get_checkpoint_state(model_dir)
    # get_checkpoint_state returns None when the directory has no valid
    # checkpoint state; fail with a clear message instead of an AttributeError.
    if checkpoint is None or not checkpoint.model_checkpoint_path:
        raise AssertionError(
            'No checkpoint found in export directory: %s' % model_dir
        )
    input_checkpoint = checkpoint.model_checkpoint_path

    # Write the frozen graph beside the checkpoint. os.path handles a bare
    # filename and platform separators, unlike splitting on '/'.
    output_graph = os.path.join(
        os.path.dirname(input_checkpoint), 'frozen_model.pb'
    )

    # The session's fresh graph is the default graph inside this block, so the
    # meta graph is imported into it rather than into the notebook's graph.
    with tf.Session(graph = tf.Graph()) as sess:
        saver = tf.train.import_meta_graph(
            input_checkpoint + '.meta', clear_devices = True
        )
        saver.restore(sess, input_checkpoint)
        output_graph_def = tf.graph_util.convert_variables_to_constants(
            sess,
            tf.get_default_graph().as_graph_def(),
            output_node_names.split(','),
        )
        with tf.gfile.GFile(output_graph, 'wb') as f:
            f.write(output_graph_def.SerializeToString())
        print('%d ops in the final graph.' % len(output_graph_def.node))

In [12]:
# Freeze the re-saved checkpoint, keeping the nodes named in `strings`.
freeze_graph('output-best-rq-conformer-tiny-ctc', strings)

INFO:tensorflow:Restoring parameters from output-best-rq-conformer-tiny-ctc/model.ckpt
Instructions for updating:
Use `tf.compat.v1.graph_util.convert_variables_to_constants`
Instructions for updating:
Use `tf.compat.v1.graph_util.extract_sub_graph`
INFO:tensorflow:Froze 292 variables.
INFO:tensorflow:Converted 292 variables to const ops.
4868 ops in the final graph.


In [13]:
def load_graph(frozen_graph_filename):
    """Read a serialized GraphDef from disk and import it into a new graph.

    The import uses TensorFlow's default name scope, which is why callers
    look tensors up under the 'import/' prefix.

    Args:
        frozen_graph_filename: path of the serialized GraphDef file.

    Returns:
        A new tf.Graph containing the imported nodes.
    """
    graph_def = tf.GraphDef()
    with tf.gfile.GFile(frozen_graph_filename, 'rb') as f:
        graph_def.ParseFromString(f.read())

    graph = tf.Graph()
    with graph.as_default():
        tf.import_graph_def(graph_def)

    return graph

In [14]:
# Sample recordings used to smoke-test the frozen graph. The paths are
# relative to the notebook's working directory.
files = [
    'speech/record/savewav_2020-11-26_22-36-06_294832.wav',
    'speech/record/savewav_2020-11-26_22-40-56_929661.wav',
    'speech/record/675.wav',
    'speech/record/664.wav',
    'speech/example-speaker/husein-zolkepli.wav',
    'speech/example-speaker/mas-aisyah.wav',
    'speech/example-speaker/khalil-nooh.wav',
    'speech/example-speaker/shafiqah-idayu.wav',
    'speech/khutbah/wadi-annuar.wav',
]

# Keep only the first element of each load() result
# (presumably the waveform array — TODO confirm against malaya_speech.load).
ys = [malaya_speech.load(f)[0] for f in files]
# Pad the items into a single batch and also get the per-item lengths.
padded, lens = malaya_speech.padding.sequence_1d(ys, return_len = True)

In [15]:
# Load the frozen (non-quantized) graph for the smoke test below.
g = load_graph('output-best-rq-conformer-tiny-ctc/frozen_model.pb')

In [16]:
# Names of the graph's input and output nodes. These lists are also passed to
# TransformGraph when the quantized graph is produced.
input_nodes = [
    'X_placeholder',
    'X_len_placeholder',
]
output_nodes = [
    'logits',
    'seq_lens',
]
# load_graph() imports under the default 'import' scope, hence the prefix;
# ':0' selects the first output tensor of each node.
inputs = {n: g.get_tensor_by_name(f'import/{n}:0') for n in input_nodes}
outputs = {n: g.get_tensor_by_name(f'import/{n}:0') for n in output_nodes}

In [17]:
# Separate session bound to the frozen graph `g`, independent of `sess`.
test_sess = tf.Session(graph = g)

In [18]:
# Smoke test: run the frozen graph on the padded batch and fetch the logits.
# NOTE(review): this rebinds `r`, which earlier held the model's output mapping.
r = test_sess.run(outputs['logits'], feed_dict = {inputs['X_placeholder']: padded, 
                                                          inputs['X_len_placeholder']: lens})

In [19]:
from tensorflow.tools.graph_transforms import TransformGraph

In [20]:
# Graph Transform Tool passes used to produce the quantized export, passed to
# TransformGraph in this order. 'quantize_weights' is the pass that shrinks
# the stored weights; the others tidy and prune the graph.
transforms = [
    'add_default_attributes',
    'remove_nodes(op=Identity, op=CheckNumerics, op=Dropout)',
    'fold_batch_norms',
    'fold_old_batch_norms',
    'quantize_weights(fallback_min=-10, fallback_max=10)',
    'strip_unused_nodes',
    'sort_by_execution_order',
]

pb = 'output-best-rq-conformer-tiny-ctc/frozen_model.pb'

# Read the frozen graph back in. tf.gfile.GFile replaces the deprecated
# tf.gfile.FastGFile and matches the reader used elsewhere in this notebook.
input_graph_def = tf.GraphDef()
with tf.gfile.GFile(pb, 'rb') as f:
    input_graph_def.ParseFromString(f.read())

# input_nodes / output_nodes tell the tool which nodes are the graph's
# interface, so they survive the pruning passes.
transformed_graph_def = TransformGraph(
    input_graph_def, input_nodes, output_nodes, transforms
)

# The quantized graph is written next to the original with a '.quantized' suffix.
with tf.gfile.GFile(f'{pb}.quantized', 'wb') as f:
    f.write(transformed_graph_def.SerializeToString())

Instructions for updating:
Use tf.gfile.GFile.


In [21]:
# Reload the quantized graph to confirm it parses and imports. This rebinds `g`.
# NOTE(review): no inference is run against the quantized graph in the cells
# shown here.
g = load_graph('output-best-rq-conformer-tiny-ctc/frozen_model.pb.quantized')

In [22]:
!tar -czvf output-best-rq-conformer-tiny-ctc.tar.gz output-best-rq-conformer-tiny-ctc

output-best-rq-conformer-tiny-ctc/
output-best-rq-conformer-tiny-ctc/model.ckpt.index
output-best-rq-conformer-tiny-ctc/model.ckpt.data-00000-of-00001
output-best-rq-conformer-tiny-ctc/frozen_model.pb.quantized
output-best-rq-conformer-tiny-ctc/checkpoint
output-best-rq-conformer-tiny-ctc/model.ckpt.meta
output-best-rq-conformer-tiny-ctc/frozen_model.pb


In [25]:
# Read the Backblaze B2 credentials from the environment instead of embedding
# them in the notebook. A missing variable raises KeyError here.
b2_application_key_id = os.environ['b2_application_key_id']
b2_application_key = os.environ['b2_application_key']

In [24]:
# Import only the two names this notebook uses, rather than a star import
# that would fill the kernel namespace with everything b2sdk.v1 exports.
from b2sdk.v1 import B2Api, InMemoryAccountInfo

# Account info is kept in memory only, so nothing is persisted to disk.
info = InMemoryAccountInfo()
b2_api = B2Api(info)
application_key_id = b2_application_key_id
application_key = b2_application_key
b2_api.authorize_account("production", application_key_id, application_key)
# Metadata attached to every upload made by the cells below.
file_info = {'how': 'good-file'}
b2_bucket = b2_api.get_bucket_by_name('malaya-speech-model')

In [26]:
# Upload the tarball of the whole export directory (checkpoint plus both
# graphs) under the bucket's 'pretrained/' prefix.
key = 'output-best-rq-conformer-tiny-ctc.tar.gz'
outPutname = "pretrained/output-best-rq-conformer-tiny-ctc.tar.gz"
b2_bucket.upload_local_file(
    local_file=key,
    file_name=outPutname,
    file_infos=file_info,
)

<b2sdk.file_version.FileVersionInfo at 0x7fb7f821c588>

In [27]:
# Upload the frozen float graph as 'model.pb' under the
# 'speech-to-text-ctc-v2/best-rq-conformer-tiny/' prefix.
# This rebinds `outPutname` from the previous upload.
file = 'output-best-rq-conformer-tiny-ctc/frozen_model.pb'
outPutname = 'speech-to-text-ctc-v2/best-rq-conformer-tiny/model.pb'
b2_bucket.upload_local_file(
    local_file=file,
    file_name=outPutname,
    file_infos=file_info,
)

<b2sdk.file_version.FileVersionInfo at 0x7fb7f821ce48>

In [None]:
# Upload the quantized graph as 'model.pb' under the separate
# 'speech-to-text-ctc-v2/best-rq-conformer-tiny-quantized/' prefix.
# This rebinds `file` and `outPutname` from the previous upload.
file = 'output-best-rq-conformer-tiny-ctc/frozen_model.pb.quantized'
outPutname = 'speech-to-text-ctc-v2/best-rq-conformer-tiny-quantized/model.pb'
b2_bucket.upload_local_file(
    local_file=file,
    file_name=outPutname,
    file_infos=file_info,
)

In [None]:
!rm -rf output-best-rq-conformer-tiny-ctc*