In [1]:
import os

# Expose no CUDA devices to this process; set before TensorFlow is imported.
os.environ['CUDA_VISIBLE_DEVICES'] = ''
# NOTE(review): presumably makes malaya use its HuggingFace backend -- confirm against malaya docs.
os.environ['MALAYA_USE_HUGGINGFACE'] = 'true'

In [2]:
import malaya_speech
import tensorflow as tf
import malaya_speech.train.model.conformer as conformer

In [3]:
# Build the in-graph front end: featurize every row of the batch, then pad the
# per-row feature matrices to a common length so they can be stacked.
featurizer = malaya_speech.tf_featurization.STTFeaturizer(
    normalize_per_feature = True
)
# X: float32 [batch, samples] input (presumably raw waveforms -- confirm);
# X_len: int32 [batch], the number of valid entries in each row of X.
X = tf.compat.v1.placeholder(tf.float32, [None, None], name = 'X_placeholder')
X_len = tf.compat.v1.placeholder(tf.int32, [None], name = 'X_len_placeholder')
batch_size = tf.shape(X)[0]
# infer_shape = False because each row may produce a different number of frames.
features = tf.TensorArray(dtype = tf.float32, size = batch_size, dynamic_size = True, infer_shape = False)
features_len = tf.TensorArray(dtype = tf.int32, size = batch_size)

# Loop 1 state: (row index, per-row features, per-row feature lengths).
init_state = (0, features, features_len)

def condition(i, features, features_len):
    # Keep looping until every row of the batch has been processed.
    return i < batch_size

def body(i, features, features_len):
    # Featurize only the valid prefix X[i, :X_len[i]] of row i.
    f = featurizer(X[i, :X_len[i]])
    f_len = tf.shape(f)[0]
    return i + 1, features.write(i, f), features_len.write(i, f_len)

_, features, features_len = tf.while_loop(condition, body, init_state)
features_len = features_len.stack()
padded_features = tf.TensorArray(dtype = tf.float32, size = batch_size)
padded_lens = tf.TensorArray(dtype = tf.int32, size = batch_size)
# Longest feature sequence in the batch; every row is padded up to this length.
maxlen = tf.reduce_max(features_len)

# Loop 2 state: (row index, padded features, unpadded lengths).
init_state = (0, padded_features, padded_lens)

# NOTE: `condition` and `body` are re-defined here and shadow the loop-1 versions above.
def condition(i, padded_features, padded_lens):
    return i < batch_size

def body(i, padded_features, padded_lens):
    f = features.read(i)
    # Record the length before padding.
    len_f = tf.shape(f)[0]
    # Pad along axis 0 only, up to maxlen; axis 1 is left untouched.
    f = tf.pad(f, [[0, maxlen - tf.shape(f)[0]], [0,0]])
    return i + 1, padded_features.write(i, f), padded_lens.write(i, len_f)

_, padded_features, padded_lens = tf.while_loop(condition, body, init_state)
padded_features = padded_features.stack()
padded_lens = padded_lens.stack()
# Restore the static shape information that the TensorArrays dropped.
padded_lens.set_shape((None,))
# NOTE(review): 80 is presumably the featurizer's feature dimension -- confirm.
padded_features.set_shape((None, None, 80))
# Append a trailing axis of size 1, giving a rank-4 tensor.
padded_features = tf.expand_dims(padded_features, -1)
padded_features, padded_lens

(<tf.Tensor 'ExpandDims:0' shape=(?, ?, 80, 1) dtype=float32>,
 <tf.Tensor 'TensorArrayStack_2/TensorArrayGatherV3:0' shape=(?,) dtype=int32>)

In [4]:
# Re-expose both tensors under fixed op names ('padded_features', 'padded_lens').
padded_features = tf.identity(padded_features, name = 'padded_features')
padded_lens = tf.identity(padded_lens, name = 'padded_lens')

In [5]:
# Build the conformer from malaya_speech's tiny encoder config, with no
# kernel or bias regularizers.
config = malaya_speech.config.conformer_tiny_encoder_config
conformer_model = conformer.Model(
    kernel_regularizer=None, bias_regularizer=None, **config
)

In [6]:
# Run the conformer on the padded features.
# NOTE(review): `seq` is presumably (batch, time, dim) -- confirm.
seq = conformer_model(padded_features)

Instructions for updating:
If using Keras pass *_constraint arguments to layers.


In [7]:
# Size of the projected embedding.
embedding_dim = 512
# Keep only index 0 along axis 1 of `seq`, then drop that axis.
first_token_tensor = tf.squeeze(seq[:, 0:1, :], axis=1)
# Linear projection (no activation, with bias) to `embedding_dim` units.
pooled_output = tf.keras.layers.Dense(embedding_dim, activation=None,
                                   use_bias=True, trainable=True)(first_token_tensor)
pooled_output

<tf.Tensor 'dense/BiasAdd:0' shape=(?, 512) dtype=float32>

In [8]:
# L2-normalise each projected vector along axis 1.
y = tf.keras.layers.Lambda(lambda x: tf.keras.backend.l2_normalize(x, 1))(pooled_output)
y

<tf.Tensor 'lambda/l2_normalize:0' shape=(?, 512) dtype=float32>

In [9]:
# Expose the output under the fixed op name 'logits'. Despite the name, this
# tensor is the L2-normalised projection, not classification logits.
y = tf.identity(y, name = 'logits')
y

<tf.Tensor 'logits:0' shape=(?, 512) dtype=float32>

In [10]:
# Open a session on the default graph and run the global-variable initializer.
sess = tf.Session()
sess.run(tf.global_variables_initializer())

In [13]:
# Restore every variable in the GLOBAL_VARIABLES collection from the training
# checkpoint, overwriting the freshly initialised values in `sess`.
var_list = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
saver = tf.train.Saver(var_list = var_list)
saver.restore(sess, 'conformer-tiny-voxceleb/model.ckpt-1250002')

INFO:tensorflow:Restoring parameters from conformer-tiny-voxceleb/model.ckpt-1250002


In [14]:
# Load one local audio file as the smoke-test input.
# NOTE(review): the discarded second return value is presumably the sample rate -- confirm.
f = '1.wav'
y_, _ = malaya_speech.load(f)

In [15]:
%%time

# Feed a batch containing the single loaded clip; [0] selects that clip's output row.
sess.run(y, feed_dict = {X: [y_], X_len: [len(y_)]})[0]

CPU times: user 2.15 s, sys: 39.9 ms, total: 2.19 s
Wall time: 2.02 s


array([ 0.03484458, -0.01998513, -0.00710905,  0.07466675, -0.02841696,
       -0.00868465,  0.01289403, -0.0312736 ,  0.05596884,  0.00380661,
        0.00341373,  0.0500209 , -0.02445722,  0.00545035,  0.01686173,
        0.01573677,  0.05881055, -0.04549103,  0.04015015, -0.02866226,
       -0.01152191, -0.03644461,  0.05127164, -0.02407388,  0.02928791,
        0.00653268,  0.02991817, -0.00261496,  0.07868502, -0.06669214,
       -0.05935227,  0.02547841, -0.08400825, -0.05166451,  0.10441592,
        0.04643709, -0.09213266, -0.0584141 , -0.11615202,  0.03479329,
       -0.0708966 , -0.0216461 ,  0.04703213, -0.01165836,  0.04350013,
        0.07851542,  0.08677597,  0.05299966, -0.08006323,  0.02643891,
       -0.00730029,  0.03158351, -0.01471287,  0.03444113, -0.04655893,
       -0.00236137, -0.01712618,  0.0173832 ,  0.06266257, -0.02172057,
       -0.00300325, -0.05793135, -0.03559558,  0.0459433 , -0.01044114,
        0.04623517, -0.08776169,  0.00990042, -0.06199295, -0.06

In [16]:
# Save the session state to a new checkpoint under output-conformer-tiny/.
# `saver` is rebound here to a Saver created with default arguments.
saver = tf.train.Saver()
saver.save(sess, 'output-conformer-tiny/model.ckpt')

'output-conformer-tiny/model.ckpt'

In [17]:
# Comma-separated names of the graph nodes to keep when freezing: a node is
# selected when its op looks like a variable/gather or its name mentions
# 'placeholder'/'logits', unless its name contains one of the excluded tokens.
strings = ','.join(
    node.name
    for node in tf.get_default_graph().as_graph_def().node
    if any(
        (
            'Variable' in node.op,
            'gather' in node.op.lower(),
            'placeholder' in node.name,
            'logits' in node.name,
        )
    )
    and not any(
        token in node.name
        for token in ('adam', 'global_step', 'Assign', 'ReadVariableOp', 'Gather')
    )
)
strings.split(',')

['X_placeholder', 'X_len_placeholder', 'logits']

In [18]:
def freeze_graph(model_dir, output_node_names):
    """Freeze the latest checkpoint found in `model_dir` into a single GraphDef.

    The checkpoint's meta graph is imported into a fresh graph, its variables
    are restored and converted to constants, and the result is written to
    `frozen_model.pb` in the same directory as the checkpoint.

    Args:
        model_dir: directory containing the checkpoint state and its files.
        output_node_names: comma-separated names of the nodes to keep as
            outputs of the frozen graph.

    Raises:
        AssertionError: if `model_dir` does not exist, or if no checkpoint
            state can be found in it.
    """
    if not tf.gfile.Exists(model_dir):
        raise AssertionError(
            "Export directory doesn't exists. Please specify an export "
            'directory: %s' % model_dir
        )

    # get_checkpoint_state returns None when the directory holds no valid
    # checkpoint; fail with a clear message instead of an AttributeError.
    checkpoint = tf.train.get_checkpoint_state(model_dir)
    if checkpoint is None or not checkpoint.model_checkpoint_path:
        raise AssertionError(
            'No checkpoint found in directory: %s' % model_dir
        )
    input_checkpoint = checkpoint.model_checkpoint_path

    # os.path.dirname yields '' for a bare file name, so the frozen graph then
    # lands in the working directory rather than at the filesystem root.
    output_graph = os.path.join(
        os.path.dirname(input_checkpoint), 'frozen_model.pb'
    )
    with tf.Session(graph = tf.Graph()) as sess:
        # clear_devices drops device placements recorded in the meta graph.
        saver = tf.train.import_meta_graph(
            input_checkpoint + '.meta', clear_devices = True
        )
        saver.restore(sess, input_checkpoint)
        output_graph_def = tf.graph_util.convert_variables_to_constants(
            sess,
            tf.get_default_graph().as_graph_def(),
            output_node_names.split(','),
        )
        with tf.gfile.GFile(output_graph, 'wb') as f:
            f.write(output_graph_def.SerializeToString())
        print('%d ops in the final graph.' % len(output_graph_def.node))

In [19]:
# `strings` is the comma-separated node-name list; freeze_graph splits it on ','.
freeze_graph('output-conformer-tiny', strings)

INFO:tensorflow:Restoring parameters from output-conformer-tiny/model.ckpt
Instructions for updating:
Use `tf.compat.v1.graph_util.convert_variables_to_constants`
Instructions for updating:
Use `tf.compat.v1.graph_util.extract_sub_graph`
INFO:tensorflow:Froze 296 variables.
INFO:tensorflow:Converted 296 variables to const ops.
5056 ops in the final graph.


In [20]:
def load_graph(frozen_graph_filename):
    """Read a serialized GraphDef from disk and import it into a new tf.Graph.

    The import uses the default name scope of `tf.import_graph_def`.
    Returns the newly created graph.
    """
    serialized = tf.GraphDef()
    with tf.gfile.GFile(frozen_graph_filename, 'rb') as pb_file:
        serialized.ParseFromString(pb_file.read())

    graph = tf.Graph()
    with graph.as_default():
        tf.import_graph_def(serialized)
    return graph

In [21]:
# Load the frozen model back into its own graph to check the export.
g = load_graph('output-conformer-tiny/frozen_model.pb')

In [22]:
# Node names to look up in the re-imported graph.
input_nodes = ['X_placeholder', 'X_len_placeholder']
output_nodes = ['logits']

# Map each node name to its first output tensor under the 'import/' prefix.
inputs = dict((n, g.get_tensor_by_name(f'import/{n}:0')) for n in input_nodes)
outputs = dict((n, g.get_tensor_by_name(f'import/{n}:0')) for n in output_nodes)

In [23]:
# Separate session bound to the re-imported frozen graph `g`.
test_sess = tf.Session(graph = g)

In [24]:
# Run the frozen graph on the same clip, for comparison with the live-graph output.
test_sess.run(
    outputs['logits'],
    feed_dict = {
        inputs['X_placeholder']: [y_],
        inputs['X_len_placeholder']: [len(y_)],
    },
)

array([[ 0.0348338 , -0.01990655, -0.00705371,  0.074632  , -0.02843177,
        -0.00884296,  0.01292641, -0.03096639,  0.05625195,  0.00367646,
         0.00350613,  0.04992089, -0.02470116,  0.00547091,  0.0167574 ,
         0.01596049,  0.05897055, -0.04543317,  0.04011566, -0.02861482,
        -0.01147247, -0.03657398,  0.05145307, -0.02401252,  0.0293633 ,
         0.00651208,  0.02995388, -0.00277792,  0.07844449, -0.06677869,
        -0.0593472 ,  0.02557779, -0.08402275, -0.05179776,  0.1044316 ,
         0.04645899, -0.09193722, -0.05837262, -0.11606829,  0.03469423,
        -0.07100603, -0.02180051,  0.04694816, -0.0117168 ,  0.04367796,
         0.07852592,  0.0868068 ,  0.05292723, -0.0802034 ,  0.02657255,
        -0.00730873,  0.03148327, -0.01475812,  0.03437043, -0.04651646,
        -0.00243126, -0.0169327 ,  0.01729014,  0.0628133 , -0.02188044,
        -0.00301995, -0.05804709, -0.03552859,  0.0458205 , -0.01032988,
         0.04621391, -0.08759291,  0.00990863, -0.0

In [25]:
from tensorflow.tools.graph_transforms import TransformGraph

In [26]:
# Graph-transform passes applied to the frozen graph, in this order.
transforms = ['add_default_attributes',
             'remove_nodes(op=Identity, op=CheckNumerics, op=Dropout)',
             'fold_batch_norms',
             'fold_old_batch_norms',
             'quantize_weights(fallback_min=-10, fallback_max=10)',
             'strip_unused_nodes',
             'sort_by_execution_order']

# Graph boundary handed to TransformGraph.
input_nodes = [
    'X_placeholder',
    'X_len_placeholder',
]
output_nodes = [
    'logits'
]

pb = 'output-conformer-tiny/frozen_model.pb'

input_graph_def = tf.GraphDef()
# tf.gfile.FastGFile is deprecated in favour of tf.gfile.GFile, which is
# already used for every other file access in this notebook.
with tf.gfile.GFile(pb, 'rb') as f:
    input_graph_def.ParseFromString(f.read())

transformed_graph_def = TransformGraph(
    input_graph_def, input_nodes, output_nodes, transforms
)

# The transformed graph is written next to the frozen one, with a
# '.quantized' suffix.
with tf.gfile.GFile(f'{pb}.quantized', 'wb') as f:
    f.write(transformed_graph_def.SerializeToString())

Instructions for updating:
Use tf.gfile.GFile.


In [27]:
# Read the B2 credentials from the environment; a missing variable raises KeyError.
b2_application_key_id = os.environ['b2_application_key_id']
b2_application_key = os.environ['b2_application_key']

In [28]:
# Import only the two names this notebook uses instead of `import *`, so the
# origin of each name is explicit and the namespace is not polluted.
from b2sdk.v1 import B2Api, InMemoryAccountInfo

# Account info is held in memory only.
info = InMemoryAccountInfo()
b2_api = B2Api(info)
application_key_id = b2_application_key_id
application_key = b2_application_key
b2_api.authorize_account("production", application_key_id, application_key)
# Metadata passed as `file_infos` with every upload to the bucket.
file_info = {'how': 'good-file'}
b2_bucket = b2_api.get_bucket_by_name('malaya-speech-model')

In [29]:
# Archive the whole export directory into output-conformer-tiny.tar.
!tar -cf output-conformer-tiny.tar output-conformer-tiny

In [30]:
# Upload the archived export directory to the B2 bucket.
file, outPutname = (
    'output-conformer-tiny.tar',
    'pretrained/output-conformer-tiny-speaker-embedding.tar',
)
b2_bucket.upload_local_file(local_file=file, file_name=outPutname, file_infos=file_info)

<b2sdk.file_version.FileVersionInfo at 0x7efc386cfb38>

In [31]:
# Upload the frozen graph to the B2 bucket.
file, outPutname = (
    'output-conformer-tiny/frozen_model.pb',
    'speaker-vector/conformer-tiny/model.pb',
)
b2_bucket.upload_local_file(local_file=file, file_name=outPutname, file_infos=file_info)


<b2sdk.file_version.FileVersionInfo at 0x7efc387156a0>

In [32]:
# Upload the quantized graph to the B2 bucket.
file, outPutname = (
    'output-conformer-tiny/frozen_model.pb.quantized',
    'speaker-vector/conformer-tiny-quantized/model.pb',
)
b2_bucket.upload_local_file(local_file=file, file_name=outPutname, file_infos=file_info)


<b2sdk.file_version.FileVersionInfo at 0x7efc70281668>

In [33]:
from malaya_boilerplate.huggingface import upload_dict

In [34]:
# Push the archived export directory to the 'pretrained-speaker-embedding' repo.
# NOTE(review): keys look like local paths and values like repo file names -- confirm against upload_dict.
# NOTE(review): the recorded run printed a 409 "You already created this model repo"
# message; confirm whether the upload itself still went through.
files_mapping = {'output-conformer-tiny.tar': 'output-conformer-tiny.tar'}
upload_dict(model = 'pretrained-speaker-embedding', files_mapping = files_mapping)

409 Client Error: Conflict for url: https://huggingface.co/api/repos/create - You already created this model repo


In [None]:
# Push the frozen graph as 'model.pb' to the 'speaker-vector-conformer-tiny' repo.
# NOTE(review): keys look like local paths and values like repo file names -- confirm against upload_dict.
files_mapping = {'output-conformer-tiny/frozen_model.pb': 'model.pb'}
upload_dict(model = 'speaker-vector-conformer-tiny', files_mapping = files_mapping)

In [None]:
# Push the quantized graph as 'model.pb' to the 'speaker-vector-conformer-tiny-quantized' repo.
# NOTE(review): keys look like local paths and values like repo file names -- confirm against upload_dict.
files_mapping = {'output-conformer-tiny/frozen_model.pb.quantized': 'model.pb'}
upload_dict(model = 'speaker-vector-conformer-tiny-quantized', files_mapping = files_mapping)