In [1]:
import malaya_speech
import malaya_speech.train.model.speakernet as speakernet
import tensorflow as tf




The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.



In [2]:
# Graph inputs: a batch of padded feature sequences with 64 values per step,
# and one integer length per sequence in the batch.
# The two placeholders are created in this order on purpose: they are unnamed,
# so TensorFlow auto-names them and later cells look them up by those names.
inputs = tf.placeholder(dtype = tf.float32, shape = [None, None, 64])
inputs_length = tf.placeholder(dtype = tf.int32, shape = [None])
model = speakernet.Model(inputs, inputs_length, mode = 'eval')
# Give the output a fixed node name ('logits') so it can be found by name
# in the exported graph.
model.logits = tf.identity(model.logits, name = 'logits')


Instructions for updating:
Use `tf.keras.layers.Conv1D` instead.
Instructions for updating:
Please use `layer.__call__` method instead.
Instructions for updating:
Use keras.layers.BatchNormalization instead.  In particular, `tf.control_dependencies(tf.GraphKeys.UPDATE_OPS)` should not be used (consult the `tf.keras.layers.batch_normalization` documentation).
Instructions for updating:
Use `tf.keras.layers.SeparableConv1D` instead.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:
Use keras.layers.Dense instead.


In [3]:
# Open the session used for the checkpoint-backed model and give every
# variable an initial value before the checkpoint is restored.
sess = tf.InteractiveSession()
init_op = tf.global_variables_initializer()
sess.run(init_op)

In [4]:
# Load the trained weights from the checkpoint into the live session.
saver = tf.train.Saver()
saver.restore(sess, save_path = 'speakernet/model.ckpt')

INFO:tensorflow:Restoring parameters from speakernet/model.ckpt


In [5]:
import malaya_speech.config

# Build the feature extractor from the SpeakerNet featurizer settings that
# ship with malaya_speech.config.
config = malaya_speech.config.speakernet_featurizer_config
featurizer = malaya_speech.featurization.SpeakerNetFeaturizer(config)

In [6]:
from glob import glob

# Collect the example recordings. glob returns matches in arbitrary
# (filesystem-dependent) order, so the row/column order of every result
# derived from `speakers` follows the listing displayed below.
speakers = glob('speech/example-speaker/*.wav')
speakers

['speech/example-speaker/khalil-nooh.wav',
 'speech/example-speaker/husein-zolkepli.wav',
 'speech/example-speaker/mas-aisyah.wav',
 'speech/example-speaker/shafiqah-idayu.wav']

In [7]:
# Load every file with sr = 16000 and keep only the first item that
# malaya_speech.load returns.
wavs = [malaya_speech.load(path, sr = 16000)[0] for path in speakers]
# One feature matrix per recording.
vectors = list(map(featurizer.vectorize, wavs))
# Pad along axis 0 so the batch is rectangular; with return_len = True the
# call also returns `l`, which is fed to the model as the sequence lengths.
padded, l = malaya_speech.padding.sequence_nd(vectors, dim = 0, return_len = True)
padded.shape

(4, 564, 64)

In [8]:
# Run the checkpoint-restored model on the padded batch.
feed = {inputs: padded, inputs_length: l}
logits = sess.run(model.logits, feed_dict = feed)
logits.shape

(4, 7205)

In [9]:
from scipy.spatial.distance import cdist

# Pairwise cosine similarity between the recordings (1 - cosine distance).
1 - cdist(logits, logits, 'cosine')

array([[1.        , 0.91986295, 0.85895558, 0.85036781],
       [0.91986295, 1.        , 0.85086457, 0.86070392],
       [0.85895558, 0.85086457, 1.        , 0.88895706],
       [0.85036781, 0.86070392, 0.88895706, 1.        ]])

In [10]:
def _is_export_node(node):
    """Return True when `node` should be listed as an output of the frozen graph.

    A node is selected when its op type contains 'Variable' or its name
    contains one of the markers of interest, and it is rejected again when its
    name contains any optimizer / bookkeeping marker.
    """
    selected = 'Variable' in node.op or any(
        marker in node.name
        for marker in ('Placeholder', 'logits', 'alphas', 'self/Softmax')
    )
    excluded = any(
        marker in node.name
        for marker in ('adam', 'beta', 'global_step', 'Assign')
    )
    return selected and not excluded


# Comma-separated node names, in graph order, passed to freeze_graph as
# output_node_names.
strings = ','.join(
    node.name
    for node in tf.get_default_graph().as_graph_def().node
    if _is_export_node(node)
)

In [11]:
def freeze_graph(model_dir, output_node_names):
    """
    Freeze the latest checkpoint in `model_dir` into a single GraphDef file.

    The checkpoint's meta graph is imported into a fresh graph, its variables
    are restored and converted to constants, and the result is written to
    `frozen_model.pb` next to the checkpoint files.

    Parameters
    ----------
    model_dir: str
        Directory that holds the checkpoint state and the checkpoint files.
    output_node_names: str
        Comma-separated names of the nodes to keep as outputs. Surrounding
        whitespace and empty entries are ignored.

    Raises
    ------
    AssertionError
        If `model_dir` does not exist, if it contains no checkpoint state, or
        if `output_node_names` does not name at least one node.
    """
    # Local import so the function stays self-contained inside the notebook.
    import os

    if not tf.gfile.Exists(model_dir):
        raise AssertionError(
            "Export directory doesn't exists. Please specify an export "
            'directory: %s' % model_dir
        )

    # get_checkpoint_state returns None when the directory has no checkpoint
    # state; fail with a clear message instead of an AttributeError below.
    checkpoint = tf.train.get_checkpoint_state(model_dir)
    if checkpoint is None or not checkpoint.model_checkpoint_path:
        raise AssertionError(
            'No checkpoint found in directory: %s' % model_dir
        )
    input_checkpoint = checkpoint.model_checkpoint_path

    node_names = [
        name.strip() for name in output_node_names.split(',') if name.strip()
    ]
    if not node_names:
        raise AssertionError(
            'output_node_names must contain at least one node name'
        )

    # os.path keeps a checkpoint path without a directory part relative
    # ('frozen_model.pb') instead of turning it into '/frozen_model.pb'.
    output_graph = os.path.join(
        os.path.dirname(input_checkpoint), 'frozen_model.pb'
    )

    # Entering the session makes its new, empty graph the default graph, so
    # the meta graph is imported there and not into the notebook's graph.
    with tf.Session(graph = tf.Graph()) as sess:
        saver = tf.train.import_meta_graph(
            input_checkpoint + '.meta', clear_devices = True
        )
        saver.restore(sess, input_checkpoint)
        output_graph_def = tf.graph_util.convert_variables_to_constants(
            sess,
            tf.get_default_graph().as_graph_def(),
            node_names,
        )
        with tf.gfile.GFile(output_graph, 'wb') as f:
            f.write(output_graph_def.SerializeToString())
        print('%d ops in the final graph.' % len(output_graph_def.node))

In [12]:
# Export the checkpoint in 'speakernet' with the node names selected above.
freeze_graph(model_dir = 'speakernet', output_node_names = strings)

INFO:tensorflow:Restoring parameters from speakernet/model.ckpt
Instructions for updating:
Use `tf.compat.v1.graph_util.convert_variables_to_constants`
Instructions for updating:
Use `tf.compat.v1.graph_util.extract_sub_graph`
INFO:tensorflow:Froze 81 variables.
INFO:tensorflow:Converted 81 variables to const ops.
385 ops in the final graph.


In [13]:
def load_graph(frozen_graph_filename, **kwargs):
    """
    Read a serialized GraphDef from disk and import it into a new `tf.Graph`.

    Before importing, ops that reference mutable variables are rewritten to
    their stateless counterparts (RefSwitch -> Switch, AssignSub -> Sub,
    AssignAdd -> Add, Assign -> Identity), following the workaround described
    in https://github.com/onnx/tensorflow-onnx/issues/77#issuecomment-445066091

    Parameters
    ----------
    frozen_graph_filename: str
        Path to the serialized GraphDef file.
    **kwargs:
        Accepted for call compatibility; currently not used.

    Returns
    -------
    tf.Graph
        A new graph holding the imported nodes. `tf.import_graph_def` is called
        without a `name`, so imported nodes carry its default prefix.
    """
    with tf.gfile.GFile(frozen_graph_filename, 'rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())

    # AssignSub / AssignAdd are handled identically apart from the target op.
    arithmetic_replacements = {'AssignSub': 'Sub', 'AssignAdd': 'Add'}

    for node in graph_def.node:
        if node.op == 'RefSwitch':
            node.op = 'Switch'
            # `range` rather than `xrange`: the latter does not exist on
            # Python 3 and raised NameError as soon as a RefSwitch node
            # was encountered.
            for index in range(len(node.input)):
                if 'moving_' in node.input[index]:
                    # Point at the read op of the moving statistic instead of
                    # the variable reference itself.
                    node.input[index] = node.input[index] + '/read'
        elif node.op in arithmetic_replacements:
            node.op = arithmetic_replacements[node.op]
            if 'use_locking' in node.attr:
                del node.attr['use_locking']
        elif node.op == 'Assign':
            node.op = 'Identity'
            if 'use_locking' in node.attr:
                del node.attr['use_locking']
            if 'validate_shape' in node.attr:
                del node.attr['validate_shape']
            if len(node.input) == 2:
                # Identity takes a single input: keep the assigned value and
                # drop the variable reference.
                node.input[0] = node.input[1]
                del node.input[1]

    with tf.Graph().as_default() as graph:
        tf.import_graph_def(graph_def)
    return graph

In [15]:
g = load_graph('speakernet/frozen_model.pb')
# load_graph imports without a name, so tf.import_graph_def prefixes every
# node with 'import/'. From here on `logits` is a tensor of the frozen graph,
# no longer the array computed from the checkpoint model.
x, x_len, logits = [
    g.get_tensor_by_name(tensor_name)
    for tensor_name in (
        'import/Placeholder:0',
        'import/Placeholder_1:0',
        'import/logits:0',
    )
]

In [16]:
# Separate session bound to the frozen graph `g`, used to check the export.
test_sess = tf.InteractiveSession(graph = g)



In [22]:
# Run the frozen graph on the same padded batch and lengths as the
# checkpoint model.
# NOTE(review): this rebinds `l` from the sequence lengths to the output
# array, so the cell cannot be run twice without first re-running the cell
# that calls malaya_speech.padding.sequence_nd. The next cell reads `l` as
# the frozen-graph logits, so any rename must be applied to both cells.
l = test_sess.run(logits, feed_dict = {x: padded, x_len: l})
l.shape

(4, 7205)

In [23]:
# Pairwise cosine similarity of the frozen-graph outputs held in `l`; the
# recorded matrix equals the one computed from the checkpoint model.
1 - cdist(l, l, 'cosine')

array([[1.        , 0.91986295, 0.85895558, 0.85036781],
       [0.91986295, 1.        , 0.85086457, 0.86070392],
       [0.85895558, 0.85086457, 1.        , 0.88895706],
       [0.85036781, 0.86070392, 0.88895706, 1.        ]])