In [1]:
import os

# Set before TensorFlow is imported; an empty value presumably hides every
# GPU so the whole export runs on CPU.
os.environ['CUDA_VISIBLE_DEVICES'] = ''

In [2]:
import tensorflow as tf
import numpy as np
from glob import glob
from itertools import cycle

# Mel-spectrogram .npy files; generate() derives the matching audio path from
# each of these by swapping 'mels' for 'audios'.
# glob() returns paths in arbitrary order, so sort for a reproducible sequence.
mels = sorted(glob('../speech-bahasa/output-male-v2/mels/*.npy'))
# Fail early with a readable message instead of a bare StopIteration later.
assert mels, 'no mel files matched ../speech-bahasa/output-male-v2/mels/*.npy'
# Endless iterator over the files, consumed by generate().
file_cycle = cycle(mels)

In [3]:
# Directory the trained checkpoint is looked up in.
path = 'mbmelgan-male-combined'
# Path prefix of the most recent checkpoint in that directory; it is restored
# into the session further down.
ckpt_path = tf.train.latest_checkpoint(path)
ckpt_path

'mbmelgan-male-combined/model.ckpt-1100000'

In [4]:
def generate(batch_max_steps = 8192, hop_size = 256):
    """
    Endlessly yield one sample at a time as {'mel': ..., 'audio': ...}.

    Paths are pulled from the module-level `file_cycle`; the audio array is
    loaded from the same path with 'mels' replaced by 'audios'.
    `batch_max_steps` and `hop_size` are accepted but not used in the body.
    """
    while True:
        mel_path = next(file_cycle)
        audio_path = mel_path.replace('mels', 'audios')
        yield {'mel': np.load(mel_path), 'audio': np.load(audio_path)}

In [5]:
# Wrap generate() as a tf.data pipeline. Each element is a dict holding a
# float32 mel of shape (frames, 80) and a 1-D float32 waveform.
dataset = tf.data.Dataset.from_generator(
    generate,
    {'mel': tf.float32, 'audio': tf.float32},
    output_shapes = {
        'mel': tf.TensorShape([None, 80]),
        'audio': tf.TensorShape([None]),
    },
)
# Dataset.make_one_shot_iterator() is deprecated; this compat function is the
# replacement TensorFlow's own deprecation notice recommends.
features = tf.compat.v1.data.make_one_shot_iterator(dataset).get_next()
features

Instructions for updating:
Use `for ... in dataset:` to iterate over a dataset. If using `tf.estimator`, return the `Dataset` object directly from your input function. As a last resort, you can use `tf.compat.v1.data.make_one_shot_iterator(dataset)`.


{'mel': <tf.Tensor 'IteratorGetNext:1' shape=(?, 80) dtype=float32>,
 'audio': <tf.Tensor 'IteratorGetNext:0' shape=(?,) dtype=float32>}

In [6]:
import malaya_speech
import malaya_speech.train
from malaya_speech.train.model import melgan, mb_melgan
import malaya_speech.config

# The generator and the PQMF layer are each configured from their own
# GeneratorConfig built out of the same 'melgan_generator_params' section.
mb_melgan_config = malaya_speech.config.mb_melgan_config
generator_params = mb_melgan_config['melgan_generator_params']

generator = mb_melgan.Generator(
    mb_melgan.GeneratorConfig(**generator_params),
    name = 'mb_melgan-generator',
)
# NOTE(review): `pqmf` is built here but not referenced again in the visible cells.
pqmf = mb_melgan.PQMF(
    mb_melgan.GeneratorConfig(**generator_params),
    dtype = tf.float32,
    name = 'pqmf',
)






The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.





In [7]:
# Run the generator in inference mode on the dataset mel. The mel is (frames, 80);
# wrapping it in a list presumably supplies the leading batch axis of size one.
y_hat = generator([features['mel']], training = False)
y_hat

Instructions for updating:
If using Keras pass *_constraint arguments to layers.


<tf.Tensor 'mb_melgan-generator/conv1d/Squeeze:0' shape=(?, ?, 1) dtype=float32>

In [8]:
# Feedable input used for export; assumed layout is (batch, frames, 80).
# No name is passed, so the op gets TensorFlow's default name 'Placeholder',
# which the export, freeze and quantize cells below look up by that name.
x = tf.placeholder(tf.float32, [None, None, 80])
# The same `generator` object is called a second time, now on the placeholder.
y_hat_ = generator(x, training = False)
y_hat_

<tf.Tensor 'mb_melgan-generator_1/conv1d/Squeeze:0' shape=(?, ?, 1) dtype=float32>

In [9]:
# Give the output a fixed node name ('logits') so it can be selected by name
# when the graph is frozen and later re-imported.
y_hat_ = tf.identity(y_hat_, name = 'logits')

In [10]:
# Session for the graph built above. Variables are initialized here and then
# overwritten by the checkpoint restore in the next cell.
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())

In [11]:
# Restore every global variable in the graph from the checkpoint located earlier.
var_list = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
saver = tf.train.Saver(var_list = var_list)
saver.restore(sess, ckpt_path)

INFO:tensorflow:Restoring parameters from mbmelgan-male-combined/model.ckpt-1100000


In [12]:
import IPython.display as ipd

In [13]:
%%time
# One session step: fetch the next dataset element together with the waveform
# generated from its mel. NOTE: `f` is rebound here to the fetched features dict.
f, y_ = sess.run([features, y_hat])

CPU times: user 958 ms, sys: 279 ms, total: 1.24 s
Wall time: 242 ms


In [14]:
# Reference audio of the element fetched above, played back at 22050 Hz.
ipd.Audio(f['audio'], rate = 22050)

In [15]:
# Generated audio for the same element: first batch item, the single output channel.
ipd.Audio(y_[0,:,0], rate = 22050)

In [16]:
import pickle

# NOTE(review): pickle.load can execute arbitrary code from the file; only use
# this with a trusted 'a.pkl'.
# Presumably `data` is a sequence of (frames, 80) mel arrays, since its items
# are fed to the mel placeholder below. TODO confirm.
with open('a.pkl', 'rb') as fopen:
    data = pickle.load(fopen)

In [17]:
# Feed the first pickled item (wrapped in a list as a batch of one) through the
# placeholder path and play the result.
y_ = sess.run(y_hat_, feed_dict = {x: [data[0]]})
ipd.Audio(y_[0,:,0], rate = 22050)

In [18]:
# Same as the previous cell, for the second pickled item.
y_ = sess.run(y_hat_, feed_dict = {x: [data[1]]})
ipd.Audio(y_[0,:,0], rate = 22050)

In [19]:
# Save the session's variables to a new checkpoint. freeze_graph() below is
# pointed at this directory and reads the checkpoint plus its '.meta' graph.
saver = tf.train.Saver()
saver.save(sess, 'mbmelgan-male-output/model.ckpt')

'mbmelgan-male-output/model.ckpt'

In [20]:
# Substrings that disqualify a node name from the export list.
_EXCLUDED_NAME_PARTS = ('adam', 'global_step', 'Assign', 'ReadVariableOp', 'Gather')


def _is_export_node(node):
    """Return True for variable/gather ops and Placeholder/logits-named nodes, minus excluded names."""
    selected = (
        'Variable' in node.op
        or 'gather' in node.op.lower()
        or 'Placeholder' in node.name
        or 'logits' in node.name
    )
    return selected and not any(part in node.name for part in _EXCLUDED_NAME_PARTS)


# Comma-separated node names handed to freeze_graph() as the nodes to keep.
graph_nodes = tf.get_default_graph().as_graph_def().node
strings = ','.join(node.name for node in graph_nodes if _is_export_node(node))
strings.split(',')

['Placeholder', 'logits']

In [21]:
def freeze_graph(model_dir, output_node_names):
    """
    Freeze the checkpoint recorded in `model_dir` into `frozen_model.pb`.

    The checkpoint's `.meta` graph is imported into a fresh graph and session,
    the variables are restored and converted to constants, and the result is
    written next to the checkpoint as `frozen_model.pb`. The number of nodes in
    the frozen graph is printed.

    Parameters
    ----------
    model_dir : str
        Directory containing the checkpoint state and the `.meta` file.
    output_node_names : str
        Comma-separated node names to keep as outputs. Surrounding whitespace
        and empty entries are ignored.

    Raises
    ------
    AssertionError
        If `model_dir` does not exist or contains no checkpoint state.
    """
    if not tf.gfile.Exists(model_dir):
        raise AssertionError(
            "Export directory doesn't exists. Please specify an export "
            'directory: %s' % model_dir
        )

    checkpoint = tf.train.get_checkpoint_state(model_dir)
    # get_checkpoint_state returns None when the directory holds no valid
    # checkpoint state; report that clearly instead of an AttributeError.
    if checkpoint is None or not checkpoint.model_checkpoint_path:
        raise AssertionError(
            'No checkpoint found in directory: %s' % model_dir
        )
    input_checkpoint = checkpoint.model_checkpoint_path

    # os.path copes with a checkpoint path that has no directory part, where
    # manual '/'-splitting would have produced '/frozen_model.pb'.
    output_graph = os.path.join(
        os.path.dirname(input_checkpoint), 'frozen_model.pb'
    )
    node_names = [
        name.strip() for name in output_node_names.split(',') if name.strip()
    ]

    # A fresh graph keeps the import separate from whatever the notebook has
    # already built in the default graph.
    with tf.Session(graph = tf.Graph()) as sess:
        saver = tf.train.import_meta_graph(
            input_checkpoint + '.meta', clear_devices = True
        )
        saver.restore(sess, input_checkpoint)
        output_graph_def = tf.graph_util.convert_variables_to_constants(
            sess,
            tf.get_default_graph().as_graph_def(),
            node_names,
        )
        with tf.gfile.GFile(output_graph, 'wb') as f:
            f.write(output_graph_def.SerializeToString())
        print('%d ops in the final graph.' % len(output_graph_def.node))

In [22]:
# Freeze the checkpoint saved above, keeping the nodes selected in `strings`.
freeze_graph('mbmelgan-male-output', strings)

INFO:tensorflow:Restoring parameters from mbmelgan-male-output/model.ckpt
Instructions for updating:
Use `tf.compat.v1.graph_util.convert_variables_to_constants`
Instructions for updating:
Use `tf.compat.v1.graph_util.extract_sub_graph`
INFO:tensorflow:Froze 82 variables.
INFO:tensorflow:Converted 82 variables to const ops.
1098 ops in the final graph.


In [23]:
def load_graph(frozen_graph_filename):
    """
    Read a serialized GraphDef from `frozen_graph_filename`, import it into a
    new `tf.Graph`, and return that graph.

    No name is passed to `import_graph_def`, so the nodes are imported under
    TensorFlow's default prefix (addressed as 'import/...' by the cells below).
    """
    graph_def = tf.GraphDef()
    with tf.gfile.GFile(frozen_graph_filename, 'rb') as fopen:
        graph_def.ParseFromString(fopen.read())

    graph = tf.Graph()
    with graph.as_default():
        tf.import_graph_def(graph_def)

    return graph

In [24]:
# Re-import the frozen graph written by freeze_graph() to verify it standalone.
g = load_graph('mbmelgan-male-output/frozen_model.pb')

In [25]:
# Separate session bound to the imported frozen graph.
test_sess = tf.InteractiveSession(graph = g)



In [26]:
# Input and output tensors of the imported graph; load_graph() imports nodes
# under the 'import/' prefix. Plain strings: there is nothing to interpolate.
X = g.get_tensor_by_name('import/Placeholder:0')
logits = g.get_tensor_by_name('import/logits:0')

In [27]:
# Run the frozen graph on the second pickled item and play the result.
y_ = test_sess.run(logits, feed_dict = {X: [data[1]]})
ipd.Audio(y_[0,:,0], rate = 22050)

In [28]:
from tensorflow.tools.graph_transforms import TransformGraph

In [29]:
# Graph-transform passes, handed to TransformGraph in this order when the
# quantized copy of the frozen graph is produced.
transforms = [
    'add_default_attributes',
    'remove_nodes(op=Identity, op=CheckNumerics)',
    'fold_batch_norms',
    'fold_old_batch_norms',
    'quantize_weights(fallback_min=-1024, fallback_max=1024)',
    'strip_unused_nodes',
    'sort_by_execution_order',
]

In [30]:
# Frozen graph to transform; the quantized copy is written to '<pb>.quantized'.
pb = 'mbmelgan-male-output/frozen_model.pb'

In [31]:
# Load the frozen GraphDef from disk.
input_graph_def = tf.GraphDef()
# tf.gfile.FastGFile is deprecated in favour of tf.gfile.GFile, which the rest
# of this notebook already uses.
with tf.gfile.GFile(pb, 'rb') as f:
    input_graph_def.ParseFromString(f.read())

# Apply the transform list between the named input and output nodes.
transformed_graph_def = TransformGraph(
    input_graph_def, ['Placeholder'], ['logits'], transforms
)

# Write the transformed graph next to the original as '<pb>.quantized'.
with tf.gfile.GFile(f'{pb}.quantized', 'wb') as f:
    f.write(transformed_graph_def.SerializeToString())

Instructions for updating:
Use tf.gfile.GFile.


In [32]:
# Re-import the quantized graph; `g` now refers to it instead of the frozen one.
g = load_graph('mbmelgan-male-output/frozen_model.pb.quantized')

In [33]:
# New session bound to the quantized graph, plus its input/output tensors.
# NOTE(review): the previous `test_sess` is rebound here without being closed.
test_sess = tf.InteractiveSession(graph = g)
# Plain strings: there is nothing to interpolate in these tensor names.
X = g.get_tensor_by_name('import/Placeholder:0')
logits = g.get_tensor_by_name('import/logits:0')

In [34]:
# Run the quantized graph on the second pickled item and play the result, for
# comparison with the frozen-graph output above.
y_ = test_sess.run(logits, feed_dict = {X: [data[1]]})
ipd.Audio(y_[0,:,0], rate = 22050)