In [1]:
import os

# An empty CUDA_VISIBLE_DEVICES hides every GPU from CUDA, so the export runs on CPU.
# It is set here, before TensorFlow is imported in the next cell.
os.environ['CUDA_VISIBLE_DEVICES'] = ''

In [2]:
import tensorflow as tf
import numpy as np
from glob import glob
from itertools import cycle

# glob() returns paths in arbitrary, filesystem-dependent order. Sorting makes the
# iteration order and positional lookups such as mels[-10000] reproducible across runs.
mels = sorted(glob('universal-mel/*.npy'))
file_cycle = cycle(mels)
# Pull the first path off the cycle; generate() therefore starts from the next file.
f = next(file_cycle)

In [3]:
# Checkpoint directory to export from.
path = 'hifigan-combined'
# Resolve the most recent checkpoint prefix recorded in that directory and display it.
# NOTE(review): the restore cell further down passes a hard-coded checkpoint path
# instead of `ckpt_path` — confirm which checkpoint is meant to be exported.
ckpt_path = tf.train.latest_checkpoint(path)
ckpt_path

'hifigan-combined/model.ckpt-640000'

In [4]:
def generate(batch_max_steps = 8192, hop_size = 256):
    """
    Endlessly yield ``{'mel': ..., 'audio': ...}`` pairs loaded from disk.

    Each iteration takes the next path from the module-level ``file_cycle``,
    loads it with ``np.load`` as the mel, and loads the audio from the same
    path with every ``'mels'`` substring replaced by ``'audios'``.

    ``batch_max_steps`` and ``hop_size`` are accepted but not used.

    NOTE(review): the audio path comes purely from string substitution; when a
    mel path contains no ``'mels'`` substring, the mel file itself is loaded as
    the audio. Verify against the on-disk layout.
    """
    while True:
        mel_path = next(file_cycle)
        audio_path = mel_path.replace('mels', 'audios')
        yield {'mel': np.load(mel_path), 'audio': np.load(audio_path)}

In [5]:
# Wrap the Python generator in a tf.data pipeline. Each element is a dict with a
# float32 'mel' of shape [?, 80] and a float32 1-D 'audio' tensor.
dataset = tf.data.Dataset.from_generator(
    generate,
    {'mel': tf.float32, 'audio': tf.float32},
    output_shapes = {
        'mel': tf.TensorShape([None, 80]),
        'audio': tf.TensorShape([None]),
    },
)
# Dataset.make_one_shot_iterator() is deprecated; use the compat.v1 helper that the
# deprecation notice recommends for graph-mode code.
features = tf.compat.v1.data.make_one_shot_iterator(dataset).get_next()
features

Instructions for updating:
Use `for ... in dataset:` to iterate over a dataset. If using `tf.estimator`, return the `Dataset` object directly from your input function. As a last resort, you can use `tf.compat.v1.data.make_one_shot_iterator(dataset)`.


{'mel': <tf.Tensor 'IteratorGetNext:1' shape=(?, 80) dtype=float32>,
 'audio': <tf.Tensor 'IteratorGetNext:0' shape=(?,) dtype=float32>}

In [6]:
import malaya_speech
import malaya_speech.train
from malaya_speech.train.model import hifigan
from malaya_speech.train.model import stft
import malaya_speech.config

In [7]:
# Start from the library's HiFi-GAN v2 preset and override the generator's
# `filters` to 768. New dicts are built for the override so the shared
# `malaya_speech.config.hifigan_config_v2` object is not mutated in place.
hifigan_config = dict(malaya_speech.config.hifigan_config_v2)
hifigan_config['hifigan_generator_params'] = {
    **hifigan_config['hifigan_generator_params'],
    'filters': 768,
}
generator = hifigan.Generator(
    hifigan.GeneratorConfig(**hifigan_config['hifigan_generator_params']),
    name = 'hifigan_generator',
)

In [8]:
# Run the generator on one dataset element. Wrapping the mel in a list supplies
# the leading batch dimension (the displayed output shape is (1, ?, 1)).
y_hat = generator([features['mel']], training = False)
y_hat

Instructions for updating:
If using Keras pass *_constraint arguments to layers.


<tf.Tensor 'hifigan_generator/sequential/activation/Tanh:0' shape=(1, ?, 1) dtype=float32>

In [9]:
# Feedable float32 input with 80 features on the last axis
# (presumably [batch, frames, 80] — TODO confirm).
# The placeholder is created without a name, so its graph node is 'Placeholder',
# which is the input node name the export cells below refer to.
x = tf.placeholder(tf.float32, [None, None, 80])
# Second call on the same generator object, this time driven by the placeholder.
y_hat_ = generator(x, training = False)
y_hat_

<tf.Tensor 'hifigan_generator_1/sequential/activation/Tanh:0' shape=(?, ?, 1) dtype=float32>

In [10]:
# Alias the generator output under the fixed node name 'logits', which the
# export cells below use as the output node.
y_hat_ = tf.identity(y_hat_, name = 'logits')

In [11]:
# Open an interactive session and run the global variable initializer;
# the checkpoint values are restored over these in the next cell.
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())

In [12]:
# Restore every variable in the GLOBAL_VARIABLES collection from a training checkpoint.
var_list = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
saver = tf.train.Saver(var_list = var_list)
# NOTE(review): this path is hard-coded to step 600000, whereas `ckpt_path`
# (computed in an earlier cell) resolved to step 640000 — confirm that exporting
# the older checkpoint is intentional.
saver.restore(sess, 'hifigan-combined/model.ckpt-600000')

INFO:tensorflow:Restoring parameters from hifigan-combined/model.ckpt-600000


In [13]:
import IPython.display as ipd

In [14]:
# %%time
# f, y_hat_ = sess.run([features, y_hat])

In [15]:
# ipd.Audio(f['audio'], rate = 22050)

In [16]:
# ipd.Audio(y_hat_[0,:,0], rate = 22050)

In [17]:
# Load a sample recording with sr = 22050 and compute its mel features
# via malaya_speech's universal_mel, to use as vocoder input below.
y, _ = malaya_speech.load('wadi-annuar.wav', sr = 22050)
m = malaya_speech.featurization.universal_mel(y)

In [18]:
%%time

# Feed the sample's mel as a batch of one through the placeholder graph,
# then play back the first item's waveform at 22050 Hz.
y_ = sess.run(y_hat_, feed_dict = {x: [m]})
ipd.Audio(y_[0,:,0], rate = 22050)

CPU times: user 29.4 s, sys: 4.64 s, total: 34 s
Wall time: 3.06 s


In [19]:
%%time

# Same check on a mel loaded from the `mels` file list.
# NOTE(review): index -10000 assumes `mels` holds at least 10000 paths; with fewer
# files this raises IndexError.
y_ = sess.run(y_hat_, feed_dict = {x: [np.load(mels[-10000])]})
ipd.Audio(y_[0,:,0], rate = 22050)

CPU times: user 10.2 s, sys: 863 ms, total: 11.1 s
Wall time: 776 ms


In [20]:
# Save the session's current variables to a fresh export directory with a new
# default Saver; freeze_graph() later loads the checkpoint from this directory.
saver = tf.train.Saver()
saver.save(sess, 'universal-hifigan-output/model.ckpt')

'universal-hifigan-output/model.ckpt'

In [21]:
# Build the comma-separated list of node names handed to freeze_graph().
# A node is selected when its op type mentions 'Variable' or 'gather'
# (case-insensitive for the latter), or its name mentions 'Placeholder' or 'logits'.
# It is then dropped if its name contains any of the excluded fragments below.
selected = []
for node in tf.get_default_graph().as_graph_def().node:
    wanted = (
        'Variable' in node.op
        or 'gather' in node.op.lower()
        or 'Placeholder' in node.name
        or 'logits' in node.name
    )
    if not wanted:
        continue
    excluded = any(
        fragment in node.name
        for fragment in ('adam', 'global_step', 'Assign', 'ReadVariableOp', 'Gather')
    )
    if excluded:
        continue
    selected.append(node.name)

strings = ','.join(selected)
strings.split(',')

['Placeholder', 'logits']

In [22]:
def freeze_graph(model_dir, output_node_names):
    """
    Freeze the checkpoint recorded in ``model_dir`` into a single GraphDef file.

    The checkpoint's meta graph is imported into a fresh graph, its variables
    are restored and converted to constants, and the result is written as
    ``frozen_model.pb`` in the same directory as the checkpoint.

    Parameters
    ----------
    model_dir : str
        Directory holding the checkpoint state and checkpoint files.
    output_node_names : str
        Comma-separated names of the nodes to keep as outputs.

    Raises
    ------
    AssertionError
        If ``model_dir`` does not exist or holds no checkpoint state.
    """
    if not tf.gfile.Exists(model_dir):
        raise AssertionError(
            "Export directory doesn't exists. Please specify an export "
            'directory: %s' % model_dir
        )

    checkpoint = tf.train.get_checkpoint_state(model_dir)
    # get_checkpoint_state() returns None when no checkpoint state is found.
    # Fail with a clear message instead of an AttributeError on the next line.
    if checkpoint is None or not checkpoint.model_checkpoint_path:
        raise AssertionError(
            'No checkpoint state found in directory: %s' % model_dir
        )
    input_checkpoint = checkpoint.model_checkpoint_path

    # Write next to the checkpoint. os.path also copes with a checkpoint path that
    # has no directory part, where manual '/' splitting produced '/frozen_model.pb'.
    output_graph = os.path.join(
        os.path.dirname(input_checkpoint), 'frozen_model.pb'
    )

    # The session is bound to a brand-new graph, so the meta graph is imported
    # into it and not into the notebook's default graph.
    with tf.Session(graph = tf.Graph()) as sess:
        saver = tf.train.import_meta_graph(
            input_checkpoint + '.meta', clear_devices = True
        )
        saver.restore(sess, input_checkpoint)
        output_graph_def = tf.graph_util.convert_variables_to_constants(
            sess,
            tf.get_default_graph().as_graph_def(),
            output_node_names.split(','),
        )
        with tf.gfile.GFile(output_graph, 'wb') as f:
            f.write(output_graph_def.SerializeToString())
        print('%d ops in the final graph.' % len(output_graph_def.node))

In [23]:
# Freeze the checkpoint saved above, keeping the nodes listed in `strings` as outputs.
freeze_graph('universal-hifigan-output', strings)

INFO:tensorflow:Restoring parameters from universal-hifigan-output/model.ckpt
Instructions for updating:
Use `tf.compat.v1.graph_util.convert_variables_to_constants`
Instructions for updating:
Use `tf.compat.v1.graph_util.extract_sub_graph`
INFO:tensorflow:Froze 82 variables.
INFO:tensorflow:Converted 82 variables to const ops.
1434 ops in the final graph.


In [24]:
from tensorflow.tools.graph_transforms import TransformGraph

In [25]:
# Ordered list of transform specs passed to TransformGraph for the frozen graph.
transforms = [
    'add_default_attributes',
    'remove_nodes(op=Identity, op=CheckNumerics)',
    'fold_batch_norms',
    'fold_old_batch_norms',
    'quantize_weights(fallback_min=-1024, fallback_max=1024)',
    'strip_unused_nodes',
    'sort_by_execution_order',
]

In [26]:
# Frozen graph to transform; the transformed copy is written to `<pb>.quantized`.
pb = 'universal-hifigan-output/frozen_model.pb'

In [27]:
# Parse the frozen GraphDef from disk.
input_graph_def = tf.GraphDef()
# tf.gfile.FastGFile is deprecated; the deprecation notice says to use tf.gfile.GFile.
with tf.gfile.GFile(pb, 'rb') as f:
    input_graph_def.ParseFromString(f.read())

# Apply the transform pipeline with 'Placeholder' as the graph input and
# 'logits' as the graph output.
transformed_graph_def = TransformGraph(
    input_graph_def,
    ['Placeholder'],
    ['logits'],
    transforms,
)

# Store the transformed graph next to the original frozen graph.
with tf.gfile.GFile(f'{pb}.quantized', 'wb') as f:
    f.write(transformed_graph_def.SerializeToString())

Instructions for updating:
Use tf.gfile.GFile.


In [28]:
# Read the B2 credentials from the environment; they are never hard-coded here.
# A missing variable raises KeyError.
b2_application_key_id = os.environ['b2_application_key_id']
b2_application_key = os.environ['b2_application_key']

In [29]:
# Import only the names this notebook uses; a star import hides where they come from.
from b2sdk.v1 import B2Api, InMemoryAccountInfo

# Authorise against B2 with the credentials read from the environment and
# look up the destination bucket used by the upload cells below.
info = InMemoryAccountInfo()
b2_api = B2Api(info)
application_key_id = b2_application_key_id
application_key = b2_application_key
b2_api.authorize_account("production", application_key_id, application_key)
# Metadata attached to every uploaded file.
file_info = {'how': 'good-file'}
b2_bucket = b2_api.get_bucket_by_name('malaya-speech-model')

In [30]:
# Upload the frozen (unquantized) graph to the bucket.
file, outPutname = (
    'universal-hifigan-output/frozen_model.pb',
    'vocoder-hifigan/universal-768/model.pb',
)
b2_bucket.upload_local_file(local_file = file, file_name = outPutname, file_infos = file_info)


<b2sdk.file_version.FileVersionInfo at 0x7f6ce02eb940>

In [31]:
# Upload the transformed (`.quantized`) graph to the bucket.
file, outPutname = (
    'universal-hifigan-output/frozen_model.pb.quantized',
    'vocoder-hifigan/universal-768-quantized/model.pb',
)
b2_bucket.upload_local_file(local_file = file, file_name = outPutname, file_infos = file_info)


<b2sdk.file_version.FileVersionInfo at 0x7f6ce02ebc88>

In [32]:
!tar -zcvf universal-hifigan-output.tar.gz universal-hifigan-output

universal-hifigan-output/
universal-hifigan-output/model.ckpt.index
universal-hifigan-output/model.ckpt.data-00000-of-00001
universal-hifigan-output/frozen_model.pb.quantized
universal-hifigan-output/checkpoint
universal-hifigan-output/model.ckpt.meta
universal-hifigan-output/frozen_model.pb


In [None]:
# Upload the tarball of the whole export directory to the bucket.
file, outPutname = (
    'universal-hifigan-output.tar.gz',
    'pretrained/universal-hifigan-output.tar.gz',
)
b2_bucket.upload_local_file(local_file = file, file_name = outPutname, file_infos = file_info)