In [1]:
import os

# Hide every CUDA device from this process; this is set ahead of the
# TensorFlow import in the next cell.
os.environ.update({'CUDA_VISIBLE_DEVICES': ''})

In [2]:
import tensorflow as tf
import malaya_speech
import malaya_speech.train
from malaya_speech.train.model import resnet_unet
from malaya_speech.utils import tf_featurization
import malaya_speech.augmentation.waveform as augmentation
import IPython.display as ipd
import numpy as np
from malaya_speech.utils.tf_featurization import separation_exponent, EPSILON


The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.



In [3]:
class Model:
    """Inference graph that splits one waveform into `size` outputs.

    One `resnet_unet.Model` is built per output under the variable scope
    `model_{i}`, all fed with the same STFT features. Output `i` gets the
    mask

        (logits_i ** separation_exponent + EPSILON / size)
        / (sum_j logits_j ** separation_exponent + EPSILON)

    which is applied to the complex STFT and inverted back to a waveform.
    The resulting tensors are kept in `self.istft` and also exposed in the
    graph under the names `logits_{i}`.
    """

    def __init__(self, size = 2):
        # Input waveform placeholder; its shape is left unspecified.
        self.X = tf.placeholder(tf.float32, (None))

        stft_X, D_X = tf_featurization.get_stft(self.X)

        # One network per output, each inside its own variable scope.
        self.outputs = []
        for index in range(size):
            with tf.variable_scope(f'model_{index}'):
                network = resnet_unet.Model(D_X)
                self.outputs.append(network.logits)

        # Denominator shared by every mask.
        powered = [logit ** separation_exponent for logit in self.outputs]
        denominator = tf.reduce_sum(powered, axis=0) + EPSILON

        self.istft = []
        for logit in self.outputs:
            numerator = logit ** separation_exponent + (EPSILON / size)
            mask = tf_featurization.extend_mask(numerator / denominator)

            # Merge the first two axes of the mask into one.
            dims = tf.shape(mask)
            merged = dims[0] * dims[1]
            flat_shape = tf.concat([[merged], dims[2:]], axis=0)
            mask = tf.reshape(mask, flat_shape)

            # Trim the mask to the leading length of the STFT before applying it.
            mask = mask[:tf.shape(stft_X)[0]]
            masked_stft = tf.cast(mask, dtype=tf.complex64) * stft_X
            waveform = tf_featurization.istft(masked_stft, self.X)
            self.istft.append(waveform[:, 0])

        # Give each output a stable node name for export.
        for index, waveform in enumerate(self.istft):
            tf.identity(waveform, name = f'logits_{index}')

In [4]:
# Clear the default graph before building, so the model is constructed in
# an empty graph.
tf.compat.v1.reset_default_graph()
model = Model()

sess = tf.InteractiveSession()
init_op = tf.global_variables_initializer()
sess.run(init_op)

Instructions for updating:
If using Keras pass *_constraint arguments to layers.


In [5]:
# Restore the trained weights for every global variable in the graph.
checkpoint_path = 'noise-reduction-unet9/model.ckpt-500000'
var_lists = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
saver = tf.train.Saver(var_list = var_lists)
saver.restore(sess, checkpoint_path)

INFO:tensorflow:Restoring parameters from noise-reduction-unet9/model.ckpt-500000


In [6]:
from glob import glob
import random

# Collect the .wav paths found under the two data folders.
files, noises = (
    glob(pattern)
    for pattern in ('../youtube/clean-wav/*.wav', '../noise-44k/clean-wav/*.wav')
)

In [7]:
def read_wav(f, sr = 44100):
    """Load an audio file through `malaya_speech.load`.

    Parameters
    ----------
    f :
        Path handed to `malaya_speech.load`.
    sr : int, optional
        Sample rate forwarded as the `sr` argument. Defaults to 44100,
        the value that used to be hard-coded here.

    Returns
    -------
    Whatever `malaya_speech.load` returns; the caller in this notebook
    keeps only element `[0]` of it.
    """
    return malaya_speech.load(f, sr = sr)

# Same rate that read_wav loads audio at.
sr = 44100

# Only the first element of what read_wav returns is kept.
loaded = read_wav('2x5%20Ep%2010.wav')
y = loaded[0]
# n = read_wav(random.choice(noises))[0]

In [None]:
# y = y[:sr * 15]

In [None]:
# combined, noise = augmentation.add_noise(
#     y, n, factor = random.uniform(0.5, 0.9), return_noise = True
# )

In [10]:
# Evaluate every output waveform in a single run over the loaded audio.
feeds = {model.X: y}
istft = sess.run(model.istft, feed_dict = feeds)

In [8]:
import IPython.display as ipd

In [28]:
# ipd.Audio(y[:sr * 15], rate = sr)

In [27]:
# ipd.Audio(istft[0][:sr * 15], rate = sr)

In [13]:
# ipd.Audio(istft[1][:sr * 30], rate = sr)

In [14]:
# Save the session's variables under the export directory; the freeze step
# further down reads this checkpoint and its .meta file.
export_prefix = 'noise-reduction-resnet-unet-output/model.ckpt'
saver = tf.train.Saver()
saver.save(sess, export_prefix)

'noise-reduction-resnet-unet-output/model.ckpt'

In [15]:
def _is_export_node(node):
    """Tell whether a graph node's name belongs in the freeze list.

    A node qualifies when its op contains 'Variable' or its name contains
    'Placeholder', 'logits', 'alphas' or 'self/Softmax', unless its name
    also contains 'adam', 'beta', 'global_step' or 'Assign'.
    """
    selected = (
        'Variable' in node.op
        or 'Placeholder' in node.name
        or 'logits' in node.name
        or 'alphas' in node.name
        or 'self/Softmax' in node.name
    )
    blocked = any(
        tag in node.name
        for tag in ('adam', 'beta', 'global_step', 'Assign')
    )
    return selected and not blocked


# Comma-separated node names, in graph order, later passed to freeze_graph.
graph_nodes = tf.get_default_graph().as_graph_def().node
strings = ','.join(node.name for node in graph_nodes if _is_export_node(node))
strings.split(',')

['Placeholder',
 'model_0/conv2d/kernel/Read/ReadVariableOp',
 'model_0/conv2d/Conv2D/ReadVariableOp',
 'model_0/conv2d_1/kernel/Read/ReadVariableOp',
 'model_0/conv2d_1/bias/Read/ReadVariableOp',
 'model_0/conv2d_1/Conv2D/ReadVariableOp',
 'model_0/conv2d_1/BiasAdd/ReadVariableOp',
 'model_0/batch_normalization/gamma/Read/ReadVariableOp',
 'model_0/batch_normalization/moving_mean/Read/ReadVariableOp',
 'model_0/batch_normalization/moving_variance/Read/ReadVariableOp',
 'model_0/batch_normalization/cond/ReadVariableOp',
 'model_0/batch_normalization/cond/ReadVariableOp_1',
 'model_0/batch_normalization/cond/ReadVariableOp_2',
 'model_0/batch_normalization/cond/ReadVariableOp_3',
 'model_0/batch_normalization/cond/FusedBatchNormV3_1/ReadVariableOp',
 'model_0/batch_normalization/cond/FusedBatchNormV3_1/ReadVariableOp_1',
 'model_0/conv2d_2/kernel/Read/ReadVariableOp',
 'model_0/conv2d_2/bias/Read/ReadVariableOp',
 'model_0/conv2d_2/Conv2D/ReadVariableOp',
 'model_0/conv2d_2/BiasAdd/Read

In [16]:
def freeze_graph(model_dir, output_node_names):
    """Freeze the checkpoint recorded in `model_dir` into `frozen_model.pb`.

    The checkpoint's meta graph is imported into a fresh graph, its
    variables are restored and converted to constants, and the result is
    written next to the checkpoint as `frozen_model.pb`. The number of ops
    in the frozen graph is printed.

    Parameters
    ----------
    model_dir : str
        Directory holding the checkpoint state file.
    output_node_names : str
        Comma-separated node names to keep as outputs of the frozen graph.

    Raises
    ------
    AssertionError
        If `model_dir` does not exist, or if no checkpoint is recorded in
        it (previously the latter surfaced as an AttributeError on None).
    """
    if not tf.gfile.Exists(model_dir):
        raise AssertionError(
            "Export directory doesn't exists. Please specify an export "
            'directory: %s' % model_dir
        )

    checkpoint = tf.train.get_checkpoint_state(model_dir)
    if checkpoint is None or not checkpoint.model_checkpoint_path:
        raise AssertionError(
            'No checkpoint found in directory: %s' % model_dir
        )
    input_checkpoint = checkpoint.model_checkpoint_path

    # os.path handles a checkpoint path without any directory part; joining
    # on '/' by hand turned that case into '/frozen_model.pb' (the root).
    output_graph = os.path.join(
        os.path.dirname(input_checkpoint), 'frozen_model.pb'
    )

    with tf.Session(graph = tf.Graph()) as sess:
        saver = tf.train.import_meta_graph(
            input_checkpoint + '.meta', clear_devices = True
        )
        saver.restore(sess, input_checkpoint)
        output_graph_def = tf.graph_util.convert_variables_to_constants(
            sess,
            tf.get_default_graph().as_graph_def(),
            output_node_names.split(','),
        )
        with tf.gfile.GFile(output_graph, 'wb') as f:
            f.write(output_graph_def.SerializeToString())
        print('%d ops in the final graph.' % len(output_graph_def.node))

In [17]:
# Freeze the checkpoint in the export directory, keeping the node names
# collected in `strings` as outputs.
freeze_graph('noise-reduction-resnet-unet-output', strings)

INFO:tensorflow:Restoring parameters from noise-reduction-resnet-unet-output/model.ckpt
Instructions for updating:
Use `tf.compat.v1.graph_util.convert_variables_to_constants`
Instructions for updating:
Use `tf.compat.v1.graph_util.extract_sub_graph`
INFO:tensorflow:Froze 210 variables.
INFO:tensorflow:Converted 210 variables to const ops.
2282 ops in the final graph.


In [18]:
def load_graph(frozen_graph_filename, **kwargs):
    """Read a frozen GraphDef from disk and import it into a new graph.

    Before importing, stateful ops are rewritten to stateless equivalents:
    `RefSwitch` becomes `Switch` (inputs whose name contains 'moving_' get
    a '/read' suffix), `AssignSub` becomes `Sub`, `AssignAdd` becomes `Add`
    and `Assign` becomes `Identity` of the assigned value.

    Parameters
    ----------
    frozen_graph_filename : str
        Path of the serialized GraphDef (.pb) file.
    **kwargs
        Accepted for call compatibility and ignored.

    Returns
    -------
    The new graph holding the imported nodes. `tf.import_graph_def` is
    called without a name, so nodes sit under its default 'import/' prefix.
    """
    with tf.gfile.GFile(frozen_graph_filename, 'rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())

    # Node rewrites taken from
    # https://github.com/onnx/tensorflow-onnx/issues/77#issuecomment-445066091
    for node in graph_def.node:
        if node.op == 'RefSwitch':
            node.op = 'Switch'
            # `range`, not `xrange`: this notebook runs on Python 3, where
            # `xrange` raises NameError.
            for index in range(len(node.input)):
                if 'moving_' in node.input[index]:
                    node.input[index] = node.input[index] + '/read'
        elif node.op == 'AssignSub':
            node.op = 'Sub'
            if 'use_locking' in node.attr:
                del node.attr['use_locking']
        elif node.op == 'AssignAdd':
            node.op = 'Add'
            if 'use_locking' in node.attr:
                del node.attr['use_locking']
        elif node.op == 'Assign':
            node.op = 'Identity'
            if 'use_locking' in node.attr:
                del node.attr['use_locking']
            if 'validate_shape' in node.attr:
                del node.attr['validate_shape']
            # Keep only the assigned value as the Identity's single input.
            if len(node.input) == 2:
                node.input[0] = node.input[1]
                del node.input[1]

    with tf.Graph().as_default() as graph:
        tf.import_graph_def(graph_def)
    return graph

In [19]:
# Labels for the exported outputs; position i is paired with `logits_{i}`.
instruments = ['voice', 'noise']

In [21]:
# Re-import the frozen graph and look up its input and output tensors;
# imported nodes sit under the 'import/' prefix.
g = load_graph('noise-reduction-resnet-unet-output/frozen_model.pb')
x = g.get_tensor_by_name('import/Placeholder:0')

logits = dict(
    (label, g.get_tensor_by_name(f'import/logits_{position}:0'))
    for position, label in enumerate(instruments)
)

In [23]:
# Output node names, one per entry of `instruments`.
[f'logits_{no}' for no in range(len(instruments))]

['logits_0', 'logits_1']

In [None]:
# TransformGraph was used below without ever being imported, so this cell
# raised NameError.
from tensorflow.tools.graph_transforms import TransformGraph

pb = 'noise-reduction-resnet-unet-output/frozen_model.pb'

# Graph-transform passes, applied in this order.
transforms = ['add_default_attributes',
             'remove_nodes(op=Identity, op=CheckNumerics, op=Dropout)',
             'fold_constants(ignore_errors=true)',
             'fold_batch_norms',
             'fold_old_batch_norms',
             'quantize_weights(fallback_min=-10, fallback_max=10)',
             'quantize_nodes(fallback_min=-10, fallback_max=10)',
             'strip_unused_nodes',
             'sort_by_execution_order']

# Read the frozen graph (GFile replaces the deprecated FastGFile alias).
input_graph_def = tf.GraphDef()
with tf.gfile.GFile(pb, 'rb') as f:
    input_graph_def.ParseFromString(f.read())

# Inputs and outputs are named as in the frozen graph, without the
# 'import/' prefix that load_graph adds.
transformed_graph_def = TransformGraph(
    input_graph_def,
    ['Placeholder'],
    [f'logits_{no}' for no in range(len(instruments))],
    transforms,
)

# Write the transformed graph next to the original as '<pb>.quantized'.
with tf.gfile.GFile(f'{pb}.quantized', 'wb') as f:
    f.write(transformed_graph_def.SerializeToString())

In [None]:
# Display the label -> output tensor mapping looked up from the frozen graph.
logits

In [None]:
# Session bound to the re-imported frozen graph `g`, for checking the export.
test_sess = tf.InteractiveSession(graph = g)

In [None]:
# l = test_sess.run(logits, feed_dict = {x: y})

In [None]:
# ipd.Audio(l['voice'][:sr * 30], rate = sr)

In [None]:
# Archive the directory this notebook actually writes its checkpoint and
# frozen graphs to (the earlier command pointed at 'noise-reduction-unet-output',
# which nothing in this notebook creates).
!tar -czvf noise-reduction-resnet-unet-output-500k.tar.gz noise-reduction-resnet-unet-output