In [1]:
import os

# Restrict CUDA to the device with index 1. This runs before the cell that
# imports TensorFlow, so the setting is already in the environment by then.
os.environ['CUDA_VISIBLE_DEVICES'] = '1'

In [2]:
import sys

# Notebooks have no __file__, and __name__ is just the string '__main__', so
# the former abspath(__name__) trick only ever meant "<cwd>/__main__".
# Resolve the parent of the working directory explicitly instead; this is the
# same value, without pretending a module name is a path.
SOURCE_DIR = os.path.dirname(os.getcwd())
# Put the checkout first on sys.path so the local malaya_speech package is
# found ahead of any installed copy.
sys.path.insert(0, SOURCE_DIR)

In [3]:
import tensorflow as tf
import malaya_speech
import malaya_speech.train
from malaya_speech.train.model import resnest
from malaya_speech.utils import tf_featurization
from tensorflow.keras.layers import Multiply
import IPython.display as ipd
import numpy as np






The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.





In [4]:
class Model:
    """Graph for separating a mixture into `size` sources.

    One `resnest.Model` is built per source (under variable scope
    `model_<i>`), each fed the second value returned by
    `tf_featurization.get_stft` for the mixture. Each network output is
    compared with the matching feature of the target waveform using a mean
    absolute error, and the outputs are also turned into ratio masks that are
    applied to the mixture STFT to reconstruct one waveform per source.
    """

    def __init__(self, size = 2):
        """Build placeholders, per-source networks, losses and reconstructions.

        Args:
            size: number of sources, i.e. how many networks are created and
                how many rows `self.Y` expects. Defaults to 2.
        """
        # Mixture waveform. `(None)` is plain None, so the shape is left
        # unspecified; the notebook feeds a 1-D array of samples.
        self.X = tf.placeholder(tf.float32, (None))
        # One target waveform per source. The leading dimension follows
        # `size`; a fixed 2 here would make `self.Y[i]` fail for size > 2 and
        # would force two targets to be fed even when size is 1.
        self.Y = tf.placeholder(tf.float32, (size, None))

        # get_stft returns a pair; the second item is what the networks
        # consume and what the loss compares against.
        # NOTE(review): presumably (complex STFT, magnitude) - confirm in
        # tf_featurization.
        stft_X, D_X = tf_featurization.get_stft(self.X)

        # Full (stft, feature) pair for every target source.
        self.stft = [
            tf_featurization.get_stft(self.Y[i]) for i in range(size)
        ]

        # One independent network per source, all reading the same mixture
        # feature; the variable scope keeps their weights separate.
        self.outputs = []
        for i in range(size):
            with tf.variable_scope(f'model_{i}'):
                self.outputs.append(resnest.Model(D_X).logits)

        # Per-source mean absolute error between prediction and target feature.
        self.loss = [
            tf.reduce_mean(tf.abs(output - target[1]))
            for output, target in zip(self.outputs, self.stft)
        ]

        # Scalar training objective: sum of the per-source losses.
        self.cost = tf.reduce_sum(self.loss)

        separation_exponent = 2
        EPSILON = 1e-10

        # Denominator shared by all masks. EPSILON keeps it non-zero when
        # every output is zero at some position.
        output_sum = tf.reduce_sum(
            [o ** separation_exponent for o in self.outputs], axis=0
        ) + EPSILON

        # Reconstructed waveform per source.
        self.istft = []
        for D in self.outputs:
            # Ratio mask; EPSILON / size in the numerator makes the masks of
            # all sources add up to exactly one.
            instrument_mask = (D ** separation_exponent + (EPSILON / size)) / output_sum
            instrument_mask = tf_featurization.extend_mask(instrument_mask)

            # Merge the first two dimensions of the mask into one.
            old_shape = tf.shape(instrument_mask)
            new_shape = tf.concat(
                [[old_shape[0] * old_shape[1]], old_shape[2:]],
                axis=0)
            instrument_mask = tf.reshape(instrument_mask, new_shape)

            # Trim the mask to the length of the mixture STFT's first
            # dimension before applying it.
            instrument_mask = instrument_mask[:tf.shape(stft_X)[0]]
            masked_stft = tf.cast(instrument_mask, dtype=tf.complex64) * stft_X
            # Keep only the first column of the inverse-STFT result.
            self.istft.append(tf_featurization.istft(masked_stft, self.X)[:,0])

In [5]:
# Clear the default graph first so re-running this cell builds a fresh model
# instead of adding a second copy of every op to the existing graph.
tf.compat.v1.reset_default_graph()
model = Model()
# The session is created after the model so it binds to the graph that now
# holds it; `model` and `sess` are reused by the cells below.
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())

Tensor("strided_slice_3:0", shape=(?, 512, 1024, 1), dtype=float32)
Tensor("model_0/max_pooling2d/MaxPool:0", shape=(?, 256, 512, 16), dtype=float32)
Tensor("model_0/max_pooling2d_1/MaxPool:0", shape=(?, 128, 256, 32), dtype=float32)
Tensor("model_0/max_pooling2d_2/MaxPool:0", shape=(?, 64, 128, 64), dtype=float32)
Tensor("model_0/max_pooling2d_3/MaxPool:0", shape=(?, 32, 64, 128), dtype=float32)
Tensor("model_0/max_pooling2d_4/MaxPool:0", shape=(?, 16, 32, 256), dtype=float32)
Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Tensor("strided_slice_3:0", shape=(?, 512, 1024, 1), dtype=float32)
Tensor("model_1/max_pooling2d_5/MaxPool:0", shape=(?, 256, 512, 16), dtype=float32)
Tensor("model_1/max_pooling2d_6/MaxPool:0", shape=(?, 128, 256, 32), dtype=float32)
Tensor("model_1/max_pooling2d_7/MaxPool:0", shape=(?, 64, 128, 64), dtype=float32)
Tensor("model_1/max_pooling2d_8/MaxPool:0", shape=(?, 32, 64, 128), dtype=float32)
Tensor("model_1/max_pooling2d_9/Ma

In [6]:
# Show the per-source output tensors (one entry per `model_<i>` scope).
model.outputs

[<tf.Tensor 'model_0/multiply/mul:0' shape=(?, 512, 1024, 1) dtype=float32>,
 <tf.Tensor 'model_1/multiply_1/mul:0' shape=(?, 512, 1024, 1) dtype=float32>]

In [7]:
# Load the example utterance, requesting a 44100 Hz sample rate, and display
# its duration in seconds (samples / sample rate).
y, sr = malaya_speech.load('../speech/example-speaker/husein-zolkepli.wav', sr = 44100)
len(y) / sr

5.630680272108844

In [8]:
# Render an audio player for the loaded clip.
ipd.Audio(y, rate = sr)

In [9]:
# !pip3 install pysndfx

In [10]:
# `y_` plays the role of the model input (the mixture). No effect is applied
# here, so it is the very same array as the clean signal `y`.
y_ = y
ipd.Audio(y_, rate = sr)

In [11]:
# Second target source: whatever separates the clean signal from the mixture.
# Because `y_` is `y` in this notebook, this is an all-zero array.
noise = y - y_

In [13]:
# Forward pass only: the network outputs depend on X alone, so Y is not fed.
outputs = sess.run(model.outputs, feed_dict = {model.X: y_})
# Show the concrete output shape of each source network.
[o.shape for o in outputs]

[(1, 512, 1024, 1), (1, 512, 1024, 1)]

In [14]:
# Evaluate the per-source losses; Y takes one target waveform per source.
sess.run(model.loss, feed_dict = {model.X: y_, model.Y: [y, noise]})

[0.46936542, 0.2634989]

In [15]:
# Reconstruct one waveform per source from the masked mixture STFT.
istft = sess.run(model.istft, feed_dict = {model.X: y_})
# Show the shape of each reconstructed waveform.
[s.shape for s in istft]

[(248313,), (248313,)]

In [16]:
# Checkpoint only the trainable variables to see how large the model is on
# disk. Anything non-trainable in the graph is not written by this saver.
saver = tf.train.Saver(tf.trainable_variables())
saver.save(sess, "test/model.ckpt")

'test/model.ckpt'

In [17]:
# List the checkpoint files with human-readable sizes.
!ls -lh test

total 172416
-rw-r--r--  1 huseinzolkepli  staff    77B Feb  8 15:33 checkpoint
-rw-r--r--  1 huseinzolkepli  staff    80M Feb  8 15:33 model.ckpt.data-00000-of-00001
-rw-r--r--  1 huseinzolkepli  staff    48K Feb  8 15:33 model.ckpt.index
-rw-r--r--  1 huseinzolkepli  staff   4.5M Feb  8 15:33 model.ckpt.meta


In [18]:
# Clean up: delete the temporary checkpoint directory written above.
!rm -rf test