In [1]:
import os

# Restrict CUDA to the device with index 1. This is set here, before the
# TensorFlow import in a later cell.
os.environ.update({'CUDA_VISIBLE_DEVICES': '1'})

In [2]:
import sys

# Parent of the current working directory. The previous expression reached the
# same directory via os.path.abspath(__name__), which only worked because
# abspath joins the module name ('__main__') onto the working directory.
SOURCE_DIR = os.path.dirname(os.getcwd())

# Give SOURCE_DIR top import priority (presumably so the local checkout of the
# package is imported rather than an installed copy - confirm). The guard keeps
# the cell idempotent: re-running it does not stack duplicate entries.
if sys.path[:1] != [SOURCE_DIR]:
    sys.path.insert(0, SOURCE_DIR)

In [3]:
import tensorflow as tf
import malaya_speech
import malaya_speech.train
from malaya_speech.train.model import resnet_unet, stft
from malaya_speech.utils import tf_featurization
import IPython.display as ipd
import numpy as np






The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.




In [4]:
from malaya_speech.utils.tf_featurization import pad_and_partition

In [5]:
# Graph inputs: two float32 column vectors of shape (samples, 1).
# `x` is what the network consumes; `y` is only used by the loss in a later cell.
x = tf.placeholder(tf.float32, (None, 1))
y = tf.placeholder(tf.float32, (None, 1))

# Min-max scale the input using its own global minimum / maximum.
x_min = tf.reduce_min(x)
x_max = tf.reduce_max(x)
# Clamp the range so a constant (e.g. silent) input does not divide by zero
# and push NaNs through the network. For any input whose range exceeds the
# floor, the result is identical to dividing by (x_max - x_min) directly.
x_range = tf.maximum(x_max - x_min, 1e-8)
x_std = (x - x_min) / x_range

# Split the scaled signal into windows of 44000 * 2 = 88000 samples.
# NOTE(review): presumably the helper zero-pads the tail to a whole window -
# confirm against pad_and_partition. The window size is not the 44100 Hz rate
# used when loading audio; the logged shapes show 88000 halving cleanly
# through the pooling stages (44000 -> ... -> 1375).
partitioned = pad_and_partition(x_std, 44000 * 2)

model = resnet_unet.Model1D(partitioned, logging = True)

# Flatten the per-window outputs back into one column and keep only as many
# rows as the original input had.
logits = tf.reshape(model.logits, (-1, 1))
logits = logits[:tf.shape(x)[0]]

# Undo the min-max scaling with the same (clamped) range used above.
logits = (logits * x_range) + x_min

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Tensor("max_pooling1d/Squeeze:0", shape=(?, 44000, 66), dtype=float32)
Tensor("max_pooling1d_1/Squeeze:0", shape=(?, 22000, 132), dtype=float32)
Tensor("max_pooling1d_2/Squeeze:0", shape=(?, 11000, 264), dtype=float32)
Tensor("max_pooling1d_3/Squeeze:0", shape=(?, 5500, 528), dtype=float32)
Tensor("max_pooling1d_4/Squeeze:0", shape=(?, 2750, 1056), dtype=float32)
Tensor("conv1d_15/BiasAdd:0", shape=(?, 1375, 2112), dtype=float32)
Tensor("concatenate/concat:0", shape=(?, 2750, 2112), dtype=float32)
Tensor("concatenate_1/concat:0", shape=(?, 5500, 1056), dtype=float32)
Tensor("concatenate_2/concat:0", shape=(?, 11000, 528), dtype=float32)
Tensor("concatenate_3/concat:0", shape=(?, 22000, 264), dtype=float32)
Tensor("concatenate_4/concat:0", shape=(?, 44000, 132), dtype=float32)


In [6]:
# Training objective: element-wise L1 error plus the two terms returned by the
# multi-resolution STFT loss, averaged to a single scalar.
stft_loss = stft.loss.MultiResolutionSTFT(factor_sc = 0.5, factor_mag = 0.5)
l1 = tf.abs(y - logits)

# The STFT loss is called on (1, samples) tensors: take column 0 of each
# (samples, 1) tensor and add a leading axis.
target_wave = tf.expand_dims(y[:, 0], axis = 0)
predicted_wave = tf.expand_dims(logits[:, 0], axis = 0)
sc_loss, mag_loss = stft_loss(target_wave, predicted_wave)

# The STFT terms are broadcast onto the per-sample L1 error before averaging.
loss = tf.reduce_mean(l1 + sc_loss + mag_loss)

In [7]:
# Build the initialiser op, open an interactive session and run the
# initialiser so every variable in the graph has a value.
init_op = tf.global_variables_initializer()
sess = tf.InteractiveSession()
sess.run(init_op)

In [8]:
# Load the example recording, passing sr = 44100 to the loader. The path is
# relative to the notebook's working directory.
actual, sr = malaya_speech.load(
    '../speech/example-speaker/husein-zolkepli.wav',
    sr = 44100,
)
# Number of samples divided by the rate, i.e. the clip length in seconds.
len(actual) / sr

5.630680272108844

In [9]:
# Inline player for the loaded recording.
ipd.Audio(data = actual, rate = sr)

In [10]:
# Run the loaded clip through `sox_augment_high` with the bass-gain,
# reverberance and negate settings below. The result is what gets fed to the
# network input in the following cells.
combined = malaya_speech.augmentation.waveform.sox_augment_high(
    actual,
    min_bass_gain = 70,
    reverberance = 50,
    negate = 0,
)
# Inline player for the augmented clip.
ipd.Audio(combined, rate = sr)

In [11]:
# Forward pass on the augmented clip. expand_dims adds a trailing axis so the
# waveform matches the (None, 1) placeholder (assumes `combined` is 1-D).
l = sess.run(
    logits,
    feed_dict = {x: np.expand_dims(combined, axis = 1)},
)

In [12]:
# Shape of the network output for the augmented clip: the graph reshapes the
# logits to a single column and trims them to the input's row count.
l.shape

(248313, 1)

In [13]:
# Evaluate the loss with the augmented clip as network input and the original
# recording as the target; both are fed as (samples, 1) columns.
sess.run(
    loss,
    feed_dict = {
        x: np.expand_dims(combined, axis = 1),
        y: np.expand_dims(actual, axis = 1),
    },
)

79.87748

In [14]:
# Write a checkpoint containing only the trainable variables.
# NOTE(review): non-trainable variables (e.g. batch-norm moving statistics, if
# the model creates any) are therefore not saved - confirm this is intended.
checkpoint_path = 'test/model.ckpt'

# Make sure the target directory exists so the cell also works on a fresh
# checkout, instead of relying on the directory being there already.
os.makedirs(os.path.dirname(checkpoint_path), exist_ok = True)

saver = tf.train.Saver(tf.trainable_variables())
# Last expression: save() returns the checkpoint prefix, shown as cell output.
saver.save(sess, checkpoint_path)

'test/model.ckpt'

In [15]:
# List the trainable variables of the default graph - the same collection that
# is handed to the Saver in the previous cell.
tf.trainable_variables()

[<tf.Variable 'conv1d/kernel:0' shape=(1, 1, 66) dtype=float32>,
 <tf.Variable 'conv1d_1/kernel:0' shape=(5, 1, 66) dtype=float32>,
 <tf.Variable 'conv1d_1/bias:0' shape=(66,) dtype=float32>,
 <tf.Variable 'batch_normalization/gamma:0' shape=(66,) dtype=float32>,
 <tf.Variable 'batch_normalization/beta:0' shape=(66,) dtype=float32>,
 <tf.Variable 'conv1d_2/kernel:0' shape=(5, 66, 66) dtype=float32>,
 <tf.Variable 'conv1d_2/bias:0' shape=(66,) dtype=float32>,
 <tf.Variable 'batch_normalization_1/gamma:0' shape=(66,) dtype=float32>,
 <tf.Variable 'batch_normalization_1/beta:0' shape=(66,) dtype=float32>,
 <tf.Variable 'conv1d_3/kernel:0' shape=(1, 66, 132) dtype=float32>,
 <tf.Variable 'conv1d_4/kernel:0' shape=(5, 66, 132) dtype=float32>,
 <tf.Variable 'conv1d_4/bias:0' shape=(132,) dtype=float32>,
 <tf.Variable 'batch_normalization_2/gamma:0' shape=(132,) dtype=float32>,
 <tf.Variable 'batch_normalization_2/beta:0' shape=(132,) dtype=float32>,
 <tf.Variable 'conv1d_5/kernel:0' shape=(5