In [9]:
import os,sys,inspect
# Make the parent of the notebook's working directory importable; presumably
# that is where the project modules imported further down (Audio_functions,
# UNet, Dataset) live -- TODO confirm against the repository layout.
#
# The kernel's working directory is used instead of
# inspect.getfile(inspect.currentframe()): inside a notebook cell the frame's
# "file" is an IPython-generated pseudo-path rather than the notebook's
# location, so deriving directories from it is unreliable.
currentdir = os.path.abspath(os.getcwd())
parentdir = os.path.dirname(currentdir)
# Guarded so that re-running this cell does not stack duplicate entries.
if parentdir not in sys.path:
    sys.path.insert(0, parentdir)

import numpy as np
import tensorflow as tf
import datetime
import mir_eval

import Audio_functions as af
import UNet
import Dataset

In [17]:
# ---------------------------------------------------------------------------
# Settings
# ---------------------------------------------------------------------------

# Audio / spectrogram parameters (handed to Dataset.get_paired_dataset; n_fft
# and fft_hop are also used when converting spectrograms back to audio)
sample_rate = 16384
n_fft = 1024
fft_hop = 256
patch_window = 256
patch_hop = 128

# Input-pipeline parameters
n_parallel_readers = 4
batch_size = 5
n_shuffle = 10
normalise = True
shuffle = False  # NOTE(review): not referenced by any visible cell

# Checkpoint to restore, relative to model_base_dir
model_base_dir = 'C:/Users/Toby/MSc_Project/MScFinalProjectCheckpoints'
checkpoint = '26/26-21'

# Paired test audio: directory_a holds the mixtures, directory_b the voice
directory_a = 'C:/Users/Toby/MSc_Project/Test_Audio/CHiME/test/Mixed'
directory_b = 'C:/Users/Toby/MSc_Project/Test_Audio/CHiME/test/Voice'

# Start from an empty graph so that re-running this cell does not pile up ops
tf.reset_default_graph()

# Input pipeline: pair up the files of the two directories, then let the
# Dataset module turn them into batches
data = Dataset.get_paired_dataset(Dataset.zip_files(directory_a, directory_b),
                                  sample_rate, n_fft, fft_hop,
                                  patch_window, patch_hop,
                                  n_parallel_readers, batch_size,
                                  n_shuffle, normalise)

# One-shot iterator yielding (mixed spec, voice spec, mixed audio, voice audio)
iterator = data.make_one_shot_iterator()
mixed_spec, voice_spec, mixed_audio, voice_audio = iterator.get_next()


def _spec_channel(spec, channel):
    """Take one channel of the last axis, drop index 0 of axis 2, and
    re-append a trailing axis of size one."""
    return tf.expand_dims(spec[:, :, 1:, channel], 3)


# Graph inputs: a scalar training flag plus the model's input tensors
# (channel 0 is used as magnitude and channel 1 as phase)
is_training = tf.placeholder(bool, shape=())
mixed_mag = _spec_channel(mixed_spec, 0)
mixed_phase = _spec_channel(mixed_spec, 1)
voice_mag = _spec_channel(voice_spec, 0)

# Build the U-Net and open a session with freshly initialised variables
print('Creating model')
model = UNet.UNetModel(mixed_mag, voice_mag, mixed_phase, mixed_audio, voice_audio,
                       'unet', is_training, name='U_Net_Model')
sess = tf.Session()
sess.run(tf.global_variables_initializer())

# Overwrite the initial values with the trained weights from the checkpoint
print('Loading checkpoint')
checkpoint_path = os.path.join(model_base_dir, checkpoint)
restorer = tf.train.Saver()
restorer.restore(sess, checkpoint_path)

Creating model
Loading checkpoint
INFO:tensorflow:Restoring parameters from C:/Users/Toby/MSc_Project/MScFinalProjectCheckpoints\26/26-21


### Normal For Loop

In [18]:
# Containers for the batch loss and the per-sample separation metrics
test_costs, sdrs, sirs, sars, nsdrs = [], [], [], [], []

# One inference-mode pass over a single batch: fetch the loss, the estimated
# voice magnitude, the voice / mixture audio and the mixture phase
cost, voice_est_mag, voice, mixed_audio, mixed_phase = sess.run(
    [model.cost, model.gen_voice, model.voice_audio, model.mixed_audio, model.mixed_phase],
    feed_dict={model.is_training: False})
# The loss is only printed at the end; it is not accumulated in test_costs
print('{ts}:\tBatch retrieved'.format(ts=datetime.datetime.now()))

for i in range(len(voice_est_mag)):
    # Rebuild a waveform from the estimated magnitude and the mixture's phase
    print('{ts}:\tConverting spectrogram to audio'.format(ts=datetime.datetime.now()))
    est_mag = np.squeeze(voice_est_mag[i]).T
    mix_phase = np.squeeze(mixed_phase[i]).T
    voice_est = af.spectrogramToAudioFile(est_mag, n_fft, fft_hop, phase=mix_phase)

    # mir_eval is given each signal with the sample index moved to the last axis
    voice_est = np.transpose(np.expand_dims(voice_est, 1))
    voice_patch = voice[i].T
    mixed_patch = mixed_audio[i].T

    # Score the estimate, then the unprocessed mixture as a baseline
    print('{ts}:\tCalculating audio quality metrics'.format(ts=datetime.datetime.now()))
    sdr, sir, sar, _ = mir_eval.separation.bss_eval_sources(
        voice_patch, voice_est, compute_permutation=False)
    sdr_mr, _, _, _ = mir_eval.separation.bss_eval_sources(
        voice_patch, mixed_patch, compute_permutation=False)

    # NSDR here is the SDR of the estimate minus the SDR of the raw mixture
    nsdr = sdr[0] - sdr_mr[0]
    sdrs.append(sdr[0])
    sirs.append(sir[0])
    sars.append(sar[0])
    nsdrs.append(nsdr)

# Only one batch is evaluated, so the iteration number is fixed at 1
print("{ts}:\tTesting iteration: 1, Loss: {c}".format(ts=datetime.datetime.now(), c=cost))

2018-09-13 16:26:51.048300:	Batch retrieved
2018-09-13 16:26:51.049300:	Converting spectrogram to audio
2018-09-13 16:26:51.113289:	Calculating audio quality metrics


  z[index] = x


2018-09-13 16:26:51.875023:	Converting spectrogram to audio
2018-09-13 16:26:51.907004:	Calculating audio quality metrics
2018-09-13 16:26:52.140990:	Converting spectrogram to audio
2018-09-13 16:26:52.174968:	Calculating audio quality metrics
2018-09-13 16:26:52.468803:	Converting spectrogram to audio
2018-09-13 16:26:52.523768:	Calculating audio quality metrics
2018-09-13 16:26:52.832593:	Converting spectrogram to audio
2018-09-13 16:26:52.872568:	Calculating audio quality metrics
2018-09-13 16:26:53.197384:	Testing iteration: 1, Loss: 0.005031157284975052


In [19]:
from joblib import Parallel, delayed
import multiprocessing

In [20]:
# Display the CPU count reported by multiprocessing; the same call supplies
# the joblib worker count (n_jobs) in the parallel cell further down.
multiprocessing.cpu_count()

4

In [21]:
print(datetime.datetime.now())
inputs = range(voice_est_mag.shape[0])


def get_test_metrics(i):
    """Compute the separation metrics for sample ``i`` of the fetched batch.

    Reads the module-level arrays ``voice_est_mag``, ``mixed_phase``, ``voice``
    and ``mixed_audio`` produced by the preceding ``sess.run`` call.

    The metrics are *returned* rather than appended to the module-level lists:
    joblib's process-based workers each operate on their own copy of those
    lists, so appends made inside a worker never reach the notebook kernel.

    Args:
        i: Index of the sample within the batch (first axis of the arrays).

    Returns:
        Tuple ``(sdr, sir, sar, nsdr)`` for the sample, where ``nsdr`` is the
        SDR of the estimate minus the SDR of the unprocessed mixture.
    """
    # Transform output back to audio
    # (prints issued inside worker processes may not show up in the cell output)
    print('{ts}:\tConverting spectrogram to audio'.format(ts=datetime.datetime.now()))
    voice_est = af.spectrogramToAudioFile(np.squeeze(voice_est_mag[i, :, :, :]).T, n_fft,
                                          fft_hop, phase=np.squeeze(mixed_phase[i, :, :, :]).T)
    # Reshape for mir_eval
    voice_est = np.expand_dims(voice_est, 1).T
    voice_patch = voice[i, :, :].T
    mixed_patch = mixed_audio[i, :, :].T
    # Calculate audio quality statistics
    print('{ts}:\tCalculating audio quality metrics'.format(ts=datetime.datetime.now()))
    sdr, sir, sar, _ = mir_eval.separation.bss_eval_sources(voice_patch, voice_est, compute_permutation=False)
    sdr_mr, _, _, _ = mir_eval.separation.bss_eval_sources(voice_patch, mixed_patch, compute_permutation=False)
    nsdr = sdr[0] - sdr_mr[0]
    return sdr[0], sir[0], sar[0], nsdr


num_cores = multiprocessing.cpu_count()

# Parallel returns one entry per input, in input order
results = Parallel(n_jobs=num_cores)(delayed(get_test_metrics)(i) for i in inputs)

# Collect the metrics in the parent process, where the lists actually live.
# NOTE: the sequential cell above appends the same batch to these lists, so
# re-create them first if each sample should be counted only once.
for sdr, sir, sar, nsdr in results:
    sdrs.append(sdr)
    sirs.append(sir)
    sars.append(sar)
    nsdrs.append(nsdr)
print("{ts}:\tTesting iteration: 1, Loss: {c}".format(ts=datetime.datetime.now(), c=cost))

2018-09-13 16:26:59.694589
2018-09-13 16:27:31.340167:	Testing iteration: 1, Loss: 0.005031157284975052
