In [1]:
import numpy as np

import random
import h5py
from keras.models import load_model

import os
import random
import time

# Pin both the NumPy RNG and Python's built-in RNG to one shared seed so that
# anything drawn from them is repeatable from a fresh kernel.
_SEED = 1337
np.random.seed(_SEED)
random.seed(_SEED)

Using TensorFlow backend.


In [2]:
# Limit GPU memory use: with allow_growth enabled TensorFlow allocates GPU
# memory on demand instead of claiming it all when the session is created.
import tensorflow as tf
from keras.backend.tensorflow_backend import set_session

config = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))

# Register the configured session as the one Keras uses.
set_session(tf.Session(config=config))

In [3]:
# external custom code I wrote
from load_data import *
from windowing import *
from nn_util import *
from pesq import *
from consts import *
from nn_blocks import *
from perceptual_loss import *
from evaluation import *

In [4]:
# load_data returns five groups, each unpacked here as a
# (train, validation, test) triple.
(
    [train_paths, val_paths, test_paths],
    [train_waveforms, val_waveforms, test_waveforms],
    [train_procwave, val_procwave, test_procwave],
    [train_wparams, val_wparams, test_wparams],
    [train_windows, val_windows, test_windows],
) = load_data(TRAIN_SIZE, VAL_SIZE, TEST_SIZE)

# Pool the windows of every training item into one array, then make sure each
# row is a flat vector of WINDOW_SIZE samples.
train_processed = np.array(
    [window for item_windows in train_windows for window in item_windows]
)
train_processed = train_processed.reshape((train_processed.shape[0], WINDOW_SIZE))

# Work on an independent copy so the pooled array itself stays untouched.
X_train = np.copy(train_processed)

In [5]:
# Checkpoint of the trained autoencoder.
MODEL_FILENAME = './best_coder.h5'

# Project-defined objects that Keras must be able to look up by name while
# deserializing the checkpoint.
KERAS_LOAD_MAP = dict(
    PhaseShiftUp1D=PhaseShiftUp1D,
    SoftmaxQuantization=SoftmaxQuantization,
    SoftmaxDequantization=SoftmaxDequantization,
)

autoencoder = load_model(MODEL_FILENAME, custom_objects=KERAS_LOAD_MAP)

# Set the shared QUANTIZATION_ON backend variable to True.
# NOTE(review): presumably this switches the quantization layers into their
# quantizing mode for everything evaluated below -- confirm in nn_blocks.
K.set_value(QUANTIZATION_ON, True)



In [6]:
# Pull the two halves out of the loaded model: layer 1 is used as the encoder
# and layer 2 as the decoder (layer 0 is presumably the input layer -- confirm).
encoder, decoder = autoencoder.layers[1], autoencoder.layers[2]

# Test bitrate

In [9]:
# Estimate the coded bitrate from the empirical entropy of the encoder output
# on the first 10000 training windows.
all_embed = encoder.predict(X_train[:10000], batch_size = BATCH_SIZE, verbose = 1)

# Merge the first two axes so that every row is one NBINS-wide vector.
# NOTE(review): assumes all_embed is (windows, symbols, NBINS) with each row a
# (soft) distribution over quantization bins -- confirm against the encoder.
probs = np.reshape(all_embed, (all_embed.shape[0] * all_embed.shape[1], NBINS))

# Accumulate in float64: the sum runs over a very large number of rows and
# would lose precision if done in the network's (presumably float32) dtype.
hist = np.sum(probs, axis = 0, dtype = np.float64)
hist /= np.sum(hist)

# Shannon entropy in bits. Bins with probability below 1e-5 are skipped, as
# before. The mask is written as "not (p < threshold)" so that a NaN in the
# histogram still propagates to the result instead of being silently dropped.
# Using np.log2 also removes the dependency on `math`, which this notebook
# never imports explicitly.
min_prob = 1e-5
significant = hist[~(hist < min_prob)]
entropy = -float(np.sum(significant * np.log2(significant)))

# (window hops per second) * 256 * (bits per symbol), then bits -> kilobits.
# NOTE(review): 256.0 is presumably the number of quantized symbols emitted
# per window (i.e. all_embed.shape[1]) -- confirm before changing the model.
bitrate = float(SAMPLE_RATE) / (WINDOW_SIZE - OVERLAP_SIZE) * 256.0 * entropy
bitrate /= 1000

# Single-argument print(...) produces the same text under Python 2 and 3.
print("Bitrate: " + str(bitrate) + "kbps")



# Test speed

In [10]:
# Time single-window (batch_size = 1) inference on 150 random windows with
# samples drawn uniformly from [-1, 1).
windows = np.random.uniform(-1.0, 1.0, (150, WINDOW_SIZE))

# Warm-up call, not timed: a model's first predict() includes one-time setup
# work. Without this the decoder (never run before this cell) would be charged
# for that setup while the encoder (already run above) would not.
encoder.predict(windows[:1], batch_size = 1, verbose = 0)

# test encoder
start = time.time()
encoded = encoder.predict(windows, batch_size = 1, verbose = 0)
end = time.time()

# total seconds -> mean milliseconds per window
averageMs = (end - start) / encoded.shape[0] * 1000.0
print("Encoder: Averaged %s ms per window" % averageMs)

# Same untimed warm-up for the decoder.
decoder.predict(encoded[:1], batch_size = 1, verbose = 0)

# test decoder
start = time.time()
decoded = decoder.predict(encoded, batch_size = 1, verbose = 0)
end = time.time()

averageMs = (end - start) / decoded.shape[0] * 1000.0
print("Decoder: Averaged %s ms per window" % averageMs)

Encoder: Averaged 2.77137438456 ms per window
Decoder: Averaged 3.14704577128 ms per window


# Test quality

In [11]:
# Spot-check reconstruction quality on three clips. Each clip is evaluated
# twice: once with the function's default settings and once with argmax = True.
# NOTE(review): both runs of a clip share the same output name -- confirm that
# the argmax run does not overwrite the first run's saved reconstruction.
quality_clips = [
    ("./SA1.wav", "SA1_final"),
    ("./SX383.wav", "SX383_final"),
    ("./fiveYears.wav", "fy_final"),
]
quality_variants = ({}, {"argmax": True})

last_metrics = None
for wav_path, output_name in quality_clips:
    for extra_kwargs in quality_variants:
        last_metrics = test_model_on_wav(wav_path, output_name, autoencoder,
                                         **extra_kwargs)

# Keep the final call's return value as the cell's displayed output.
last_metrics

MSE:         2306.33
Avg err:     30.6305
PESQ:        3.93665719032
MSE:         2348.96
Avg err:     30.9254
PESQ:        3.90884542465
MSE:         1834.33
Avg err:     22.6247
PESQ:        4.03377771378
MSE:         1846.17
Avg err:     22.7764
PESQ:        4.03461074829
MSE:         845594.0
Avg err:     665.507
PESQ:        3.94512104988
MSE:         848768.0
Avg err:     666.906
PESQ:        3.94009804726


[848767.5, 666.90582, 3.9400980472564697]

In [None]:
# Report PESQ statistics (mean / max / min) of the autoencoder over every file
# of the training, validation and test splits.
captions = ["training", "validation", "test"]
datasets = [train_paths, val_paths, test_paths]

for caption, paths in zip(captions, datasets):
    print("Model evaluation (" + caption + ")")

    base_scores = []
    for path in paths:
        # Element 2 of the returned metrics is the PESQ score. It is stored
        # under its own name so that it does not rebind `pesq`, a name the
        # `from pesq import *` star import may have provided.
        pesq_score = test_model_on_wav(path, "", autoencoder,
                                       save_recons = False,
                                       verbose = False,
                                       argmax = True)[2]
        base_scores.append(pesq_score)

    # np.max / np.min raise ValueError on an empty sequence, so an empty split
    # is reported explicitly instead of aborting the remaining splits.
    if not base_scores:
        print("No files in this split; skipping statistics.")
        print("")
        continue

    # Single-argument print(...) yields the same text under Python 2 and 3.
    print("Mean: %s" % np.mean(base_scores))
    print("Max:  %s" % np.max(base_scores))
    print("Min:  %s" % np.min(base_scores))
    print("")