In [1]:
import numpy as np

import random
import h5py
from keras.models import *
from keras.layers import *
from keras.layers.core import *
from keras.layers.normalization import *
from keras.optimizers import *
from keras.callbacks import *
from keras import backend as K
from keras.regularizers import *
from keras.initializers import *
from keras.models import load_model
from keras.losses import *
from sklearn import metrics
from sklearn.preprocessing import *
from scipy.fftpack import dct, idct
from keras.activations import softmax
from sklearn.cluster import KMeans, MiniBatchKMeans
import scipy.io.wavfile as sciwav

import os
import random
import time
import matplotlib
import matplotlib.pyplot as plt
import glob

import operator
import math
import re

# for reproducibility: both random number generators get the same fixed seed
REPRO_SEED = 1337
random.seed(REPRO_SEED)
np.random.seed(REPRO_SEED)

# increase recursion limit for adaptive VQ
import sys
sys.setrecursionlimit(40000)

Using TensorFlow backend.


In [2]:
# control amount of GPU memory used:
# allow_growth=True is set on the session's GPU options, and the resulting
# session is registered as the one the Keras TensorFlow backend uses
import tensorflow as tf
from keras.backend.tensorflow_backend import set_session
gpu_options = tf.GPUOptions(allow_growth=True)
config = tf.ConfigProto(gpu_options=gpu_options)
set_session(tf.Session(config=config))

In [3]:
# external custom code I wrote
from load_data import *
from windowing import *
from nn_util import *
from pesq import *
from consts import *
from nn_blocks import *
from perceptual_loss import *
from evaluation import *

In [4]:
# how many speech files go into each of the train / validation / test splits
TRAIN_SIZE = 1000
VAL_SIZE = 100
TEST_SIZE = 500

# load_data hands back five groups, each holding one entry per split in
# (train, val, test) order; unpack all fifteen names in a single assignment
(
    (train_paths, val_paths, test_paths),
    (train_waveforms, val_waveforms, test_waveforms),
    (train_procwave, val_procwave, test_procwave),
    (train_wparams, val_wparams, test_wparams),
    (train_windows, val_windows, test_windows)
) = load_data(TRAIN_SIZE, VAL_SIZE, TEST_SIZE)

In [5]:
# Name -> object table for the project-defined layers; it is handed to
# load_model as its second argument (presumably Keras' custom-objects
# mapping, so the saved model's custom layers can be resolved -- confirm
# that load_model here is the one imported from keras.models).
KERAS_LOAD_MAP = dict(
    PhaseShiftUp1D = PhaseShiftUp1D,
    SoftmaxQuantization = SoftmaxQuantization,
    SoftmaxDequantization = SoftmaxDequantization,
    ChannelResize1D = ChannelResize1D,
    LinearUpSampling1D = LinearUpSampling1D,
    AdaptiveQuantization = AdaptiveQuantization,
    AdaptiveDequantization = AdaptiveDequantization,
)

# restore the saved autoencoder from disk
autoencoder = load_model('best_auto.h5', KERAS_LOAD_MAP)

# set the QUANTIZATION_ON backend variable to True before any evaluation
K.set_value(QUANTIZATION_ON, True)



In [6]:
# Run the loaded autoencoder on three standalone recordings, twice each:
# first with the default settings, then with argmax = True.
# NOTE(review): both runs of a clip pass the same output name; if
# test_model_on_wav writes its reconstruction under that name, the argmax run
# would overwrite the default one -- confirm this is intended.
demo_clips = [("./SA1.wav", "SA1_final"),
              ("./SX383.wav", "SX383_final"),
              ("./fiveYears.wav", "fy_final")]

for wav_path, out_name in demo_clips:
    test_model_on_wav(wav_path, out_name, autoencoder)
    last_metrics = test_model_on_wav(wav_path, out_name, autoencoder, argmax = True)

# keep the final call's return value as the cell's displayed result
last_metrics

MSE:         2879.21
Avg err:     35.3214
PESQ:        3.48673629761
MSE:         2920.69
Avg err:     35.5772
PESQ:        3.46105217934
MSE:         1715.38
Avg err:     25.6162
PESQ:        3.34725499153
MSE:         1737.58
Avg err:     25.7959
PESQ:        3.32471323013
MSE:         1.28733e+06
Avg err:     821.663
PESQ:        3.283608675
MSE:         1.29026e+06
Avg err:     822.983
PESQ:        3.28554964066


[1290256.5, 822.98285, 3.2855496406555176]

In [7]:
# Evaluate the autoencoder on every file of the training, validation and test
# splits and print the mean / max / min PESQ score for each split.
captions = ["training", "validation", "test"]
datasets = [train_paths, val_paths, test_paths]

# Position of the PESQ score in the value returned by test_model_on_wav.
# The earlier demo cell displays a three-element result whose last entry
# matches the printed PESQ line, i.e. presumably [MSE, avg err, PESQ].
PESQ_INDEX = 2

# Pair each caption with its split directly instead of indexing two parallel
# lists through a hard-coded range.
for caption, split_paths in zip(captions, datasets):
    # single-argument print(...) behaves identically as a Python 2 print
    # statement and as the Python 3 print function
    print("Model evaluation (" + caption + ")")

    base_scores = []
    for wav_path in split_paths:
        # the score is deliberately NOT stored in a variable called `pesq`:
        # that would rebind whatever `from pesq import *` put under that name
        pesq_score = test_model_on_wav(wav_path, "", autoencoder,
                                       save_recons = False,
                                       verbose = False,
                                       argmax = True)[PESQ_INDEX]
        base_scores.append(pesq_score)

    print("Mean: " + str(np.mean(base_scores)))
    print("Max:  " + str(np.max(base_scores)))
    print("Min:  " + str(np.min(base_scores)))
    print("")

Model evaluation (training)
Mean: 3.75964650083
Max:  4.30502557755
Min:  2.00504350662

Model evaluation (validation)
Mean: 3.83147942305
Max:  4.18813896179
Min:  2.63376164436

Model evaluation (test)
Mean: 3.72867670822
Max:  4.28991651535
Min:  2.51075410843

