In [1]:
import os
import random
import shutil
import subprocess

import numpy as np

In [2]:
# external custom code I wrote
from load_data import *
from evaluation import *
from nn_blocks import *
import scipy.io.wavfile as sciwav

Using TensorFlow backend.


In [3]:
# load_data returns five groups, each holding a (train, val, test) triple:
# file paths, waveforms, processed waveforms, waveform parameters and windows.
# load_data and the *_SIZE constants are not defined in this notebook; they
# presumably come from the star-imports above.
(
    (train_paths, val_paths, test_paths),
    (train_waveforms, val_waveforms, test_waveforms),
    (train_procwave, val_procwave, test_procwave),
    (train_wparams, val_wparams, test_wparams),
    (train_windows, val_windows, test_windows),
) = load_data(TRAIN_SIZE, VAL_SIZE, TEST_SIZE)

In [4]:
# Number of test files that the following cells copy, compress and list.
NUM_SELECT = 20

# Keep only the first NUM_SELECT paths (a fixed prefix, not a random sample).
# Only test_paths is trimmed; the other test_* values returned by load_data
# are left at their full length.
test_paths = test_paths[:NUM_SELECT]

In [5]:
# Root of the generated audio tree. base_dir is empty, so "mturk-data/" is
# created relative to the current working directory.
base_dir = ""
mturk_dir = base_dir + "mturk-data/"
if not os.path.exists(mturk_dir):
    os.makedirs(mturk_dir)

# Bitrates passed to the AMR-WB encoder (formatted as "<rate>k" for ffmpeg).
amr_rates = [8.85, 15.85, 19.85, 23.85]

# Bitrates of the DNN coders and, position for position, the folder that
# holds each coder's best_coder.h5. The two lists must stay the same length
# and in the same order.
dnn_rates = [8.53, 15.56, 19.70, 23.71]
dnn_folders = [
    './8.53-3.523tst/',
    './15.56-4.049tst/',
    './19.70-4.172tst/',
    './23.71-4.222tst/',
]

In [6]:
# ------------------------------------------------------
# no compression
# ------------------------------------------------------
# Writes a volume-normalized 16-bit copy of every selected test file to
# mturk-data/orig/. These copies are the input to both coders below.
print("Original files...")

orig_dir = mturk_dir + "orig/"
if not os.path.exists(orig_dir):
    os.makedirs(orig_dir)

for path in test_paths:
    [rate, data] = sciwav.read(path)
    data = data.astype(np.float32)

    # normalize volume of waveform to 75% of maximum.
    # A silent (all-zero) or empty file has no peak to scale by: dividing
    # would produce NaN samples, so such data is written through unchanged.
    peak = np.max(np.abs(data)) if data.size else 0.0
    if peak > 0:
        data = data / peak * 0.75 * 32767.0
    data = np.clip(data, -32767, 32767)
    data = data.astype(np.int16)

    # identifier = "<grandparent dir>-<parent dir>-<file name without the
    # 4-character extension>", so files from different folders cannot collide
    split = path.split("/")
    identifier = split[-3] + "-" + split[-2] + "-" + split[-1][:-4]
    dest_file = orig_dir + identifier + ".wav"
    sciwav.write(dest_file, rate, data)

Original files...


In [7]:
# ------------------------------------------------------
# compress using AMR-WB
# ------------------------------------------------------
# For every AMR-WB bitrate, each normalized original is encoded to .amr with
# ffmpeg, decoded back to .wav next to it, and the intermediate .amr removed.
# ffmpeg is invoked with an argument list (no shell), so paths containing
# spaces or shell metacharacters are passed through intact. A failing ffmpeg
# run is reported and the loop continues with the next file. If the ffmpeg
# executable itself cannot be started, subprocess.call raises OSError.
for rate in amr_rates:
    print("AMR-WB bitrate " + str(rate) + "...")

    amr_dir = mturk_dir + "amr-" + str(rate) + "/"
    if not os.path.exists(amr_dir):
        os.makedirs(amr_dir)

    for path in test_paths:
        split = path.split("/")
        identifier = split[-3] + "-" + split[-2] + "-" + split[-1][:-4]
        orig_wav = orig_dir + identifier + ".wav"
        dest_amr = amr_dir + identifier + ".amr"
        dest_wav = amr_dir + identifier + ".wav"

        command_encode = ['ffmpeg', '-y', '-i', orig_wav,
                          '-ar', '16000', '-ab', str(rate) + 'k',
                          '-acodec', 'libvo_amrwbenc', dest_amr]
        command_decode = ['ffmpeg', '-y', '-i', dest_amr, dest_wav]

        # decoding is pointless without a successful encode
        if subprocess.call(command_encode) != 0:
            print("    WARNING: AMR-WB encode failed for " + orig_wav)
        elif subprocess.call(command_decode) != 0:
            print("    WARNING: AMR-WB decode failed for " + dest_amr)

        # the .amr file is only an intermediate; it may be absent if the
        # encode failed
        if os.path.exists(dest_amr):
            os.remove(dest_amr)

AMR-WB bitrate 8.85...
AMR-WB bitrate 15.85...
AMR-WB bitrate 19.85...
AMR-WB bitrate 23.85...


In [None]:
# ------------------------------------------------------
# compress using the DNN coders
# ------------------------------------------------------
# Custom layer classes that load_model needs in order to rebuild the saved
# coders (passed as its second argument).
KERAS_LOAD_MAP = {'PhaseShiftUp1D' : PhaseShiftUp1D,
                  'SoftmaxQuantization' : SoftmaxQuantization,
                  'SoftmaxDequantization' : SoftmaxDequantization}

# int16 limits, used to saturate the coder output before the integer cast
INT16_MIN = np.iinfo(np.int16).min
INT16_MAX = np.iinfo(np.int16).max

# dnn_rates and dnn_folders are parallel lists: entry k of one belongs to
# entry k of the other.
for rate, dnn_folder in zip(dnn_rates, dnn_folders):
    dnn_path = dnn_folder + 'best_coder.h5'

    print("DNN bitrate " + str(rate) + "...")

    dnn_dir = mturk_dir + "dnn-" + str(rate) + "/"
    if not os.path.exists(dnn_dir):
        os.makedirs(dnn_dir)

    print("    Loading model...")
    autoencoder = load_model(dnn_path, KERAS_LOAD_MAP)
    # QUANTIZATION_ON is defined outside this notebook; presumably it
    # switches the quantization layers on for inference -- TODO confirm
    K.set_value(QUANTIZATION_ON, True)

    print("    Processing files...")
    for path in test_paths:
        split = path.split("/")
        identifier = split[-3] + "-" + split[-2] + "-" + split[-1][:-4]
        orig_wav = orig_dir + identifier + ".wav"
        dest_wav = dnn_dir + identifier + ".wav"

        _, processed = run_model_on_wav(orig_wav, autoencoder, argmax = True)

        # Saturate instead of wrapping: a sample outside the int16 range
        # would otherwise overflow in the cast and turn into a loud click.
        processed = np.clip(processed, INT16_MIN, INT16_MAX)

        # NOTE(review): the sample rate is fixed at 16000 here, while the
        # originals are written with each file's own rate -- confirm that
        # every input is 16 kHz.
        sciwav.write(dest_wav, 16000, processed.astype(np.int16))

DNN bitrate 8.53...
    Loading model...




    Processing files...
DNN bitrate 15.56...
    Loading model...
    Processing files...
DNN bitrate 19.7...
    Loading model...
    Processing files...


In [None]:
# prepare input hit file in the format wanted by CrowdMOS
#
# The file is tab-separated with a header row and one row per
# (sentence, algorithm) pair. For every test file the rows appear in this
# order: the reference ("REF"), every AMR-WB rate, then every DNN rate.
URL_BASE = "http://srik.zamn.net/mturk-data/"

# Algorithm labels double as the directory names under mturk-data/, so they
# are built exactly as the compression cells build them ("amr-" / "dnn-"
# followed by str(rate)).
algos = ["amr-" + str(rate) for rate in amr_rates] + \
        ["dnn-" + str(rate) for rate in dnn_rates]

# `with` closes the file even if one of the paths is malformed and the
# identifier construction raises part-way through.
with open("MOS.hit_input", "w") as hit:
    hit.write("sentence\talgorithm\tURL\n")

    for path in test_paths:
        split = path.split("/")
        identifier = split[-3] + "-" + split[-2] + "-" + split[-1][:-4]

        # write line for original file; it lives under "orig/" but is
        # labelled "REF" in the algorithm column
        orig_url = URL_BASE + "orig" + "/" + identifier + ".wav"
        hit.write(identifier + "\t" + "REF" + "\t" + orig_url + "\n")

        # write one line per compressed condition (AMR-WB rates, then DNN rates)
        for algo in algos:
            algo_url = URL_BASE + algo + "/" + identifier + ".wav"
            hit.write(identifier + "\t" + algo + "\t" + algo_url + "\n")