In [1]:
import os
import random
import shutil
import subprocess

import numpy as np

In [2]:
# external custom code I wrote
from load_data import *
from evaluation import *
from nn_blocks import *
import scipy.io.wavfile as sciwav

Using TensorFlow backend.


In [3]:
# Load the dataset once. load_data's result is unpacked into five groups,
# each of which is itself a (train, val, test) triple.
(
    (train_paths, val_paths, test_paths),
    (train_waveforms, val_waveforms, test_waveforms),
    (train_procwave, val_procwave, test_procwave),
    (train_wparams, val_wparams, test_wparams),
    (train_windows, val_windows, test_windows),
) = load_data(TRAIN_SIZE, VAL_SIZE, TEST_SIZE)

In [4]:
# number of test files to keep for the listening study
NUM_SELECT = 20

# seed both RNGs so a fresh run always draws the same subset
random.seed(42)
np.random.seed(42)

# NOTE(review): test_paths is rebound to the sampled subset, so re-running this
# cell samples from the already-reduced list rather than from the full test set.
test_paths = random.sample(test_paths, NUM_SELECT)

In [5]:
# root of the output tree; every generated file lives under mturk_dir
base_dir = ""
mturk_dir = base_dir + "mturk-data/"

# bitrates to encode at, per codec (passed to ffmpeg as "<rate>k")
amr_bitrates = [8.85, 15.85, 19.85, 23.85]
speex_bitrates = [9.00, 16.00, 20.00, 24.00, 32.00]
opus_bitrates = [9.00, 16.00, 20.00, 24.00, 32.00]

# one row per codec:
#   (name, bitrates, encoded-file extension, ffmpeg encoder library, extra ffmpeg flags)
_codec_table = [
    ("amr",   amr_bitrates,   ".amr",  "libvo_amrwbenc", ""),
    ("speex", speex_bitrates, ".spx",  "libspeex",       "-abr 1"),
    ("opus",  opus_bitrates,  ".opus", "libopus",        "-vbr off -application voip"),
]

# transpose the table into the index-aligned parallel lists used by later cells
(codec_names,
 codec_bitrates,
 codec_exts,
 codec_libs,
 codec_options) = map(list, zip(*_codec_table))

In [6]:
# bitrates of the trained DNN coders; each model lives in "./<rate>kbps/"
dnn_rates = [9.02, 16.24, 20.06, 24.06, 32.25]
dnn_folders = ['./' + str(kbps) + 'kbps/' for kbps in dnn_rates]

# create the output root the first time the notebook is run
if not os.path.exists(mturk_dir):
    os.makedirs(mturk_dir)

In [7]:
# ------------------------------------------------------
# no compression
# ------------------------------------------------------
print("Original files...")

orig_dir = mturk_dir + "orig/"
if not os.path.exists(orig_dir):
    os.makedirs(orig_dir)

for wav_path in test_paths:
    # read the selected file and write it back out as-is under a flattened
    # name (no volume normalization or other processing is applied here)
    sample_rate, samples = sciwav.read(wav_path)

    # ".../A/B/name.ext" -> "A-B-name"; [:-4] drops a 4-character extension
    parts = wav_path.split("/")
    identifier = "-".join([parts[-3], parts[-2], parts[-1][:-4]])

    sciwav.write(orig_dir + identifier + ".wav", sample_rate, samples)

Original files...


In [8]:
# ------------------------------------------------------
# compress with ffmpeg codecs
# ------------------------------------------------------
def _run_ffmpeg(ffmpeg_args):
    """Run ffmpeg with the given argument list and return its exit status.

    The arguments are passed as a list (no shell involved), so file names that
    contain spaces or shell metacharacters reach ffmpeg unchanged. "-y" tells
    ffmpeg to overwrite existing outputs, which makes re-running the cell
    refresh the files instead of stopping at the overwrite prompt.

    Raises OSError if the ffmpeg executable cannot be started.
    """
    return subprocess.call(['ffmpeg', '-hide_banner', '-y'] + ffmpeg_args)


# the codec_* lists are index-aligned: one entry per codec
for c_name, c_rates, c_ext, c_lib, c_opt in zip(codec_names, codec_bitrates,
                                                codec_exts, codec_libs,
                                                codec_options):
    # codec-specific flags are stored as a single string; ffmpeg needs them
    # as separate arguments ("" yields no extra arguments)
    extra_args = c_opt.split()

    for rate in c_rates:
        print(c_name + " bitrate " + str(rate) + "...")

        # one output folder per (codec, bitrate) pair
        curr_dir = mturk_dir + c_name + "-" + str(rate) + "/"
        if not os.path.exists(curr_dir):
            os.makedirs(curr_dir)

        for path in test_paths:
            # same flattened identifier as used for the files in orig_dir
            split = path.split("/")
            identifier = split[-3] + "-" + split[-2] + "-" + split[-1][:-4]
            orig_wav = orig_dir + identifier + ".wav"
            dest_enc = curr_dir + identifier + c_ext
            dest_wav = curr_dir + identifier + ".wav"

            # encode to the codec's native format at the target bitrate ...
            encode_status = _run_ffmpeg(['-i', orig_wav,
                                         '-ar', '16000',
                                         '-ab', str(rate) + 'k',
                                         '-acodec', c_lib]
                                        + extra_args + [dest_enc])
            if encode_status != 0:
                print("    WARNING: ffmpeg failed to encode " + orig_wav +
                      " (exit status " + str(encode_status) + ")")
            else:
                # ... then decode back to wav so every condition is a .wav
                decode_status = _run_ffmpeg(['-i', dest_enc,
                                             '-ar', '16000', dest_wav])
                if decode_status != 0:
                    print("    WARNING: ffmpeg failed to decode " + dest_enc +
                          " (exit status " + str(decode_status) + ")")

            # the encoded intermediate is not needed once decoded
            if os.path.exists(dest_enc):
                os.remove(dest_enc)

amr bitrate 8.85...
amr bitrate 15.85...
amr bitrate 19.85...
amr bitrate 23.85...
speex bitrate 9.0...
speex bitrate 16.0...
speex bitrate 20.0...
speex bitrate 24.0...
speex bitrate 32.0...
opus bitrate 9.0...
opus bitrate 16.0...
opus bitrate 20.0...
opus bitrate 24.0...
opus bitrate 32.0...


In [9]:
# ------------------------------------------------------
# compress with DNN
# ------------------------------------------------------
# custom layer classes, passed to load_model so the saved models can be rebuilt
KERAS_LOAD_MAP = {'PhaseShiftUp1D' : PhaseShiftUp1D,
                  'SoftmaxQuantization' : SoftmaxQuantization,
                  'SoftmaxDequantization' : SoftmaxDequantization}

# representable range of the 16-bit PCM samples written below
_INT16_RANGE = np.iinfo(np.int16)

# dnn_rates and dnn_folders are index-aligned: one trained model per bitrate
for rate, dnn_folder in zip(dnn_rates, dnn_folders):
    dnn_path = dnn_folder + 'best_coder.h5'

    print("DNN bitrate " + str(rate) + "...")

    # one output folder per DNN bitrate
    dnn_dir = mturk_dir + "dnn-" + str(rate) + "/"
    if not os.path.exists(dnn_dir):
        os.makedirs(dnn_dir)

    print("    Loading model...")
    autoencoder = load_model(dnn_path, KERAS_LOAD_MAP)
    # NOTE(review): presumably switches the quantization layers on for
    # inference -- confirm against nn_blocks
    K.set_value(QUANTIZATION_ON, True)

    print("    Processing files...")
    for path in test_paths:
        # same flattened identifier as used for the files in orig_dir
        split = path.split("/")
        identifier = split[-3] + "-" + split[-2] + "-" + split[-1][:-4]
        orig_wav = orig_dir + identifier + ".wav"
        dest_wav = dnn_dir + identifier + ".wav"

        # only the second element of the returned pair is used
        _, processed = run_model_on_wav(orig_wav, autoencoder, argmax = True)

        # Clip before the int16 cast: a sample outside the int16 range would
        # otherwise wrap around and produce a loud click. This is a no-op when
        # the model output is already in range.
        pcm = np.clip(processed, _INT16_RANGE.min, _INT16_RANGE.max)
        sciwav.write(dest_wav, 16000, pcm.astype(np.int16))

DNN bitrate 9.02...
    Loading model...




    Processing files...
DNN bitrate 16.24...
    Loading model...
    Processing files...
DNN bitrate 20.06...
    Loading model...
    Processing files...
DNN bitrate 24.06...
    Loading model...
    Processing files...
DNN bitrate 32.25...
    Loading model...
    Processing files...


In [24]:
# prepare metadata file for MTurk processing
SEP = ","

# URL needs to be HTTPS to play well with MTurk's embeds
# NOTE(review): this was previously "http://..."; the switch assumes the host
# serves the same files over TLS -- verify before launching HITs
URL_BASE = "https://steelassault.com/mturk-data/"

# format of each line:
#     identifier, codec name, DNN bitrate, corresponding codec bitrate, orig URL, dnn URL, codec URL

# "with" guarantees the file is flushed and closed even if a path is malformed
with open("hit_data.csv", "w") as hit:
    for path in test_paths:
        # same flattened identifier as used when the audio files were written
        split = path.split("/")
        identifier = split[-3] + "-" + split[-2] + "-" + split[-1][:-4]

        # form URL of original file
        orig_url = URL_BASE + "orig/" + identifier + ".wav"

        # loop through the possible dnn rates, and match them up to codec rates
        for i, dnn_rate in enumerate(dnn_rates):
            dnn_url = URL_BASE + "dnn-" + str(dnn_rate) + "/" + identifier + ".wav"

            for c_name, c_rates in zip(codec_names, codec_bitrates):
                # DNN rate i is paired with the codec's i-th rate; a codec with
                # fewer rates than the DNN simply has no row for that index
                if i >= len(c_rates):
                    continue

                codec_rate = c_rates[i]
                codec_url = URL_BASE + c_name + "-" + str(codec_rate) + "/" + identifier + ".wav"

                fields = [identifier, c_name, str(dnn_rate), str(codec_rate),
                          orig_url, dnn_url, codec_url]
                hit.write(SEP.join(fields) + "\n")