In [1]:
import numpy as np

import random
import h5py
from keras import backend as K
from nn_util import *
from keras.models import *
from keras.layers import *
from keras.layers.core import *
from keras.layers.normalization import *
from keras.optimizers import *
from keras.initializers import *
from keras.models import load_model
from keras.losses import *
import scipy.io.wavfile as sciwav
import multiprocessing
from sklearn.cluster import KMeans, MiniBatchKMeans
from mem_top import mem_top

import os
import random
import time
import matplotlib
import matplotlib.pyplot as plt
import glob
import gc
import psutil

import operator
import math
import re

# for reproducibility: seed both NumPy's global RNG and Python's `random` module,
# since later cells draw from both (np.random.permutation, random.shuffle, random.sample)
np.random.seed(1337) 
random.seed(1337)

# increase recursion limit for adaptive VQ
#import sys
#sys.setrecursionlimit(40000)

# print floats through '{:4f}': a minimum field width of 4 with the default six decimals
# NOTE(review): if four decimal places were intended the spec would be '{:.4f}' -- confirm
np.set_printoptions(formatter={'float_kind':'{:4f}'.format})

Using TensorFlow backend.


In [2]:
# control amount of GPU memory used
#import tensorflow as tf
#from keras.backend.tensorflow_backend import set_session
#config = tf.ConfigProto()
#config.gpu_options.allow_growth=True
#set_session(tf.Session(config=config))

In [3]:
# external custom code I wrote
from load_data import *
from windowing import *
from pesq import *
from consts import *
from nn_blocks import *
from perceptual_loss import *
from evaluation import *

In [4]:
# load_data returns five groups of three; each group is unpacked into its
# train / validation / test member
(
    (train_paths, val_paths, test_paths),
    (train_waveforms, val_waveforms, test_waveforms),
    (train_procwave, val_procwave, test_procwave),
    (train_wparams, val_wparams, test_wparams),
    (train_windows, val_windows, test_windows),
) = load_data(TRAIN_SIZE, VAL_SIZE, TEST_SIZE)

In [5]:
# pool the windows of every training file into one array, then add a trailing
# axis of length 1 so each window has shape (WINDOW_SIZE, 1)
train_processed = np.array([window
                            for file_windows in train_windows
                            for window in file_windows])
train_processed = train_processed.reshape((train_processed.shape[0], WINDOW_SIZE, 1))

# optional shuffle along the first axis (window order)
if RANDOM_SHUFFLE:
    train_processed = np.random.permutation(train_processed)

# shape, then mean / std / min / max taken over every element
print(train_processed.shape)
print(train_processed.mean())
print(train_processed.std())
print(train_processed.min())
print(train_processed.max())

(101814, 512, 1)
6.41179e-06
0.103588
-1.0
1.0


In [6]:
# ratio between the window length and the length of the code's first axis
DOWNSAMPLE_FACTOR = 2
# Floor division keeps this an integer on every Python version: the value is used
# as a Keras layer dimension (Input / Reshape shapes), and a plain `/` produces a
# float wherever true division is in effect.
CHANNEL_SIZE = WINDOW_SIZE // DOWNSAMPLE_FACTOR

# ---------------------------------------------------------------------------
# autoencoder: takes an audio window, compresses it, and tries to reconstruct it
# ---------------------------------------------------------------------------
def autoencoder_structure():
    """Build the encoder and decoder as separate Keras models.

    Returns a 4-tuple (encoder, decoder, pre_quant, post_dequant):
      encoder      -- window of shape (WINDOW_SIZE, 1) -> SoftmaxQuantization output
      decoder      -- tensor of shape (CHANNEL_SIZE, QUANT_CHANS, NBINS) -> decoder output
      pre_quant    -- same input as encoder, stops right before SoftmaxQuantization
      post_dequant -- same input as decoder, stops right after SoftmaxDequantization

    pre_quant and post_dequant are built from the same layer instances as
    encoder and decoder, so they share weights with them.
    """
    n_filters = 32
    kernel_len = 9
    # third argument handed to successive residual blocks
    # (presumably a dilation rate -- confirm in nn_blocks)
    encoder_res_args = (1, 2, 4, 8)
    decoder_res_args = (8, 4, 2, 1)

    # ---- encoder -----------------------------------------------------------
    enc_input = Input(shape = (WINDOW_SIZE, 1))

    x = channel_change_block(n_filters, kernel_len)(enc_input)
    for res_arg in encoder_res_args:
        x = residual_block(n_filters, kernel_len, res_arg)(x)
    x = downsample_block(n_filters, kernel_len)(x)
    for res_arg in encoder_res_args:
        x = residual_block(n_filters, kernel_len, res_arg)(x)
    x = channel_change_block(QUANT_CHANS, kernel_len)(x)

    # quantization: pre_quant taps the tensor that feeds the quantizer
    x = Reshape((CHANNEL_SIZE, QUANT_CHANS))(x)
    pre_quant = Model(inputs = enc_input, outputs = x)
    x = SoftmaxQuantization()(x)

    encoder_model = Model(inputs = enc_input, outputs = x, name = 'encoder')

    # ---- decoder -----------------------------------------------------------
    dec_input = Input(shape = (CHANNEL_SIZE, QUANT_CHANS, NBINS))

    # dequantization: post_dequant taps the tensor that leaves the dequantizer
    y = SoftmaxDequantization()(dec_input)
    post_dequant = Model(inputs = dec_input, outputs = y)
    y = Reshape((CHANNEL_SIZE, QUANT_CHANS))(y)

    y = channel_change_block(n_filters, kernel_len)(y)
    for res_arg in decoder_res_args:
        y = residual_block(n_filters, kernel_len, res_arg)(y)
    y = upsample_block(n_filters, kernel_len)(y)
    for res_arg in decoder_res_args:
        y = residual_block(n_filters, kernel_len, res_arg)(y)
    y = channel_change_block(1, kernel_len)(y)

    decoder_model = Model(inputs = dec_input, outputs = y, name = 'decoder')

    return encoder_model, decoder_model, pre_quant, post_dequant

In [7]:
# ---------------------------------------------------------------------------
# discriminator: tries to determine whether an audio window is "real" or "fake"
# ---------------------------------------------------------------------------
def discriminator_structure():
    """Build the discriminator as a single Keras model named 'discriminator'.

    Input is a window of shape (WINDOW_SIZE, 1). The network is a channel-change
    block followed by four stages of two residual blocks each, with a downsample
    block between consecutive stages, then Flatten, a Dense layer of 16 units
    with the project's activation(), and a final Dense(1) with a linear output.

    Returns the discriminator Model.
    """
    n_filters = 32
    kernel_len = 9
    hidden_units = 16
    n_stages = 4

    dsc_input = Input(shape = (WINDOW_SIZE, 1))

    x = channel_change_block(n_filters, kernel_len)(dsc_input)
    for stage in range(n_stages):
        # every stage after the first starts by downsampling
        if stage > 0:
            x = downsample_block(n_filters, kernel_len)(x)
        x = residual_block(n_filters, kernel_len, 1)(x)
        x = residual_block(n_filters, kernel_len, 1)(x)

    x = Flatten()(x)

    x = Dense(hidden_units, kernel_initializer = W_INIT)(x)
    x = activation()(x)

    # single unbounded score per window
    x = Dense(1, kernel_initializer = W_INIT)(x)
    x = Activation('linear')(x)

    return Model(inputs = dsc_input, outputs = x, name = 'discriminator')

In [8]:
# lookup table passed to load_model: each key is the name of a project-defined
# object and each value is that object
KERAS_LOAD_MAP = dict(
    PhaseShiftUp1D = PhaseShiftUp1D,
    code_entropy = code_entropy,
    code_sparsity = code_sparsity,
    rmse = rmse,
    SoftmaxQuantization = SoftmaxQuantization,
    SoftmaxDequantization = SoftmaxDequantization,
    DFT_REAL = DFT_REAL,
    DFT_IMAG = DFT_IMAG,
    MEL_FILTERBANKS = MEL_FILTERBANKS,
    keras_dft_mag = keras_dft_mag,
    keras_dct = keras_dct,
    perceptual_transform = perceptual_transform,
    perceptual_distance = perceptual_distance,
)

In [9]:
# construct autoencoder
# `autoencoder` chains the encoder and decoder models end to end on one input
# tensor, so it maps a (WINDOW_SIZE, 1) window straight to the decoder's output
ac_input = Input(shape = (WINDOW_SIZE, 1))

# pre_quant / post_dequant are kept as notebook globals for the inspection cells
encoder, decoder, pre_quant, post_dequant = autoencoder_structure()
ac_reconstructed = decoder(encoder(ac_input))
autoencoder = Model(inputs = [ac_input], outputs = [ac_reconstructed])

In [10]:
# construct discriminator: regular
# dsc_struct is the underlying discriminator model; it is reused later inside the
# combined model, while `discriminator` wraps it for stand-alone training
dsc_input = Input(shape = (WINDOW_SIZE, 1))
dsc_struct = discriminator_structure()
dsc_label = dsc_struct(dsc_input)

# compiled here with its own optimizer (Adam, lr = 0.0005) and rmse loss so that
# discriminator.train_on_batch can be called directly
discriminator = Model(inputs = [dsc_input], outputs = [dsc_label])
discriminator.compile(loss = [rmse], optimizer = Adam(lr = 0.0005))

In [11]:
# model parameters
# one weight and one loss function per output of the combined model, listed in
# the order the outputs are assembled:
#   n_recons  copies of the reconstruction   -> rmse, perceptual_distance
#   n_discrim copies of the discriminator label -> rmse
#   n_code    copies of the encoder output   -> code_sparsity, code_entropy
loss_weights = [30.0, 5.0, 0.5, 10.0, 1.0]
loss_functions = [rmse, perceptual_distance, rmse, code_sparsity, code_entropy]
n_recons = 2
n_discrim = 1
n_code = 2
# the three counts must add up to the number of weights, and every weight needs a loss
assert(n_recons + n_discrim + n_code == len(loss_weights))
assert(len(loss_weights) == len(loss_functions))

In [12]:
# model specification
# flag the discriminator as not trainable and the autoencoder as trainable before
# building the combined model (its summary reports exactly the discriminator's
# parameter count as non-trainable)
make_trainable(discriminator, False)
make_trainable(autoencoder, True)

model_input = Input(shape = (WINDOW_SIZE, 1))
model_embedding = encoder(model_input)
# model_pre_quant / model_post_dequant are built here but are not among the
# outputs of `model` below
model_pre_quant = pre_quant(model_input)
model_reconstructed = decoder(model_embedding)
model_post_dequant = post_dequant(model_embedding)
model_dsc_label = dsc_struct(model_reconstructed)

# the same tensor is repeated once per loss applied to it, so the output list
# lines up one-to-one with loss_functions / loss_weights
model = Model(inputs = [model_input], outputs = [model_reconstructed] * n_recons + \
                                            [model_dsc_label] * n_discrim + \
                                            [model_embedding] * n_code)

  ' Found: ' + str(self.outputs))


In [13]:
# attach one loss and one weight to each output of the combined model; the
# learning rate of this Adam instance is overwritten on every batch by the
# training loop
model.compile(loss = loss_functions,
              loss_weights = loss_weights,
              optimizer = Adam())

model.summary()
# the discriminator summary is only of interest when it takes part in training
if (n_discrim > 0):
    discriminator.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_6 (InputLayer)         (None, 512, 1)            0         
_________________________________________________________________
encoder (Model)              (None, 256, 1, 32)        186927    
_________________________________________________________________
decoder (Model)              (None, 512, 1)            233167    
_________________________________________________________________
discriminator (Model)        (None, 1)                 274786    
Total params: 694,880
Trainable params: 420,094
Non-trainable params: 274,786
_________________________________________________________________
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_4 (InputLayer)         (None, 512, 1)            0         
________________________________________________________________

In [14]:
# get untrained baseline for model
# runs the freshly initialised autoencoder on one reference file; the recorded
# output of this cell is a list of three metrics (MSE, average error, PESQ)
test_model_on_wav("./SA1.wav", "./train_output/SA1_uninit", autoencoder)

MSE:         155041.0
Avg err:     210.465
PESQ:        1.03144824505


[155041.05, 210.4651, 1.031448245048523]

In [15]:
# saves current model
def save_model(prefix = 'best'):
    """Write the combined model and the autoencoder to HDF5 files in the working directory.

    Produces './<prefix>_model.h5' (the notebook-level `model`) and
    './<prefix>_auto.h5' (the notebook-level `autoencoder`), replacing any files
    already there, and then strips the 'optimizer_weights' group from the
    combined-model file.

    prefix -- leading part of both file names (default 'best')
    """
    model_path = './' + prefix + '_model.h5'
    auto_path = './' + prefix + '_auto.h5'

    # Delete stale files through the os module rather than a shell `rm` command:
    # nothing is passed to a shell, and a file that does not exist yet is simply
    # skipped instead of producing an error message.
    for stale_path in (model_path, auto_path):
        if os.path.isfile(stale_path):
            os.remove(stale_path)
    #os.system('rm ./' + prefix + '_quant_bins.npy')

    model.save(model_path)
    autoencoder.save(auto_path)
    #np.save('./' + prefix + '_quant_bins.npy', K.eval(QUANT_BINS))

    # Reopen the combined model in read/write mode and drop the stored optimizer
    # state. The context manager closes the file even if the edit fails, and the
    # membership test avoids a KeyError when no optimizer state was written.
    with h5py.File(model_path, 'r+') as saved_file:
        if 'optimizer_weights' in saved_file:
            del saved_file['optimizer_weights']

In [16]:
def evaluate_discriminator(discriminator, autoencoder, X, y, verbose = True):
    """Return the percentage of samples in X that the discriminator labels correctly.

    discriminator -- object whose predict(X) returns one score per sample
    autoencoder   -- not used; kept so existing call sites stay valid
    X             -- inputs handed to discriminator.predict
    y             -- expected labels (1 or 0), one per sample; (n,) and (n, 1)
                     are both accepted
    verbose       -- when true, print a one-line summary

    A score >= 0.5 counts as label 1, a score < 0.5 as label 0.
    """
    # verify discriminator was trained properly
    # work on a flattened private copy so the array returned by predict() is not modified
    y_hat = np.array(discriminator.predict(X), dtype = float).ravel()
    y_hat[y_hat >= 0.5] = 1
    y_hat[y_hat < 0.5] = 0

    # Flatten the labels as well: comparing an (n,) vector against (n, 1) labels
    # would broadcast to an (n, n) matrix and silently miscount the matches.
    y_true = np.ravel(y)

    n_total = y_true.shape[0]
    n_correct = np.sum(y_hat == y_true)

    acc = n_correct * 100.0 / n_total
    if (verbose):
        print("Discriminator accuracy: %0.02f pct (%d of %d) right"%(acc, n_correct, n_total))
    return acc

In [17]:
def evaluate_training(autoencoder, lead = ""):
    """Score the autoencoder on random training and validation files and print a summary.

    TRAIN_EVALUATE files are sampled from the training set and VAL_EVALUATE files
    from the validation set. Each file is run through the autoencoder with
    argmax = True and then scored with evaluation_metrics; the mean, max and min
    of every metric column are printed for both sets (columns are labelled
    "[MSE, avg err, PESQ]" in the printed header).

    autoencoder -- model handed to run_model_on_windows
    lead        -- string prepended to every printed line

    Returns the mean of the third metric column (PESQ) over the validation sample.
    """
    def set_evaluation(windows, wparams, eval_idxs):
        # the model is run here, in the calling process; only the scoring of the
        # resulting pairs is farmed out to worker processes
        before_after_pairs = np.array([run_model_on_windows(windows[i],
                                                    wparams[i],
                                                    autoencoder,
                                                    argmax = True)
                                       for i in eval_idxs])

        NUM_THREADS = 8
        list_range = np.arange(0, len(eval_idxs))
        # worker k scores pairs k, k + NUM_THREADS, k + 2 * NUM_THREADS, ...
        slices = [list_range[k:None:NUM_THREADS]
                  for k in range(0, NUM_THREADS)]

        def thread_func(pairs, q):
            for p in pairs:
                q.put(evaluation_metrics(p[0], p[1]))

        q = multiprocessing.Queue()
        threads = [multiprocessing.Process(target = thread_func,
                                           args = (before_after_pairs[slices[k]], q))
                   for k in range(0, NUM_THREADS)]
        for t in threads:
            t.start()

        # Drain the queue BEFORE joining. A process that has put items on a
        # multiprocessing.Queue does not terminate until its buffered items have
        # been flushed to the pipe, so joining first can deadlock once the
        # results no longer fit in the pipe buffer.
        # Results arrive in completion order, which is fine because callers only
        # take column-wise mean / max / min.
        results = np.array([q.get() for _ in list_range])

        for t in threads:
            t.join()

        return results

    def report(label, values):
        # same text as a two-argument Python 2 print statement: the label and
        # the values separated by a single space
        print("%s%s %s" % (lead, label, values))

    train_eval_idxs = random.sample(range(0, len(train_windows)), TRAIN_EVALUATE)
    val_eval_idxs = random.sample(range(0, len(val_windows)), VAL_EVALUATE)

    print(lead + "Format: [MSE, avg err, PESQ]")

    # train set evaluation
    train_metrics = set_evaluation(train_windows, train_wparams,
                                   train_eval_idxs)
    report("    Train: (mean)", np.mean(train_metrics, axis = 0))
    report("    Train: (max) ", np.max(train_metrics, axis = 0))
    report("    Train: (min) ", np.min(train_metrics, axis = 0))

    # validation set evaluation
    val_metrics = set_evaluation(val_windows, val_wparams,
                                 val_eval_idxs)
    report("    Val:   (mean)", np.mean(val_metrics, axis = 0))
    report("    Val:   (max) ", np.max(val_metrics, axis = 0))
    report("    Val:   (min) ", np.min(val_metrics, axis = 0))

    # returns mean PESQ on validation
    return np.mean(val_metrics, axis = 0)[2]

In [18]:
# private copy of the training windows, independent of train_processed
X_train = np.copy(train_processed)
ntrain = X_train.shape[0]

BATCH_SIZE = 128
NUM_EPOCHS = 300
# quantization is switched on at the end of this epoch by the training loop
EPOCHS_BEFORE_QUANT_ON = 5
#EPOCHS_BEFORE_TAU = 20

# NOTE(review): the bitrate units are not stated here -- presumably kbps; confirm
ORIG_BITRATE = 256.00
TARGET_BITRATE = 20.00
PRE_ENTROPY_RATE = ORIG_BITRATE / DOWNSAMPLE_FACTOR

# code entropy that the training loop steers towards by adjusting tau
# NOTE(review): the 16.0 factor is presumably the bit depth of the source audio -- confirm
TARGET_ENTROPY = (TARGET_BITRATE / PRE_ENTROPY_RATE * 16.0)
# scaled by the ratio of the window step to the window length
TARGET_ENTROPY *= (STEP_SIZE / float(WINDOW_SIZE))
# tau is left unchanged while the measured entropy is within +/- this much of the target
TARGET_ENTROPY_FUZZ = 0.1

# amount added to / subtracted from tau per epoch, its value when quantization
# is switched on, and its lower bound
TAU_CHANGE_RATE = 0.025
INITIAL_TAU = 0.5
MIN_TAU = 0.0

# number of random training windows encoded to initialise the quantization bins
NUM_QUANT_VECS = 5000

# end points of the cosine learning-rate schedule applied to the combined model
STARTING_LR = 0.00025
ENDING_LR = 0.0001

print "Target entropy:", TARGET_ENTROPY

Target entropy: 2.34375


In [19]:
# reset the mutable training state; re-run this cell before restarting the
# training loop, which otherwise continues from the previous values
best_val_pesq = 0.0
K.set_value(tau, 0.0)
# fractional epoch counter driving the learning-rate schedule
T_i = 0.0
K.set_value(QUANTIZATION_ON, False)

In [20]:
# Main training loop. Each epoch:
#   1. trains the combined model and (if enabled) the discriminator batch by batch
#   2. measures discriminator accuracy on a random sample
#   3. once quantization is on, estimates the code entropy and nudges tau
#   4. scores the autoencoder on reference files and, once quantization is on,
#      on train / validation samples, saving the model when validation PESQ improves
#   5. at the end of epoch EPOCHS_BEFORE_QUANT_ON, initialises the quantization
#      bins with k-means and switches quantization on
# Relies on Python 2 semantics (print statements, xrange, range() returning a list).
np.set_printoptions(formatter={'float_kind':'{:4f}'.format})
lead = "    "

for epoch in range(1, NUM_EPOCHS + 1):
    print "Epoch " + str(epoch) + ":"

    # present batches randomly each epoch
    # (lis holds the start offset of each batch; shuffling it in place needs a list)
    lis = range(0, ntrain, BATCH_SIZE)
    random.shuffle(lis)
    num_batches = len(lis)
    
    # keep track of start time and current batch #
    i = 0
    startTime = time.time()
    for idx in lis:
        # cosine annealing for model's learning rate
        # T_i advances by 1/num_batches per batch, so train_pct is the fraction of
        # the whole run completed; 3.14159 is a hand-typed approximation of pi
        train_pct = T_i / float(NUM_EPOCHS)
        opt_lr = ENDING_LR + 0.5 * (STARTING_LR - ENDING_LR) * (1 + math.cos(3.14159 * train_pct))
        T_i += (1.0 / num_batches)
        K.set_value(model.optimizer.lr, opt_lr)
        
        # the last batch of an epoch may hold fewer than BATCH_SIZE windows
        batch = X_train[idx:idx+BATCH_SIZE, :,  :]
        nbatch = batch.shape[0]
               
        # train autoencoder
        # targets, one per model output: the input itself for the reconstruction
        # outputs, ones for the discriminator output, and zero arrays for the code
        # outputs (presumably ignored by code_sparsity / code_entropy -- confirm)
        a_y = [batch] * n_recons + \
              [np.ones(nbatch)] * n_discrim + \
              [np.zeros((nbatch, 1, 1, 1))] * n_code

        a_losses = model.train_on_batch(batch, a_y)
        
        if (n_discrim > 0):
            # train discriminator            
            # originals are labelled 1, reconstructions 0
            generated = autoencoder.predict(batch)
            discrim_batch_X = interleave([batch, generated])
            discrim_batch_y = interleave([np.ones(nbatch), np.zeros(nbatch)])

            d_loss = discriminator.train_on_batch(discrim_batch_X, discrim_batch_y)
        
        # print statistics every 10 batches so we know what's going on
        # (the "\r" makes each status line overwrite the previous one)
        if (i % 10 == 0):
            printStr = "        \r" + lead + str(i * BATCH_SIZE) + ": "
            if (n_discrim > 0):
                printStr += (str(d_loss) + " ")
            print printStr,
            
            # raw losses as returned by train_on_batch: total first, then one per output
            loss_arr = np.asarray(a_losses)
            print loss_arr,
            
            # second print: per-output losses scaled by their weights (index 0,
            # the total, is left as is)
            if (len(loss_weights) > 1 and len(loss_arr) > 1):
                for w in xrange(0, len(loss_weights)):
                    loss_arr[w + 1] *= loss_weights[w]
                print loss_arr,
            
            print K.get_value(tau), opt_lr,
        
        i += 1
    print ""
    
    # print elapsed time for epoch
    elapsed = time.time() - startTime
    print lead + "Total time for epoch: " + str(elapsed) + "s"
    
    # ---------------------------------------------------------
    # evaluate discriminator on random samples every epoch
    # ---------------------------------------------------------
    startTime = time.time()
    print lead + "----------------"
    if (n_discrim > 0):
        # NUM originals (label 1) followed by their NUM reconstructions (label 0);
        # rows are drawn with replacement
        NUM = 200
        rows = np.random.randint(X_train.shape[0], size = NUM)
        generated = autoencoder.predict(X_train[rows, :], verbose = 0)
        d_X = np.concatenate((X_train[rows, :], generated))
        d_y = np.concatenate((np.ones(NUM), np.zeros(NUM)))
        d_acc = evaluate_discriminator(discriminator, autoencoder,
                                   d_X, d_y, verbose = False)

        print lead + "Evaluated the discriminator: " + str(d_acc) + "% d_acc"
        elapsed = time.time() - startTime
        print lead + "Total time for evaluation: " + str(elapsed) + "s"
    else:
        print lead + "No discriminator"
    
    # ---------------------------------------------------------
    # estimate code entropy from random samples (if quantization is on)
    # ---------------------------------------------------------
    if (K.get_value(QUANTIZATION_ON) > 0):
        NUM = 20000
        rows = np.random.randint(X_train.shape[0], size = NUM)
        to_predict = np.copy(X_train[rows, :])
        code = encoder.predict(to_predict, verbose = 0, batch_size = 128)
        
        # sum the encoder output over every sample and position, then normalise
        # each channel's row so it adds up to 1 (a per-channel bin distribution)
        all_onehots = np.reshape(code, (-1, QUANT_CHANS, NBINS))
        onehot_hist = np.sum(all_onehots, axis = 0)
        onehot_hist /= np.sum(onehot_hist, axis = 1, keepdims = True)
        
        # entropy for each channel
        # (in bits; eps keeps log() finite for empty bins)
        channel_entropy = -np.sum(onehot_hist * np.log(onehot_hist + np.finfo(float).eps) / np.log(2.0),
                                  axis = 1)

        # total entropy
        entropy = np.sum(channel_entropy)

        print lead + "----------------"
        print lead + "Code entropy:", entropy

        # ---------------------------------------------------------
        # handle updating entropy weight (tau)
        # ---------------------------------------------------------
        # entropy below the target band: lower tau (clamped at MIN_TAU);
        # entropy above the band: raise tau (no upper clamp); inside: unchanged
        old_tau = K.get_value(tau)

        if (entropy < TARGET_ENTROPY - TARGET_ENTROPY_FUZZ):
            new_tau = old_tau - TAU_CHANGE_RATE
            if (new_tau <= MIN_TAU):
                new_tau = MIN_TAU

            K.set_value(tau, new_tau)
            print lead + "Updated tau from", old_tau, "to", new_tau
        elif (entropy > TARGET_ENTROPY + TARGET_ENTROPY_FUZZ):
            new_tau = old_tau + TAU_CHANGE_RATE

            K.set_value(tau, new_tau)
            print lead + "Updated tau from", old_tau, "to", new_tau
        else:
            print lead + "Tau stays at", old_tau
    
    # ---------------------------------------------------------
    # evaluate autoencoder on training/validation data every epoch
    # ---------------------------------------------------------
    startTime = time.time()
    print lead + "----------------"
    print lead + "Evaluating autoencoder..."
    
    
    # NOTE(review): for each file the argmax = True call below reuses the output
    # prefix of the argmax = False call; if test_model_on_wav writes a file named
    # from that prefix, the second call replaces the first -- confirm
    metrics = test_model_on_wav("./SA1.wav", "./train_output/SA1_train_epoch" + str(epoch),
                                autoencoder, lead = lead, verbose = False, argmax = False)
    print lead + "SA1:         ", metrics
    if (K.get_value(QUANTIZATION_ON) > 0):
        metrics = test_model_on_wav("./SA1.wav", "./train_output/SA1_train_epoch" + str(epoch),
                                    autoencoder, lead = lead, verbose = False, argmax = True)
        print lead + "SA1 (arg):   ", metrics
    
    metrics = test_model_on_wav("./SX383.wav", "./train_output/SX383_train_epoch" + str(epoch),
                                autoencoder, lead = lead, verbose = False, argmax = False)
    print lead + "SX383:       ", metrics
    if (K.get_value(QUANTIZATION_ON) > 0):
        metrics = test_model_on_wav("./SX383.wav", "./train_output/SX383_train_epoch" + str(epoch),
                                    autoencoder, lead = lead, verbose = False, argmax = True)
        print lead + "SX383 (arg): ", metrics
    
    # model selection only starts once quantization is on; `entropy` is defined
    # under the same condition earlier in this iteration
    if (K.get_value(QUANTIZATION_ON) > 0):
        val_pesq = evaluate_training(autoencoder, lead)
        # a checkpoint must both improve validation PESQ and meet the entropy target
        if (val_pesq > best_val_pesq and entropy <= TARGET_ENTROPY):
            print lead + "NEW best model! Validation mean-PESQ", val_pesq

            print lead + "Saving model..."
            save_model()
            best_val_pesq = val_pesq
            # recorded but not read anywhere in this loop
            patience_epoch = epoch
        else:
            print lead + "Best validation mean-PESQ seen:", best_val_pesq
    
    elapsed = time.time() - startTime
    print lead + "Total time for evaluation: " + str(elapsed) + "s"
    
    # force a collection, then report the resident memory of this process in bytes
    gc.collect()
    process = psutil.Process(os.getpid())
    mem_used = process.memory_info().rss
    print lead + "Total memory usage: " + str(mem_used)
    
    # ---------------------------------------------------------
    # turn quantization on after a certain # of epochs
    # ---------------------------------------------------------
    if (epoch == EPOCHS_BEFORE_QUANT_ON):
        print lead + "----------------"
        print lead + "Turning quantization on!"
        
        # draw NUM_QUANT_VECS training windows with replacement
        # (this reuses `i`, which the next epoch resets before its batch loop)
        random_windows = []
        for i in xrange(0, NUM_QUANT_VECS):
            w_idx = random.randint(0, train_processed.shape[0] - 1)
            random_windows.append(train_processed[w_idx])

        random_windows = np.array(random_windows)
        print lead + "    Selecting random code vectors for analysis..."
        encoded_windows = encoder.predict(random_windows, batch_size = 128, verbose = 0)

        print lead + "    K means clustering for bins initialization..."

        # one 1-D k-means per code channel: the NBINS sorted cluster centres
        # become that channel's initial bin values
        all_clustered = []
        cluster_scores = []
        for i in xrange(0, QUANT_CHANS):
            # NOTE(review): reads index 0 of the last axis -- presumably where the
            # encoder puts the unquantized value while quantization is off; confirm
            # against SoftmaxQuantization
            channel_values = encoded_windows[:, :, i, 0]
            channel_values = np.reshape(channel_values, (-1, 1))

            km = MiniBatchKMeans(n_clusters = NBINS).fit(channel_values)
            clustered = np.sort(km.cluster_centers_.flatten())
            all_clustered.append(clustered)

            # square root of the median distance from each value to its nearest centre
            cluster_score = np.sqrt(np.median(np.min(km.transform(channel_values), axis = 1)))
            cluster_scores.append(cluster_score)

        print lead + "    Done. Cluster scores:", cluster_scores
        
        # rows = channels, columns = bins
        clustered_bins = np.vstack(all_clustered)
        K.set_value(QUANTIZATION_ON, True)
        K.set_value(QUANT_BINS, clustered_bins)
        K.set_value(tau, INITIAL_TAU)

Epoch 1:
    101120: 0.315088  [3.451242 0.007382 0.564559 0.813969 0.000000 0.000000] [3.451242 0.221462 2.822796 0.406984 0.000000 0.000000] 0.0 0.0002499959494694                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               
    Total time for epoch: 163.967998981s
    ----------------
    Evaluated the discriminator: 84.0% d_acc
    Total time for evalu

        Val:   (mean) [13854.106834 59.680924 3.292110]
        Val:   (max)  [97580.273438 137.370331 3.757063]
        Val:   (min)  [503.798248 13.055804 2.728775]
    Best validation mean-PESQ seen: 0.0
    Total time for evaluation: 35.139893055s
    Total memory usage: 4625276928
Epoch 7:
    101120: 0.289329  [5.096374 0.011900 0.514065 0.726230 0.056002 1.245918] [5.096374 0.357000 2.570326 0.363115 0.560016 1.245918] 0.525 0.000249799019475                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   

    101120: 0.179792  [5.043114 0.010587 0.536585 0.844229 0.042151 1.198964] [5.043114 0.317606 2.682924 0.422114 0.421507 1.198964] 0.525 0.000249503638463                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         
    Total time for epoch: 153.953372002s
    ----------------
    Evaluated the discriminator: 82.5% d_acc
    Total time for evaluation: 0.09322

    101120: 0.163576  [5.050081 0.014286 0.545288 0.886393 0.035641 1.095456] [5.050081 0.428582 2.726438 0.443196 0.356408 1.095456] 0.5 0.000249077552975                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          
    Total time for epoch: 154.632311106s
    ----------------
    Evaluated the discriminator: 84.75% d_acc
    Total time for evaluation: 0.09472

KeyboardInterrupt: 

In [None]:
#save_model('end')

In [None]:
# disabled by default: change the condition to True to reload the files written
# by save_model() instead of using the in-memory models
# NOTE(review): only model, autoencoder, encoder and decoder are rebound here;
# pre_quant, post_dequant and discriminator keep pointing at the old objects
if False:
    model = load_model('best_model.h5', KERAS_LOAD_MAP)
    autoencoder = load_model('best_auto.h5', KERAS_LOAD_MAP)
    # the autoencoder was built as input -> encoder -> decoder, so its layers
    # at index 1 and 2 are the encoder and decoder sub-models
    encoder = autoencoder.layers[1]
    decoder = autoencoder.layers[2]

In [None]:
# layer lists of the encoder and decoder sub-models, which the model summary
# shows at positions 1 and 2 of the combined model
enc = model.layers[1].layers
dec = model.layers[2].layers

In [None]:
# final check on three reference files, each run with the default argmax setting
# and then with argmax = True
# NOTE(review): both runs of a file share one output prefix; if a file is written
# from it, the argmax = True run replaces the other -- confirm
test_model_on_wav("./SA1.wav", "SA1_final", autoencoder)
test_model_on_wav("./SA1.wav", "SA1_final", autoencoder, argmax = True)

test_model_on_wav("./SX383.wav", "SX383_final", autoencoder)
test_model_on_wav("./SX383.wav", "SX383_final", autoencoder, argmax = True)

test_model_on_wav("./fiveYears.wav", "fy_final", autoencoder)
test_model_on_wav("./fiveYears.wav", "fy_final", autoencoder, argmax = True) 

In [None]:
# encoder output for the first 10000 training windows, used by the histogram cell
all_embed = encoder.predict(X_train[:10000], batch_size = BATCH_SIZE, verbose = 1)

In [None]:
# per-channel bin usage: merge the sample and position axes, sum over them, and
# normalise each channel's row so it adds up to 1
probs = np.reshape(all_embed, (all_embed.shape[0] * all_embed.shape[1], QUANT_CHANS, NBINS))
hist = np.sum(probs, axis = 0)
hist /= np.sum(hist, axis = 1, keepdims = True)

# one bar chart, entropy figure and bin listing per channel
for i in xrange(0, hist.shape[0]):
    print "--- CHANNEL", i, "---"
    
    sample_hist_bins = np.linspace(0, NBINS - 1, NBINS)
    plt.bar(sample_hist_bins, hist[i], align = 'center', width = 1)
    plt.show()
    
    # entropy in bits; bins with probability below 1e-4 are left out of the sum
    # (this rebinds the notebook-level `entropy` that the training loop also uses)
    entropy = 0
    for j in hist[i]:
        if (j < 1e-4): continue
        entropy += j * math.log(j, 2)
    entropy = -entropy
    print "Entropy of distribution:", entropy
    
    print "Hist:", hist[i]
    print "Bins:", K.eval(QUANT_BINS[i])
    print ""

In [None]:
# take the code of the first window and turn it into a hard one-hot code by
# picking the arg-max along the last axis
s = all_embed[0]

oh = K.eval(K.one_hot(K.argmax(K.variable(s)), NBINS))

# compare the shape of the code with the shape of its one-hot version
print s.shape
print oh.shape

In [None]:
# read one reference file, preprocess and window it with the project helpers,
# then encode every window
[rate, data] = sciwav.read("./SA1.wav")
data = data.astype(np.float32)
processedWave, wparams = preprocess_waveform(data)
windows = extract_windows(processedWave, STEP_SIZE, OVERLAP_SIZE)

# add the trailing axis of length 1 that the encoder input expects
transformed = np.reshape(windows, (windows.shape[0], WINDOW_SIZE, 1))
embed = encoder.predict(transformed, batch_size = BATCH_SIZE, verbose = 1)

In [None]:
# decode the codes of the reference file back into windows
recons = decoder.predict(embed, batch_size = BATCH_SIZE, verbose = 1)

In [None]:
# intermediate tensors on either side of the quantizer: the encoder activations
# right before quantization and the decoder-side values right after dequantization
before_quantization = pre_quant.predict(transformed, batch_size = BATCH_SIZE, verbose = 1)
after_dequantization = post_dequant.predict(embed, batch_size = BATCH_SIZE, verbose = 1)

In [None]:
# for window 25: the largest value along the last axis at every position, the
# index of that value, and the fraction of positions where it exceeds 0.98
max_pct = np.max(embed[25], axis = -1)
print max_pct
print np.argmax(embed[25], axis = -1)
print np.sum(max_pct > 0.98) / float(max_pct.size)

In [None]:
# same statistic over every window of the file: mean of the per-position maxima
# and the fraction of them above 0.98
embed_max = np.max(embed, axis = -1)
print np.mean(embed_max)
print np.sum(embed_max > 0.98) / float(embed_max.size)

In [None]:
# index of the window to inspect
idx = 25

orig = windows[idx].flatten()
recn = recons[idx].flatten()

print "Original"
plt.plot(orig)
# remember the y-range so the reconstruction is drawn on the same scale
ylim = plt.gca().get_ylim()
plt.show()

print "Reconstruction"
plt.plot(recn)
plt.ylim(ylim)
plt.show()

# with quantization on, show the code before quantization and after
# dequantization on a shared y-range; otherwise only the pre-quantization tensor
if (K.get_value(QUANTIZATION_ON) > 0):
    print "Before quantization"
    plt.plot(before_quantization[idx])
    ylim = plt.gca().get_ylim()
    plt.show()

    print "After dequantization"
    plt.plot(after_dequantization[idx])
    plt.ylim(ylim)
    plt.show()
else:
    print "Embedding"
    plt.plot(before_quantization[idx])
    ylim = plt.gca().get_ylim()
    plt.show()

# absolute sample-wise difference between original and reconstruction
print "Error"
plt.plot(abs(orig - recn))
plt.show()