In [97]:
import os
import librosa
import glob
import json
import h5py
import numpy as np
from preprocess import get_allpaths, preprocess
from utils      import load_config, load_logger, load_parallel_pool, \
                       generateOutputCQTList, pitch_shift_CQT, get_querytoref
from search     import calculateMRR

In [3]:
# Load the project configuration and the shared logger, and connect to the
# parallel pool. All three helpers come from the project-local `utils` module.
# NOTE(review): `c` is presumably an ipyparallel client (see the controller
# warning printed by this cell); it is not used in the cells visible here.
config = load_config()
logger = load_logger()
c      = load_parallel_pool()

            Controller appears to be listening on localhost, but not on this machine.
            If this is true, you should specify Client(...,sshserver='you@mirlab')
            or instruct your controller to listen on an external IP.


In [4]:
# Artist identifier; used below to locate the CQT list, the HDF5 database file,
# the query list and the query-to-reference ground truth.
artist = 'taylorswift'

In [7]:
# NOTE(review): `queries` is only assigned in a later cell (the query CQT loop),
# so this cell raises NameError on a fresh kernel -- it was run out of order.
queries.shape

(100, 459, 121)

In [8]:
# HashNet hyper-parameters kept at module level: alpha scales the pairwise dot
# product in the loss, beta scales the input of the tanh hash activation
# (create_model uses these same values).
alpha = 0.01
beta = 1

In [9]:
def create_model(beta=1, numberOfBits=256, alpha=0.01):
    """
    Build the siamese HashNet model.

    Two single-channel inputs of shape (1, 76, 121) ("image_1", "image_2") are
    passed through one shared convolutional base model, flattened, and mapped
    by one shared Dense layer to `numberOfBits` values using tanh(beta * x).
    A loss layer combines both codes with the "similarity" and "weight" inputs
    and registers the training loss on the model.

    Parameters
    ----------
    beta : number, optional
        Scale applied inside the tanh hash activation (default 1).
    numberOfBits : int, optional
        Number of units of the hash layer (default 256).
    alpha : number, optional
        Scale applied to the pairwise dot product in the loss (default 0.01).
        Kept small because it prevents the loss function from blowing up.

    Returns
    -------
    A Keras ``Model`` with inputs [image_1, image_2, similarity, weight] whose
    single output is the loss layer's placeholder tensor of ones.
    """
    # Keras is imported inside the function, as in the original notebook cell.
    from keras.models import Model
    from keras.layers.core import Flatten, Dense, Dropout
    from keras.layers.convolutional import Convolution2D, MaxPooling2D
    from keras.layers import Input, Layer
    from keras import backend as K

    K.set_image_data_format('channels_first')
    width = 76
    height = 121
    n_channel = 1

    image_a    = Input(shape=(n_channel, width, height), name='image_1')
    image_b    = Input(shape=(n_channel, width, height), name='image_2')
    similarity = Input(shape=(1,), name="similarity")
    weight     = Input(shape=(1,), name="weight")

    # Convolutional base shared by both branches.
    in_layer = Input(shape=(n_channel, width, height))
    x = Convolution2D(64, (7,7))(in_layer)
    x = Dropout(0.2)(x)
    x = MaxPooling2D(strides=(2,2))(x)

    x = Convolution2D(192, (3,3))(x)
    x = Dropout(0.2)(x)
    x = MaxPooling2D(strides=(2,2))(x)

    x = Convolution2D(128, (1,1))(x)
    x = Convolution2D(256, (3,3))(x)
    x = Convolution2D(256, (1,1))(x)
    x = Convolution2D(256, (3,3))(x)
    x = Dropout(0.2)(x)
    x = MaxPooling2D(strides=(2,2))(x)

    base_model = Model(in_layer, x)

    # Obtain intermediate tensor
    intermediate_a = base_model(image_a)
    intermediate_b = base_model(image_b)

    # Flatten the layer
    flatten_a = Flatten()(intermediate_a)
    flatten_b = Flatten()(intermediate_b)

    logger.debug("[HashNet Model] beta = {:}, numberOfBits = {:}, alpha = {:}".format(beta, numberOfBits, alpha))

    def custom_activation(x):
        """
        Hash activation tanh(beta * x).

        `beta` is taken from the enclosing create_model() call. A `global`
        declaration here would instead look up a module-level variable and
        silently ignore the value configured for this model.
        """
        return K.tanh(beta * x)

    class CustomizedLossLayer(Layer):
        """
        Layer that computes the pairwise loss and attaches it to the model.
        """

        def __init__(self, **kwargs):
            self.is_placeholder = True
            super(CustomizedLossLayer, self).__init__(**kwargs)

        def my_loss(self, encoded_a, encoded_b, similarity, weight):
            """
            Return sum(weight * (log(1 + exp(alpha * <a, b>)) - alpha * similarity * <a, b>)),
            where <a, b> is the per-sample dot product of the two codes and
            `alpha` comes from the enclosing create_model() call.
            """
            dot_product = K.sum(encoded_a * encoded_b, axis=-1, keepdims=True)
            logger.debug(dot_product)
            return K.sum(weight * (K.log(1 + K.exp(alpha * dot_product)) - alpha * similarity * dot_product))

        def call(self, inputs):
            """
            Register the loss for [encoded_a, encoded_b, similarity, weight]
            and return a tensor of ones shaped like `similarity`.
            """
            encoded_a, encoded_b, similarity, weight = inputs[0], inputs[1], inputs[2], inputs[3]
            loss = self.my_loss(encoded_a, encoded_b, similarity, weight)
            self.add_loss(loss, inputs=inputs)
            return K.ones_like(similarity)

    # One Dense layer instance is applied to both branches so they share weights.
    hash_layer = Dense(numberOfBits, activation=custom_activation)
    encoded_a = hash_layer(flatten_a)
    encoded_b = hash_layer(flatten_b)

    # Define a loss layer with 4 inputs
    loss = CustomizedLossLayer()([encoded_a, encoded_b, similarity, weight])

    # Define a model that has 4 inputs and outputs loss
    model = Model(inputs=[image_a, image_b, similarity, weight], outputs=[loss])
    return model

In [10]:
# Build the HashNet graph, then restore weights from the saved .h5 file.
# NOTE(review): presumably the checkpoint written after training iteration 110
# on 2018-03-10, judging by the file name -- confirm.
model = create_model()
model.load_weights("2018-03-10[HashNet][Iteration=110].h5")

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


2018-03-10 18:51:35,381 - root - DEBUG - [HashNet Model] beta = 1, numberOfBits = 256, alpha = 0.01
2018-03-10 18:51:35,391 - root - DEBUG - Tensor("customized_loss_layer_1/Sum:0", shape=(?, 1), dtype=float32)


In [39]:
def get_base_model(model):
    """
    Return the third entry (index 2) of ``model.layers``.

    NOTE(review): presumably this is the shared convolutional sub-model built
    in create_model(); the fixed index relies on Keras' layer ordering --
    confirm before reusing with a different architecture.
    """
    layers = model.layers
    return layers[2]
# Sub-model whose predict() output is thresholded into fingerprint bits below.
base_model = get_base_model(model)

In [90]:
# Open the list of CQT files for this artist; it is iterated and closed in the
# next cell.
f = open(generateOutputCQTList(config['DATA_OUTPUT_DIR'], artist), 'r')
# Create the fingerprint database. mode='w' truncates any existing file at
# this path, so re-running this cell discards a previously built database.
db_path = os.path.join(config["DATA_OUTPUT_DIR"], artist + '_db.hdf5')
db = h5py.File(db_path, mode='w')

In [91]:
# Build the fingerprint database: for every file named in the list, store one
# boolean dataset holding the thresholded base-model output for the original
# CQT and for each pitch-shifted version.
try:
    for line in f:
        # Strip the line terminator instead of blindly dropping the last
        # character, which would corrupt a final line that has no '\n'.
        rel_path = line.strip()
        if not rel_path:
            continue  # tolerate blank lines in the list file
        key = os.path.basename(rel_path)
        logger.debug('==> Generating database for %s' % key)
        full_path = os.path.join(config["DATA_OUTPUT_DIR"], rel_path)
        Q = np.load(full_path).T

        # Reshape: drop trailing frames that do not fill a whole 76-frame
        # patch, then cut into (n_patches, 1, 76, 121) network inputs.
        width, height = Q.shape
        Q = Q[:(width - width%76), :].reshape(-1, 1, 76, 121)

        # Slot 0 holds the unshifted patches, slots 1..MAX the upward shifts
        # and slots MAX+1..2*MAX the downward shifts.
        max_shift = config['MAX_PITCH_SHIFT']
        pitch_shift_Qs = np.empty((2 * max_shift + 1, ) + Q.shape)
        pitch_shift_Qs[0] = Q
        # NOTE(review): Q is 4-D here, so Q.T reverses all four axes before
        # pitch_shift_CQT is applied -- confirm the helper expects that layout.
        for i in range(1, max_shift + 1):
            pitch_shift_Qs[i] = pitch_shift_CQT(Q.T, i).T
        for i in range(1, max_shift + 1):
            pitch_shift_Qs[i + max_shift] = pitch_shift_CQT(Q.T, -i).T

        fpseqs = np.array([base_model.predict(shifted) for shifted in pitch_shift_Qs])

        # Positive activations become 1-bits. The builtin `bool` replaces the
        # `np.bool` alias, which was removed in NumPy 1.24.
        db.create_dataset(key, data=fpseqs > 0, dtype=bool)
finally:
    f.close()
    # Close the write handle so everything is flushed to disk before the
    # database is reopened read-only.
    db.close()


2018-03-11 05:32:35,997 - root - DEBUG - ==> Generating database for taylorswift_ref1.npy
2018-03-11 05:32:36,867 - root - DEBUG - ==> Generating database for taylorswift_ref2.npy
2018-03-11 05:32:37,617 - root - DEBUG - ==> Generating database for taylorswift_ref3.npy
2018-03-11 05:32:38,635 - root - DEBUG - ==> Generating database for taylorswift_ref4.npy
2018-03-11 05:32:39,338 - root - DEBUG - ==> Generating database for taylorswift_ref5.npy
2018-03-11 05:32:40,488 - root - DEBUG - ==> Generating database for taylorswift_ref6.npy
2018-03-11 05:32:41,379 - root - DEBUG - ==> Generating database for taylorswift_ref7.npy
2018-03-11 05:32:42,143 - root - DEBUG - ==> Generating database for taylorswift_ref8.npy
2018-03-11 05:32:43,084 - root - DEBUG - ==> Generating database for taylorswift_ref9.npy
2018-03-11 05:32:44,127 - root - DEBUG - ==> Generating database for taylorswift_ref10.npy
2018-03-11 05:32:45,029 - root - DEBUG - ==> Generating database for taylorswift_ref11.npy
2018-03-

In [92]:
# Reopen the fingerprint database read-only for the retrieval experiment.
# NOTE(review): make sure the write-mode handle opened when the database was
# built has been closed before this cell runs.
db_path = os.path.join(config["DATA_OUTPUT_DIR"], artist + '_db.hdf5')
db = h5py.File(db_path, mode='r')

## Query

In [93]:
# Names of the query recordings for this artist, read from the lists under
# AUDIO_DIR/Lists/. Entries carry no directory or extension; the loader below
# joins them with AUDIO_DIR and appends '.wav'.
query_paths = get_allpaths(artist, os.path.join(config['AUDIO_DIR'], 'Lists/'), file_type='query')

In [94]:
def _load_query_logcqt(query_name):
    '''
        Load AUDIO_DIR/<query_name>.wav, compute its CQT, run it through
        preprocess(), and return the transposed result.
    '''
    # os.path.join gives the same path as plain concatenation when AUDIO_DIR
    # ends with a separator, and still works when it does not.
    wav_path = os.path.join(config["AUDIO_DIR"], query_name + '.wav')
    y, sr = librosa.load(wav_path)
    Q = librosa.cqt(y, sr=sr, fmin=130.81, n_bins=121, bins_per_octave=24, hop_length=96)
    logQ = preprocess(Q, 3)
    return logQ.T


def get_query_shape():
    '''
        returns the shape of query file in (width, height)

        The shape is measured on the first entry of query_paths.
    '''
    assert len(query_paths) > 0
    return _load_query_logcqt(query_paths[0]).shape

query_shape = get_query_shape()
# Every query is stored in one dense array, so all queries must share the
# shape of the first one; a mismatch raises on assignment below.
queries = np.empty((len(query_paths), ) + query_shape)

for i, cur_file in enumerate(query_paths):
    print('==> Computing CQT of %s'%cur_file)
    queries[i, :, :] = _load_query_logcqt(cur_file)

==> Computing CQT of taylorswift_query1
==> Computing CQT of taylorswift_query2
==> Computing CQT of taylorswift_query3
==> Computing CQT of taylorswift_query4
==> Computing CQT of taylorswift_query5
==> Computing CQT of taylorswift_query6
==> Computing CQT of taylorswift_query7
==> Computing CQT of taylorswift_query8
==> Computing CQT of taylorswift_query9
==> Computing CQT of taylorswift_query10
==> Computing CQT of taylorswift_query11
==> Computing CQT of taylorswift_query12
==> Computing CQT of taylorswift_query13
==> Computing CQT of taylorswift_query14
==> Computing CQT of taylorswift_query15
==> Computing CQT of taylorswift_query16
==> Computing CQT of taylorswift_query17
==> Computing CQT of taylorswift_query18
==> Computing CQT of taylorswift_query19
==> Computing CQT of taylorswift_query20
==> Computing CQT of taylorswift_query21
==> Computing CQT of taylorswift_query22
==> Computing CQT of taylorswift_query23
==> Computing CQT of taylorswift_query24
==> Computing CQT of tayl

In [95]:
# Fingerprint every query: drop the trailing frames that do not fill a whole
# 76-frame patch, cut the rest into (n_patches, 1, 76, 121) network inputs, and
# keep the sign of the base-model output as boolean bits.
q = []
for query in queries:
    n_frames, n_bins = query.shape
    usable_frames = n_frames - n_frames % 76
    patches = query[:usable_frames, :].reshape(-1, 1, 76, 121)
    q.append(base_model.predict(patches) > 0)

In [80]:
# Collect the reference fingerprint datasets stored in the database.
# NOTE(review): the order is whatever h5py yields when iterating the file
# (presumably sorted by dataset name, so "ref10" would precede "ref2") --
# confirm this matches the indexing expected by the ground truth.
refs = list(db.values())

In [98]:
# Query-to-reference ground truth for this artist, read from AUDIO_DIR/Lists/.
ground_truths = get_querytoref(artist, os.path.join(config['AUDIO_DIR'], 'Lists/'))

In [100]:
# Mean reciprocal rank of the retrieval. The hashed queries built above live
# in `q`; the name `querys` is never defined in this notebook.
# NOTE(review): calculateMRR's expected argument layout is not visible here --
# confirm it takes (query fingerprints, reference fingerprints, ground truth).
mrr = calculateMRR(q, refs, ground_truths)

NameError: name 'calculateMRR' is not defined