In [1]:
from keras.models import load_model
from keras.preprocessing import sequence
import pickle
import numpy as np
from trainingDataPreparation import log_mel_feature
import time

Using Theano backend.


In [2]:
# keras model names
filename_model_timbre   = './models/crnn_model_cw_timbre_gru1layer_32nodes_thin_vgg_65k_bidirectional.h5'
filename_model_pitch    = './models/crnn_model_cw_pitch_gru1layer_32nodes_fat_vgg.h5'
filename_model_dynamics = './models/crnn_model_cw_dynamics_gru1layer_32nodes_thin_vgg.h5'
filename_model_richness = './models/crnn_model_cw_richness_gru1layer_32nodes_thin_vgg.h5'
filename_model_attack   = './models/crnn_model_cw_attack_gru1layer_32nodes_fat_vgg.h5'

# feature scaler path
filename_scaler_timbre = './dataset/scaler_timbre_train.pkl'
filename_scaler_pitch = './dataset/scaler_pitch_train.pkl'
filename_scaler_dynamics = './dataset/scaler_dynamics_train.pkl'
filename_scaler_richness = './dataset/scaler_richness_train.pkl'
filename_scaler_attack = './dataset/scaler_attack_train.pkl'

# load a good sound sample
goodsounds_sample = './goodsounds_samples/0023.wav'

In [3]:
# load models and scalers
start_time = time.time()

model_timbre = load_model(filename_model_timbre)
model_pitch = load_model(filename_model_pitch)
model_dynamics = load_model(filename_model_dynamics)
model_richness = load_model(filename_model_richness)
model_attack = load_model(filename_model_attack)

print("Model loading time --- %s seconds ---" % (time.time() - start_time))

Model loading time --- 54.0450699329 seconds ---


In [4]:
# load scalers
scaler_timbre = pickle.load(open(filename_scaler_timbre,"rb"))
scaler_pitch = pickle.load(open(filename_scaler_pitch,"rb"))
scaler_dynamics = pickle.load(open(filename_scaler_dynamics,"rb"))
scaler_richness = pickle.load(open(filename_scaler_richness,"rb"))
scaler_attack = pickle.load(open(filename_scaler_attack,"rb"))

In [5]:
# calculate log mel feature
feature = log_mel_feature(goodsounds_sample)

In [6]:
# mean and variance scaling
feature_timbre = scaler_timbre.transform(feature)
feature_pitch = scaler_pitch.transform(feature)
feature_dynamics = scaler_dynamics.transform(feature)
feature_richness = scaler_richness.transform(feature)
feature_attack = scaler_attack.transform(feature)

In [7]:
# do feature padding and adding dimension
# short sequence will be padded with 0
# longer sequence will be truncated
max_length = 1401
feature_timbre = sequence.pad_sequences(feature_timbre.transpose(), maxlen=max_length, dtype='float32')
feature_pitch = sequence.pad_sequences(feature_pitch.transpose(), maxlen=max_length, dtype='float32')
feature_dynamics = sequence.pad_sequences(feature_dynamics.transpose(), maxlen=max_length, dtype='float32')
feature_richness = sequence.pad_sequences(feature_richness.transpose(), maxlen=max_length, dtype='float32')
feature_attack = sequence.pad_sequences(feature_attack.transpose(), maxlen=max_length, dtype='float32')

feature_timbre = np.expand_dims(feature_timbre, axis=0)
feature_pitch = np.expand_dims(feature_pitch, axis=0)
feature_dynamics = np.expand_dims(feature_dynamics, axis=0)
feature_richness = np.expand_dims(feature_richness, axis=0)
feature_attack = np.expand_dims(feature_attack, axis=0)

In [8]:
# calculate goodness
start_time = time.time()

goodness_timbre = model_timbre.predict(feature_timbre)
goodness_pitch = model_pitch.predict(feature_pitch)
goodness_dynamics = model_dynamics.predict(feature_dynamics)
goodness_richness = model_richness.predict(feature_richness)
goodness_attack = model_richness.predict(feature_attack)

print("Goodness calculation time--- %s seconds ---" % (time.time() - start_time))

print('timbre stability', goodness_timbre[0][1])
print('pitch stability', goodness_pitch[0][1])
print('dynamics stability', goodness_dynamics[0][1])
print('timbre richness', goodness_richness[0][1])
print('attack clarity', goodness_attack[0][1])

Goodness calculation time--- 6.46161794662 seconds ---
('timbre stability', 0.99394739)
('pitch stability', 0.99973553)
('dynamics stability', 0.98907566)
('timbre richness', 0.99746829)
('attack clarity', 0.99744874)
