In [3]:
import pandas as pd
import numpy as np
import glob
import gc
import keras 
from keras.layers import LSTM, Dense, TimeDistributed, Dropout, MaxPooling2D, Conv2D, Input
from keras.layers import Flatten, BatchNormalization, Activation, Reshape, concatenate
from keras.models import Model
import random
from sklearn.model_selection import KFold
import keras.backend as K

# Explicitly switch on automatic garbage collection. CPython enables it by default,
# so this only matters if collection was disabled earlier in the same kernel.
gc.enable()

In [4]:
# glob.glob() returns matches in arbitrary, OS-dependent order, but the two listings
# are later consumed pairwise by position (spectrogram i <-> label i). Sorting both
# makes that pairing deterministic across machines and runs.
# NOTE(review): this assumes the files in the two directories sort into the same
# relative order (e.g. matching base names) -- confirm against the dataset layout.
# NOTE(review): sorting also fixes which files land in each CV fold; if the saved
# weights were trained on folds built from an unsorted listing, verify the order
# still matches so test files do not overlap the training set.
spectrums_index = np.array(sorted(glob.glob('E:/note_detection/spectrogram/*')))
labels_index = np.array(sorted(glob.glob('E:/note_detection/label/*')))

In [5]:
# Evaluation settings shared by the cells below.
num_samples = 5000        # upper bound on the number of spectrogram/label files used
height, width = 108, 108  # spatial size of the CNN input (rows, columns)
steps = 20                # number of time steps in the LSTM input

# Keep only the first `num_samples` paths of each listing (fewer if less are found).
spec_list, labe_list = spectrums_index[:num_samples], labels_index[:num_samples]

In [6]:
def classifier_5(optimizer='adam', loss='binary_crossentropy', metrix='accuracy'):
    """Build and compile the two-branch (CNN + LSTM) Keras model.

    One branch runs a convolutional stack over a single-channel image of shape
    ``(height, width, 1)``; the other runs two stacked LSTMs over a sequence of
    shape ``(steps, 24)``. Both branch outputs are concatenated and passed through
    a dense head that ends in 24 sigmoid units.

    ``height``, ``width`` and ``steps`` are read from module-level variables at
    call time.

    Parameters
    ----------
    optimizer : forwarded to ``model.compile(optimizer=...)``.
    loss : forwarded to ``model.compile(loss=...)``.
    metrix : a single metric, wrapped in a list and forwarded to
        ``model.compile(metrics=[...])``.

    Returns
    -------
    The compiled ``Model``. Its inputs are ordered ``[lstm_inputs, cnn_inputs]``,
    so ``fit``/``predict`` must receive the LSTM array first and the CNN array
    second.

    NOTE(review): the layers are created in a fixed order; loading an existing
    weight file into this model may depend on that order -- confirm before
    reordering any statement here.
    """
    # CNN branch: normalise the raw input, then three pairs of 3x3 convolutions
    # with 32, 64 and 128 filters respectively.
    cnn_inputs = Input(shape=(height, width, 1), name='cnn_inputs')
    layers = BatchNormalization()(cnn_inputs)
    layers = Conv2D(32, (3,3), activation="relu")(layers)
    layers = Conv2D(32, (3,3), activation="relu")(layers)
    # The two positional arguments are pool_size=2 and strides=2.
    layers = MaxPooling2D(2,2)(layers)
    layers = Conv2D(64, (3,3), activation='relu')(layers)
    layers = Conv2D(64, (3,3), activation='relu')(layers)
    layers = BatchNormalization()(layers)
    layers = MaxPooling2D(2,2)(layers)
    layers = Conv2D(128, (3,3), activation='relu')(layers)
    layers = Conv2D(128, (3,3), activation='relu')(layers)
    layers = BatchNormalization()(layers)
    # 1x1 convolution: changes the channel count (to 256) without mixing pixels.
    layers = Conv2D(256, (1,1), activation='relu')(layers)
    # Collapse the feature maps into a vector and reduce it to 32 features.
    layers = Flatten()(layers)
    layers = Dropout(0.3)(layers)
    layers = Dense(32, activation='relu')(layers)
    
    # LSTM branch: the first LSTM returns the full sequence so the second one can
    # consume it; the second returns only its final output.
    lstm_inputs = Input(shape=(steps, 24), name='lstm_inputs')
    lstm_layers = LSTM(16, return_sequences=True)(lstm_inputs)
    lstm_layers = LSTM(16)(lstm_layers)
    lstm_layers = Dense(24, activation='relu')(lstm_layers)
    
    # Shared head: merge both branches (LSTM features first), normalise, and map
    # to 24 independent sigmoid outputs.
    main_ = concatenate([lstm_layers, layers])
    main_ = BatchNormalization()(main_)
    main_ = Dense(64, activation='relu')(main_)
    output = Dense(24, activation='sigmoid')(main_)
    
    # Input order here defines the order callers must use: LSTM first, CNN second.
    model = Model(inputs=[lstm_inputs, cnn_inputs], outputs=[output])
    model.compile(optimizer=optimizer, loss=loss, metrics=[metrix])
    return model

In [7]:
def load_test_score(model, cnn_test, lstm_test, y_test):
    """Return the strict per-sample accuracy of ``model`` on a test set.

    A sample counts as wrong as soon as any one of its outputs differs from the
    matching target by more than 0.5; a sample is correct only if every output is
    within 0.5 of its target.

    Parameters
    ----------
    model : object exposing ``predict(list_of_inputs)`` that returns a 2-D array
        of shape ``(n_samples, n_outputs)``.
    cnn_test, lstm_test : the two model inputs. They are forwarded to
        ``model.predict`` as ``[cnn_test, lstm_test]``, i.e. purely by position.
        The parameter names do not reorder anything, so the caller must pass the
        arrays in the order the model's inputs were declared.
    y_test : array-like of targets with the same shape as the prediction.

    Returns
    -------
    float
        ``1 - wrong / n_samples``.

    Raises
    ------
    ValueError
        If the model returns no samples (accuracy is undefined) or if ``y_test``
        does not have the same shape as the prediction.
    """
    prediction = np.asarray(model.predict([cnn_test, lstm_test]))
    targets = np.asarray(y_test)

    n_samples = prediction.shape[0]
    if n_samples == 0:
        raise ValueError('cannot compute accuracy: the test set is empty')
    if targets.shape != prediction.shape:
        raise ValueError(
            'y_test has shape {} but the model predicted shape {}'.format(
                targets.shape, prediction.shape))

    # One vectorised pass instead of a Python loop over every (sample, output)
    # pair: flag each sample that has at least one output off by more than 0.5.
    sample_is_wrong = np.any(np.abs(prediction - targets) > 0.5, axis=1)
    wrong = int(sample_is_wrong.sum())  # plain int keeps the printed text and the float result unchanged

    print('{} samples are loaded for testing'.format(n_samples))
    print('{} testing samples are predicted wrong'.format(wrong))
    accuracy = 1 - wrong / n_samples
    return accuracy

In [8]:
def generate_random_cnn_lstm_input_and_target_label(spectrogram, label, window=108, stride=27, steps=20):
    """Cut one random (CNN input, LSTM input, target) example from a spectrogram/label pair.

    A start column is drawn with ``np.random.randint``; the CNN input is the
    ``window``-column slice of ``spectrogram`` beginning there. The start column is
    integer-divided by ``stride`` to obtain a row index into ``label``: the LSTM
    input is the ``steps`` label rows from that index on, and the target is the
    single label row immediately after them.

    Parameters
    ----------
    spectrogram : 2-D array; the window is taken along axis 1.
    label : 2-D array indexed by row as described above.
    window : int, number of spectrogram columns in the CNN input (default 108).
    stride : int, divisor that converts a column offset into a label row (default 27).
    steps : int, number of label rows in the LSTM input (default 20).

    Returns
    -------
    tuple ``(cnn_input, lstm_input, target_label)``.

    Raises
    ------
    ValueError
        If the spectrogram does not have more than ``window`` columns.
    IndexError
        If ``label`` has no row at the computed target index.
    """
    n_columns = np.shape(spectrogram)[1]
    length = n_columns - window
    if length <= 0:
        # np.random.randint would raise an opaque "low >= high" ValueError here.
        raise ValueError(
            'spectrogram has {} columns; more than {} are required'.format(n_columns, window))

    # randint excludes its upper bound, so start offsets are 0 .. length-1 and the
    # right-most possible window (start == length) is never drawn. Kept as is so the
    # sampling behaviour and any seeded runs are unchanged.
    random_point = np.random.randint(0, length)
    first_step = random_point // stride

    cnn_input = spectrogram[:, random_point:random_point + window]
    lstm_input = label[first_step:first_step + steps, :]
    target_label = label[first_step + steps, :]

    return cnn_input, lstm_input, target_label

In [9]:
def generate_database(glob_spectrogram, glob_label, times=10):
    """Load spectrogram/label file pairs and draw random examples from each.

    The two path sequences are walked in lockstep. Each pair is loaded with
    ``np.load`` and ``times`` examples are drawn from it with
    ``generate_random_cnn_lstm_input_and_target_label``.

    Returns three arrays, in this order: the CNN inputs, the LSTM inputs and the
    target labels of all drawn examples.
    """
    samples = []
    for spec_path, label_path in zip(glob_spectrogram, glob_label):
        spectrogram = np.load(spec_path)
        label = np.load(label_path)
        # Each draw yields a (cnn_input, lstm_input, target_label) triple.
        samples.extend(
            generate_random_cnn_lstm_input_and_target_label(spectrogram, label)
            for _ in range(times)
        )

    cnn_batch = np.array([sample[0] for sample in samples])
    lstm_batch = np.array([sample[1] for sample in samples])
    target_batch = np.array([sample[2] for sample in samples])
    return cnn_batch, lstm_batch, target_batch

In [10]:
def inputs_to_model_split(spectrum_list, labels_list, order, num, start, ending, times=10):
    """Build model-ready arrays for one slice of one CV fold.

    ``order[num][start:ending]`` selects the file indices; the matching paths from
    ``spectrum_list`` and ``labels_list`` are handed to ``generate_database``
    together with ``times``. The CNN inputs get a trailing channel axis.

    Returns ``(lstm_inputs, cnn_inputs, labels_inputs)`` -- note the LSTM array
    comes first.
    """
    selected = order[num][start:ending]
    cnn_batch, lstm_batch, target_batch = generate_database(
        spectrum_list[selected], labels_list[selected], times
    )
    n_examples = cnn_batch.shape[0]
    cnn_batch = cnn_batch.reshape((n_examples, 108, 108, 1))
    return lstm_batch, cnn_batch, target_batch

In [11]:
# Generate the CV sets.
# random_state is deliberately not passed: without shuffle=True it never influenced
# the split, and current scikit-learn raises a ValueError for that combination.
# The folds are therefore the same contiguous, unshuffled index ranges as before.
kf = KFold(n_splits=3)

length = np.arange(num_samples)

# Fold sizes differ whenever num_samples is not divisible by n_splits (5000 / 3),
# and np.array() on such ragged lists is rejected by NumPy >= 1.24. Fill 1-D
# object arrays explicitly instead: element k is the integer index array of fold k.
train_order = np.empty(kf.get_n_splits(), dtype=object)
test_order = np.empty(kf.get_n_splits(), dtype=object)

for fold, (train_idx, test_idx) in enumerate(kf.split(range(num_samples))):
    train_order[fold] = length[train_idx]
    test_order[fold] = length[test_idx]

In [12]:
# Build the evaluation arrays from every file index in test fold 0.
# The slice end is the fold length itself; the previous "+ 1" overshot by one and
# only worked because slicing silently clamps to the end of the array.
lstm_test, cnn_test, labels_test = inputs_to_model_split(spec_list, labe_list, test_order, 0, 0, test_order[0].shape[0])

In [13]:
# Rebuild the architecture with its default compile settings, then load the saved
# weights from the HDF5 file (relative path, resolved against the working directory).
classifier = classifier_5()
classifier.load_weights('weights/weight-24units-model4-enopadding.hdf5')

In [14]:
# The LSTM array is passed before the CNN array because the model was built with
# inputs=[lstm_inputs, cnn_inputs]; load_test_score forwards its 2nd and 3rd
# arguments to predict() in the order given, whatever its parameter names say.
accuracy = load_test_score(classifier, lstm_test, cnn_test, labels_test)

16670 samples are loaded for testing
1419 testing samples are predicted wrong


In [15]:
# Display the accuracy returned by load_test_score.
accuracy

0.914877024595081