In [1]:
import pandas as pd
import numpy as np
import glob
import gc
import keras 
from keras.layers import LSTM, Dense, TimeDistributed, Dropout, MaxPooling2D, Conv2D, Input
from keras.layers import Flatten, BatchNormalization, Activation, Reshape, concatenate
from keras.models import Model
import random
from sklearn.model_selection import KFold
import keras.backend as K

gc.enable()

Using TensorFlow backend.


In [7]:
num_samples = 2000
height = 108
width = 108
steps = 20
spec_list = spectrums_index[:num_samples]
labe_list = labels_index[:num_samples]

In [6]:
spectrums_index = np.array(glob.glob('../preparations/spectrograms/*'))
labels_index = np.array(glob.glob('../preparations/labels/*'))

In [3]:
def classifier_1(optimizer='adam', loss='binary_crossentropy', metrix='accuracy'):
    #cnn
    cnn_inputs = Input(shape=(height, width, 1), name='cnn_inputs')
    layers = BatchNormalization()(cnn_inputs)
    layers = Conv2D(32, (3,3), padding="same", activation="relu")(layers)
    layers = MaxPooling2D(2,2, border_mode='same')(layers)
    layers = Conv2D(32, (3,3), activation='relu')(layers)
    layers = BatchNormalization()(layers)
    layers = MaxPooling2D(2,2, border_mode='same')(layers)
    layers = Flatten()(layers)
    layers = Dropout(0.3)(layers)
    layers = Dense(32, activation='relu')(layers)
    
    #lstm
    lstm_inputs = Input(shape=(steps, 24), name='lstm_inputs')
    lstm_layers = LSTM(16, return_sequences=True)(lstm_inputs)
    lstm_layers = LSTM(16)(lstm_layers)
    lstm_layers = Dense(24, activation='relu')(lstm_layers)
    
    #main route
    main_ = concatenate([lstm_layers, layers])
    main_ = BatchNormalization()(main_)
    main_ = Dense(64, activation='relu')(main_)
    output = Dense(24, activation='sigmoid')(main_)
    
    model = Model(inputs=[lstm_inputs, cnn_inputs], outputs=[output])
    model.compile(optimizer=optimizer, loss=loss, metrics=[metrix])
    return model

In [4]:
def load_test_score(model, cnn_test, lstm_test, y_test):
    prediction = model.predict([cnn_test, lstm_test])
    wrong = 0
    for i in range(prediction.shape[0]):
        for j in range(prediction.shape[1]):
            if abs(prediction[i][j] - y_test[i][j]) > 0.5:
                wrong += 1
                break
    print('{} samples are loaded for testing'.format(prediction.shape[0]))
    print('{} testing samples are predicted wrong'.format(wrong))
    accuracy = 1 - wrong/(prediction.shape[0])
    return accuracy

In [5]:
def inputs_to_model(spectrum_list, labels_list, order, num, start, ending):
    lstm_output = []
    cnn_output = []
    y_train = []
    
    import time
    start_ = time.time()

    for file in order[num][start:ending]:
        spectrum = np.load(spectrum_list[file])
        labels = np.load(labels_list[file])
        
        _lstm = np.zeros((20, 24))
        for count, i in enumerate(labels):
            _cnn = spectrum[:, 27*count:108+27*count]
            if _cnn.shape == (height, width):
                lstm_output.append(_lstm)
                cnn_output.append(_cnn.reshape((height, width, 1)))
                y_train.append(i)
        
            _lstm = _lstm[1:, :]
            _lstm = np.append(_lstm, i.reshape((1, 24)), axis=0)
        
    lstm_inputs = np.array(lstm_output)
    cnn_inputs = np.array(cnn_output)
    labels_inputs = np.array(y_train)
    print(time.time()-start_)
    return lstm_inputs, cnn_inputs, labels_inputs

In [8]:
#Generate the CV sets
kf = KFold(n_splits=3, random_state=7)

train_order = []
test_order = []
length = np.arange(num_samples)

for x, y in kf.split(range(num_samples)):
    train_order.append(length[x])
    test_order.append(length[y])
train_order = np.array(train_order)
test_order = np.array(test_order)

In [9]:
lstm_test, cnn_test, labels_test = inputs_to_model(spec_list, labe_list, test_order, 0, 0, test_order[0].shape[0]+1)

2.5931005477905273


In [12]:
classifier = classifier_1(optimizer='adam')
classifier.load_weights('weights/weight-75overlap-model1.hdf5')

  
  if __name__ == '__main__':


In [13]:
accuracy = load_test_score(classifier, lstm_test, cnn_test, labels_test)

82075 samples are loaded for testing
9509 testing samples are predicted wrong


In [14]:
accuracy

0.8841425525434055