In [1]:
import pandas as pd
import numpy as np
import glob
import gc
import keras 
from keras.layers import LSTM, Dense, TimeDistributed, Dropout, MaxPooling2D, Conv2D, Input
from keras.layers import Flatten, BatchNormalization, Activation, Reshape, concatenate
from keras.models import Model
import random
from sklearn.model_selection import KFold
import keras.backend as K
from sklearn.utils import class_weight

# Make sure automatic garbage collection is switched on for this kernel
# (this has no effect if it was never disabled).
gc.enable()

Using TensorFlow backend.


In [2]:
# glob.glob returns paths in arbitrary, filesystem-dependent order. The rest of the
# notebook pairs spectrums_index[i] with labels_index[i] by position, so both listings
# are sorted to make that pairing deterministic.
# NOTE(review): this assumes matching spectrogram/label files sort identically -- confirm
# against the file-naming scheme used in ../preparations.
spectrums_index = np.array(sorted(glob.glob('../preparations/spectrograms/*')))
labels_index = np.array(sorted(glob.glob('../preparations/labels/*')))

In [3]:
# Experiment configuration.
num_samples = 2000  # number of spectrogram/label file pairs taken from the index
steps_1 = 20        # time steps of the `lstm_inputs` (label history) model input
steps_2 = 27        # time steps of the `lstm2_inputs` (spectrogram window) model input
spec_list = spectrums_index[:num_samples]
labe_list = labels_index[:num_samples]
# `random_state` is only meaningful together with `shuffle=True`: older scikit-learn
# ignores it, recent versions raise a ValueError. It is dropped here so the folds stay
# the same contiguous, unshuffled splits as before. For a seeded shuffle use
# KFold(n_splits=3, shuffle=True, random_state=7) instead.
kf = KFold(n_splits=3)

In [4]:
def inputs_to_model(spectrum_list, labels_list, order, num, start, ending):
    """Build both model inputs and the targets for one slice of a fold.

    For every file index in ``order[num][start:ending]`` the spectrogram and the
    label array are loaded with ``np.load``. The spectrogram is cut into
    consecutive, non-overlapping windows of ``steps_2`` columns, one window per
    label row. Every window that has the full ``(108, steps_2)`` shape yields
    one sample made of:

    * the label history: the ``steps_1`` label rows that precede the current
      row within the same file (zero rows where the file has not started yet),
    * the transposed spectrogram window, shape ``(steps_2, 108)``,
    * the current label row, used as the target.

    The window width and the history length follow the module-level ``steps_2``
    and ``steps_1`` (previously hard-coded as 27 and 20), so the produced arrays
    stay consistent with the model input shapes when those constants change.

    Parameters
    ----------
    spectrum_list, labels_list : sequence of str
        Paths to ``.npy`` files, paired by position.
    order : sequence of index arrays
        One array of file indices per fold.
    num : int
        Which fold of ``order`` to use.
    start, ending : int
        Slice bounds applied to ``order[num]``.

    Returns
    -------
    tuple of np.ndarray
        ``(lstm_inputs, lstm2_inputs, labels_inputs)``.

    Side effects: prints the elapsed seconds and the shape of ``lstm2_inputs``.
    """
    import time  # kept local: the notebook's import cell does not import time

    n_bins = 108   # spectrogram rows per window; the classifiers' lstm2 Input expects 108
    n_labels = 24  # label vector width; the classifiers' lstm Input and output use 24

    started_at = time.time()
    history_samples = []
    window_samples = []
    targets = []

    for file_idx in order[num][start:ending]:
        spectrum = np.load(spectrum_list[file_idx])
        labels = np.load(labels_list[file_idx])

        # The history restarts from zeros for every file.
        history = np.zeros((steps_1, n_labels))
        for count, label_row in enumerate(labels):
            window = spectrum[:, steps_2 * count:steps_2 * (count + 1)]
            # Windows that run past the end of the spectrogram are narrower; skip them.
            if window.shape == (n_bins, steps_2):
                history_samples.append(history)
                window_samples.append(np.transpose(window))
                targets.append(label_row)

            # Slide the history forward by one step, even when the window was skipped.
            history = np.append(history[1:, :], label_row.reshape((1, n_labels)), axis=0)

    lstm_inputs = np.array(history_samples)
    lstm2_inputs = np.array(window_samples)
    labels_inputs = np.array(targets)
    print(time.time() - started_at)
    print(lstm2_inputs.shape)
    return lstm_inputs, lstm2_inputs, labels_inputs

In [5]:
def train_test_whole(classifier, num, name, spectrum_list, labels_list, epochs=100, batchsize=32,
                     validation_data=None):
    """Train ``classifier`` on training fold ``num``, save its weights, and reset the backend.

    Parameters
    ----------
    classifier : keras Model
        Compiled two-input model (inputs ordered ``[lstm_inputs, lstm2_inputs]``).
    num : int
        Fold index into the module-level ``train_order`` / ``test_order``.
    name : str
        Path the weights are written to (an existing file is overwritten).
    spectrum_list, labels_list : sequence of str
        Paired ``.npy`` file paths handed to ``inputs_to_model``.
    epochs, batchsize : int
        Passed to ``classifier.fit``.
    validation_data : tuple, optional
        ``([lstm, lstm2], labels)`` to validate against. When omitted, fold 0
        reuses the module-level ``lstm_test`` / ``lstm2_test`` / ``labels_test``
        arrays (which the notebook builds from test fold 0). Any other fold
        loads its own test split, so validation never overlaps the training
        fold.

    Returns
    -------
    The History object returned by ``classifier.fit``.

    Note: ``K.clear_session()`` is called at the end, so ``classifier`` must
    not be used by the caller afterwards.
    """
    lstm_inputs, lstm2_inputs, labels_inputs = inputs_to_model(
        spectrum_list, labels_list, train_order, num, 0, len(train_order[num]))

    if validation_data is None:
        if num == 0:
            # Already in memory; avoids holding a second copy of the fold-0 test arrays.
            validation_data = ([lstm_test, lstm2_test], labels_test)
        else:
            val_lstm, val_lstm2, val_labels = inputs_to_model(
                spectrum_list, labels_list, test_order, num, 0, len(test_order[num]))
            validation_data = ([val_lstm, val_lstm2], val_labels)

    history = classifier.fit([lstm_inputs, lstm2_inputs], labels_inputs, epochs=epochs,
                             batch_size=batchsize, verbose=1, validation_data=validation_data)

    classifier.save_weights(name, overwrite=True)
    # The former `del classifier` only unbound the local name and freed nothing;
    # clearing the backend session is what actually releases the model's graph.
    K.clear_session()
    return history

In [6]:
# Materialise the K-fold split as arrays of file indices, one entry per fold.
train_order = []
test_order = []
length = np.arange(num_samples)

for train_idx, test_idx in kf.split(range(num_samples)):
    train_order.append(length[train_idx])
    test_order.append(length[test_idx])

# The folds can differ in length (2000 samples do not divide evenly by 3). Building an
# array from unequal-length sequences without an explicit dtype is deprecated and
# rejected by newer NumPy releases, so the object dtype is requested explicitly.
train_order = np.array(train_order, dtype=object)
test_order = np.array(test_order, dtype=object)

In [7]:
# Load the arrays for test fold 0 once; train_test_whole uses these module-level
# names as its validation data.
lstm_test, lstm2_test, labels_test = inputs_to_model(
    spectrum_list=spec_list,
    labels_list=labe_list,
    order=test_order,
    num=0,
    start=0,
    ending=test_order[0].shape[0] + 1,
)

10.515265226364136
(82492, 27, 108)


# Model 0: 2 stacked LSTM layers in the spectrogram branch

In [8]:
def classifier_0(optimizer='adam', loss='binary_crossentropy', metrix='accuracy'):
    """Build and compile the two-input model with a 2-layer LSTM stack on ``lstm2_inputs``.

    Inputs (in this order): ``lstm_inputs`` of shape ``(steps_1, 24)`` and
    ``lstm2_inputs`` of shape ``(steps_2, 108)``. Each input runs through its
    own LSTM/Dense branch ending in 24 ReLU units; the branches are
    concatenated, batch-normalised and mapped to a 24-unit sigmoid output.

    ``optimizer``, ``loss`` and ``metrix`` are forwarded to ``model.compile``
    (``metrix`` as a single-element metrics list).
    """
    # Branch fed by `lstm2_inputs`. It is created first so that layer creation
    # order matches the original definition.
    window_in = Input(shape=(steps_2, 108), name='lstm2_inputs')
    window_branch = window_in
    for units, keep_sequence in ((256, True), (128, False)):
        window_branch = LSTM(units, return_sequences=keep_sequence)(window_branch)
    for width in (64, 24):
        window_branch = Dense(width, activation='relu')(window_branch)

    # Branch fed by `lstm_inputs`.
    history_in = Input(shape=(steps_1, 24), name='lstm_inputs')
    history_branch = LSTM(32, return_sequences=True)(history_in)
    history_branch = LSTM(16)(history_branch)
    history_branch = Dense(24, activation='relu')(history_branch)

    # Merge both branches and produce the 24 sigmoid outputs.
    merged = concatenate([history_branch, window_branch])
    merged = BatchNormalization()(merged)
    merged = Dense(64, activation='relu')(merged)
    prediction = Dense(24, activation='sigmoid')(merged)

    model = Model(inputs=[history_in, window_in], outputs=[prediction])
    model.compile(optimizer=optimizer, loss=loss, metrics=[metrix])
    return model

In [14]:
# classifier_0 on fold 0, Adam with learning rate 0.005.
adam = keras.optimizers.Adam(lr=0.005)
classifier = classifier_0(optimizer=adam)
train_test_whole(
    classifier,
    num=0,
    name='weights/weight-75overlap-model0.hdf5',
    spectrum_list=spec_list,
    labels_list=labe_list,
    epochs=20,
)
gc.collect()

3.143380880355835
(165041, 27, 108)
Train on 165041 samples, validate on 82492 samples
Epoch 1/50
Epoch 2/50
  7616/165041 [>.............................] - ETA: 7:47 - loss: 0.0980 - acc: 0.9762

KeyboardInterrupt: 

In [9]:
# classifier_0 on fold 0, RMSprop with its default settings.
rmsprop = keras.optimizers.RMSprop()
classifier = classifier_0(optimizer=rmsprop)
train_test_whole(
    classifier,
    num=0,
    name='weights/weight-75overlap-model0-rmsprop.hdf5',
    spectrum_list=spec_list,
    labels_list=labe_list,
    epochs=20,
)
gc.collect()

3.0588245391845703
(165041, 27, 108)
Train on 165041 samples, validate on 82492 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
 16896/165041 [==>...........................] - ETA: 7:05 - loss: 0.0874 - acc: 0.9766

KeyboardInterrupt: 

# Model 1: 3 stacked LSTM layers in the spectrogram branch

In [12]:
def classifier_1(optimizer='adam', loss='binary_crossentropy', metrix='accuracy'):
    """Build and compile the two-input model with a 3-layer LSTM stack on ``lstm2_inputs``.

    Inputs (in this order): ``lstm_inputs`` of shape ``(steps_1, 24)`` and
    ``lstm2_inputs`` of shape ``(steps_2, 108)``. Each input runs through its
    own LSTM/Dense branch ending in 24 ReLU units; the branches are
    concatenated, batch-normalised and mapped to a 24-unit sigmoid output.

    ``optimizer``, ``loss`` and ``metrix`` are forwarded to ``model.compile``
    (``metrix`` as a single-element metrics list).
    """
    # Branch fed by `lstm2_inputs`: three 128-unit LSTMs, only the last one
    # collapsing the sequence. Created first to keep the original layer order.
    window_in = Input(shape=(steps_2, 108), name='lstm2_inputs')
    window_branch = window_in
    for units, keep_sequence in ((128, True), (128, True), (128, False)):
        window_branch = LSTM(units, return_sequences=keep_sequence)(window_branch)
    for width in (64, 24):
        window_branch = Dense(width, activation='relu')(window_branch)

    # Branch fed by `lstm_inputs`.
    history_in = Input(shape=(steps_1, 24), name='lstm_inputs')
    history_branch = LSTM(32, return_sequences=True)(history_in)
    history_branch = LSTM(16)(history_branch)
    history_branch = Dense(24, activation='relu')(history_branch)

    # Merge both branches and produce the 24 sigmoid outputs.
    merged = concatenate([history_branch, window_branch])
    merged = BatchNormalization()(merged)
    merged = Dense(64, activation='relu')(merged)
    prediction = Dense(24, activation='sigmoid')(merged)

    model = Model(inputs=[history_in, window_in], outputs=[prediction])
    model.compile(optimizer=optimizer, loss=loss, metrics=[metrix])
    return model

In [20]:
# classifier_1 on fold 0, Adam with learning rate 0.005.
adam = keras.optimizers.Adam(lr=0.005)
classifier = classifier_1(optimizer=adam)
train_test_whole(
    classifier,
    num=0,
    name='weights/weight-75overlap-model1.hdf5',
    spectrum_list=spec_list,
    labels_list=labe_list,
    epochs=50,
)
gc.collect()

3.100712776184082
(165041, 27, 108)
Train on 165041 samples, validate on 82492 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
  5888/165041 [>.............................] - ETA: 10:26 - loss: 0.0928 - acc: 0.9767

KeyboardInterrupt: 

In [17]:
# classifier_1 on fold 0, Adam with the higher learning rate 0.01.
# This run used to write to 'weights/weight-75overlap-model1.hdf5', the same file as the
# lr=0.005 run, so whichever finished last silently overwrote the other. It now has its
# own file name, following the '-rmsprop' suffix convention of the other variant runs.
adam = keras.optimizers.Adam(lr=0.01)
classifier = classifier_1(optimizer=adam)
train_test_whole(classifier, 0, 'weights/weight-75overlap-model1-lr0.01.hdf5', spec_list, labe_list, epochs=50)
gc.collect()

3.3988873958587646
(165041, 27, 108)
Train on 165041 samples, validate on 82492 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
  1856/165041 [..............................] - ETA: 10:18 - loss: 0.0996 - acc: 0.9761

KeyboardInterrupt: 

In [19]:
# classifier_1 on fold 0, RMSprop with its default settings.
rmsprop = keras.optimizers.RMSprop()
classifier = classifier_1(optimizer=rmsprop)
train_test_whole(
    classifier,
    num=0,
    name='weights/weight-75overlap-model1-rmsprop.hdf5',
    spectrum_list=spec_list,
    labels_list=labe_list,
    epochs=20,
)
gc.collect()

12.801647901535034
(165041, 27, 108)
Train on 165041 samples, validate on 82492 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20

KeyboardInterrupt: 

# Model 2: 4 stacked LSTM layers in the spectrogram branch

In [15]:
def classifier_2(optimizer='adam', loss='binary_crossentropy', metrix='accuracy'):
    """Build and compile the two-input model with a 4-layer LSTM stack on ``lstm2_inputs``.

    Inputs (in this order): ``lstm_inputs`` of shape ``(steps_1, 24)`` and
    ``lstm2_inputs`` of shape ``(steps_2, 108)``. Each input runs through its
    own LSTM/Dense branch ending in 24 ReLU units; the branches are
    concatenated, batch-normalised and mapped to a 24-unit sigmoid output.

    ``optimizer``, ``loss`` and ``metrix`` are forwarded to ``model.compile``
    (``metrix`` as a single-element metrics list).
    """
    # Branch fed by `lstm2_inputs`: LSTMs of 128, 256, 128 and 128 units, only
    # the last one collapsing the sequence. Created first to keep the original
    # layer order.
    window_in = Input(shape=(steps_2, 108), name='lstm2_inputs')
    window_branch = window_in
    for units, keep_sequence in ((128, True), (256, True), (128, True), (128, False)):
        window_branch = LSTM(units, return_sequences=keep_sequence)(window_branch)
    for width in (64, 24):
        window_branch = Dense(width, activation='relu')(window_branch)

    # Branch fed by `lstm_inputs`.
    history_in = Input(shape=(steps_1, 24), name='lstm_inputs')
    history_branch = LSTM(32, return_sequences=True)(history_in)
    history_branch = LSTM(16)(history_branch)
    history_branch = Dense(24, activation='relu')(history_branch)

    # Merge both branches and produce the 24 sigmoid outputs.
    merged = concatenate([history_branch, window_branch])
    merged = BatchNormalization()(merged)
    merged = Dense(64, activation='relu')(merged)
    prediction = Dense(24, activation='sigmoid')(merged)

    model = Model(inputs=[history_in, window_in], outputs=[prediction])
    model.compile(optimizer=optimizer, loss=loss, metrics=[metrix])
    return model

In [16]:
# classifier_2 on fold 0, Adam with learning rate 0.005.
adam = keras.optimizers.Adam(lr=0.005)
classifier = classifier_2(optimizer=adam)
train_test_whole(
    classifier,
    num=0,
    name='weights/weight-75overlap-model2.hdf5',
    spectrum_list=spec_list,
    labels_list=labe_list,
    epochs=50,
)
gc.collect()

3.280686855316162
(165041, 27, 108)
Train on 165041 samples, validate on 82492 samples
Epoch 1/50
Epoch 2/50
  9056/165041 [>.............................] - ETA: 11:52 - loss: 0.0955 - acc: 0.9768

KeyboardInterrupt: 