In [2]:
import pandas as pd
import numpy as np
import glob
import gc
import keras 
from keras.layers import LSTM, Dense, TimeDistributed, Dropout, MaxPooling2D, Conv2D, Input
from keras.layers import Flatten, BatchNormalization, Activation, Reshape, concatenate
from keras.models import Model
import random
from sklearn.model_selection import KFold
import keras.backend as K
from sklearn.utils import class_weight

Using TensorFlow backend.


In [3]:
gc.enable()

In [4]:
steps = 5
height = 108
width = 108

In [5]:
#change the path to where you save the input numpy arrays
lstm = np.array(glob.glob('E:/notes_database/lstm_inputs_2/*'))
cnn = np.array(glob.glob('E:/notes_database/cnn_inputs_2/*'))
y_train = np.array(glob.glob('E:/notes_database/y_train_2/*'))

In [6]:
#The function analyzes the actual accuracy
def load_test_score(model, cnn_test, lstm_test, y_test):
    prediction = model.predict([cnn_test, lstm_test])
    error = (prediction - y_test) ** 2
    error = (error/y_test.shape[0]).sum()
    wrong = 0
    for x in range(prediction.shape[0]):
        for y in range(prediction.shape[1]):
            if abs(prediction[x][y] - y_test[x][y]) > 0.1:
                wrong += 1
                break
    accuracy = 1 - wrong/(prediction.shape[0])
    return error, accuracy

In [7]:
def inputs_to_model(order, num, start, ending):
    cnn_inputs = np.load(cnn[order[num][0]])
    labels_inputs = np.load(y_train[order[num][0]])
    lstm_inputs = np.load(lstm[order[num][0]])
    
    count = 0
    ce = 0
    for file in order[num][start+1:ending]:
        if count == ce + 50:
            print(count)
            ce = count
        count+=1
        
        lstm_inputs = np.append(lstm_inputs, np.load(lstm[file]), axis=0)
        cnn_inputs = np.append(cnn_inputs, np.load(cnn[file]), axis=0)
        labels_inputs = np.append(labels_inputs, np.load(y_train[file]), axis=0)
        
    return lstm_inputs, cnn_inputs, labels_inputs

In [8]:
def main(optimizer='adam', loss='mean_squared_error', metrix='accuracy'):
    #cnn
    cnn_inputs = Input(shape=(height, width, 1), name='cnn_inputs')
    layers = Conv2D(32, (3,3), padding="same", activation="relu")(cnn_inputs)
    layers = BatchNormalization()(layers)
    layers = MaxPooling2D(2,2, border_mode='same')(layers)
    layers = Conv2D(32, (3,3), activation='relu')(layers)
    layers = BatchNormalization()(layers)
    layers = MaxPooling2D(2,2, border_mode='same')(layers)
    layers = Conv2D(64, (3,3), activation='relu')(layers)
    layers = BatchNormalization()(layers)
    layers = MaxPooling2D(2,2, border_mode='same')(layers)
    layers = Conv2D(64, (3,3), activation='relu')(layers)
    layers = BatchNormalization()(layers)
    layers = MaxPooling2D(2,2, border_mode='same')(layers)
    layers = Flatten()(layers)
    layers = Dropout(0.3)(layers)
    layers = Dense(32, activation='relu')(layers)
    
    #lstm
    lstm_inputs = Input(shape=(steps, 24), name='lstm_inputs')
    lstm_layers = LSTM(16, return_sequences=True)(lstm_inputs)
    lstm_layers = LSTM(32)(lstm_layers)
    lstm_layers = Dense(32, activation='relu')(lstm_layers)
    lstm_layers = Dense(24, activation='relu')(lstm_layers)
    
    #main route
    main_ = concatenate([layers, lstm_layers])
    main_ = BatchNormalization()(main_)
    main_ = Dense(64, activation='relu')(main_)
    main_ = Dense(64, activation='relu')(main_)
    output = Dense(24, activation='sigmoid')(main_)
    
    model = Model(inputs=[cnn_inputs, lstm_inputs], outputs=[output])
    model.compile(optimizer=optimizer, loss=loss, metrics=[metrix])
    return model

In [9]:
def main_2(optimizer, loss):
    #cnn
    cnn_inputs = Input(shape=(height, width, 1), name='cnn_inputs')
    layers = Conv2D(32, (3,3), padding="same", activation="relu")(cnn_inputs)
    layers = BatchNormalization()(layers)
    layers = Dropout(0.3)(layers)
    layers = MaxPooling2D(2,2, border_mode='same')(layers)
    layers = Conv2D(32, (3,3), activation='relu')(layers)
    layers = Conv2D(64, (3,3), activation='relu')(layers)
    layers = BatchNormalization()(layers)
    layers = MaxPooling2D(2,2, border_mode='same')(layers)
    layers = Conv2D(64, (3,3), activation='relu')(layers)
    layers = Conv2D(32, (3,3), activation='relu')(layers)
    layers = BatchNormalization()(layers)
    layers = MaxPooling2D(2,2, border_mode='same')(layers)
    layers = Conv2D(32, (3,3), activation='relu')(layers)
    layers = BatchNormalization()(layers)
    layers = Flatten()(layers)
    layers = Dense(32, activation='relu')(layers)
    
    #lstm
    lstm_inputs = Input(shape=(steps, 24), name='lstm_inputs')
    lstm_layers = LSTM(16, return_sequences=True)(lstm_inputs)
    lstm_layers = LSTM(32, return_sequences=True)(lstm_layers)
    lstm_layers = LSTM(64, return_sequences=True)(lstm_layers)
    lstm_layers = LSTM(32)(lstm_layers)
    lstm_layers = Dense(32, activation='relu')(lstm_layers)
    lstm_layers = Dense(24, activation='relu')(lstm_layers)
    
    #main route
    main_ = concatenate([layers, lstm_layers])
    main_ = BatchNormalization()(main_)
    main_ = Dense(64, activation='relu')(main_)
    main_ = Dense(64, activation='relu')(main_)
    output = Dense(24, activation='sigmoid')(main_)
    
    model = Model(inputs=[cnn_inputs, lstm_inputs], outputs=[output])
    model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])
    return model

In [None]:
def class_weights(y):
    samples = []  
    for i in range(y.shape[0]):
        pos = np.where(y[i] == 1)[0][0]
        samples.append(pos)
    samples = np.array(samples)
    classes = np.unique(samples)
    class_weights = class_weight.compute_class_weight('balanced', classes, samples)
    return classes, class_weights

In [10]:
def train_test_whole(classifier, num, name, epochs=100, batchsize=32):
    lstm_test, cnn_test, labels_test = inputs_to_model(test_order, num, 0, test_order[num].shape[0]+1)
    lstm_inputs, cnn_inputs, labels_inputs = inputs_to_model(train_order, num, 0, train_order[num].shape[0]+1)
    
    sample_weights = class_weight.compute_sample_weight('balanced', labels_inputs)
    classifier.fit([cnn_inputs, lstm_inputs], labels_inputs, epochs=epochs, batch_size=batchsize, verbose=1,
                  validation_data = ([cnn_test, lstm_test], labels_test), sample_weight=sample_weights)
    
    #CV_loss, accuracy = load_test_score(classifier, cnn_test, lstm_test, y_test)
    #print('val_loss: ', CV_loss)
    #print('val_accuarcy', accuracy)
    
    classifier.save_weights(name, overwrite=True)   
    del classifier
    K.clear_session()

In [11]:
#Generate the CV sets
kf = KFold(n_splits=3, random_state=7)

train_order = []
test_order = []
length = np.arange(len(lstm))

for x,y in kf.split(length[100:]):
    train_order.append(length[x])
    test_order.append(length[y])
train_order = np.array(train_order)
test_order = np.array(test_order)

In [11]:
adam = keras.optimizers.Adam(lr=0.01)
classifier = main(optimizer=adam, loss='binary_crossentropy')
train_test_whole(classifier, 0, 'weight-spectrums2-temp-lr0.01.hdf5', epochs=50)
gc.collect()

  
  if __name__ == '__main__':
  if sys.path[0] == '':
  from ipykernel import kernelapp as app


50
Train on 35791 samples, validate on 18593 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


32524

In [11]:
def train_test_split(num, name, epochs=100, batchsize=32, optimizer='adam', loss='mean_squared_error',
                     metrix='accuracy', split=2):
    classifier = main(optimizer, loss, metrix)
    
    lstm_test, cnn_test, labels_test = inputs_to_model(test_order, num, 0, test_order[num].shape[0]+1)
    for epoch in range(epochs):
        print("""
        
        THIS IS EPOCH {}""".format(epoch))
        for i in range(split):
            lstm,cnn,labels = inputs_to_model(train_order, num, (train_order[num].shape[0]*i)//split, 
                                              (train_order[num].shape[0]*(i+1))//split+1)
            sample_weights = class_weight.compute_sample_weight('balanced', labels)
            classifier.fit([cnn, lstm], labels, epochs=1, batch_size=batchsize, verbose=1,
                  validation_data = ([cnn_test, lstm_test], labels_test), sample_weight=sample_weights)
            
            # To save memory, it's possible to train the model first then test on the validation data.
            #CV_loss, accuracy = load_test_score(classifier, cnn_test, lstm_test, labels_test)
            #print('val_loss: ', CV_loss)
            #print('val_accuarcy', accuracy)
    
    classifier.save_weights(name, overwrite=True)
    del classifier
    K.clear_session()

In [12]:
#Generate the CV sets
kf = KFold(n_splits=3, random_state=7)

train_order = []
test_order = []
length = np.arange(len(lstm))

for x,y in kf.split(length):
    train_order.append(length[x])
    test_order.append(length[y])
train_order = np.array(train_order)
test_order = np.array(test_order)

In [13]:
adam = keras.optimizers.Adam(lr=0.01)
classifier = main(optimizer=adam, loss='binary_crossentropy')
train_test_split(classifier, 0, 'weight-split-test.hdf5', epochs=10)
gc.collect()

  
  if __name__ == '__main__':
  if sys.path[0] == '':
  from ipykernel import kernelapp as app


50
50
Train on 36274 samples, validate on 36454 samples
Epoch 1/1
50
Train on 36319 samples, validate on 36454 samples
Epoch 1/1
50
Train on 36274 samples, validate on 36454 samples
Epoch 1/1
50
Train on 36319 samples, validate on 36454 samples
Epoch 1/1
50
Train on 36274 samples, validate on 36454 samples
Epoch 1/1
50
Train on 36319 samples, validate on 36454 samples
Epoch 1/1
50
Train on 36274 samples, validate on 36454 samples
Epoch 1/1
50
Train on 36319 samples, validate on 36454 samples
Epoch 1/1
50
Train on 36274 samples, validate on 36454 samples
Epoch 1/1
50
Train on 36319 samples, validate on 36454 samples
Epoch 1/1
50
Train on 36274 samples, validate on 36454 samples
Epoch 1/1
50
Train on 36319 samples, validate on 36454 samples
Epoch 1/1
50
Train on 36274 samples, validate on 36454 samples
Epoch 1/1
50
Train on 36319 samples, validate on 36454 samples
Epoch 1/1
50
Train on 36274 samples, validate on 36454 samples
Epoch 1/1
50
Train on 36319 samples, validate on 36454 samples

32524

In [15]:
adam = keras.optimizers.Adam(lr=0.01)
classifier = main(optimizer=adam, loss='binary_crossentropy')
train_test_whole(classifier, 0, 'weight-spectrums2-all-lr0.01.hdf5', epochs=30)
gc.collect()

  
  if __name__ == '__main__':
  if sys.path[0] == '':
  from ipykernel import kernelapp as app


50
100
50
Train on 72054 samples, validate on 36454 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


52726

In [14]:
adam = keras.optimizers.Adam(lr=0.01)
classifier = main_2(optimizer=adam, loss='binary_crossentropy')
train_test_whole(classifier, 0, 'weight-spectrums2-main_2-lr0.01.hdf5', epochs=30)
gc.collect()

  import sys
  # This is added back by InteractiveShellApp.init_path()
  from ipykernel import kernelapp as app


50
50
100
Train on 72054 samples, validate on 36454 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


64158