In [1]:
import keras
from keras.models import Sequential, Model, model_from_json
from keras.layers import LSTM, Dense, Dropout, Input, Flatten, concatenate, Reshape
from keras.optimizers import RMSprop, SGD
from keras.layers.normalization import BatchNormalization
from keras.callbacks import TensorBoard, ModelCheckpoint, EarlyStopping, CSVLogger, History, Callback, LambdaCallback
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np
import os
import time
from matplotlib import pyplot
import glob
import csv
import re
import tensorflow as tf

Using TensorFlow backend.


In [11]:
def ShuffleData(x_order_addr, y_order_addr):
    x_order = np.load(x_order_addr)
    y_order = np.load(y_order_addr)
    idx_shuffle = np.array(range(x_order.shape[0]))
    np.random.shuffle(idx_shuffle)
    x_shuffle = x_order[idx_shuffle]
    y_shuffle = y_order[idx_shuffle]
    return x_shuffle, y_shuffle

def DataTwoStream(x_total, y_total, val_ratio = 0.2):
    index_split = int(x_total.shape[0]*0.8)
    x_train_a = x_total[:index_split,:,0]
    x_train_a = x_train_a.reshape((x_train_a.shape[0],x_train_a.shape[1],1))
    x_train_b = x_total[:index_split,:,1]
    x_train_b =x_train_b.reshape((x_train_a.shape[0],x_train_a.shape[1],1))
    y_train = y_total[:index_split]
    x_val_a = x_total[index_split:,:,0]
    x_val_a = x_val_a.reshape((x_val_a.shape[0],x_val_a.shape[1],1))
    x_val_b = x_total[index_split:,:,1]
    x_val_b = x_val_b.reshape((x_val_a.shape[0],x_val_a.shape[1],1))
    y_val = y_total[index_split:]
    return x_train_a, x_train_b, y_train, x_val_a, x_val_b, y_val

def GetBinData(class_name):
    file_list = os.listdir(os.path.join(os.getcwd(), "data", "npy", "bin_order"))
    
    for i in range(len(file_list)):
        re_x = class_name + '.*(?<!y\.npy)$'
        re_y = class_name + '(.+?)y.npy'
        mx = re.search(re_x, file_list[i])
        my = re.search(re_y, file_list[i])
        if mx:
            filex_idx = i
        if my:
            filey_idx = i
    
    x_order_addr = os.path.join(os.getcwd(), "data", "npy", "bin_order", file_list[filex_idx])
    y_order_addr = os.path.join(os.getcwd(), "data", "npy", "bin_order", file_list[filey_idx])
    x_shuffle, y_shuffle = ShuffleData(x_order_addr, y_order_addr)
    x_train_a, x_train_b, y_train, x_val_a, x_val_b, y_val = DataTwoStream(x_shuffle, y_shuffle)
    class_name = file_list[i][:-8]
    return x_train_a, x_train_b, y_train, x_val_a, x_val_b, y_val

Since I only want to train the last layer, I freeze all the pretrained weights in the 5-classes model which is called base_model here. (based_model + dense) is called head_model here. 

In [21]:
x_train_a, x_train_b, y_train, x_val_a, x_val_b, y_val = GetBinData('A4')

n_classes = 2
Initializer=keras.initializers.glorot_normal(seed=None)
#Initializer=keras.initializers.RandomUniform(minval=-0.05, maxval=0.05, seed=None)
#optimizer = RMSprop(lr=0.00001, rho=0.9, epsilon=1e-6)
optimizer = SGD(lr=0.00001)
json_file = open('model_5classes_dense64.json', 'r')
loaded_model_json = json_file.read()
json_file.close()
base_model = model_from_json(loaded_model_json)
base_model.load_weights("471-0.907-5classes-dense64.hdf5")
######################################## New Model architecture ##############################################
x = base_model.output
x = Dropout(0.2)(x)
x = BatchNormalization()(x)
#x = Dense(2, activation='relu', kernel_initializer = Initializer, name='dense_-2')(x)
#x = Dropout(0.2)(x)
#x = BatchNormalization()(x)
predictions = Dense(1, activation = 'softmax', kernel_initializer = Initializer, name='dense_-1')(x)
head_model = Model(input = base_model.input, output = predictions)
#for layer in base_model.layers:
    #layer.trainable = False
    
head_model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
head_model.summary()
##################################### Helpers in callbacks ##############################################
tb = TensorBoard(log_dir=os.path.join('tensorboard', 'logs'))
early_stopper = EarlyStopping(patience=10)
csv_logger = CSVLogger(os.path.join('logs', str(time.time()) + '.log'))
checkpointer = ModelCheckpoint(filepath=os.path.join('checkpoints','{epoch:03d}-{val_acc:.3f}.hdf5'),
                                verbose=1,save_best_only=True)
history = History()
callback = Callback()
print_weights1 = LambdaCallback(on_epoch_end=lambda batch, logs: print('layer-1', head_model.layers[-1].get_weights()))
#########################################################################################################


__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_11 (InputLayer)           (None, 60, 1)        0                                            
__________________________________________________________________________________________________
input_12 (InputLayer)           (None, 60, 1)        0                                            
__________________________________________________________________________________________________
lstm_21 (LSTM)                  (None, 60, 32)       4352        input_11[0][0]                   
__________________________________________________________________________________________________
lstm_23 (LSTM)                  (None, 60, 32)       4352        input_12[0][0]                   
__________________________________________________________________________________________________
dropout_21



In [22]:
hist = head_model.fit([x_train_a, x_train_b], y_train,
            batch_size=64, epochs=2000, 
            validation_data=([x_val_a, x_val_b], y_val),
            callbacks=[tb, early_stopper, csv_logger, checkpointer, history, callback, print_weights1])

   
model_json = head_model.to_json()
with open("model_head.json", "w") as json_file:
    json_file.write(model_json)
head_model.save_weights("model_head.h5")
print("Saved model to disk")

Train on 1802 samples, validate on 451 samples
Epoch 1/2000

Epoch 00001: val_loss improved from inf to 6.71630, saving model to checkpoints\001-0.579.hdf5
layer-1 [array([[ 0.83001637],
       [ 0.84960645],
       [-0.57243991],
       [-1.02730703],
       [ 0.02720529]], dtype=float32), array([ 0.], dtype=float32)]
Epoch 2/2000

Epoch 00002: val_loss did not improve from 6.71630
layer-1 [array([[ 0.83001637],
       [ 0.84960645],
       [-0.57243991],
       [-1.02730703],
       [ 0.02720529]], dtype=float32), array([ 0.], dtype=float32)]
Epoch 3/2000

Epoch 00003: val_loss did not improve from 6.71630
layer-1 [array([[ 0.83001637],
       [ 0.84960645],
       [-0.57243991],
       [-1.02730703],
       [ 0.02720529]], dtype=float32), array([ 0.], dtype=float32)]
Epoch 4/2000

Epoch 00004: val_loss did not improve from 6.71630
layer-1 [array([[ 0.83001637],
       [ 0.84960645],
       [-0.57243991],
       [-1.02730703],
       [ 0.02720529]], dtype=float32), array([ 0.], dtype

In [7]:
head_model.layers

[<keras.engine.input_layer.InputLayer at 0x25709bb93c8>,
 <keras.engine.input_layer.InputLayer at 0x25709bb9e80>,
 <keras.layers.recurrent.LSTM at 0x25709bb9d30>,
 <keras.layers.recurrent.LSTM at 0x25709bb9cc0>,
 <keras.layers.core.Dropout at 0x25709ba1080>,
 <keras.layers.core.Dropout at 0x25709ba1710>,
 <keras.layers.recurrent.LSTM at 0x25709ba1588>,
 <keras.layers.recurrent.LSTM at 0x25709ba1128>,
 <keras.layers.core.Dropout at 0x25709ba12e8>,
 <keras.layers.core.Dropout at 0x25709ba1438>,
 <keras.layers.merge.Concatenate at 0x25709ba1208>,
 <keras.layers.core.Dense at 0x25709ba17b8>,
 <keras.layers.core.Dense at 0x25709ba1908>,
 <keras.layers.core.Dropout at 0x25709b74160>,
 <keras.layers.normalization.BatchNormalization at 0x2570674dfd0>,
 <keras.layers.core.Dense at 0x25709f7d4a8>]

In [14]:
y_val

array([0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
       1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0,
       0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1,
       1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1,
       1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1,
       1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0,
       1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1,
       1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1,
       0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0,
       1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0,
       0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1,
       0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1,
       1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1,
       1, 1,

In [16]:
y_train[100:120]

array([0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1])

This base_model for the binary classfication is of the architecture below (function TwoStreamLSTM, it is based on the example of 'shared layers' on Keras functional API guide page (https://keras.io/getting-started/functional-api-guide/)

There are more details on this whole tasks:
I have 5 classes of bacteria trajectory data. The trajectory data is 60 time steps with 2 features each step, but I know those two features are not closely related (one is change of speed, another is change of angular speed), so I used this TwoStream structure where you basicall process 2 features separately then merge the results togeter by using shared layer. This works well for 5-class classification. 

But, as biology experimentalists, we are more interested in interpretation rather than just predication. In this 5 classes, there is 1 class is wild type which means "natural one", the other 4 are genetically mutant. What researchers are more concerned with is to compare each mutant with the wild type. Therefore, I did binary classification between each mutant and the wild type again with same architecture just different number of classes and therefore the number of nodes of last layer is different. 

To my suprise, the accuracy of binary classification is lower than the 5-class tasks. I searched a bit and found out it does happen that multi-classes classification accuracy is higher than binary classification with same data. This may due to more features from more classes and the model is forced to pay more attention to the features. 

So I am thinking since the trained model for 5-classes already worked well (90-93%, genetically mutant doesn't mean necessarily to behave differently, that is, their trajectory data can be non-differentiable), so I decided to add one more layer on the top of the pretrained 5-classes model so that the model can be turned into a binary task. 

Then the problem happened.....I printed the weights and biases of each epoch, it just never changes...I read all related topic on github issues and stachexchange stc, so far I have tried the following solutions, but none of them works:
1. normalize the input and each layer
2. make sure the data both input and output are shuffled as expected.
3. different optimizer with default parameters' values.
4. sgd with different scales of learning rates
5. different initializer.

Some more notes:
1. it doesn't matter if I have the one more dense layer before the softmax dense layer, the weights won't change.
2. the number of epoch in model.fit was 2000, since the weights never change, I set it to 10 to terminate it faster. 

To reproduce this results, you may need to use or read:
1. data files in ./data/nyp/bin_order/A1_808_bin.npy and ./data/nyp/bin_order/A1_808_bin_y.npy
2. Steps in TwoStreamLSTM I used for 5-classes model or base model
3. the pretrained model(model_5classes_dense64.json), weights(471-0.907-5classes-dense64.hdf5) or model with weights(model_5classes_dense64.h5)

In [None]:
def TwoStreamLSTM(x_train_a, x_train_b, y_train, x_val_a, x_val_b, y_val):
    data_dim = 1
    batch_size = 64
    timesteps = x_train_a.shape[1] #60
    nb_classes = len(np.unique(y_train)) #2

    first_input = Input((timesteps, data_dim))
    encoder_a = LSTM(32, return_sequences=True,
                     batch_input_shape=(batch_size,timesteps, data_dim))(first_input)
    encoder_a = Dropout(0.2)(encoder_a)
    encoder_a = LSTM(32)(encoder_a)
    encoder_a_out = Dropout(0.2)(encoder_a)
    model_a = Model(first_input, encoder_a_out)

    second_input = Input((timesteps, data_dim))
    encoder_b = LSTM(32, return_sequences=True,
                     batch_input_shape=(batch_size, timesteps, data_dim))(second_input)
    encoder_b = Dropout(0.2)(encoder_b)
    encoder_b = LSTM(16)(encoder_b)
    encoder_b_out = Dropout(0.2)(encoder_b)
    model_b = Model(second_input, encoder_b_out)

    concatenated = concatenate([encoder_a_out, encoder_b_out])
    decoder = Dense(8, activation='relu')(concatenated)
    output_layer = Dense(nb_classes, activation='softmax')(decoder)

    model = Model([first_input, second_input], output_layer)
    model.compile(optimizer='rmsprop', loss='sparse_categorical_crossentropy', metrics=['sparse_categorical_accuracy'])
    ##################################### Helpers in callbacks ##############################################
    tb = TensorBoard(log_dir=os.path.join('tensorboard', 'logs',))
    early_stopper = EarlyStopping(patience=20)
    csv_logger = CSVLogger(os.path.join('logs', str(time.time()) + '.log'))
    checkpointer = ModelCheckpoint(filepath=os.path.join('checkpoints','{epoch:03d}-{val_sparse_categorical_accuracy:.3f}.hdf5'),
                                    verbose=1,save_best_only=True)
    history = History()
    #########################################################################################################
    hist = model.fit([x_train_a, x_train_b], y_train,
                batch_size=batch_size, epochs=2000, 
                validation_data=([x_val_a, x_val_b], y_val),
                callbacks=[tb, early_stopper, csv_logger, checkpointer, history])
    
    #print(model.summary())   
    model_json = model.to_json()
    with open("model.json", "w") as json_file:
        json_file.write(model_json)
    model.save_weights("model.h5")
    print("Saved model to disk")
    return hist