In [1]:
import tensorflow as tf
from keras import backend as K
from keras import regularizers, constraints, initializers, activations
from keras.layers.recurrent import Recurrent, _time_distributed_dense
from keras.engine import InputSpec

def tfPrint(d, T):
    """Debug helper: wrap tensor ``T`` in a ``tf.Print`` op.

    :param d: message string passed to ``tf.Print`` as ``message``
    :param T: tensor to print; both ``T`` and ``tf.shape(T)`` are printed
    :return: the result of ``tf.Print`` called with ``input_=T``
    """
    return tf.Print(input_=T, data=[T, tf.shape(T)], message=d)

class AttentionDecoder(Recurrent):
    """Recurrent decoder that attends over the whole encoded input sequence.

    At every step the layer scores each input timestep against the previous
    hidden state, builds a context vector from those scores, updates a
    GRU-style hidden state (reset gate, update gate, proposal) and emits a
    softmax over ``output_dim`` labels.

    Notes grounded in this implementation:
      * ``recurrent_regularizer`` / ``recurrent_constraint`` are derived from
        the ``kernel_regularizer`` / ``kernel_constraint`` arguments; there is
        no separate argument for them.
      * ``activation`` is stored on the layer, but the gates and the proposal
        below always use sigmoid / tanh.
      * ``return_sequences`` is forced to ``True``.

    references:
        Bahdanau, Dzmitry, Kyunghyun Cho, and Yoshua Bengio.
        "Neural machine translation by jointly learning to align and translate."
        arXiv preprint arXiv:1409.0473 (2014).
    """

    def __init__(self, units, output_dim,
                 activation='tanh',
                 return_probabilities=False,
                 name='AttentionDecoder',
                 kernel_initializer='glorot_uniform',
                 recurrent_initializer='orthogonal',
                 bias_initializer='zeros',
                 kernel_regularizer=None,
                 bias_regularizer=None,
                 activity_regularizer=None,
                 kernel_constraint=None,
                 bias_constraint=None,
                 **kwargs):
        """
        Implements an AttentionDecoder that takes in a sequence encoded by an
        encoder and outputs the decoded states
        :param units: dimension of the hidden state and the attention matrices
        :param output_dim: the number of labels in the output space
        :param activation: activation identifier, resolved and stored
        :param return_probabilities: if True, each step outputs the attention
            weights instead of the label distribution
        :param name: layer name
        :param kernel_initializer: initializer for the attention matrices
        :param recurrent_initializer: initializer for the gate, proposal,
            output and initial-state matrices
        :param bias_initializer: initializer for all bias vectors
        :param kernel_regularizer: regularizer for attention matrices; also
            reused for the recurrent matrices
        :param bias_regularizer: regularizer for bias vectors
        :param activity_regularizer: regularizer stored on the layer
        :param kernel_constraint: constraint for attention matrices; also
            reused for the recurrent matrices
        :param bias_constraint: constraint for bias vectors
        :param kwargs: forwarded to the ``Recurrent`` base class
        """
        self.units = units
        self.output_dim = output_dim
        self.return_probabilities = return_probabilities
        self.activation = activations.get(activation)
        self.kernel_initializer = initializers.get(kernel_initializer)
        self.recurrent_initializer = initializers.get(recurrent_initializer)
        self.bias_initializer = initializers.get(bias_initializer)

        self.kernel_regularizer = regularizers.get(kernel_regularizer)
        # Recurrent matrices share the kernel regularizer.
        self.recurrent_regularizer = regularizers.get(kernel_regularizer)
        self.bias_regularizer = regularizers.get(bias_regularizer)
        self.activity_regularizer = regularizers.get(activity_regularizer)

        self.kernel_constraint = constraints.get(kernel_constraint)
        # Recurrent matrices share the kernel constraint.
        self.recurrent_constraint = constraints.get(kernel_constraint)
        self.bias_constraint = constraints.get(bias_constraint)

        super(AttentionDecoder, self).__init__(**kwargs)
        self.name = name
        self.return_sequences = True  # must return sequences

    def _kernel_weight(self, name, shape):
        """Create a weight using the kernel initializer/regularizer/constraint."""
        return self.add_weight(shape=shape,
                               name=name,
                               initializer=self.kernel_initializer,
                               regularizer=self.kernel_regularizer,
                               constraint=self.kernel_constraint)

    def _recurrent_weight(self, name, shape):
        """Create a weight using the recurrent initializer/regularizer/constraint."""
        return self.add_weight(shape=shape,
                               name=name,
                               initializer=self.recurrent_initializer,
                               regularizer=self.recurrent_regularizer,
                               constraint=self.recurrent_constraint)

    def _bias_weight(self, name, shape):
        """Create a weight using the bias initializer/regularizer/constraint."""
        return self.add_weight(shape=shape,
                               name=name,
                               initializer=self.bias_initializer,
                               regularizer=self.bias_regularizer,
                               constraint=self.bias_constraint)

    def build(self, input_shape):
        """
          Create all trainable weights for the given 3-tuple ``input_shape``
          (batch_size, timesteps, input_dim).

          See Appendix 2 of Bahdanau 2014, arXiv:1409.0473
          for model details that correspond to the matrices here.

          The creation order and the weight names are kept stable on purpose.
        """

        self.batch_size, self.timesteps, self.input_dim = input_shape

        if self.stateful:
            super(AttentionDecoder, self).reset_states()

        self.states = [None, None]  # y, s

        # Matrices for creating the context vector
        self.V_a = self._kernel_weight('V_a', (self.units,))
        self.W_a = self._kernel_weight('W_a', (self.units, self.units))
        self.U_a = self._kernel_weight('U_a', (self.input_dim, self.units))
        self.b_a = self._bias_weight('b_a', (self.units,))

        # Matrices for the r (reset) gate
        self.C_r = self._recurrent_weight('C_r', (self.input_dim, self.units))
        self.U_r = self._recurrent_weight('U_r', (self.units, self.units))
        self.W_r = self._recurrent_weight('W_r', (self.output_dim, self.units))
        self.b_r = self._bias_weight('b_r', (self.units, ))

        # Matrices for the z (update) gate
        self.C_z = self._recurrent_weight('C_z', (self.input_dim, self.units))
        self.U_z = self._recurrent_weight('U_z', (self.units, self.units))
        self.W_z = self._recurrent_weight('W_z', (self.output_dim, self.units))
        self.b_z = self._bias_weight('b_z', (self.units, ))

        # Matrices for the proposal
        self.C_p = self._recurrent_weight('C_p', (self.input_dim, self.units))
        self.U_p = self._recurrent_weight('U_p', (self.units, self.units))
        self.W_p = self._recurrent_weight('W_p', (self.output_dim, self.units))
        self.b_p = self._bias_weight('b_p', (self.units, ))

        # Matrices for making the final prediction vector
        self.C_o = self._recurrent_weight('C_o', (self.input_dim, self.output_dim))
        self.U_o = self._recurrent_weight('U_o', (self.units, self.output_dim))
        self.W_o = self._recurrent_weight('W_o', (self.output_dim, self.output_dim))
        self.b_o = self._bias_weight('b_o', (self.output_dim, ))

        # For creating the initial state:
        self.W_s = self._recurrent_weight('W_s', (self.input_dim, self.units))

        self.input_spec = [
            InputSpec(shape=(self.batch_size, self.timesteps, self.input_dim))]
        self.built = True

    def call(self, x):
        """Cache the input sequence and its attention projection, then run the RNN loop."""
        # store the whole sequence so we can "attend" to it at each timestep
        self.x_seq = x

        # Project every timestep of the sequence once (x . U_a + b_a); this
        # does not depend on previous steps, so doing it here saves work.
        # Computed with K.dot directly instead of the private
        # `_time_distributed_dense` helper, whose import fails on the Keras
        # release this notebook was run with.
        self._uxpb = K.dot(self.x_seq, self.U_a) + self.b_a

        return super(AttentionDecoder, self).call(x)

    def get_initial_state(self, inputs):
        """Return the initial states ``[y0, s0]`` for the recurrence.

        ``s0`` is tanh(first timestep . W_s); ``y0`` is an all-zero tensor
        with ``output_dim`` columns built from the inputs.
        """
        # apply the matrix on the first time step to get the initial s0.
        s0 = activations.tanh(K.dot(inputs[:, 0], self.W_s))

        # from keras.layers.recurrent to initialize a vector of (batchsize,
        # output_dim)
        y0 = K.zeros_like(inputs)  # (samples, timesteps, input_dims)
        y0 = K.sum(y0, axis=(1, 2))  # (samples, )
        y0 = K.expand_dims(y0)  # (samples, 1)
        y0 = K.tile(y0, [1, self.output_dim])

        return [y0, s0]

    def step(self, x, states):
        """Run one decoding step.

        :param x: current-timestep slice supplied by the base class (unused;
            the step attends over the cached full sequence instead)
        :param states: ``[previous output, previous hidden state]``
        :return: ``(output, [yt, st])`` where output is the attention weights
            when ``return_probabilities`` is set, otherwise ``yt``
        """
        ytm, stm = states

        # repeat the hidden state to the length of the sequence
        _stm = K.repeat(stm, self.timesteps)

        # now multiply the weight matrix with the repeated hidden state
        _Wxstm = K.dot(_stm, self.W_a)

        # calculate the attention probabilities
        # this relates how much other timesteps contributed to this one.
        et = K.dot(activations.tanh(_Wxstm + self._uxpb),
                   K.expand_dims(self.V_a))
        # Softmax over the time axis. Subtracting the per-sample maximum
        # leaves the result unchanged mathematically but keeps exp() from
        # overflowing for large scores.
        et = et - K.max(et, axis=1, keepdims=True)
        at = K.exp(et)
        at = at / K.sum(at, axis=1, keepdims=True)  # (batchsize, timesteps, 1)

        # calculate the context vector
        context = K.squeeze(K.batch_dot(at, self.x_seq, axes=1), axis=1)

        # ~~~> calculate new hidden state
        # first calculate the "r" gate:
        rt = activations.sigmoid(
            K.dot(ytm, self.W_r)
            + K.dot(stm, self.U_r)
            + K.dot(context, self.C_r)
            + self.b_r)

        # now calculate the "z" gate
        zt = activations.sigmoid(
            K.dot(ytm, self.W_z)
            + K.dot(stm, self.U_z)
            + K.dot(context, self.C_z)
            + self.b_z)

        # calculate the proposal hidden state:
        s_tp = activations.tanh(
            K.dot(ytm, self.W_p)
            + K.dot((rt * stm), self.U_p)
            + K.dot(context, self.C_p)
            + self.b_p)

        # new hidden state:
        st = (1-zt)*stm + zt * s_tp

        yt = activations.softmax(
            K.dot(ytm, self.W_o)
            + K.dot(stm, self.U_o)
            + K.dot(context, self.C_o)
            + self.b_o)

        if self.return_probabilities:
            # NOTE(review): `at` still carries a trailing axis of size 1 here,
            # while compute_output_shape reports (None, timesteps, timesteps)
            # -- confirm how consumers of the probabilities reshape it.
            return at, [yt, st]
        else:
            return yt, [yt, st]

    def compute_output_shape(self, input_shape):
        """
            For Keras internal compatibility checking
        """
        if self.return_probabilities:
            return (None, self.timesteps, self.timesteps)
        else:
            return (None, self.timesteps, self.output_dim)

    def get_config(self):
        """
            For rebuilding models on load time.

            Every constructor argument is serialized so that a reloaded layer
            is configured like the saved one rather than falling back to the
            constructor defaults. The recurrent regularizer/constraint are
            not listed because __init__ derives them from the kernel ones.
        """
        config = {
            'output_dim': self.output_dim,
            'units': self.units,
            'return_probabilities': self.return_probabilities,
            'activation': activations.serialize(self.activation),
            'kernel_initializer': initializers.serialize(self.kernel_initializer),
            'recurrent_initializer': initializers.serialize(self.recurrent_initializer),
            'bias_initializer': initializers.serialize(self.bias_initializer),
            'kernel_regularizer': regularizers.serialize(self.kernel_regularizer),
            'bias_regularizer': regularizers.serialize(self.bias_regularizer),
            'activity_regularizer': regularizers.serialize(self.activity_regularizer),
            'kernel_constraint': constraints.serialize(self.kernel_constraint),
            'bias_constraint': constraints.serialize(self.bias_constraint)
        }
        base_config = super(AttentionDecoder, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
    

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


ImportError: cannot import name '_time_distributed_dense'

In [2]:
import numpy as np
import pandas as pd
from sklearn.cross_validation import train_test_split
import tensorflow as tf
from keras import backend as K
from keras.layers import Input, Dense, Conv2D
from keras.layers.core import Activation, Lambda, Dropout, Flatten 
from keras.models import Model
from keras.optimizers import SGD, RMSprop, Adam
from keras.utils import np_utils

import numpy as np
import pandas as pd


from sklearn.cross_validation import train_test_split

import matplotlib.pyplot as plt
from keras.callbacks import TensorBoard,ModelCheckpoint
from keras.callbacks import EarlyStopping
from keras.callbacks import ReduceLROnPlateau
import tensorflow as tf
from keras.engine.topology import Layer,InputSpec


from keras.optimizers import Adam
from keras.utils import np_utils

def non_shuffling_train_test_split(X, y, test_size=0.2):
    """Split ``X`` and ``y`` into leading (train) and trailing (test) parts.

    Row order is preserved; nothing is shuffled. The cut index is
    ``int((1 - test_size) * X.shape[0]) + 1``, i.e. the training part gets
    one row more than the floored fraction.

    :param X: array with the samples along axis 0
    :param y: labels, split at the same index as ``X``
    :param test_size: fraction of rows intended for the test part
    :return: ``(X_train, X_test, y_train, y_test)``
    """
    cut = 1 + int((1 - test_size) * X.shape[0])
    (X_train, X_test), (y_train, y_test) = [np.split(arr, [cut]) for arr in (X, y)]
    return X_train, X_test, y_train, y_test

# One TensorFlow session, registered with Keras, is used for the whole notebook.
sess = tf.Session()
K.set_session(sess)
K.set_image_dim_ordering("th")

# Raw data: no header row in either CSV.
dfin = pd.read_csv('../NNNormalizeDataIn.csv',header=None)  #inputs 128 lines per sample
dfout = pd.read_csv('../NNNormalizeDataOutClassses.csv',header=None)  #output classes 

# Number of input variables (columns) per time step.
print(dfin.shape[1])

# network and training
NB_EPOCH = 200
BATCH_SIZE = 64
VERBOSE = 1
OPTIMIZER = Adam()
VALIDATION_SPLIT=0.2

IMG_ROWS, IMG_COLS = 128, dfin.shape[1] # input image dimensions
NB_CLASSES = 5  # number of outputs = number of classes
INPUT_SHAPE = (1, IMG_ROWS, IMG_COLS)

# `.values` is used instead of `DataFrame.as_matrix()`, which pandas has
# deprecated; it yields the same ndarray.
total_inputs = dfin.values.astype(np.float32)
total_output = dfout.values.astype(np.int32)

# Every IMG_ROWS consecutive CSV rows form one sample.
total_inputs = np.reshape(total_inputs, (-1,IMG_ROWS, IMG_COLS))

# Both arrays are cut at the same index below, so they must describe the same
# number of samples; fail early with a clear message otherwise.
if total_inputs.shape[0] != total_output.shape[0]:
    raise ValueError("sample count mismatch: %d input samples vs %d label rows"
                     % (total_inputs.shape[0], total_output.shape[0]))

# Order-preserving split: the last part of the data becomes the test set.
train_inputs, test_inputs, train_output, test_output = non_shuffling_train_test_split(total_inputs, total_output, test_size=0.2)

X_train = train_inputs
y_train = np_utils.to_categorical(train_output, NB_CLASSES)
X_test = test_inputs
y_test = np_utils.to_categorical(test_output, NB_CLASSES)
from keras.models import Model
from keras.layers.recurrent import LSTM
from keras.layers.wrappers import Bidirectional
from keras.layers import Input

class LSTMNet:
    """Factory for a stacked bidirectional-LSTM sequence classifier."""

    @staticmethod
    def build(timeSteps, variables, classes,
              lstm_units=(100, 50, 20), dropout=0.4, recurrent_dropout=0.4):
        """Build (but do not compile) the classifier.

        The network is a stack of ``Bidirectional(LSTM)`` layers merged with
        ``'concat'``; every layer but the last returns the full sequence, and
        the last one feeds a softmax ``Dense`` layer.

        :param timeSteps: number of time steps per sample
        :param variables: number of input variables per time step
        :param classes: number of output classes
        :param lstm_units: units per direction for each stacked layer, in
            order; the default reproduces the original 100/50/20 stack
        :param dropout: input dropout rate of every LSTM
        :param recurrent_dropout: recurrent dropout rate of every LSTM
        :return: a ``Model`` mapping (timeSteps, variables) sequences to
            class probabilities
        :raises ValueError: if ``lstm_units`` is empty
        """
        if not lstm_units:
            raise ValueError("lstm_units must contain at least one layer size")

        inputNet = Input(shape=(timeSteps, variables))

        lstm = inputNet
        last = len(lstm_units) - 1
        for depth, width in enumerate(lstm_units):
            # Only the final recurrent layer collapses the time axis.
            recurrent = LSTM(width,
                             recurrent_dropout=recurrent_dropout,
                             dropout=dropout,
                             return_sequences=depth < last)
            lstm = Bidirectional(recurrent, merge_mode='concat')(lstm)

        # a softmax classifier
        classificationLayer = Dense(classes, activation='softmax')(lstm)

        model = Model(inputNet, classificationLayer)
        return model
    
# Input geometry and class count come from the configuration constants
# (IMG_ROWS time steps, IMG_COLS variables read from the CSV, NB_CLASSES)
# rather than repeating their values as literals.
modellstm = LSTMNet.build(timeSteps=IMG_ROWS, variables=IMG_COLS, classes=NB_CLASSES)
modellstm.summary()
modellstm.compile(loss="categorical_crossentropy", optimizer=OPTIMIZER,
    metrics=["accuracy"])


# Prepare saver.
#builder = tf.saved_model.builder.SavedModelBuilder("./model_keras")

# Initialize all variables
sess.run(tf.global_variables_initializer())





9




_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 128, 9)            0         
_________________________________________________________________
bidirectional_1 (Bidirection (None, 128, 200)          88000     
_________________________________________________________________
bidirectional_2 (Bidirection (None, 128, 100)          100400    
_________________________________________________________________
bidirectional_3 (Bidirection (None, 40)                19360     
_________________________________________________________________
dense_1 (Dense)              (None, 5)                 205       
Total params: 207,965
Trainable params: 207,965
Non-trainable params: 0
_________________________________________________________________


In [3]:
# Callbacks. Early stopping and checkpointing both track validation accuracy.
tbCallBack = TensorBoard(log_dir='./Graph', histogram_freq=0, write_graph=True, write_images=True)
esCallBack = EarlyStopping(monitor='val_acc', min_delta=0, patience=50, verbose=0, mode='max')
# Not passed to fit() below. Its floor is set below the optimizer's starting
# rate (OPTIMIZER is Adam() with default arguments, documented default 0.001)
# so that the callback is able to lower the rate if it is enabled; with
# min_lr equal to the starting rate it could never reduce anything.
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2,patience=5, min_lr=1e-5)
best_checkpoint = ModelCheckpoint('lstm_new_data.h5', monitor='val_acc', save_best_only=True, verbose=1)


# NOTE(review): validation_data is the held-out (X_test, y_test) split, so
# early stopping and checkpoint selection are driven by the same data the
# later cells evaluate on -- confirm this is intended.
history = modellstm.fit(X_train, y_train, 
        batch_size=BATCH_SIZE, epochs=NB_EPOCH, 
        verbose=2, # 0 for no logging to stdout, 1 for progress bar logging, 2 for one log line per epoch.
        #validation_split=VALIDATION_SPLIT,
        validation_data=(X_test,y_test),
        callbacks=[tbCallBack,best_checkpoint,esCallBack])#,reduce_lr,esCallBack])

Train on 1661 samples, validate on 414 samples
Epoch 1/200
 - 36s - loss: 1.6138 - acc: 0.1945 - val_loss: 1.6051 - val_acc: 0.2560

Epoch 00001: val_acc improved from -inf to 0.25604, saving model to lstm_new_data.h5
Epoch 2/200
 - 29s - loss: 1.6087 - acc: 0.2191 - val_loss: 1.5959 - val_acc: 0.2681

Epoch 00002: val_acc improved from 0.25604 to 0.26812, saving model to lstm_new_data.h5
Epoch 3/200
 - 30s - loss: 1.6013 - acc: 0.2438 - val_loss: 1.5932 - val_acc: 0.2560

Epoch 00003: val_acc did not improve from 0.26812
Epoch 4/200
 - 30s - loss: 1.5915 - acc: 0.2492 - val_loss: 1.5765 - val_acc: 0.2415

Epoch 00004: val_acc did not improve from 0.26812
Epoch 5/200
 - 29s - loss: 1.5843 - acc: 0.2601 - val_loss: 1.5713 - val_acc: 0.2560

Epoch 00005: val_acc did not improve from 0.26812
Epoch 6/200
 - 29s - loss: 1.5848 - acc: 0.2559 - val_loss: 1.5699 - val_acc: 0.2633

Epoch 00006: val_acc did not improve from 0.26812
Epoch 7/200
 - 30s - loss: 1.5853 - acc: 0.2595 - val_loss: 1.57


Epoch 00058: val_acc did not improve from 0.29710
Epoch 59/200
 - 31s - loss: 1.5200 - acc: 0.3323 - val_loss: 1.5655 - val_acc: 0.2633

Epoch 00059: val_acc did not improve from 0.29710
Epoch 60/200
 - 31s - loss: 1.5221 - acc: 0.3347 - val_loss: 1.5702 - val_acc: 0.2681

Epoch 00060: val_acc did not improve from 0.29710
Epoch 61/200
 - 32s - loss: 1.5276 - acc: 0.3064 - val_loss: 1.5734 - val_acc: 0.2778

Epoch 00061: val_acc did not improve from 0.29710
Epoch 62/200
 - 31s - loss: 1.5263 - acc: 0.3058 - val_loss: 1.5819 - val_acc: 0.2705

Epoch 00062: val_acc did not improve from 0.29710
Epoch 63/200
 - 32s - loss: 1.5199 - acc: 0.3227 - val_loss: 1.5751 - val_acc: 0.2657

Epoch 00063: val_acc did not improve from 0.29710
Epoch 64/200
 - 31s - loss: 1.5176 - acc: 0.3070 - val_loss: 1.5746 - val_acc: 0.2802

Epoch 00064: val_acc did not improve from 0.29710
Epoch 65/200
 - 32s - loss: 1.5279 - acc: 0.3167 - val_loss: 1.5781 - val_acc: 0.2850

Epoch 00065: val_acc did not improve fro

In [4]:
from sklearn.metrics import classification_report, confusion_matrix

# Per-class scores from the model as it stands after training, reduced to
# the index of the highest score per sample.
Y_pred = modellstm.predict(X_test)
y_pred = Y_pred.argmax(axis=1)

# y_test is one-hot encoded, so argmax recovers the integer labels.
print('Confusion Matrix')
print(confusion_matrix(y_test.argmax(axis=1), y_pred))

print(Y_pred.shape)

Confusion Matrix
[[29 19  8  9  4]
 [41 12 10  9 14]
 [29 15 16 11 12]
 [29 13 14 18 17]
 [14 11 13 13 34]]
(414, 5)


In [5]:
from keras.models import load_model
from sklearn.metrics import classification_report, confusion_matrix

# Reload the checkpoint written by ModelCheckpoint during training and
# evaluate it on the same held-out data.
modelLoaded = load_model('lstm_new_data.h5')

Y_pred_L = modelLoaded.predict(X_test)
y_pred_L = Y_pred_L.argmax(axis=1)

# y_test is one-hot encoded, so argmax recovers the integer labels.
print('Confusion Matrix')
print(confusion_matrix(y_test.argmax(axis=1), y_pred_L))

print(Y_pred_L.shape)
print(y_test.shape)

Confusion Matrix
[[34 16  3  5 11]
 [39 21  5  9 12]
 [28 17  9 10 19]
 [27 18  6 16 24]
 [14 13  7  8 43]]
(414, 5)
(414, 5)
