In [1]:
import hickle as hkl

In [2]:
import numpy as np
# Seed NumPy's global random number generator so that the random
# train/validation split performed later in this notebook is repeatable.
np.random.seed(1234)

In [3]:
# Load the pre-generated data set from a hickle file (path is relative to the
# notebook's working directory). Later cells read the 'label', 'Pro-Kmer' and
# 'Ehr-Kmer' entries of the loaded object.
PESeq = hkl.load('./Gen_data/11_Gen_Pro_Ehr.hkl')

## Pre-process the data set

In [4]:
# Build the target array Y and the feature array X, one entry per sample.
# For each sample the 'Pro-Kmer' and 'Ehr-Kmer' entries are joined along
# axis 1, so the two sources sit side by side in the last dimension.
Y = np.array([PESeq['label'][i] for i in range(len(PESeq['label']))])
X = np.array([
    np.concatenate((PESeq['Pro-Kmer'][i], PESeq['Ehr-Kmer'][i]), axis=1)
    for i in range(len(PESeq['label']))
])

In [5]:
# Fraction of the samples that is held out for validation.
VALIDATION_SPLIT = 0.1

# Randomly ordered sample indices. np.random.permutation(n) returns a shuffled
# integer array directly; shuffling range(n) in place only works on Python 2,
# because on Python 3 range() is immutable and np.random.shuffle raises
# TypeError.
index = np.random.permutation(len(PESeq['label']))

# Number of samples (taken from the end of the shuffled order) used for
# validation.
nb_validation_samples = int(VALIDATION_SPLIT * len(Y))

In [6]:
# Position in the shuffled index where the validation part starts.
# An explicit boundary is used instead of a negative offset so that the split
# stays correct when nb_validation_samples is 0: index[:-0] would select no
# training samples and index[-0:] would select every sample for validation.
split_at = len(index) - nb_validation_samples

# Everything before the boundary is training data ...
x_train = X[index[:split_at]]
y_train = Y[index[:split_at]]
# ... and everything from the boundary onwards is validation data.
x_val = X[index[split_at:]]
y_val = Y[index[split_at:]]

In [7]:
from keras.utils.np_utils import to_categorical

Using Theano backend.
Using gpu device 0: GeForce GTX TITAN X (CNMeM is disabled, cuDNN 5105)


In [8]:
# One-hot encode the class labels of both splits.
# NOTE(review): this cell rebinds y_train / y_val, so running it a second time
# would encode the already encoded labels again.
y_val = to_categorical(np.asarray(y_val))
y_train = to_categorical(y_train)

# Make a Network

In [9]:
from keras.layers import Embedding
from keras.layers import Dense,Input,Activation
from keras.layers import Embedding, LSTM, Bidirectional,GRU,InputLayer
from keras.models import Model,Sequential
from  keras.regularizers import ActivityRegularizer
from keras.layers.core import Dropout
from keras import layers

In [10]:
import os
# NOTE(review): Keras chooses its backend when it is first imported. An earlier
# cell already imported keras (its output reads "Using Theano backend."), so
# this assignment presumably has no effect at this point; to take effect it
# would have to run before the first keras import -- confirm.
os.environ['KERAS_BACKEND']='theano'
from keras.engine.topology import Layer
from keras import initializations
from keras import backend as K

class AttLayer(Layer):
    """Attention pooling over axis 1 of a 3D input.

    For an input ``x`` with three dimensions the layer

    1. scores every position along axis 1 with ``dot(tanh(x), W)``,
    2. turns the scores into weights with a softmax over axis 1,
    3. returns ``tanh`` of the weighted sum of ``x`` over axis 1.

    The result has shape ``(input_shape[0], input_shape[-1])``. The only
    trainable weight is the vector ``W`` with ``input_shape[-1]`` entries.
    """

    def __init__(self, **kwargs):
        """Store the weight initializer and forward ``kwargs`` to ``Layer``."""
        self.init = initializations.get('normal')
        super(AttLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create the scoring vector ``W``; the input must be 3D."""
        assert len(input_shape) == 3
        self.W = self.init((input_shape[-1],))
        self.trainable_weights = [self.W]
        super(AttLayer, self).build(input_shape)  # be sure you call this somewhere!

    def call(self, x, mask=None):
        """Return the attention-weighted, tanh-squashed sum of ``x`` over axis 1.

        ``mask`` is accepted for interface compatibility but is not used.
        """
        M = K.tanh(x)
        alpha = K.dot(M, self.W)

        # Softmax over axis 1. Subtracting the per-sample maximum first leaves
        # the resulting weights unchanged but keeps exp() from overflowing.
        alpha = alpha - K.max(alpha, axis=1, keepdims=True)
        ai = K.exp(alpha)
        weights = ai / K.sum(ai, axis=1, keepdims=True)

        # Append a trailing axis so the per-position weights broadcast over
        # the last dimension of x. Backend functions are used instead of
        # Theano-only tensor methods such as dimshuffle.
        weighted_input = x * K.expand_dims(weights)
        return K.tanh(K.sum(weighted_input, axis=1))

    def get_output_shape_for(self, input_shape):
        """Axis 1 is summed away, so only the first and last dimensions remain."""
        return (input_shape[0], input_shape[-1])


In [11]:
# Network input: one (4096, 2) float32 array per sample.
kmer_input = Input(shape=(4096, 2), dtype='float32')

# Bidirectional LSTM that returns its output at every position, followed by
# dropout on the sequence output.
l_lstm = Bidirectional(LSTM(2, return_sequences=True))(kmer_input)
l_lstm_drop = Dropout(0.3)(l_lstm)

# Attention pooling collapses the sequence to a single vector per sample.
l_att = AttLayer()(l_lstm_drop)
l_att_drop = Dropout(0.5)(l_att)

# Softmax classifier with one unit per one-hot label column. It reads the
# dropped-out attention vector; wiring it to l_att instead would leave the
# Dropout(0.5) layer disconnected from the model.
preds = Dense(len(y_train[0]), activation='softmax',
              activity_regularizer=ActivityRegularizer(l2=0.005))(l_att_drop)
model = Model(kmer_input, preds)

In [12]:
# Configure training: RMSprop optimizer, mean-squared-error loss, and accuracy
# reported as the metric.
# NOTE(review): the output layer is a softmax over one-hot labels;
# 'categorical_crossentropy' is the usual loss for that setup -- confirm that
# 'mse' is intended.
model.compile(optimizer='rmsprop', loss='mse', metrics=['acc'])

# Print a banner followed by the layer-by-layer summary of the model.
print("model fitting - attention LSTM network")
model.summary()

model fitting - attention LSTM network
____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
input_1 (InputLayer)             (None, 4096, 2)       0                                            
____________________________________________________________________________________________________
bidirectional_1 (Bidirectional)  (None, 4096, 4)       80          input_1[0][0]                    
____________________________________________________________________________________________________
dropout_1 (Dropout)              (None, 4096, 4)       0           bidirectional_1[0][0]            
____________________________________________________________________________________________________
attlayer_1 (AttLayer)            (None, 4)             4           dropout_1[0][0]                  
____________________________________________________

In [None]:
# Train for 20 epochs in mini-batches of 100 samples, evaluating on the
# held-out validation split after every epoch.
model.fit(
    x_train,
    y_train,
    batch_size=100,
    nb_epoch=20,
    validation_data=(x_val, y_val),
)

Train on 21377 samples, validate on 2375 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x7f16bb6ef450>

In [None]:
# Call fit again on the same model object with the same settings (20 epochs,
# batch size 100, same validation split).
# NOTE(review): presumably this continues training from the weights left by
# the previous fit call rather than starting over -- confirm this is intended.
model.fit(
    x_train,
    y_train,
    batch_size=100,
    nb_epoch=20,
    validation_data=(x_val, y_val),
)

Train on 21377 samples, validate on 2375 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20