# Attention Layers

In [2]:
import lasagne
import theano.tensor as T

In [3]:
class AttentionLayer(lasagne.layers.Layer):
    '''
    A Attention Layer
    '''
    
    def __init__(self,incoming,W=lasagne.init.Normal(),
                b=lasagne.init.Normal(0.),
                 nonlinearity = lasagne.nonlinearities.tanh,
                 **kwargs):
        super(AttentionLayer,self).__init__(incoming,**kwargs)
        
        self.nonlinearity = (lasagne.nonlinearities.identity 
                            if nonlinearity is None else nonlinearity)
        self.W = self.add_param(W, (self.input_shape[2],), name="W")
        if b is None:
            self.b = None
        else:
            self.b = self.add_param(b,(),name='b',regularizable = False)
            
    def get_output_shape_for (self,input_shape):
        return (input_shape[0],input_shape[-1])
    
    def get_output_for(self,input,**kwargs):
        activation = T.dot(T.tanh(input),W)
        
        #add bias
        if self.b is not None:
            activation = activation + self.b
            
        #apply nonlinearity
        activation = T.exp(activation)
        activation /= activation.sum(axis=1).dimshuffle(0,'x')
        
        weighted_input = input *activation.dimshuffle(0,1,'x')
        
        return weighted_input.sum(axis=1)

# Data input 

In [5]:
from collections  import defaultdict
import pandas as pd
import numpy as np

In [6]:
datafile ='/home/yinqijin/WorkSpace/2.RNA_Structure_Profile/Orig_data/TRAIN_FILE.TXT'
f = open(datafile,'r')
sentenses = []
labels = []

i =0
for line in f:
    if i%4==0:
        sen = line.split('"')[1]
        #print sen
        sen = sen.replace('<e1>','')
        sen = sen.replace('</e1>','')
        sen = sen.replace('<e2>','')
        sen = sen.replace('</e2>','')
        #print sen
        sentenses.append(sen)
    elif i%4==1:
        labels.append(line.splitlines()[0])
    elif i%4==2:
        #This Commment , it's useless,pass
        pass
    else:
        pass
    
    i += 1
    ''' 
    if i>=12:
        print sentenses
        print labels
        break
   '''
    
f.close()
labels = np.array(labels)
sentenses = np.array(sentenses)

print len(sentenses),'==',len(labels)

8000 == 8000


In [7]:
label_name = np.unique(labels)
print  'labes kind(include \'Other\'): ',len(label_name) 
#print np.array(label_name)

label_dict = defaultdict()
for item in label_name:
    label_dict[item] = len(label_dict)

    
func = lambda item:label_dict[item]
labels_ = pd.Series(labels)
labels_ =labels_.apply(func)
#labes = labels_.tolist()
labels = np.array(labels_)
print labels

labes kind(include 'Other'):  19
[ 3 16 11 ...,  6 16 18]


In [9]:
np.random.seed(1234)
index = range(len(labels))
np.random.shuffle(index)
#print index
#print labels[:5],sentenses[:5]
labels = labels[index]
sentenses = sentenses[index]
#print labels[:5],sentenses[:5]

In [8]:
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.utils.np_utils import to_categorical

Using Theano backend.


In [10]:
MAX_SEQUENCE_LENGTH = 500
MAX_NB_WORDS = 20000
EMBEDDING_DIM = 100
VALIDATION_SPLIT = 0.2

In [11]:
GLOVE_DIR = '/home/yinqijin/WorkSpace/2.RNA_Structure_Profile/Orig_data/glove.6B.100d.txt'
embeddings_index = {}
f = open(GLOVE_DIR,'r')
for line in f:
    values = line.split()
    word = values[0]
    coefs = np.asarray(values[1:], dtype='float32')
    embeddings_index[word] = coefs
f.close()
print('Total %s word vectors.' % len(embeddings_index))

Total 400000 word vectors.


In [12]:
# Tokenizer.fit_on_sequences Tokenizer.fit_on_texts,  any different??
tokenizer = Tokenizer(nb_words= MAX_NB_WORDS)
tokenizer.fit_on_texts(sentenses)
sequences = tokenizer.texts_to_sequences(sentenses)
word_index = tokenizer.word_index
print('Found %s unique tokens.' % len(word_index))

print len(word_index.keys())

Found 19391 unique tokens.
19391


In [13]:
data = pad_sequences(sequences,maxlen=MAX_SEQUENCE_LENGTH)
model_labels = to_categorical(np.asarray(labels))   #one-hot 编码labels

In [14]:
print data[1],data.shape[0]
print model_labels[1]

[   0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0  

In [15]:
nb_validation_samples = int(VALIDATION_SPLIT*data.shape[0])

x_train = data[:-nb_validation_samples]
y_train = model_labels[:-nb_validation_samples]
x_val = data[-nb_validation_samples:]
y_val = model_labels[-nb_validation_samples:]

print('Traing and validation set number of positive and negative reviews')
print y_train.shape[0]
print y_val.shape[0]

Traing and validation set number of positive and negative reviews
6400
1600


In [16]:
embedding_matrix = np.random.random((len(word_index) + 1, EMBEDDING_DIM))
for word, i in word_index.items():
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None:
        # words not found in embedding index will be all-zeros.
        embedding_matrix[i] = embedding_vector

# Make Network

In [17]:
from lasagne import layers

In [20]:
MINIBANTH =10

In [18]:
#Gate Initialize
gate_parameters = lasagne.layers.recurrent.Gate(
    W_in=lasagne.init.Orthogonal(), W_hid=lasagne.init.Orthogonal(),
    b=lasagne.init.Constant(0.))

cell_parameters = lasagne.layers.recurrent.Gate(
    W_in=lasagne.init.Orthogonal(), W_hid=lasagne.init.Orthogonal(),
    # Setting W_cell to None denotes that no cell connection will be used.
    W_cell=None, b=lasagne.init.Constant(0.),
    # By convention, the cell nonlinearity is tanh in an LSTM.
    nonlinearity=lasagne.nonlinearities.tanh)

In [23]:
l_in = layers.InputLayer(shape =(MINIBANTH,None))
l_embed  = layers.EmbeddingLayer( l_in ,len(word_index) + 1,
                            EMBEDDING_DIM,
                            W=[embedding_matrix],)
l_lstm = layers.LSTMLayer(l_embed ,only_return_final= False,    ingate=gate_parameters, forgetgate=gate_parameters,
    cell=cell_parameters, outgate=gate_parameters,
    learn_init=True, grad_clipping=500.)
l_lstm_back = layers.LSTMLayer(l_embed, backwards=True,only_return_final=False,    ingate=gate_parameters, forgetgate=gate_parameters,
    cell=cell_parameters, outgate=gate_parameters,
    learn_init=True, grad_clipping=500)
l_lstm  = layers.ElemwiseSumLayer([l_lstm,l_lstm_back])
l_att = AttentionLayer(l_lstm)
l_out = layers.DenseLayer(l_att,num_units= 19, nonlinearity=lasagne.nonlinearities.softmax)


RuntimeError: cannot initialize parameters: 'spec' is not a numpy array, a Theano expression, or a callable

# InputLayer

In [3]:
# Data Input


In [4]:
print L_in.output_shape

(10, None, 100)


In [5]:
#Mask Input 
L_mask = layers.InputLayer(shape = (MINIBANTH,TIMESTEP))

# LSTMLayer

In [6]:
#Gate Initialize
gate_parameters = lasagne.layers.recurrent.Gate(
    W_in=lasagne.init.Orthogonal(), W_hid=lasagne.init.Orthogonal(),
    b=lasagne.init.Constant(0.))

cell_parameters = lasagne.layers.recurrent.Gate(
    W_in=lasagne.init.Orthogonal(), W_hid=lasagne.init.Orthogonal(),
    # Setting W_cell to None denotes that no cell connection will be used.
    W_cell=None, b=lasagne.init.Constant(0.),
    # By convention, the cell nonlinearity is tanh in an LSTM.
    nonlinearity=lasagne.nonlinearities.tanh)


In [10]:
#LSTMLayer
N_HIDDEN = 10

L_lstm = lasagne.layers.recurrent.LSTMLayer(
    L_in, N_HIDDEN,
    # We need to specify a separate input for masks
    mask_input=L_mask,
    # Here, we supply the gate parameters for each gate
    ingate=gate_parameters, forgetgate=gate_parameters,
    cell=cell_parameters, outgate=gate_parameters,
    # We'll learn the initialization and use gradient clipping
    #only_return_final = True, # This is to flatten t
    learn_init=True, grad_clipping=100.)

L_lstm_back = lasagne.layers.recurrent.LSTMLayer(
    L_in, N_HIDDEN, ingate=gate_parameters,
    mask_input=L_mask, forgetgate=gate_parameters,
    cell=cell_parameters, outgate=gate_parameters,
    #only_return_final = True,
    learn_init=True, grad_clipping=100., backwards=True)
# We'll combine the forward and backward layer output by summing.
# Merge layers take in lists of layers to merge as input.
L_lstm_sum = lasagne.layers.ElemwiseSumLayer([L_lstm, L_lstm_back])
#The output of l_sum will be of shape (n_batch, n_time_steps, N_HIDDEN)

In [11]:
print L_lstm_sum.output_shape

(10, 10)


# Attention Layer