I implement attention model
class AttentionLayer(Layer):
'''
Attention Layer over LSTM
'''
def __init__(self, output_dim=None, init='glorot_uniform', attn_activation='tanh',
W_regularizer=None, b_regularizer=None,
W_constraint=None, b_constraint=None, bias=True, **kwargs):
self.output_dim = output_dim
self.supports_masking = True
self.init = initializers.get(init)
self.W_regularizer = regularizers.get(W_regularizer)
self.b_regularizer = regularizers.get(b_regularizer)
self.W_constraint = constraints.get(W_constraint)
self.b_constraint = constraints.get(b_constraint)
self.bias = bias
self.attn_activation = activations.get(attn_activation)
super(AttentionLayer, self).__init__(**kwargs)
def build(self, input_shape): # (batch, steps, dim)
input_dim = input_shape[2]
if not self.output_dim:
self.output_dim = input_dim
self.steps = input_shape[1]
# shape = (input_dim, output_dim)
self.W_s = self.add_weight(shape=(input_dim, self.output_dim),
initializer=self.init,
name='{}_Ws'.format(self.name),
regularizer=self.W_regularizer,
constraint=self.W_constraint,
trainable=True)
self.B_s = self.add_weight(shape=(self.output_dim,),
initializer='zero',
regularizer=self.b_regularizer,
constraint=self.b_constraint,
name='{}_bs'.format(self.name))
self.Attention_vec = self.add_weight(shape=(self.output_dim,),
initializer='normal',
name='{}_att_vec'.format(self.name))
self.built = True
def compute_mask(self, x, input_mask=None):
return None
def call(self, x, mask=None):
# 1. transform, (None, steps, idim)*(idim, outdim) -> (None, steps, outdim)
uit = K.dot(x, self.W_s)
if self.bias:
uit += self.B_s
u = self.attn_activation(uit)
# 2. * attention sum : {(None, steps, outdim) *(outdim), axis = 2} -> (None, steps)
att = K.sum(u * self.Attention_vec, axis=2)
# 3. softmax, (None, steps)
att = K.exp(att)
# att_sum = att_sum.dimshuffle(0,'x')
att /= K.cast(K.sum(att, axis=1, keepdims=True) + K.epsilon(), K.floatx())
# 4. weighted sum
# att = att.dimshuffle(0, 1, 'x')
att = K.expand_dims(att, 1)
weighted_input = att * x
return K.sum(weighted_input, axis=1)
def get_output_shape_for(self, input_shape):
return (input_shape[0], input_shape[-1])
and got this
Traceback (most recent call last):
File "G:/Kiristingna/nlp_segment_analyse/sentiment/classifier/BiLSTM_ATT.py", line 257, in <module>
bilstm_att_wv(index_dict.tolist(), word_vectors.tolist(), sequences, y)
File "G:/Kiristingna/nlp_segment_analyse/sentiment/classifier/BiLSTM_ATT.py", line 162, in bilstm_att_wv
model.add(AttentionLayer())
File "D:\Program Files (x86)\Python\lib\site-packages\keras\models.py", line 489, in add
output_tensor = layer(self.outputs[0])
File "D:\Program Files (x86)\Python\lib\site-packages\keras\engine\topology.py", line 603, in __call__
output = self.call(inputs, **kwargs)
File "G:\Kiristingna\nlp_segment_analyse\sentiment\classifier\Attention.py", line 87, in call
weighted_input = att * x
File "D:\Program Files (x86)\Python\lib\site-packages\tensorflow\python\ops\math_ops.py", line 821, in binary_op_wrapper
return func(x, y, name=name)
File "D:\Program Files (x86)\Python\lib\site-packages\tensorflow\python\ops\math_ops.py", line 1044, in _mul_dispatch
return gen_math_ops._mul(x, y, name=name)
File "D:\Program Files (x86)\Python\lib\site-packages\tensorflow\python\ops\gen_math_ops.py", line 1434, in _mul
result = _op_def_lib.apply_op("Mul", x=x, y=y, name=name)
File "D:\Program Files (x86)\Python\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 768, in apply_op
op_def=op_def)
File "D:\Program Files (x86)\Python\lib\site-packages\tensorflow\python\framework\ops.py", line 2338, in create_op
set_shapes_for_outputs(ret)
File "D:\Program Files (x86)\Python\lib\site-packages\tensorflow\python\framework\ops.py", line 1719, in set_shapes_for_outputs
shapes = shape_func(op)
File "D:\Program Files (x86)\Python\lib\site-packages\tensorflow\python\framework\ops.py", line 1669, in call_with_requiring
return call_cpp_shape_fn(op, require_shape_fn=True)
File "D:\Program Files (x86)\Python\lib\site-packages\tensorflow\python\framework\common_shapes.py", line 610, in call_cpp_shape_fn
debug_python_shape_fn, require_shape_fn)
File "D:\Program Files (x86)\Python\lib\site-packages\tensorflow\python\framework\common_shapes.py", line 676, in _call_cpp_shape_fn_impl
raise ValueError(err.message)
ValueError: Dimensions must be equal, but are 100 and 250 for 'attention_layer_1/mul_1' (op: 'Mul') with input shapes: [?,1,100], [?,?,250].
I implement attention model
and got this