In [532]:
import numpy as np
import tensorflow as tf

import keras.backend as K
from keras.layers import Layer, Concatenate, Input, Dense
from keras.models import Model, load_model

In [533]:
# Path the trained model is saved to by train() and loaded from by predict().
model_dir = 'model'
# Width of the hidden tanh projection inside QGAttention (columns of W1, rows of W2).
tanh_layer_size = 300

In [534]:
# Switch tf.data into its debug mode for this kernel session.
# NOTE(review): looks like a leftover debugging aid, since no tf.data pipeline is
# built explicitly in the visible cells. Confirm whether it is still needed.
tf.data.experimental.enable_debug_mode()

In [535]:
class QGAttention(Layer):
    """Tanh-scored attention pooling over the regions of a 3-D input.

    For an input ``x`` of shape ``(batch, regions, vector_length)`` the layer
    computes one scalar score per region as ``tanh(x . W1) . W2``, normalises
    the scores with a softmax across the regions, and returns the
    attention-weighted sum of the region vectors, a tensor of shape
    ``(batch, vector_length)``.

    Args:
        tanh_layer_size: Number of units in the hidden tanh projection
            (second dimension of ``W1``, first dimension of ``W2``).
        **kwargs: Standard ``Layer`` keyword arguments (``name``, ``dtype``,
            ...), forwarded unchanged to the base class. Accepting them is
            also what allows the layer to be rebuilt from ``get_config()``.
    """

    def __init__(self, tanh_layer_size, **kwargs):
        super(QGAttention, self).__init__(**kwargs)
        self.tanh_layer_size = tanh_layer_size

    def build(self, input_shape):
        """Create the two projection matrices used to score each region."""
        # input_shape[2] is the length of a single region vector (last axis).
        self.W1 = self.add_weight(name='attention_weight',
                                  shape=(input_shape[2], self.tanh_layer_size),
                                  initializer='random_normal', trainable=True)
        self.W2 = self.add_weight(name='attention_weight2',
                                  shape=(self.tanh_layer_size, 1),
                                  initializer='random_normal', trainable=True)
        super(QGAttention, self).build(input_shape)

    def call(self, x):
        """Return the attention-weighted sum of the region vectors in ``x``.

        Args:
            x: Tensor of shape ``(batch, regions, vector_length)``.

        Returns:
            Tensor of shape ``(batch, vector_length)``.
        """
        # Hidden tanh projection: (batch, regions, tanh_layer_size).
        e = K.tanh(K.dot(x, self.W1))
        # Linear scoring layer: (batch, regions, 1).
        e = K.dot(e, self.W2)
        # Drop the trailing singleton axis: (batch, regions).
        e = K.squeeze(e, axis=-1)
        # Attention weights: softmax across the regions of each sample.
        alpha = K.softmax(e, axis=-1)
        # Restore the trailing axis so the weights broadcast over vector_length.
        alpha = K.expand_dims(alpha, axis=-1)
        # Weight every region vector and sum over the regions axis.
        context = x * alpha
        context = K.sum(context, axis=1)
        return context

    def get_config(self):
        """Return the constructor arguments so the layer can be serialised."""
        config = super(QGAttention, self).get_config()
        config.update({'tanh_layer_size': self.tanh_layer_size})
        return config

In [536]:
def get_model(vector_input_size, tanh_layer_size):
    """Build and compile the attention model.

    The network takes a variable-length sequence of vectors of size
    ``vector_input_size``, pools it with ``QGAttention`` and maps the pooled
    vector to a single output unit.

    Args:
        vector_input_size: Length of each vector in the input sequence.
        tanh_layer_size: Hidden size passed to ``QGAttention``.

    Returns:
        The model, compiled with the Adam optimizer and mean-squared-error loss.
    """
    # The sequence axis is left as None, so any number of vectors is accepted.
    region_vectors = Input(shape=(None, vector_input_size))
    attended = QGAttention(tanh_layer_size)(region_vectors)
    prediction = Dense(1)(attended)

    network = Model(inputs=region_vectors, outputs=prediction)
    network.compile(optimizer="adam", loss="mean_squared_error")
    return network

In [537]:
def train(x, y):
    """Build the attention model, fit it on ``(x, y)`` and save it.

    Args:
        x: Input array; ``x.shape[2]`` is used as the per-vector input size
            of the model.
        y: Targets passed to ``fit`` together with ``x``.

    The model is saved to the module-level ``model_dir``; nothing is returned.
    """
    model = get_model(x.shape[2], tanh_layer_size)
    # summary() prints the table itself and returns None, so it must not be
    # wrapped in print() (that would emit an extra "None" line).
    model.summary()

    model.fit(x, y, verbose=2)
    model.save(model_dir)

In [538]:
def predict(x):
    """Load the model stored under ``model_dir`` and print its predictions for ``x``."""
    restored = load_model(model_dir)
    predictions = restored.predict(x)
    print(predictions)

In [539]:
# Dimensions of the random demo data.
n_samples, n_regions, vector_size = 2, 5, 1280

# Each element of x is a list of region vectors, each concatenated with the
# vector of the corresponding question.
x = np.random.random((n_samples, n_regions, vector_size))
# Target ("class") value for every sample.
y = np.random.random((n_samples, 1))

train(x, y)

Model: "model_61"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_200 (InputLayer)      [(None, None, 1280)]      0         
                                                                 
 qg_attention_11 (QGAttentio  (None, 1280)             384300    
 n)                                                              
                                                                 
 dense_102 (Dense)           (None, 1)                 1281      
                                                                 
Total params: 385,581
Trainable params: 385,581
Non-trainable params: 0
_________________________________________________________________
None
1/1 - 0s - loss: 1.0127 - 15ms/epoch - 15ms/step
INFO:tensorflow:Assets written to: model/assets


INFO:tensorflow:Assets written to: model/assets


In [540]:
predict(x)

[[ 0.4374066 ]
 [-0.04947354]]
