<a href="https://colab.research.google.com/github/bsse1009/SPL2-nlp/blob/master/BiDAF.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from keras.layers import Layer
import tensorflow as tf
from keras import backend as K
from keras.layers.advanced_activations import Softmax

class SimilarityMatrix(Layer):
  def __init__(self, **kwargs):
    super(SimilarityMatrix, self).__init__(**kwargs)
  
  def build(self, input_shape):
    self.context_shape = input_shape[0]
    self.question_shape = input_shape[1]

    self.kernel = self.add_weight(name="kernel",
                                  shape=(3 * input_shape[0][2], 1),
                                  initializer='uniform',
                                  trainable=True)

    super(SimilarityMatrix, self).build(input_shape)

  def compute_similarity(self, repeated_context_vectors, repeated_query_vectors):

    element_wise_multiply = repeated_context_vectors * repeated_query_vectors
    concatenated_tensor = tf.concat(
    [repeated_context_vectors, repeated_query_vectors, element_wise_multiply], axis=-1)
    dot_product = K.squeeze(K.dot(concatenated_tensor, self.kernel), axis=-1)

    return dot_product

  def build_similarity_matrix(self, context, question):

    num_context_words = K.shape(context)[1]
    num_query_words = K.shape(question)[1]

    context_dim_repeat = K.concatenate([[1, 1], [num_query_words], [1]], 0)
    query_dim_repeat = K.concatenate([[1], [num_context_words], [1, 1]], 0)
    repeated_context_vectors = K.tile(K.expand_dims(context, axis=2), context_dim_repeat)
    repeated_query_vectors = K.tile(K.expand_dims(question, axis=1), query_dim_repeat)
    similarity_matrix = self.compute_similarity(repeated_context_vectors, repeated_query_vectors)
    # similarity_matrix = tf.reshape(similarity_matrix, [self.context_shape[0],self.context_shape[1],self.question_shape[1]])

    return similarity_matrix

  def call(self, x):
    context, question = x
    self.similarity_matrix = self.build_similarity_matrix(context, question)
    return self.similarity_matrix
  
  def compute_output_shape(self, input_shape):
    return (input_shape[0][0],input_shape[0][1],input_shape[1][1])

  

In [2]:
from keras.layers import Layer
import tensorflow as tf
from keras import backend as K
from keras.layers.advanced_activations import Softmax

class C2Q_Layer(Layer):
  def __init__(self, **kwargs):
    super(C2Q_Layer, self).__init__(**kwargs)

  def build(self, input_shape):
    super(C2Q_Layer, self).build(input_shape)

  def call(self,x):
    similarity_matrix, question=x
    attention = tf.nn.softmax(similarity_matrix)

    self.U_A=K.sum(K.dot(attention,question),-2)

    return self.U_A

  def compute_output_shape(self, input_shape):
    return self.U_A.shape;

In [3]:
from keras.layers import Layer
import tensorflow as tf
from keras import backend as K
from keras.layers.advanced_activations import Softmax

class Q2C_Layer(Layer):
  def __init__(self, **kwargs):
    super(Q2C_Layer, self).__init__(**kwargs)

  def build(self, input_shape):
    super(Q2C_Layer, self).build(input_shape)

  def call(self,x):
    similarity_matrix, context=x
    attention = tf.nn.softmax(K.max(similarity_matrix,axis=-1))

    temp=K.expand_dims(K.sum(K.dot(attention,context),-2),1)

    H_A=K.tile(temp,[1,similarity_matrix.shape[1],1])

    return H_A
  
  def compute_output_shape(self, input_shape):
    return self.H_A.shape;

In [4]:
from keras.layers import Layer
import tensorflow as tf
from keras import backend as K
from keras.layers.advanced_activations import Softmax

class MegaMerge(Layer):
  def __init__(self, **kwargs):
    super(MegaMerge, self).__init__(**kwargs)

  def build(self, input_shape):
    super(MegaMerge, self).build(input_shape)

  def call(self,x):
    context,c2q,q2c=x
    self.G=K.concatenate([context,c2q,context*c2q,context*q2c],axis=-1)

    return self.G;

  def compute_output_shape(self, input_shape):
    return self.G.shape;

In [6]:
from keras.layers import Layer,LSTM,Bidirectional
import numpy as np
import tensorflow as tf

class ModellingLayer(Layer):
    def __init__(self,**kwargs):
        super(ModellingLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        self.shape=input_shape
  
        self.lstm1 = Bidirectional(LSTM(int(input_shape[2]//8),
                                   activation='sigmoid',
                                   input_shape=(input_shape[1],input_shape[2]),
                                   return_sequences=True, trainable=True))
        self.lstm2 = Bidirectional(LSTM(int(input_shape[2]//8),
                                   activation='sigmoid',
                                   input_shape=(input_shape[1], int(input_shape[2]//4)),
                                   return_sequences=True, trainable=True))
        super(ModellingLayer, self).build(input_shape)

    def call(self, x):
        
        self.M1=self.lstm1(x)
        
        self.M2=self.lstm2(self.M1)
        
        self.temp1=tf.concat([x, self.M1], -1)
        self.temp2=tf.concat([x, self.M2], -1)
        
        return self.temp1,self.temp2

    def compute_output_shape(self, input_shape):
        return self.temp1.shape,self.temp2.shape

In [7]:
from keras.layers import Layer
import tensorflow as tf
from keras import backend as K

class OutputLayer(Layer):
    def __init__(self,**kwargs):
        super(OutputLayer, self).__init__(**kwargs)

    def build(self,input_shape):
  
        self.w1=self.add_weight(name="w1",
                                shape=(input_shape[0][2],),
                                initializer='uniform',
                                trainable=True)
        self.w2=self.add_weight(name="w2",
                                shape=(input_shape[0][2],),
                                initializer='uniform',
                                trainable=True)

        super(OutputLayer, self).build(input_shape)

    def call(self, x):

        answer_span1=tf.tensordot(x[0],tf.transpose(self.w1),1)
        answer_span2=tf.tensordot(x[1], tf.transpose(self.w2), 1)
        
        self.p1=tf.nn.softmax(answer_span1)
        self.p2=tf.nn.softmax(answer_span2)
        
        
        return self.p1,self.p2

    def compute_output_shape(self, input_shape):
        return self.p1.shape,self.p2.shape


In [9]:
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model, load_model

passage_input = Input(shape=(766, 800), dtype='float32', name="passage_input")
question_input = Input(shape=(60, 800), dtype='float32', name="question_input")
sim=SimilarityMatrix(name="sm")([passage_input, question_input])
c2q=C2Q_Layer(name="c2q")([sim, question_input])
q2c=Q2C_Layer(name="q2c")([sim, passage_input])
megamerge=MegaMerge(name="mega")([passage_input, c2q, q2c])
t1, t2=ModellingLayer(name="modelling")(megamerge)
p1, p2=OutputLayer(name="output")([t1, t2])

model = Model(inputs=[passage_input, question_input], outputs=[p1, p2])
model.summary()
model.compile(loss='categorical_crossentropy', optimizer='sgd', metrics=['accuracy'])

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
passage_input (InputLayer)      [(None, 766, 800)]   0                                            
__________________________________________________________________________________________________
question_input (InputLayer)     [(None, 60, 800)]    0                                            
__________________________________________________________________________________________________
sm (SimilarityMatrix)           (None, 766, 60)      2400        passage_input[0][0]              
                                                                 question_input[0][0]             
__________________________________________________________________________________________________
c2q (C2Q_Layer)                 (None, 766, 800)     0           sm[0][0]                     

In [13]:
context=np.random.rand(10,766,800)
question=np.random.rand(10,60,800)
p1 = np.zeros((10, 766))
p2 = np.ones((10, 766))
model.fit(x={"passage_input": context, "question_input": question}, y={"output": p1, "output_1": p2}, batch_size = 5, steps_per_epoch=1,
                                  epochs=10, verbose = 2)
model.save('bidaf1009.h5')

Epoch 1/10
1/1 - 3s - loss: 5476.0186 - output_loss: 0.0000e+00 - output_1_loss: 5476.0186 - output_accuracy: 0.0000e+00 - output_1_accuracy: 0.0000e+00
Epoch 2/10
1/1 - 2s - loss: 982248.3750 - output_loss: 0.0000e+00 - output_1_loss: 982248.3750 - output_accuracy: 0.0000e+00 - output_1_accuracy: 0.0000e+00
Epoch 3/10
1/1 - 2s - loss: 2251941.5000 - output_loss: 0.0000e+00 - output_1_loss: 2251941.5000 - output_accuracy: 0.0000e+00 - output_1_accuracy: 0.0000e+00
Epoch 4/10
1/1 - 2s - loss: 3456517.2500 - output_loss: 0.0000e+00 - output_1_loss: 3456517.2500 - output_accuracy: 0.0000e+00 - output_1_accuracy: 0.0000e+00
Epoch 5/10
1/1 - 2s - loss: 4551021.5000 - output_loss: 0.0000e+00 - output_1_loss: 4551021.5000 - output_accuracy: 0.0000e+00 - output_1_accuracy: 0.0000e+00
Epoch 6/10
1/1 - 2s - loss: 5395752.0000 - output_loss: 0.0000e+00 - output_1_loss: 5395752.0000 - output_accuracy: 0.0000e+00 - output_1_accuracy: 0.0000e+00
Epoch 7/10
1/1 - 2s - loss: 7550355.0000 - output_loss

In [None]:

context=np.random.rand(10,766,800)
question=np.random.rand(10,60,800)

sim=SimilarityMatrix()
c2q=C2Q_Layer()
q2c=Q2C_Layer()
megamerge=MegaMerge()
modelling=ModellingLayer()
output=OutputLayer()
x=sim([context,question])
y=c2q([x,question])
z=q2c([x,context])
g=megamerge([context,y,z])
temp1,temp2=modelling(g)
p1,p2=output([temp1,temp2])
print(p1,p2)

tf.Tensor(
[[1.04814804e-04 2.34403997e-03 1.44748537e-05 ... 1.56190433e-03
  1.35314977e-03 6.91540889e-04]
 [1.55857124e-05 5.33449638e-05 3.14282114e-03 ... 1.90216408e-03
  3.33979202e-04 3.36068263e-03]
 [1.71619225e-02 8.08488330e-05 2.37535243e-03 ... 5.25931595e-04
  1.75154855e-04 3.91609465e-05]
 ...
 [1.22528465e-04 4.26990911e-03 6.04337765e-06 ... 6.11135969e-04
  1.68583210e-04 4.87228681e-04]
 [6.90609356e-03 8.33049053e-05 1.95606353e-05 ... 1.85843033e-04
  1.43067999e-04 2.20585891e-04]
 [4.04190941e-04 1.92559499e-03 7.71209307e-05 ... 4.30966169e-03
  9.72436450e-04 6.74650346e-06]], shape=(10, 766), dtype=float32) tf.Tensor(
[[2.2800332e-03 1.4471752e-03 5.7612539e-05 ... 2.0602587e-05
  2.2411338e-04 5.3683692e-04]
 [2.3282437e-05 4.6944010e-04 1.9887715e-04 ... 3.0302914e-04
  6.0676412e-05 4.5623918e-04]
 [7.3930188e-03 9.0660404e-05 1.2603401e-04 ... 5.0517321e-03
  3.3784917e-04 2.2127411e-04]
 ...
 [6.2175986e-04 1.0171512e-03 3.5310307e-04 ... 6.0830370e-04