In [1]:
from __future__ import print_function
import json
from pprint import pprint
import numpy as np

import keras
from nltk.tokenize import word_tokenize
from nltk.stem.wordnet import WordNetLemmatizer
from keras import optimizers
from keras.utils.data_utils import get_file
from keras.layers.embeddings import Embedding
from keras import layers
from keras.layers import Lambda, Activation,recurrent, Bidirectional, Dense, Flatten, Conv1D, Dropout, LSTM, GRU, concatenate, multiply, add, Reshape, MaxPooling1D, BatchNormalization
from keras.models import Model, load_model
from keras.preprocessing.sequence import pad_sequences
from keras import backend as K
from keras.engine.topology import Layer

Using TensorFlow backend.


In [2]:
def get_dictionary(vocab):
    d = dict()
    with open(vocab) as f:
        lines = f.readlines()
        for l in lines:
            values = l.strip().split()
            word = values[0]
            coefs = np.asarray(values[1:], dtype='float32')
            d[word] = coefs
    return d

In [3]:
d = get_dictionary("glove/glove.6B.300d.txt")

In [4]:
with open('train.json') as data_file:    
    data = json.load(data_file)

In [5]:
ids = []
titles = dict()
contexts = dict()
questions = dict()
answers_text = dict()
answers_start = dict()
for i in range(len(data)):
    paragraphs = data[i]["paragraphs"]
    title = data[i]["title"]
    for j in range(len(paragraphs)):
        context = paragraphs[j]["context"]
        qas = paragraphs[j]["qas"]
        for k in range(len(qas)):
            id_ = qas[k]["id"]
            answer = qas[k]["answer"]
            question = qas[k]["question"]
            ids.append(id_)
            titles[id_] = title
            contexts[id_] = context
            answers_start[id_] = answer["answer_start"]
            answers_text[id_] = answer["text"]
            questions[id_] = question


In [6]:
max_para = 600
max_q = 50
dimension = 300
# train_len = len(ids)
train_len = 500

In [7]:
paras = np.zeros((train_len, max_para, dimension))
qns = np.zeros((train_len, max_q, dimension))
exact_match = np.zeros((train_len, max_para, 3))
lmtzr = WordNetLemmatizer()

In [8]:
for i in range(train_len):
    if i % 1000 == 0:
        print(i)
    words = word_tokenize(contexts[ids[i]])
    qs = word_tokenize(questions[ids[i]])
    qs_1 = [w.lower() for w in qs]
    qs_2 = [lmtzr.lemmatize(w) for w in qs_1]
    for j in range(min(max_para - 1, len(words))):
        if words[j].lower() in d:
            paras[i][j] = d[words[j].lower()]
        if words[j] in qs:
            exact_match[i][j][0] = 1
        if words[j].lower() in qs_1:
            exact_match[i][j][1] = 1
        if lmtzr.lemmatize(words[j].lower()) in qs_2:
            exact_match[i][j][2] = 1
    for j in range(min(max_q - 1, len(qs))):
        if qs[j].lower() in d:
            qns[i][j] = d[qs[j].lower()]

0


In [9]:
def create_one_hot_answer(para, answer, answer_start, option, max_length):
    if option == "s":
        from_begin = para[0:answer_start]
    else:
        from_begin = para[0:answer_start+len(answer)]
    l = len(word_tokenize(from_begin))
    one_hot = np.zeros(max_length)
    if option == "s":
        one_hot[min(max_para-1,l)] = 1
    else:
        one_hot[min(max_para-1,l-1)] = 1
    return one_hot
    

In [10]:
ans_starts = [create_one_hot_answer(contexts[ids[i]], answers_text[ids[i]], answers_start[ids[i]], "s", max_para) for i in range(train_len)]
ans_ends = [create_one_hot_answer(contexts[ids[i]], answers_text[ids[i]], answers_start[ids[i]], "e", max_para) for i in range(train_len)]

In [11]:
ans_s = np.array(ans_starts)
ans_e = np.array(ans_ends)

In [12]:
class MyLayer(Layer):

    def __init__(self, output_dim, name, **kwargs):
        self.output_dim = output_dim
        self.name = name
        super(MyLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        # Create a trainable weight variable for this layer.
        self.W = self.add_weight(name=self.name, 
                                      shape=(input_shape[2], self.output_dim),
                                      initializer='uniform',
                                      trainable=True)
        super(MyLayer, self).build(input_shape)  # Be sure to call this somewhere!

    def call(self, x):
        return K.dot(x, self.W)

    def compute_output_shape(self, input_shape):
        return (input_shape[0],input_shape[1], self.output_dim)

In [13]:
P = layers.Input(shape=(max_para,dimension), dtype='float32')
Q = layers.Input(shape=(max_q,dimension), dtype='float32')
P_exact_match = layers.Input(shape=(max_para,3), dtype='float32')

In [14]:
alpha_w = MyLayer(128,"alpha_w")
alpha_l = MyLayer(128,"alpha_l")
alpha_h = MyLayer(128,"alpha_h")
alpha_u = MyLayer(128,"alpha_u")
alpha_s = MyLayer(128,"alpha_s")
beta = MyLayer(1,"beta")
gamma_s = MyLayer(250, "gamma_s")
gamma_e = MyLayer(250, "gamma_e")
hidden_unit = 125

In [15]:
# Word level fusion
aligned_P = layers.Activation("relu")(alpha_w(P))
aligned_Q = layers.Activation("relu")(alpha_w(Q))
word_fusion = layers.dot([aligned_Q, aligned_P], axes=2)
word_fusion = Reshape((max_para * max_q,))(word_fusion)
word_fusion = layers.Activation("softmax")(word_fusion)
word_fusion = Reshape((max_q, max_para,))(word_fusion)
word_fusion = layers.dot([word_fusion,Q],axes=1)

In [16]:
# READING
final_P = layers.concatenate([P, P_exact_match, word_fusion], axis=2)
final_P = Dropout(.4)(final_P)
low_P = Bidirectional(LSTM(hidden_unit,return_sequences=True))(final_P)
low_P = Dropout(.4)(low_P)
high_P = Bidirectional(LSTM(hidden_unit,return_sequences=True))(low_P)
high_P = Dropout(.4)(high_P)


In [17]:
final_Q = Dropout(.4)(Q)
low_Q = Bidirectional(LSTM(hidden_unit,return_sequences=True))(final_Q)
low_Q = Dropout(.4)(low_Q)
high_Q = Bidirectional(LSTM(hidden_unit,return_sequences=True))(low_Q)
high_Q = Dropout(.4)(high_Q)

In [18]:
# Question understanding
U_Q = layers.concatenate([low_Q, high_Q], axis=2)
U_Q = Bidirectional(LSTM(hidden_unit,return_sequences=True))(U_Q)
U_Q = Dropout(.4)(U_Q)
w = beta(U_Q)
UQ = layers.dot([w,U_Q],axes=1)

In [19]:
# History of words
HP = layers.concatenate([P, low_P, high_P], axis = 2)
HQ = layers.concatenate([Q, low_Q, high_Q], axis = 2)

In [20]:
# Low level fusion
low_HP = layers.Activation("relu")(alpha_l(HP))
low_HQ = layers.Activation("relu")(alpha_l(HQ))
low_fusion = layers.dot([low_HQ, low_HP], axes=2)
low_fusion = Reshape((max_para * max_q,))(low_fusion)
low_fusion = layers.Activation("softmax")(low_fusion)
low_fusion = Reshape((max_q, max_para,))(low_fusion)
low_fusion = layers.dot([low_fusion,low_Q],axes=1)
low_fusion = Dropout(.4)(low_fusion)

In [21]:
# High level fusion
high_HP = layers.Activation("relu")(alpha_h(HP))
high_HQ = layers.Activation("relu")(alpha_h(HQ))
high_fusion = layers.dot([high_HQ, high_HP], axes=2)
high_fusion = Reshape((max_para * max_q,))(high_fusion)
high_fusion = layers.Activation("softmax")(high_fusion)
high_fusion = Reshape((max_q, max_para,))(high_fusion)
high_fusion = layers.dot([high_fusion,high_Q],axes=1)
high_fusion = Dropout(.4)(high_fusion)

In [22]:
# Understanding level fusion
U_HP = layers.Activation("relu")(alpha_u(HP))
U_HQ = layers.Activation("relu")(alpha_u(HQ))
U_fusion = layers.dot([U_HQ, U_HP], axes=2)
U_fusion = Reshape((max_para * max_q,))(U_fusion)
U_fusion = layers.Activation("softmax")(U_fusion)
U_fusion = Reshape((max_q, max_para,))(U_fusion)
U_fusion = layers.dot([U_fusion,U_Q],axes=1)
U_fusion = Dropout(.4)(U_fusion)

In [23]:
# Fully-aware attention
P_fusion = layers.concatenate([low_P, high_P, low_fusion, high_fusion, U_fusion], axis=2)
V_P = Bidirectional(LSTM(hidden_unit,return_sequences=True))(P_fusion)
V_P = Dropout(.4)(V_P)
H_P = layers.concatenate([P, P_fusion, V_P], axis=2)

In [24]:
# Self-boosted fusion
self_HP = layers.Activation("relu")(alpha_s(H_P))
self_fusion = layers.dot([self_HP, self_HP], axes=2)
self_fusion = Reshape((max_para * max_para,))(self_fusion)
self_fusion = layers.Activation("softmax")(self_fusion)
self_fusion = Reshape((max_para, max_para))(self_fusion)
self_fusion = layers.dot([self_fusion,V_P],axes=1)
self_fusion = Dropout(.4)(self_fusion)

In [25]:
U_P = layers.concatenate([V_P, self_fusion], axis=2)
U_P = Bidirectional(LSTM(hidden_unit,return_sequences=True))(U_P)
U_P = Dropout(.4)(U_P)

In [26]:
start = gamma_s(U_P)
start = layers.dot([start, UQ], axes=2)
start = Reshape((max_para,))(start)
start = layers.Activation("softmax")(start)

In [27]:
end = gamma_e(U_P)
end = layers.dot([end, UQ], axes=2)
end = Reshape((max_para,))(end)
end = layers.Activation("softmax")(end)

In [28]:
model = Model([P, Q, P_exact_match],[start, end])

In [29]:
model.compile(optimizer="adamax",
              loss='categorical_crossentropy',
              metrics=['accuracy'])

In [30]:
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 600, 300)     0                                            
__________________________________________________________________________________________________
input_2 (InputLayer)            (None, 50, 300)      0                                            
__________________________________________________________________________________________________
my_layer_1 (MyLayer)            multiple             38400       input_1[0][0]                    
                                                                 input_2[0][0]                    
__________________________________________________________________________________________________
activation_2 (Activation)       (None, 50, 128)      0           my_layer_1[1][0]                 
__________

In [None]:
print('Training')
for i in range(10):
    print("*********************--", i, "--*********************")
    model.fit([paras, qns, exact_match], [ans_s, ans_e],
              batch_size=32, epochs=2, validation_split=0.1)
    model.save('fusion.h5')

Training
*********************-- 0 --*********************
Train on 450 samples, validate on 50 samples
Epoch 1/2
