In [None]:
import sys
# Make the project root importable (needed for the `scripts` package below).
# A bare '..' resolves to the parent directory on every OS, whereas the
# previous '..\\' only named the parent directory on Windows.
sys.path.append('..')
import nltk
import os
from keras.layers import Bidirectional, Concatenate, Permute, Dot, Input, LSTM, Multiply
from keras.layers import RepeatVector, Dense, Activation, Lambda, Reshape
from keras.optimizers import Adam
from keras.utils import to_categorical
from keras.models import load_model, Model
import keras.backend as K
import numpy as np
import re
import pickle
import tensorflow as tf
from scripts.attention_utils import softmax, one_hot
from scripts.attention_preprocessing import transform_data
from scripts.data_loader import DataGenerator

Using TensorFlow backend.


In [2]:
# Load the two vocabulary lookup tables stored together in one pickle file.
# The path is assembled with os.path.join so the cell is not tied to Windows separators.
# NOTE: pickle.load can execute arbitrary code -- only open files you produced yourself.
with open(os.path.join("..", "data", "dicts"), 'rb') as file:
    vocab_to_int, int_to_vocab = pickle.load(file)

## Build a Model

In [3]:
# Sequence lengths: input (encoder) and output (decoder)
Tx = 200
Ty = 200

# Number of unique characters, i.e. the width of every one-hot vector
vocab_size = len(vocab_to_int)

# LSTM sizes: n_a units for a single encoder LSTM, n_s units for the decoder LSTM
n_a = 32
n_s = 64

### Attention mechanism

In [4]:
# Attention layers are created once at module level so that every call to
# one_step_attention goes through the same layer objects (and hence the same weights).
repeat_layer = RepeatVector(Tx)            # tiles the decoder state Tx times
concatenate_layer = Concatenate(axis=-1)   # joins encoder states and tiled decoder state
dense_layer_1 = Dense(10, activation="tanh")   # intermediate energies
dense_layer_2 = Dense(1, activation="relu")    # one energy per input position
# `softmax` here is the helper imported from scripts.attention_utils
activation_layer = Activation(softmax, name='attention_weights')
dot_prod_layer = Dot(axes=1)               # combines attention weights with encoder states

In [5]:
def one_step_attention(a, s_prev):
    """
    Computes the attention context vector for a single decoder time step.

    The previous decoder state is paired with every encoder hidden state, scored by
    two small dense layers, normalised into attention weights and finally used to
    take a weighted combination of the encoder hidden states.

    Args:
    a (tensor): hidden states produced by the Bi-LSTM encoder, shape (?, Tx, 2*n_a)
    s_prev (tensor): previous hidden state of the post-attention LSTM, shape (?, n_s)

    Returns:
    context (tensor): context vector fed to the next post-attention LSTM step
    """
    # Tile the decoder state Tx times so it lines up with each encoder hidden state
    s_tiled = repeat_layer(s_prev)
    # Put encoder state and decoder state side by side on the feature axis
    features = concatenate_layer([a, s_tiled])
    # Score every input position: intermediate energies first, then a single energy
    energies = dense_layer_2(dense_layer_1(features))
    # Turn the energies into attention weights
    alphas = activation_layer(energies)
    # Weight the encoder hidden states by the attention weights
    return dot_prod_layer([alphas, a])

### Define model for training phase

In [6]:
# Decoder layers shared by all time steps (and by the training and inference graphs)
reshape_layer = Reshape((1, vocab_size))         # (?, vocab_size) -> (?, 1, vocab_size)
concatenate_layer_1 = Concatenate(axis=-1)       # joins previous token and context vector
# return_state=True makes the layer return its output together with both LSTM states
post_activation_LSTM_cell = LSTM(n_s, return_state=True)
output_layer = Dense(vocab_size, activation=softmax)  # distribution over characters

In [7]:
# Encoder: a bidirectional LSTM reads the one-hot encoded input and returns
# its hidden state at every one of the Tx positions (return_sequences=True).
X = Input(name='X', shape=(Tx, vocab_size))
a = Bidirectional(LSTM(units=n_a, return_sequences=True))(X)

In [9]:
# Decoder inputs: initial hidden / cell state of the post-attention LSTM and the
# ground-truth sequence that is fed back into the decoder (teacher forcing).
s0 = Input(shape=(n_s,), name='s0')
c0 = Input(shape=(n_s,), name='c0')
Y_true = Input(shape=(Ty, vocab_size), name='Y_true')
s = s0
c = c0

# Initialize empty list of outputs
outputs = []

for t in range(Ty):

    # Perform one step of the attention mechanism to get back the context vector at step t
    context = one_step_attention(a, s) # context.shape  = (?, 1, 2*n_a)
    # Bind the current step as a default argument: a plain `lambda x: x[:, t, :]` looks up
    # `t` when it is *called*, so any later re-invocation of the layer (e.g. when the model
    # is cloned or rebuilt) would slice the last step for every layer.
    # NOTE(review): the slice at index t is fed in while output t is being predicted; this
    # assumes DataGenerator supplies Y_true shifted by one step relative to the targets -- TODO confirm.
    y_true = Lambda(lambda x, step=t: x[:, step, :])(Y_true) # y_true.shape = (?, vocab_size)
    y_true = reshape_layer(y_true) # y_true.shape = (?, 1, vocab_size)
    context = concatenate_layer_1([y_true, context])
    # Apply the post-attention LSTM cell to the "context" vector.
    # The layer returns [output, hidden state, cell state]; the first two are kept as s / ignored.
    s, _, c = post_activation_LSTM_cell(context, initial_state=[s, c])

    # Apply Dense layer to the hidden state output of the post-attention LSTM
    out = output_layer(s)

    outputs.append(out)

# One output tensor per decoder time step
training_model = Model(inputs=[X, s0, c0, Y_true], outputs=outputs)

In [22]:
# Adam optimiser; the same loss and metric are applied to every per-step output
opt = Adam(lr=0.02, beta_1=0.9, beta_2=0.999)
training_model.compile(
    optimizer=opt,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [23]:
# Sample identifiers "0" .. "17999"; the same list is handed to DataGenerator twice
filenames = list(map(str, range(18000)))
training_model.fit_generator(
    generator=DataGenerator(filenames, filenames),
    steps_per_epoch=None,
    epochs=1,
    workers=4,
    shuffle=True,
)

Epoch 1/1
   59/18000 [..............................] - ETA: 64:28:42 - loss: 259.8138 - dense_3_loss: 0.0029 - dense_3_acc: 0.9775 - dense_3_acc_1: 0.5629 - dense_3_acc_2: 0.4502 - dense_3_acc_3: 0.3424 - dense_3_acc_4: 0.2464 - dense_3_acc_5: 0.2944 - dense_3_acc_6: 0.2815 - dense_3_acc_7: 0.2780 - dense_3_acc_8: 0.3256 - dense_3_acc_9: 0.3292 - dense_3_acc_10: 0.3485 - dense_3_acc_11: 0.3531 - dense_3_acc_12: 0.3614 - dense_3_acc_13: 0.3637 - dense_3_acc_14: 0.3500 - dense_3_acc_15: 0.3307 - dense_3_acc_16: 0.3402 - dense_3_acc_17: 0.3366 - dense_3_acc_18: 0.3461 - dense_3_acc_19: 0.3393 - dense_3_acc_20: 0.3398 - dense_3_acc_21: 0.3537 - dense_3_acc_22: 0.3520 - dense_3_acc_23: 0.3544 - dense_3_acc_24: 0.3624 - dense_3_acc_25: 0.3629 - dense_3_acc_26: 0.3625 - dense_3_acc_27: 0.3512 - dense_3_acc_28: 0.3527 - dense_3_acc_29: 0.3447 - dense_3_acc_30: 0.3617 - dense_3_acc_31: 0.3529 - dense_3_acc_32: 0.3607 - dense_3_acc_33: 0.3495 - dense_3_acc_34: 0.3664 - dense_3_acc_35: 0.3625 -

KeyboardInterrupt: 

In [13]:
# Persist the trained weights. The path is built with os.path.join so it is not tied to
# Windows separators, and the folder is created first so a long training run is not
# lost to a missing directory.
weights_dir = os.path.join("..", "models", "attention_model")
os.makedirs(weights_dir, exist_ok=True)
training_model.save_weights(os.path.join(weights_dir, "attention.h5"))

### Define model for inference

In [None]:
# Beam width: number of hypotheses tracked in the in-graph beam-search cell (also used as k in top_k)
B = 10

In [None]:
# In-graph beam search over B hypotheses.
# Fixed in this cell: the misspelled name `indeces` (NameError) and the hard-coded one-hot
# depth 56, which has to equal vocab_size for the tensor to fit the shared decoder layers.
# NOTE(review): the cell has never been executed and still cannot run as written:
#   * out_inf holds symbolic Keras tensors, so np.concatenate / np.argmax cannot evaluate them;
#   * `indices` is a tensor, so `indices[i]//vocab_size` cannot index the Python lists below;
#   * prob[i] is read from y_inf (the previous input) rather than from the model output.
# Ranking candidates needs concrete values, so the search most likely has to run outside the
# graph (one-step decoder + predict() + NumPy), which is what `beam_search` sets out to do.
Y_inf = Input(shape=(1, vocab_size), name='Y_inf')
s_inf = [s0 for i in range(B)]
c_inf = [c0 for i in range(B)]
y_inf = [Y_inf for i in range(B)]

outputs_inf = [[] for i in range(B)]
prob = [1 for i in range(B)]
context_inf = [None for i in range(B)]
out_inf = [None for i in range(B)]
s_inf_new = [None for i in range(B)]
c_inf_new = [None for i in range(B)]
out_inf_new = [None for i in range(B)]
outputs_inf_new = [None for i in range(B)]

for t in range(Ty):

    # Advance every hypothesis by one decoder step
    for i in range(B):
        context_inf[i] = one_step_attention(a, s_inf[i])
        context_inf[i] = concatenate_layer_1([y_inf[i], context_inf[i]])
        s_inf[i], _, c_inf[i] = post_activation_LSTM_cell(context_inf[i], initial_state=[s_inf[i], c_inf[i]])

        out_inf[i] = output_layer(s_inf[i])

        outputs_inf[i].append(out_inf[i])
        # Scale the step distribution by the running score of the hypothesis
        out_inf[i]*=prob[i]

    # All hypotheses are identical at the first step, so only the first one is ranked
    if not t:
        _, indices = tf.math.top_k(out_inf[0], k=B)
    else:
        concat = np.concatenate(out_inf, axis=-1)
        _, indices = tf.math.top_k(concat, k=B)

    # A flat candidate index encodes (hypothesis, character) as hypothesis*vocab_size + character
    for i in range(B):
        index = indices[i]//vocab_size
        s_inf_new[i] = s_inf[index]
        c_inf_new[i] = c_inf[index]
        out_inf_new[i] = out_inf[index]
        outputs_inf_new[i] = outputs_inf[index]

    for i in range(B):
        s_inf[i] = s_inf_new[i]
        c_inf[i] = c_inf_new[i]
        out_inf[i]  = out_inf_new[i]
        outputs_inf[i] = outputs_inf_new[i]
        prob[i] = y_inf[i][indices[i]%vocab_size]
        y_inf[i] = tf.one_hot(indices[i]%vocab_size, vocab_size)
        y_inf[i] = RepeatVector(1)(y_inf[i])

concat = np.concatenate(out_inf, axis=0)
index = np.argmax(concat)
inference_model = Model([X, s0, c0, Y_inf], outputs_inf[index//vocab_size])

In [18]:
# Stand-alone encoder model: maps the input X to the Bi-LSTM hidden states `a`,
# so they can be computed with predict() independently of the decoder.
encoder = Model(inputs=[X], outputs=[a])

In [20]:
def one_step_inference_model(vocab_size):
    """
    Builds a decoder model that advances the output sequence by a single step.

    The model is wired from the shared module-level layers and uses the
    module-level inputs s0 / c0 as its state inputs.

    Args:
    vocab_size (int): number of distinct characters for output
    Returns:
    inference_model (Model): keras model taking [a, s0, c0, Y_inf] and returning
        [out, s, c] -- the output distribution and the two updated LSTM states
    """
    encoder_states = Input(shape=(Tx, 2*n_a), name='a')
    prev_token = Input(shape=(1, vocab_size), name='Y_inf')

    # Attend over the encoder states, then append the context to the previous token
    attended = one_step_attention(encoder_states, s0)
    decoder_input = concatenate_layer_1([prev_token, attended])

    # One step of the post-attention LSTM followed by the output projection
    s, _, c = post_activation_LSTM_cell(decoder_input, initial_state=[s0, c0])
    out = output_layer(s)

    return Model(inputs=[encoder_states, s0, c0, prev_token], outputs=[out, s, c])

In [None]:
def beam_search(X, beam_width, vocab_size, Ty, n_s):
    """
    Performs beam search among outputs of inference model

    The encoder is run once; afterwards a single one-step decoder is advanced Ty times.
    At every step each live hypothesis is extended by every character, the candidates
    are scored by their accumulated log-probability and the best `beam_width` survive.

    Args:
    X (numpy.ndarray): sentences in a form of numpy arrays
    beam_width (int): number of best candidates to choose
    vocab_size (int): number of distinct characters for output
    Ty (int): number of time steps to perform
    n_s (int): number of neurons in post attention LSTM
    Returns:
    sequences (numpy.ndarray): character indices of the surviving hypotheses, best first,
        shape (m, k, Ty) where k is beam_width (or fewer if fewer candidates exist)
    scores (numpy.ndarray): accumulated log-probability of every hypothesis, shape (m, k)
    Raises:
    ValueError: if beam_width is smaller than 1
    """
    if beam_width < 1:
        raise ValueError("beam_width must be at least 1")

    m = X.shape[0] # batch_size
    rows = np.arange(m)
    # get output of encoder
    a = encoder.predict(x=[X])
    # The decoder is built from shared global layers, so every instance would carry the
    # same weights -- one model serves all hypotheses.
    decoder = one_step_inference_model(vocab_size)

    # Start from one hypothesis per sentence: zero states and an all-zero previous token.
    # Leading axis = hypothesis, second axis = sentence in the batch.
    s = np.zeros((1, m, n_s))
    c = np.zeros((1, m, n_s))
    Y_prev = np.zeros((1, m, 1, vocab_size))
    scores = np.zeros((m, 1))
    sequences = np.zeros((m, 1, 0), dtype=int)

    for t in range(Ty):
        n_live = s.shape[0]
        # Each predict call returns [out, s, c] for one hypothesis
        steps = [decoder.predict([a, s[k], c[k], Y_prev[k]]) for k in range(n_live)]
        probs = np.stack([step[0] for step in steps], axis=1)  # (m, n_live, vocab_size)
        s_all = np.stack([step[1] for step in steps], axis=0)  # (n_live, m, n_s)
        c_all = np.stack([step[2] for step in steps], axis=0)  # (n_live, m, n_s)

        # Work in log space so that long sequences do not underflow; clamp to avoid log(0)
        candidates = scores[:, :, None] + np.log(np.maximum(probs, 1e-12))
        flat = candidates.reshape(m, -1)  # column = hypothesis * vocab_size + character
        n_keep = min(beam_width, flat.shape[1])
        best = np.argsort(-flat, axis=1)[:, :n_keep]  # best candidates first
        parent = best // vocab_size  # hypothesis each survivor extends
        token = best % vocab_size    # character appended to it

        scores = flat[rows[:, None], best]
        sequences = np.concatenate(
            [sequences[rows[:, None], parent], token[:, :, None]], axis=2)
        # Carry over the LSTM states of the parent hypothesis of every survivor
        s = s_all[parent.T, rows[None, :]]
        c = c_all[parent.T, rows[None, :]]
        # The chosen character, one-hot encoded, becomes the next decoder input
        Y_prev = np.eye(vocab_size)[token.T][:, :, None, :]

    return sequences, scores
                

### Make an inference

In [None]:
# Sample input for the inference demo: a section label and a sentence separated by a tab character.
text = """BACKGROUND	The emergence of HIV as a chronic condition means that people living with HIV are required to take more responsibility for the self-management of their condition , including making physical , emotional and social adjustments ."""

In [None]:
def predict(text, inference_model, vocab_to_int, int_to_vocab):
    """
    Runs the inference model on `text` and decodes the most likely characters.

    Args:
    text (str): raw input passed to transform_data
    inference_model (Model): keras model taking [X, s0, c0, Y_inf]
    vocab_to_int (dict): lookup handed to transform_data to encode the text
    int_to_vocab (dict): lookup used to turn predicted indices back into characters
    Returns:
    prediction (list of str): one decoded string per row of the encoded input
    t (numpy.ndarray): model output for the first row at step 13 (debugging aid)
    """
    encoded = np.array(transform_data(text, vocab_to_int))
    one_hot_input = to_categorical(encoded, num_classes=vocab_size)

    # Zero initial states and an all-zero "previous token" for every row
    n_rows = one_hot_input.shape[0]
    initial_s = np.zeros((n_rows, n_s))
    initial_c = np.zeros((n_rows, n_s))
    first_token = np.zeros((n_rows, 1, vocab_size))

    raw = inference_model.predict([one_hot_input, initial_s, initial_c, first_token],
                                  batch_size=100)
    # The per-step outputs are stacked on axis 0; swap so that rows come first
    probabilities = np.array(raw).swapaxes(0, 1)
    t = probabilities[0, 13, :]

    best = np.argmax(probabilities, axis=-1)
    decoded = ["".join(int_to_vocab[idx] for idx in row) for row in best]
    return decoded, t


In [None]:
# Decode the sample text; `t` receives the second value returned by predict.
# NOTE(review): inference_model is only assigned in the in-graph beam-search cell, which has no execution count.
prediction, t = predict(text, inference_model, vocab_to_int, int_to_vocab)

In [None]:
# Show the decoded strings returned by predict
print(prediction)

In [None]:
# Pair every entry of t with its character and list the pairs from lowest to highest value
t1 = [(int_to_vocab[position], value) for position, value in enumerate(t)]
sorted(t1, key=lambda pair: pair[1])

In [None]:
# Scratch check: a function body reads a module-level name when it is called
k = 5


def z():
    """Return the current value of the module-level variable k."""
    return k

In [None]:
# Display the value returned by z()
z()