In [1]:
import sys
sys.path.append('..\\')
import nltk
import os
from keras.layers import Bidirectional, Concatenate, Permute, Dot, Input, LSTM, Multiply
from keras.layers import RepeatVector, Dense, Activation, Lambda, Reshape
from keras.optimizers import Adam
from keras.utils import to_categorical
from keras.models import load_model, Model
import keras.backend as K
import numpy as np
import re
import pickle
from scripts.attention_utils import softmax, one_hot
from scripts.data_loader import DataGenerator

Using TensorFlow backend.


In [2]:
# Load the (vocab_to_int, int_to_vocab) lookup tables from the pickled dictionary file.
# os.path.join builds the relative path with the separator of the current OS instead of a
# hard-coded "\\", so the cell also runs outside Windows.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file -- only load a
# dictionary file that was generated locally / comes from a trusted source.
with open(os.path.join("..", "data", "dicts"), 'rb') as file:
    vocab_to_int, int_to_vocab = pickle.load(file)

## Build a Model

In [3]:
# Sequence lengths: the input (Tx) and the output (Ty) are both fixed at 200 steps.
Tx = Ty = 200
# Width of the one-hot vectors: one slot per unique character in the vocabulary.
vocab_size = len(vocab_to_int)
# LSTM sizes: n_a units in a single encoder LSTM, n_s units in a single decoder LSTM.
n_a, n_s = 32, 64

### Attention mechanism

In [4]:
# Layers used by one_step_attention. They are created once, at module level, so that every
# decoder time step calls the very same layer objects (and therefore the same weights).
repeat_layer = RepeatVector(n=Tx)                      # tiles the decoder state Tx times
concatenate_layer = Concatenate(axis=-1)               # joins tensors along the last axis
dense_layer_1 = Dense(units=10, activation="tanh")     # first layer of the scoring network
dense_layer_2 = Dense(units=1, activation="relu")      # second layer: one value per position
activation_layer = Activation(activation=softmax, name='attention_weights')
dot_prod_layer = Dot(axes=1)                           # dot product taken over axis 1

In [5]:
def one_step_attention(a, s_prev):
    """
    Computes the attention context vector for a single decoder time step.

    The previous decoder state is repeated across all Tx positions, concatenated with the
    Bi-LSTM hidden states, passed through the two shared Dense layers and the
    'attention_weights' activation, and the resulting weights are combined with "a" by a
    dot product over axis 1.

    Args:
    a (tensor): hidden states of the Bi-LSTM, shape (?, Tx, 2*n_a)
    s_prev (tensor): previous hidden state of the post-attention LSTM, shape (?, n_s)

    Returns:
    context (tensor): context vector that is fed to the next post-attention LSTM step
    """
    # Repeat s_prev Tx times -> (?, Tx, n_s), so it can be paired with every hidden state in "a".
    s_tiled = repeat_layer(s_prev)

    # Concatenate on the last axis, then run the small two-layer network to get the energies.
    energies = dense_layer_2(dense_layer_1(concatenate_layer([a, s_tiled])))

    # Turn the energies into attention weights and combine them with the hidden states.
    attention = activation_layer(energies)
    return dot_prod_layer([attention, a])

### Define model for training phase

In [6]:
# Decoder layers shared by every time step of the training and the inference graph.
reshape_layer = Reshape(target_shape=(1, vocab_size))         # (?, vocab_size) -> (?, 1, vocab_size)
concatenate_layer_1 = Concatenate(axis=-1)                    # joins decoder input and context
post_activation_LSTM_cell = LSTM(units=n_s, return_state=True)
output_layer = Dense(units=vocab_size, activation=softmax)    # one output value per vocabulary entry

In [7]:
# Encoder: the one-hot input sequence is read by a bidirectional LSTM that returns its
# hidden state at every one of the Tx positions.
X = Input(shape=(Tx, vocab_size), name='X')
encoder_lstm = LSTM(n_a, return_sequences=True)
a = Bidirectional(encoder_lstm)(X)

In [8]:
# Decoder inputs: initial hidden / cell state of the post-attention LSTM and the one-hot
# target sequence whose steps are fed into the decoder (teacher forcing).
s0 = Input(shape=(n_s,), name='s0')
c0 = Input(shape=(n_s,), name='c0')
Y_true = Input(shape=(Ty, vocab_size), name='Y_true')
s = s0
c = c0

# Initialize empty list of outputs
outputs = []

for t in range(Ty):

    # Perform one step of the attention mechanism to get back the context vector at step t
    context = one_step_attention(a, s) # context.shape  = (?, 1, 2*n_a)
    # Slice step t out of Y_true. The step index is handed over through `arguments` rather
    # than captured from the enclosing loop: a plain `lambda x: x[:, t, :]` only looks `t`
    # up when it is called, so any later re-invocation of the layer would use the final
    # value of `t` for all Ty layers.
    # NOTE(review): step t is fed Y_true[:, t], while the inference graph feeds the previous
    # prediction -- presumably DataGenerator supplies Y_true already shifted by one step; confirm.
    y_true = Lambda(lambda x, step: x[:, step, :], arguments={'step': t})(Y_true) # y_true.shape = (?, vocab_size)
    y_true = reshape_layer(y_true) # y_true.shape = (?, 1, vocab_size)
    context = concatenate_layer_1([y_true, context])
    # Apply the post-attention LSTM cell to the "context" vector.
    # The layer was built with return_state=True, so it returns three tensors; the first
    # becomes the new s, the last the new c.
    s, _, c = post_activation_LSTM_cell(context, initial_state=[s, c])

    # Apply Dense layer to the hidden state output of the post-attention LSTM
    out = output_layer(s)

    outputs.append(out)

# The model has one output tensor per decoder step (Ty outputs in total).
training_model = Model(inputs=[X, s0, c0, Y_true], outputs=outputs)

In [9]:
# Adam optimizer with learning rate 0.01; categorical cross-entropy loss and accuracy metric.
opt = Adam(lr=0.01, beta_1=0.9, beta_2=0.999)
training_model.compile(
    optimizer=opt,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [10]:
# Restore the weights written by an earlier training run. The path is assembled with
# os.path.join so that it does not depend on the Windows "\\" separator.
training_model.load_weights(os.path.join("..", "models", "attention_model", "attention.h5"))

In [None]:
# Sample identifiers are the stringified indices "0" .. "17999"; the same list is passed
# to DataGenerator for both of its arguments.
filenames = list(map(str, range(18000)))
train_batches = DataGenerator(filenames, filenames)
training_model.fit_generator(
    generator=train_batches,
    steps_per_epoch=None,
    epochs=1,
    workers=4,
    shuffle=True,
)

In [29]:
# Persist the trained weights (overwrites the file the notebook loads its weights from).
# The path is assembled with os.path.join so that it does not depend on the Windows "\\" separator.
training_model.save_weights(os.path.join("..", "models", "attention_model", "attention.h5"))

### Define model for inference

In [12]:
# Inference graph: reuses the encoder output `a` and the shared decoder layers of the
# training graph, but feeds each step's own prediction back in as the next decoder input.
Y_inf = Input(shape=(1, vocab_size), name='Y_inf')  # decoder input for the first step
s_inf = s0
c_inf = c0
y_inf = Y_inf

# A single weight-free layer is shared by all steps instead of building a new, identical
# Lambda layer in every iteration (same convention as the other shared layers).
# NOTE(review): one_hot comes from scripts.attention_utils; presumably it maps the
# (?, vocab_size) prediction to a (?, 1, vocab_size) one-hot tensor -- confirm.
one_hot_layer = Lambda(one_hot)

outputs_inf = []

for _step in range(Ty):
    # Attention over the encoder states, conditioned on the current decoder state.
    context_inf = one_step_attention(a, s_inf)
    context_inf = concatenate_layer_1([y_inf, context_inf])
    s_inf, _, c_inf = post_activation_LSTM_cell(context_inf, initial_state=[s_inf, c_inf])

    out_inf = output_layer(s_inf)
    outputs_inf.append(out_inf)
    # The prediction of this step becomes the decoder input of the next step.
    y_inf = one_hot_layer(out_inf)

# Same encoder / state inputs as the training model; Y_inf replaces Y_true.
inference_model = Model([X, s0, c0, Y_inf], outputs_inf)

Context before concat:  (?, 1, 64)
Context after concat:  (?, 1, 120)
out shape: (?, 56)
y shape: (?, 1, 56)
Context before concat:  (?, 1, 64)
Context after concat:  (?, 1, 120)
out shape: (?, 56)
y shape: (?, 1, 56)
Context before concat:  (?, 1, 64)
Context after concat:  (?, 1, 120)
out shape: (?, 56)
y shape: (?, 1, 56)
Context before concat:  (?, 1, 64)
Context after concat:  (?, 1, 120)
out shape: (?, 56)
y shape: (?, 1, 56)
Context before concat:  (?, 1, 64)
Context after concat:  (?, 1, 120)
out shape: (?, 56)
y shape: (?, 1, 56)
Context before concat:  (?, 1, 64)
Context after concat:  (?, 1, 120)
out shape: (?, 56)
y shape: (?, 1, 56)
Context before concat:  (?, 1, 64)
Context after concat:  (?, 1, 120)
out shape: (?, 56)
y shape: (?, 1, 56)
Context before concat:  (?, 1, 64)
Context after concat:  (?, 1, 120)
out shape: (?, 56)
y shape: (?, 1, 56)
Context before concat:  (?, 1, 64)
Context after concat:  (?, 1, 120)
out shape: (?, 56)
y shape: (?, 1, 56)
Context before conc

### Make an inference

In [None]:
def predict(inference_model, )