In [1]:
import sys
sys.path.append('..\\')
import nltk
import os
from keras.layers import Bidirectional, Concatenate, Permute, Dot, Input, LSTM, Multiply
from keras.layers import RepeatVector, Dense, Activation, Lambda, Reshape
from keras.optimizers import Adam
from keras.utils import to_categorical
from keras.models import load_model, Model
import keras.backend as K
import numpy as np
import re
from scripts.attention_preprocessing import load_data, create_dicts
from scripts.data_loader import DataGenerator

Using TensorFlow backend.


In [2]:
# Location of the training corpus, relative to the notebook's directory.
# Built with os.path.join so the path is not tied to Windows separators.
DATA_FOLDER = os.path.join("..", "data")
train_path = os.path.join(DATA_FOLDER, "train.txt")
with open(train_path, 'r') as file:
    text = file.read()

# Ordered (pattern, replacement) pairs applied one after another to the corpus.
# Patterns are raw strings so that regex escapes such as `\.` are not parsed
# as (invalid) Python string escapes.
CLEANUP_RULES = [
    # Remove runs of letters/periods containing two or more capitals (e.g. "USA").
    (r"\b(?:[a-z.]*[A-Z][a-z.]*){2,}", ""),
    # Whitelist: keep only ASCII letters, spaces and periods.
    (r"[^a-zA-Z .]+", ""),
    # NOTE(review): the whitelist above already strips apostrophes, digits,
    # '!' and '?', so the rules below that mention those characters can no
    # longer match. They stay in their original position so the resulting
    # text (and therefore the vocabulary) is unchanged; moving them before
    # the whitelist would alter the output.
    (r"'92t", "'t"),
    (r"'92s", "'s"),
    (r"'92m", "'m"),
    (r"'92ll", "'ll"),
    (r"'91", ""),
    (r"'92", ""),
    (r"'93", ""),
    (r"'94", ""),
    # Make sure sentence-ending punctuation is followed by a space.
    (r"\.", ". "),
    (r"!", "! "),
    (r"\?", "? "),
    # Collapse the runs of spaces produced by the steps above.
    (r" +", " "),
]
for pattern, replacement in CLEANUP_RULES:
    text = re.sub(pattern, replacement, text)

vocab_to_int, int_to_vocab = create_dicts(text)
# _, _, vocab_to_int, int_to_vocab = load_data(train_path, save=False)

The vocabulary contains 56 characters.
[' ', '.', '<EOS>', '<PAD>', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']


## Build a Model

In [7]:
# Input and output sequences share the same fixed length.
Tx = Ty = 200
# Number of unique characters, i.e. one entry per key of the vocabulary mapping.
vocab_size = len(vocab_to_int)


### Attention layer

In [8]:
def softmax(x, axis=1):
    """Softmax activation function normalised along a chosen axis.

    # Arguments
        x : Tensor with at least 2 dimensions.
        axis: Integer, axis along which the softmax normalization is applied.
    # Returns
        Tensor, output of softmax transformation.
    # Raises
        ValueError: In case `x` has fewer than 2 dimensions.
    """
    ndim = K.ndim(x)
    if ndim < 2:
        raise ValueError('Cannot apply softmax to a tensor that is 1D')

    # K.softmax normalises over the last axis, so it is only a valid shortcut
    # for 2D input when the last axis is the one that was requested.
    # Previously `axis` was silently ignored for every 2D tensor.
    if ndim == 2 and axis in (1, -1):
        return K.softmax(x)

    # Subtract the per-slice maximum before exponentiating for numerical stability.
    e = K.exp(x - K.max(x, axis=axis, keepdims=True))
    s = K.sum(e, axis=axis, keepdims=True)
    return e / s

In [9]:
# Define the attention layers once, as globals, so that every call to
# one_step_attention() reuses the same layer objects (and thus the same weights).
# Repeats a vector Tx times so it can be paired with each of the Tx encoder states.
repeator = RepeatVector(Tx)
# Joins its inputs along the last (feature) axis.
concatenator = Concatenate(axis=-1)
# Small network producing the attention energies: 10 tanh units, then 1 relu unit.
densor1 = Dense(10, activation = "tanh")
densor2 = Dense(1, activation = "relu")
# Applies the custom softmax() defined above (default axis=1) to get the attention weights.
activator = Activation(softmax, name='attention_weights')
# Dot product over axis 1 of both inputs.
dotor = Dot(axes = 1)

In [10]:
def one_step_attention(a, s_prev):
    """
    Run a single attention step and return the resulting context vector.

    The previous decoder state is paired with every encoder state, scored by the
    shared two-layer network (densor1 -> densor2), normalised into attention
    weights by `activator`, and those weights are used to take a weighted sum of
    the encoder states via `dotor`.

    Arguments:
    a -- hidden states output by the Bi-LSTM, tensor of shape (m, Tx, 2*n_a)
    s_prev -- previous hidden state of the (post-attention) LSTM, tensor of shape (m, n_s)

    Returns:
    context -- context vector, input of the next (post-attention) LSTM cell
    """
    # Tile s_prev to (m, Tx, n_s) so that it lines up with each encoder state.
    tiled_state = repeator(s_prev)
    # Pair every encoder state with the decoder state along the feature axis.
    paired = concatenator([a, tiled_state])
    # Intermediate energies, then one scalar energy per encoder position.
    energies = densor2(densor1(paired))
    # Normalise the energies into the attention weights.
    alphas = activator(energies)
    # Weighted sum of the encoder states.
    return dotor([alphas, a])

### RNN

In [11]:
# Hidden state size of the Bi-LSTM.
n_a = 32
# Hidden state size of the post-attention LSTM.
n_s = 64
# Shared decoder layers, created once so every unrolled step reuses the same weights.
# Reshapes a tensor to (1, vocab_size) per sample.
reshapor = Reshape((1, vocab_size))
# Joins its inputs along the last (feature) axis.
concatenator1 = Concatenate(axis=-1)
# return_state=True makes the layer return its output plus the hidden and cell states.
post_activation_LSTM_cell = LSTM(n_s, return_state = True)
# Maps its input to a distribution over the vocabulary using the custom softmax().
output_layer = Dense(vocab_size, activation=softmax)

In [12]:
def model(Tx, Ty, n_a, n_s, vocab_size):
    """
    Build the attention model: a Bi-LSTM encoder followed by a post-attention
    LSTM that is unrolled for Ty steps and fed a slice of Y_true at every step.

    Relies on the module-level shared layers `reshapor`, `concatenator1`,
    `post_activation_LSTM_cell`, `output_layer` and on `one_step_attention`.

    Arguments:
    Tx -- length of the input sequence
    Ty -- length of the output sequence
    n_a -- hidden state size of the Bi-LSTM
    n_s -- hidden state size of the post-attention LSTM
    vocab_size -- number of different characters

    Returns:
    model -- Keras model instance with inputs [X, s0, c0, Y_true] and a list of
             Ty outputs, one per decoding step
    """

    # One-hot encoded input sequence of shape (Tx, vocab_size).
    X = Input(shape=(Tx, vocab_size), name='X')
    # s0 and c0: initial hidden and cell state of the decoder LSTM, shape (n_s,).
    s0 = Input(shape=(n_s,), name='s0')
    c0 = Input(shape=(n_s,), name='c0')
    # One-hot encoded sequence that is fed to the decoder step by step.
    Y_true = Input(shape=(Ty, vocab_size), name='Y_true')
    s = s0
    c = c0

    # Initialize empty list of outputs
    outputs = []

    # Step 1: Define the pre-attention Bi-LSTM.
    a = Bidirectional(LSTM(n_a, return_sequences=True))(X)

    # Step 2: Iterate for Ty steps
    for t in range(Ty):

        # Step 2.A: Perform one step of the attention mechanism to get back the context vector at step t
        context = one_step_attention(a, s) # context.shape  = (?, 1, 2*n_a)

        # Bind the current t as a default argument: a plain closure would look
        # `t` up lazily, so every Lambda would slice the last time step if the
        # function were ever re-invoked or serialised after the loop finished.
        # NOTE(review): step t consumes Y_true[:, t, :] and also produces the
        # t-th output; this assumes the data generator supplies Y_true already
        # shifted relative to the targets -- confirm against DataGenerator.
        y_true = Lambda(lambda x, t=t: x[:, t, :])(Y_true) # y_true.shape = (?, vocab_size)
        y_true = reshapor(y_true)
        context = concatenator1([y_true, context])

        # Step 2.B: Apply the post-attention LSTM cell to the "context" vector,
        # passing initial_state = [hidden state, cell state].
        s, _, c = post_activation_LSTM_cell(context, initial_state=[s, c])

        # Step 2.C: Apply Dense layer to the hidden state output of the post-attention LSTM
        out = output_layer(s)

        # Step 2.D: Append "out" to the "outputs" list
        outputs.append(out)

    # Step 3: Create model instance taking four inputs and returning the list of outputs.
    model = Model(inputs=[X, s0, c0, Y_true], outputs=outputs)

    return model

In [13]:
# Build the training model from the global hyper-parameters.
# NOTE(review): this rebinds the name `model` from the builder function to the
# built Model instance, so re-running this cell without first re-running the
# cell that defines `model()` calls the instance instead of the builder.
model = model(Tx, Ty, n_a, n_s, vocab_size)

In [14]:
# Print the layer-by-layer summary of the built model.
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
X (InputLayer)                  (None, 200, 56)      0                                            
__________________________________________________________________________________________________
s0 (InputLayer)                 (None, 64)           0                                            
__________________________________________________________________________________________________
bidirectional_1 (Bidirectional) (None, 200, 64)      22784       X[0][0]                          
__________________________________________________________________________________________________
repeat_vector_2 (RepeatVector)  (None, 200, 64)      0           s0[0][0]                         
                                                                 lstm_1[0][0]                     
          

                                                                 dot_2[46][0]                     
                                                                 reshape_1[47][0]                 
                                                                 dot_2[47][0]                     
                                                                 reshape_1[48][0]                 
                                                                 dot_2[48][0]                     
                                                                 reshape_1[49][0]                 
                                                                 dot_2[49][0]                     
                                                                 reshape_1[50][0]                 
                                                                 dot_2[50][0]                     
                                                                 reshape_1[51][0]                 
          

In [24]:
# Optimizer configuration for training.
opt = Adam(
    lr=0.01,
    beta_1=0.9,
    beta_2=0.999,
)
# Every output is scored with categorical cross-entropy; accuracy is tracked as a metric.
model.compile(
    optimizer=opt,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [16]:
# String identifiers "0" .. "17999", handed to the data generator below.
filenames = list(map(str, range(18000)))

In [25]:
# The same identifier list is passed as both arguments of the generator.
train_batches = DataGenerator(filenames, filenames)
model.fit_generator(
    generator=train_batches,
    steps_per_epoch=5000,
    epochs=1,
    workers=4,
    shuffle=True,
)

Epoch 1/1

KeyboardInterrupt: 

In [None]:
# model.fit_generator(generate_data('..//data//', n_classes=56, n_s=n_s),
#                     steps_per_epoch=18000, epochs=5, workers=12, shuffle=True)

In [None]:
# path = "..\\data\\"
# epochs = 5
# sources = os.listdir(os.path.join(path, "sources\\"))
# targets = os.listdir(os.path.join(path, "targets\\"))
# for epoch in range(epochs):
#     print("Epoch: ", epoch)
#     for i in range(len(sources)):
#         X = np.load(os.path.join(path, "sources", sources[i]))
#         Y = np.load(os.path.join(path, "targets", targets[i]))
#         model.fit([to_categorical(X, num_classes=vocab_size), np.zeros((X.shape[0], n_s)), np.zeros((X.shape[0], n_s))],
#                   list(to_categorical(Y, num_classes=vocab_size).swapaxes(0, 1)))

In [28]:
# Persist only the weights (not the architecture) of the trained model.
# The path is assembled with os.path.join, like train_path above, so it is not
# tied to Windows path separators.
model.save_weights(os.path.join("..", "models", "attention_model", "attention.h5"))

In [None]:
def _greedy_one_hot(probs, n_classes):
    """Map (batch, n_classes) probabilities to a (batch, 1, n_classes) one-hot of the arg-max class."""
    best = K.argmax(probs, axis=-1)
    return K.expand_dims(K.one_hot(best, n_classes), axis=1)


def inference_model(LSTM_cell, densor, n_values=56, n_a=64, Ty=200, encoder=None):
    """
    Uses the trained "LSTM_cell" and "densor" from model() to generate a sequence of values.

    Mirrors the decoding loop of model(), but instead of reading the decoder
    input from Y_true it feeds back the one-hot arg-max of the previous
    prediction. The previous version referenced undefined names (`x`, `a`,
    `x0`, `a0`, `one_hot`) and skipped the encoder and the attention step, so
    it could not be built.

    Relies on the module-level `Tx`, `one_step_attention` and `concatenator1`.

    Arguments:
    LSTM_cell -- the trained "LSTM_cell" from model(), Keras layer object
    densor -- the trained "densor" from model(), Keras layer object
    n_values -- integer, number of unique values
    n_a -- number of units in the LSTM_cell
    Ty -- integer, number of time steps to generate
    encoder -- the trained Bi-LSTM layer from model(); when None, the first
               Bidirectional layer of the global `model` is used

    Returns:
    inference_model -- Keras model instance taking [X, s0, c0, y0] and
                       returning a list of Ty per-step predictions

    Raises:
    ValueError -- if no encoder is given and none can be found on the global `model`
    """
    if encoder is None:
        # Fall back to the encoder of the already-built training model, if any.
        trained_layers = getattr(globals().get('model'), 'layers', None) or []
        encoder = next(
            (layer for layer in trained_layers if isinstance(layer, Bidirectional)),
            None,
        )
        if encoder is None:
            raise ValueError(
                'No trained Bidirectional encoder found; pass it via `encoder`.'
            )

    # Same inputs as the training model, except that Y_true is replaced by y0.
    X = Input(shape=(Tx, n_values), name='X')
    s0 = Input(shape=(n_a,), name='s0')
    c0 = Input(shape=(n_a,), name='c0')
    # NOTE(review): y0 is the decoder input for the first step. What it should
    # contain depends on how the data generator builds Y_true during training
    # (not visible here) -- confirm before relying on the generated text.
    y0 = Input(shape=(1, n_values), name='y0')

    s = s0
    c = c0
    y_prev = y0

    # Encode the input once; every decoding step attends over these states.
    a = encoder(X)

    # Stateless layer turning a prediction into the next one-hot decoder input.
    feedback = Lambda(_greedy_one_hot, arguments={'n_classes': n_values})

    # Step 1: Create an empty list of "outputs" to later store the predicted values
    outputs = []

    # Step 2: Loop over Ty and generate a value at every time step
    for t in range(Ty):

        # Step 2.A: Attention over the encoder states, then one step of LSTM_cell
        context = one_step_attention(a, s)
        decoder_input = concatenator1([y_prev, context])
        s, _, c = LSTM_cell(decoder_input, initial_state=[s, c])

        # Step 2.B: Apply Dense layer to the hidden state output of the LSTM_cell
        out = densor(s)

        # Step 2.C: Append the prediction "out" to "outputs". out.shape = (None, n_values)
        outputs.append(out)

        # Step 2.D: Feed the one-hot of the most likely value into the next step
        y_prev = feedback(out)

    # Step 3: Create model instance with the correct "inputs" and "outputs"
    inference_model = Model([X, s0, c0, y0], outputs)

    return inference_model