This notebook builds the inference model for model 5 (a stacked Bi-LSTM encoder and a Bi-LSTM decoder with Keras' Luong-style attention) and uses it to generate a predicted title for a given article.

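**Known issue:** inference for this model is not working, despite the high validation accuracy (`val_acc`) reached during training. A likely cause (not verified here) is the bidirectional decoder: if it was trained on whole target titles, its backward direction could see future title tokens, which are not available when decoding one token at a time.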

I have consulted and adapted code from the following sources:
- A. Pai, “Text Summarization: Text Summarization Using Deep Learning”, 2020 Analytics Vidhya. [Online]. Available: https://www.analyticsvidhya.com/blog/2019/06/comprehensive-guide-text-summarization-using-deep-learning-python/. [Accessed: 21-Apr-2021]. 

In [None]:
import tensorflow as tf
import numpy as np  
import pandas as pd 
import re           
from keras.preprocessing.text import Tokenizer 
from keras.preprocessing.sequence import pad_sequences
from nltk.corpus import stopwords   
from tensorflow.keras.layers import Input, LSTM, Embedding, Dense, Concatenate, TimeDistributed, Bidirectional
from tensorflow.keras.models import Model, load_model


In [None]:
import sys
# Put the parent directory on the module search path so that `util` can be imported.
sys.path.append('../')
from util import preprocess_text
# Also put ../util itself on the search path. The import below rebinds the name
# `preprocess_text` that the import above just bound, so later cells see this
# second binding (it is called as a function in generate_title).
sys.path.append('../util')
from preprocess_text import preprocess_text

# Load Saved Model

In [None]:
# Identifier of the saved model; used as the directory name under ../models
# (model weights) and ../word_idx_dict (word-index JSON files).
model_name = 'bi_lstm_encoder_decoder_glove'

In [None]:
# Restore the saved model stored under ../models/<model_name>.
saved_model_path = f"../models/{model_name}"
model = load_model(saved_model_path)

In [None]:
# Print the layer-by-layer summary, then render the model graph. Later cells
# pick layers out of `model.layers` by position, so this is the reference view.
model.summary()
tf.keras.utils.plot_model(model)

In [None]:
# Display the ordered layer list; the numeric indices used below refer to it.
model.layers

In [None]:
# Size of each decoder state vector fed in at inference time (used for the
# Input(shape=(latent_dim,)) placeholders in the inference-model cell).
# NOTE(review): presumably must equal the per-direction LSTM units of the saved model — confirm.
latent_dim = 128

In [None]:
# Article length is read from the sequence dimension of the first model input.
max_len_full_article = model.inputs[0].shape[1]
# Upper bound used by decode_sequence to stop generating title words.
# NOTE(review): presumably the title length used during training — confirm.
max_len_title = 25

In [None]:
# Inspect layer 6; its output is unpacked in the next cell as the encoder
# sequence output plus forward/backward h and c states.
model.layers[6]

In [None]:
# The first model input is used as the encoder (article) input.
encoder_inputs = model.input[0]
# Layer 6's output is unpacked into five tensors: the sequence output followed
# by the forward h/c and backward h/c states.
# NOTE(review): the hard-coded index depends on the saved model's layer order — verify against model.layers.
encoder_outputs, state_forward_h, state_forward_c, state_backward_h, state_backward_c = model.layers[6].output
# The four states are deliberately kept separate (not concatenated): they are
# passed individually as the decoder's initial_state when the inference
# decoder is built.

In [None]:
# The second model input is used as the decoder (title) input.
decoder_inputs = model.input[1]
# Re-use the trained layers by position so the inference graph shares their weights.
# NOTE(review): indices 5, 7, 8 and 10 are assumed to be the decoder embedding,
# decoder LSTM, attention and output dense layers respectively — verify against model.layers.
decoder_embedding_layer = model.layers[5]
decoder_lstm = model.layers[7]
attention_layer = model.layers[8]
decoder_dense = model.layers[10]

# Build Inference Model

In [None]:
# --- Encoder inference model -------------------------------------------------
# Maps a padded article sequence to the encoder's sequence output plus the four
# final states (forward h/c, backward h/c) that seed the decoder.
encoder_model = Model(
    inputs=encoder_inputs,
    outputs=[encoder_outputs, state_forward_h, state_forward_c, state_backward_h, state_backward_c],
)

# --- Decoder inference model -------------------------------------------------
# Placeholders for the decoder states carried over from the previous time step
# (forward h/c first, then backward h/c).
decoder_state_input_h = Input(shape=(latent_dim,), name="input_3")
decoder_state_input_c = Input(shape=(latent_dim,), name="input_4")
decoder_state_backward_input_h = Input(shape=(latent_dim,), name="input_5")
decoder_state_backward_input_c = Input(shape=(latent_dim,), name="input_6")

# Placeholder for the encoder sequence output that the attention layer attends
# over. The feature size is taken from the encoder output tensor itself rather
# than hard-coded, so it stays correct if the encoder width changes.
encoder_feature_dim = encoder_outputs.shape[-1]
decoder_hidden_state_input = Input(shape=(max_len_full_article, encoder_feature_dim), name="input_7")

# Embed the decoder input tokens with the trained embedding layer.
dec_emb2 = decoder_embedding_layer(decoder_inputs)

# Run the decoder LSTM from the supplied states; it returns its sequence output
# followed by the updated forward h/c and backward h/c states.
decoder_state_inputs = [
    decoder_state_input_h,
    decoder_state_input_c,
    decoder_state_backward_input_h,
    decoder_state_backward_input_c,
]
(
    decoder_lstm_outputs,
    state_forward_h_2,
    state_forward_c_2,
    state_backward_h_2,
    state_backward_c_2,
) = decoder_lstm(dec_emb2, initial_state=decoder_state_inputs)

# Attention: decoder outputs are the query, encoder outputs the value.
attn_out_inf = attention_layer([decoder_lstm_outputs, decoder_hidden_state_input])
decoder_inf_concat = Concatenate(axis=-1, name='concat')([decoder_lstm_outputs, attn_out_inf])

# Project the concatenated features with the trained output layer to get the
# per-step scores over the target vocabulary.
decoder_outputs2 = decoder_dense(decoder_inf_concat)

# Final decoder model: (token, encoder outputs, 4 states) -> (scores, 4 new states).
decoder_model = Model(
    inputs=[
        decoder_inputs,
        decoder_hidden_state_input,
        decoder_state_input_h,
        decoder_state_input_c,
        decoder_state_backward_input_h,
        decoder_state_backward_input_c,
    ],
    outputs=[
        decoder_outputs2,
        state_forward_h_2,
        state_forward_c_2,
        state_backward_h_2,
        state_backward_c_2,
    ],
)

### Greedy decoding

In [None]:
def decode_sequence(input_seq):
    """Generate a title for one encoded article using greedy decoding.

    The article is encoded once; the decoder is then run one token at a time,
    always feeding back the highest-scoring token together with the decoder
    states returned by the previous step.

    Args:
        input_seq: Padded article token ids for a single article, passed
            straight to ``encoder_model.predict``.

    Returns:
        The predicted title as a string in which every word is preceded by a
        single space (so a non-empty result starts with a space). Decoding
        stops at ``'eostoken'``, at a token id that has no entry in
        ``reverse_target_word_index`` (e.g. the padding id 0), or after
        ``max_len_title - 1`` words.

    NOTE(review): the decoder layer returns forward and backward states, i.e.
    it is bidirectional. Feeding it one token per step means the backward
    direction never sees any later token — presumably unlike training — which
    may explain poor predictions. Confirm against the training notebook.
    """
    # Encode the article once: sequence output plus the four final states.
    e_out, e_forward_h, e_forward_c, e_backward_h, e_backward_c = encoder_model.predict(input_seq, verbose=0)

    # Length-1 target sequence initialised with the start token.
    target_seq = np.zeros((1, 1))
    target_seq[0, 0] = target_word_index['sostoken']

    decoded_tokens = []

    # Bounded loop: at most max_len_title - 1 words are generated.
    for _ in range(max_len_title - 1):
        output_tokens, output_forward_h, output_forward_c, output_backward_h, output_backward_c = decoder_model.predict(
            [target_seq, e_out, e_forward_h, e_forward_c, e_backward_h, e_backward_c],
            verbose=0,
        )

        # Greedy choice: highest-scoring token at the last time step.
        predict_token_index = int(np.argmax(output_tokens[0, -1, :]))
        # .get() so an id without a word (such as padding id 0) ends decoding
        # instead of raising KeyError.
        predict_token = reverse_target_word_index.get(str(predict_token_index))

        if predict_token is None or predict_token == 'eostoken':
            break
        decoded_tokens.append(predict_token)

        # Feed the chosen token and the updated states into the next step.
        target_seq = np.zeros((1, 1))
        target_seq[0, 0] = predict_token_index
        e_forward_h, e_forward_c, e_backward_h, e_backward_c = (
            output_forward_h,
            output_forward_c,
            output_backward_h,
            output_backward_c,
        )

    # Same output format as before: each word prefixed by one space.
    return ''.join(' ' + token for token in decoded_tokens)

In [None]:
def convert_sequence_to_title(input_sequence):
    """Turn a sequence of target token ids back into a title string.

    Ids equal to 0 and the ids of 'sostoken' / 'eostoken' are skipped. Each
    remaining id is looked up (as a string key) in
    ``reverse_target_word_index``, and every word is followed by one space.
    """
    def _is_title_word(token_id):
        # Checked in this order so the marker lookups only happen for non-zero ids.
        if token_id == 0:
            return False
        if token_id == target_word_index['sostoken']:
            return False
        return token_id != target_word_index['eostoken']

    return ''.join(
        reverse_target_word_index[str(token_id)] + ' '
        for token_id in input_sequence
        if _is_title_word(token_id)
    )

def convert_sequence_to_article(input_sequence):
    """Turn a sequence of source token ids back into article text.

    Ids equal to 0 are skipped. Every other id is looked up (as a string key)
    in ``reverse_source_word_index``, and each word is followed by one space.
    """
    article_words = []
    for token_id in input_sequence:
        if token_id == 0:
            continue
        article_words.append(reverse_source_word_index[str(token_id)] + ' ')
    return ''.join(article_words)

In [None]:
# Load the training arrays (encoded articles and titles) from disk.
# NOTE(review): the arrays come from the "bi_lstm_encoder_lstm_decoder"
# directory, while the word-index files are looked up under model_name —
# confirm that both were produced with the same tokenisation.
# The validation arrays are not loaded in this notebook.
path_to_data = "train_val_nd_array"
split_name = "bi_lstm_encoder_lstm_decoder"
x_train = np.load("../{}/{}/x_train.npy".format(path_to_data, split_name))
y_train = np.load("../{}/{}/y_train.npy".format(path_to_data, split_name))

In [None]:
import json


def _load_word_index(file_name):
    """Read one word-index JSON file stored for ``model_name``.

    Args:
        file_name: File name inside ``../word_idx_dict/<model_name>/``.

    Returns:
        The parsed JSON object. JSON object keys are always strings, which is
        why the reverse maps are indexed with ``str(token_id)`` elsewhere.
    """
    index_path = "../word_idx_dict/{}/{}".format(model_name, file_name)
    # Explicit encoding so the result does not depend on the platform default.
    with open(index_path, encoding="utf-8") as f:
        return json.load(f)


reverse_source_word_index = _load_word_index("reverse_source_word_index.json")
reverse_target_word_index = _load_word_index("reverse_target_word_index.json")
target_word_index = _load_word_index("target_word_index.json")

In [None]:
# Qualitative check: for the first 100 training samples, print the article,
# its reference title and the greedily decoded title.
for sample_idx in range(100):
    article_seq = x_train[sample_idx]
    print("Article:", convert_sequence_to_article(article_seq))
    print("Original Title:", convert_sequence_to_title(y_train[sample_idx]))
    # decode_sequence expects a batch of one article.
    print("Predicted Title:", decode_sequence(article_seq.reshape(1, max_len_full_article)))
    print("\n")

In [None]:
# Invert the id->word map into word->id. The ids arrive as strings because they
# are JSON object keys, so cast them back to int to get real token ids.
source_word_index = {word: int(index) for index, word in reverse_source_word_index.items()}

In [None]:
def generate_title(article_file_txt):
    """Generate a title for the article stored in a text file.

    The file is read, cleaned with ``preprocess_text``, mapped to token ids
    through ``source_word_index`` (words missing from it are dropped),
    zero-padded at the end to ``max_len_full_article`` and passed to
    ``decode_sequence``.

    Args:
        article_file_txt: Path to a text file containing the article.

    Returns:
        The title string returned by ``decode_sequence``.

    NOTE(review): ``pad_sequences`` is called without ``truncating``, so the
    library default decides which end of an over-long article is cut — confirm
    this matches how the training data was prepared.
    """
    # Explicit encoding so reading does not depend on the platform default.
    with open(article_file_txt, encoding="utf-8") as f:
        article = f.read()

    article = preprocess_text(article, is_article=True)
    print(article)

    # int() keeps this correct whether the index map stores ids as ints or as
    # the string keys that come out of the JSON file.
    input_sequence = [
        int(source_word_index[word])
        for word in article.split(" ")
        if word in source_word_index
    ]

    # Zero-pad at the end up to the encoder's input length; the result already
    # has shape (1, max_len_full_article), i.e. a batch of one article.
    padded_input_sequence = pad_sequences([input_sequence], maxlen=max_len_full_article, padding='post')
    return decode_sequence(padded_input_sequence)


In [None]:
# Demo: generate a title for one sample article file; the returned string is
# shown as the cell output.
generate_title("../sampled_articles/sample_1.txt")
