In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import tensorflow_addons as tfa
from keras.preprocessing.text import Tokenizer, text_to_word_sequence
from keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import ELU, Embedding, LSTM, Dense, Dropout, Bidirectional, Input, RepeatVector, TimeDistributed, Lambda, Layer
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.initializers import GlorotUniform
from tensorflow.keras.optimizers import SGD, RMSprop, Adam
from tensorflow.keras import regularizers, losses
from tensorflow.keras import backend as K

%tensorflow_version 2.x
import tensorflow as tf

import os
import datetime
import random

tf.compat.v1.disable_eager_execution()

In [2]:
from google.colab import drive
drive.mount('/content/gdrive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/gdrive


In [3]:
# import dataset from txt file

X = np.array(())

DataSetPath = '/content/gdrive/My Drive/bsc/Final/DataSet/ferdousi_norm.txt'

dataset = pd.read_csv(DataSetPath, na_values=" ", header=None)
temp = dataset.iloc[:, 0].values

X = np.append(X, temp)

In [4]:
print(X[:3])
print(X[-1])
print(X[-2])

['به نام خداوند جان و خرد' 'کزین برتر اندیشه برنگذرد'
 'خداوند نام و خداوند جای']
هر آنکس که دارد هش و رای و دین
که تخم سخن من پراگنده ام


In [5]:
print("The total count of masraas is " , len(X))

The total count of masraas is  99217


In [6]:
# Train on just a quarter of data
X = X[: int(len(X) / 4) ]

if len(X) % 2 == 1:
    X = X[:-1]

In [7]:
print("The total count of masraas is " , len(X))

The total count of masraas is  24804


In [8]:
verses = []
input_verses = []

for i in range(len(X)):
    if i % 2 == 0:
        input_verses.append(X[i] + ' <middle> ' + X[i + 1] + ' <eos>')
        verses.append(X[i] + ' <middle> ' + X[i + 1] + ' <eos>')

In [9]:
print("The total number of verses is ", len(verses))

The total number of verses is  12402


In [10]:
# tokenize all data

tokenizer = Tokenizer(filters="")
tokenizer.fit_on_texts(verses)

TokenizedVerses = tokenizer.texts_to_sequences(verses)
tokenized_input = tokenizer.texts_to_sequences(input_verses)

In [11]:
word2index = tokenizer.word_index
index2word = tokenizer.index_word

In [12]:
# find max length for padding

max_len = 0

for i in tokenized_input:
    if len(i) > max_len:
        sen = []
        for t in i :
            sen.append(index2word[t])
        print(sen)
    max_len = max(max_len, len(i)) 

print("maximum length of sentences : ", max_len)


['به', 'نام', 'خداوند', 'جان', 'و', 'خرد', '<middle>', 'کزین', 'برتر', 'اندیشه', 'برنگذرد', '<eos>']
['خداوند', 'کیوان', 'و', 'گردان', 'سپهر', '<middle>', 'فروزنده', 'ماه', 'و', 'ناهید', 'و', 'مهر', '<eos>']
['نیابد', 'بدو', 'نیز', 'اندیشه', 'راه', '<middle>', 'که', 'او', 'برتر', 'از', 'نام', 'و', 'از', 'جایگاه', '<eos>']
['خرد', 'را', 'و', 'جان', 'را', 'همی', 'سنجد', 'اوی', '<middle>', 'در', 'اندیشه', 'سخته', 'کی', 'گنجد', 'اوی', '<eos>']
['سه', 'پاس', 'تو', 'چشم', 'است', 'وگوش', 'و', 'زبان', '<middle>', 'کزین', 'سه', 'رسد', 'نیک', 'و', 'بد', 'بی', 'گمان', '<eos>']
['نه', 'کند', 'آوری', 'گیرد', 'از', 'باج', 'و', 'گنج', '<middle>', 'نه', 'دل', 'تیره', 'دارد', 'ز', 'رزم', 'و', 'ز', 'رنج', '<eos>']
['ز', 'گنج', 'و', 'ز', 'تخت', 'و', 'ز', 'در', 'و', 'گهر', '<middle>', 'ز', 'اسپ', 'و', 'سلیح', 'و', 'کلاه', 'و', 'کمر', '<eos>']
['به', 'دیبا', 'و', 'دینار', 'و', 'در', 'و', 'درم', '<middle>', 'به', 'بوی', 'و', 'به', 'رنگ', 'و', 'به', 'هر', 'بیش', 'و', 'کم', '<eos>']
maximum length of sentence

In [13]:
# do padding

input_seq = pad_sequences(tokenized_input, maxlen=max_len, padding='post')

In [14]:
# building model
class VAE(object):
    def build_model(self, vocab_size, timesteps, intermediate_dim, latent_dim, kl_weight_start = 0, epsilon_std=1.):

        """
        Creates an LSTM Variational Autoencoder (VAE). Returns VAE, Encoder, Generator. 
        # Arguments
            timesteps: int, input timestep dimension. e.g sentences max size
            intermediate_dim: int, output shape of LSTM. 
            latent_dim: int, latent z-layer shape. 
            epsilon_std: float, z-layer sigma.
        # References
            - [Building Autoencoders in Keras](https://blog.keras.io/building-autoencoders-in-keras.html)
            - [Generating sentences from a continuous space](https://arxiv.org/abs/1511.06349)
        """
        
        self.vae = None
        self.encoder = None
        self.generator = None
        self.kl_weight_start = kl_weight_start
        self.kl_weight = None

        x = Input(shape=(timesteps,))
        embed = Embedding(vocab_size, 256, input_length=timesteps)(x)

        # LSTM encoding
        h = Bidirectional(LSTM(intermediate_dim, activation="relu", kernel_regularizer=regularizers.l2(0.001), return_sequences=True), merge_mode="concat")(embed)
        h = Bidirectional(LSTM(intermediate_dim, activation="relu", kernel_regularizer=regularizers.l2(0.001)), merge_mode="concat")(h)
        h = Dense(intermediate_dim, activation='linear')(h)
        h = ELU()(h)

        # VAE Z layer
        z_mean = Dense(latent_dim)(h)
        z_log_sigma = Dense(latent_dim)(h)
        
        def sampling(args):
            z_mean_, z_log_var_ = args
            
            epsilon = K.random_normal(shape=tf.shape(z_mean_), mean=0., stddev=epsilon_std)
            return z_mean_ + K.exp(z_log_var_ / 2) * epsilon

        z = Lambda(sampling)([z_mean, z_log_sigma])
        
        # decoded LSTM layer
        repeated_context = RepeatVector(timesteps)
        decoder_h = LSTM(intermediate_dim, return_sequences=True, kernel_regularizer=regularizers.l2(0.001))
        decoder_mean = TimeDistributed(Dense(vocab_size, activation='linear')) 

        h_decoded = decoder_h(repeated_context(z))
        x_decoded_mean = decoder_mean(h_decoded)

        def vae_loss():
            self.kl_weight = K.variable(self.kl_weight_start, name='kl_weight')
            kl_weight = self.kl_weight
            
            def loss(y_true, y_pred):  
                labels = tf.cast(x, tf.int32)

                xent_loss = K.sum(tfa.seq2seq.sequence_loss(x_decoded_mean, labels, 
                                                                weights=tf.ones(tf.shape(x), tf.float32),
                                                                average_across_timesteps=False,
                                                                average_across_batch=False), axis=-1)
                                                                #softmax_loss_function=softmax_loss_f), axis=-1)#, uncomment for sampled doftmax
                kl_loss = - 0.5 * K.sum(1 + z_log_sigma - K.square(z_mean) - K.exp(z_log_sigma), axis=-1)
                kl_loss = kl_loss * kl_weight

                return K.mean(xent_loss + kl_loss)
            return loss
            

        # end-to-end autoencoder
        self.vae = Model(x, x_decoded_mean)

        # encoder, from inputs to latent space
        self.encoder = Model(x, z)

        # generator, from latent space to reconstructed inputs
        decoder_input = Input(shape=(latent_dim))

        _h_decoded = decoder_h(repeated_context(decoder_input))

        _x_decoded_mean = decoder_mean(_h_decoded)
        self.generator = Model(decoder_input, _x_decoded_mean)
        
        opt = Adam(lr=0.01) #SGD(lr=1e-2, decay=1e-6, momentum=0.9, nesterov=True)
        
        self.vae.compile(optimizer='adam', loss=vae_loss())
        # vae.summary()

        # return vae, encoder, generator


In [15]:
base_path = "/content/gdrive/My Drive/bsc/Final/VAE/"
!ls "/content/gdrive/My Drive/bsc/Final/VAE/"

checkpoint		     imputing_batch.txt
cp.ckpt.data-00000-of-00002  reconstruct_sentences_during_training.txt
cp.ckpt.data-00001-of-00002  sample_sentences_during_training.txt
cp.ckpt.index		     verses_batch.txt


In [16]:
# sample from model

def sample_line(model, latent_dim, type = 1):
    middle = word2index['<middle>']
    eos = word2index['<eos>']

    mu, stddev = 0, 1 
    z = np.random.default_rng().normal(mu, stddev, size = (1, latent_dim))
    
    seq = model.predict(z)

    output_sentence = []

    for i in range(max_len):
        probs = np.exp(seq[0][i])
        probs[0] = 0

        idx = 0
        if type == 0:
            probs /= probs.sum()
            idx = np.random.choice(len(probs), p=probs)
        elif type == 1:
            idx = np.argmax(probs)
        
        if idx == eos:
            break
        if idx == 0:
            continue
        if idx == middle:
            output_sentence.append("    -   ")
        else:
            output_sentence.append(index2word[idx])

    decoded_review = ' '.join(output_sentence)

    return decoded_review

In [17]:
# reconstruct

def reconstruct_line(model, latent_dim):
    middle = word2index['<middle>']
    eos = word2index['<eos>']

    num = random.randrange(len(input_seq) - 1)
    input_sentence = input_seq[num:num + 1]
    output_sentence = model.predict(input_sentence)

    output_sentence_text = []
    input_sentence_text = []

    for i in input_sentence[0]:
        if i != 0:
            input_sentence_text.append(index2word[i])

    for i in range(max_len):
        probs = output_sentence[0][i]
        probs[0] = 0

        idx = 0
        idx = np.argmax(probs)

        if idx == eos:
            output_sentence_text.append(index2word[idx])
            break
        if idx == 0:
            continue
        else:
            output_sentence_text.append(index2word[idx])

    decoded_review1 = ' '.join(input_sentence_text)
    decoded_review2 = ' '.join(output_sentence_text)

    return decoded_review1 + " ==> " + decoded_review2

In [18]:
# create custom callback
class MyCustomCallback(tf.keras.callbacks.Callback):

    def __init__(self, generator, latent_dim, initial_epoch = 0):
        self.generator = generator
        self.latent_dim = latent_dim
        self.initial_epoch = initial_epoch

    
    def on_epoch_end(self, epoch, logs=None):
        print('Training: epoch {} ends at {}'.format(epoch + self.initial_epoch, datetime.datetime.now().time()))
        print('sample text via predict : ')
        sentence = sample_line(self.generator, self.latent_dim)
        print(sentence)
        
        _path = base_path + "sample_sentences_during_training.txt"
        file_object = open(_path, 'a+')
        file_object.write('\n Training: epoch {} ends at {} \n'.format(epoch + self.initial_epoch, datetime.datetime.now().time()))
        file_object.write('sample text via predict : \n')
        file_object.write(sentence)
        file_object.close()
  

In [19]:
# create custom callback
kl_annealtime = 30

class KLAnnealing(tf.keras.callbacks.Callback):
    def __init__(self, kl_weight, initial_epoch = 0):
        super(KLAnnealing, self).__init__()
        self.kl_weight = kl_weight
        self.initial_epoch = initial_epoch

    def on_epoch_end(self, epoch, logs=None):
        if epoch + self.initial_epoch > kl_annealtime:
            new_kl_weight = K.get_value(self.kl_weight) + 0.01
            if new_kl_weight < 1:
                K.set_value(self.kl_weight, new_kl_weight)
                print('\nEpoch %05d: KLWeightScheduler setting KL weight '
                  ' to %s.' % (epoch + self.initial_epoch, new_kl_weight))
  

In [20]:
# create custom callback
class Reconstruct(tf.keras.callbacks.Callback):
    
    def __init__(self, latent_dim, initial_epoch = 0):
        self.latent_dim = latent_dim
        self.initial_epoch = initial_epoch

    def on_epoch_end(self, epoch, logs=None):
        print('Training: epoch {} ends at {}'.format(epoch + self.initial_epoch, datetime.datetime.now().time()))
        print('reconstruct text via predict : ')
        sentence = reconstruct_line(self.model, self.latent_dim)
        print(sentence)
        
        _path = base_path + "reconstruct_sentences_during_training.txt"
        file_object = open(_path, 'a+')
        file_object.write('\n Training: epoch {} ends at {} \n'.format(epoch + self.initial_epoch, datetime.datetime.now().time()))
        file_object.write('reconstruct text via predict : \n')
        file_object.write(sentence)
        file_object.close()
  

In [21]:
# create custom callback to save loss

class MyCustomCallbackLoss(tf.keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs=None):    
        loss_path = base_path + "loss.txt"
        print("saving loss")
        file_object = open(loss_path, 'a+')
        file_object.write('{}\n'.format(logs["loss"]))
        file_object.close()
  

In [None]:
# train model

LATENT_SIZE = 512
EPOCH_TIME = 100
WORD_NUM = len(index2word) + 1

# Create a callback that saves the model's weights
checkpoint_path = base_path + "cp.ckpt" 
checkpoint_dir = os.path.dirname(checkpoint_path)
cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path, verbose=1, save_weights_only=True)

model = VAE()
model.build_model(vocab_size = WORD_NUM, timesteps = max_len, intermediate_dim = 512, latent_dim = LATENT_SIZE)
model.vae.fit(input_seq[:1000], input_seq[:1000], batch_size=100, epochs = EPOCH_TIME, validation_split= 0.2,
        shuffle=True, verbose=1,
        callbacks=[
                   cp_callback,
                   MyCustomCallback(model.generator, LATENT_SIZE),
                   Reconstruct(LATENT_SIZE),
                   KLAnnealing(model.kl_weight),
                   MyCustomCallbackLoss()
                   ]
        )

In [None]:
# train base on last checkpoint

LATENT_SIZE = 512
EPOCH_TIME = 1
PR_EPOCHS = 485
WORD_NUM = len(index2word) + 1

# Create a callback that saves the model's weights
checkpoint_path = base_path + "cp.ckpt" 
checkpoint_dir = os.path.dirname(checkpoint_path)
cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path, verbose=1, save_weights_only=True)

model = VAE()

model.build_model(vocab_size = WORD_NUM, timesteps = max_len, intermediate_dim = 512, latent_dim = LATENT_SIZE, kl_weight_start = 1)
model.vae.load_weights(checkpoint_path)

model.vae.fit(input_seq, input_seq, batch_size=100, epochs = EPOCH_TIME, validation_split= 0.2,
        shuffle=True, verbose=1,
        callbacks=[
                   cp_callback,
                   MyCustomCallback(model.generator, LATENT_SIZE, initial_epoch=PR_EPOCHS),
                   Reconstruct(LATENT_SIZE, initial_epoch=PR_EPOCHS),
                   KLAnnealing(model.kl_weight, initial_epoch=PR_EPOCHS),
                   MyCustomCallbackLoss()
                ]
)

In [22]:
# load model base on last checkpoint

LATENT_SIZE = 512
WORD_NUM = len(index2word) + 1

# Create a callback that saves the model's weights
checkpoint_path = base_path + "cp.ckpt" 
checkpoint_dir = os.path.dirname(checkpoint_path)

model = VAE()

model.build_model(vocab_size = WORD_NUM, timesteps = max_len, intermediate_dim = 512, latent_dim = LATENT_SIZE, kl_weight_start = 1)
model.vae.load_weights(checkpoint_path)

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7fc3be6c0710>

In [None]:
# sample from model

for i in range(30):
    print(sample_line(model.generator, LATENT_SIZE))

In [None]:
# plot loss
loss = pd.read_csv(base_path + "loss.txt", header=None)
loss = loss.iloc[:,0].values

plt.plot(loss)
plt.ylabel("Loss function")
plt.show()

In [None]:
# showing model graph
from tensorflow.keras.utils import plot_model
plot_model(model.vae, to_file= base_path + 'model_plot.png', show_shapes=True, show_layer_names=True)

In [None]:
# text imputing
index = np.random.randint(0, int(len(X) / 4)) 
if index % 2 == 1:
    index -= 1
verse_text = []
verse_text.append(X[index] + " <middle> " + X[index + 1] + " <eos>")
# print(verse_text)
verse_text = tokenizer.texts_to_sequences(verse_text)
verse_text_seq = pad_sequences(verse_text, maxlen=max_len, padding='post')
# print(verse_text_seq)

def print_sent_from_seq(seq):
    output_sen = []
    for i in seq:
        if i == word2index["<eos>"]:
            break
        if i == 0:
            continue
        if i == word2index["<middle>"]:
            output_sen.append("    -   ")
        elif i == -1:
            output_sen.append("?")
        else:
            output_sen.append(index2word[i])

    decoded_review = ' '.join(output_sen)
    print(decoded_review)

# remove k word at random

k = 2
index_array = []

print("real sentence : ")
print_sent_from_seq(verse_text_seq[0])

for i in range(k):
    index = np.random.randint(0, max_len)
    while verse_text_seq[0][index] == word2index["<middle>"] or verse_text_seq[0][index] == word2index["<eos>"] or verse_text_seq[0][index] == 0:
         index = np.random.randint(0, max_len)
    index_array.append(index)
    verse_text_seq[0][index] = 0

print("After removing some words : ")
verse_text_seq_temp = verse_text_seq[0].copy()
for i in index_array:
    verse_text_seq_temp[i] = -1
print_sent_from_seq(verse_text_seq_temp)

z_hat = model.encoder.predict(verse_text_seq)
y_hat = model.generator.predict(z_hat)

for i in index_array:
    probs = np.exp(y_hat[0][i])
    probs[0] = 0

    idx = 0
    idx = np.argmax(probs)

    verse_text_seq[0][i] = idx

print("After text imputing : ")
print_sent_from_seq(verse_text_seq[0])



In [31]:
# text imputing on test set
index = np.random.randint(int(len(X) / 4), len(X))
if index % 2 == 1:
    index -= 1
verse_text = []
verse_text.append(X[index] + " <middle> " + X[index + 1] + " <eos>")
# print(verse_text)
verse_text = tokenizer.texts_to_sequences(verse_text)
verse_text_seq = pad_sequences(verse_text, maxlen=max_len, padding='post')
# print(verse_text_seq)

def print_sent_from_seq(seq):
    output_sen = []
    for i in seq:
        if i == word2index["<eos>"]:
            break
        if i == 0:
            continue
        if i == word2index["<middle>"]:
            output_sen.append("    -   ")
        elif i == -1:
            output_sen.append("?")
        else:
            output_sen.append(index2word[i])

    decoded_review = ' '.join(output_sen)
    print(decoded_review)

# remove k word at random

k = 2
index_array = []

print("real sentence : ")
print_sent_from_seq(verse_text_seq[0])

for i in range(k):
    index = np.random.randint(0, max_len)
    while verse_text_seq[0][index] == word2index["<middle>"] or verse_text_seq[0][index] == word2index["<eos>"] or verse_text_seq[0][index] == 0:
         index = np.random.randint(0, max_len)
    index_array.append(index)
    verse_text_seq[0][index] = 0

print("After removing some words : ")
verse_text_seq_temp = verse_text_seq[0].copy()
for i in index_array:
    verse_text_seq_temp[i] = -1
print_sent_from_seq(verse_text_seq_temp)

z_hat = model.encoder.predict(verse_text_seq)
y_hat = model.generator.predict(z_hat)

for i in index_array:
    probs = np.exp(y_hat[0][i])
    probs[0] = 0

    idx = 0
    idx = np.argmax(probs)

    verse_text_seq[0][i] = idx

print("After text imputing : ")
print_sent_from_seq(verse_text_seq[0])



real sentence : 
ز بهر بزرگان ایران زمین     -    برآرامش این رنج کردی گزین
After removing some words : 
ز ? ? ایران زمین     -    برآرامش این رنج کردی گزین
After text imputing : 
ز چندان بزرگان ایران زمین     -    برآرامش این رنج کردی گزین


In [36]:
# computing text imputing accuracy
error = 0
acc = 0
for p in range(500):
    index = np.random.randint(0, int(len(X) / 4)) 
    if index % 2 == 1:
        index -= 1
    verse_text = []
    verse_text.append(X[index] + " <middle> " + X[index + 1] + " <eos>")

    verse_text = tokenizer.texts_to_sequences(verse_text)
    verse_text_seq = pad_sequences(verse_text, maxlen=max_len, padding='post')
    
    # remove k word at random
    k = 2
    index_array = []
    value_array = []

    for i in range(k):
        index = np.random.randint(0, max_len)
        while verse_text_seq[0][index] == word2index["<middle>"] or verse_text_seq[0][index] == word2index["<eos>"] or verse_text_seq[0][index] == 0:
            index = np.random.randint(0, max_len)
        index_array.append(index)
        value_array.append(verse_text_seq[0][index])
        verse_text_seq[0][index] = 0

    z_hat = model.encoder.predict(verse_text_seq)
    y_hat = model.generator.predict(z_hat)

    for k in range(len(index_array)):
        i = index_array[k]

        probs = np.exp(y_hat[0][i])
        probs[0] = 0
        probs /= sum(probs)

        idx = 0
        idx = np.argmax(probs)
        error += -np.log(probs[value_array[k]])
        acc += probs[value_array[k]]

        verse_text_seq[0][i] = idx

print(acc / 500)
print(error / 500)

0.538278013484222
8.064722566499796


In [None]:
# create new batch verses

def sample_line_with_token(model, latent_dim):
    middle = word2index['<middle>']
    eos = word2index['<eos>']

    mu, stddev = 0, 1 
    z = np.random.default_rng().normal(mu, stddev, size = (1, latent_dim))
    
    seq = model.predict(z)

    output_sentence = []

    for i in range(max_len):
        probs = np.exp(seq[0][i])
        probs[0] = 0

        idx = 0
        
        idx = np.argmax(probs)
        
        if idx == eos:
            break
        if idx == 0:
            continue
        else:
            output_sentence.append(index2word[idx])

    decoded_review = ' '.join(output_sentence)

    return decoded_review


verses_batch_path = base_path + "verses_batch.txt"
print("saving verses batch")
file_object = open(verses_batch_path, 'a+')

for i in range(4000):
    verse = sample_line_with_token(model.generator, LATENT_SIZE)
    file_object.write('{}\n'.format(verse))

file_object.close()

saving verses batch


In [None]:
# create new batch imputing

imputing_batch_path = base_path + "imputing_batch.txt"
print("saving imputing batch")
file_object = open(imputing_batch_path, 'a+')

for j in range(4000):
    index = np.random.randint(0, int(len(X) / 4)) 
    
    if index % 2 == 1:
        index -= 1
    
    verse_text = []
    verse_text.append(X[index] + " <middle> " + X[index + 1] + " <eos>")

    verse_text = tokenizer.texts_to_sequences(verse_text)
    verse_text_seq = pad_sequences(verse_text, maxlen=max_len, padding='post')

    k = 3
    index_array = []

    for i in range(k):
        index = np.random.randint(0, max_len)
        while verse_text_seq[0][index] == word2index["<middle>"] or verse_text_seq[0][index] == word2index["<eos>"] or verse_text_seq[0][index] == 0:
            index = np.random.randint(0, max_len)
        index_array.append(index)
        verse_text_seq[0][index] = 0

    z_hat = model.encoder.predict(verse_text_seq)
    y_hat = model.generator.predict(z_hat)

    for i in index_array:
        probs = np.exp(y_hat[0][i])
        probs[0] = 0

        idx = 0
        idx = np.argmax(probs)

        verse_text_seq[0][i] = idx

    output_sentence = []
    for i in range(max_len):
        if verse_text_seq[0][i] == word2index["<eos>"]:
            break
        if verse_text_seq[0][i] == 0:
            continue
        else:
            output_sentence.append(index2word[verse_text_seq[0][i]])
        
    decoded_review = ' '.join(output_sentence)

    file_object.write('{}\n'.format(decoded_review))

file_object.close()

saving imputing batch
