In [1]:
import tensorflow as tf
import numpy as np
from tensorflow.keras.preprocessing.text import tokenizer_from_json
import json
import time

In [2]:
with open('./preprocessed_data/questions.json', 'r') as f:
    json_data = json.load(f)
    question_corpus = tokenizer_from_json(json_data)
    f.close()

with open('./preprocessed_data/answers.json', 'r') as f:
    json_data = json.load(f)
    answer_corpus = tokenizer_from_json(json_data)
    f.close()

npzfile = np.load('./preprocessed_data/data.npz') 

In [3]:
class Encoder(tf.keras.Model):

    def __init__(self, vocab_size, embedding_dim, enc_units, batch_sz):
        super(Encoder, self).__init__()
        self.batch_sz = batch_sz
        self.enc_units = enc_units
        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
        self.gru = tf.keras.layers.GRU(self.enc_units,return_sequences=True,return_state=True,
                                       recurrent_initializer='glorot_uniform')

    def call(self, x, hidden):
        x = self.embedding(x)
        output, state = self.gru(x, initial_state = hidden)
        return output, state

    def initialize_hidden_state(self):
        return tf.zeros((self.batch_sz, self.enc_units))

In [4]:
class BahdanauAttention(tf.keras.layers.Layer):

    def __init__(self, units):
        super(BahdanauAttention, self).__init__()
        self.W1 = tf.keras.layers.Dense(units)
        self.W2 = tf.keras.layers.Dense(units)
        self.V = tf.keras.layers.Dense(1)
        self.R = tf.keras.layers.Dropout(0.2)
    def call(self, query, values):
        hidden_with_time_axis = tf.expand_dims(query, 1)
        score = self.V(tf.nn.tanh(self.W1(values) + self.W2(hidden_with_time_axis)))
        score = self.R(score)
        attention_weights = tf.nn.softmax(score, axis=1)
        context_vector = attention_weights * values
        context_vector = tf.reduce_sum(context_vector, axis=1)

        return context_vector, attention_weights

In [5]:
class Decoder(tf.keras.Model):
    def __init__(self, vocab_size, embedding_dim, dec_units, batch_sz):
        super(Decoder, self).__init__()
        self.batch_sz = batch_sz
        self.dec_units = dec_units
        self.attention = BahdanauAttention(self.dec_units)
        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
        self.gru = tf.keras.layers.GRU(self.dec_units,return_sequences=True,return_state=True,
                                       recurrent_initializer='glorot_uniform')
        self.fc = tf.keras.layers.Dense(vocab_size)

    def call(self, x, hidden, enc_output):
        context_vector, attention_weights = self.attention(hidden, enc_output)
        x = self.embedding(x)
        x = tf.concat([tf.expand_dims(context_vector, 1), x], axis=-1)
        output, state = self.gru(x)
        output = tf.reshape(output, (-1, output.shape[2]))
        x = self.fc(output)
        
        return x, state, attention_weights

In [6]:
VocabSize = 2501
EmbeddingDim = 128
Units = 256
BatchSize = 64

In [7]:
encoder=Encoder(VocabSize,EmbeddingDim,Units,BatchSize)
decoder=Decoder(VocabSize,EmbeddingDim,Units,BatchSize)

In [8]:
optimizer = tf.keras.optimizers.Adam()
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

In [9]:
def loss_function(real, pred):
    
    mask = tf.math.logical_not(tf.math.equal(real, 0))
    loss_ = loss_object(real, pred)
    mask = tf.cast(mask, dtype=loss_.dtype)
    loss_ *= mask

    return tf.reduce_mean(loss_)

In [10]:
def train_step(inp, targ, enc_hidden):
    loss = 0
    
    with tf.GradientTape() as tape:
        enc_output, enc_hidden = encoder(inp, enc_hidden)
        dec_hidden = enc_hidden
        dec_input = tf.expand_dims([answer_corpus.word_index['bos']] * BatchSize, 1)
        
        for t in range(1, targ.shape[1]):
            predictions, dec_hidden, _ = decoder(dec_input, dec_hidden, enc_output)
            loss += loss_function(targ[:, t], predictions)
            dec_input = tf.expand_dims(targ[:, t], 1)
    
    batch_loss = (loss / int(targ.shape[1]))
    variables = encoder.trainable_variables + decoder.trainable_variables
    gradients = tape.gradient(loss, variables)
    optimizer.apply_gradients(zip(gradients, variables))
    
    return batch_loss

In [11]:
def validation_loss(inp, targ, enc_hidden):
    loss = 0
    enc_output, enc_hidden = encoder(inp, enc_hidden)
    dec_hidden = enc_hidden
    dec_input = tf.expand_dims([answer_corpus.word_index['bos']] * BatchSize, 1)
    
    for t in range(1, targ.shape[1]):
        predictions, dec_hidden, _ = decoder(dec_input, dec_hidden, enc_output)
        loss += loss_function(targ[:, t], predictions)
        dec_input = tf.expand_dims(targ[:, t], 1)
            
    batch_loss = (loss / int(targ.shape[1]))
    
    return batch_loss

In [12]:
# define the parameter to split data
train_valid_split = int(len(npzfile['arr_0'])*0.9)

In [13]:
# get the training data
inputq=npzfile['arr_0'][:train_valid_split]
inputa=npzfile['arr_1'][:train_valid_split] 

In [14]:
# get the validation data
validq=npzfile['arr_0'][train_valid_split:]
valida=npzfile['arr_1'][train_valid_split:]

In [15]:
# create tensorflow dataset pipeline for faster processing
# training set
BufferSize = len(inputq)
dataset_train = tf.data.Dataset.from_tensor_slices((inputq,inputa)).shuffle(BufferSize)
dataset_train = dataset_train.batch(BatchSize, drop_remainder=True)
# validation set
BufferSize1 = len(validq)
dataset_valid = tf.data.Dataset.from_tensor_slices((validq,valida)).shuffle(BufferSize1)
dataset_valid = dataset_valid.batch(BatchSize, drop_remainder=True)

In [16]:
# train the model

Epochs = 5
trainstep_epoch = len(inputq)//BatchSize
validstep_epoch = len(validq)//BatchSize
overalltime=0

for epoch in range(Epochs):
    start=time.time()
    total_loss=0
    valid_loss=0
    enc_hidden = encoder.initialize_hidden_state()
    for (batch, (inputq,inputa)) in enumerate(dataset_train.take(trainstep_epoch)):
        batch_loss = train_step(inputq,inputa,enc_hidden)
        total_loss += batch_loss

    for (batch, (validq,valida)) in enumerate(dataset_valid.take(validstep_epoch)):
        valid_batch_loss = validation_loss(validq,valida,enc_hidden)
        valid_loss += valid_batch_loss
    print('Epoch {} Loss {:.3f} Valid_Loss {:.3f}'.format(epoch+1,total_loss/trainstep_epoch,valid_loss/validstep_epoch))
    
    stop=time.time()
    timetaken=stop-start
    print('Time taken for 1 epoch {} sec\n'.format(timetaken))
    
    overalltime+=timetaken
    
print('Overall time taken {} min\n'.format(overalltime/60))

Epoch 1 Loss 1.952 Valid_Loss 1.777
Time taken for 1 epoch 144.3417284488678 sec

Epoch 2 Loss 1.733 Valid_Loss 1.701
Time taken for 1 epoch 143.42776203155518 sec

Epoch 3 Loss 1.667 Valid_Loss 1.665
Time taken for 1 epoch 141.8451211452484 sec

Epoch 4 Loss 1.622 Valid_Loss 1.653
Time taken for 1 epoch 145.02558493614197 sec

Epoch 5 Loss 1.586 Valid_Loss 1.637
Time taken for 1 epoch 143.3309507369995 sec

Overall time taken 11.966185788313547 min



In [17]:
# save parameters after training
encoder.save_weights('./trained_model/attention_enc_test.h5')
decoder.save_weights('./trained_model/attention_dec_test.h5')