In [10]:
import os 
import numpy as np
import pandas as pd
import re             # regular expression
import pickle         # Serialization and de-serialization of object (saving and loading)

import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

# Raise TensorFlow's C++ log threshold: '3' filters out INFO, WARNING and ERROR messages
# (it is a severity level, not a limit on the number of lines printed).
# NOTE(review): this variable is normally set before `import tensorflow`; here it is set
# after the import above, so messages emitted during import may still appear - confirm.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

In [11]:
#encoder and decoder architecture
"""
INPUT -> Encoder -> ENC OUTPUTS, THOUGHT VECTOR -> Attention Network -> Attention Weights (x ENC OUTPUTS) -> ATTENTION OUTPUT

ATTENTION OUTPUT, PREV DECODER STATE -> DECODER ->FINAL OUTPUT
"""
# An LSTM takes more time to train and is typically used for large datasets,
# whereas a GRU is faster and works well for small datasets.

# Attention architecture layer class (neural network) - built from simple dense layers
# Attention Network -> Attention Weights (x ENC OUTPUTS) -> ATTENTION OUTPUT
 


'\nINPUT -> Encoder -> ENC OUTPUTS, THOUGHT VECTOR -> Attention Network -> Attention Weights (x ENC OUTPUTS) -> ATTENTION OUTPUT\n\nATTENTION OUTPUT, PREV DECODER STATE -> DECODER ->FINAL OUTPUT\n'

In [12]:
class Encoder(tf.keras.Model):
    """Encoder half of the seq2seq model: an embedding followed by one GRU.

    Args:
        vocab_size: number of entries in the embedding table.
        embedding: dimensionality of each embedding vector.
        encoder_units: number of units in the GRU.
        batch_size: stored on the instance as ``batch_size``; not used
            anywhere else inside this class.
    """

    def __init__(self, vocab_size, embedding, encoder_units, batch_size):
        super().__init__()

        self.batch_size = batch_size
        self.enc_units = encoder_units

        # Maps integer token ids to dense vectors.
        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding)

        # The GRU hands back both the full output sequence and its final
        # state; its input kernel carries an L2 penalty.
        l2_penalty = tf.keras.regularizers.L2(0.001)
        self.gru = tf.keras.layers.GRU(
            self.enc_units,
            return_sequences=True,
            return_state=True,
            kernel_regularizer=l2_penalty,
        )

    def call(self, inputs, hidden_state):
        """Encode ``inputs`` starting from ``hidden_state``.

        Args:
            inputs: token ids fed to the embedding layer.
            hidden_state: initial state passed to the GRU.

        Returns:
            A tuple ``(enc_outputs, thought_vector)``: the GRU's output
            sequence and its final state.
        """
        token_vectors = self.embedding(inputs)
        gru_result = self.gru(token_vectors, initial_state=hidden_state)
        enc_outputs, thought_vector = gru_result
        return enc_outputs, thought_vector

In [13]:
#the two inputs are enc_outputs and the thought_vector going into the attention layer
class Attention(tf.keras.layers.Layer):
    """Additive attention over the encoder outputs.

    Each encoder output is scored against the thought vector with
    ``final_layer(tanh(enc_output_layer(enc_outputs) + thought_layer(thought)))``;
    the scores are normalised across timesteps and used to form a weighted
    sum of the encoder outputs.

    Args:
        units: width of the two dense projections applied to the encoder
            outputs and to the thought vector before they are added.
    """

    def __init__(self, units):
        # The base Layer must be initialised before sub-layers are assigned
        # as attributes, otherwise Keras refuses to track them.
        super(Attention, self).__init__()

        self.enc_output_layer = tf.keras.layers.Dense(units, kernel_regularizer=tf.keras.regularizers.L2(0.001))
        self.thought_layer    = tf.keras.layers.Dense(units, kernel_regularizer=tf.keras.regularizers.L2(0.001))
        self.final_layer      = tf.keras.layers.Dense(1    , kernel_regularizer=tf.keras.regularizers.L2(0.001))

    def call(self, enc_outputs, thought_vector):
        """Compute the attention output and the attention weights.

        Args:
            enc_outputs: encoder output sequence; the code reduces over
                axis 1, so it is treated as (batch_size, num_outputs, output_size).
            thought_vector: state to score the encoder outputs against
                (assumed to be (batch_size, state_size) - TODO confirm with caller).

        Returns:
            A tuple ``(attention_output, attention_weights)`` where
            ``attention_output`` is the weighted sum of ``enc_outputs`` over
            axis 1 and ``attention_weights`` are the per-timestep weights
            with a trailing dimension of 1.
        """
        # Insert a length-1 axis at position 1 so the projected thought
        # vector broadcasts against every timestep of the encoder outputs.
        thought_matrix = tf.expand_dims(thought_vector, 1)

        projected = self.enc_output_layer(enc_outputs) + self.thought_layer(thought_matrix)
        scores = self.final_layer(tf.keras.activations.tanh(projected))

        # final_layer has a single unit, so the last axis of `scores` has
        # size 1; a softmax over it would always yield 1.0. Normalise across
        # the timestep axis instead so the weights sum to 1 over the sequence.
        attention_weights = tf.keras.activations.softmax(scores, axis=1)

        attention_output = attention_weights * enc_outputs          # shape (batch_size, num_outputs, output_size)
        attention_output = tf.reduce_sum(attention_output, axis=1)  # new shape (batch_size, output_size)

        return attention_output, attention_weights
        
    

In [20]:
class Decoder(tf.keras.Model):
    """Decoder half of the seq2seq model: attention, embedding, GRU and a vocabulary projection.

    Args:
        vocab_size: number of entries in the embedding table and number of
            units in the final dense layer.
        embedding: dimensionality of each embedding vector.
        decoder_units: number of GRU units; also the width passed to the
            attention layer.
        batch_size: stored on the instance as ``batch_size``; not used
            anywhere else inside this class.
    """

    def __init__(self, vocab_size, embedding, decoder_units, batch_size):
        super(Decoder, self).__init__()

        self.batch_size = batch_size
        self.dec_units = decoder_units
        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding)
        self.gru = tf.keras.layers.GRU(self.dec_units, return_sequences=True, return_state=True, kernel_regularizer=tf.keras.regularizers.L2(0.001))

        self.attention = Attention(self.dec_units)
        self.word_output = tf.keras.layers.Dense(vocab_size, kernel_regularizer=tf.keras.regularizers.L2(0.001))

    def call(self, inputs, enc_outputs, thought_vector):
        """Run one decoding call.

        Args:
            inputs: token ids fed to the embedding layer. Because the
                attention output is given a single timestep before the
                concatenation, ``inputs`` must also have exactly one
                timestep per example.
            enc_outputs: encoder output sequence handed to the attention layer.
            thought_vector: state handed to the attention layer. It is not
                passed to the GRU, which starts from its default initial state.

        Returns:
            A tuple ``(final_outputs, hidden_state, attention_weights)``:
            the vocabulary-sized outputs with batch and time axes flattened
            together, the GRU's final state, and the attention weights.
        """
        attention_output, attention_weights = self.attention(enc_outputs, thought_vector)

        embedded_inputs = self.embedding(inputs)  # shape (batch_size, num_words, size_of_embedding)

        # Add a length-1 time axis so the attention output can be joined with
        # the embedded tokens along the feature axis.
        attention_output = tf.expand_dims(attention_output, 1)
        concat_inputs = tf.concat([attention_output, embedded_inputs], axis=-1)

        decoder_outputs, hidden_state = self.gru(concat_inputs)

        # Collapse the batch and time axes so the dense layer sees one row per step.
        decoder_outputs = tf.reshape(decoder_outputs, (-1, decoder_outputs.shape[2]))

        final_outputs = self.word_output(decoder_outputs)

        return final_outputs, hidden_state, attention_weights

In [None]:
class Train:
    def __init__(self):
        self.optimizer = tf.keras.optimizers.Adem()
        self.loss_function = tf.keras