In [1]:
import os
# Make the Drive folder the working directory; the pickle and model paths used
# later in this notebook are all relative to it.
# NOTE(review): the path itself is relative, so running this cell a second time
# from inside that folder will presumably fail — confirm / guard if needed.
os.chdir('drive/My Drive')

In [2]:
import tensorflow as tf
# Switch on eager execution through the TF1 compatibility API.
tf.compat.v1.enable_eager_execution()
from tensorflow.keras.layers import TimeDistributed
# Clear Keras' global session state before anything is loaded.
tf.keras.backend.clear_session()
from tensorflow.keras.layers import Input, Softmax, RNN, Dense, Embedding, LSTM
from tensorflow.keras.models import Model
import numpy as np
import pandas as pd
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import warnings
# Silence every Python warning for the rest of the session.
warnings.filterwarnings('ignore')
import pickle

In [3]:
# Load the encoder and decoder tokenizers, which are stored together as a
# two-item pickle. The context manager closes the file even if unpickling raises.
# NOTE: pickle.load can execute arbitrary code — only load files you trust.
with open('tokenizers_subject.pkl','rb') as infile:
  tokenizer_encoder, tokenizer_decoder = pickle.load(infile)

In [4]:
def custom_lossfunction(targets,logits):
  '''
  Sparse categorical cross-entropy (computed from logits) with padded positions zeroed out.

  Positions where `targets` equals 0 contribute a loss of 0. The result is the
  mean over all positions of the per-position loss tensor, i.e. the zeroed
  positions still count in the denominator.

  Refer https://www.tensorflow.org/tutorials/text/nmt_with_attention#define_the_optimizer_and_the_loss_function
  '''
  crossentropy = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True, reduction='none')
  per_position_loss = crossentropy(targets, logits)

  # 1 where the target is a real token, 0 where it is padding (id 0).
  keep = tf.cast(tf.math.not_equal(targets, 0), dtype=per_position_loss.dtype)

  return tf.reduce_mean(per_position_loss * keep)

# Also attach the loss to the tf.keras.losses namespace under the name `custom_loss`.
tf.keras.losses.custom_loss = custom_lossfunction

In [5]:
# Restore the saved attention model from 'final_attention_subject_upd'.
# custom_objects maps the name 'custom_lossfunction' to the Python function so
# Keras can resolve it while loading.
loaded_model_att = tf.keras.models.load_model('final_attention_subject_upd',custom_objects={'custom_lossfunction':custom_lossfunction})

In [6]:
# `model` is the module-level name that final_fun_1 and predict read their
# encoder (model.layers[2]) and decoder (model.layers[4]) from.
model = loaded_model_att

In [7]:
import copy
def final_fun_1(input_sentence, beam_width=3, max_len=50):

  '''
  Beam-search decode `input_sentence` and return the candidate completions.

  A. Convert the sentence to integer ids with the encoder tokenizer and pad to length 16.
  B. Pass the padded sequence to the encoder (model.layers[2]) to get the encoder
     outputs and the last hidden and cell state.
  C. Start from the index of <start>, with the encoder states as decoder states.
  D. At every step run decoder.onestepdecoder (decoder = model.layers[4]) once per
     live hypothesis, keep the best-scoring continuations, and move every
     hypothesis whose newest word is <end> to the finished list. The number of
     live hypotheses shrinks by one each time a sentence finishes.
  E. Stop when `beam_width` sentences are finished or after `max_len` decoder
     steps, whichever comes first. Hypotheses still unfinished at that point
     are returned as they are instead of looping forever.

  A hypothesis' score is the product of the decoder output values of its tokens.
  NOTE(review): custom_lossfunction is built with from_logits=True, which suggests
  these values are unnormalised logits rather than probabilities — confirm, and
  consider summing log-softmax scores instead.

  Parameters:
    input_sentence : raw input text.
    beam_width     : number of hypotheses kept / sentences returned (default 3).
    max_len        : safety cap on the number of decoder steps (default 50).
                     NOTE(review): the decoder length used in training is not
                     visible here — adjust if it is known.

  Returns:
    list of str, in the order the sentences finished (unfinished ones, best
    score first, are appended only when `max_len` is hit).

  Raises:
    ValueError if `beam_width` or `max_len` is smaller than 1.
  '''
  if beam_width < 1 or max_len < 1:
    raise ValueError('beam_width and max_len must both be at least 1')

  encoder_test_tokens = tokenizer_encoder.texts_to_sequences([input_sentence])
  padded_encoder_input = pad_sequences(encoder_test_tokens, maxlen=16, dtype='float32', padding='post')
  encoder = model.layers[2]
  decoder = model.layers[4]
  encoder_op, enc_h, enc_c = encoder(padded_encoder_input)

  # One reverse lookup table instead of scanning word_index for every predicted id.
  index_to_word = {index: word for word, index in tokenizer_decoder.word_index.items()}
  start_id = tokenizer_decoder.word_index['<start>']

  def as_text(words):
    # Ids without a vocabulary entry (e.g. the padding id) are stored as None; skip them.
    return ' '.join(word for word in words if word is not None)

  # Each hypothesis is (score, last_token_id, words_so_far, (state_h, state_c)).
  live = [(1.0, start_id, [], (enc_h, enc_c))]
  final = []
  for _ in range(max_len):
    # Only as many continuations as there are sentences still to be found.
    width = beam_width - len(final)
    candidates = []
    for score, last_id, words, (state_h, state_c) in live:
      decoder_input = np.array(last_id).reshape(1,1).astype('float32')
      predicted_out, state_h, state_c, _, _ = decoder.onestepdecoder(
          decoder_input, encoder_op, state_h, state_c)
      step_scores = np.asarray(predicted_out[0])
      # Indices of the `width` largest outputs, best first.
      for token_id in np.argsort(step_scores)[-width:][::-1]:
        token_id = int(token_id)
        candidates.append((score * step_scores[token_id], token_id,
                           words + [index_to_word.get(token_id)], (state_h, state_c)))

    # Stable sort, so equally scored candidates keep their expansion order.
    candidates.sort(key=lambda candidate: candidate[0], reverse=True)
    live = []
    for candidate in candidates[:width]:
      if candidate[2][-1] == '<end>':
        final.append((candidate[0], as_text(candidate[2][:-1])))
      else:
        live.append(candidate)
    if len(final) >= beam_width or not live:
      break

  # Non-empty only when the step budget ran out before every sentence ended.
  final.extend((candidate[0], as_text(candidate[2])) for candidate in live)
  return [text for _, text in final]

In [8]:
# Example: beam-search completions for a sample prompt (recorded output below).
final_fun_1("i have sent you")

['a copy of', 'a copy in', 'a confirmation of']

In [9]:
def predict(input_sentence, max_len=50):

  '''
  Greedy-decode `input_sentence` and return the predicted text.

  A. Convert the sentence to integer ids with the encoder tokenizer and pad to length 16.
  B. Pass the padded sequence to the encoder (model.layers[2]) to get the encoder
     outputs and the last hidden and cell state.
  C. Feed the index of <start> and the encoder states to decoder.onestepdecoder
     (decoder = model.layers[4]).
  D. Until <end> is predicted or `max_len` steps have run: take the arg-max id,
     look up its word, and feed the id back in as the next decoder input.
  E. Return the words joined by single spaces (<end> itself is not included).

  Parameters:
    input_sentence : raw input text.
    max_len        : safety cap on the number of decoder steps (default 50), so a
                     model that never emits <end> cannot hang the notebook.
                     NOTE(review): the decoder length used in training is not
                     visible here — adjust if it is known.

  Raises:
    ValueError if `max_len` is smaller than 1.
  '''
  if max_len < 1:
    raise ValueError('max_len must be at least 1')

  encoder_test_tokens = tokenizer_encoder.texts_to_sequences([input_sentence])
  padded_encoder_input = pad_sequences(encoder_test_tokens, maxlen=16, dtype='float32', padding='post')
  encoder = model.layers[2]
  decoder = model.layers[4]
  encoder_op, enc_h, enc_c = encoder(padded_encoder_input)

  end_id = tokenizer_decoder.word_index['<end>']
  # One reverse lookup table instead of scanning word_index for every predicted id.
  index_to_word = {index: word for word, index in tokenizer_decoder.word_index.items()}

  decoder_input = np.array(tokenizer_decoder.word_index['<start>']).reshape(1,1).astype('float32')
  sentence = []
  for _ in range(max_len):
    predicted_out, enc_h, enc_c, _, _ = decoder.onestepdecoder(decoder_input, encoder_op, enc_h, enc_c)
    pred = int(np.argmax(predicted_out))
    if pred == end_id:
      break
    word = index_to_word.get(pred)
    # Ids without a vocabulary entry (e.g. the padding id) are skipped in the text.
    if word is not None:
      sentence.append(word)
    decoder_input = np.array(pred).reshape(1,1).astype('float32')

  return ' '.join(sentence)

In [11]:
import pickle
# Load the pickled dataset; the context manager closes the file even if
# unpickling raises.
# NOTE: pickle.load can execute arbitrary code — only load files you trust.
with open('final_data_cs2_subject.pkl','rb') as infile:
  new_data = pickle.load(infile)

# Derive the decoder-side columns from `output`: `decoder_input` is the target
# prefixed with '<start> ', `decoder_output` is the target suffixed with ' <end>'.
# The raw `output` column is dropped afterwards.
new_data['decoder_input'] = '<start> ' + new_data['output'].astype(str)
new_data['decoder_output'] = new_data['output'].astype(str) + ' <end>'
new_data = new_data.drop(['output'], axis=1)

In [12]:
# Evaluation subset. A fixed random_state makes the reported BLEU reproducible
# across Restart & Run All; the size is capped so a dataset with fewer than
# 1000 rows does not raise.
sample = new_data.sample(n=min(1000, len(new_data)), random_state=42)
import nltk.translate.bleu_score as bleu

In [13]:
def final_fun_2(X,y):
  '''
  Mean sentence-level BLEU of `predict` over paired inputs and references.

  X : indexable collection of input sentences exposing `.index`; each entry is
      lower-cased before being passed to `predict`.
  y : reference sentences addressed by the same labels as X; '<start>' tokens
      are removed before scoring.

  Returns np.mean of the per-sentence bleu.sentence_bleu scores.
  '''
  def score_row(label):
    # Hypothesis first, then reference — both whitespace-tokenised.
    hypothesis = predict(X[label].lower()).split()
    reference = [token for token in y[label].split() if token != '<start>']
    return bleu.sentence_bleu([reference], hypothesis)

  return np.mean([score_row(label) for label in X.index])

In [14]:
# Mean sentence-level BLEU of the greedy predictions over `sample`. The
# references come from `decoder_input`; final_fun_2 strips their '<start>' token.
final_fun_2(sample.input,sample.decoder_input)

0.7040402629024597