In [1]:
from google.colab import drive
drive.mount('/content/drive')

import os
os.environ['KAGGLE_CONFIG_DIR'] = "/content/drive/My Drive/Smart"
%cd /content/drive/My Drive/Smart

Mounted at /content/drive
/content/drive/My Drive/Smart


In [3]:
import tensorflow as tf
from tensorflow import keras

enc_model = keras.models.load_model('encoder-model-final.h5', compile=False)
inf_model = keras.models.load_model('inf-model-final.h5', compile=False)

In [4]:
print(tf.__version__)

2.4.1


In [5]:
import json

vocab_max_size = 10000

with open('word_dict-final.json') as f:
    word_dict = json.load(f)
    tokenizer = keras.preprocessing.text.Tokenizer(filters='', num_words=vocab_max_size)
    tokenizer.word_index = word_dict

In [6]:
import numpy as np

max_length_in = 21
max_length_out = 20

def tokenize_text(text):
  text = '<start> ' + text.lower() + ' <end>'
  text_tensor = tokenizer.texts_to_sequences([text])
  text_tensor = keras.preprocessing.sequence.pad_sequences(text_tensor, maxlen=max_length_in, padding="post")
  return text_tensor

# Reversed map from a tokenizer index to a word
index_to_word = dict(map(reversed, tokenizer.word_index.items()))

# Given an input string, an encoder model (infenc_model) and a decoder model (infmodel),
def decode_sequence(input_sentence):
    sentence_tensor = tokenize_text(input_sentence)
    # Encode the input as state vectors.
    state = enc_model.predict(sentence_tensor)

    target_seq = np.zeros((1, 1))
    target_seq[0, 0] = tokenizer.word_index['<start>']
    curr_word = "<start>"
    decoded_sentence = ''

    i = 0
    while curr_word != "<end>" and i < (max_length_out - 1):
        print(target_seq.shape)
        output_tokens, h = inf_model.predict([target_seq, state])

        curr_token = np.argmax(output_tokens[0, 0])

        if (curr_token == 0):
          break;

        curr_word = index_to_word[curr_token]

        decoded_sentence += ' ' + curr_word
        target_seq[0, 0] = curr_token
        state = h
        i += 1

    return decoded_sentence

In [7]:
print(tokenize_text('have a'))

[[ 1 22 14  2  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]]


In [8]:
enc_model.summary()

Model: "model_2"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_3 (InputLayer)            [(None, 21)]         0                                            
__________________________________________________________________________________________________
embedding_2 (Embedding)         (None, 21, 10)       100000      input_3[0][0]                    
__________________________________________________________________________________________________
bidirectional_1 (Bidirectional) [(None, 21, 384), (N 235008      embedding_2[0][0]                
__________________________________________________________________________________________________
concatenate_1 (Concatenate)     (None, 384)          0           bidirectional_1[0][1]            
                                                                 bidirectional_1[0][2]      

In [9]:
inf_model.summary()

Model: "model_3"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_4 (InputLayer)            [(None, None)]       0                                            
__________________________________________________________________________________________________
embedding_3 (Embedding)         (None, None, 10)     100000      input_4[0][0]                    
__________________________________________________________________________________________________
input_5 (InputLayer)            [(None, 384)]        0                                            
__________________________________________________________________________________________________
gru_3 (GRU)                     [(None, None, 384),  456192      embedding_3[0][0]                
                                                                 input_5[0][0]              

In [18]:
import pandas as pd

texts = [
    'Good  ']

output = list(map(lambda text: (text, decode_sequence(text)), texts))
output_df = pd.DataFrame(output, columns=["input", "output"])
output_df.head(len(output))

(1, 1)
(1, 1)
(1, 1)
(1, 1)
(1, 1)


Unnamed: 0,input,output
0,Good,is the bingham deal <end>


In [None]:
enc_model.save('./encoder-model-2.h5')
inf_model.save('./inf-model-2.h5')