In [None]:
import json
import random
import re
import warnings

import numpy as np
import tensorflow as tf

import spacy
# Load the spaCy pipeline named 'en_core_web_sm' once, at notebook start-up.
nlp = spacy.load('en_core_web_sm')

# Install a blanket 'ignore' filter: every warning raised after this point is hidden.
warnings.filterwarnings('ignore')

: 

In [None]:
''' reading data '''
# Intent.json is opened in binary mode; the json module detects the encoding
# of the raw bytes itself. The parsed document is kept in `data`.
with open('Intent.json', 'rb') as file:
    data = json.loads(file.read())

In [None]:
''' preprocessing '''
def pre_processing(line):
    """Clean one utterance before it is tokenised.

    Every character other than an ASCII letter, ``.``, ``?``, ``!`` or ``'``
    is replaced by a space, then each run of spaces is collapsed to a single
    space. Leading/trailing spaces are not stripped.

    Args:
        line: Raw text of one utterance.

    Returns:
        The cleaned text.
    """
    # The range must be A-Z: the ASCII span A-z also covers [ \ ] ^ _ and `,
    # which would let those symbols slip through the filter.
    line = re.sub(r'[^a-zA-Z.?!\']', ' ', line)
    return re.sub(r' +', ' ', line)

In [None]:
'''get text and intent title from json data'''
# inputs / targets are parallel lists: cleaned utterance -> intent name.
inputs, targets = [], []
# cls holds each intent name once, in order of first appearance.
cls = []
# intent_doc maps an intent name to the list of its canned responses.
intent_doc = {}

for intent in data['intents']:
    label = intent['intent']

    if label not in cls:
        cls.append(label)

    for text in intent['text']:
        inputs.append(pre_processing(text))
        targets.append(label)

    # extend() rather than a `for response in ...` loop: a loop variable named
    # `response` lives on in the notebook namespace and would overwrite the
    # response() function whenever this cell is re-run after it is defined.
    intent_doc.setdefault(label, []).extend(intent['responses'])

In [None]:
''' tokenize data '''
def token_data(inp_list):
    """Fit a Keras tokenizer on ``inp_list`` and encode the same texts.

    The tokenizer is created with an empty ``filters`` string and ``<unk>`` as
    its out-of-vocabulary token. The encoded sequences are padded at the
    front (``padding='pre'``).

    Args:
        inp_list: List of text strings.

    Returns:
        ``(tokenizer, inp_seq)``: the fitted tokenizer and the padded
        sequences produced by ``pad_sequences``.
    """
    tokenizer = tf.keras.preprocessing.text.Tokenizer(oov_token='<unk>', filters='')
    tokenizer.fit_on_texts(inp_list)

    ''' adding padding '''
    padded = tf.keras.preprocessing.sequence.pad_sequences(
        tokenizer.texts_to_sequences(inp_list),
        padding='pre',
    )
    return tokenizer, padded

'''preprocess input data'''
# Fit the tokenizer on all cleaned utterances; keep both the fitted tokenizer
# (reused later to look up word ids) and the padded id sequences.
tokenizer, inp_tensor = token_data(inputs)

In [None]:
def cr_cat_target(targets):
    """One-hot encode the intent labels.

    Each distinct label receives an integer id in order of first appearance
    (0, 1, 2, ...); the ids are then expanded with ``to_categorical``.

    Args:
        targets: Sequence of labels, one per training example.

    Returns:
        ``(cat_tensor, idx_to_label)``: the ``int32`` one-hot encoding of
        ``targets`` and a dict mapping each integer id back to its label.
    """
    label_to_idx = {}
    for label in targets:
        # len() is evaluated before insertion, so new labels get the next free id
        label_to_idx.setdefault(label, len(label_to_idx))

    encoded = [label_to_idx[label] for label in targets]
    one_hot = tf.keras.utils.to_categorical(
        encoded, num_classes=len(label_to_idx), dtype='int32'
    )
    idx_to_label = {idx: label for label, idx in label_to_idx.items()}
    return one_hot, idx_to_label

'''preprocess output data'''
# target_tensor: one-hot labels aligned with `inputs`;
# target_idx_word: class id -> intent name, for decoding predictions.
target_tensor, target_idx_word = cr_cat_target(targets)

In [None]:
# Sanity check: show the shapes of the encoded inputs and the one-hot targets.
print('input shape: {} and output shape: {}'.format(inp_tensor.shape, target_tensor.shape))

In [None]:
''' Build Model '''
''' hyperparameters'''
epochs = 50
embed_dim = 512
units = 128
# one more than the number of words in the tokenizer's word index
vocab_size = len(tokenizer.word_index) + 1
# width of the one-hot target rows, i.e. size of the output layer
target_len = target_tensor.shape[1]

''' Model '''
model = tf.keras.models.Sequential()
## Embedding Layer
model.add(tf.keras.layers.Embedding(vocab_size, embed_dim))
model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(units, dropout=0.2)))
## Hidden Layer
model.add(tf.keras.layers.Dense(units, activation='relu'))
model.add(tf.keras.layers.Dropout(0.5))
## Classification Layer
model.add(tf.keras.layers.Dense(target_len, activation='softmax'))

In [None]:
''' Compile the model '''
# `learning_rate` is the supported keyword for Keras optimizers; the old `lr`
# alias is deprecated in TF 2.x and rejected outright by Keras 3.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-2),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

''' lets see how model looks like '''
model.summary()

In [None]:
''' EarlyStopping'''
# Watches the training loss ('loss') with a patience of 4 epochs.
early_stop = tf.keras.callbacks.EarlyStopping(patience=4, monitor='loss')

''' training '''
model.fit(
    x=inp_tensor,
    y=target_tensor,
    epochs=epochs,
    callbacks=[early_stop],
)

In [None]:
def response(sentence):
    """Predict the intent of ``sentence`` and pick a canned reply for it.

    The sentence is encoded with the same pipeline as the training data:
    ``pre_processing`` followed by the fitted Keras ``tokenizer``. The
    tokenizer lower-cases and splits on whitespace, and maps words it did not
    see during fitting to its ``<unk>`` id, so the ids fed to the model come
    from the vocabulary it was trained on.

    Args:
        sentence: Raw user utterance.

    Returns:
        ``(reply, intent)``: a reply chosen at random from
        ``intent_doc[intent]`` and the name of the predicted intent.
    """
    # encode exactly like the training utterances were encoded
    sent_seq = tokenizer.texts_to_sequences([pre_processing(sentence)])[0]

    # blank input (or input made only of filtered characters) yields no ids;
    # feed a single unknown-word id so the model still gets a non-empty sequence
    if not sent_seq:
        sent_seq = [tokenizer.word_index['<unk>']]

    # add the batch dimension expected by the model
    sent_seq = tf.expand_dims(sent_seq, 0)

    # predict the category of the input sentence (inference mode: no dropout)
    pred = model(sent_seq, training=False)
    pred_class = int(np.argmax(pred.numpy(), axis=1)[0])

    # map the class id back to its intent name, then choose a random reply
    intent = target_idx_word[pred_class]
    return random.choice(intent_doc[intent]), intent

# chat with bot
print("Note: Enter 'quit' to break the loop.")
while True:
    try:
        input_ = input('You: ')
    except (EOFError, KeyboardInterrupt):
        # stdin was closed or the kernel was interrupted: end the chat
        # without a traceback
        print()
        break

    # ignore surrounding whitespace and case when checking for the exit word
    if input_.strip().lower() == 'quit':
        break

    res, typ = response(input_)
    print('Bot: {} -- TYPE: {}'.format(res, typ))
    print()