In [1]:
# Importing the libraries

import numpy as np
import json
import re
import tensorflow as tf
import random
import spacy

2022-04-20 11:35:24.912678: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-04-20 11:35:24.912709: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [2]:
# Loading the small English spaCy pipeline; `nlp` is used later to split user input into tokens
# (the model package must be installed, e.g. `python -m spacy download en_core_web_sm`)

nlp = spacy.load('en_core_web_sm')

In [3]:
# Opening the intents.json file 

# Read as UTF-8 explicitly: the responses contain non-ASCII characters (e.g. curly
# apostrophes), and the platform default encoding is not UTF-8 everywhere.
with open('intents.json', encoding='utf-8') as f:
    intents = json.load(f)

In [4]:
# Preprocessing the intents to remove unwanted characters

def preprocessing(line):
    """Normalise a sentence by blanking out unwanted characters.

    Every character that is not an ASCII letter or one of ``. ? ! '`` is
    replaced by a space, then runs of spaces are collapsed to a single space.
    Leading/trailing spaces are not stripped.

    Args:
        line: The raw sentence.

    Returns:
        The cleaned sentence.
    """
    # The range must be A-Z: "A-z" spans ASCII 65-122 and therefore also lets
    # the characters [ \ ] ^ _ ` through untouched.
    line = re.sub(r'[^a-zA-Z.?!\']', ' ', line)
    line = re.sub(r'[ ]+', ' ', line)
    return line

In [5]:
# Running the above method and creating inputs, targets from the intents.json file

# inputs     : cleaned training sentences
# targets    : intent label for each entry of `inputs` (same order, same length)
# classes    : distinct intent labels in first-seen order
# intent_doc : intent label -> list of candidate responses
inputs, targets = [], []
classes = []
intent_doc = {}

for intent in intents['intents']:
    label = intent['intent']

    if label not in classes:
        classes.append(label)
    intent_doc.setdefault(label, [])

    cleaned = [preprocessing(text) for text in intent['text']]
    inputs.extend(cleaned)
    targets.extend([label] * len(cleaned))

    # extend() instead of a `for response in ...` loop: a loop variable named
    # `response` would leak into the notebook namespace and overwrite the
    # response() function if this cell were re-run after it was defined.
    intent_doc[label].extend(intent['responses'])

In [6]:
# Tokenizing the inputs using keras and padding them to generate equal length sequences

def tokenize_data(input_list):
    """Fit a Keras tokenizer on the sentences and encode them as padded id sequences.

    The tokenizer is created with an empty ``filters`` string, so no characters
    are filtered out, and ``<unk>`` is used as the out-of-vocabulary token.
    Sequences are padded at the front (``padding='pre'``).

    Args:
        input_list: The sentences to fit on and encode.

    Returns:
        A ``(tokenizer, input_seq)`` tuple: the fitted tokenizer and the padded
        sequences produced by ``pad_sequences``.
    """
    keras_pre = tf.keras.preprocessing

    fitted = keras_pre.text.Tokenizer(filters='', oov_token='<unk>')
    fitted.fit_on_texts(input_list)

    # Encode, then left-pad so all rows share the same length
    padded = keras_pre.sequence.pad_sequences(
        fitted.texts_to_sequences(input_list),
        padding='pre',
    )

    return fitted, padded

In [7]:
# Fitting the tokenizer on the preprocessed inputs; `input_tensor` holds the padded id sequences

tokenizer, input_tensor = tokenize_data(inputs)

In [8]:
# Creating the categorical tensor for the target variables

def create_categorical_target(targets):
    """One-hot encode the intent labels.

    Labels are numbered 0, 1, 2, ... in the order they are first seen.

    Args:
        targets: Iterable of (hashable) intent labels, one per training sample.

    Returns:
        A ``(categorical_tensor, index_to_label)`` tuple: the int32 one-hot
        matrix with one column per distinct label, and a dict mapping each
        class index back to its label.
    """
    label_to_index = {}
    encoded = []
    for label in targets:
        # On first sight a label receives the next free index (the current dict size)
        encoded.append(label_to_index.setdefault(label, len(label_to_index)))

    one_hot = tf.keras.utils.to_categorical(
        encoded,
        num_classes=len(label_to_index),
        dtype='int32',
    )
    index_to_label = {index: label for label, index in label_to_index.items()}
    return one_hot, index_to_label

In [9]:
# Encoding the intent labels: `target_tensor` is one-hot, `trg_index_word` maps class index -> intent name

target_tensor, trg_index_word = create_categorical_target(targets)

In [10]:
# Setting up the variables for the LSTM network

# Upper bound on training epochs (the EarlyStopping callback may end training sooner)
epochs=50
# +1 because Keras word indices start at 1; index 0 is the value pad_sequences pads with
vocab_size=len(tokenizer.word_index) + 1
# Size of each token's embedding vector
embed_dim=512
# Width of the LSTM (per direction) and of the hidden Dense layer
units=128
# Number of columns of the one-hot target tensor, i.e. the number of distinct intents
target_length=target_tensor.shape[1]

In [11]:
# LSTM Sequential model for prediction

model = tf.keras.models.Sequential([
    # Learn an embed_dim-sized vector for every token id
    tf.keras.layers.Embedding(vocab_size, embed_dim),
    # Read the sequence in both directions and keep only the final states
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(units, dropout=0.2)),
    tf.keras.layers.Dense(units, activation='relu'),
    tf.keras.layers.Dropout(0.5),
    # One probability per intent class
    tf.keras.layers.Dense(target_length, activation='softmax')
])

# `learning_rate` is the supported keyword; the old `lr` alias is deprecated
# (it emits a warning here) and is rejected by newer Keras releases.
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-2)
# categorical_crossentropy matches the one-hot encoded targets
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

2022-04-20 11:35:27.825678: E tensorflow/stream_executor/cuda/cuda_driver.cc:271] failed call to cuInit: CUDA_ERROR_UNKNOWN: unknown error
2022-04-20 11:35:27.825727: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:169] retrieving CUDA diagnostic information for host: pop-os
2022-04-20 11:35:27.825740: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:176] hostname: pop-os
2022-04-20 11:35:27.825924: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:200] libcuda reported version is: 470.86.0
2022-04-20 11:35:27.825964: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:204] kernel reported version is: 470.86.0
2022-04-20 11:35:27.825977: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:310] kernel version seems to match DSO: 470.86.0
2022-04-20 11:35:27.826462: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operat

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, None, 512)         75264     
                                                                 
 bidirectional (Bidirectiona  (None, 256)              656384    
 l)                                                              
                                                                 
 dense (Dense)               (None, 128)               32896     
                                                                 
 dropout (Dropout)           (None, 128)               0         
                                                                 
 dense_1 (Dense)             (None, 5)                 645       
                                                                 
Total params: 765,189
Trainable params: 765,189
Non-trainable params: 0
__________________________________________________

  super(Adam, self).__init__(name, **kwargs)


In [12]:
# Stop training early once the training loss has not improved for 4 consecutive epochs
early_stop = tf.keras.callbacks.EarlyStopping(
    monitor='loss',
    patience=4,
)

# Train the classifier on the padded input sequences and one-hot targets
model.fit(
    x=input_tensor,
    y=target_tensor,
    epochs=epochs,
    callbacks=[early_stop],
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50


<keras.callbacks.History at 0x7fb2903acdf0>

In [13]:
# Getting the response from the model for an input sentence

def response(sentence):
    """Predict the intent of a user sentence and pick a reply for it.

    The sentence is tokenized with spaCy, each token is mapped to its id in the
    fitted Keras tokenizer (tokens missing from the vocabulary map to
    ``<unk>``), and the trained model scores the resulting id sequence.

    Args:
        sentence: Raw user input.

    Returns:
        A ``(reply, intent)`` tuple: a reply chosen at random from the
        responses registered for the predicted intent, and the intent name.
    """
    # str() rather than repr(): repr() wraps the text in quote characters and
    # escapes backslashes/newlines, which fed spurious tokens to the model.
    doc = nlp(str(sentence))

    unk_id = tokenizer.word_index['<unk>']

    # Lower-case before the lookup: the Keras Tokenizer lower-cases text by
    # default when building its vocabulary, so a case-sensitive lookup would
    # turn every capitalised word into <unk>.
    sent_seq = [tokenizer.word_index.get(token.text.lower(), unk_id) for token in doc]

    # Blank input yields no tokens; feed a single <unk> so the model still
    # receives a non-empty sequence.
    if not sent_seq:
        sent_seq = [unk_id]

    # Add the batch dimension expected by the model
    sent_seq = tf.expand_dims(sent_seq, 0)
    pred = model(sent_seq)

    pred_class = np.argmax(pred.numpy(), axis=1)
    intent_name = trg_index_word[pred_class[0]]

    return random.choice(intent_doc[intent_name]), intent_name

In [14]:
# Interacting with the bot (enter "q" to quit)
print("Welcome. I am your COVID-19 bot. How can I help ?")

while True:
    input_ = input('You: ')
    # Accept the quit command regardless of case or surrounding whitespace
    if input_.strip().lower() == 'q':
        print("Thanks for using COVID-19 bot. Goodbye :)")
        break

    # Named `intent_type` rather than `type` so the builtin type() is not
    # shadowed for every cell executed after this one.
    result, intent_type = response(input_)

    print('INTENT TYPE -> {}'.format(intent_type))
    print('RESPONSE -> {}'.format(result))

    print()

Welcome. I am your COVID-19 bot. How can I help ?
You: Hello. Is anyone here ?
INTENT TYPE -> Greeting
RESPONSE -> Hello thanks for visiting. How can I help you?

You: What is coronavirus ?
INTENT TYPE -> Information
RESPONSE -> Coronavirus disease 2019 (COVID-19) is a contagious disease caused by a virus, the severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2). The first known case was identified in Wuhan, China, in December 2019. The disease has since spread worldwide, leading to the ongoing COVID-19 pandemic

You: What precautions I have to take ?
INTENT TYPE -> Precautions
RESPONSE -> To prevent the spread of COVID-19: Maintain a safe distance from others (at least 1 metre), even if they don’t appear to be sick. Wear a mask in public, especially indoors or when physical distancing is not possible. Choose open, well-ventilated spaces over closed ones. Open a window if indoors. Clean your hands often. Use soap and water, or an alcohol-based hand rub. Get vaccinated when it’s