In [5]:
import h5py
import numpy
import tensorflow as tf
import random
import json
import spacy
import nltk
from nltk.stem.porter import *
from nltk.stem.lancaster import LancasterStemmer
import logging
logging.getLogger('tensorflow').disabled = True

class Chatbot:
    """Bag-of-words intent classifier with a small-talk fallback model.

    Two Keras models are used: a "context" model and a "small talk" model.
    Each has its own label list, vocabulary list, response dictionary and
    saved-model file.  The currently active labels/vocabulary/responses are
    stored as *class* attributes (not instance attributes) because the
    module-level helpers ``bag_of_words`` and ``tokenize_and_get_stem`` read
    them through ``Chatbot.<name>``; all instances therefore share them.
    """

    # When True, intermediate steps (stems, vectors, probabilities) are printed.
    showWorking = False
    # When True, get_model() trains from the data-set file instead of loading.
    train_again=False
    # Sorted list of known stems for the currently active model.
    vocabulary=[]
    # Sorted list of intent tags for the currently active model.
    labels=[]
    # Maps intent tag -> list of candidate responses for the active model.
    responseDictionary = {}
    stemmer = PorterStemmer()
    # File names for the currently active model; filled in by get_model().
    labelFileName= ''
    vocabularyFileName = ''
    responseFileName = ''
    dataSetFileName = ''
    feedbackFileName = ''
    modelFileName = ''
    # Maps (isCommonTalk, train_again) -> (model, labels, vocabulary, responses)
    # so that a model is loaded/trained only once per Chatbot construction.
    _modelCache = {}

    def __init__(self,train_again=False):
        """Create a chatbot.

        Args:
            train_again: if true, models are trained from the intent data set
                the first time they are requested; otherwise previously saved
                models are loaded from disk.
        """
        Chatbot.train_again = train_again
        # A newly constructed bot must not reuse models prepared for an
        # earlier one (e.g. loaded models when retraining is now requested).
        Chatbot._modelCache = {}

    def get_model(self,isCommonTalk=0):
        """Return the requested model and make its state the active one.

        Selects the file names for the requested model, then either restores
        an already prepared model, loads the saved model, or trains a new one
        (when ``Chatbot.train_again`` is set).  As a side effect
        ``Chatbot.labels``, ``Chatbot.vocabulary`` and
        ``Chatbot.responseDictionary`` are switched to the requested model.

        Args:
            isCommonTalk: falsy for the context model, truthy for the
                small-talk model.

        Returns:
            The Keras model for the requested mode.
        """
        Chatbot._select_files(isCommonTalk)

        cacheKey = (bool(isCommonTalk), bool(Chatbot.train_again))
        cached = Chatbot._modelCache.get(cacheKey)
        if cached is not None:
            # Re-activate the state that belongs to this model without
            # touching the disk or retraining.
            model, Chatbot.labels, Chatbot.vocabulary, Chatbot.responseDictionary = cached
            return model

        if not Chatbot.train_again:
            model = Chatbot._load_saved_model()
        else:
            model = Chatbot._train_model()

        Chatbot._modelCache[cacheKey] = (
            model, Chatbot.labels, Chatbot.vocabulary, Chatbot.responseDictionary)
        return model

    @staticmethod
    def _select_files(isCommonTalk):
        """Point the class-level file names at the requested model's files."""
        if not isCommonTalk:
            Chatbot.labelFileName= 'LabelList.json'
            Chatbot.vocabularyFileName = 'VocabularyList.json'
            Chatbot.responseFileName = 'responseDictionary.json'
            Chatbot.dataSetFileName = 'CommonIntentions.json'
            Chatbot.feedbackFileName = 'feedback.json'
            Chatbot.modelFileName = 'chatBotContext.h5'
        else:
            Chatbot.labelFileName= 'LabelList_smallTalk.json'
            Chatbot.vocabularyFileName = 'VocabularyList_smallTalk.json'
            Chatbot.responseFileName = 'responseDictionary_smallTalk.json'
            Chatbot.dataSetFileName = 'CommonIntentions_smallTalk.json'
            Chatbot.feedbackFileName = 'feedback.json'
            Chatbot.modelFileName = 'chatBot_smallTalk.h5'

    @staticmethod
    def _load_saved_model():
        """Load labels, vocabulary, responses and the saved model from disk."""
        with open(Chatbot.labelFileName) as fl:
            Chatbot.labels = json.load(fl)

        with open(Chatbot.vocabularyFileName) as fv:
            Chatbot.vocabulary = json.load(fv)

        with open(Chatbot.responseFileName) as fr:
            Chatbot.responseDictionary = json.load(fr)

        return tf.keras.models.load_model(Chatbot.modelFileName)

    @staticmethod
    def _train_model():
        """Train a model from the intent data set and save it with its metadata."""
        with open(Chatbot.dataSetFileName) as file:
            data = json.load(file)

        # Start from empty containers: the class-level lists may still hold
        # the other model's (or a previous training run's) entries.
        vocabulary = []
        labels = []
        responses = {}
        docs_x = []
        docs_y = []

        for intent in data['intents']:
            tag = intent['tag']

            # Responses are kept in a dictionary keyed by tag so that the
            # reply lookup at chat time is a single dictionary access.
            responses[tag] = list(intent['responses'])

            for pattern in intent['patterns']:
                wrds = tokenize_and_get_stem(pattern)
                if len(wrds)>0:
                    vocabulary.extend(wrds)
                    docs_x.append(wrds)
                    docs_y.append(tag)

            if tag not in labels:
                labels.append(tag)

        Chatbot.responseDictionary = responses
        with open(Chatbot.responseFileName, 'w') as fp:
            json.dump(Chatbot.responseDictionary, fp)

        # Sorting fixes the position of every stem/label, so each index of an
        # input or output vector always means the same thing.
        Chatbot.vocabulary = sorted(set(vocabulary))
        Chatbot.labels = sorted(labels)

        # Build one bag-of-words input row and one one-hot output row per pattern.
        training = []
        output = []
        for doc, tag in zip(docs_x, docs_y):
            present = set(doc)
            training.append([1 if w in present else 0 for w in Chatbot.vocabulary])

            output_row = [0] * len(Chatbot.labels)
            output_row[Chatbot.labels.index(tag)] = 1
            output.append(output_row)

        training = numpy.array(training)
        output = numpy.array(output)

        # Only needs to be set once, not once per layer.
        tf.logging.set_verbosity(tf.logging.ERROR)

        model = tf.keras.Sequential()
        for _ in range(3):
            # Adds a densely-connected hidden layer with 10 units to the model.
            model.add(tf.keras.layers.Dense(10, activation='relu'))

        # Output layer: one softmax unit per label.
        model.add(tf.keras.layers.Dense(len(Chatbot.labels), activation='softmax'))
        # The keyword is `metrics` (plural); `metric` is not a compile() parameter.
        model.compile(optimizer='adam',loss='categorical_crossentropy',metrics=['accuracy'])
        model.fit(training, output, epochs=300)

        # Saving model for future use.
        model.save(Chatbot.modelFileName)

        # Saving label list and vocabulary list for future reference.
        with open(Chatbot.labelFileName, 'w') as fl:
            json.dump(Chatbot.labels, fl)
        with open(Chatbot.vocabularyFileName, 'w') as fv:
            json.dump(Chatbot.vocabulary, fv)
        return model

    def get_yes_list(self):
        """Return the list of answers that are treated as "yes"."""
        return ['yes','sure','ok','okay','for sure','sure','totally','yep','ya','yeah','yup','certainly',
                         'definitely','of course','gladly','absolutely','indeed','yah','okk','yea']

    @staticmethod
    def feedback_save_to_file(sentence,PredictedTag):
        """Append a low-confidence sentence and its predicted tag to the feedback file.

        Called by chat() when the predicted tag's probability is below 0.7.
        Each record is written as one JSON object on its own line so that the
        file stays parseable after several records have been appended.

        Args:
            sentence: the raw user input.
            PredictedTag: the tag predicted for that input.
        """
        feedbackDict = {'input': sentence, 'PredictedTag': PredictedTag}

        with open(Chatbot.feedbackFileName, 'a') as fv:
            json.dump(feedbackDict, fv)
            fv.write('\n')

    @staticmethod
    def _predict(model, sentence):
        """Classify *sentence* with *model* using the currently active state.

        Returns:
            A ``(tag, probability, responses)`` tuple: the most probable tag,
            its predicted probability, and the responses stored for that tag.
        """
        # Converts the input into vector form.
        bag_of_word_rep = bag_of_words(sentence)

        results = model.predict(bag_of_word_rep)
        results_index = numpy.argmax(results)
        tag = Chatbot.labels[results_index]
        probability = results[0][results_index]

        if Chatbot.showWorking:
            print("Highest intent proability:",probability)
            print("Intent of sentence : ",tag)
        return tag, probability, Chatbot.responseDictionary[tag]

    def chat(self):
        """Run the interactive loop until the predicted intent is "quit".

        Each input is first classified by the context model.  When the best
        probability is below 0.7 the input is recorded in the feedback file
        and classified again by the small-talk model.
        """
        print("Do you want to see internal processing steps:")
        choice = input("Enter you choice : ")
        if choice.strip().lower() in self.get_yes_list():
            Chatbot.showWorking=True
        print("Start talking with the bot (type quit to stop)!")
        while True:
            # Re-select the context model each turn: a fallback in the
            # previous turn leaves the small-talk state active.
            model = self.get_model()
            inp = input("You: ")

            tag, probability, responses = Chatbot._predict(model, inp)

            # Fall back to the small-talk model when confidence is below 0.7.
            if probability < .7:
                Chatbot.feedback_save_to_file(inp,tag)
                print('***** This seems to be kind of out of context. General response can be *****')
                model = self.get_model(1)
                tag, probability, responses = Chatbot._predict(model, inp)

            print("chatbot:",random.choice(responses))
            if tag == "quit":
                break
            
# this method gives tokens after stemming them            
def tokenize_and_get_stem(sentence):
    """Return the lower-cased, stemmed tokens of *sentence*.

    The sentence is split with ``nltk.word_tokenize`` and every token is
    stemmed with ``Chatbot.stemmer``.  When ``Chatbot.showWorking`` is set,
    the resulting stems are also printed.
    """
    stem = Chatbot.stemmer.stem
    stems = []
    for token in nltk.word_tokenize(sentence):
        stems.append(stem(token.lower()))

    if Chatbot.showWorking:
        print('Stems present in sentence are:',stems)
    return stems

# this method takes the sentence and convert it into vector form
def bag_of_words(sentence):
    """Encode *sentence* as a bag-of-words vector over ``Chatbot.vocabulary``.

    Position ``i`` of the vector is 1 when the stem ``Chatbot.vocabulary[i]``
    occurs in the sentence and 0 otherwise.  The vector is returned wrapped
    in a numpy array holding that single row.  When ``Chatbot.showWorking``
    is set, the vector is also printed.
    """
    stems = set(tokenize_and_get_stem(sentence))

    # One entry per vocabulary word, in vocabulary order.
    vector = [1 if word in stems else 0 for word in Chatbot.vocabulary]

    if Chatbot.showWorking:
        print('Vector Representation of input sentence :',vector)

    return numpy.array([vector])
        
def main():
    """Start an interactive chat session without retraining the models."""
    Chatbot(False).chat()

# Start the chat session only when this file is run as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
    


Do you want to see internal processing steps:
Enter you choice : no
Start talking with the bot (type quit to stop)!
You: how are you
***** This seems to be kind of out of context. General response can be *****
chatbot: Okay.
You: how to drive car
***** This seems to be kind of out of context. General response can be *****
chatbot: I see.
You: how to drive a car
chatbot: you can toggle the key position for starting and stopping
You: how to apply brake
chatbot: Please read the manual
You: will talk later
chatbot: Good bye!
