This notebook builds, trains, and runs a chatbot that classifies user input with a bag-of-words model.


https://chatbotsmagazine.com/contextual-chat-bots-with-tensorflow-4391749d0077

In [18]:

from os import path, name, system
from nltk.stem.lancaster import LancasterStemmer
from random import choice, randint
import numpy as np 
import tensorflow as tf
import nltk, pickle, json, re, string, tflearn, spacy, warnings
warnings.filterwarnings("ignore")


In [19]:
# Load the intent definitions: a dict keyed by intent label whose entries hold
# the 'patterns' and 'responses' lists used throughout the notebook.
# The encoding is passed explicitly so the file is decoded the same way on
# every platform instead of depending on the locale default (e.g. cp1252 on
# Windows).
with open('cs_prompts.json', encoding='utf-8') as file:
    data = json.load(file)

In [4]:
# Preprocess and format the training data
def preprocess_train_data(data):
    """Convert the intent dictionary into bag-of-words training arrays.

    Args:
        data: Mapping of intent label -> entry, where ``entry['patterns']`` is
            an iterable of example sentences for that intent.

    Returns:
        A 4-tuple ``(words, labels, training, output)``:
        ``words`` is the sorted list of unique stemmed, lower-cased tokens
        (the tokens ',', '.', '?' and '!' are excluded); ``labels`` is the
        sorted list of intent labels; ``training`` is an array with one 0/1
        bag-of-words row per pattern; ``output`` is an array with one one-hot
        label row per pattern.
    """
    stemmer = LancasterStemmer()
    punctuation = [',', '.', '?', '!']

    # One (tokens, label) pair per pattern, in the dictionary's own order.
    samples = [
        (nltk.word_tokenize(pattern), label)
        for label in data.keys()
        for pattern in data[label]['patterns']
    ]

    # Vocabulary: every non-punctuation token, lower-cased and stemmed.
    vocabulary = {
        stemmer.stem(token.lower())
        for tokens, _label in samples
        for token in tokens
        if token not in punctuation
    }
    words = sorted(vocabulary)
    labels = sorted(data.keys())

    training = []
    output = []
    for tokens, label in samples:
        doc_stems = {stemmer.stem(token) for token in tokens}
        training.append([1 if w in doc_stems else 0 for w in words])

        # One-hot row: start from the all-zero template, flag the label's slot.
        one_hot = list(np.zeros(len(labels)))
        one_hot[labels.index(label)] = 1
        output.append(one_hot)

    return words, labels, np.array(training), np.array(output)

In [5]:
# Build the training arrays from the prompts unless a cached pickle of them
# already exists, in which case the cached copy is loaded instead.
# NOTE: the cache is used whenever the file exists, so delete data.pickle
# after editing the prompts or the old training data will be loaded.
# NOTE: pickle.load executes arbitrary code; only load a data.pickle you trust.

if not path.exists('./data.pickle'):
    words, labels, training, output = preprocess_train_data(data)
    with open('data.pickle', 'wb') as file:
        pickle.dump((words, labels, training, output), file)
else:
    with open('data.pickle', 'rb') as file:
        words, labels, training, output = pickle.load(file)

In [29]:
# Run this cell to create and train a new model

# Enable memory growth on every visible GPU. Iterating over the list instead
# of indexing element 0 keeps the cell runnable on CPU-only machines (where
# the list is empty) and applies the same setting to all GPUs.
physical_devices = tf.config.list_physical_devices('GPU')
for gpu in physical_devices:
    tf.config.experimental.set_memory_growth(gpu, True)

# Network: bag-of-words input -> three 8-unit dense layers -> softmax over
# the intent labels.
tf.compat.v1.reset_default_graph()
net = tflearn.input_data(shape=[None, len(training[0])])
net = tflearn.fully_connected(net,8)
net = tflearn.fully_connected(net,8)
net = tflearn.fully_connected(net,8)
net = tflearn.fully_connected(net,len(output[0]), activation='softmax')
net = tflearn.regression(net)

model = tflearn.DNN(net)
model.fit(training, output, n_epoch=250, batch_size=8, show_metric=True)
model.save('model.tflearn')


Training Step: 2399  | total loss: [1m[32m0.00418[0m[0m | time: 0.059s
| Adam | epoch: 300 | loss: 0.00418 - acc: 1.0000 -- iter: 56/60
Training Step: 2400  | total loss: [1m[32m0.00427[0m[0m | time: 0.068s
| Adam | epoch: 300 | loss: 0.00427 - acc: 1.0000 -- iter: 60/60
--
INFO:tensorflow:c:\Users\owner\Documents\GitHub\CustomerServiceBot-RW\cs-bagofwords\model.tflearn is not in all_model_checkpoint_paths. Manually adding it.


In [6]:
# Run this cell to load a previously trained model

# Enable memory growth on every visible GPU. Iterating over the list instead
# of indexing element 0 keeps the cell runnable on CPU-only machines (where
# the list is empty) and applies the same setting to all GPUs.
physical_devices = tf.config.list_physical_devices('GPU')
for gpu in physical_devices:
    tf.config.experimental.set_memory_growth(gpu, True)

# The architecture must be rebuilt exactly as it was for training before the
# saved weights can be restored into it.
tf.compat.v1.reset_default_graph()
net = tflearn.input_data(shape=[None, len(training[0])])
net = tflearn.fully_connected(net,8)
net = tflearn.fully_connected(net,8)
net = tflearn.fully_connected(net,8)
net = tflearn.fully_connected(net,len(output[0]), activation='softmax')
net = tflearn.regression(net)

model = tflearn.DNN(net)
model.load('model.tflearn')


Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
INFO:tensorflow:Restoring parameters from c:\Users\owner\Documents\GitHub\CustomerServiceBot-RW\cs-bagofwords\model.tflearn


In [30]:
def clear():
    """Clear the terminal by running the platform's shell command.

    Uses ``name`` and ``system`` imported from ``os``: ``cls`` is run when
    ``name`` is ``'nt'`` (Windows) and ``clear`` otherwise (mac/linux).
    """
    command = 'cls' if name == 'nt' else 'clear'
    system(command)

def bag_of_words(s, words, stemmer):
    """Encode a sentence as a bag-of-words vector over ``words``.

    Args:
        s: Raw input string; it is tokenized with ``nltk.word_tokenize``.
        words: Vocabulary of stemmed words; position ``i`` of the result
            corresponds to ``words[i]``.
        stemmer: Object with a ``stem(str)`` method, applied to each
            lower-cased token.

    Returns:
        A NumPy float array of length ``len(words)`` holding 1 where the
        vocabulary word occurs among the stemmed tokens of ``s`` and 0
        elsewhere.
    """
    # A set gives constant-time membership tests, so building the vector is a
    # single pass over the vocabulary.
    stems = {stemmer.stem(token.lower()) for token in nltk.word_tokenize(s)}
    return np.array([1.0 if w in stems else 0.0 for w in words], dtype=float)


In [31]:
def context_chat():
    """Run the interactive chat loop on standard input/output.

    Loads ``history.json``, prints a greeting, then repeatedly reads a line
    from the user, classifies it with the global ``model`` and prints a
    response chosen from ``data``. For the ``open_account`` and
    ``close_account`` intents a follow-up dialogue collects the user's name
    (and, when closing, the account number) and records the outcome in the
    history mapping.

    The loop ends when the ``goodbye`` intent is predicted or when the user
    types ``end``, ``quit`` or ``stop`` at any prompt. The history mapping is
    written back to ``history.json`` before returning.

    Relies on the notebook globals ``data``, ``model``, ``words`` and
    ``labels`` and on the helpers ``clear`` and ``bag_of_words``.
    """
    with open("history.json", encoding="utf-8") as h_file:
        history = json.load(h_file)
    clear()
    print(choice(data['greeting']['responses']))

    stemmer = LancasterStemmer()
    ner = spacy.load('en_core_web_sm')
    # Compiled once here rather than on every filter_punctuation() call.
    punctuation_re = re.compile('[%s]' % re.escape(string.punctuation))

    def filter_punctuation(s):
        """Return ``s`` with every ``string.punctuation`` character removed."""
        return punctuation_re.sub('', s)

    def classify(user_input):
        """Return ``(intent_label, random_response)`` for ``user_input``."""
        results = model.predict([bag_of_words(user_input, words, stemmer)])
        prediction = labels[np.argmax(results)]
        response = choice(data[prediction]['responses'])
        return prediction, response

    CHAT_ENDED = False

    def end_chat(inp):
        """Flag the chat as ended if ``inp`` is an exit keyword.

        Returns True when the user asked to leave, False otherwise.
        """
        # nonlocal is required: without it the assignment would create a new
        # local variable and the enclosing flag would never change.
        nonlocal CHAT_ENDED
        if inp.strip().lower() in ['end', 'quit', 'stop']:
            CHAT_ENDED = True
            return True
        return False

    while not CHAT_ENDED:
        inp = filter_punctuation(input("You: ").lower())

        # Honour the exit keywords at the main prompt as well.
        if end_chat(inp):
            break

        prediction, response = classify(inp)

        print(f"Bot: {response}")

        if prediction in ['open_account', 'close_account']:
            user_name = []
            user_account_num = None
            while len(user_name) < 1:
                print("Bot: Please give me your first and last name")
                inp = input("You: ")

                if end_chat(inp):
                    break

                # Every proper-noun token is treated as part of the name.
                parts = ner(inp)
                for part in parts:
                    if part.pos_ == 'PROPN':
                        user_name.append(part.text)
                if len(user_name) > 0:
                    print(f"Bot: Your name is {' '.join(user_name)}. Is that correct?")
                    inp = input("You: ")

                    if end_chat(inp):
                        break

                    name_prediction, _ = classify(filter_punctuation(inp.lower()))

                    if name_prediction == 'deny':
                        print("Bot: Sorry about that. Let me try again.")
                        # Emptying the name makes the enclosing loop ask again.
                        user_name = []
                        user_account_num = None
                        continue

                    elif name_prediction == 'confirm':

                        if prediction == 'open_account':
                            user_account_num = ''.join([str(randint(1,9)) for i in range(12)])
                            print(f"Your new account number is {user_account_num}")
                            history[' '.join(user_name)] = user_account_num

                        elif prediction == 'close_account':
                            print("Bot: Can you give me your account number?")
                            inp = input("You: ")

                            if end_chat(inp):
                                break

                            # The last numeric token in the reply wins.
                            parts = ner(inp)
                            for part in parts:
                                if part.pos_ == 'NUM':
                                    user_account_num = part.text
                            print(f"Bot: Your account number is {user_account_num}. Is that correct?")
                            inp = input("You: ")

                            if end_chat(inp):
                                break

                            confirm_prediction, confirm_response = classify(filter_punctuation(inp.lower()))

                            # Branch on the answer to the confirmation
                            # question, not on the outer intent (which is
                            # always 'close_account' at this point).
                            if confirm_prediction == 'confirm':
                                print(f"Bot: {confirm_response}")
                                history[" ".join(user_name)] = None
                            elif confirm_prediction == 'deny':
                                # NOTE(review): no retry happens here; the
                                # dialogue returns to the main prompt.
                                print("Bot: Sorry about that. Let me try again.")

            # The user typed an exit keyword during the follow-up dialogue.
            if CHAT_ENDED:
                break

        if prediction == 'goodbye':
            break

        elif prediction is None:
            # NOTE(review): classify() always returns a label, so this
            # fallback is not reached at present.
            print(f"Bot: {choice(data['deny']['responses'])}")

        else:
            print("Bot: Is there anything else I can help with?")

    with open("history.json", "w", encoding="utf-8") as h_file:
        json.dump(history, h_file)

In [32]:
# Launch the interactive chat; it reads user input from the prompt and the
# loop exits once the 'goodbye' intent is predicted.
context_chat()

Hi there, how can I help?
Bot: To open a new account, you'll need proof of identification and an initial deposit of at least $100.00. 
Please call the customer service line to speak with a representative. 
You can also use our website to create one using the Account Creation tool or to find a location near you.
Bot: Please give me your first and last name
Bot: Your name is Rob Weddell. Is that correct?
Your new account number is 332483333371
Bot: Is there anything else I can help with?
Bot: Great. Thanks for confirming.
Bot: Is there anything else I can help with?
Bot: Account management can be done through BotBank's website, 
or if you need assistance, 
you can call the customer support line to speak with a representative.
Bot: Is there anything else I can help with?
Bot: Glad I could help. Thanks for choosing BotBank Have a nice day.
