In [177]:
# Import requirements

##SPACY IMPORTS
import spacy

nlp = spacy.load('en_core_web_sm')
nlp.pipe_names
ner = nlp.get_pipe('ner')

import random
from spacy.util import minibatch, compounding
from pathlib import Path

##KERAS IMPORTS
import re
import torch
import torch.nn as nn

import numpy as np
import pandas as pd
import random
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem.lancaster import LancasterStemmer
import nltk
import re
from sklearn.preprocessing import OneHotEncoder
import matplotlib.pyplot as plt
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.utils import to_categorical
from keras.models import Sequential, load_model
from keras.layers import Dense, LSTM, Bidirectional, Embedding, Dropout
from keras.callbacks import ModelCheckpoint
nltk.download("stopwords")
nltk.download("punkt")


[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/maxlengdell/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     /Users/maxlengdell/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [193]:
#Load intent model
model = load_model("model.h5")

In [194]:
def cleaning(sentences):
    words = []
    for s in sentences:
        clean = re.sub(r'[^ a-z A-Z 0-9]', " ", s)
        w = word_tokenize(clean)
        #stemming
        words.append([i.lower() for i in w])
    
    return words  

def create_tokenizer(words, filters = '!"#$%&()*+,-./:;<=>?@[\]^_`{|}~'):
    token = Tokenizer(filters = filters)
    token.fit_on_texts(words)
    #print(token)
    return token
def encoding_doc(token, words):
    return(token.texts_to_sequences(words))

def padding_doc(encoded_doc, max_length):
    return(pad_sequences(encoded_doc, maxlen = max_length, padding = "post"))

def get_max_length(words):
    return(len(max(words, key = len)))

def load_dataset(filename):
    df = pd.read_csv(filename, encoding = "latin1", names = ["Sentence", "Intent"])
    print(df.head())
    intent = df["Intent"]
    unique_intent = list(set(intent))
    sentences = list(df["Sentence"])
  
    return (intent, unique_intent, sentences)
  

intent, unique_intent, sentences = load_dataset("data.csv")

#define stemmer
stemmer = LancasterStemmer()


                      Sentence Intent
0          What does that cost  price
1  What's the price of <Item>?  price
2          How much is <item>?  price
3              Cost of <Item>?  price
4       What does <Item> cost?  price


In [195]:
cleaned_words = cleaning(sentences)
print(len(cleaned_words))
print(cleaned_words[:2])  

92
[['what', 'does', 'that', 'cost'], ['what', 's', 'the', 'price', 'of', 'item']]


In [196]:
word_tokenizer = create_tokenizer(cleaned_words)
vocab_size = len(word_tokenizer.word_index) + 1
max_length = get_max_length(cleaned_words)

print("Vocab Size = %d and Maximum length = %d" % (vocab_size, max_length))

Vocab Size = 74 and Maximum length = 9


In [197]:
encoded_doc = encoding_doc(word_tokenizer, cleaned_words)
padded_doc = padding_doc(encoded_doc, max_length)

output_tokenizer = create_tokenizer(unique_intent, filters = '!"#$%&()*+,-/:;<=>?@[\]^`{|}~')


In [198]:
encoded_output = encoding_doc(output_tokenizer, intent)

In [199]:
encoded_output = np.array(encoded_output).reshape(len(encoded_output), 1)

In [200]:
encoded_output.shape

(92, 1)

In [201]:
def predictions(text):
    
    clean = re.sub(r'[^ a-z A-Z 0-9]', " ", text)
    test_word = word_tokenize(clean)
    test_word = [w.lower() for w in test_word]
    test_ls = word_tokenizer.texts_to_sequences(test_word)

    #Check for unknown words
    if [] in test_ls:
        test_ls = list(filter(None, test_ls))
    
    test_ls = np.array(test_ls).reshape(1, len(test_ls))
 
    x = padding_doc(test_ls, max_length)
  
    pred = model.predict_proba(x)
  
  
    return pred
    

In [202]:
def one_hot(encode):
    o = OneHotEncoder(sparse = False)
    return(o.fit_transform(encode))

In [203]:
#output_one_hot = one_hot(encoded_output)
#output_one_hot.shape


In [204]:
#from sklearn.model_selection import train_test_split

In [205]:
def get_final_output(pred, classes):
    predictions = pred[0]
 
    classes = np.array(classes)
    ids = np.argsort(-predictions)
    classes = classes[ids]
    predictions = -np.sort(-predictions)
 
    for i in range(pred.shape[1]):
        print("%s has confidence = %s" % (classes[i], (predictions[i])))




In [207]:
text = "How much does that cost?"
pred = predictions(text)
get_final_output(pred, unique_intent)

price has confidence = 0.995908
balance has confidence = 0.0027589584
order has confidence = 0.00082231336
common has confidence = 0.00051071536


In [208]:
#Load entity model

output_dir = Path('./Models/')

print("loading model form", output_dir)
nlp_updated = spacy.load(output_dir)
doc = nlp_updated("Those pants are really good")
print("Entities", [(ent.text, ent.label_) for ent in doc.ents])

loading model form Models




Entities [('Those', 'ORG'), ('pants', 'PRODUCT')]


In [209]:
opening_lines = ['Hello, what can I help you with?',
                'How can I help you?',
                'Whats on your mind today?']
stock_info = [(['glove', '299']),
             (['shoes', '899']),
             (['pants', '450'])]

def randomOpening():
    index = random.randint(0,len(opening_lines)-1)
    print(opening_lines[index])
def getProductPrice(product):
    for prod in stock_info:
        if(prod[0]==product):
            return prod[1]


In [210]:
def getEntity(sentence):
    print("getting entities")
    
    entities = nlp_updated(sentence)
    #print("Entities", [(ent.text, ent.label_) for ent in doc.ents])  
    text = []
    label = []
    for ent in doc.ents:
        text.append(ent.text)
        label.append(ent.label_)
    ret_arr = [text,label]
    return doc.ents

In [211]:
def getIntent(sentence):
    pred = predictions(sentence)
    guess = pred[0]
    
    classes = np.array(unique_intent)
    ids = np.argsort(-pred)
    classes = classes[ids]
    predict = -np.sort(-pred)
    
    print("Network guessed:", classes[0][0])
    return classes[0][0]


In [149]:
def priceFunc(entities):
    print("Oh, so you want to know the price of your product?")
    for ent in entities:
        if(ent.label_=="PRODUCT"):
            print("The price of " + ent.text + " is " + getProductPrice(ent.text))
            #print(ent.text, ent.label_)
            

def balanceFunc(entities):

    print("Do you want to know the storage count of your product?")

In [None]:
#Ask user from the opening lines-list
#identify intent
#identify entities
#send to corresponding function and return answer

def cli():
    while True:
        randomOpening()
        user_input = input("Ask the chatbot something: ")
        if(user_input=="quit"):
            break
        else:
            intent = getIntent(user_input)
            entity = getEntity(user_input)
            print(entity)
            if(intent=="price"):
                priceFunc(entity)
            elif(intent=="balance"):
                balanceFunc(entity)
                

        
cli()

Whats on your mind today?
Ask the chatbot something: 
Network guessed: price
getting entities
(Those, pants)
Oh, so you want to know the price of your product?
The price of pants is 
pants PRODUCT
['glove', '299']
['shoes', '899']
['pants', '450']
450
Hello, what can I help you with?
