In [1]:
# Import requirements

##SPACY IMPORTS
import spacy

nlp = spacy.load('en_core_web_sm')
nlp.pipe_names
ner = nlp.get_pipe('ner')

import random
from spacy.util import minibatch, compounding
from pathlib import Path

##KERAS IMPORTS
import re
import torch
import torch.nn as nn

import numpy as np
import pandas as pd
import random
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem.lancaster import LancasterStemmer
import nltk
import re
from sklearn.preprocessing import OneHotEncoder
import matplotlib.pyplot as plt
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.utils import to_categorical
from keras.models import Sequential, load_model
from keras.layers import Dense, LSTM, Bidirectional, Embedding, Dropout
from keras.callbacks import ModelCheckpoint
nltk.download("stopwords")
nltk.download("punkt")


Using TensorFlow backend.
[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/maxlengdell/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     /Users/maxlengdell/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [2]:
#Load intent model
model = load_model("model.h5")

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


In [3]:
def cleaning(sentences):
    words = []
    for s in sentences:
        clean = re.sub(r'[^ a-z A-Z 0-9]', " ", s)
        w = word_tokenize(clean)
        #stemming
        words.append([i.lower() for i in w])
    
    return words  

def create_tokenizer(words, filters = '!"#$%&()*+,-./:;<=>?@[\]^_`{|}~'):
    token = Tokenizer(filters = filters)
    token.fit_on_texts(words)
    #print(token)
    return token
def encoding_doc(token, words):
    return(token.texts_to_sequences(words))

def padding_doc(encoded_doc, max_length):
    return(pad_sequences(encoded_doc, maxlen = max_length, padding = "post"))

def get_max_length(words):
    return(len(max(words, key = len)))

def load_dataset(filename):
    df = pd.read_csv(filename, encoding = "latin1", names = ["Sentence", "Intent"])
    print(df.head())
    intent = df["Intent"]
    unique_intent = list(set(intent))
    sentences = list(df["Sentence"])
  
    return (intent, unique_intent, sentences)
  

intent, unique_intent, sentences = load_dataset("data.csv")

#define stemmer
stemmer = LancasterStemmer()


                   Sentence  Intent
0              Order please   order
1      Ship this item to me   order
2  Can I order that please?   order
3        Where is the help?  common
4           Purchase <Item>   order


In [4]:
cleaned_words = cleaning(sentences)
print(len(cleaned_words))
print(cleaned_words[:2])  

237
[['order', 'please'], ['ship', 'this', 'item', 'to', 'me']]


In [5]:
word_tokenizer = create_tokenizer(cleaned_words)
vocab_size = len(word_tokenizer.word_index) + 1
max_length = get_max_length(cleaned_words)

print("Vocab Size = %d and Maximum length = %d" % (vocab_size, max_length))

Vocab Size = 144 and Maximum length = 9


In [6]:
encoded_doc = encoding_doc(word_tokenizer, cleaned_words)
padded_doc = padding_doc(encoded_doc, max_length)

output_tokenizer = create_tokenizer(unique_intent, filters = '!"#$%&()*+,-/:;<=>?@[\]^`{|}~')


In [7]:
encoded_output = encoding_doc(output_tokenizer, intent)

In [8]:
encoded_output = np.array(encoded_output).reshape(len(encoded_output), 1)

In [9]:
encoded_output.shape

(237, 1)

In [10]:
def predictions(text):
    
    clean = re.sub(r'[^ a-z A-Z 0-9]', " ", text)
    test_word = word_tokenize(clean)
    test_word = [w.lower() for w in test_word]
    test_ls = word_tokenizer.texts_to_sequences(test_word)

    #Check for unknown words
    if [] in test_ls:
        test_ls = list(filter(None, test_ls))
    
    test_ls = np.array(test_ls).reshape(1, len(test_ls))
 
    x = padding_doc(test_ls, max_length)
  
    pred = model.predict_proba(x)
  
  
    return pred
    

In [11]:
def one_hot(encode):
    o = OneHotEncoder(sparse = False)
    return(o.fit_transform(encode))

In [12]:
#output_one_hot = one_hot(encoded_output)
#output_one_hot.shape


In [13]:
#from sklearn.model_selection import train_test_split

In [14]:
def get_final_output(pred, classes):
    predictions = pred[0]
 
    classes = np.array(classes)
    ids = np.argsort(-predictions)
    classes = classes[ids]
    predictions = -np.sort(-predictions)
 
    for i in range(pred.shape[1]):
        print("%s has confidence = %s" % (classes[i], (predictions[i])))




In [15]:
text = "How much does it cost?"
pred = predictions(text)
get_final_output(pred, unique_intent)

price has confidence = 0.99968886
order has confidence = 0.0003107582
balance has confidence = 2.9935424e-07
common has confidence = 2.0552145e-08


In [16]:
text = "Can I order 3 pants?"
pred = predictions(text)
get_final_output(pred, unique_intent)

balance has confidence = 0.9723652
common has confidence = 0.02595956
price has confidence = 0.0010816839
order has confidence = 0.0005934916


In [17]:
text = "How many pants are left?"
pred = predictions(text)
get_final_output(pred, unique_intent)

order has confidence = 0.999992
price has confidence = 6.0079956e-06
balance has confidence = 1.3878124e-06
common has confidence = 6.8480136e-07


In [None]:
#Load entity model

output_dir = Path('./Models/')

print("loading model form", output_dir)
nlp_updated = spacy.load(output_dir)
doc = nlp_updated("Those pants are really good")
print("Entities", [(ent.text, ent.label_) for ent in doc.ents])

In [None]:
opening_lines = ['Hello, what can I help you with?',
                'How can I help you?',
                'Whats on your mind today?']
stock_info = [(['gloves', '299', '3']), #product name, price, balance
             (['shoes', '899', '5']),
             (['pants', '450', '10'])]

def randomOpening():
    index = random.randint(0,len(opening_lines)-1)
    print(opening_lines[index])
def getProductPrice(product):
    for prod in stock_info:
        if(prod[0]==product):
            return prod[1]
def getProductBalance(product):
    for prod in stock_info:
        if(prod[0]==product):
            return prod[2]
def printProducts():
    for prod in stock_info:
        print(prod[0])

In [None]:
def getEntity(sentence):
    #print(sentence)
    doc = nlp_updated(sentence)
    #print("Entities", [(ent.text, ent.label_) for ent in doc.ents])  
    text = []
    label = []
    for ent in doc.ents:
        text.append(ent.text)
        label.append(ent.label_)
    ret_arr = [text,label]
    return doc.ents

In [None]:
def getIntent(sentence):
    pred = predictions(sentence)
    guess = pred[0]
    
    classes = np.array(unique_intent)
    ids = np.argsort(-pred)
    classes = classes[ids]
    predict = -np.sort(-pred)
    
    #print("Network guessed:", classes[0][0])
    return classes[0][0]


In [None]:
def priceFunc(entities):
    print("Oh, so you want to know the price of your product?")
    for ent in entities:
        print(ent.text, ent.label_)

        if(ent.label_=="PRODUCT"):
            print("The price of " + ent.text + " is " + getProductPrice(ent.text))
        else:
            print("I'm sorry, i did not find that product. Did you mean any of these?")
            printProducts()
            
            

def balanceFunc(entities):
    print("Do you want to know the storage count of your product?")
    for ent in entities:
        if(ent.label_=="PRODUCT"):
            print("The balance of " + ent.text + " is " + getProductBalance(ent.text))
            
        else:
            print("I'm sorry, i did not find that product. Did you mean any of these?")
            printProducts()


def orderFunc(entities):
    print("Print order info")
    for ent in entities:

        if(ent.label_=="PRODUCT"):
            print("You have placed an order on " + ent.text)
        else:
            print("I'm sorry, i did not find that product. Did you mean any of these?")
            printProducts()
            
def commonFunf(entities):
    print("I'm sorry, i did not understand. Try rephrasing the sentence.")

In [None]:
#Ask user from the opening lines-list
#identify intent
#identify entities
#send to corresponding function and return answer
introduction = "Welcome to our e-com chatbot for a fashion store. \n
Place a order, check the availability of a product or its price. \n
What does pants cost? How many gloves are in stock? "
def cli():
    print(introduction)
    while True:
        randomOpening()
        user_input = input("Ask the chatbot something: ")
        if(user_input=="quit"):
            break
        else:
            intent = getIntent(user_input)
            entity = getEntity(user_input)
            if(intent=="price"):
                priceFunc(entity)
            elif(intent=="balance"):
                balanceFunc(entity)
            elif(intent=="order"):
                orderFunc(entity)
            else:
                commonFunc(entities)
            print("***********************************")


                

        
cli()