Other chatbots in the travel industry: Expedia, Booking.com

In [85]:
import re, json
import random
import spacy 
import numpy as np
import en_core_web_sm
from nltk.stem import WordNetLemmatizer
import nltk

In [86]:
# Load the spacy model: nlp
nlp = en_core_web_sm.load()
lemmatizer = WordNetLemmatizer()
# !python -m spacy download en

In [87]:
# import our chat-bot intents file
with open('data/intents.json') as json_data:
    intents = json.load(json_data)

In [88]:
patterns = {}
for intent in intents['intents']:
    patterns[intent['tag']] = re.compile('|'.join(intent['patterns']))
    
patterns

{'goodbye': re.compile(r'bye|see you later|goodbye', re.UNICODE),
 'greeting': re.compile(r'hi|how are you|is anyone there|hello|good day|hey',
 re.UNICODE),
 'thanks': re.compile(r"thanks|thank you|that's helpful", re.UNICODE)}

In [89]:
# Define a function to find the intent of a message
def match_intent(message):
    matched_intent = None
    for intent, pattern in patterns.items():
        # Check if the pattern occurs in the message 
        if pattern.search(message) :
            matched_intent = intent
    return matched_intent

## Entity Recognition

### One option

In [90]:
# Define included entities
include_entities = ['DATE', 'ORG', 'PERSON', 'TIME', 'MONEY', 'QUANTITY', 'FAC']

# Define extract_entities()
def extract_entities(message):
    # Create a dict to hold the entities
    ents = dict.fromkeys(include_entities)
    # Create a spacy document
    doc = nlp(message)
    print(doc)
    for ent in doc.ents:
        if ent.label_ in include_entities:
            # Save interesting entities
            ents[ent.label_] = ent.text
    return ents

In [91]:
print(extract_entities('friends called Mary who have worked at Google since 2010'))
# print(extract_entities('people who graduated from MIT in 1999'))
# print(extract_entities('What are the top hotels below $50'))
# print(extract_entities('What is the rating for hotel ABC?'))
# print(extract_entities('What are the 5 ranking hotels near area ABC in 2019 ?'))
# print(extract_entities('Can I buy 200 apples'))
# print(extract_entities('What is the rating for hotel ABC?'))
# print(extract_entities('people who graduated from MIT in 1999'))

friends called Mary who have worked at Google since 2010
{'DATE': '2010', 'ORG': 'Google', 'PERSON': 'Mary', 'TIME': None, 'MONEY': None, 'QUANTITY': None, 'FAC': None}


## Alternative

In [92]:
#Implement noun phrase chunking. For now is one rule but can increase
def extract_entities(text):
    pattern = 'NP: {<DT>?<JJ>*<NN>}' #Optional determinant followed by any number of adjectives and then a noun
    cp = nltk.RegexpParser(pattern)
    cs = cp.parse(text)
    print(cs)
    represent_chunks(cs)
    
from nltk.chunk import conlltags2tree, tree2conlltags
from pprint import pprint

def represent_chunks(cs):
    iob_tagged = tree2conlltags(cs)
    pprint(iob_tagged)

In [95]:
from collections import Counter
doc = nlp(text)

# doc = nlp('European authorities fined Google a record $5.1 billion on Wednesday for abusing its power in the mobile phone market and ordered the company to alter its practices')
# pprint([(X.text, X.label_) for X in doc.ents])
pprint([(X, X.ent_iob_, X.ent_type_) for X in doc])

[(Is, 'O', ''),
 (the, 'O', ''),
 (rating, 'O', ''),
 (for, 'O', ''),
 (hotel, 'O', ''),
 (Abra, 'B', 'PERSON'),
 (Chu, 'I', 'PERSON'),
 (better, 'O', ''),
 (for, 'O', ''),
 (hotel, 'O', ''),
 (Google, 'B', 'ORG'),
 (?, 'O', '')]


## Create a dictionary of rules and get responses

In [9]:
rules = {}
for intent in intents['rules']:
    print(intent)
    rules[intent['tag']] = intent['patterns']
    
rules

{'tag': 'topN', 'patterns': ['What are the top N hotels '], 'responses': ['We operate only in LA, for now.']}
{'tag': 'review', 'patterns': ['What are the reviews for '], 'responses': ['The review: ']}
{'tag': 'amenities', 'patterns': ['Does hotel X have good Y '], 'responses': ['The review: ']}


{'amenities': ['Does hotel X have good Y '],
 'review': ['What are the reviews for '],
 'topN': ['What are the top N hotels ']}

## Get user INPUT

In [78]:
text = input()

Is the rating for hotel Abra Chu better for hotel Google ?


### Clean text

In [68]:
# docs_lower = [[w.lower() for w in doc] for doc in docs]
# docs_regex= [[w for w in doc if re.search('^[a-z]+$',w)] for doc in docs_lower]
# docs_stop = [[w for w in doc if w not in stop_words] for doc in docs_regex]
# docs_stem = [[stemmer.stem(w) for w in doc] for doc in docs_stop]

In [69]:
text = text.lower()
text_list = [word for word in text.split()]
#If last word has a question mark attached, that gets removed also.
text_regex = [word for word in text_list if re.search('^[a-z0-9]+$',word)]
text_lemma = [lemmatizer.lemmatize(word) for word in text_regex]
text_pos = nltk.pos_tag(text_lemma)
print(text_pos)

[('shubham', 'NN'), ('wa', 'NN'), ('asking', 'VBG'), ('whether', 'IN'), ('hotel', 'NN'), ('a', 'DT'), ('is', 'VBZ'), ('better', 'JJR'), ('than', 'IN'), ('hotel', 'NN')]


In [70]:
extract_entities(text_pos)

(S
  (NP shubham/NN)
  (NP wa/NN)
  asking/VBG
  whether/IN
  (NP hotel/NN)
  a/DT
  is/VBZ
  better/JJR
  than/IN
  (NP hotel/NN))
[('shubham', 'NN', 'B-NP'),
 ('wa', 'NN', 'B-NP'),
 ('asking', 'VBG', 'O'),
 ('whether', 'IN', 'O'),
 ('hotel', 'NN', 'B-NP'),
 ('a', 'DT', 'O'),
 ('is', 'VBZ', 'O'),
 ('better', 'JJR', 'O'),
 ('than', 'IN', 'O'),
 ('hotel', 'NN', 'B-NP')]


In [71]:
ne_tree = nltk.ne_chunk(nltk.pos_tag(nltk.word_tokenize(text)))
print(ne_tree)

(S
  shubham/NN
  was/VBD
  asking/VBG
  whether/IN
  hotel/NN
  a/DT
  is/VBZ
  better/JJR
  than/IN
  hotel/NN
  b/NN
  ?/.)
