In [23]:
# The NLTK library is required:
#!conda install nltk
# We will also use TextBlob:
#!pip install textblob
# Furthermore, in order to use TextBlob(bunchOfText).sentences, I
#   needed to issue the following command:
#!python -m textblob.download_corpora  
#   -- if you need this, you will get an error message telling you so
import random
import logging
import os

os.environ['NLTK_DATA'] = os.getcwd() + '/nltk_data'

from textblob import TextBlob
from config import FILTER_WORDS

# Configure the root logger and lower its threshold to DEBUG so that the
# logger.info(...) calls made by the helper functions below are displayed.
logging.basicConfig()
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)

# Preprocessing
The user is going to input a sentence. It might be messy, like "it mIGht     be MESSY."
We want to first preprocess the text so that BillyBot can understand it best!

Fortunately, NLTK and TextBlob can do the heavy-lifting for us!

In [24]:
def process_input_text(input_text):
    """
    Normalise the whitespace of the raw user input and wrap it in a TextBlob.

    Leading/trailing whitespace is dropped and every internal run of
    whitespace is collapsed to a single space before the TextBlob is built.
    """
    tokens = input_text.split()
    normalised = ' '.join(tokens)
    return TextBlob(normalised)

In [25]:
# Example processing step: the repeated spaces in the raw string are collapsed
# by process_input_text, and the resulting blob is split into its sentences.
processed_text = process_input_text('This is   a  sentence. I made a great         example!')
print(processed_text.sentences)

[Sentence("This is a sentence."), Sentence("I made a great example!")]


# Grammatical Components of the Input Text
After the input text has been processed, we can begin to understand its content.  
One way to do this is to parse the text into some of its atomic elements: 
* is there a pronoun?
* is there a verb?
* what nouns are present?
* etc


In [26]:
def find_candidate_parts_of_speech(parsed):
    """
    Given a parsed input (TextBlob object), find the best pronoun, direct noun, adjective,
    and verb to match their input.

    Every sentence is inspected in order. A candidate found in a later sentence
    replaces the one found earlier, but a sentence that has *no* candidate for a
    given part of speech no longer wipes out what an earlier sentence provided.

    Returns a tuple of (pronoun, noun, adjective, verb). pronoun, noun and
    adjective may each be None if there was no good match. verb is the
    (word, pos_tag) tuple produced by find_verb -- (None, None) when no sentence
    contained a verb -- or None when the input had no sentences at all.
    """
    pronoun = None
    noun = None
    adjective = None
    verb = None
    for sent in parsed.sentences:
        sent_pronoun = find_pronoun(sent)
        if sent_pronoun is not None:
            pronoun = sent_pronoun

        sent_noun = find_noun(sent)
        if sent_noun is not None:
            noun = sent_noun

        sent_adjective = find_adjective(sent)
        if sent_adjective is not None:
            adjective = sent_adjective

        # find_verb always returns a 2-tuple; only its first element tells us
        # whether a verb was actually found in this sentence.
        sent_verb = find_verb(sent)
        if verb is None or sent_verb[0] is not None:
            verb = sent_verb
    logger.info("Pronoun=%s, noun=%s, adjective=%s, verb=%s", pronoun, noun, adjective, verb)
    return pronoun, noun, adjective, verb

def find_pronoun(sent):
    """
    Choose the pronoun the bot should answer with for a TextBlob Sentence.

    Tokens tagged 'PRP' are scanned in order: a "you" from the user becomes 'I'
    and an "I" from the user becomes 'You'. When both occur, the one appearing
    last in the sentence decides the result. Returns None when the sentence
    contains neither.
    """
    # What the bot says back for each pronoun the user may have used
    swapped = {'you': 'I', 'i': 'You'}

    reply_pronoun = None
    for token, tag in sent.pos_tags:
        if tag != 'PRP':
            continue
        reply_pronoun = swapped.get(token.lower(), reply_pronoun)
    return reply_pronoun

def find_noun(sent):
    """
    Return the first word tagged 'NN' in a TextBlob Sentence, or None if the
    sentence has no such word. A found noun is also reported through the logger.
    """
    first_noun = next((token for token, tag in sent.pos_tags if tag == 'NN'), None)

    if first_noun:
        logger.info("Found noun: %s", first_noun)

    return first_noun

def find_adjective(sent):
    """
    Return the first word tagged 'JJ' in a TextBlob Sentence, or None when the
    sentence contains no such word.
    """
    for token, tag in sent.pos_tags:
        if tag == 'JJ':
            return token
    return None

def find_verb(sent):
    """
    Locate the first verb in a TextBlob Sentence.

    Returns a (word, tag) pair for the first token whose tag begins with 'VB',
    or (None, None) when the sentence has no such token.
    """
    for token, tag in sent.pos_tags:
        if tag.startswith('VB'):
            return token, tag
    return None, None



In [27]:
# Examples: run each finder on the two sentences of `processed_text`
# individually, then on the whole blob at once.
sentences = processed_text.sentences
print('sentences[0]:',sentences[0], '\nsentences[1]:', sentences[1])

# find_pronoun returns the pronoun the bot would reply with, not the user's own
print("\nResponse Pronouns")
print(find_pronoun(sentences[0]), find_pronoun(sentences[1]))

print("\nNouns")
print(find_noun(sentences[0]), find_noun(sentences[1]))

print("\nAdjectives")
print(find_adjective(sentences[0]), find_adjective(sentences[1]))

# find_verb returns a (word, pos_tag) tuple for each sentence
print("\nVerbs")
print(find_verb(sentences[0]), find_verb(sentences[1]))

# Note: this rebinds the notebook-level names pronoun, noun, adjective and verb
print("\nAltogether Now!")
pronoun, noun, adjective, verb = find_candidate_parts_of_speech(processed_text)
print("Pronoun:", pronoun, "\nNoun:", noun, "\nAdjective:", adjective, "\nVerb", verb)

INFO:root:Found noun: sentence
INFO:root:Found noun: example
INFO:root:Found noun: sentence
INFO:root:Found noun: example
INFO:root:Pronoun=You, noun=example, adjective=great, verb=('made', 'VBD')


sentences[0]: This is a sentence. 
sentences[1]: I made a great example!

Response Pronouns
None You

Nouns
sentence example

Adjectives
None great

Verbs
('is', 'VBZ') ('made', 'VBD')

Altogether Now!
Pronoun: You 
Noun: example 
Adjective: great 
Verb ('made', 'VBD')


# Differential Response
The chatbot does not have to be able to respond to every type of input possible.
However, it's desirable to differentiate a greeting from a question, and so on.
In this regard, we can design several functions to determine some additional context
of the input text and decide what type of response is likely to make the most sense.

### Respond to a Greeting
The simplest way to approach a greeting is rules-based: make a list of likely greetings and a list of potential responses. Since this is a stateless chatbot, it won't get annoyed if you say "hi" 10,000 times! It picks its response at random. In practice, the tutorial advises cycling through the responses, or at least guarding against immediate repeats.

In [28]:
# Words and phrases that count as the user greeting us. Entries containing a
# space are matched as phrases against the sentence text; the rest are matched
# against individual words.
GREETING_KEYWORDS = ("hello", "hi", "greetings", "sup", "what's up", "hola")

# Sentences we'll respond with if the user greeted us
GREETING_RESPONSES = ["What do ya want?", "Oh, hey.", "Let's make this quick. I only got about two humps left in me.",
                     "You must want something.", "Hey, I was just about to go get some coffee."]


def _contains_phrase(tokens, phrase_tokens):
    """Return True if phrase_tokens occurs as a consecutive run inside tokens."""
    span = len(phrase_tokens)
    return any(tokens[i:i + span] == phrase_tokens
               for i in range(len(tokens) - span + 1))


def check_for_greeting(sentence):
    """
    If the user's input contains a greeting, return a randomly chosen greeting response.

    Args:
        sentence: object exposing `.words` (iterable of str-like words) and
            whose str() is the sentence text, e.g. a TextBlob or Sentence.

    Returns:
        One entry of GREETING_RESPONSES when a keyword matches, otherwise None.
    """
    words = {word.lower() for word in sentence.words}

    # A multi-word keyword such as "what's up" can never equal a single word,
    # so phrases are compared against whitespace-separated tokens of the text
    # with surrounding punctuation removed ("what's up?" -> ["what's", "up"]).
    text_tokens = [tok.strip('.,!?;:"()') for tok in str(sentence).lower().split()]

    for keyword in GREETING_KEYWORDS:
        if ' ' in keyword:
            matched = _contains_phrase(text_tokens, keyword.split())
        else:
            matched = keyword in words
        if matched:
            return random.choice(GREETING_RESPONSES)
    return None


In [32]:
# Neither example sentence contains a greeting keyword, so both calls print None.
print(check_for_greeting(sentences[0]))
print(check_for_greeting(sentences[1]))

None
None


### Respond to Comments about BillyBot

In [43]:
# Reply templates for a remark about the bot that contains a noun; the noun is
# inserted pluralized and capitalized.
SELF_VERBS_WITH_NOUN_CAPS_PLURAL = [
    "My last startup totally crushed the {noun} vertical",
    "Were you aware I was a serial entrepreneur in the {noun} sector?",
    "My startup is Uber for {noun}",
    "I really consider myself an expert on {noun}",
]

# Reply templates that take the noun exactly as the user wrote it.
SELF_VERBS_WITH_NOUN_LOWER = [
    "Yeah but I know a lot about {noun}",
    "My bros always ask me about {noun}",
]

# Reply templates used when only an adjective is available.
SELF_VERBS_WITH_ADJECTIVE = [
    "Not as {adjective} as the last bass I caught.",
    "I like my coffee like I like my fish: {adjective}.",
]


def check_for_comment_about_bot(pronoun, noun, adjective):
    """
    Build a reply when the user's input was a remark about the bot itself.

    The input counts as being about the bot when the response pronoun is 'I'
    (any casing) and at least a noun or an adjective is available. A noun is
    preferred over an adjective; with a noun, one of the two noun template
    lists is picked by coin flip.

    Returns the formatted reply, or None when the input was not about the bot.
    """
    about_bot = pronoun is not None and pronoun.lower() == 'i'
    if not about_bot or not (noun or adjective):
        return None

    if not noun:
        template = random.choice(SELF_VERBS_WITH_ADJECTIVE)
        return template.format(adjective=adjective)

    if random.choice((True, False)):
        template = random.choice(SELF_VERBS_WITH_NOUN_CAPS_PLURAL)
        return template.format(noun=noun.pluralize().capitalize())

    template = random.choice(SELF_VERBS_WITH_NOUN_LOWER)
    return template.format(noun=noun)

In [44]:
# "You" in the input maps to the response pronoun 'I', which is what makes
# check_for_comment_about_bot treat the sentence as a remark about the bot.
example_text = process_input_text('You are so cool.')
pronoun, noun, adjective, verb = find_candidate_parts_of_speech(example_text)
print(check_for_comment_about_bot(pronoun, noun, adjective))

INFO:root:Pronoun=I, noun=None, adjective=cool, verb=('are', 'VBP')


I like my coffee like I like my fish: cool.


### Respond to Any Other Type of Input Text

In [None]:
def construct_response(pronoun, noun, verb):
    """
    No special cases matched, so we're going to try to construct a full sentence that uses as much
    of the user's input as possible.

    Args:
        pronoun: pronoun to open the reply with (e.g. 'I' or 'You'), or None.
        noun: candidate noun to mention, or None.
        verb: (word, pos_tag) tuple as produced by find_verb, or None.

    Returns:
        The space-joined reply, without trailing whitespace. May be an empty
        string when nothing usable was supplied and no suffix was drawn.
    """
    resp = []

    if pronoun:
        resp.append(pronoun)

    # We always respond in the present tense, and the pronoun will always either be a passthrough
    # from the user, or 'you' or 'I', in which case we might need to change the tense for some
    # irregular verbs.
    if verb:
        verb_word = verb[0]
        if verb_word in ('be', 'am', 'is', "'m"):  # This would be an excellent place to use lemmas!
            # Guard against a missing pronoun before calling .lower() on it
            if pronoun and pronoun.lower() == 'you':
                # The bot will always tell the person they aren't whatever they said they were
                resp.append("aren't really")
            else:
                resp.append(verb_word)
    if noun:
        article = "an" if starts_with_vowel(noun) else "a"
        resp.append(article + " " + noun)

    # The empty choice means "no suffix"; skip it so the reply has no trailing space
    suffix = random.choice(("tho", "bro", "lol", "bruh", "smh", ""))
    if suffix:
        resp.append(suffix)

    return " ".join(resp)


# Sentences we'll respond with if we have no idea what the user just said.
# respond() draws one at random when no pronoun was found in the input, or when
# no other response could be produced.
NONE_RESPONSES = [
    "uh whatever",
    "meet me at the foosball table, bro?",
    "code hard bro",
    "want to bro down and crush code?",
    "I'd like to add you to my professional network on LinkedIn",
    "Have you closed your seed round, dog?",
]

### Hedge Against Hate Speech

In [None]:
class UnacceptableUtteranceException(Exception):
    """Raised when a candidate response contains a token the bot must not say."""


def filter_response(resp):
    """
    Don't allow any words to match our filter list.

    The response is split on single spaces. A token is rejected when it contains
    '@', '#' or '!', or when its lowercased form starts with any entry of
    FILTER_WORDS.

    Args:
        resp: the candidate response string.

    Raises:
        UnacceptableUtteranceException: if any token is rejected.
    """
    for word in resp.split(' '):
        if any(marker in word for marker in ('@', '#', '!')):
            raise UnacceptableUtteranceException()
        lowered = word.lower()
        if any(lowered.startswith(s) for s in FILTER_WORDS):
            raise UnacceptableUtteranceException()

### Don't Sound Stupid

In [8]:
def starts_with_vowel(word):
    """
    Check for article compatibility -- 'a' vs. 'an'.

    Returns True when the first character of `word` is one of a, e, i, o, u in
    either case, and False otherwise, including for an empty or None word.
    """
    # The emptiness check is required: '' in 'aeiou' would be True
    return bool(word) and word[0].lower() in 'aeiou'

## Response Function

In [None]:
def respond(input_text):
    """
    Parse the user's inbound sentence and find candidate terms that make up a best-fit response.

    Args:
        input_text: the raw string typed by the user.

    Returns:
        The chosen response string.

    Raises:
        Whatever filter_response raises when the chosen response is rejected.
    """
    # process_input_text collapses stray whitespace and already returns a TextBlob
    parsed = process_input_text(input_text)

    # Loop through all the sentences, if more than one. This will help extract the most relevant
    # response text even across multiple sentences (for example if there was no obvious direct noun
    # in one sentence)
    pronoun, noun, adjective, verb = find_candidate_parts_of_speech(parsed)

    # If we said something about the bot and used some kind of direct noun, construct the
    # sentence around that, discarding the other candidates
    resp = check_for_comment_about_bot(pronoun, noun, adjective)

    # If we just greeted the bot, we'll use a return greeting
    if not resp:
        resp = check_for_greeting(parsed)

    if not resp:
        # If we didn't override the final sentence, try to construct a new one:
        if not pronoun:
            resp = random.choice(NONE_RESPONSES)
        elif pronoun == 'I' and not verb:
            # NOTE(review): COMMENTS_ABOUT_SELF is not defined in the visible part of this
            # notebook, and find_verb returns a (word, tag) tuple, so `not verb` only holds
            # when verb is None -- confirm whether this branch is meant to be reachable.
            resp = random.choice(COMMENTS_ABOUT_SELF)
        else:
            resp = construct_response(pronoun, noun, verb)

    # If we got through all that with nothing, use a random response
    if not resp:
        resp = random.choice(NONE_RESPONSES)

    logger.info("Returning phrase '%s'", resp)
    # Check that we're not going to say anything obviously offensive
    filter_response(resp)

    return resp
