# Introduction to NLP -- Rule-Based AI
In this example, we look at basic natural-language preprocessing and keyword extraction using hand-written rules.

## Basic preprocessing

In [63]:
import string

def preprocess_sentence(raw):
    """
    Basic string preprocessing.

    Lower-cases the input, removes every character listed in
    ``string.punctuation`` and splits what is left into words.

    Args:
        raw (str): The sentence to process.

    Returns:
        list of str: The lower-cased, punctuation-free words in their
        original order. Empty or whitespace-only input gives ``[]``.
    """
    # Convert all to lower case
    processed = raw.lower()

    # Remove punctuation
    processed = processed.translate(
        str.maketrans("", "", string.punctuation))

    # Split on any run of whitespace. Unlike split(" "), this never emits
    # empty-string tokens for repeated spaces (e.g. the gap left behind by a
    # stripped " , "), and it also separates words on tabs and newlines.
    return processed.split()


# Demo: run the preprocessing step on one sample command and show the
# resulting list of lower-cased, punctuation-free words.
input_sentence = "Go to the kitchen, and then get me an apple."
processed_sentence = preprocess_sentence(input_sentence)
print(processed_sentence)

['go', 'to', 'the', 'kitchen', 'and', 'then', 'get', 'me', 'an', 'apple']


## Keyword extraction

In [64]:
def extract_keywords(words, objects, locations):
    """
    Pick the target object and target location out of a word list.

    Candidates are tried in the order they appear in `objects` and
    `locations`; the first candidate that is contained in `words` wins.

    Args:
        words: The words of the sentence (e.g. from preprocess_sentence).
        objects: Known object names, in priority order.
        locations: Known location names, in priority order.

    Returns:
        tuple: ``(target_object, target_location)``; either entry is None
        when no candidate of that kind is found in `words`.
    """

    def first_present(candidates):
        # First candidate contained in the word list, or None if there is none
        return next((name for name in candidates if name in words), None)

    return first_present(objects), first_present(locations)


# Vocabulary the rule-based extractor is allowed to recognise
objects = ["apple", "water", "snacks"]
locations = ["kitchen", "bedroom", "garage"]

# Sample command: preprocess it into words, then look the keywords up
input_sentence = "Go to the kitchen, and then get me an apple."
words = preprocess_sentence(input_sentence)
tgt_obj, tgt_loc = extract_keywords(words, objects, locations)

# One report line per field, emitted with a single print call
print(
    "Input sentence:  {}".format(input_sentence),
    "Target object:   {}".format(tgt_obj),
    "Target location: {}".format(tgt_loc),
    sep="\n",
)

Input sentence:  Go to the kitchen, and then get me an apple.
Target object:   apple
Target location: kitchen


## Test on Multiple Sentences

In [65]:
# Vocabulary and test commands for the word-list based extractor
objects = ["apple", "water", "snacks"]
locations = ["kitchen", "bedroom", "garage"]
input_sentences = [
    "Go to the kitchen, and then get me an apple",
    "Bring me a bottle of water",
    "Drive over to the garage",
    "Find a snack in my bedroom",
]

# Matching is on whole words, so "snack" in the last command does not hit
# the "snacks" vocabulary entry and its target object comes back as None.
for sentence in input_sentences:
    words = preprocess_sentence(sentence)
    tgt_obj, tgt_loc = extract_keywords(words, objects, locations)
    # Three report lines followed by a blank separator line
    print(
        "Input sentence:  {}".format(sentence),
        "Target object:   {}".format(tgt_obj),
        "Target location: {}".format(tgt_loc),
        "",
        sep="\n",
    )

Input sentence:  Go to the kitchen, and then get me an apple
Target object:   apple
Target location: kitchen

Input sentence:  Bring me a bottle of water
Target object:   water
Target location: None

Input sentence:  Drive over to the garage
Target object:   None
Target location: garage

Input sentence:  Find a snack in my bedroom
Target object:   None
Target location: bedroom



## String Processing with Regular Expressions

In [67]:
import re

def extract_keywords_regexp(sentence, objects, locations):
    """
    Extracts the target object and location from a raw sentence string
    given lists of possible objects and locations.
    This implementation uses regular expressions from the `re` module.

    Each keyword is searched for as a case-insensitive *substring* of the
    sentence, so no preprocessing is required and plural or embedded forms
    are found as well ("apples" -> "apple", "Kitchenette" -> "kitchen").

    Args:
        sentence (str): The raw, unprocessed input sentence.
        objects: Known object names, in priority order.
        locations: Known location names, in priority order.

    Returns:
        tuple: ``(target_object, target_location)``. Each entry is the first
        keyword of its list that occurs in the sentence (the same
        first-match rule as `extract_keywords`), or None if none occurs.
    """

    def first_match(keywords):
        for keyword in keywords:
            # re.escape makes the keyword match literally, so entries that
            # contain regex metacharacters (".", "(", "+", ...) neither
            # match unrelated text nor break the pattern.
            if re.search(re.escape(keyword), sentence, re.IGNORECASE):
                return keyword
        return None

    return first_match(objects), first_match(locations)


# Vocabulary and test commands for the regular-expression based extractor.
# The raw sentences are passed in directly, without preprocessing.
objects = ["apple", "water", "snack"]
locations = ["kitchen", "bedroom", "garage", "living room"]
input_sentences = [
    "Go to the kitchen, and then get me an apple",
    "Bring me a BOTTLE OFWATER!!!",
    "Drive over to the garage",
    "Find a snack in my bedroom",
    "Look for some apples in the Kitchenette!",
    "Can you search for snacks in the living room?",
]

for sentence in input_sentences:
    tgt_obj, tgt_loc = extract_keywords_regexp(sentence, objects, locations)
    # Three report lines followed by a blank separator line
    print(
        "Input sentence:  {}".format(sentence),
        "Target object:   {}".format(tgt_obj),
        "Target location: {}".format(tgt_loc),
        "",
        sep="\n",
    )

Input sentence:  Go to the kitchen, and then get me an apple
Target object:   apple
Target location: kitchen

Input sentence:  Bring me a BOTTLE OFWATER!!!
Target object:   water
Target location: None

Input sentence:  Drive over to the garage
Target object:   None
Target location: garage

Input sentence:  Find a snack in my bedroom
Target object:   snack
Target location: bedroom

Input sentence:  Look for some apples in the Kitchenette!
Target object:   apple
Target location: kitchen

Input sentence:  Can you search for snacks in the living room?
Target object:   snack
Target location: living room

