In [317]:
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.tag import pos_tag 
import dateparser
import dateutil

In [318]:
# Build the stop-word set used by preprocess(). The prepositions 'at', 'to',
# 'from' and 'on' are kept out of it because get_info() relies on them to find
# the time and the two locations. A set difference is used instead of
# set.remove() so the cell cannot raise KeyError if the NLTK list ever lacks
# one of these words.
stop_words = set(stopwords.words('english')) - {'at', 'to', 'from', 'on'}

In [319]:
def preprocess(sent):
    """Tokenize a sentence, drop stop words and POS-tag the remaining tokens.

    Args:
        sent: The raw sentence to process.

    Returns:
        The result of ``nltk.pos_tag`` on the tokens of ``sent`` that are not
        in the module-level ``stop_words`` set. The membership test is
        case-sensitive, so a token is only dropped when it matches a stop
        word exactly.
    """
    kept_tokens = [
        token for token in nltk.word_tokenize(sent) if token not in stop_words
    ]
    return nltk.pos_tag(kept_tokens)

In [320]:
# Sample messages that the following cells pass to preprocess() and get_info().
example = 'Driving to Hamilton from Niagara Falls at 8pm on December 12th, 2 spots needed'
example1 = 'Hi I need 3 seats from BK Plaza to Communitech Data Hub at 5pm today'

In [321]:
# Show the (token, POS tag) pairs produced for the first sample message.
preprocess(example)


[('Driving', 'VBG'),
 ('to', 'TO'),
 ('Hamilton', 'NNP'),
 ('from', 'IN'),
 ('Niagara', 'NNP'),
 ('Falls', 'NNP'),
 ('at', 'IN'),
 ('8pm', 'CD'),
 ('on', 'IN'),
 ('December', 'NNP'),
 ('12th', 'CD'),
 (',', ','),
 ('2', 'CD'),
 ('spots', 'NNS'),
 ('needed', 'VBD')]

In [322]:
# Show the (token, POS tag) pairs produced for the second sample message.
preprocess(example1)

[('Hi', 'NNP'),
 ('I', 'PRP'),
 ('need', 'VBP'),
 ('3', 'CD'),
 ('seats', 'NNS'),
 ('from', 'IN'),
 ('BK', 'NNP'),
 ('Plaza', 'NNP'),
 ('to', 'TO'),
 ('Communitech', 'NNP'),
 ('Data', 'NNP'),
 ('Hub', 'NNP'),
 ('at', 'IN'),
 ('5pm', 'CD'),
 ('today', 'NN')]

In [323]:
def _collect_location(tagged, start):
    """Join the run of consecutive proper nouns (tag ``NNP``) starting at ``start``.

    Every word is followed by one space, so a non-empty result ends with a
    trailing space. An empty string means ``tagged[start]`` is not a proper
    noun (or ``start`` is past the end).
    """
    words = []
    for word, tag in tagged[start:]:
        if tag != "NNP":
            break
        words.append(word + " ")
    return "".join(words)


def get_info(sent):
    """Extract destination, origin, time and seat count from a ride message.

    The sentence is run through ``preprocess`` and the tagged tokens are
    scanned once:

    * ``from`` / ``From`` starts the origin: the proper nouns that follow it.
    * a token tagged ``TO`` starts the destination, collected the same way.
    * a ``CD`` (number) token directly followed by a seat word ("seats",
      "spot", ...) is the seat count.
    * ``at`` / ``on`` (also capitalized) starts a time phrase: the following
      run of numbers, month names, ``NN`` tokens (e.g. "today") and tokens
      beginning with a digit (e.g. an ordinal day).

    All time phrases are concatenated and handed to ``dateparser.parse``.

    Args:
        sent: The raw message text.

    Returns:
        A tuple ``(loc_to, loc_from, timestamp, spot)``:

        * ``loc_to`` / ``loc_from`` -- location words, each followed by a
          space; an empty string when the location was not found.
        * ``timestamp`` -- the parsed time formatted as ``"%m-%d,%H:%M"``.
        * ``spot`` -- the seat-count token as a string when one was found,
          otherwise the integer default ``1``.

    Raises:
        ValueError: If the message contains no time phrase or
            ``dateparser.parse`` cannot interpret it (returns ``None``).
    """
    seat_words = {"seats", "Seats", "seat", "Seat", "spot", "spots", "Spot", "Spots"}
    month_words = {
        "January", "january", "February", "february", "March", "march",
        "April", "april", "May", "may", "June", "june", "July", "july",
        "August", "august", "September", "september", "October", "october",
        "November", "november", "December", "december",
    }
    # 'around' will be considered in the future.
    time_markers = {"at", "At", "on", "On"}

    loc_from = ""
    loc_to = ""
    time_text = ""  # the time requested for the ride, as raw text
    spot = 1  # the number of spots needed/available, default value is 1

    tagged = preprocess(sent)
    total = len(tagged)

    def is_seat_count(index):
        """Return True when token ``index`` is a number followed by a seat word."""
        return (
            tagged[index][1] == "CD"
            and index + 1 < total
            and tagged[index + 1][0] in seat_words
        )

    # Tokens consumed by a location or time phrase are still visited by this
    # loop afterwards; none of the branches below match them a second time
    # except the seat-count check, which is intended.
    for i, (word, tag) in enumerate(tagged):
        if word in ("from", "From"):
            loc_from += _collect_location(tagged, i + 1)
        elif tag == "TO":
            loc_to += _collect_location(tagged, i + 1)
        elif tag == "CD":
            if is_seat_count(i):
                spot = word
        elif word in time_markers:
            time_text += word + " "
            j = i + 1
            while j < total:
                next_word, next_tag = tagged[j]
                if is_seat_count(j):
                    # "at 5pm 3 seats": the 3 is a seat count, not part of the time.
                    break
                if (
                    next_tag in ("CD", "NN")
                    or next_word in month_words
                    or next_word[:1].isdigit()  # e.g. an ordinal day not tagged CD
                ):
                    # Each token is appended exactly once, so the day after a
                    # month name is no longer duplicated.
                    time_text += next_word + " "
                    j += 1
                else:
                    break

    # dateparser.parse returns None when it cannot interpret the text; fail
    # with a clear message instead of an AttributeError on None.
    this_time = dateparser.parse(time_text) if time_text else None
    if this_time is None:
        raise ValueError(
            "Sorry, the time input was not evaluated. Please re-enter your request"
        )
    timestampStr = this_time.strftime("%m-%d,%H:%M")
    return loc_to, loc_from, timestampStr, spot
   

In [324]:
    # Display the tuple returned by get_info() for the second sample message.
    # NOTE(review): this call is indented although it is not inside any block;
    # presumably the notebook front end tolerates it -- consider dedenting.
    get_info(example1)

('Communitech Data Hub ', 'BK Plaza ', '12-08,17:00', '3')

In [325]:
# Parse the first sample message and print each extracted field on its own line.
location_to, location_from, timestampStr, spot = get_info(example)
print(location_to, location_from, timestampStr, spot, sep="\n")

Hamilton 
Niagara Falls 
12-12,20:00
2


In [326]:
# A message that starts with the origin and mentions both a time and a date.
example2 = "from Union Station to BK Plaza at 3pm on December 12th, 3 seats available"
location_to, location_from, timestampStr, spot = get_info(example2)
print(location_to, location_from, timestampStr, spot, sep="\n")

BK Plaza 
Union Station 
12-12,15:00
3


In [327]:
# Same sentence as example1, this time unpacked and printed field by field.
example3 = "Hi I need 3 seats from BK Plaza to Communitech Data Hub at 5pm today"
location_to, location_from, timestampStr, spot = get_info(example3)
print(location_to, location_from, timestampStr, spot, sep="\n")

Communitech Data Hub 
BK Plaza 
12-08,17:00
3


In [329]:
# A message with the time before the locations and no seat count (spot stays 1).
example4 = "Hi at 6pm tomorrow, i need a ride from Union Station to Markvill High School"
location_to, location_from, timestampStr, spot = get_info(example4)
print(location_to, location_from, timestampStr, spot, sep="\n")

Markvill High School 
Union Station 
12-09,18:00
1


In [332]:
# A message with a multi-word destination and no seat count (spot stays 1).
example5 = "Looking for a ride from Toronto to New York at 8pm tomorrow"
location_to, location_from, timestampStr, spot = get_info(example5)
print(location_to, location_from, timestampStr, spot, sep="\n")


New York 
Toronto 
12-09,20:00
1
