In [1]:
#Numeric, Symbolic, and Punctuation Tags
import spacy
nlp = spacy.load('en')
doc = nlp(u"The firm earned $1.5 million in 2017.")
for token in doc:
    print(token.text, token.pos_, spacy.explain(token.pos_))

The DET determiner
firm NOUN noun
earned VERB verb
$ SYM symbol
1.5 NUM numeral
million NUM numeral
in ADP adposition
2017 NUM numeral
. PUNCT punctuation


In [2]:
for token in doc:
    print(token.text, token.pos_, token.tag_, spacy.explain(token.tag_))

The DET DT determiner
firm NOUN NN noun, singular or mass
earned VERB VBD verb, past tense
$ SYM $ symbol, currency
1.5 NUM CD cardinal number
million NUM CD cardinal number
in ADP IN conjunction, subordinating or preposition
2017 NUM CD cardinal number
. PUNCT . punctuation mark, sentence closer


In [1]:
import spacy
nlp = spacy.load('en')
doc = nlp(u"The firm earned $1.5 million in 2017.")
phrase = ''
for token in doc:
    if token.tag_ == '$':
        phrase = token.text
        i = token.i+1
        while doc[i].tag_ == 'CD':
            phrase += doc[i].text + ' '
            i += 1
        break
phrase = phrase[:-1]
print(phrase)

$1.5 million


In [2]:
import spacy
nlp = spacy.load('en')
doc = nlp(u"The firm earned $1.5 million in 2017, in comparison with $1.2 million in 2016.")
phrase = ''
for token in doc:
    if token.tag_ == '$':
        phrase = token.text
        i = token.i+1
        while doc[i].tag_ == 'CD':
            phrase += doc[i].text + ' '
            i += 1
        phrase = phrase[:-1]
        print(phrase)

$1.5 million
$1.2 million


In [3]:
#Turning Statements into Questions
doc = nlp(u"I can promise it is worth your time.")
for token in doc:
    print(token.text, token.pos_, token.tag_)

I PRON PRP
can VERB MD
promise VERB VB
it PRON PRP
is AUX VBZ
worth ADJ JJ
your DET PRP$
time NOUN NN
. PUNCT .


In [8]:
import spacy
nlp = spacy.load('en')
doc = nlp(u"I can promise it is worth your time.")
sent = ''
for i,token in enumerate(doc):
    if token.tag_ == 'PRP' and doc[i+1].tag_ == 'MD' and doc[i+2].tag_ == 'VB':
        sent = doc[i+1].text.capitalize() + ' ' + doc[i].text
        sent = sent + ' ' + doc[i+2:].text
        break
 #By now, you should have: 'Can I promise it is worth your time.'
   #Retokenization
doc=nlp(sent)
for i,token in enumerate(doc):
    if token.tag_ == 'PRP' and token.text == 'I':
        sent = doc[:i].text + ' you ' + doc[i+1:].text
        break
 #By now, you should have: 'Can you promise it is worth your time.'
doc=nlp(sent)
for i,token in enumerate(doc):
    if token.tag_ == 'PRP$' and token.text == 'your':
        sent = doc[:i].text + ' my ' + doc[i+1:].text
        break
 #By now, you should have: 'Can you promise it is worth my time.' 
doc=nlp(sent)
for i,token in enumerate(doc):
    if token.tag_ == 'VB':
        sent = doc[:i].text + ' really ' + doc[i:].text
        break
 #By now, you should have: 'Can you really promise it is worth my time.'
doc=nlp(sent)
sent = doc[:len(doc)-1].text + '?'
 #Finally, you should have: 'Can you really promise it is worth my time?'
print(sent)

Can you really promise it is worth my time?


In [10]:
#Distinguishing Subjects from Objects
doc = nlp(u"I can promise it is worth your time.")
for token in doc:
    print(token.text, token.pos_, token.tag_, token.dep_, spacy.explain(token.dep_))



I PRON PRP nsubj nominal subject
can VERB MD aux auxiliary
promise VERB VB ROOT None
it PRON PRP nsubj nominal subject
is AUX VBZ ccomp clausal complement
worth ADJ JJ acomp adjectival complement
your DET PRP$ poss possession modifier
time NOUN NN npadvmod noun phrase as adverbial modifier
. PUNCT . punct punctuation


In [38]:
#Deciding What Question a Chatbot Should Ask
import spacy
import sys

def find_chunk(doc):
    chunk = ''
    for i,token in enumerate(doc):
        if token.dep_ == 'dobj':
            shift = len([w for w in token.children])
            #print([w for w in token.children])
            chunk = doc[i-shift:i+1]
            break
    return chunk

def determine_question_type(chunk):
    question_type = 'yesno'
    for token in chunk:
        if token.dep_ == 'amod':
            question_type = 'info'
    return question_type

def generate_question(doc, question_type):
    sent = ''
    for i,token in enumerate(doc):
        if token.tag_ == 'PRP' and doc[i+1].tag_ == 'VBP':
            sent = 'do ' + doc[i].text
            sent = sent + ' ' + doc[i+1:].text
            break
    doc=nlp(sent)
    for i,token in enumerate(doc):
        if token.tag_ == 'PRP' and token.text == 'I':
            sent = doc[:i].text + ' you ' + doc[i+1:].text
            break
        doc=nlp(sent)
        if question_type == 'info':
            for i,token in enumerate(doc):
                if token.dep_ == 'dobj':
                    sent = 'why ' + doc[:i].text + ' one ' + doc[i+1:].text
                    break
        if question_type == 'yesno':
            for i,token in enumerate(doc):
                if token.dep_ == 'dobj':
                    sent = doc[:i-1].text + ' a red ' + doc[i:].text
                    break
        doc=nlp(sent)
        sent = doc[0].text.capitalize() +' ' + doc[1:len(doc)-1].text + '?'
        return sent

if len(sys.argv) > 1:
    nlp = spacy.load('en')
    doc = nlp('I want a blue apple.')
    chunk = find_chunk(doc)
    if str(chunk) == '':
        print('The sentence does not contain a direct object.')
        sys.exit()
    question_type = determine_question_type(chunk)
    question = generate_question(doc, question_type)
    print(question)
else:
    print('You did not submit a sentence!')

Why do I want a blue one?
