In [1]:
#load libraries and environments
import spacy
from nltk.corpus import gutenberg
from spacy import displacy
# Shared pipeline object; the rule functions below call it as nlp(sentence_text).
nlp = spacy.load("en_core_web_lg")

In [2]:
#get book names from the Gutenberg corpus
# The result is kept in `files`; later cells index into it and iterate over it.
files = gutenberg.fileids()

In [3]:
#print the book names as a 1-based numbered list
print("Names of the sample books from Gutenberg are:\n")
for position, book_name in enumerate(files, start=1):
    print(f"{position}. {book_name}")

Names of the sample books from Gutenberg are:

1. austen-emma.txt
2. austen-persuasion.txt
3. austen-sense.txt
4. bible-kjv.txt
5. blake-poems.txt
6. bryant-stories.txt
7. burgess-busterbrown.txt
8. carroll-alice.txt
9. chesterton-ball.txt
10. chesterton-brown.txt
11. chesterton-thursday.txt
12. edgeworth-parents.txt
13. melville-moby_dick.txt
14. milton-paradise.txt
15. shakespeare-caesar.txt
16. shakespeare-hamlet.txt
17. shakespeare-macbeth.txt
18. whitman-leaves.txt


In [4]:
#rule 1: Has a verb in its lemma (base) form
def verb_in_lemma(sent):
    """Return whether any verb in ``sent`` appears in its lemma (base) form.

    Args:
        sent: Sentence text; it is parsed with the module-level ``nlp`` pipeline.

    Returns:
        bool: True if at least one token has ``pos_ == 'VERB'`` and its
        lowercased text equals its ``lemma_``; False when no token matches.
    """
    doc = nlp(sent)
    # any() always produces a bool, so a non-match is an explicit False
    # instead of the implicit None the function used to fall through to.
    return any(
        token.pos_ == 'VERB' and token.text.lower() == token.lemma_
        for token in doc
    )

In [5]:
#rule 2: Has a verb in its lemma (base) form and is the root
def root_verb_in_lemma(sent):
    """Return whether the dependency root of ``sent`` is a base-form verb.

    Args:
        sent: Sentence text; it is parsed with the module-level ``nlp`` pipeline.

    Returns:
        bool: True if some token has ``dep_ == 'ROOT'``, ``pos_ == 'VERB'`` and
        a lowercased text equal to its ``lemma_``; False when no token matches.
    """
    doc = nlp(sent)
    # The per-token debug print was dropped: a predicate should not write to
    # stdout, and the same trace line is already disabled in rule 3.
    # any() also makes the non-match result an explicit False rather than None.
    return any(
        token.dep_ == 'ROOT'
        and token.pos_ == 'VERB'
        and token.text.lower() == token.lemma_
        for token in doc
    )

In [6]:
#rule 3: The root is a verb in its lemma (base) form and no token in the sentence is tagged as a nominal subject
def root_verb_in_lemma_nosubj(sent):
    """Return whether ``sent`` has a base-form root verb and no ``nsubj`` token.

    Args:
        sent: Sentence text; it is parsed with the module-level ``nlp`` pipeline.

    Returns:
        bool: True only if some token is a ROOT verb whose lowercased text
        equals its lemma AND no token anywhere in the parse has
        ``dep_ == 'nsubj'``.
    """
    parsed = nlp(sent)

    has_base_form_root = any(
        tok.dep_ == 'ROOT' and tok.pos_ == 'VERB' and tok.text.lower() == tok.lemma_
        for tok in parsed
    )
    # The subject check spans the whole parse, not just the root's children.
    has_nominal_subject = any(tok.dep_ == 'nsubj' for tok in parsed)

    return has_base_form_root and not has_nominal_subject

In [7]:
#filter sentences from gutenberg ebooks
# Only the first book is scanned (the outer loop exits after one pass), and the
# scan stops as soon as 10 matching sentences have been printed.
for f in files:
    sentences = gutenberg.sents(f)
    i = 0
    for s in sentences:
        sent = " ".join(s)
        if not root_verb_in_lemma_nosubj(sent):
            continue
        i += 1
        print(f"Sent {i}: {sent}")
        print("\n")
        #displacy.serve(nlp(sent), style="dep")
        # i only changes on a match, so the limit is checked here.
        if i == 10:
            break
    break

Sent 1: Look at my shoes .


Sent 2: Pray do not make any more matches ."


Sent 3: Invite him to dinner , Emma , and help him to the best of the fish and the chicken , but leave him to chuse his own wife .


Sent 4: " Only think of our happening to meet him !-- How very odd !


Sent 5: Compare Mr . Martin with either of _them_ .


Sent 6: Compare their manner of carrying themselves ; of walking ; of speaking ; of being silent .


Sent 7: " Thank you .


Sent 8: No , Mr . Knightley , do not foretell vexation from that quarter ."


Sent 9: say beautiful rather .


Sent 10: Keep your raptures for Harriet ' s face .




In [18]:
#sample sentences to test rules
# Plain imperative: expected to be flagged by rule 3.
sent1 = "Book me an appointment with my doctor!"
print(f"Sent 1 is imperative? {root_verb_in_lemma_nosubj(sent1)}")
displacy.render(nlp(sent1), style="dep")

Sent 1 is imperative? True


In [9]:
# Imperative preceded by a vocative ("Ana, ...").
sent2 = "Ana, book me an appointment with my doctor!"
print(f"Sent 2 is imperative? {root_verb_in_lemma_nosubj(sent2)}")
displacy.render(nlp(sent2), style="dep")

Sent 2 is imperative? True


In [10]:
# Question with an explicit subject ("you").
sent3 = "Did you book an appointment with my doctor?"
print(f"Sent 3 is imperative? {root_verb_in_lemma_nosubj(sent3)}")
displacy.render(nlp(sent3), style="dep")

Sent 3 is imperative? False


In [11]:
# Declarative sentence with a past-tense verb.
sent4 = "Doctor cancelled my appointment"
print(f"Sent 4 is imperative? {root_verb_in_lemma_nosubj(sent4)}")
displacy.render(nlp(sent4), style="dep")

Sent 4 is imperative? False


In [16]:
# Short formulaic phrase; the recorded run reports True for it.
sent5 = "Thank you!"
print(f"Sent 5 is imperative? {root_verb_in_lemma_nosubj(sent5)}")
displacy.render(nlp(sent5), style="dep")

Sent 5 is imperative? True
