In [1]:
import nltk
from nltk.util import ngrams
from nltk import pos_tag, word_tokenize, RegexpParser, Tree
from nltk.tokenize import PunktSentenceTokenizer

View all the pos tags in nltk.

In [2]:
# nltk.download('tagsets')
nltk.help.upenn_tagset()

$: dollar
    $ -$ --$ A$ C$ HK$ M$ NZ$ S$ U.S.$ US$
'': closing quotation mark
    ' ''
(: opening parenthesis
    ( [ {
): closing parenthesis
    ) ] }
,: comma
    ,
--: dash
    --
.: sentence terminator
    . ! ?
:: colon or ellipsis
    : ; ...
CC: conjunction, coordinating
    & 'n and both but either et for less minus neither nor or plus so
    therefore times v. versus vs. whether yet
CD: numeral, cardinal
    mid-1890 nine-thirty forty-two one-tenth ten million 0.5 one forty-
    seven 1987 twenty '79 zero two 78-degrees eighty-four IX '60s .025
    fifteen 271,124 dozen quintillion DM2,000 ...
DT: determiner
    all an another any both del each either every half la many much nary
    neither no some such that the them these this those
EX: existential there
    there
FW: foreign word
    gemeinschaft hund ich jeux habeas Haementeria Herr K'ang-si vous
    lutihaw alai je jour objets salutaris fille quibusdam pas trop Monte
    terram fiche oui corporis ...
IN: preposition or

Define the grammar for chunking.

In [3]:
grammar = r"""
  AdjP: {<RB>?<JJ|JJR|JJS|RBR|RBS>}    # Adjectives may have comparative and superlative, and come after adverbs like very
  AdjP: {<AdjP><,>*<AdjP>*<,>*<AdjP>*<CC>?<AdjP>}    # Multiple adjectives can come in comma and 'and'
  NP: {<DT|WDT|WP$>?<CD>?<AdjP>*<NN|NNS|NNP|NNPS><POS>*<NN|NNS|NNP|NNPS|PP|CD>*<VBG>?}    # Determiner, number and adjectives come before nouns and nouns may have possessive -s and followed by another noun
  NP: {<PRP|EX|CD|WP|WRB|PRP$|WP$>}    # Pronouns and numbers can also replace nouns and function as one
  PP: {<IN>?<IN>?<IN|TO><NP>}    # Prepositions come before nouns and sometimes two prepositions come together
  NP: {<NP><PP>}    # Noun can come before prepositions, usually describing the location of the noun
  NP: {<NP><,>*<NP>*<,>*<NP>*<CC>?<NP>}    # Multiple nouns can come in comma and 'and'
  ToP: {<TO><VB><NP>?}    # To is both a noun phrase and an infinitive that comes before a base verb
  VP: {<MD>?<RB>*<VB|VBD|VBP|VBZ><VB>?<VBN>?<VBN>?<VBG>?<VBN>?<RB>*<IN>?<ToP>?}    # Modal verbs come before verbs. Various tenses are formed through specific sequences of verb forms. Phrasal verbs are verbs that come with a preposition.
  """

chunker = RegexpParser(grammar)

Create a function to return chunked text.

In [4]:
def chunk(corpus):
    tagged = pos_tag(word_tokenize(corpus))
    output = chunker.parse(tagged)
    return str(output)

Create a function to label the words with pos.

In [5]:
tokenizer = PunktSentenceTokenizer()

def tag_pos(corpus):

    tokenized = tokenizer.tokenize(corpus)

    try:
        for sent in tokenized:
            words = nltk.word_tokenize(sent)
            tagged = nltk.pos_tag(words)
            return tagged
    except Exception as e:
        return str(e)

Combine both functions and allow sentences to be generated from list and then parsed and chunked.

In [6]:
def tag_and_chunk(corpus):
    print(chunk(corpus))
    print(tag_pos(corpus))
    
def t_c_list(ls, prepend="", append=""):
    for p in ls:
        sent = prepend + p + append
        tag_and_chunk(sent)
        print("\n")

This function can modify the pos in tagged result.

In [7]:
def modify_pos(sent, word, pos, no_cap=True, prev_pos=[], next_pos=[], prev_word=[], next_word=[]):
    # Tag the sentence with pos
    if no_cap: sent = sent.lower()
    tagTuple = tag_pos(sent)
    tupleIndices = [i for i, v in enumerate(tagTuple) if v[0] == word]
    for i, t in enumerate(tupleIndices):
        is_match = 1
        
        for j, pp in enumerate(prev_pos):
            # Disqualify if any of the previous pos is different from expectation
            curr_i = t - len(prev_pos) - j
            if tagTuple[curr_i][1] != pp: is_match -= 1
            
        for k, pw in enumerate(prev_word):
            # Disqualify if any of the previous word is different from expectation
            curr_i = t - len(prev_word) - k
            if tagTuple[curr_i][0] != pw: is_match -= 1
            
        for m, np in enumerate(next_pos):
            # Disqualify if any of the next pos is different from expectation
            curr_i = t + len(next_pos) - m
            if tagTuple[curr_i][1] != np: is_match -= 1
            
        for n, nw in enumerate(next_word):
            # Disqualify if any of the next word is different from expectation
            curr_i = t + len(next_word) - n
            if tagTuple[curr_i][0] != nw: is_match -= 1
      
        if is_match == 1:
            tagTuple[tupleIndices[i]] = (word, pos)

    return tagTuple

# Adjective Phrase

In [8]:
adjs = ["long", "longer", "longest", "long and sharp", "long, sharp and bright", "very long"]

t_c_list(adjs, prepend="My cat has a ", append=" tail.")

(S
  My/PRP$
  (NP cat/NN)
  (VP has/VBZ)
  (NP a/DT (AdjP long/JJ) tail/NN)
  ./.)
[('My', 'PRP$'), ('cat', 'NN'), ('has', 'VBZ'), ('a', 'DT'), ('long', 'JJ'), ('tail', 'NN'), ('.', '.')]


(S
  My/PRP$
  (NP cat/NN)
  (VP has/VBZ)
  (NP a/DT (AdjP longer/JJR) tail/NN)
  ./.)
[('My', 'PRP$'), ('cat', 'NN'), ('has', 'VBZ'), ('a', 'DT'), ('longer', 'JJR'), ('tail', 'NN'), ('.', '.')]


(S
  My/PRP$
  (NP cat/NN)
  (VP has/VBZ)
  (NP a/DT (AdjP longest/JJ) tail/NN)
  ./.)
[('My', 'PRP$'), ('cat', 'NN'), ('has', 'VBZ'), ('a', 'DT'), ('longest', 'JJ'), ('tail', 'NN'), ('.', '.')]


(S
  My/PRP$
  (NP cat/NN)
  (VP has/VBZ)
  (NP a/DT (AdjP (AdjP long/JJ) and/CC (AdjP sharp/JJ)) tail/NN)
  ./.)
[('My', 'PRP$'), ('cat', 'NN'), ('has', 'VBZ'), ('a', 'DT'), ('long', 'JJ'), ('and', 'CC'), ('sharp', 'JJ'), ('tail', 'NN'), ('.', '.')]


(S
  My/PRP$
  (NP cat/NN)
  (VP has/VBZ)
  (NP
    a/DT
    (AdjP (AdjP long/JJ) ,/, (AdjP sharp/JJ) and/CC (AdjP bright/JJ))
    tail/NN)
  ./.)
[('My', 'PRP$')

# Noun Phrase

Samples taken from "Linguistics for English Language Teaching: Sounds, Words, and Sentences"

In [9]:
noun_phrases = [ "John",
 "mailmen",
 "most students", 
 "many Americans",
 "a huge, loveable bear",
 "a student from brazil",
 "the table in the corner",
 "the people we interviewed",
"John and his friends"]

t_c_list(noun_phrases, append=" laughed at the cat.")

(S (NP John/NNP) (VP laughed/VBD) (PP at/IN (NP the/DT cat/NN)) ./.)
[('John', 'NNP'), ('laughed', 'VBD'), ('at', 'IN'), ('the', 'DT'), ('cat', 'NN'), ('.', '.')]


(S
  (NP mailmen/NNS)
  (VP laughed/VBD)
  (PP at/IN (NP the/DT cat/NN))
  ./.)
[('mailmen', 'NNS'), ('laughed', 'VBD'), ('at', 'IN'), ('the', 'DT'), ('cat', 'NN'), ('.', '.')]


(S
  (NP (AdjP most/JJS) students/NNS)
  laughed/VBN
  (PP at/IN (NP the/DT cat/NN))
  ./.)
[('most', 'JJS'), ('students', 'NNS'), ('laughed', 'VBN'), ('at', 'IN'), ('the', 'DT'), ('cat', 'NN'), ('.', '.')]


(S
  (NP (AdjP many/JJ) Americans/NNPS)
  laughed/VBN
  (PP at/IN (NP the/DT cat/NN))
  ./.)
[('many', 'JJ'), ('Americans', 'NNPS'), ('laughed', 'VBN'), ('at', 'IN'), ('the', 'DT'), ('cat', 'NN'), ('.', '.')]


(S
  (NP a/DT (AdjP (AdjP huge/JJ) ,/, (AdjP loveable/JJ)) bear/NN)
  laughed/VBN
  (PP at/IN (NP the/DT cat/NN))
  ./.)
[('a', 'DT'), ('huge', 'JJ'), (',', ','), ('loveable', 'JJ'), ('bear', 'NN'), ('laughed', 'VBN'), ('at', 'IN'), ('t

Oddly, pronouns (PRP$) just doesn't work in the chunking grammar, despite being specified in the noun phrase rule.

In [10]:
print(chunker.parse(modify_pos("John and his friends laughed at the cat.", "his", "DT", next_pos=["NNS"])))

(S
  (NP (NP john/NN) and/CC (NP his/DT friends/NNS))
  laughed/VBN
  (PP at/IN (NP the/DT cat/NN))
  ./.)


But this can be mitigated easily by making 'his' a determiner if it is followed by a noun.

Samples taken from 'English Syntax: An Introduction'

In [11]:
sentences = [ "His friend learned dancing.",
 "My bother’s friend learned dancing.",
 "The president’s bodyguard learned surveillance.", 
 "The King of Rock and Roll’s records led to dancing."]

for s in sentences:
    print(tag_and_chunk(s))
    print("\n")

(S His/PRP$ (NP friend/NN) (VP learned/VBD) (NP dancing/NN) ./.)
[('His', 'PRP$'), ('friend', 'NN'), ('learned', 'VBD'), ('dancing', 'NN'), ('.', '.')]
None


(S
  My/PRP$
  (NP bother/NN ’/NNP s/NN friend/NN)
  (VP learned/VBD)
  (NP dancing/NN)
  ./.)
[('My', 'PRP$'), ('bother', 'NN'), ('’', 'NNP'), ('s', 'NN'), ('friend', 'NN'), ('learned', 'VBD'), ('dancing', 'NN'), ('.', '.')]
None


(S
  (NP The/DT president/NN ’/NNP)
  (VP s/VBZ bodyguard/RB)
  learned/VBN
  (NP surveillance/NN)
  ./.)
[('The', 'DT'), ('president', 'NN'), ('’', 'NNP'), ('s', 'VBZ'), ('bodyguard', 'RB'), ('learned', 'VBN'), ('surveillance', 'NN'), ('.', '.')]
None


(S
  (NP
    (NP (NP The/DT King/NNP) (PP of/IN (NP Rock/NNP)))
    and/CC
    (NP Roll/NNP ’/NNP s/NN records/NNS))
  (VP led/VBD)
  to/TO
  dancing/VBG
  ./.)
[('The', 'DT'), ('King', 'NNP'), ('of', 'IN'), ('Rock', 'NNP'), ('and', 'CC'), ('Roll', 'NNP'), ('’', 'NNP'), ('s', 'NN'), ('records', 'NNS'), ('led', 'VBD'), ('to', 'TO'), ('dancing', 'VBG'),

Possessive -s sometimes get parsed wrongly as verb, perhaps due to how 's is also the contracted form of 'is'.

In [77]:
print(chunker.parse(modify_pos("His friend learned dancing.", "his", "DT", next_pos=["NN"])))
print(chunker.parse(modify_pos("The president’s bodyguard learned surveillance.", "s", "NN", prev_word=["’"],
                               prev_pos=["NN", "NN"])))

(S (NP his/DT friend/NN) (VP learned/VBD) (NP dancing/NN) ./.)
(S
  (NP the/DT president/NN ’/NNP)
  (VP s/VBZ bodyguard/RB)
  learned/VBN
  (NP surveillance/NN)
  ./.)


But grammatically speaking, -s for 'is' is used with pronouns, like he's, she's. It's possible to add extra codes to differentiate -s for is or possessive.

## Possesive Determiner

Possesive determiner should be seperated from possesive pronoun. His, hers, mine is not followed by another noun, but his, her and my are.

In [13]:
pos_dets = ["my", "our", "your", "his", "her", "their", "its"]
    
t_c_list(pos_dets, append=" friend learned dancing.")

(S my/PRP$ (NP friend/NN) (VP learned/VBD) (NP dancing/NN) ./.)
[('my', 'PRP$'), ('friend', 'NN'), ('learned', 'VBD'), ('dancing', 'NN'), ('.', '.')]


(S our/PRP$ (NP friend/NN) (VP learned/VBD) (NP dancing/NN) ./.)
[('our', 'PRP$'), ('friend', 'NN'), ('learned', 'VBD'), ('dancing', 'NN'), ('.', '.')]


(S your/PRP$ (NP friend/NN) (VP learned/VBD) (NP dancing/NN) ./.)
[('your', 'PRP$'), ('friend', 'NN'), ('learned', 'VBD'), ('dancing', 'NN'), ('.', '.')]


(S his/PRP$ (NP friend/NN) (VP learned/VBD) (NP dancing/NN) ./.)
[('his', 'PRP$'), ('friend', 'NN'), ('learned', 'VBD'), ('dancing', 'NN'), ('.', '.')]


(S her/PRP$ (NP friend/NN) (VP learned/VBD) (NP dancing/NN) ./.)
[('her', 'PRP$'), ('friend', 'NN'), ('learned', 'VBD'), ('dancing', 'NN'), ('.', '.')]


(S their/PRP$ (NP friend/NN) (VP learned/VBD) (NP dancing/NN) ./.)
[('their', 'PRP$'), ('friend', 'NN'), ('learned', 'VBD'), ('dancing', 'NN'), ('.', '.')]


(S its/PRP$ (NP friend/NN) (VP learned/VBD) (NP dancing/NN) ./.)
[('its'

In every instance, considering possesive determiners as pronouns causes chunking issues in noun phrase.

In [78]:
print(chunker.parse(modify_pos("His friend learned dancing.", "his", "DT", next_pos=["NN"])))
print(chunker.parse(modify_pos("My friend learned dancing.", "my", "DT", next_pos=["NN"])))

(S (NP his/DT friend/NN) (VP learned/VBD) (NP dancing/NN) ./.)
(S (NP my/DT friend/NN) (VP learned/VBD) (NP dancing/NN) ./.)


Despite the grammar set, there is still a need to change possessive pronouns to determiners for better noun phrase parsing.

## Compound noun and verbs as adjectives
Refers to an object that uses two nouns and when verbs are used as adjectives.

In [15]:
tag_and_chunk("I am at the bus stop.")
tag_and_chunk("I live in Street 13.")

(S (NP I/PRP) (VP am/VBP) (PP at/IN (NP the/DT bus/NN stop/NN)) ./.)
[('I', 'PRP'), ('am', 'VBP'), ('at', 'IN'), ('the', 'DT'), ('bus', 'NN'), ('stop', 'NN'), ('.', '.')]
(S (NP I/PRP) (VP live/VBP) (PP in/IN (NP Street/NNP 13/CD)) ./.)
[('I', 'PRP'), ('live', 'VBP'), ('in', 'IN'), ('Street', 'NNP'), ('13', 'CD'), ('.', '.')]


In [16]:
tag_and_chunk("He finished the remaining food.")
tag_and_chunk("He finished the food remaining.")
tag_and_chunk("He finished the food eaten.")

(S
  (NP He/PRP)
  (VP finished/VBD)
  the/DT
  remaining/VBG
  (NP food/NN)
  ./.)
[('He', 'PRP'), ('finished', 'VBD'), ('the', 'DT'), ('remaining', 'VBG'), ('food', 'NN'), ('.', '.')]
(S
  (NP He/PRP)
  (VP finished/VBD)
  (NP the/DT food/NN remaining/VBG)
  ./.)
[('He', 'PRP'), ('finished', 'VBD'), ('the', 'DT'), ('food', 'NN'), ('remaining', 'VBG'), ('.', '.')]
(S (NP He/PRP) (VP finished/VBD) (NP the/DT food/NN eaten/NN) ./.)
[('He', 'PRP'), ('finished', 'VBD'), ('the', 'DT'), ('food', 'NN'), ('eaten', 'NN'), ('.', '.')]


When 'remaining' is used to describe food, the noun phrase parsing only worked when 'remaining' comes after 'food', so as to not affect the parsing of verb phrase.

## Demonstrative determiners

In [17]:
s_dem_dets = ["this", "that"]
    
t_c_list(s_dem_dets, append=" cat can swim.")

(S (NP this/DT cat/NN) (VP can/MD swim/VB) ./.)
[('this', 'DT'), ('cat', 'NN'), ('can', 'MD'), ('swim', 'VB'), ('.', '.')]


(S (NP that/DT cat/NN) (VP can/MD swim/VB) ./.)
[('that', 'DT'), ('cat', 'NN'), ('can', 'MD'), ('swim', 'VB'), ('.', '.')]




In [18]:
p_dem_dets = ["these", "those"]

t_c_list(p_dem_dets, append=" cats can swim.")

(S (NP these/DT cats/NNS) (VP can/MD swim/VB) ./.)
[('these', 'DT'), ('cats', 'NNS'), ('can', 'MD'), ('swim', 'VB'), ('.', '.')]


(S (NP those/DT cats/NNS) (VP can/MD swim/VB) ./.)
[('those', 'DT'), ('cats', 'NNS'), ('can', 'MD'), ('swim', 'VB'), ('.', '.')]




No issue for demonstrative determiners.

## Articles (Can be considered as determiners)

In [19]:
art_nouns = ["the cat", "a cat", "an orange"]

t_c_list(art_nouns, append=" can fly.")

(S (NP the/DT cat/NN) (VP can/MD fly/VB) ./.)
[('the', 'DT'), ('cat', 'NN'), ('can', 'MD'), ('fly', 'VB'), ('.', '.')]


(S (NP a/DT cat/NN) (VP can/MD fly/VB) ./.)
[('a', 'DT'), ('cat', 'NN'), ('can', 'MD'), ('fly', 'VB'), ('.', '.')]


(S (NP an/DT orange/NN) (VP can/MD fly/VB) ./.)
[('an', 'DT'), ('orange', 'NN'), ('can', 'MD'), ('fly', 'VB'), ('.', '.')]




No issue for articles.

## Quantifiers (as determiners and pronouns)

In [20]:
p_quantifiers = ["several", "few", "fewer", "many", "all", "some", "no", "neither", "either", "enough", "other", "both"]

t_c_list(p_quantifiers, append=" cats can fly.")

(S (NP (AdjP several/JJ) cats/NNS) (VP can/MD fly/VB) ./.)
[('several', 'JJ'), ('cats', 'NNS'), ('can', 'MD'), ('fly', 'VB'), ('.', '.')]


(S (NP (AdjP few/JJ) cats/NNS) (VP can/MD fly/VB) ./.)
[('few', 'JJ'), ('cats', 'NNS'), ('can', 'MD'), ('fly', 'VB'), ('.', '.')]


(S (NP (AdjP fewer/JJR) cats/NNS) (VP can/MD fly/VB) ./.)
[('fewer', 'JJR'), ('cats', 'NNS'), ('can', 'MD'), ('fly', 'VB'), ('.', '.')]


(S (NP (AdjP many/JJ) cats/NNS) (VP can/MD fly/VB) ./.)
[('many', 'JJ'), ('cats', 'NNS'), ('can', 'MD'), ('fly', 'VB'), ('.', '.')]


(S (NP all/DT cats/NNS) (VP can/MD fly/VB) ./.)
[('all', 'DT'), ('cats', 'NNS'), ('can', 'MD'), ('fly', 'VB'), ('.', '.')]


(S (NP some/DT cats/NNS) (VP can/MD fly/VB) ./.)
[('some', 'DT'), ('cats', 'NNS'), ('can', 'MD'), ('fly', 'VB'), ('.', '.')]


(S (NP no/DT cats/NNS) (VP can/MD fly/VB) ./.)
[('no', 'DT'), ('cats', 'NNS'), ('can', 'MD'), ('fly', 'VB'), ('.', '.')]


(S (NP neither/DT cats/NNS) (VP can/MD fly/VB) ./.)
[('neither', 'DT'), ('cats', 

In [21]:
t_c_list(p_quantifiers, append=" can fly.")

(S (AdjP several/JJ) (VP can/MD fly/VB) ./.)
[('several', 'JJ'), ('can', 'MD'), ('fly', 'VB'), ('.', '.')]


(S (AdjP few/JJ) (VP can/MD fly/VB) ./.)
[('few', 'JJ'), ('can', 'MD'), ('fly', 'VB'), ('.', '.')]


(S (AdjP fewer/JJR) (VP can/MD fly/VB) ./.)
[('fewer', 'JJR'), ('can', 'MD'), ('fly', 'VB'), ('.', '.')]


(S (AdjP many/JJ) (VP can/MD fly/VB) ./.)
[('many', 'JJ'), ('can', 'MD'), ('fly', 'VB'), ('.', '.')]


(S all/DT (VP can/MD fly/VB) ./.)
[('all', 'DT'), ('can', 'MD'), ('fly', 'VB'), ('.', '.')]


(S some/DT (VP can/MD fly/VB) ./.)
[('some', 'DT'), ('can', 'MD'), ('fly', 'VB'), ('.', '.')]


(S no/DT (VP can/MD fly/VB) ./.)
[('no', 'DT'), ('can', 'MD'), ('fly', 'VB'), ('.', '.')]


(S neither/DT (VP can/MD fly/VB) ./.)
[('neither', 'DT'), ('can', 'MD'), ('fly', 'VB'), ('.', '.')]


(S either/CC (VP can/MD fly/VB) ./.)
[('either', 'CC'), ('can', 'MD'), ('fly', 'VB'), ('.', '.')]


(S enough/RB (VP can/MD fly/VB) ./.)
[('enough', 'RB'), ('can', 'MD'), ('fly', 'VB'), ('.', '.')

In [22]:
s_quantifiers = ["every", "each", "any", "another"]

t_c_list(s_quantifiers, append=" cat can fly.")

(S (NP every/DT cat/NN) (VP can/MD fly/VB) ./.)
[('every', 'DT'), ('cat', 'NN'), ('can', 'MD'), ('fly', 'VB'), ('.', '.')]


(S (NP each/DT cat/NN) (VP can/MD fly/VB) ./.)
[('each', 'DT'), ('cat', 'NN'), ('can', 'MD'), ('fly', 'VB'), ('.', '.')]


(S (NP any/DT cat/NN) (VP can/MD fly/VB) ./.)
[('any', 'DT'), ('cat', 'NN'), ('can', 'MD'), ('fly', 'VB'), ('.', '.')]


(S (NP another/DT cat/NN) (VP can/MD fly/VB) ./.)
[('another', 'DT'), ('cat', 'NN'), ('can', 'MD'), ('fly', 'VB'), ('.', '.')]




In [23]:
t_c_list(s_quantifiers, append=" can fly.")

(S every/DT (VP can/MD fly/VB) ./.)
[('every', 'DT'), ('can', 'MD'), ('fly', 'VB'), ('.', '.')]


(S each/DT (VP can/MD fly/VB) ./.)
[('each', 'DT'), ('can', 'MD'), ('fly', 'VB'), ('.', '.')]


(S any/DT (VP can/MD fly/VB) ./.)
[('any', 'DT'), ('can', 'MD'), ('fly', 'VB'), ('.', '.')]


(S another/DT (VP can/MD fly/VB) ./.)
[('another', 'DT'), ('can', 'MD'), ('fly', 'VB'), ('.', '.')]




In [24]:
u_quantifiers = ["little", "much"]

t_c_list(u_quantifiers, append=" water is needed.")

(S (NP (AdjP little/JJ) water/NN) (VP is/VBZ needed/VBN) ./.)
[('little', 'JJ'), ('water', 'NN'), ('is', 'VBZ'), ('needed', 'VBN'), ('.', '.')]


(S (NP (AdjP much/JJ) water/NN) (VP is/VBZ needed/VBN) ./.)
[('much', 'JJ'), ('water', 'NN'), ('is', 'VBZ'), ('needed', 'VBN'), ('.', '.')]




In [25]:
t_c_list(u_quantifiers, append=" is needed.")

(S (AdjP little/JJ) (VP is/VBZ needed/VBN) ./.)
[('little', 'JJ'), ('is', 'VBZ'), ('needed', 'VBN'), ('.', '.')]


(S (AdjP much/JJ) (VP is/VBZ needed/VBN) ./.)
[('much', 'JJ'), ('is', 'VBZ'), ('needed', 'VBN'), ('.', '.')]




Most of the quantifiers are treated as adjectives or determiners, which led to accurate noun phrase parsing. Only 'enough' is considered as adverb.

In [26]:
c_quan_with_of = ["a lot", "a number", "many", "some", "none", "any", "several", "most", "few", "each", "both"]

t_c_list(c_quan_with_of, append=" of the cats ate.")

(S
  (NP (NP a/DT lot/NN) (PP of/IN (NP the/DT cats/NNS)))
  (VP ate/VBP)
  ./.)
[('a', 'DT'), ('lot', 'NN'), ('of', 'IN'), ('the', 'DT'), ('cats', 'NNS'), ('ate', 'VBP'), ('.', '.')]


(S
  (NP (NP a/DT number/NN) (PP of/IN (NP the/DT cats/NNS)))
  (VP ate/VBP)
  ./.)
[('a', 'DT'), ('number', 'NN'), ('of', 'IN'), ('the', 'DT'), ('cats', 'NNS'), ('ate', 'VBP'), ('.', '.')]


(S (AdjP many/JJ) (PP of/IN (NP the/DT cats/NNS)) (VP ate/VBP) ./.)
[('many', 'JJ'), ('of', 'IN'), ('the', 'DT'), ('cats', 'NNS'), ('ate', 'VBP'), ('.', '.')]


(S some/DT (PP of/IN (NP the/DT cats/NNS)) (VP ate/VBP) ./.)
[('some', 'DT'), ('of', 'IN'), ('the', 'DT'), ('cats', 'NNS'), ('ate', 'VBP'), ('.', '.')]


(S
  (NP (NP none/NN) (PP of/IN (NP the/DT cats/NNS)))
  (VP ate/VBP)
  ./.)
[('none', 'NN'), ('of', 'IN'), ('the', 'DT'), ('cats', 'NNS'), ('ate', 'VBP'), ('.', '.')]


(S any/DT (PP of/IN (NP the/DT cats/NNS)) (VP ate/VBP) ./.)
[('any', 'DT'), ('of', 'IN'), ('the', 'DT'), ('cats', 'NNS'), ('ate', 'VBP'),

In [27]:
u_quan_with_of = ["much", "a little"]

t_c_list(u_quan_with_of, append=" of the water is left.")

(S much/RB (PP of/IN (NP the/DT water/NN)) (VP is/VBZ left/VBN) ./.)
[('much', 'RB'), ('of', 'IN'), ('the', 'DT'), ('water', 'NN'), ('is', 'VBZ'), ('left', 'VBN'), ('.', '.')]


(S
  a/DT
  (AdjP little/JJ)
  (PP of/IN (NP the/DT water/NN))
  (VP is/VBZ left/VBN)
  ./.)
[('a', 'DT'), ('little', 'JJ'), ('of', 'IN'), ('the', 'DT'), ('water', 'NN'), ('is', 'VBZ'), ('left', 'VBN'), ('.', '.')]




A {noun1} of {noun2} can be treated as (NP a/DT {noun1}/NN (PP of/IN (NP the/DT {noun2}/NN))). Even though in the grammar variable at the beginning, NP's definition included PP, PP is only defined after NP. Thus, it is unable to include the 'of the {noun2}' into the first noun phrase.

## Personal Pronouns (Subject)

In general, pronouns should be able to be replaced by nouns easily, because they refer to actual nouns.

In [28]:
person_obj_pn = ["I", "you", "he", "she", "it", "we", "they", "one"]

t_c_list(person_obj_pn, append=" will eat the cake.")

(S (NP I/PRP) (VP will/MD eat/VB) (NP the/DT cake/NN) ./.)
[('I', 'PRP'), ('will', 'MD'), ('eat', 'VB'), ('the', 'DT'), ('cake', 'NN'), ('.', '.')]


(S (NP you/PRP) (VP will/MD eat/VB) (NP the/DT cake/NN) ./.)
[('you', 'PRP'), ('will', 'MD'), ('eat', 'VB'), ('the', 'DT'), ('cake', 'NN'), ('.', '.')]


(S (NP he/PRP) (VP will/MD eat/VB) (NP the/DT cake/NN) ./.)
[('he', 'PRP'), ('will', 'MD'), ('eat', 'VB'), ('the', 'DT'), ('cake', 'NN'), ('.', '.')]


(S (NP she/PRP) (VP will/MD eat/VB) (NP the/DT cake/NN) ./.)
[('she', 'PRP'), ('will', 'MD'), ('eat', 'VB'), ('the', 'DT'), ('cake', 'NN'), ('.', '.')]


(S (NP it/PRP) (VP will/MD eat/VB) (NP the/DT cake/NN) ./.)
[('it', 'PRP'), ('will', 'MD'), ('eat', 'VB'), ('the', 'DT'), ('cake', 'NN'), ('.', '.')]


(S (NP we/PRP) (VP will/MD eat/VB) (NP the/DT cake/NN) ./.)
[('we', 'PRP'), ('will', 'MD'), ('eat', 'VB'), ('the', 'DT'), ('cake', 'NN'), ('.', '.')]


(S (NP they/PRP) (VP will/MD eat/VB) (NP the/DT cake/NN) ./.)
[('they', 'PRP'), ('will

Technically, pronouns can be considered a noun phrase. Also, 'one' can also be a pronoun, in addition to being a cardinal digit.

## Personal Pronoun (Object)

In [29]:
person_obj_pn = ["me", "you", "him", "her", "it", "us", "them", "one"]

t_c_list(person_obj_pn, prepend="Ahmad befriends ")

(S (NP Ahmad/NNP) (VP befriends/VBZ) (NP me/PRP))
[('Ahmad', 'NNP'), ('befriends', 'VBZ'), ('me', 'PRP')]


(S (NP Ahmad/NNP) (VP befriends/VBZ) (NP you/PRP))
[('Ahmad', 'NNP'), ('befriends', 'VBZ'), ('you', 'PRP')]


(S (NP Ahmad/NNP) (VP befriends/VBZ) (NP him/PRP))
[('Ahmad', 'NNP'), ('befriends', 'VBZ'), ('him', 'PRP')]


(S (NP Ahmad/NNP) (VP befriends/VBZ) her/PRP$)
[('Ahmad', 'NNP'), ('befriends', 'VBZ'), ('her', 'PRP$')]


(S (NP Ahmad/NNP) (VP befriends/VBZ) (NP it/PRP))
[('Ahmad', 'NNP'), ('befriends', 'VBZ'), ('it', 'PRP')]


(S (NP Ahmad/NNP) (VP befriends/VBZ) (NP us/PRP))
[('Ahmad', 'NNP'), ('befriends', 'VBZ'), ('us', 'PRP')]


(S (NP Ahmad/NNP) (VP befriends/VBZ) (NP them/PRP))
[('Ahmad', 'NNP'), ('befriends', 'VBZ'), ('them', 'PRP')]


(S (NP Ahmad/NNP) (VP befriends/VBZ) (NP one/CD))
[('Ahmad', 'NNP'), ('befriends', 'VBZ'), ('one', 'CD')]




No issues found.

## Reflexive Pronoun

In [30]:
person_obj_pn = ["myself", "youself", "yourselves", "himself", "herself", "itself", "ourselves", "themselves", "oneself"]

t_c_list(person_obj_pn, prepend="Helping ")

(S Helping/VBG (NP myself/PRP))
[('Helping', 'VBG'), ('myself', 'PRP')]


(S Helping/VBG (NP youself/PRP))
[('Helping', 'VBG'), ('youself', 'PRP')]


(S Helping/VBG (NP yourselves/NNS))
[('Helping', 'VBG'), ('yourselves', 'NNS')]


(S Helping/VBG (NP himself/PRP))
[('Helping', 'VBG'), ('himself', 'PRP')]


(S Helping/VBG (NP herself/PRP))
[('Helping', 'VBG'), ('herself', 'PRP')]


(S Helping/VBG (NP itself/PRP))
[('Helping', 'VBG'), ('itself', 'PRP')]


(S Helping/VBG (NP ourselves/PRP))
[('Helping', 'VBG'), ('ourselves', 'PRP')]


(S Helping/VBG (NP themselves/PRP))
[('Helping', 'VBG'), ('themselves', 'PRP')]


(S Helping/VBG (NP oneself/PRP))
[('Helping', 'VBG'), ('oneself', 'PRP')]




No issues found.

## Possessive Pronouns

In [31]:
poss_pn = ["mine", "yours", "his", "hers", "its", "ours", "theirs"]

t_c_list(poss_pn, prepend="This cat is ")

(S (NP This/DT cat/NN) (VP is/VBZ) (AdjP mine/JJ))
[('This', 'DT'), ('cat', 'NN'), ('is', 'VBZ'), ('mine', 'JJ')]


(S (NP This/DT cat/NN) (VP is/VBZ) (NP yours/NNS))
[('This', 'DT'), ('cat', 'NN'), ('is', 'VBZ'), ('yours', 'NNS')]


(S (NP This/DT cat/NN) (VP is/VBZ) his/PRP$)
[('This', 'DT'), ('cat', 'NN'), ('is', 'VBZ'), ('his', 'PRP$')]


(S (NP This/DT cat/NN) (VP is/VBZ) (NP hers/NNS))
[('This', 'DT'), ('cat', 'NN'), ('is', 'VBZ'), ('hers', 'NNS')]


(S (NP This/DT cat/NN) (VP is/VBZ) its/PRP$)
[('This', 'DT'), ('cat', 'NN'), ('is', 'VBZ'), ('its', 'PRP$')]


(S (NP This/DT cat/NN) (VP is/VBZ) (AdjP ours/JJ))
[('This', 'DT'), ('cat', 'NN'), ('is', 'VBZ'), ('ours', 'JJ')]


(S (NP This/DT cat/NN) (VP is/VBZ) (AdjP theirs/JJ))
[('This', 'DT'), ('cat', 'NN'), ('is', 'VBZ'), ('theirs', 'JJ')]




'His' can be a determiner or a pronoun, causing parsing error.

## Indefinite Pronouns

In [32]:
indef_pn = [ "none", "nobody", "everyone", "everybody", "someone", "somebody", "anyone", "anybody", "nothing",
"everything", "something", "anything", "each", "whatever", "whichever", "whoever", "whomever",
"whomsoever", "whosoever", "others", "neither", "both", "either", "any"]

t_c_list(indef_pn, prepend="I will help ")

(S (NP I/PRP) (VP will/MD help/VB) (NP none/NN))
[('I', 'PRP'), ('will', 'MD'), ('help', 'VB'), ('none', 'NN')]


(S (NP I/PRP) (VP will/MD help/VB) (NP nobody/NN))
[('I', 'PRP'), ('will', 'MD'), ('help', 'VB'), ('nobody', 'NN')]


(S (NP I/PRP) (VP will/MD help/VB) (NP everyone/NN))
[('I', 'PRP'), ('will', 'MD'), ('help', 'VB'), ('everyone', 'NN')]


(S (NP I/PRP) (VP will/MD help/VB) (NP everybody/NN))
[('I', 'PRP'), ('will', 'MD'), ('help', 'VB'), ('everybody', 'NN')]


(S (NP I/PRP) (VP will/MD help/VB) (NP someone/NN))
[('I', 'PRP'), ('will', 'MD'), ('help', 'VB'), ('someone', 'NN')]


(S (NP I/PRP) (VP will/MD help/VB) (NP somebody/NN))
[('I', 'PRP'), ('will', 'MD'), ('help', 'VB'), ('somebody', 'NN')]


(S (NP I/PRP) (VP will/MD help/VB) (NP anyone/NN))
[('I', 'PRP'), ('will', 'MD'), ('help', 'VB'), ('anyone', 'NN')]


(S (NP I/PRP) (VP will/MD help/VB) (NP anybody/NN))
[('I', 'PRP'), ('will', 'MD'), ('help', 'VB'), ('anybody', 'NN')]


(S (NP I/PRP) (VP will/MD help/VB) (NP not

About half of the sentences have their noun phrases parsed correctly. In the other half, some are determiners, which can often also function as pronouns. This is similar to the coordinating conjunction, either and neither. Most of the pronouns that end with -ever are wrongly labelled as verbs.

## "Existential there"

In [33]:
ex_there = ["there", "here"]

t_c_list(ex_there, append=" are the cats.")

(S (NP there/EX) (VP are/VBP) (NP the/DT cats/NNS) ./.)
[('there', 'EX'), ('are', 'VBP'), ('the', 'DT'), ('cats', 'NNS'), ('.', '.')]


(S (VP here/RB are/VBP) (NP the/DT cats/NNS) ./.)
[('here', 'RB'), ('are', 'VBP'), ('the', 'DT'), ('cats', 'NNS'), ('.', '.')]




In [34]:
tag_and_chunk("Here are the cats.")
tag_and_chunk("Dogs are the cats.")

(S (VP Here/RB are/VBP) (NP the/DT cats/NNS) ./.)
[('Here', 'RB'), ('are', 'VBP'), ('the', 'DT'), ('cats', 'NNS'), ('.', '.')]
(S (NP Dogs/NNS) (VP are/VBP) (NP the/DT cats/NNS) ./.)
[('Dogs', 'NNS'), ('are', 'VBP'), ('the', 'DT'), ('cats', 'NNS'), ('.', '.')]


'Here' and 'there' can also function like pronouns.

## Multiple nouns

In [35]:
tag_and_chunk("Edwin and Sally are best friends.")
tag_and_chunk("Edwin, John, Sally and I are best friends.")
tag_and_chunk("Edwin and the three cats are best friends.")

(S
  (NP (NP Edwin/NNP) and/CC (NP Sally/NNP))
  (VP are/VBP)
  (NP (AdjP best/JJS) friends/NNS)
  ./.)
[('Edwin', 'NNP'), ('and', 'CC'), ('Sally', 'NNP'), ('are', 'VBP'), ('best', 'JJS'), ('friends', 'NNS'), ('.', '.')]
(S
  (NP
    (NP Edwin/NNP)
    ,/,
    (NP John/NNP)
    ,/,
    (NP Sally/NNP)
    and/CC
    (NP I/PRP))
  (VP are/VBP)
  (NP (AdjP best/JJ) friends/NNS)
  ./.)
[('Edwin', 'NNP'), (',', ','), ('John', 'NNP'), (',', ','), ('Sally', 'NNP'), ('and', 'CC'), ('I', 'PRP'), ('are', 'VBP'), ('best', 'JJ'), ('friends', 'NNS'), ('.', '.')]
(S
  (NP (NP Edwin/NNP) and/CC (NP the/DT three/CD cats/NNS))
  (VP are/VBP)
  (NP (AdjP best/JJS) friends/NNS)
  ./.)
[('Edwin', 'NNP'), ('and', 'CC'), ('the', 'DT'), ('three', 'CD'), ('cats', 'NNS'), ('are', 'VBP'), ('best', 'JJS'), ('friends', 'NNS'), ('.', '.')]


## Prepositional phrase

In [36]:
single_word_prep = ["about", "above", "across", "along", "alongside", "among", "around", "at", "before", "behind", "below",
"beneath", "beside", "between", "beyond", "by", "down", "from", "in", "inside", "into", "near", "off", "on", "opposite",
"outside", "over", "past", "round", "through", "throughout", "toward", "towards", "under", "underneath", "up", "within", "to",
"upon"]

t_c_list(single_word_prep, prepend="Jibby is ", append=" the law.")

(S (NP Jibby/NNP) (VP is/VBZ) (PP about/IN (NP the/DT law/NN)) ./.)
[('Jibby', 'NNP'), ('is', 'VBZ'), ('about', 'IN'), ('the', 'DT'), ('law', 'NN'), ('.', '.')]


(S (NP Jibby/NNP) (VP is/VBZ) (PP above/IN (NP the/DT law/NN)) ./.)
[('Jibby', 'NNP'), ('is', 'VBZ'), ('above', 'IN'), ('the', 'DT'), ('law', 'NN'), ('.', '.')]


(S (NP Jibby/NNP) (VP is/VBZ) (PP across/IN (NP the/DT law/NN)) ./.)
[('Jibby', 'NNP'), ('is', 'VBZ'), ('across', 'IN'), ('the', 'DT'), ('law', 'NN'), ('.', '.')]


(S (NP Jibby/NNP) (VP is/VBZ) (PP along/IN (NP the/DT law/NN)) ./.)
[('Jibby', 'NNP'), ('is', 'VBZ'), ('along', 'IN'), ('the', 'DT'), ('law', 'NN'), ('.', '.')]


(S (NP Jibby/NNP) (VP is/VBZ alongside/RB) (NP the/DT law/NN) ./.)
[('Jibby', 'NNP'), ('is', 'VBZ'), ('alongside', 'RB'), ('the', 'DT'), ('law', 'NN'), ('.', '.')]


(S (NP Jibby/NNP) (VP is/VBZ) (PP among/IN (NP the/DT law/NN)) ./.)
[('Jibby', 'NNP'), ('is', 'VBZ'), ('among', 'IN'), ('the', 'DT'), ('law', 'NN'), ('.', '.')]


(S (NP Jibby/NNP)

Most of the prepositional phrases were parsed correctly, except for those that can also serve as adverbs, such as 'up' and 'down'.

In [37]:
print(chunker.parse(modify_pos("Jibby is opposite the law.", "opposite", "IN")))
print(chunker.parse(modify_pos("Jibby is up the law.", "up", "IN")))
print(chunker.parse(modify_pos("Jibby is down the law.", "down", "IN")))

(S (NP jibby/NN) (VP is/VBZ) (PP opposite/IN (NP the/DT law/NN)) ./.)
(S (NP jibby/NN) (VP is/VBZ) (PP up/IN (NP the/DT law/NN)) ./.)
(S (NP jibby/NN) (VP is/VBZ) (PP down/IN (NP the/DT law/NN)) ./.)


With the function defined earlier, the tagged part of speech can be changed and lead to a different chunking result.

In [38]:
multi_word_prep = ["across from", "ahead of", "away from", "close by", "close to", "in between", "in front of",
                   "near to", "next to", "on top of", "out of"]

t_c_list(multi_word_prep, prepend="Jibby is ", append=" the law.")

(S
  (NP Jibby/NNP)
  (VP is/VBZ)
  (PP across/IN from/IN (NP the/DT law/NN))
  ./.)
[('Jibby', 'NNP'), ('is', 'VBZ'), ('across', 'IN'), ('from', 'IN'), ('the', 'DT'), ('law', 'NN'), ('.', '.')]


(S
  (NP Jibby/NNP)
  (VP is/VBZ ahead/RB)
  (PP of/IN (NP the/DT law/NN))
  ./.)
[('Jibby', 'NNP'), ('is', 'VBZ'), ('ahead', 'RB'), ('of', 'IN'), ('the', 'DT'), ('law', 'NN'), ('.', '.')]


(S
  (NP Jibby/NNP)
  (VP is/VBZ away/RB)
  (PP from/IN (NP the/DT law/NN))
  ./.)
[('Jibby', 'NNP'), ('is', 'VBZ'), ('away', 'RB'), ('from', 'IN'), ('the', 'DT'), ('law', 'NN'), ('.', '.')]


(S
  (NP Jibby/NNP)
  (VP is/VBZ close/RB)
  (PP by/IN (NP the/DT law/NN))
  ./.)
[('Jibby', 'NNP'), ('is', 'VBZ'), ('close', 'RB'), ('by', 'IN'), ('the', 'DT'), ('law', 'NN'), ('.', '.')]


(S
  (NP Jibby/NNP)
  (VP is/VBZ close/RB)
  (PP to/TO (NP the/DT law/NN))
  ./.)
[('Jibby', 'NNP'), ('is', 'VBZ'), ('close', 'RB'), ('to', 'TO'), ('the', 'DT'), ('law', 'NN'), ('.', '.')]


(S
  (NP Jibby/NNP)
  (VP is/VBZ)
  (

Sometimes prepositions can be chained together. The parsing can also be done, except when the second preposition is also considered as an adverb.

In [39]:
print(chunker.parse(modify_pos("Jibby is ahead of the law.", "ahead", "IN")))
print(chunker.parse(modify_pos("Jibby is away from the law.", "away", "IN")))
print(chunker.parse(modify_pos("Jibby is in front of the law.", "front", "IN", prev_word=["in"], next_word=["of"])))
print(chunker.parse(modify_pos("Jibby is next to the law.", "next", "IN", next_word=["to"])))
print(chunker.parse(modify_pos("Jibby is on top of the law.", "top", "IN", prev_word=["on"], next_word=["of"])))

(S
  (NP jibby/NN)
  (VP is/VBZ)
  (PP ahead/IN of/IN (NP the/DT law/NN))
  ./.)
(S
  (NP jibby/NN)
  (VP is/VBZ)
  (PP away/IN from/IN (NP the/DT law/NN))
  ./.)
(S
  (NP jibby/NN)
  (VP is/VBZ)
  (PP in/IN front/IN of/IN (NP the/DT law/NN))
  ./.)
(S
  (NP jibby/NN)
  (VP is/VBZ)
  (PP next/IN to/TO (NP the/DT law/NN))
  ./.)
(S
  (NP jibby/NN)
  (VP is/VBZ)
  (PP on/IN top/IN of/IN (NP the/DT law/NN))
  ./.)


The function also works for very spcecific cases like prepositional phrases with multiple prepositions.

# Verb Phrases

## Modal Auxilary

In [40]:
mod_aux = ["can", "cannot", "could", "couldn't", "can't", "may", "might", "must", "ought", "shall", "should",
           "will", "would", "wouldn't"]

t_c_list(mod_aux, prepend="Cat ", append=" fly.")

(S (NP Cat/NNP) (VP can/MD fly/VB) ./.)
[('Cat', 'NNP'), ('can', 'MD'), ('fly', 'VB'), ('.', '.')]


(S (NP Cat/NNP) (VP can/MD not/RB fly/VB) ./.)
[('Cat', 'NNP'), ('can', 'MD'), ('not', 'RB'), ('fly', 'VB'), ('.', '.')]


(S (NP Cat/NNP) (VP could/MD fly/VB) ./.)
[('Cat', 'NNP'), ('could', 'MD'), ('fly', 'VB'), ('.', '.')]


(S (NP Cat/NNP) (VP could/MD n't/RB fly/VB) ./.)
[('Cat', 'NNP'), ('could', 'MD'), ("n't", 'RB'), ('fly', 'VB'), ('.', '.')]


(S (NP Cat/NNP) (VP ca/MD n't/RB fly/VB) ./.)
[('Cat', 'NNP'), ('ca', 'MD'), ("n't", 'RB'), ('fly', 'VB'), ('.', '.')]


(S (NP Cat/NNP) (VP may/MD fly/VB) ./.)
[('Cat', 'NNP'), ('may', 'MD'), ('fly', 'VB'), ('.', '.')]


(S (NP Cat/NN) (VP might/MD fly/VB) ./.)
[('Cat', 'NN'), ('might', 'MD'), ('fly', 'VB'), ('.', '.')]


(S (NP Cat/NNP) (VP must/MD fly/VB) ./.)
[('Cat', 'NNP'), ('must', 'MD'), ('fly', 'VB'), ('.', '.')]


(S (NP Cat/NNP) (VP ought/MD fly/VB) ./.)
[('Cat', 'NNP'), ('ought', 'MD'), ('fly', 'VB'), ('.', '.')]


(S (NP Cat/

The modal verbs are parsed into their main verbs as verb phrases, except when there is a 'not' in between.

In [41]:
tag_and_chunk("Ali's favourite drink is tea.")
tag_and_chunk("Ali's drink is tea.")
tag_and_chunk("He's my brother.")

(S
  (NP (NP Ali/NNP 's/POS) (NP (AdjP favourite/JJ) drink/NN))
  (VP is/VBZ)
  (AdjP tea/JJ)
  ./.)
[('Ali', 'NNP'), ("'s", 'POS'), ('favourite', 'JJ'), ('drink', 'NN'), ('is', 'VBZ'), ('tea', 'JJ'), ('.', '.')]
(S (NP Ali/NNP 's/POS drink/NN) (VP is/VBZ) (AdjP tea/JJ) ./.)
[('Ali', 'NNP'), ("'s", 'POS'), ('drink', 'NN'), ('is', 'VBZ'), ('tea', 'JJ'), ('.', '.')]
(S (NP He/PRP) (VP 's/VBZ) my/PRP$ (NP brother/NN) ./.)
[('He', 'PRP'), ("'s", 'VBZ'), ('my', 'PRP$'), ('brother', 'NN'), ('.', '.')]


Possessive 's can also be parsed in noun phrase, except it there is an adjective in between. However, the library is able to distinguish between possessive 's and contraction 's.

## Be, do, have and tenses
These can be both main and auxilary verbs

## Simple present, simple past, present continuous, past continuous, simple present (passive), simple past (passive), present continuous (passive), past continous (passive)

In [42]:
am_was = ["am", "was", "was'nt"]

t_c_list(am_was, prepend="I ", append=" cat.")
t_c_list(am_was, prepend="I ", append=" cleaned.")
t_c_list(am_was, prepend="I ", append=" cleaning.")
t_c_list(am_was, prepend="I ", append=" being cleaned.")

(S (NP I/PRP) (VP am/VBP cat/RB) ./.)
[('I', 'PRP'), ('am', 'VBP'), ('cat', 'RB'), ('.', '.')]


(S (NP I/PRP) (VP was/VBD) (NP cat/NN) ./.)
[('I', 'PRP'), ('was', 'VBD'), ('cat', 'NN'), ('.', '.')]


(S (NP I/PRP) (VP was'nt/VBP) (NP cat/NN) ./.)
[('I', 'PRP'), ("was'nt", 'VBP'), ('cat', 'NN'), ('.', '.')]


(S (NP I/PRP) (VP am/VBP cleaned/VBN) ./.)
[('I', 'PRP'), ('am', 'VBP'), ('cleaned', 'VBN'), ('.', '.')]


(S (NP I/PRP) (VP was/VBD cleaned/VBN) ./.)
[('I', 'PRP'), ('was', 'VBD'), ('cleaned', 'VBN'), ('.', '.')]


(S (NP I/PRP) (VP was'nt/VBP cleaned/VBN) ./.)
[('I', 'PRP'), ("was'nt", 'VBP'), ('cleaned', 'VBN'), ('.', '.')]


(S (NP I/PRP) (VP am/VBP cleaning/VBG) ./.)
[('I', 'PRP'), ('am', 'VBP'), ('cleaning', 'VBG'), ('.', '.')]


(S (NP I/PRP) (VP was/VBD cleaning/VBG) ./.)
[('I', 'PRP'), ('was', 'VBD'), ('cleaning', 'VBG'), ('.', '.')]


(S (NP I/PRP) (VP was'nt/VBP cleaning/VBG) ./.)
[('I', 'PRP'), ("was'nt", 'VBP'), ('cleaning', 'VBG'), ('.', '.')]


(S (NP I/PRP) (VP am/

In [43]:
is_was = ["is", "isn't", "was", "was'nt"]

t_c_list(is_was, prepend="He ", append=" cat.")
t_c_list(is_was, prepend="He ", append=" cleaned.")
t_c_list(is_was, prepend="He ", append=" cleaning.")
t_c_list(is_was, prepend="He ", append=" being cleaned.")

(S (NP He/PRP) (VP is/VBZ) (NP cat/NN) ./.)
[('He', 'PRP'), ('is', 'VBZ'), ('cat', 'NN'), ('.', '.')]


(S (NP He/PRP) (VP is/VBZ n't/RB) (NP cat/NN) ./.)
[('He', 'PRP'), ('is', 'VBZ'), ("n't", 'RB'), ('cat', 'NN'), ('.', '.')]


(S (NP He/PRP) (VP was/VBD) (NP cat/NN) ./.)
[('He', 'PRP'), ('was', 'VBD'), ('cat', 'NN'), ('.', '.')]


(S (NP He/PRP) (VP was'nt/VBZ) (NP cat/NN) ./.)
[('He', 'PRP'), ("was'nt", 'VBZ'), ('cat', 'NN'), ('.', '.')]


(S (NP He/PRP) (VP is/VBZ cleaned/VBN) ./.)
[('He', 'PRP'), ('is', 'VBZ'), ('cleaned', 'VBN'), ('.', '.')]


(S (NP He/PRP) (VP is/VBZ n't/RB) cleaned/VBN ./.)
[('He', 'PRP'), ('is', 'VBZ'), ("n't", 'RB'), ('cleaned', 'VBN'), ('.', '.')]


(S (NP He/PRP) (VP was/VBD cleaned/VBN) ./.)
[('He', 'PRP'), ('was', 'VBD'), ('cleaned', 'VBN'), ('.', '.')]


(S (NP He/PRP) (VP was'nt/VBD cleaned/VBN) ./.)
[('He', 'PRP'), ("was'nt", 'VBD'), ('cleaned', 'VBN'), ('.', '.')]


(S (NP He/PRP) (VP is/VBZ cleaning/VBG) ./.)
[('He', 'PRP'), ('is', 'VBZ'), ('cleani

In [44]:
are_were = ["are", "aren't", "were", "weren't"]

t_c_list(are_were, prepend="We ", append=" cat.")
t_c_list(are_were, prepend="We ", append=" cleaned.")
t_c_list(are_were, prepend="We ", append=" cleaning.")
t_c_list(are_were, prepend="We ", append=" being cleaned.")

(S (NP We/PRP) (VP are/VBP) (AdjP cat/JJ) ./.)
[('We', 'PRP'), ('are', 'VBP'), ('cat', 'JJ'), ('.', '.')]


(S (NP We/PRP) (VP are/VBP n't/RB) (NP cat/NNS) ./.)
[('We', 'PRP'), ('are', 'VBP'), ("n't", 'RB'), ('cat', 'NNS'), ('.', '.')]


(S (NP We/PRP) (VP were/VBD) (NP cat/NNS) ./.)
[('We', 'PRP'), ('were', 'VBD'), ('cat', 'NNS'), ('.', '.')]


(S (NP We/PRP) (VP were/VBD n't/RB) cat/VBN ./.)
[('We', 'PRP'), ('were', 'VBD'), ("n't", 'RB'), ('cat', 'VBN'), ('.', '.')]


(S (NP We/PRP) (VP are/VBP cleaned/VBN) ./.)
[('We', 'PRP'), ('are', 'VBP'), ('cleaned', 'VBN'), ('.', '.')]


(S (NP We/PRP) (VP are/VBP n't/RB) cleaned/VBN ./.)
[('We', 'PRP'), ('are', 'VBP'), ("n't", 'RB'), ('cleaned', 'VBN'), ('.', '.')]


(S (NP We/PRP) (VP were/VBD cleaned/VBN) ./.)
[('We', 'PRP'), ('were', 'VBD'), ('cleaned', 'VBN'), ('.', '.')]


(S (NP We/PRP) (VP were/VBD n't/RB) cleaned/VBN ./.)
[('We', 'PRP'), ('were', 'VBD'), ("n't", 'RB'), ('cleaned', 'VBN'), ('.', '.')]


(S (NP We/PRP) (VP are/VBP cleani

## Present perfect, past perfect, present perfect continuous, past perfect continuous, present perfect (passive), past perfect (passive)

In [45]:
has_had = ["has", "had", "hasn't"]

t_c_list(has_had, prepend="He ", append=" water.")
t_c_list(has_had, prepend="He ", append=" cleaned.")
t_c_list(has_had, prepend="He ", append=" been cleaned.")
t_c_list(has_had, prepend="He ", append=" been cleaning.")

(S (NP He/PRP) (VP has/VBZ) (NP water/NN) ./.)
[('He', 'PRP'), ('has', 'VBZ'), ('water', 'NN'), ('.', '.')]


(S (NP He/PRP) (VP had/VBD) (NP water/NN) ./.)
[('He', 'PRP'), ('had', 'VBD'), ('water', 'NN'), ('.', '.')]


(S (NP He/PRP) (VP has/VBZ n't/RB) (NP water/NN) ./.)
[('He', 'PRP'), ('has', 'VBZ'), ("n't", 'RB'), ('water', 'NN'), ('.', '.')]


(S (NP He/PRP) (VP has/VBZ cleaned/VBN) ./.)
[('He', 'PRP'), ('has', 'VBZ'), ('cleaned', 'VBN'), ('.', '.')]


(S (NP He/PRP) (VP had/VBD cleaned/VBN) ./.)
[('He', 'PRP'), ('had', 'VBD'), ('cleaned', 'VBN'), ('.', '.')]


(S (NP He/PRP) (VP has/VBZ n't/RB) cleaned/VBN ./.)
[('He', 'PRP'), ('has', 'VBZ'), ("n't", 'RB'), ('cleaned', 'VBN'), ('.', '.')]


(S (NP He/PRP) (VP has/VBZ been/VBN cleaned/VBN) ./.)
[('He', 'PRP'), ('has', 'VBZ'), ('been', 'VBN'), ('cleaned', 'VBN'), ('.', '.')]


(S (NP He/PRP) (VP had/VBD been/VBN cleaned/VBN) ./.)
[('He', 'PRP'), ('had', 'VBD'), ('been', 'VBN'), ('cleaned', 'VBN'), ('.', '.')]


(S (NP He/PRP) (VP 

In [46]:
have_had = ["have", "had", "haven't"]

t_c_list(have_had, prepend="We ", append=" water.")
t_c_list(have_had, prepend="We ", append=" cleaned.")
t_c_list(have_had, prepend="We ", append=" been cleaned.")
t_c_list(have_had, prepend="We ", append=" been cleaning.")

(S (NP We/PRP) (VP have/VBP) (NP water/NN) ./.)
[('We', 'PRP'), ('have', 'VBP'), ('water', 'NN'), ('.', '.')]


(S (NP We/PRP) (VP had/VBD) (NP water/NN) ./.)
[('We', 'PRP'), ('had', 'VBD'), ('water', 'NN'), ('.', '.')]


(S (NP We/PRP) (VP have/VBP n't/RB) (NP water/NN) ./.)
[('We', 'PRP'), ('have', 'VBP'), ("n't", 'RB'), ('water', 'NN'), ('.', '.')]


(S (NP We/PRP) (VP have/VBP cleaned/VBN) ./.)
[('We', 'PRP'), ('have', 'VBP'), ('cleaned', 'VBN'), ('.', '.')]


(S (NP We/PRP) (VP had/VBD cleaned/VBN) ./.)
[('We', 'PRP'), ('had', 'VBD'), ('cleaned', 'VBN'), ('.', '.')]


(S (NP We/PRP) (VP have/VBP n't/RB) cleaned/VBN ./.)
[('We', 'PRP'), ('have', 'VBP'), ("n't", 'RB'), ('cleaned', 'VBN'), ('.', '.')]


(S (NP We/PRP) (VP have/VBP been/VBN cleaned/VBN) ./.)
[('We', 'PRP'), ('have', 'VBP'), ('been', 'VBN'), ('cleaned', 'VBN'), ('.', '.')]


(S (NP We/PRP) (VP had/VBD been/VBN cleaned/VBN) ./.)
[('We', 'PRP'), ('had', 'VBD'), ('been', 'VBN'), ('cleaned', 'VBN'), ('.', '.')]


(S (NP We

## Present perfect continuous (passive), past perfect continuous (passive)

In [47]:
t_c_list(has_had, prepend="He ", append=" been getting cleaned.")

(S (NP He/PRP) (VP has/VBZ been/VBN getting/VBG cleaned/VBN) ./.)
[('He', 'PRP'), ('has', 'VBZ'), ('been', 'VBN'), ('getting', 'VBG'), ('cleaned', 'VBN'), ('.', '.')]


(S (NP He/PRP) (VP had/VBD been/VBN getting/VBG cleaned/VBN) ./.)
[('He', 'PRP'), ('had', 'VBD'), ('been', 'VBN'), ('getting', 'VBG'), ('cleaned', 'VBN'), ('.', '.')]


(S
  (NP He/PRP)
  (VP has/VBZ n't/RB)
  been/VBN
  getting/VBG
  cleaned/VBN
  ./.)
[('He', 'PRP'), ('has', 'VBZ'), ("n't", 'RB'), ('been', 'VBN'), ('getting', 'VBG'), ('cleaned', 'VBN'), ('.', '.')]




In [48]:
t_c_list(have_had, prepend="We ", append=" been getting cleaned.")

(S (NP We/PRP) (VP have/VBP been/VBN getting/VBG cleaned/VBN) ./.)
[('We', 'PRP'), ('have', 'VBP'), ('been', 'VBN'), ('getting', 'VBG'), ('cleaned', 'VBN'), ('.', '.')]


(S (NP We/PRP) (VP had/VBD been/VBN getting/VBG cleaned/VBN) ./.)
[('We', 'PRP'), ('had', 'VBD'), ('been', 'VBN'), ('getting', 'VBG'), ('cleaned', 'VBN'), ('.', '.')]


(S
  (NP We/PRP)
  (VP have/VBP n't/RB)
  been/VBN
  getting/VBG
  cleaned/VBN
  ./.)
[('We', 'PRP'), ('have', 'VBP'), ("n't", 'RB'), ('been', 'VBN'), ('getting', 'VBG'), ('cleaned', 'VBN'), ('.', '.')]




## Future tense and probabilities

In [49]:
t_c_list(mod_aux, prepend="I ", append=" fly.")

(S (NP I/PRP) (VP can/MD fly/VB) ./.)
[('I', 'PRP'), ('can', 'MD'), ('fly', 'VB'), ('.', '.')]


(S (NP I/PRP) (VP can/MD not/RB fly/VB) ./.)
[('I', 'PRP'), ('can', 'MD'), ('not', 'RB'), ('fly', 'VB'), ('.', '.')]


(S (NP I/PRP) (VP could/MD fly/VB) ./.)
[('I', 'PRP'), ('could', 'MD'), ('fly', 'VB'), ('.', '.')]


(S (NP I/PRP) (VP could/MD n't/RB fly/VB) ./.)
[('I', 'PRP'), ('could', 'MD'), ("n't", 'RB'), ('fly', 'VB'), ('.', '.')]


(S (NP I/PRP) (VP ca/MD n't/RB fly/VB) ./.)
[('I', 'PRP'), ('ca', 'MD'), ("n't", 'RB'), ('fly', 'VB'), ('.', '.')]


(S (NP I/PRP) (VP may/MD fly/VB) ./.)
[('I', 'PRP'), ('may', 'MD'), ('fly', 'VB'), ('.', '.')]


(S (NP I/PRP) (VP might/MD fly/VB) ./.)
[('I', 'PRP'), ('might', 'MD'), ('fly', 'VB'), ('.', '.')]


(S (NP I/PRP) (VP must/MD fly/VB) ./.)
[('I', 'PRP'), ('must', 'MD'), ('fly', 'VB'), ('.', '.')]


(S (NP I/PRP) (VP ought/MD fly/VB) ./.)
[('I', 'PRP'), ('ought', 'MD'), ('fly', 'VB'), ('.', '.')]


(S (NP I/PRP) (VP shall/MD fly/VB) ./.)
[('I'

In [50]:
t_c_list(mod_aux, prepend="I ", append=" be flying.")

(S (NP I/PRP) (VP can/MD be/VB flying/VBG) ./.)
[('I', 'PRP'), ('can', 'MD'), ('be', 'VB'), ('flying', 'VBG'), ('.', '.')]


(S (NP I/PRP) (VP can/MD not/RB be/VB flying/VBG) ./.)
[('I', 'PRP'), ('can', 'MD'), ('not', 'RB'), ('be', 'VB'), ('flying', 'VBG'), ('.', '.')]


(S (NP I/PRP) (VP could/MD be/VB flying/VBG) ./.)
[('I', 'PRP'), ('could', 'MD'), ('be', 'VB'), ('flying', 'VBG'), ('.', '.')]


(S (NP I/PRP) (VP could/MD n't/RB be/VB flying/VBG) ./.)
[('I', 'PRP'), ('could', 'MD'), ("n't", 'RB'), ('be', 'VB'), ('flying', 'VBG'), ('.', '.')]


(S (NP I/PRP) (VP ca/MD n't/RB be/VB flying/VBG) ./.)
[('I', 'PRP'), ('ca', 'MD'), ("n't", 'RB'), ('be', 'VB'), ('flying', 'VBG'), ('.', '.')]


(S (NP I/PRP) (VP may/MD be/VB flying/VBG) ./.)
[('I', 'PRP'), ('may', 'MD'), ('be', 'VB'), ('flying', 'VBG'), ('.', '.')]


(S (NP I/PRP) (VP might/MD be/VB flying/VBG) ./.)
[('I', 'PRP'), ('might', 'MD'), ('be', 'VB'), ('flying', 'VBG'), ('.', '.')]


(S (NP I/PRP) (VP must/MD be/VB flying/VBG) ./.)


In [51]:
t_c_list(mod_aux, prepend="I ", append=" will have flown.")

(S (NP I/PRP) can/MD (VP will/MD have/VB flown/VBN) ./.)
[('I', 'PRP'), ('can', 'MD'), ('will', 'MD'), ('have', 'VB'), ('flown', 'VBN'), ('.', '.')]


(S (NP I/PRP) can/MD not/RB (VP will/MD have/VB flown/VBN) ./.)
[('I', 'PRP'), ('can', 'MD'), ('not', 'RB'), ('will', 'MD'), ('have', 'VB'), ('flown', 'VBN'), ('.', '.')]


(S (NP I/PRP) could/MD (VP will/MD have/VB flown/VBN) ./.)
[('I', 'PRP'), ('could', 'MD'), ('will', 'MD'), ('have', 'VB'), ('flown', 'VBN'), ('.', '.')]


(S (NP I/PRP) could/MD n't/RB (VP will/MD have/VB flown/VBN) ./.)
[('I', 'PRP'), ('could', 'MD'), ("n't", 'RB'), ('will', 'MD'), ('have', 'VB'), ('flown', 'VBN'), ('.', '.')]


(S (NP I/PRP) ca/MD n't/RB (VP will/MD have/VB flown/VBN) ./.)
[('I', 'PRP'), ('ca', 'MD'), ("n't", 'RB'), ('will', 'MD'), ('have', 'VB'), ('flown', 'VBN'), ('.', '.')]


(S (NP I/PRP) may/MD (VP will/MD have/VB flown/VBN) ./.)
[('I', 'PRP'), ('may', 'MD'), ('will', 'MD'), ('have', 'VB'), ('flown', 'VBN'), ('.', '.')]


(S (NP I/PRP) might/MD

In [52]:
t_c_list(mod_aux, prepend="I ", append=" have been flying.")

(S (NP I/PRP) (VP can/MD have/VB been/VBN flying/VBG) ./.)
[('I', 'PRP'), ('can', 'MD'), ('have', 'VB'), ('been', 'VBN'), ('flying', 'VBG'), ('.', '.')]


(S (NP I/PRP) (VP can/MD not/RB have/VB been/VBN flying/VBG) ./.)
[('I', 'PRP'), ('can', 'MD'), ('not', 'RB'), ('have', 'VB'), ('been', 'VBN'), ('flying', 'VBG'), ('.', '.')]


(S (NP I/PRP) (VP could/MD have/VB been/VBN flying/VBG) ./.)
[('I', 'PRP'), ('could', 'MD'), ('have', 'VB'), ('been', 'VBN'), ('flying', 'VBG'), ('.', '.')]


(S (NP I/PRP) (VP could/MD n't/RB have/VB been/VBN flying/VBG) ./.)
[('I', 'PRP'), ('could', 'MD'), ("n't", 'RB'), ('have', 'VB'), ('been', 'VBN'), ('flying', 'VBG'), ('.', '.')]


(S (NP I/PRP) (VP ca/MD n't/RB have/VB been/VBN flying/VBG) ./.)
[('I', 'PRP'), ('ca', 'MD'), ("n't", 'RB'), ('have', 'VB'), ('been', 'VBN'), ('flying', 'VBG'), ('.', '.')]


(S (NP I/PRP) (VP may/MD have/VB been/VBN flying/VBG) ./.)
[('I', 'PRP'), ('may', 'MD'), ('have', 'VB'), ('been', 'VBN'), ('flying', 'VBG'), ('.', '.')]


In [53]:
t_c_list(mod_aux, prepend="I ", append=" will be cleaned.")

(S (NP I/PRP) can/MD (VP will/MD be/VB cleaned/VBN) ./.)
[('I', 'PRP'), ('can', 'MD'), ('will', 'MD'), ('be', 'VB'), ('cleaned', 'VBN'), ('.', '.')]


(S (NP I/PRP) can/MD not/RB (VP will/MD be/VB cleaned/VBN) ./.)
[('I', 'PRP'), ('can', 'MD'), ('not', 'RB'), ('will', 'MD'), ('be', 'VB'), ('cleaned', 'VBN'), ('.', '.')]


(S (NP I/PRP) could/MD (VP will/MD be/VB cleaned/VBN) ./.)
[('I', 'PRP'), ('could', 'MD'), ('will', 'MD'), ('be', 'VB'), ('cleaned', 'VBN'), ('.', '.')]


(S (NP I/PRP) could/MD n't/RB (VP will/MD be/VB cleaned/VBN) ./.)
[('I', 'PRP'), ('could', 'MD'), ("n't", 'RB'), ('will', 'MD'), ('be', 'VB'), ('cleaned', 'VBN'), ('.', '.')]


(S (NP I/PRP) ca/MD n't/RB (VP will/MD be/VB cleaned/VBN) ./.)
[('I', 'PRP'), ('ca', 'MD'), ("n't", 'RB'), ('will', 'MD'), ('be', 'VB'), ('cleaned', 'VBN'), ('.', '.')]


(S (NP I/PRP) may/MD (VP will/MD be/VB cleaned/VBN) ./.)
[('I', 'PRP'), ('may', 'MD'), ('will', 'MD'), ('be', 'VB'), ('cleaned', 'VBN'), ('.', '.')]


(S (NP I/PRP) might/MD

In [54]:
t_c_list(mod_aux, prepend="I ", append=" will have been cleaned.")

(S (NP I/PRP) can/MD (VP will/MD have/VB been/VBN cleaned/VBN) ./.)
[('I', 'PRP'), ('can', 'MD'), ('will', 'MD'), ('have', 'VB'), ('been', 'VBN'), ('cleaned', 'VBN'), ('.', '.')]


(S
  (NP I/PRP)
  can/MD
  not/RB
  (VP will/MD have/VB been/VBN cleaned/VBN)
  ./.)
[('I', 'PRP'), ('can', 'MD'), ('not', 'RB'), ('will', 'MD'), ('have', 'VB'), ('been', 'VBN'), ('cleaned', 'VBN'), ('.', '.')]


(S (NP I/PRP) could/MD (VP will/MD have/VB been/VBN cleaned/VBN) ./.)
[('I', 'PRP'), ('could', 'MD'), ('will', 'MD'), ('have', 'VB'), ('been', 'VBN'), ('cleaned', 'VBN'), ('.', '.')]


(S
  (NP I/PRP)
  could/MD
  n't/RB
  (VP will/MD have/VB been/VBN cleaned/VBN)
  ./.)
[('I', 'PRP'), ('could', 'MD'), ("n't", 'RB'), ('will', 'MD'), ('have', 'VB'), ('been', 'VBN'), ('cleaned', 'VBN'), ('.', '.')]


(S
  (NP I/PRP)
  ca/MD
  n't/RB
  (VP will/MD have/VB been/VBN cleaned/VBN)
  ./.)
[('I', 'PRP'), ('ca', 'MD'), ("n't", 'RB'), ('will', 'MD'), ('have', 'VB'), ('been', 'VBN'), ('cleaned', 'VBN'), ('.', '

In [55]:
t_c_list(mod_aux, prepend="I ", append=" will be being cleaned.")

(S (NP I/PRP) can/MD (VP will/MD be/VB being/VBG cleaned/VBN) ./.)
[('I', 'PRP'), ('can', 'MD'), ('will', 'MD'), ('be', 'VB'), ('being', 'VBG'), ('cleaned', 'VBN'), ('.', '.')]


(S
  (NP I/PRP)
  can/MD
  not/RB
  (VP will/MD be/VB being/VBG cleaned/VBN)
  ./.)
[('I', 'PRP'), ('can', 'MD'), ('not', 'RB'), ('will', 'MD'), ('be', 'VB'), ('being', 'VBG'), ('cleaned', 'VBN'), ('.', '.')]


(S (NP I/PRP) could/MD (VP will/MD be/VB being/VBG cleaned/VBN) ./.)
[('I', 'PRP'), ('could', 'MD'), ('will', 'MD'), ('be', 'VB'), ('being', 'VBG'), ('cleaned', 'VBN'), ('.', '.')]


(S
  (NP I/PRP)
  could/MD
  n't/RB
  (VP will/MD be/VB being/VBG cleaned/VBN)
  ./.)
[('I', 'PRP'), ('could', 'MD'), ("n't", 'RB'), ('will', 'MD'), ('be', 'VB'), ('being', 'VBG'), ('cleaned', 'VBN'), ('.', '.')]


(S
  (NP I/PRP)
  ca/MD
  n't/RB
  (VP will/MD be/VB being/VBG cleaned/VBN)
  ./.)
[('I', 'PRP'), ('ca', 'MD'), ("n't", 'RB'), ('will', 'MD'), ('be', 'VB'), ('being', 'VBG'), ('cleaned', 'VBN'), ('.', '.')]


(S 

In [56]:
t_c_list(mod_aux, prepend="I ", append=" will have been getting cleaned.")

(S
  (NP I/PRP)
  can/MD
  (VP will/MD have/VB been/VBN getting/VBG cleaned/VBN)
  ./.)
[('I', 'PRP'), ('can', 'MD'), ('will', 'MD'), ('have', 'VB'), ('been', 'VBN'), ('getting', 'VBG'), ('cleaned', 'VBN'), ('.', '.')]


(S
  (NP I/PRP)
  can/MD
  not/RB
  (VP will/MD have/VB been/VBN getting/VBG cleaned/VBN)
  ./.)
[('I', 'PRP'), ('can', 'MD'), ('not', 'RB'), ('will', 'MD'), ('have', 'VB'), ('been', 'VBN'), ('getting', 'VBG'), ('cleaned', 'VBN'), ('.', '.')]


(S
  (NP I/PRP)
  could/MD
  (VP will/MD have/VB been/VBN getting/VBG cleaned/VBN)
  ./.)
[('I', 'PRP'), ('could', 'MD'), ('will', 'MD'), ('have', 'VB'), ('been', 'VBN'), ('getting', 'VBG'), ('cleaned', 'VBN'), ('.', '.')]


(S
  (NP I/PRP)
  could/MD
  n't/RB
  (VP will/MD have/VB been/VBN getting/VBG cleaned/VBN)
  ./.)
[('I', 'PRP'), ('could', 'MD'), ("n't", 'RB'), ('will', 'MD'), ('have', 'VB'), ('been', 'VBN'), ('getting', 'VBG'), ('cleaned', 'VBN'), ('.', '.')]


(S
  (NP I/PRP)
  ca/MD
  n't/RB
  (VP will/MD have/VB been/

# Do

In [57]:
do_did = ["do", "did", "don't do", "didn't do"]

t_c_list(do_did, prepend="I ", append=" homework.")
t_c_list(do_did, prepend="I ", append=" achieve.")

(S (NP I/PRP) (VP do/VBP homework/RB) ./.)
[('I', 'PRP'), ('do', 'VBP'), ('homework', 'RB'), ('.', '.')]


(S (NP I/PRP) (VP did/VBD) (NP homework/NN) ./.)
[('I', 'PRP'), ('did', 'VBD'), ('homework', 'NN'), ('.', '.')]


(S (NP I/PRP) (VP do/VBP n't/RB) (VP do/VB homework/VB) ./.)
[('I', 'PRP'), ('do', 'VBP'), ("n't", 'RB'), ('do', 'VB'), ('homework', 'VB'), ('.', '.')]


(S (NP I/PRP) (VP did/VBD n't/RB) (VP do/VB homework/VB) ./.)
[('I', 'PRP'), ('did', 'VBD'), ("n't", 'RB'), ('do', 'VB'), ('homework', 'VB'), ('.', '.')]


(S (NP I/PRP) (VP do/VBP achieve/RB) ./.)
[('I', 'PRP'), ('do', 'VBP'), ('achieve', 'RB'), ('.', '.')]


(S (NP I/PRP) (VP did/VBD achieve/VB) ./.)
[('I', 'PRP'), ('did', 'VBD'), ('achieve', 'VB'), ('.', '.')]


(S (NP I/PRP) (VP do/VBP n't/RB) (VP do/VB achieve/VB) ./.)
[('I', 'PRP'), ('do', 'VBP'), ("n't", 'RB'), ('do', 'VB'), ('achieve', 'VB'), ('.', '.')]


(S (NP I/PRP) (VP did/VBD n't/RB) (VP do/VB achieve/VB) ./.)
[('I', 'PRP'), ('did', 'VBD'), ("n't", 'RB')

In [58]:
does_did = ["does", "did", "doesn't do", "didn't do"]

t_c_list(does_did, prepend="he ", append=" homework.")
t_c_list(does_did, prepend="he ", append=" achieve.")

(S (NP he/PRP) (VP does/VBZ) (NP homework/NN) ./.)
[('he', 'PRP'), ('does', 'VBZ'), ('homework', 'NN'), ('.', '.')]


(S (NP he/PRP) (VP did/VBD) (NP homework/NN) ./.)
[('he', 'PRP'), ('did', 'VBD'), ('homework', 'NN'), ('.', '.')]


(S (NP he/PRP) (VP does/VBZ n't/RB) (VP do/VB homework/VB) ./.)
[('he', 'PRP'), ('does', 'VBZ'), ("n't", 'RB'), ('do', 'VB'), ('homework', 'VB'), ('.', '.')]


(S (NP he/PRP) (VP did/VBD n't/RB) (VP do/VB homework/VB) ./.)
[('he', 'PRP'), ('did', 'VBD'), ("n't", 'RB'), ('do', 'VB'), ('homework', 'VB'), ('.', '.')]


(S (NP he/PRP) (VP does/VBZ achieve/VB) ./.)
[('he', 'PRP'), ('does', 'VBZ'), ('achieve', 'VB'), ('.', '.')]


(S (NP he/PRP) (VP did/VBD achieve/VB) ./.)
[('he', 'PRP'), ('did', 'VBD'), ('achieve', 'VB'), ('.', '.')]


(S (NP he/PRP) (VP does/VBZ n't/RB) (VP do/VB achieve/VB) ./.)
[('he', 'PRP'), ('does', 'VBZ'), ("n't", 'RB'), ('do', 'VB'), ('achieve', 'VB'), ('.', '.')]


(S (NP he/PRP) (VP did/VBD n't/RB) (VP do/VB achieve/VB) ./.)
[('he', 

Whatever comes behind "do" will be treated as adverb, causing problematic verb phrase parsing. Otherwise, 'does' and 'did' with a verb after get parsed.

# To
'To' can be infinitive or preposition

In [59]:
tag_and_chunk("I am going to work.")
tag_and_chunk("I am going to eat the bread.")

(S (NP I/PRP) (VP am/VBP going/VBG (ToP to/TO work/VB)) ./.)
[('I', 'PRP'), ('am', 'VBP'), ('going', 'VBG'), ('to', 'TO'), ('work', 'VB'), ('.', '.')]
(S
  (NP I/PRP)
  (VP am/VBP going/VBG (ToP to/TO eat/VB (NP the/DT bread/NN)))
  ./.)
[('I', 'PRP'), ('am', 'VBP'), ('going', 'VBG'), ('to', 'TO'), ('eat', 'VB'), ('the', 'DT'), ('bread', 'NN'), ('.', '.')]


## Phrasal verb
Verbs that come with a preposition, like clean off.

In [60]:
tag_and_chunk("He turned in.")
tag_and_chunk("He walked across the street.")

(S (NP He/PRP) (VP turned/VBD in/IN) ./.)
[('He', 'PRP'), ('turned', 'VBD'), ('in', 'IN'), ('.', '.')]
(S
  (NP He/PRP)
  (VP walked/VBD)
  (PP across/IN (NP the/DT street/NN))
  ./.)
[('He', 'PRP'), ('walked', 'VBD'), ('across', 'IN'), ('the', 'DT'), ('street', 'NN'), ('.', '.')]


Phrasal verb parsing is limited, because it shares similar structure with prepositional phrases. It works if there is no noun after the verb and preposition.

## Homophones: auxilary and noun

In [61]:
tag_and_chunk("I can be late.")
tag_and_chunk("There are cans of tomatoes.")
tag_and_chunk("There is a can of tomatoes.")
tag_and_chunk("Test your might.")
tag_and_chunk("Test your mights.")
tag_and_chunk("I might be late.")

(S (NP I/PRP) (VP can/MD be/VB late/RB) ./.)
[('I', 'PRP'), ('can', 'MD'), ('be', 'VB'), ('late', 'RB'), ('.', '.')]
(S
  (NP There/EX)
  (VP are/VBP)
  (NP (NP cans/NNS) (PP of/IN (NP tomatoes/NNS)))
  ./.)
[('There', 'EX'), ('are', 'VBP'), ('cans', 'NNS'), ('of', 'IN'), ('tomatoes', 'NNS'), ('.', '.')]
(S
  (NP There/EX)
  (VP is/VBZ)
  a/DT
  can/MD
  (PP of/IN (NP tomatoes/NNS))
  ./.)
[('There', 'EX'), ('is', 'VBZ'), ('a', 'DT'), ('can', 'MD'), ('of', 'IN'), ('tomatoes', 'NNS'), ('.', '.')]
(S (NP Test/NNP) your/PRP$ might/MD ./.)
[('Test', 'NNP'), ('your', 'PRP$'), ('might', 'MD'), ('.', '.')]
(S (VP Test/VB) your/PRP$ (NP mights/NNS) ./.)
[('Test', 'VB'), ('your', 'PRP$'), ('mights', 'NNS'), ('.', '.')]
(S (NP I/PRP) (VP might/MD be/VB late/RB) ./.)
[('I', 'PRP'), ('might', 'MD'), ('be', 'VB'), ('late', 'RB'), ('.', '.')]


Some auxilaries and nouns share the same spelling, but for the most part, the distinguishing is accurate.

## WH- words and yes/no question

In [62]:
wh_pronouns = ["what", "where", "who", "when", "why"]

t_c_list(wh_pronouns, append=" is it?")

(S (NP what/WP) (VP is/VBZ) (NP it/PRP) ?/.)
[('what', 'WP'), ('is', 'VBZ'), ('it', 'PRP'), ('?', '.')]


(S (NP where/WRB) (VP is/VBZ) (NP it/PRP) ?/.)
[('where', 'WRB'), ('is', 'VBZ'), ('it', 'PRP'), ('?', '.')]


(S (NP who/WP) (VP is/VBZ) (NP it/PRP) ?/.)
[('who', 'WP'), ('is', 'VBZ'), ('it', 'PRP'), ('?', '.')]


(S (NP when/WRB) (VP is/VBZ) (NP it/PRP) ?/.)
[('when', 'WRB'), ('is', 'VBZ'), ('it', 'PRP'), ('?', '.')]


(S (NP why/WRB) (VP is/VBZ) (NP it/PRP) ?/.)
[('why', 'WRB'), ('is', 'VBZ'), ('it', 'PRP'), ('?', '.')]




In [63]:
wh_dets = ["which", "whose"]

t_c_list(wh_dets, append=" idea is it?")

(S (NP which/WDT idea/NN) (VP is/VBZ) (NP it/PRP) ?/.)
[('which', 'WDT'), ('idea', 'NN'), ('is', 'VBZ'), ('it', 'PRP'), ('?', '.')]


(S whose/WP$ (NP idea/NN) (VP is/VBZ) (NP it/PRP) ?/.)
[('whose', 'WP$'), ('idea', 'NN'), ('is', 'VBZ'), ('it', 'PRP'), ('?', '.')]




In [64]:
t_c_list(am_was, append=" I healthy?")
t_c_list(is_was, append=" he healthy?")
t_c_list(are_were, append=" we healthy?")

(S (VP am/VBP) (NP I/PRP) (AdjP healthy/JJ) ?/.)
[('am', 'VBP'), ('I', 'PRP'), ('healthy', 'JJ'), ('?', '.')]


(S (VP was/VBD) (NP I/PRP) (AdjP healthy/JJ) ?/.)
[('was', 'VBD'), ('I', 'PRP'), ('healthy', 'JJ'), ('?', '.')]


(S (NP (NP was'nt/NN) (NP I/PRP)) (AdjP healthy/JJ) ?/.)
[("was'nt", 'NN'), ('I', 'PRP'), ('healthy', 'JJ'), ('?', '.')]


(S (VP is/VBZ) (NP he/PRP) (AdjP healthy/JJ) ?/.)
[('is', 'VBZ'), ('he', 'PRP'), ('healthy', 'JJ'), ('?', '.')]


(S (VP is/VBZ n't/RB) (NP he/PRP) (AdjP healthy/JJ) ?/.)
[('is', 'VBZ'), ("n't", 'RB'), ('he', 'PRP'), ('healthy', 'JJ'), ('?', '.')]


(S (VP was/VBD) (NP he/PRP) (AdjP healthy/JJ) ?/.)
[('was', 'VBD'), ('he', 'PRP'), ('healthy', 'JJ'), ('?', '.')]


(S (NP (NP was'nt/NN) (NP he/PRP)) (AdjP healthy/JJ) ?/.)
[("was'nt", 'NN'), ('he', 'PRP'), ('healthy', 'JJ'), ('?', '.')]


(S (VP are/VBP) (NP we/PRP) (AdjP healthy/JJ) ?/.)
[('are', 'VBP'), ('we', 'PRP'), ('healthy', 'JJ'), ('?', '.')]


(S (VP are/VBP n't/RB) (NP we/PRP) (AdjP hea

In [65]:
t_c_list(has_had, append=" he eaten?")
t_c_list(have_had, append=" you eaten?")

(S (VP has/VBZ) (NP he/PRP) (VP eaten/VB) ?/.)
[('has', 'VBZ'), ('he', 'PRP'), ('eaten', 'VB'), ('?', '.')]


(S (VP had/VBD) (NP he/PRP) (VP eaten/VB) ?/.)
[('had', 'VBD'), ('he', 'PRP'), ('eaten', 'VB'), ('?', '.')]


(S (VP has/VBZ n't/RB) (NP he/PRP) (VP eaten/VB) ?/.)
[('has', 'VBZ'), ("n't", 'RB'), ('he', 'PRP'), ('eaten', 'VB'), ('?', '.')]


(S (VP have/VB) (NP you/PRP) eaten/VBN ?/.)
[('have', 'VB'), ('you', 'PRP'), ('eaten', 'VBN'), ('?', '.')]


(S (VP had/VBD) (NP you/PRP) (VP eaten/VB) ?/.)
[('had', 'VBD'), ('you', 'PRP'), ('eaten', 'VB'), ('?', '.')]


(S (VP have/VBP n't/RB) (NP you/PRP) (VP eaten/VB) ?/.)
[('have', 'VBP'), ("n't", 'RB'), ('you', 'PRP'), ('eaten', 'VB'), ('?', '.')]




# Coordinating Conjunction

https://grammar.yourdictionary.com/parts-of-speech/conjunctions/coordinating-conjunctions.html

In [66]:
tag_and_chunk("I go to the park every Sunday.")
tag_and_chunk("I long to see his face.")
for_conj = "I go to the park every Sunday, for I long to see his face."
tag_and_chunk(for_conj)

(S
  (NP I/PRP)
  (VP go/VBP)
  (PP to/TO (NP the/DT park/NN))
  (NP every/DT Sunday/NNP)
  ./.)
[('I', 'PRP'), ('go', 'VBP'), ('to', 'TO'), ('the', 'DT'), ('park', 'NN'), ('every', 'DT'), ('Sunday', 'NNP'), ('.', '.')]
(S (NP I/PRP) long/RB (ToP to/TO see/VB) his/PRP$ (NP face/NN) ./.)
[('I', 'PRP'), ('long', 'RB'), ('to', 'TO'), ('see', 'VB'), ('his', 'PRP$'), ('face', 'NN'), ('.', '.')]
(S
  (NP I/PRP)
  (VP go/VBP)
  (PP to/TO (NP the/DT park/NN))
  (NP every/DT Sunday/NNP)
  ,/,
  (PP for/IN (NP I/PRP))
  (AdjP long/JJ)
  (ToP to/TO see/VB)
  his/PRP$
  (NP face/NN)
  ./.)
[('I', 'PRP'), ('go', 'VBP'), ('to', 'TO'), ('the', 'DT'), ('park', 'NN'), ('every', 'DT'), ('Sunday', 'NNP'), (',', ','), ('for', 'IN'), ('I', 'PRP'), ('long', 'JJ'), ('to', 'TO'), ('see', 'VB'), ('his', 'PRP$'), ('face', 'NN'), ('.', '.')]


NLTK treats for as an adverb, but it can also be coordinating conjuction.

In [67]:
and_conj = "I like to read, and I write in my journal every night."
tag_and_chunk(and_conj)

(S
  (NP I/PRP)
  (VP like/VBP (ToP to/TO read/VB))
  ,/,
  and/CC
  (NP I/PRP)
  (VP write/VBP in/IN)
  my/PRP$
  (NP (NP journal/NN) (NP every/DT night/NN))
  ./.)
[('I', 'PRP'), ('like', 'VBP'), ('to', 'TO'), ('read', 'VB'), (',', ','), ('and', 'CC'), ('I', 'PRP'), ('write', 'VBP'), ('in', 'IN'), ('my', 'PRP$'), ('journal', 'NN'), ('every', 'DT'), ('night', 'NN'), ('.', '.')]


In [68]:
and_conj = "You should invite Mario and Estefan to the party."
tag_and_chunk(and_conj)

(S
  (NP You/PRP)
  (VP should/MD invite/VB)
  (NP
    (NP Mario/NNP)
    and/CC
    (NP (NP Estefan/NNP) (PP to/TO (NP the/DT party/NN))))
  ./.)
[('You', 'PRP'), ('should', 'MD'), ('invite', 'VB'), ('Mario', 'NNP'), ('and', 'CC'), ('Estefan', 'NNP'), ('to', 'TO'), ('the', 'DT'), ('party', 'NN'), ('.', '.')]


And is accurate.

In [69]:
nor_conj = "My sister doesn’t like to study, nor does she take notes in class."
tag_and_chunk(nor_conj)

(S
  My/PRP$
  (NP sister/NN doesn/NN ’/NNP t/NN)
  like/IN
  (ToP to/TO study/VB)
  ,/,
  nor/CC
  (VP does/VBZ)
  (NP she/PRP)
  (VP take/VB)
  (NP (NP notes/NNS) (PP in/IN (NP class/NN)))
  ./.)
[('My', 'PRP$'), ('sister', 'NN'), ('doesn', 'NN'), ('’', 'NNP'), ('t', 'NN'), ('like', 'IN'), ('to', 'TO'), ('study', 'VB'), (',', ','), ('nor', 'CC'), ('does', 'VBZ'), ('she', 'PRP'), ('take', 'VB'), ('notes', 'NNS'), ('in', 'IN'), ('class', 'NN'), ('.', '.')]


Nor is accurate

In [70]:
but_conj = "Television is a wonderful escape, but it interferes with my writing."
tag_and_chunk(but_conj)

(S
  (NP Television/NN)
  (VP is/VBZ)
  (NP (NP a/DT (AdjP wonderful/JJ) escape/NN) ,/, but/CC (NP it/PRP))
  (VP interferes/VBZ with/IN)
  my/PRP$
  (NP writing/NN)
  ./.)
[('Television', 'NN'), ('is', 'VBZ'), ('a', 'DT'), ('wonderful', 'JJ'), ('escape', 'NN'), (',', ','), ('but', 'CC'), ('it', 'PRP'), ('interferes', 'VBZ'), ('with', 'IN'), ('my', 'PRP$'), ('writing', 'NN'), ('.', '.')]


In [71]:
or_conj = "We could have dinner before the movie, or we could grab a bite afterward."
tag_and_chunk(or_conj)

(S
  (NP We/PRP)
  (VP could/MD have/VB dinner/VBN)
  (PP before/IN (NP the/DT movie/NN))
  ,/,
  or/CC
  (NP we/PRP)
  (VP could/MD grab/VB)
  (NP a/DT (AdjP bite/JJ) afterward/NN)
  ./.)
[('We', 'PRP'), ('could', 'MD'), ('have', 'VB'), ('dinner', 'VBN'), ('before', 'IN'), ('the', 'DT'), ('movie', 'NN'), (',', ','), ('or', 'CC'), ('we', 'PRP'), ('could', 'MD'), ('grab', 'VB'), ('a', 'DT'), ('bite', 'JJ'), ('afterward', 'NN'), ('.', '.')]


In [72]:
or_conj = "I can’t decide if I should study economics or political science."
tag_and_chunk(or_conj)

(S
  (NP I/PRP)
  (VP can/MD ’/VB)
  (AdjP t/JJ)
  (PP decide/IN if/IN (NP I/PRP))
  (VP should/MD study/VB)
  (NP (NP economics/NNS) or/CC (NP (AdjP political/JJ) science/NN))
  ./.)
[('I', 'PRP'), ('can', 'MD'), ('’', 'VB'), ('t', 'JJ'), ('decide', 'IN'), ('if', 'IN'), ('I', 'PRP'), ('should', 'MD'), ('study', 'VB'), ('economics', 'NNS'), ('or', 'CC'), ('political', 'JJ'), ('science', 'NN'), ('.', '.')]


In [73]:
yet_conj = "I always take a book to the beach, yet I never seem to turn a single page."
tag_and_chunk(yet_conj)

(S
  (NP I/PRP)
  (VP always/RB take/VBP)
  (NP (NP a/DT book/NN) (PP to/TO (NP the/DT beach/NN)))
  ,/,
  yet/RB
  (NP I/PRP)
  (VP
    never/RB
    seem/VBP
    (ToP to/TO turn/VB (NP a/DT (AdjP single/JJ) page/NN)))
  ./.)
[('I', 'PRP'), ('always', 'RB'), ('take', 'VBP'), ('a', 'DT'), ('book', 'NN'), ('to', 'TO'), ('the', 'DT'), ('beach', 'NN'), (',', ','), ('yet', 'RB'), ('I', 'PRP'), ('never', 'RB'), ('seem', 'VBP'), ('to', 'TO'), ('turn', 'VB'), ('a', 'DT'), ('single', 'JJ'), ('page', 'NN'), ('.', '.')]
