In [1]:
import nltk
import sys

TERMINALS = """
Adj -> "country" | "dreadful" | "enigmatical" | "little" | "moist" | "red"
Adv -> "down" | "here" | "never"
Conj -> "and" | "until"
Det -> "a" | "an" | "his" | "my" | "the"
N -> "armchair" | "companion" | "day" | "door" | "hand" | "he" | "himself"
N -> "holmes" | "home" | "i" | "mess" | "paint" | "palm" | "pipe" | "she"
N -> "smile" | "thursday" | "walk" | "we" | "word"
P -> "at" | "before" | "in" | "of" | "on" | "to"
V -> "arrived" | "came" | "chuckled" | "had" | "lit" | "said" | "sat"
V -> "smiled" | "tell" | "were"
"""

NONTERMINALS = """
S -> NP VP | S Conj S | S Conj VP
PP -> P NP
NP -> N | Adj NP | Det NP | NP PP
VP -> V | VP NP | VP Adv | Adv VP | VP PP

"""

grammar = nltk.CFG.fromstring(NONTERMINALS + TERMINALS)
parser = nltk.ChartParser(grammar)


def main():

    # If filename specified, read sentence from file
    if len(sys.argv) == 2:
        with open(sys.argv[1]) as f:
            s = f.read()

    # Otherwise, get sentence as input
    else:
        s = input("Sentence: ")

    # Convert input into list of words
    s = preprocess(s)

    # Attempt to parse sentence
    try:
        trees = list(parser.parse(s))
    except ValueError as e:
        print(e)
        return
    if not trees:
        print("Could not parse sentence.")
        return

    # Print each tree with noun phrase chunks
    for tree in trees:
        tree.pretty_print()

        print("Noun Phrase Chunks")
        for np in np_chunk(tree):
            print(" ".join(np.flatten()))


def preprocess(sentence):
    """
    Convert `sentence` to a list of its words.
    Pre-process sentence by converting all characters to lowercase
    and removing any word that does not contain at least one alphabetic
    character.
    """
    words = nltk.word_tokenize(sentence)
    processed_words = []
    for word in words:
        alphanumeric_count = 0
        clean_word = []
        for char in word:
            if char.isalnum():
                alphanumeric_count = alphanumeric_count + 1
                clean_word.append(char)
        clean_word = ''.join(clean_word)
        if alphanumeric_count > 0:
            processed_words.append(clean_word.lower())
    return processed_words


def np_chunk(tree):
    """
    Return a list of all noun phrase chunks in the sentence tree.
    A noun phrase chunk is defined as any subtree of the sentence
    whose label is "NP" that does not itself contain any other
    noun phrases as subtrees.
    """
    return []




In [53]:
s = 'I had a country walk on Thursday and came home in a dreadful mess.'
s = preprocess(s)

# Attempt to parse sentence
try:
    trees = list(parser.parse(s))
except ValueError as e:
    print(e)

if not trees:
    print("Could not parse sentence.")


In [54]:
trees

[Tree('S', [Tree('S', [Tree('NP', [Tree('N', ['i'])]), Tree('VP', [Tree('VP', [Tree('VP', [Tree('V', ['had'])]), Tree('NP', [Tree('Det', ['a']), Tree('NP', [Tree('Adj', ['country']), Tree('NP', [Tree('N', ['walk'])])])])]), Tree('PP', [Tree('P', ['on']), Tree('NP', [Tree('N', ['thursday'])])])])]), Tree('Conj', ['and']), Tree('VP', [Tree('VP', [Tree('VP', [Tree('V', ['came'])]), Tree('NP', [Tree('N', ['home'])])]), Tree('PP', [Tree('P', ['in']), Tree('NP', [Tree('Det', ['a']), Tree('NP', [Tree('Adj', ['dreadful']), Tree('NP', [Tree('N', ['mess'])])])])])])]),
 Tree('S', [Tree('S', [Tree('NP', [Tree('N', ['i'])]), Tree('VP', [Tree('VP', [Tree('V', ['had'])]), Tree('NP', [Tree('NP', [Tree('Det', ['a']), Tree('NP', [Tree('Adj', ['country']), Tree('NP', [Tree('N', ['walk'])])])]), Tree('PP', [Tree('P', ['on']), Tree('NP', [Tree('N', ['thursday'])])])])])]), Tree('Conj', ['and']), Tree('VP', [Tree('VP', [Tree('VP', [Tree('V', ['came'])]), Tree('NP', [Tree('N', ['home'])])]), Tree('PP', [Tre

In [55]:
trees[1].label()

'S'

In [61]:
subs = trees[0].subtrees()
for s in subs:
    if s.label() == 'NP':
        contain_subtrees = False
        for s2 in s.subtrees():
            if s2.label() == 'NP':
                contain_subtrees = True
        #if contain_subtrees == False:
        print(s)

(NP (N i))
(NP (Det a) (NP (Adj country) (NP (N walk))))
(NP (Adj country) (NP (N walk)))
(NP (N walk))
(NP (N thursday))
(NP (N home))
(NP (Det a) (NP (Adj dreadful) (NP (N mess))))
(NP (Adj dreadful) (NP (N mess)))
(NP (N mess))


In [63]:

def subtree_np_check(subtree):
    if subtree.label() != 'NP':
        return False
    for s in subtree.subtrees():
        if s.label() == 'NP':
            if s.label() == 'N':
                print(s)
                return False
            else:
                return True
        else:
            subtree_np_check(s)
    return False

In [58]:
subs = trees[0].subtrees()


In [59]:
subtree_np_check(trees[0])

False

In [60]:
subs = trees[0].subtrees()


['i']
['walk']
['thursday']
['home']
['mess']


In [34]:
subs

<generator object Tree.subtrees at 0x7fee4ebd3c50>

In [35]:
for s in subs:
    print(s)

In [64]:
subs = trees[0].subtrees()
for s in subs:
    if s.label() == 'NP':
        print(subtree_np_check(s))
        
        #if contain_subtrees == False:
        print(s)

True
(NP (N i))
True
(NP (Det a) (NP (Adj country) (NP (N walk))))
True
(NP (Adj country) (NP (N walk)))
True
(NP (N walk))
True
(NP (N thursday))
True
(NP (N home))
True
(NP (Det a) (NP (Adj dreadful) (NP (N mess))))
True
(NP (Adj dreadful) (NP (N mess)))
True
(NP (N mess))
