# Write an AI to parse sentences and extract noun phrases.

In [1]:
import nltk
import sys
import numpy as np

# Lexical rules: each line maps a part-of-speech tag (Adj, Adv, Conj, Det, N,
# P, V) to the lowercase words it may produce. A tag may appear on several
# lines (N and V do); the alternatives accumulate.
TERMINALS = """
Adj -> "country" | "dreadful" | "enigmatical" | "little" | "moist" | "red"
Adv -> "down" | "here" | "never"
Conj -> "and"
Det -> "a" | "an" | "his" | "my" | "the"
N -> "armchair" | "companion" | "day" | "door" | "hand" | "he" | "himself"
N -> "holmes" | "home" | "i" | "mess" | "paint" | "palm" | "pipe" | "she"
N -> "smile" | "thursday" | "walk" | "we" | "word"
P -> "at" | "before" | "in" | "of" | "on" | "to" | "until"
V -> "arrived" | "came" | "chuckled" | "had" | "lit" | "said" | "sat"
V -> "smiled" | "tell" | "were"
"""

# Sentence rules. Every S production is a flat sequence of part-of-speech
# tags matching one specific sentence shape; no intermediate phrase symbols
# (such as NP or VP) are defined, so parse trees are only two levels deep
# and only sentences with exactly one of these five shapes can be parsed.
NONTERMINALS = """
S -> N V
S -> N V Det N
S -> N V Det N P N
S -> N V P Det Adj N Conj N V
S -> Det N V Det Adj N
"""

# Combine both rule sets into one grammar. NONTERMINALS is placed first, so
# an S rule is the first production in the grammar text.
# NOTE(review): nltk is expected to take the first rule's left-hand side as
# the start symbol -- confirm against the nltk CFG documentation.
grammar = nltk.CFG.fromstring(NONTERMINALS + TERMINALS)
# Module-level chart parser built from the grammar; main() uses it.
parser = nltk.ChartParser(grammar)

In [2]:
#sent = "she smiled".split()
#parser = nltk.ChartParser(grammar)
#for tree in parser.parse(sent):
    #print(tree)  # -> (S (N she) (V smiled))

In [3]:
import re
def preprocess(sentence):
    """
    Convert `sentence` to a list of its words.

    Punctuation (anything that is neither a word character nor
    whitespace) is stripped, the text is split on runs of any whitespace
    (spaces, tabs, newlines), every word is lowercased, and any word that
    does not contain at least one alphabetic character is removed.

    Returns a list of lowercase strings; empty or all-punctuation input
    yields an empty list.
    """
    # Remove punctuation so "sat." and "sat" become the same token.
    stripped = re.sub(r'[^\w\s]', '', sentence)

    # split() with no argument splits on any whitespace run, so a trailing
    # newline (e.g. from a file) or doubled spaces never glue onto a word
    # or create empty tokens.
    return [
        word.lower()
        for word in stripped.split()
        # Keep words with at least one letter (e.g. "28th"); drop pure
        # numbers and other letter-free tokens.
        if any(ch.isalpha() for ch in word)
    ]
def np_chunk(tree):
    """
    Collect the noun words found directly beneath `tree`.

    Only the immediate children of `tree` are inspected. For every child
    whose label is "N", the child's first element (the word it covers) is
    taken. The words are returned, in sentence order, as a NumPy array;
    the array is empty when no child is labelled "N".
    """
    # Each child must expose .label() and indexing, as parse-tree nodes do.
    nouns = [child[0] for child in tree if child.label() == "N"]
    return np.array(nouns)

In [4]:
#sentence = "holmes sat."
#sentence = preprocess(sentence)
#s = list(parser.parse(sentence))  # -> [Tree('S', [Tree('N', ['holmes']), Tree('V', ['sat'])])]

In [5]:
#print(s[0][0].label())  # -> N
#print(s[0][1].label())  # -> V
#print(s[0])  # -> (S (N holmes) (V sat))
#print(s[0][0].label() == "N")  # -> True
#print(s[0][0], s[0][1])  # -> (N holmes) (V sat)
#print(len(s[0]))  # -> 2

In [6]:
def main():
    """
    Read a sentence, parse it, and print each parse with its noun chunks.

    The sentence comes from the file named by the single command-line
    argument when exactly one is given; otherwise the user is prompted.
    If parsing raises ValueError the error is printed; if there are no
    parses, "Could not parse sentence." is printed. In both cases the
    function returns early.
    """
    # Choose the input source: prompt unless exactly one argument was given.
    if len(sys.argv) != 2:
        raw = input("Sentence: ")
    else:
        with open(sys.argv[1]) as source:
            raw = source.read()

    # Turn the raw text into a list of words.
    words = preprocess(raw)

    # Parse; a ValueError from the parser is reported rather than raised.
    try:
        parses = list(parser.parse(words))
    except ValueError as err:
        print(err)
        return

    if len(parses) == 0:
        print("Could not parse sentence.")
        return

    # Show every parse tree followed by its noun phrase chunks.
    for parse_tree in parses:
        parse_tree.pretty_print()

        print("Noun Phrase Chunks")
        for chunk in np_chunk(parse_tree):
            print(" ".join(chunk.flatten()))

In [7]:
#Exp 1
main()

Sentence: Holmes sat.
        S     
   _____|___   
  N         V 
  |         |  
holmes     sat

Noun Phrase Chunks
holmes


In [8]:
#Exp 2
main()

Sentence: Holmes lit a pipe.
        S          
   _____|_______    
  N     V  Det  N  
  |     |   |   |   
holmes lit  a  pipe

Noun Phrase Chunks
holmes
pipe


In [9]:
#Exp 3
main()

Sentence: We arrived the day before Thursday.
             S                     
  ___________|________________      
 N     V    Det  N    P       N    
 |     |     |   |    |       |     
 we arrived the day before thursday

Noun Phrase Chunks
we
day
thursday


In [10]:
#Exp 4
main()

Sentence: Holmes sat in the red armchair and he chuckled.
                    S                            
   _________________|_______________________      
  N     V   P  Det Adj    N     Conj  N     V    
  |     |   |   |   |     |      |    |     |     
holmes sat  in the red armchair and   he chuckled

Noun Phrase Chunks
holmes
armchair
he


In [11]:
#Exp 5
main()

Sentence: My companion smiled an enigmatical smile. 
                S                         
  ______________|______________________    
Det     N       V    Det     Adj       N  
 |      |       |     |       |        |   
 my companion smiled  an enigmatical smile

Noun Phrase Chunks
companion
smile
