In [1]:
import nltk

In [2]:
# Example sentence that the tokenisation and POS-tagging cells operate on.
input_sentence = "Dogs chase Cats"

In [3]:
# Split the raw sentence into word tokens.
# NOTE(review): word_tokenize presumably needs NLTK's tokenizer data to be
# downloaded beforehand — confirm this cell runs on a fresh environment.
text = nltk.word_tokenize(input_sentence)

In [4]:
# Echo the token list to check the tokeniser's result.
text

['Dogs', 'chase', 'Cats']

In [5]:
# Tag each token with its part of speech. Despite the variable name, the
# result holds (token, tag) pairs rather than bare tokens.
list_of_tokens = nltk.pos_tag(text)

In [6]:
# Show the (token, tag) pairs produced by the tagger.
print(list_of_tokens)

[('Dogs', 'NNS'), ('chase', 'NN'), ('Cats', 'NNP')]


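# NNS	noun, plural (Dogs)
# NN	noun, singular (chase) — note: "chase" is the verb of this sentence, so this tag is a tagger error
# NNP	proper noun, singular (Cats) — probably triggered by the capital letter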

The NLTK POS tagger assigns a part-of-speech tag (a grammatical category) to each word of the sentence.

# Grammatical Ambiguities

# Ubiquitous Ambiguity

# Let's take a closer look at the ambiguity in the sentence "I shot an elephant in my pajamas": the phrase "in my pajamas" can describe either the shooting or the elephant. First we need to define a simple grammar:

In [7]:
# Toy context-free grammar for "I shot an elephant in my pajamas".
# A PP can attach to a VP (VP -> VP PP) or sit inside an NP (NP -> Det N PP),
# which is what makes the sentence structurally ambiguous.
# The rules are written without the "... " prefixes that came from pasted
# interpreter continuation prompts: they are not grammar syntax, and each rule
# has to begin with its left-hand-side nonterminal.
groucho_grammar = nltk.CFG.fromstring("""
S -> NP VP
PP -> P NP
NP -> Det N | Det N PP | 'I'
VP -> V NP | VP PP
Det -> 'an' | 'my'
N -> 'elephant' | 'pajamas'
V -> 'shot'
P -> 'in'
""")

In [8]:
# List the grammar's productions; each '|' alternative in the grammar string
# shows up as a separate rule.
groucho_grammar.productions()

[S -> NP VP,
 PP -> P NP,
 NP -> Det N,
 NP -> Det N PP,
 NP -> 'I',
 VP -> V NP,
 VP -> VP PP,
 Det -> 'an',
 Det -> 'my',
 N -> 'elephant',
 N -> 'pajamas',
 V -> 'shot',
 P -> 'in']

In [9]:
# The sentence as a list of tokens; every word here appears as a terminal in
# groucho_grammar.
sent = ['I', 'shot', 'an', 'elephant', 'in', 'my', 'pajamas']

In [10]:
# Build a chart parser over groucho_grammar.
parser = nltk.ChartParser(groucho_grammar)

In [11]:
# Print every parse the chart parser finds. The two trees in the output differ
# in where the PP "in my pajamas" attaches: to the VP or inside the object NP.
for tree in parser.parse(sent):
    print(tree)

(S
  (NP I)
  (VP
    (VP (V shot) (NP (Det an) (N elephant)))
    (PP (P in) (NP (Det my) (N pajamas)))))
(S
  (NP I)
  (VP
    (V shot)
    (NP (Det an) (N elephant) (PP (P in) (NP (Det my) (N pajamas))))))


In [None]:
rd_parser = nltk.RecursiveDescentParser(groucho_grammar)
# groucho_grammar contains the left-recursive rule VP -> VP PP. A recursive
# descent parser keeps re-expanding that leftmost VP without consuming any
# input, so once the genuine parses have been printed the search recurses
# until Python raises RecursionError. Catch it so the cell finishes cleanly
# and the limitation is visible in the output.
try:
    for tree in rd_parser.parse(sent):
        print(tree)
except RecursionError:
    print("Search stopped: the left-recursive rule VP -> VP PP makes "
          "recursive descent recurse without bound.")

(S
  (NP I)
  (VP
    (V shot)
    (NP (Det an) (N elephant) (PP (P in) (NP (Det my) (N pajamas))))))
(S
  (NP I)
  (VP
    (VP (V shot) (NP (Det an) (N elephant)))
    (PP (P in) (NP (Det my) (N pajamas)))))


# 1. Recursive Descent Parsing

In [None]:
# Small grammar for the recursive-descent demo. No rule starts its right-hand
# side with its own left-hand side, so there is no direct left recursion.
# A PP may follow the object inside the VP (VP -> V NP PP) or sit inside an NP
# (NP -> Det N PP). Common nouns always require a determiner (NP -> Det N).
grammar1 = nltk.CFG.fromstring("""
  S -> NP VP
  VP -> V NP | V NP PP
  PP -> P NP
  V -> "saw" | "ate" | "walked"
  NP -> "John" | "Mary" | "Bob" | Det N | Det N PP
  Det -> "a" | "an" | "the" | "my"
  N -> "man" | "dog" | "cat" | "telescope" | "park"
  P -> "in" | "on" | "by" | "with"
  """)

In [None]:
# Sentence to trace through the recursive descent parser. The earlier
# assignment of "Mary saw Bob" was overwritten before use and has been dropped;
# swap in "Mary saw Bob".split() for an input that grammar1 does accept.
# Under grammar1 a common noun needs a determiner (NP -> Det N), so the bare
# "dog" below cannot be parsed and no tree is produced.
sent = "John walked dog by the cat".split()

# trace=3 makes the parser print its expand/match/backtrack steps.
rd_parser = nltk.RecursiveDescentParser(grammar1, trace=3)

parse_count = 0
for parse_count, tree in enumerate(rd_parser.parse(sent), start=1):
    print(tree)

# Say so explicitly when nothing parsed, rather than ending silently after the
# trace output.
if parse_count == 0:
    print("No parse found for:", " ".join(sent))