# Top Down parsing

In [31]:
import nltk
from nltk.parse import RecursiveDescentParser

In [32]:
#Specification of CFG
grammer = nltk.CFG.fromstring("""
S -> NP VP
NP -> Det N | Det N PP
VP -> V | V NP | V NP PP
PP -> P NP

Det -> 'a' | 'an' | 'the'
N -> 'man' | 'park' | 'dog' | 'telescope'
V -> 'saw' | 'walked'
P -> 'in' | 'with'
""")

In [33]:
str = "the man saw a dog in the park with a telescope"
rdstr = RecursiveDescentParser(grammer)

In [21]:
#print each tree
for tree in rdstr.parse(str.split()):
    print(tree)

(S
  (NP (Det the) (N man))
  (VP
    (V saw)
    (NP
      (Det a)
      (N dog)
      (PP
        (P in)
        (NP
          (Det the)
          (N park)
          (PP (P with) (NP (Det a) (N telescope))))))))
(S
  (NP (Det the) (N man))
  (VP
    (V saw)
    (NP (Det a) (N dog))
    (PP
      (P in)
      (NP
        (Det the)
        (N park)
        (PP (P with) (NP (Det a) (N telescope)))))))
(S
  (NP (Det the) (N man))
  (VP
    (V saw)
    (NP (Det a) (N dog) (PP (P in) (NP (Det the) (N park))))
    (PP (P with) (NP (Det a) (N telescope)))))


In [23]:
tree

The Ghostscript executable isn't found.
See http://web.mit.edu/ghostscript/www/Install.htm
If you're using a Mac, you can try installing
https://docs.brew.sh/Installation then `brew install ghostscript`


LookupError: 

Tree('S', [Tree('NP', [Tree('Det', ['the']), Tree('N', ['man'])]), Tree('VP', [Tree('V', ['saw']), Tree('NP', [Tree('Det', ['a']), Tree('N', ['dog']), Tree('PP', [Tree('P', ['in']), Tree('NP', [Tree('Det', ['the']), Tree('N', ['park'])])])]), Tree('PP', [Tree('P', ['with']), Tree('NP', [Tree('Det', ['a']), Tree('N', ['telescope'])])])])])

In [25]:
nltk.app.rdparser()
# once the recursive descent parser application opens, 
# you can edit the 'Text' and 'Grammar' according to your requirements


# Bottom Up Parsing

In [26]:
# ShiftReduce Parser is a bottom up parser
from nltk.parse import ShiftReduceParser

In [28]:
srp = ShiftReduceParser(grammer)

In [34]:
#print each tree
for tree in srp.parse(str.split()):
    print(tree)
    
# Shift Reduce parser tries to aggregate the string to the start symbol, since it's a bottom-up parser
# It is not able to find the parse tree even if it exists 

In [30]:
nltk.app.srparser()



# PCFG parsing

In [36]:
#define the grammar for pcfg
pcfg_grammar = nltk.PCFG.fromstring("""
    S -> NP VP [1.0] 
    PP -> P NP [1.0]
    VP -> V NP [0.7] | VP PP [0.3] 
    NP -> NP PP [0.4] 
    P -> 'with' [1.0]
    V -> 'saw' [1.0]
    NP -> 'astronomers' [0.1] | 'ears' [0.18] | 'saw' [0.04] | 'stars' [0.18] | 'telescopes' [0.1]
    """)

In [37]:
str = "astronomers saw stars with ears"

In [35]:
from nltk.parse import pchart

In [41]:
pcfg = pchart.InsideChartParser(pcfg_grammar)

#print all possible trees, showing probability of each parse
for t in pcfg.parse(str.split()):
    print(t)

(S
  (NP astronomers)
  (VP (V saw) (NP (NP stars) (PP (P with) (NP ears))))) (p=0.0009072)
(S
  (NP astronomers)
  (VP (VP (V saw) (NP stars)) (PP (P with) (NP ears)))) (p=0.0006804)
