In [1]:
import nltk
from nltk import tree

def loadData(path):
	"""Read a text file and return its non-blank lines.

	Args:
		path: Path of the text file to read; one record per line.

	Returns:
		A list of strings, one per line of the file, with the line
		terminator removed. Lines that are empty or contain only
		whitespace are omitted, so a trailing newline at the end of the
		file (or an empty file) does not produce a spurious '' entry.
	"""
	with open(path, 'r') as f:
		# Iterating the handle yields one line at a time, each still
		# carrying its '\n'; strip only that terminator so the content
		# of every kept line is otherwise untouched.
		return [line.rstrip('\n') for line in f if line.strip()]

def getTreeData(data):
	"""Parse bracketed tree strings into nltk Tree objects.

	Args:
		data: Iterable of strings, each holding one bracketed tree.

	Returns:
		A list with one Tree per non-blank input string, in input order.
		A list (rather than a lazy map object) means parsing happens
		when this function is called and the result can be iterated
		more than once.
	"""
	# Tree is reached through the nltk package instead of the module-level
	# name `tree`, which is easy to rebind by accident in a notebook
	# (for example as a loop variable). Blank strings are skipped because
	# they do not contain a tree to parse.
	return [nltk.Tree.fromstring(s) for s in data if s.strip()]


In [2]:
from nltk import Nonterminal, induce_pcfg
from nltk.draw.tree import TreeView

In [3]:
# Main script: load the bracketed parse trees, collect every production they
# contain, and induce a PCFG with start symbol S from those productions.

print ("loading data..")
data = loadData('parseTrees.txt')
print ("generating trees..")
# list() guarantees treeData is a concrete, reusable sequence even when
# getTreeData hands back a one-shot iterator; it also means the trees have
# really been built by the time "done" is printed.
treeData = list(getTreeData(data))
print ("done")
rules = list()
print ("compiling the rules")
for t in treeData:
    # productions() yields the productions used in tree t; duplicates are
    # kept on purpose because induce_pcfg receives the full list below.
    rules.extend(t.productions())
print("Number of rules: " + str(len(rules)))
print ("computing PCFG")
S = Nonterminal('S')
grammar = induce_pcfg(S, rules)
print ("PCFG:")
print(grammar)

loading data..
generating trees..
done
compiling the rules
Number of rules: 214289
computing PCFG
PCFG:
Grammar with 566 productions (start state = S)
    S -> DECL_MD [0.428924]
    DECL_MD -> NP_PPSS VERB_MD NP_NN AVPNP_NP pt_char_per [0.0705036]
    NP_PPSS -> PRON_PPSS [1.0]
    PRON_PPSS -> i [0.998601]
    i -> 'i' [1.0]
    VERB_MD -> pt_verb_md [0.998134]
    pt_verb_md -> 'need' [0.974299]
    NP_NN -> ADJ_AT NOUN_NN PREP_IN [0.0736596]
    ADJ_AT -> a [0.394803]
    a -> 'a' [1.0]
    NOUN_NN -> flight [0.40036]
    flight -> 'flight' [1.0]
    PREP_IN -> pt_prep_in [0.307885]
    pt_prep_in -> 'from' [0.997651]
    AVPNP_NP -> NOUN_NP PP_NN [0.0218978]
    NOUN_NP -> charlotte [0.113329]
    charlotte -> 'charlotte' [1.0]
    PP_NN -> PREP_IN NP_NP NOUN_NN PP_NP [0.0644788]
    PREP_IN -> to [0.302821]
    to -> 'to' [1.0]
    NP_NP -> NOUN_NP RELCL_VBZ [0.106612]
    NOUN_NP -> las vegas [0.112429]
    las -> 'las' [1.0]
    vegas -> 'vegas' [1.0]
    RELCL_VBZ -> NP_WPS VE

In [4]:
# Inspect which class the object bound to `grammar` is an instance of.
type(grammar)

nltk.grammar.PCFG

In [4]:
# Parse one sample sentence with the induced grammar and show every parse.
sentence = "show me the meals on the flight from Phoenix".split()
print ("parsing with InsideChart parser...")
inside_parser = nltk.InsideChartParser(grammar)
# Trace level 3 prints the chart edges as the parser works through them,
# which makes the cell output very long.
inside_parser.trace(3)
# The loop variable must not be called `tree`: that name is bound to the
# nltk.tree module by `from nltk import tree`, and rebinding it here would
# break any later use of `tree.Tree`.
for parse_tree in inside_parser.parse(sentence):
    print(parse_tree)
    # NOTE(review): draw() presumably opens a GUI window per parse and needs
    # a display - confirm before running headless.
    parse_tree.draw()
print ("done!")

parsing with InsideChart parser...
  |[-] . . . . . . . .| [0:1] 'show'                 [1.0]
  |. [-] . . . . . . .| [1:2] 'me'                   [1.0]
  |. . [-] . . . . . .| [2:3] 'the'                  [1.0]
  |. . . [-] . . . . .| [3:4] 'meals'                [1.0]
  |. . . . [-] . . . .| [4:5] 'on'                   [1.0]
  |. . . . . [-] . . .| [5:6] 'the'                  [1.0]
  |. . . . . . [-] . .| [6:7] 'flight'               [1.0]
  |. . . . . . . [-] .| [7:8] 'from'                 [1.0]
  |. . . . . . . . [-]| [8:9] 'Phoenix'              [1.0]
  |. . . . . . . . [-]| [8:9] 'Phoenix'              [1.0]
  |. . . . . . . [-] .| [7:8] 'from'                 [1.0]
  |. . . . . . [-] . .| [6:7] 'flight'               [1.0]
  |. . . . . . [-] . .| [6:7] flight -> 'flight' *   [1.0]
  |. . . . . . > . . .| [6:6] flight -> * 'flight'   [1.0]
  |. . . . . [-] . . .| [5:6] 'the'                  [1.0]
  |. . . . . [-] . . .| [5:6] the -> 'the' *         [1.0]
  |. . . . . [-] . . 

  |. . . . > . . . . .| [4:4] PP_NP -> * PREP_IN NP_NPS NOUN_NP [0.005330053300533005]
  |. . . . . . . > . .| [7:7] PP_NP -> * PREP_IN NP_NPS NOUN_NP [0.005330053300533005]
  |[-> . . . . . . . .| [0:1] IMPR_VB -> VERB_VB * NP_NNS pt_char_per [0.005291062605691864]
  |. . . . . . > . . .| [6:6] NP_NN -> * NOUN_NN PP_CC [0.005010856856522465]
  |. . . . . > . . . .| [5:5] NP_NN -> * ADJ_AT NOUN_NN PP_NP PP_NN PP_NP [0.005010856856522465]
  |. . > . . . . . . .| [2:2] NP_NN -> * ADJ_AT NOUN_NN PP_NP PP_NN PP_NP [0.005010856856522465]
  |. . . . . . . [-> .| [7:8] PP_NP -> PREP_IN * NOUN_NP PP_NP PP_NN [0.00491155631619445]
  |. . . . [-> . . . .| [4:5] PP_NN -> PREP_IN * NP_NP NOUN_NN [0.004760254434746445]
  |. . . . . . . [-> .| [7:8] PP_NP -> PREP_IN * NOUN_NP PRPRTCL_VBG [0.004722650304033125]
  |. . . . > . . . . .| [4:4] PP_NN -> * PREP_IN ADJ_DT NP_NN NOUN_NN [0.004633204633204633]
  |. . . . . . . > . .| [7:7] PP_NN -> * PREP_IN ADJ_DT NP_NN NOUN_NN [0.004633204633204633]
  |[->

  |. . [-------------]| [2:9] NP -> Det Nominal *    [6.003272981677121e-06]
  |. . . . . [---] . .| [5:7] S  -> NP_NN *          [5.694668703306371e-06]
  |. . . . . [-----> .| [5:8] NP_NN -> NP_NN * NP_NN NOUN_NN [5.493952316747573e-06]
  |. . . . [---> . . .| [4:6] PP_NP -> PREP_IN ADJ_AT * NOUN_NP [5.31119349740237e-06]
  |. . . . [-----> . .| [4:7] NP_NN -> AJP_JJ NOUN_NN * PP_NP PP_NP [5.155003330637759e-06]
  |. . . . [-----> . .| [4:7] NP_NN -> AJP_JJ NOUN_NN * PP_NP pt_char_per [5.155003330637759e-06]
  |. . . . . [-----> .| [5:8] NP_NNS -> NP_NN * NOUN_NNS [4.713713459496664e-06]
  |. . . . . [-----> .| [5:8] NP_NNS -> NP_NN * NOUN_NNS PP_NP [4.713713459496664e-06]
  |. . . . [-----> . .| [4:7] PP_NN -> PREP_IN NP_NN * NOUN_NN NAPPOS_NP [4.575428393946993e-06]
  |[-> . . . . . . . .| [0:1] NP_NN -> NOUN_NN * PREP_IN [4.285193215885083e-06]
  |. . . . . [---> . .| [5:7] NP_NP -> NP_NN * NOUN_NP PP_NPS [4.076245702661209e-06]
  |. . . . . [---> . .| [5:7] RELCL_VBZ -> NP_NN * V

(S
  (IVP
    (IVerb show)
    (NP (Pronoun me))
    (NP
      (Det the)
      (Nominal
        (Nominal (Noun meals))
        (PP
          (Preposition on)
          (NP (Det the) (Nominal (Noun flight))))))
    (PP (Preposition from) (NP (Proper_Noun Phoenix))))) (p=2.19769e-08)
(S
  (IVP
    (IVerb show)
    (NP (Pronoun me))
    (NP
      (NP (Det the) (Nominal (Noun meals)))
      (PP
        (Preposition on)
        (NP (Det the) (Nominal (Noun flight)))))
    (PP (Preposition from) (NP (Proper_Noun Phoenix))))) (p=1.35593e-08)
(S
  (IVP
    (IVerb show)
    (NP (Pronoun me))
    (NP (Det the) (Nominal (Noun meals)))
    (PP
      (Preposition on)
      (NP
        (NP (Det the) (Nominal (Noun flight)))
        (PP (Preposition from) (NP (Proper_Noun Phoenix))))))) (p=1.35593e-08)
(S
  (IVP
    (IVerb show)
    (NP (Pronoun me))
    (NP
      (Det the)
      (Nominal
        (Nominal
          (Nominal (Noun meals))
          (PP
            (Preposition on)
            (NP (Det