In [None]:
import ast
import pickle
import subprocess
import sys

from nltk import Nonterminal, nonterminals, Production, CFG

In [None]:
# Two distinct nonterminal symbols, compared in the cells that follow.
nt1, nt2 = Nonterminal('NP'), Nonterminal('VP')

In [None]:
# symbol() gives back the name the nonterminal was constructed with.
nt1.symbol()

'NP'

In [None]:
# Nonterminals built from the same symbol compare equal.
nt1 == Nonterminal('NP')

True

In [None]:
# Nonterminals built from different symbols are not equal.
nt1 == nt2

False

In [None]:
# Declare the nonterminals used by the productions and grammars below:
# phrase-level symbols first, then part-of-speech symbols.
phrase_symbols = nonterminals('S, NP, VP, PP')
pos_symbols = nonterminals('N, V, P, DT')
S, NP, VP, PP = phrase_symbols
N, V, P, DT = pos_symbols

In [None]:
# Two CFG productions: S -> NP VP and NP -> DT NP.
sentence_rhs = [NP, VP]
noun_phrase_rhs = [DT, NP]
prod1 = Production(S, sentence_rhs)
prod2 = Production(NP, noun_phrase_rhs)

In [None]:
# lhs() is the left-hand-side nonterminal of the production.
prod1.lhs()

S

In [None]:
# rhs() is the right-hand side; it is displayed as a tuple of symbols.
prod1.rhs()

(NP, VP)

In [None]:
# A production built from the same lhs and rhs compares equal to prod1.
prod1 == Production(S, [NP, VP])

True

In [None]:
# Productions with a different lhs/rhs are not equal.
prod1 == prod2

False

In [None]:
# Toy CFG for the Russian example sentences parsed in the cells below.
#
# Every grammar line must start directly with a nonterminal: doctest "..."
# continuation prompts are not grammar syntax and make CFG.fromstring raise.
#
# Terminals are matched literally, so each word form used in the example
# sentences needs its own rule. The sentences use the instrumental form
# 'собакой'; without it the parsers reject the input with a coverage
# ValueError. 'собаку' is kept so the grammar still accepts that form too.
grammar = CFG.fromstring("""
S -> NP VP
PP -> P NP
NP ->  N | N PP
VP -> V NP | V PP | V NP PP
N -> 'кот'
N -> 'собаку'
N -> 'собакой'
N -> 'ковру'
V -> 'гонялся'
P -> 'по'
P -> 'за'
""")

In [None]:
# Source of a tiny program that is run in a separate interpreter: it pickles
# a Production and prints the repr of the resulting bytes object.
# The text must be valid Python as-is; doctest "..." prompts inside the
# string would be passed to the child verbatim and cause a SyntaxError.
cmd = """import pickle
from nltk import Production
p = Production('S', ['NP', 'VP'])
print(pickle.dumps(p))
"""

In [None]:
# Start a subprocess to simulate pickling in another process.
# check=True makes a failing child raise CalledProcessError right here
# instead of surfacing later as a confusing parse error on empty output.
proc = subprocess.run([sys.executable, '-c', cmd], stdout=subprocess.PIPE, check=True)
# The child prints the repr of a bytes object (b'...'). literal_eval parses
# that literal without executing arbitrary code the way eval() would.
pickled = ast.literal_eval(proc.stdout.decode().strip())
p1 = pickle.loads(pickled)
p2 = Production('S', ['NP', 'VP'])
# The hash of a Production should not depend on the process that built it.
print(hash(p1) == hash(p2))

True


In [None]:
# Recursive-descent parser over the CFG currently bound to `grammar`.
from nltk.parse import RecursiveDescentParser
rd = RecursiveDescentParser(grammar)

In [None]:
# Whitespace-tokenised example sentences for the recursive-descent parser.
raw_sentences = ('кот гонялся за собакой', 'кот гонялся за собакой по ковру')
sent1, sent2 = map(str.split, raw_sentences)

In [None]:
# Print every tree the recursive-descent parser yields for the short sentence.
for tree in rd.parse(sent1):
    print(tree)

(S (NP (N кот)) (VP (V гонялся) (PP (P за) (NP (N собакой)))))


In [None]:
# Print every tree the recursive-descent parser yields for the longer sentence.
for tree in rd.parse(sent2):
    print(tree)

(S
  (NP (N кот))
  (VP
    (V гонялся)
    (PP (P за) (NP (N собакой) (PP (P по) (NP (N ковру)))))))


In [None]:
# Shift-reduce parser over the same CFG bound to `grammar`.
from nltk.parse import ShiftReduceParser
sr = ShiftReduceParser(grammar)

In [None]:
# Whitespace-tokenised example sentences for the shift-reduce parser.
sentence1, sentence2 = (
    text.split()
    for text in ('кот гонялся за собакой', 'кот гонялся за собакой по ковру')
)

In [None]:
# Print every tree the shift-reduce parser yields for the short sentence.
for tree in sr.parse(sentence1):
    print(tree)

(S (NP (N кот)) (VP (V гонялся) (PP (P за) (NP (N собакой)))))


In [None]:
# Print every tree the shift-reduce parser yields for the longer sentence.
# NOTE(review): the notebook records no output for this cell — presumably the
# parser finds no complete parse here; confirm on a fresh run.
for tree in sr.parse(sentence2):
    print(tree)

In [None]:
import nltk

In [None]:
# nltk's chart demo parses with its own built-in English demo grammar, so the
# sentence has to consist of words that grammar covers; a Russian sentence
# fails the grammar's coverage check with a ValueError.
nltk.parse.chart.demo(2, print_times=False, trace=1,
                      sent='I saw a dog', numparses=1)

In [None]:
# The demo uses nltk's built-in English demo grammar; the sentence must be
# covered by it, otherwise the coverage check raises a ValueError.
nltk.parse.chart.demo(1, print_times=False, trace=0,
                      sent='I saw a dog', numparses=2)

In [None]:
# The demo uses nltk's built-in English demo grammar; the sentence must be
# covered by it, otherwise the coverage check raises a ValueError.
nltk.parse.chart.demo(2, print_times=False, trace=0,
                      sent='I saw a dog', numparses=2)

In [None]:
# The demo uses nltk's built-in English demo grammar; the sentence must be
# covered by it, otherwise the coverage check raises a ValueError.
nltk.parse.chart.demo(3, print_times=False, trace=0,
                      sent='I saw a dog', numparses=2)

In [None]:
# The demo uses nltk's built-in English demo grammar; the sentence must be
# covered by it, otherwise the coverage check raises a ValueError.
nltk.parse.chart.demo(4, print_times=False, trace=0,
                      sent='I saw a dog', numparses=2)

In [None]:
# The demo uses nltk's built-in English demo grammar; the sentence must be
# covered by it, otherwise the coverage check raises a ValueError.
nltk.parse.chart.demo(5, print_times=False, trace=1,
                      sent='I saw a dog', numparses=2)

In [None]:
# The Earley demo also parses with nltk's built-in English demo grammar, so
# the sentence must use words that grammar covers (a Russian sentence fails
# the coverage check with a ValueError).
nltk.parse.earleychart.demo(print_times=False, trace=1,
                            sent='I saw a dog', numparses=2)

In [None]:
# Set-up for the probabilistic-grammar cells: fetch the treebank corpus sample
# and import the PCFG helpers.
from nltk.corpus import treebank
nltk.download('treebank')
from itertools import islice
from nltk.grammar import PCFG, induce_pcfg
# First toy PCFG: every production carries its probability in brackets.
# NOTE(review): toy_pcfg1 is not referenced by any other cell visible in this
# part of the notebook.
toy_pcfg1 = PCFG.fromstring("""
     S -> NP VP [1.0]
     NP -> Det N [0.5] | NP PP [0.25] | 'John' [0.1] | 'I' [0.15]
     Det -> 'the' [0.8] | 'my' [0.2]
     N -> 'man' [0.5] | 'telescope' [0.5]
     VP -> VP PP [0.1] | V NP [0.7] | V [0.2]
     V -> 'ate' [0.35] | 'saw' [0.65]
     PP -> P NP [1.0]
     P -> 'with' [0.61] | 'under' [0.39]
    """)

In [None]:
# Second toy PCFG, one production per line with its probability in brackets.
# This is the grammar later bound to `grammar` for the probabilistic chart
# parsers and the Viterbi parser.
toy_pcfg2 = PCFG.fromstring("""
     S    -> NP VP         [1.0]
     VP   -> V NP          [.59]
     VP   -> V             [.40]
     VP   -> VP PP         [.01]
     NP   -> Det N         [.41]
     NP   -> Name          [.28]
     NP   -> NP PP         [.31]
     PP   -> P NP          [1.0]
     V    -> 'saw'         [.21]
     V    -> 'ate'         [.51]
     V    -> 'ran'         [.28]
     N    -> 'boy'         [.11]
     N    -> 'cookie'      [.12]
     N    -> 'table'       [.13]
     N    -> 'telescope'   [.14]
     N    -> 'hill'        [.5]
     Name -> 'Jack'        [.52]
     Name -> 'Bob'         [.48]
     P    -> 'with'        [.61]
     P    -> 'under'       [.39]
     Det  -> 'the'         [.41]
     Det  -> 'a'           [.31]
     Det  -> 'my'          [.28]
     """)

In [None]:
# Small PCFG used to inspect probabilistic productions.
# Each grammar line must begin with a nonterminal (doctest "..." prompts are
# not grammar syntax and make PCFG.fromstring raise); the bracketed
# probabilities of all alternatives for one left-hand side sum to 1.
grammar = PCFG.fromstring("""
A -> B B [.3] | C B C [.7]
B -> B D [.5] | C [.5]
C -> 'a' [.1] | 'b' [0.9]
D -> 'b' [1.0]
""")
# Keep the first production for the lhs()/rhs()/prob() cells that follow.
prod = grammar.productions()[0]
prod

A -> B B [0.3]

In [None]:
# Left-hand side of the first PCFG production.
prod.lhs()

A

In [None]:
# Right-hand side of the first PCFG production.
prod.rhs()

(B, B)

In [None]:
# Probability attached to the first PCFG production.
print(prod.prob())

0.3


In [None]:
# Start symbol of the PCFG.
grammar.start()

A

In [None]:
# All productions of the PCFG, each shown with its probability.
grammar.productions()

[A -> B B [0.3],
 A -> C B C [0.7],
 B -> B D [0.5],
 B -> C [0.5],
 C -> 'a' [0.1],
 C -> 'b' [0.9],
 D -> 'b' [1.0]]

In [None]:
# Collect the productions of every parsed sentence in the first two
# treebank files into one flat list.
productions = [
    rule
    for fileid in treebank.fileids()[:2]
    for tree in treebank.parsed_sents(fileid)
    for rule in tree.productions()
]

In [None]:
# Induce a PCFG from the collected treebank productions, using the
# nonterminal S declared earlier as the start symbol.
# This rebinds `grammar`, replacing the small PCFG defined above.
grammar = induce_pcfg(S, productions)
grammar

<Grammar with 71 productions>

In [None]:
# First two (in sorted order) induced productions whose left-hand side is PP.
sorted(grammar.productions(lhs=Nonterminal('PP')))[:2]

[PP -> IN NP [1.0]]

In [None]:
# First two (in sorted order) induced productions whose left-hand side is NNP.
sorted(grammar.productions(lhs=Nonterminal('NNP')))[:2]

[NNP -> 'Agnew' [0.0714286], NNP -> 'Consolidated' [0.0714286]]

In [None]:
# First two (in sorted order) induced productions whose left-hand side is JJ.
sorted(grammar.productions(lhs=Nonterminal('JJ')))[:2]

[JJ -> 'British' [0.142857], JJ -> 'former' [0.142857]]

In [None]:
# First two (in sorted order) induced productions whose left-hand side is NP.
sorted(grammar.productions(lhs=Nonterminal('NP')))[:2]

[NP -> CD NNS [0.133333], NP -> DT JJ JJ NN [0.0666667]]

In [None]:
# Input for the probabilistic parsers: a tokenised English sentence and the
# second toy PCFG. Note that `grammar` is rebound here, so it no longer
# refers to the grammar induced from the treebank.
tokens = "Jack saw Bob with my cookie".split()
grammar = toy_pcfg2
print(grammar)

Grammar with 23 productions (start state = S)
    S -> NP VP [1.0]
    VP -> V NP [0.59]
    VP -> V [0.4]
    VP -> VP PP [0.01]
    NP -> Det N [0.41]
    NP -> Name [0.28]
    NP -> NP PP [0.31]
    PP -> P NP [1.0]
    V -> 'saw' [0.21]
    V -> 'ate' [0.51]
    V -> 'ran' [0.28]
    N -> 'boy' [0.11]
    N -> 'cookie' [0.12]
    N -> 'table' [0.13]
    N -> 'telescope' [0.14]
    N -> 'hill' [0.5]
    Name -> 'Jack' [0.52]
    Name -> 'Bob' [0.48]
    P -> 'with' [0.61]
    P -> 'under' [0.39]
    Det -> 'the' [0.41]
    Det -> 'a' [0.31]
    Det -> 'my' [0.28]


In [None]:
from nltk.parse import pchart

In [None]:
# Print each parse the inside chart parser finds for `tokens`.
parser = pchart.InsideChartParser(grammar)
for tree in parser.parse(tokens):
    print(tree)

(S
  (NP (Name Jack))
  (VP
    (V saw)
    (NP
      (NP (Name Bob))
      (PP (P with) (NP (Det my) (N cookie)))))) (p=6.31607e-06)
(S
  (NP (Name Jack))
  (VP
    (VP (V saw) (NP (Name Bob)))
    (PP (P with) (NP (Det my) (N cookie))))) (p=2.03744e-07)


In [None]:
# Print each parse the random chart parser finds for `tokens`.
parser = pchart.RandomChartParser(grammar)
for tree in parser.parse(tokens):
    print(tree)

(S
  (NP (Name Jack))
  (VP
    (V saw)
    (NP
      (NP (Name Bob))
      (PP (P with) (NP (Det my) (N cookie)))))) (p=6.31607e-06)
(S
  (NP (Name Jack))
  (VP
    (VP (V saw) (NP (Name Bob)))
    (PP (P with) (NP (Det my) (N cookie))))) (p=2.03744e-07)


In [None]:
# Print each parse the unsorted chart parser finds for `tokens`.
parser = pchart.UnsortedChartParser(grammar)
for tree in parser.parse(tokens):
    print(tree)

(S
  (NP (Name Jack))
  (VP
    (V saw)
    (NP
      (NP (Name Bob))
      (PP (P with) (NP (Det my) (N cookie)))))) (p=6.31607e-06)
(S
  (NP (Name Jack))
  (VP
    (VP (V saw) (NP (Name Bob)))
    (PP (P with) (NP (Det my) (N cookie))))) (p=2.03744e-07)


In [None]:
# Print each parse the longest-edge-first chart parser finds for `tokens`.
parser = pchart.LongestChartParser(grammar)
for tree in parser.parse(tokens):
    print(tree)

(S
  (NP (Name Jack))
  (VP
    (V saw)
    (NP
      (NP (Name Bob))
      (PP (P with) (NP (Det my) (N cookie)))))) (p=6.31607e-06)
(S
  (NP (Name Jack))
  (VP
    (VP (V saw) (NP (Name Bob)))
    (PP (P with) (NP (Det my) (N cookie))))) (p=2.03744e-07)


In [None]:
# Inside chart parser again, this time with beam_size set to len(tokens) + 1.
# NOTE(review): the notebook records no output for this cell — presumably the
# narrow beam prunes the edges needed for a full parse; confirm on a fresh run.
beam_width = len(tokens) + 1
parser = pchart.InsideChartParser(grammar, beam_size=beam_width)
for tree in parser.parse(tokens):
    print(tree)

In [None]:
# Set-up for the Viterbi parser: same sentence and grammar as the chart
# parsers above, re-assigned so this section can be read on its own.
from nltk.parse import ViterbiParser
tokens = "Jack saw Bob with my cookie".split()
grammar = toy_pcfg2

In [None]:
# Print the parse(s) yielded by the Viterbi parser for `tokens`.
parser = ViterbiParser(grammar)
for tree in parser.parse(tokens):
    print(tree)

(S
  (NP (Name Jack))
  (VP
    (V saw)
    (NP
      (NP (Name Bob))
      (PP (P with) (NP (Det my) (N cookie)))))) (p=6.31607e-06)


In [None]:
# A feature-structure nonterminal can nest another feature structure as the
# value of one of its features (here: agreement inside a noun category).
from nltk.grammar import FeatStructNonterminal
agreement = FeatStructNonterminal(number='pl', gender='f')
FeatStructNonterminal(pos='n', agr=agreement)

[agr=[gender='f', number='pl'], pos='n']

In [None]:
# A feature-structure nonterminal can also be built from its string notation.
FeatStructNonterminal('VP[+fin]/NP[+pl]')

VP[+fin]/NP[+pl]

In [None]:
# Run nltk's feature-chart demo with the plain FeatureChartParser, printing
# the demo grammar and the chart trace for an English sentence.
feature_parser_cls = nltk.parse.featurechart.FeatureChartParser
nltk.parse.featurechart.demo(
    print_times=False,
    print_grammar=True,
    parser=feature_parser_cls,
    sent='I saw John with a dog',
)


Grammar with 18 productions (start state = S[])
    S[] -> NP[] VP[]
    PP[] -> Prep[] NP[]
    NP[] -> NP[] PP[]
    VP[] -> VP[] PP[]
    VP[] -> Verb[] NP[]
    VP[] -> Verb[]
    NP[] -> Det[pl=?x] Noun[pl=?x]
    NP[] -> 'John'
    NP[] -> 'I'
    Det[] -> 'the'
    Det[] -> 'my'
    Det[-pl] -> 'a'
    Noun[-pl] -> 'dog'
    Noun[-pl] -> 'cookie'
    Verb[] -> 'ate'
    Verb[] -> 'saw'
    Prep[] -> 'with'
    Prep[] -> 'under'

* FeatureChartParser
Sentence: I saw John with a dog
|.I.s.J.w.a.d.|
|[-] . . . . .| [0:1] 'I'
|. [-] . . . .| [1:2] 'saw'
|. . [-] . . .| [2:3] 'John'
|. . . [-] . .| [3:4] 'with'
|. . . . [-] .| [4:5] 'a'
|. . . . . [-]| [5:6] 'dog'
|[-] . . . . .| [0:1] NP[] -> 'I' *
|[-> . . . . .| [0:1] S[] -> NP[] * VP[] {}
|[-> . . . . .| [0:1] NP[] -> NP[] * PP[] {}
|. [-] . . . .| [1:2] Verb[] -> 'saw' *
|. [-> . . . .| [1:2] VP[] -> Verb[] * NP[] {}
|. [-] . . . .| [1:2] VP[] -> Verb[] *
|. [-> . . . .| [1:2] VP[] -> VP[] * PP[] {}
|[---] . . . .| [0:2] S[] ->