In [12]:
import nltk
import numpy as np
from nltk import PCFG

In [13]:
pcfg=PCFG.fromstring("""
S -> NP VP [0.9]
S -> VP [0.1]
VP -> V NP [0.5]
VP -> V [0.5]
NP -> Det N [0.3]
NP -> N [0.7]
N -> 'cat' [0.2]
N -> 'book' [0.2]
N -> 'bird' [0.2]
N -> 'dog' [0.4]
V -> 'read' [0.1]
V -> 'chased' [0.6]
V -> 'ate' [0.3]
Det -> 'the' [0.5]
Det -> 'a' [0.5]
""")

In [14]:
sentences=[
"the cat chased the dog",
"the dog chased the cat",
"the dog chased the bird",
"Book read"
]

In [22]:
def cyk_parse_with_probability(pcfg,sentence):
    n=len(sentence)
    table=[[[] for _ in range(n)] for _ in range(n)]
    for i in range(n):
        for prod in pcfg.productions(rhs=sentence[i]):
            table[i][i]=[(prod.lhs(),prod.prob())]
    for length in range(2,n+1):
        for i in range(n-length+1):
            j=i+length-1
            for k in range(i,j):
                for prod in pcfg.productions():
                    for left,left_prob in table[i][k]:
                        for right,right_prob in table[k+1][j]:
                            if prod.rhs()==(left,right):
                                prob=left_prob*right_prob*prod.prob()
                                if table[i][j] is None:
                                    table[i][j]=[(prod.lhs(),prob)]
                                else:
                                    table[i][j].append((prod.lhs(),prob))
    if pcfg.start() in [lhs for lhs,prob in table[0][n-1]]:
        for lhs,prob in table[0][n-1]:
            if lhs==pcfg.start():
                return prob
    return 0.0

In [25]:
for sent in sentences:
    probability = cyk_parse_with_probability(pcfg,sent.lower().split())
    if probability > 0:
        print(f'The sentence "{sent}" is grammatically correct with a probability {probability:.4f}')
    else:
        print(f"The sentence \"{sent}\" is not grammatically correct.")

The sentence "the cat chased the dog" is grammatically correct with a probability 0.0005
The sentence "the dog chased the cat" is grammatically correct with a probability 0.0005
The sentence "the dog chased the bird" is grammatically correct with a probability 0.0005
The sentence "Book read" is not grammatically correct.


In [27]:
grammar = PCFG.fromstring("""
S -> NP VP [1.0]
PP -> P NP [1.0]
VP -> V NP [0.7] | VP PP [0.3]
P -> 'with' [1.0]
V -> 'saw' [1.0]
NP -> NP PP [0.4] |'astronomers' [0.1] | 'ears' [0.18] | 'saw' [0.04] | 'stars' [0.18] | 'telescopes' [0.1]
""")

In [28]:
from nltk.parse.viterbi import ViterbiParser
parser=ViterbiParser(grammar)
sentence="astronomers saw stars with ears"
tokens=sentence.split()
try:
    for tree in parser.parse(tokens):
        tree.pretty_print()
        tree.draw()
except ValueError:
    print("No parse tree found in this sentence")

                  S                    
      ____________|____                 
     |                 VP              
     |        _________|___             
     |       |             NP          
     |       |     ________|____        
     |       |    |             PP     
     |       |    |         ____|___    
     NP      V    NP       P        NP 
     |       |    |        |        |   
astronomers saw stars     with     ears

