In [1]:
import math

In [2]:
"""
Constant Names for CFG
"""
# Non-terminal symbols. Each constant's value is the label used as a key
# in the grammar dictionary and in the cells of the CKY table.
S = "S"    # sentence
NP = "NP"  # noun phrase
VP = "VP"  # verb phrase
D = "D"    # determiner
N = "N"    # noun
V = "V"    # verb
P = "P"    # pronoun

In [29]:
"""
example CFG for initial testing
e.g. S : [((NP,VP), 0.8)] => S -> NP VP with a probability of 0.8
(each LHS maps to a list of (RHS, probability) pairs; a terminal RHS is the bare word string)
"""
# Each left-hand side maps to a list of (right-hand side, probability) pairs.
# A binary right-hand side is a 2-tuple of non-terminals; a lexical
# right-hand side is the word itself as a plain string.
grammar = {
    S: [
        ((NP, VP), 0.8),
        ((VP, NP), 0.2),
    ],
    NP: [
        ((D, N), 0.8),
        ((NP, NP), 0.2),
    ],
    VP: [
        ((V, P), 1),
    ],
    D: [
        ("the", 1),
    ],
    N: [
        ("woman", 0.6),
        ("man", 0.4),
    ],
    V: [
        ("saw", 1),
    ],
    P: [
        ("him", 1),
    ],
}


In [30]:
"""
example sentences to be parsed by the CKY algorithm
"""

# Full example: every word has a lexical rule in the example grammar.
sentence = "the woman saw him"
# Same sentence without its final word.
sentence01 = "the woman saw"
# Only the first two words (determiner + noun).
sentence02 = "the woman"
# A single word.
sentence03 = "woman"
# Full example plus "yesterday", a word the example grammar has no rule for.
sentence04 = "the woman saw him yesterday"

In [23]:
def cky(sentence, grammar):
    """
    Perform the probabilistic CKY algorithm to parse a sentence given a CNF grammar.

    :param sentence: The sentence to parse; it is tokenised on whitespace.
    :param grammar: A dictionary representing the CNF grammar, with the key being the LHS
        and the value being a list of (RHS, probability) pairs. A lexical RHS is the word
        itself as a plain string; a binary RHS is a sequence of two non-terminal names.
        Rules of any other shape are ignored, as are rules with probability 0.
    :return: A nxn CKY table (list of lists of dicts). Cell [i][j] maps every non-terminal
        that can derive words i..j (inclusive) to the max possible log probability of that
        derivation. Only cells with i <= j are ever filled; an empty sentence yields [].
    :raises ValueError: if a lexical or binary rule has a negative probability
        (raised by math.log).
    """
    # split sentence into words
    words = sentence.split()
    n = len(words)

    # Sort the rules into lexical and binary ones once, up front, and take each
    # logarithm once, instead of rescanning the grammar for every span/split.
    lexical_rules = []  # (lhs, word, log_prob)
    binary_rules = []   # (lhs, left_child, right_child, log_prob)
    for lhs, rules in grammar.items():
        for rhs, prob in rules:
            is_lexical = isinstance(rhs, str)
            # A string RHS is always a terminal, even when it is two characters
            # long; only a non-string RHS of length 2 is a binary rule.
            if not is_lexical and len(rhs) != 2:
                continue
            # A zero-probability rule can never take part in a parse and has
            # no finite log probability, so skip it rather than crash.
            if prob == 0:
                continue
            log_prob = math.log(prob)
            if is_lexical:
                lexical_rules.append((lhs, rhs, log_prob))
            else:
                binary_rules.append((lhs, rhs[0], rhs[1], log_prob))

    # Initialize the table to be nxn
    cky_table = [[{} for _ in range(n)] for _ in range(n)]

    # Fill in the diagonals of the table, keeping the best rule when several
    # lexical rules produce the same word for the same LHS.
    for i, word in enumerate(words):
        cell = cky_table[i][i]
        for lhs, terminal, log_prob in lexical_rules:
            if terminal == word and (lhs not in cell or log_prob > cell[lhs]):
                cell[lhs] = log_prob

    # Fill longer spans from shorter ones: for every span length, start
    # position and split point, try every binary rule.
    for span in range(2, n + 1):
        for start in range(n - span + 1):
            end = start + span - 1
            cell = cky_table[start][end]
            for split in range(start + 1, end + 1):
                left = cky_table[start][split - 1]
                right = cky_table[split][end]
                if not left or not right:
                    continue  # no constituent on one side: nothing to combine
                for lhs, left_sym, right_sym, log_prob in binary_rules:
                    if left_sym in left and right_sym in right:
                        # Calculate the log probability of the current parse
                        current_prob = left[left_sym] + right[right_sym] + log_prob

                        # Keep it if the LHS is new for this cell or this parse is better
                        if lhs not in cell or current_prob > cell[lhs]:
                            cell[lhs] = current_prob
    return cky_table
    

In [31]:
# Parse the four-word example sentence with the example grammar.
table = cky(sentence, grammar)

# Print the chart one row per line: the cell at row i, column j holds the
# non-terminals (with their best log probability) spanning words i..j;
# cells below the diagonal are never filled and stay empty.
for row in table:
    print(row)

[{'D': 0.0}, {'NP': -0.7339691750802004}, {}, {'S': -0.9571127263944101}]
[{}, {'N': -0.5108256237659907}, {}, {}]
[{}, {}, {'V': 0.0}, {'VP': 0.0}]
[{}, {}, {}, {'P': 0.0}]
