In [12]:
"""
Symbol names shared by the context-free grammar (CFG) definitions.
Each constant is simply the string form of its own name.
"""
# Phrase-level categories: sentence, noun phrase, verb phrase.
S, NP, VP = "S", "NP", "VP"
# Word-level categories: determiner, noun, verb, pronoun.
D, N, V, P = "D", "N", "V", "P"

In [23]:
"""
Small example CFG for initial testing.
Every key is a left-hand side; every tuple in its list is one right-hand side,
e.g. the entry S : [(NP, VP)] encodes the production S -> NP VP.
"""
grammar = {}

# Phrase-structure productions (two non-terminals on the right-hand side).
grammar[S] = [(NP, VP)]
grammar[NP] = [(D, N)]
grammar[VP] = [(V, P)]

# Lexical productions (a single terminal word on the right-hand side).
grammar[D] = [("the",)]
grammar[N] = [("woman",)]
grammar[V] = [("saw",)]
grammar[P] = [("him",)]

In [14]:
"""
Example input sentence for the CYK (also written CKY) parser.
The words are separated by single spaces, and each one appears as a
terminal in the example grammar.
"""

sentence = "the woman saw him"

In [51]:
def cyk(sentence, grammar):
    """
    Perform the CYK algorithm to parse a sentence given a CNF grammar.

    :param sentence: The sentence to parse; it is split on whitespace into words.
    :param grammar: A dictionary representing the CNF grammar, with the key being
        the LHS and the value being a list of possible RHS tuples, e.g.
        ("NP", "VP") for a binary rule or ("the",) for a lexical rule.
    :return: An n x n table of lists, where n is the number of words. Cell
        [i][j] (i <= j) lists every LHS that derives words i..j inclusive, each
        symbol appearing at most once; cells with j < i are left empty. An
        empty sentence yields an empty table.
    """
    words = sentence.split()
    n = len(words)

    # One (initially empty) list of symbols per (start, end) span.
    cky_table = [[[] for _ in range(n)] for _ in range(n)]

    # Flatten the grammar once so the nested loops below do not re-walk the
    # dictionary (and skip over non-binary rules) for every split point.
    productions = [(lhs, rhs) for lhs, rules in grammar.items() for rhs in rules]
    binary_rules = [(lhs, rhs) for lhs, rhs in productions if len(rhs) == 2]

    # Diagonal: symbols that rewrite directly to the i-th word.
    for i, word in enumerate(words):
        cell = cky_table[i][i]
        for lhs, rhs in productions:
            if rhs == (word,) and lhs not in cell:
                cell.append(lhs)

    # Longer spans, shortest first, so both sub-spans are already filled in.
    for length in range(1, n):
        for start in range(n - length):
            end = start + length
            cell = cky_table[start][end]
            # `split` is the index of the first word of the right-hand part.
            for split in range(start + 1, end + 1):
                left = cky_table[start][split - 1]
                right = cky_table[split][end]
                if not left or not right:
                    continue  # no rule can combine with an empty sub-span
                for lhs, rhs in binary_rules:
                    b, c = rhs
                    # Record each LHS once per cell, even when several split
                    # points or rules derive it (ambiguous spans).
                    if lhs not in cell and b in left and c in right:
                        cell.append(lhs)

    return cky_table

In [52]:
# Parse the sample sentence and show the resulting chart, one table row per line.
table = cyk("the woman saw him", grammar)
print(*table, sep="\n")

[['D'], ['NP'], [], ['S']]
[[], ['N'], [], []]
[[], [], ['V'], ['VP']]
[[], [], [], ['P']]
