In [1]:
import nltk
import numpy as np
import numpy.matlib
from nltk.grammar import *
from pprint import pprint

# Loading the Corpus and Grammars

In [2]:
# Load the ATIS CNF grammar and the ATIS test sentences. If a resource cannot
# be found, download the 'large_grammars' NLTK package and try once more.

def _load_atis():
    """Load the ATIS resources through ``nltk.data``.

    Returns:
        A ``(grammar, raw_sentences, test_sentences)`` tuple: the loaded CNF
        grammar, the raw contents of the sentences file, and the result of
        ``nltk.parse.util.extract_test_sentences`` applied to those contents.

    Raises:
        LookupError: if either resource cannot be located by ``nltk.data``.
    """
    cnf_grammar = nltk.data.load('atis/atis-grammar-cnf.cfg')
    raw_sentences = nltk.data.load('grammars/large_grammars/atis_sentences.txt')
    test_sentences = nltk.parse.util.extract_test_sentences(raw_sentences)
    return cnf_grammar, raw_sentences, test_sentences


try:
    grammar, s, t = _load_atis()
except LookupError:
    # A resource is missing locally: fetch the package, then retry once.
    # A second LookupError is allowed to propagate to the reader.
    nltk.download('large_grammars')
    grammar, s, t = _load_atis()

In [None]:
# Build a second copy of the CNF grammar by parsing the local text file
# directly instead of going through nltk.data.
with open('atis/atis-grammar-cnf.cfg', 'r') as cfg_file:
    cfg_text = cfg_file.read()
grammar1 = nltk.grammar.CFG.fromstring(cfg_text)
grammar1

## Test Grammar

In [73]:
# Small hand-written grammar used to exercise the recognizer on the sentence
# 'I shot an elephant in my pajamas'. Every rule has either two non-terminals
# or a single quoted terminal on its right-hand side.
test_grammar_string = '''
S -> NP VP
NP -> Det N
NP -> NP PP
PP -> P NP
VP -> V NP
VP -> VP PP

NP -> 'I'
N -> 'elephant'
N -> 'pajamas'
V -> 'shot'
P -> 'in'
Det -> 'an'
Det -> 'my'
'''

# Parse the rule text into a CFG object and display its productions.
test_grammar = nltk.grammar.CFG.fromstring(test_grammar_string)
test_grammar.productions()

[S -> NP VP,
 NP -> Det N,
 NP -> NP PP,
 PP -> P NP,
 VP -> V NP,
 VP -> VP PP,
 NP -> 'I',
 N -> 'elephant',
 N -> 'pajamas',
 V -> 'shot',
 P -> 'in',
 Det -> 'an',
 Det -> 'my']

In [None]:
# productions() can filter by the left-hand side and/or by the first item
# of the right-hand side.
order_symbol = Nonterminal('order')
for production in grammar1.productions(lhs=order_symbol):
    print(production)

# Both filters at once: rules with lhs NP whose rhs starts with VP.
rule = grammar1.productions(lhs=Nonterminal('NP'), rhs=Nonterminal('VP'))



In [6]:
# Earlier experiment, kept for reference:
#print(test_grammar.productions(rhs=Nonterminal('elephant')))

# All rules whose right-hand side starts with the non-terminal NP.
np_symbol = Nonterminal('NP')
print(test_grammar.productions(rhs=np_symbol))

[S -> NP VP, NP -> NP PP]


In [182]:
class CKY:
    """CKY chart for a whitespace-tokenised sentence and a grammar.

    The grammar only needs a ``productions(rhs=...)`` method that returns the
    productions whose right-hand side starts with the given item; each
    production must offer ``lhs()`` and ``rhs()``. Binary rules are combined
    pairwise, so the grammar is expected to be in Chomsky normal form.

    Chart layout (``n`` = number of words): the cell ``chart[n - b - i][i]``
    holds the list of left-hand sides that can derive the span of ``b`` words
    starting at word ``i``. Single words therefore sit on the anti-diagonal
    and the whole sentence is in ``chart[0][0]``. Cells that correspond to no
    span stay ``None``; spans with no analysis hold an empty list.

    Attributes are set in ``__init__``: ``grammar``, ``sent_list`` (the
    tokens) and ``chart``.
    """

    def __init__(self,sentence, grammar):
        """Tokenise ``sentence`` on whitespace and fill the chart bottom-up.

        Args:
            sentence: the input string; split with ``str.split()``.
            grammar: grammar object as described in the class docstring.
        """
        self.grammar = grammar
        self.sent_list = sentence.split()
        n = len(self.sent_list)

        self.chart = [[None for _ in range(n)] for _ in range(n)]

        # Spans of length 1: every left-hand side that rewrites to the word.
        for i, word in enumerate(self.sent_list):
            self.chart[n - 1 - i][i] = [
                rule.lhs() for rule in self.grammar.productions(rhs=word)]

        # Longer spans: b is the span length, i its first word, k the length
        # of the left part of the split.
        for b in range(2, n + 1):
            for i in range(0, n - b + 1):
                parents = []
                for k in range(1, b):
                    B = self.chart[n - i - k][i]        # left part: k words from i
                    C = self.chart[n - b - i][i + k]    # right part: the remaining b - k
                    if not (B and C):
                        # One side has no analysis, so nothing can be built
                        # from this split point.
                        continue
                    # Keep every parent of every split, without repeats and
                    # in first-seen order.
                    for parent in self._iter_parents(self.grammar, B, C):
                        if parent not in parents:
                            parents.append(parent)
                self.chart[n - b - i][i] = parents

    @staticmethod
    def _iter_parents(gram, B, C):
        """Yield the lhs of each binary rule ``A -> b c`` with b in B, c in C.

        Iterates the elements of ``B`` in order and, for each one, the
        productions returned by ``gram.productions(rhs=element)`` in order. A
        production is yielded at most once per element of ``B``.
        """
        for element_B in B:
            for prod in gram.productions(rhs=element_B):
                rhs = prod.rhs()
                if len(rhs) == 2 and rhs[0] == element_B and rhs[1] in C:
                    yield prod.lhs()

    @staticmethod
    def check_grammar(gram, B,C):
        """Return the first left-hand side that can be built from B and C.

        Args:
            gram: grammar object offering ``productions(rhs=...)``.
            B: candidate symbols for the first (left) item of the rhs.
            C: candidate symbols for the second (right) item of the rhs.

        Returns:
            The lhs of the first production ``A -> b c`` with ``b`` in ``B``
            and ``c`` in ``C``, or ``None`` when no such production exists.
            Only the first match is reported; the chart itself is filled
            from every match.
        """
        return next(CKY._iter_parents(gram, B, C), None)

    def print_chart(self):
        """Pretty-print this instance's chart."""
        pprint(self.chart)
        


In [181]:
# Run the recognizer on the classic ambiguous example sentence.
test_sentence = 'I shot an elephant in my pajamas'
racog = CKY(test_sentence, test_grammar)
#pprint(racog.chart)

NP
NP
VP
PP
S
NP
VP
VP
S


In [177]:
# chart[0][0] is the cell covering the whole sentence; show what kind of
# object its first entry is.
top_cell = racog.chart[0][0]
print(type(top_cell[0]))


<class 'nltk.grammar.Nonterminal'>


In [143]:
# Inspect the first production of the test grammar: the type and value of the
# second item on its right-hand side, whether the whole right-hand side
# compares equal to a plain tuple of Nonterminals, and the production's type.
prod = test_grammar.productions()[0]
second_rhs_item = prod.rhs()[1]

print(type(second_rhs_item))
print(second_rhs_item)
if prod.rhs() == (Nonterminal('NP'), Nonterminal('VP')):
    print('piru')
print(type(prod))

<class 'nltk.grammar.Nonterminal'>
VP
piru
<class 'nltk.grammar.Production'>


In [94]:
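# NOTE: everything in this cell is commented out. It appears to be an earlier
# function-style draft of the chart-filling loop used by the CKY class and is
# kept for reference only.
# def CKY_recognizer(sentence, grammar):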
#     sent_list = sentence.split()
#     n = len(sent_list)
#     # Initializes the matrix with 'None' types
#     matrix = [[None for i in range(n)] for j in range(n)]

#     for i in range(1, n+1):
#         # Adds all possible terminal nodes according to the grammar
        
#         matrix[n-i][i-1] = [rule.lhs()
#                             for rule in grammar.productions(rhs=sent_list[i-1])]

#     for b in range(2, n+1):
#         for i in range(0, n-b+1):
#             # matrix [n-b-i][i]
#             # row = n-b-i
#             # column = i
            
#             # A = matrix[row][column]
            
#             for k in range(1, b):
#                 B = matrix[n-i-k][i]
#                 C = matrix[n-b-i][i + k]
#                 A = B + C
                
#                 if B and C:
#                     matrix[row][column] = check_grammar(B,C)
#                     # If B and C exist then compare them. Saves checking
#                     # every possible k point
#                     matrix[n-b-i][i] = B + C
                    
#                     # B: list of all possible symbols for the left child (first rhs item)
#                     # C: list of all possible symbols for the right child (second rhs item)
#                     # Is there a rule which has B and C as its children?
#                 else:
#                     continue
                
#                 print('B =', B)
#                 print('C =', C)
#                 if input(''):
#                     pass
#             pprint(matrix)
#             print('\n')
                
#     return matrix

[[[NP, V, TESTE, Det, N, P, Det, N],
  [V, TESTE, Det, N, P, Det, N],
  [Det, N, P, Det, N],
  [N, P, Det, N],
  [P, Det, N],
  [Det, N],
  [N]],
 [[NP, V, TESTE, Det, N, P, Det],
  [V, TESTE, Det, N, P, Det],
  [Det, N, P, Det],
  [N, P, Det],
  [P, Det],
  [Det],
  0],
 [[NP, V, TESTE, Det, N, P],
  [V, TESTE, Det, N, P],
  [Det, N, P],
  [N, P],
  [P],
  0,
  0],
 [[NP, V, TESTE, Det, N], [V, TESTE, Det, N], [Det, N], [N], 0, 0, 0],
 [[NP, V, TESTE, Det], [V, TESTE, Det], [Det], 0, 0, 0, 0],
 [[NP, V, TESTE], [V, TESTE], 0, 0, 0, 0, 0],
 [[NP], 0, 0, 0, 0, 0, 0]]


In [20]:
# Number of rows in the recognizer's chart (one per word of the sentence).
# Read it from the instance: no cell defines a module-level `chart`.
len(racog.chart)

7

In [89]:
# The stop bound of range() is exclusive, so range(1, 2) yields only 1.
split_points = range(1, 2)
for i in split_points:
    print(i)

1
