# CKY parser with constituent copying

This parser was written by Floris. It implements copying by marking every rule as a copy rule or a non-copy rule. Rules are in Chomsky Normal Form. Copy rules apply only if both daughters generate the same string.

In [None]:
import numpy as np
from IPython.display import Image
from IPython.display import display

import cky_constituent_copy as ckypy

We make a toy grammar. None of these rules are copy rules.

In [None]:
# Toy grammar in Chomsky Normal Form, stored as a list of
# (left-hand side, list of right-hand sides) pairs.  Each right-hand side is a
# plain list holding either two nonterminals or a single terminal word.
grammar_bare = [
    ("S", [["NP", "VP"]]),
    ("VP", [
        ["VP", "PP"],
        ["V", "NP"],
        ["eats"],
    ]),
    ("PP", [["P", "NP"]]),
    ("NP", [
        ["NP", "PP"],
        ["Det", "N"],
        ["she"],
    ]),
    ("V", [["eats"]]),
    ("P", [["with"]]),
    ("N", [
        ["fish"],
        ["fork"],
    ]),
    ("Det", [["a"]]),
]

Add copy markers to the bare grammar

In [None]:
grammar_copy = ckypy.mark_no_copy_rules(grammar_bare)

In [None]:
print(ckypy.grammar2string_copy(grammar_copy))

A second grammar that generates full binary trees

In [None]:
# Single-nonterminal grammar: S -> S S | a.
# Each rule is (right-hand side, copy flag); both rules are non-copy rules.
grammar_ambig = [
    ("S", [
        (["S", "S"], False),
        (["a"], False),
    ]),
]

# Same grammar with a probability appended as the third element of each rule.
grammar_ambig_probs = [
    ("S", [
        (["S", "S"], False, 0.5),
        (["a"], False, 0.5),
    ]),
]

In [None]:
print(ckypy.grammar2string_copy(grammar_ambig))

A third grammar with copying

In [None]:
# S -> S S appears twice: once as a non-copy rule (False) and once as a copy
# rule (True), alongside the lexical rule S -> a.
grammar_ambig_copy = [
    ("S", [
        (["S", "S"], False),
        (["S", "S"], True),
        (["a"], False),
    ]),
]

# Same grammar with a probability appended as the third element of each rule.
grammar_ambig_copy_probs = [
    ("S", [
        (["S", "S"], False, 0.3),
        (["S", "S"], True, 0.3),
        (["a"], False, 0.4),
    ]),
]

In [None]:
print(ckypy.grammar2string_copy(grammar_ambig_copy))

## Let's run some examples

In [None]:
# Tokenise the example sentence into a list of words.
s = "she eats a fish with a fork".split()

In [None]:
g = grammar_copy

Parse s with g

In [None]:
# Parse sentence s with grammar g; the call yields the chart and the
# backpointers, which the later cells print and traverse.
chart, backpointers = ckypy.parse(s, g)

In [None]:
ckypy.print_chart(chart)

In [None]:
ckypy.print_backpointers(backpointers)

Collect the trees

In [None]:
# Gather the parse trees for category "S".
# NOTE(review): 0 and len(s) presumably delimit the full-sentence span —
# confirm against collect_trees.
parses = ckypy.collect_trees(0, len(s), "S", chart, backpointers, g, s)

Print the trees to files

In [None]:
# Write each collected tree to its own image file: parse_0.png, parse_1.png, ...
for i, parse in enumerate(parses):
    ckypy.tree_to_png(parse, "parse_{}.png".format(i))

In [None]:
# Load the rendered parse trees and show them inline.
# NOTE(review): the file names are hard-coded for exactly two parses; adjust
# them if the number of parses changes.
x = Image(filename="parse_0.png")
y = Image(filename="parse_1.png")
display(x, y)

Count the trees

In [None]:
ckypy.n_parses("S",chart,backpointers,g,s)

Calculate probabilities

In [None]:
# Probabilistic version of the toy grammar.  Each rule is a triple
# (right-hand side, copy flag, probability); every rule is a non-copy rule and
# the probabilities for each left-hand side sum to 1.
grammar_copy_probs = [
    ("S", [(["NP", "VP"], False, 1.0)]),
    ("VP", [
        (["VP", "PP"], False, 0.25),
        (["V", "NP"], False, 0.5),
        (["eats"], False, 0.25),
    ]),
    ("PP", [(["P", "NP"], False, 1.0)]),
    ("NP", [
        (["NP", "PP"], False, 0.3),
        (["Det", "N"], False, 0.4),
        (["she"], False, 0.3),
    ]),
    ("V", [(["eats"], False, 1.0)]),
    ("P", [(["with"], False, 1.0)]),
    ("N", [
        (["fish"], False, 0.4),
        (["fork"], False, 0.6),
    ]),
    ("Det", [(["a"], False, 1.0)]),
]

In [None]:
g_probs = ckypy.make_rule_probs(grammar_copy_probs)

In [None]:
print(ckypy.grammar2string_copy_probs(grammar_copy_probs))


In [None]:
# Compute probabilities for the parsed sentence using the rule probabilities
# in g_probs; s_prob receives the sentence's log probability.
probs, s_prob = ckypy.probability("S", chart, backpointers, g, s, g_probs)

The log probability of the sentence is the second element of the returned pair

In [None]:
s_prob

Let's un-log it just to get a look

In [None]:
# Exponentiate the log probability to view it as an ordinary probability.
# Uses the notebook's own numpy import instead of reaching into the parser
# module's namespace, which would break if that module changed its imports.
np.exp(s_prob)

## Full binary tree grammars

### Using the binary tree grammar without copying

In [None]:
g = grammar_ambig

In [None]:
# Input string: three copies of the terminal "a".
s = ["a", "a", "a"]

In [None]:
# Parse sentence s with grammar g; the call yields the chart and the
# backpointers, which the later cells print and traverse.
chart, backpointers = ckypy.parse(s, g)

In [None]:
ckypy.print_chart(chart)

In [None]:
ckypy.print_backpointers(backpointers)

In [None]:
ckypy.n_parses("S",chart,backpointers,g,s)

In [None]:
# Gather the parse trees for category "S".
# NOTE(review): 0 and len(s) presumably delimit the full-sentence span —
# confirm against collect_trees.
parses = ckypy.collect_trees(0, len(s), "S", chart, backpointers, g, s)

In [None]:
# Write each collected tree to its own image file: parse_0.png, parse_1.png, ...
for i, parse in enumerate(parses):
    ckypy.tree_to_png(parse, "parse_{}.png".format(i))

In [None]:
# Load the rendered parse trees and show them inline.
# NOTE(review): the file names are hard-coded for exactly two parses; adjust
# them if the number of parses changes.
x = Image(filename="parse_0.png")
y = Image(filename="parse_1.png")
display(x, y)

Probability of sentence

In [None]:
g_probs = ckypy.make_rule_probs(grammar_ambig_probs)

In [None]:
# Compute probabilities for the parsed sentence using the rule probabilities
# in g_probs; s_prob receives the sentence's log probability.
probs, s_prob = ckypy.probability("S", chart, backpointers, g, s, g_probs)

The log probability of the sentence is the second element of the returned pair

In [None]:
s_prob

Let's un-log it just to get a look

In [None]:
# Exponentiate the log probability to view it as an ordinary probability.
# Uses the notebook's own numpy import instead of reaching into the parser
# module's namespace, which would break if that module changed its imports.
np.exp(s_prob)

### Using the binary tree grammar with copying

In [None]:
g = grammar_ambig_copy

In [None]:
# Input string: three copies of the terminal "a".
s = ["a", "a", "a"]

In [None]:
# Parse sentence s with grammar g; the call yields the chart and the
# backpointers, which the later cells print and traverse.
chart, backpointers = ckypy.parse(s, g)

In [None]:
ckypy.print_chart(chart)

In [None]:
ckypy.print_backpointers(backpointers)

In [None]:
ckypy.n_parses("S",chart,backpointers,g,s)

In [None]:
# Gather the parse trees for category "S".
# NOTE(review): 0 and len(s) presumably delimit the full-sentence span —
# confirm against collect_trees.
parses = ckypy.collect_trees(0, len(s), "S", chart, backpointers, g, s)

In [None]:
# Write each collected tree to its own image file: parse_0.png, parse_1.png, ...
for i, parse in enumerate(parses):
    ckypy.tree_to_png(parse, "parse_{}.png".format(i))

In [None]:
# Load the rendered parse trees and show them inline.
# NOTE(review): the file names are hard-coded for exactly four parses; adjust
# them if the number of parses changes.
x = Image(filename="parse_0.png")
y = Image(filename="parse_1.png")
z = Image(filename="parse_2.png")
w = Image(filename="parse_3.png")
display(x, y, z, w)

Probability of sentence

In [None]:
g_probs = ckypy.make_rule_probs(grammar_ambig_copy_probs)

In [None]:
# Compute probabilities for the parsed sentence using the rule probabilities
# in g_probs; s_prob receives the sentence's log probability.
probs, s_prob = ckypy.probability("S", chart, backpointers, g, s, g_probs)

The log probability of the sentence is the second element of the returned pair

In [None]:
s_prob

Let's un-log it just to get a look

In [None]:
# Exponentiate the log probability to view it as an ordinary probability.
# Uses the notebook's own numpy import instead of reaching into the parser
# module's namespace, which would break if that module changed its imports.
np.exp(s_prob)