Skip to content

Commit

Permalink
Refactored a bit, added comments, added support for N number of look-backs
Browse files Browse the repository at this point in the history

Signed-off-by: Rafael Lopez <rafael@case.edu>
  • Loading branch information
rxl211 committed Apr 12, 2012
1 parent a88b04f commit f2987e3
Showing 1 changed file with 40 additions and 34 deletions.
74 changes: 40 additions & 34 deletions artifacts/tables/counts.py
Expand Up @@ -118,56 +118,62 @@ def production_probability(path, oldtable, tables, conf):
@registration.register('table', depends=['production_count', 'infer_grammar'])
def conditional_probabilities(path, oldtable, tables, conf):
grammar = dict((row[0], tuple(row[1:])) for row in tables['infer_grammar'])
stats = dict()
prodnum = dict()
#print grammar
#print "\n"
for nonterm, P in grammar.iteritems():
for i, p in enumerate(P):
prodnum[(nonterm, p)] = i+1
stats[(nonterm, i+1)] = 0
if oldtable is not None:
for nonterm, p, count in oldtable:
stats[(nonterm, prodnum[(nonterm, p)])] = int(count)
#print prodnum


counts = dict()
stack = list()
def callback(grammar, stats, node, depth):
lookBack = 2
def callback(grammar, node, depth):
#if this is a new ast then we want to reset our information
if node.label == "Start":
while stack:
stack.pop()
counts.clear()
prev2 = (None, None, False)
stack.append(prev2)
initStack = (tuple(None for x in range(lookBack)), False)
stack.append(initStack)

prev = stack[len(stack)-1][0]
requirePop = stack[len(stack)-1][1]

prevAsTuple = tuple(prev[x] for x in range(lookBack))

if not node.children:
if stack[len(stack)-1][2]:
if requirePop:
stack.pop()
return
prev2 = stack[len(stack)-1]
if prev2[2]:

if requirePop:
stack.pop()

productions = grammar[node.label]
p = productions.index(':'.join(kid.label for kid in node.children)) + 1


#build up our dictionary of production counts
if not counts.has_key(node.label):
counts[node.label] = {(prev2[0], prev2[1]) : 1}
counts[node.label] = {prevAsTuple : 1}
else:
if not counts.get(node.label).has_key((prev2[0], prev2[1])):
counts[node.label][(prev2[0], prev2[1])] = 1
if not counts.get(node.label).has_key(prevAsTuple):
counts[node.label][prevAsTuple] = 1
else:
counts[node.label][(prev2[0], prev2[1])] += 1

counts[node.label][prevAsTuple] += 1

#append this new rule to the stack as our new "most previous"
if grammar[node.label][p-1].count(":") > 1:
stack.append((prev2[1], grammar[node.label][p-1], False))
else:
stack.append((prev2[1], grammar[node.label][p-1], True))
stats[(node.label, p)] += 1
walktrees(conf['trees'], functools.partial(callback, grammar, stats))
stack.append(
(
tuple(prev[x+1] for x in range(lookBack-1)) + (grammar[node.label][p-1],),
False
)
)
else: #if there is only one nonterminal in this rule then we want to log it as a previous but then pop it from the stack
#this way, rules that have more than one nonterminal keep their "prev" relative to what it was originally.
#e.g. with NT:NT2:NT3, when we get to NT2, we don't want "prev" to include the history built up while we went down NT's productions
stack.append(
(
tuple(prev[x+1] for x in range(lookBack-1)) + (grammar[node.label][p-1],),
True
)
)
walktrees(conf['trees'], functools.partial(callback, grammar))


#now we normalize
Expand All @@ -176,19 +182,19 @@ def callback(grammar, stats, node, depth):
nonterm,
dict(
(
prev2,
prev,
float(num)/float(sum(num for num in myCounts.itervalues()))
)
for prev2, num in myCounts.iteritems()
for prev, num in myCounts.iteritems()
)
)
for nonterm, myCounts in counts.iteritems()
)

table = tuple(
(nonterm, prev2, probability)
(nonterm, prev, probability)
for nonterm, myCounts in probabilities.iteritems()
for prev2, probability in myCounts.iteritems()
for prev, probability in myCounts.iteritems()
)

save(path, table)
Expand Down

0 comments on commit f2987e3

Please sign in to comment.