Skip to content

Commit

Permalink
Correct conditional probability computations, changed output format
Browse files Browse the repository at this point in the history
Signed-off-by: Rafael Lopez <rafael@case.edu>
  • Loading branch information
rxl211 committed Apr 13, 2012
1 parent f2987e3 commit b3b8ffc
Showing 1 changed file with 28 additions and 19 deletions.
47 changes: 28 additions & 19 deletions artifacts/tables/counts.py
Expand Up @@ -119,15 +119,15 @@ def production_probability(path, oldtable, tables, conf):
def conditional_probabilities(path, oldtable, tables, conf):
grammar = dict((row[0], tuple(row[1:])) for row in tables['infer_grammar'])

counts = dict()
counts = dict() #counts how many times a RULE is reached by a specific prevTuple (e.g. counts[NT => A:B:C][(NT1,NT2)] == 5)
terminalCounts = dict() #counts how many times a prevTuple reaches a specific NONTERMINAL (e.g. terminalCounts[(NT1,NT2)][NT] == 78)
stack = list()
lookBack = 2
lookBack = 2 #how many items in prevTuple?
def callback(grammar, node, depth):
#if this is a new ast then we want to reset our information
#if this is a new ast then we want to clear our stack
if node.label == "Start":
while stack:
stack.pop()
counts.clear()
initStack = (tuple(None for x in range(lookBack)), False)
stack.append(initStack)

Expand All @@ -147,20 +147,30 @@ def callback(grammar, node, depth):
productions = grammar[node.label]
p = productions.index(':'.join(kid.label for kid in node.children)) + 1

#build up our dictionary of production counts
if not counts.has_key(node.label):
counts[node.label] = {prevAsTuple : 1}
chosenRule = node.label + " => " + grammar[node.label][p-1]


if not counts.has_key(chosenRule):
counts[chosenRule] = {prevAsTuple : 1}
else:
if not counts.get(node.label).has_key(prevAsTuple):
counts[node.label][prevAsTuple] = 1
if not counts.get(chosenRule).has_key(prevAsTuple):
counts[chosenRule][prevAsTuple] = 1
else:
counts[node.label][prevAsTuple] += 1
counts[chosenRule][prevAsTuple] += 1

if not terminalCounts.has_key(prevAsTuple):
terminalCounts[prevAsTuple] = {node.label : 1}
else:
if not terminalCounts.get(prevAsTuple).has_key(node.label):
terminalCounts[prevAsTuple][node.label] = 1
else:
terminalCounts[prevAsTuple][node.label] += 1

#append this new rule to the stack as our new "most previous"
if grammar[node.label][p-1].count(":") > 1:
stack.append(
(
tuple(prev[x+1] for x in range(lookBack-1)) + (grammar[node.label][p-1],),
tuple(prev[x+1] for x in range(lookBack-1)) + (node.label,),
False
)
)
Expand All @@ -169,33 +179,32 @@ def callback(grammar, node, depth):
#e.g. with NT:NT2:NT3, when we get to NT2, we don't want previous to include the previous from when we went down NT's productions
stack.append(
(
tuple(prev[x+1] for x in range(lookBack-1)) + (grammar[node.label][p-1],),
tuple(prev[x+1] for x in range(lookBack-1)) + (node.label,),
True
)
)
walktrees(conf['trees'], functools.partial(callback, grammar))


#now we normalize
probabilities = dict(
(
nonterm,
rule,
dict(
(
prev,
float(num)/float(sum(num for num in myCounts.itervalues()))
float(num)/float(terminalCounts[prev][rule.split("=>")[0].strip()]) #P[rule | prev]
)
for prev, num in myCounts.iteritems()
)
)
for nonterm, myCounts in counts.iteritems()
for rule, myCounts in counts.iteritems()
)

table = tuple(
(nonterm, prev, probability)
for nonterm, myCounts in probabilities.iteritems()
(lookBack, prod) + tuple(nt for nt in prev) + (probability,)
for prod, myCounts in probabilities.iteritems()
for prev, probability in myCounts.iteritems()
)

save(path, table)
return table
return table

0 comments on commit b3b8ffc

Please sign in to comment.