Skip to content

Commit

Permalink
Break ties using proper nouns, if those are part of a sentence (eg, p…
Browse files Browse the repository at this point in the history
…os was added) addresses #1326
  • Loading branch information
AngledLuffa committed Jan 5, 2024
1 parent b33f912 commit c4c3de5
Showing 1 changed file with 16 additions and 1 deletion.
17 changes: 16 additions & 1 deletion stanza/pipeline/coref_processor.py
Expand Up @@ -114,12 +114,27 @@ def process(self, document):

# treat the longest span as the representative
# break ties using the first one
# IF there is the POS processor, and it adds upos tags
# to the sentence, ties are broken first by maximum
# number of proper nouns (upos PROPN) and then earliest in the document
max_len = 0
best_span = None
max_propn = 0
for span_idx, span in enumerate(span_cluster):
if span[1] - span[0] > max_len:
sent_id = sent_ids[span[0]]
sentence = sentences[sent_id]
start_word = word_pos[span[0]]
end_word = word_pos[span[1]]
# very UD specific test for the largest number of proper nouns in a mention
# will do nothing if POS is not active (the pos values will all be None)
num_propn = sum(word.pos == 'PROPN' for word in sentence.words[start_word:end_word])
print(span, num_propn)

if ((span[1] - span[0] > max_len) or
span[1] - span[0] == max_len and num_propn > max_propn):
max_len = span[1] - span[0]
best_span = span_idx
max_propn = num_propn

mentions = []
for span in span_cluster:
Expand Down

0 comments on commit c4c3de5

Please sign in to comment.