From 054820734e700eaa3f2f90303e5a5bd183f89567 Mon Sep 17 00:00:00 2001 From: Gabor Angeli Date: Mon, 27 Apr 2015 00:03:18 -0700 Subject: [PATCH] Fix some of the more glaring UD bugs in OpenIE --- .../stanford/nlp/ie/util/RelationTriple.java | 29 +- .../nlp/naturalli/NaturalLogicRelation.java | 395 ++++++------------ src/edu/stanford/nlp/naturalli/OpenIE.java | 71 +++- .../naturalli/RelationTripleSegmenter.java | 5 + src/edu/stanford/nlp/naturalli/Util.java | 32 ++ 5 files changed, 251 insertions(+), 281 deletions(-) diff --git a/src/edu/stanford/nlp/ie/util/RelationTriple.java b/src/edu/stanford/nlp/ie/util/RelationTriple.java index cd4e776e1f..211cd69228 100644 --- a/src/edu/stanford/nlp/ie/util/RelationTriple.java +++ b/src/edu/stanford/nlp/ie/util/RelationTriple.java @@ -1,8 +1,10 @@ package edu.stanford.nlp.ie.util; import edu.stanford.nlp.ie.machinereading.structure.Span; +import edu.stanford.nlp.ling.CoreAnnotations; import edu.stanford.nlp.ling.CoreLabel; import edu.stanford.nlp.ling.IndexedWord; +import edu.stanford.nlp.naturalli.Util; import edu.stanford.nlp.semgraph.SemanticGraph; import edu.stanford.nlp.semgraph.SemanticGraphEdge; import edu.stanford.nlp.util.*; @@ -76,7 +78,7 @@ public CoreLabel subjectHead() { * This method will additionally strip out punctuation as well. */ public String subjectLemmaGloss() { - return StringUtils.join(subject.stream().filter(x -> !x.tag().matches("[\\.\\?,:;'\"!]")).map(CoreLabel::lemma), " "); + return StringUtils.join(subject.stream().filter(x -> !x.tag().matches("[\\.\\?,:;'\"!]")).map(x -> x.lemma() == null ? x.word() : x.lemma()), " "); } /** The object of this relation triple, as a String */ @@ -94,7 +96,7 @@ public CoreLabel objectHead() { * This method will additionally strip out punctuation as well. 
*/ public String objectLemmaGloss() { - return StringUtils.join(object.stream().filter(x -> !x.tag().matches("[\\.\\?,:;'\"!]")).map(CoreLabel::lemma), " "); + return StringUtils.join(object.stream().filter(x -> !x.tag().matches("[\\.\\?,:;'\"!]")).map(x -> x.lemma() == null ? x.word() : x.lemma()), " "); } /** @@ -110,7 +112,7 @@ public String relationGloss() { */ public String relationLemmaGloss() { return StringUtils.join(relation.stream() - .filter(x -> !x.tag().matches("[\\.\\?,:;'\"!]") && !x.lemma().matches("[\\.,;'\"\\?!]")).map(CoreLabel::lemma), " ").toLowerCase(); + .filter(x -> !x.tag().matches("[\\.\\?,:;'\"!]") && (x.lemma() == null || !x.lemma().matches("[\\.,;'\"\\?!]"))).map(x -> x.lemma() == null ? x.word() : x.lemma()), " ").toLowerCase(); } /** A textual representation of the confidence. */ @@ -185,6 +187,27 @@ public String toString() { return "" + this.confidence + "\t" + subjectGloss() + "\t" + relationGloss() + "\t" + objectGloss(); } + /** Print a description of this triple, formatted like the ReVerb outputs. 
*/ + public String toReverbString(String docid, CoreMap sentence) { + return docid + "\t" + + relation.get(0).sentIndex() + "\t" + + subjectGloss().replace('\t', ' ') + "\t" + + relationGloss().replace('\t', ' ') + "\t" + + objectGloss().replace('\t', ' ') + "\t" + + (subject.get(0).index() - 1) + "\t" + + subject.get(subject.size() - 1).index() + "\t" + + (relation.get(0).index() - 1) + "\t" + + relation.get(relation.size() - 1).index() + "\t" + + (object.get(0).index() - 1) + "\t" + + object.get(object.size() - 1).index() + "\t" + + confidenceGloss() + "\t" + + StringUtils.join(sentence.get(CoreAnnotations.TokensAnnotation.class).stream().map(x -> x.word().replace('\t', ' ').replace(" ", "")), " ") + "\t" + + StringUtils.join(sentence.get(CoreAnnotations.TokensAnnotation.class).stream().map(CoreLabel::tag), " ") + "\t" + + subjectLemmaGloss().replace('\t', ' ') + "\t" + + relationLemmaGloss().replace('\t', ' ') + "\t" + + objectLemmaGloss().replace('\t', ' '); + } + @Override public int compareTo(RelationTriple o) { if (this.confidence < o.confidence) { diff --git a/src/edu/stanford/nlp/naturalli/NaturalLogicRelation.java b/src/edu/stanford/nlp/naturalli/NaturalLogicRelation.java index 1b474777a5..09969f51ed 100644 --- a/src/edu/stanford/nlp/naturalli/NaturalLogicRelation.java +++ b/src/edu/stanford/nlp/naturalli/NaturalLogicRelation.java @@ -189,271 +189,136 @@ public NaturalLogicRelation join(NaturalLogicRelation other) { put("poss", NaturalLogicRelation.REVERSE_ENTAILMENT); // put("preconj", NaturalLogicRelation.INDEPENDENCE); // forbidden to see this put("predet", NaturalLogicRelation.INDEPENDENCE); // forbidden to see this - put("prep_aboard", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_about", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_above", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_according_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_across_from", NaturalLogicRelation.REVERSE_ENTAILMENT); // - 
put("prep_across", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_after", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_against", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_ahead_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_along", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_alongside_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_alongside", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_along_with", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_amid", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_among", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_anti", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_apart_from", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_around", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_as_for", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_as_from", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_aside_from", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_as_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_as_per", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_as", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_as_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_at", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_away_from", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_based_on", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_because_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_before", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_behind", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_below", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_beneath", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_beside", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_besides", NaturalLogicRelation.REVERSE_ENTAILMENT); // - 
put("prep_between", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_beyond", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_but", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_by_means_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_by", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_aboard", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_about", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_above", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_according_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_across_from", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_across", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_after", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_against", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_ahead_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_along", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_alongside_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_along_with", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_amid", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_among", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_anti", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_apart_from", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_around", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_as_for", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_as_from", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_aside_from", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_as_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_as_per", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_as", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_as_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_at", 
NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_away_from", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_based_on", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_because_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_before", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_behind", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_below", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_beneath", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_beside", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_besides", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_between", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_beyond", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_but", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_by_means_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_by", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_close_by", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_close_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_compared_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_compared_with", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_concerning", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_considering", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_contrary_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_depending_on", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_despite", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_down", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_due_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_during", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_except_for", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_excepting", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_except", 
NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_excluding", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_exclusive_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_far_from", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_followed_by", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_following", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_for", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_from", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_in_accordance_with", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_in_addition_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_in_case_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_in_front_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_in_lieu_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_in_place_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_in", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_inside_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_inside", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_in_spite_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_instead_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_into", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_irrespective_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_like", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_close_by", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_close_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_minus", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_near", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_near_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_next_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_off_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // - 
put("prepc_off", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_compared_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_compared_with", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_on_account_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_on_behalf_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_concerning", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_on", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_considering", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_on_top_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_onto", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_contrary_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_opposite", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_out_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_outside_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_outside", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_over", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_owing_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_past", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_per", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_plus", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_preliminary_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_preparatory_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_previous_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_prior_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_pursuant_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_regarding", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_regardless_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_round", NaturalLogicRelation.REVERSE_ENTAILMENT); 
// - put("prepc_save", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_since", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_subsequent_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_such_as", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_thanks_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_than", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_through", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_together_with", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_toward", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_towards", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_underneath", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_under", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_unlike", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_until", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_upon", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_up", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_versus", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_via", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_within", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_without", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_with_regard_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_with_respect_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prepc_with", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_depending_on", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_dep", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_despite", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_down", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_due_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_during", 
NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_en", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_except_for", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_excepting", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_except", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_excluding", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_exclusive_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_followed_by", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_following", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_for", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_from", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_if", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_in_accordance_with", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_in_addition_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_in_case_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_including", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_in_front_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_in_lieu_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_in_place_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_in", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_inside_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_inside", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_in_spite_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_instead_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_into", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_irrespective_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_like", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_minus", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_near", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_near_to", 
NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_next_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_off_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_off", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_on_account_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_on_behalf_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_on", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_on_top_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_onto", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_opposite", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_out_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_out", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_outside_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_outside", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_over", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_owing_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_past", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_per", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_plus", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_preliminary_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_preparatory_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_previous_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_prior_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_pursuant_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_regarding", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_regardless_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:aboard", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:about", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:above", NaturalLogicRelation.REVERSE_ENTAILMENT); // + 
put("nmod:according_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:across_from", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:across", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:after", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:against", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:ahead_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:along", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:alongside_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:alongside", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:along_with", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:amid", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:among", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:anti", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:apart_from", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:around", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:as_for", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:as_from", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:aside_from", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:as_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:as_per", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:as", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:as_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:at", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:away_from", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:based_on", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:because_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:before", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:behind", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:below", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:beneath", 
NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:beside", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:besides", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:between", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:beyond", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:but", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:by_means_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:by", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:depending_on", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:dep", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:despite", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:down", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:due_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:during", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:en", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:except_for", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:excepting", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:except", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:excluding", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:exclusive_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:followed_by", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:following", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:for", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:from", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:if", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:in_accordance_with", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:in_addition_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:in_case_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:including", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:in_front_of", 
NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:in_lieu_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:in_place_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:in", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:inside_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:inside", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:in_spite_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:instead_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:into", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:irrespective_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:like", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:minus", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:near", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:near_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:next_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:off_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:off", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:of", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:on_account_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:on_behalf_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:on", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:on_top_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:onto", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:opposite", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:out_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:out", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:outside_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:outside", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:over", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:owing_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // + 
put("nmod:past", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:per", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:plus", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:preliminary_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:preparatory_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:previous_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:prior_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:pursuant_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:regarding", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:regardless_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // put("prep", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_round", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_save", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_since", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_subsequent_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_such_as", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_thanks_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_than", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_throughout", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_through", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_together_with", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_toward", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_towards", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_underneath", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_under", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_unlike", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_until", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_upon", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_up", 
NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_versus", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_via", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_vs.", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_whether", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_within", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_without", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_with_regard_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_with_respect_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // - put("prep_with", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:round", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:save", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:since", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:subsequent_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:such_as", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:thanks_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:than", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:throughout", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:through", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:together_with", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:to", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:toward", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:towards", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:underneath", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:under", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:unlike", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:until", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:upon", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:up", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:versus", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:via", 
NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:vs.", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:whether", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:within", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:without", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:with_regard_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:with_respect_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nmod:with", NaturalLogicRelation.REVERSE_ENTAILMENT); // put("prt", NaturalLogicRelation.INDEPENDENCE); // put("punct", NaturalLogicRelation.EQUIVALENT); // put("purpcl", NaturalLogicRelation.REVERSE_ENTAILMENT); // deprecated into advmod @@ -498,7 +363,7 @@ public static NaturalLogicRelation forDependencyInsertion(String dependencyLabel return rel; } else { // System.err.println("Unknown dependency arc for NaturalLogicRelation: " + dependencyLabel); - if (dependencyLabel.startsWith("prep_")) { + if (dependencyLabel.startsWith("nmod:")) { return NaturalLogicRelation.REVERSE_ENTAILMENT; } else if (dependencyLabel.startsWith("conj_")) { return NaturalLogicRelation.REVERSE_ENTAILMENT; diff --git a/src/edu/stanford/nlp/naturalli/OpenIE.java b/src/edu/stanford/nlp/naturalli/OpenIE.java index e4e27c9bda..fedbc77421 100644 --- a/src/edu/stanford/nlp/naturalli/OpenIE.java +++ b/src/edu/stanford/nlp/naturalli/OpenIE.java @@ -27,6 +27,10 @@ import java.io.File; import java.io.IOException; import java.util.*; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; import java.util.stream.Collectors; /** @@ -37,13 +41,23 @@ @SuppressWarnings({"FieldCanBeLocal", "UnusedDeclaration"}) public class OpenIE implements Annotator { - private static enum Optimization { GENERAL, KB } + private static enum OutputFormat { REVERB, OLLIE, DEFAULT } /** * A pattern for rewriting "NN_1 is a JJ NN_2" --> NN_1 is JJ" */ 
private static SemgrexPattern adjectivePattern = SemgrexPattern.compile("{}=obj >nsubj {}=subj >cop {}=be >det {word:/an?/} >amod {}=adj ?>/prep_.*/=prep {}=pobj"); + // + // Static Options (for running standalone) + // + + @Execution.Option(name="format", gloss="The format to output the triples in.") + private static OutputFormat FORMAT = OutputFormat.DEFAULT; + + // + // Annotator Options (for running in the pipeline) + // @Execution.Option(name="splitter.model", gloss="The location of the clause splitting model.") private String splitterModel = DefaultPaths.DEFAULT_OPENIE_CLAUSE_SEARCHER; @@ -236,7 +250,7 @@ public void annotateSentence(CoreMap sentence, Map> c } else { List clauses = clausesInSentence(sentence); List fragments = entailmentsFromClauses(clauses); - fragments.add(new SentenceFragment(sentence.get(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class), false)); +// fragments.add(new SentenceFragment(sentence.get(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class), false)); extractions.addAll(relationsInFragments(fragments, sentence, canonicalMentionMap)); sentence.set(NaturalLogicAnnotations.EntailedSentencesAnnotation.class, fragments); sentence.set(NaturalLogicAnnotations.RelationTriplesAnnotation.class, extractions); @@ -341,26 +355,42 @@ private static void processDocument(AnnotationPipeline pipeline, String docid, S pipeline.annotate(ann); // Get the extractions - Collection extractions = new ArrayList<>(); - for (CoreMap sentence : ann.get(CoreAnnotations.SentencesAnnotation.class)) { - extractions.addAll(sentence.get(NaturalLogicAnnotations.RelationTriplesAnnotation.class)); + boolean empty = true; + synchronized (System.out) { + for (CoreMap sentence : ann.get(CoreAnnotations.SentencesAnnotation.class)) { + for (RelationTriple extraction : sentence.get(NaturalLogicAnnotations.RelationTriplesAnnotation.class)) { + // Print the extractions + switch (FORMAT) { + case REVERB: + 
System.out.println(extraction.toString()); + break; + case OLLIE: + System.out.println(extraction.confidenceGloss() + ": (" + extraction.subjectGloss() + "; " + extraction.relationGloss() + "; " + extraction.objectGloss() + ")"); + break; + case DEFAULT: + System.out.println(extraction.toString()); + break; + default: + throw new IllegalStateException("Format is not implemented: " + FORMAT); + } + empty = false; + } + } } - if (extractions.isEmpty()) { + if (empty) { System.err.println("No extractions in: " + ("stdin".equals(docid) ? document : docid)); } - - // Print the extractions - synchronized (System.out) { - extractions.forEach(System.out::println); - } } /** * An entry method for annotating standard in with OpenIE extractions. */ - public static void main(String[] args) throws IOException { + public static void main(String[] args) throws IOException, InterruptedException { // Parse the arguments Properties props = StringUtils.argsToProperties(args); + Execution.fillOptions(new Class[]{ OpenIE.class, Execution.class}, props); + AtomicInteger exceptionCount = new AtomicInteger(0); + ExecutorService exec = Executors.newFixedThreadPool(Execution.threads); // Parse the files to process String[] filesToProcess = props.getProperty("", "").split("\\s+"); @@ -399,8 +429,26 @@ public static void main(String[] args) throws IOException { } for (String file : filesToProcess) { System.err.println("Processing file: " + file); - processDocument(pipeline, file, IOUtils.slurpFile(new File(file))); + if (Execution.threads > 1) { + final String fileToSubmit = file; + exec.submit(() -> { + try { + processDocument(pipeline, file, IOUtils.slurpFile(new File(fileToSubmit))); + } catch (Throwable t) { + t.printStackTrace(); + exceptionCount.incrementAndGet(); + } + }); + } else { + // Single-threaded run: process the file inline instead of silently skipping it + processDocument(pipeline, file, IOUtils.slurpFile(new File(file))); + } } } + + // Exit + exec.shutdown(); + exec.awaitTermination(Long.MAX_VALUE, TimeUnit.SECONDS); + System.exit(exceptionCount.get()); } } diff --git a/src/edu/stanford/nlp/naturalli/RelationTripleSegmenter.java 
b/src/edu/stanford/nlp/naturalli/RelationTripleSegmenter.java index cdfe4947e3..8536ba5173 100644 --- a/src/edu/stanford/nlp/naturalli/RelationTripleSegmenter.java +++ b/src/edu/stanford/nlp/naturalli/RelationTripleSegmenter.java @@ -436,6 +436,11 @@ private Optional> getValidAdverbChunk(SemanticGraph parse, Index * @return A relation triple, if this sentence matches one of the patterns of a valid relation triple. */ public Optional segment(SemanticGraph parse, Optional confidence, boolean consumeAll) { + // Work on a copy of the tree, with prepositional case edges stripped from that copy + parse = new SemanticGraph(parse); + Util.stripPrepCases(parse); + + // Run pattern loop PATTERN_LOOP: for (SemgrexPattern pattern : VERB_PATTERNS) { // For every candidate pattern... SemgrexMatcher m = pattern.matcher(parse); if (m.matches()) { // ... see if it matches the sentence diff --git a/src/edu/stanford/nlp/naturalli/Util.java b/src/edu/stanford/nlp/naturalli/Util.java index e585363f68..bb9a98b47e 100644 --- a/src/edu/stanford/nlp/naturalli/Util.java +++ b/src/edu/stanford/nlp/naturalli/Util.java @@ -261,6 +261,38 @@ public static List cleanTree(SemanticGraph tree) { return extraEdges; } + + /** + * Strip away leaf case edges whose governor is itself the target of an nmod (prepositional) edge. + * This replicates the behavior of the old Stanford dependencies on universal dependencies. + * @param tree The tree to modify in place; each matching case edge and its dependent vertex are removed.
+ */ + public static void stripPrepCases(SemanticGraph tree) { + // Collect case edges whose governor has an incoming 'nmod' edge and whose dependent has no outgoing edges + List toClean = new ArrayList<>(); + for (SemanticGraphEdge edge : tree.edgeIterable()) { + if ("case".equals(edge.getRelation().toString())) { + boolean isPrepTarget = false; + for (SemanticGraphEdge incoming : tree.incomingEdgeIterable(edge.getGovernor())) { + if ("nmod".equals(incoming.getRelation().getShortName())) { + isPrepTarget = true; + break; + } + } + if (isPrepTarget && !tree.outgoingEdgeIterator(edge.getDependent()).hasNext()) { + toClean.add(edge); + } + } + } + + // Delete the collected edges and their dependent vertices (done after the scan, not during iteration) + for (SemanticGraphEdge edge : toClean) { + tree.removeEdge(edge); + tree.removeVertex(edge.getDependent()); + assert isTree(tree); + } + } + /** * A little utility function to make sure a SemanticGraph is a tree. * @param tree The tree to check.