From 4246a16eaccf28f79923acd4bdb3f0c04dd1750d Mon Sep 17 00:00:00 2001
From: Gabor Angeli
Date: Thu, 12 Mar 2015 16:55:30 -0700
Subject: [PATCH] Tests still failing, but OpenIE is starting to come together
---
README.md | 8 +-
build.xml | 2 +
.../spanish/SpanishTokenizerITest.java | 2 +-
.../TokenSequenceMatcherITest.java | 4 +-
.../naturalli/NaturalLogicAnnotatorITest.java | 44 +
.../stanford/nlp/naturalli/OpenIEITest.java | 156 +
.../nlp/naturalli/OperatorScopeITest.java | 749 +++++
.../stanford/nlp/naturalli/PolarityITest.java | 66 +
.../parser/nndep/DependencyParserITest.java | 6 +-
...ProtobufAnnotationSerializerSlowITest.java | 5 +
.../nlp/pipeline/StanfordCoreNLPITest.java | 15 +-
.../semgraph/semgrex/SemgrexPatternITest.java | 36 -
.../nlp/sentiment/SentimentTrainingITest.java | 18 +
lib/README | 2 +-
.../nlp/classify/ColumnDataClassifier.java | 70 +-
src/edu/stanford/nlp/dcoref/CorefChain.java | 14 +-
.../nlp/graph/DirectedMultiGraph.java | 29 +-
.../nlp/ie/AbstractSequenceClassifier.java | 62 +-
.../nlp/ie/NERClassifierCombiner.java | 3 +-
.../stanford/nlp/ie/NERFeatureFactory.java | 15 +-
.../stanford/nlp/ie/crf/CRFClassifier.java | 37 +-
.../stanford/nlp/ie/crf/CRFCliqueTree.java | 12 +-
src/edu/stanford/nlp/ie/crf/NERGUI.java | 2 +-
src/edu/stanford/nlp/ie/demo/NERDemo.java | 80 +-
.../stanford/nlp/ie/util/RelationTriple.java | 338 +++
.../french/process/FrenchTokenizer.java | 16 +-
.../spanish/SpanishVerbStripper.java | 4 +-
.../spanish/process/SpanishTokenizer.java | 100 +-
.../stanford/nlp/ling/AbstractCoreLabel.java | 26 +
.../stanford/nlp/ling/AnnotationLookup.java | 6 +-
.../stanford/nlp/ling/CoreAnnotations.java | 14 -
src/edu/stanford/nlp/ling/CoreLabel.java | 81 +-
src/edu/stanford/nlp/ling/IndexedWord.java | 52 +-
.../nlp/ling/tokensregex/NodePattern.java | 3 +-
.../nlp/ling/tokensregex/SequenceMatcher.java | 3 +-
.../nlp/ling/tokensregex/SequencePattern.java | 42 +-
.../tokensregex/TokenSequencePattern.java | 4 -
.../parser/TokenSequenceParser.java | 1242 ++++----
.../tokensregex/parser/TokenSequenceParser.jj | 29 +-
src/edu/stanford/nlp/math/ArrayMath.java | 74 +-
src/edu/stanford/nlp/math/SloppyMath.java | 73 +-
.../stanford/nlp/naturalli/Monotonicity.java | 13 +
.../nlp/naturalli/MonotonicityType.java | 13 +
.../naturalli/NaturalLogicAnnotations.java | 58 +
.../nlp/naturalli/NaturalLogicAnnotator.java | 435 +++
.../nlp/naturalli/NaturalLogicRelation.java | 481 +++
.../nlp/naturalli/NaturalLogicWeights.java | 85 +
src/edu/stanford/nlp/naturalli/OpenIE.java | 597 ++++
src/edu/stanford/nlp/naturalli/Operator.java | 160 +
.../stanford/nlp/naturalli/OperatorSpec.java | 97 +
src/edu/stanford/nlp/naturalli/Polarity.java | 242 ++
.../nlp/naturalli/SentenceFragment.java | 51 +
src/edu/stanford/nlp/neural/NeuralUtils.java | 12 +
src/edu/stanford/nlp/neural/SimpleTensor.java | 13 +
.../nlp/optimization/QNMinimizer.java | 8 +-
.../parser/lexparser/LexicalizedParser.java | 2 +-
.../stanford/nlp/parser/nndep/Classifier.java | 11 +-
src/edu/stanford/nlp/parser/nndep/Config.java | 1 -
.../nlp/parser/nndep/DependencyParser.java | 14 +-
src/edu/stanford/nlp/parser/nndep/Util.java | 33 +-
.../shiftreduce/BasicFeatureFactory.java | 4 +-
.../parser/shiftreduce/BinaryTransition.java | 4 +-
.../shiftreduce/CreateTransitionSequence.java | 6 +-
.../parser/shiftreduce/FeatureFactory.java | 10 +-
.../parser/shiftreduce/ShiftReduceParser.java | 16 +-
.../parser/shiftreduce/ShiftReduceUtils.java | 5 +-
.../parser/shiftreduce/UnaryTransition.java | 4 +-
src/edu/stanford/nlp/pipeline/Annotation.java | 1 +
.../nlp/pipeline/AnnotationPipeline.java | 2 +-
src/edu/stanford/nlp/pipeline/Annotator.java | 4 +
.../nlp/pipeline/AnnotatorFactories.java | 57 +-
.../pipeline/AnnotatorImplementations.java | 22 +-
src/edu/stanford/nlp/pipeline/CoreNLP.proto | 101 +-
.../stanford/nlp/pipeline/CoreNLPProtos.java | 2598 +++++++++++++++--
.../nlp/pipeline/ParserAnnotator.java | 12 -
.../ProtobufAnnotationSerializer.java | 92 +-
.../nlp/pipeline/StanfordCoreNLP.java | 8 +-
.../nlp/pipeline/TokenizerAnnotator.java | 89 +-
.../pipeline/WordsToSentencesAnnotator.java | 8 +-
.../pipeline/demo/StanfordCoreNlpDemo.java | 64 +-
.../nlp/process/DocumentPreprocessor.java | 186 +-
src/edu/stanford/nlp/process/Morpha.flex | 27 +-
src/edu/stanford/nlp/process/Morpha.java | 33 +-
.../nlp/process/WordToSentenceProcessor.java | 7 +-
.../stanford/nlp/semgraph/SemanticGraph.java | 3 +-
.../nlp/semgraph/SemanticGraphFactory.java | 31 -
.../nlp/semgraph/SemanticGraphUtils.java | 83 +-
.../semgraph/semgrex/SemgrexBatchParser.java | 13 +-
.../nlp/sentiment/RNNTrainOptions.java | 24 +
.../sentiment/SentimentCostAndGradient.java | 58 +-
.../nlp/sentiment/SentimentModel.java | 103 +-
.../nlp/sentiment/SentimentTraining.java | 12 +-
.../ColumnTabDocumentReaderWriter.java | 7 +-
.../PlainTextDocumentReaderAndWriter.java | 1 +
src/edu/stanford/nlp/time/TimeFormatter.java | 2 +
.../nlp/trees/ENUniversalPOS.tsurgeon | 297 ++
.../trees/EnglishGrammaticalStructure.java | 2 +-
.../nlp/trees/GrammaticalStructure.java | 58 +-
.../stanford/nlp/trees/PennTreeReader.java | 5 +
src/edu/stanford/nlp/trees/Tree.java | 54 +-
.../nlp/trees/TreeCoreAnnotations.java | 14 +-
src/edu/stanford/nlp/trees/TreeGraphNode.java | 160 +-
src/edu/stanford/nlp/trees/Trees.java | 41 +
.../nlp/trees/UniversalPOSMapper.java | 66 +
.../nlp/trees/tregex/TregexPattern.java | 13 +-
.../nlp/trees/tregex/tsurgeon/AdjoinNode.java | 2 +-
.../tregex/tsurgeon/AdjoinToFootNode.java | 2 +-
.../tregex/tsurgeon/AdjoinToHeadNode.java | 2 +-
.../trees/tregex/tsurgeon/AuxiliaryTree.java | 26 +-
.../tregex/tsurgeon/CreateSubtreeNode.java | 2 +-
.../trees/tregex/tsurgeon/HoldTreeNode.java | 2 +-
.../nlp/trees/tregex/tsurgeon/Tsurgeon.java | 6 +-
.../trees/tregex/tsurgeon/TsurgeonParser.java | 1 -
.../trees/tregex/tsurgeon/TsurgeonParser.jj | 2 +-
.../trees/tregex/tsurgeon/TsurgeonParser.jjt | 2 +-
.../tsurgeon/TsurgeonParserTokenManager.java | 109 +-
src/edu/stanford/nlp/util/ArrayCoreMap.java | 39 +-
.../stanford/nlp/util/FileBackedCache.java | 2 +-
.../stanford/nlp/util/IterableIterator.java | 46 +-
src/edu/stanford/nlp/util/MutableLong.java | 28 +-
src/edu/stanford/nlp/util/StringUtils.java | 24 +-
.../nlp/ie/util/RelationTripleTest.java | 250 ++
.../naturalli/NaturalLogicRelationTest.java | 65 +
.../stanford/nlp/naturalli/PolarityTest.java | 133 +
.../shiftreduce/BinaryTransitionTest.java | 4 +-
.../nlp/semgraph/SemanticGraphUtilsTest.java | 41 -
.../trees/tregex/tsurgeon/TsurgeonTest.java | 6 +
.../edu/stanford/nlp/util/CoreMapTest.java | 3 +-
.../nlp/util/IterableIteratorTest.java | 71 -
129 files changed, 9063 insertions(+), 2082 deletions(-)
create mode 100644 itest/src/edu/stanford/nlp/naturalli/NaturalLogicAnnotatorITest.java
create mode 100644 itest/src/edu/stanford/nlp/naturalli/OpenIEITest.java
create mode 100644 itest/src/edu/stanford/nlp/naturalli/OperatorScopeITest.java
create mode 100644 itest/src/edu/stanford/nlp/naturalli/PolarityITest.java
delete mode 100644 itest/src/edu/stanford/nlp/semgraph/semgrex/SemgrexPatternITest.java
create mode 100644 src/edu/stanford/nlp/ie/util/RelationTriple.java
create mode 100644 src/edu/stanford/nlp/naturalli/Monotonicity.java
create mode 100644 src/edu/stanford/nlp/naturalli/MonotonicityType.java
create mode 100644 src/edu/stanford/nlp/naturalli/NaturalLogicAnnotations.java
create mode 100644 src/edu/stanford/nlp/naturalli/NaturalLogicAnnotator.java
create mode 100644 src/edu/stanford/nlp/naturalli/NaturalLogicRelation.java
create mode 100644 src/edu/stanford/nlp/naturalli/NaturalLogicWeights.java
create mode 100644 src/edu/stanford/nlp/naturalli/OpenIE.java
create mode 100644 src/edu/stanford/nlp/naturalli/Operator.java
create mode 100644 src/edu/stanford/nlp/naturalli/OperatorSpec.java
create mode 100644 src/edu/stanford/nlp/naturalli/Polarity.java
create mode 100644 src/edu/stanford/nlp/naturalli/SentenceFragment.java
create mode 100644 src/edu/stanford/nlp/trees/ENUniversalPOS.tsurgeon
create mode 100644 src/edu/stanford/nlp/trees/UniversalPOSMapper.java
create mode 100644 test/src/edu/stanford/nlp/ie/util/RelationTripleTest.java
create mode 100644 test/src/edu/stanford/nlp/naturalli/NaturalLogicRelationTest.java
create mode 100644 test/src/edu/stanford/nlp/naturalli/PolarityTest.java
delete mode 100644 test/src/edu/stanford/nlp/semgraph/SemanticGraphUtilsTest.java
delete mode 100644 test/src/edu/stanford/nlp/util/IterableIteratorTest.java
diff --git a/README.md b/README.md
index a1c4a5598b..a2078cb1ce 100644
--- a/README.md
+++ b/README.md
@@ -1,14 +1,14 @@
Stanford CoreNLP
================
-Stanford CoreNLP provides a set of natural language analysis tools written in Java. It can take raw human language text input and give the base forms of words, their parts of speech, whether they are names of companies, people, etc., normalize dates, times, and numeric quantities, and mark up the structure of sentences in terms of phrases and word dependencies, and indicate which noun phrases refer to the same entities. It was originally developed for English, but now also provides varying levels of support for Arabic, (mainland) Chinese, French, and German. Stanford CoreNLP is an integrated framework, which make it very easy to apply a bunch of language analysis tools to a piece of text. Starting from plain text, you can run all the tools on it with just two lines of code. Its analyses provide the foundational building blocks for higher-level and domain-specific text understanding applications. Stanford CoreNLP is a set of stable and well-tested natural language processing tools, widely used by various groups in academia, government, and industry.
+Stanford CoreNLP provides a set of natural language analysis tools written in Java. It can take raw human language text input and give the base forms of words, their parts of speech, whether they are names of companies, people, etc., normalize dates, times, and numeric quantities, and mark up the structure of sentences in terms of phrases and word dependencies, and indicate which noun phrases refer to the same entities. It was originally developed for English, but now also provides varying levels of support for Arabic, (mainland) Chinese, French, German, and Spanish. Stanford CoreNLP is an integrated framework, which makes it very easy to apply a bunch of language analysis tools to a piece of text. Starting from plain text, you can run all the tools on it with just two lines of code. Its analyses provide the foundational building blocks for higher-level and domain-specific text understanding applications. Stanford CoreNLP is a set of stable and well-tested natural language processing tools, widely used by various groups in academia, government, and industry.
-The Stanford CoreNLP code is written in Java and licensed under the GNU General Public License (v3 or later). Note that this is the full GPL, which allows many free uses, but not its use in distributed proprietary software.
+The Stanford CoreNLP code is written in Java and licensed under the GNU General Public License (v3 or later). Note that this is the full GPL, which allows many free uses, but not its use in proprietary software that you distribute.
You can find releases of Stanford CoreNLP on [Maven Central](http://search.maven.org/#browse%7C11864822).
-You can find more explanation and documentation of Stanford CoreNLP on [the Stanford CoreNLP homepage](http://nlp.stanford.edu/software/corenlp.shtml#Demo).
+You can find more explanation and documentation on [the Stanford CoreNLP homepage](http://nlp.stanford.edu/software/corenlp.shtml#Demo).
-The most recent models associated the code in this repository can be found [here](http://nlp.stanford.edu/software/stanford-corenlp-models-current.jar).
+The most recent models associated with the code in the HEAD of this repository can be found [here](http://nlp.stanford.edu/software/stanford-corenlp-models-current.jar).
For information about making contributions to Stanford CoreNLP, see the file `CONTRIBUTING.md`.
diff --git a/build.xml b/build.xml
index 891a4faf89..91cce31d7e 100644
--- a/build.xml
+++ b/build.xml
@@ -107,6 +107,8 @@
-->
+
tf = SpanishTokenizer.coreLabelFactory();
+ final TokenizerFactory tf = SpanishTokenizer.ancoraFactory();
tf.setOptions("");
tf.setOptions("tokenizeNLs");
diff --git a/itest/src/edu/stanford/nlp/ling/tokensregex/TokenSequenceMatcherITest.java b/itest/src/edu/stanford/nlp/ling/tokensregex/TokenSequenceMatcherITest.java
index ef81bedc47..b449f97a14 100644
--- a/itest/src/edu/stanford/nlp/ling/tokensregex/TokenSequenceMatcherITest.java
+++ b/itest/src/edu/stanford/nlp/ling/tokensregex/TokenSequenceMatcherITest.java
@@ -1,6 +1,5 @@
package edu.stanford.nlp.ling.tokensregex;
-import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.pipeline.*;
import edu.stanford.nlp.util.CoreMap;
@@ -9,10 +8,8 @@
import edu.stanford.nlp.util.Timing;
import junit.framework.TestCase;
-import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
-import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
@@ -1526,4 +1523,5 @@ public void testCaseInsensitive2(){
boolean match = m.find();
assertTrue(match);
}
+
}
diff --git a/itest/src/edu/stanford/nlp/naturalli/NaturalLogicAnnotatorITest.java b/itest/src/edu/stanford/nlp/naturalli/NaturalLogicAnnotatorITest.java
new file mode 100644
index 0000000000..4ccfd58ac8
--- /dev/null
+++ b/itest/src/edu/stanford/nlp/naturalli/NaturalLogicAnnotatorITest.java
@@ -0,0 +1,44 @@
+package edu.stanford.nlp.naturalli;
+
+import edu.stanford.nlp.ling.CoreAnnotations;
+import edu.stanford.nlp.ling.CoreLabel;
+import edu.stanford.nlp.pipeline.Annotation;
+import edu.stanford.nlp.pipeline.StanfordCoreNLP;
+import org.junit.Test;
+
+import java.util.List;
+import java.util.Properties;
+
+import static org.junit.Assert.*;
+
+/**
+ * A lightweight test to make sure the annotator runs in the pipeline.
+ * For more in-depth tests, see {@link edu.stanford.nlp.naturalli.OperatorScopeITest} and
+ * {@link edu.stanford.nlp.naturalli.PolarityITest}.
+ *
+ * @author Gabor Angeli
+ */
+public class NaturalLogicAnnotatorITest {
+
+ @Test
+ public void testAnnotatorRuns() {
+ // Run pipeline
+ StanfordCoreNLP pipeline = new StanfordCoreNLP(new Properties(){{
+ setProperty("annotators", "tokenize,ssplit,pos,lemma,parse,natlog");
+ setProperty("ssplit.isOneSentence", "true");
+ setProperty("tokenize.class", "PTBTokenizer");
+ setProperty("tokenize.language", "en");
+ setProperty("enforceRequirements", "true");
+ }});
+ Annotation ann = new Annotation("All cats have tails");
+ pipeline.annotate(ann);
+
+ // Check output
+ List tokens = ann.get(CoreAnnotations.SentencesAnnotation.class).get(0).get(CoreAnnotations.TokensAnnotation.class);
+ assertTrue(tokens.get(0).containsKey(NaturalLogicAnnotations.OperatorAnnotation.class));
+ assertTrue(tokens.get(0).get(NaturalLogicAnnotations.PolarityAnnotation.class).isUpwards());
+ assertTrue(tokens.get(1).get(NaturalLogicAnnotations.PolarityAnnotation.class).isDownwards());
+ assertTrue(tokens.get(2).get(NaturalLogicAnnotations.PolarityAnnotation.class).isUpwards());
+ assertTrue(tokens.get(3).get(NaturalLogicAnnotations.PolarityAnnotation.class).isUpwards());
+ }
+}
diff --git a/itest/src/edu/stanford/nlp/naturalli/OpenIEITest.java b/itest/src/edu/stanford/nlp/naturalli/OpenIEITest.java
new file mode 100644
index 0000000000..697466301e
--- /dev/null
+++ b/itest/src/edu/stanford/nlp/naturalli/OpenIEITest.java
@@ -0,0 +1,156 @@
+package edu.stanford.nlp.naturalli;
+
+import edu.stanford.nlp.ie.util.RelationTriple;
+import edu.stanford.nlp.ling.CoreAnnotations;
+import edu.stanford.nlp.pipeline.Annotation;
+import edu.stanford.nlp.pipeline.StanfordCoreNLP;
+import edu.stanford.nlp.util.CoreMap;
+import edu.stanford.nlp.util.StringUtils;
+import org.junit.Test;
+
+import java.util.*;
+import java.util.stream.Collectors;
+
+import static org.junit.Assert.*;
+
+/**
+ * Test the natural logic OpenIE extractor at {@link edu.stanford.nlp.naturalli.OpenIE}.
+ *
+ * @author Gabor Angeli
+ */
+public class OpenIEITest {
+ protected static StanfordCoreNLP pipeline = new StanfordCoreNLP(new Properties(){{
+ setProperty("annotators", "tokenize,ssplit,pos,lemma,depparse,natlog,openie");
+ setProperty("ssplit.isOneSentence", "true");
+ setProperty("tokenize.class", "PTBTokenizer");
+ setProperty("tokenize.language", "en");
+ setProperty("enforceRequirements", "true");
+ }});
+
+ public CoreMap annotate(String text) {
+ Annotation ann = new Annotation(text);
+ pipeline.annotate(ann);
+ return ann.get(CoreAnnotations.SentencesAnnotation.class).get(0);
+ }
+
+ public void assertExtracted(String expected, String text) {
+ boolean found = false;
+ Collection extractions = annotate(text).get(NaturalLogicAnnotations.RelationTriplesAnnotation.class);
+ for (RelationTriple extraction : extractions) {
+ if (extraction.toString().equals("1.0\t" + expected)) {
+ found = true;
+ }
+ }
+ assertTrue("The extraction '" + expected + "' was not found in '" + text + "'", found);
+ }
+
+ public void assertExtracted(Set expected, String text) {
+ Collection extractions = annotate(text).get(NaturalLogicAnnotations.RelationTriplesAnnotation.class);
+ Set guess = extractions.stream().filter(x -> x.confidence > 0.1).map(RelationTriple::toString).collect(Collectors.toSet());
+ assertEquals(StringUtils.join(expected.stream().sorted(), "\n").toLowerCase(), StringUtils.join(guess.stream().map( x -> x.substring(x.indexOf("\t") + 1) ).sorted(), "\n").toLowerCase());
+ }
+
+ public void assertEntailed(String expected, String text) {
+ boolean found = false;
+ Collection extractions = annotate(text).get(NaturalLogicAnnotations.EntailedSentencesAnnotation.class);
+ for (SentenceFragment extraction : extractions) {
+ if (extraction.toString().equals(expected)) {
+ found = true;
+ }
+ }
+ assertTrue("The sentence '" + expected + "' was not entailed from '" + text + "'", found);
+ }
+
+
+ @Test
+ public void testAnnotatorRuns() {
+ annotate("all cats have tails");
+ }
+
+ @Test
+ public void testBasicEntailments() {
+ assertEntailed("some cats have tails", "some blue cats have tails");
+ assertEntailed("blue cats have tails", "some blue cats have tails");
+ assertEntailed("cats have tails", "some blue cats have tails");
+ }
+
+ @Test
+ public void testBasicExtractions() {
+ assertExtracted("cats\thave\ttails", "some cats have tails");
+ }
+
+ @Test
+ public void testExtractionsObamaWikiOne() {
+ assertExtracted(new HashSet() {{
+ add("Barack Hussein Obama II\tis 44th and current President of\tUnited States");
+ add("Barack Hussein Obama II\tis 44th President of\tUnited States");
+ add("Barack Hussein Obama II\tis current President of\tUnited States");
+ add("Barack Hussein Obama II\tis President of\tUnited States");
+ add("Barack Hussein Obama II\tis\tPresident");
+ add("Barack Hussein Obama II\tis\tcurrent President");
+ add("Barack Hussein Obama II\tis\t44th President");
+ }}, "Barack Hussein Obama II is the 44th and current President of the United States, and the first African American to hold the office.");
+ }
+
+ @Test
+ public void testExtractionsObamaWikiTwo() {
+ assertExtracted(new HashSet() {{
+ add("Obama\tis graduate of\tColumbia University");
+ add("Obama\tis graduate of\tHarvard Law School");
+ add("Obama\tborn in\tHonolulu Hawaii");
+ add("he\tserved as\tpresident of Harvard Law Review");
+ add("he\tserved as\tpresident");
+ add("Obama\tis\tgraduate");
+ }}, "Born in Honolulu, Hawaii, Obama is a graduate of Columbia University and Harvard Law School, where he served as president of the Harvard Law Review.");
+ }
+
+ @Test
+ public void testExtractionsObamaWikiThree() {
+ assertExtracted(new HashSet() {{
+ add("He\twas\tcommunity organizer in Chicago");
+ add("He\twas\tcommunity organizer");
+ add("He\tearning\tlaw degree");
+ }}, "He was a community organizer in Chicago before earning his law degree.");
+ }
+
+ @Test
+ public void testExtractionsObamaWikiFour() {
+ assertExtracted(new HashSet() {{
+ add("He\tworked as\tcivil rights attorney");
+ add("He\tworked as\trights attorney");
+ add("He\ttaught\tconstitutional law");
+ add("He\ttaught\tlaw");
+ add("He\ttaught at\tUniversity of Chicago Law School");
+ add("He\ttaught at\tUniversity of Chicago Law School from 1992");
+ add("He\ttaught at\tUniversity");
+ add("He\ttaught to\t2004"); // shouldn't be here, but sometimes appears?
+ }}, "He worked as a civil rights attorney and taught constitutional law at the University of Chicago Law School from 1992 to 2004.");
+ }
+
+ @Test
+ public void testExtractionsObamaWikiFive() {
+ assertExtracted(new HashSet() {{
+ add("He\tserved\tthree terms");
+ add("He\trepresenting\t13th District in Illinois Senate");
+ add("He\trepresenting\t13th District");
+ add("He\trepresenting\tDistrict in Illinois Senate");
+ add("He\trepresenting\tDistrict");
+ add("He\trunning unsuccessfully for\tUnited States House of Representatives in 2000");
+ add("He\trunning unsuccessfully for\tUnited States House of Representatives");
+ add("He\trunning unsuccessfully for\tUnited States House");
+ add("He\trunning for\tUnited States House of Representatives in 2000");
+ add("He\trunning for\tUnited States House of Representatives");
+ add("He\trunning for\tUnited States House");
+ }}, "He served three terms representing the 13th District in the Illinois Senate from 1997 to 2004, running unsuccessfully for the United States House of Representatives in 2000.");
+ }
+
+ @Test
+ public void testExtractionsObamaWikiSix() {
+ assertExtracted(new HashSet() {{
+ add("He\tdefeated\tRepublican nominee John McCain");
+ add("He\tdefeated\tnominee John McCain");
+ add("He\twas inaugurated as\tpresident on January 20 2009");
+ add("He\twas inaugurated as\tpresident");
+ }}, "He then defeated Republican nominee John McCain in the general election, and was inaugurated as president on January 20, 2009.");
+ }
+}
diff --git a/itest/src/edu/stanford/nlp/naturalli/OperatorScopeITest.java b/itest/src/edu/stanford/nlp/naturalli/OperatorScopeITest.java
new file mode 100644
index 0000000000..1202e9d79d
--- /dev/null
+++ b/itest/src/edu/stanford/nlp/naturalli/OperatorScopeITest.java
@@ -0,0 +1,749 @@
+package edu.stanford.nlp.naturalli;
+
+import edu.stanford.nlp.ling.CoreAnnotations;
+import edu.stanford.nlp.ling.CoreLabel;
+import edu.stanford.nlp.pipeline.Annotation;
+import edu.stanford.nlp.pipeline.StanfordCoreNLP;
+import edu.stanford.nlp.util.StringUtils;
+import org.junit.*;
+
+import java.util.*;
+
+import static org.junit.Assert.*;
+
+/**
+ * A test for the {@link NaturalLogicAnnotator} setting the right
+ * {@link edu.stanford.nlp.naturalli.NaturalLogicAnnotations.OperatorAnnotation}s.
+ *
+ * TODO(gabor) add parses to the parser using "like" as a verb (among other things)
+ *
+ * @author Gabor Angeli
+ */
+public class OperatorScopeITest {
+
+ private static final StanfordCoreNLP pipeline = new StanfordCoreNLP(new Properties(){{
+ setProperty("annotators", "tokenize,ssplit,pos,lemma,parse");
+ setProperty("ssplit.isOneSentence", "true");
+ setProperty("tokenize.class", "PTBTokenizer");
+ setProperty("tokenize.language", "en");
+ }});
+
+ static {
+ pipeline.addAnnotator(new NaturalLogicAnnotator());
+ }
+
+ @SuppressWarnings("unchecked")
+ private Optional[] annotate(String text) {
+ Annotation ann = new Annotation(text);
+ pipeline.annotate(ann);
+ List tokens = ann.get(CoreAnnotations.SentencesAnnotation.class).get(0).get(CoreAnnotations.TokensAnnotation.class);
+ Optional[] scopes = new Optional[tokens.size()];
+ Arrays.fill(scopes, Optional.empty());
+ for (int i = 0; i < tokens.size(); ++i) {
+ if (tokens.get(i).containsKey(NaturalLogicAnnotations.OperatorAnnotation.class)) {
+ scopes[i] = Optional.of(tokens.get(i).get(NaturalLogicAnnotations.OperatorAnnotation.class));
+ }
+ }
+ return scopes;
+ }
+
+ private void checkScope(int subjBegin, int subjEnd, int objBegin, int objEnd, Optional guess) {
+ assertTrue("No quantifier found", guess.isPresent());
+ assertEquals("Bad subject begin " + guess.get(), subjBegin, guess.get().subjectBegin);
+ assertEquals("Bad subject end " + guess.get(), subjEnd, guess.get().subjectEnd);
+ assertEquals("Bad object begin " + guess.get(), objBegin, guess.get().objectBegin);
+ assertEquals("Bad object end " + guess.get(), objEnd, guess.get().objectEnd);
+ }
+
+ private void checkScope(int subjBegin, int subjEnd, Optional guess) {
+ assertTrue("No quantifier found", guess.isPresent());
+ assertEquals("Bad subject begin " + guess.get(), subjBegin, guess.get().subjectBegin);
+ assertEquals("Bad subject end " + guess.get(), subjEnd, guess.get().subjectEnd);
+ assertEquals("Two-place quantifier matched", subjEnd, guess.get().objectBegin);
+ assertEquals("Two place quantifier matched", subjEnd, guess.get().objectEnd);
+ }
+
+ private void checkScope(String spec) {
+ String[] terms = spec.split("\\s+");
+// int quantStart = -1;
+ int quantEnd = -1;
+ int subjBegin = -1;
+ int subjEnd = -1;
+ int objBegin = -1;
+ int objEnd = -1;
+ boolean seenSubj = false;
+ int tokenIndex = 0;
+ List cleanSentence = new ArrayList<>();
+ for (String term : terms) {
+ switch (term) {
+ case "{":
+// quantStart = tokenIndex;
+ break;
+ case "}":
+ quantEnd = tokenIndex;
+ break;
+ case "[":
+ if (!seenSubj) {
+ subjBegin = tokenIndex;
+ } else {
+ objBegin = tokenIndex;
+ }
+ break;
+ case "]":
+ if (!seenSubj) {
+ subjEnd = tokenIndex;
+ seenSubj = true;
+ } else {
+ objEnd = tokenIndex;
+ }
+ break;
+ default:
+ cleanSentence.add(term);
+ tokenIndex += 1;
+ break;
+ }
+ }
+ Optional[] scopes = annotate(StringUtils.join(cleanSentence, " "));
+ System.err.println("Checking [@ " + (quantEnd - 1) + "]: " + spec);
+ if (objBegin >= 0 && objEnd >= 0) {
+ checkScope(subjBegin, subjEnd, objBegin, objEnd, scopes[quantEnd - 1]);
+ } else {
+ checkScope(subjBegin, subjEnd, scopes[quantEnd - 1]);
+ }
+ }
+
+ @Test
+ public void annotatorRuns() {
+ annotate("All green cats have tails.");
+ }
+
+ @Test
+ public void all_X_verb_Y() {
+ checkScope(1, 2, 2, 4, annotate("All cats eat mice.")[0]);
+ checkScope(1, 2, 2, 4, annotate("All cats have tails.")[0]);
+ }
+
+ @Test
+ public void all_X_want_Y() {
+ checkScope(1, 2, 2, 4, annotate("All cats want milk.")[0]);
+ }
+
+ @Test
+ public void all_X_verb_prep_Y() {
+ checkScope(1, 2, 2, 5, annotate("All cats are in boxes.")[0]);
+ checkScope(1, 2, 2, 5, annotate("All cats voted for Roosevelt.")[0]);
+ checkScope(1, 5, 5, 8, annotate("All cats who like dogs voted for Teddy.")[0]);
+ checkScope(1, 2, 2, 6, annotate("All cats have spoken to Fido.")[0]);
+ }
+
+ @Test
+ public void all_X_be_Y() {
+ checkScope(1, 2, 2, 4, annotate("All cats are cute")[0]);
+ }
+
+ @Test
+ public void all_X_can_Y() {
+ checkScope(1, 2, 2, 4, annotate("All cats can purr")[0]);
+ }
+
+ @Test
+ public void all_X_relclause_verb_Y() {
+ checkScope(1, 5, 5, 7, annotate("All cats who like dogs eat fish.")[0]);
+ }
+
+ @Test
+ public void all_of_X_verb_Y() {
+ checkScope(1, 4, 4, 6, annotate("All of the cats hate dogs.")[0]);
+ checkScope(1, 6, 6, 9, annotate("Each of the other 99 companies owns one computer.")[0]);
+ }
+
+ @Test
+ public void PER_predicate() {
+ checkScope(0, 1, 1, 4, annotate("Felix likes cat food.")[0]);
+ }
+
+ @Test
+ public void PER_has_predicate() {
+ checkScope(0, 1, 1, 5, annotate("Felix has liked cat food.")[0]);
+ }
+
+ @Test
+ public void PER_predicate_prep() {
+ checkScope(0, 1, 1, 7, annotate("Jack paid the bank for 10 years")[0]);
+ }
+
+ @Test
+ public void PER_has_predicate_prep() {
+ checkScope(0, 1, 1, 5, annotate("Felix has spoken to Fido.")[0]);
+ }
+
+ @Test
+ public void PER_is_nn() {
+ checkScope(0, 1, 1, 4, annotate("Felix is a cat.")[0]);
+ }
+
+ @Test
+ public void PER_is_jj() {
+ checkScope(0, 1, 1, 3, annotate("Felix is cute.")[0]);
+ }
+
+ @Test
+ public void few_x_verb_y() {
+ checkScope(1, 2, 2, 4, annotate("all cats chase dogs")[0]);
+ }
+
+ @Test
+ public void a_few_x_verb_y() {
+ checkScope(2, 3, 3, 5, annotate("a few cats chase dogs")[1]);
+ assertFalse(annotate("a few cats chase dogs")[0].isPresent());
+ }
+
+ @Test
+ public void binary_no() {
+ checkScope(1, 2, 2, 4, annotate("no cats chase dogs")[0]);
+ }
+
+ @Test
+ public void unary_not() {
+ Optional[] quantifiers = annotate("some cats don't like dogs");
+ checkScope(1, 2, 2, 6, quantifiers[0]); // some
+ checkScope(4, 6, quantifiers[3]); // no
+ assertFalse(quantifiers[3].get().isBinary()); // is unary no
+ }
+
+ @Test
+ public void num_X_verb_Y() {
+ checkScope(1, 2, 2, 4, annotate("Three cats eat mice.")[0]);
+ checkScope(1, 2, 2, 4, annotate("3 cats have tails.")[0]);
+ }
+
+ @Test
+ public void at_least_num_X_verb_Y() {
+ checkScope(3, 4, 4, 6, annotate("at least Three cats eat mice.")[2]);
+ checkScope(3, 4, 4, 6, annotate("at least 3 cats have tails.")[2]);
+ }
+
+ @Test
+ public void everyone_pp_verb_Y() {
+ checkScope(1, 3, 3, 5, annotate("everyone at Stanford likes cats.")[0]);
+ checkScope(1, 5, 5, 7, annotate("everyone who is at Stanford likes cats.")[0]);
+ }
+
+ @Test
+ public void there_are_np() {
+ checkScope(2, 3, annotate("there are cats")[1]);
+ }
+
+ @Test
+ public void there_are_np_pp() {
+ checkScope(2, 6, annotate("there are cats who like dogs")[1]);
+ }
+
+ @Test
+ public void regressionStrangeComma() {
+ Optional[] operators = annotate("all cats, have tails.");
+ checkScope(1, 2, 3, 5, operators[0]); // though, unclear if this should even be true?
+ }
+
+ @Test
+ public void fracasSentencesWithAll() {
+ checkScope("{ All } [ APCOM managers ] [ have company cars ]");
+ checkScope("{ All } [ Canadian residents ] [ can travel freely within Europe ]");
+ checkScope("{ All } [ Europeans ] [ are people ]");
+ checkScope("{ All } [ Europeans ] [ can travel freely within Europe ]");
+ checkScope("{ All } [ Europeans ] [ have the right to live in Europe ]");
+ checkScope("{ All } [ Italian men ] [ want to be a great tenor ]");
+ checkScope("{ All } [ committee members ] [ are people ]");
+ checkScope("{ All } [ competent legal authorities ] [ are competent law lecturers ]");
+ checkScope("{ All } [ elephants ] [ are large animals ]");
+ checkScope("{ All } [ fat legal authorities ] [ are fat law lecturers ]");
+ checkScope("{ All } [ law lecturers ] [ are legal authorities ]");
+ checkScope("{ All } [ legal authorities ] [ are law lecturers ]");
+ checkScope("{ All } [ mice ] [ are small animals ]");
+ checkScope("{ All } [ people who are from Portugal ] [ are from southern Europe ]");
+ checkScope("{ All } [ people who are from Sweden ] [ are from Scandinavia ]");
+ checkScope("{ All } [ people who are resident in Europe ] [ can travel freely within Europe ]");
+ checkScope("{ All } [ residents of major western countries ] [ are residents of western countries ]");
+ checkScope("{ All } [ residents of member states ] [ are individuals ]");
+ checkScope("{ All } [ residents of the North American continent ] [ can travel freely within Europe ]");
+ checkScope("{ All } [ the people who were at the meeting ] [ voted for a new chairman ]");
+ }
+
+ @Test
+ public void fracasSentencesWithEach() {
+ checkScope("{ Each } [ Canadian resident ] [ can travel freely within Europe ]");
+ checkScope("{ Each } [ European ] [ can travel freely within Europe ]");
+ checkScope("{ Each } [ European ] [ has the right to live in Europe ]");
+ checkScope("{ Each } [ Italian tenor ] [ wants to be great ]");
+ checkScope("{ Each } [ department ] [ has a dedicated line ]");
+ checkScope("{ Each } [ of the other 99 companies ] [ owns one computer ]");
+ checkScope("{ Each } [ resident of the North American continent ] [ can travel freely within Europe ]");
+ }
+
+ @Test
+ public void fracasSentencesWithEvery() {
+ checkScope("{ Every } [ Ancient Greek ] [ was a noted philosopher ]");
+ checkScope("{ Every } [ Canadian resident ] [ can travel freely within Europe ]");
+ checkScope("{ Every } [ Canadian resident ] [ is a resident of the North American continent ]");
+ checkScope("{ Every } [ European ] [ can travel freely within Europe ]");
+ checkScope("{ Every } [ European ] [ has the right to live in Europe ]");
+ checkScope("{ Every } [ European ] [ is a person ]");
+ checkScope("{ Every } [ Italian man ] [ wants to be a great tenor ]");
+ checkScope("{ Every } [ Swede ] [ is a Scandinavian ]");
+ checkScope("{ Every } [ committee ] [ has a chairman ]");
+ checkScope("{ Every } [ committee ] [ has a chairman appointed by members of the committee ]");
+ checkScope("{ Every } [ customer who owns a computer ] [ has a service contract for it ]");
+ checkScope("{ Every } [ department ] [ rents a line from BT ]");
+ checkScope("{ Every } [ executive who had a laptop computer ] [ brought it to take notes at the meeting ]");
+ checkScope("{ Every } [ four-legged mammal ] [ is a four-legged animal ]");
+ checkScope("{ Every } [ individual who has the right to live anywhere in Europe ] [ can travel freely within Europe ]");
+ checkScope("{ Every } [ individual who has the right to live in Europe ] [ can travel freely within Europe ]");
+ checkScope("{ Every } [ inhabitant of Cambridge ] [ voted for a Labour MP ]");
+ checkScope("{ Every } [ mammal ] [ is an animal ]");
+ checkScope("{ Every } [ person who has the right to live in Europe ] [ can travel freely within Europe ]");
+ checkScope("{ Every } [ report ] [ has a cover page ]");
+ checkScope("{ Every } [ representative and client ] [ was at the meeting ]");
+ checkScope("{ Every } [ representative and every client ] [ was at the meeting ]");
+ checkScope("{ Every } [ representative ] [ has read this report ]");
+ checkScope("{ Every } [ representative or client ] [ was at the meeting ]");
+ checkScope("{ Every } [ representative ] [ was at the meeting ]");
+ checkScope("{ Every } [ resident of the North American continent ] [ can travel freely within Europe ]");
+ checkScope("{ Every } [ student ] [ used her workstation ]");
+ }
+
+ @Test
+ public void fracasSentencesWithEveryone() {
+ checkScope("{ Everyone } [ at the meeting ] [ voted for a new chairman ]");
+ checkScope("{ Everyone } [ who starts gambling seriously ] [ continues until he is broke ]");
+ checkScope("{ Everyone } [ who starts gambling seriously ] [ stops the moment he is broke ]");
+ }
+
+ @Test
+ public void fracasSentencesWithFew() {
+ checkScope("{ Few } [ committee members ] [ are from Portugal ]");
+ checkScope("{ Few } [ committee members ] [ are from southern Europe ]");
+ checkScope("{ Few } [ female committee members ] [ are from southern Europe ]");
+ }
+
+ @Test
+ public void fracasSentencesWithA() {
+ checkScope("{ A } [ Scandinavian ] [ won a Nobel prize ]");
+ checkScope("{ A } [ Swede ] [ won a Nobel prize ]");
+ checkScope("{ A } [ company director ] [ awarded himself a large payrise ]");
+ checkScope("{ A } [ company director ] [ has awarded and been awarded a payrise ]");
+ checkScope("{ A } [ lawyer ] [ signed every report ]");
+
+ checkScope("{ An } [ Irishman ] [ won a Nobel prize ]");
+ checkScope("{ An } [ Irishman ] [ won the Nobel prize for literature ]");
+ checkScope("{ An } [ Italian ] [ became the world 's greatest tenor ]");
+ }
+
+ @Test
+ public void fracasSentencesWithAFew() {
+ checkScope("{ A few } [ committee members ] [ are from Scandinavia ]");
+ checkScope("{ A few } [ committee members ] [ are from Sweden ]");
+ checkScope("{ A few } [ female committee members ] [ are from Scandinavia ]");
+ checkScope("{ A few } [ great tenors ] [ sing popular music ]");
+ }
+
+ @Test
+ public void fracasSentencesWithAtLeastAFew() {
+ checkScope("{ At least a few } [ committee members ] [ are from Scandinavia ]");
+ checkScope("{ At least a few } [ committee members ] [ are from Sweden ]");
+ checkScope("{ At least a few } [ female committee members ] [ are from Scandinavia ]");
+ }
+
+ @Test
+ public void fracasSentencesWithEither() {
+ checkScope("{ Either } [ Smith Jones or Anderson ] [ signed the contract ]");
+ }
+
+ @Test
+ public void fracasSentencesWithOneOfThe() {
+ checkScope("{ One of the } [ commissioners ] [ spends a lot of time at home ]");
+ checkScope("{ One of the } [ leading tenors ] [ is Pavarotti ]");
+ }
+
+ @Test
+ public void fracasSentencesWithSeveral() {
+ checkScope("{ Several } [ Portuguese delegates ] [ got the results published in major national newspapers ]");
+ checkScope("{ Several } [ delegates ] [ got the results published ]");
+ checkScope("{ Several } [ delegates ] [ got the results published in major national newspapers ]");
+ checkScope("{ Several } [ great tenors ] [ are British ]");
+ }
+
+ @Test
+ public void fracasSentencesWithSome() {
+ checkScope("{ Some } [ Irish delegates ] [ finished the survey on time ]");
+ checkScope("{ Some } [ Italian men ] [ are great tenors ]");
+ checkScope("{ Some } [ Italian tenors ] [ are great ]");
+ checkScope("{ Some } [ Scandinavian delegate ] [ finished the report on time ]");
+ checkScope("{ Some } [ accountant ] [ attended the meeting ]");
+ checkScope("{ Some } [ accountants ] [ attended the meeting ]");
+ checkScope("{ Some } [ delegate ] [ finished the report on time ]");
+ checkScope("{ Some } [ delegates ] [ finished the survey ]");
+ checkScope("{ Some } [ delegates ] [ finished the survey on time ]");
+ checkScope("{ Some } [ great tenors ] [ are Swedish ]");
+// checkScope("{ Some } [ great tenors ] [ like popular music ]"); // parse error
+ checkScope("{ Some } [ people ] [ discover that they have been asleep ]");
+ }
+
+ @Test
+ public void fracasSentencesWithThe() {
+ checkScope("{ The } [ Ancient Greeks ] [ were all noted philosophers ]");
+ checkScope("{ The } [ Ancient Greeks ] [ were noted philosophers ]");
+ checkScope("{ The } [ ITEL-XZ ] [ is fast ]");
+ checkScope("{ The } [ ITEL-ZX ] [ is an ITEL computer ]");
+ checkScope("{ The } [ ITEL-ZX ] [ is slower than 500 MIPS ]");
+ checkScope("{ The } [ PC-6082 ] [ is as fast as the ITEL-XZ ]");
+ checkScope("{ The } [ PC-6082 ] [ is fast ]");
+ checkScope("{ The } [ PC-6082 ] [ is faster than 500 MIPS ]");
+ checkScope("{ The } [ PC-6082 ] [ is faster than any ITEL computer ]");
+ checkScope("{ The } [ PC-6082 ] [ is faster than every ITEL computer ]");
+ checkScope("{ The } [ PC-6082 ] [ is faster than some ITEL computer ]");
+ checkScope("{ The } [ PC-6082 ] [ is faster than the ITEL-XZ ]");
+ checkScope("{ The } [ PC-6082 ] [ is faster than the ITEL-ZX ]");
+ checkScope("{ The } [ PC-6082 ] [ is faster than the ITEL-ZX and the ITEL-ZY ]");
+ checkScope("{ The } [ PC-6082 ] [ is faster than the ITEL-ZX or the ITEL-ZY ]");
+ checkScope("{ The } [ PC-6082 ] [ is slow ]");
+ checkScope("{ The } [ PC-6082 ] [ is slower than the ITEL-XZ ]");
+ checkScope("{ The } [ chairman of the department ] [ is a person ]");
+ checkScope("{ The } [ chairman ] [ read out every item on the agenda ]");
+ checkScope("{ The } [ chairman ] [ read out the items on the agenda ]");
+ checkScope("{ The } [ conference ] [ started on July 4th , 1994 ]");
+ checkScope("{ The } [ conference ] [ was over on July 8th , 1994 ]");
+ checkScope("{ The } [ inhabitants of Cambridge ] [ voted for a Labour MP ]");
+// checkScope("{ The } [ people who were at the meeting ] [ all voted for a new chairman ]"); // TODO(gabor) Parse error on "meeting -dep-> all"
+ checkScope("{ The } [ people who were at the meeting ] [ voted for a new chairman ]");
+ checkScope("{ The } [ really ambitious tenors ] [ are Italian ]");
+ checkScope("{ The } [ residents of major western countries ] [ can travel freely within Europe ]");
+ checkScope("{ The } [ residents of major western countries ] [ have the right to live in Europe ]");
+ checkScope("{ The } [ residents of member states ] [ can travel freely within Europe ]");
+ checkScope("{ The } [ residents of member states ] [ have the right to live anywhere in Europe ]");
+ checkScope("{ The } [ residents of member states ] [ have the right to live in Europe ]");
+ checkScope("{ The } [ residents of western countries ] [ can travel freely within Europe ]");
+ checkScope("{ The } [ residents of western countries ] [ have the right to live in Europe ]");
+ checkScope("{ The } [ sales department ] [ rents a line from BT ]");
+ checkScope("{ The } [ sales department ] [ rents it from BT ]");
+ checkScope("{ The } [ students ] [ are going to Paris by train ]");
+ checkScope("{ The } [ students ] [ have spoken to Mary ]");
+ checkScope("{ The } [ system failure ] [ was blamed on one or more software faults ]");
+ }
+
+ @Test
+ public void fracasSentencesWithThereAre() {
+ checkScope("{ There are } [ 100 companies ]");
+ checkScope("{ There are } [ Italian men who want to be a great tenor ]");
+ checkScope("{ There are } [ Italian tenors who want to be great ]");
+ checkScope("{ There are } [ few committee members from Portugal ]");
+ checkScope("{ There are } [ few committee members from southern Europe ]");
+ checkScope("{ There are } [ great tenors who are British ]");
+ checkScope("{ There are } [ great tenors who are German ]");
+ checkScope("{ There are } [ great tenors who are Italian ]");
+ checkScope("{ There are } [ great tenors who are Swedish ]");
+ checkScope("{ There are } [ great tenors who sing popular music ]");
+// checkScope("{ There are } [ really ambitious tenors who are Italian ]"); // TODO(gabor) parse error on are -advmod-> really
+// checkScope("{ There are } [ really great tenors who are modest ]"); // TODO(gabor) as above
+ checkScope("{ There are } [ sixteen representatives ]");
+ checkScope("{ There are } [ some reports from ITEL on Smith 's desk ]");
+ checkScope("{ There are } [ tenors who will take part in the concert ]");
+
+ checkScope("{ There is } [ a car that John and Bill own ]");
+ checkScope("{ There is } [ someone whom Helen saw answer the phone ]");
+
+ checkScope("{ There was } [ a group of people that met ]");
+ checkScope("{ There was } [ an Italian who became the world 's greatest tenor ]");
+ checkScope("{ There was } [ one auditor who signed all the reports ]");
+ }
+
+ @Test
+ public void fracasSentencesWithProperNouns() {
+ checkScope("[ { APCOM } ] [ has a more important customer than ITEL ]");
+ checkScope("[ { APCOM } ] [ has a more important customer than ITEL has ]");
+ checkScope("[ { APCOM } ] [ has a more important customer than ITEL is ]");
+ checkScope("[ { APCOM } ] [ has been paying mortgage interest for a total of 15 years or more ]");
+ checkScope("[ { APCOM } ] [ lost some orders ]");
+ checkScope("[ { APCOM } ] [ lost ten orders ]");
+ checkScope("[ { APCOM } ] [ signed the contract Friday , 13th ]");
+ checkScope("[ { APCOM } ] [ sold exactly 2500 computers ]");
+ checkScope("[ { APCOM } ] [ won some orders ]");
+ checkScope("[ { APCOM } ] [ won ten orders ]");
+
+ checkScope("[ { Bill } ] [ bought a car ]");
+ checkScope("[ { Bill } ] [ has spoken to Mary ]");
+ checkScope("[ { Bill } ] [ is going to ]");
+ checkScope("[ { Bill } ] [ knows why John had his paper accepted ]");
+ checkScope("[ { Bill } ] [ owns a blue car ]");
+ checkScope("[ { Bill } ] [ owns a blue one ]");
+ checkScope("[ { Bill } ] [ owns a car ]");
+ checkScope("[ { Bill } ] [ owns a fast car ]");
+ checkScope("[ { Bill } ] [ owns a fast one ]");
+ checkScope("[ { Bill } ] [ owns a fast red car ]");
+ checkScope("[ { Bill } ] [ owns a red car ]");
+ checkScope("[ { Bill } ] [ owns a slow one ]");
+ checkScope("[ { Bill } ] [ owns a slow red car ]");
+ checkScope("[ { Bill } ] [ said Mary wrote a report ]");
+ checkScope("[ { Bill } ] [ said Peter wrote a report ]");
+ checkScope("[ { Bill } ] [ spoke to Mary ]");
+ checkScope("[ { Bill } ] [ spoke to Mary at five o'clock ]");
+ checkScope("[ { Bill } ] [ spoke to Mary at four o'clock ]");
+ checkScope("[ { Bill } ] [ spoke to Mary on Monday ]");
+ checkScope("[ { Bill } ] [ spoke to everyone that John did ]");
+ checkScope("[ { Bill } ] [ suggested to Frank 's boss that they should go to the meeting together , and Carl to Alan 's wife ]");
+ checkScope("[ { Bill } ] [ went to Berlin by car ]");
+ checkScope("[ { Bill } ] [ went to Berlin by train ]");
+ checkScope("[ { Bill } ] [ went to Paris by train ]");
+ checkScope("[ { Bill } ] [ will speak to Mary ]");
+ checkScope("[ { Bill } ] [ wrote a report ]");
+
+ checkScope("[ { Dumbo } ] [ is a four-legged animal ]");
+ checkScope("[ { Dumbo } ] [ is a large animal ]");
+ checkScope("[ { Dumbo } ] [ is a small animal ]");
+ checkScope("[ { Dumbo } ] [ is a small elephant ]");
+ checkScope("[ { Dumbo } ] [ is four-legged ]");
+ checkScope("[ { Dumbo } ] [ is larger than Mickey ]");
+
+ checkScope("[ { GFI } ] [ owns several computers ]");
+
+ checkScope("[ { Helen } ] [ saw the chairman of the department answer the phone ]");
+
+ checkScope("[ { ICM } ] [ is one of the companies and owns 150 computers ]");
+
+// checkScope("[ { ITEL } ] [ always delivers reports late ]"); // TODO(gabor) bad parse from ITEL -dep-> delivers
+ checkScope("[ { ITEL } ] [ built MTALK in 1993 ]");
+// checkScope("[ { ITEL } ] [ currently has a factory in Birmingham ]"); // FIXME(gabor): incorrect operator scope
+ checkScope("[ { ITEL } ] [ delivered reports late in 1993 ]");
+ checkScope("[ { ITEL } ] [ developed a new editor in 1993 ]");
+ checkScope("[ { ITEL } ] [ existed in 1992 ]");
+ checkScope("[ { ITEL } ] [ expanded in 1993 ]");
+ checkScope("[ { ITEL } ] [ finished MTALK in 1993 ]");
+ checkScope("[ { ITEL } ] [ has a factory in Birmingham ]");
+ checkScope("[ { ITEL } ] [ has developed a new editor since 1992 ]");
+ checkScope("[ { ITEL } ] [ has expanded since 1992 ]");
+ checkScope("[ { ITEL } ] [ has made a loss since 1992 ]");
+ checkScope("[ { ITEL } ] [ has sent most of the reports Smith needs ]");
+ checkScope("[ { ITEL } ] [ made a loss in 1993 ]");
+ checkScope("[ { ITEL } ] [ maintains all the computers that GFI owns ]");
+ checkScope("[ { ITEL } ] [ maintains them ]");
+ checkScope("[ { ITEL } ] [ managed to win the contract in 1992 ]");
+// checkScope("[ { ITEL } ] [ never delivers reports late ]"); // TODO(gabor) parse error
+ checkScope("[ { ITEL } ] [ owned APCOM from 1988 to 1992 ]");
+ checkScope("[ { ITEL } ] [ owned APCOM in 1990 ]");
+ checkScope("[ { ITEL } ] [ sent a progress report in July 1994 ]");
+ checkScope("[ { ITEL } ] [ sold 3000 more computers than APCOM ]");
+ checkScope("[ { ITEL } ] [ sold 5500 computers ]");
+ checkScope("[ { ITEL } ] [ tried to win the contract in 1992 ]");
+ checkScope("[ { ITEL } ] [ was building MTALK in 1993 ]");
+ checkScope("[ { ITEL } ] [ was winning the contract from APCOM in 1993 ]");
+ checkScope("[ { ITEL } ] [ won a contract in 1993 ]");
+ checkScope("[ { ITEL } ] [ won at least eleven orders ]");
+ checkScope("[ { ITEL } ] [ won more orders than APCOM ]");
+ checkScope("[ { ITEL } ] [ won more orders than APCOM did ]");
+// checkScope("[ { ITEL } ] [ won more orders than APCOM lost ]"); // TODO(gabor) parse error
+ checkScope("[ { ITEL } ] [ won more orders than the APCOM contract ]");
+ checkScope("[ { ITEL } ] [ won more than one order ]");
+ checkScope("[ { ITEL } ] [ won some orders ]");
+ checkScope("[ { ITEL } ] [ won the APCOM contract ]");
+ checkScope("[ { ITEL } ] [ won the contract from APCOM in 1993 ]");
+ checkScope("[ { ITEL } ] [ won the contract in 1992 ]");
+ checkScope("[ { ITEL } ] [ won twenty orders ]");
+ checkScope("[ { ITEL } ] [ won twice as many orders than APCOM ]");
+ checkScope("[ { Itel } ] [ was in Birmingham in 1993 ]");
+
+ checkScope("[ { John } ] [ bought a car ]");
+ checkScope("[ { John } ] [ found Mary before Bill ]");
+// checkScope("[ { John } ] [ found Mary before Bill found Mary ]"); // FIXME(gabor): incorrect operator scope
+// checkScope("[ { John } ] [ found Mary before John found Bill ]"); // FIXME(gabor): incorrect operator scope
+ checkScope("[ { John } ] [ had his paper accepted ]");
+ checkScope("[ { John } ] [ has a diamond ]");
+ checkScope("[ { John } ] [ has a genuine diamond ]");
+ checkScope("[ { John } ] [ has spoken to Mary ]");
+ checkScope("[ { John } ] [ hated the meeting ]");
+ checkScope("[ { John } ] [ is a cleverer politician than Bill ]");
+ checkScope("[ { John } ] [ is a fatter politician than Bill ]");
+ checkScope("[ { John } ] [ is a former successful university student ]");
+ checkScope("[ { John } ] [ is a former university student ]");
+ checkScope("[ { John } ] [ is a man and Mary is a woman ]");
+ checkScope("[ { John } ] [ is a successful former university student ]");
+ checkScope("[ { John } ] [ is a university student ]");
+ checkScope("[ { John } ] [ is cleverer than Bill ]");
+ checkScope("[ { John } ] [ is fatter than Bill ]");
+ checkScope("[ { John } ] [ is going to Paris by car , and the students by train ]");
+ checkScope("[ { John } ] [ is successful ]");
+// checkScope("[ { John } ] [ needed to buy a car ] and Bill did "); // interesting example; also, parse error
+ checkScope("[ { John } ] [ owns a car ]");
+ checkScope("[ { John } ] [ owns a fast red car ]");
+ checkScope("[ { John } ] [ owns a red car ]");
+ checkScope("[ { John } ] [ represents his company ] and so does Mary");
+ checkScope("[ { John } ] [ said Bill had been hurt ]");
+ checkScope("[ { John } ] [ said Bill had hurt himself ]");
+ checkScope("[ { John } ] [ said Bill wrote a report ]");
+ checkScope("[ { John } ] [ said Mary wrote a report ] , and Bill did too"); // interesting example
+// checkScope("[ { John } ] [ said that Mary wrote a report ] , and that Bill did too"); // FIXME(gabor): incorrect operator scope
+ checkScope("[ { John } ] [ spoke to Mary ]");
+ checkScope("[ { John } ] [ spoke to Mary at four o'clock ]");
+ checkScope("[ { John } ] [ spoke to Mary on Friday ]");
+ checkScope("[ { John } ] [ spoke to Mary on Monday ]");
+ checkScope("[ { John } ] [ spoke to Mary on Thursday ]");
+ checkScope("[ { John } ] [ spoke to Sue ]");
+ checkScope("[ { John } ] [ wanted to buy a car ] , and he did");
+ checkScope("[ { John } ] [ wants to know how many men work part time ]");
+ checkScope("[ { John } ] [ wants to know how many men work part time , and which ]");
+ checkScope("[ { John } ] [ wants to know how many women work part time ]");
+ checkScope("[ { John } ] [ wants to know which men work part time ]");
+ checkScope("[ { John } ] [ went to Paris by car ]");
+ checkScope("[ { John } ] [ went to Paris by car , and Bill by train ]");
+ checkScope("[ { John } ] [ went to Paris by car , and Bill by train to Berlin ]");
+ checkScope("[ { John } ] [ went to Paris by car , and Bill to Berlin ]");
+ checkScope("[ { John } ] [ wrote a report ]");
+// checkScope("[ { John } ] [ wrote a report ] , and Bill said Peter did too ]"); // FIXME(gabor): incorrect operator scope
+
+ checkScope("[ { Jones } ] [ claimed Smith had costed Jones ' proposal ]");
+ checkScope("[ { Jones } ] [ claimed Smith had costed Smith 's proposal ]");
+ checkScope("[ { Jones } ] [ claimed he had costed Smith 's proposal ]");
+ checkScope("[ { Jones } ] [ claimed he had costed his own proposal ]");
+ checkScope("[ { Jones } ] [ graduated in March ] and has been employed ever since");
+ checkScope("[ { Jones } ] [ has a company car ]");
+ checkScope("[ { Jones } ] [ has been unemployed in the past ]");
+ checkScope("[ { Jones } ] [ has more than one company car ]");
+ checkScope("[ { Jones } ] [ is an APCOM manager ]");
+ checkScope("[ { Jones } ] [ is the chairman of ITEL ]");
+ checkScope("[ { Jones } ] [ left after Anderson left ]");
+ checkScope("[ { Jones } ] [ left after Anderson was present ]");
+ checkScope("[ { Jones } ] [ left after Smith left ]");
+ checkScope("[ { Jones } ] [ left before Anderson left ]");
+ checkScope("[ { Jones } ] [ left before Smith left ]");
+ checkScope("[ { Jones } ] [ left the meeting ]");
+ checkScope("[ { Jones } ] [ represents Jones 's company ]");
+ checkScope("[ { Jones } ] [ represents Smith 's company ]");
+ checkScope("[ { Jones } ] [ revised the contract ]");
+ checkScope("[ { Jones } ] [ revised the contract after Smith did ]");
+ checkScope("[ { Jones } ] [ revised the contract before Smith did ]");
+ checkScope("[ { Jones } ] [ signed another contract ]");
+ checkScope("[ { Jones } ] [ signed the contract ]");
+ checkScope("[ { Jones } ] [ signed two contracts ]");
+ checkScope("[ { Jones } ] [ swam after Smith swam ]");
+ checkScope("[ { Jones } ] [ swam to the shore ]");
+ checkScope("[ { Jones } ] [ swam to the shore after Smith swam to the shore ]");
+ checkScope("[ { Jones } ] [ was present ]");
+ checkScope("[ { Jones } ] [ was present after Smith was present ]");
+ checkScope("[ { Jones } ] [ was present before Smith was present ]");
+ checkScope("[ { Jones } ] [ was unemployed at some time before he graduated ]");
+ checkScope("[ { Jones } ] [ was writing a report ]");
+ checkScope("[ { Jones } ] [ was writing a report after Smith was writing a report ]");
+ checkScope("[ { Jones } ] [ was writing a report before Smith was writing a report ]");
+
+ checkScope("[ { Kim } ] [ is a clever person ]");
+ checkScope("[ { Kim } ] [ is a clever politician ]");
+ checkScope("[ { Kim } ] [ is clever ]");
+
+ checkScope("[ { MFI } ] [ has a service contract for all its computers ]");
+ checkScope("[ { MFI } ] [ is a customer that owns exactly one computer ]");
+ checkScope("[ { MFI } ] [ is a customer that owns several computers ]");
+
+ checkScope("[ { Mary } ] [ has a workstation ]");
+ checkScope("[ { Mary } ] [ is a student ]");
+ checkScope("[ { Mary } ] [ is female ]");
+ checkScope("[ { Mary } ] [ represents John 's company ]");
+ checkScope("[ { Mary } ] [ represents her own company ]");
+ checkScope("[ { Mary } ] [ used a workstation ]");
+ checkScope("[ { Mary } ] [ used her workstation ]");
+
+ checkScope("[ { Mickey } ] [ is a large animal ]");
+ checkScope("[ { Mickey } ] [ is a large mouse ]");
+ checkScope("[ { Mickey } ] [ is a small animal ]");
+ checkScope("[ { Mickey } ] [ is larger than Dumbo ]");
+ checkScope("[ { Mickey } ] [ is smaller than Dumbo ]");
+
+ checkScope("[ { Pavarotti } ] [ is a leading tenor who comes cheap ]");
+ }
+
+ @Test
+ public void fracasSentencesWithAtMostAtLeast() {
+ checkScope("{ At least three } [ commissioners ] [ spend a lot of time at home ]");
+ checkScope("{ At least three } [ commissioners ] [ spend time at home ]");
+ checkScope("{ At least three } [ female commissioners ] [ spend time at home ]");
+ checkScope("{ At least three } [ male commissioners ] [ spend time at home ]");
+ checkScope("{ At least three } [ tenors ] [ will take part in the concert ]");
+ checkScope("{ At most ten } [ commissioners ] [ spend a lot of time at home ]");
+ checkScope("{ At most ten } [ commissioners ] [ spend time at home ]");
+ checkScope("{ At most ten } [ female commissioners ] [ spend time at home ]");
+
+ checkScope("{ Just one } [ accountant ] [ attended the meeting ]");
+ }
+
+ @Test
+ public void fracasSentencesWithPureNumbers() {
+ checkScope("{ Eight } [ machines ] [ have been removed ]");
+
+ checkScope("{ Five } [ men ] [ work part time ]");
+ checkScope("{ Forty five } [ women ] [ work part time ]");
+
+ checkScope("{ Six } [ accountants ] [ signed the contract ]");
+ checkScope("{ Six } [ lawyers ] [ signed the contract ]");
+
+// checkScope("{ Ten } [ machines ] [ were here yesterday ]"); // TODO(gabor) yesterday doesn't come into scope
+
+ checkScope("{ Twenty } [ men ] [ work in the Sales Department ]");
+ checkScope("{ Two } [ machines ] [ have been removed ]");
+ checkScope("{ Two } [ women ] [ work in the Sales Department ]");
+ }
+
+ @Test
+ public void fracasSentencesWithBoth() {
+ checkScope("{ Both } [ commissioners ] [ used to be businessmen ]");
+ checkScope("{ Both } [ commissioners ] [ used to be leading businessmen ]");
+ checkScope("{ Both } [ leading tenors ] [ are excellent ]");
+ checkScope("{ Both } [ leading tenors ] [ are indispensable ]");
+ }
+
+ @Test
+ public void fracasSentencesWithMany() {
+ checkScope("{ Many } [ British delegates ] [ obtained interesting results from the survey ]");
+ checkScope("{ Many } [ delegates ] [ obtained interesting results from the survey ]");
+ checkScope("{ Many } [ delegates ] [ obtained results from the survey ]");
+ checkScope("{ Many } [ great tenors ] [ are German ]");
+ }
+
+ @Test
+ public void fracasSentencesWithMost() {
+ checkScope("{ Most } [ Europeans ] [ can travel freely within Europe ]");
+ checkScope("{ Most } [ Europeans who are resident in Europe ] [ can travel freely within Europe ]");
+ checkScope("{ Most } [ Europeans who are resident outside Europe ] [ can travel freely within Europe ]");
+ checkScope("{ Most } [ clients at the demonstration ] [ were impressed by the system 's performance ]");
+ checkScope("{ Most } [ companies that own a computer ] [ have a service contract for it ]");
+ checkScope("{ Most } [ great tenors ] [ are Italian ]");
+ }
+
+ @Test
+ public void fracasSentencesWithNeither() {
+ checkScope("{ Neither } [ commissioner ] [ spends a lot of time at home ]");
+ checkScope("{ Neither } [ commissioner ] [ spends time at home ]");
+ checkScope("{ Neither } [ leading tenor ] [ comes cheap ]");
+ }
+
+ @Test
+ public void fracasSentencesWithBinaryNo() {
+ checkScope("{ No } [ Scandinavian delegate ] [ finished the report on time ]");
+ checkScope("{ No } [ accountant ] [ attended the meeting ]");
+ checkScope("{ No } [ accountants ] [ attended the meeting ]");
+ checkScope("{ No } [ delegate ] [ finished the report ]");
+ checkScope("{ No } [ really great tenors ] [ are modest ]");
+// checkScope("{ No } [ representative ] [ took less than half a day to read the report ]"); // TODO(gabor) vmod issue again
+ checkScope("{ No } [ student ] [ used her workstation ]");
+ checkScope("{ No } [ two representatives ] [ have read it at the same time ]");
+ checkScope("{ No } [ delegate ] [ finished the report on time ]");
+ }
+
+ @Test
+ public void fracasSentencesWithBinaryNoOne() {
+ // Ignore "no one" for now.
+// checkScope("{ No one } [ can gamble ] [ when he is broke ]"); // interesting: subject object reversal (we of course don't actually get this...)
+// checkScope("{ No one } [ gambling seriously ] [ stops until he is broke ]");
+// checkScope("{ No one } [ who starts gambling seriously ] [ stops until he is broke ]");
+
+ checkScope("{ Nobody } [ who is asleep ] [ ever knows that he is asleep ]");
+ }
+
+
+}
diff --git a/itest/src/edu/stanford/nlp/naturalli/PolarityITest.java b/itest/src/edu/stanford/nlp/naturalli/PolarityITest.java
new file mode 100644
index 0000000000..0fb97eb82f
--- /dev/null
+++ b/itest/src/edu/stanford/nlp/naturalli/PolarityITest.java
@@ -0,0 +1,66 @@
+package edu.stanford.nlp.naturalli;
+
+import edu.stanford.nlp.ling.CoreAnnotations;
+import edu.stanford.nlp.ling.CoreLabel;
+import edu.stanford.nlp.pipeline.Annotation;
+import edu.stanford.nlp.pipeline.StanfordCoreNLP;
+import org.junit.*;
+
+import java.util.List;
+import java.util.Properties;
+
+import static org.junit.Assert.*;
+
+
+/**
+ * A test to make sure {@link edu.stanford.nlp.naturalli.NaturalLogicAnnotator} marks the right polarities for the tokens
+ * in the sentence.
+ *
+ * @author Gabor Angeli
+ */
+public class PolarityITest {
+
+ private static final StanfordCoreNLP pipeline = new StanfordCoreNLP(new Properties(){{
+ setProperty("annotators", "tokenize,ssplit,pos,lemma,parse");
+ setProperty("ssplit.isOneSentence", "true");
+ setProperty("tokenize.class", "PTBTokenizer");
+ setProperty("tokenize.language", "en");
+ }});
+
+ static {
+ pipeline.addAnnotator(new NaturalLogicAnnotator());
+ }
+
+ @SuppressWarnings("unchecked")
+ private Polarity[] annotate(String text) {
+ Annotation ann = new Annotation(text);
+ pipeline.annotate(ann);
+ List tokens = ann.get(CoreAnnotations.SentencesAnnotation.class).get(0).get(CoreAnnotations.TokensAnnotation.class);
+ Polarity[] polarities = new Polarity[tokens.size()];
+ for (int i = 0; i < tokens.size(); ++i) {
+ polarities[i] = tokens.get(i).get(NaturalLogicAnnotations.PolarityAnnotation.class);
+ }
+ return polarities;
+ }
+
+ @Test
+ public void allCatsHaveTails() {
+ Polarity[] p = annotate("all cats have tails");
+ assertTrue(p[0].isUpwards());
+ assertTrue(p[1].isDownwards());
+ assertTrue(p[2].isUpwards());
+ assertTrue(p[3].isUpwards());
+ }
+
+ @Test
+ public void someCatsDontHaveTails() {
+ Polarity[] p = annotate("some cats don't have tails");
+ assertTrue(p[0].isUpwards());
+ assertTrue(p[1].isUpwards());
+ assertTrue(p[2].isUpwards());
+ assertTrue(p[3].isUpwards());
+ assertTrue(p[4].isDownwards());
+ assertTrue(p[5].isDownwards());
+ }
+
+}
diff --git a/itest/src/edu/stanford/nlp/parser/nndep/DependencyParserITest.java b/itest/src/edu/stanford/nlp/parser/nndep/DependencyParserITest.java
index d9065c5fb3..a836d883cf 100644
--- a/itest/src/edu/stanford/nlp/parser/nndep/DependencyParserITest.java
+++ b/itest/src/edu/stanford/nlp/parser/nndep/DependencyParserITest.java
@@ -20,8 +20,6 @@
import junit.framework.TestCase;
import edu.stanford.nlp.util.StringUtils;
-import org.hamcrest.CoreMatchers;
-import org.junit.matchers.JUnitMatchers;
import static java.util.stream.Collectors.toList;
import static org.junit.Assert.assertThat;
@@ -92,8 +90,8 @@ public void testCCProcess() {
Collection dependencies = ccProcessed.typedDependencies();
GrammaticalRelation expected = EnglishGrammaticalRelations.getConj("and");
- assertThat(dependencies.stream().map(d -> d.reln()).collect(toList()),
- hasItem(expected));
+ assertThat(dependencies.stream().map(TypedDependency::reln).collect(toList()),
+ hasItem(expected));
}
/**
diff --git a/itest/src/edu/stanford/nlp/pipeline/ProtobufAnnotationSerializerSlowITest.java b/itest/src/edu/stanford/nlp/pipeline/ProtobufAnnotationSerializerSlowITest.java
index 9aae429cd8..d8bb83b58a 100644
--- a/itest/src/edu/stanford/nlp/pipeline/ProtobufAnnotationSerializerSlowITest.java
+++ b/itest/src/edu/stanford/nlp/pipeline/ProtobufAnnotationSerializerSlowITest.java
@@ -369,6 +369,11 @@ public void testSerializeSSplitTokensRegression() {
testAnnotators("tokenize,ssplit");
}
+ @Test
+ public void testSerializeNatLog() {
+ testAnnotators("tokenize,ssplit,pos,lemma,parse,natlog");
+ }
+
/**
* Is the protobuf annotator "CoreNLP complete?"
* That is, does it effectively save every combination of annotators possible?
diff --git a/itest/src/edu/stanford/nlp/pipeline/StanfordCoreNLPITest.java b/itest/src/edu/stanford/nlp/pipeline/StanfordCoreNLPITest.java
index 1ec50526a4..60be668bb1 100644
--- a/itest/src/edu/stanford/nlp/pipeline/StanfordCoreNLPITest.java
+++ b/itest/src/edu/stanford/nlp/pipeline/StanfordCoreNLPITest.java
@@ -245,8 +245,19 @@ public void testSerialization()
processSerialization(sentence.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class));
processSerialization(sentence);
- Object newDocument = processSerialization(document);
- assertTrue(newDocument instanceof Annotation);
+ Object processed = processSerialization(document);
+ assertTrue(processed instanceof Annotation);
+ Annotation newDocument = (Annotation) processed;
+ assertEquals(document.get(CoreAnnotations.SentencesAnnotation.class).size(),
+ newDocument.get(CoreAnnotations.SentencesAnnotation.class).size());
+ for (int i = 0; i < document.get(CoreAnnotations.SentencesAnnotation.class).size(); ++i) {
+ CoreMap oldSentence = document.get(CoreAnnotations.SentencesAnnotation.class).get(0);
+ CoreMap newSentence = newDocument.get(CoreAnnotations.SentencesAnnotation.class).get(0);
+ assertEquals(oldSentence.get(TreeCoreAnnotations.TreeAnnotation.class),
+ newSentence.get(TreeCoreAnnotations.TreeAnnotation.class));
+ assertEquals(oldSentence.get(CoreAnnotations.TokensAnnotation.class),
+ newSentence.get(CoreAnnotations.TokensAnnotation.class));
+ }
assertTrue(document.equals(newDocument));
}
diff --git a/itest/src/edu/stanford/nlp/semgraph/semgrex/SemgrexPatternITest.java b/itest/src/edu/stanford/nlp/semgraph/semgrex/SemgrexPatternITest.java
deleted file mode 100644
index fcaef4e26d..0000000000
--- a/itest/src/edu/stanford/nlp/semgraph/semgrex/SemgrexPatternITest.java
+++ /dev/null
@@ -1,36 +0,0 @@
-package edu.stanford.nlp.semgraph.semgrex;
-
-import edu.stanford.nlp.ling.CoreAnnotations;
-import edu.stanford.nlp.pipeline.Annotation;
-import edu.stanford.nlp.pipeline.StanfordCoreNLP;
-import edu.stanford.nlp.semgraph.SemanticGraph;
-import edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations;
-import edu.stanford.nlp.util.CoreMap;
-import junit.framework.TestCase;
-import org.junit.Test;
-
-import java.util.Properties;
-
-/**
- * Created by sonalg on 7/15/14.
- */
-public class SemgrexPatternITest extends TestCase {
-
- @Test
- public void testNER() throws Exception{
- String sentence = "John lives in Washington.";
- Properties props = new Properties();
- props.setProperty("annotators","tokenize, ssplit, pos, lemma, ner, parse");
- StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
- Annotation doc = new Annotation(sentence);
- pipeline.annotate(doc);
- CoreMap sent = doc.get(CoreAnnotations.SentencesAnnotation.class).get(0);
- SemanticGraph graph = sent.get(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class);
- graph.prettyPrint();
- String patStr = "({word:/lives/} >/prep_in/ {word:/\\QCalifornia\\E|\\QWashington\\E/} >nsubj {ner:PERSON})";
- SemgrexPattern pat = SemgrexPattern.compile(patStr);
- SemgrexMatcher mat = pat.matcher(graph, true);
- assertTrue(mat.find());
- }
-
-}
diff --git a/itest/src/edu/stanford/nlp/sentiment/SentimentTrainingITest.java b/itest/src/edu/stanford/nlp/sentiment/SentimentTrainingITest.java
index b16c296f7f..5f0cc4f540 100644
--- a/itest/src/edu/stanford/nlp/sentiment/SentimentTrainingITest.java
+++ b/itest/src/edu/stanford/nlp/sentiment/SentimentTrainingITest.java
@@ -19,5 +19,23 @@ public void testGradientCheck() {
assertTrue("Gradient check failed with random seed of " + op.randomSeed,
SentimentTraining.runGradientCheck(model, trainingTrees));
}
+
+ /**
+ * Because the regularizations are typically set to be 0.001 of the
+ * total cost, it is important to test those gradients with the reg
+ * values turned up a lot.
+ */
+ public void testRegularizationGradientCheck() {
+ List<Tree> trainingTrees = SentimentUtils.readTreesWithGoldLabels(TRAIN_PATH);
+ RNNOptions op = new RNNOptions();
+ op.numHid = 5;
+ op.trainOptions.regTransformMatrix = 10.0;
+ op.trainOptions.regTransformTensor = 10.0;
+ op.trainOptions.regClassification = 10.0;
+ op.trainOptions.regWordVector = 10.0;
+ SentimentModel model = new SentimentModel(op, trainingTrees);
+ assertTrue("Gradient check failed with random seed of " + op.randomSeed,
+ SentimentTraining.runGradientCheck(model, trainingTrees));
+ }
}
diff --git a/lib/README b/lib/README
index 038ca90625..adbc0d3f95 100644
--- a/lib/README
+++ b/lib/README
@@ -323,7 +323,7 @@ LAST UPDATE BY: Spence Green
-----------------------------------------------------------------
protobuf.jar
ORIGINAL JAR NAME:
-VERSION: 2.4.1
+VERSION: 2.4.1
RELEASE DATE: April 2011
SOURCE AVAILABLE: yes
DESCRIPTION: Google's protocol buffer library
diff --git a/src/edu/stanford/nlp/classify/ColumnDataClassifier.java b/src/edu/stanford/nlp/classify/ColumnDataClassifier.java
index 60945a5d97..32a3dcb3e0 100644
--- a/src/edu/stanford/nlp/classify/ColumnDataClassifier.java
+++ b/src/edu/stanford/nlp/classify/ColumnDataClassifier.java
@@ -160,12 +160,18 @@
* useAllSplitWordPairs | boolean | false | Make features from all pairs of "words" that are returned by dividing the string into splitWords. Requires splitWordsRegexp or splitWordsTokenizerRegexp. | ASWP-str1-str2 |
* useAllSplitWordTriples | boolean | false | Make features from all triples of "words" that are returned by dividing the string into splitWords. Requires splitWordsRegexp or splitWordsTokenizerRegexp. | ASWT-str1-str2-str3 |
* useSplitWordNGrams | boolean | false | Make features of adjacent word n-grams of lengths between minWordNGramLeng and maxWordNGramLeng inclusive. Note that these are word sequences, not character n-grams. | SW#-str1-str2-strN |
+ *
+ * splitWordCount | boolean | false | The value of this real-valued feature is the number of split word tokens in the column. | SWNUM |
+ * logSplitWordCount | boolean | false | The value of this real-valued feature is the log of the number of split word tokens in the column. | LSWNUM |
+ * binnedSplitWordCounts | String | null | If non-null, treat as a sequence of comma-separated integer bounds, where items above the previous bound (if any) up to the next bound (inclusive) are binned (e.g., "1,5,15,30,60"). The feature represents the number of split words in this column. | SWNUMBIN-range |
+
* maxWordNGramLeng | int | -1 | If this number is positive, word n-grams above this size will not be used in the model |
* minWordNGramLeng | int | 1 | Must be positive. word n-grams below this size will not be used in the model |
* wordNGramBoundaryRegexp | String | null | If this is defined and the regexp matches, then the ngram stops |
* useSplitFirstLastWords | boolean | false | Make a feature from each of the first and last "words" that are returned as splitWords. This is equivalent to having word bigrams with boundary tokens at each end of the sequence (they get a special feature). Requires splitWordsRegexp or splitWordsTokenizerRegexp. | SFW-str, SLW-str |
* useSplitNGrams | boolean | false | Make features from letter n-grams - internal as well as edge all treated the same - after the data string has been split into tokens. Requires splitWordsRegexp or splitWordsTokenizerRegexp. | S#-str |
* useSplitPrefixSuffixNGrams | boolean | false | Make features from prefixes and suffixes of each token, after splitting string with splitWordsRegexp. Requires splitWordsRegexp or splitWordsTokenizerRegexp. | S#B-str, S#E-str |
+
* useNGrams | boolean | false | Make features from letter n-grams - internal as well as edge all treated the same. | #-str |
* usePrefixSuffixNGrams | boolean | false | Make features from prefix and suffix substrings of the string. | #B-str, #E-str |
* lowercase | boolean | false | Make the input string lowercase so all features work uncased |
@@ -224,6 +230,10 @@
*/
public class ColumnDataClassifier {
+ // todo [cdm 2014]: support reading files with comment lines starting with "#" or to ignore a first column headers line
+ // todo [cdm 2014]: be able to run on a test file without gold answers. For doing Kaggle competitions.
+ // todo [cdm 2014]: support lowercaseSplitWordPairs.
+
private static final double DEFAULT_VALUE = 1.0; // default value for setting categorical, boolean features
private static final String DEFAULT_IGNORE_REGEXP = "\\s+";
@@ -419,6 +429,10 @@ private Pair, List> readDataset(String f
if (inTestPhase) {
lineInfos.add(strings);
}
+ if (strings.length < flags.length) {
+ throw new RuntimeException("Error: Line has too few tab-separated columns (" + strings.length +
+ ") for " + flags.length + " columns required by specified properties: " + line);
+ }
dataset.add(makeDatumFromStrings(strings));
}
if (lineNo > 0 && minColumns != maxColumns) {
@@ -427,7 +441,7 @@ private Pair, List> readDataset(String f
filename + " varies between " + minColumns + " and " + maxColumns);
}
} catch (Exception e) {
- throw new RuntimeException("Dataset could not be processed", e);
+ throw new RuntimeException("Dataset could not be loaded", e);
}
}
@@ -524,7 +538,7 @@ private Pair writeResultsSummary(int num, Counter contin
* Write out an answer, and update statistics.
*/
private void writeAnswer(String[] strs, String clAnswer, Distribution cntr, Counter contingency, Classifier c, double sim) {
- String goldAnswer = strs[globalFlags.goldAnswerColumn];
+ String goldAnswer = globalFlags.goldAnswerColumn < strs.length ? strs[globalFlags.goldAnswerColumn]: "";
String printedText = "";
if (globalFlags.displayedColumn >= 0) {
printedText = strs[globalFlags.displayedColumn];
@@ -726,6 +740,7 @@ private Pair testExamples(Classifier cl, General
* @return The constructed Datum
*/
private Datum makeDatum(String[] strs) {
+ String goldAnswer = globalFlags.goldAnswerColumn < strs.length ? strs[globalFlags.goldAnswerColumn]: "";
List theFeatures = new ArrayList();
Collection globalFeatures = Generics.newHashSet();
if (globalFlags.useClassFeature) {
@@ -735,7 +750,7 @@ private Datum makeDatum(String[] strs) {
for (int i = 0; i < flags.length; i++) {
Collection featuresC = Generics.newHashSet();//important that this is a hash set to prevent same feature from being added multiple times
- makeDatum(strs[i], flags[i], featuresC, strs[globalFlags.goldAnswerColumn]);
+ makeDatum(strs[i], flags[i], featuresC, goldAnswer);
addAllInterningAndPrefixing(theFeatures, featuresC, i + "-");
}
@@ -743,7 +758,7 @@ private Datum makeDatum(String[] strs) {
printFeatures(strs, theFeatures);
}
//System.out.println("Features are: " + theFeatures);
- return new BasicDatum(theFeatures, strs[globalFlags.goldAnswerColumn]);
+ return new BasicDatum(theFeatures, goldAnswer);
}
/**
@@ -755,6 +770,7 @@ private Datum makeDatum(String[] strs) {
* @return The constructed RVFDatum
*/
private RVFDatum makeRVFDatum(String[] strs) {
+ String goldAnswer = globalFlags.goldAnswerColumn < strs.length ? strs[globalFlags.goldAnswerColumn]: "";
ClassicCounter theFeatures = new ClassicCounter();
ClassicCounter globalFeatures = new ClassicCounter();
if (globalFlags.useClassFeature) {
@@ -764,7 +780,7 @@ private RVFDatum makeRVFDatum(String[] strs) {
for (int i = 0; i < flags.length; i++) {
ClassicCounter featuresC = new ClassicCounter();
- makeDatum(strs[i], flags[i], featuresC, strs[globalFlags.goldAnswerColumn]);
+ makeDatum(strs[i], flags[i], featuresC, goldAnswer);
addAllInterningAndPrefixingRVF(theFeatures, featuresC, i + "-");
}
@@ -772,7 +788,7 @@ private RVFDatum makeRVFDatum(String[] strs) {
printFeatures(strs, theFeatures);
}
//System.out.println("Features are: " + theFeatures);
- return new RVFDatum(theFeatures, strs[globalFlags.goldAnswerColumn]);
+ return new RVFDatum(theFeatures, goldAnswer);
}
private void addAllInterningAndPrefixingRVF(ClassicCounter accumulator, ClassicCounter addend, String prefix) {
@@ -849,6 +865,8 @@ private static void addFeature(Object features, F newFeature, double value)
* Extracts all the features from a certain input column.
*
* @param cWord The String to extract data from
+ * @param goldAns The goldAnswer for this whole datum or emptyString if none.
+ * This is used only for filling in the binned lengths histogram counters
*/
private void makeDatum(String cWord, Flags flags, Object featuresC, String goldAns) {
@@ -939,6 +957,22 @@ private void makeDatum(String cWord, Flags flags, Object featuresC, String goldA
System.err.println(Arrays.toString(bits));
}
+ if (flags.splitWordCount) {
+ addFeature(featuresC, "SWNUM", bits.length);
+ }
+ if (flags.logSplitWordCount) {
+ addFeature(featuresC, "LSWNUM", Math.log(bits.length));
+ }
+ if (flags.binnedSplitWordCounts != null) {
+ String featureName = null;
+ for (int i = 0; i <= flags.binnedSplitWordCounts.length; i++) {
+ if (i == flags.binnedSplitWordCounts.length || bits.length <= flags.binnedSplitWordCounts[i]) {
+ featureName = "SWNUMBIN-" + ((i == 0) ? 0 : (flags.binnedSplitWordCounts[i - 1] + 1)) + '-' + ((i == flags.binnedSplitWordCounts.length) ? "Inf" : Integer.toString(flags.binnedSplitWordCounts[i]));
+ break;
+ }
+ }
+ addFeature(featuresC, featureName, DEFAULT_VALUE);
+ }
// add features over splitWords
for (int i = 0; i < bits.length; i++) {
if (flags.useSplitWords) {
@@ -1012,7 +1046,7 @@ private void makeDatum(String cWord, Flags flags, Object featuresC, String goldA
addFeature(featuresC,"SSHAPE-" + shape,DEFAULT_VALUE);
}
}
- }
+ } // end if uses some split words features
if (flags.wordShape > WordShapeClassifier.NOWORDSHAPE) {
String shape = edu.stanford.nlp.process.WordShapeClassifier.wordShape(cWord, flags.wordShape);
@@ -1420,9 +1454,7 @@ private Flags[] setProperties(Properties props) {
key = matcher.group(2);
}
if (col >= myFlags.length) {
- Flags[] newFl = new Flags[col + 1];
- System.arraycopy(myFlags, 0, newFl, 0, myFlags.length);
- myFlags = newFl;
+ myFlags = Arrays.copyOf(myFlags, col + 1);
}
if (myFlags[col] == null) {
myFlags[col] = new Flags();
@@ -1440,7 +1472,19 @@ private Flags[] setProperties(Properties props) {
}
} else if (key.equals("binnedLengthsStatistics")) {
if (Boolean.parseBoolean(val)) {
- myFlags[col].binnedLengthsCounter = new TwoDimensionalCounter();
+ myFlags[col].binnedLengthsCounter = new TwoDimensionalCounter();
+ }
+ } else if (key.equals("splitWordCount")) {
+ myFlags[col].splitWordCount = Boolean.parseBoolean(val);
+ } else if (key.equals("logSplitWordCount")) {
+ myFlags[col].logSplitWordCount = Boolean.parseBoolean(val);
+ } else if (key.equals("binnedSplitWordCounts")) {
+ if (val != null) {
+ String[] binnedSplitWordCountStrs = val.split("[, ]+");
+ myFlags[col].binnedSplitWordCounts = new int[binnedSplitWordCountStrs.length];
+ for (int i = 0; i < myFlags[col].binnedSplitWordCounts.length; i++) {
+ myFlags[col].binnedSplitWordCounts[i] = Integer.parseInt(binnedSplitWordCountStrs[i]);
+ }
}
} else if (key.equals("countChars")) {
myFlags[col].countChars = val.toCharArray();
@@ -2050,6 +2094,10 @@ static class Flags implements Serializable {
static boolean csvFormat = false; //train and test files are in csv format
boolean splitWordsWithPTBTokenizer = false;
+ boolean splitWordCount;
+ boolean logSplitWordCount;
+ int[] binnedSplitWordCounts;
+
@Override
public String toString() {
return "Flags[" +
diff --git a/src/edu/stanford/nlp/dcoref/CorefChain.java b/src/edu/stanford/nlp/dcoref/CorefChain.java
index 886cc1acda..b212b202d8 100644
--- a/src/edu/stanford/nlp/dcoref/CorefChain.java
+++ b/src/edu/stanford/nlp/dcoref/CorefChain.java
@@ -98,7 +98,7 @@ public int hashCode() {
/** get CorefMention by position */
public Set getMentionsWithSameHead(int sentenceNumber, int headIndex) {
- return mentionMap.get(new IntPair(sentenceNumber, headIndex));
+ return getMentionsWithSameHead(new IntPair(sentenceNumber, headIndex));
}
public Map> getMentionMap() { return mentionMap; }
@@ -144,6 +144,7 @@ public static class CorefMention implements Serializable {
public final IntTuple position;
public final String mentionSpan;
+ /** This constructor is used to recreate a CorefMention following serialization. */
public CorefMention(MentionType mentionType,
Number number,
Gender gender,
@@ -170,6 +171,7 @@ public CorefMention(MentionType mentionType,
this.mentionSpan = mentionSpan;
}
+ /** This constructor builds the external CorefMention class from the internal Mention. */
public CorefMention(Mention m, IntTuple pos){
mentionType = m.mentionType;
number = m.number;
@@ -232,10 +234,8 @@ public int hashCode() {
}
@Override
- public String toString(){
- StringBuilder s = new StringBuilder();
- s.append('"').append(mentionSpan).append('"').append(" in sentence ").append(sentNum);
- return s.toString();
+ public String toString() {
+ return '"' + mentionSpan + "\" in sentence " + sentNum;
// return "(sentence:" + sentNum + ", startIndex:" + startIndex + "-endIndex:" + endIndex + ")";
}
@@ -263,7 +263,9 @@ private boolean moreRepresentativeThan(CorefMention m) {
}
private static final long serialVersionUID = 3657691243504173L;
- }
+
+ } // end static class CorefMention
+
protected static class CorefMentionComparator implements Comparator {
@Override
diff --git a/src/edu/stanford/nlp/graph/DirectedMultiGraph.java b/src/edu/stanford/nlp/graph/DirectedMultiGraph.java
index edcdc0e7a6..b239c5adcd 100644
--- a/src/edu/stanford/nlp/graph/DirectedMultiGraph.java
+++ b/src/edu/stanford/nlp/graph/DirectedMultiGraph.java
@@ -2,6 +2,7 @@
import java.util.*;
+import edu.stanford.nlp.semgraph.SemanticGraphEdge;
import edu.stanford.nlp.util.CollectionUtils;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.MapFactory;
@@ -498,23 +499,26 @@ public Iterable edgeIterable() {
}
static class EdgeIterator implements Iterator {
+ private final Map>> incomingEdges;
private Iterator
- * Typical command-line usage
+ * Typical command-line usage
* For running a trained model with a provided serialized classifier on a
- * text file:
- *
+ * text file:
+ *
* java -mx500m edu.stanford.nlp.ie.crf.CRFClassifier -loadClassifier
* conll.ner.gz -textFile samplesentences.txt
- *
+ *
*
* When specifying all parameters in a properties file (train, test, or
* runtime):
- *
- *
+ *
+ *
* java -mx1g edu.stanford.nlp.ie.crf.CRFClassifier -prop propFile
- *
+ *
*
- * To train and test a simple NER model from the command line:
- * java -mx1000m edu.stanford.nlp.ie.crf.CRFClassifier
+ * To train and test a simple NER model from the command line:
+ * java -mx1000m edu.stanford.nlp.ie.crf.CRFClassifier
* -trainFile trainFile -testFile testFile -macro > output
*
*
- * To train with multiple files:
- * java -mx1000m edu.stanford.nlp.ie.crf.CRFClassifier
+ * To train with multiple files:
+ * java -mx1000m edu.stanford.nlp.ie.crf.CRFClassifier
* -trainFileList file1,file2,... -testFile testFile -macro > output
*
*
* To test on multiple files, use the -testFiles option and a comma
* separated list.
*
+ *
* Features are defined by a {@link edu.stanford.nlp.sequences.FeatureFactory}.
* {@link NERFeatureFactory} is used by default, and you should look
* there for feature templates and properties or flags that will cause
@@ -115,18 +116,19 @@
* to get a CRFClassifier is to deserialize one via the static
* {@link CRFClassifier#getClassifier(String)} methods, which return a
* deserialized classifier. You may then tag (classify the items of) documents
- * using either the assorted classify()
or the assorted
- * classify
methods in {@link AbstractSequenceClassifier}.
+ * using either the assorted classify()
methods here or the additional
+ * ones in {@link AbstractSequenceClassifier}.
* Probabilities assigned by the CRF can be interrogated using either the
* printProbsDocument()
or getCliqueTrees()
methods.
*
* @author Jenny Finkel
* @author Sonal Gupta (made the class generic)
* @author Mengqiu Wang (LOP implementation and non-linear CRF implementation)
- * TODO(mengqiu) need to move the embedding lookup and capitalization features into a FeatureFactory
*/
public class CRFClassifier extends AbstractSequenceClassifier {
+ // TODO(mengqiu) need to move the embedding lookup and capitalization features into a FeatureFactory
+
List> labelIndices;
Index tagIndex;
Pair entityMatrices;
@@ -495,7 +497,7 @@ private int[][][] transformDocData(int[][][] docData) {
int[] cliqueFeatures = docData[i][j];
transData[i][j] = new int[cliqueFeatures.length];
for (int n = 0; n < cliqueFeatures.length; n++) {
- int transFeatureIndex = -1;
+ int transFeatureIndex; // initialized below;
if (j == 0) {
transFeatureIndex = nodeFeatureIndicesMap.indexOf(cliqueFeatures[n]);
if (transFeatureIndex == -1)
@@ -1369,8 +1371,7 @@ public void printProbsDocument(List document) {
* label at each point. This gives a simple way to examine the probability
* distributions of the CRF. See getCliqueTrees()
for more.
*
- * @param filename
- * The path to the specified file
+ * @param filename The path to the specified file
*/
public void printFirstOrderProbs(String filename, DocumentReaderAndWriter readerAndWriter) {
// only for the OCR data does this matter
diff --git a/src/edu/stanford/nlp/ie/crf/CRFCliqueTree.java b/src/edu/stanford/nlp/ie/crf/CRFCliqueTree.java
index a85ba0d985..4f1ea07287 100644
--- a/src/edu/stanford/nlp/ie/crf/CRFCliqueTree.java
+++ b/src/edu/stanford/nlp/ie/crf/CRFCliqueTree.java
@@ -20,23 +20,23 @@
*/
public class CRFCliqueTree implements ListeningSequenceModel {
- protected final FactorTable[] factorTables;
- protected final double z; // norm constant
- protected final Index classIndex;
+ private final FactorTable[] factorTables;
+ private final double z; // norm constant
+ private final Index classIndex;
private final E backgroundSymbol;
private final int backgroundIndex;
// the window size, which is also the clique size
- protected final int windowSize;
+ private final int windowSize;
// the number of possible classes for each label
private final int numClasses;
private final int[] possibleValues;
- /** Initialize a clique tree */
+ /** Initialize a clique tree. */
public CRFCliqueTree(FactorTable[] factorTables, Index classIndex, E backgroundSymbol) {
this(factorTables, classIndex, backgroundSymbol, factorTables[0].totalMass());
}
- /** This extra constructor was added to support the CRFCliqueTreeForPartialLabels */
+ /** This extra constructor was added to support the CRFCliqueTreeForPartialLabels. */
CRFCliqueTree(FactorTable[] factorTables, Index classIndex, E backgroundSymbol, double z) {
this.factorTables = factorTables;
this.z = z;
diff --git a/src/edu/stanford/nlp/ie/crf/NERGUI.java b/src/edu/stanford/nlp/ie/crf/NERGUI.java
index 863bcb56a0..4791bed58f 100644
--- a/src/edu/stanford/nlp/ie/crf/NERGUI.java
+++ b/src/edu/stanford/nlp/ie/crf/NERGUI.java
@@ -77,7 +77,7 @@ private void createAndShowGUI() {
//Create and set up the window.
frame = new JFrame("Stanford Named Entity Recognizer");
- frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
+ frame.setDefaultCloseOperation(WindowConstants.EXIT_ON_CLOSE);
frame.getContentPane().setLayout(new BorderLayout());
frame.getContentPane().setPreferredSize(new Dimension(WIDTH, HEIGHT));
diff --git a/src/edu/stanford/nlp/ie/demo/NERDemo.java b/src/edu/stanford/nlp/ie/demo/NERDemo.java
index 053e2db1e5..994f10f01a 100644
--- a/src/edu/stanford/nlp/ie/demo/NERDemo.java
+++ b/src/edu/stanford/nlp/ie/demo/NERDemo.java
@@ -5,16 +5,21 @@
import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.CoreAnnotations;
+import edu.stanford.nlp.sequences.DocumentReaderAndWriter;
+import edu.stanford.nlp.util.Triple;
import java.util.List;
/** This is a demo of calling CRFClassifier programmatically.
*
- * Usage: {@code java -mx400m -cp "stanford-ner.jar:." NERDemo [serializedClassifier [fileName]] }
+ * Usage: {@code java -mx400m -cp "*" NERDemo [serializedClassifier [fileName]] }
*
* If arguments aren't specified, they default to
* classifiers/english.all.3class.distsim.crf.ser.gz and some hardcoded sample text.
+ * If run with arguments, it shows some of the ways to get k-best labelings and
+ * probabilities out with CRFClassifier. If run without arguments, it shows some of
+ * the alternative output formats that you can get.
*
* To use CRFClassifier from the command line:
*
@@ -43,14 +48,17 @@ public static void main(String[] args) throws Exception {
AbstractSequenceClassifier classifier = CRFClassifier.getClassifier(serializedClassifier);
- /* For either a file to annotate or for the hardcoded text example,
- this demo file shows two ways to process the output, for teaching
- purposes. For the file, it shows both how to run NER on a String
- and how to run it on a whole file. For the hard-coded String,
- it shows how to run it on a single sentence, and how to do this
- and produce an inline XML output format.
+ /* For either a file to annotate or for the hardcoded text example, this
+ demo file shows several ways to process the input, for teaching purposes.
*/
+
if (args.length > 1) {
+
+ /* For the file, it shows (1) how to run NER on a String, (2) how
+ to get the entities in the String with character offsets, and
+ (3) how to run NER on a whole file (without loading it into a String).
+ */
+
String fileContents = IOUtils.slurpFile(args[1]);
List> out = classifier.classify(fileContents);
for (List sentence : out) {
@@ -59,6 +67,7 @@ public static void main(String[] args) throws Exception {
}
System.out.println();
}
+
System.out.println("---");
out = classifier.classifyFile(args[1]);
for (List sentence : out) {
@@ -68,7 +77,36 @@ public static void main(String[] args) throws Exception {
System.out.println();
}
+ System.out.println("---");
+ List> list = classifier.classifyToCharacterOffsets(fileContents);
+ for (Triple item : list) {
+ System.out.println(item.first() + ": " + fileContents.substring(item.second(), item.third()));
+ }
+ System.out.println("---");
+ System.out.println("Ten best entity labelings");
+ DocumentReaderAndWriter readerAndWriter = classifier.makePlainTextReaderAndWriter();
+ classifier.classifyAndWriteAnswersKBest(args[1], 10, readerAndWriter);
+
+ System.out.println("---");
+ System.out.println("Per-token marginalized probabilities");
+ classifier.printProbs(args[1], readerAndWriter);
+
+ // -- This code prints out the first order (token pair) clique probabilities.
+ // -- But that output is a bit overwhelming, so we leave it commented out by default.
+ // System.out.println("---");
+ // System.out.println("First Order Clique Probabilities");
+ // ((CRFClassifier) classifier).printFirstOrderProbs(args[1], readerAndWriter);
+
} else {
+
+ /* For the hard-coded String, it shows how to run it on a single
+ sentence, and how to do this and produce several formats, including
+ slash tags and an inline XML output format. It also shows the full
+ contents of the {@code CoreLabel}s that are constructed by the
+ classifier. And it shows getting out the probabilities of different
+ assignments and an n-best list of classifications with probabilities.
+ */
+
String[] example = {"Good afternoon Rajat Raina, how are you today?",
"I go to school at Stanford University, which is located in California." };
for (String str : example) {
@@ -82,6 +120,13 @@ public static void main(String[] args) throws Exception {
}
System.out.println("---");
+ for (String str : example) {
+ // This one is best for dealing with the output as a TSV (tab-separated column) file.
+ // The first column gives entities, the second their classes, and the third the remaining text in a document
+ System.out.print(classifier.classifyToString(str, "tabbedEntities", false));
+ }
+ System.out.println("---");
+
for (String str : example) {
System.out.println(classifier.classifyWithInlineXML(str));
}
@@ -92,6 +137,24 @@ public static void main(String[] args) throws Exception {
}
System.out.println("---");
+ for (String str : example) {
+ System.out.print(classifier.classifyToString(str, "tsv", false));
+ }
+ System.out.println("---");
+
+ // This gets out entities with character offsets
+ int j = 0;
+ for (String str : example) {
+ j++;
+ List> triples = classifier.classifyToCharacterOffsets(str);
+ for (Triple trip : triples) {
+ System.out.printf("%s over character offsets [%d, %d) in sentence %d.%n",
+ trip.first(), trip.second(), trip.third(), j);
+ }
+ }
+ System.out.println("---");
+
+ // This prints out all the details of what is stored for each token
int i=0;
for (String str : example) {
for (List lcl : classifier.classify(str)) {
@@ -101,6 +164,9 @@ public static void main(String[] args) throws Exception {
}
}
}
+
+ System.out.println("---");
+
}
}
diff --git a/src/edu/stanford/nlp/ie/util/RelationTriple.java b/src/edu/stanford/nlp/ie/util/RelationTriple.java
new file mode 100644
index 0000000000..0e8a9f64aa
--- /dev/null
+++ b/src/edu/stanford/nlp/ie/util/RelationTriple.java
@@ -0,0 +1,338 @@
+package edu.stanford.nlp.ie.util;
+
+import edu.stanford.nlp.ling.CoreLabel;
+import edu.stanford.nlp.ling.IndexedWord;
+import edu.stanford.nlp.semgraph.SemanticGraph;
+import edu.stanford.nlp.semgraph.SemanticGraphEdge;
+import edu.stanford.nlp.semgraph.semgrex.SemgrexMatcher;
+import edu.stanford.nlp.semgraph.semgrex.SemgrexPattern;
+import edu.stanford.nlp.util.FixedPrioritiesPriorityQueue;
+import edu.stanford.nlp.util.PriorityQueue;
+import edu.stanford.nlp.util.StringUtils;
+
+import java.util.*;
+
+/**
+ * A (subject, relation, object) triple; e.g., as used in the KBP challenges or in OpenIE systems.
+ *
+ * @author Gabor Angeli
+ */
+@SuppressWarnings("UnusedDeclaration")
+public class RelationTriple implements Comparable<RelationTriple> {
+ /** The subject (first argument) of this triple */
+ public final List<CoreLabel> subject;
+ /** The relation (second argument) of this triple */
+ public final List<CoreLabel> relation;
+ /** The object (third argument) of this triple */
+ public final List<CoreLabel> object;
+ /** An optional score (confidence) for this triple */
+ public final double confidence;
+
+ /**
+ * Create a new triple with known values for the subject, relation, and object.
+ * For example, "(cats, play with, yarn)"
+ * @param subject The subject of this triple; e.g., "cats".
+ * @param relation The relation of this triple; e.g., "play with".
+ * @param object The object of this triple; e.g., "yarn".
+ */
+ public RelationTriple(List<CoreLabel> subject, List<CoreLabel> relation, List<CoreLabel> object,
+ double confidence) {
+ this.subject = subject;
+ this.relation = relation;
+ this.object = object;
+ this.confidence = confidence;
+ }
+
+ /**
+ * @see edu.stanford.nlp.ie.util.RelationTriple#RelationTriple(java.util.List, java.util.List, java.util.List, double)
+ */
+ public RelationTriple(List<CoreLabel> subject, List<CoreLabel> relation, List<CoreLabel> object) {
+ this(subject, relation, object, 1.0);
+ }
+
+ /** The subject of this relation triple, as a String */
+ public String subjectGloss() {
+ return StringUtils.join(subject.stream().map(CoreLabel::word), " ");
+ }
+
+ /** The object of this relation triple, as a String */
+ public String objectGloss() {
+ return StringUtils.join(object.stream().map(CoreLabel::word), " ");
+ }
+
+ /** The relation of this relation triple, as a String */
+ public String relationGloss() {
+ return StringUtils.join(relation.stream().map(CoreLabel::word), " ");
+ }
+
+ /** An optional method, returning the dependency tree this triple was extracted from */
+ public Optional<SemanticGraph> asDependencyTree() {
+ return Optional.empty();
+ }
+
+ /** Return the given relation triple as a flat sentence */
+ public List<CoreLabel> asSentence() {
+ PriorityQueue<CoreLabel> orderedSentence = new FixedPrioritiesPriorityQueue<>();
+ double defaultIndex = 0.0;
+ for (CoreLabel token : subject) {
+ orderedSentence.add(token, token.index() >= 0 ? (double) -token.index() : -defaultIndex);
+ defaultIndex += 1.0;
+ }
+ for (CoreLabel token : relation) {
+ orderedSentence.add(token, token.index() >= 0 ? (double) -token.index() : -defaultIndex);
+ defaultIndex += 1.0;
+ }
+ for (CoreLabel token : object) {
+ orderedSentence.add(token, token.index() >= 0 ? (double) -token.index() : -defaultIndex);
+ defaultIndex += 1.0;
+ }
+ return orderedSentence.toSortedList();
+ }
+
+
+ /** {@inheritDoc} */
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) return true;
+ if (!(o instanceof RelationTriple)) return false;
+ RelationTriple that = (RelationTriple) o;
+ return object.equals(that.object) && relation.equals(that.relation) && subject.equals(that.subject);
+ }
+
+ /** {@inheritDoc} */
+ @Override
+ public int hashCode() {
+ int result = subject.hashCode();
+ result = 31 * result + relation.hashCode();
+ result = 31 * result + object.hashCode();
+ return result;
+ }
+
+ /** Print a human-readable description of this relation triple, as a tab-separated line */
+ @Override
+ public String toString() {
+ return "" + this.confidence + "\t" + subjectGloss() + "\t" + relationGloss() + "\t" + objectGloss();
+ }
+
+ @Override
+ public int compareTo(RelationTriple o) {
+ if (this.confidence < o.confidence) {
+ return -1;
+ } else if (this.confidence > o.confidence) {
+ return 1;
+ } else {
+ return 0;
+ }
+ }
+
+ /**
+ * A {@link edu.stanford.nlp.ie.util.RelationTriple}, but with the tree saved as well.
+ */
+ protected static class WithTree extends RelationTriple {
+ public final SemanticGraph sourceTree;
+
+ /**
+ * Create a new triple with known values for the subject, relation, and object.
+ * For example, "(cats, play with, yarn)"
+ *
+ * @param subject The subject of this triple; e.g., "cats".
+ * @param relation The relation of this triple; e.g., "play with".
+ * @param object The object of this triple; e.g., "yarn".
+ * @param tree The tree this extraction was created from; we create a deep copy of the tree.
+ */
+ public WithTree(List<CoreLabel> subject, List<CoreLabel> relation, List<CoreLabel> object, SemanticGraph tree,
+ double confidence) {
+ super(subject, relation, object, confidence);
+ this.sourceTree = new SemanticGraph(tree);
+ }
+
+ /** {@inheritDoc} */
+ @Override
+ public Optional<SemanticGraph> asDependencyTree() {
+ return Optional.of(sourceTree);
+ }
+ }
+
+ /** A list of patterns to match relation extractions against */
+ private static final List<SemgrexPattern> PATTERNS = Collections.unmodifiableList(new ArrayList<SemgrexPattern>() {{
+ // { blue cats play [quietly] with yarn }
+ add(SemgrexPattern.compile("{$}=verb ?>/cop|auxpass/ {}=be >/.subj(pass)?/ {}=subject >/prep/ ({}=prep >/pobj/ {}=object)"));
+ // (w / collapsed dependencies)
+ add(SemgrexPattern.compile("{$}=verb ?>/cop|auxpass/ {}=be >/.subj(pass)?/ {}=subject >/prepc?_.*/=prepEdge {}=object"));
+ // { fish like to swim }
+ add(SemgrexPattern.compile("{$}=verb >/.subj(pass)?/ {}=subject >/xcomp/ {}=object"));
+ // { cats have tails }
+ add(SemgrexPattern.compile("{$}=verb ?>/auxpass/ {}=be >/.subj(pass)?/ {}=subject >/[di]obj|xcomp/ {}=object"));
+ // { cats are cute }
+ add(SemgrexPattern.compile("{$}=object >/.subj(pass)?/ {}=subject >/cop/ {}=verb"));
+ }});
+
+ /** A set of valid arcs denoting an entity we are interested in */
+ private static final Set<String> VALID_ENTITY_ARCS = Collections.unmodifiableSet(new HashSet<String>(){{
+ add("amod"); add("nn"); add("aux"); add("num"); add("prep"); add("nsubj"); add("prep_*");
+ }});
+
+ /** A set of valid arcs denoting an adverbial modifier we are interested in */
+ private static final Set<String> VALID_ADVERB_ARCS = Collections.unmodifiableSet(new HashSet<String>(){{
+ add("amod"); add("advmod"); add("conj"); add("cc"); add("conj_and"); add("conj_or"); add("auxpass");
+ }});
+
+ private static CoreLabel mockNode(CoreLabel toCopy, int offset, String word, String POS) {
+ CoreLabel mock = new CoreLabel(toCopy);
+ mock.setWord(word);
+ mock.setLemma(word);
+ mock.setValue(word);
+ mock.setNER("O");
+ mock.setTag(POS);
+ mock.setIndex(toCopy.index() + offset);
+ return mock;
+ }
+
+ /**
+ * @see RelationTriple#getValidEntityChunk(edu.stanford.nlp.semgraph.SemanticGraph, edu.stanford.nlp.ling.IndexedWord)
+ * @see RelationTriple#getValidAdverbChunk(edu.stanford.nlp.semgraph.SemanticGraph, edu.stanford.nlp.ling.IndexedWord)
+ */
+ private static Optional<List<CoreLabel>> getValidChunk(SemanticGraph parse, IndexedWord originalRoot, Set<String> validArcs) {
+ PriorityQueue<CoreLabel> chunk = new FixedPrioritiesPriorityQueue<>();
+ Queue<IndexedWord> fringe = new LinkedList<>();
+ IndexedWord root = originalRoot;
+ fringe.add(root);
+
+ boolean isCopula = false;
+ for (SemanticGraphEdge edge : parse.outgoingEdgeIterable(originalRoot)) {
+ if (edge.getRelation().getShortName().equals("cop")) {
+ isCopula = true;
+ }
+ }
+
+ while (!fringe.isEmpty()) {
+ root = fringe.poll();
+ chunk.add(root.backingLabel(), -root.index());
+ for (SemanticGraphEdge edge : parse.incomingEdgeIterable(root)) {
+ if (edge.getDependent() != originalRoot) {
+ if (edge.getRelation().toString().startsWith("prep_") || edge.getRelation().toString().startsWith("prepc_")) {
+ chunk.add(mockNode(edge.getGovernor().backingLabel(), 1, edge.getRelation().toString().substring(edge.getRelation().toString().indexOf("_") + 1), "PP"), -(((double) edge.getGovernor().index()) + 0.9));
+ }
+ if (edge.getRelation().getShortName().equals("conj")) {
+ chunk.add(mockNode(root.backingLabel(), -1, edge.getRelation().getSpecific(), "CC"), -(((double) root.index()) - 0.9));
+ }
+ }
+ }
+ for (SemanticGraphEdge edge : parse.getOutEdgesSorted(root)) {
+ String shortName = edge.getRelation().getShortName();
+ //noinspection StatementWithEmptyBody
+ if (isCopula && (shortName.equals("cop") || shortName.contains("subj"))) {
+ // noop; ignore nsubj and cop for extractions with copula
+ } else if (!validArcs.contains(edge.getRelation().getShortName().replaceAll("_.*","_*"))) {
+ return Optional.empty();
+ } else {
+ fringe.add(edge.getDependent());
+ }
+ }
+ }
+
+ return Optional.of(chunk.toSortedList());
+ }
+
+ /**
+ * Get the yield of a given subtree, if it is a valid entity.
+ * Otherwise, return {@link java.util.Optional#empty()}.
+ * @param parse The parse tree we are extracting a subtree from.
+ * @param root The root of the subtree.
+ * @return If this subtree is a valid entity, we return its yield. Otherwise, we return empty.
+ */
+ private static Optional<List<CoreLabel>> getValidEntityChunk(SemanticGraph parse, IndexedWord root) {
+ return getValidChunk(parse, root, VALID_ENTITY_ARCS);
+ }
+
+ /**
+ * Get the yield of a given subtree, if it is an adverb chunk.
+ * Otherwise, return {@link java.util.Optional#empty()}.
+ * @param parse The parse tree we are extracting a subtree from.
+ * @param root The root of the subtree.
+ * @return If this subtree is a valid adverb, we return its yield. Otherwise, we return empty.
+ */
+ private static Optional<List<CoreLabel>> getValidAdverbChunk(SemanticGraph parse, IndexedWord root) {
+ return getValidChunk(parse, root, VALID_ADVERB_ARCS);
+ }
+
+ /**
+ *
+ * Try to segment this sentence as a relation triple.
+ * This sentence must already match one of a few strict patterns for a valid OpenIE extraction.
+ * If it does not, then no relation triple is created.
+ * That is, this is not a relation extractor; it is just a utility to segment what is already a
+ * (subject, relation, object) triple into these three parts.
+ *
+ *
+ * @param parse The sentence to process, as a dependency tree.
+ * @param confidence An optional confidence to pass on to the relation triple.
+ * @return A relation triple, if this sentence matches one of the patterns of a valid relation triple.
+ */
+ public static Optional<RelationTriple> segment(SemanticGraph parse, Optional<Double> confidence) {
+ PATTERN_LOOP: for (SemgrexPattern pattern : PATTERNS) { // For every candidate pattern...
+ SemgrexMatcher m = pattern.matcher(parse);
+ if (m.matches()) { // ... see if it matches the sentence
+ // Verb
+ PriorityQueue<CoreLabel> verbChunk = new FixedPrioritiesPriorityQueue<>();
+ IndexedWord verb = m.getNode("verb");
+ IndexedWord prep = m.getNode("prep");
+ List<IndexedWord> adverbs = new ArrayList<>();
+ for (SemanticGraphEdge edge : parse.outgoingEdgeIterable(verb)) {
+ if ("advmod".equals(edge.getRelation().toString()) || "amod".equals(edge.getRelation().toString())) {
+ String tag = edge.getDependent().backingLabel().tag();
+ if (tag == null ||
+ (!tag.startsWith("W") && !edge.getDependent().backingLabel().word().equalsIgnoreCase("then"))) { // prohibit advmods like "where"
+ adverbs.add(edge.getDependent());
+ }
+ }
+ }
+ IndexedWord be = m.getNode("be");
+ String prepEdge = m.getRelnString("prepEdge");
+ verbChunk.add(verb.backingLabel(), -verb.index());
+ int numKnownDependents = 2; // subject and object, at minimum
+ if (prep != null) { verbChunk.add(prep.backingLabel(), -prep.index()); numKnownDependents += 1; }
+ if (be != null) { verbChunk.add(be.backingLabel(), -be.index()); numKnownDependents += 1; }
+ // (adverbs have to be well-formed)
+ if (!adverbs.isEmpty()) {
+ Set<CoreLabel> adverbialModifiers = new HashSet<>();
+ for (IndexedWord adv : adverbs) {
+ Optional<List<CoreLabel>> adverbChunk = getValidAdverbChunk(parse, adv);
+ if (adverbChunk.isPresent()) {
+ for (CoreLabel token : adverbChunk.get()) {
+ adverbialModifiers.add(token);
+ }
+ } else {
+ continue PATTERN_LOOP; // Invalid adverbial phrase
+ }
+ numKnownDependents += 1;
+ }
+ for (CoreLabel adverbToken : adverbialModifiers) {
+ verbChunk.add(adverbToken, -adverbToken.index());
+ }
+ }
+ // (add preposition edge)
+ if (prepEdge != null) {
+ verbChunk.add(mockNode(verb.backingLabel(), 1, prepEdge.substring(prepEdge.indexOf("_") + 1), "PP"), -(verb.index() + 10));
+ }
+ // (check for additional edges)
+ if (parse.outDegree(verb) > numKnownDependents) {
+ //noinspection UnnecessaryLabelOnContinueStatement
+ continue PATTERN_LOOP; // Too many outgoing edges; we didn't consume them all.
+ }
+ List<CoreLabel> relation = verbChunk.toSortedList();
+
+ // Subject+Object
+ Optional<List<CoreLabel>> subject = getValidEntityChunk(parse, m.getNode("subject"));
+ Optional<List<CoreLabel>> object = getValidEntityChunk(parse, m.getNode("object"));
+ // Create relation
+ if (subject.isPresent() && object.isPresent()) { // ... and has a valid subject+object
+ // Success! Found a valid extraction.
+ return Optional.of(new WithTree(subject.get(), relation, object.get(), parse, confidence.orElse(1.0)));
+ }
+ }
+ }
+ // Failed to match any pattern; return failure
+ return Optional.empty();
+ }
+}
diff --git a/src/edu/stanford/nlp/international/french/process/FrenchTokenizer.java b/src/edu/stanford/nlp/international/french/process/FrenchTokenizer.java
index fdc0983b37..51f9c7a9b5 100644
--- a/src/edu/stanford/nlp/international/french/process/FrenchTokenizer.java
+++ b/src/edu/stanford/nlp/international/french/process/FrenchTokenizer.java
@@ -137,11 +137,6 @@ public static TokenizerFactory newTokenizerFactory() {
}
/**
- * Constructs a new PTBTokenizer that returns Word objects and
- * uses the options passed in.
- * THIS METHOD IS INVOKED BY REFLECTION BY SOME OF THE JAVANLP
- * CODE TO LOAD A TOKENIZER FACTORY. IT SHOULD BE PRESENT IN A
- * TokenizerFactory.
* todo [cdm 2013]: But we should change it to a method that can return any kind of Label and return CoreLabel here
*
* @param options A String of options
@@ -241,7 +236,7 @@ private static String usage() {
sb.append(" -ftb : Tokenization for experiments in Green et al. (2011).").append(nl);
sb.append(" -lowerCase : Apply lowercasing.").append(nl);
sb.append(" -encoding type : Encoding format.").append(nl);
- sb.append(" -orthoOpts str : Orthographic options (see FrenchLexer.java)").append(nl);
+ sb.append(" -options str : Orthographic options (see FrenchLexer.java)").append(nl);
return sb.toString();
}
@@ -251,7 +246,7 @@ private static Map argOptionDefs() {
argOptionDefs.put("ftb", 0);
argOptionDefs.put("lowerCase", 0);
argOptionDefs.put("encoding", 1);
- argOptionDefs.put("orthoOpts", 1);
+ argOptionDefs.put("options", 1);
return argOptionDefs;
}
@@ -276,12 +271,11 @@ public static void main(String[] args) {
// Lexer options
final TokenizerFactory tf = options.containsKey("ftb") ?
FrenchTokenizer.ftbFactory() : FrenchTokenizer.factory();
- String orthoOptions = options.getProperty("orthoOpts", "");
- tf.setOptions(orthoOptions);
-
+ String orthoOptions = options.getProperty("options", "");
// When called from this main method, split on newline. No options for
// more granular sentence splitting.
- tf.setOptions("tokenizeNLs");
+ orthoOptions = orthoOptions.length() == 0 ? "tokenizeNLs" : orthoOptions + ",tokenizeNLs";
+ tf.setOptions(orthoOptions);
// Other options
final String encoding = options.getProperty("encoding", "UTF-8");
diff --git a/src/edu/stanford/nlp/international/spanish/SpanishVerbStripper.java b/src/edu/stanford/nlp/international/spanish/SpanishVerbStripper.java
index 360c04a0e5..c185b16e31 100644
--- a/src/edu/stanford/nlp/international/spanish/SpanishVerbStripper.java
+++ b/src/edu/stanford/nlp/international/spanish/SpanishVerbStripper.java
@@ -91,9 +91,9 @@ private void setupDictionary(String dictPath) {
} catch (UnsupportedEncodingException e) {
e.printStackTrace();
} catch (FileNotFoundException e) {
- throw new RuntimeException("Could not load Spanish data file " + dictPath);
+ System.err.println("Could not load Spanish data file " + dictPath);
} catch (IOException e) {
- throw new RuntimeException("Could not load Spanish data file " + dictPath);
+ System.err.println("Could not load Spanish data file " + dictPath);
}
}
diff --git a/src/edu/stanford/nlp/international/spanish/process/SpanishTokenizer.java b/src/edu/stanford/nlp/international/spanish/process/SpanishTokenizer.java
index 7dc339d10a..ed35f07f74 100644
--- a/src/edu/stanford/nlp/international/spanish/process/SpanishTokenizer.java
+++ b/src/edu/stanford/nlp/international/spanish/process/SpanishTokenizer.java
@@ -16,13 +16,11 @@
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.ling.CoreAnnotations.ParentAnnotation;
-import edu.stanford.nlp.ling.Word;
import edu.stanford.nlp.process.TokenizerFactory;
import edu.stanford.nlp.process.AbstractTokenizer;
import edu.stanford.nlp.process.CoreLabelTokenFactory;
import edu.stanford.nlp.process.LexedTokenFactory;
import edu.stanford.nlp.process.Tokenizer;
-import edu.stanford.nlp.process.WordTokenFactory;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.PropertiesUtils;
import edu.stanford.nlp.util.StringUtils;
@@ -61,7 +59,7 @@ public class SpanishTokenizer extends AbstractTokenizer {
private SpanishVerbStripper verbStripper;
// Produces the tokenization for parsing used by AnCora (fixed) */
- public static final String ANCORA_OPTS = "ptb3Ellipsis=true,normalizeParentheses=true,ptb3Dashes=false,splitAll=true";
+ public static final String ANCORA_OPTIONS = "ptb3Ellipsis=true,normalizeParentheses=true,ptb3Dashes=false,splitAll=true";
/**
* Constructor.
@@ -79,7 +77,7 @@ public SpanishTokenizer(Reader r, LexedTokenFactory tf, Properties lexerPrope
this.splitAny = (splitCompounds || splitVerbs || splitContractions);
if (splitAny) compoundBuffer = Generics.newLinkedList();
- verbStripper = SpanishVerbStripper.getInstance();
+ if (splitVerbs) verbStripper = SpanishVerbStripper.getInstance();
}
@Override
@@ -91,22 +89,22 @@ protected T getNext() {
// some tokens can be obliterated. In this case, keep iterating
// until we see a non-zero length token.
do {
- nextToken = (splitAny && compoundBuffer.size() > 0) ?
- (T) compoundBuffer.remove(0) :
+ nextToken = (splitAny && compoundBuffer.size() > 0) ?
+ (T) compoundBuffer.remove(0) :
(T) lexer.next();
} while (nextToken != null && nextToken.word().length() == 0);
// Check for compounds to split
if (splitAny && nextToken instanceof CoreLabel) {
CoreLabel cl = (CoreLabel) nextToken;
- if (cl.containsKey(ParentAnnotation.class)) {
- if(splitCompounds && cl.get(ParentAnnotation.class).equals(SpanishLexer.COMPOUND_ANNOTATION))
- nextToken = (T) processCompound(cl);
- else if (splitVerbs && cl.get(ParentAnnotation.class).equals(SpanishLexer.VB_PRON_ANNOTATION))
- nextToken = (T) processVerb(cl);
- else if (splitContractions && cl.get(ParentAnnotation.class).equals(SpanishLexer.CONTR_ANNOTATION))
- nextToken = (T) processContraction(cl);
- }
+ if (cl.containsKey(ParentAnnotation.class)) {
+ if(splitCompounds && cl.get(ParentAnnotation.class).equals(SpanishLexer.COMPOUND_ANNOTATION))
+ nextToken = (T) processCompound(cl);
+ else if (splitVerbs && cl.get(ParentAnnotation.class).equals(SpanishLexer.VB_PRON_ANNOTATION))
+ nextToken = (T) processVerb(cl);
+ else if (splitContractions && cl.get(ParentAnnotation.class).equals(SpanishLexer.CONTR_ANNOTATION))
+ nextToken = (T) processContraction(cl);
+ }
}
return nextToken;
@@ -119,11 +117,11 @@ else if (splitContractions && cl.get(ParentAnnotation.class).equals(SpanishLexer
/* Copies the CoreLabel cl with the new word part */
private CoreLabel copyCoreLabel(CoreLabel cl, String part) {
- CoreLabel newLabel = new CoreLabel(cl);
- newLabel.setWord(part);
- newLabel.setValue(part);
- newLabel.set(OriginalTextAnnotation.class, part);
- return newLabel;
+ CoreLabel newLabel = new CoreLabel(cl);
+ newLabel.setWord(part);
+ newLabel.setValue(part);
+ newLabel.set(OriginalTextAnnotation.class, part);
+ return newLabel;
}
/**
@@ -195,13 +193,6 @@ private CoreLabel processCompound(CoreLabel cl) {
return compoundBuffer.remove(0);
}
- /**
- * a factory that vends CoreLabel tokens with default tokenization.
- */
- public static TokenizerFactory coreLabelFactory() {
- return SpanishTokenizerFactory.newCoreLabelTokenizerFactory();
- }
-
/**
* recommended factory method
*/
@@ -210,7 +201,7 @@ public static TokenizerFactory factory(LexedTokenFactory<
}
public static TokenizerFactory factory(LexedTokenFactory factory) {
- return new SpanishTokenizerFactory(factory, ANCORA_OPTS);
+ return new SpanishTokenizerFactory(factory, ANCORA_OPTIONS);
}
/**
@@ -232,7 +223,7 @@ public static class SpanishTokenizerFactory implements Tokeni
protected boolean splitContractionOption = false;
public static TokenizerFactory newCoreLabelTokenizerFactory() {
- return new SpanishTokenizerFactory(new CoreLabelTokenFactory(), ANCORA_OPTS);
+ return new SpanishTokenizerFactory(new CoreLabelTokenFactory());
}
@@ -254,7 +245,6 @@ public static SpanishTokenizerFactory newSpanishTokenizer
/** Make a factory for SpanishTokenizers, default options */
private SpanishTokenizerFactory(LexedTokenFactory factory) {
this.factory = factory;
- setOptions(ANCORA_OPTS);
}
/** Make a factory for SpanishTokenizers, options passed in */
@@ -271,7 +261,7 @@ public Iterator getIterator(Reader r) {
@Override
public Tokenizer getTokenizer(Reader r) {
- return new SpanishTokenizer(r, factory, lexerProperties, splitCompoundOption, splitVerbOption, splitContractionOption);
+ return new SpanishTokenizer(r, factory, lexerProperties, splitCompoundOption, splitVerbOption, splitContractionOption);
}
/**
@@ -330,7 +320,25 @@ public Tokenizer getTokenizer(Reader r, String extraOptions) {
} // end static class SpanishTokenizerFactory
-
+ /**
+ * Returns a tokenizer with Ancora tokenization.
+ */
+ public static TokenizerFactory<CoreLabel> ancoraFactory() {
+ TokenizerFactory<CoreLabel> tf = SpanishTokenizerFactory.newCoreLabelTokenizerFactory();
+ tf.setOptions(ANCORA_OPTIONS);
+ return tf;
+ }
+
+ /**
+ * a factory that vends CoreLabel tokens with default tokenization.
+ */
+ public static TokenizerFactory<CoreLabel> coreLabelFactory() {
+ return SpanishTokenizerFactory.newCoreLabelTokenizerFactory();
+ }
+
+ public static TokenizerFactory<CoreLabel> factory() {
+ return coreLabelFactory();
+ }
private static String usage() {
StringBuilder sb = new StringBuilder();
@@ -341,8 +349,8 @@ private static String usage() {
sb.append(" -ancora : Tokenization style of AnCora (fixed).").append(nl);
sb.append(" -lowerCase : Apply lowercasing.").append(nl);
sb.append(" -encoding type : Encoding format.").append(nl);
- sb.append(" -orthoOpts str : Orthographic options (see SpanishLexer.java)").append(nl);
- sb.append(" -lines : Keep tokens as space-separated, not line separated.").append(nl);
+ sb.append(" -options str : Orthographic options (see SpanishLexer.java)").append(nl);
+ sb.append(" -tokens : Output tokens as line-separated instead of space-separated.").append(nl);
return sb.toString();
}
@@ -350,10 +358,11 @@ private static Map argOptionDefs() {
Map argOptionDefs = Generics.newHashMap();
argOptionDefs.put("help", 0);
argOptionDefs.put("ftb", 0);
+ argOptionDefs.put("ancora", 0);
argOptionDefs.put("lowerCase", 0);
argOptionDefs.put("encoding", 1);
- argOptionDefs.put("orthoOpts", 1);
- argOptionDefs.put("lines", 0);
+ argOptionDefs.put("options", 1);
+ argOptionDefs.put("tokens", 0);
return argOptionDefs;
}
@@ -377,17 +386,17 @@ public static void main(String[] args) {
// Lexer options
final TokenizerFactory tf = SpanishTokenizer.coreLabelFactory();
- if (options.containsKey("ancora"))
- tf.setOptions(ANCORA_OPTS);
- String orthoOptions = options.getProperty("orthoOpts", "");
+ String orthoOptions = options.containsKey("ancora") ? ANCORA_OPTIONS : "";
+ if (options.containsKey("options")) {
+ orthoOptions = orthoOptions.length() == 0 ? options.getProperty("options") : orthoOptions + "," + options.getProperty("options");
+ }
+ final boolean tokens = PropertiesUtils.getBool(options, "tokens", false);
+ if ( ! tokens) {
+ orthoOptions = orthoOptions.length() == 0 ? "tokenizeNLs" : orthoOptions + ",tokenizeNLs";
+ }
tf.setOptions(orthoOptions);
- // When called from this main method, split on newline. No options for
- // more granular sentence splitting.
- tf.setOptions("tokenizeNLs");
-
// Other options
- final boolean lines = options.containsKey("lines");
final String encoding = options.getProperty("encoding", "UTF-8");
final boolean toLower = PropertiesUtils.getBool(options, "lowerCase", false);
final Locale es = new Locale("es");
@@ -407,10 +416,7 @@ public static void main(String[] args) {
printSpace = false;
System.out.println();
} else {
- if (printSpace) {
- if (lines) System.out.print(" ");
- else System.out.println();
- }
+ if (printSpace) System.out.print(" ");
String outputToken = toLower ? word.toLowerCase(es) : word;
System.out.print(outputToken);
printSpace = true;
diff --git a/src/edu/stanford/nlp/ling/AbstractCoreLabel.java b/src/edu/stanford/nlp/ling/AbstractCoreLabel.java
index a679be7f12..173e92ab42 100644
--- a/src/edu/stanford/nlp/ling/AbstractCoreLabel.java
+++ b/src/edu/stanford/nlp/ling/AbstractCoreLabel.java
@@ -3,13 +3,39 @@
import edu.stanford.nlp.util.TypesafeMap;
public interface AbstractCoreLabel extends Label, HasWord, HasIndex, HasTag, HasLemma, HasOffset, TypesafeMap {
+
+ /**
+ * Return the named entity class of the label (or null if none).
+ *
+ * @return The NER class for the label
+ */
public String ner();
+ /**
+ * Set the named entity class of the label.
+ *
+ * @param ner The NER class for the label
+ */
public void setNER(String ner);
+ // These next two are a partial implementation of HasContext. Maybe clean this up someday?
+
public String originalText();
public void setOriginalText(String originalText);
+ /**
+ * Return a non-null String value for a key. This method is included
+ * for backwards compatibility with the removed class AbstractMapLabel.
+ * It is guaranteed to not return null; if the key is not present or
+ * has a null value, it returns the empty string (""). It is only valid to
+ * call this method when key is paired with a value of type String.
+ *
+ * @param <KEY> A key type with a String value
+ * @param key The key to return the value of.
+ * @return "" if the key is not in the map or has the value {@code null}
+ * and the String value of the key otherwise
+ */
public <KEY extends TypesafeMap.Key<String>> String getString(Class<KEY> key);
+
}
diff --git a/src/edu/stanford/nlp/ling/AnnotationLookup.java b/src/edu/stanford/nlp/ling/AnnotationLookup.java
index 3f43db5002..2cb96ed936 100644
--- a/src/edu/stanford/nlp/ling/AnnotationLookup.java
+++ b/src/edu/stanford/nlp/ling/AnnotationLookup.java
@@ -17,9 +17,9 @@ public enum KeyLookup {
WORD_KEY(CoreAnnotations.TextAnnotation.class, OldFeatureLabelKeys.WORD_KEY),
LEMMA_KEY(CoreAnnotations.LemmaAnnotation.class, OldFeatureLabelKeys.LEMMA_KEY),
CATEGORY_KEY(CoreAnnotations.CategoryAnnotation.class, OldFeatureLabelKeys.CATEGORY_KEY),
- PROJ_CAT_KEY(CoreAnnotations.ProjectedCategoryAnnotation.class, OldFeatureLabelKeys.PROJ_CAT_KEY),
- HEAD_WORD_KEY("edu.stanford.nlp.ling.TreeCoreAnnotations.HeadWordAnnotation", OldFeatureLabelKeys.HEAD_WORD_KEY),
- HEAD_TAG_KEY("edu.stanford.nlp.ling.TreeCoreAnnotations.HeadTagAnnotation", OldFeatureLabelKeys.HEAD_TAG_KEY),
+ //PROJ_CAT_KEY(CoreAnnotations.ProjectedCategoryAnnotation.class, OldFeatureLabelKeys.PROJ_CAT_KEY),
+ //HEAD_WORD_KEY("edu.stanford.nlp.ling.TreeCoreAnnotations.HeadWordAnnotation", OldFeatureLabelKeys.HEAD_WORD_KEY),
+ //HEAD_TAG_KEY("edu.stanford.nlp.ling.TreeCoreAnnotations.HeadTagAnnotation", OldFeatureLabelKeys.HEAD_TAG_KEY),
INDEX_KEY(CoreAnnotations.IndexAnnotation.class, OldFeatureLabelKeys.INDEX_KEY),
ARG_KEY(CoreAnnotations.ArgumentAnnotation.class, OldFeatureLabelKeys.ARG_KEY),
MARKING_KEY(CoreAnnotations.MarkingAnnotation.class, OldFeatureLabelKeys.MARKING_KEY),
diff --git a/src/edu/stanford/nlp/ling/CoreAnnotations.java b/src/edu/stanford/nlp/ling/CoreAnnotations.java
index 2210b81d0d..493c34ec5e 100644
--- a/src/edu/stanford/nlp/ling/CoreAnnotations.java
+++ b/src/edu/stanford/nlp/ling/CoreAnnotations.java
@@ -427,20 +427,6 @@ public Class getType() {
}
}
- /**
- * Keys from AbstractMapLabel (descriptions taken from that class)
- */
- /**
- * The standard key for storing a projected category in the map, as a String.
- * For any word (leaf node), the projected category is the syntactic category
- * of the maximal constituent headed by the word. Used in SemanticGraph.
- */
- public static class ProjectedCategoryAnnotation implements CoreAnnotation {
- public Class getType() {
- return String.class;
- }
- }
-
/**
* The standard key for a propbank label which is of type Argument
*/
diff --git a/src/edu/stanford/nlp/ling/CoreLabel.java b/src/edu/stanford/nlp/ling/CoreLabel.java
index aacebc6374..8ef92031ee 100644
--- a/src/edu/stanford/nlp/ling/CoreLabel.java
+++ b/src/edu/stanford/nlp/ling/CoreLabel.java
@@ -1,5 +1,6 @@
package edu.stanford.nlp.ling;
+import java.util.Arrays;
import java.util.Comparator;
import java.util.Map;
import java.util.TreeMap;
@@ -8,14 +9,13 @@
import edu.stanford.nlp.util.ArrayCoreMap;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.Generics;
-import edu.stanford.nlp.util.StringUtils;
/**
* A CoreLabel represents a single word with ancillary information
- * attached using CoreAnnotations. If the proper annotations are set,
- * the CoreLabel also provides convenient methods to access tags,
- * lemmas, etc.
+ * attached using CoreAnnotations.
+ * A CoreLabel also provides convenient methods to access tags,
+ * lemmas, etc. (if the proper annotations are set).
*
* A CoreLabel is a Map from keys (which are Class objects) to values,
* whose type is determined by the key. That is, it is a heterogeneous
@@ -29,7 +29,7 @@
* @author dramage
* @author rafferty
*/
-public class CoreLabel extends ArrayCoreMap implements AbstractCoreLabel, HasWord, HasTag, HasCategory, HasLemma, HasContext, HasIndex, HasOffset {
+public class CoreLabel extends ArrayCoreMap implements AbstractCoreLabel, HasCategory, HasContext {
private static final long serialVersionUID = 2L;
@@ -86,8 +86,11 @@ public CoreLabel(CoreMap label) {
* Returns a new CoreLabel instance based on the contents of the given
* label. Warning: The behavior of this method is a bit disjunctive!
* If label is a CoreMap (including CoreLabel), then its entire
- * contents is copied into this label. But, otherwise, just the
- * value() and word iff it implements HasWord is copied.
+ * contents is copied into this label.
+ * If label is an IndexedWord, then the backing label is copied over
+ * entirely.
+ * But, otherwise, just the
+ * value() and word iff it implements {@link HasWord} is copied.
*
* @param label Basis for this label
*/
@@ -100,6 +103,12 @@ public CoreLabel(Label label) {
for (Class key : cl.keySet()) {
set(key, cl.get(key));
}
+ } else if (label instanceof IndexedWord) {
+ CoreMap cl = ((IndexedWord) label).backingLabel();
+ setCapacity(cl.size());
+ for (Class key : cl.keySet()) {
+ set(key, cl.get(key));
+ }
} else {
if (label instanceof HasWord) {
setWord(((HasWord)label).word());
@@ -130,7 +139,7 @@ public CoreLabel(String[] keys, String[] values) {
* This allows you to read in arbitrary values from a file as features, for example.
*/
public static interface GenericAnnotation<T> extends CoreAnnotation<T> { }
- //Unchecked is below because eclipse can't handle the level of type inference if we correctly parameterize GenericAnnotation with String
+ //Unchecked is below because eclipse can't handle the level of type inference if we correctly parametrize GenericAnnotation with String
@SuppressWarnings("unchecked")
public static final Map<String, Class<? extends GenericAnnotation>> genericKeys = Generics.newHashMap();
@SuppressWarnings("unchecked")
@@ -139,7 +148,11 @@ public static interface GenericAnnotation extends CoreAnnotation { }
@SuppressWarnings("unchecked")
private void initFromStrings(String[] keys, String[] values) {
- for (int i = 0; i < Math.min(keys.length, values.length); i++) {
+ if (keys.length != values.length) {
+ throw new UnsupportedOperationException("Argument array lengths differ: " +
+ Arrays.toString(keys) + " vs. " + Arrays.toString(values));
+ }
+ for (int i = 0; i < keys.length; i++) {
String key = keys[i];
String value = values[i];
KeyLookup lookup = AnnotationLookup.getCoreKey(key);
@@ -183,14 +196,15 @@ private void initFromStrings(String[] keys, String[] values) {
this.set(lookup.coreKey, Double.parseDouble(values[i]));
} else if(valueClass == Long.class) {
this.set(lookup.coreKey, Long.parseLong(values[i]));
+ } else {
+ throw new RuntimeException("Can't handle " + valueClass);
}
} catch (Exception e) {
- e.printStackTrace();
// unexpected value type
- System.err.println("CORE: CoreLabel.initFromStrings: "
+ throw new UnsupportedOperationException("CORE: CoreLabel.initFromStrings: "
+ "Bad type for " + key
+ ". Value was: " + value
- + "; expected "+AnnotationLookup.getValueType(lookup.coreKey));
+ + "; expected "+AnnotationLookup.getValueType(lookup.coreKey), e);
}
}
}
@@ -266,16 +280,7 @@ public LabelFactory labelFactory() {
}
/**
- * Return a non-null String value for a key.
- * This method is included for backwards compatibility with AbstractMapLabel.
- * It is guaranteed to not return null; if the key is not present or
- * has a null value, it returns the empty string (""). It is only valid to
- * call this method when key is paired with a value of type String.
- *
- * @param A key type with a String value
- * @param key The key to return the value of.
- * @return "" if the key is not in the map or has the value null
- * and the String value of the key otherwise
+ * {@inheritDoc}
*/
@Override
public > String getString(Class key) {
@@ -287,13 +292,6 @@ public > String getString(Class key) {
}
- /**
- * {@inheritDoc}
- */
-// public int size() {
-// return map.size();
-// }
-
/**
* {@inheritDoc}
*/
@@ -326,8 +324,8 @@ public final String value() {
public void setWord(String word) {
String originalWord = get(CoreAnnotations.TextAnnotation.class);
set(CoreAnnotations.TextAnnotation.class, word);
- // pado feb 09: if you change the word, delete the lemma.
- // gabor dec 2012: check if there was a real change -- this remove is actually rather expensive if it gets called a lot
+ // Pado feb 09: if you change the word, delete the lemma.
+ // Gabor dec 2012: check if there was a real change -- this remove is actually rather expensive if it gets called a lot
if (word != null && !word.equals(originalWord) && containsKey(CoreAnnotations.LemmaAnnotation.class)) {
remove(CoreAnnotations.LemmaAnnotation.class);
}
@@ -439,14 +437,17 @@ public void setDocID(String docID) {
}
/**
- * Return the named entity class of the label (or null if none).
- *
- * @return String the word value for the label
+ * {@inheritDoc}
*/
+ @Override
public String ner() {
return get(CoreAnnotations.NamedEntityTagAnnotation.class);
}
+ /**
+ * {@inheritDoc}
+ */
+ @Override
public void setNER(String ner) {
set(CoreAnnotations.NamedEntityTagAnnotation.class, ner);
}
@@ -549,7 +550,7 @@ public void setEndPosition(int endPos) {
public enum OutputFormat {
VALUE_INDEX, VALUE, VALUE_TAG, VALUE_TAG_INDEX, MAP, VALUE_MAP, VALUE_INDEX_MAP, WORD, WORD_INDEX
- };
+ }
public static final OutputFormat DEFAULT_FORMAT = OutputFormat.VALUE_INDEX;
@@ -638,7 +639,7 @@ public String toString(OutputFormat format) {
if (index != null) {
buf.append('-').append((index).intValue());
}
- Map map2 = new TreeMap();
+ Map map2 = new TreeMap<>();
for(Class key : this.keySet()) {
String cls = key.getName();
// special shortening of all the Annotation classes
@@ -673,11 +674,7 @@ public String toString(OutputFormat format) {
return buf.toString();
}
- private static final Comparator> asClassComparator = new Comparator>() {
- @Override
- public int compare(Class> o1, Class> o2) {
- return o1.getName().compareTo(o2.getName());
- }
- };
+ private static final Comparator> asClassComparator =
+ (o1, o2) -> o1.getName().compareTo(o2.getName());
}
diff --git a/src/edu/stanford/nlp/ling/IndexedWord.java b/src/edu/stanford/nlp/ling/IndexedWord.java
index 84f3281aa7..91cb02a3eb 100644
--- a/src/edu/stanford/nlp/ling/IndexedWord.java
+++ b/src/edu/stanford/nlp/ling/IndexedWord.java
@@ -2,26 +2,32 @@
import java.util.Set;
-import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.StringUtils;
import edu.stanford.nlp.util.TypesafeMap;
/**
- * This class is mainly for use with RTE in terms of the methods it provides,
- * but on a more general level, it provides a {@link CoreLabel} that uses its
+ * This class provides a {@link CoreLabel} that uses its
* DocIDAnnotation, SentenceIndexAnnotation, and IndexAnnotation to implement
* Comparable/compareTo, hashCode, and equals. This means no other annotations,
* including the identity of the word, are taken into account when using these
- * methods.
- *
- * The actual implementation is to wrap a CoreLabel
.
- * This avoids breaking the equals()
and
- * hashCode()
contract and also avoids expensive copying
+ * methods. Historically, this class was introduced for and is mainly used in
+ * the RTE package, and it provides a number of methods that are really specific
+ * to that use case. A second use case is now the Stanford Dependencies code,
+ * where this class directly implements the "copy nodes" of section 4.6 of the
+ * Stanford Dependencies Manual, rather than these being placed directly in the
+ * backing CoreLabel. This was so there can stay one CoreLabel per token, despite
+ * there being multiple IndexedWord nodes, additional ones representing copy
+ * nodes.
+ *
+ * The actual implementation is to wrap a {@code CoreLabel}.
+ * This avoids breaking the {@code equals()} and
+ * {@code hashCode()} contract and also avoids expensive copying
* when used to represent the same data as the original
- * CoreLabel
.
+ * {@code CoreLabel}.
*
* @author rafferty
- *
+ * @author John Bauer
+ * @author Sonal Gupta
*/
public class IndexedWord implements AbstractCoreLabel, Comparable {
@@ -34,7 +40,8 @@ public class IndexedWord implements AbstractCoreLabel, Comparable {
private final CoreLabel label;
- private int copyCount = 0;
+ private int copyCount; // = 0;
+
/**
* Default constructor; uses {@link CoreLabel} default constructor
*/
@@ -102,38 +109,46 @@ public IndexedWord makeSoftCopy(int count) {
}
/**
- * TODO: would be nice to get rid of this. Only used in two places in RTE.
+ * TODO: get rid of this. Only used in two places in RTE (in rewriter code)
*/
public CoreLabel backingLabel() { return label; }
+ @Override
public VALUE get(Class extends TypesafeMap.Key> key) {
return label.get(key);
}
+ @Override
public boolean has(Class extends TypesafeMap.Key> key) {
return label.has(key);
}
+ @Override
public boolean containsKey(Class extends TypesafeMap.Key> key) {
return label.containsKey(key);
}
+ @Override
public VALUE set(Class extends TypesafeMap.Key> key, VALUE value) {
return label.set(key, value);
}
+ @Override
public > String getString(Class key) {
return label.getString(key);
}
+ @Override
public VALUE remove(Class extends Key> key) {
return label.remove(key);
}
+ @Override
public Set> keySet() {
return label.keySet();
}
+ @Override
public int size() {
return label.size();
}
@@ -346,6 +361,7 @@ public int hashCode() {
* @param w The IndexedWord to compare with
* @return Whether this is less than w or not in the ordering
*/
+ @Override
public int compareTo(IndexedWord w) {
if (this.equals(IndexedWord.NO_WORD)) {
if (w.equals(IndexedWord.NO_WORD)) {
@@ -395,26 +411,31 @@ public void setFromString(String labelStr) {
public static LabelFactory factory() {
return new LabelFactory() {
+ @Override
public Label newLabel(String labelStr) {
- CoreLabel label = new CoreLabel();
- label.setValue(labelStr);
- return new IndexedWord(label);
+ CoreLabel coreLabel = new CoreLabel();
+ coreLabel.setValue(labelStr);
+ return new IndexedWord(coreLabel);
}
+ @Override
public Label newLabel(String labelStr, int options) {
return newLabel(labelStr);
}
+ @Override
public Label newLabel(Label oldLabel) {
return new IndexedWord(oldLabel);
}
+ @Override
public Label newLabelFromString(String encodedLabelStr) {
throw new UnsupportedOperationException("This code branch left blank" +
" because we do not understand what this method should do.");
}
};
}
+
/**
* {@inheritDoc}
*/
@@ -422,4 +443,5 @@ public Label newLabelFromString(String encodedLabelStr) {
public LabelFactory labelFactory() {
return IndexedWord.factory();
}
+
}
diff --git a/src/edu/stanford/nlp/ling/tokensregex/NodePattern.java b/src/edu/stanford/nlp/ling/tokensregex/NodePattern.java
index 8a876cc530..51e28b520b 100644
--- a/src/edu/stanford/nlp/ling/tokensregex/NodePattern.java
+++ b/src/edu/stanford/nlp/ling/tokensregex/NodePattern.java
@@ -2,7 +2,6 @@
import edu.stanford.nlp.util.StringUtils;
-import java.io.Serializable;
import java.util.List;
/**
@@ -10,7 +9,7 @@
*
* @author Angel Chang
*/
-public abstract class NodePattern implements Serializable{
+public abstract class NodePattern {
public static final NodePattern ANY_NODE = new AnyNodePattern();
diff --git a/src/edu/stanford/nlp/ling/tokensregex/SequenceMatcher.java b/src/edu/stanford/nlp/ling/tokensregex/SequenceMatcher.java
index 79f6bffff5..5ec79bef4f 100644
--- a/src/edu/stanford/nlp/ling/tokensregex/SequenceMatcher.java
+++ b/src/edu/stanford/nlp/ling/tokensregex/SequenceMatcher.java
@@ -459,7 +459,7 @@ protected boolean findMatchStart(int start, boolean matchAllTokens)
protected boolean findMatchStartNoBacktracking(int start, boolean matchAllTokens)
{
boolean matchAll = true;
- MatchedStates cStates = getStartStates();
+ MatchedStates cStates = getStartStates();
// Save cStates for FIND_ALL ....
curMatchStates = cStates;
for(int i = start; i < regionEnd; i++){
@@ -702,6 +702,7 @@ public T get(int i)
return elements.get(i);
}
+ /** Returns a non-null MatchedStates, which has a non-empty states list inside. */
private MatchedStates getStartStates()
{
return new MatchedStates(this, pattern.root);
diff --git a/src/edu/stanford/nlp/ling/tokensregex/SequencePattern.java b/src/edu/stanford/nlp/ling/tokensregex/SequencePattern.java
index 92722b6a1a..8c5af2bbe1 100644
--- a/src/edu/stanford/nlp/ling/tokensregex/SequencePattern.java
+++ b/src/edu/stanford/nlp/ling/tokensregex/SequencePattern.java
@@ -2,10 +2,6 @@
import edu.stanford.nlp.util.*;
-import java.io.IOException;
-import java.io.ObjectInputStream;
-import java.io.ObjectOutputStream;
-import java.io.Serializable;
import java.util.*;
import java.util.function.Function;
@@ -81,15 +77,12 @@
* see {@link MultiCoreMapNodePattern} for example)
* Conjunctions - conjunctions of sequence patterns (works for some cases)
*
- *
- *
- * Note that this and the inherited classes do not implement any custom equals and hashCode functions.
*
*
* @author Angel Chang
* @see SequenceMatcher
*/
-public class SequencePattern implements Serializable {
+public class SequencePattern {
// TODO:
// 1. Validate backref capture groupid
// 2. Actions
@@ -99,7 +92,6 @@ public class SequencePattern implements Serializable {
private String patternStr;
private PatternExpr patternExpr;
private SequenceMatchAction action;
-
State root;
int totalGroups = 0;
@@ -268,7 +260,7 @@ public boolean matches(Object o1, Object o2) {
/**
* Represents a sequence pattern expressions (before translating into NFA).
*/
- public abstract static class PatternExpr implements Serializable {
+ public abstract static class PatternExpr {
protected abstract Frag build();
@@ -1749,36 +1741,6 @@ protected void updateKeepBids(Set bids) {
}
}
-
-
- private void readObject(ObjectInputStream ois)
- throws IOException, ClassNotFoundException {
- patternStr = (String)ois.readObject();
-
- patternExpr = (PatternExpr) ois.readObject();
- //this.patternStr = patternStr;
- //this.patternExpr = nodeSequencePattern;
- action = (SequenceMatchAction) ois.readObject();
-
- patternExpr = new GroupPatternExpr(patternExpr, true);
- patternExpr = patternExpr.optimize();
- this.totalGroups = patternExpr.assignGroupIds(0);
- Frag f = patternExpr.build();
- f.connect(MATCH_STATE);
- this.root = f.start;
- varGroupBindings = new VarGroupBindings(totalGroups+1);
- patternExpr.updateBindings(varGroupBindings);
- }
-
-
- private void writeObject(ObjectOutputStream oos)
- throws IOException {
- oos.writeObject(toString());
- oos.writeObject(this.getPatternExpr());
- oos.writeObject(this.getAction());
-
- } // public void writeObject()
-
// States for matching conjunctions
// - Basic, not well tested implementation that may not work for all cases ...
// - Can be optimized to terminate earlier if one branch of the conjunction is known not to succeed
diff --git a/src/edu/stanford/nlp/ling/tokensregex/TokenSequencePattern.java b/src/edu/stanford/nlp/ling/tokensregex/TokenSequencePattern.java
index 1c7cca2fae..608e2828b0 100644
--- a/src/edu/stanford/nlp/ling/tokensregex/TokenSequencePattern.java
+++ b/src/edu/stanford/nlp/ling/tokensregex/TokenSequencePattern.java
@@ -3,9 +3,6 @@
import edu.stanford.nlp.ling.tokensregex.parser.TokenSequenceParser;
import edu.stanford.nlp.util.*;
-import java.io.IOException;
-import java.io.ObjectInputStream;
-import java.io.ObjectOutputStream;
import java.util.*;
/**
@@ -273,7 +270,6 @@ public String toString(){
return this.pattern();
}
-
/**
* Create a multi-pattern matcher for matching across multiple TokensRegex patterns
* @param patterns Collection of input patterns
diff --git a/src/edu/stanford/nlp/ling/tokensregex/parser/TokenSequenceParser.java b/src/edu/stanford/nlp/ling/tokensregex/parser/TokenSequenceParser.java
index 02a53fb90c..7e1d8e231f 100644
--- a/src/edu/stanford/nlp/ling/tokensregex/parser/TokenSequenceParser.java
+++ b/src/edu/stanford/nlp/ling/tokensregex/parser/TokenSequenceParser.java
@@ -180,17 +180,15 @@ final public AssignableExpression AssignableExpression(Env env) throws ParseExce
final public Expression Expression(Env env) throws ParseException {
Expression expr;
- if (jj_2_8(5)) {
+ if (jj_2_8(4)) {
expr = NestedFunctionCallExpression(env);
- } else if (jj_2_9(5)) {
+ } else if (jj_2_9(4)) {
expr = NestedVarExpression(env);
- } else if (jj_2_10(5)) {
+ } else if (jj_2_10(4)) {
expr = ValueExpression(env);
- } else if (jj_2_11(5)) {
+ } else if (jj_2_11(4)) {
expr = ListExpression(env);
- } else if (jj_2_12(5)) {
- expr = ListExpression2(env);
- } else if (jj_2_13(5)) {
+ } else if (jj_2_12(4)) {
expr = CaseExpression(env);
} else {
jj_consume_token(-1);
@@ -229,7 +227,6 @@ final public Expression FunctionCallExpression(Env env) throws ParseException {
case STR:
case 22:
case 25:
- case 31:
case 38:
param = Expression(env);
params.add(param);
@@ -441,7 +438,6 @@ final public Expression MethodCallExpression(Env env, Expression parent) throws
case STR:
case 22:
case 25:
- case 31:
case 38:
param = Expression(env);
params.add(param);
@@ -487,15 +483,15 @@ final public AssignableExpression AssignableNestedVarExpression(Env env) throws
jj_la1[12] = jj_gen;
break label_5;
}
- if (jj_2_14(2)) {
+ if (jj_2_13(2)) {
i = Index();
expr = new Expressions.IndexedExpression(expr, i);
- } else if (jj_2_15(2)) {
+ } else if (jj_2_14(2)) {
jj_consume_token(31);
fieldExpr = Expression(env);
expr = new Expressions.FieldExpression(expr, fieldExpr);
jj_consume_token(32);
- } else if (jj_2_16(2)) {
+ } else if (jj_2_15(2)) {
jj_consume_token(35);
s = RelaxedString();
expr = new Expressions.FieldExpression(expr, s);
@@ -526,18 +522,18 @@ final public Expression NestedVarExpression(Env env) throws ParseException {
jj_la1[13] = jj_gen;
break label_6;
}
- if (jj_2_17(3)) {
+ if (jj_2_16(3)) {
i = Index();
expr = new Expressions.IndexedExpression(expr, i);
- } else if (jj_2_18(3)) {
+ } else if (jj_2_17(3)) {
jj_consume_token(31);
fieldExpr = Expression(env);
expr = new Expressions.FieldExpression(expr, fieldExpr);
jj_consume_token(32);
- } else if (jj_2_19(3)) {
+ } else if (jj_2_18(3)) {
jj_consume_token(35);
expr = MethodCallExpression(env, expr);
- } else if (jj_2_20(3)) {
+ } else if (jj_2_19(3)) {
jj_consume_token(35);
s = RelaxedString();
expr = new Expressions.FieldExpression(expr, s);
@@ -568,18 +564,18 @@ final public Expression NestedFunctionCallExpression(Env env) throws ParseExcept
jj_la1[14] = jj_gen;
break label_7;
}
- if (jj_2_21(3)) {
+ if (jj_2_20(3)) {
i = Index();
expr = new Expressions.IndexedExpression(expr, i);
- } else if (jj_2_22(3)) {
+ } else if (jj_2_21(3)) {
jj_consume_token(31);
fieldExpr = Expression(env);
expr = new Expressions.FieldExpression(expr, fieldExpr);
jj_consume_token(32);
- } else if (jj_2_23(3)) {
+ } else if (jj_2_22(3)) {
jj_consume_token(35);
expr = MethodCallExpression(env, expr);
- } else if (jj_2_24(3)) {
+ } else if (jj_2_23(3)) {
jj_consume_token(35);
s = RelaxedString();
expr = new Expressions.FieldExpression(expr, s);
@@ -617,36 +613,11 @@ final public Expression ListExpression(Env env) throws ParseException {
throw new Error("Missing return statement in function");
}
- final public Expression ListExpression2(Env env) throws ParseException {
- List exprs = new ArrayList();
- Expression expr;
- jj_consume_token(31);
- expr = Expression(env);
- exprs.add(expr);
- label_9:
- while (true) {
- switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
- case 33:
- ;
- break;
- default:
- jj_la1[16] = jj_gen;
- break label_9;
- }
- jj_consume_token(33);
- expr = Expression(env);
- exprs.add(expr);
- }
- jj_consume_token(32);
- {if (true) return new Expressions.ListExpression(Expressions.TYPE_LIST, exprs);}
- throw new Error("Missing return statement in function");
- }
-
final public Expression BasicCondExpression(Env env) throws ParseException {
Expression expr1 = null;
Expression expr2 = null;
Token op = null;
- if (jj_2_25(3)) {
+ if (jj_2_24(3)) {
expr1 = NestedVarExpression(env);
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case NUMCMP:
@@ -655,7 +626,7 @@ final public Expression BasicCondExpression(Env env) throws ParseException {
expr2 = Expression(env);
break;
default:
- jj_la1[17] = jj_gen;
+ jj_la1[16] = jj_gen;
;
}
if (op == null) {
@@ -663,7 +634,7 @@ final public Expression BasicCondExpression(Env env) throws ParseException {
} else {
{if (true) return new Expressions.ConditionalExpression(op.image, expr1, expr2);}
}
- } else if (jj_2_26(3)) {
+ } else if (jj_2_25(3)) {
expr1 = NestedFunctionCallExpression(env);
{if (true) return new Expressions.ConditionalExpression(expr1);}
} else {
@@ -689,7 +660,7 @@ final public Expression CondGroup(Env env) throws ParseException {
jj_consume_token(26);
break;
default:
- jj_la1[18] = jj_gen;
+ jj_la1[17] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
@@ -704,7 +675,7 @@ final public Expression CondExpression(Env env) throws ParseException {
Token op;
child = CondGroup(env);
conjChildren.add(child);
- label_10:
+ label_9:
while (true) {
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case 36:
@@ -712,8 +683,8 @@ final public Expression CondExpression(Env env) throws ParseException {
;
break;
default:
- jj_la1[19] = jj_gen;
- break label_10;
+ jj_la1[18] = jj_gen;
+ break label_9;
}
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case 36:
@@ -723,7 +694,7 @@ final public Expression CondExpression(Env env) throws ParseException {
op = jj_consume_token(37);
break;
default:
- jj_la1[20] = jj_gen;
+ jj_la1[19] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
@@ -760,7 +731,7 @@ final public Expression CaseExpression(Env env) throws ParseException {
Expression elseExpr = null;
jj_consume_token(38);
jj_consume_token(22);
- label_11:
+ label_10:
while (true) {
cond = CondExpression(env);
jj_consume_token(23);
@@ -774,7 +745,7 @@ final public Expression CaseExpression(Env env) throws ParseException {
jj_consume_token(30);
break;
default:
- jj_la1[21] = jj_gen;
+ jj_la1[20] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
@@ -788,8 +759,8 @@ final public Expression CaseExpression(Env env) throws ParseException {
;
break;
default:
- jj_la1[22] = jj_gen;
- break label_11;
+ jj_la1[21] = jj_gen;
+ break label_10;
}
}
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
@@ -799,7 +770,7 @@ final public Expression CaseExpression(Env env) throws ParseException {
elseExpr = Expression(env);
break;
default:
- jj_la1[23] = jj_gen;
+ jj_la1[22] = jj_gen;
;
}
jj_consume_token(24);
@@ -842,7 +813,7 @@ final public Object StringNumberValue(Env env) throws ParseException {
{if (true) return Double.valueOf(tok.image);}
break;
default:
- jj_la1[24] = jj_gen;
+ jj_la1[23] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
@@ -855,7 +826,7 @@ final public SequencePattern.PatternExpr SeqRegexBasic(Env env) throws ParseExce
MultiNodePattern multiNode;
SequencePattern.PatternExpr expr;
Object value = null;
- label_12:
+ label_11:
while (true) {
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case 31:
@@ -887,7 +858,7 @@ final public SequencePattern.PatternExpr SeqRegexBasic(Env env) throws ParseExce
expr = SeqBackRef(env);
break;
default:
- jj_la1[25] = jj_gen;
+ jj_la1[24] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
@@ -899,7 +870,7 @@ final public SequencePattern.PatternExpr SeqRegexBasic(Env env) throws ParseExce
expr = SeqRegexRepeatTimes(env, expr);
break;
default:
- jj_la1[26] = jj_gen;
+ jj_la1[25] = jj_gen;
;
}
children.add(expr);
@@ -920,8 +891,8 @@ final public SequencePattern.PatternExpr SeqRegexBasic(Env env) throws ParseExce
;
break;
default:
- jj_la1[27] = jj_gen;
- break label_12;
+ jj_la1[26] = jj_gen;
+ break label_11;
}
}
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
@@ -930,7 +901,7 @@ final public SequencePattern.PatternExpr SeqRegexBasic(Env env) throws ParseExce
value = Expression(env);
break;
default:
- jj_la1[28] = jj_gen;
+ jj_la1[27] = jj_gen;
;
}
if (children.size() != 1) {
@@ -963,13 +934,13 @@ final public SequencePattern.PatternExpr SeqRegexRepeatTimes(Env env, SequencePa
min = 1; max = -1;
break;
default:
- jj_la1[29] = jj_gen;
- if (jj_2_27(3)) {
+ jj_la1[28] = jj_gen;
+ if (jj_2_26(3)) {
jj_consume_token(22);
value = jj_consume_token(NONNEGINT);
jj_consume_token(24);
min = Integer.parseInt(value.image); max = min;
- } else if (jj_2_28(4)) {
+ } else if (jj_2_27(4)) {
jj_consume_token(22);
value = jj_consume_token(NONNEGINT);
jj_consume_token(33);
@@ -986,7 +957,7 @@ final public SequencePattern.PatternExpr SeqRegexRepeatTimes(Env env, SequencePa
min = Integer.parseInt(value.image); max = Integer.parseInt(v2.image);
break;
default:
- jj_la1[30] = jj_gen;
+ jj_la1[29] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
@@ -998,7 +969,7 @@ final public SequencePattern.PatternExpr SeqRegexRepeatTimes(Env env, SequencePa
greedy = false;
break;
default:
- jj_la1[31] = jj_gen;
+ jj_la1[30] = jj_gen;
;
}
{if (true) return new SequencePattern.RepeatPatternExpr(expr, min, max, greedy);}
@@ -1010,7 +981,7 @@ final public SequencePattern.PatternExpr SeqRegexDisj(Env env) throws ParseExcep
SequencePattern.PatternExpr expr;
expr = SeqRegexBasic(env);
children.add(expr);
- label_13:
+ label_12:
while (true) {
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case 37:
@@ -1018,8 +989,8 @@ final public SequencePattern.PatternExpr SeqRegexDisj(Env env) throws ParseExcep
;
break;
default:
- jj_la1[32] = jj_gen;
- break label_13;
+ jj_la1[31] = jj_gen;
+ break label_12;
}
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case 43:
@@ -1029,7 +1000,7 @@ final public SequencePattern.PatternExpr SeqRegexDisj(Env env) throws ParseExcep
jj_consume_token(37);
break;
default:
- jj_la1[33] = jj_gen;
+ jj_la1[32] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
@@ -1048,7 +1019,7 @@ final public SequencePattern.PatternExpr SeqRegexDisjConj(Env env) throws ParseE
Token op;
child = SeqRegexBasic(env);
conjChildren.add(child);
- label_14:
+ label_13:
while (true) {
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case 36:
@@ -1058,8 +1029,8 @@ final public SequencePattern.PatternExpr SeqRegexDisjConj(Env env) throws ParseE
;
break;
default:
- jj_la1[34] = jj_gen;
- break label_14;
+ jj_la1[33] = jj_gen;
+ break label_13;
}
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case 44:
@@ -1075,7 +1046,7 @@ final public SequencePattern.PatternExpr SeqRegexDisjConj(Env env) throws ParseE
op = jj_consume_token(37);
break;
default:
- jj_la1[35] = jj_gen;
+ jj_la1[34] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
@@ -1124,13 +1095,13 @@ final public SequencePattern.PatternExpr SeqRegexGroup(Env env) throws ParseExce
varname = var.image;
break;
default:
- jj_la1[36] = jj_gen;
+ jj_la1[35] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
break;
default:
- jj_la1[37] = jj_gen;
+ jj_la1[36] = jj_gen;
;
}
expr = SeqRegex(env);
@@ -1145,7 +1116,7 @@ final public SequencePattern.PatternExpr SeqRegexGroup(Env env) throws ParseExce
final public NodePattern BracketedNode(Env env) throws ParseException {
NodePattern node;
- if (jj_2_29(2)) {
+ if (jj_2_28(2)) {
jj_consume_token(31);
jj_consume_token(32);
node = NodePattern.ANY_NODE;
@@ -1157,7 +1128,7 @@ final public NodePattern BracketedNode(Env env) throws ParseException {
jj_consume_token(32);
break;
default:
- jj_la1[38] = jj_gen;
+ jj_la1[37] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
@@ -1203,7 +1174,7 @@ final public NodePattern Node(Env env) throws ParseException {
node = NodeGroup(env);
break;
default:
- jj_la1[39] = jj_gen;
+ jj_la1[38] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
@@ -1216,7 +1187,7 @@ final public NodePattern NodeDisj(Env env) throws ParseException {
NodePattern child;
child = NodeGroup(env);
children.add(child);
- label_15:
+ label_14:
while (true) {
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case 37:
@@ -1224,8 +1195,8 @@ final public NodePattern NodeDisj(Env env) throws ParseException {
;
break;
default:
- jj_la1[40] = jj_gen;
- break label_15;
+ jj_la1[39] = jj_gen;
+ break label_14;
}
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case 43:
@@ -1235,7 +1206,7 @@ final public NodePattern NodeDisj(Env env) throws ParseException {
jj_consume_token(37);
break;
default:
- jj_la1[41] = jj_gen;
+ jj_la1[40] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
@@ -1254,7 +1225,7 @@ final public NodePattern NodeConj(Env env) throws ParseException {
List children = new ArrayList();
child = NodeGroup(env);
children.add(child);
- label_16:
+ label_15:
while (true) {
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case 36:
@@ -1262,8 +1233,8 @@ final public NodePattern NodeConj(Env env) throws ParseException {
;
break;
default:
- jj_la1[42] = jj_gen;
- break label_16;
+ jj_la1[41] = jj_gen;
+ break label_15;
}
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case 44:
@@ -1273,7 +1244,7 @@ final public NodePattern NodeConj(Env env) throws ParseException {
jj_consume_token(36);
break;
default:
- jj_la1[43] = jj_gen;
+ jj_la1[42] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
@@ -1294,7 +1265,7 @@ final public NodePattern NodeDisjConj(Env env) throws ParseException {
Token op;
child = NodeGroup(env);
conjChildren.add(child);
- label_17:
+ label_16:
while (true) {
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case 36:
@@ -1304,8 +1275,8 @@ final public NodePattern NodeDisjConj(Env env) throws ParseException {
;
break;
default:
- jj_la1[44] = jj_gen;
- break label_17;
+ jj_la1[43] = jj_gen;
+ break label_16;
}
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case 44:
@@ -1321,7 +1292,7 @@ final public NodePattern NodeDisjConj(Env env) throws ParseException {
op = jj_consume_token(37);
break;
default:
- jj_la1[45] = jj_gen;
+ jj_la1[44] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
@@ -1352,13 +1323,13 @@ final public NodePattern NodeDisjConj(Env env) throws ParseException {
final public NodePattern NodeGroup(Env env) throws ParseException {
NodePattern node;
- if (jj_2_30(2)) {
+ if (jj_2_29(2)) {
node = NodeBasic(env);
- } else if (jj_2_31(2)) {
+ } else if (jj_2_30(2)) {
jj_consume_token(25);
node = NodeDisjConj(env);
jj_consume_token(26);
- } else if (jj_2_32(2)) {
+ } else if (jj_2_31(2)) {
jj_consume_token(46);
jj_consume_token(25);
node = NodeDisjConj(env);
@@ -1390,7 +1361,7 @@ final public NodePattern NodeBasic(Env env) throws ParseException {
{if (true) return child;}
break;
default:
- jj_la1[46] = jj_gen;
+ jj_la1[45] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
@@ -1410,7 +1381,7 @@ final public NodePattern CoreMapNode(Env env) throws ParseException {
case 22:
jj_consume_token(22);
AttrValue(env, attributes);
- label_18:
+ label_17:
while (true) {
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case 30:
@@ -1418,8 +1389,8 @@ final public NodePattern CoreMapNode(Env env) throws ParseException {
;
break;
default:
- jj_la1[47] = jj_gen;
- break label_18;
+ jj_la1[46] = jj_gen;
+ break label_17;
}
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case 33:
@@ -1429,7 +1400,7 @@ final public NodePattern CoreMapNode(Env env) throws ParseException {
jj_consume_token(30);
break;
default:
- jj_la1[48] = jj_gen;
+ jj_la1[47] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
@@ -1438,8 +1409,8 @@ final public NodePattern CoreMapNode(Env env) throws ParseException {
jj_consume_token(24);
break;
default:
- jj_la1[50] = jj_gen;
- if (jj_2_33(2)) {
+ jj_la1[49] = jj_gen;
+ if (jj_2_32(2)) {
AttrValue(env, attributes);
} else {
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
@@ -1455,14 +1426,14 @@ final public NodePattern CoreMapNode(Env env) throws ParseException {
value = jj_consume_token(REGEX);
break;
default:
- jj_la1[49] = jj_gen;
+ jj_la1[48] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
attributes.put("word", value.image);
break;
default:
- jj_la1[51] = jj_gen;
+ jj_la1[50] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
@@ -1482,7 +1453,7 @@ final public NodePattern CoreMapNode(Env env) throws ParseException {
{if (true) return pat;}
break;
default:
- jj_la1[52] = jj_gen;
+ jj_la1[51] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
@@ -1512,7 +1483,7 @@ final public Map AttrValue(Env env, Map attributes
str = CoreMapVarValue(env);
break;
default:
- jj_la1[53] = jj_gen;
+ jj_la1[52] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
@@ -1533,13 +1504,13 @@ final public Map AttrValue(Env env, Map attributes
str = CoreMapVarValue(env);
break;
default:
- jj_la1[54] = jj_gen;
+ jj_la1[53] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
break;
default:
- jj_la1[55] = jj_gen;
+ jj_la1[54] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
@@ -1585,7 +1556,7 @@ final public NodePattern CoreMapWordPattern(Env env) throws ParseException {
value = jj_consume_token(STRSIMPLE);
break;
default:
- jj_la1[56] = jj_gen;
+ jj_la1[55] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
@@ -1603,13 +1574,13 @@ final public MultiNodePattern MultiNodePattern(Env env) throws ParseException {
jj_consume_token(50);
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case 22:
- if (jj_2_34(3)) {
+ if (jj_2_33(3)) {
jj_consume_token(22);
v1 = jj_consume_token(NONNEGINT);
jj_consume_token(24);
min = Integer.parseInt(v1.image);
max = Integer.parseInt(v1.image);
- } else if (jj_2_35(4)) {
+ } else if (jj_2_34(4)) {
jj_consume_token(22);
v1 = jj_consume_token(NONNEGINT);
jj_consume_token(33);
@@ -1627,14 +1598,14 @@ final public MultiNodePattern MultiNodePattern(Env env) throws ParseException {
max = Integer.parseInt(v2.image);
break;
default:
- jj_la1[57] = jj_gen;
+ jj_la1[56] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
}
break;
default:
- jj_la1[58] = jj_gen;
+ jj_la1[57] = jj_gen;
;
}
pat = CoreMapWordPattern(env);
@@ -1675,7 +1646,7 @@ final public Pair> Seq
action = Action(env);
break;
default:
- jj_la1[59] = jj_gen;
+ jj_la1[58] = jj_gen;
;
}
{if (true) return new Pair>(expr,action);}
@@ -1705,7 +1676,7 @@ final public Map SetAttrValues(Env env) throws ParseException {
Map attributes = new ArrayMap();
jj_consume_token(22);
SetAttrValue(env, attributes);
- label_19:
+ label_18:
while (true) {
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case 30:
@@ -1713,8 +1684,8 @@ final public Map SetAttrValues(Env env) throws ParseException {
;
break;
default:
- jj_la1[60] = jj_gen;
- break label_19;
+ jj_la1[59] = jj_gen;
+ break label_18;
}
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case 33:
@@ -1724,7 +1695,7 @@ final public Map SetAttrValues(Env env) throws ParseException {
jj_consume_token(30);
break;
default:
- jj_la1[61] = jj_gen;
+ jj_la1[60] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
@@ -1752,7 +1723,7 @@ final public Map SetAttrValue(Env env, Map attribu
value = NumberToken();
break;
default:
- jj_la1[62] = jj_gen;
+ jj_la1[61] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
@@ -1780,7 +1751,7 @@ final public Token NumberToken() throws ParseException {
value = jj_consume_token(REAL);
break;
default:
- jj_la1[63] = jj_gen;
+ jj_la1[62] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
@@ -1798,7 +1769,7 @@ final public Token IntegerToken() throws ParseException {
value = jj_consume_token(INT);
break;
default:
- jj_la1[64] = jj_gen;
+ jj_la1[63] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
@@ -1816,7 +1787,7 @@ final public Token CmpToken() throws ParseException {
value = jj_consume_token(NUMCMP);
break;
default:
- jj_la1[65] = jj_gen;
+ jj_la1[64] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
@@ -1834,7 +1805,7 @@ final public Token RelaxedStringToken() throws ParseException {
value = jj_consume_token(IDENTIFIER);
break;
default:
- jj_la1[66] = jj_gen;
+ jj_la1[65] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
@@ -1854,7 +1825,7 @@ final public String RelaxedString() throws ParseException {
{if (true) return value.image;}
break;
default:
- jj_la1[67] = jj_gen;
+ jj_la1[66] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
@@ -2117,213 +2088,251 @@ private boolean jj_2_34(int xla) {
finally { jj_save(33, xla); }
}
- private boolean jj_2_35(int xla) {
- jj_la = xla; jj_lastpos = jj_scanpos = token;
- try { return !jj_3_35(); }
- catch(LookaheadSuccess ls) { return true; }
- finally { jj_save(34, xla); }
- }
-
- private boolean jj_3_20() {
+ private boolean jj_3_19() {
if (jj_scan_token(35)) return true;
- if (jj_3R_32()) return true;
+ if (jj_3R_30()) return true;
return false;
}
- private boolean jj_3_19() {
+ private boolean jj_3_18() {
if (jj_scan_token(35)) return true;
- if (jj_3R_33()) return true;
+ if (jj_3R_31()) return true;
return false;
}
- private boolean jj_3_18() {
+ private boolean jj_3_17() {
if (jj_scan_token(31)) return true;
- if (jj_3R_31()) return true;
+ if (jj_3R_29()) return true;
if (jj_scan_token(32)) return true;
return false;
}
- private boolean jj_3R_43() {
+ private boolean jj_3R_41() {
Token xsp;
xsp = jj_scanpos;
+ if (jj_3_16()) {
+ jj_scanpos = xsp;
if (jj_3_17()) {
jj_scanpos = xsp;
if (jj_3_18()) {
jj_scanpos = xsp;
- if (jj_3_19()) {
- jj_scanpos = xsp;
- if (jj_3_20()) return true;
+ if (jj_3_19()) return true;
}
}
}
return false;
}
- private boolean jj_3_17() {
- if (jj_3R_30()) return true;
- return false;
- }
-
- private boolean jj_3R_112() {
- if (jj_scan_token(BACKREF)) return true;
+ private boolean jj_3_16() {
+ if (jj_3R_28()) return true;
return false;
}
- private boolean jj_3R_25() {
- if (jj_3R_42()) return true;
+ private boolean jj_3R_24() {
+ if (jj_3R_40()) return true;
Token xsp;
while (true) {
xsp = jj_scanpos;
- if (jj_3R_43()) { jj_scanpos = xsp; break; }
+ if (jj_3R_41()) { jj_scanpos = xsp; break; }
}
return false;
}
- private boolean jj_3_16() {
+ private boolean jj_3_15() {
if (jj_scan_token(35)) return true;
- if (jj_3R_32()) return true;
+ if (jj_3R_30()) return true;
return false;
}
- private boolean jj_3R_111() {
- if (jj_scan_token(REGEXVAR)) return true;
+ private boolean jj_3R_107() {
+ if (jj_scan_token(BACKREF)) return true;
return false;
}
- private boolean jj_3_15() {
+ private boolean jj_3_14() {
if (jj_scan_token(31)) return true;
- if (jj_3R_31()) return true;
+ if (jj_3R_29()) return true;
return false;
}
- private boolean jj_3_14() {
- if (jj_3R_30()) return true;
+ private boolean jj_3_13() {
+ if (jj_3R_28()) return true;
return false;
}
- private boolean jj_3R_72() {
+ private boolean jj_3R_69() {
Token xsp;
xsp = jj_scanpos;
- if (jj_3_14()) {
+ if (jj_3_13()) {
jj_scanpos = xsp;
- if (jj_3_15()) {
+ if (jj_3_14()) {
jj_scanpos = xsp;
- if (jj_3_16()) return true;
+ if (jj_3_15()) return true;
}
}
return false;
}
- private boolean jj_3R_114() {
+ private boolean jj_3R_57() {
+ if (jj_3R_68()) return true;
+ Token xsp;
+ while (true) {
+ xsp = jj_scanpos;
+ if (jj_3R_69()) { jj_scanpos = xsp; break; }
+ }
+ return false;
+ }
+
+ private boolean jj_3R_106() {
+ if (jj_scan_token(REGEXVAR)) return true;
+ return false;
+ }
+
+ private boolean jj_3R_109() {
if (jj_scan_token(31)) return true;
- if (jj_3R_36()) return true;
+ if (jj_3R_34()) return true;
if (jj_scan_token(32)) return true;
return false;
}
- private boolean jj_3_29() {
+ private boolean jj_3_28() {
if (jj_scan_token(31)) return true;
if (jj_scan_token(32)) return true;
return false;
}
- private boolean jj_3R_60() {
- if (jj_3R_71()) return true;
+ private boolean jj_3R_31() {
+ if (jj_scan_token(IDENTIFIER)) return true;
+ if (jj_scan_token(25)) return true;
+ return false;
+ }
+
+ private boolean jj_3R_102() {
Token xsp;
- while (true) {
- xsp = jj_scanpos;
- if (jj_3R_72()) { jj_scanpos = xsp; break; }
+ xsp = jj_scanpos;
+ if (jj_3_28()) {
+ jj_scanpos = xsp;
+ if (jj_3R_109()) return true;
}
return false;
}
- private boolean jj_3R_107() {
+ private boolean jj_3R_63() {
+ if (jj_scan_token(REGEXMRGROUP)) return true;
+ return false;
+ }
+
+ private boolean jj_3R_62() {
+ if (jj_scan_token(REGEXMRVAR)) return true;
+ return false;
+ }
+
+ private boolean jj_3R_61() {
+ if (jj_scan_token(REGEXGROUP)) return true;
+ return false;
+ }
+
+ private boolean jj_3R_60() {
+ if (jj_scan_token(REGEXVAR)) return true;
+ return false;
+ }
+
+ private boolean jj_3R_40() {
Token xsp;
xsp = jj_scanpos;
- if (jj_3_29()) {
+ if (jj_3R_59()) {
+ jj_scanpos = xsp;
+ if (jj_3R_60()) {
+ jj_scanpos = xsp;
+ if (jj_3R_61()) {
jj_scanpos = xsp;
- if (jj_3R_114()) return true;
+ if (jj_3R_62()) {
+ jj_scanpos = xsp;
+ if (jj_3R_63()) return true;
+ }
+ }
+ }
}
return false;
}
- private boolean jj_3R_89() {
- if (jj_3R_31()) return true;
+ private boolean jj_3R_59() {
+ if (jj_scan_token(IDENTIFIER)) return true;
return false;
}
- private boolean jj_3R_126() {
+ private boolean jj_3R_121() {
if (jj_scan_token(41)) return true;
if (jj_scan_token(REGEXVAR)) return true;
return false;
}
- private boolean jj_3R_51() {
+ private boolean jj_3R_48() {
if (jj_scan_token(IDENTIFIER)) return true;
return false;
}
- private boolean jj_3R_125() {
+ private boolean jj_3R_120() {
if (jj_scan_token(45)) return true;
return false;
}
- private boolean jj_3R_118() {
+ private boolean jj_3R_113() {
Token xsp;
xsp = jj_scanpos;
- if (jj_3R_125()) {
+ if (jj_3R_120()) {
jj_scanpos = xsp;
- if (jj_3R_126()) return true;
+ if (jj_3R_121()) return true;
}
return false;
}
- private boolean jj_3R_50() {
+ private boolean jj_3R_47() {
if (jj_scan_token(STR)) return true;
return false;
}
- private boolean jj_3R_32() {
+ private boolean jj_3R_30() {
Token xsp;
xsp = jj_scanpos;
- if (jj_3R_50()) {
+ if (jj_3R_47()) {
jj_scanpos = xsp;
- if (jj_3R_51()) return true;
+ if (jj_3R_48()) return true;
}
return false;
}
- private boolean jj_3R_33() {
- if (jj_scan_token(IDENTIFIER)) return true;
- if (jj_scan_token(25)) return true;
+ private boolean jj_3R_84() {
+ if (jj_scan_token(REGEXVAR)) return true;
+ return false;
+ }
+
+ private boolean jj_3R_68() {
Token xsp;
xsp = jj_scanpos;
- if (jj_3R_89()) jj_scanpos = xsp;
- if (jj_scan_token(26)) return true;
+ if (jj_3R_83()) {
+ jj_scanpos = xsp;
+ if (jj_3R_84()) return true;
+ }
return false;
}
- private boolean jj_3R_66() {
- if (jj_scan_token(REGEXMRGROUP)) return true;
+ private boolean jj_3R_83() {
+ if (jj_scan_token(IDENTIFIER)) return true;
return false;
}
- private boolean jj_3R_110() {
+ private boolean jj_3R_105() {
if (jj_scan_token(25)) return true;
Token xsp;
xsp = jj_scanpos;
- if (jj_3R_118()) jj_scanpos = xsp;
- if (jj_3R_59()) return true;
+ if (jj_3R_113()) jj_scanpos = xsp;
+ if (jj_3R_56()) return true;
if (jj_scan_token(26)) return true;
return false;
}
- private boolean jj_3R_65() {
- if (jj_scan_token(REGEXMRVAR)) return true;
- return false;
- }
-
- private boolean jj_3R_106() {
+ private boolean jj_3R_101() {
Token xsp;
xsp = jj_scanpos;
if (jj_scan_token(18)) {
@@ -2333,41 +2342,19 @@ private boolean jj_3R_106() {
return false;
}
- private boolean jj_3R_64() {
- if (jj_scan_token(REGEXGROUP)) return true;
- return false;
- }
-
- private boolean jj_3R_63() {
- if (jj_scan_token(REGEXVAR)) return true;
- return false;
- }
-
- private boolean jj_3R_42() {
- Token xsp;
- xsp = jj_scanpos;
- if (jj_3R_62()) {
- jj_scanpos = xsp;
- if (jj_3R_63()) {
- jj_scanpos = xsp;
- if (jj_3R_64()) {
- jj_scanpos = xsp;
- if (jj_3R_65()) {
- jj_scanpos = xsp;
- if (jj_3R_66()) return true;
- }
- }
- }
- }
+ private boolean jj_3R_76() {
+ if (jj_scan_token(25)) return true;
+ if (jj_3R_56()) return true;
+ if (jj_scan_token(26)) return true;
return false;
}
- private boolean jj_3R_62() {
- if (jj_scan_token(IDENTIFIER)) return true;
+ private boolean jj_3R_75() {
+ if (jj_scan_token(REAL)) return true;
return false;
}
- private boolean jj_3R_52() {
+ private boolean jj_3R_49() {
Token xsp;
xsp = jj_scanpos;
if (jj_scan_token(20)) {
@@ -2377,27 +2364,22 @@ private boolean jj_3R_52() {
return false;
}
- private boolean jj_3R_88() {
- if (jj_scan_token(REGEXVAR)) return true;
+ private boolean jj_3R_74() {
+ if (jj_scan_token(LONGINT)) return true;
return false;
}
- private boolean jj_3R_71() {
- Token xsp;
- xsp = jj_scanpos;
- if (jj_3R_87()) {
- jj_scanpos = xsp;
- if (jj_3R_88()) return true;
- }
+ private boolean jj_3R_73() {
+ if (jj_3R_46()) return true;
return false;
}
- private boolean jj_3R_87() {
- if (jj_scan_token(IDENTIFIER)) return true;
+ private boolean jj_3R_72() {
+ if (jj_scan_token(STR)) return true;
return false;
}
- private boolean jj_3R_49() {
+ private boolean jj_3R_46() {
Token xsp;
xsp = jj_scanpos;
if (jj_scan_token(13)) {
@@ -2407,32 +2389,34 @@ private boolean jj_3R_49() {
return false;
}
- private boolean jj_3R_132() {
+ private boolean jj_3R_64() {
Token xsp;
xsp = jj_scanpos;
- if (jj_scan_token(13)) {
+ if (jj_3R_71()) {
jj_scanpos = xsp;
- if (jj_scan_token(14)) {
+ if (jj_3R_72()) {
+ jj_scanpos = xsp;
+ if (jj_3R_73()) {
+ jj_scanpos = xsp;
+ if (jj_3R_74()) {
jj_scanpos = xsp;
- if (jj_scan_token(16)) return true;
+ if (jj_3R_75()) {
+ jj_scanpos = xsp;
+ if (jj_3R_76()) return true;
+ }
+ }
+ }
}
}
return false;
}
- private boolean jj_3R_79() {
- if (jj_scan_token(25)) return true;
- if (jj_3R_59()) return true;
- if (jj_scan_token(26)) return true;
- return false;
- }
-
- private boolean jj_3R_78() {
- if (jj_scan_token(REAL)) return true;
+ private boolean jj_3R_71() {
+ if (jj_scan_token(REGEX)) return true;
return false;
}
- private boolean jj_3R_105() {
+ private boolean jj_3R_100() {
Token xsp;
xsp = jj_scanpos;
if (jj_scan_token(44)) {
@@ -2445,92 +2429,55 @@ private boolean jj_3R_105() {
}
}
}
- if (jj_3R_86()) return true;
+ if (jj_3R_82()) return true;
return false;
}
- private boolean jj_3R_77() {
- if (jj_scan_token(LONGINT)) return true;
- return false;
- }
-
- private boolean jj_3R_76() {
- if (jj_3R_49()) return true;
- return false;
- }
-
- private boolean jj_3R_75() {
- if (jj_scan_token(STR)) return true;
+ private boolean jj_3R_37() {
+ if (jj_3R_30()) return true;
+ if (jj_scan_token(34)) return true;
+ if (jj_3R_29()) return true;
return false;
}
private boolean jj_3R_67() {
+ if (jj_3R_82()) return true;
Token xsp;
- xsp = jj_scanpos;
- if (jj_3R_74()) {
- jj_scanpos = xsp;
- if (jj_3R_75()) {
- jj_scanpos = xsp;
- if (jj_3R_76()) {
- jj_scanpos = xsp;
- if (jj_3R_77()) {
- jj_scanpos = xsp;
- if (jj_3R_78()) {
- jj_scanpos = xsp;
- if (jj_3R_79()) return true;
- }
- }
- }
- }
+ while (true) {
+ xsp = jj_scanpos;
+ if (jj_3R_100()) { jj_scanpos = xsp; break; }
}
return false;
}
- private boolean jj_3R_74() {
- if (jj_scan_token(REGEX)) return true;
- return false;
- }
-
- private boolean jj_3R_70() {
- if (jj_3R_86()) return true;
- Token xsp;
- while (true) {
- xsp = jj_scanpos;
- if (jj_3R_105()) { jj_scanpos = xsp; break; }
- }
+ private boolean jj_3R_21() {
+ if (jj_scan_token(22)) return true;
+ if (jj_3R_37()) return true;
return false;
}
- private boolean jj_3R_80() {
- Token xsp;
- xsp = jj_scanpos;
- if (jj_scan_token(33)) {
- jj_scanpos = xsp;
- if (jj_scan_token(30)) return true;
- }
+ private boolean jj_3R_118() {
+ if (jj_scan_token(41)) return true;
return false;
}
- private boolean jj_3R_39() {
- if (jj_3R_32()) return true;
- if (jj_scan_token(34)) return true;
- if (jj_3R_31()) return true;
+ private boolean jj_3R_117() {
+ if (jj_scan_token(22)) return true;
+ if (jj_scan_token(NONNEGINT)) return true;
return false;
}
- private boolean jj_3R_123() {
- if (jj_scan_token(41)) return true;
+ private boolean jj_3R_43() {
+ if (jj_3R_21()) return true;
return false;
}
- private boolean jj_3R_122() {
- if (jj_scan_token(22)) return true;
- if (jj_scan_token(NONNEGINT)) return true;
- if (jj_scan_token(33)) return true;
+ private boolean jj_3R_42() {
+ if (jj_3R_64()) return true;
return false;
}
- private boolean jj_3_28() {
+ private boolean jj_3_27() {
if (jj_scan_token(22)) return true;
if (jj_scan_token(NONNEGINT)) return true;
if (jj_scan_token(33)) return true;
@@ -2538,247 +2485,244 @@ private boolean jj_3_28() {
return false;
}
- private boolean jj_3R_121() {
+ private boolean jj_3R_116() {
if (jj_scan_token(42)) return true;
return false;
}
- private boolean jj_3R_22() {
- if (jj_scan_token(22)) return true;
- if (jj_3R_39()) return true;
+ private boolean jj_3R_25() {
Token xsp;
- while (true) {
- xsp = jj_scanpos;
- if (jj_3R_80()) { jj_scanpos = xsp; break; }
+ xsp = jj_scanpos;
+ if (jj_3R_42()) {
+ jj_scanpos = xsp;
+ if (jj_3R_43()) return true;
}
- if (jj_scan_token(24)) return true;
return false;
}
- private boolean jj_3_27() {
+ private boolean jj_3_26() {
if (jj_scan_token(22)) return true;
if (jj_scan_token(NONNEGINT)) return true;
if (jj_scan_token(24)) return true;
return false;
}
- private boolean jj_3R_120() {
+ private boolean jj_3R_115() {
if (jj_scan_token(41)) return true;
return false;
}
- private boolean jj_3R_119() {
+ private boolean jj_3R_70() {
+ if (jj_scan_token(33)) return true;
+ return false;
+ }
+
+ private boolean jj_3R_114() {
if (jj_scan_token(40)) return true;
return false;
}
- private boolean jj_3R_116() {
+ private boolean jj_3R_58() {
+ if (jj_3R_29()) return true;
+ Token xsp;
+ while (true) {
+ xsp = jj_scanpos;
+ if (jj_3R_70()) { jj_scanpos = xsp; break; }
+ }
+ return false;
+ }
+
+ private boolean jj_3R_111() {
Token xsp;
xsp = jj_scanpos;
- if (jj_3R_119()) {
+ if (jj_3R_114()) {
jj_scanpos = xsp;
- if (jj_3R_120()) {
+ if (jj_3R_115()) {
jj_scanpos = xsp;
- if (jj_3R_121()) {
+ if (jj_3R_116()) {
jj_scanpos = xsp;
- if (jj_3_27()) {
+ if (jj_3_26()) {
jj_scanpos = xsp;
- if (jj_3_28()) {
+ if (jj_3_27()) {
jj_scanpos = xsp;
- if (jj_3R_122()) return true;
+ if (jj_3R_117()) return true;
}
}
}
}
}
xsp = jj_scanpos;
- if (jj_3R_123()) jj_scanpos = xsp;
- return false;
- }
-
- private boolean jj_3R_45() {
- if (jj_3R_22()) return true;
- return false;
- }
-
- private boolean jj_3R_44() {
- if (jj_3R_67()) return true;
+ if (jj_3R_118()) jj_scanpos = xsp;
return false;
}
- private boolean jj_3R_26() {
+ private boolean jj_3R_38() {
+ if (jj_scan_token(IDENTIFIER)) return true;
+ if (jj_scan_token(25)) return true;
Token xsp;
xsp = jj_scanpos;
- if (jj_3R_44()) {
- jj_scanpos = xsp;
- if (jj_3R_45()) return true;
- }
+ if (jj_3R_58()) jj_scanpos = xsp;
+ if (jj_scan_token(26)) return true;
return false;
}
- private boolean jj_3R_113() {
+ private boolean jj_3R_108() {
if (jj_scan_token(23)) return true;
- if (jj_3R_31()) return true;
+ if (jj_3R_29()) return true;
return false;
}
- private boolean jj_3R_73() {
- if (jj_scan_token(33)) return true;
- if (jj_3R_31()) return true;
+ private boolean jj_3R_90() {
+ if (jj_3R_65()) return true;
return false;
}
- private boolean jj_3R_95() {
- if (jj_3R_68()) return true;
+ private boolean jj_3R_28() {
+ if (jj_scan_token(31)) return true;
+ if (jj_3R_46()) return true;
+ if (jj_scan_token(32)) return true;
return false;
}
- private boolean jj_3R_61() {
- if (jj_3R_31()) return true;
- Token xsp;
- while (true) {
- xsp = jj_scanpos;
- if (jj_3R_73()) { jj_scanpos = xsp; break; }
- }
+ private boolean jj_3R_89() {
+ if (jj_scan_token(REGEXVAR)) return true;
return false;
}
- private boolean jj_3R_94() {
- if (jj_scan_token(REGEXVAR)) return true;
+ private boolean jj_3R_110() {
+ if (jj_3R_111()) return true;
return false;
}
- private boolean jj_3R_115() {
- if (jj_3R_116()) return true;
+ private boolean jj_3_12() {
+ if (jj_3R_27()) return true;
return false;
}
- private boolean jj_3R_128() {
- if (jj_3R_131()) return true;
+ private boolean jj_3R_99() {
+ if (jj_3R_107()) return true;
return false;
}
- private boolean jj_3R_104() {
- if (jj_3R_112()) return true;
+ private boolean jj_3_11() {
+ if (jj_3R_26()) return true;
return false;
}
- private boolean jj_3R_40() {
- if (jj_scan_token(IDENTIFIER)) return true;
- if (jj_scan_token(25)) return true;
- Token xsp;
- xsp = jj_scanpos;
- if (jj_3R_61()) jj_scanpos = xsp;
- if (jj_scan_token(26)) return true;
+ private boolean jj_3R_98() {
+ if (jj_3R_106()) return true;
return false;
}
- private boolean jj_3R_131() {
- if (jj_scan_token(REGEXVAR)) return true;
+ private boolean jj_3_10() {
+ if (jj_3R_25()) return true;
return false;
}
- private boolean jj_3R_103() {
- if (jj_3R_111()) return true;
+ private boolean jj_3R_97() {
+ if (jj_3R_105()) return true;
return false;
}
- private boolean jj_3R_102() {
- if (jj_3R_110()) return true;
+ private boolean jj_3_9() {
+ if (jj_3R_24()) return true;
return false;
}
- private boolean jj_3R_101() {
- if (jj_3R_109()) return true;
+ private boolean jj_3R_96() {
+ if (jj_3R_104()) return true;
return false;
}
- private boolean jj_3R_100() {
- if (jj_3R_108()) return true;
+ private boolean jj_3_8() {
+ if (jj_3R_23()) return true;
return false;
}
- private boolean jj_3R_30() {
- if (jj_scan_token(31)) return true;
- if (jj_3R_49()) return true;
- if (jj_scan_token(32)) return true;
+ private boolean jj_3R_95() {
+ if (jj_3R_103()) return true;
return false;
}
- private boolean jj_3R_99() {
- if (jj_3R_107()) return true;
+ private boolean jj_3R_94() {
+ if (jj_3R_102()) return true;
return false;
}
- private boolean jj_3R_96() {
+ private boolean jj_3R_29() {
+ Token xsp;
+ xsp = jj_scanpos;
+ if (jj_3_8()) {
+ jj_scanpos = xsp;
+ if (jj_3_9()) {
+ jj_scanpos = xsp;
+ if (jj_3_10()) {
+ jj_scanpos = xsp;
+ if (jj_3_11()) {
+ jj_scanpos = xsp;
+ if (jj_3_12()) return true;
+ }
+ }
+ }
+ }
+ return false;
+ }
+
+ private boolean jj_3R_91() {
Token xsp;
xsp = jj_scanpos;
- if (jj_3R_99()) {
+ if (jj_3R_94()) {
jj_scanpos = xsp;
- if (jj_3R_100()) {
+ if (jj_3R_95()) {
jj_scanpos = xsp;
- if (jj_3R_101()) {
+ if (jj_3R_96()) {
jj_scanpos = xsp;
- if (jj_3R_102()) {
+ if (jj_3R_97()) {
jj_scanpos = xsp;
- if (jj_3R_103()) {
+ if (jj_3R_98()) {
jj_scanpos = xsp;
- if (jj_3R_104()) return true;
+ if (jj_3R_99()) return true;
}
}
}
}
}
xsp = jj_scanpos;
- if (jj_3R_115()) jj_scanpos = xsp;
+ if (jj_3R_110()) jj_scanpos = xsp;
return false;
}
- private boolean jj_3R_86() {
+ private boolean jj_3R_82() {
Token xsp;
- if (jj_3R_96()) return true;
+ if (jj_3R_91()) return true;
while (true) {
xsp = jj_scanpos;
- if (jj_3R_96()) { jj_scanpos = xsp; break; }
+ if (jj_3R_91()) { jj_scanpos = xsp; break; }
}
xsp = jj_scanpos;
- if (jj_3R_113()) jj_scanpos = xsp;
+ if (jj_3R_108()) jj_scanpos = xsp;
return false;
}
- private boolean jj_3_13() {
- if (jj_3R_29()) return true;
- return false;
- }
-
- private boolean jj_3_12() {
- if (jj_3R_28()) return true;
+ private boolean jj_3R_36() {
+ if (jj_3R_57()) return true;
return false;
}
- private boolean jj_3R_124() {
+ private boolean jj_3R_119() {
if (jj_scan_token(22)) return true;
if (jj_scan_token(NONNEGINT)) return true;
- if (jj_scan_token(33)) return true;
- return false;
- }
-
- private boolean jj_3_11() {
- if (jj_3R_27()) return true;
- return false;
- }
-
- private boolean jj_3_10() {
- if (jj_3R_26()) return true;
return false;
}
- private boolean jj_3_9() {
- if (jj_3R_25()) return true;
+ private boolean jj_3R_20() {
+ if (jj_3R_36()) return true;
+ if (jj_scan_token(29)) return true;
+ if (jj_3R_29()) return true;
return false;
}
- private boolean jj_3_35() {
+ private boolean jj_3_34() {
if (jj_scan_token(22)) return true;
if (jj_scan_token(NONNEGINT)) return true;
if (jj_scan_token(33)) return true;
@@ -2786,90 +2730,90 @@ private boolean jj_3_35() {
return false;
}
- private boolean jj_3_8() {
- if (jj_3R_24()) return true;
- return false;
- }
-
- private boolean jj_3R_130() {
- if (jj_3R_131()) return true;
+ private boolean jj_3_33() {
+ if (jj_scan_token(22)) return true;
+ if (jj_scan_token(NONNEGINT)) return true;
+ if (jj_scan_token(24)) return true;
return false;
}
- private boolean jj_3R_31() {
+ private boolean jj_3R_112() {
Token xsp;
xsp = jj_scanpos;
- if (jj_3_8()) {
- jj_scanpos = xsp;
- if (jj_3_9()) {
- jj_scanpos = xsp;
- if (jj_3_10()) {
- jj_scanpos = xsp;
- if (jj_3_11()) {
+ if (jj_3_33()) {
jj_scanpos = xsp;
- if (jj_3_12()) {
+ if (jj_3_34()) {
jj_scanpos = xsp;
- if (jj_3_13()) return true;
- }
- }
- }
+ if (jj_3R_119()) return true;
}
}
return false;
}
- private boolean jj_3R_117() {
+ private boolean jj_3_7() {
+ if (jj_scan_token(22)) return true;
+ if (jj_scan_token(28)) return true;
+ if (jj_3R_22()) return true;
+ return false;
+ }
+
+ private boolean jj_3R_104() {
+ if (jj_scan_token(50)) return true;
Token xsp;
xsp = jj_scanpos;
- if (jj_3_34()) {
- jj_scanpos = xsp;
- if (jj_3_35()) {
- jj_scanpos = xsp;
- if (jj_3R_124()) return true;
- }
- }
+ if (jj_3R_112()) jj_scanpos = xsp;
+ if (jj_3R_103()) return true;
return false;
}
- private boolean jj_3_34() {
+ private boolean jj_3_6() {
if (jj_scan_token(22)) return true;
- if (jj_scan_token(NONNEGINT)) return true;
- if (jj_scan_token(24)) return true;
+ if (jj_scan_token(27)) return true;
+ if (jj_scan_token(25)) return true;
return false;
}
- private boolean jj_3R_109() {
- if (jj_scan_token(50)) return true;
- Token xsp;
- xsp = jj_scanpos;
- if (jj_3R_117()) jj_scanpos = xsp;
- if (jj_3R_108()) return true;
+ private boolean jj_3R_56() {
+ if (jj_3R_67()) return true;
return false;
}
- private boolean jj_3R_38() {
- if (jj_3R_60()) return true;
+ private boolean jj_3_5() {
+ if (jj_scan_token(22)) return true;
+ if (jj_scan_token(25)) return true;
+ if (jj_3R_56()) return true;
return false;
}
- private boolean jj_3R_59() {
- if (jj_3R_70()) return true;
+ private boolean jj_3_4() {
+ if (jj_scan_token(22)) return true;
+ if (jj_3R_22()) return true;
+ if (jj_scan_token(23)) return true;
return false;
}
- private boolean jj_3R_21() {
- if (jj_3R_38()) return true;
- if (jj_scan_token(29)) return true;
- if (jj_3R_31()) return true;
+ private boolean jj_3R_19() {
+ Token xsp;
+ xsp = jj_scanpos;
+ if (jj_3_4()) {
+ jj_scanpos = xsp;
+ if (jj_3_5()) {
+ jj_scanpos = xsp;
+ if (jj_3_6()) {
+ jj_scanpos = xsp;
+ if (jj_3_7()) return true;
+ }
+ }
+ }
return false;
}
- private boolean jj_3R_23() {
+ private boolean jj_3R_22() {
if (jj_scan_token(REGEX)) return true;
return false;
}
- private boolean jj_3R_108() {
+ private boolean jj_3R_103() {
Token xsp;
xsp = jj_scanpos;
if (jj_scan_token(18)) {
@@ -2897,351 +2841,283 @@ private boolean jj_3R_108() {
return false;
}
- private boolean jj_3_7() {
- if (jj_scan_token(22)) return true;
- if (jj_scan_token(28)) return true;
- if (jj_3R_23()) return true;
- return false;
- }
-
- private boolean jj_3R_129() {
- if (jj_3R_132()) return true;
- return false;
- }
-
- private boolean jj_3R_48() {
- if (jj_3R_68()) return true;
+ private boolean jj_3R_45() {
+ if (jj_3R_65()) return true;
if (jj_scan_token(23)) return true;
- if (jj_3R_31()) return true;
return false;
}
- private boolean jj_3_6() {
- if (jj_scan_token(22)) return true;
- if (jj_scan_token(27)) return true;
- if (jj_scan_token(25)) return true;
+ private boolean jj_3_3() {
+ if (jj_3R_21()) return true;
return false;
}
- private boolean jj_3R_58() {
+ private boolean jj_3R_55() {
if (jj_scan_token(NUMCMP)) return true;
- Token xsp;
- xsp = jj_scanpos;
- if (jj_3R_129()) {
- jj_scanpos = xsp;
- if (jj_3R_130()) return true;
- }
return false;
}
- private boolean jj_3_5() {
- if (jj_scan_token(22)) return true;
- if (jj_scan_token(25)) return true;
- if (jj_3R_59()) return true;
+ private boolean jj_3_2() {
+ if (jj_3R_20()) return true;
return false;
}
- private boolean jj_3R_57() {
+ private boolean jj_3R_54() {
if (jj_scan_token(49)) return true;
- if (jj_scan_token(IDENTIFIER)) return true;
return false;
}
- private boolean jj_3R_29() {
+ private boolean jj_3_1() {
+ if (jj_3R_19()) return true;
+ return false;
+ }
+
+ private boolean jj_3R_27() {
if (jj_scan_token(38)) return true;
if (jj_scan_token(22)) return true;
Token xsp;
- if (jj_3R_48()) return true;
+ if (jj_3R_45()) return true;
while (true) {
xsp = jj_scanpos;
- if (jj_3R_48()) { jj_scanpos = xsp; break; }
+ if (jj_3R_45()) { jj_scanpos = xsp; break; }
}
return false;
}
- private boolean jj_3R_56() {
+ private boolean jj_3R_53() {
if (jj_scan_token(34)) return true;
- Token xsp;
- xsp = jj_scanpos;
- if (jj_scan_token(18)) {
- jj_scanpos = xsp;
- if (jj_scan_token(17)) {
- jj_scanpos = xsp;
- if (jj_scan_token(7)) {
- jj_scanpos = xsp;
- if (jj_3R_128()) return true;
- }
- }
- }
- return false;
- }
-
- private boolean jj_3R_20() {
- Token xsp;
- xsp = jj_scanpos;
- if (jj_3_4()) {
- jj_scanpos = xsp;
- if (jj_3_5()) {
- jj_scanpos = xsp;
- if (jj_3_6()) {
- jj_scanpos = xsp;
- if (jj_3_7()) return true;
- }
- }
- }
- return false;
- }
-
- private boolean jj_3_4() {
- if (jj_scan_token(22)) return true;
- if (jj_3R_23()) return true;
- if (jj_scan_token(23)) return true;
return false;
}
- private boolean jj_3R_37() {
+ private boolean jj_3R_35() {
if (jj_scan_token(IDENTIFIER)) return true;
Token xsp;
xsp = jj_scanpos;
- if (jj_3R_56()) {
+ if (jj_3R_53()) {
jj_scanpos = xsp;
- if (jj_3R_57()) {
+ if (jj_3R_54()) {
jj_scanpos = xsp;
- if (jj_3R_58()) return true;
+ if (jj_3R_55()) return true;
}
}
return false;
}
- private boolean jj_3_3() {
- if (jj_3R_22()) return true;
- return false;
- }
-
- private boolean jj_3_2() {
- if (jj_3R_21()) return true;
- return false;
- }
-
- private boolean jj_3R_98() {
- if (jj_3R_106()) return true;
- return false;
- }
-
- private boolean jj_3_1() {
- if (jj_3R_20()) return true;
+ private boolean jj_3R_93() {
+ if (jj_3R_101()) return true;
return false;
}
- private boolean jj_3R_93() {
+ private boolean jj_3R_88() {
Token xsp;
xsp = jj_scanpos;
- if (jj_3R_98()) {
+ if (jj_3R_93()) {
jj_scanpos = xsp;
if (jj_scan_token(17)) return true;
}
return false;
}
- private boolean jj_3_33() {
- if (jj_3R_37()) return true;
+ private boolean jj_3_32() {
+ if (jj_3R_35()) return true;
return false;
}
- private boolean jj_3R_85() {
+ private boolean jj_3R_81() {
if (jj_scan_token(47)) return true;
- if (jj_3R_95()) return true;
- if (jj_scan_token(48)) return true;
+ if (jj_3R_90()) return true;
return false;
}
- private boolean jj_3R_82() {
+ private boolean jj_3R_78() {
Token xsp;
xsp = jj_scanpos;
if (jj_scan_token(36)) {
jj_scanpos = xsp;
if (jj_scan_token(37)) return true;
}
- if (jj_3R_81()) return true;
return false;
}
- private boolean jj_3R_84() {
- if (jj_3R_94()) return true;
+ private boolean jj_3R_80() {
+ if (jj_3R_89()) return true;
return false;
}
- private boolean jj_3R_68() {
- if (jj_3R_81()) return true;
+ private boolean jj_3R_65() {
+ if (jj_3R_77()) return true;
Token xsp;
while (true) {
xsp = jj_scanpos;
- if (jj_3R_82()) { jj_scanpos = xsp; break; }
+ if (jj_3R_78()) { jj_scanpos = xsp; break; }
}
return false;
}
- private boolean jj_3R_92() {
+ private boolean jj_3R_87() {
if (jj_scan_token(22)) return true;
- if (jj_3R_37()) return true;
+ if (jj_3R_35()) return true;
return false;
}
- private boolean jj_3R_69() {
+ private boolean jj_3R_66() {
Token xsp;
xsp = jj_scanpos;
- if (jj_3R_83()) {
+ if (jj_3R_79()) {
jj_scanpos = xsp;
- if (jj_3R_84()) {
+ if (jj_3R_80()) {
jj_scanpos = xsp;
- if (jj_3R_85()) return true;
+ if (jj_3R_81()) return true;
}
}
return false;
}
- private boolean jj_3R_83() {
+ private boolean jj_3R_79() {
Token xsp;
xsp = jj_scanpos;
- if (jj_3R_92()) {
+ if (jj_3R_87()) {
jj_scanpos = xsp;
- if (jj_3_33()) {
+ if (jj_3_32()) {
jj_scanpos = xsp;
- if (jj_3R_93()) return true;
+ if (jj_3R_88()) return true;
}
}
return false;
}
- private boolean jj_3R_91() {
+ private boolean jj_3R_86() {
if (jj_scan_token(25)) return true;
- if (jj_3R_68()) return true;
- if (jj_scan_token(26)) return true;
+ if (jj_3R_65()) return true;
return false;
}
- private boolean jj_3R_90() {
- if (jj_3R_97()) return true;
+ private boolean jj_3R_85() {
+ if (jj_3R_92()) return true;
return false;
}
- private boolean jj_3R_81() {
+ private boolean jj_3R_77() {
Token xsp;
xsp = jj_scanpos;
- if (jj_3R_90()) {
+ if (jj_3R_85()) {
jj_scanpos = xsp;
- if (jj_3R_91()) return true;
+ if (jj_3R_86()) return true;
}
return false;
}
- private boolean jj_3R_54() {
- if (jj_3R_69()) return true;
+ private boolean jj_3R_51() {
+ if (jj_3R_66()) return true;
return false;
}
- private boolean jj_3_26() {
- if (jj_3R_24()) return true;
+ private boolean jj_3_25() {
+ if (jj_3R_23()) return true;
return false;
}
- private boolean jj_3R_53() {
+ private boolean jj_3R_50() {
if (jj_scan_token(46)) return true;
- if (jj_3R_69()) return true;
+ if (jj_3R_66()) return true;
return false;
}
- private boolean jj_3R_35() {
+ private boolean jj_3R_33() {
Token xsp;
xsp = jj_scanpos;
- if (jj_3R_53()) {
+ if (jj_3R_50()) {
jj_scanpos = xsp;
- if (jj_3R_54()) return true;
+ if (jj_3R_51()) return true;
}
return false;
}
- private boolean jj_3R_34() {
- if (jj_3R_52()) return true;
- if (jj_3R_31()) return true;
+ private boolean jj_3R_32() {
+ if (jj_3R_49()) return true;
+ if (jj_3R_29()) return true;
return false;
}
- private boolean jj_3_32() {
+ private boolean jj_3_31() {
if (jj_scan_token(46)) return true;
if (jj_scan_token(25)) return true;
- if (jj_3R_36()) return true;
return false;
}
- private boolean jj_3_25() {
- if (jj_3R_25()) return true;
+ private boolean jj_3_24() {
+ if (jj_3R_24()) return true;
Token xsp;
xsp = jj_scanpos;
- if (jj_3R_34()) jj_scanpos = xsp;
+ if (jj_3R_32()) jj_scanpos = xsp;
return false;
}
- private boolean jj_3R_97() {
+ private boolean jj_3R_92() {
Token xsp;
xsp = jj_scanpos;
- if (jj_3_25()) {
+ if (jj_3_24()) {
jj_scanpos = xsp;
- if (jj_3_26()) return true;
+ if (jj_3_25()) return true;
}
return false;
}
- private boolean jj_3_31() {
+ private boolean jj_3_30() {
if (jj_scan_token(25)) return true;
- if (jj_3R_36()) return true;
- if (jj_scan_token(26)) return true;
+ if (jj_3R_34()) return true;
return false;
}
- private boolean jj_3_30() {
- if (jj_3R_35()) return true;
+ private boolean jj_3_29() {
+ if (jj_3R_33()) return true;
return false;
}
- private boolean jj_3R_55() {
+ private boolean jj_3R_52() {
Token xsp;
xsp = jj_scanpos;
- if (jj_3_30()) {
+ if (jj_3_29()) {
jj_scanpos = xsp;
- if (jj_3_31()) {
+ if (jj_3_30()) {
jj_scanpos = xsp;
- if (jj_3_32()) return true;
+ if (jj_3_31()) return true;
}
}
return false;
}
- private boolean jj_3R_47() {
+ private boolean jj_3R_44() {
if (jj_scan_token(33)) return true;
- if (jj_3R_31()) return true;
+ if (jj_3R_29()) return true;
return false;
}
- private boolean jj_3R_28() {
- if (jj_scan_token(31)) return true;
- if (jj_3R_31()) return true;
+ private boolean jj_3R_26() {
+ if (jj_scan_token(25)) return true;
+ if (jj_3R_29()) return true;
Token xsp;
while (true) {
xsp = jj_scanpos;
- if (jj_3R_47()) { jj_scanpos = xsp; break; }
+ if (jj_3R_44()) { jj_scanpos = xsp; break; }
}
- if (jj_scan_token(32)) return true;
+ if (jj_scan_token(26)) return true;
return false;
}
- private boolean jj_3R_46() {
- if (jj_scan_token(33)) return true;
+ private boolean jj_3_23() {
+ if (jj_scan_token(35)) return true;
+ if (jj_3R_30()) return true;
+ return false;
+ }
+
+ private boolean jj_3_22() {
+ if (jj_scan_token(35)) return true;
if (jj_3R_31()) return true;
return false;
}
- private boolean jj_3R_127() {
+ private boolean jj_3R_122() {
Token xsp;
xsp = jj_scanpos;
if (jj_scan_token(44)) {
@@ -3254,78 +3130,53 @@ private boolean jj_3R_127() {
}
}
}
- if (jj_3R_55()) return true;
return false;
}
- private boolean jj_3R_27() {
- if (jj_scan_token(25)) return true;
- if (jj_3R_31()) return true;
- Token xsp;
- while (true) {
- xsp = jj_scanpos;
- if (jj_3R_46()) { jj_scanpos = xsp; break; }
- }
- if (jj_scan_token(26)) return true;
+ private boolean jj_3_21() {
+ if (jj_scan_token(31)) return true;
+ if (jj_3R_29()) return true;
+ if (jj_scan_token(32)) return true;
return false;
}
- private boolean jj_3R_36() {
- if (jj_3R_55()) return true;
+ private boolean jj_3R_34() {
+ if (jj_3R_52()) return true;
Token xsp;
while (true) {
xsp = jj_scanpos;
- if (jj_3R_127()) { jj_scanpos = xsp; break; }
+ if (jj_3R_122()) { jj_scanpos = xsp; break; }
}
return false;
}
- private boolean jj_3_24() {
- if (jj_scan_token(35)) return true;
- if (jj_3R_32()) return true;
- return false;
- }
-
- private boolean jj_3_23() {
- if (jj_scan_token(35)) return true;
- if (jj_3R_33()) return true;
- return false;
- }
-
- private boolean jj_3_22() {
- if (jj_scan_token(31)) return true;
- if (jj_3R_31()) return true;
- if (jj_scan_token(32)) return true;
- return false;
- }
-
- private boolean jj_3R_41() {
+ private boolean jj_3R_39() {
Token xsp;
xsp = jj_scanpos;
+ if (jj_3_20()) {
+ jj_scanpos = xsp;
if (jj_3_21()) {
jj_scanpos = xsp;
if (jj_3_22()) {
jj_scanpos = xsp;
- if (jj_3_23()) {
- jj_scanpos = xsp;
- if (jj_3_24()) return true;
+ if (jj_3_23()) return true;
}
}
}
return false;
}
- private boolean jj_3_21() {
- if (jj_3R_30()) return true;
+ private boolean jj_3_20() {
+ if (jj_3R_28()) return true;
return false;
}
- private boolean jj_3R_24() {
- if (jj_3R_40()) return true;
+ private boolean jj_3R_23() {
+ if (jj_3R_38()) return true;
Token xsp;
while (true) {
xsp = jj_scanpos;
- if (jj_3R_41()) { jj_scanpos = xsp; break; }
+ if (jj_3R_39()) { jj_scanpos = xsp; break; }
}
return false;
}
@@ -3341,7 +3192,7 @@ private boolean jj_3R_24() {
private Token jj_scanpos, jj_lastpos;
private int jj_la;
private int jj_gen;
- final private int[] jj_la1 = new int[68];
+ final private int[] jj_la1 = new int[67];
static private int[] jj_la1_0;
static private int[] jj_la1_1;
static {
@@ -3349,12 +3200,12 @@ private boolean jj_3R_24() {
jj_la1_init_1();
}
private static void jj_la1_init_0() {
- jj_la1_0 = new int[] {0x400180,0x40000000,0x0,0x8247ef80,0x247e000,0x40000000,0x40000000,0x207e000,0x180,0xf80,0x0,0x8247ef80,0x80000000,0x80000000,0x80000000,0x0,0x0,0x180000,0x2000f80,0x0,0x0,0x40000000,0x2000f80,0x0,0x5e000,0x8227f180,0x400000,0x8227f180,0x800000,0x0,0x400000,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x80000000,0x82460180,0x0,0x0,0x0,0x0,0x0,0x0,0x460180,0x40000000,0x40000000,0x60080,0x400000,0x60080,0x460180,0x60180,0x16100,0x80000,0x27e080,0x400000,0x400000,0x0,0x40000000,0x40000000,0x56080,0x16000,0x6000,0x180000,0x40080,0x40080,};
+ jj_la1_0 = new int[] {0x400180,0x40000000,0x0,0x247ef80,0x247e000,0x40000000,0x40000000,0x207e000,0x180,0xf80,0x0,0x247ef80,0x80000000,0x80000000,0x80000000,0x0,0x180000,0x2000f80,0x0,0x0,0x40000000,0x2000f80,0x0,0x5e000,0x8227f180,0x400000,0x8227f180,0x800000,0x0,0x400000,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x80000000,0x82460180,0x0,0x0,0x0,0x0,0x0,0x0,0x460180,0x40000000,0x40000000,0x60080,0x400000,0x60080,0x460180,0x60180,0x16100,0x80000,0x27e080,0x400000,0x400000,0x0,0x40000000,0x40000000,0x56080,0x16000,0x6000,0x180000,0x40080,0x40080,};
}
private static void jj_la1_init_1() {
- jj_la1_1 = new int[] {0x0,0x0,0x2,0x40,0x0,0x2,0x2,0x0,0x0,0x0,0x2,0x40,0x8,0x8,0x8,0x2,0x2,0x0,0x0,0x30,0x30,0x2,0x0,0x80,0x0,0x40000,0x700,0x40000,0x0,0x700,0x0,0x200,0x820,0x820,0x1830,0x1830,0x2200,0x2200,0x0,0xc000,0x820,0x820,0x1010,0x1010,0x1830,0x1830,0xc000,0x2,0x2,0x0,0x0,0x0,0x8000,0x0,0x0,0x20004,0x0,0x0,0x0,0x80000,0x2,0x2,0x0,0x0,0x0,0x0,0x0,0x0,};
+ jj_la1_1 = new int[] {0x0,0x0,0x2,0x40,0x0,0x2,0x2,0x0,0x0,0x0,0x2,0x40,0x8,0x8,0x8,0x2,0x0,0x0,0x30,0x30,0x2,0x0,0x80,0x0,0x40000,0x700,0x40000,0x0,0x700,0x0,0x200,0x820,0x820,0x1830,0x1830,0x2200,0x2200,0x0,0xc000,0x820,0x820,0x1010,0x1010,0x1830,0x1830,0xc000,0x2,0x2,0x0,0x0,0x0,0x8000,0x0,0x0,0x20004,0x0,0x0,0x0,0x80000,0x2,0x2,0x0,0x0,0x0,0x0,0x0,0x0,};
}
- final private JJCalls[] jj_2_rtns = new JJCalls[35];
+ final private JJCalls[] jj_2_rtns = new JJCalls[34];
private boolean jj_rescan = false;
private int jj_gc = 0;
@@ -3369,7 +3220,7 @@ public TokenSequenceParser(java.io.InputStream stream, String encoding) {
token = new Token();
jj_ntk = -1;
jj_gen = 0;
- for (int i = 0; i < 68; i++) jj_la1[i] = -1;
+ for (int i = 0; i < 67; i++) jj_la1[i] = -1;
for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
}
@@ -3384,7 +3235,7 @@ public void ReInit(java.io.InputStream stream, String encoding) {
token = new Token();
jj_ntk = -1;
jj_gen = 0;
- for (int i = 0; i < 68; i++) jj_la1[i] = -1;
+ for (int i = 0; i < 67; i++) jj_la1[i] = -1;
for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
}
@@ -3395,7 +3246,7 @@ public TokenSequenceParser(java.io.Reader stream) {
token = new Token();
jj_ntk = -1;
jj_gen = 0;
- for (int i = 0; i < 68; i++) jj_la1[i] = -1;
+ for (int i = 0; i < 67; i++) jj_la1[i] = -1;
for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
}
@@ -3406,7 +3257,7 @@ public void ReInit(java.io.Reader stream) {
token = new Token();
jj_ntk = -1;
jj_gen = 0;
- for (int i = 0; i < 68; i++) jj_la1[i] = -1;
+ for (int i = 0; i < 67; i++) jj_la1[i] = -1;
for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
}
@@ -3416,7 +3267,7 @@ public TokenSequenceParser(TokenSequenceParserTokenManager tm) {
token = new Token();
jj_ntk = -1;
jj_gen = 0;
- for (int i = 0; i < 68; i++) jj_la1[i] = -1;
+ for (int i = 0; i < 67; i++) jj_la1[i] = -1;
for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
}
@@ -3426,7 +3277,7 @@ public void ReInit(TokenSequenceParserTokenManager tm) {
token = new Token();
jj_ntk = -1;
jj_gen = 0;
- for (int i = 0; i < 68; i++) jj_la1[i] = -1;
+ for (int i = 0; i < 67; i++) jj_la1[i] = -1;
for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
}
@@ -3543,7 +3394,7 @@ public ParseException generateParseException() {
la1tokens[jj_kind] = true;
jj_kind = -1;
}
- for (int i = 0; i < 68; i++) {
+ for (int i = 0; i < 67; i++) {
if (jj_la1[i] == jj_gen) {
for (int j = 0; j < 32; j++) {
if ((jj_la1_0[i] & (1< exprs = new ArrayList();
- Expression expr;
-} {
- ( "["
- expr = Expression(env)
- { exprs.add(expr); }
- (
- ","
- expr = Expression(env)
- { exprs.add(expr); }
- )*
- "]"
- )
- { return new Expressions.ListExpression(Expressions.TYPE_LIST, exprs); }
-}
-
Expression BasicCondExpression(Env env) : {
Expression expr1 = null;
Expression expr2 = null;
diff --git a/src/edu/stanford/nlp/math/ArrayMath.java b/src/edu/stanford/nlp/math/ArrayMath.java
index 97989fa51c..10a263f990 100644
--- a/src/edu/stanford/nlp/math/ArrayMath.java
+++ b/src/edu/stanford/nlp/math/ArrayMath.java
@@ -786,7 +786,7 @@ public static float max(float[] a) {
}
/**
- * @return the index of the max value; if max is a tie, returns the first one.
+ * @return the index of the min value; if min is a tie, returns the first one.
*/
public static int argmin(double[] a) {
double min = Double.POSITIVE_INFINITY;
@@ -800,12 +800,15 @@ public static int argmin(double[] a) {
return argmin;
}
+ /**
+ * @return The minimum value in an array.
+ */
public static double min(double[] a) {
return a[argmin(a)];
}
/**
- * Returns the largest value in a vector of doubles. Any values which
+ * Returns the smallest value in a vector of doubles. Any values which
* are NaN or infinite are ignored. If the vector is empty, 0.0 is
* returned.
*/
@@ -816,7 +819,7 @@ public static double safeMin(double[] v) {
}
/**
- * @return the index of the max value; if max is a tie, returns the first one.
+ * @return the index of the min value; if min is a tie, returns the first one.
*/
public static int argmin(float[] a) {
float min = Float.POSITIVE_INFINITY;
@@ -835,7 +838,7 @@ public static float min(float[] a) {
}
/**
- * @return the index of the max value; if max is a tie, returns the first one.
+ * @return the index of the min value; if min is a tie, returns the first one.
*/
public static int argmin(int[] a) {
int min = Integer.MAX_VALUE;
@@ -1112,12 +1115,6 @@ public static double innerProduct(float[] a, float[] b) {
// UTILITIES
- public static int[] subArray(int[] a, int from, int to) {
- int[] result = new int[to-from];
- System.arraycopy(a, from, result, 0, to-from);
- return result;
- }
-
public static double[][] load2DMatrixFromFile(String filename) throws IOException {
String s = IOUtils.slurpFile(filename);
String[] rows = s.split("[\r\n]+");
@@ -1395,7 +1392,7 @@ public static double mean(double[] a) {
public static int mean(int[] a) {
return sum(a) / a.length;
}
-
+
public static double median(double[] a) {
double[] b = new double[a.length];
System.arraycopy(a, 0, b, 0, b.length);
@@ -1528,29 +1525,29 @@ public static boolean containsInSubarray(int[] a, int begin, int end, int i) {
* x and y, then compute innerProduct(x,y)/(x.length-1).
*/
public static double pearsonCorrelation(double[] x, double[] y) {
- double result;
- double sum_sq_x = 0, sum_sq_y = 0;
+ double result;
+ double sum_sq_x = 0, sum_sq_y = 0;
double mean_x = x[0], mean_y = y[0];
- double sum_coproduct = 0;
- for(int i=2; i 0) {
- if ((e & 1) != 0) {
- result *= currPow;
+ if (e <= 1) {
+ if (e == 1) {
+ return b;
+ } else {
+ return 1; // this is also what you get for e < 0 !
+ }
+ } else {
+ if (e == 2) {
+ return b * b;
+ } else {
+ int result = 1;
+ while (e > 0) {
+ if ((e & 1) != 0) {
+ result *= b;
+ }
+ b *= b;
+ e >>= 1;
}
- currPow *= currPow;
- e >>= 1;
+ return result;
}
- return result;
}
}
/**
- * Exponentiation like we learned in grade school:
- * multiply b by itself e times. Uses power of two trick.
- * e must be nonnegative!!! no checking!!!
- *
- * @param b base
- * @param e exponent
- * @return b^e
- */
+ * Exponentiation like we learned in grade school:
+ * multiply b by itself e times. Uses power of two trick.
+ * e must be nonnegative!!! no checking!!!
+ *
+ * @param b base
+ * @param e exponent
+ * @return b^e
+ */
public static float intPow(float b, int e) {
float result = 1.0f;
float currPow = b;
@@ -493,7 +495,7 @@ public static double hypergeometric(int k, int n, int r, int m) {
double ans = 1.0;
// do (n-r)x...x((n-r)-((m-k)-1))/n x...x (n-((m-k-1)))
- // leaving rest of denominator to get to multimply by (n-(m-1))
+ // leaving rest of denominator to get to multiply by (n-(m-1))
// that's k things which goes into next loop
for (int nr = n - r, n0 = n; nr > (n - r) - (m - k); nr--, n0--) {
// System.out.println("Multiplying by " + nr);
@@ -640,7 +642,8 @@ public static double sigmoid(double x) {
}
- private static float[] acosCache = null;
+ private static float[] acosCache; // = null;
+
/**
* Compute acos very quickly by directly looking up the value.
 * @param cosValue The cosine of the angle to find.
@@ -668,13 +671,13 @@ public static double acos(double cosValue) {
public static double poisson(int x, double lambda) {
if (x<0 || lambda<=0.0) throw new RuntimeException("Bad arguments: " + x + " and " + lambda);
double p = (Math.exp(-lambda) * Math.pow(lambda, x)) / factorial(x);
- if (Double.isInfinite(p) || p<=0.0) throw new RuntimeException(Math.exp(-lambda) +" "+ Math.pow(lambda, x) +" "+ factorial(x));
+ if (Double.isInfinite(p) || p<=0.0) throw new RuntimeException(Math.exp(-lambda) +" "+ Math.pow(lambda, x) + ' ' + factorial(x));
return p;
}
/**
* Uses floating point so that it can represent the really big numbers that come up.
- * @param x Argumet to take factorial of
+ * @param x Argument to take factorial of
* @return Factorial of argument
*/
public static double factorial(int x) {
@@ -689,7 +692,7 @@ public static double factorial(int x) {
* Tests the hypergeometric distribution code, or other functions
* provided in this module.
*
- * @param args Either none, and the log add rountines are tested, or the
+ * @param args Either none, and the log add routines are tested, or the
* following 4 arguments: k (cell), n (total), r (row), m (col)
*/
public static void main(String[] args) {
diff --git a/src/edu/stanford/nlp/naturalli/Monotonicity.java b/src/edu/stanford/nlp/naturalli/Monotonicity.java
new file mode 100644
index 0000000000..7444f2f0c5
--- /dev/null
+++ b/src/edu/stanford/nlp/naturalli/Monotonicity.java
@@ -0,0 +1,13 @@
+package edu.stanford.nlp.naturalli;
+
+/**
+ * A monotonicity value.
+ *
+ * @author Gabor Angeli
+ */
+public enum Monotonicity {
+ MONOTONE,
+ ANTITONE,
+ NONMONOTONE,
+ INVALID;
+}
diff --git a/src/edu/stanford/nlp/naturalli/MonotonicityType.java b/src/edu/stanford/nlp/naturalli/MonotonicityType.java
new file mode 100644
index 0000000000..48fe71fd9b
--- /dev/null
+++ b/src/edu/stanford/nlp/naturalli/MonotonicityType.java
@@ -0,0 +1,13 @@
+package edu.stanford.nlp.naturalli;
+
+/**
+ * The monotonicity type -- that is, additive, multiplicative, or both/neither
+ *
+ * @author Gabor Angeli
+ */
+public enum MonotonicityType {
+ NONE,
+ ADDITIVE,
+ MULTIPLICATIVE,
+ BOTH;
+}
diff --git a/src/edu/stanford/nlp/naturalli/NaturalLogicAnnotations.java b/src/edu/stanford/nlp/naturalli/NaturalLogicAnnotations.java
new file mode 100644
index 0000000000..b276879f16
--- /dev/null
+++ b/src/edu/stanford/nlp/naturalli/NaturalLogicAnnotations.java
@@ -0,0 +1,58 @@
+package edu.stanford.nlp.naturalli;
+
+import edu.stanford.nlp.ie.util.RelationTriple;
+import edu.stanford.nlp.ling.CoreAnnotation;
+
+import java.util.Collection;
+
+/**
+ * A collection of {@link edu.stanford.nlp.ling.CoreAnnotation}s for various Natural Logic data.
+ *
+ * @author Gabor Angeli
+ */
+public class NaturalLogicAnnotations {
+
+ /**
+ * An annotation which attaches to a CoreLabel to denote that this is an operator in natural logic,
+ * to describe which operator it is, and to give the scope of its argument(s).
+ */
+ public static final class OperatorAnnotation implements CoreAnnotation {
+ @Override
+ public Class getType() {
+ return OperatorSpec.class;
+ }
+ }
+
+ /**
+ * An annotation which attaches to a CoreLabel to denote that this is an operator in natural logic,
+ * to describe which operator it is, and to give the scope of its argument(s).
+ */
+ public static final class PolarityAnnotation implements CoreAnnotation {
+ @Override
+ public Class getType() {
+ return Polarity.class;
+ }
+ }
+
+ /**
+ * The set of sentences which are entailed by the original sentence, according to Natural Logic semantics.
+ */
+ public static final class EntailedSentencesAnnotation implements CoreAnnotation> {
+ @SuppressWarnings("unchecked")
+ @Override
+ public Class> getType() {
+ return (Class>) ((Object) Collection.class);
+ }
+ }
+
+ /**
+ * The set of relation triples extracted from this sentence.
+ */
+ public static final class RelationTriplesAnnotation implements CoreAnnotation> {
+ @SuppressWarnings("unchecked")
+ @Override
+ public Class> getType() {
+ return (Class>) ((Object) Collection.class);
+ }
+ }
+}
diff --git a/src/edu/stanford/nlp/naturalli/NaturalLogicAnnotator.java b/src/edu/stanford/nlp/naturalli/NaturalLogicAnnotator.java
new file mode 100644
index 0000000000..ba1e38592a
--- /dev/null
+++ b/src/edu/stanford/nlp/naturalli/NaturalLogicAnnotator.java
@@ -0,0 +1,435 @@
+package edu.stanford.nlp.naturalli;
+
+import edu.stanford.nlp.ling.CoreAnnotations;
+import edu.stanford.nlp.ling.CoreLabel;
+import edu.stanford.nlp.ling.IndexedWord;
+import edu.stanford.nlp.pipeline.Annotation;
+import edu.stanford.nlp.pipeline.SentenceAnnotator;
+import edu.stanford.nlp.semgraph.SemanticGraph;
+import edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations;
+import edu.stanford.nlp.semgraph.SemanticGraphEdge;
+import edu.stanford.nlp.semgraph.semgrex.SemgrexMatcher;
+import edu.stanford.nlp.semgraph.semgrex.SemgrexPattern;
+import edu.stanford.nlp.util.CoreMap;
+import edu.stanford.nlp.util.Pair;
+import edu.stanford.nlp.util.StringUtils;
+import edu.stanford.nlp.util.Triple;
+import edu.stanford.nlp.naturalli.NaturalLogicAnnotations.*;
+
+import java.util.*;
+import java.util.function.Function;
+
+/**
+ * An annotator marking operators with their scope.
+ * Look at {@link NaturalLogicAnnotator#PATTERNS} for the full list of patterns, otherwise
+ * {@link NaturalLogicAnnotator#doOneSentence(Annotation, CoreMap)} is the main interface for this class.
+ *
+ * TODO(gabor) annotate generics as "most"
+ *
+ * @author Gabor Angeli
+ */
+@SuppressWarnings("unchecked")
+public class NaturalLogicAnnotator extends SentenceAnnotator {
+
+ /**
+ * A regex for arcs that act as determiners.
+ */
+ private static final String DET = "/(pre)?det|a(dv)?mod|neg|num|nn/";
+ /**
+ * A regex for arcs that we pretend are subject arcs.
+ */
+ private static final String GEN_SUBJ = "/[ni]subj(pass)?/";
+ /**
+ * A regex for arcs that we pretend are object arcs.
+ */
+ private static final String GEN_OBJ = "/[di]obj|xcomp|advcl|acomp/";
+ /**
+ * A regex for arcs that we pretend are copula.
+ */
+ private static final String GEN_COP = "/cop|aux(pass)?/";
+ /**
+ * A regex for arcs which denote a sub-clause (e.g., "at Stanford" or "who are at Stanford")
+ */
+ private static final String GEN_CLAUSE = "/prep|rcmod/";
+ /**
+ * A regex for arcs which denote a preposition
+ */
+ private static final String GEN_PREP = "/prep|advcl|ccomp|advmod/";
+
+ /**
+ * A Semgrex fragment for matching a quantifier.
+ */
+ private static final String QUANTIFIER;
+
+ static {
+ Set singleWordQuantifiers = new HashSet<>();
+ for (Operator q : Operator.values()) {
+ String[] tokens = q.surfaceForm.split("\\s+");
+ if (!tokens[tokens.length - 1].startsWith("_")) {
+ singleWordQuantifiers.add("(" + tokens[tokens.length - 1].toLowerCase() + ")");
+ }
+ }
+ QUANTIFIER = "[ {lemma:/" + StringUtils.join(singleWordQuantifiers, "|") + "/}=quantifier | {pos:CD}=quantifier ]";
+ }
+
+ /**
+ * The patterns to use for marking quantifier scopes.
+ */
+ private static final List PATTERNS = Collections.unmodifiableList(new ArrayList() {{
+ // { All cats eat mice,
+ // All cats want milk }
+ add(SemgrexPattern.compile("{}=pivot >"+GEN_SUBJ+" ({}=subject >>"+DET+" "+QUANTIFIER+") >"+GEN_OBJ+" {}=object"));
+ // { All cats are in boxes,
+ // All cats voted for Obama,
+ // All cats have voted for Obama }
+ add(SemgrexPattern.compile("{pos:/V.*/}=pivot >"+GEN_SUBJ+" ({}=subject >>"+DET+" "+QUANTIFIER+") >"+GEN_PREP+" {}=object"));
+ // { All cats are cute,
+ // All cats can purr }
+ add(SemgrexPattern.compile("{}=object >"+GEN_SUBJ+" ({}=subject >>"+DET+" "+QUANTIFIER+") >"+GEN_COP+" {}=pivot"));
+ // { Everyone at Stanford likes cats,
+ // Everyone who is at Stanford likes cats }
+ add(SemgrexPattern.compile("{}=pivot >"+GEN_SUBJ+" ( "+QUANTIFIER+" >"+GEN_CLAUSE+" {}=subject ) >"+GEN_OBJ+" {}=object"));
+ // { Everyone at Stanford voted for Colbert }
+ add(SemgrexPattern.compile("{pos:/V.*/}=pivot >"+GEN_SUBJ+" ( "+QUANTIFIER+" >"+GEN_CLAUSE+" {}=subject ) >"+GEN_PREP+" {}=object"));
+ // { Felix likes cat food }
+ add(SemgrexPattern.compile("{}=pivot >"+GEN_SUBJ+" {pos:NNP}=Subject >"+GEN_OBJ+" {}=object"));
+ // { Felix has spoken to Fido }
+ add(SemgrexPattern.compile("{pos:/V.*/}=pivot >"+GEN_SUBJ+" {pos:NNP}=Subject >/prep|ccomp|[di]obj/ {}=object"));
+ // { Felix is a cat,
+ // Felix is cute }
+ add(SemgrexPattern.compile("{}=object >"+GEN_SUBJ+" {pos:NNP}=Subject >"+GEN_COP+" {}=pivot"));
+ // { Some cats do n't like dogs }
+ add(SemgrexPattern.compile("{}=pivot >neg "+QUANTIFIER+" >"+GEN_OBJ+" {}=object"));
+ // { All of the cats hate dogs. }
+ add(SemgrexPattern.compile("{pos:/V.*/}=pivot >"+GEN_SUBJ+" ( "+QUANTIFIER+" >prep {}=subject ) >"+GEN_OBJ+" {}=object"));
+ add(SemgrexPattern.compile("{pos:/V.*/}=pivot >dep ( "+QUANTIFIER+" >prep {}=subject ) >"+GEN_SUBJ+" {}=object")); // as above, but handle a common parse error
+ // { Either cats or dogs have tails. }
+ add(SemgrexPattern.compile("{pos:/V.*/}=pivot >dep {lemma:either}=quantifier >"+GEN_SUBJ+" {}=subject >"+GEN_OBJ+" {}=object"));
+ // { There are cats }
+ add(SemgrexPattern.compile("{}=quantifier >"+GEN_SUBJ+" {}=pivot >>expl {}"));
+ }});
+
+ /** A helper method for
+ * {@link NaturalLogicAnnotator#getModifierSubtreeSpan(edu.stanford.nlp.semgraph.SemanticGraph, edu.stanford.nlp.ling.IndexedWord)} and
+ * {@link NaturalLogicAnnotator#getSubtreeSpan(edu.stanford.nlp.semgraph.SemanticGraph, edu.stanford.nlp.ling.IndexedWord)}.
+ */
+ private static Pair getGeneralizedSubtreeSpan(SemanticGraph tree, IndexedWord root, Set validArcs) {
+ int min = root.index();
+ int max = root.index();
+ Queue fringe = new LinkedList<>();
+ for (SemanticGraphEdge edge : tree.getOutEdgesSorted(root)) {
+ String edgeLabel = edge.getRelation().getShortName();
+ if ((validArcs == null || validArcs.contains(edgeLabel)) &&
+ !"punct".equals(edgeLabel)) {
+ fringe.add(edge.getDependent());
+ }
+ }
+ while (!fringe.isEmpty()) {
+ IndexedWord node = fringe.poll();
+ min = Math.min(node.index(), min);
+ max = Math.max(node.index(), max);
+ for (SemanticGraphEdge edge : tree.getOutEdgesSorted(node)) {
+ if (!"punct".equals(edge.getRelation().getShortName())) { // ignore punctuation
+ fringe.add(edge.getDependent());
+ }
+ }
+ }
+ return Pair.makePair(min, max + 1);
+ }
+
+ private static final Set MODIFIER_ARCS = Collections.unmodifiableSet(new HashSet() {{
+ add("aux");
+ add("prep");
+ }});
+
+ /**
+ * Returns the yield span for the word rooted at the given node, but only traversing a fixed set of relations.
+ * @param tree The dependency graph to get the span from.
+ * @param root The root word of the span.
+ * @return A one indexed span rooted at the given word.
+ */
+ private static Pair getModifierSubtreeSpan(SemanticGraph tree, IndexedWord root) {
+ return getGeneralizedSubtreeSpan(tree, root, MODIFIER_ARCS);
+ }
+
+ /**
+ * Returns the yield span for the word rooted at the given node. So, for example, "all cats like dogs" rooted at the word
+ * "cats" would yield a span (1, 3) -- "all cats".
+ * @param tree The dependency graph to get the span from.
+ * @param root The root word of the span.
+ * @return A one indexed span rooted at the given word.
+ */
+ private static Pair getSubtreeSpan(SemanticGraph tree, IndexedWord root) {
+ return getGeneralizedSubtreeSpan(tree, root, null);
+ }
+
+ /**
+ * Effectively, merge two spans
+ */
+ private static Pair includeInSpan(Pair span, Pair toInclude) {
+ return Pair.makePair(Math.min(span.first, toInclude.first), Math.max(span.second, toInclude.second));
+ }
+
+ /**
+ * Exclude the second span from the first, if the second is on the edge of the first. If the second is in the middle, it's
+ * unclear what this function should do, so it just returns the original span.
+ */
+ private static Pair excludeFromSpan(Pair span, Pair toExclude) {
+ if (toExclude.second <= span.first || toExclude.first >= span.second) {
+ // Case: toExclude is outside of the span anyways
+ return span;
+ } else if (toExclude.first <= span.first && toExclude.second > span.first) {
+ // Case: overlap on the front
+ return Pair.makePair(toExclude.second, span.second);
+ } else if (toExclude.first < span.second && toExclude.second >= span.second) {
+ // Case: overlap on the back
+ return Pair.makePair(span.first, toExclude.first);
+ } else if (toExclude.first > span.first && toExclude.second < span.second) {
+ // Case: toExclude is within the span
+ return span;
+ } else {
+ throw new IllegalStateException("This case should be impossible");
+ }
+ }
+
+ /**
+ * Compute the span for a given matched pattern.
+ * At a high level:
+ *
+ *
+ * - If both a subject and an object exist, we take the subject minus the quantifier, and the object plus the pivot.
+ * - If only an object exists, we make the subject the object, and create a dummy object to signify a one-place quantifier.
+ * - If neither the subject or object exist, the pivot is the subject and there is no object.
+ *
+ *
+ * But:
+ *
+ *
+ * - If we have a two-place quantifier, the object is allowed to absorb various specific arcs from the pivot.
+ * - If we have a one-place quantifier, the object is allowed to absorb only prepositions from the pivot.
+ *
+ */
+ private OperatorSpec computeScope(SemanticGraph tree, Operator operator,
+ IndexedWord pivot, Pair quantifierSpan,
+ IndexedWord subject, IndexedWord object) {
+ Pair subjSpan;
+ Pair objSpan;
+ if (subject == null && object == null) {
+ subjSpan = getSubtreeSpan(tree, pivot);
+ objSpan = Pair.makePair(subjSpan.second, subjSpan.second);
+ } else if (subject == null) {
+ subjSpan = includeInSpan(getSubtreeSpan(tree, object), getGeneralizedSubtreeSpan(tree, pivot, Collections.singleton("prep")));
+ objSpan = Pair.makePair(subjSpan.second, subjSpan.second);
+ } else {
+ Pair subjectSubtree = getSubtreeSpan(tree, subject);
+ subjSpan = excludeFromSpan(subjectSubtree, quantifierSpan);
+ objSpan = excludeFromSpan(includeInSpan(getSubtreeSpan(tree, object), getModifierSubtreeSpan(tree, pivot)), subjectSubtree);
+ }
+ return new OperatorSpec(operator,
+ quantifierSpan.first - 1, quantifierSpan.second - 1,
+ subjSpan.first - 1, subjSpan.second - 1,
+ objSpan.first - 1, objSpan.second - 1);
+ }
+
+ /**
+ * Try to find which quantifier we matched, given that we matched the head of a quantifier at the given IndexedWord, and that
+ * this whole deal is taking place in the given sentence.
+ *
+ * @param sentence The sentence we are matching.
+ * @param quantifier The word at which we matched a quantifier.
+ * @return An optional triple consisting of the particular quantifier we matched, as well as the span of that quantifier in the sentence.
+ */
+ private Optional> validateQuantiferByHead(CoreMap sentence, IndexedWord quantifier) {
+ int end = quantifier.index();
+ for (int start = Math.max(0, end - 10); start < end; ++start) {
+ Function glossFn = (label) -> "CD".equals(label.tag()) ? "__NUM__" : label.lemma();
+ String gloss = StringUtils.join(sentence.get(CoreAnnotations.TokensAnnotation.class), " ", glossFn, start, end).toLowerCase();
+ for (Operator q : Operator.values()) {
+ if (q.surfaceForm.equals(gloss)) {
+ return Optional.of(Triple.makeTriple(q, start + 1, end + 1));
+ }
+ }
+ }
+ return Optional.empty();
+ }
+
+
+ /**
+ * Find the operators in this sentence, annotating the head word (only!) of each operator with the
+ * {@link edu.stanford.nlp.naturalli.NaturalLogicAnnotations.OperatorAnnotation}.
+ *
+ * @param sentence As in {@link edu.stanford.nlp.naturalli.NaturalLogicAnnotator#doOneSentence(edu.stanford.nlp.pipeline.Annotation, edu.stanford.nlp.util.CoreMap)}
+ */
+ private void annotateOperators(CoreMap sentence) {
+ SemanticGraph tree = sentence.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
+ for (SemgrexPattern pattern : PATTERNS) {
+ SemgrexMatcher matcher = pattern.matcher(tree);
+ while (matcher.find()) {
+
+ // Get terms
+ IndexedWord properSubject = matcher.getNode("Subject");
+ IndexedWord quantifier, subject;
+ boolean namedEntityQuantifier = false;
+ if (properSubject != null) {
+ quantifier = subject = properSubject;
+ namedEntityQuantifier = true;
+ } else {
+ quantifier = matcher.getNode("quantifier");
+ subject = matcher.getNode("subject");
+ }
+
+ // Validate quantifier
+ // At the end of this block, quantifierInfo holds the matched operator and its span (if any)
+ Optional> quantifierInfo;
+ if (namedEntityQuantifier) {
+ // named entities have the "all" semantics by default.
+ quantifierInfo = Optional.of(Triple.makeTriple(Operator.ALL, quantifier.index(), quantifier.index())); // note: empty quantifier span given
+ } else {
+ // find the quantifier, and return some info about it.
+ quantifierInfo = validateQuantiferByHead(sentence, quantifier);
+ }
+
+ // Set tokens
+ if (quantifierInfo.isPresent()) {
+ // Compute span
+ OperatorSpec scope = computeScope(tree, quantifierInfo.get().first,
+ matcher.getNode("pivot"), Pair.makePair(quantifierInfo.get().second, quantifierInfo.get().third), subject, matcher.getNode("object"));
+ // Set annotation
+ CoreLabel token = sentence.get(CoreAnnotations.TokensAnnotation.class).get(quantifier.index() - 1);
+ OperatorSpec oldScope = token.get(OperatorAnnotation.class);
+ if (oldScope == null || oldScope.quantifierLength() < scope.quantifierLength() ||
+ oldScope.instance != scope.instance) {
+ token.set(OperatorAnnotation.class, scope);
+ } else {
+ token.set(OperatorAnnotation.class, OperatorSpec.merge(oldScope, scope));
+ }
+ }
+ }
+ }
+
+ // Ensure we didn't select overlapping quantifiers. For example, "a" and "a few" can often overlap.
+ // In these cases, take the longer quantifier match.
+ List quantifiers = new ArrayList<>();
+ for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
+ if (token.has(OperatorAnnotation.class)) {
+ quantifiers.add(token.get(OperatorAnnotation.class));
+ }
+ }
+ quantifiers.sort( (x, y) -> y.quantifierLength() - x.quantifierLength());
+ for (OperatorSpec quantifier : quantifiers) {
+ for (int i = quantifier.quantifierBegin; i < quantifier.quantifierEnd; ++i) {
+ if (i != quantifier.quantifierHead) {
+ sentence.get(CoreAnnotations.TokensAnnotation.class).get(i).remove(OperatorAnnotation.class);
+ }
+ }
+ }
+ }
+
+ /**
+ * Annotate every token for its polarity, based on the operators found. This function will set the
+ * {@link edu.stanford.nlp.naturalli.NaturalLogicAnnotations.PolarityAnnotation} for every token.
+ *
+ * @param sentence As in {@link edu.stanford.nlp.naturalli.NaturalLogicAnnotator#doOneSentence(edu.stanford.nlp.pipeline.Annotation, edu.stanford.nlp.util.CoreMap)}
+ */
+ private void annotatePolarity(CoreMap sentence) {
+ // Collect all the operators in this sentence
+ List operators = new ArrayList<>();
+ List tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
+ for (CoreLabel token : tokens) {
+ OperatorSpec specOrNull = token.get(OperatorAnnotation.class);
+ if (specOrNull != null) {
+ operators.add(specOrNull);
+ }
+ }
+
+ // Set polarity for each token
+ for (int i = 0; i < tokens.size(); ++i) {
+ CoreLabel token = tokens.get(i);
+ // Get operators in scope
+ List> inScope = new ArrayList<>(4);
+ for (OperatorSpec operator : operators) {
+ if (i >= operator.subjectBegin && i < operator.subjectEnd) {
+ inScope.add(Triple.makeTriple(operator.subjectEnd - operator.subjectBegin, operator.instance.subjMono, operator.instance.subjType));
+ } else if (i >= operator.objectBegin && i < operator.objectEnd) {
+ inScope.add(Triple.makeTriple(operator.objectEnd - operator.objectBegin, operator.instance.objMono, operator.instance.objType));
+ }
+ }
+ // Sort the operators by their scope (approximated by the size of their argument span)
+ inScope.sort( (x, y) -> y.first - x.first);
+ // Create polarity
+ List> info = new ArrayList<>(inScope.size());
+ for (Triple term : inScope) {
+ info.add(Pair.makePair(term.second, term.third));
+ }
+ Polarity polarity = new Polarity(info);
+ // Set polarity
+ token.set(PolarityAnnotation.class, polarity);
+ }
+ }
+
+ /**
+ * If false, don't annotate tokens for polarity but only find the operators and their scopes.
+ */
+ public final boolean doPolarity;
+
+ /**
+ * Create a new annotator.
+ * @param annotatorName The prefix for the properties for this annotator.
+ * @param props The properties to configure this annotator with.
+ */
+ public NaturalLogicAnnotator(String annotatorName, Properties props) {
+ this.doPolarity = Boolean.valueOf(props.getProperty(annotatorName + ".doPolarity", "true"));
+ }
+
+ /**
+ * @see edu.stanford.nlp.naturalli.NaturalLogicAnnotator#NaturalLogicAnnotator(String, java.util.Properties)
+ */
+ public NaturalLogicAnnotator(Properties props) {
+ this(STANFORD_NATLOG, props);
+
+ }
+
+ /** The default constructor */
+ public NaturalLogicAnnotator() {
+ this("__irrelevant__", new Properties());
+ }
+
+ /** {@inheritDoc} */
+ @Override
+ protected void doOneSentence(Annotation annotation, CoreMap sentence) {
+ annotateOperators(sentence);
+ if (doPolarity) {
+ annotatePolarity(sentence);
+ }
+ }
+
+ /** {@inheritDoc} */
+ @Override
+ protected int nThreads() {
+ return 1;
+ }
+
+ /** {@inheritDoc} */
+ @Override
+ protected long maxTime() {
+ return Long.MAX_VALUE;
+ }
+
+ /** {@inheritDoc} */
+ @Override
+ protected void doOneFailedSentence(Annotation annotation, CoreMap sentence) {
+ System.err.println("Failed to annotate: " + sentence.get(CoreAnnotations.TextAnnotation.class));
+ }
+
+ /** {@inheritDoc} */
+ @Override
+ public Set requirementsSatisfied() {
+ return Collections.singleton(NATLOG_REQUIREMENT);
+ }
+
+ /** {@inheritDoc} */
+ @Override
+ public Set requires() {
+ return Collections.EMPTY_SET; // TODO(gabor) set me!
+ }
+}
diff --git a/src/edu/stanford/nlp/naturalli/NaturalLogicRelation.java b/src/edu/stanford/nlp/naturalli/NaturalLogicRelation.java
new file mode 100644
index 0000000000..f995165894
--- /dev/null
+++ b/src/edu/stanford/nlp/naturalli/NaturalLogicRelation.java
@@ -0,0 +1,481 @@
+package edu.stanford.nlp.naturalli;
+
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * The catalog of the seven Natural Logic relations.
+ * Set-theoretically, if we assume A and B are two sets (e.g., denotations),
+ * and D is the universe of discourse,
+ * then the relations between A and B are defined as follows:
+ *
+ *
+ * - Equivalence: A = B
+ * - Forward entailment: A \\subset B
+ * - Reverse entailment: A \\supset B
+ * - Negation: A \\intersect B = \\empty \\land A \\union B = D
+ * - Alternation: A \\intersect B = \\empty
+ * - Cover: A \\union B = D
+ * - Independence: none of the constraints above are known to hold
+ *
+ *
+ * @author Gabor Angeli
+ */
+public enum NaturalLogicRelation {
+ EQUIVALENT(0, true, false), // A = B
+ FORWARD_ENTAILMENT(1, true, false), // A \subset B
+ REVERSE_ENTAILMENT(2, false, false), // A \supset B
+ NEGATION(3, false, true), // disjoint and exhaustive
+ ALTERNATION(4, false, true), // disjoint
+ COVER(5, false, false), // exhaustive
+ INDEPENDENCE(6, false, false); // none of the above
+
+ // Stable serialization index; kept distinct from ordinal() so constants can be reordered safely.
+ public final int fixedIndex;
+ // isEntailed: the relation preserves truth; isNegated: the relation flips truth.
+ public final boolean isEntailed, isNegated;
+
+ NaturalLogicRelation(int fixedIndex, boolean isEntailed, boolean isNegated) {
+ this.fixedIndex = fixedIndex;
+ this.isEntailed = isEntailed;
+ this.isNegated = isNegated;
+ }
+
+ /**
+ * Look up a relation by its serialized index (the {@link #fixedIndex} passed
+ * to the constructor), rather than by ordinal.
+ * @param index The fixed index of the relation to retrieve.
+ * @return The relation whose fixedIndex matches.
+ * @throws IllegalArgumentException if no relation carries the given index.
+ */
+ protected static NaturalLogicRelation byFixedIndex(int index) {
+ for (NaturalLogicRelation candidate : values()) {
+ if (candidate.fixedIndex == index) {
+ return candidate;
+ }
+ }
+ throw new IllegalArgumentException("Unknown index for Natural Logic relation: " + index);
+ }
+
+ /**
+ * The MacCartney "join table" -- this determines the transitivity of entailment if we chain two relations together.
+ * These should already be projected up through the sentence, so that the relations being joined are relations between
+ * sentences rather than relations between lexical items (see {@link Polarity#projectLexicalRelation(NaturalLogicRelation)},
+ * set by {@link edu.stanford.nlp.naturalli.NaturalLogicAnnotator} using the {@link edu.stanford.nlp.naturalli.NaturalLogicAnnotations.PolarityAnnotation}).
+ * @param other The relation to join this relation with.
+ * @return The new joined relation.
+ */
+ public NaturalLogicRelation join(NaturalLogicRelation other) {
+ // NOTE(review): each inner switch below returns for every constant of 'other',
+ // so the apparent fall-through between outer cases is unreachable today.
+ // If a new relation constant is ever added, EVERY inner switch must be
+ // extended, or control would silently fall into the next case's table.
+ switch (this) {
+ case EQUIVALENT:
+ return other; // joining with equivalence is the identity
+ case FORWARD_ENTAILMENT:
+ switch (other) {
+ case EQUIVALENT:
+ case FORWARD_ENTAILMENT:
+ return FORWARD_ENTAILMENT;
+ case NEGATION:
+ case ALTERNATION:
+ return COVER;
+ case REVERSE_ENTAILMENT:
+ case COVER:
+ case INDEPENDENCE:
+ return INDEPENDENCE;
+ }
+ case REVERSE_ENTAILMENT:
+ switch (other) {
+ case EQUIVALENT:
+ case REVERSE_ENTAILMENT:
+ return REVERSE_ENTAILMENT;
+ case NEGATION:
+ case COVER:
+ return COVER;
+ case FORWARD_ENTAILMENT:
+ case ALTERNATION:
+ case INDEPENDENCE:
+ return INDEPENDENCE;
+ }
+ case NEGATION:
+ switch (other) {
+ case EQUIVALENT:
+ return NEGATION;
+ case FORWARD_ENTAILMENT:
+ return COVER;
+ case REVERSE_ENTAILMENT:
+ return ALTERNATION;
+ case NEGATION:
+ return EQUIVALENT; // double negation
+ case ALTERNATION:
+ return REVERSE_ENTAILMENT;
+ case COVER:
+ return FORWARD_ENTAILMENT;
+ case INDEPENDENCE:
+ return INDEPENDENCE;
+ }
+ case ALTERNATION:
+ switch (other) {
+ case EQUIVALENT:
+ case REVERSE_ENTAILMENT:
+ return ALTERNATION;
+ case NEGATION:
+ case COVER:
+ return FORWARD_ENTAILMENT;
+ case FORWARD_ENTAILMENT:
+ case ALTERNATION:
+ case INDEPENDENCE:
+ return INDEPENDENCE;
+ }
+ case COVER:
+ switch (other) {
+ case EQUIVALENT:
+ case FORWARD_ENTAILMENT:
+ return COVER;
+ case NEGATION:
+ case ALTERNATION:
+ return REVERSE_ENTAILMENT;
+ case REVERSE_ENTAILMENT:
+ case COVER:
+ case INDEPENDENCE:
+ return INDEPENDENCE;
+ }
+ case INDEPENDENCE:
+ return INDEPENDENCE; // independence absorbs everything
+ }
+ // Unreachable while the inner switches above are exhaustive (see NOTE at top).
+ throw new IllegalStateException("[should be impossible]: Incomplete join table for " + this + " joined with " + other);
+ }
+
+ private static final Map insertArcToNaturalLogicRelation = Collections.unmodifiableMap(new HashMap() {{
+ put("acomp", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("advcl", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("purpcl", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("advmod", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("amod", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("appos", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("aux", NaturalLogicRelation.INDEPENDENCE); // he left -/-> he should leave
+ put("auxpass", NaturalLogicRelation.INDEPENDENCE); // some cat adopts -/-> some cat got adopted
+ put("cc", NaturalLogicRelation.REVERSE_ENTAILMENT); // match dep_conj
+ put("ccomp", NaturalLogicRelation.INDEPENDENCE); // interesting project here... "he said x" -> "x"?
+ put("conj", NaturalLogicRelation.REVERSE_ENTAILMENT); // match dep_cc
+ put("cop", NaturalLogicRelation.INDEPENDENCE); //
+ put("csubj", NaturalLogicRelation.INDEPENDENCE); // don't drop subjects.
+ put("csubjpass", NaturalLogicRelation.INDEPENDENCE); // as above
+ put("dep", NaturalLogicRelation.INDEPENDENCE); //
+ put("det", NaturalLogicRelation.EQUIVALENT); //
+ put("discourse", NaturalLogicRelation.EQUIVALENT); //
+ put("dobj", NaturalLogicRelation.REVERSE_ENTAILMENT); // "he studied NLP at Stanford" -> "he studied NLP"
+ put("expl", NaturalLogicRelation.EQUIVALENT); // though we shouldn't see this...
+ put("goeswith", NaturalLogicRelation.EQUIVALENT); // also shouldn't see this
+ put("iobj", NaturalLogicRelation.REVERSE_ENTAILMENT); // she gave me a raise -> she gave a raise
+ put("mark", NaturalLogicRelation.REVERSE_ENTAILMENT); // he says that you like to swim -> he says you like to swim
+ put("mwe", NaturalLogicRelation.INDEPENDENCE); // shouldn't see this
+ put("neg", NaturalLogicRelation.NEGATION); //
+ put("nn", NaturalLogicRelation.INDEPENDENCE); //
+ put("npadvmod", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("nsubj", NaturalLogicRelation.INDEPENDENCE); //
+ put("nsubjpass", NaturalLogicRelation.INDEPENDENCE); //
+ put("num", NaturalLogicRelation.INDEPENDENCE); //
+ put("number", NaturalLogicRelation.INDEPENDENCE); //
+ put("parataxis", NaturalLogicRelation.INDEPENDENCE); // or, reverse?
+ put("pcomp", NaturalLogicRelation.INDEPENDENCE); // though, not so in collapsed dependencies
+ put("pobj", NaturalLogicRelation.INDEPENDENCE); // must delete whole preposition
+ put("poss", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("posseive", NaturalLogicRelation.INDEPENDENCE); // see dep_poss
+ put("preconj", NaturalLogicRelation.INDEPENDENCE); // forbidden to see this
+ put("predet", NaturalLogicRelation.INDEPENDENCE); // forbidden to see this
+ put("prep", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prt", NaturalLogicRelation.INDEPENDENCE); //
+ put("punct", NaturalLogicRelation.EQUIVALENT); //
+ put("quantmod", NaturalLogicRelation.FORWARD_ENTAILMENT); //
+ put("rcmod", NaturalLogicRelation.FORWARD_ENTAILMENT); // "there are great tennors --rcmod--> who are modest"
+ put("root", NaturalLogicRelation.INDEPENDENCE); // err.. never delete
+ put("tmod", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("vmod", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("partmod", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("infmod", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("xcomp", NaturalLogicRelation.INDEPENDENCE); //
+ put("conj_and", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("conj_or", NaturalLogicRelation.FORWARD_ENTAILMENT); //
+ put("prep_aboard", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_about", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_above", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_across", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_after", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_against", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_along", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_amid", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_among", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_anti", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_around", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_as", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_at", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_before", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_behind", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_below", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_beneath", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_beside", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_besides", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_between", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_beyond", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_but", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_by", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_concerning", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_considering", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_despite", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_down", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_during", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_except", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_excepting", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_excluding", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_following", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_for", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_from", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_in", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_inside", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_into", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_like", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_minus", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_near", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_of", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_off", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_on", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_onto", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_opposite", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_outside", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_over", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_past", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_per", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_plus", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_regarding", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_round", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_save", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_since", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_than", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_through", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_to", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_toward", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_towards", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_under", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_underneath", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_unlike", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_until", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_up", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_upon", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_versus", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_via", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_with", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_within", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_without", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_whether", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_according_to", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_as_per", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_compared_to", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_instead_of", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_preparatory_to", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_across_from", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_as_to", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_compared_with", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_irrespective_of", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_previous_to", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_ahead_of", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_aside_from", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_due_to", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_next_to", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_prior_to", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_along_with", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_away_from", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_depending_on", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_near_to", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_pursuant_to", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_alongside_of", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_based_on", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_except_for", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_off_of", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_regardless_of", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_apart_from", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_because_of", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_exclusive_of", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_out_of", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_subsequent_to", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_as_for", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_close_by", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_contrary_to", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_outside_of", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_such_as", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_as_from", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_close_to", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_followed_by", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_owing_to", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_thanks_to", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_as_of", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_contrary_to", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_inside_of", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_preliminary_to", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_together_with", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_by_means_of", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_in_case_of", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_in_place_of", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_on_behalf_of", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_with_respect_to", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_in_accordance_with", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_in_front_of", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_in_spite_of", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_on_top_of", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_in_addition_to", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_in_lieu_of", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_on_account_of", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prep_with_regard_to", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_aboard", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_about", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_above", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_across", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_after", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_against", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_along", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_amid", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_among", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_anti", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_around", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_as", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_at", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_before", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_behind", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_below", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_beneath", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_beside", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_besides", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_between", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_beyond", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_but", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_by", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_concerning", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_considering", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_despite", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_down", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_during", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_except", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_excepting", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_excluding", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_following", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_for", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_from", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_in", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_inside", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_into", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_like", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_minus", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_near", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_of", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_off", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_on", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_onto", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_opposite", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_outside", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_over", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_past", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_per", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_plus", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_regarding", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_round", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_save", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_since", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_than", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_through", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_to", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_toward", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_towards", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_under", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_underneath", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_unlike", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_until", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_up", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_upon", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_versus", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_via", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_with", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_within", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_without", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_according_to", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_as_per", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_compared_to", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_instead_of", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_preparatory_to", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_across_from", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_as_to", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_compared_with", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_irrespective_of", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_previous_to", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_ahead_of", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_aside_from", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_due_to", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_next_to", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_prior_to", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_along_with", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_away_from", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_depending_on", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_near_to", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_pursuant_to", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_alongside_of", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_based_on", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_except_for", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_off_of", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_regardless_of", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_apart_from", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_because_of", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_exclusive_of", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_out_of", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_subsequent_to", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_as_for", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_close_by", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_contrary_to", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_outside_of", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_such_as", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_as_from", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_close_to", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_followed_by", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_owing_to", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_thanks_to", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_as_of", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_contrary_to", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_inside_of", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_preliminary_to", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_together_with", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_by_means_of", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_in_case_of", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_in_place_of", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_on_behalf_of", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_with_respect_to", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_in_accordance_with", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_in_front_of", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_in_spite_of", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_on_top_of", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_in_addition_to", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_in_lieu_of", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_on_account_of", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ put("prepc_with_regard_to", NaturalLogicRelation.REVERSE_ENTAILMENT); //
+ }});
+
+ /**
+ * Returns the natural logic relation corresponding to the given dependency arc being inserted into a sentence.
+ * Lookup is case-insensitive on the dependency label.
+ * @param dependencyLabel The dependency arc label (e.g., "dobj", "prep_of").
+ * @return The lexical relation for inserting an arc with this label.
+ * @throws IllegalArgumentException if the label is not in the insertion table.
+ */
+ public static NaturalLogicRelation forDependencyInsertion(String dependencyLabel) {
+ NaturalLogicRelation rel = insertArcToNaturalLogicRelation.get(dependencyLabel.toLowerCase());
+ if (rel == null) {
+ throw new IllegalArgumentException("Unknown dependency arc label: " + dependencyLabel);
+ }
+ return rel;
+ }
+
+ /**
+ * Returns the natural logic relation corresponding to the given dependency arc being deleted from a sentence.
+ * Deletion is the dual of insertion: the forward/reverse entailments swap,
+ * as do alternation/cover, while the symmetric relations (equivalence,
+ * negation, independence) map to themselves.
+ * @throws IllegalArgumentException if the label is unknown (propagated from
+ * {@link #forDependencyInsertion(String)}).
+ */
+ public static NaturalLogicRelation forDependencyDeletion(String dependencyLabel) {
+ NaturalLogicRelation rel = forDependencyInsertion(dependencyLabel);
+ switch (rel) {
+ case EQUIVALENT: return EQUIVALENT; // self-dual
+ case FORWARD_ENTAILMENT: return REVERSE_ENTAILMENT; // swap direction
+ case REVERSE_ENTAILMENT: return FORWARD_ENTAILMENT; // swap direction
+ case NEGATION: return NEGATION; // self-dual
+ case ALTERNATION: return COVER; // swap
+ case COVER: return ALTERNATION; // swap
+ case INDEPENDENCE: return INDEPENDENCE; // self-dual
+ default:
+ throw new IllegalStateException("Unhandled natural logic relation: " + rel);
+ }
+ }
+}
diff --git a/src/edu/stanford/nlp/naturalli/NaturalLogicWeights.java b/src/edu/stanford/nlp/naturalli/NaturalLogicWeights.java
new file mode 100644
index 0000000000..8378e1a8ee
--- /dev/null
+++ b/src/edu/stanford/nlp/naturalli/NaturalLogicWeights.java
@@ -0,0 +1,85 @@
+package edu.stanford.nlp.naturalli;
+
+import edu.stanford.nlp.io.IOUtils;
+import edu.stanford.nlp.stats.ClassicCounter;
+import edu.stanford.nlp.stats.Counter;
+import edu.stanford.nlp.stats.Counters;
+import edu.stanford.nlp.stats.TwoDimensionalCounter;
+
+/**
+ * TODO(gabor) JavaDoc
+ *
+ * @author Gabor Angeli
+ */
+public class NaturalLogicWeights {
+
+ private TwoDimensionalCounter ppAffinity = new TwoDimensionalCounter<>();
+ private Counter dobjAffinity = new ClassicCounter<>();
+
+ public NaturalLogicWeights(String PP_AFFINITY, String DOBJ_AFFINITY) {
+ // Preposition affinities
+ for (String line : IOUtils.readLines(PP_AFFINITY, "utf8")) {
+ String[] fields = line.split("\t");
+ if (fields.length != 3) {
+ throw new IllegalArgumentException("Invalid format for the pp_affinity data");
+ }
+ ppAffinity.setCount(fields[0], fields[1], Double.parseDouble(fields[2]));
+ }
+ for (String verb : ppAffinity.firstKeySet()) {
+ // Normalize counts to be between 0 and 1
+ Counter preps = ppAffinity.getCounter(verb);
+ Counters.multiplyInPlace(preps, -1.0);
+ Counters.addInPlace(preps, 1.0);
+ double min = Counters.min(preps);
+ double max = Counters.max(preps);
+ Counters.addInPlace(preps, -min);
+ if (max == min) {
+ Counters.addInPlace(preps, 0.5);
+ } else {
+ Counters.divideInPlace(preps, max - min);
+ }
+ Counters.multiplyInPlace(preps, -1.0);
+ Counters.addInPlace(preps, 1.0);
+ }
+ // Object affinities
+ for (String line : IOUtils.readLines(DOBJ_AFFINITY, "utf8")) {
+ String[] fields = line.split("\t");
+ if (fields.length != 2) {
+ throw new IllegalArgumentException("Invalid format for the dobj_affinity data");
+ }
+ dobjAffinity.setCount(fields[0], Double.parseDouble(fields[1]));
+ }
+ }
+
+ private double backoffEdgeProbability(String edgeRel) {
+ return 1.0; // TODO(gabor) should probably learn these...
+ }
+
+ public double deletionProbability(String parent, String edgeRel) {
+ return deletionProbability(parent, edgeRel, false);
+ }
+
+ public double deletionProbability(String parent, String edgeRel, boolean isSecondaryEdgeOfType) {
+ if (edgeRel.startsWith("prep")) {
+ double affinity = ppAffinity.getCount(parent, edgeRel);
+ if (affinity != 0.0 && !isSecondaryEdgeOfType) {
+ return Math.sqrt(1.0 - Math.min(1.0, affinity));
+ } else {
+ return backoffEdgeProbability(edgeRel);
+ }
+ } else if (edgeRel.startsWith("dobj")) {
+ double affinity = dobjAffinity.getCount(parent);
+ if (affinity != 0.0 && !isSecondaryEdgeOfType) {
+ return Math.sqrt(1.0 - Math.min(1.0, affinity));
+ } else {
+ return backoffEdgeProbability(edgeRel);
+ }
+ } else {
+ return backoffEdgeProbability(edgeRel);
+ }
+ }
+
+ public static NaturalLogicWeights fromString(String str) {
+ return new NaturalLogicWeights(null, null); // TODO(gabor)
+ }
+}
diff --git a/src/edu/stanford/nlp/naturalli/OpenIE.java b/src/edu/stanford/nlp/naturalli/OpenIE.java
new file mode 100644
index 0000000000..496bbd21f9
--- /dev/null
+++ b/src/edu/stanford/nlp/naturalli/OpenIE.java
@@ -0,0 +1,597 @@
+package edu.stanford.nlp.naturalli;
+
+import edu.stanford.nlp.ie.util.RelationTriple;
+import edu.stanford.nlp.ling.CoreAnnotations;
+import edu.stanford.nlp.ling.CoreLabel;
+import edu.stanford.nlp.ling.IndexedWord;
+import edu.stanford.nlp.pipeline.Annotation;
+import edu.stanford.nlp.pipeline.Annotator;
+import edu.stanford.nlp.pipeline.StanfordCoreNLP;
+import edu.stanford.nlp.semgraph.SemanticGraph;
+import edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations;
+import edu.stanford.nlp.semgraph.SemanticGraphEdge;
+import edu.stanford.nlp.semgraph.semgrex.SemgrexMatcher;
+import edu.stanford.nlp.semgraph.semgrex.SemgrexPattern;
+import edu.stanford.nlp.trees.GrammaticalRelation;
+import edu.stanford.nlp.util.CoreMap;
+import edu.stanford.nlp.util.Execution;
+import edu.stanford.nlp.util.StringUtils;
+
+import java.util.*;
+
+/**
+ * A simple OpenIE system based on valid Natural Logic deletions of a sentence.
+ *
+ * @author Gabor Angeli
+ */
+@SuppressWarnings("FieldCanBeLocal")
+public class OpenIE implements Annotator {
+
+
+  // NOTE(review): the default affinity paths below are absolute paths on a developer
+  // machine; they should be switched to the commented-out classpath resources before release.
+  @Execution.Option(name="openie.pp_affinity", gloss="A tab separated file of 'verb preposition affinity' values, where affinity is between 0 and 1")
+  private String PP_AFFINITY = "/home/gabor/workspace/naturalli/etc/pp_affinity.tsv.gz"; //"edu/stanford/nlp/naturalli/pp_affinity.tab";
+  @Execution.Option(name="openie.dobj_affinity", gloss="A tab separated file of 'verb dobj_affinity' values, where affinity is between 0 and 1")
+  private String DOBJ_AFFINITY = "/home/gabor/workspace/naturalli/etc/dobj_affinity.tsv.gz"; // "edu/stanford/nlp/naturalli/dobj_affinity.tab";
+
+  // The task to tune extractions for (settable via the openie.optimize.for property)
+  private static enum Optimization { GENERAL, KB }
+  @Execution.Option(name="openie.optimize.for", gloss="{General, KB}: Optimize the system for particular tasks (e.g., knowledge base completion tasks -- try to make the subject and object coherent named entities).")
+  private Optimization OPTIMIZE_FOR = Optimization.GENERAL;
+
+  // The deletion-probability weights used by the search over sentence shortenings
+  private final NaturalLogicWeights WEIGHTS;
+
+  /** Create a new OpenIE system, with default properties (delegates to {@link OpenIE#OpenIE(Properties)}). */
+  @SuppressWarnings("UnusedDeclaration")
+  public OpenIE() {
+    this(new Properties());
+  }
+
+  /**
+   * Create a new OpenIE system, based on the given properties.
+   * @param props The properties to parameterize the system with.
+   */
+  public OpenIE(Properties props) {
+    // Fill the @Execution.Option-annotated fields from the given properties
+    Execution.fillOptions(this, props);
+    // Load the deletion weights; this reads the affinity data files configured above
+    this.WEIGHTS = new NaturalLogicWeights(PP_AFFINITY, DOBJ_AFFINITY);
+  }
+
+  /**
+   * A result from the search over possible shortenings of the sentence.
+   */
+  private static class SearchResult {
+    /** The shortened dependency tree. */
+    public final SemanticGraph tree;
+    /** The string forms of the dependency relations deleted to reach this tree. */
+    public final List<String> deletedEdges;
+    /** The confidence that this shortening is entailed by the original sentence. */
+    public final double confidence;
+
+    private SearchResult(SemanticGraph tree, List<String> deletedEdges, double confidence) {
+      this.tree = tree;
+      this.deletedEdges = deletedEdges;
+      this.confidence = confidence;
+    }
+
+    /** Render the shortened sentence as its words, in sentence order, joined by spaces. */
+    @Override
+    public String toString() {
+      return StringUtils.join(tree.vertexListSorted().stream().map(IndexedWord::word), " ");
+    }
+  }
+
+  /**
+   * A state in the search, denoting a partial shortening of the sentence.
+   */
+  private static class SearchState {
+    // Bitmask over (1-based) token indices marking which tokens have been deleted so far
+    public final long deletionMask;
+    // Index into the topological vertex ordering of the next vertex to consider
+    public final int currentIndex;
+    // The current (partially shortened) dependency tree
+    public final SemanticGraph tree;
+    // The relation string of the edge deleted in this step, or null if none
+    public final String lastDeletedEdge;
+    // The state this one was reached from; null for the initial state
+    public final SearchState source;
+    // The cumulative score (product of deletion probabilities) of this state
+    public final double score;
+
+    private SearchState(long deletionMask, int currentIndex, SemanticGraph tree, String lastDeletedEdge, SearchState source, double score) {
+      this.deletionMask = deletionMask;
+      this.currentIndex = currentIndex;
+      this.tree = tree;
+      this.lastDeletedEdge = lastDeletedEdge;
+      this.source = source;
+      this.score = score;
+    }
+  }
+
+  /**
+   * The search algorithm, starting with a full sentence and iteratively shortening it to its entailed sentences.
+   *
+   * @param sentence The tokens of the sentence to begin with.
+   * @param originalTree The original dependency tree of the sentence we are beginning with.
+   * @return A list of search results, corresponding to shortenings of the sentence.
+   */
+  private List<SearchResult> search(List<CoreLabel> sentence, SemanticGraph originalTree) {
+    // Pre-process the tree
+    originalTree = new SemanticGraph(originalTree);  // defensive copy; we mutate it below
+    // (remove common determiners)
+    List<String> determinerRemovals = new ArrayList<>();
+    for (IndexedWord vertex : originalTree.getLeafVertices()) {
+      if (vertex.word().equalsIgnoreCase("the") || vertex.word().equalsIgnoreCase("a") ||
+          vertex.word().equalsIgnoreCase("an")) {
+        originalTree.removeVertex(vertex);
+        determinerRemovals.add("det");
+      }
+    }
+    // (cut conj_and nodes, remembering them so they can be re-attached later)
+    Set<SemanticGraphEdge> andsToAdd = new HashSet<>();
+    for (IndexedWord vertex : originalTree.vertexSet()) {
+      if( originalTree.inDegree(vertex) > 1 ) {
+        SemanticGraphEdge conjAnd = null;
+        for (SemanticGraphEdge edge : originalTree.incomingEdgeIterable(vertex)) {
+          if (edge.getRelation().toString().equals("conj_and")) {
+            conjAnd = edge;
+          }
+        }
+        if (conjAnd != null) {
+          originalTree.removeEdge(conjAnd);
+          andsToAdd.add(conjAnd);
+        }
+      }
+    }
+    // (find secondary edges)
+    Set<SemanticGraphEdge> secondaryEdges = classifySecondaryEdges(originalTree);
+
+    // Outputs
+    List<SearchResult> results = new ArrayList<>();
+    if (!determinerRemovals.isEmpty()) {
+      if (andsToAdd.isEmpty()) {
+        double score = Math.pow(WEIGHTS.deletionProbability(null, "det"), (double) determinerRemovals.size());
+        assert !Double.isNaN(score);
+        assert !Double.isInfinite(score);
+        results.add(new SearchResult(originalTree, determinerRemovals, score));
+      } else {
+        // Restore the cut conj_and edges before emitting this result
+        SemanticGraph treeWithAnds = new SemanticGraph(originalTree);
+        for (SemanticGraphEdge and : andsToAdd) {
+          treeWithAnds.addEdge(and.getGovernor(), and.getDependent(), and.getRelation(), Double.NEGATIVE_INFINITY, false);
+        }
+        results.add(new SearchResult(treeWithAnds, determinerRemovals,
+            Math.pow(WEIGHTS.deletionProbability(null, "det"), (double) determinerRemovals.size())));
+      }
+    }
+
+    // Initialize the search
+    List<IndexedWord> topologicalVertices = originalTree.topologicalSort();
+    Stack<SearchState> fringe = new Stack<>();
+    fringe.push(new SearchState(0l, 0, originalTree, null, null, 1.0));
+
+    // Start the search
+    while (!fringe.isEmpty()) {
+      SearchState state = fringe.pop();
+      IndexedWord currentWord = topologicalVertices.get(state.currentIndex);
+
+      // Push the case where we don't delete (advance to the next undeleted vertex)
+      int nextIndex = state.currentIndex + 1;
+      while (nextIndex < topologicalVertices.size()) {
+        IndexedWord nextWord = topologicalVertices.get(nextIndex);
+        if ( ((state.deletionMask >>> (nextWord.index() - 1)) & 0x1l) == 0) {
+          fringe.push(new SearchState(state.deletionMask, nextIndex, state.tree, null, state, state.score));
+          break;
+        } else {
+          nextIndex += 1;
+        }
+      }
+
+      // Check if we can delete this subtree: every incoming edge's deletion must
+      // project (through the token's polarity) to an entailed natural logic relation
+      boolean canDelete = state.tree.getFirstRoot() != currentWord;
+      for (SemanticGraphEdge edge : state.tree.incomingEdgeIterable(currentWord)) {
+        // Get token information
+        CoreLabel token = sentence.get(edge.getDependent().index() - 1);
+        Polarity tokenPolarity = token.get(NaturalLogicAnnotations.PolarityAnnotation.class);
+        // Get the relation for this deletion
+        NaturalLogicRelation lexicalRelation = NaturalLogicRelation.forDependencyDeletion(edge.getRelation().toString());
+        NaturalLogicRelation projectedRelation = tokenPolarity.projectLexicalRelation(lexicalRelation);
+        // Make sure this is a valid entailment
+        if (!projectedRelation.isEntailed) { canDelete = false; }
+      }
+
+      if (canDelete) {
+        // Register the deletion of the whole subtree under currentWord
+        long newMask = state.deletionMask;
+        SemanticGraph treeWithDeletions = new SemanticGraph(state.tree);
+        for (IndexedWord vertex : state.tree.descendants(currentWord)) {
+          treeWithDeletions.removeVertex(vertex);
+          newMask |= (0x1l << (vertex.index() - 1));
+          assert vertex.index() <= 64;  // the mask is a single long
+          assert ((newMask >>> (vertex.index() - 1)) & 0x1l) == 1;
+        }
+        // Re-attach conj_and edges whose endpoints both survived the deletion
+        SemanticGraph resultTree = new SemanticGraph(treeWithDeletions);
+        for (SemanticGraphEdge edge : andsToAdd) {
+          if (resultTree.containsVertex(edge.getGovernor()) && resultTree.containsVertex(edge.getDependent())) {
+            resultTree.addEdge(edge.getGovernor(), edge.getDependent(), edge.getRelation(), Double.NEGATIVE_INFINITY, false);
+          }
+        }
+        // Compute the score of the sentence (product of edge deletion probabilities)
+        double newScore = state.score;
+        for (SemanticGraphEdge edge : state.tree.incomingEdgeIterable(currentWord)) {
+          String relationString = edge.getRelation().toString();
+          double multiplier = WEIGHTS.deletionProbability(
+              edge.getGovernor().word().toLowerCase(),
+              relationString,
+              secondaryEdges.contains(edge)
+          );
+          assert !Double.isNaN(multiplier);
+          assert !Double.isInfinite(multiplier);
+          newScore *= multiplier;
+        }
+        // Register the result
+        results.add(new SearchResult(resultTree,
+            aggregateDeletedEdges(state, state.tree.incomingEdgeIterable(currentWord), determinerRemovals),
+            newScore));
+
+        // Push the state with this subtree deleted
+        nextIndex = state.currentIndex + 1;
+        while (nextIndex < topologicalVertices.size()) {
+          IndexedWord nextWord = topologicalVertices.get(nextIndex);
+          if ( ((newMask >>> (nextWord.index() - 1)) & 0x1l) == 0) {
+            assert treeWithDeletions.containsVertex(topologicalVertices.get(nextIndex));
+            fringe.push(new SearchState(newMask, nextIndex, treeWithDeletions, null, state, newScore));
+            break;
+          } else {
+            nextIndex += 1;
+          }
+        }
+      }
+    }
+
+    // Return
+    return results;
+  }
+
+  /**
+   * A simple heuristic to classify whether an edge is a secondary edge of the given type.
+   * For example, in:
+   *
+   * <p>"In foreign policy, Obama ended military involvement in Iraq."</p>
+   *
+   * the first 'in' ('in foreign policy') is to be considered a secondary edge of the type 'prep_in'.
+   *
+   * @param graph The graph to classify.
+   * @return The edges in the graph which are to be considered secondary edges.
+   */
+  private Set<SemanticGraphEdge> classifySecondaryEdges(SemanticGraph graph) {
+    Set<SemanticGraphEdge> secondaryEdges = new HashSet<>();
+    for (IndexedWord root : graph.vertexSet()) {
+      // Group this vertex's outgoing prep_* edges by relation type, and note any dobj
+      Map<String, List<SemanticGraphEdge>> edgesByType = new HashMap<>();
+      boolean hasDirectObject = false;
+      for (SemanticGraphEdge edge : graph.outgoingEdgeIterable(root)) {
+        String type = edge.getRelation().toString();
+        if (type.startsWith("prep")) {
+          if (!edgesByType.containsKey(type)) {
+            edgesByType.put(type, new ArrayList<>());
+          }
+          edgesByType.get(type).add(edge);
+        }
+        if (type.equals("dobj")) {
+          hasDirectObject = true;
+        }
+      }
+      for (Map.Entry<String, List<SemanticGraphEdge>> entry : edgesByType.entrySet()) {
+        List<SemanticGraphEdge> edges = entry.getValue();
+        if (hasDirectObject) {
+          // If we have a dobj, all prep_* edges are secondary
+          for (SemanticGraphEdge e : edges) {
+            secondaryEdges.add(e);
+          }
+        } else if (entry.getValue().size() > 1) {
+          // Candidate for a secondary edge (i.e., more than one outgoing edge of the given type).
+          // Sort so that dependents preceding the governor sort first.
+          // NOTE(review): this comparator is not a total order when both dependents precede
+          // the governor; Collections.sort may reject it on some inputs -- consider sorting
+          // on (precedesGovernor, dependentIndex) instead.
+          Collections.sort(edges, (o1, o2) -> {
+            if (o1.getDependent().index() < root.index()) {
+              return -1;
+            } else if (o2.getDependent().index() < root.index()) {
+              return 1;
+            } else {
+              return o1.getDependent().index() - o2.getDependent().index();
+            }
+          });
+          // Register secondary edges (every edge after the first)
+          for (int i = 1; i < edges.size(); ++i) {
+            secondaryEdges.add(edges.get(i));
+          }
+        } else if (edges.get(0).getDependent().index() < root.index()) {
+          // A lone edge whose dependent precedes the governor is also secondary
+          secondaryEdges.add(edges.get(0));
+        }
+      }
+    }
+    return secondaryEdges;
+  }
+
+  /**
+   * Backtrace from a search state, collecting all of the deleted edges used to get there.
+   *
+   * @param state The final search state.
+   * @param justDeleted The edges we have just deleted.
+   * @param otherEdges Other deletions we want to register.
+   * @return A list of deleted edge relation names for that search state.
+   */
+  private static List<String> aggregateDeletedEdges(SearchState state, Iterable<SemanticGraphEdge> justDeleted, Iterable<String> otherEdges) {
+    List<String> rtn = new ArrayList<>();
+    for (SemanticGraphEdge edge : justDeleted) {
+      rtn.add(edge.getRelation().toString());
+    }
+    for (String edge : otherEdges) {
+      rtn.add(edge);
+    }
+    // Walk back up the search path, collecting the edge deleted at each step
+    while (state != null) {
+      if (state.lastDeletedEdge != null) {
+        rtn.add(state.lastDeletedEdge);
+      }
+      state = state.source;
+    }
+    return rtn;
+  }
+
+  /** The patterns matching a clause that can be split off of the sentence. */
+  private static final List<SemgrexPattern> CLAUSE_PATTERNS = Collections.unmodifiableList(new ArrayList<SemgrexPattern>() {{
+    String clauseBreakers = "vmod|partmod|infmod|prepc.*|advcl|purpcl|conj(_and)?|prep_.*|dep";
+    add(SemgrexPattern.compile("{$} ?>/.subj(pass)?/ {}=subject >/" + clauseBreakers + "/ ( {pos:/V.*/}=clause ?>/.subj(pass)?/ {}=clausesubj )"));
+    add(SemgrexPattern.compile("{$} ?>/.subj(pass)?/ {}=subject >/.obj|prep.*/ ( !{pos:/N*/} >/" + clauseBreakers + "/ ( {pos:/V.*/}=clause ?>/.subj(pass)?/ {}=clausesubj ) )"));
+  }});
+
+  /** Matches a governor ("root") with a child ("a") that has a conj_* dependent ("b"); used by {@link OpenIE#tweakCC}. */
+  private static final SemgrexPattern LIMITED_CC_COLLAPSE
+      = SemgrexPattern.compile("{}=root >/.*/=rel ( {}=a >/conj_.*/ {}=b )");
+
+  /**
+   * Do some limited CC collapsing: for every governor with a child that has a conj_* dependent,
+   * add an edge from the governor directly to that dependent, mirroring the governor's relation.
+   *
+   * @param tree The tree to perform the collapsing on.
+   * @return The same tree. THIS IS AN IN PLACE FUNCTION
+   */
+  private static SemanticGraph tweakCC(SemanticGraph tree) {
+    SemgrexMatcher matcher = LIMITED_CC_COLLAPSE.matcher(tree);
+    List<SemanticGraphEdge> edgesToAdd = new ArrayList<>();  // Avoid a concurrent modification exception
+    while (matcher.find()) {
+      edgesToAdd.add(new SemanticGraphEdge(matcher.getNode("root"), matcher.getNode("b"),
+          GrammaticalRelation.valueOf(GrammaticalRelation.Language.Any, matcher.getRelnString("rel")),
+          Double.NEGATIVE_INFINITY, false));
+    }
+    for (SemanticGraphEdge edge : edgesToAdd) {
+      tree.addEdge(edge.getGovernor(), edge.getDependent(), edge.getRelation(), edge.getWeight(), edge.isExtra());
+    }
+    return tree;
+  }
+
+  /**
+   * Split a given subtree off of the main tree.
+   * This will do two things:
+   *
+   * <ul>
+   *   <li>It will return a {@link edu.stanford.nlp.semgraph.SemanticGraph} consisting of the subtree split off.</li>
+   *   <li>It will delete all the nodes in the original tree which were split off into the subtree.</li>
+   * </ul>
+   *
+   * @param tree The original tree; this will be smaller when we return.
+   * @param subtreeRoot The root of the subtree we are splitting off.
+   * @param subjectOrNull An optional subject to clone into the split subtree. This will appear in both trees.
+   * @return The split off tree.
+   */
+  private SemanticGraph splitOffTree(SemanticGraph tree, IndexedWord subtreeRoot, IndexedWord subjectOrNull) {
+    SemanticGraph subtree = new SemanticGraph();
+    subtree.addRoot(subtreeRoot);
+    // Initialize the search with the children of the subtree root
+    Stack<IndexedWord> fringe = new Stack<>();
+    for (IndexedWord child : tree.getChildren(subtreeRoot)) {
+      fringe.add(child);
+    }
+    // Run the search, copying each reachable vertex (and its incoming edges) into the subtree
+    Set<Integer> seen = new HashSet<>();
+    while (!fringe.isEmpty()) {
+      IndexedWord node = fringe.pop();
+      if (seen.contains(node.index())) {
+        continue;
+      }
+      seen.add(node.index());
+      subtree.addVertex(node);
+      for (SemanticGraphEdge incomingEdge : tree.incomingEdgeIterable(node)) {
+        subtree.addEdge(incomingEdge.getGovernor(), incomingEdge.getDependent(), incomingEdge.getRelation(), incomingEdge.getWeight(), incomingEdge.isExtra());
+      }
+      for (IndexedWord child : tree.getChildren(node)) {
+        if (child.index() != node.index()) {  // guard against self-loop edges
+          fringe.add(child);
+        }
+      }
+    }
+    // Delete the split-off vertices from the original tree
+    for (IndexedWord vertex : subtree.vertexSet()) {
+      tree.removeVertex(vertex);
+    }
+    tree.removeVertex(subtreeRoot);
+    // Optionally clone the subject into the subtree (it stays in the original tree too)
+    if (subjectOrNull != null) {
+      subtree.addVertex(subjectOrNull);
+      for (SemanticGraphEdge incomingEdge : tree.incomingEdgeIterable(subjectOrNull)) {
+        subtree.addEdge(subtreeRoot, subjectOrNull, incomingEdge.getRelation(), incomingEdge.getWeight(), incomingEdge.isExtra());
+      }
+    }
+    // Return
+    return subtree;
+  }
+
+  /**
+   * Split a tree into constituent clauses.
+   *
+   * @param rawTree The tree to split into clauses.
+   * @return A list of clauses in this sentence.
+   */
+  private List<SemanticGraph> coarseClauseSplitting(SemanticGraph rawTree) {
+    List<SemanticGraph> clauses = new ArrayList<>();
+    SemanticGraph original = null;  // lazily copied, so trees with no clause matches are never cloned
+    for (SemgrexPattern pattern : CLAUSE_PATTERNS) {
+      SemgrexMatcher matcher = pattern.matcher(original != null ? original : rawTree);
+      while (matcher.find()) {
+        if (original == null) {
+          original = new SemanticGraph(rawTree);
+        }
+        IndexedWord subjectOrNull = matcher.getNode("subject");
+        IndexedWord clauseRoot = matcher.getNode("clause");
+        IndexedWord clauseSubjectOrNull = matcher.getNode("clausesubj");
+        SemanticGraph clause;
+        if (clauseSubjectOrNull != null || subjectOrNull == null) {
+          // Case: independent clause; no need to copy the subject
+          clause = splitOffTree(original, clauseRoot, null);
+        } else {
+          // Case: copy subject from main clause
+          clause = splitOffTree(original, clauseRoot, subjectOrNull);
+        }
+        if (original.isEmpty()) {
+          clauses.add(clause);
+        } else {
+          // Recursively split the clause we just carved off
+          clauses.addAll(coarseClauseSplitting(clause));
+        }
+      }
+    }
+    // Base case: just add the original tree
+    if (clauses.isEmpty()) {
+      clauses.add(tweakCC(rawTree));
+    } else if (original != null && original.vertexSet().size() > 0) {
+      // Whatever remains of the original tree after splitting is itself a clause
+      clauses.add(tweakCC(original));
+    }
+    // Return
+    return clauses;
+  }
+
+  /**
+   * Fix some bizarre peculiarities with certain trees.
+   * So far, these include:
+   * <ul>
+   *   <li>Sometimes there's a node from a word to itself. This seems wrong; such edges are removed.</li>
+   *   <li>Punctuation vertices and 'punct' edges are removed.</li>
+   * </ul>
+   *
+   * @param tree The tree to clean (in place!)
+   */
+  private void cleanTree(SemanticGraph tree) {
+    // Clean nodes
+    List<IndexedWord> toDelete = new ArrayList<>();
+    for (IndexedWord vertex : tree.vertexSet()) {
+      // Clean punctuation, identified by its POS tag
+      String tagString = vertex.backingLabel().tag();
+      if (tagString == null || tagString.isEmpty()) {
+        continue;  // no POS tag available; leave the vertex alone rather than crash
+      }
+      char tag = tagString.charAt(0);
+      if (tag == '.' || tag == ',' || tag == '(' || tag == ')' || tag == ':') {
+        toDelete.add(vertex);
+      }
+    }
+    for (IndexedWord v : toDelete) { tree.removeVertex(v); }
+    // Clean edges
+    Iterator<SemanticGraphEdge> iter = tree.edgeIterable().iterator();
+    while (iter.hasNext()) {
+      SemanticGraphEdge edge = iter.next();
+      if (edge.getDependent().index() == edge.getGovernor().index()) {
+        // Clean self-edges
+        iter.remove();
+      } else if (edge.getRelation().toString().equals("punct")) {
+        // Clean punctuation (again)
+        iter.remove();
+      }
+    }
+  }
+
+
+  /**
+   * Annotate a single sentence.
+   *
+   * <p>
+   * This annotator will, in particular, set the {@link edu.stanford.nlp.naturalli.NaturalLogicAnnotations.EntailedSentencesAnnotation}
+   * and {@link edu.stanford.nlp.naturalli.NaturalLogicAnnotations.RelationTriplesAnnotation} annotations.
+   * </p>
+   *
+   * @param sentence The sentence to annotate; must have dependency and polarity annotations set.
+   */
+  public void annotateSentence(CoreMap sentence) {
+    SemanticGraph fullTree = new SemanticGraph(sentence.get(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class));
+    cleanTree(fullTree);
+    List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
+    if (tokens.size() > 63) {
+      // The deletion search encodes token deletions in a single 64-bit mask,
+      // so sentences longer than 63 tokens cannot be processed
+      System.err.println("Very long sentence (>63 tokens); " + this.getClass().getSimpleName() + " is not attempting to extract relations.");
+      sentence.set(NaturalLogicAnnotations.RelationTriplesAnnotation.class, Collections.emptyList());
+      sentence.set(NaturalLogicAnnotations.EntailedSentencesAnnotation.class, Collections.emptyList());
+    } else {
+      List<SemanticGraph> clauses = coarseClauseSplitting(fullTree);
+      Collection<SentenceFragment> fragments = new ArrayList<>();
+      List<RelationTriple> extractions = new ArrayList<>();
+      // Add clauses (only when the sentence actually split into more than one)
+      if (clauses.size() > 1) {
+        for (SemanticGraph tree : clauses) {
+          fragments.add(new SentenceFragment(tree, false));
+          Optional<RelationTriple> extraction = RelationTriple.segment(tree, Optional.empty());
+          if (extraction.isPresent()) {
+            extractions.add(extraction.get());
+          }
+        }
+      }
+      // Add search results (entailed shortenings of each clause)
+      for (SemanticGraph tree : clauses) {
+        List<SearchResult> results = search(tokens, tree);
+        for (SearchResult result : results) {
+          SentenceFragment fragment = new SentenceFragment(result.tree, false);
+          fragments.add(fragment);
+          Optional<RelationTriple> extraction = RelationTriple.segment(result.tree, Optional.of(result.confidence));
+          if (extraction.isPresent()) {
+            extractions.add(extraction.get());
+          }
+        }
+      }
+      sentence.set(NaturalLogicAnnotations.EntailedSentencesAnnotation.class, fragments);
+      Collections.sort(extractions);
+      sentence.set(NaturalLogicAnnotations.RelationTriplesAnnotation.class, extractions);
+    }
+  }
+
+  /**
+   * A simple utility function for just getting a list of relation triples from a sentence.
+   * Calls {@link OpenIE#annotateSentence(edu.stanford.nlp.util.CoreMap)} on the backend.
+   *
+   * @param sentence The sentence to extract relations from.
+   * @return The relation triples extracted from the (now annotated) sentence.
+   */
+  @SuppressWarnings("UnusedDeclaration")
+  public Collection<RelationTriple> relationsForSentence(CoreMap sentence) {
+    annotateSentence(sentence);
+    return sentence.get(NaturalLogicAnnotations.RelationTriplesAnnotation.class);
+  }
+
+  /**
+   * {@inheritDoc}
+   *
+   * <p>
+   * This annotator will, in particular, set the {@link edu.stanford.nlp.naturalli.NaturalLogicAnnotations.EntailedSentencesAnnotation}
+   * and {@link edu.stanford.nlp.naturalli.NaturalLogicAnnotations.RelationTriplesAnnotation} annotations
+   * on every sentence of the given annotation.
+   * </p>
+   */
+  @Override
+  public void annotate(Annotation annotation) {
+    annotation.get(CoreAnnotations.SentencesAnnotation.class).forEach(this::annotateSentence);
+  }
+
+  /** {@inheritDoc} */
+  @Override
+  public Set<Requirement> requirementsSatisfied() {
+    return Collections.singleton(Annotator.OPENIE_REQUIREMENT);
+  }
+
+  /** {@inheritDoc} */
+  @Override
+  public Set<Requirement> requires() {
+    return Collections.singleton(Annotator.NATLOG_REQUIREMENT);
+  }
+
+  /**
+   * An entry method for annotating standard in with OpenIE extractions.
+   *
+   * @param args Command-line arguments, parsed into pipeline properties.
+   */
+  public static void main(String[] args) {
+    // Initialize prerequisites
+    Properties props = StringUtils.argsToProperties(args);
+    props.setProperty("annotators", "tokenize,ssplit,pos,depparse,natlog,openie");
+    props.setProperty("ssplit.isOneSentence", "true");
+    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
+
+    // Run the extractor over each line of standard input
+    try (Scanner in = new Scanner(System.in)) {
+      while (in.hasNext()) {
+        String line = in.nextLine();
+        Annotation ann = new Annotation(line);
+        pipeline.annotate(ann);
+        Collection<RelationTriple> extractions = ann.get(CoreAnnotations.SentencesAnnotation.class).get(0).get(NaturalLogicAnnotations.RelationTriplesAnnotation.class);
+        if (extractions.isEmpty()) {
+          System.err.println("No extractions for: " + line);
+        }
+        for (RelationTriple extraction : extractions) {
+          System.out.println(extraction);
+        }
+      }
+    }
+  }
+}
diff --git a/src/edu/stanford/nlp/naturalli/Operator.java b/src/edu/stanford/nlp/naturalli/Operator.java
new file mode 100644
index 0000000000..edc40065d4
--- /dev/null
+++ b/src/edu/stanford/nlp/naturalli/Operator.java
@@ -0,0 +1,160 @@
+package edu.stanford.nlp.naturalli;
+
+import edu.stanford.nlp.util.Pair;
+
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.Set;
+
+/**
+ * A collection of quantifiers. This is the exhaustive list of quantifiers our system knows about.
+ *
+ * @author Gabor Angeli
+ */
+public enum Operator {
+  // "All" quantifiers: anti-additive in the subject, multiplicative in the object
+  ALL("all", "anti-additive", "multiplicative"),
+  EVERY("every", "anti-additive", "multiplicative"),
+  ANY("any", "anti-additive", "multiplicative"),
+  EACH("each", "anti-additive", "multiplicative"),
+  THE_LOT_OF("the lot of", "anti-additive", "multiplicative"),
+  ALL_OF("all of", "anti-additive", "multiplicative"),
+  FOR_ALL("for all", "anti-additive", "multiplicative"),
+  FOR_EVERY("for every", "anti-additive", "multiplicative"),
+  FOR_EACH("for each", "anti-additive", "multiplicative"),
+  EVERYONE("everyone", "anti-additive", "multiplicative"),
+  NUM("__num__", "anti-additive", "multiplicative"),  // TODO check me
+  NUM_NUM("__num__ __num__", "anti-additive", "multiplicative"),  // TODO check me
+  NUM_NUM_NUM("__num__ __num__ __num__", "anti-additive", "multiplicative"),  // TODO check me
+  NUM_NUM_NUM_NUM("__num__ __num__ __num__ __num__", "anti-additive", "multiplicative"),  // TODO check me
+  FEW("few", "anti-additive", "multiplicative"),  // TODO check me
+  IMPLICIT_NAMED_ENTITY("__implicit_named_entity__", "anti-additive", "multiplicative"),
+
+  // "No" quantifiers: anti-additive in both arguments
+  NO("no", "anti-additive", "anti-additive"),
+  NEITHER("neither", "anti-additive", "anti-additive"),
+  NO_ONE("no one", "anti-additive", "anti-additive"),
+  NOBODY("nobody", "anti-additive", "anti-additive"),
+  UNARY_NO("no", "anti-additive"),
+  UNARY_NOT("not", "anti-additive"),
+  UNARY_NO_ONE("no one", "anti-additive"),
+  UNARY_NT("n't", "anti-additive"),
+
+  // "Some" quantifiers: additive in both arguments
+  SOME("some", "additive", "additive"),
+  SEVERAL("several", "additive", "additive"),
+  EITHER("either", "additive", "additive"),
+  A("a", "additive", "additive"),
+  THE("the", "additive", "additive"),
+  LESS_THAN("less than __num__", "additive", "additive"),
+  SOME_OF("some of", "additive", "additive"),
+  ONE_OF("one of", "additive", "additive"),
+  AT_LEAST("at least __num__", "additive", "additive"),
+  A_FEW("a few", "additive", "additive"),
+  AT_LEAST_A_FEW("at least a few", "additive", "additive"),
+  THERE_BE("there be", "additive", "additive"),
+  THERE_BE_A_FEW("there be a few", "additive", "additive"),
+  THERE_EXIST("there exist", "additive", "additive"),
+  NUM_OF_THE("__num__ of the", "additive", "additive"),
+
+  // "Not All" quantifiers
+  NOT_ALL("not all", "additive", "anti-multiplicative"),
+  NOT_EVERY("not every", "additive", "anti-multiplicative"),
+
+  // "Most" quantifiers
+  // TODO(gabor) check these
+  MOST("most", "nonmonotone", "multiplicative"),
+  MANY("many", "nonmonotone", "multiplicative"),
+  ENOUGH("enough", "nonmonotone", "multiplicative"),
+  MORE_THAN("more than __num__", "nonmonotone", "multiplicative"),  // fixed: was "__num_", inconsistent with the __num__ placeholder used everywhere else
+  A_LOT_OF("a lot of", "nonmonotone", "multiplicative"),
+  LOTS_OF("lots of", "nonmonotone", "multiplicative"),
+  PLENTY_OF("plenty of", "nonmonotone", "multiplicative"),
+  HEAPS_OF("heap of", "nonmonotone", "multiplicative"),
+  A_LOAD_OF("a load of", "nonmonotone", "multiplicative"),
+  LOADS_OF("load of", "nonmonotone", "multiplicative"),
+  TONS_OF("ton of", "nonmonotone", "multiplicative"),
+  BOTH("both", "nonmonotone", "multiplicative"),
+  JUST_NUM("just __num__", "nonmonotone", "multiplicative"),
+  ONLY_NUM("only __num__", "nonmonotone", "multiplicative"),
+
+  // Strange cases
+  AT_MOST_NUM("at most __num__", "anti-additive", "anti-additive"),
+  ;
+
+  /** The surface forms of every operator this class knows about. */
+  public static final Set<String> GLOSSES = Collections.unmodifiableSet(new HashSet<String>() {{
+    for (Operator q : Operator.values()) {
+      add(q.surfaceForm);
+    }
+  }});
+
+  /** The surface form of this operator; __num__ is a placeholder for a number token. */
+  public final String surfaceForm;
+  /** The monotonicity of the operator's subject (first) argument. */
+  public final Monotonicity subjMono;
+  public final MonotonicityType subjType;
+  /** The monotonicity of the operator's object (second) argument; INVALID/NONE for unary operators. */
+  public final Monotonicity objMono;
+  public final MonotonicityType objType;
+
+  /** Create a binary operator, parsing the two monotonicity signatures. */
+  Operator(String surfaceForm, String subjMono, String objMono) {
+    this.surfaceForm = surfaceForm;
+    Pair<Monotonicity, MonotonicityType> subj = monoFromString(subjMono);
+    this.subjMono = subj.first;
+    this.subjType = subj.second;
+    Pair<Monotonicity, MonotonicityType> obj = monoFromString(objMono);
+    this.objMono = obj.first;
+    this.objType = obj.second;
+  }
+
+  /** Create a unary operator; the object monotonicity is marked INVALID. */
+  Operator(String surfaceForm, String subjMono) {
+    this.surfaceForm = surfaceForm;
+    Pair<Monotonicity, MonotonicityType> subj = monoFromString(subjMono);
+    this.subjMono = subj.first;
+    this.subjType = subj.second;
+    this.objMono = Monotonicity.INVALID;
+    this.objType = MonotonicityType.NONE;
+  }
+
+  /** True if this operator takes only one argument (e.g., sentential negation). */
+  public boolean isUnary() {
+    return objMono == Monotonicity.INVALID;
+  }
+
+  /**
+   * Parse a monotonicity signature string into a (monotonicity, type) pair.
+   *
+   * @param mono One of: nonmonotone, [anti-]additive, [anti-]multiplicative, [anti-]additive-multiplicative.
+   * @return The corresponding monotonicity and monotonicity type.
+   * @throws IllegalArgumentException If the string is not a known signature.
+   */
+  public static Pair<Monotonicity, MonotonicityType> monoFromString(String mono) {
+    switch (mono) {
+      case "nonmonotone": return Pair.makePair(Monotonicity.NONMONOTONE, MonotonicityType.NONE);
+      case "additive": return Pair.makePair(Monotonicity.MONOTONE, MonotonicityType.ADDITIVE);
+      case "multiplicative": return Pair.makePair(Monotonicity.MONOTONE, MonotonicityType.MULTIPLICATIVE);
+      case "additive-multiplicative": return Pair.makePair(Monotonicity.MONOTONE, MonotonicityType.BOTH);
+      case "anti-additive": return Pair.makePair(Monotonicity.ANTITONE, MonotonicityType.ADDITIVE);
+      case "anti-multiplicative": return Pair.makePair(Monotonicity.ANTITONE, MonotonicityType.MULTIPLICATIVE);
+      case "anti-additive-multiplicative": return Pair.makePair(Monotonicity.ANTITONE, MonotonicityType.BOTH);
+      default: throw new IllegalArgumentException("Unknown monotonicity: " + mono);
+    }
+  }
+
+  /**
+   * The inverse of {@link Operator#monoFromString(String)}: render a (monotonicity, type)
+   * pair back into its signature string.
+   */
+  public static String monotonicitySignature(Monotonicity mono, MonotonicityType type) {
+    switch (mono) {
+      case MONOTONE:
+        switch (type) {
+          case NONE: return "nonmonotone";
+          case ADDITIVE: return "additive";
+          case MULTIPLICATIVE: return "multiplicative";
+          case BOTH: return "additive-multiplicative";
+        }
+      case ANTITONE:
+        switch (type) {
+          case NONE: return "nonmonotone";
+          case ADDITIVE: return "anti-additive";
+          case MULTIPLICATIVE: return "anti-multiplicative";
+          case BOTH: return "anti-additive-multiplicative";
+        }
+      case NONMONOTONE: return "nonmonotone";
+    }
+    throw new IllegalStateException("Unhandled case: " + mono + " and " + type);
+  }
+
+  // NOTE(review): duplicates GLOSSES above; kept for API compatibility.
+  @SuppressWarnings("UnusedDeclaration")
+  public static final Set<String> quantifierGlosses = Collections.unmodifiableSet(new HashSet<String>() {{
+    for (Operator operator : values()) {
+      add(operator.surfaceForm);
+    }
+  }});
+}
diff --git a/src/edu/stanford/nlp/naturalli/OperatorSpec.java b/src/edu/stanford/nlp/naturalli/OperatorSpec.java
new file mode 100644
index 0000000000..d3f3322374
--- /dev/null
+++ b/src/edu/stanford/nlp/naturalli/OperatorSpec.java
@@ -0,0 +1,97 @@
+package edu.stanford.nlp.naturalli;
+
+/**
+ * A silly little class to denote a quantifier scope.
+ *
+ * @author Gabor Angeli
+ */
+public class OperatorSpec {
+  // The operator (quantifier) this scope belongs to
+  public final Operator instance;
+  // Token span of the quantifier's surface form
+  public final int quantifierBegin;
+  public final int quantifierEnd;
+  // The head token of the quantifier; taken to be the last token of the span
+  public final int quantifierHead;
+  // Token span of the quantifier's subject argument
+  public final int subjectBegin;
+  public final int subjectEnd;
+  // Token span of the quantifier's object argument; an empty span denotes a unary operator
+  public final int objectBegin;
+  public final int objectEnd;
+
+  public OperatorSpec(
+      Operator instance,
+      int quantifierBegin, int quantifierEnd,
+      int subjectBegin, int subjectEnd,
+      int objectBegin, int objectEnd) {
+    this.instance = instance;
+    this.quantifierBegin = quantifierBegin;
+    this.quantifierEnd = quantifierEnd;
+    this.quantifierHead = quantifierEnd - 1;
+    this.subjectBegin = subjectBegin;
+    this.subjectEnd = subjectEnd;
+    this.objectBegin = objectBegin;
+    this.objectEnd = objectEnd;
+  }
+
+  /**
+   * If true, this is an explicit quantifier, such as "all" or "some."
+   * The other option is for this to be an implicit quantification, for instance with proper names:
+   * "Felix is a cat" -&gt; for all x, Felix(x) implies cat(x).
+   */
+  public boolean isExplicit() {
+    return instance != Operator.IMPLICIT_NAMED_ENTITY;
+  }
+
+  /** True if this operator has a (non-empty) object span, i.e., is a binary quantifier. */
+  public boolean isBinary() {
+    return objectEnd > objectBegin;
+  }
+
+  /** The number of tokens in the quantifier's surface span. */
+  public int quantifierLength() {
+    return quantifierEnd - quantifierBegin;
+  }
+
+  /**
+   * {@inheritDoc}
+   * NOTE(review): equality is defined on the subject and object spans only;
+   * the operator instance and the quantifier span are intentionally (?) ignored — confirm.
+   */
+  @Override
+  public boolean equals(Object o) {
+    if (this == o) return true;
+    if (!(o instanceof OperatorSpec)) return false;
+    OperatorSpec that = (OperatorSpec) o;
+    return objectBegin == that.objectBegin && objectEnd == that.objectEnd && subjectBegin == that.subjectBegin && subjectEnd == that.subjectEnd;
+
+  }
+
+  /** {@inheritDoc} (consistent with equals: hashes the subject and object spans only) */
+  @Override
+  public int hashCode() {
+    int result = subjectBegin;
+    result = 31 * result + subjectEnd;
+    result = 31 * result + objectBegin;
+    result = 31 * result + objectEnd;
+    return result;
+  }
+
+  /** {@inheritDoc} NOTE(review): still prints "QuantifierScope", the class's former name. */
+  @Override
+  public String toString() {
+    return "QuantifierScope{" +
+        "subjectBegin=" + subjectBegin +
+        ", subjectEnd=" + subjectEnd +
+        ", objectBegin=" + objectBegin +
+        ", objectEnd=" + objectEnd +
+        '}';
+  }
+
+  /**
+   * Merge two scopes for the same quantifier occurrence, taking the union of the
+   * subject and object spans. The asserts require both specs to describe the same
+   * quantifier span and operator (so the min/max on those fields are no-ops).
+   */
+  public static OperatorSpec merge(OperatorSpec x, OperatorSpec y) {
+    assert (x.quantifierBegin == y.quantifierBegin);
+    assert (x.quantifierEnd == y.quantifierEnd);
+    assert (x.instance == y.instance);
+    return new OperatorSpec(
+        x.instance,
+        Math.min(x.quantifierBegin, y.quantifierBegin),
+        Math.min(x.quantifierEnd, y.quantifierEnd),
+        Math.min(x.subjectBegin, y.subjectBegin),
+        Math.max(x.subjectEnd, y.subjectEnd),
+        Math.min(x.objectBegin, y.objectBegin),
+        Math.max(x.objectEnd, y.objectEnd));
+  }
+}
diff --git a/src/edu/stanford/nlp/naturalli/Polarity.java b/src/edu/stanford/nlp/naturalli/Polarity.java
new file mode 100644
index 0000000000..9710b2351e
--- /dev/null
+++ b/src/edu/stanford/nlp/naturalli/Polarity.java
@@ -0,0 +1,242 @@
+package edu.stanford.nlp.naturalli;
+
+import edu.stanford.nlp.util.Pair;
+
+import java.util.Arrays;
+import java.util.List;
+
+/**
+ *
+ * A class intended to be attached to a lexical item, determining what mutations are valid on it while
+ * maintaining valid Natural Logic inference.
+ *
+ *
+ * @author Gabor Angeli
+ */
+@SuppressWarnings("UnusedDeclaration")
+public class Polarity {
+
+ /** The projection function, as a table from a relations fixed index to the projected fixed index */
+ private final byte[] projectionFunction = new byte[7];
+
+ /**
+  * Create a polarity from a list of operators in scope, composing the projection of each
+  * operator from the narrowest scope outwards.
+  *
+  * @param operatorsInNarrowingScopeOrder The operators this item is in the scope of, each paired
+  *                                       with its monotonicity and monotonicity type, ordered
+  *                                       from widest to narrowest scope.
+  */
+ protected Polarity(List<Pair<Monotonicity, MonotonicityType>> operatorsInNarrowingScopeOrder) {
+ // BUGFIX: the parameter's generic type was mangled to the raw "List>"; restored from usage
+ // (.first / .second are passed to project(relation, Monotonicity, MonotonicityType) below).
+ if (operatorsInNarrowingScopeOrder.isEmpty()) {
+ // No operators in scope: the projection function is the identity.
+ for (byte i = 0; i < projectionFunction.length; ++i) {
+ projectionFunction[i] = i;
+ }
+ } else {
+ for (int rel = 0; rel < 7; ++rel) {
+ NaturalLogicRelation relation = NaturalLogicRelation.byFixedIndex(rel);
+ // Project through the innermost (narrowest-scope) operator first.
+ for (int op = operatorsInNarrowingScopeOrder.size() - 1; op >= 0; --op) {
+ relation = project(relation, operatorsInNarrowingScopeOrder.get(op).first, operatorsInNarrowingScopeOrder.get(op).second);
+ }
+ projectionFunction[rel] = (byte) relation.fixedIndex;
+ }
+ }
+ }
+
+ /**
+  * Create a polarity item by directly copying the projection function from {@link edu.stanford.nlp.naturalli.NaturalLogicRelation}s to
+  * their projected relation.
+  *
+  * @param projectionFunction A table of exactly 7 entries mapping each relation's fixed index
+  *                           (0-6) to the fixed index of its projection.
+  * @throws IllegalArgumentException If the table is not of length 7, or any entry is outside [0, 6].
+  */
+ public Polarity(byte[] projectionFunction) {
+ if (projectionFunction.length != 7) {
+ throw new IllegalArgumentException("Invalid projection function: " + Arrays.toString(projectionFunction));
+ }
+ for (int i = 0; i < 7; ++i) {
+ if (projectionFunction[i] < 0 || projectionFunction[i] > 6) {
+ throw new IllegalArgumentException("Invalid projection function: " + Arrays.toString(projectionFunction));
+ }
+ }
+ // Defensive copy so later mutation of the argument cannot change this (immutable) Polarity.
+ System.arraycopy(projectionFunction, 0, this.projectionFunction, 0, 7);
+ }
+
+ /**
+  * Encode the projection table in painful detail.
+  *
+  * @param input The input natural logic relation to project up through the operator.
+  * @param mono The monotonicity of the operator we are projecting through.
+  * @param type The monotonicity type of the operator we are projecting through.
+  *
+  * @return The projected relation, once passed through an operator with the given specifications.
+  */
+ private NaturalLogicRelation project(NaturalLogicRelation input, Monotonicity mono, MonotonicityType type) {
+ // Structure note: the nested switches enumerate every (mono, type) combination for each input
+ // relation, and every reachable case returns. The case labels without a break therefore never
+ // fall through at runtime; any combination not handled above drops to the trailing throw.
+ switch (input) {
+ case EQUIVALENT:
+ return NaturalLogicRelation.EQUIVALENT;
+ case FORWARD_ENTAILMENT:
+ switch (mono) {
+ case MONOTONE:
+ return NaturalLogicRelation.FORWARD_ENTAILMENT;
+ case ANTITONE:
+ return NaturalLogicRelation.REVERSE_ENTAILMENT;
+ case NONMONOTONE:
+ case INVALID:
+ return NaturalLogicRelation.INDEPENDENCE;
+ }
+ case REVERSE_ENTAILMENT:
+ switch (mono) {
+ case MONOTONE:
+ return NaturalLogicRelation.REVERSE_ENTAILMENT;
+ case ANTITONE:
+ return NaturalLogicRelation.FORWARD_ENTAILMENT;
+ case NONMONOTONE:
+ case INVALID:
+ return NaturalLogicRelation.INDEPENDENCE;
+ }
+ case NEGATION:
+ switch (type) {
+ case NONE:
+ return NaturalLogicRelation.INDEPENDENCE;
+ case ADDITIVE:
+ switch (mono) {
+ case MONOTONE:
+ return NaturalLogicRelation.COVER;
+ case ANTITONE:
+ return NaturalLogicRelation.ALTERNATION;
+ case NONMONOTONE:
+ case INVALID:
+ return NaturalLogicRelation.INDEPENDENCE;
+ }
+ case MULTIPLICATIVE:
+ switch (mono) {
+ case MONOTONE:
+ return NaturalLogicRelation.ALTERNATION;
+ case ANTITONE:
+ return NaturalLogicRelation.COVER;
+ case NONMONOTONE:
+ case INVALID:
+ return NaturalLogicRelation.INDEPENDENCE;
+ }
+ break;
+ case BOTH:
+ return NaturalLogicRelation.NEGATION;
+ }
+ break;
+ case ALTERNATION:
+ switch (mono) {
+ case MONOTONE:
+ switch (type) {
+ case NONE:
+ case ADDITIVE:
+ return NaturalLogicRelation.INDEPENDENCE;
+ case MULTIPLICATIVE:
+ case BOTH:
+ return NaturalLogicRelation.ALTERNATION;
+ }
+ case ANTITONE:
+ switch (type) {
+ case NONE:
+ case ADDITIVE:
+ return NaturalLogicRelation.INDEPENDENCE;
+ case MULTIPLICATIVE:
+ case BOTH:
+ return NaturalLogicRelation.COVER;
+ }
+ case NONMONOTONE:
+ case INVALID:
+ return NaturalLogicRelation.INDEPENDENCE;
+ }
+ case COVER:
+ switch (mono) {
+ case MONOTONE:
+ switch (type) {
+ case NONE:
+ case MULTIPLICATIVE:
+ return NaturalLogicRelation.INDEPENDENCE;
+ case ADDITIVE:
+ case BOTH:
+ return NaturalLogicRelation.COVER;
+ }
+ case ANTITONE:
+ switch (type) {
+ case NONE:
+ case MULTIPLICATIVE:
+ return NaturalLogicRelation.INDEPENDENCE;
+ case ADDITIVE:
+ case BOTH:
+ return NaturalLogicRelation.ALTERNATION;
+ }
+ case NONMONOTONE:
+ case INVALID:
+ return NaturalLogicRelation.INDEPENDENCE;
+ }
+ case INDEPENDENCE:
+ return NaturalLogicRelation.INDEPENDENCE;
+ }
+ throw new IllegalStateException("[should not happen!] Projection table is incomplete for " + mono + " : " + type + " on relation " + input);
+ }
+
+ /**
+  * Project the given natural logic lexical relation on this word. So, for example, if we want to go up the
+  * Hypernymy hierarchy ({@link edu.stanford.nlp.naturalli.NaturalLogicRelation#FORWARD_ENTAILMENT}) on this word,
+  * then this function will tell you what relation holds between the new mutated fact and this fact.
+  *
+  * @param lexicalRelation The lexical relation we are applying to this word.
+  * @return The relation between the mutated sentence and the original sentence.
+  */
+ public NaturalLogicRelation projectLexicalRelation(NaturalLogicRelation lexicalRelation) {
+ // Constant-time lookup in the precomputed 7-entry projection table.
+ return NaturalLogicRelation.byFixedIndex( projectionFunction[lexicalRelation.fixedIndex] );
+ }
+
+ /**
+  * If true, applying this lexical relation to this word creates a sentence which is entailed by the original sentence.
+  * Note that both this, and {@link Polarity#introducesNegation(NaturalLogicRelation)} can be false. If this is the case, then
+  * natural logic can neither verify nor disprove this mutation.
+  *
+  * @param lexicalRelation The lexical relation applied to this word.
+  */
+ public boolean maintainsEntailment(NaturalLogicRelation lexicalRelation) {
+ return projectLexicalRelation(lexicalRelation).isEntailed;
+ }
+
+ /**
+  * If true, applying this lexical relation to this word creates a sentence which is negated by the original sentence.
+  * Note that both this, and {@link Polarity#maintainsEntailment(NaturalLogicRelation)}} can be false. If this is the case, then
+  * natural logic can neither verify nor disprove this mutation.
+  *
+  * @param lexicalRelation The lexical relation applied to this word.
+  */
+ public boolean introducesNegation(NaturalLogicRelation lexicalRelation) {
+ return projectLexicalRelation(lexicalRelation).isNegated;
+ }
+
+ /**
+  * Ignoring exclusion, determine if this word has upward polarity, i.e., both entailment
+  * directions project onto themselves unchanged.
+  */
+ public boolean isUpwards() {
+ return projectLexicalRelation(NaturalLogicRelation.FORWARD_ENTAILMENT) == NaturalLogicRelation.FORWARD_ENTAILMENT &&
+ projectLexicalRelation(NaturalLogicRelation.REVERSE_ENTAILMENT) == NaturalLogicRelation.REVERSE_ENTAILMENT;
+
+ }
+
+ /**
+  * Ignoring exclusion, determine if this word has downward polarity, i.e., the two entailment
+  * directions project onto each other (are swapped).
+  */
+ public boolean isDownwards() {
+ return projectLexicalRelation(NaturalLogicRelation.FORWARD_ENTAILMENT) == NaturalLogicRelation.REVERSE_ENTAILMENT &&
+ projectLexicalRelation(NaturalLogicRelation.REVERSE_ENTAILMENT) == NaturalLogicRelation.FORWARD_ENTAILMENT;
+ }
+
+ /** Two polarities are equal iff their 7-entry projection tables are identical. */
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) return true;
+ if (!(o instanceof Polarity)) return false;
+ Polarity polarity = (Polarity) o;
+ return Arrays.equals(projectionFunction, polarity.projectionFunction);
+
+ }
+
+ /** Consistent with equals(): hashes the projection table contents. */
+ @Override
+ public int hashCode() {
+ return Arrays.hashCode(projectionFunction);
+ }
+
+ /** A human-readable summary: "up", "down", or "flat" for non-monotone polarities. */
+ @Override
+ public String toString() {
+ if (isUpwards()) {
+ return "up";
+ } else if (isDownwards()) {
+ return "down";
+ } else {
+ return "flat";
+ }
+ }
+}
diff --git a/src/edu/stanford/nlp/naturalli/SentenceFragment.java b/src/edu/stanford/nlp/naturalli/SentenceFragment.java
new file mode 100644
index 0000000000..d6f72d0130
--- /dev/null
+++ b/src/edu/stanford/nlp/naturalli/SentenceFragment.java
@@ -0,0 +1,51 @@
+package edu.stanford.nlp.naturalli;
+
+import edu.stanford.nlp.ling.CoreLabel;
+import edu.stanford.nlp.ling.IndexedWord;
+import edu.stanford.nlp.semgraph.SemanticGraph;
+import edu.stanford.nlp.util.StringUtils;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.stream.Collectors;
+
+/**
+ * A representation of a sentence fragment.
+ *
+ * @author Gabor Angeli
+ */
+public class SentenceFragment {
+ /** The tokens of this fragment, in sentence order. */
+ public final List<CoreLabel> words = new ArrayList<>();
+ /** The dependency parse backing this fragment. */
+ public final SemanticGraph parseTree;
+
+ /**
+  * Create a sentence fragment from a dependency tree.
+  *
+  * @param tree The dependency tree of the fragment.
+  * @param copy If true, defensively copy the tree; otherwise this fragment aliases {@code tree}.
+  */
+ public SentenceFragment(SemanticGraph tree, boolean copy) {
+ if (copy) {
+ this.parseTree = new SemanticGraph(tree);
+ } else {
+ this.parseTree = tree;
+ }
+ words.addAll(this.parseTree.vertexListSorted().stream().map(IndexedWord::backingLabel).collect(Collectors.toList()));
+ }
+
+ /** {@inheritDoc} */
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) return true;
+ if (!(o instanceof SentenceFragment)) return false;
+ // BUGFIX: the previous super.equals(o) delegated to Object.equals (identity), which made the
+ // parseTree comparison below unreachable for any two distinct instances.
+ SentenceFragment that = (SentenceFragment) o;
+ return parseTree.equals(that.parseTree);
+ }
+
+ /** {@inheritDoc} */
+ @Override
+ public int hashCode() {
+ // BUGFIX: previously mixed in Object.hashCode() (an identity hash), so equal fragments could
+ // hash differently once equals() compares state. Hash exactly what equals() compares.
+ return parseTree.hashCode();
+ }
+
+ /** The surface form of this fragment: its words joined by spaces. */
+ @Override
+ public String toString() {
+ return StringUtils.join(words.stream().map(CoreLabel::word), " ");
+ }
+}
diff --git a/src/edu/stanford/nlp/neural/NeuralUtils.java b/src/edu/stanford/nlp/neural/NeuralUtils.java
index d2e47e1a5f..c96bc26295 100644
--- a/src/edu/stanford/nlp/neural/NeuralUtils.java
+++ b/src/edu/stanford/nlp/neural/NeuralUtils.java
@@ -1,6 +1,8 @@
package edu.stanford.nlp.neural;
+import java.io.ByteArrayOutputStream;
import java.io.File;
+import java.io.PrintStream;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
@@ -8,6 +10,7 @@
import java.util.function.Predicate;
import org.ejml.simple.SimpleMatrix;
+import org.ejml.ops.MatrixIO;
import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.util.CollectionUtils;
@@ -70,6 +73,15 @@ public boolean test(String s) {
return new SimpleMatrix(data);
}
+ /**
+  * Render a matrix to a String using EJML's matrix printer.
+  *
+  * @param matrix The matrix to return as a String
+  * @param format The format to use for each value in the matrix, eg "%f"
+  * @return The formatted matrix, as produced by {@code MatrixIO.print}
+  */
+ public static String toString(SimpleMatrix matrix, String format) {
+ ByteArrayOutputStream stream = new ByteArrayOutputStream();
+ MatrixIO.print(new PrintStream(stream), matrix.getMatrix(), format);
+ // NOTE(review): stream.toString() uses the platform default charset; harmless for numeric
+ // output, but an explicit charset would be safer.
+ return stream.toString();
+ }
/**
* Compute cosine distance between two column vectors.
diff --git a/src/edu/stanford/nlp/neural/SimpleTensor.java b/src/edu/stanford/nlp/neural/SimpleTensor.java
index ef3ebb7d0a..ba69f06101 100644
--- a/src/edu/stanford/nlp/neural/SimpleTensor.java
+++ b/src/edu/stanford/nlp/neural/SimpleTensor.java
@@ -296,5 +296,18 @@ public String toString() {
return result.toString();
}
+ /**
+  * Output the tensor one slice at a time. Each number is output
+  * with the format given, so for example "%f"
+  *
+  * @param format The per-element format string, passed through to {@code NeuralUtils.toString}
+  * @return All slices concatenated, each preceded by a "Slice i" header line
+  */
+ public String toString(String format) {
+ StringBuilder result = new StringBuilder();
+ for (int slice = 0; slice < numSlices; ++slice) {
+ result.append("Slice " + slice + "\n");
+ result.append(NeuralUtils.toString(slices[slice], format));
+ }
+ return result.toString();
+ }
+
private static final long serialVersionUID = 1;
}
diff --git a/src/edu/stanford/nlp/optimization/QNMinimizer.java b/src/edu/stanford/nlp/optimization/QNMinimizer.java
index 1de0a249cc..984a72d643 100644
--- a/src/edu/stanford/nlp/optimization/QNMinimizer.java
+++ b/src/edu/stanford/nlp/optimization/QNMinimizer.java
@@ -1030,15 +1030,13 @@ public double[] minimize(DiffFunction dfunction, double functionTolerance,
say("M");
break;
default:
- sayln("Invalid line search option for QNMinimizer. ");
- System.exit(1);
- break;
-
+ throw new IllegalArgumentException("Invalid line search option for QNMinimizer.");
}
}
newValue = newPoint[f];
- System.err.print(" " + nf.format(newPoint[a]));
+ say(" ");
+ say(nf.format(newPoint[a]));
say("] ");
// This shouldn't actually evaluate anything since that should have been
diff --git a/src/edu/stanford/nlp/parser/lexparser/LexicalizedParser.java b/src/edu/stanford/nlp/parser/lexparser/LexicalizedParser.java
index f15da4be39..5b80902fd3 100644
--- a/src/edu/stanford/nlp/parser/lexparser/LexicalizedParser.java
+++ b/src/edu/stanford/nlp/parser/lexparser/LexicalizedParser.java
@@ -1148,7 +1148,7 @@ public void setOptionFlags(String... flags) {
* -outputFormatOptions
Provide options that control the
* behavior of various -outputFormat
choices, such as
* lexicalize
, stem
, markHeadNodes
,
- * or xml
.
+ * or xml
. {@link edu.stanford.nlp.trees.TreePrint}
* Options are specified as a comma-separated list.
* -writeOutputFiles
Write output files corresponding
* to the input files, with the same name but a ".stp"
diff --git a/src/edu/stanford/nlp/parser/nndep/Classifier.java b/src/edu/stanford/nlp/parser/nndep/Classifier.java
index 12dc993836..6fd0cdb7dc 100644
--- a/src/edu/stanford/nlp/parser/nndep/Classifier.java
+++ b/src/edu/stanford/nlp/parser/nndep/Classifier.java
@@ -14,7 +14,6 @@
import java.util.concurrent.ThreadLocalRandom;
import java.util.stream.IntStream;
-import static java.util.stream.Collectors.toSet;
/**
* Neural network classifier which powers a transition-based dependency
@@ -140,7 +139,7 @@ public Classifier(Config config, Dataset dataset, double[][] E, double[][] W1, d
numLabels = W2.length;
preMap = new HashMap<>();
- for (int i = 0; i < preComputed.size(); ++i)
+ for (int i = 0; i < preComputed.size() && i < config.numPreComputed; ++i)
preMap.put(preComputed.get(i), i);
isTraining = dataset != null;
@@ -636,13 +635,7 @@ public void finalizeTraining() {
* @see #preCompute(java.util.Set)
*/
public void preCompute() {
- // If no features are specified, pre-compute all of them (which fit
- // into a `saved` array of size `config.numPreComputed`)
- Set keys = preMap.entrySet().stream()
- .filter(e -> e.getValue() < config.numPreComputed)
- .map(Map.Entry::getKey)
- .collect(toSet());
- preCompute(keys);
+ preCompute(preMap.keySet());
}
/**
diff --git a/src/edu/stanford/nlp/parser/nndep/Config.java b/src/edu/stanford/nlp/parser/nndep/Config.java
index 6a0dff343c..4e5a7d66bc 100644
--- a/src/edu/stanford/nlp/parser/nndep/Config.java
+++ b/src/edu/stanford/nlp/parser/nndep/Config.java
@@ -233,7 +233,6 @@ private Languages.Language getLanguage(String languageStr) {
if (l.name().equalsIgnoreCase(languageStr))
return l;
}
-
return null;
}
diff --git a/src/edu/stanford/nlp/parser/nndep/DependencyParser.java b/src/edu/stanford/nlp/parser/nndep/DependencyParser.java
index 137902ec28..32058d9cbc 100644
--- a/src/edu/stanford/nlp/parser/nndep/DependencyParser.java
+++ b/src/edu/stanford/nlp/parser/nndep/DependencyParser.java
@@ -504,7 +504,7 @@ public void loadModelFile(String modelFile) {
private void loadModelFile(String modelFile, boolean verbose) {
Timing t = new Timing();
try {
- // System.err.println(Config.SEPARATOR);
+
System.err.println("Loading depparse model file: " + modelFile + " ... ");
String s;
BufferedReader input = IOUtils.readerFromString(modelFile);
@@ -643,9 +643,8 @@ private void readEmbedFile(String embedFile) {
embeddings = new double[nWords][dim];
System.err.println("Embedding File " + embedFile + ": #Words = " + nWords + ", dim = " + dim);
- //TODO: how if the embedding dim. does not match..?
if (dim != config.embeddingSize)
- System.err.println("ERROR: embedding dimension mismatch");
+ throw new IllegalArgumentException("The dimension of embedding file does not match config.embeddingSize");
for (int i = 0; i < lines.size(); ++i) {
splits = lines.get(i).split("\\s+");
@@ -992,13 +991,16 @@ public double testCoNLL(String testFile, String outFile) {
List predicted = testSents.stream().map(this::predictInner).collect(toList());
Map result = system.evaluate(testSents, predicted, testTrees);
+
+ double uasNoPunc = result.get("UASwoPunc");
double lasNoPunc = result.get("LASwoPunc");
- System.err.printf("UAS = %.4f%n", result.get("UASwoPunc"));
+ System.err.printf("UAS = %.4f%n", uasNoPunc);
System.err.printf("LAS = %.4f%n", lasNoPunc);
+
long millis = timer.stop();
double wordspersec = numWords / (((double) millis) / 1000);
double sentspersec = numSentences / (((double) millis) / 1000);
- System.err.printf("%s tagged %d words in %d sentences in %.1fs at %.1f w/s, %.1f sent/s.%n",
+ System.err.printf("%s parsed %d words in %d sentences in %.1fs at %.1f w/s, %.1f sent/s.%n",
StringUtils.getShortClassName(this), numWords, numSentences, millis / 1000.0, wordspersec, sentspersec);
if (outFile != null) {
@@ -1156,7 +1158,6 @@ public static void main(String[] args) {
if (props.containsKey("testFile")) {
parser.loadModelFile(props.getProperty("model"));
loaded = true;
-
parser.testCoNLL(props.getProperty("testFile"), props.getProperty("outFile"));
}
@@ -1191,5 +1192,4 @@ public static void main(String[] args) {
parser.parseTextFile(input, output);
}
}
-
}
\ No newline at end of file
diff --git a/src/edu/stanford/nlp/parser/nndep/Util.java b/src/edu/stanford/nlp/parser/nndep/Util.java
index 71e83f0795..34c48c0068 100644
--- a/src/edu/stanford/nlp/parser/nndep/Util.java
+++ b/src/edu/stanford/nlp/parser/nndep/Util.java
@@ -162,17 +162,20 @@ public static void writeConllFile(String outFile, List sentences, List<
try
{
PrintWriter output = IOUtils.getPrintWriter(outFile);
- for (CoreMap sentence : sentences)
+
+ for (int i = 0; i < sentences.size(); i++)
{
+ CoreMap sentence = sentences.get(i);
+ DependencyTree tree = trees.get(i);
+
List tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
for (int j = 1, size = tokens.size(); j <= size; ++j)
{
CoreLabel token = tokens.get(j - 1);
output.printf("%d\t%s\t_\t%s\t%s\t_\t%d\t%s\t_\t_%n",
- j, token.word(), token.tag(), token.tag(),
- token.get(CoreAnnotations.CoNLLDepParentIndexAnnotation.class),
- token.get(CoreAnnotations.CoNLLDepTypeAnnotation.class));
+ j, token.word(), token.tag(), token.tag(),
+ tree.getHead(j), tree.getLabel(j));
}
output.println();
}
@@ -186,17 +189,25 @@ public static void writeConllFile(String outFile, List sentences, List<
public static void printTreeStats(String str, List trees)
{
System.err.println(Config.SEPARATOR + " " + str);
- System.err.println("#Trees: " + trees.size());
- int nonTrees = 0;
+ int nTrees = trees.size();
+ int nonTree = 0;
+ int multiRoot = 0;
int nonProjective = 0;
for (DependencyTree tree : trees) {
if (!tree.isTree())
- ++nonTrees;
- else if (!tree.isProjective())
- ++nonProjective;
+ ++nonTree;
+ else
+ {
+ if (!tree.isProjective())
+ ++nonProjective;
+ if (!tree.isSingleRoot())
+ ++multiRoot;
+ }
}
- System.err.println(nonTrees + " tree(s) are illegal.");
- System.err.println(nonProjective + " tree(s) are legal but not projective.");
+ System.err.printf("#Trees: %d%n", nTrees);
+ System.err.printf("%d tree(s) are illegal (%.2f%%).%n", nonTree, nonTree * 100.0 / nTrees);
+ System.err.printf("%d tree(s) are legal but have multiple roots (%.2f%%).%n", multiRoot, multiRoot * 100.0 / nTrees);
+ System.err.printf("%d tree(s) are legal but not projective (%.2f%%).%n", nonProjective, nonProjective * 100.0 / nTrees);
}
public static void printTreeStats(List trees)
diff --git a/src/edu/stanford/nlp/parser/shiftreduce/BasicFeatureFactory.java b/src/edu/stanford/nlp/parser/shiftreduce/BasicFeatureFactory.java
index c0f0c5ce51..0d69e24bda 100644
--- a/src/edu/stanford/nlp/parser/shiftreduce/BasicFeatureFactory.java
+++ b/src/edu/stanford/nlp/parser/shiftreduce/BasicFeatureFactory.java
@@ -29,8 +29,8 @@ public static void addUnaryQueueFeatures(List features, CoreLabel label,
features.add(wtFeature + NULL);
return;
}
- String tag = label.get(TreeCoreAnnotations.HeadTagAnnotation.class).label().value();
- String word = label.get(TreeCoreAnnotations.HeadWordAnnotation.class).label().value();
+ String tag = label.get(TreeCoreAnnotations.HeadTagLabelAnnotation.class).value();
+ String word = label.get(TreeCoreAnnotations.HeadWordLabelAnnotation.class).value();
// TODO: check to see if this is slow because of the string concat
features.add(wtFeature + tag + "-" + word);
diff --git a/src/edu/stanford/nlp/parser/shiftreduce/BinaryTransition.java b/src/edu/stanford/nlp/parser/shiftreduce/BinaryTransition.java
index f109a5e0ad..495ca84ef2 100644
--- a/src/edu/stanford/nlp/parser/shiftreduce/BinaryTransition.java
+++ b/src/edu/stanford/nlp/parser/shiftreduce/BinaryTransition.java
@@ -177,8 +177,8 @@ public State apply(State state, double scoreDelta) {
CoreLabel production = new CoreLabel();
production.setValue(label);
- production.set(TreeCoreAnnotations.HeadWordAnnotation.class, headLabel.get(TreeCoreAnnotations.HeadWordAnnotation.class));
- production.set(TreeCoreAnnotations.HeadTagAnnotation.class, headLabel.get(TreeCoreAnnotations.HeadTagAnnotation.class));
+ production.set(TreeCoreAnnotations.HeadWordLabelAnnotation.class, headLabel.get(TreeCoreAnnotations.HeadWordLabelAnnotation.class));
+ production.set(TreeCoreAnnotations.HeadTagLabelAnnotation.class, headLabel.get(TreeCoreAnnotations.HeadTagLabelAnnotation.class));
Tree newTop = new LabeledScoredTreeNode(production);
newTop.addChild(left);
newTop.addChild(right);
diff --git a/src/edu/stanford/nlp/parser/shiftreduce/CreateTransitionSequence.java b/src/edu/stanford/nlp/parser/shiftreduce/CreateTransitionSequence.java
index 40b94746fe..dde9f2fec6 100644
--- a/src/edu/stanford/nlp/parser/shiftreduce/CreateTransitionSequence.java
+++ b/src/edu/stanford/nlp/parser/shiftreduce/CreateTransitionSequence.java
@@ -73,9 +73,9 @@ private static void createTransitionSequenceHelper(List transitions,
CoreLabel label = (CoreLabel) tree.label();
CoreLabel leftLabel = (CoreLabel) tree.children()[0].label();
CoreLabel rightLabel = (CoreLabel) tree.children()[1].label();
- Tree head = label.get(TreeCoreAnnotations.HeadWordAnnotation.class);
- Tree leftHead = leftLabel.get(TreeCoreAnnotations.HeadWordAnnotation.class);
- Tree rightHead = rightLabel.get(TreeCoreAnnotations.HeadWordAnnotation.class);
+ CoreLabel head = label.get(TreeCoreAnnotations.HeadWordLabelAnnotation.class);
+ CoreLabel leftHead = leftLabel.get(TreeCoreAnnotations.HeadWordLabelAnnotation.class);
+ CoreLabel rightHead = rightLabel.get(TreeCoreAnnotations.HeadWordLabelAnnotation.class);
if (head == null || leftHead == null || rightHead == null) {
throw new IllegalArgumentException("Expected tree labels to have their heads assigned");
}
diff --git a/src/edu/stanford/nlp/parser/shiftreduce/FeatureFactory.java b/src/edu/stanford/nlp/parser/shiftreduce/FeatureFactory.java
index 01cee163cc..7c9f02a12c 100644
--- a/src/edu/stanford/nlp/parser/shiftreduce/FeatureFactory.java
+++ b/src/edu/stanford/nlp/parser/shiftreduce/FeatureFactory.java
@@ -37,10 +37,10 @@ public static String getFeatureFromCoreLabel(CoreLabel label, FeatureComponent f
String value = null;
switch(feature) {
case HEADWORD:
- value = (label == null) ? NULL : label.get(TreeCoreAnnotations.HeadWordAnnotation.class).label().value();
+ value = (label == null) ? NULL : label.get(TreeCoreAnnotations.HeadWordLabelAnnotation.class).value();
break;
case HEADTAG:
- value = (label == null) ? NULL : label.get(TreeCoreAnnotations.HeadTagAnnotation.class).label().value();
+ value = (label == null) ? NULL : label.get(TreeCoreAnnotations.HeadTagLabelAnnotation.class).value();
break;
case VALUE:
value = (label == null) ? NULL : label.value();
@@ -67,7 +67,7 @@ public static CoreLabel getRecentDependent(TreeShapedStack stack, Transiti
if (!(node.label() instanceof CoreLabel)) {
throw new IllegalArgumentException("Can only featurize CoreLabel trees");
}
- Tree head = ((CoreLabel) node.label()).get(TreeCoreAnnotations.HeadWordAnnotation.class);
+ CoreLabel head = ((CoreLabel) node.label()).get(TreeCoreAnnotations.HeadWordLabelAnnotation.class);
switch (transition) {
case LEFT: {
@@ -79,7 +79,7 @@ public static CoreLabel getRecentDependent(TreeShapedStack stack, Transiti
if (!(child.label() instanceof CoreLabel)) {
throw new IllegalArgumentException("Can only featurize CoreLabel trees");
}
- if (((CoreLabel) child.label()).get(TreeCoreAnnotations.HeadWordAnnotation.class) != head) {
+ if (((CoreLabel) child.label()).get(TreeCoreAnnotations.HeadWordLabelAnnotation.class) != head) {
return (CoreLabel) child.label();
}
node = child;
@@ -98,7 +98,7 @@ public static CoreLabel getRecentDependent(TreeShapedStack stack, Transiti
if (!(child.label() instanceof CoreLabel)) {
throw new IllegalArgumentException("Can only featurize CoreLabel trees");
}
- if (((CoreLabel) child.label()).get(TreeCoreAnnotations.HeadWordAnnotation.class) != head) {
+ if (((CoreLabel) child.label()).get(TreeCoreAnnotations.HeadWordLabelAnnotation.class) != head) {
return (CoreLabel) child.label();
}
node = child;
diff --git a/src/edu/stanford/nlp/parser/shiftreduce/ShiftReduceParser.java b/src/edu/stanford/nlp/parser/shiftreduce/ShiftReduceParser.java
index 5884ea6d70..c3527be357 100644
--- a/src/edu/stanford/nlp/parser/shiftreduce/ShiftReduceParser.java
+++ b/src/edu/stanford/nlp/parser/shiftreduce/ShiftReduceParser.java
@@ -151,6 +151,13 @@ public String[] defaultCoreNLPFlags() {
}
}
+ /**
+  * Return an unmodifiable Set containing the known states (including binarization).
+  *
+  * @return An unmodifiable view over the model's known state names.
+  */
+ public Set<String> knownStates() {
+ // Restored the element type lost to a mangled raw "Set" declaration; the states are the
+ // parser model's state names -- TODO confirm model.knownStates is Set<String>.
+ return Collections.unmodifiableSet(model.knownStates);
+ }
+
@Override
public boolean requiresTags() {
return true;
@@ -241,10 +248,11 @@ public static State initialStateFromTaggedSentence(List extends HasWord> words
LabeledScoredTreeNode tagNode = new LabeledScoredTreeNode(tagLabel);
tagNode.addChild(wordNode);
- wordLabel.set(TreeCoreAnnotations.HeadWordAnnotation.class, wordNode);
- wordLabel.set(TreeCoreAnnotations.HeadTagAnnotation.class, tagNode);
- tagLabel.set(TreeCoreAnnotations.HeadWordAnnotation.class, wordNode);
- tagLabel.set(TreeCoreAnnotations.HeadTagAnnotation.class, tagNode);
+ // TODO: can we get away with not setting these on the wordLabel?
+ wordLabel.set(TreeCoreAnnotations.HeadWordLabelAnnotation.class, wordLabel);
+ wordLabel.set(TreeCoreAnnotations.HeadTagLabelAnnotation.class, tagLabel);
+ tagLabel.set(TreeCoreAnnotations.HeadWordLabelAnnotation.class, wordLabel);
+ tagLabel.set(TreeCoreAnnotations.HeadTagLabelAnnotation.class, tagLabel);
preterminals.add(tagNode);
}
diff --git a/src/edu/stanford/nlp/parser/shiftreduce/ShiftReduceUtils.java b/src/edu/stanford/nlp/parser/shiftreduce/ShiftReduceUtils.java
index 3144c0846e..d965b9a255 100644
--- a/src/edu/stanford/nlp/parser/shiftreduce/ShiftReduceUtils.java
+++ b/src/edu/stanford/nlp/parser/shiftreduce/ShiftReduceUtils.java
@@ -15,7 +15,7 @@ static BinaryTransition.Side getBinarySide(Tree tree) {
}
CoreLabel label = ErasureUtils.uncheckedCast(tree.label());
CoreLabel childLabel = ErasureUtils.uncheckedCast(tree.children()[0].label());
- if (label.get(TreeCoreAnnotations.HeadWordAnnotation.class) == childLabel.get(TreeCoreAnnotations.HeadWordAnnotation.class)) {
+ if (label.get(TreeCoreAnnotations.HeadWordLabelAnnotation.class) == childLabel.get(TreeCoreAnnotations.HeadWordLabelAnnotation.class)) {
return BinaryTransition.Side.LEFT;
} else {
return BinaryTransition.Side.RIGHT;
@@ -36,8 +36,7 @@ static boolean isEquivalentCategory(String l1, String l2) {
/** Returns a 0-based index of the head of the tree. Assumes the leaves had been indexed from 1 */
static int headIndex(Tree tree) {
CoreLabel label = ErasureUtils.uncheckedCast(tree.label());
- Tree head = label.get(TreeCoreAnnotations.HeadWordAnnotation.class);
- CoreLabel headLabel = ErasureUtils.uncheckedCast(head.label());
+ CoreLabel headLabel = label.get(TreeCoreAnnotations.HeadWordLabelAnnotation.class);
return headLabel.index() - 1;
}
diff --git a/src/edu/stanford/nlp/parser/shiftreduce/UnaryTransition.java b/src/edu/stanford/nlp/parser/shiftreduce/UnaryTransition.java
index a843513dc4..e4429dc44c 100644
--- a/src/edu/stanford/nlp/parser/shiftreduce/UnaryTransition.java
+++ b/src/edu/stanford/nlp/parser/shiftreduce/UnaryTransition.java
@@ -80,8 +80,8 @@ static Tree createNode(Tree top, String label, Tree ... children) {
CoreLabel headLabel = (CoreLabel) top.label();
CoreLabel production = new CoreLabel();
production.setValue(label);
- production.set(TreeCoreAnnotations.HeadWordAnnotation.class, headLabel.get(TreeCoreAnnotations.HeadWordAnnotation.class));
- production.set(TreeCoreAnnotations.HeadTagAnnotation.class, headLabel.get(TreeCoreAnnotations.HeadTagAnnotation.class));
+ production.set(TreeCoreAnnotations.HeadWordLabelAnnotation.class, headLabel.get(TreeCoreAnnotations.HeadWordLabelAnnotation.class));
+ production.set(TreeCoreAnnotations.HeadTagLabelAnnotation.class, headLabel.get(TreeCoreAnnotations.HeadTagLabelAnnotation.class));
Tree newTop = new LabeledScoredTreeNode(production);
for (Tree child : children) {
newTop.addChild(child);
diff --git a/src/edu/stanford/nlp/pipeline/Annotation.java b/src/edu/stanford/nlp/pipeline/Annotation.java
index 8234a0a9bc..9e3546263f 100644
--- a/src/edu/stanford/nlp/pipeline/Annotation.java
+++ b/src/edu/stanford/nlp/pipeline/Annotation.java
@@ -88,6 +88,7 @@ public String toString() {
return this.get(CoreAnnotations.TextAnnotation.class);
}
+ /** Make a new Annotation from a List of tokenized sentences. */
public Annotation(List sentences) {
super();
this.set(CoreAnnotations.SentencesAnnotation.class, sentences);
diff --git a/src/edu/stanford/nlp/pipeline/AnnotationPipeline.java b/src/edu/stanford/nlp/pipeline/AnnotationPipeline.java
index dd1cb7b5f9..7b989c66c4 100644
--- a/src/edu/stanford/nlp/pipeline/AnnotationPipeline.java
+++ b/src/edu/stanford/nlp/pipeline/AnnotationPipeline.java
@@ -67,7 +67,7 @@ public void annotate(Annotation annotation) {
}
annotator.annotate(annotation);
if (TIME) {
- int elapsed = (int) t.stop();
+ long elapsed = t.stop();
MutableLong m = it.next();
m.incValue(elapsed);
}
diff --git a/src/edu/stanford/nlp/pipeline/Annotator.java b/src/edu/stanford/nlp/pipeline/Annotator.java
index b728243a5a..b07c279a26 100644
--- a/src/edu/stanford/nlp/pipeline/Annotator.java
+++ b/src/edu/stanford/nlp/pipeline/Annotator.java
@@ -104,6 +104,8 @@ public String toString() {
public static final String STANFORD_SENTIMENT = "sentiment";
public static final String STANFORD_COLUMN_DATA_CLASSIFIER = "cdc";
public static final String STANFORD_DEPENDENCIES = "depparse";
+ public static final String STANFORD_NATLOG = "natlog";
+ public static final String STANFORD_OPENIE = "openie";
public static final Requirement TOKENIZE_REQUIREMENT = new Requirement(STANFORD_TOKENIZE);
@@ -117,6 +119,8 @@ public String toString() {
public static final Requirement PARSE_REQUIREMENT = new Requirement(STANFORD_PARSE);
public static final Requirement DETERMINISTIC_COREF_REQUIREMENT = new Requirement(STANFORD_DETERMINISTIC_COREF);
public static final Requirement RELATION_EXTRACTOR_REQUIREMENT = new Requirement(STANFORD_RELATION);
+ public static final Requirement NATLOG_REQUIREMENT = new Requirement(STANFORD_NATLOG);
+ public static final Requirement OPENIE_REQUIREMENT = new Requirement(STANFORD_OPENIE);
/**
* These are annotators which StanfordCoreNLP does not know how to
diff --git a/src/edu/stanford/nlp/pipeline/AnnotatorFactories.java b/src/edu/stanford/nlp/pipeline/AnnotatorFactories.java
index 2df98f8753..f9fa43672d 100644
--- a/src/edu/stanford/nlp/pipeline/AnnotatorFactories.java
+++ b/src/edu/stanford/nlp/pipeline/AnnotatorFactories.java
@@ -20,6 +20,8 @@
*/
public class AnnotatorFactories {
+ private AnnotatorFactories() {} // static factory class
+
public static AnnotatorFactory tokenize(Properties properties, final AnnotatorImplementations annotatorImplementation) {
return new AnnotatorFactory(properties, annotatorImplementation) {
private static final long serialVersionUID = 1L;
@@ -53,15 +55,15 @@ public String additionalSignature() {
}
if (Boolean.valueOf(properties.getProperty("tokenize.whitespace",
"false"))) {
- os.append(TokenizerAnnotator.EOL_PROPERTY + ":").append(properties.getProperty(TokenizerAnnotator.EOL_PROPERTY,
+ os.append(TokenizerAnnotator.EOL_PROPERTY + ':').append(properties.getProperty(TokenizerAnnotator.EOL_PROPERTY,
"false"));
- os.append(StanfordCoreNLP.NEWLINE_SPLITTER_PROPERTY + ":").append(properties.getProperty(StanfordCoreNLP.NEWLINE_SPLITTER_PROPERTY,
+ os.append(StanfordCoreNLP.NEWLINE_SPLITTER_PROPERTY + ':').append(properties.getProperty(StanfordCoreNLP.NEWLINE_SPLITTER_PROPERTY,
"false"));
return os.toString();
} else {
- os.append(StanfordCoreNLP.NEWLINE_SPLITTER_PROPERTY + ":").append(Boolean.valueOf(properties.getProperty(StanfordCoreNLP.NEWLINE_SPLITTER_PROPERTY,
+ os.append(StanfordCoreNLP.NEWLINE_SPLITTER_PROPERTY + ':').append(Boolean.valueOf(properties.getProperty(StanfordCoreNLP.NEWLINE_SPLITTER_PROPERTY,
"false")));
- os.append(StanfordCoreNLP.NEWLINE_IS_SENTENCE_BREAK_PROPERTY + ":").append(properties.getProperty(StanfordCoreNLP.NEWLINE_IS_SENTENCE_BREAK_PROPERTY, StanfordCoreNLP.DEFAULT_NEWLINE_IS_SENTENCE_BREAK));
+ os.append(StanfordCoreNLP.NEWLINE_IS_SENTENCE_BREAK_PROPERTY + ':').append(properties.getProperty(StanfordCoreNLP.NEWLINE_IS_SENTENCE_BREAK_PROPERTY, StanfordCoreNLP.DEFAULT_NEWLINE_IS_SENTENCE_BREAK));
}
return os.toString();
}
@@ -190,7 +192,8 @@ public static AnnotatorFactory sentenceSplit(Properties properties, final Annota
private static final long serialVersionUID = 1L;
@Override
public Annotator create() {
- System.err.println(signature());
+ // System.err.println(signature());
+ // todo: The above shows that signature is edu.stanford.nlp.pipeline.AnnotatorImplementations: and doesn't reflect what annotator it is! Should fix.
boolean nlSplitting = Boolean.valueOf(properties.getProperty(StanfordCoreNLP.NEWLINE_SPLITTER_PROPERTY, "false"));
if (nlSplitting) {
boolean whitespaceTokenization = Boolean.valueOf(properties.getProperty("tokenize.whitespace", "false"));
@@ -254,8 +257,8 @@ public String additionalSignature() {
// keep track of all relevant properties for this annotator here!
StringBuilder os = new StringBuilder();
if (Boolean.valueOf(properties.getProperty(StanfordCoreNLP.NEWLINE_SPLITTER_PROPERTY, "false"))) {
- os.append(StanfordCoreNLP.NEWLINE_SPLITTER_PROPERTY + "=").append(properties.getProperty(StanfordCoreNLP.NEWLINE_SPLITTER_PROPERTY, "false")).append("\n");
- os.append("tokenize.whitespace=").append(properties.getProperty("tokenize.whitespace", "false")).append("\n");
+ os.append(StanfordCoreNLP.NEWLINE_SPLITTER_PROPERTY + '=').append(properties.getProperty(StanfordCoreNLP.NEWLINE_SPLITTER_PROPERTY, "false")).append('\n');
+ os.append("tokenize.whitespace=").append(properties.getProperty("tokenize.whitespace", "false")).append('\n');
} else {
os.append(baseSignature(properties, StanfordCoreNLP.STANFORD_SSPLIT));
}
@@ -327,10 +330,10 @@ public String additionalSignature() {
// keep track of all relevant properties for this annotator here!
return "ner.model:" +
properties.getProperty("ner.model", "") +
- NERClassifierCombiner.APPLY_NUMERIC_CLASSIFIERS_PROPERTY + ":" +
+ NERClassifierCombiner.APPLY_NUMERIC_CLASSIFIERS_PROPERTY + ':' +
properties.getProperty(NERClassifierCombiner.APPLY_NUMERIC_CLASSIFIERS_PROPERTY,
Boolean.toString(NERClassifierCombiner.APPLY_NUMERIC_CLASSIFIERS_DEFAULT)) +
- NumberSequenceClassifier.USE_SUTIME_PROPERTY + ":" +
+ NumberSequenceClassifier.USE_SUTIME_PROPERTY + ':' +
properties.getProperty(NumberSequenceClassifier.USE_SUTIME_PROPERTY,
Boolean.toString(NumberSequenceClassifier.USE_SUTIME_DEFAULT));
}
@@ -501,6 +504,7 @@ public String additionalSignature() {
public static AnnotatorFactory columnDataClassifier(Properties properties, final AnnotatorImplementations annotatorImpls) {
return new AnnotatorFactory(properties, annotatorImpls) {
+ private static final long serialVersionUID = 1L;
@Override
public Annotator create() {
if(!properties.containsKey("loadClassifier"))
@@ -520,6 +524,7 @@ protected String additionalSignature() {
//
public static AnnotatorFactory dependencies(Properties properties, final AnnotatorImplementations annotatorImpl) {
return new AnnotatorFactory(properties, annotatorImpl) {
+ private static final long serialVersionUID = 1L;
@Override
public Annotator create() {
return annotatorImpl.dependencies(properties);
@@ -532,4 +537,38 @@ protected String additionalSignature() {
};
}
+ //
+ // Monotonicity and Polarity
+ //
+ public static AnnotatorFactory natlog(Properties properties, final AnnotatorImplementations annotatorImpl) {
+ return new AnnotatorFactory(properties, annotatorImpl) {
+ @Override
+ public Annotator create() {
+ return annotatorImpl.natlog(properties);
+ }
+
+ @Override
+ protected String additionalSignature() {
+ return "";
+ }
+ };
+ }
+
+ //
+ // RelationTriples
+ //
+ public static AnnotatorFactory openie(Properties properties, final AnnotatorImplementations annotatorImpl) {
+ return new AnnotatorFactory(properties, annotatorImpl) {
+ @Override
+ public Annotator create() {
+ return annotatorImpl.openie(properties);
+ }
+
+ @Override
+ protected String additionalSignature() {
+ return "";
+ }
+ };
+ }
+
}
diff --git a/src/edu/stanford/nlp/pipeline/AnnotatorImplementations.java b/src/edu/stanford/nlp/pipeline/AnnotatorImplementations.java
index c73b94114c..179a965de3 100644
--- a/src/edu/stanford/nlp/pipeline/AnnotatorImplementations.java
+++ b/src/edu/stanford/nlp/pipeline/AnnotatorImplementations.java
@@ -2,6 +2,8 @@
import edu.stanford.nlp.ie.NERClassifierCombiner;
import edu.stanford.nlp.ie.regexp.NumberSequenceClassifier;
+import edu.stanford.nlp.naturalli.NaturalLogicAnnotator;
+import edu.stanford.nlp.naturalli.OpenIE;
import edu.stanford.nlp.util.PropertiesUtils;
import edu.stanford.nlp.util.ReflectionLoading;
@@ -65,7 +67,7 @@ public Annotator morpha(Properties properties, boolean verbose) {
}
/**
- * Annotate for named entities -- note that this combines multiple NER tag sets, and some auxilliary things (like temporal tagging)
+ * Annotate for named entities -- note that this combines multiple NER tag sets, and some auxiliary things (like temporal tagging)
*/
public Annotator ner(Properties properties) throws FileNotFoundException {
@@ -196,4 +198,22 @@ public Annotator dependencies(Properties properties) {
return new DependencyParseAnnotator(relevantProperties);
}
+ /**
+ * Annotate operators (e.g., quantifiers) and polarity of tokens in a sentence
+ */
+ public Annotator natlog(Properties properties) {
+ Properties relevantProperties = PropertiesUtils.extractPrefixedProperties(properties,
+ Annotator.STANFORD_NATLOG + '.');
+ return new NaturalLogicAnnotator(relevantProperties);
+ }
+
+ /**
+ * Annotate {@link edu.stanford.nlp.ie.util.RelationTriple}s from text.
+ */
+ public Annotator openie(Properties properties) {
+ Properties relevantProperties = PropertiesUtils.extractPrefixedProperties(properties,
+ Annotator.STANFORD_OPENIE + '.');
+ return new OpenIE(relevantProperties);
+ }
+
}
diff --git a/src/edu/stanford/nlp/pipeline/CoreNLP.proto b/src/edu/stanford/nlp/pipeline/CoreNLP.proto
index e48feb2a14..10de54f253 100644
--- a/src/edu/stanford/nlp/pipeline/CoreNLP.proto
+++ b/src/edu/stanford/nlp/pipeline/CoreNLP.proto
@@ -70,29 +70,33 @@ message Sentence {
//
message Token {
// Fields set by the default annotators [new CoreNLP(new Properties())]
- required string word = 1; // the word's gloss (post-tokenization)
- optional string pos = 2; // The word's part of speech tag
- optional string value = 3; // The word's 'value', (e.g., parse tree node)
- optional string category = 4; // The word's 'category' (e.g., parse tree node)
- optional string before = 5; // The whitespace/xml before the token
- optional string after = 6; // The whitespace/xml after the token
- optional string originalText = 7; // The original text for this token
- optional string ner = 8; // The word's NER tag
- optional string normalizedNER = 9; // The word's normalized NER tag
- optional string lemma = 10; // The word's lemma
- optional uint32 beginChar = 11; // The character offset begin
- optional uint32 endChar = 12; // The character offset end
- optional uint32 utterance = 13; // The utterance tag used in dcoref
- optional string speaker = 14; // The speaker speaking this word
- optional uint32 beginIndex = 15; // The begin index of, e.g., a span
- optional uint32 endIndex = 16; // The begin index of, e.g., a span
- optional uint32 tokenBeginIndex = 17; // The begin index of the token
- optional uint32 tokenEndIndex = 18; // The end index of the token
- optional Timex timexValue = 19; // The time this word refers to
- optional bool hasXmlContext = 21; // Used by clean xml annotator
- repeated string xmlContext = 22; // Used by clean xml annotator
- optional uint32 corefClusterID = 23; // The [primary] cluster id for this token
- optional string answer = 24; // A temporary annotation which is occasionally left in
+ required string word = 1; // the word's gloss (post-tokenization)
+ optional string pos = 2; // The word's part of speech tag
+ optional string value = 3; // The word's 'value', (e.g., parse tree node)
+ optional string category = 4; // The word's 'category' (e.g., parse tree node)
+ optional string before = 5; // The whitespace/xml before the token
+ optional string after = 6; // The whitespace/xml after the token
+ optional string originalText = 7; // The original text for this token
+ optional string ner = 8; // The word's NER tag
+ optional string normalizedNER = 9; // The word's normalized NER tag
+ optional string lemma = 10; // The word's lemma
+ optional uint32 beginChar = 11; // The character offset begin
+ optional uint32 endChar = 12; // The character offset end
+ optional uint32 utterance = 13; // The utterance tag used in dcoref
+ optional string speaker = 14; // The speaker speaking this word
+ optional uint32 beginIndex = 15; // The begin index of, e.g., a span
+ optional uint32 endIndex = 16; // The end index of, e.g., a span
+ optional uint32 tokenBeginIndex = 17; // The begin index of the token
+ optional uint32 tokenEndIndex = 18; // The end index of the token
+ optional Timex timexValue = 19; // The time this word refers to
+ optional bool hasXmlContext = 21; // Used by clean xml annotator
+ repeated string xmlContext = 22; // Used by clean xml annotator
+ optional uint32 corefClusterID = 23; // The [primary] cluster id for this token
+ optional string answer = 24; // A temporary annotation which is occasionally left in
+ // optional string projectedCategory = 25; // The syntactic category of the maximal constituent headed by the word. Not used anywhere, so deleted.
+ optional uint32 headWordIndex = 26; // The index of the head word of this word.
+ optional Operator operator = 27; // If this is an operator, which one is it and what is its scope (as per Natural Logic)?
+ optional Polarity polarity = 28; // The polarity of this word, according to Natural Logic
// Fields set by other annotators in CoreNLP
optional string gender = 51; // gender annotation (machine reading)
@@ -109,6 +113,17 @@ message Token {
extensions 100 to 255;
}
+//
+// An enumeration of valid sentiment values for the sentiment classifier.
+//
+enum Sentiment {
+ STRONG_NEGATIVE = 0;
+ WEAK_NEGATIVE = 1;
+ NEUTRAL = 2;
+ WEAK_POSITIVE = 3;
+ STRONG_POSITIVE = 4;
+}
+
//
// A syntactic parse tree, with scores.
//
@@ -118,6 +133,7 @@ message ParseTree {
optional uint32 yieldBeginIndex = 3;
optional uint32 yieldEndIndex = 4;
optional double score = 5;
+ optional Sentiment sentiment = 6;
}
//
@@ -220,3 +236,42 @@ message Relation {
// Implicit
// uint32 sentence @see implicit in sentence
}
+
+//
+// A Natural Logic operator
+//
+message Operator {
+ required string name = 1;
+ required int32 quantifierSpanBegin = 2;
+ required int32 quantifierSpanEnd = 3;
+ required int32 subjectSpanBegin = 4;
+ required int32 subjectSpanEnd = 5;
+ required int32 objectSpanBegin = 6;
+ required int32 objectSpanEnd = 7;
+}
+
+//
+// The seven informative Natural Logic relations
+//
+enum NaturalLogicRelation {
+ EQUIVALENCE = 0;
+ FORWARD_ENTAILMENT = 1;
+ REVERSE_ENTAILMENT = 2;
+ NEGATION = 3;
+ ALTERNATION = 4;
+ COVER = 5;
+ INDEPENDENCE = 6;
+}
+
+//
+// The polarity of a word, according to Natural Logic
+//
+message Polarity {
+ required NaturalLogicRelation projectEquivalence = 1;
+ required NaturalLogicRelation projectForwardEntailment = 2;
+ required NaturalLogicRelation projectReverseEntailment = 3;
+ required NaturalLogicRelation projectNegation = 4;
+ required NaturalLogicRelation projectAlternation = 5;
+ required NaturalLogicRelation projectCover = 6;
+ required NaturalLogicRelation projectIndependence = 7;
+}
diff --git a/src/edu/stanford/nlp/pipeline/CoreNLPProtos.java b/src/edu/stanford/nlp/pipeline/CoreNLPProtos.java
index b2dee81c37..0a4352bf72 100644
--- a/src/edu/stanford/nlp/pipeline/CoreNLPProtos.java
+++ b/src/edu/stanford/nlp/pipeline/CoreNLPProtos.java
@@ -95,6 +95,168 @@ private Language(int index, int value) {
// @@protoc_insertion_point(enum_scope:edu.stanford.nlp.pipeline.Language)
}
+ public enum Sentiment
+ implements com.google.protobuf.ProtocolMessageEnum {
+ STRONG_NEGATIVE(0, 0),
+ WEAK_NEGATIVE(1, 1),
+ NEUTRAL(2, 2),
+ WEAK_POSITIVE(3, 3),
+ STRONG_POSITIVE(4, 4),
+ ;
+
+ public static final int STRONG_NEGATIVE_VALUE = 0;
+ public static final int WEAK_NEGATIVE_VALUE = 1;
+ public static final int NEUTRAL_VALUE = 2;
+ public static final int WEAK_POSITIVE_VALUE = 3;
+ public static final int STRONG_POSITIVE_VALUE = 4;
+
+
+ public final int getNumber() { return value; }
+
+ public static Sentiment valueOf(int value) {
+ switch (value) {
+ case 0: return STRONG_NEGATIVE;
+ case 1: return WEAK_NEGATIVE;
+ case 2: return NEUTRAL;
+ case 3: return WEAK_POSITIVE;
+ case 4: return STRONG_POSITIVE;
+ default: return null;
+ }
+ }
+
+ public static com.google.protobuf.Internal.EnumLiteMap