From 4246a16eaccf28f79923acd4bdb3f0c04dd1750d Mon Sep 17 00:00:00 2001
From: Gabor Angeli
Date: Thu, 12 Mar 2015 16:55:30 -0700
Subject: [PATCH] Tests still failing, but OpenIE is starting to come together

---
 README.md | 8 +-
 build.xml | 2 +
 .../spanish/SpanishTokenizerITest.java | 2 +-
 .../TokenSequenceMatcherITest.java | 4 +-
 .../naturalli/NaturalLogicAnnotatorITest.java | 44 +
 .../stanford/nlp/naturalli/OpenIEITest.java | 156 +
 .../nlp/naturalli/OperatorScopeITest.java | 749 +++++
 .../stanford/nlp/naturalli/PolarityITest.java | 66 +
 .../parser/nndep/DependencyParserITest.java | 6 +-
 ...ProtobufAnnotationSerializerSlowITest.java | 5 +
 .../nlp/pipeline/StanfordCoreNLPITest.java | 15 +-
 .../semgraph/semgrex/SemgrexPatternITest.java | 36 -
 .../nlp/sentiment/SentimentTrainingITest.java | 18 +
 lib/README | 2 +-
 .../nlp/classify/ColumnDataClassifier.java | 70 +-
 src/edu/stanford/nlp/dcoref/CorefChain.java | 14 +-
 .../nlp/graph/DirectedMultiGraph.java | 29 +-
 .../nlp/ie/AbstractSequenceClassifier.java | 62 +-
 .../nlp/ie/NERClassifierCombiner.java | 3 +-
 .../stanford/nlp/ie/NERFeatureFactory.java | 15 +-
 .../stanford/nlp/ie/crf/CRFClassifier.java | 37 +-
 .../stanford/nlp/ie/crf/CRFCliqueTree.java | 12 +-
 src/edu/stanford/nlp/ie/crf/NERGUI.java | 2 +-
 src/edu/stanford/nlp/ie/demo/NERDemo.java | 80 +-
 .../stanford/nlp/ie/util/RelationTriple.java | 338 +++
 .../french/process/FrenchTokenizer.java | 16 +-
 .../spanish/SpanishVerbStripper.java | 4 +-
 .../spanish/process/SpanishTokenizer.java | 100 +-
 .../stanford/nlp/ling/AbstractCoreLabel.java | 26 +
 .../stanford/nlp/ling/AnnotationLookup.java | 6 +-
 .../stanford/nlp/ling/CoreAnnotations.java | 14 -
 src/edu/stanford/nlp/ling/CoreLabel.java | 81 +-
 src/edu/stanford/nlp/ling/IndexedWord.java | 52 +-
 .../nlp/ling/tokensregex/NodePattern.java | 3 +-
 .../nlp/ling/tokensregex/SequenceMatcher.java | 3 +-
 .../nlp/ling/tokensregex/SequencePattern.java | 42 +-
 .../tokensregex/TokenSequencePattern.java | 4 -
 .../parser/TokenSequenceParser.java | 1242 ++++----
 .../tokensregex/parser/TokenSequenceParser.jj | 29 +-
 src/edu/stanford/nlp/math/ArrayMath.java | 74 +-
 src/edu/stanford/nlp/math/SloppyMath.java | 73 +-
 .../stanford/nlp/naturalli/Monotonicity.java | 13 +
 .../nlp/naturalli/MonotonicityType.java | 13 +
 .../naturalli/NaturalLogicAnnotations.java | 58 +
 .../nlp/naturalli/NaturalLogicAnnotator.java | 435 +++
 .../nlp/naturalli/NaturalLogicRelation.java | 481 +++
 .../nlp/naturalli/NaturalLogicWeights.java | 85 +
 src/edu/stanford/nlp/naturalli/OpenIE.java | 597 ++++
 src/edu/stanford/nlp/naturalli/Operator.java | 160 +
 .../stanford/nlp/naturalli/OperatorSpec.java | 97 +
 src/edu/stanford/nlp/naturalli/Polarity.java | 242 ++
 .../nlp/naturalli/SentenceFragment.java | 51 +
 src/edu/stanford/nlp/neural/NeuralUtils.java | 12 +
 src/edu/stanford/nlp/neural/SimpleTensor.java | 13 +
 .../nlp/optimization/QNMinimizer.java | 8 +-
 .../parser/lexparser/LexicalizedParser.java | 2 +-
 .../stanford/nlp/parser/nndep/Classifier.java | 11 +-
 src/edu/stanford/nlp/parser/nndep/Config.java | 1 -
 .../nlp/parser/nndep/DependencyParser.java | 14 +-
 src/edu/stanford/nlp/parser/nndep/Util.java | 33 +-
 .../shiftreduce/BasicFeatureFactory.java | 4 +-
 .../parser/shiftreduce/BinaryTransition.java | 4 +-
 .../shiftreduce/CreateTransitionSequence.java | 6 +-
 .../parser/shiftreduce/FeatureFactory.java | 10 +-
 .../parser/shiftreduce/ShiftReduceParser.java | 16 +-
 .../parser/shiftreduce/ShiftReduceUtils.java | 5 +-
 .../parser/shiftreduce/UnaryTransition.java | 4 +-
 src/edu/stanford/nlp/pipeline/Annotation.java | 1 +
 .../nlp/pipeline/AnnotationPipeline.java | 2 +-
 src/edu/stanford/nlp/pipeline/Annotator.java | 4 +
 .../nlp/pipeline/AnnotatorFactories.java | 57 +-
 .../pipeline/AnnotatorImplementations.java | 22 +-
 src/edu/stanford/nlp/pipeline/CoreNLP.proto | 101 +-
 .../stanford/nlp/pipeline/CoreNLPProtos.java | 2598 +++++++++++++++--
 .../nlp/pipeline/ParserAnnotator.java | 12 -
 .../ProtobufAnnotationSerializer.java | 92 +-
 .../nlp/pipeline/StanfordCoreNLP.java | 8 +-
 .../nlp/pipeline/TokenizerAnnotator.java | 89 +-
 .../pipeline/WordsToSentencesAnnotator.java | 8 +-
 .../pipeline/demo/StanfordCoreNlpDemo.java | 64 +-
 .../nlp/process/DocumentPreprocessor.java | 186 +-
 src/edu/stanford/nlp/process/Morpha.flex | 27 +-
 src/edu/stanford/nlp/process/Morpha.java | 33 +-
 .../nlp/process/WordToSentenceProcessor.java | 7 +-
 .../stanford/nlp/semgraph/SemanticGraph.java | 3 +-
 .../nlp/semgraph/SemanticGraphFactory.java | 31 -
 .../nlp/semgraph/SemanticGraphUtils.java | 83 +-
 .../semgraph/semgrex/SemgrexBatchParser.java | 13 +-
 .../nlp/sentiment/RNNTrainOptions.java | 24 +
 .../sentiment/SentimentCostAndGradient.java | 58 +-
 .../nlp/sentiment/SentimentModel.java | 103 +-
 .../nlp/sentiment/SentimentTraining.java | 12 +-
 .../ColumnTabDocumentReaderWriter.java | 7 +-
 .../PlainTextDocumentReaderAndWriter.java | 1 +
 src/edu/stanford/nlp/time/TimeFormatter.java | 2 +
 .../nlp/trees/ENUniversalPOS.tsurgeon | 297 ++
 .../trees/EnglishGrammaticalStructure.java | 2 +-
 .../nlp/trees/GrammaticalStructure.java | 58 +-
 .../stanford/nlp/trees/PennTreeReader.java | 5 +
 src/edu/stanford/nlp/trees/Tree.java | 54 +-
 .../nlp/trees/TreeCoreAnnotations.java | 14 +-
 src/edu/stanford/nlp/trees/TreeGraphNode.java | 160 +-
 src/edu/stanford/nlp/trees/Trees.java | 41 +
 .../nlp/trees/UniversalPOSMapper.java | 66 +
 .../nlp/trees/tregex/TregexPattern.java | 13 +-
 .../nlp/trees/tregex/tsurgeon/AdjoinNode.java | 2 +-
 .../tregex/tsurgeon/AdjoinToFootNode.java | 2 +-
 .../tregex/tsurgeon/AdjoinToHeadNode.java | 2 +-
 .../trees/tregex/tsurgeon/AuxiliaryTree.java | 26 +-
 .../tregex/tsurgeon/CreateSubtreeNode.java | 2 +-
 .../trees/tregex/tsurgeon/HoldTreeNode.java | 2 +-
 .../nlp/trees/tregex/tsurgeon/Tsurgeon.java | 6 +-
 .../trees/tregex/tsurgeon/TsurgeonParser.java | 1 -
 .../trees/tregex/tsurgeon/TsurgeonParser.jj | 2 +-
 .../trees/tregex/tsurgeon/TsurgeonParser.jjt | 2 +-
 .../tsurgeon/TsurgeonParserTokenManager.java | 109 +-
 src/edu/stanford/nlp/util/ArrayCoreMap.java | 39 +-
 .../stanford/nlp/util/FileBackedCache.java | 2 +-
 .../stanford/nlp/util/IterableIterator.java | 46 +-
 src/edu/stanford/nlp/util/MutableLong.java | 28 +-
 src/edu/stanford/nlp/util/StringUtils.java | 24 +-
 .../nlp/ie/util/RelationTripleTest.java | 250 ++
 .../naturalli/NaturalLogicRelationTest.java | 65 +
 .../stanford/nlp/naturalli/PolarityTest.java | 133 +
 .../shiftreduce/BinaryTransitionTest.java | 4 +-
 .../nlp/semgraph/SemanticGraphUtilsTest.java | 41 -
 .../trees/tregex/tsurgeon/TsurgeonTest.java | 6 +
 .../edu/stanford/nlp/util/CoreMapTest.java | 3 +-
 .../nlp/util/IterableIteratorTest.java | 71 -
 129 files changed, 9063 insertions(+), 2082 deletions(-)
 create mode 100644 itest/src/edu/stanford/nlp/naturalli/NaturalLogicAnnotatorITest.java
 create mode 100644 itest/src/edu/stanford/nlp/naturalli/OpenIEITest.java
 create mode 100644 itest/src/edu/stanford/nlp/naturalli/OperatorScopeITest.java
 create mode 100644 itest/src/edu/stanford/nlp/naturalli/PolarityITest.java
 delete mode 100644 itest/src/edu/stanford/nlp/semgraph/semgrex/SemgrexPatternITest.java
 create mode 100644 src/edu/stanford/nlp/ie/util/RelationTriple.java
 create mode 100644 src/edu/stanford/nlp/naturalli/Monotonicity.java
 create mode 100644 src/edu/stanford/nlp/naturalli/MonotonicityType.java
 create mode 100644 src/edu/stanford/nlp/naturalli/NaturalLogicAnnotations.java
 create mode 100644 src/edu/stanford/nlp/naturalli/NaturalLogicAnnotator.java
 create mode 100644 src/edu/stanford/nlp/naturalli/NaturalLogicRelation.java
 create mode 100644 src/edu/stanford/nlp/naturalli/NaturalLogicWeights.java
 create mode 100644 src/edu/stanford/nlp/naturalli/OpenIE.java
 create mode 100644 src/edu/stanford/nlp/naturalli/Operator.java
 create mode 100644 src/edu/stanford/nlp/naturalli/OperatorSpec.java
 create mode 100644 src/edu/stanford/nlp/naturalli/Polarity.java
 create mode 100644 src/edu/stanford/nlp/naturalli/SentenceFragment.java
 create mode 100644 src/edu/stanford/nlp/trees/ENUniversalPOS.tsurgeon
 create mode 100644 src/edu/stanford/nlp/trees/UniversalPOSMapper.java
 create mode 100644 test/src/edu/stanford/nlp/ie/util/RelationTripleTest.java
 create mode 100644 test/src/edu/stanford/nlp/naturalli/NaturalLogicRelationTest.java
 create mode 100644 test/src/edu/stanford/nlp/naturalli/PolarityTest.java
 delete mode 100644 test/src/edu/stanford/nlp/semgraph/SemanticGraphUtilsTest.java
 delete mode 100644 test/src/edu/stanford/nlp/util/IterableIteratorTest.java

diff --git a/README.md b/README.md
index a1c4a5598b..a2078cb1ce 100644
--- a/README.md
+++ b/README.md
@@ -1,14 +1,14 @@
 Stanford CoreNLP
 ================
-Stanford CoreNLP provides a set of natural language analysis tools written in Java. It can take raw human language text input and give the base forms of words, their parts of speech, whether they are names of companies, people, etc., normalize dates, times, and numeric quantities, and mark up the structure of sentences in terms of phrases and word dependencies, and indicate which noun phrases refer to the same entities. It was originally developed for English, but now also provides varying levels of support for Arabic, (mainland) Chinese, French, and German. Stanford CoreNLP is an integrated framework, which make it very easy to apply a bunch of language analysis tools to a piece of text. Starting from plain text, you can run all the tools on it with just two lines of code. Its analyses provide the foundational building blocks for higher-level and domain-specific text understanding applications. Stanford CoreNLP is a set of stable and well-tested natural language processing tools, widely used by various groups in academia, government, and industry.
+Stanford CoreNLP provides a set of natural language analysis tools written in Java. It can take raw human language text input and give the base forms of words, their parts of speech, whether they are names of companies, people, etc., normalize dates, times, and numeric quantities, and mark up the structure of sentences in terms of phrases and word dependencies, and indicate which noun phrases refer to the same entities. It was originally developed for English, but now also provides varying levels of support for Arabic, (mainland) Chinese, French, German, and Spanish. Stanford CoreNLP is an integrated framework, which makes it very easy to apply a bunch of language analysis tools to a piece of text. Starting from plain text, you can run all the tools on it with just two lines of code. Its analyses provide the foundational building blocks for higher-level and domain-specific text understanding applications. Stanford CoreNLP is a set of stable and well-tested natural language processing tools, widely used by various groups in academia, government, and industry.
 
-The Stanford CoreNLP code is written in Java and licensed under the GNU General Public License (v3 or later). Note that this is the full GPL, which allows many free uses, but not its use in distributed proprietary software.
+The Stanford CoreNLP code is written in Java and licensed under the GNU General Public License (v3 or later). Note that this is the full GPL, which allows many free uses, but not its use in proprietary software that you distribute.
 
 You can find releases of Stanford CoreNLP on [Maven Central](http://search.maven.org/#browse%7C11864822).
 
-You can find more explanation and documentation of Stanford CoreNLP on [the Stanford CoreNLP homepage](http://nlp.stanford.edu/software/corenlp.shtml#Demo).
+You can find more explanation and documentation on [the Stanford CoreNLP homepage](http://nlp.stanford.edu/software/corenlp.shtml#Demo).
 
-The most recent models associated the code in this repository can be found [here](http://nlp.stanford.edu/software/stanford-corenlp-models-current.jar).
+The most recent models associated with the code in the HEAD of this repository can be found [here](http://nlp.stanford.edu/software/stanford-corenlp-models-current.jar).
 
 For information about making contributions to Stanford CoreNLP, see the file `CONTRIBUTING.md`.
diff --git a/build.xml b/build.xml
index 891a4faf89..91cce31d7e 100644
--- a/build.xml
+++ b/build.xml
@@ -107,6 +107,8 @@
 -->
+
+
diff --git a/.../spanish/SpanishTokenizerITest.java b/.../spanish/SpanishTokenizerITest.java
-    final TokenizerFactory<CoreLabel> tf = SpanishTokenizer.coreLabelFactory();
+    final TokenizerFactory<CoreLabel> tf = SpanishTokenizer.ancoraFactory();
     tf.setOptions("");
     tf.setOptions("tokenizeNLs");
diff --git a/itest/src/edu/stanford/nlp/ling/tokensregex/TokenSequenceMatcherITest.java b/itest/src/edu/stanford/nlp/ling/tokensregex/TokenSequenceMatcherITest.java
index ef81bedc47..b449f97a14 100644
--- a/itest/src/edu/stanford/nlp/ling/tokensregex/TokenSequenceMatcherITest.java
+++ b/itest/src/edu/stanford/nlp/ling/tokensregex/TokenSequenceMatcherITest.java
@@ -1,6 +1,5 @@
 package edu.stanford.nlp.ling.tokensregex;
 
-import edu.stanford.nlp.io.IOUtils;
 import edu.stanford.nlp.ling.CoreAnnotations;
 import edu.stanford.nlp.pipeline.*;
 import edu.stanford.nlp.util.CoreMap;
@@ -9,10 +8,8 @@
 import edu.stanford.nlp.util.Timing;
 import junit.framework.TestCase;
 
-import java.io.File;
 import java.io.IOException;
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
@@ -1526,4 +1523,5 @@ public void testCaseInsensitive2(){
     boolean match = m.find();
     assertTrue(match);
   }
+
 }
diff --git a/itest/src/edu/stanford/nlp/naturalli/NaturalLogicAnnotatorITest.java b/itest/src/edu/stanford/nlp/naturalli/NaturalLogicAnnotatorITest.java
new file mode 100644
index 0000000000..4ccfd58ac8
--- /dev/null
+++ b/itest/src/edu/stanford/nlp/naturalli/NaturalLogicAnnotatorITest.java
@@ -0,0 +1,44 @@
+package edu.stanford.nlp.naturalli;
+
+import edu.stanford.nlp.ling.CoreAnnotations;
+import edu.stanford.nlp.ling.CoreLabel;
+import edu.stanford.nlp.pipeline.Annotation;
+import edu.stanford.nlp.pipeline.StanfordCoreNLP;
+import org.junit.Test;
+
+import java.util.List;
+import java.util.Properties;
+
+import static org.junit.Assert.*;
+
+/**
+ * A lightweight test to make sure the annotator runs in the pipeline.
+ * For more in-depth tests, see {@link edu.stanford.nlp.naturalli.OperatorScopeITest} and
+ * {@link edu.stanford.nlp.naturalli.PolarityITest}.
+ *
+ * @author Gabor Angeli
+ */
+public class NaturalLogicAnnotatorITest {
+
+  @Test
+  public void testAnnotatorRuns() {
+    // Run pipeline
+    StanfordCoreNLP pipeline = new StanfordCoreNLP(new Properties(){{
+      setProperty("annotators", "tokenize,ssplit,pos,lemma,parse,natlog");
+      setProperty("ssplit.isOneSentence", "true");
+      setProperty("tokenize.class", "PTBTokenizer");
+      setProperty("tokenize.language", "en");
+      setProperty("enforceRequirements", "true");
+    }});
+    Annotation ann = new Annotation("All cats have tails");
+    pipeline.annotate(ann);
+
+    // Check output
+    List<CoreLabel> tokens = ann.get(CoreAnnotations.SentencesAnnotation.class).get(0).get(CoreAnnotations.TokensAnnotation.class);
+    assertTrue(tokens.get(0).containsKey(NaturalLogicAnnotations.OperatorAnnotation.class));
+    assertTrue(tokens.get(0).get(NaturalLogicAnnotations.PolarityAnnotation.class).isUpwards());
+    assertTrue(tokens.get(1).get(NaturalLogicAnnotations.PolarityAnnotation.class).isDownwards());
+    assertTrue(tokens.get(2).get(NaturalLogicAnnotations.PolarityAnnotation.class).isUpwards());
+    assertTrue(tokens.get(3).get(NaturalLogicAnnotations.PolarityAnnotation.class).isUpwards());
+  }
+}
diff --git a/itest/src/edu/stanford/nlp/naturalli/OpenIEITest.java b/itest/src/edu/stanford/nlp/naturalli/OpenIEITest.java
new file mode 100644
index 0000000000..697466301e
--- /dev/null
+++ b/itest/src/edu/stanford/nlp/naturalli/OpenIEITest.java
@@ -0,0 +1,156 @@
+package edu.stanford.nlp.naturalli;
+
+import edu.stanford.nlp.ie.util.RelationTriple;
+import edu.stanford.nlp.ling.CoreAnnotations;
+import edu.stanford.nlp.pipeline.Annotation;
+import edu.stanford.nlp.pipeline.StanfordCoreNLP;
+import edu.stanford.nlp.util.CoreMap;
+import edu.stanford.nlp.util.StringUtils;
+import org.junit.Test;
+
+import java.util.*;
+import java.util.stream.Collectors;
+
+import static org.junit.Assert.*;
+
+/**
+ * Test the natural logic OpenIE extractor at {@link edu.stanford.nlp.naturalli.OpenIE}.
+ *
+ * @author Gabor Angeli
+ */
+public class OpenIEITest {
+  protected static StanfordCoreNLP pipeline = new StanfordCoreNLP(new Properties(){{
+    setProperty("annotators", "tokenize,ssplit,pos,lemma,depparse,natlog,openie");
+    setProperty("ssplit.isOneSentence", "true");
+    setProperty("tokenize.class", "PTBTokenizer");
+    setProperty("tokenize.language", "en");
+    setProperty("enforceRequirements", "true");
+  }});
+
+  public CoreMap annotate(String text) {
+    Annotation ann = new Annotation(text);
+    pipeline.annotate(ann);
+    return ann.get(CoreAnnotations.SentencesAnnotation.class).get(0);
+  }
+
+  public void assertExtracted(String expected, String text) {
+    boolean found = false;
+    Collection<RelationTriple> extractions = annotate(text).get(NaturalLogicAnnotations.RelationTriplesAnnotation.class);
+    for (RelationTriple extraction : extractions) {
+      if (extraction.toString().equals("1.0\t" + expected)) {
+        found = true;
+      }
+    }
+    assertTrue("The extraction '" + expected + "' was not found in '" + text + "'", found);
+  }
+
+  public void assertExtracted(Set<String> expected, String text) {
+    Collection<RelationTriple> extractions = annotate(text).get(NaturalLogicAnnotations.RelationTriplesAnnotation.class);
+    Set<String> guess = extractions.stream().filter(x -> x.confidence > 0.1).map(RelationTriple::toString).collect(Collectors.toSet());
+    assertEquals(StringUtils.join(expected.stream().sorted(), "\n").toLowerCase(), StringUtils.join(guess.stream().map( x -> x.substring(x.indexOf("\t") + 1) ).sorted(), "\n").toLowerCase());
+  }
+
+  public void assertEntailed(String expected, String text) {
+    boolean found = false;
+    Collection<SentenceFragment> extractions = annotate(text).get(NaturalLogicAnnotations.EntailedSentencesAnnotation.class);
+    for (SentenceFragment extraction : extractions) {
+      if (extraction.toString().equals(expected)) {
+        found = true;
+      }
+    }
+    assertTrue("The sentence '" + expected + "' was not entailed from '" + text + "'", found);
+  }
+
+
+  @Test
+  public void testAnnotatorRuns() {
+    annotate("all cats have tails");
+  }
+
+  @Test
+  public void testBasicEntailments() {
+    assertEntailed("some cats have tails", "some blue cats have tails");
+    assertEntailed("blue cats have tails", "some blue cats have tails");
+    assertEntailed("cats have tails", "some blue cats have tails");
+  }
+
+  @Test
+  public void testBasicExtractions() {
+    assertExtracted("cats\thave\ttails", "some cats have tails");
+  }
+
+  @Test
+  public void testExtractionsObamaWikiOne() {
+    assertExtracted(new HashSet<String>() {{
+      add("Barack Hussein Obama II\tis 44th and current President of\tUnited States");
+      add("Barack Hussein Obama II\tis 44th President of\tUnited States");
+      add("Barack Hussein Obama II\tis current President of\tUnited States");
+      add("Barack Hussein Obama II\tis President of\tUnited States");
+      add("Barack Hussein Obama II\tis\tPresident");
+      add("Barack Hussein Obama II\tis\tcurrent President");
+      add("Barack Hussein Obama II\tis\t44th President");
+    }}, "Barack Hussein Obama II is the 44th and current President of the United States, and the first African American to hold the office.");
+  }
+
+  @Test
+  public void testExtractionsObamaWikiTwo() {
+    assertExtracted(new HashSet<String>() {{
+      add("Obama\tis graduate of\tColumbia University");
+      add("Obama\tis graduate of\tHarvard Law School");
+      add("Obama\tborn in\tHonolulu Hawaii");
+      add("he\tserved as\tpresident of Harvard Law Review");
+      add("he\tserved as\tpresident");
+      add("Obama\tis\tgraduate");
+    }}, "Born in Honolulu, Hawaii, Obama is a graduate of Columbia University and Harvard Law School, where he served as president of the Harvard Law Review.");
+  }
+
+  @Test
+  public void testExtractionsObamaWikiThree() {
+    assertExtracted(new HashSet<String>() {{
+      add("He\twas\tcommunity organizer in Chicago");
+      add("He\twas\tcommunity organizer");
+      add("He\tearning\tlaw degree");
+    }}, "He was a community organizer in Chicago before earning his law degree.");
+  }
+
+  @Test
+  public void testExtractionsObamaWikiFour() {
+    assertExtracted(new HashSet<String>() {{
+      add("He\tworked as\tcivil rights attorney");
+      add("He\tworked as\trights attorney");
+      add("He\ttaught\tconstitutional law");
+      add("He\ttaught\tlaw");
+      add("He\ttaught at\tUniversity of Chicago Law School");
+      add("He\ttaught at\tUniversity of Chicago Law School from 1992");
+      add("He\ttaught at\tUniversity");
+      add("He\ttaught to\t2004");  // shouldn't be here, but sometimes appears?
+    }}, "He worked as a civil rights attorney and taught constitutional law at the University of Chicago Law School from 1992 to 2004.");
+  }
+
+  @Test
+  public void testExtractionsObamaWikiFive() {
+    assertExtracted(new HashSet<String>() {{
+      add("He\tserved\tthree terms");
+      add("He\trepresenting\t13th District in Illinois Senate");
+      add("He\trepresenting\t13th District");
+      add("He\trepresenting\tDistrict in Illinois Senate");
+      add("He\trepresenting\tDistrict");
+      add("He\trunning unsuccessfully for\tUnited States House of Representatives in 2000");
+      add("He\trunning unsuccessfully for\tUnited States House of Representatives");
+      add("He\trunning unsuccessfully for\tUnited States House");
+      add("He\trunning for\tUnited States House of Representatives in 2000");
+      add("He\trunning for\tUnited States House of Representatives");
+      add("He\trunning for\tUnited States House");
+    }}, "He served three terms representing the 13th District in the Illinois Senate from 1997 to 2004, running unsuccessfully for the United States House of Representatives in 2000.");
+  }
+
+  @Test
+  public void testExtractionsObamaWikiSix() {
+    assertExtracted(new HashSet<String>() {{
+      add("He\tdefeated\tRepublican nominee John McCain");
+      add("He\tdefeated\tnominee John McCain");
+      add("He\twas inaugurated as\tpresident on January 20 2009");
+      add("He\twas inaugurated as\tpresident");
+    }}, "He then defeated Republican nominee John McCain in the general election, and was inaugurated as president on January 20, 2009.");
+  }
+}
diff --git a/itest/src/edu/stanford/nlp/naturalli/OperatorScopeITest.java b/itest/src/edu/stanford/nlp/naturalli/OperatorScopeITest.java
new file mode 100644
index 0000000000..1202e9d79d
--- /dev/null
+++ b/itest/src/edu/stanford/nlp/naturalli/OperatorScopeITest.java
@@ -0,0 +1,749 @@
+package edu.stanford.nlp.naturalli;
+
+import edu.stanford.nlp.ling.CoreAnnotations;
+import edu.stanford.nlp.ling.CoreLabel;
+import edu.stanford.nlp.pipeline.Annotation;
+import edu.stanford.nlp.pipeline.StanfordCoreNLP;
+import edu.stanford.nlp.util.StringUtils;
+import org.junit.*;
+
+import java.util.*;
+
+import static org.junit.Assert.*;
+
+/**
+ * A test for the {@link NaturalLogicAnnotator} setting the right
+ * {@link edu.stanford.nlp.naturalli.NaturalLogicAnnotations.OperatorAnnotation}s.
+ *
+ * TODO(gabor) add parses to the parser using "like" as a verb (among other things)
+ *
+ * @author Gabor Angeli
+ */
+public class OperatorScopeITest {
+
+  private static final StanfordCoreNLP pipeline = new StanfordCoreNLP(new Properties(){{
+    setProperty("annotators", "tokenize,ssplit,pos,lemma,parse");
+    setProperty("ssplit.isOneSentence", "true");
+    setProperty("tokenize.class", "PTBTokenizer");
+    setProperty("tokenize.language", "en");
+  }});
+
+  static {
+    pipeline.addAnnotator(new NaturalLogicAnnotator());
+  }
+
+  @SuppressWarnings("unchecked")
+  private Optional<OperatorSpec>[] annotate(String text) {
+    Annotation ann = new Annotation(text);
+    pipeline.annotate(ann);
+    List<CoreLabel> tokens = ann.get(CoreAnnotations.SentencesAnnotation.class).get(0).get(CoreAnnotations.TokensAnnotation.class);
+    Optional<OperatorSpec>[] scopes = new Optional[tokens.size()];
+    Arrays.fill(scopes, Optional.empty());
+    for (int i = 0; i < tokens.size(); ++i) {
+      if (tokens.get(i).containsKey(NaturalLogicAnnotations.OperatorAnnotation.class)) {
+        scopes[i] = Optional.of(tokens.get(i).get(NaturalLogicAnnotations.OperatorAnnotation.class));
+      }
+    }
+    return scopes;
+  }
+
+  private void checkScope(int subjBegin, int subjEnd, int objBegin, int objEnd, Optional<OperatorSpec> guess) {
+    assertTrue("No quantifier found", guess.isPresent());
+    assertEquals("Bad subject begin " + guess.get(), subjBegin, guess.get().subjectBegin);
+    assertEquals("Bad subject end " + guess.get(), subjEnd, guess.get().subjectEnd);
+    assertEquals("Bad object begin " + guess.get(), objBegin, guess.get().objectBegin);
+    assertEquals("Bad object end " + guess.get(), objEnd, guess.get().objectEnd);
+  }
+
+  private void checkScope(int subjBegin, int subjEnd, Optional<OperatorSpec> guess) {
+    assertTrue("No quantifier found", guess.isPresent());
+    assertEquals("Bad subject begin " + guess.get(), subjBegin, guess.get().subjectBegin);
+    assertEquals("Bad subject end " + guess.get(), subjEnd, guess.get().subjectEnd);
+    assertEquals("Two-place quantifier matched", subjEnd, guess.get().objectBegin);
+    assertEquals("Two-place quantifier matched", subjEnd, guess.get().objectEnd);
+  }
+
+  private void checkScope(String spec) {
+    String[] terms = spec.split("\\s+");
+//    int quantStart = -1;
+    int quantEnd = -1;
+    int subjBegin = -1;
+    int subjEnd = -1;
+    int objBegin = -1;
+    int objEnd = -1;
+    boolean seenSubj = false;
+    int tokenIndex = 0;
+    List<String> cleanSentence = new ArrayList<>();
+    for (String term : terms) {
+      switch (term) {
+        case "{":
+//          quantStart = tokenIndex;
+          break;
+        case "}":
+          quantEnd = tokenIndex;
+          break;
+        case "[":
+          if (!seenSubj) {
+            subjBegin = tokenIndex;
+          } else {
+            objBegin = tokenIndex;
+          }
+          break;
+        case "]":
+          if (!seenSubj) {
+            subjEnd = tokenIndex;
+            seenSubj = true;
+          } else {
+            objEnd = tokenIndex;
+          }
+          break;
+        default:
+          cleanSentence.add(term);
+          tokenIndex += 1;
+          break;
+      }
+    }
+    Optional<OperatorSpec>[] scopes = annotate(StringUtils.join(cleanSentence, " "));
+    System.err.println("Checking [@ " + (quantEnd - 1) + "]: " + spec);
+    if (objBegin >= 0 && objEnd >= 0) {
+      checkScope(subjBegin, subjEnd, objBegin, objEnd, scopes[quantEnd - 1]);
+    } else {
+      checkScope(subjBegin, subjEnd, scopes[quantEnd - 1]);
+    }
+  }
+
+  @Test
+  public void annotatorRuns() {
+    annotate("All green cats have tails.");
+  }
+
+  @Test
+  public void all_X_verb_Y() {
+    checkScope(1, 2, 2, 4, annotate("All cats eat mice.")[0]);
+    checkScope(1, 2, 2, 4, annotate("All cats have tails.")[0]);
+  }
+
+  @Test
+  public void all_X_want_Y() {
+    checkScope(1, 2, 2, 4, annotate("All cats want milk.")[0]);
+  }
+
+  @Test
+  public void all_X_verb_prep_Y() {
+    checkScope(1, 2, 2, 5, annotate("All cats are in boxes.")[0]);
+    checkScope(1, 2, 2, 5, annotate("All cats voted for Roosevelt.")[0]);
+    checkScope(1, 5, 5, 8, annotate("All cats who like dogs voted for Teddy.")[0]);
+    checkScope(1, 2, 2, 6, annotate("All cats have spoken to Fido.")[0]);
+  }
+
+  @Test
+  public void all_X_be_Y() {
+    checkScope(1, 2, 2, 4, annotate("All cats are cute")[0]);
+  }
+
+  @Test
+  public void all_X_can_Y() {
+    checkScope(1, 2, 2, 4, annotate("All cats can purr")[0]);
+  }
+
+  @Test
+  public void all_X_relclause_verb_Y() {
+    checkScope(1, 5, 5, 7, annotate("All cats who like dogs eat fish.")[0]);
+  }
+
+  @Test
+  public void all_of_X_verb_Y() {
+    checkScope(1, 4, 4, 6, annotate("All of the cats hate dogs.")[0]);
+    checkScope(1, 6, 6, 9, annotate("Each of the other 99 companies owns one computer.")[0]);
+  }
+
+  @Test
+  public void PER_predicate() {
+    checkScope(0, 1, 1, 4, annotate("Felix likes cat food.")[0]);
+  }
+
+  @Test
+  public void PER_has_predicate() {
+    checkScope(0, 1, 1, 5, annotate("Felix has liked cat food.")[0]);
+  }
+
+  @Test
+  public void PER_predicate_prep() {
+    checkScope(0, 1, 1, 7, annotate("Jack paid the bank for 10 years")[0]);
+  }
+
+  @Test
+  public void PER_has_predicate_prep() {
+    checkScope(0, 1, 1, 5, annotate("Felix has spoken to Fido.")[0]);
+  }
+
+  @Test
+  public void PER_is_nn() {
+    checkScope(0, 1, 1, 4, annotate("Felix is a cat.")[0]);
+  }
+
+  @Test
+  public void PER_is_jj() {
+    checkScope(0, 1, 1, 3, annotate("Felix is cute.")[0]);
+  }
+
+  @Test
+  public void few_x_verb_y() {
+    checkScope(1, 2, 2, 4, annotate("all cats chase dogs")[0]);
+  }
+
+  @Test
+  public void a_few_x_verb_y() {
+    checkScope(2, 3, 3, 5, annotate("a few cats chase dogs")[1]);
+    assertFalse(annotate("a few cats chase dogs")[0].isPresent());
+  }
+
+  @Test
+  public void binary_no() {
+    checkScope(1, 2, 2, 4, annotate("no cats chase dogs")[0]);
+  }
+
+  @Test
+  public void unary_not() {
+    Optional<OperatorSpec>[] quantifiers = annotate("some cats don't like dogs");
+    checkScope(1, 2, 2, 6, quantifiers[0]);  // some
+    checkScope(4, 6, quantifiers[3]);  // no
+    assertFalse(quantifiers[3].get().isBinary());  // is unary no
+  }
+
+  @Test
+  public void num_X_verb_Y() {
+    checkScope(1, 2, 2, 4, annotate("Three cats eat mice.")[0]);
+    checkScope(1, 2, 2, 4, annotate("3 cats have tails.")[0]);
+  }
+
+  @Test
+  public void at_least_num_X_verb_Y() {
+    checkScope(3, 4, 4, 6, annotate("at least Three cats eat mice.")[2]);
+    checkScope(3, 4, 4, 6, annotate("at least 3 cats have tails.")[2]);
+  }
+
+  @Test
+  public void everyone_pp_verb_Y() {
+    checkScope(1, 3, 3, 5, annotate("everyone at Stanford likes cats.")[0]);
+    checkScope(1, 5, 5, 7, annotate("everyone who is at Stanford likes cats.")[0]);
+  }
+
+  @Test
+  public void there_are_np() {
+    checkScope(2, 3, annotate("there are cats")[1]);
+  }
+
+  @Test
+  public void there_are_np_pp() {
+    checkScope(2, 6, annotate("there are cats who like dogs")[1]);
+  }
+
+  @Test
+  public void regressionStrangeComma() {
+    Optional<OperatorSpec>[] operators = annotate("all cats, have tails.");
+    checkScope(1, 2, 3, 5, operators[0]);  // though, unclear if this should even be true?
+ } + + @Test + public void fracasSentencesWithAll() { + checkScope("{ All } [ APCOM managers ] [ have company cars ]"); + checkScope("{ All } [ Canadian residents ] [ can travel freely within Europe ]"); + checkScope("{ All } [ Europeans ] [ are people ]"); + checkScope("{ All } [ Europeans ] [ can travel freely within Europe ]"); + checkScope("{ All } [ Europeans ] [ have the right to live in Europe ]"); + checkScope("{ All } [ Italian men ] [ want to be a great tenor ]"); + checkScope("{ All } [ committee members ] [ are people ]"); + checkScope("{ All } [ competent legal authorities ] [ are competent law lecturers ]"); + checkScope("{ All } [ elephants ] [ are large animals ]"); + checkScope("{ All } [ fat legal authorities ] [ are fat law lecturers ]"); + checkScope("{ All } [ law lecturers ] [ are legal authorities ]"); + checkScope("{ All } [ legal authorities ] [ are law lecturers ]"); + checkScope("{ All } [ mice ] [ are small animals ]"); + checkScope("{ All } [ people who are from Portugal ] [ are from southern Europe ]"); + checkScope("{ All } [ people who are from Sweden ] [ are from Scandinavia ]"); + checkScope("{ All } [ people who are resident in Europe ] [ can travel freely within Europe ]"); + checkScope("{ All } [ residents of major western countries ] [ are residents of western countries ]"); + checkScope("{ All } [ residents of member states ] [ are individuals ]"); + checkScope("{ All } [ residents of the North American continent ] [ can travel freely within Europe ]"); + checkScope("{ All } [ the people who were at the meeting ] [ voted for a new chairman ]"); + } + + @Test + public void fracasSentencesWithEach() { + checkScope("{ Each } [ Canadian resident ] [ can travel freely within Europe ]"); + checkScope("{ Each } [ European ] [ can travel freely within Europe ]"); + checkScope("{ Each } [ European ] [ has the right to live in Europe ]"); + checkScope("{ Each } [ Italian tenor ] [ wants to be great ]"); + checkScope("{ Each } [ department ] [ has a dedicated line ]"); + checkScope("{ Each } [ of the other 99 companies ] [ owns one computer ]"); + checkScope("{ Each } [ resident of the North American continent ] [ can travel freely within Europe ]"); + } + + @Test + public void fracasSentencesWithEvery() { + checkScope("{ Every } [ Ancient Greek ] [ was a noted philosopher ]"); + checkScope("{ Every } [ Canadian resident ] [ can travel freely within Europe ]"); + checkScope("{ Every } [ Canadian resident ] [ is a resident of the North American continent ]"); + checkScope("{ Every } [ European ] [ can travel freely within Europe ]"); + checkScope("{ Every } [ European ] [ has the right to live in Europe ]"); + checkScope("{ Every } [ European ] [ is a person ]"); + checkScope("{ Every } [ Italian man ] [ wants to be a great tenor ]"); + checkScope("{ Every } [ Swede ] [ is a Scandinavian ]"); + checkScope("{ Every } [ committee ] [ has a chairman ]"); + checkScope("{ Every } [ committee ] [ has a chairman appointed by members of the committee ]"); + checkScope("{ Every } [ customer who owns a computer ] [ has a service contract for it ]"); + checkScope("{ Every } [ department ] [ rents a line from BT ]"); + checkScope("{ Every } [ executive who had a laptop computer ] [ brought it to take notes at the meeting ]"); + checkScope("{ Every } [ four-legged mammal ] [ is a four-legged animal ]"); + checkScope("{ Every } [ individual who has the right to live anywhere in Europe ] [ can travel freely within Europe ]"); + checkScope("{ Every } [ individual who has the 
right to live in Europe ] [ can travel freely within Europe ]"); + checkScope("{ Every } [ inhabitant of Cambridge ] [ voted for a Labour MP ]"); + checkScope("{ Every } [ mammal ] [ is an animal ]"); + checkScope("{ Every } [ person who has the right to live in Europe ] [ can travel freely within Europe ]"); + checkScope("{ Every } [ report ] [ has a cover page ]"); + checkScope("{ Every } [ representative and client ] [ was at the meeting ]"); + checkScope("{ Every } [ representative and every client ] [ was at the meeting ]"); + checkScope("{ Every } [ representative ] [ has read this report ]"); + checkScope("{ Every } [ representative or client ] [ was at the meeting ]"); + checkScope("{ Every } [ representative ] [ was at the meeting ]"); + checkScope("{ Every } [ resident of the North American continent ] [ can travel freely within Europe ]"); + checkScope("{ Every } [ student ] [ used her workstation ]"); + } + + @Test + public void fracasSentencesWithEveryone() { + checkScope("{ Everyone } [ at the meeting ] [ voted for a new chairman ]"); + checkScope("{ Everyone } [ who starts gambling seriously ] [ continues until he is broke ]"); + checkScope("{ Everyone } [ who starts gambling seriously ] [ stops the moment he is broke ]"); + } + + @Test + public void fracasSentencesWithFew() { + checkScope("{ Few } [ committee members ] [ are from Portugal ]"); + checkScope("{ Few } [ committee members ] [ are from southern Europe ]"); + checkScope("{ Few } [ female committee members ] [ are from southern Europe ]"); + } + + @Test + public void fracasSentencesWithA() { + checkScope("{ A } [ Scandinavian ] [ won a Nobel prize ]"); + checkScope("{ A } [ Swede ] [ won a Nobel prize ]"); + checkScope("{ A } [ company director ] [ awarded himself a large payrise ]"); + checkScope("{ A } [ company director ] [ has awarded and been awarded a payrise ]"); + checkScope("{ A } [ lawyer ] [ signed every report ]"); + + checkScope("{ An } [ Irishman ] [ won a Nobel prize ]"); + checkScope("{ An } [ Irishman ] [ won the Nobel prize for literature ]"); + checkScope("{ An } [ Italian ] [ became the world 's greatest tenor ]"); + } + + @Test + public void fracasSentencesWithAFew() { + checkScope("{ A few } [ committee members ] [ are from Scandinavia ]"); + checkScope("{ A few } [ committee members ] [ are from Sweden ]"); + checkScope("{ A few } [ female committee members ] [ are from Scandinavia ]"); + checkScope("{ A few } [ great tenors ] [ sing popular music ]"); + } + + @Test + public void fracasSentencesWithAtLeastAFew() { + checkScope("{ At least a few } [ committee members ] [ are from Scandinavia ]"); + checkScope("{ At least a few } [ committee members ] [ are from Sweden ]"); + checkScope("{ At least a few } [ female committee members ] [ are from Scandinavia ]"); + } + + @Test + public void fracasSentencesWithEither() { + checkScope("{ Either } [ Smith Jones or Anderson ] [ signed the contract ]"); + } + + @Test + public void fracasSentencesWithOneOfThe() { + checkScope("{ One of the } [ commissioners ] [ spends a lot of time at home ]"); + checkScope("{ One of the } [ leading tenors ] [ is Pavarotti ]"); + } + + @Test + public void fracasSentencesWithSeveral() { + checkScope("{ Several } [ Portuguese delegates ] [ got the results published in major national newspapers ]"); + checkScope("{ Several } [ delegates ] [ got the results published ]"); + checkScope("{ Several } [ delegates ] [ got the results published in major national newspapers ]"); + checkScope("{ Several } [ great tenors ] [ are 
British ]"); + } + + @Test + public void fracasSentencesWithSome() { + checkScope("{ Some } [ Irish delegates ] [ finished the survey on time ]"); + checkScope("{ Some } [ Italian men ] [ are great tenors ]"); + checkScope("{ Some } [ Italian tenors ] [ are great ]"); + checkScope("{ Some } [ Scandinavian delegate ] [ finished the report on time ]"); + checkScope("{ Some } [ accountant ] [ attended the meeting ]"); + checkScope("{ Some } [ accountants ] [ attended the meeting ]"); + checkScope("{ Some } [ delegate ] [ finished the report on time ]"); + checkScope("{ Some } [ delegates ] [ finished the survey ]"); + checkScope("{ Some } [ delegates ] [ finished the survey on time ]"); + checkScope("{ Some } [ great tenors ] [ are Swedish ]"); +// checkScope("{ Some } [ great tenors ] [ like popular music ]"); // parse error + checkScope("{ Some } [ people ] [ discover that they have been asleep ]"); + } + + @Test + public void fracasSentencesWithThe() { + checkScope("{ The } [ Ancient Greeks ] [ were all noted philosophers ]"); + checkScope("{ The } [ Ancient Greeks ] [ were noted philosophers ]"); + checkScope("{ The } [ ITEL-XZ ] [ is fast ]"); + checkScope("{ The } [ ITEL-ZX ] [ is an ITEL computer ]"); + checkScope("{ The } [ ITEL-ZX ] [ is slower than 500 MIPS ]"); + checkScope("{ The } [ PC-6082 ] [ is as fast as the ITEL-XZ ]"); + checkScope("{ The } [ PC-6082 ] [ is fast ]"); + checkScope("{ The } [ PC-6082 ] [ is faster than 500 MIPS ]"); + checkScope("{ The } [ PC-6082 ] [ is faster than any ITEL computer ]"); + checkScope("{ The } [ PC-6082 ] [ is faster than every ITEL computer ]"); + checkScope("{ The } [ PC-6082 ] [ is faster than some ITEL computer ]"); + checkScope("{ The } [ PC-6082 ] [ is faster than the ITEL-XZ ]"); + checkScope("{ The } [ PC-6082 ] [ is faster than the ITEL-ZX ]"); + checkScope("{ The } [ PC-6082 ] [ is faster than the ITEL-ZX and the ITEL-ZY ]"); + checkScope("{ The } [ PC-6082 ] [ is faster than the ITEL-ZX or the ITEL-ZY ]"); + checkScope("{ The } [ PC-6082 ] [ is slow ]"); + checkScope("{ The } [ PC-6082 ] [ is slower than the ITEL-XZ ]"); + checkScope("{ The } [ chairman of the department ] [ is a person ]"); + checkScope("{ The } [ chairman ] [ read out every item on the agenda ]"); + checkScope("{ The } [ chairman ] [ read out the items on the agenda ]"); + checkScope("{ The } [ conference ] [ started on July 4th , 1994 ]"); + checkScope("{ The } [ conference ] [ was over on July 8th , 1994 ]"); + checkScope("{ The } [ inhabitants of Cambridge ] [ voted for a Labour MP ]"); +// checkScope("{ The } [ people who were at the meeting ] [ all voted for a new chairman ]"); // TODO(gabor) Parse error on "meeting -dep-> all" + checkScope("{ The } [ people who were at the meeting ] [ voted for a new chairman ]"); + checkScope("{ The } [ really ambitious tenors ] [ are Italian ]"); + checkScope("{ The } [ residents of major western countries ] [ can travel freely within Europe ]"); + checkScope("{ The } [ residents of major western countries ] [ have the right to live in Europe ]"); + checkScope("{ The } [ residents of member states ] [ can travel freely within Europe ]"); + checkScope("{ The } [ residents of member states ] [ have the right to live anywhere in Europe ]"); + checkScope("{ The } [ residents of member states ] [ have the right to live in Europe ]"); + checkScope("{ The } [ residents of western countries ] [ can travel freely within Europe ]"); + checkScope("{ The } [ residents of western countries ] [ have the right to live in Europe ]"); + 
checkScope("{ The } [ sales department ] [ rents a line from BT ]"); + checkScope("{ The } [ sales department ] [ rents it from BT ]"); + checkScope("{ The } [ students ] [ are going to Paris by train ]"); + checkScope("{ The } [ students ] [ have spoken to Mary ]"); + checkScope("{ The } [ system failure ] [ was blamed on one or more software faults ]"); + } + + @Test + public void fracasSentencesWithThereAre() { + checkScope("{ There are } [ 100 companies ]"); + checkScope("{ There are } [ Italian men who want to be a great tenor ]"); + checkScope("{ There are } [ Italian tenors who want to be great ]"); + checkScope("{ There are } [ few committee members from Portugal ]"); + checkScope("{ There are } [ few committee members from southern Europe ]"); + checkScope("{ There are } [ great tenors who are British ]"); + checkScope("{ There are } [ great tenors who are German ]"); + checkScope("{ There are } [ great tenors who are Italian ]"); + checkScope("{ There are } [ great tenors who are Swedish ]"); + checkScope("{ There are } [ great tenors who sing popular music ]"); +// checkScope("{ There are } [ really ambitious tenors who are Italian ]"); // TODO(gabor) parse error on are -advmod-> really +// checkScope("{ There are } [ really great tenors who are modest ]"); // TODO(gabor) as above + checkScope("{ There are } [ sixteen representatives ]"); + checkScope("{ There are } [ some reports from ITEL on Smith 's desk ]"); + checkScope("{ There are } [ tenors who will take part in the concert ]"); + + checkScope("{ There is } [ a car that John and Bill own ]"); + checkScope("{ There is } [ someone whom Helen saw answer the phone ]"); + + checkScope("{ There was } [ a group of people that met ]"); + checkScope("{ There was } [ an Italian who became the world 's greatest tenor ]"); + checkScope("{ There was } [ one auditor who signed all the reports ]"); + } + + @Test + public void fracasSentencesWithProperNouns() { + checkScope("[ { APCOM } ] [ has a more important customer than ITEL ]"); + checkScope("[ { APCOM } ] [ has a more important customer than ITEL has ]"); + checkScope("[ { APCOM } ] [ has a more important customer than ITEL is ]"); + checkScope("[ { APCOM } ] [ has been paying mortgage interest for a total of 15 years or more ]"); + checkScope("[ { APCOM } ] [ lost some orders ]"); + checkScope("[ { APCOM } ] [ lost ten orders ]"); + checkScope("[ { APCOM } ] [ signed the contract Friday , 13th ]"); + checkScope("[ { APCOM } ] [ sold exactly 2500 computers ]"); + checkScope("[ { APCOM } ] [ won some orders ]"); + checkScope("[ { APCOM } ] [ won ten orders ]"); + + checkScope("[ { Bill } ] [ bought a car ]"); + checkScope("[ { Bill } ] [ has spoken to Mary ]"); + checkScope("[ { Bill } ] [ is going to ]"); + checkScope("[ { Bill } ] [ knows why John had his paper accepted ]"); + checkScope("[ { Bill } ] [ owns a blue car ]"); + checkScope("[ { Bill } ] [ owns a blue one ]"); + checkScope("[ { Bill } ] [ owns a car ]"); + checkScope("[ { Bill } ] [ owns a fast car ]"); + checkScope("[ { Bill } ] [ owns a fast one ]"); + checkScope("[ { Bill } ] [ owns a fast red car ]"); + checkScope("[ { Bill } ] [ owns a red car ]"); + checkScope("[ { Bill } ] [ owns a slow one ]"); + checkScope("[ { Bill } ] [ owns a slow red car ]"); + checkScope("[ { Bill } ] [ said Mary wrote a report ]"); + checkScope("[ { Bill } ] [ said Peter wrote a report ]"); + checkScope("[ { Bill } ] [ spoke to Mary ]"); + checkScope("[ { Bill } ] [ spoke to Mary at five o'clock ]"); + checkScope("[ { Bill } ] [ spoke 
to Mary at four o'clock ]"); + checkScope("[ { Bill } ] [ spoke to Mary on Monday ]"); + checkScope("[ { Bill } ] [ spoke to everyone that John did ]"); + checkScope("[ { Bill } ] [ suggested to Frank 's boss that they should go to the meeting together , and Carl to Alan 's wife ]"); + checkScope("[ { Bill } ] [ went to Berlin by car ]"); + checkScope("[ { Bill } ] [ went to Berlin by train ]"); + checkScope("[ { Bill } ] [ went to Paris by train ]"); + checkScope("[ { Bill } ] [ will speak to Mary ]"); + checkScope("[ { Bill } ] [ wrote a report ]"); + + checkScope("[ { Dumbo } ] [ is a four-legged animal ]"); + checkScope("[ { Dumbo } ] [ is a large animal ]"); + checkScope("[ { Dumbo } ] [ is a small animal ]"); + checkScope("[ { Dumbo } ] [ is a small elephant ]"); + checkScope("[ { Dumbo } ] [ is four-legged ]"); + checkScope("[ { Dumbo } ] [ is larger than Mickey ]"); + + checkScope("[ { GFI } ] [ owns several computers ]"); + + checkScope("[ { Helen } ] [ saw the chairman of the department answer the phone ]"); + + checkScope("[ { ICM } ] [ is one of the companies and owns 150 computers ]"); + +// checkScope("[ { ITEL } ] [ always delivers reports late ]"); // TODO(gabor) bad parse from ITEL -dep-> delivers + checkScope("[ { ITEL } ] [ built MTALK in 1993 ]"); +// checkScope("[ { ITEL } ] [ currently has a factory in Birmingham ]"); // fix me (bad scope) + checkScope("[ { ITEL } ] [ delivered reports late in 1993 ]"); + checkScope("[ { ITEL } ] [ developed a new editor in 1993 ]"); + checkScope("[ { ITEL } ] [ existed in 1992 ]"); + checkScope("[ { ITEL } ] [ expanded in 1993 ]"); + checkScope("[ { ITEL } ] [ finished MTALK in 1993 ]"); + checkScope("[ { ITEL } ] [ has a factory in Birmingham ]"); + checkScope("[ { ITEL } ] [ has developed a new editor since 1992 ]"); + checkScope("[ { ITEL } ] [ has expanded since 1992 ]"); + checkScope("[ { ITEL } ] [ has made a loss since 1992 ]"); + checkScope("[ { ITEL } ] [ has sent most of the reports Smith needs ]"); + checkScope("[ { ITEL } ] [ made a loss in 1993 ]"); + checkScope("[ { ITEL } ] [ maintains all the computers that GFI owns ]"); + checkScope("[ { ITEL } ] [ maintains them ]"); + checkScope("[ { ITEL } ] [ managed to win the contract in 1992 ]"); +// checkScope("[ { ITEL } ] [ never delivers reports late ]"); // TODO(gabor) parse error + checkScope("[ { ITEL } ] [ owned APCOM from 1988 to 1992 ]"); + checkScope("[ { ITEL } ] [ owned APCOM in 1990 ]"); + checkScope("[ { ITEL } ] [ sent a progress report in July 1994 ]"); + checkScope("[ { ITEL } ] [ sold 3000 more computers than APCOM ]"); + checkScope("[ { ITEL } ] [ sold 5500 computers ]"); + checkScope("[ { ITEL } ] [ tried to win the contract in 1992 ]"); + checkScope("[ { ITEL } ] [ was building MTALK in 1993 ]"); + checkScope("[ { ITEL } ] [ was winning the contract from APCOM in 1993 ]"); + checkScope("[ { ITEL } ] [ won a contract in 1993 ]"); + checkScope("[ { ITEL } ] [ won at least eleven orders ]"); + checkScope("[ { ITEL } ] [ won more orders than APCOM ]"); + checkScope("[ { ITEL } ] [ won more orders than APCOM did ]"); +// checkScope("[ { ITEL } ] [ won more orders than APCOM lost ]"); // TODO(gabor) parse error + checkScope("[ { ITEL } ] [ won more orders than the APCOM contract ]"); + checkScope("[ { ITEL } ] [ won more than one order ]"); + checkScope("[ { ITEL } ] [ won some orders ]"); + checkScope("[ { ITEL } ] [ won the APCOM contract ]"); + checkScope("[ { ITEL } ] [ won the contract from APCOM in 1993 ]"); + checkScope("[ { ITEL } ] [ won the contract 
in 1992 ]"); + checkScope("[ { ITEL } ] [ won twenty orders ]"); + checkScope("[ { ITEL } ] [ won twice as many orders than APCOM ]"); + checkScope("[ { Itel } ] [ was in Birmingham in 1993 ]"); + + checkScope("[ { John } ] [ bought a car ]"); + checkScope("[ { John } ] [ found Mary before Bill ]"); +// checkScope("[ { John } ] [ found Mary before Bill found Mary ]"); // fix me (bad scope) +// checkScope("[ { John } ] [ found Mary before John found Bill ]"); // fix me (bad scope) + checkScope("[ { John } ] [ had his paper accepted ]"); + checkScope("[ { John } ] [ has a diamond ]"); + checkScope("[ { John } ] [ has a genuine diamond ]"); + checkScope("[ { John } ] [ has spoken to Mary ]"); + checkScope("[ { John } ] [ hated the meeting ]"); + checkScope("[ { John } ] [ is a cleverer politician than Bill ]"); + checkScope("[ { John } ] [ is a fatter politician than Bill ]"); + checkScope("[ { John } ] [ is a former successful university student ]"); + checkScope("[ { John } ] [ is a former university student ]"); + checkScope("[ { John } ] [ is a man and Mary is a woman ]"); + checkScope("[ { John } ] [ is a successful former university student ]"); + checkScope("[ { John } ] [ is a university student ]"); + checkScope("[ { John } ] [ is cleverer than Bill ]"); + checkScope("[ { John } ] [ is fatter than Bill ]"); + checkScope("[ { John } ] [ is going to Paris by car , and the students by train ]"); + checkScope("[ { John } ] [ is successful ]"); +// checkScope("[ { John } ] [ needed to buy a car ] and Bill did "); // interesting example; also, parse error + checkScope("[ { John } ] [ owns a car ]"); + checkScope("[ { John } ] [ owns a fast red car ]"); + checkScope("[ { John } ] [ owns a red car ]"); + checkScope("[ { John } ] [ represents his company ] and so does Mary"); + checkScope("[ { John } ] [ said Bill had been hurt ]"); + checkScope("[ { John } ] [ said Bill had hurt himself ]"); + checkScope("[ { John } ] [ said Bill wrote a report ]"); + checkScope("[ { John } ] [ said Mary wrote a report ] , and Bill did too"); // interesting example +// checkScope("[ { John } ] [ said that Mary wrote a report ] , and that Bill did too"); // fix me (bad scope) + checkScope("[ { John } ] [ spoke to Mary ]"); + checkScope("[ { John } ] [ spoke to Mary at four o'clock ]"); + checkScope("[ { John } ] [ spoke to Mary on Friday ]"); + checkScope("[ { John } ] [ spoke to Mary on Monday ]"); + checkScope("[ { John } ] [ spoke to Mary on Thursday ]"); + checkScope("[ { John } ] [ spoke to Sue ]"); + checkScope("[ { John } ] [ wanted to buy a car ] , and he did"); + checkScope("[ { John } ] [ wants to know how many men work part time ]"); + checkScope("[ { John } ] [ wants to know how many men work part time , and which ]"); + checkScope("[ { John } ] [ wants to know how many women work part time ]"); + checkScope("[ { John } ] [ wants to know which men work part time ]"); + checkScope("[ { John } ] [ went to Paris by car ]"); + checkScope("[ { John } ] [ went to Paris by car , and Bill by train ]"); + checkScope("[ { John } ] [ went to Paris by car , and Bill by train to Berlin ]"); + checkScope("[ { John } ] [ went to Paris by car , and Bill to Berlin ]"); + checkScope("[ { John } ] [ wrote a report ]"); +// checkScope("[ { John } ] [ wrote a report ] , and Bill said Peter did too ]"); // fix me + + checkScope("[ { Jones } ] [ claimed Smith had costed Jones ' proposal ]"); + checkScope("[ { Jones } ] [ claimed Smith had costed Smith 's proposal ]"); + checkScope("[ { Jones } ] [ claimed he had costed 
Smith 's proposal ]"); + checkScope("[ { Jones } ] [ claimed he had costed his own proposal ]"); + checkScope("[ { Jones } ] [ graduated in March ] and has been employed ever since"); + checkScope("[ { Jones } ] [ has a company car ]"); + checkScope("[ { Jones } ] [ has been unemployed in the past ]"); + checkScope("[ { Jones } ] [ has more than one company car ]"); + checkScope("[ { Jones } ] [ is an APCOM manager ]"); + checkScope("[ { Jones } ] [ is the chairman of ITEL ]"); + checkScope("[ { Jones } ] [ left after Anderson left ]"); + checkScope("[ { Jones } ] [ left after Anderson was present ]"); + checkScope("[ { Jones } ] [ left after Smith left ]"); + checkScope("[ { Jones } ] [ left before Anderson left ]"); + checkScope("[ { Jones } ] [ left before Smith left ]"); + checkScope("[ { Jones } ] [ left the meeting ]"); + checkScope("[ { Jones } ] [ represents Jones 's company ]"); + checkScope("[ { Jones } ] [ represents Smith 's company ]"); + checkScope("[ { Jones } ] [ revised the contract ]"); + checkScope("[ { Jones } ] [ revised the contract after Smith did ]"); + checkScope("[ { Jones } ] [ revised the contract before Smith did ]"); + checkScope("[ { Jones } ] [ signed another contract ]"); + checkScope("[ { Jones } ] [ signed the contract ]"); + checkScope("[ { Jones } ] [ signed two contracts ]"); + checkScope("[ { Jones } ] [ swam after Smith swam ]"); + checkScope("[ { Jones } ] [ swam to the shore ]"); + checkScope("[ { Jones } ] [ swam to the shore after Smith swam to the shore ]"); + checkScope("[ { Jones } ] [ was present ]"); + checkScope("[ { Jones } ] [ was present after Smith was present ]"); + checkScope("[ { Jones } ] [ was present before Smith was present ]"); + checkScope("[ { Jones } ] [ was unemployed at some time before he graduated ]"); + checkScope("[ { Jones } ] [ was writing a report ]"); + checkScope("[ { Jones } ] [ was writing a report after Smith was writing a report ]"); + checkScope("[ { Jones } ] [ was writing a report before Smith was writing a report ]"); + + checkScope("[ { Kim } ] [ is a clever person ]"); + checkScope("[ { Kim } ] [ is a clever politician ]"); + checkScope("[ { Kim } ] [ is clever ]"); + + checkScope("[ { MFI } ] [ has a service contract for all its computers ]"); + checkScope("[ { MFI } ] [ is a customer that owns exactly one computer ]"); + checkScope("[ { MFI } ] [ is a customer that owns several computers ]"); + + checkScope("[ { Mary } ] [ has a workstation ]"); + checkScope("[ { Mary } ] [ is a student ]"); + checkScope("[ { Mary } ] [ is female ]"); + checkScope("[ { Mary } ] [ represents John 's company ]"); + checkScope("[ { Mary } ] [ represents her own company ]"); + checkScope("[ { Mary } ] [ used a workstation ]"); + checkScope("[ { Mary } ] [ used her workstation ]"); + + checkScope("[ { Mickey } ] [ is a large animal ]"); + checkScope("[ { Mickey } ] [ is a large mouse ]"); + checkScope("[ { Mickey } ] [ is a small animal ]"); + checkScope("[ { Mickey } ] [ is larger than Dumbo ]"); + checkScope("[ { Mickey } ] [ is smaller than Dumbo ]"); + + checkScope("[ { Pavarotti } ] [ is a leading tenor who comes cheap ]"); + } + + @Test + public void fracasSentencesWithAtMostAtLeast() { + checkScope("{ At least three } [ commissioners ] [ spend a lot of time at home ]"); + checkScope("{ At least three } [ commissioners ] [ spend time at home ]"); + checkScope("{ At least three } [ female commissioners ] [ spend time at home ]"); + checkScope("{ At least three } [ male commissioners ] [ spend time at home ]"); + 
checkScope("{ At least three } [ tenors ] [ will take part in the concert ]"); + checkScope("{ At most ten } [ commissioners ] [ spend a lot of time at home ]"); + checkScope("{ At most ten } [ commissioners ] [ spend time at home ]"); + checkScope("{ At most ten } [ female commissioners ] [ spend time at home ]"); + + checkScope("{ Just one } [ accountant ] [ attended the meeting ]"); + } + + @Test + public void fracasSentencesWithPureNumbers() { + checkScope("{ Eight } [ machines ] [ have been removed ]"); + + checkScope("{ Five } [ men ] [ work part time ]"); + checkScope("{ Forty five } [ women ] [ work part time ]"); + + checkScope("{ Six } [ accountants ] [ signed the contract ]"); + checkScope("{ Six } [ lawyers ] [ signed the contract ]"); + +// checkScope("{ Ten } [ machines ] [ were here yesterday ]"); // TODO(gabor) yesterday doesn't come into scope + + checkScope("{ Twenty } [ men ] [ work in the Sales Department ]"); + checkScope("{ Two } [ machines ] [ have been removed ]"); + checkScope("{ Two } [ women ] [ work in the Sales Department ]"); + } + + @Test + public void fracasSentencesWithBoth() { + checkScope("{ Both } [ commissioners ] [ used to be businessmen ]"); + checkScope("{ Both } [ commissioners ] [ used to be leading businessmen ]"); + checkScope("{ Both } [ leading tenors ] [ are excellent ]"); + checkScope("{ Both } [ leading tenors ] [ are indispensable ]"); + } + + @Test + public void fracasSentencesWithMany() { + checkScope("{ Many } [ British delegates ] [ obtained interesting results from the survey ]"); + checkScope("{ Many } [ delegates ] [ obtained interesting results from the survey ]"); + checkScope("{ Many } [ delegates ] [ obtained results from the survey ]"); + checkScope("{ Many } [ great tenors ] [ are German ]"); + } + + @Test + public void fracasSentencesWithMost() { + checkScope("{ Most } [ Europeans ] [ can travel freely within Europe ]"); + checkScope("{ Most } [ Europeans who are resident in Europe ] [ can travel freely within Europe ]"); + checkScope("{ Most } [ Europeans who are resident outside Europe ] [ can travel freely within Europe ]"); + checkScope("{ Most } [ clients at the demonstration ] [ were impressed by the system 's performance ]"); + checkScope("{ Most } [ companies that own a computer ] [ have a service contract for it ]"); + checkScope("{ Most } [ great tenors ] [ are Italian ]"); + } + + @Test + public void fracasSentencesWithNeither() { + checkScope("{ Neither } [ commissioner ] [ spends a lot of time at home ]"); + checkScope("{ Neither } [ commissioner ] [ spends time at home ]"); + checkScope("{ Neither } [ leading tenor ] [ comes cheap ]"); + } + + @Test + public void fracasSentencesWithBinaryNo() { + checkScope("{ No } [ Scandinavian delegate ] [ finished the report on time ]"); + checkScope("{ No } [ accountant ] [ attended the meeting ]"); + checkScope("{ No } [ accountants ] [ attended the meeting ]"); + checkScope("{ No } [ delegate ] [ finished the report ]"); + checkScope("{ No } [ really great tenors ] [ are modest ]"); +// checkScope("{ No } [ representative ] [ took less than half a day to read the report ]"); // TODO(gabor) vmod issue again + checkScope("{ No } [ student ] [ used her workstation ]"); + checkScope("{ No } [ two representatives ] [ have read it at the same time ]"); + checkScope("{ No } [ delegate ] [ finished the report on time ]"); + } + + @Test + public void fracasSentencesWithBinaryNoOne() { + // Ignore "no one" for now. 
+// checkScope("{ No one } [ can gamble ] [ when he is broke ]"); // interesting: subject object reversal (we of course don't actually get this...) +// checkScope("{ No one } [ gambling seriously ] [ stops until he is broke ]"); +// checkScope("{ No one } [ who starts gambling seriously ] [ stops until he is broke ]"); + + checkScope("{ Nobody } [ who is asleep ] [ ever knows that he is asleep ]"); + } + + +} diff --git a/itest/src/edu/stanford/nlp/naturalli/PolarityITest.java b/itest/src/edu/stanford/nlp/naturalli/PolarityITest.java new file mode 100644 index 0000000000..0fb97eb82f --- /dev/null +++ b/itest/src/edu/stanford/nlp/naturalli/PolarityITest.java @@ -0,0 +1,66 @@ +package edu.stanford.nlp.naturalli; + +import edu.stanford.nlp.ling.CoreAnnotations; +import edu.stanford.nlp.ling.CoreLabel; +import edu.stanford.nlp.pipeline.Annotation; +import edu.stanford.nlp.pipeline.StanfordCoreNLP; +import org.junit.*; + +import java.util.List; +import java.util.Properties; + +import static org.junit.Assert.*; + + +/** + * A test to make sure {@link edu.stanford.nlp.naturalli.NaturalLogicAnnotator} marks the right polarities for the tokens + * in the sentence. + * + * @author Gabor Angeli + */ +public class PolarityITest { + + private static final StanfordCoreNLP pipeline = new StanfordCoreNLP(new Properties(){{ + setProperty("annotators", "tokenize,ssplit,pos,lemma,parse"); + setProperty("ssplit.isOneSentence", "true"); + setProperty("tokenize.class", "PTBTokenizer"); + setProperty("tokenize.language", "en"); + }}); + + static { + pipeline.addAnnotator(new NaturalLogicAnnotator()); + } + + @SuppressWarnings("unchecked") + private Polarity[] annotate(String text) { + Annotation ann = new Annotation(text); + pipeline.annotate(ann); + List tokens = ann.get(CoreAnnotations.SentencesAnnotation.class).get(0).get(CoreAnnotations.TokensAnnotation.class); + Polarity[] polarities = new Polarity[tokens.size()]; + for (int i = 0; i < tokens.size(); ++i) { + polarities[i] = tokens.get(i).get(NaturalLogicAnnotations.PolarityAnnotation.class); + } + return polarities; + } + + @Test + public void allCatsHaveTails() { + Polarity[] p = annotate("all cats have tails"); + assertTrue(p[0].isUpwards()); + assertTrue(p[1].isDownwards()); + assertTrue(p[2].isUpwards()); + assertTrue(p[3].isUpwards()); + } + + @Test + public void someCatsDontHaveTails() { + Polarity[] p = annotate("some cats don't have tails"); + assertTrue(p[0].isUpwards()); + assertTrue(p[1].isUpwards()); + assertTrue(p[2].isUpwards()); + assertTrue(p[3].isUpwards()); + assertTrue(p[4].isDownwards()); + assertTrue(p[5].isDownwards()); + } + +} diff --git a/itest/src/edu/stanford/nlp/parser/nndep/DependencyParserITest.java b/itest/src/edu/stanford/nlp/parser/nndep/DependencyParserITest.java index d9065c5fb3..a836d883cf 100644 --- a/itest/src/edu/stanford/nlp/parser/nndep/DependencyParserITest.java +++ b/itest/src/edu/stanford/nlp/parser/nndep/DependencyParserITest.java @@ -20,8 +20,6 @@ import junit.framework.TestCase; import edu.stanford.nlp.util.StringUtils; -import org.hamcrest.CoreMatchers; -import org.junit.matchers.JUnitMatchers; import static java.util.stream.Collectors.toList; import static org.junit.Assert.assertThat; @@ -92,8 +90,8 @@ public void testCCProcess() { Collection dependencies = ccProcessed.typedDependencies(); GrammaticalRelation expected = EnglishGrammaticalRelations.getConj("and"); - assertThat(dependencies.stream().map(d -> d.reln()).collect(toList()), - hasItem(expected)); + 
assertThat(dependencies.stream().map(TypedDependency::reln).collect(toList()), + hasItem(expected)); } /** diff --git a/itest/src/edu/stanford/nlp/pipeline/ProtobufAnnotationSerializerSlowITest.java b/itest/src/edu/stanford/nlp/pipeline/ProtobufAnnotationSerializerSlowITest.java index 9aae429cd8..d8bb83b58a 100644 --- a/itest/src/edu/stanford/nlp/pipeline/ProtobufAnnotationSerializerSlowITest.java +++ b/itest/src/edu/stanford/nlp/pipeline/ProtobufAnnotationSerializerSlowITest.java @@ -369,6 +369,11 @@ public void testSerializeSSplitTokensRegression() { testAnnotators("tokenize,ssplit"); } + @Test + public void testSerializeNatLog() { + testAnnotators("tokenize,ssplit,pos,lemma,parse,natlog"); + } + /** * Is the protobuf annotator "CoreNLP complete?" * That is, does it effectively save every combination of annotators possible? diff --git a/itest/src/edu/stanford/nlp/pipeline/StanfordCoreNLPITest.java b/itest/src/edu/stanford/nlp/pipeline/StanfordCoreNLPITest.java index 1ec50526a4..60be668bb1 100644 --- a/itest/src/edu/stanford/nlp/pipeline/StanfordCoreNLPITest.java +++ b/itest/src/edu/stanford/nlp/pipeline/StanfordCoreNLPITest.java @@ -245,8 +245,19 @@ public void testSerialization() processSerialization(sentence.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class)); processSerialization(sentence); - Object newDocument = processSerialization(document); - assertTrue(newDocument instanceof Annotation); + Object processed = processSerialization(document); + assertTrue(processed instanceof Annotation); + Annotation newDocument = (Annotation) processed; + assertEquals(document.get(CoreAnnotations.SentencesAnnotation.class).size(), + newDocument.get(CoreAnnotations.SentencesAnnotation.class).size()); + for (int i = 0; i < document.get(CoreAnnotations.SentencesAnnotation.class).size(); ++i) { + CoreMap oldSentence = document.get(CoreAnnotations.SentencesAnnotation.class).get(i); + CoreMap newSentence = newDocument.get(CoreAnnotations.SentencesAnnotation.class).get(i); + assertEquals(oldSentence.get(TreeCoreAnnotations.TreeAnnotation.class), + newSentence.get(TreeCoreAnnotations.TreeAnnotation.class)); + assertEquals(oldSentence.get(CoreAnnotations.TokensAnnotation.class), + newSentence.get(CoreAnnotations.TokensAnnotation.class)); + } assertTrue(document.equals(newDocument)); } diff --git a/itest/src/edu/stanford/nlp/semgraph/semgrex/SemgrexPatternITest.java b/itest/src/edu/stanford/nlp/semgraph/semgrex/SemgrexPatternITest.java deleted file mode 100644 index fcaef4e26d..0000000000 --- a/itest/src/edu/stanford/nlp/semgraph/semgrex/SemgrexPatternITest.java +++ /dev/null @@ -1,36 +0,0 @@ -package edu.stanford.nlp.semgraph.semgrex; - -import edu.stanford.nlp.ling.CoreAnnotations; -import edu.stanford.nlp.pipeline.Annotation; -import edu.stanford.nlp.pipeline.StanfordCoreNLP; -import edu.stanford.nlp.semgraph.SemanticGraph; -import edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations; -import edu.stanford.nlp.util.CoreMap; -import junit.framework.TestCase; -import org.junit.Test; - -import java.util.Properties; - -/** - * Created by sonalg on 7/15/14.
- */ -public class SemgrexPatternITest extends TestCase { - - @Test - public void testNER() throws Exception{ - String sentence = "John lives in Washington."; - Properties props = new Properties(); - props.setProperty("annotators","tokenize, ssplit, pos, lemma, ner, parse"); - StanfordCoreNLP pipeline = new StanfordCoreNLP(props); - Annotation doc = new Annotation(sentence); - pipeline.annotate(doc); - CoreMap sent = doc.get(CoreAnnotations.SentencesAnnotation.class).get(0); - SemanticGraph graph = sent.get(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class); - graph.prettyPrint(); - String patStr = "({word:/lives/} >/prep_in/ {word:/\\QCalifornia\\E|\\QWashington\\E/} >nsubj {ner:PERSON})"; - SemgrexPattern pat = SemgrexPattern.compile(patStr); - SemgrexMatcher mat = pat.matcher(graph, true); - assertTrue(mat.find()); - } - -} diff --git a/itest/src/edu/stanford/nlp/sentiment/SentimentTrainingITest.java b/itest/src/edu/stanford/nlp/sentiment/SentimentTrainingITest.java index b16c296f7f..5f0cc4f540 100644 --- a/itest/src/edu/stanford/nlp/sentiment/SentimentTrainingITest.java +++ b/itest/src/edu/stanford/nlp/sentiment/SentimentTrainingITest.java @@ -19,5 +19,23 @@ public void testGradientCheck() { assertTrue("Gradient check failed with random seed of " + op.randomSeed, SentimentTraining.runGradientCheck(model, trainingTrees)); } + + /** + * Because the regularizations are typically set to be 0.001 of the + * total cost, it is important to test those gradients with the reg + * values turned up a lot. + */ + public void testRegularizationGradientCheck() { + List trainingTrees = SentimentUtils.readTreesWithGoldLabels(TRAIN_PATH); + RNNOptions op = new RNNOptions(); + op.numHid = 5; + op.trainOptions.regTransformMatrix = 10.0; + op.trainOptions.regTransformTensor = 10.0; + op.trainOptions.regClassification = 10.0; + op.trainOptions.regWordVector = 10.0; + SentimentModel model = new SentimentModel(op, trainingTrees); + assertTrue("Gradient check failed with random seed of " + op.randomSeed, + SentimentTraining.runGradientCheck(model, trainingTrees)); + } } diff --git a/lib/README b/lib/README index 038ca90625..adbc0d3f95 100644 --- a/lib/README +++ b/lib/README @@ -323,7 +323,7 @@ LAST UPDATE BY: Spence Green ----------------------------------------------------------------- protobuf.jar ORIGINAL JAR NAME: -VERSION: 2.4.1 +VERSION: 4.1 RELEASE DATE: April 2011 SOURCE AVAILABLE: yes DESCRIPTION: Google's protocol buffer library diff --git a/src/edu/stanford/nlp/classify/ColumnDataClassifier.java b/src/edu/stanford/nlp/classify/ColumnDataClassifier.java index 60945a5d97..32a3dcb3e0 100644 --- a/src/edu/stanford/nlp/classify/ColumnDataClassifier.java +++ b/src/edu/stanford/nlp/classify/ColumnDataClassifier.java @@ -160,12 +160,18 @@ * useAllSplitWordPairsbooleanfalseMake features from all pairs of "words" that are returned by dividing the string into splitWords. Requires splitWordsRegexp or splitWordsTokenizerRegexp.ASWP-str1-str2 * useAllSplitWordTriplesbooleanfalseMake features from all triples of "words" that are returned by dividing the string into splitWords. Requires splitWordsRegexp or splitWordsTokenizerRegexp.ASWT-str1-str2-str3 * useSplitWordNGramsbooleanfalseMake features of adjacent word n-grams of lengths between minWordNGramLeng and maxWordNGramLeng inclusive. 
Note that these are word sequences, not character n-grams.SW#-str1-str2-strN + * + * splitWordCountbooleanfalseThe value of this real-valued feature is the number of split word tokens in the column.SWNUM + * logSplitWordCountbooleanfalseThe value of this real-valued feature is the log of the number of split word tokens in the column.LSWNUM + * binnedSplitWordCountsStringnullIf non-null, treat as a sequence of comma-separated integer bounds, where items above the previous bound (if any) up to the next bound (inclusive) are binned (e.g., "1,5,15,30,60"). The feature represents the number of split words in this column.SWNUMBIN-range + * maxWordNGramLengint-1If this number is positive, word n-grams above this size will not be used in the model * minWordNGramLengint1Must be positive. word n-grams below this size will not be used in the model * wordNGramBoundaryRegexpStringnullIf this is defined and the regexp matches, then the ngram stops * useSplitFirstLastWordsbooleanfalseMake a feature from each of the first and last "words" that are returned as splitWords. This is equivalent to having word bigrams with boundary tokens at each end of the sequence (they get a special feature). Requires splitWordsRegexp or splitWordsTokenizerRegexp.SFW-str, SLW-str * useSplitNGramsbooleanfalseMake features from letter n-grams - internal as well as edge all treated the same - after the data string has been split into tokens. Requires splitWordsRegexp or splitWordsTokenizerRegexp.S#-str * useSplitPrefixSuffixNGramsbooleanfalseMake features from prefixes and suffixes of each token, after splitting string with splitWordsRegexp. Requires splitWordsRegexp or splitWordsTokenizerRegexp.S#B-str, S#E-str + * useNGramsbooleanfalseMake features from letter n-grams - internal as well as edge all treated the same.#-str * usePrefixSuffixNGramsbooleanfalseMake features from prefix and suffix substrings of the string.#B-str, #E-str * lowercasebooleanfalseMake the input string lowercase so all features work uncased @@ -224,6 +230,10 @@ */ public class ColumnDataClassifier { + // todo [cdm 2014]: support reading files with comment lines starting with "#" or to ignore a first column headers line + // todo [cdm 2014]: be able to run on a test file without gold answers. For doing Kaggle competitions. + // todo [cdm 2014]: support lowercaseSplitWordPairs. + private static final double DEFAULT_VALUE = 1.0; // default value for setting categorical, boolean features private static final String DEFAULT_IGNORE_REGEXP = "\\s+"; @@ -419,6 +429,10 @@ private Pair, List> readDataset(String f if (inTestPhase) { lineInfos.add(strings); } + if (strings.length < flags.length) { + throw new RuntimeException("Error: Line has too few tab-separated columns (" + maxColumns + + ") for " + flags.length + " columns required by specified properties: " + line); + } dataset.add(makeDatumFromStrings(strings)); } if (lineNo > 0 && minColumns != maxColumns) { @@ -427,7 +441,7 @@ private Pair, List> readDataset(String f filename + " varies between " + minColumns + " and " + maxColumns); } } catch (Exception e) { - throw new RuntimeException("Dataset could not be processed", e); + throw new RuntimeException("Dataset could not be loaded", e); } } @@ -524,7 +538,7 @@ private Pair writeResultsSummary(int num, Counter contin * Write out an answer, and update statistics. 
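 * If a line has no gold answer column (for example, an unlabeled test file), the
 * gold answer is treated as the empty string.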
*/ private void writeAnswer(String[] strs, String clAnswer, Distribution cntr, Counter contingency, Classifier c, double sim) { - String goldAnswer = strs[globalFlags.goldAnswerColumn]; + String goldAnswer = globalFlags.goldAnswerColumn < strs.length ? strs[globalFlags.goldAnswerColumn]: ""; String printedText = ""; if (globalFlags.displayedColumn >= 0) { printedText = strs[globalFlags.displayedColumn]; @@ -726,6 +740,7 @@ private Pair testExamples(Classifier cl, General * @return The constructed Datum */ private Datum makeDatum(String[] strs) { + String goldAnswer = globalFlags.goldAnswerColumn < strs.length ? strs[globalFlags.goldAnswerColumn]: ""; List theFeatures = new ArrayList(); Collection globalFeatures = Generics.newHashSet(); if (globalFlags.useClassFeature) { @@ -735,7 +750,7 @@ private Datum makeDatum(String[] strs) { for (int i = 0; i < flags.length; i++) { Collection featuresC = Generics.newHashSet();//important that this is a hash set to prevent same feature from being added multiple times - makeDatum(strs[i], flags[i], featuresC, strs[globalFlags.goldAnswerColumn]); + makeDatum(strs[i], flags[i], featuresC, goldAnswer); addAllInterningAndPrefixing(theFeatures, featuresC, i + "-"); } @@ -743,7 +758,7 @@ private Datum makeDatum(String[] strs) { printFeatures(strs, theFeatures); } //System.out.println("Features are: " + theFeatures); - return new BasicDatum(theFeatures, strs[globalFlags.goldAnswerColumn]); + return new BasicDatum(theFeatures, goldAnswer); } /** @@ -755,6 +770,7 @@ private Datum makeDatum(String[] strs) { * @return The constructed RVFDatum */ private RVFDatum makeRVFDatum(String[] strs) { + String goldAnswer = globalFlags.goldAnswerColumn < strs.length ? strs[globalFlags.goldAnswerColumn]: ""; ClassicCounter theFeatures = new ClassicCounter(); ClassicCounter globalFeatures = new ClassicCounter(); if (globalFlags.useClassFeature) { @@ -764,7 +780,7 @@ private RVFDatum makeRVFDatum(String[] strs) { for (int i = 0; i < flags.length; i++) { ClassicCounter featuresC = new ClassicCounter(); - makeDatum(strs[i], flags[i], featuresC, strs[globalFlags.goldAnswerColumn]); + makeDatum(strs[i], flags[i], featuresC, goldAnswer); addAllInterningAndPrefixingRVF(theFeatures, featuresC, i + "-"); } @@ -772,7 +788,7 @@ private RVFDatum makeRVFDatum(String[] strs) { printFeatures(strs, theFeatures); } //System.out.println("Features are: " + theFeatures); - return new RVFDatum(theFeatures, strs[globalFlags.goldAnswerColumn]); + return new RVFDatum(theFeatures, goldAnswer); } private void addAllInterningAndPrefixingRVF(ClassicCounter accumulator, ClassicCounter addend, String prefix) { @@ -849,6 +865,8 @@ private static void addFeature(Object features, F newFeature, double value) * Extracts all the features from a certain input column. * * @param cWord The String to extract data from + * @param goldAns The goldAnswer for this whole datum or emptyString if none. 
+ * This is used only for filling in the binned lengths histogram counters */ private void makeDatum(String cWord, Flags flags, Object featuresC, String goldAns) { @@ -939,6 +957,22 @@ private void makeDatum(String cWord, Flags flags, Object featuresC, String goldA System.err.println(Arrays.toString(bits)); } + if (flags.splitWordCount) { + addFeature(featuresC, "SWNUM", bits.length); + } + if (flags.logSplitWordCount) { + addFeature(featuresC, "LSWNUM", Math.log(bits.length)); + } + if (flags.binnedSplitWordCounts != null) { + String featureName = null; + for (int i = 0; i <= flags.binnedSplitWordCounts.length; i++) { + if (i == flags.binnedSplitWordCounts.length || bits.length <= flags.binnedSplitWordCounts[i]) { + featureName = "SWNUMBIN-" + ((i == 0) ? 0 : (flags.binnedSplitWordCounts[i - 1] + 1)) + '-' + ((i == flags.binnedSplitWordCounts.length) ? "Inf" : Integer.toString(flags.binnedSplitWordCounts[i])); + break; + } + } + addFeature(featuresC, featureName, DEFAULT_VALUE); + } // add features over splitWords for (int i = 0; i < bits.length; i++) { if (flags.useSplitWords) { @@ -1012,7 +1046,7 @@ private void makeDatum(String cWord, Flags flags, Object featuresC, String goldA addFeature(featuresC,"SSHAPE-" + shape,DEFAULT_VALUE); } } - } + } // end if uses some split words features if (flags.wordShape > WordShapeClassifier.NOWORDSHAPE) { String shape = edu.stanford.nlp.process.WordShapeClassifier.wordShape(cWord, flags.wordShape); @@ -1420,9 +1454,7 @@ private Flags[] setProperties(Properties props) { key = matcher.group(2); } if (col >= myFlags.length) { - Flags[] newFl = new Flags[col + 1]; - System.arraycopy(myFlags, 0, newFl, 0, myFlags.length); - myFlags = newFl; + myFlags = Arrays.copyOf(myFlags, col + 1); } if (myFlags[col] == null) { myFlags[col] = new Flags(); @@ -1440,7 +1472,19 @@ private Flags[] setProperties(Properties props) { } } else if (key.equals("binnedLengthsStatistics")) { if (Boolean.parseBoolean(val)) { - myFlags[col].binnedLengthsCounter = new TwoDimensionalCounter(); + myFlags[col].binnedLengthsCounter = new TwoDimensionalCounter(); + } + } else if (key.equals("splitWordCount")) { + myFlags[col].splitWordCount = Boolean.parseBoolean(val); + } else if (key.equals("logSplitWordCount")) { + myFlags[col].logSplitWordCount = Boolean.parseBoolean(val); + } else if (key.equals("binnedSplitWordCounts")) { + if (val != null) { + String[] binnedSplitWordCountStrs = val.split("[, ]+"); + myFlags[col].binnedSplitWordCounts = new int[binnedSplitWordCountStrs.length]; + for (int i = 0; i < myFlags[col].binnedSplitWordCounts.length; i++) { + myFlags[col].binnedSplitWordCounts[i] = Integer.parseInt(binnedSplitWordCountStrs[i]); + } } } else if (key.equals("countChars")) { myFlags[col].countChars = val.toCharArray(); @@ -2050,6 +2094,10 @@ static class Flags implements Serializable { static boolean csvFormat = false; //train and test files are in csv format boolean splitWordsWithPTBTokenizer = false; + boolean splitWordCount; + boolean logSplitWordCount; + int[] binnedSplitWordCounts; + @Override public String toString() { return "Flags[" + diff --git a/src/edu/stanford/nlp/dcoref/CorefChain.java b/src/edu/stanford/nlp/dcoref/CorefChain.java index 886cc1acda..b212b202d8 100644 --- a/src/edu/stanford/nlp/dcoref/CorefChain.java +++ b/src/edu/stanford/nlp/dcoref/CorefChain.java @@ -98,7 +98,7 @@ public int hashCode() { /** get CorefMention by position */ public Set getMentionsWithSameHead(int sentenceNumber, int headIndex) { - return mentionMap.get(new IntPair(sentenceNumber, 
headIndex)); + return getMentionsWithSameHead(new IntPair(sentenceNumber, headIndex)); } public Map> getMentionMap() { return mentionMap; } @@ -144,6 +144,7 @@ public static class CorefMention implements Serializable { public final IntTuple position; public final String mentionSpan; + /** This constructor is used to recreate a CorefMention following serialization. */ public CorefMention(MentionType mentionType, Number number, Gender gender, @@ -170,6 +171,7 @@ public CorefMention(MentionType mentionType, this.mentionSpan = mentionSpan; } + /** This constructor builds the external CorefMention class from the internal Mention. */ public CorefMention(Mention m, IntTuple pos){ mentionType = m.mentionType; number = m.number; @@ -232,10 +234,8 @@ public int hashCode() { } @Override - public String toString(){ - StringBuilder s = new StringBuilder(); - s.append('"').append(mentionSpan).append('"').append(" in sentence ").append(sentNum); - return s.toString(); + public String toString() { + return '"' + mentionSpan + "\" in sentence " + sentNum; // return "(sentence:" + sentNum + ", startIndex:" + startIndex + "-endIndex:" + endIndex + ")"; } @@ -263,7 +263,9 @@ private boolean moreRepresentativeThan(CorefMention m) { } private static final long serialVersionUID = 3657691243504173L; - } + + } // end static class CorefMention + protected static class CorefMentionComparator implements Comparator { @Override diff --git a/src/edu/stanford/nlp/graph/DirectedMultiGraph.java b/src/edu/stanford/nlp/graph/DirectedMultiGraph.java index edcdc0e7a6..b239c5adcd 100644 --- a/src/edu/stanford/nlp/graph/DirectedMultiGraph.java +++ b/src/edu/stanford/nlp/graph/DirectedMultiGraph.java @@ -2,6 +2,7 @@ import java.util.*; +import edu.stanford.nlp.semgraph.SemanticGraphEdge; import edu.stanford.nlp.util.CollectionUtils; import edu.stanford.nlp.util.Generics; import edu.stanford.nlp.util.MapFactory; @@ -498,23 +499,26 @@ public Iterable edgeIterable() { } static class EdgeIterator implements Iterator { + private final Map>> incomingEdges; private Iterator>> vertexIterator; private Iterator> connectionIterator; private Iterator edgeIterator; + private E lastRemoved = null; private boolean hasNext = true; public EdgeIterator(DirectedMultiGraph graph) { vertexIterator = graph.outgoingEdges.values().iterator(); + incomingEdges = graph.incomingEdges; } public EdgeIterator(Map>> source, V startVertex) { Map> neighbors = source.get(startVertex); - if (neighbors == null) { - return; + if (neighbors != null) { + vertexIterator = null; + connectionIterator = neighbors.values().iterator(); } - vertexIterator = null; - connectionIterator = neighbors.values().iterator(); + incomingEdges = null; } @Override @@ -528,7 +532,8 @@ public E next() { if (!hasNext()) { throw new NoSuchElementException("Graph edge iterator exhausted."); } - return edgeIterator.next(); + lastRemoved = edgeIterator.next(); + return lastRemoved; } private void primeIterator() { @@ -547,7 +552,19 @@ private void primeIterator() { @Override public void remove() { - edgeIterator.remove(); + if (incomingEdges == null) { + throw new UnsupportedOperationException("remove() is only valid if iterating over entire graph (Gabor was too lazy to implement the general case...sorry!)"); + } + if (lastRemoved != null) { + if (lastRemoved instanceof SemanticGraphEdge) { + SemanticGraphEdge edge = (SemanticGraphEdge) lastRemoved; + //noinspection unchecked + incomingEdges.get((V) edge.getDependent()).get((V) edge.getGovernor()).remove((E) edge); + edgeIterator.remove(); + } 
else { + throw new UnsupportedOperationException("remove() is only valid if iterating over semantic graph edges (Gabor was too lazy to implement the general case...sorry!)"); + } + } } } diff --git a/src/edu/stanford/nlp/ie/AbstractSequenceClassifier.java b/src/edu/stanford/nlp/ie/AbstractSequenceClassifier.java index 84961b1446..22b2f8c9fb 100644 --- a/src/edu/stanford/nlp/ie/AbstractSequenceClassifier.java +++ b/src/edu/stanford/nlp/ie/AbstractSequenceClassifier.java @@ -522,8 +522,8 @@ public String apply(String in) { * divided into documents according to (heuristically * determined) sentence boundaries. * @param outputFormat - * The format to put the output in: one of "slashTags", "xml", or - * "inlineXML" + * The format to put the output in: one of "slashTags", "xml", + * "inlineXML", "tsv", or "tabbedEntities" * @param preserveSpacing * Whether to preserve the input spacing between tokens, which may * sometimes be none (true) or whether to tokenize the text and print @@ -608,8 +608,7 @@ public String classifyToString(String sentences) { * marker, and that abbreviation is part of a named entity, the reported * entity string excludes the period. * - * @param sentences - * The string to be classified + * @param sentences The string to be classified * @return A {@link List} of {@link Triple}s, each of which gives an entity * type and the beginning and ending character offsets. */ @@ -617,8 +616,7 @@ public List> classifyToCharacterOffsets(String ObjectBank> documents = makeObjectBankFromString(sentences, plainTextReaderAndWriter); - List> entities = - new ArrayList>(); + List> entities = new ArrayList<>(); for (List doc : documents) { String prevEntityType = flags.backgroundSymbol; Triple prevEntity = null; @@ -637,8 +635,9 @@ public List> classifyToCharacterOffsets(String if (prevEntity != null) { entities.add(prevEntity); } - prevEntity = new Triple(guessedAnswer, fl - .get(CoreAnnotations.CharacterOffsetBeginAnnotation.class), fl.get(CoreAnnotations.CharacterOffsetEndAnnotation.class)); + prevEntity = new Triple<>(guessedAnswer, + fl.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class), + fl.get(CoreAnnotations.CharacterOffsetEndAnnotation.class)); } else { assert prevEntity != null; // if you read the code carefully, this // should always be true! @@ -1010,15 +1009,13 @@ public void classifyAndWriteAnswers(String testFile, } /** If the flag - * outputEncoding is defined, the output is written in that - * character encoding, otherwise in the system default character encoding. + * {@code outputEncoding} is defined, the output is written in that + * character encoding, otherwise in the system default character encoding. 
*/ public void classifyAndWriteAnswers(String testFile, OutputStream outStream, DocumentReaderAndWriter readerWriter, boolean outputScores) - throws IOException - { - ObjectBank> documents = - makeObjectBankFromFile(testFile, readerWriter); + throws IOException { + ObjectBank> documents = makeObjectBankFromFile(testFile, readerWriter); PrintWriter pw = IOUtils.encodedOutputStreamPrintWriter(outStream, flags.outputEncoding, true); classifyAndWriteAnswers(documents, pw, readerWriter, outputScores); } @@ -1026,23 +1023,19 @@ public void classifyAndWriteAnswers(String testFile, OutputStream outStream, public void classifyAndWriteAnswers(String baseDir, String filePattern, DocumentReaderAndWriter readerWriter, boolean outputScores) - throws IOException - { - ObjectBank> documents = - makeObjectBankFromFiles(baseDir, filePattern, readerWriter); + throws IOException { + ObjectBank> documents = makeObjectBankFromFiles(baseDir, filePattern, readerWriter); classifyAndWriteAnswers(documents, readerWriter, outputScores); } public void classifyFilesAndWriteAnswers(Collection testFiles) - throws IOException - { + throws IOException { classifyFilesAndWriteAnswers(testFiles, plainTextReaderAndWriter, false); } public void classifyFilesAndWriteAnswers(Collection testFiles, DocumentReaderAndWriter readerWriter, boolean outputScores) - throws IOException - { + throws IOException { ObjectBank> documents = makeObjectBankFromFiles(testFiles, readerWriter); classifyAndWriteAnswers(documents, readerWriter, outputScores); @@ -1051,8 +1044,7 @@ public void classifyFilesAndWriteAnswers(Collection testFiles, public void classifyAndWriteAnswers(Collection> documents, DocumentReaderAndWriter readerWriter, boolean outputScores) - throws IOException - { + throws IOException { classifyAndWriteAnswers(documents, IOUtils.encodedOutputStreamPrintWriter(System.out, flags.outputEncoding, true), readerWriter, outputScores); @@ -1065,8 +1057,7 @@ public void classifyAndWriteAnswers(Collection> documents, PrintWriter printWriter, DocumentReaderAndWriter readerWriter, boolean outputScores) - throws IOException - { + throws IOException { if (flags.exportFeatures != null) { dumpFeatures(documents); } @@ -1145,16 +1136,17 @@ public ThreadsafeProcessor, List> newInstance() { * stdout (with timing to stderr). This uses the value of flags.documentReader * to determine testFile format. * - * @param testFile The filename to test on. + * @param testFile The name of the file to test on. 
+ * @param k How many best to print + * @param readerAndWriter */ public void classifyAndWriteAnswersKBest(String testFile, int k, DocumentReaderAndWriter readerAndWriter) - throws IOException - { - ObjectBank> documents = - makeObjectBankFromFile(testFile, readerAndWriter); + throws IOException { + ObjectBank> documents = makeObjectBankFromFile(testFile, readerAndWriter); PrintWriter pw = IOUtils.encodedOutputStreamPrintWriter(System.out, flags.outputEncoding, true); classifyAndWriteAnswersKBest(documents, k, pw, readerAndWriter); + pw.flush(); } /** @@ -1176,10 +1168,10 @@ public void classifyAndWriteAnswersKBest(ObjectBank> documents, int k, List> sorted = Counters.toSortedList(kBest); int n = 1; for (List l : sorted) { - System.out.println(""); + printWriter.println(""); n++; } numSentences++; @@ -1227,7 +1219,7 @@ public void classifyAndWriteViterbiSearchGraph(String testFile, String searchGra } /** - * Write the classifications of the Sequence classifier out to a writer in a + * Write the classifications of the Sequence classifier to a writer in a * format determined by the DocumentReaderAndWriter used. * * @param doc Documents to write out @@ -1236,7 +1228,7 @@ public void classifyAndWriteViterbiSearchGraph(String testFile, String searchGra */ public void writeAnswers(List doc, PrintWriter printWriter, DocumentReaderAndWriter readerAndWriter) - throws IOException { + throws IOException { if (flags.lowerNewgeneThreshold) { return; } diff --git a/src/edu/stanford/nlp/ie/NERClassifierCombiner.java b/src/edu/stanford/nlp/ie/NERClassifierCombiner.java index b2837c451e..e156c67f2a 100644 --- a/src/edu/stanford/nlp/ie/NERClassifierCombiner.java +++ b/src/edu/stanford/nlp/ie/NERClassifierCombiner.java @@ -90,7 +90,8 @@ public boolean appliesNumericClassifiers() { } public boolean usesSUTime() { - return useSUTime; + // if applyNumericClassifiers is false, SUTime isn't run regardless of setting of useSUTime + return useSUTime && applyNumericClassifiers; } private static void copyAnswerFieldsToNERField(List l) { diff --git a/src/edu/stanford/nlp/ie/NERFeatureFactory.java b/src/edu/stanford/nlp/ie/NERFeatureFactory.java index 47bc509cb6..241283c3b8 100644 --- a/src/edu/stanford/nlp/ie/NERFeatureFactory.java +++ b/src/edu/stanford/nlp/ie/NERFeatureFactory.java @@ -51,7 +51,6 @@ import edu.stanford.nlp.sequences.CoNLLDocumentReaderAndWriter; import edu.stanford.nlp.sequences.FeatureFactory; import edu.stanford.nlp.sequences.SeqClassifierFlags; -import edu.stanford.nlp.trees.TreeCoreAnnotations; import edu.stanford.nlp.trees.international.pennchinese.RadicalMap; import edu.stanford.nlp.util.Generics; import edu.stanford.nlp.util.PaddedList; @@ -1168,12 +1167,15 @@ protected Collection featuresC(PaddedList cInfo, int loc) { } if (flags.useNPHead) { - featuresC.add(c.get(TreeCoreAnnotations.HeadWordAnnotation.class) + "-HW"); + // TODO: neat idea, but this would need to be set somewhere. 
+ // Probably should have its own annotation as this one would + // be more narrow and would clobber other potential uses + featuresC.add(c.get(CoreAnnotations.HeadWordStringAnnotation.class) + "-HW"); if (flags.useTags) { - featuresC.add(c.get(TreeCoreAnnotations.HeadWordAnnotation.class) + "-" + c.getString(CoreAnnotations.PartOfSpeechAnnotation.class) + "-HW-T"); + featuresC.add(c.get(CoreAnnotations.HeadWordStringAnnotation.class) + "-" + c.getString(CoreAnnotations.PartOfSpeechAnnotation.class) + "-HW-T"); } if (flags.useDistSim) { - featuresC.add(c.get(TreeCoreAnnotations.HeadWordAnnotation.class) + "-" + c.get(CoreAnnotations.DistSimAnnotation.class) + "-HW-DISTSIM"); + featuresC.add(c.get(CoreAnnotations.HeadWordStringAnnotation.class) + "-" + c.get(CoreAnnotations.DistSimAnnotation.class) + "-HW-DISTSIM"); } } @@ -1188,7 +1190,10 @@ protected Collection featuresC(PaddedList cInfo, int loc) { } if (flags.useHeadGov) { - featuresC.add(c.get(TreeCoreAnnotations.HeadWordAnnotation.class) + "-" + c.get(CoreAnnotations.GovernorAnnotation.class) + "-HW_GW"); + // TODO: neat idea, but this would need to be set somewhere. + // Probably should have its own annotation as this one would + // be more narrow and would clobber other potential uses + featuresC.add(c.get(CoreAnnotations.HeadWordStringAnnotation.class) + "-" + c.get(CoreAnnotations.GovernorAnnotation.class) + "-HW_GW"); } if (flags.useClassFeature) { diff --git a/src/edu/stanford/nlp/ie/crf/CRFClassifier.java b/src/edu/stanford/nlp/ie/crf/CRFClassifier.java index 084cb26332..dea497e381 100644 --- a/src/edu/stanford/nlp/ie/crf/CRFClassifier.java +++ b/src/edu/stanford/nlp/ie/crf/CRFClassifier.java @@ -50,7 +50,7 @@ import java.util.zip.GZIPOutputStream; /** - * Class for Sequence Classification using a Conditional Random Field model. + * Class for sequence classification using a Conditional Random Field model. * The code has functionality for different document formats, but when * using the standard {@link edu.stanford.nlp.sequences.ColumnDocumentReaderAndWriter} for training * or testing models, input files are expected to @@ -71,34 +71,35 @@ * To read from stdin, use the flag -readStdin. The same * reader/writer will be used as for -textFile. *
<p>
- * Typical command-line usage
+ * <b>Typical command-line usage</b>
 * <p>
 * For running a trained model with a provided serialized classifier on a
- * text file: <p>
- * <code>
+ * text file:
+ * <p><blockquote><code>
 * java -mx500m edu.stanford.nlp.ie.crf.CRFClassifier -loadClassifier
 * conll.ner.gz -textFile samplesentences.txt
- * </code>
+ * </code></blockquote>
 * <p>
 * When specifying all parameters in a properties file (train, test, or
 * runtime):
- * <p>
- * <code>
+ * <p>
+ * <blockquote><code>
 * java -mx1g edu.stanford.nlp.ie.crf.CRFClassifier -prop propFile
- * </code>
+ * </code></blockquote>
 * <p>
- * To train and test a simple NER model from the command line:<br>
- * java -mx1000m edu.stanford.nlp.ie.crf.CRFClassifier
+ * To train and test a simple NER model from the command line:
+ * <code>java -mx1000m edu.stanford.nlp.ie.crf.CRFClassifier
 * -trainFile trainFile -testFile testFile -macro > output
 * </code>
 * <p>
- * To train with multiple files:<br>
- * java -mx1000m edu.stanford.nlp.ie.crf.CRFClassifier
+ * To train with multiple files:
+ * <code>java -mx1000m edu.stanford.nlp.ie.crf.CRFClassifier
 * -trainFileList file1,file2,... -testFile testFile -macro > output
 * </code>
 * <p>
 * To test on multiple files, use the -testFiles option and a comma
 * separated list.
 * <p>
+ * <p>
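+ * A minimal programmatic sketch of the equivalent API calls (the model file name
+ * here is illustrative):
+ * <blockquote><code>
+ * AbstractSequenceClassifier<CoreLabel> crf = CRFClassifier.getClassifier("conll.ner.gz");
+ * System.out.println(crf.classifyToString("Jim bought 300 shares of Acme Corp. in 2006."));
+ * </code></blockquote>
+ * <p>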
* Features are defined by a {@link edu.stanford.nlp.sequences.FeatureFactory}. * {@link NERFeatureFactory} is used by default, and you should look * there for feature templates and properties or flags that will cause @@ -115,18 +116,19 @@ * to get a CRFClassifier is to deserialize one via the static * {@link CRFClassifier#getClassifier(String)} methods, which return a * deserialized classifier. You may then tag (classify the items of) documents - * using either the assorted classify() or the assorted - * classify methods in {@link AbstractSequenceClassifier}. + * using either the assorted classify() methods here or the additional + * ones in {@link AbstractSequenceClassifier}. * Probabilities assigned by the CRF can be interrogated using either the * printProbsDocument() or getCliqueTrees() methods. * * @author Jenny Finkel * @author Sonal Gupta (made the class generic) * @author Mengqiu Wang (LOP implementation and non-linear CRF implementation) - * TODO(mengqiu) need to move the embedding lookup and capitalization features into a FeatureFactory */ public class CRFClassifier extends AbstractSequenceClassifier { + // TODO(mengqiu) need to move the embedding lookup and capitalization features into a FeatureFactory + List> labelIndices; Index tagIndex; Pair entityMatrices; @@ -495,7 +497,7 @@ private int[][][] transformDocData(int[][][] docData) { int[] cliqueFeatures = docData[i][j]; transData[i][j] = new int[cliqueFeatures.length]; for (int n = 0; n < cliqueFeatures.length; n++) { - int transFeatureIndex = -1; + int transFeatureIndex; // initialized below; if (j == 0) { transFeatureIndex = nodeFeatureIndicesMap.indexOf(cliqueFeatures[n]); if (transFeatureIndex == -1) @@ -1369,8 +1371,7 @@ public void printProbsDocument(List document) { * label at each point. This gives a simple way to examine the probability * distributions of the CRF. See getCliqueTrees() for more. * - * @param filename - * The path to the specified file + * @param filename The path to the specified file */ public void printFirstOrderProbs(String filename, DocumentReaderAndWriter readerAndWriter) { // only for the OCR data does this matter diff --git a/src/edu/stanford/nlp/ie/crf/CRFCliqueTree.java b/src/edu/stanford/nlp/ie/crf/CRFCliqueTree.java index a85ba0d985..4f1ea07287 100644 --- a/src/edu/stanford/nlp/ie/crf/CRFCliqueTree.java +++ b/src/edu/stanford/nlp/ie/crf/CRFCliqueTree.java @@ -20,23 +20,23 @@ */ public class CRFCliqueTree implements ListeningSequenceModel { - protected final FactorTable[] factorTables; - protected final double z; // norm constant - protected final Index classIndex; + private final FactorTable[] factorTables; + private final double z; // norm constant + private final Index classIndex; private final E backgroundSymbol; private final int backgroundIndex; // the window size, which is also the clique size - protected final int windowSize; + private final int windowSize; // the number of possible classes for each label private final int numClasses; private final int[] possibleValues; - /** Initialize a clique tree */ + /** Initialize a clique tree. */ public CRFCliqueTree(FactorTable[] factorTables, Index classIndex, E backgroundSymbol) { this(factorTables, classIndex, backgroundSymbol, factorTables[0].totalMass()); } - /** This extra constructor was added to support the CRFCliqueTreeForPartialLabels */ + /** This extra constructor was added to support the CRFCliqueTreeForPartialLabels. 
*/ CRFCliqueTree(FactorTable[] factorTables, Index classIndex, E backgroundSymbol, double z) { this.factorTables = factorTables; this.z = z; diff --git a/src/edu/stanford/nlp/ie/crf/NERGUI.java b/src/edu/stanford/nlp/ie/crf/NERGUI.java index 863bcb56a0..4791bed58f 100644 --- a/src/edu/stanford/nlp/ie/crf/NERGUI.java +++ b/src/edu/stanford/nlp/ie/crf/NERGUI.java @@ -77,7 +77,7 @@ private void createAndShowGUI() { //Create and set up the window. frame = new JFrame("Stanford Named Entity Recognizer"); - frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE); + frame.setDefaultCloseOperation(WindowConstants.EXIT_ON_CLOSE); frame.getContentPane().setLayout(new BorderLayout()); frame.getContentPane().setPreferredSize(new Dimension(WIDTH, HEIGHT)); diff --git a/src/edu/stanford/nlp/ie/demo/NERDemo.java b/src/edu/stanford/nlp/ie/demo/NERDemo.java index 053e2db1e5..994f10f01a 100644 --- a/src/edu/stanford/nlp/ie/demo/NERDemo.java +++ b/src/edu/stanford/nlp/ie/demo/NERDemo.java @@ -5,16 +5,21 @@ import edu.stanford.nlp.io.IOUtils; import edu.stanford.nlp.ling.CoreLabel; import edu.stanford.nlp.ling.CoreAnnotations; +import edu.stanford.nlp.sequences.DocumentReaderAndWriter; +import edu.stanford.nlp.util.Triple; import java.util.List; /** This is a demo of calling CRFClassifier programmatically. *
<p>
- * Usage: {@code java -mx400m -cp "stanford-ner.jar:." NERDemo [serializedClassifier [fileName]] }
+ * Usage: {@code java -mx400m -cp "*" NERDemo [serializedClassifier [fileName]] }
 * <p>
 * If arguments aren't specified, they default to
 * classifiers/english.all.3class.distsim.crf.ser.gz and some hardcoded sample text.
+ * If run with arguments, it shows some of the ways to get k-best labelings and
+ * probabilities out with CRFClassifier. If run without arguments, it shows some of
+ * the alternative output formats that you can get.
 * <p>
 * To use CRFClassifier from the command line:
 * <p>
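+ * When run with arguments, the k-best and probability demos below boil down to
+ * this pattern (the input file name is illustrative):
+ * <blockquote><code>
+ * DocumentReaderAndWriter<CoreLabel> readerAndWriter = classifier.makePlainTextReaderAndWriter();
+ * classifier.classifyAndWriteAnswersKBest("sample.txt", 10, readerAndWriter);
+ * classifier.printProbs("sample.txt", readerAndWriter);
+ * </code></blockquote>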
@@ -43,14 +48,17 @@ public static void main(String[] args) throws Exception { AbstractSequenceClassifier classifier = CRFClassifier.getClassifier(serializedClassifier); - /* For either a file to annotate or for the hardcoded text example, - this demo file shows two ways to process the output, for teaching - purposes. For the file, it shows both how to run NER on a String - and how to run it on a whole file. For the hard-coded String, - it shows how to run it on a single sentence, and how to do this - and produce an inline XML output format. + /* For either a file to annotate or for the hardcoded text example, this + demo file shows several ways to process the input, for teaching purposes. */ + if (args.length > 1) { + + /* For the file, it shows (1) how to run NER on a String, (2) how + to get the entities in the String with character offsets, and + (3) how to run NER on a whole file (without loading it into a String). + */ + String fileContents = IOUtils.slurpFile(args[1]); List> out = classifier.classify(fileContents); for (List sentence : out) { @@ -59,6 +67,7 @@ public static void main(String[] args) throws Exception { } System.out.println(); } + System.out.println("---"); out = classifier.classifyFile(args[1]); for (List sentence : out) { @@ -68,7 +77,36 @@ public static void main(String[] args) throws Exception { System.out.println(); } + System.out.println("---"); + List> list = classifier.classifyToCharacterOffsets(fileContents); + for (Triple item : list) { + System.out.println(item.first() + ": " + fileContents.substring(item.second(), item.third())); + } + System.out.println("---"); + System.out.println("Ten best entity labelings"); + DocumentReaderAndWriter readerAndWriter = classifier.makePlainTextReaderAndWriter(); + classifier.classifyAndWriteAnswersKBest(args[1], 10, readerAndWriter); + + System.out.println("---"); + System.out.println("Per-token marginalized probabilities"); + classifier.printProbs(args[1], readerAndWriter); + + // -- This code prints out the first order (token pair) clique probabilities. + // -- But that output is a bit overwhelming, so we leave it commented out by default. + // System.out.println("---"); + // System.out.println("First Order Clique Probabilities"); + // ((CRFClassifier) classifier).printFirstOrderProbs(args[1], readerAndWriter); + } else { + + /* For the hard-coded String, it shows how to run it on a single + sentence, and how to do this and produce several formats, including + slash tags and an inline XML output format. It also shows the full + contents of the {@code CoreLabel}s that are constructed by the + classifier. And it shows getting out the probabilities of different + assignments and an n-best list of classifications with probabilities. + */ + String[] example = {"Good afternoon Rajat Raina, how are you today?", "I go to school at Stanford University, which is located in California." }; for (String str : example) { @@ -82,6 +120,13 @@ public static void main(String[] args) throws Exception { } System.out.println("---"); + for (String str : example) { + // This one is best for dealing with the output as a TSV (tab-separated column) file. 
+ // The first column gives entities, the second their classes, and the third the remaining text in a document + System.out.print(classifier.classifyToString(str, "tabbedEntities", false)); + } + System.out.println("---"); + for (String str : example) { System.out.println(classifier.classifyWithInlineXML(str)); } @@ -92,6 +137,24 @@ public static void main(String[] args) throws Exception { } System.out.println("---"); + for (String str : example) { + System.out.print(classifier.classifyToString(str, "tsv", false)); + } + System.out.println("---"); + + // This gets out entities with character offsets + int j = 0; + for (String str : example) { + j++; + List> triples = classifier.classifyToCharacterOffsets(str); + for (Triple trip : triples) { + System.out.printf("%s over character offsets [%d, %d) in sentence %d.%n", + trip.first(), trip.second(), trip.third, j); + } + } + System.out.println("---"); + + // This prints out all the details of what is stored for each token int i=0; for (String str : example) { for (List lcl : classifier.classify(str)) { @@ -101,6 +164,9 @@ public static void main(String[] args) throws Exception { } } } + + System.out.println("---"); + } } diff --git a/src/edu/stanford/nlp/ie/util/RelationTriple.java b/src/edu/stanford/nlp/ie/util/RelationTriple.java new file mode 100644 index 0000000000..0e8a9f64aa --- /dev/null +++ b/src/edu/stanford/nlp/ie/util/RelationTriple.java @@ -0,0 +1,338 @@ +package edu.stanford.nlp.ie.util; + +import edu.stanford.nlp.ling.CoreLabel; +import edu.stanford.nlp.ling.IndexedWord; +import edu.stanford.nlp.semgraph.SemanticGraph; +import edu.stanford.nlp.semgraph.SemanticGraphEdge; +import edu.stanford.nlp.semgraph.semgrex.SemgrexMatcher; +import edu.stanford.nlp.semgraph.semgrex.SemgrexPattern; +import edu.stanford.nlp.util.FixedPrioritiesPriorityQueue; +import edu.stanford.nlp.util.PriorityQueue; +import edu.stanford.nlp.util.StringUtils; + +import java.util.*; + +/** + * A (subject, relation, object) triple; e.g., as used in the KBP challenges or in OpenIE systems. + * + * @author Gabor Angeli + */ +@SuppressWarnings("UnusedDeclaration") +public class RelationTriple implements Comparable { + /** The subject (first argument) of this triple */ + public final List subject; + /** The relation (second argument) of this triple */ + public final List relation; + /** The object (third argument) of this triple */ + public final List object; + /** An optional score (confidence) for this triple */ + public final double confidence; + + /** + * Create a new triple with known values for the subject, relation, and object. + * For example, "(cats, play with, yarn)" + * @param subject The subject of this triple; e.g., "cats". + * @param relation The relation of this triple; e.g., "play with". + * @param object The object of this triple; e.g., "yarn". 
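+ * @param confidence The confidence score for this triple; the three-argument constructor defaults it to 1.0.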
+ */ + public RelationTriple(List subject, List relation, List object, + double confidence) { + this.subject = subject; + this.relation = relation; + this.object = object; + this.confidence = confidence; + } + + /** + * @see edu.stanford.nlp.ie.util.RelationTriple#RelationTriple(java.util.List, java.util.List, java.util.List, double) + */ + public RelationTriple(List subject, List relation, List object) { + this(subject, relation, object, 1.0); + } + + /** The subject of this relation triple, as a String */ + public String subjectGloss() { + return StringUtils.join(subject.stream().map(CoreLabel::word), " "); + } + + /** The object of this relation triple, as a String */ + public String objectGloss() { + return StringUtils.join(object.stream().map(CoreLabel::word), " "); + } + + /** The relation of this relation triple, as a String */ + public String relationGloss() { + return StringUtils.join(relation.stream().map(CoreLabel::word), " "); + } + + /** An optional method, returning the dependency tree this triple was extracted from */ + public Optional asDependencyTree() { + return Optional.empty(); + } + + /** Return the given relation triple as a flat sentence */ + public List asSentence() { + PriorityQueue orderedSentence = new FixedPrioritiesPriorityQueue<>(); + double defaultIndex = 0.0; + for (CoreLabel token : subject) { + orderedSentence.add(token, token.index() >= 0 ? (double) -token.index() : -defaultIndex); + defaultIndex += 1.0; + } + for (CoreLabel token : relation) { + orderedSentence.add(token, token.index() >= 0 ? (double) -token.index() : -defaultIndex); + defaultIndex += 1.0; + } + for (CoreLabel token : object) { + orderedSentence.add(token, token.index() >= 0 ? (double) -token.index() : -defaultIndex); + defaultIndex += 1.0; + } + return orderedSentence.toSortedList(); + } + + + /** {@inheritDoc} */ + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (!(o instanceof RelationTriple)) return false; + RelationTriple that = (RelationTriple) o; + return object.equals(that.object) && relation.equals(that.relation) && subject.equals(that.subject); + } + + /** {@inheritDoc} */ + @Override + public int hashCode() { + int result = subject.hashCode(); + result = 31 * result + relation.hashCode(); + result = 31 * result + object.hashCode(); + return result; + } + + /** Print a human-readable description of this relation triple, as a tab-separated line */ + @Override + public String toString() { + return "" + this.confidence + "\t" + subjectGloss() + "\t" + relationGloss() + "\t" + objectGloss(); + } + + @Override + public int compareTo(RelationTriple o) { + if (this.confidence < o.confidence) { + return -1; + } else if (this.confidence > o.confidence) { + return 1; + } else { + return 0; + } + } + + /** + * A {@link edu.stanford.nlp.ie.util.RelationTriple}, but with the tree saved as well. + */ + protected static class WithTree extends RelationTriple { + public final SemanticGraph sourceTree; + + /** + * Create a new triple with known values for the subject, relation, and object. + * For example, "(cats, play with, yarn)" + * + * @param subject The subject of this triple; e.g., "cats". + * @param relation The relation of this triple; e.g., "play with". + * @param object The object of this triple; e.g., "yarn". + * @param tree The tree this extraction was created from; we create a deep copy of the tree. 
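+ * @param confidence The confidence score to associate with this extraction.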
+ */ + public WithTree(List subject, List relation, List object, SemanticGraph tree, + double confidence) { + super(subject, relation, object, confidence); + this.sourceTree = new SemanticGraph(tree); + } + + /** {@inheritDoc} */ + @Override + public Optional asDependencyTree() { + return Optional.of(sourceTree); + } + } + + /** A list of patterns to match relation extractions against */ + private static final List PATTERNS = Collections.unmodifiableList(new ArrayList() {{ + // { blue cats play [quietly] with yarn } + add(SemgrexPattern.compile("{$}=verb ?>/cop|auxpass/ {}=be >/.subj(pass)?/ {}=subject >/prep/ ({}=prep >/pobj/ {}=object)")); + // (w / collapsed dependencies) + add(SemgrexPattern.compile("{$}=verb ?>/cop|auxpass/ {}=be >/.subj(pass)?/ {}=subject >/prepc?_.*/=prepEdge {}=object")); + // { fish like to swim } + add(SemgrexPattern.compile("{$}=verb >/.subj(pass)?/ {}=subject >/xcomp/ {}=object")); + // { cats have tails } + add(SemgrexPattern.compile("{$}=verb ?>/auxpass/ {}=be >/.subj(pass)?/ {}=subject >/[di]obj|xcomp/ {}=object")); + // { cats are cute } + add(SemgrexPattern.compile("{$}=object >/.subj(pass)?/ {}=subject >/cop/ {}=verb")); + }}); + + /** A set of valid arcs denoting an entity we are interested in */ + private static final Set VALID_ENTITY_ARCS = Collections.unmodifiableSet(new HashSet(){{ + add("amod"); add("nn"); add("aux"); add("num"); add("prep"); add("nsubj"); add("prep_*"); + }}); + + /** A set of valid arcs denoting an entity we are interested in */ + private static final Set VALID_ADVERB_ARCS = Collections.unmodifiableSet(new HashSet(){{ + add("amod"); add("advmod"); add("conj"); add("cc"); add("conj_and"); add("conj_or"); add("auxpass"); + }}); + + private static CoreLabel mockNode(CoreLabel toCopy, int offset, String word, String POS) { + CoreLabel mock = new CoreLabel(toCopy); + mock.setWord(word); + mock.setLemma(word); + mock.setValue(word); + mock.setNER("O"); + mock.setTag(POS); + mock.setIndex(toCopy.index() + offset); + return mock; + } + + /** + * @see RelationTriple#getValidEntityChunk(edu.stanford.nlp.semgraph.SemanticGraph, edu.stanford.nlp.ling.IndexedWord) + * @see RelationTriple#getValidAdverbChunk(edu.stanford.nlp.semgraph.SemanticGraph, edu.stanford.nlp.ling.IndexedWord) + */ + private static Optional> getValidChunk(SemanticGraph parse, IndexedWord originalRoot, Set validArcs) { + PriorityQueue chunk = new FixedPrioritiesPriorityQueue<>(); + Queue fringe = new LinkedList<>(); + IndexedWord root = originalRoot; + fringe.add(root); + + boolean isCopula = false; + for (SemanticGraphEdge edge : parse.outgoingEdgeIterable(originalRoot)) { + if (edge.getRelation().getShortName().equals("cop")) { + isCopula = true; + } + } + + while (!fringe.isEmpty()) { + root = fringe.poll(); + chunk.add(root.backingLabel(), -root.index()); + for (SemanticGraphEdge edge : parse.incomingEdgeIterable(root)) { + if (edge.getDependent() != originalRoot) { + if (edge.getRelation().toString().startsWith("prep_") || edge.getRelation().toString().startsWith("prepc_")) { + chunk.add(mockNode(edge.getGovernor().backingLabel(), 1, edge.getRelation().toString().substring(edge.getRelation().toString().indexOf("_") + 1), "PP"), -(((double) edge.getGovernor().index()) + 0.9)); + } + if (edge.getRelation().getShortName().equals("conj")) { + chunk.add(mockNode(root.backingLabel(), -1, edge.getRelation().getSpecific(), "CC"), -(((double) root.index()) - 0.9)); + } + } + } + for (SemanticGraphEdge edge : parse.getOutEdgesSorted(root)) { + String shortName = 
edge.getRelation().getShortName(); + //noinspection StatementWithEmptyBody + if (isCopula && (shortName.equals("cop") || shortName.contains("subj"))) { + // noop; ignore nsubj and cop for extractions with copula + } else if (!validArcs.contains(edge.getRelation().getShortName().replaceAll("_.*","_*"))) { + return Optional.empty(); + } else { + fringe.add(edge.getDependent()); + } + } + } + + return Optional.of(chunk.toSortedList()); + } + + /** + * Get the yield of a given subtree, if it is a valid entity. + * Otherwise, return {@link java.util.Optional#empty()}}. + * @param parse The parse tree we are extracting a subtree from. + * @param root The root of the subtree. + * @return If this subtree is a valid entity, we return its yield. Otherwise, we return empty. + */ + private static Optional> getValidEntityChunk(SemanticGraph parse, IndexedWord root) { + return getValidChunk(parse, root, VALID_ENTITY_ARCS); + } + + /** + * Get the yield of a given subtree, if it is a adverb chunk. + * Otherwise, return {@link java.util.Optional#empty()}}. + * @param parse The parse tree we are extracting a subtree from. + * @param root The root of the subtree. + * @return If this subtree is a valid adverb, we return its yield. Otherwise, we return empty. + */ + private static Optional> getValidAdverbChunk(SemanticGraph parse, IndexedWord root) { + return getValidChunk(parse, root, VALID_ADVERB_ARCS); + } + + /** + *
<p>
+ * Try to segment this sentence as a relation triple. + * This sentence must already match one of a few strict patterns for a valid OpenIE extraction. + * If it does not, then no relation triple is created. + * That is, this is not a relation extractor; it is just a utility to segment what is already a + * (subject, relation, object) triple into these three parts. + *
</p>
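+ * Typical usage: {@code RelationTriple.segment(parse, Optional.empty())}.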
+ * + * @param parse The sentence to process, as a dependency tree. + * @param confidence An optional confidence to pass on to the relation triple. + * @return A relation triple, if this sentence matches one of the patterns of a valid relation triple. + */ + public static Optional segment(SemanticGraph parse, Optional confidence) { + PATTERN_LOOP: for (SemgrexPattern pattern : PATTERNS) { // For every candidate pattern... + SemgrexMatcher m = pattern.matcher(parse); + if (m.matches()) { // ... see if it matches the sentence + // Verb + PriorityQueue verbChunk = new FixedPrioritiesPriorityQueue<>(); + IndexedWord verb = m.getNode("verb"); + IndexedWord prep = m.getNode("prep"); + List adverbs = new ArrayList<>(); + for (SemanticGraphEdge edge : parse.outgoingEdgeIterable(verb)) { + if ("advmod".equals(edge.getRelation().toString()) || "amod".equals(edge.getRelation().toString())) { + String tag = edge.getDependent().backingLabel().tag(); + if (tag == null || + (!tag.startsWith("W") && !edge.getDependent().backingLabel().word().equalsIgnoreCase("then"))) { // prohibit advmods like "where" + adverbs.add(edge.getDependent()); + } + } + } + IndexedWord be = m.getNode("be"); + String prepEdge = m.getRelnString("prepEdge"); + verbChunk.add(verb.backingLabel(), -verb.index()); + int numKnownDependents = 2; // subject and object, at minimum + if (prep != null) { verbChunk.add(prep.backingLabel(), -prep.index()); numKnownDependents += 1; } + if (be != null) { verbChunk.add(be.backingLabel(), -be.index()); numKnownDependents += 1; } + // (adverbs have to be well-formed) + if (!adverbs.isEmpty()) { + Set adverbialModifiers = new HashSet<>(); + for (IndexedWord adv : adverbs) { + Optional> adverbChunk = getValidAdverbChunk(parse, adv); + if (adverbChunk.isPresent()) { + for (CoreLabel token : adverbChunk.get()) { + adverbialModifiers.add(token); + } + } else { + continue PATTERN_LOOP; // Invalid adverbial phrase + } + numKnownDependents += 1; + } + for (CoreLabel adverbToken : adverbialModifiers) { + verbChunk.add(adverbToken, -adverbToken.index()); + } + } + // (add preposition edge) + if (prepEdge != null) { + verbChunk.add(mockNode(verb.backingLabel(), 1, prepEdge.substring(prepEdge.indexOf("_") + 1), "PP"), -(verb.index() + 10)); + } + // (check for additional edges) + if (parse.outDegree(verb) > numKnownDependents) { + //noinspection UnnecessaryLabelOnContinueStatement + continue PATTERN_LOOP; // Too many outgoing edges; we didn't consume them all. + } + List relation = verbChunk.toSortedList(); + + // Subject+Object + Optional> subject = getValidEntityChunk(parse, m.getNode("subject")); + Optional> object = getValidEntityChunk(parse, m.getNode("object")); + // Create relation + if (subject.isPresent() && object.isPresent()) { // ... and has a valid subject+object + // Success! Found a valid extraction. 
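+          // (At this point the relation chunk holds the verb plus any copula/auxiliary,
+          //  adverbial modifiers, and trailing preposition; the subject and object are
+          //  the sorted yields of their dependency subtrees.)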
+ return Optional.of(new WithTree(subject.get(), relation, object.get(), parse, confidence.orElse(1.0))); + } + } + } + // Failed to match any pattern; return failure + return Optional.empty(); + } +} diff --git a/src/edu/stanford/nlp/international/french/process/FrenchTokenizer.java b/src/edu/stanford/nlp/international/french/process/FrenchTokenizer.java index fdc0983b37..51f9c7a9b5 100644 --- a/src/edu/stanford/nlp/international/french/process/FrenchTokenizer.java +++ b/src/edu/stanford/nlp/international/french/process/FrenchTokenizer.java @@ -137,11 +137,6 @@ public static TokenizerFactory newTokenizerFactory() { } /** - * Constructs a new PTBTokenizer that returns Word objects and - * uses the options passed in. - * THIS METHOD IS INVOKED BY REFLECTION BY SOME OF THE JAVANLP - * CODE TO LOAD A TOKENIZER FACTORY. IT SHOULD BE PRESENT IN A - * TokenizerFactory. * todo [cdm 2013]: But we should change it to a method that can return any kind of Label and return CoreLabel here * * @param options A String of options @@ -241,7 +236,7 @@ private static String usage() { sb.append(" -ftb : Tokenization for experiments in Green et al. (2011).").append(nl); sb.append(" -lowerCase : Apply lowercasing.").append(nl); sb.append(" -encoding type : Encoding format.").append(nl); - sb.append(" -orthoOpts str : Orthographic options (see FrenchLexer.java)").append(nl); + sb.append(" -options str : Orthographic options (see FrenchLexer.java)").append(nl); return sb.toString(); } @@ -251,7 +246,7 @@ private static Map argOptionDefs() { argOptionDefs.put("ftb", 0); argOptionDefs.put("lowerCase", 0); argOptionDefs.put("encoding", 1); - argOptionDefs.put("orthoOpts", 1); + argOptionDefs.put("options", 1); return argOptionDefs; } @@ -276,12 +271,11 @@ public static void main(String[] args) { // Lexer options final TokenizerFactory tf = options.containsKey("ftb") ? FrenchTokenizer.ftbFactory() : FrenchTokenizer.factory(); - String orthoOptions = options.getProperty("orthoOpts", ""); - tf.setOptions(orthoOptions); - + String orthoOptions = options.getProperty("options", ""); // When called from this main method, split on newline. No options for // more granular sentence splitting. - tf.setOptions("tokenizeNLs"); + orthoOptions = orthoOptions.length() == 0 ? 
"tokenizeNLs" : orthoOptions + ",tokenizeNLs"; + tf.setOptions(orthoOptions); // Other options final String encoding = options.getProperty("encoding", "UTF-8"); diff --git a/src/edu/stanford/nlp/international/spanish/SpanishVerbStripper.java b/src/edu/stanford/nlp/international/spanish/SpanishVerbStripper.java index 360c04a0e5..c185b16e31 100644 --- a/src/edu/stanford/nlp/international/spanish/SpanishVerbStripper.java +++ b/src/edu/stanford/nlp/international/spanish/SpanishVerbStripper.java @@ -91,9 +91,9 @@ private void setupDictionary(String dictPath) { } catch (UnsupportedEncodingException e) { e.printStackTrace(); } catch (FileNotFoundException e) { - throw new RuntimeException("Could not load Spanish data file " + dictPath); + System.err.println("Could not load Spanish data file " + dictPath); } catch (IOException e) { - throw new RuntimeException("Could not load Spanish data file " + dictPath); + System.err.println("Could not load Spanish data file " + dictPath); } } diff --git a/src/edu/stanford/nlp/international/spanish/process/SpanishTokenizer.java b/src/edu/stanford/nlp/international/spanish/process/SpanishTokenizer.java index 7dc339d10a..ed35f07f74 100644 --- a/src/edu/stanford/nlp/international/spanish/process/SpanishTokenizer.java +++ b/src/edu/stanford/nlp/international/spanish/process/SpanishTokenizer.java @@ -16,13 +16,11 @@ import edu.stanford.nlp.ling.CoreLabel; import edu.stanford.nlp.ling.HasWord; import edu.stanford.nlp.ling.CoreAnnotations.ParentAnnotation; -import edu.stanford.nlp.ling.Word; import edu.stanford.nlp.process.TokenizerFactory; import edu.stanford.nlp.process.AbstractTokenizer; import edu.stanford.nlp.process.CoreLabelTokenFactory; import edu.stanford.nlp.process.LexedTokenFactory; import edu.stanford.nlp.process.Tokenizer; -import edu.stanford.nlp.process.WordTokenFactory; import edu.stanford.nlp.util.Generics; import edu.stanford.nlp.util.PropertiesUtils; import edu.stanford.nlp.util.StringUtils; @@ -61,7 +59,7 @@ public class SpanishTokenizer extends AbstractTokenizer { private SpanishVerbStripper verbStripper; // Produces the tokenization for parsing used by AnCora (fixed) */ - public static final String ANCORA_OPTS = "ptb3Ellipsis=true,normalizeParentheses=true,ptb3Dashes=false,splitAll=true"; + public static final String ANCORA_OPTIONS = "ptb3Ellipsis=true,normalizeParentheses=true,ptb3Dashes=false,splitAll=true"; /** * Constructor. @@ -79,7 +77,7 @@ public SpanishTokenizer(Reader r, LexedTokenFactory tf, Properties lexerPrope this.splitAny = (splitCompounds || splitVerbs || splitContractions); if (splitAny) compoundBuffer = Generics.newLinkedList(); - verbStripper = SpanishVerbStripper.getInstance(); + if (splitVerbs) verbStripper = SpanishVerbStripper.getInstance(); } @Override @@ -91,22 +89,22 @@ protected T getNext() { // some tokens can be obliterated. In this case, keep iterating // until we see a non-zero length token. do { - nextToken = (splitAny && compoundBuffer.size() > 0) ? - (T) compoundBuffer.remove(0) : + nextToken = (splitAny && compoundBuffer.size() > 0) ? 
+ (T) compoundBuffer.remove(0) : (T) lexer.next(); } while (nextToken != null && nextToken.word().length() == 0); // Check for compounds to split if (splitAny && nextToken instanceof CoreLabel) { CoreLabel cl = (CoreLabel) nextToken; - if (cl.containsKey(ParentAnnotation.class)) { - if(splitCompounds && cl.get(ParentAnnotation.class).equals(SpanishLexer.COMPOUND_ANNOTATION)) - nextToken = (T) processCompound(cl); - else if (splitVerbs && cl.get(ParentAnnotation.class).equals(SpanishLexer.VB_PRON_ANNOTATION)) - nextToken = (T) processVerb(cl); - else if (splitContractions && cl.get(ParentAnnotation.class).equals(SpanishLexer.CONTR_ANNOTATION)) - nextToken = (T) processContraction(cl); - } + if (cl.containsKey(ParentAnnotation.class)) { + if(splitCompounds && cl.get(ParentAnnotation.class).equals(SpanishLexer.COMPOUND_ANNOTATION)) + nextToken = (T) processCompound(cl); + else if (splitVerbs && cl.get(ParentAnnotation.class).equals(SpanishLexer.VB_PRON_ANNOTATION)) + nextToken = (T) processVerb(cl); + else if (splitContractions && cl.get(ParentAnnotation.class).equals(SpanishLexer.CONTR_ANNOTATION)) + nextToken = (T) processContraction(cl); + } } return nextToken; @@ -119,11 +117,11 @@ else if (splitContractions && cl.get(ParentAnnotation.class).equals(SpanishLexer /* Copies the CoreLabel cl with the new word part */ private CoreLabel copyCoreLabel(CoreLabel cl, String part) { - CoreLabel newLabel = new CoreLabel(cl); - newLabel.setWord(part); - newLabel.setValue(part); - newLabel.set(OriginalTextAnnotation.class, part); - return newLabel; + CoreLabel newLabel = new CoreLabel(cl); + newLabel.setWord(part); + newLabel.setValue(part); + newLabel.set(OriginalTextAnnotation.class, part); + return newLabel; } /** @@ -195,13 +193,6 @@ private CoreLabel processCompound(CoreLabel cl) { return compoundBuffer.remove(0); } - /** - * a factory that vends CoreLabel tokens with default tokenization. 
- */ - public static TokenizerFactory coreLabelFactory() { - return SpanishTokenizerFactory.newCoreLabelTokenizerFactory(); - } - /** * recommended factory method */ @@ -210,7 +201,7 @@ public static TokenizerFactory factory(LexedTokenFactory< } public static TokenizerFactory factory(LexedTokenFactory factory) { - return new SpanishTokenizerFactory(factory, ANCORA_OPTS); + return new SpanishTokenizerFactory(factory, ANCORA_OPTIONS); } /** @@ -232,7 +223,7 @@ public static class SpanishTokenizerFactory implements Tokeni protected boolean splitContractionOption = false; public static TokenizerFactory newCoreLabelTokenizerFactory() { - return new SpanishTokenizerFactory(new CoreLabelTokenFactory(), ANCORA_OPTS); + return new SpanishTokenizerFactory(new CoreLabelTokenFactory()); } @@ -254,7 +245,6 @@ public static SpanishTokenizerFactory newSpanishTokenizer /** Make a factory for SpanishTokenizers, default options */ private SpanishTokenizerFactory(LexedTokenFactory factory) { this.factory = factory; - setOptions(ANCORA_OPTS); } /** Make a factory for SpanishTokenizers, options passed in */ @@ -271,7 +261,7 @@ public Iterator getIterator(Reader r) { @Override public Tokenizer getTokenizer(Reader r) { - return new SpanishTokenizer(r, factory, lexerProperties, splitCompoundOption, splitVerbOption, splitContractionOption); + return new SpanishTokenizer(r, factory, lexerProperties, splitCompoundOption, splitVerbOption, splitContractionOption); } /** @@ -330,7 +320,25 @@ public Tokenizer getTokenizer(Reader r, String extraOptions) { } // end static class SpanishTokenizerFactory - + /** + * Returns a tokenizer with Ancora tokenization. + */ + public static TokenizerFactory ancoraFactory() { + TokenizerFactory tf = SpanishTokenizerFactory.newCoreLabelTokenizerFactory(); + tf.setOptions(ANCORA_OPTIONS); + return tf; + } + + /** + * a factory that vends CoreLabel tokens with default tokenization. 
+ */ + public static TokenizerFactory<CoreLabel> coreLabelFactory() { + return SpanishTokenizerFactory.newCoreLabelTokenizerFactory(); + } + + public static TokenizerFactory<CoreLabel> factory() { + return coreLabelFactory(); + } private static String usage() { StringBuilder sb = new StringBuilder(); @@ -341,8 +349,8 @@ private static String usage() { sb.append(" -ancora : Tokenization style of AnCora (fixed).").append(nl); sb.append(" -lowerCase : Apply lowercasing.").append(nl); sb.append(" -encoding type : Encoding format.").append(nl); - sb.append(" -orthoOpts str : Orthographic options (see SpanishLexer.java)").append(nl); - sb.append(" -lines : Keep tokens as space-separated, not line separated.").append(nl); + sb.append(" -options str : Orthographic options (see SpanishLexer.java)").append(nl); + sb.append(" -tokens : Output tokens as line-separated instead of space-separated.").append(nl); return sb.toString(); } @@ -350,10 +358,11 @@ private static Map argOptionDefs() { Map argOptionDefs = Generics.newHashMap(); argOptionDefs.put("help", 0); argOptionDefs.put("ftb", 0); + argOptionDefs.put("ancora", 0); argOptionDefs.put("lowerCase", 0); argOptionDefs.put("encoding", 1); - argOptionDefs.put("orthoOpts", 1); - argOptionDefs.put("lines", 0); + argOptionDefs.put("options", 1); + argOptionDefs.put("tokens", 0); return argOptionDefs; } @@ -377,17 +386,17 @@ public static void main(String[] args) { // Lexer options final TokenizerFactory<CoreLabel> tf = SpanishTokenizer.coreLabelFactory(); - if (options.containsKey("ancora")) - tf.setOptions(ANCORA_OPTS); - String orthoOptions = options.getProperty("orthoOpts", ""); + String orthoOptions = options.containsKey("ancora") ? ANCORA_OPTIONS : ""; + if (options.containsKey("options")) { + orthoOptions = orthoOptions.length() == 0 ? options.getProperty("options") : orthoOptions + "," + options.getProperty("options"); + } + final boolean tokens = PropertiesUtils.getBool(options, "tokens", false); + if ( ! tokens) { + orthoOptions = orthoOptions.length() == 0 ? "tokenizeNLs" : orthoOptions + ",tokenizeNLs"; + } tf.setOptions(orthoOptions); - // When called from this main method, split on newline. No options for - // more granular sentence splitting. - tf.setOptions("tokenizeNLs"); - // Other options - final boolean lines = options.containsKey("lines"); final String encoding = options.getProperty("encoding", "UTF-8"); final boolean toLower = PropertiesUtils.getBool(options, "lowerCase", false); final Locale es = new Locale("es"); @@ -407,10 +416,7 @@ public static void main(String[] args) { printSpace = false; System.out.println(); } else { - if (printSpace) { - if (lines) System.out.print(" "); - else System.out.println(); - } + if (printSpace) System.out.print(" "); String outputToken = toLower ? word.toLowerCase(es) : word; System.out.print(outputToken); printSpace = true; diff --git a/src/edu/stanford/nlp/ling/AbstractCoreLabel.java b/src/edu/stanford/nlp/ling/AbstractCoreLabel.java index a679be7f12..173e92ab42 100644 --- a/src/edu/stanford/nlp/ling/AbstractCoreLabel.java +++ b/src/edu/stanford/nlp/ling/AbstractCoreLabel.java @@ -3,13 +3,39 @@ import edu.stanford.nlp.util.TypesafeMap; public interface AbstractCoreLabel extends Label, HasWord, HasIndex, HasTag, HasLemma, HasOffset, TypesafeMap { + + /** + * Return the named entity class of the label (or null if none). + * + * @return The NER class for the label + */ public String ner(); + /** + * Set the named entity class of the label.
+ * + * @param ner The NER class for the label + */ public void setNER(String ner); + // These next two are a partial implementation of HasContext. Maybe clean this up someday? + public String originalText(); public void setOriginalText(String originalText); + /** + * Return a non-null String value for a key. This method is included + * for backwards compatibility with the removed class AbstractMapLabel. + * It is guaranteed to not return null; if the key is not present or + * has a null value, it returns the empty string (""). It is only valid to + * call this method when key is paired with a value of type String. + * + * @param <KEY> A key type with a String value + * @param key The key to return the value of. + * @return "" if the key is not in the map or has the value {@code null}, + * and the String value of the key otherwise + */ public <KEY extends TypesafeMap.Key<String>> String getString(Class<KEY> key); + } diff --git a/src/edu/stanford/nlp/ling/AnnotationLookup.java b/src/edu/stanford/nlp/ling/AnnotationLookup.java index 3f43db5002..2cb96ed936 100644 --- a/src/edu/stanford/nlp/ling/AnnotationLookup.java +++ b/src/edu/stanford/nlp/ling/AnnotationLookup.java @@ -17,9 +17,9 @@ public enum KeyLookup { WORD_KEY(CoreAnnotations.TextAnnotation.class, OldFeatureLabelKeys.WORD_KEY), LEMMA_KEY(CoreAnnotations.LemmaAnnotation.class, OldFeatureLabelKeys.LEMMA_KEY), CATEGORY_KEY(CoreAnnotations.CategoryAnnotation.class, OldFeatureLabelKeys.CATEGORY_KEY), - PROJ_CAT_KEY(CoreAnnotations.ProjectedCategoryAnnotation.class, OldFeatureLabelKeys.PROJ_CAT_KEY), - HEAD_WORD_KEY("edu.stanford.nlp.ling.TreeCoreAnnotations.HeadWordAnnotation", OldFeatureLabelKeys.HEAD_WORD_KEY), - HEAD_TAG_KEY("edu.stanford.nlp.ling.TreeCoreAnnotations.HeadTagAnnotation", OldFeatureLabelKeys.HEAD_TAG_KEY), + //PROJ_CAT_KEY(CoreAnnotations.ProjectedCategoryAnnotation.class, OldFeatureLabelKeys.PROJ_CAT_KEY), + //HEAD_WORD_KEY("edu.stanford.nlp.ling.TreeCoreAnnotations.HeadWordAnnotation", OldFeatureLabelKeys.HEAD_WORD_KEY), + //HEAD_TAG_KEY("edu.stanford.nlp.ling.TreeCoreAnnotations.HeadTagAnnotation", OldFeatureLabelKeys.HEAD_TAG_KEY), INDEX_KEY(CoreAnnotations.IndexAnnotation.class, OldFeatureLabelKeys.INDEX_KEY), ARG_KEY(CoreAnnotations.ArgumentAnnotation.class, OldFeatureLabelKeys.ARG_KEY), MARKING_KEY(CoreAnnotations.MarkingAnnotation.class, OldFeatureLabelKeys.MARKING_KEY), diff --git a/src/edu/stanford/nlp/ling/CoreAnnotations.java b/src/edu/stanford/nlp/ling/CoreAnnotations.java index 2210b81d0d..493c34ec5e 100644 --- a/src/edu/stanford/nlp/ling/CoreAnnotations.java +++ b/src/edu/stanford/nlp/ling/CoreAnnotations.java @@ -427,20 +427,6 @@ public Class getType() { } } - /** - * Keys from AbstractMapLabel (descriptions taken from that class) - */ - /** - * The standard key for storing a projected category in the map, as a String. - * For any word (leaf node), the projected category is the syntactic category - * of the maximal constituent headed by the word. Used in SemanticGraph.
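A tiny sketch of the getString() contract documented above, not part of the patch, using CoreLabel as the concrete implementation:

    CoreLabel token = new CoreLabel();
    token.setWord("example");
    // LemmaAnnotation was never set, so getString() returns "" rather than null.
    String lemma = token.getString(CoreAnnotations.LemmaAnnotation.class);
    assert lemma.isEmpty();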
- */ - public static class ProjectedCategoryAnnotation implements CoreAnnotation { - public Class getType() { - return String.class; - } - } - /** * The standard key for a propbank label which is of type Argument */ diff --git a/src/edu/stanford/nlp/ling/CoreLabel.java b/src/edu/stanford/nlp/ling/CoreLabel.java index aacebc6374..8ef92031ee 100644 --- a/src/edu/stanford/nlp/ling/CoreLabel.java +++ b/src/edu/stanford/nlp/ling/CoreLabel.java @@ -1,5 +1,6 @@ package edu.stanford.nlp.ling; +import java.util.Arrays; import java.util.Comparator; import java.util.Map; import java.util.TreeMap; @@ -8,14 +9,13 @@ import edu.stanford.nlp.util.ArrayCoreMap; import edu.stanford.nlp.util.CoreMap; import edu.stanford.nlp.util.Generics; -import edu.stanford.nlp.util.StringUtils; /** * A CoreLabel represents a single word with ancillary information - * attached using CoreAnnotations. If the proper annotations are set, - * the CoreLabel also provides convenient methods to access tags, - * lemmas, etc. + * attached using CoreAnnotations. + * A CoreLabel also provides convenient methods to access tags, + * lemmas, etc. (if the proper annotations are set). *

* A CoreLabel is a Map from keys (which are Class objects) to values, * whose type is determined by the key. That is, it is a heterogeneous @@ -29,7 +29,7 @@ * @author dramage * @author rafferty */ -public class CoreLabel extends ArrayCoreMap implements AbstractCoreLabel, HasWord, HasTag, HasCategory, HasLemma, HasContext, HasIndex, HasOffset { +public class CoreLabel extends ArrayCoreMap implements AbstractCoreLabel, HasCategory, HasContext { private static final long serialVersionUID = 2L; @@ -86,8 +86,11 @@ public CoreLabel(CoreMap label) { * Returns a new CoreLabel instance based on the contents of the given * label. Warning: The behavior of this method is a bit disjunctive! * If label is a CoreMap (including CoreLabel), then its entire - * contents is copied into this label. But, otherwise, just the - * value() and word iff it implements HasWord is copied. + * contents is copied into this label. + * If label is an IndexedWord, then the backing label is copied over + * entirely. + * But, otherwise, just the + * value() and word iff it implements {@link HasWord} is copied. * * @param label Basis for this label */ @@ -100,6 +103,12 @@ public CoreLabel(Label label) { for (Class key : cl.keySet()) { set(key, cl.get(key)); } + } else if (label instanceof IndexedWord) { + CoreMap cl = ((IndexedWord) label).backingLabel(); + setCapacity(cl.size()); + for (Class key : cl.keySet()) { + set(key, cl.get(key)); + } } else { if (label instanceof HasWord) { setWord(((HasWord)label).word()); @@ -130,7 +139,7 @@ public CoreLabel(String[] keys, String[] values) { * This allows you to read in arbitrary values from a file as features, for example. */ public static interface GenericAnnotation extends CoreAnnotation { } - //Unchecked is below because eclipse can't handle the level of type inference if we correctly parameterize GenericAnnotation with String + //Unchecked is below because eclipse can't handle the level of type inference if we correctly parametrize GenericAnnotation with String @SuppressWarnings("unchecked") public static final Map> genericKeys = Generics.newHashMap(); @SuppressWarnings("unchecked") @@ -139,7 +148,11 @@ public static interface GenericAnnotation extends CoreAnnotation { } @SuppressWarnings("unchecked") private void initFromStrings(String[] keys, String[] values) { - for (int i = 0; i < Math.min(keys.length, values.length); i++) { + if (keys.length != values.length) { + throw new UnsupportedOperationException("Argument array lengths differ: " + + Arrays.toString(keys) + " vs. " + Arrays.toString(values)); + } + for (int i = 0; i < keys.length; i++) { String key = keys[i]; String value = values[i]; KeyLookup lookup = AnnotationLookup.getCoreKey(key); @@ -183,14 +196,15 @@ private void initFromStrings(String[] keys, String[] values) { this.set(lookup.coreKey, Double.parseDouble(values[i])); } else if(valueClass == Long.class) { this.set(lookup.coreKey, Long.parseLong(values[i])); + } else { + throw new RuntimeException("Can't handle " + valueClass); } } catch (Exception e) { - e.printStackTrace(); // unexpected value type - System.err.println("CORE: CoreLabel.initFromStrings: " + throw new UnsupportedOperationException("CORE: CoreLabel.initFromStrings: " + "Bad type for " + key + ". Value was: " + value - + "; expected "+AnnotationLookup.getValueType(lookup.coreKey)); + + "; expected "+AnnotationLookup.getValueType(lookup.coreKey), e); } } } @@ -266,16 +280,7 @@ public LabelFactory labelFactory() { } /** - * Return a non-null String value for a key. 
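The initFromStrings change above trades silent truncation for a hard failure. A sketch of the new behavior, not part of the patch; the short key names are assumed to resolve through the standard AnnotationLookup entries:

    // Matched array lengths are fine.
    CoreLabel good = new CoreLabel(new String[]{"word", "tag"}, new String[]{"dogs", "NNS"});
    // Ragged input now throws instead of being truncated to the shorter array.
    try {
      new CoreLabel(new String[]{"word", "tag"}, new String[]{"dogs"});
    } catch (UnsupportedOperationException expected) {
      // "Argument array lengths differ: [word, tag] vs. [dogs]"
    }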
- * This method is included for backwards compatibility with AbstractMapLabel. - * It is guaranteed to not return null; if the key is not present or - * has a null value, it returns the empty string (""). It is only valid to - * call this method when key is paired with a value of type String. - * - * @param A key type with a String value - * @param key The key to return the value of. - * @return "" if the key is not in the map or has the value null - * and the String value of the key otherwise + * {@inheritDoc} */ @Override public > String getString(Class key) { @@ -287,13 +292,6 @@ public > String getString(Class key) { } - /** - * {@inheritDoc} - */ -// public int size() { -// return map.size(); -// } - /** * {@inheritDoc} */ @@ -326,8 +324,8 @@ public final String value() { public void setWord(String word) { String originalWord = get(CoreAnnotations.TextAnnotation.class); set(CoreAnnotations.TextAnnotation.class, word); - // pado feb 09: if you change the word, delete the lemma. - // gabor dec 2012: check if there was a real change -- this remove is actually rather expensive if it gets called a lot + // Pado feb 09: if you change the word, delete the lemma. + // Gabor dec 2012: check if there was a real change -- this remove is actually rather expensive if it gets called a lot if (word != null && !word.equals(originalWord) && containsKey(CoreAnnotations.LemmaAnnotation.class)) { remove(CoreAnnotations.LemmaAnnotation.class); } @@ -439,14 +437,17 @@ public void setDocID(String docID) { } /** - * Return the named entity class of the label (or null if none). - * - * @return String the word value for the label + * {@inheritDoc} */ + @Override public String ner() { return get(CoreAnnotations.NamedEntityTagAnnotation.class); } + /** + * {@inheritDoc} + */ + @Override public void setNER(String ner) { set(CoreAnnotations.NamedEntityTagAnnotation.class, ner); } @@ -549,7 +550,7 @@ public void setEndPosition(int endPos) { public enum OutputFormat { VALUE_INDEX, VALUE, VALUE_TAG, VALUE_TAG_INDEX, MAP, VALUE_MAP, VALUE_INDEX_MAP, WORD, WORD_INDEX - }; + } public static final OutputFormat DEFAULT_FORMAT = OutputFormat.VALUE_INDEX; @@ -638,7 +639,7 @@ public String toString(OutputFormat format) { if (index != null) { buf.append('-').append((index).intValue()); } - Map map2 = new TreeMap(); + Map map2 = new TreeMap<>(); for(Class key : this.keySet()) { String cls = key.getName(); // special shortening of all the Annotation classes @@ -673,11 +674,7 @@ public String toString(OutputFormat format) { return buf.toString(); } - private static final Comparator> asClassComparator = new Comparator>() { - @Override - public int compare(Class o1, Class o2) { - return o1.getName().compareTo(o2.getName()); - } - }; + private static final Comparator> asClassComparator = + (o1, o2) -> o1.getName().compareTo(o2.getName()); } diff --git a/src/edu/stanford/nlp/ling/IndexedWord.java b/src/edu/stanford/nlp/ling/IndexedWord.java index 84f3281aa7..91cb02a3eb 100644 --- a/src/edu/stanford/nlp/ling/IndexedWord.java +++ b/src/edu/stanford/nlp/ling/IndexedWord.java @@ -2,26 +2,32 @@ import java.util.Set; -import edu.stanford.nlp.util.CoreMap; import edu.stanford.nlp.util.StringUtils; import edu.stanford.nlp.util.TypesafeMap; /** - * This class is mainly for use with RTE in terms of the methods it provides, - * but on a more general level, it provides a {@link CoreLabel} that uses its + * This class provides a {@link CoreLabel} that uses its * DocIDAnnotation, SentenceIndexAnnotation, and IndexAnnotation to implement * 
Comparable/compareTo, hashCode, and equals. This means no other annotations, * including the identity of the word, are taken into account when using these - * methods. Historically, this class was introduced for and is mainly used in + * the RTE package, and it provides a number of methods that are really specific + * to that use case. A second use case is now the Stanford Dependencies code, + * where this class directly implements the "copy nodes" of section 4.6 of the + * Stanford Dependencies Manual, rather than these being placed directly in the + * backing CoreLabel. This was done so that there stays one CoreLabel per token, despite + * there being multiple IndexedWord nodes, with the additional ones representing copy + * nodes. + *
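A sketch of the copy-node arrangement just described, not part of the patch; it uses the makeSoftCopy method that appears a little further down in this diff:

    CoreLabel token = new CoreLabel();
    token.setWord("flies");
    IndexedWord node = new IndexedWord(token);
    IndexedWord copyNode = node.makeSoftCopy(1); // a "copy node" in the SD Manual's sense
    // Both graph nodes share one backing CoreLabel; no token data is duplicated.
    assert copyNode.backingLabel() == node.backingLabel();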
- * The actual implementation is to wrap a CoreLabel. - * This avoids breaking the equals() and - * hashCode() contract and also avoids expensive copying + * methods. Historically, this class was introduced for and is mainly used in + * the RTE package, and it provides a number of methods that are really specific + * to that use case. A second use case is now the Stanford Dependencies code, + * where this class directly implements the "copy nodes" of section 4.6 of the + * Stanford Dependencies Manual, rather than these being placed directly in the + * backing CoreLabel. This was so there can stay one CoreLabel per token, despite + * there being multiple IndexedWord nodes, additional ones representing copy + * nodes. + *

+ * The actual implementation is to wrap a {@code CoreLabel}. + * This avoids breaking the {@code equals()} and + * {@code hashCode()} contract and also avoids expensive copying * when used to represent the same data as the original - * CoreLabel. + * {@code CoreLabel}. * * @author rafferty - * + * @author John Bauer + * @author Sonal Gupta */ public class IndexedWord implements AbstractCoreLabel, Comparable { @@ -34,7 +40,8 @@ public class IndexedWord implements AbstractCoreLabel, Comparable { private final CoreLabel label; - private int copyCount = 0; + private int copyCount; // = 0; + /** * Default constructor; uses {@link CoreLabel} default constructor */ @@ -102,38 +109,46 @@ public IndexedWord makeSoftCopy(int count) { } /** - * TODO: would be nice to get rid of this. Only used in two places in RTE. + * TODO: get rid of this. Only used in two places in RTE (in rewriter code) */ public CoreLabel backingLabel() { return label; } + @Override public VALUE get(Class> key) { return label.get(key); } + @Override public boolean has(Class> key) { return label.has(key); } + @Override public boolean containsKey(Class> key) { return label.containsKey(key); } + @Override public VALUE set(Class> key, VALUE value) { return label.set(key, value); } + @Override public > String getString(Class key) { return label.getString(key); } + @Override public VALUE remove(Class> key) { return label.remove(key); } + @Override public Set> keySet() { return label.keySet(); } + @Override public int size() { return label.size(); } @@ -346,6 +361,7 @@ public int hashCode() { * @param w The IndexedWord to compare with * @return Whether this is less than w or not in the ordering */ + @Override public int compareTo(IndexedWord w) { if (this.equals(IndexedWord.NO_WORD)) { if (w.equals(IndexedWord.NO_WORD)) { @@ -395,26 +411,31 @@ public void setFromString(String labelStr) { public static LabelFactory factory() { return new LabelFactory() { + @Override public Label newLabel(String labelStr) { - CoreLabel label = new CoreLabel(); - label.setValue(labelStr); - return new IndexedWord(label); + CoreLabel coreLabel = new CoreLabel(); + coreLabel.setValue(labelStr); + return new IndexedWord(coreLabel); } + @Override public Label newLabel(String labelStr, int options) { return newLabel(labelStr); } + @Override public Label newLabel(Label oldLabel) { return new IndexedWord(oldLabel); } + @Override public Label newLabelFromString(String encodedLabelStr) { throw new UnsupportedOperationException("This code branch left blank" + " because we do not understand what this method should do."); } }; } + /** * {@inheritDoc} */ @@ -422,4 +443,5 @@ public Label newLabelFromString(String encodedLabelStr) { public LabelFactory labelFactory() { return IndexedWord.factory(); } + } diff --git a/src/edu/stanford/nlp/ling/tokensregex/NodePattern.java b/src/edu/stanford/nlp/ling/tokensregex/NodePattern.java index 8a876cc530..51e28b520b 100644 --- a/src/edu/stanford/nlp/ling/tokensregex/NodePattern.java +++ b/src/edu/stanford/nlp/ling/tokensregex/NodePattern.java @@ -2,7 +2,6 @@ import edu.stanford.nlp.util.StringUtils; -import java.io.Serializable; import java.util.List; /** @@ -10,7 +9,7 @@ * * @author Angel Chang */ -public abstract class NodePattern implements Serializable{ +public abstract class NodePattern { public static final NodePattern ANY_NODE = new AnyNodePattern(); diff --git a/src/edu/stanford/nlp/ling/tokensregex/SequenceMatcher.java b/src/edu/stanford/nlp/ling/tokensregex/SequenceMatcher.java index 79f6bffff5..5ec79bef4f 100644 --- 
a/src/edu/stanford/nlp/ling/tokensregex/SequenceMatcher.java +++ b/src/edu/stanford/nlp/ling/tokensregex/SequenceMatcher.java @@ -459,7 +459,7 @@ protected boolean findMatchStart(int start, boolean matchAllTokens) protected boolean findMatchStartNoBacktracking(int start, boolean matchAllTokens) { boolean matchAll = true; - MatchedStates cStates = getStartStates(); + MatchedStates cStates = getStartStates(); // Save cStates for FIND_ALL .... curMatchStates = cStates; for(int i = start; i < regionEnd; i++){ @@ -702,6 +702,7 @@ public T get(int i) return elements.get(i); } + /** Returns a non-null MatchedStates, which has a non-empty states list inside. */ private MatchedStates getStartStates() { return new MatchedStates(this, pattern.root); diff --git a/src/edu/stanford/nlp/ling/tokensregex/SequencePattern.java b/src/edu/stanford/nlp/ling/tokensregex/SequencePattern.java index 92722b6a1a..8c5af2bbe1 100644 --- a/src/edu/stanford/nlp/ling/tokensregex/SequencePattern.java +++ b/src/edu/stanford/nlp/ling/tokensregex/SequencePattern.java @@ -2,10 +2,6 @@ import edu.stanford.nlp.util.*; -import java.io.IOException; -import java.io.ObjectInputStream; -import java.io.ObjectOutputStream; -import java.io.Serializable; import java.util.*; import java.util.function.Function; @@ -81,15 +77,12 @@ * see {@link MultiCoreMapNodePattern} for example) *

  • Conjunctions - conjunctions of sequence patterns (works for some cases)

    Note that this and the inherited classes do not implement any custom equals and hashCode functions.
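As a concrete illustration of the pattern language this class backs (a sketch, not part of the patch; `tokens` is assumed to be a List<CoreLabel> from a tokenized, NER-tagged sentence):

    TokenSequencePattern pattern = TokenSequencePattern.compile(
        "([{ner:PERSON}]+) /was/ /born/ /in/ ([{ner:LOCATION}]+)");
    TokenSequenceMatcher matcher = pattern.getMatcher(tokens);
    while (matcher.find()) {
      System.out.println(matcher.group(1) + " -> " + matcher.group(2));
    }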

    * * @author Angel Chang * @see SequenceMatcher */ -public class SequencePattern implements Serializable { +public class SequencePattern { // TODO: // 1. Validate backref capture groupid // 2. Actions @@ -99,7 +92,6 @@ public class SequencePattern implements Serializable { private String patternStr; private PatternExpr patternExpr; private SequenceMatchAction action; - State root; int totalGroups = 0; @@ -268,7 +260,7 @@ public boolean matches(Object o1, Object o2) { /** * Represents a sequence pattern expressions (before translating into NFA). */ - public abstract static class PatternExpr implements Serializable { + public abstract static class PatternExpr { protected abstract Frag build(); @@ -1749,36 +1741,6 @@ protected void updateKeepBids(Set bids) { } } - - - private void readObject(ObjectInputStream ois) - throws IOException, ClassNotFoundException { - patternStr = (String)ois.readObject(); - - patternExpr = (PatternExpr) ois.readObject(); - //this.patternStr = patternStr; - //this.patternExpr = nodeSequencePattern; - action = (SequenceMatchAction) ois.readObject(); - - patternExpr = new GroupPatternExpr(patternExpr, true); - patternExpr = patternExpr.optimize(); - this.totalGroups = patternExpr.assignGroupIds(0); - Frag f = patternExpr.build(); - f.connect(MATCH_STATE); - this.root = f.start; - varGroupBindings = new VarGroupBindings(totalGroups+1); - patternExpr.updateBindings(varGroupBindings); - } - - - private void writeObject(ObjectOutputStream oos) - throws IOException { - oos.writeObject(toString()); - oos.writeObject(this.getPatternExpr()); - oos.writeObject(this.getAction()); - - } // public void writeObject() - // States for matching conjunctions // - Basic, not well tested implementation that may not work for all cases ... // - Can be optimized to terminate earlier if one branch of the conjunction is known not to succeed diff --git a/src/edu/stanford/nlp/ling/tokensregex/TokenSequencePattern.java b/src/edu/stanford/nlp/ling/tokensregex/TokenSequencePattern.java index 1c7cca2fae..608e2828b0 100644 --- a/src/edu/stanford/nlp/ling/tokensregex/TokenSequencePattern.java +++ b/src/edu/stanford/nlp/ling/tokensregex/TokenSequencePattern.java @@ -3,9 +3,6 @@ import edu.stanford.nlp.ling.tokensregex.parser.TokenSequenceParser; import edu.stanford.nlp.util.*; -import java.io.IOException; -import java.io.ObjectInputStream; -import java.io.ObjectOutputStream; import java.util.*; /** @@ -273,7 +270,6 @@ public String toString(){ return this.pattern(); } - /** * Create a multi-pattern matcher for matching across multiple TokensRegex patterns * @param patterns Collection of input patterns diff --git a/src/edu/stanford/nlp/ling/tokensregex/parser/TokenSequenceParser.java b/src/edu/stanford/nlp/ling/tokensregex/parser/TokenSequenceParser.java index 02a53fb90c..7e1d8e231f 100644 --- a/src/edu/stanford/nlp/ling/tokensregex/parser/TokenSequenceParser.java +++ b/src/edu/stanford/nlp/ling/tokensregex/parser/TokenSequenceParser.java @@ -180,17 +180,15 @@ final public AssignableExpression AssignableExpression(Env env) throws ParseExce final public Expression Expression(Env env) throws ParseException { Expression expr; - if (jj_2_8(5)) { + if (jj_2_8(4)) { expr = NestedFunctionCallExpression(env); - } else if (jj_2_9(5)) { + } else if (jj_2_9(4)) { expr = NestedVarExpression(env); - } else if (jj_2_10(5)) { + } else if (jj_2_10(4)) { expr = ValueExpression(env); - } else if (jj_2_11(5)) { + } else if (jj_2_11(4)) { expr = ListExpression(env); - } else if (jj_2_12(5)) { - expr = 
ListExpression2(env); - } else if (jj_2_13(5)) { + } else if (jj_2_12(4)) { expr = CaseExpression(env); } else { jj_consume_token(-1); @@ -229,7 +227,6 @@ final public Expression FunctionCallExpression(Env env) throws ParseException { case STR: case 22: case 25: - case 31: case 38: param = Expression(env); params.add(param); @@ -441,7 +438,6 @@ final public Expression MethodCallExpression(Env env, Expression parent) throws case STR: case 22: case 25: - case 31: case 38: param = Expression(env); params.add(param); @@ -487,15 +483,15 @@ final public AssignableExpression AssignableNestedVarExpression(Env env) throws jj_la1[12] = jj_gen; break label_5; } - if (jj_2_14(2)) { + if (jj_2_13(2)) { i = Index(); expr = new Expressions.IndexedExpression(expr, i); - } else if (jj_2_15(2)) { + } else if (jj_2_14(2)) { jj_consume_token(31); fieldExpr = Expression(env); expr = new Expressions.FieldExpression(expr, fieldExpr); jj_consume_token(32); - } else if (jj_2_16(2)) { + } else if (jj_2_15(2)) { jj_consume_token(35); s = RelaxedString(); expr = new Expressions.FieldExpression(expr, s); @@ -526,18 +522,18 @@ final public Expression NestedVarExpression(Env env) throws ParseException { jj_la1[13] = jj_gen; break label_6; } - if (jj_2_17(3)) { + if (jj_2_16(3)) { i = Index(); expr = new Expressions.IndexedExpression(expr, i); - } else if (jj_2_18(3)) { + } else if (jj_2_17(3)) { jj_consume_token(31); fieldExpr = Expression(env); expr = new Expressions.FieldExpression(expr, fieldExpr); jj_consume_token(32); - } else if (jj_2_19(3)) { + } else if (jj_2_18(3)) { jj_consume_token(35); expr = MethodCallExpression(env, expr); - } else if (jj_2_20(3)) { + } else if (jj_2_19(3)) { jj_consume_token(35); s = RelaxedString(); expr = new Expressions.FieldExpression(expr, s); @@ -568,18 +564,18 @@ final public Expression NestedFunctionCallExpression(Env env) throws ParseExcept jj_la1[14] = jj_gen; break label_7; } - if (jj_2_21(3)) { + if (jj_2_20(3)) { i = Index(); expr = new Expressions.IndexedExpression(expr, i); - } else if (jj_2_22(3)) { + } else if (jj_2_21(3)) { jj_consume_token(31); fieldExpr = Expression(env); expr = new Expressions.FieldExpression(expr, fieldExpr); jj_consume_token(32); - } else if (jj_2_23(3)) { + } else if (jj_2_22(3)) { jj_consume_token(35); expr = MethodCallExpression(env, expr); - } else if (jj_2_24(3)) { + } else if (jj_2_23(3)) { jj_consume_token(35); s = RelaxedString(); expr = new Expressions.FieldExpression(expr, s); @@ -617,36 +613,11 @@ final public Expression ListExpression(Env env) throws ParseException { throw new Error("Missing return statement in function"); } - final public Expression ListExpression2(Env env) throws ParseException { - List exprs = new ArrayList(); - Expression expr; - jj_consume_token(31); - expr = Expression(env); - exprs.add(expr); - label_9: - while (true) { - switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { - case 33: - ; - break; - default: - jj_la1[16] = jj_gen; - break label_9; - } - jj_consume_token(33); - expr = Expression(env); - exprs.add(expr); - } - jj_consume_token(32); - {if (true) return new Expressions.ListExpression(Expressions.TYPE_LIST, exprs);} - throw new Error("Missing return statement in function"); - } - final public Expression BasicCondExpression(Env env) throws ParseException { Expression expr1 = null; Expression expr2 = null; Token op = null; - if (jj_2_25(3)) { + if (jj_2_24(3)) { expr1 = NestedVarExpression(env); switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { case NUMCMP: @@ -655,7 +626,7 @@ final public Expression BasicCondExpression(Env 
env) throws ParseException { expr2 = Expression(env); break; default: - jj_la1[17] = jj_gen; + jj_la1[16] = jj_gen; ; } if (op == null) { @@ -663,7 +634,7 @@ final public Expression BasicCondExpression(Env env) throws ParseException { } else { {if (true) return new Expressions.ConditionalExpression(op.image, expr1, expr2);} } - } else if (jj_2_26(3)) { + } else if (jj_2_25(3)) { expr1 = NestedFunctionCallExpression(env); {if (true) return new Expressions.ConditionalExpression(expr1);} } else { @@ -689,7 +660,7 @@ final public Expression CondGroup(Env env) throws ParseException { jj_consume_token(26); break; default: - jj_la1[18] = jj_gen; + jj_la1[17] = jj_gen; jj_consume_token(-1); throw new ParseException(); } @@ -704,7 +675,7 @@ final public Expression CondExpression(Env env) throws ParseException { Token op; child = CondGroup(env); conjChildren.add(child); - label_10: + label_9: while (true) { switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { case 36: @@ -712,8 +683,8 @@ final public Expression CondExpression(Env env) throws ParseException { ; break; default: - jj_la1[19] = jj_gen; - break label_10; + jj_la1[18] = jj_gen; + break label_9; } switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { case 36: @@ -723,7 +694,7 @@ final public Expression CondExpression(Env env) throws ParseException { op = jj_consume_token(37); break; default: - jj_la1[20] = jj_gen; + jj_la1[19] = jj_gen; jj_consume_token(-1); throw new ParseException(); } @@ -760,7 +731,7 @@ final public Expression CaseExpression(Env env) throws ParseException { Expression elseExpr = null; jj_consume_token(38); jj_consume_token(22); - label_11: + label_10: while (true) { cond = CondExpression(env); jj_consume_token(23); @@ -774,7 +745,7 @@ final public Expression CaseExpression(Env env) throws ParseException { jj_consume_token(30); break; default: - jj_la1[21] = jj_gen; + jj_la1[20] = jj_gen; jj_consume_token(-1); throw new ParseException(); } @@ -788,8 +759,8 @@ final public Expression CaseExpression(Env env) throws ParseException { ; break; default: - jj_la1[22] = jj_gen; - break label_11; + jj_la1[21] = jj_gen; + break label_10; } } switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { @@ -799,7 +770,7 @@ final public Expression CaseExpression(Env env) throws ParseException { elseExpr = Expression(env); break; default: - jj_la1[23] = jj_gen; + jj_la1[22] = jj_gen; ; } jj_consume_token(24); @@ -842,7 +813,7 @@ final public Object StringNumberValue(Env env) throws ParseException { {if (true) return Double.valueOf(tok.image);} break; default: - jj_la1[24] = jj_gen; + jj_la1[23] = jj_gen; jj_consume_token(-1); throw new ParseException(); } @@ -855,7 +826,7 @@ final public SequencePattern.PatternExpr SeqRegexBasic(Env env) throws ParseExce MultiNodePattern multiNode; SequencePattern.PatternExpr expr; Object value = null; - label_12: + label_11: while (true) { switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { case 31: @@ -887,7 +858,7 @@ final public SequencePattern.PatternExpr SeqRegexBasic(Env env) throws ParseExce expr = SeqBackRef(env); break; default: - jj_la1[25] = jj_gen; + jj_la1[24] = jj_gen; jj_consume_token(-1); throw new ParseException(); } @@ -899,7 +870,7 @@ final public SequencePattern.PatternExpr SeqRegexBasic(Env env) throws ParseExce expr = SeqRegexRepeatTimes(env, expr); break; default: - jj_la1[26] = jj_gen; + jj_la1[25] = jj_gen; ; } children.add(expr); @@ -920,8 +891,8 @@ final public SequencePattern.PatternExpr SeqRegexBasic(Env env) throws ParseExce ; break; default: - jj_la1[27] = jj_gen; - break label_12; + jj_la1[26] = jj_gen; + break label_11; } } 
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { @@ -930,7 +901,7 @@ final public SequencePattern.PatternExpr SeqRegexBasic(Env env) throws ParseExce value = Expression(env); break; default: - jj_la1[28] = jj_gen; + jj_la1[27] = jj_gen; ; } if (children.size() != 1) { @@ -963,13 +934,13 @@ final public SequencePattern.PatternExpr SeqRegexRepeatTimes(Env env, SequencePa min = 1; max = -1; break; default: - jj_la1[29] = jj_gen; - if (jj_2_27(3)) { + jj_la1[28] = jj_gen; + if (jj_2_26(3)) { jj_consume_token(22); value = jj_consume_token(NONNEGINT); jj_consume_token(24); min = Integer.parseInt(value.image); max = min; - } else if (jj_2_28(4)) { + } else if (jj_2_27(4)) { jj_consume_token(22); value = jj_consume_token(NONNEGINT); jj_consume_token(33); @@ -986,7 +957,7 @@ final public SequencePattern.PatternExpr SeqRegexRepeatTimes(Env env, SequencePa min = Integer.parseInt(value.image); max = Integer.parseInt(v2.image); break; default: - jj_la1[30] = jj_gen; + jj_la1[29] = jj_gen; jj_consume_token(-1); throw new ParseException(); } @@ -998,7 +969,7 @@ final public SequencePattern.PatternExpr SeqRegexRepeatTimes(Env env, SequencePa greedy = false; break; default: - jj_la1[31] = jj_gen; + jj_la1[30] = jj_gen; ; } {if (true) return new SequencePattern.RepeatPatternExpr(expr, min, max, greedy);} @@ -1010,7 +981,7 @@ final public SequencePattern.PatternExpr SeqRegexDisj(Env env) throws ParseExcep SequencePattern.PatternExpr expr; expr = SeqRegexBasic(env); children.add(expr); - label_13: + label_12: while (true) { switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { case 37: @@ -1018,8 +989,8 @@ final public SequencePattern.PatternExpr SeqRegexDisj(Env env) throws ParseExcep ; break; default: - jj_la1[32] = jj_gen; - break label_13; + jj_la1[31] = jj_gen; + break label_12; } switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { case 43: @@ -1029,7 +1000,7 @@ final public SequencePattern.PatternExpr SeqRegexDisj(Env env) throws ParseExcep jj_consume_token(37); break; default: - jj_la1[33] = jj_gen; + jj_la1[32] = jj_gen; jj_consume_token(-1); throw new ParseException(); } @@ -1048,7 +1019,7 @@ final public SequencePattern.PatternExpr SeqRegexDisjConj(Env env) throws ParseE Token op; child = SeqRegexBasic(env); conjChildren.add(child); - label_14: + label_13: while (true) { switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { case 36: @@ -1058,8 +1029,8 @@ final public SequencePattern.PatternExpr SeqRegexDisjConj(Env env) throws ParseE ; break; default: - jj_la1[34] = jj_gen; - break label_14; + jj_la1[33] = jj_gen; + break label_13; } switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { case 44: @@ -1075,7 +1046,7 @@ final public SequencePattern.PatternExpr SeqRegexDisjConj(Env env) throws ParseE op = jj_consume_token(37); break; default: - jj_la1[35] = jj_gen; + jj_la1[34] = jj_gen; jj_consume_token(-1); throw new ParseException(); } @@ -1124,13 +1095,13 @@ final public SequencePattern.PatternExpr SeqRegexGroup(Env env) throws ParseExce varname = var.image; break; default: - jj_la1[36] = jj_gen; + jj_la1[35] = jj_gen; jj_consume_token(-1); throw new ParseException(); } break; default: - jj_la1[37] = jj_gen; + jj_la1[36] = jj_gen; ; } expr = SeqRegex(env); @@ -1145,7 +1116,7 @@ final public SequencePattern.PatternExpr SeqRegexGroup(Env env) throws ParseExce final public NodePattern BracketedNode(Env env) throws ParseException { NodePattern node; - if (jj_2_29(2)) { + if (jj_2_28(2)) { jj_consume_token(31); jj_consume_token(32); node = NodePattern.ANY_NODE; @@ -1157,7 +1128,7 @@ final public NodePattern BracketedNode(Env env) throws ParseException { 
jj_consume_token(32); break; default: - jj_la1[38] = jj_gen; + jj_la1[37] = jj_gen; jj_consume_token(-1); throw new ParseException(); } @@ -1203,7 +1174,7 @@ final public NodePattern Node(Env env) throws ParseException { node = NodeGroup(env); break; default: - jj_la1[39] = jj_gen; + jj_la1[38] = jj_gen; jj_consume_token(-1); throw new ParseException(); } @@ -1216,7 +1187,7 @@ final public NodePattern NodeDisj(Env env) throws ParseException { NodePattern child; child = NodeGroup(env); children.add(child); - label_15: + label_14: while (true) { switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { case 37: @@ -1224,8 +1195,8 @@ final public NodePattern NodeDisj(Env env) throws ParseException { ; break; default: - jj_la1[40] = jj_gen; - break label_15; + jj_la1[39] = jj_gen; + break label_14; } switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { case 43: @@ -1235,7 +1206,7 @@ final public NodePattern NodeDisj(Env env) throws ParseException { jj_consume_token(37); break; default: - jj_la1[41] = jj_gen; + jj_la1[40] = jj_gen; jj_consume_token(-1); throw new ParseException(); } @@ -1254,7 +1225,7 @@ final public NodePattern NodeConj(Env env) throws ParseException { List children = new ArrayList(); child = NodeGroup(env); children.add(child); - label_16: + label_15: while (true) { switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { case 36: @@ -1262,8 +1233,8 @@ final public NodePattern NodeConj(Env env) throws ParseException { ; break; default: - jj_la1[42] = jj_gen; - break label_16; + jj_la1[41] = jj_gen; + break label_15; } switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { case 44: @@ -1273,7 +1244,7 @@ final public NodePattern NodeConj(Env env) throws ParseException { jj_consume_token(36); break; default: - jj_la1[43] = jj_gen; + jj_la1[42] = jj_gen; jj_consume_token(-1); throw new ParseException(); } @@ -1294,7 +1265,7 @@ final public NodePattern NodeDisjConj(Env env) throws ParseException { Token op; child = NodeGroup(env); conjChildren.add(child); - label_17: + label_16: while (true) { switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { case 36: @@ -1304,8 +1275,8 @@ final public NodePattern NodeDisjConj(Env env) throws ParseException { ; break; default: - jj_la1[44] = jj_gen; - break label_17; + jj_la1[43] = jj_gen; + break label_16; } switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { case 44: @@ -1321,7 +1292,7 @@ final public NodePattern NodeDisjConj(Env env) throws ParseException { op = jj_consume_token(37); break; default: - jj_la1[45] = jj_gen; + jj_la1[44] = jj_gen; jj_consume_token(-1); throw new ParseException(); } @@ -1352,13 +1323,13 @@ final public NodePattern NodeDisjConj(Env env) throws ParseException { final public NodePattern NodeGroup(Env env) throws ParseException { NodePattern node; - if (jj_2_30(2)) { + if (jj_2_29(2)) { node = NodeBasic(env); - } else if (jj_2_31(2)) { + } else if (jj_2_30(2)) { jj_consume_token(25); node = NodeDisjConj(env); jj_consume_token(26); - } else if (jj_2_32(2)) { + } else if (jj_2_31(2)) { jj_consume_token(46); jj_consume_token(25); node = NodeDisjConj(env); @@ -1390,7 +1361,7 @@ final public NodePattern NodeBasic(Env env) throws ParseException { {if (true) return child;} break; default: - jj_la1[46] = jj_gen; + jj_la1[45] = jj_gen; jj_consume_token(-1); throw new ParseException(); } @@ -1410,7 +1381,7 @@ final public NodePattern CoreMapNode(Env env) throws ParseException { case 22: jj_consume_token(22); AttrValue(env, attributes); - label_18: + label_17: while (true) { switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { case 30: @@ -1418,8 +1389,8 @@ final public NodePattern CoreMapNode(Env env) throws ParseException { 
; break; default: - jj_la1[47] = jj_gen; - break label_18; + jj_la1[46] = jj_gen; + break label_17; } switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { case 33: @@ -1429,7 +1400,7 @@ final public NodePattern CoreMapNode(Env env) throws ParseException { jj_consume_token(30); break; default: - jj_la1[48] = jj_gen; + jj_la1[47] = jj_gen; jj_consume_token(-1); throw new ParseException(); } @@ -1438,8 +1409,8 @@ final public NodePattern CoreMapNode(Env env) throws ParseException { jj_consume_token(24); break; default: - jj_la1[50] = jj_gen; - if (jj_2_33(2)) { + jj_la1[49] = jj_gen; + if (jj_2_32(2)) { AttrValue(env, attributes); } else { switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { @@ -1455,14 +1426,14 @@ final public NodePattern CoreMapNode(Env env) throws ParseException { value = jj_consume_token(REGEX); break; default: - jj_la1[49] = jj_gen; + jj_la1[48] = jj_gen; jj_consume_token(-1); throw new ParseException(); } attributes.put("word", value.image); break; default: - jj_la1[51] = jj_gen; + jj_la1[50] = jj_gen; jj_consume_token(-1); throw new ParseException(); } @@ -1482,7 +1453,7 @@ final public NodePattern CoreMapNode(Env env) throws ParseException { {if (true) return pat;} break; default: - jj_la1[52] = jj_gen; + jj_la1[51] = jj_gen; jj_consume_token(-1); throw new ParseException(); } @@ -1512,7 +1483,7 @@ final public Map AttrValue(Env env, Map attributes str = CoreMapVarValue(env); break; default: - jj_la1[53] = jj_gen; + jj_la1[52] = jj_gen; jj_consume_token(-1); throw new ParseException(); } @@ -1533,13 +1504,13 @@ final public Map AttrValue(Env env, Map attributes str = CoreMapVarValue(env); break; default: - jj_la1[54] = jj_gen; + jj_la1[53] = jj_gen; jj_consume_token(-1); throw new ParseException(); } break; default: - jj_la1[55] = jj_gen; + jj_la1[54] = jj_gen; jj_consume_token(-1); throw new ParseException(); } @@ -1585,7 +1556,7 @@ final public NodePattern CoreMapWordPattern(Env env) throws ParseException { value = jj_consume_token(STRSIMPLE); break; default: - jj_la1[56] = jj_gen; + jj_la1[55] = jj_gen; jj_consume_token(-1); throw new ParseException(); } @@ -1603,13 +1574,13 @@ final public MultiNodePattern MultiNodePattern(Env env) throws ParseException { jj_consume_token(50); switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { case 22: - if (jj_2_34(3)) { + if (jj_2_33(3)) { jj_consume_token(22); v1 = jj_consume_token(NONNEGINT); jj_consume_token(24); min = Integer.parseInt(v1.image); max = Integer.parseInt(v1.image); - } else if (jj_2_35(4)) { + } else if (jj_2_34(4)) { jj_consume_token(22); v1 = jj_consume_token(NONNEGINT); jj_consume_token(33); @@ -1627,14 +1598,14 @@ final public MultiNodePattern MultiNodePattern(Env env) throws ParseException { max = Integer.parseInt(v2.image); break; default: - jj_la1[57] = jj_gen; + jj_la1[56] = jj_gen; jj_consume_token(-1); throw new ParseException(); } } break; default: - jj_la1[58] = jj_gen; + jj_la1[57] = jj_gen; ; } pat = CoreMapWordPattern(env); @@ -1675,7 +1646,7 @@ final public Pair> Seq action = Action(env); break; default: - jj_la1[59] = jj_gen; + jj_la1[58] = jj_gen; ; } {if (true) return new Pair>(expr,action);} @@ -1705,7 +1676,7 @@ final public Map SetAttrValues(Env env) throws ParseException { Map attributes = new ArrayMap(); jj_consume_token(22); SetAttrValue(env, attributes); - label_19: + label_18: while (true) { switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { case 30: @@ -1713,8 +1684,8 @@ final public Map SetAttrValues(Env env) throws ParseException { ; break; default: - jj_la1[60] = jj_gen; - break label_19; + jj_la1[59] = jj_gen; + break 
label_18; } switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { case 33: @@ -1724,7 +1695,7 @@ final public Map SetAttrValues(Env env) throws ParseException { jj_consume_token(30); break; default: - jj_la1[61] = jj_gen; + jj_la1[60] = jj_gen; jj_consume_token(-1); throw new ParseException(); } @@ -1752,7 +1723,7 @@ final public Map SetAttrValue(Env env, Map attribu value = NumberToken(); break; default: - jj_la1[62] = jj_gen; + jj_la1[61] = jj_gen; jj_consume_token(-1); throw new ParseException(); } @@ -1780,7 +1751,7 @@ final public Token NumberToken() throws ParseException { value = jj_consume_token(REAL); break; default: - jj_la1[63] = jj_gen; + jj_la1[62] = jj_gen; jj_consume_token(-1); throw new ParseException(); } @@ -1798,7 +1769,7 @@ final public Token IntegerToken() throws ParseException { value = jj_consume_token(INT); break; default: - jj_la1[64] = jj_gen; + jj_la1[63] = jj_gen; jj_consume_token(-1); throw new ParseException(); } @@ -1816,7 +1787,7 @@ final public Token CmpToken() throws ParseException { value = jj_consume_token(NUMCMP); break; default: - jj_la1[65] = jj_gen; + jj_la1[64] = jj_gen; jj_consume_token(-1); throw new ParseException(); } @@ -1834,7 +1805,7 @@ final public Token RelaxedStringToken() throws ParseException { value = jj_consume_token(IDENTIFIER); break; default: - jj_la1[66] = jj_gen; + jj_la1[65] = jj_gen; jj_consume_token(-1); throw new ParseException(); } @@ -1854,7 +1825,7 @@ final public String RelaxedString() throws ParseException { {if (true) return value.image;} break; default: - jj_la1[67] = jj_gen; + jj_la1[66] = jj_gen; jj_consume_token(-1); throw new ParseException(); } @@ -2117,213 +2088,251 @@ private boolean jj_2_34(int xla) { finally { jj_save(33, xla); } } - private boolean jj_2_35(int xla) { - jj_la = xla; jj_lastpos = jj_scanpos = token; - try { return !jj_3_35(); } - catch(LookaheadSuccess ls) { return true; } - finally { jj_save(34, xla); } - } - - private boolean jj_3_20() { + private boolean jj_3_19() { if (jj_scan_token(35)) return true; - if (jj_3R_32()) return true; + if (jj_3R_30()) return true; return false; } - private boolean jj_3_19() { + private boolean jj_3_18() { if (jj_scan_token(35)) return true; - if (jj_3R_33()) return true; + if (jj_3R_31()) return true; return false; } - private boolean jj_3_18() { + private boolean jj_3_17() { if (jj_scan_token(31)) return true; - if (jj_3R_31()) return true; + if (jj_3R_29()) return true; if (jj_scan_token(32)) return true; return false; } - private boolean jj_3R_43() { + private boolean jj_3R_41() { Token xsp; xsp = jj_scanpos; + if (jj_3_16()) { + jj_scanpos = xsp; if (jj_3_17()) { jj_scanpos = xsp; if (jj_3_18()) { jj_scanpos = xsp; - if (jj_3_19()) { - jj_scanpos = xsp; - if (jj_3_20()) return true; + if (jj_3_19()) return true; } } } return false; } - private boolean jj_3_17() { - if (jj_3R_30()) return true; - return false; - } - - private boolean jj_3R_112() { - if (jj_scan_token(BACKREF)) return true; + private boolean jj_3_16() { + if (jj_3R_28()) return true; return false; } - private boolean jj_3R_25() { - if (jj_3R_42()) return true; + private boolean jj_3R_24() { + if (jj_3R_40()) return true; Token xsp; while (true) { xsp = jj_scanpos; - if (jj_3R_43()) { jj_scanpos = xsp; break; } + if (jj_3R_41()) { jj_scanpos = xsp; break; } } return false; } - private boolean jj_3_16() { + private boolean jj_3_15() { if (jj_scan_token(35)) return true; - if (jj_3R_32()) return true; + if (jj_3R_30()) return true; return false; } - private boolean jj_3R_111() { - if (jj_scan_token(REGEXVAR)) 
return true; + private boolean jj_3R_107() { + if (jj_scan_token(BACKREF)) return true; return false; } - private boolean jj_3_15() { + private boolean jj_3_14() { if (jj_scan_token(31)) return true; - if (jj_3R_31()) return true; + if (jj_3R_29()) return true; return false; } - private boolean jj_3_14() { - if (jj_3R_30()) return true; + private boolean jj_3_13() { + if (jj_3R_28()) return true; return false; } - private boolean jj_3R_72() { + private boolean jj_3R_69() { Token xsp; xsp = jj_scanpos; - if (jj_3_14()) { + if (jj_3_13()) { jj_scanpos = xsp; - if (jj_3_15()) { + if (jj_3_14()) { jj_scanpos = xsp; - if (jj_3_16()) return true; + if (jj_3_15()) return true; } } return false; } - private boolean jj_3R_114() { + private boolean jj_3R_57() { + if (jj_3R_68()) return true; + Token xsp; + while (true) { + xsp = jj_scanpos; + if (jj_3R_69()) { jj_scanpos = xsp; break; } + } + return false; + } + + private boolean jj_3R_106() { + if (jj_scan_token(REGEXVAR)) return true; + return false; + } + + private boolean jj_3R_109() { if (jj_scan_token(31)) return true; - if (jj_3R_36()) return true; + if (jj_3R_34()) return true; if (jj_scan_token(32)) return true; return false; } - private boolean jj_3_29() { + private boolean jj_3_28() { if (jj_scan_token(31)) return true; if (jj_scan_token(32)) return true; return false; } - private boolean jj_3R_60() { - if (jj_3R_71()) return true; + private boolean jj_3R_31() { + if (jj_scan_token(IDENTIFIER)) return true; + if (jj_scan_token(25)) return true; + return false; + } + + private boolean jj_3R_102() { Token xsp; - while (true) { - xsp = jj_scanpos; - if (jj_3R_72()) { jj_scanpos = xsp; break; } + xsp = jj_scanpos; + if (jj_3_28()) { + jj_scanpos = xsp; + if (jj_3R_109()) return true; } return false; } - private boolean jj_3R_107() { + private boolean jj_3R_63() { + if (jj_scan_token(REGEXMRGROUP)) return true; + return false; + } + + private boolean jj_3R_62() { + if (jj_scan_token(REGEXMRVAR)) return true; + return false; + } + + private boolean jj_3R_61() { + if (jj_scan_token(REGEXGROUP)) return true; + return false; + } + + private boolean jj_3R_60() { + if (jj_scan_token(REGEXVAR)) return true; + return false; + } + + private boolean jj_3R_40() { Token xsp; xsp = jj_scanpos; - if (jj_3_29()) { + if (jj_3R_59()) { + jj_scanpos = xsp; + if (jj_3R_60()) { + jj_scanpos = xsp; + if (jj_3R_61()) { jj_scanpos = xsp; - if (jj_3R_114()) return true; + if (jj_3R_62()) { + jj_scanpos = xsp; + if (jj_3R_63()) return true; + } + } + } } return false; } - private boolean jj_3R_89() { - if (jj_3R_31()) return true; + private boolean jj_3R_59() { + if (jj_scan_token(IDENTIFIER)) return true; return false; } - private boolean jj_3R_126() { + private boolean jj_3R_121() { if (jj_scan_token(41)) return true; if (jj_scan_token(REGEXVAR)) return true; return false; } - private boolean jj_3R_51() { + private boolean jj_3R_48() { if (jj_scan_token(IDENTIFIER)) return true; return false; } - private boolean jj_3R_125() { + private boolean jj_3R_120() { if (jj_scan_token(45)) return true; return false; } - private boolean jj_3R_118() { + private boolean jj_3R_113() { Token xsp; xsp = jj_scanpos; - if (jj_3R_125()) { + if (jj_3R_120()) { jj_scanpos = xsp; - if (jj_3R_126()) return true; + if (jj_3R_121()) return true; } return false; } - private boolean jj_3R_50() { + private boolean jj_3R_47() { if (jj_scan_token(STR)) return true; return false; } - private boolean jj_3R_32() { + private boolean jj_3R_30() { Token xsp; xsp = jj_scanpos; - if (jj_3R_50()) { + if 
(jj_3R_47()) { jj_scanpos = xsp; - if (jj_3R_51()) return true; + if (jj_3R_48()) return true; } return false; } - private boolean jj_3R_33() { - if (jj_scan_token(IDENTIFIER)) return true; - if (jj_scan_token(25)) return true; + private boolean jj_3R_84() { + if (jj_scan_token(REGEXVAR)) return true; + return false; + } + + private boolean jj_3R_68() { Token xsp; xsp = jj_scanpos; - if (jj_3R_89()) jj_scanpos = xsp; - if (jj_scan_token(26)) return true; + if (jj_3R_83()) { + jj_scanpos = xsp; + if (jj_3R_84()) return true; + } return false; } - private boolean jj_3R_66() { - if (jj_scan_token(REGEXMRGROUP)) return true; + private boolean jj_3R_83() { + if (jj_scan_token(IDENTIFIER)) return true; return false; } - private boolean jj_3R_110() { + private boolean jj_3R_105() { if (jj_scan_token(25)) return true; Token xsp; xsp = jj_scanpos; - if (jj_3R_118()) jj_scanpos = xsp; - if (jj_3R_59()) return true; + if (jj_3R_113()) jj_scanpos = xsp; + if (jj_3R_56()) return true; if (jj_scan_token(26)) return true; return false; } - private boolean jj_3R_65() { - if (jj_scan_token(REGEXMRVAR)) return true; - return false; - } - - private boolean jj_3R_106() { + private boolean jj_3R_101() { Token xsp; xsp = jj_scanpos; if (jj_scan_token(18)) { @@ -2333,41 +2342,19 @@ private boolean jj_3R_106() { return false; } - private boolean jj_3R_64() { - if (jj_scan_token(REGEXGROUP)) return true; - return false; - } - - private boolean jj_3R_63() { - if (jj_scan_token(REGEXVAR)) return true; - return false; - } - - private boolean jj_3R_42() { - Token xsp; - xsp = jj_scanpos; - if (jj_3R_62()) { - jj_scanpos = xsp; - if (jj_3R_63()) { - jj_scanpos = xsp; - if (jj_3R_64()) { - jj_scanpos = xsp; - if (jj_3R_65()) { - jj_scanpos = xsp; - if (jj_3R_66()) return true; - } - } - } - } + private boolean jj_3R_76() { + if (jj_scan_token(25)) return true; + if (jj_3R_56()) return true; + if (jj_scan_token(26)) return true; return false; } - private boolean jj_3R_62() { - if (jj_scan_token(IDENTIFIER)) return true; + private boolean jj_3R_75() { + if (jj_scan_token(REAL)) return true; return false; } - private boolean jj_3R_52() { + private boolean jj_3R_49() { Token xsp; xsp = jj_scanpos; if (jj_scan_token(20)) { @@ -2377,27 +2364,22 @@ private boolean jj_3R_52() { return false; } - private boolean jj_3R_88() { - if (jj_scan_token(REGEXVAR)) return true; + private boolean jj_3R_74() { + if (jj_scan_token(LONGINT)) return true; return false; } - private boolean jj_3R_71() { - Token xsp; - xsp = jj_scanpos; - if (jj_3R_87()) { - jj_scanpos = xsp; - if (jj_3R_88()) return true; - } + private boolean jj_3R_73() { + if (jj_3R_46()) return true; return false; } - private boolean jj_3R_87() { - if (jj_scan_token(IDENTIFIER)) return true; + private boolean jj_3R_72() { + if (jj_scan_token(STR)) return true; return false; } - private boolean jj_3R_49() { + private boolean jj_3R_46() { Token xsp; xsp = jj_scanpos; if (jj_scan_token(13)) { @@ -2407,32 +2389,34 @@ private boolean jj_3R_49() { return false; } - private boolean jj_3R_132() { + private boolean jj_3R_64() { Token xsp; xsp = jj_scanpos; - if (jj_scan_token(13)) { + if (jj_3R_71()) { jj_scanpos = xsp; - if (jj_scan_token(14)) { + if (jj_3R_72()) { + jj_scanpos = xsp; + if (jj_3R_73()) { + jj_scanpos = xsp; + if (jj_3R_74()) { jj_scanpos = xsp; - if (jj_scan_token(16)) return true; + if (jj_3R_75()) { + jj_scanpos = xsp; + if (jj_3R_76()) return true; + } + } + } } } return false; } - private boolean jj_3R_79() { - if (jj_scan_token(25)) return true; - if 
(jj_3R_59()) return true; - if (jj_scan_token(26)) return true; - return false; - } - - private boolean jj_3R_78() { - if (jj_scan_token(REAL)) return true; + private boolean jj_3R_71() { + if (jj_scan_token(REGEX)) return true; return false; } - private boolean jj_3R_105() { + private boolean jj_3R_100() { Token xsp; xsp = jj_scanpos; if (jj_scan_token(44)) { @@ -2445,92 +2429,55 @@ private boolean jj_3R_105() { } } } - if (jj_3R_86()) return true; + if (jj_3R_82()) return true; return false; } - private boolean jj_3R_77() { - if (jj_scan_token(LONGINT)) return true; - return false; - } - - private boolean jj_3R_76() { - if (jj_3R_49()) return true; - return false; - } - - private boolean jj_3R_75() { - if (jj_scan_token(STR)) return true; + private boolean jj_3R_37() { + if (jj_3R_30()) return true; + if (jj_scan_token(34)) return true; + if (jj_3R_29()) return true; return false; } private boolean jj_3R_67() { + if (jj_3R_82()) return true; Token xsp; - xsp = jj_scanpos; - if (jj_3R_74()) { - jj_scanpos = xsp; - if (jj_3R_75()) { - jj_scanpos = xsp; - if (jj_3R_76()) { - jj_scanpos = xsp; - if (jj_3R_77()) { - jj_scanpos = xsp; - if (jj_3R_78()) { - jj_scanpos = xsp; - if (jj_3R_79()) return true; - } - } - } - } + while (true) { + xsp = jj_scanpos; + if (jj_3R_100()) { jj_scanpos = xsp; break; } } return false; } - private boolean jj_3R_74() { - if (jj_scan_token(REGEX)) return true; - return false; - } - - private boolean jj_3R_70() { - if (jj_3R_86()) return true; - Token xsp; - while (true) { - xsp = jj_scanpos; - if (jj_3R_105()) { jj_scanpos = xsp; break; } - } + private boolean jj_3R_21() { + if (jj_scan_token(22)) return true; + if (jj_3R_37()) return true; return false; } - private boolean jj_3R_80() { - Token xsp; - xsp = jj_scanpos; - if (jj_scan_token(33)) { - jj_scanpos = xsp; - if (jj_scan_token(30)) return true; - } + private boolean jj_3R_118() { + if (jj_scan_token(41)) return true; return false; } - private boolean jj_3R_39() { - if (jj_3R_32()) return true; - if (jj_scan_token(34)) return true; - if (jj_3R_31()) return true; + private boolean jj_3R_117() { + if (jj_scan_token(22)) return true; + if (jj_scan_token(NONNEGINT)) return true; return false; } - private boolean jj_3R_123() { - if (jj_scan_token(41)) return true; + private boolean jj_3R_43() { + if (jj_3R_21()) return true; return false; } - private boolean jj_3R_122() { - if (jj_scan_token(22)) return true; - if (jj_scan_token(NONNEGINT)) return true; - if (jj_scan_token(33)) return true; + private boolean jj_3R_42() { + if (jj_3R_64()) return true; return false; } - private boolean jj_3_28() { + private boolean jj_3_27() { if (jj_scan_token(22)) return true; if (jj_scan_token(NONNEGINT)) return true; if (jj_scan_token(33)) return true; @@ -2538,247 +2485,244 @@ private boolean jj_3_28() { return false; } - private boolean jj_3R_121() { + private boolean jj_3R_116() { if (jj_scan_token(42)) return true; return false; } - private boolean jj_3R_22() { - if (jj_scan_token(22)) return true; - if (jj_3R_39()) return true; + private boolean jj_3R_25() { Token xsp; - while (true) { - xsp = jj_scanpos; - if (jj_3R_80()) { jj_scanpos = xsp; break; } + xsp = jj_scanpos; + if (jj_3R_42()) { + jj_scanpos = xsp; + if (jj_3R_43()) return true; } - if (jj_scan_token(24)) return true; return false; } - private boolean jj_3_27() { + private boolean jj_3_26() { if (jj_scan_token(22)) return true; if (jj_scan_token(NONNEGINT)) return true; if (jj_scan_token(24)) return true; return false; } - private boolean jj_3R_120() { + 
private boolean jj_3R_115() { if (jj_scan_token(41)) return true; return false; } - private boolean jj_3R_119() { + private boolean jj_3R_70() { + if (jj_scan_token(33)) return true; + return false; + } + + private boolean jj_3R_114() { if (jj_scan_token(40)) return true; return false; } - private boolean jj_3R_116() { + private boolean jj_3R_58() { + if (jj_3R_29()) return true; + Token xsp; + while (true) { + xsp = jj_scanpos; + if (jj_3R_70()) { jj_scanpos = xsp; break; } + } + return false; + } + + private boolean jj_3R_111() { Token xsp; xsp = jj_scanpos; - if (jj_3R_119()) { + if (jj_3R_114()) { jj_scanpos = xsp; - if (jj_3R_120()) { + if (jj_3R_115()) { jj_scanpos = xsp; - if (jj_3R_121()) { + if (jj_3R_116()) { jj_scanpos = xsp; - if (jj_3_27()) { + if (jj_3_26()) { jj_scanpos = xsp; - if (jj_3_28()) { + if (jj_3_27()) { jj_scanpos = xsp; - if (jj_3R_122()) return true; + if (jj_3R_117()) return true; } } } } } xsp = jj_scanpos; - if (jj_3R_123()) jj_scanpos = xsp; - return false; - } - - private boolean jj_3R_45() { - if (jj_3R_22()) return true; - return false; - } - - private boolean jj_3R_44() { - if (jj_3R_67()) return true; + if (jj_3R_118()) jj_scanpos = xsp; return false; } - private boolean jj_3R_26() { + private boolean jj_3R_38() { + if (jj_scan_token(IDENTIFIER)) return true; + if (jj_scan_token(25)) return true; Token xsp; xsp = jj_scanpos; - if (jj_3R_44()) { - jj_scanpos = xsp; - if (jj_3R_45()) return true; - } + if (jj_3R_58()) jj_scanpos = xsp; + if (jj_scan_token(26)) return true; return false; } - private boolean jj_3R_113() { + private boolean jj_3R_108() { if (jj_scan_token(23)) return true; - if (jj_3R_31()) return true; + if (jj_3R_29()) return true; return false; } - private boolean jj_3R_73() { - if (jj_scan_token(33)) return true; - if (jj_3R_31()) return true; + private boolean jj_3R_90() { + if (jj_3R_65()) return true; return false; } - private boolean jj_3R_95() { - if (jj_3R_68()) return true; + private boolean jj_3R_28() { + if (jj_scan_token(31)) return true; + if (jj_3R_46()) return true; + if (jj_scan_token(32)) return true; return false; } - private boolean jj_3R_61() { - if (jj_3R_31()) return true; - Token xsp; - while (true) { - xsp = jj_scanpos; - if (jj_3R_73()) { jj_scanpos = xsp; break; } - } + private boolean jj_3R_89() { + if (jj_scan_token(REGEXVAR)) return true; return false; } - private boolean jj_3R_94() { - if (jj_scan_token(REGEXVAR)) return true; + private boolean jj_3R_110() { + if (jj_3R_111()) return true; return false; } - private boolean jj_3R_115() { - if (jj_3R_116()) return true; + private boolean jj_3_12() { + if (jj_3R_27()) return true; return false; } - private boolean jj_3R_128() { - if (jj_3R_131()) return true; + private boolean jj_3R_99() { + if (jj_3R_107()) return true; return false; } - private boolean jj_3R_104() { - if (jj_3R_112()) return true; + private boolean jj_3_11() { + if (jj_3R_26()) return true; return false; } - private boolean jj_3R_40() { - if (jj_scan_token(IDENTIFIER)) return true; - if (jj_scan_token(25)) return true; - Token xsp; - xsp = jj_scanpos; - if (jj_3R_61()) jj_scanpos = xsp; - if (jj_scan_token(26)) return true; + private boolean jj_3R_98() { + if (jj_3R_106()) return true; return false; } - private boolean jj_3R_131() { - if (jj_scan_token(REGEXVAR)) return true; + private boolean jj_3_10() { + if (jj_3R_25()) return true; return false; } - private boolean jj_3R_103() { - if (jj_3R_111()) return true; + private boolean jj_3R_97() { + if (jj_3R_105()) return true; return false; } - 
private boolean jj_3R_102() { - if (jj_3R_110()) return true; + private boolean jj_3_9() { + if (jj_3R_24()) return true; return false; } - private boolean jj_3R_101() { - if (jj_3R_109()) return true; + private boolean jj_3R_96() { + if (jj_3R_104()) return true; return false; } - private boolean jj_3R_100() { - if (jj_3R_108()) return true; + private boolean jj_3_8() { + if (jj_3R_23()) return true; return false; } - private boolean jj_3R_30() { - if (jj_scan_token(31)) return true; - if (jj_3R_49()) return true; - if (jj_scan_token(32)) return true; + private boolean jj_3R_95() { + if (jj_3R_103()) return true; return false; } - private boolean jj_3R_99() { - if (jj_3R_107()) return true; + private boolean jj_3R_94() { + if (jj_3R_102()) return true; return false; } - private boolean jj_3R_96() { + private boolean jj_3R_29() { + Token xsp; + xsp = jj_scanpos; + if (jj_3_8()) { + jj_scanpos = xsp; + if (jj_3_9()) { + jj_scanpos = xsp; + if (jj_3_10()) { + jj_scanpos = xsp; + if (jj_3_11()) { + jj_scanpos = xsp; + if (jj_3_12()) return true; + } + } + } + } + return false; + } + + private boolean jj_3R_91() { Token xsp; xsp = jj_scanpos; - if (jj_3R_99()) { + if (jj_3R_94()) { jj_scanpos = xsp; - if (jj_3R_100()) { + if (jj_3R_95()) { jj_scanpos = xsp; - if (jj_3R_101()) { + if (jj_3R_96()) { jj_scanpos = xsp; - if (jj_3R_102()) { + if (jj_3R_97()) { jj_scanpos = xsp; - if (jj_3R_103()) { + if (jj_3R_98()) { jj_scanpos = xsp; - if (jj_3R_104()) return true; + if (jj_3R_99()) return true; } } } } } xsp = jj_scanpos; - if (jj_3R_115()) jj_scanpos = xsp; + if (jj_3R_110()) jj_scanpos = xsp; return false; } - private boolean jj_3R_86() { + private boolean jj_3R_82() { Token xsp; - if (jj_3R_96()) return true; + if (jj_3R_91()) return true; while (true) { xsp = jj_scanpos; - if (jj_3R_96()) { jj_scanpos = xsp; break; } + if (jj_3R_91()) { jj_scanpos = xsp; break; } } xsp = jj_scanpos; - if (jj_3R_113()) jj_scanpos = xsp; + if (jj_3R_108()) jj_scanpos = xsp; return false; } - private boolean jj_3_13() { - if (jj_3R_29()) return true; - return false; - } - - private boolean jj_3_12() { - if (jj_3R_28()) return true; + private boolean jj_3R_36() { + if (jj_3R_57()) return true; return false; } - private boolean jj_3R_124() { + private boolean jj_3R_119() { if (jj_scan_token(22)) return true; if (jj_scan_token(NONNEGINT)) return true; - if (jj_scan_token(33)) return true; - return false; - } - - private boolean jj_3_11() { - if (jj_3R_27()) return true; - return false; - } - - private boolean jj_3_10() { - if (jj_3R_26()) return true; return false; } - private boolean jj_3_9() { - if (jj_3R_25()) return true; + private boolean jj_3R_20() { + if (jj_3R_36()) return true; + if (jj_scan_token(29)) return true; + if (jj_3R_29()) return true; return false; } - private boolean jj_3_35() { + private boolean jj_3_34() { if (jj_scan_token(22)) return true; if (jj_scan_token(NONNEGINT)) return true; if (jj_scan_token(33)) return true; @@ -2786,90 +2730,90 @@ private boolean jj_3_35() { return false; } - private boolean jj_3_8() { - if (jj_3R_24()) return true; - return false; - } - - private boolean jj_3R_130() { - if (jj_3R_131()) return true; + private boolean jj_3_33() { + if (jj_scan_token(22)) return true; + if (jj_scan_token(NONNEGINT)) return true; + if (jj_scan_token(24)) return true; return false; } - private boolean jj_3R_31() { + private boolean jj_3R_112() { Token xsp; xsp = jj_scanpos; - if (jj_3_8()) { - jj_scanpos = xsp; - if (jj_3_9()) { - jj_scanpos = xsp; - if (jj_3_10()) { - jj_scanpos = 
xsp; - if (jj_3_11()) { + if (jj_3_33()) { jj_scanpos = xsp; - if (jj_3_12()) { + if (jj_3_34()) { jj_scanpos = xsp; - if (jj_3_13()) return true; - } - } - } + if (jj_3R_119()) return true; } } return false; } - private boolean jj_3R_117() { + private boolean jj_3_7() { + if (jj_scan_token(22)) return true; + if (jj_scan_token(28)) return true; + if (jj_3R_22()) return true; + return false; + } + + private boolean jj_3R_104() { + if (jj_scan_token(50)) return true; Token xsp; xsp = jj_scanpos; - if (jj_3_34()) { - jj_scanpos = xsp; - if (jj_3_35()) { - jj_scanpos = xsp; - if (jj_3R_124()) return true; - } - } + if (jj_3R_112()) jj_scanpos = xsp; + if (jj_3R_103()) return true; return false; } - private boolean jj_3_34() { + private boolean jj_3_6() { if (jj_scan_token(22)) return true; - if (jj_scan_token(NONNEGINT)) return true; - if (jj_scan_token(24)) return true; + if (jj_scan_token(27)) return true; + if (jj_scan_token(25)) return true; return false; } - private boolean jj_3R_109() { - if (jj_scan_token(50)) return true; - Token xsp; - xsp = jj_scanpos; - if (jj_3R_117()) jj_scanpos = xsp; - if (jj_3R_108()) return true; + private boolean jj_3R_56() { + if (jj_3R_67()) return true; return false; } - private boolean jj_3R_38() { - if (jj_3R_60()) return true; + private boolean jj_3_5() { + if (jj_scan_token(22)) return true; + if (jj_scan_token(25)) return true; + if (jj_3R_56()) return true; return false; } - private boolean jj_3R_59() { - if (jj_3R_70()) return true; + private boolean jj_3_4() { + if (jj_scan_token(22)) return true; + if (jj_3R_22()) return true; + if (jj_scan_token(23)) return true; return false; } - private boolean jj_3R_21() { - if (jj_3R_38()) return true; - if (jj_scan_token(29)) return true; - if (jj_3R_31()) return true; + private boolean jj_3R_19() { + Token xsp; + xsp = jj_scanpos; + if (jj_3_4()) { + jj_scanpos = xsp; + if (jj_3_5()) { + jj_scanpos = xsp; + if (jj_3_6()) { + jj_scanpos = xsp; + if (jj_3_7()) return true; + } + } + } return false; } - private boolean jj_3R_23() { + private boolean jj_3R_22() { if (jj_scan_token(REGEX)) return true; return false; } - private boolean jj_3R_108() { + private boolean jj_3R_103() { Token xsp; xsp = jj_scanpos; if (jj_scan_token(18)) { @@ -2897,351 +2841,283 @@ private boolean jj_3R_108() { return false; } - private boolean jj_3_7() { - if (jj_scan_token(22)) return true; - if (jj_scan_token(28)) return true; - if (jj_3R_23()) return true; - return false; - } - - private boolean jj_3R_129() { - if (jj_3R_132()) return true; - return false; - } - - private boolean jj_3R_48() { - if (jj_3R_68()) return true; + private boolean jj_3R_45() { + if (jj_3R_65()) return true; if (jj_scan_token(23)) return true; - if (jj_3R_31()) return true; return false; } - private boolean jj_3_6() { - if (jj_scan_token(22)) return true; - if (jj_scan_token(27)) return true; - if (jj_scan_token(25)) return true; + private boolean jj_3_3() { + if (jj_3R_21()) return true; return false; } - private boolean jj_3R_58() { + private boolean jj_3R_55() { if (jj_scan_token(NUMCMP)) return true; - Token xsp; - xsp = jj_scanpos; - if (jj_3R_129()) { - jj_scanpos = xsp; - if (jj_3R_130()) return true; - } return false; } - private boolean jj_3_5() { - if (jj_scan_token(22)) return true; - if (jj_scan_token(25)) return true; - if (jj_3R_59()) return true; + private boolean jj_3_2() { + if (jj_3R_20()) return true; return false; } - private boolean jj_3R_57() { + private boolean jj_3R_54() { if (jj_scan_token(49)) return true; - if 
(jj_scan_token(IDENTIFIER)) return true; return false; } - private boolean jj_3R_29() { + private boolean jj_3_1() { + if (jj_3R_19()) return true; + return false; + } + + private boolean jj_3R_27() { if (jj_scan_token(38)) return true; if (jj_scan_token(22)) return true; Token xsp; - if (jj_3R_48()) return true; + if (jj_3R_45()) return true; while (true) { xsp = jj_scanpos; - if (jj_3R_48()) { jj_scanpos = xsp; break; } + if (jj_3R_45()) { jj_scanpos = xsp; break; } } return false; } - private boolean jj_3R_56() { + private boolean jj_3R_53() { if (jj_scan_token(34)) return true; - Token xsp; - xsp = jj_scanpos; - if (jj_scan_token(18)) { - jj_scanpos = xsp; - if (jj_scan_token(17)) { - jj_scanpos = xsp; - if (jj_scan_token(7)) { - jj_scanpos = xsp; - if (jj_3R_128()) return true; - } - } - } - return false; - } - - private boolean jj_3R_20() { - Token xsp; - xsp = jj_scanpos; - if (jj_3_4()) { - jj_scanpos = xsp; - if (jj_3_5()) { - jj_scanpos = xsp; - if (jj_3_6()) { - jj_scanpos = xsp; - if (jj_3_7()) return true; - } - } - } - return false; - } - - private boolean jj_3_4() { - if (jj_scan_token(22)) return true; - if (jj_3R_23()) return true; - if (jj_scan_token(23)) return true; return false; } - private boolean jj_3R_37() { + private boolean jj_3R_35() { if (jj_scan_token(IDENTIFIER)) return true; Token xsp; xsp = jj_scanpos; - if (jj_3R_56()) { + if (jj_3R_53()) { jj_scanpos = xsp; - if (jj_3R_57()) { + if (jj_3R_54()) { jj_scanpos = xsp; - if (jj_3R_58()) return true; + if (jj_3R_55()) return true; } } return false; } - private boolean jj_3_3() { - if (jj_3R_22()) return true; - return false; - } - - private boolean jj_3_2() { - if (jj_3R_21()) return true; - return false; - } - - private boolean jj_3R_98() { - if (jj_3R_106()) return true; - return false; - } - - private boolean jj_3_1() { - if (jj_3R_20()) return true; + private boolean jj_3R_93() { + if (jj_3R_101()) return true; return false; } - private boolean jj_3R_93() { + private boolean jj_3R_88() { Token xsp; xsp = jj_scanpos; - if (jj_3R_98()) { + if (jj_3R_93()) { jj_scanpos = xsp; if (jj_scan_token(17)) return true; } return false; } - private boolean jj_3_33() { - if (jj_3R_37()) return true; + private boolean jj_3_32() { + if (jj_3R_35()) return true; return false; } - private boolean jj_3R_85() { + private boolean jj_3R_81() { if (jj_scan_token(47)) return true; - if (jj_3R_95()) return true; - if (jj_scan_token(48)) return true; + if (jj_3R_90()) return true; return false; } - private boolean jj_3R_82() { + private boolean jj_3R_78() { Token xsp; xsp = jj_scanpos; if (jj_scan_token(36)) { jj_scanpos = xsp; if (jj_scan_token(37)) return true; } - if (jj_3R_81()) return true; return false; } - private boolean jj_3R_84() { - if (jj_3R_94()) return true; + private boolean jj_3R_80() { + if (jj_3R_89()) return true; return false; } - private boolean jj_3R_68() { - if (jj_3R_81()) return true; + private boolean jj_3R_65() { + if (jj_3R_77()) return true; Token xsp; while (true) { xsp = jj_scanpos; - if (jj_3R_82()) { jj_scanpos = xsp; break; } + if (jj_3R_78()) { jj_scanpos = xsp; break; } } return false; } - private boolean jj_3R_92() { + private boolean jj_3R_87() { if (jj_scan_token(22)) return true; - if (jj_3R_37()) return true; + if (jj_3R_35()) return true; return false; } - private boolean jj_3R_69() { + private boolean jj_3R_66() { Token xsp; xsp = jj_scanpos; - if (jj_3R_83()) { + if (jj_3R_79()) { jj_scanpos = xsp; - if (jj_3R_84()) { + if (jj_3R_80()) { jj_scanpos = xsp; - if (jj_3R_85()) return true; + if 
(jj_3R_81()) return true; } } return false; } - private boolean jj_3R_83() { + private boolean jj_3R_79() { Token xsp; xsp = jj_scanpos; - if (jj_3R_92()) { + if (jj_3R_87()) { jj_scanpos = xsp; - if (jj_3_33()) { + if (jj_3_32()) { jj_scanpos = xsp; - if (jj_3R_93()) return true; + if (jj_3R_88()) return true; } } return false; } - private boolean jj_3R_91() { + private boolean jj_3R_86() { if (jj_scan_token(25)) return true; - if (jj_3R_68()) return true; - if (jj_scan_token(26)) return true; + if (jj_3R_65()) return true; return false; } - private boolean jj_3R_90() { - if (jj_3R_97()) return true; + private boolean jj_3R_85() { + if (jj_3R_92()) return true; return false; } - private boolean jj_3R_81() { + private boolean jj_3R_77() { Token xsp; xsp = jj_scanpos; - if (jj_3R_90()) { + if (jj_3R_85()) { jj_scanpos = xsp; - if (jj_3R_91()) return true; + if (jj_3R_86()) return true; } return false; } - private boolean jj_3R_54() { - if (jj_3R_69()) return true; + private boolean jj_3R_51() { + if (jj_3R_66()) return true; return false; } - private boolean jj_3_26() { - if (jj_3R_24()) return true; + private boolean jj_3_25() { + if (jj_3R_23()) return true; return false; } - private boolean jj_3R_53() { + private boolean jj_3R_50() { if (jj_scan_token(46)) return true; - if (jj_3R_69()) return true; + if (jj_3R_66()) return true; return false; } - private boolean jj_3R_35() { + private boolean jj_3R_33() { Token xsp; xsp = jj_scanpos; - if (jj_3R_53()) { + if (jj_3R_50()) { jj_scanpos = xsp; - if (jj_3R_54()) return true; + if (jj_3R_51()) return true; } return false; } - private boolean jj_3R_34() { - if (jj_3R_52()) return true; - if (jj_3R_31()) return true; + private boolean jj_3R_32() { + if (jj_3R_49()) return true; + if (jj_3R_29()) return true; return false; } - private boolean jj_3_32() { + private boolean jj_3_31() { if (jj_scan_token(46)) return true; if (jj_scan_token(25)) return true; - if (jj_3R_36()) return true; return false; } - private boolean jj_3_25() { - if (jj_3R_25()) return true; + private boolean jj_3_24() { + if (jj_3R_24()) return true; Token xsp; xsp = jj_scanpos; - if (jj_3R_34()) jj_scanpos = xsp; + if (jj_3R_32()) jj_scanpos = xsp; return false; } - private boolean jj_3R_97() { + private boolean jj_3R_92() { Token xsp; xsp = jj_scanpos; - if (jj_3_25()) { + if (jj_3_24()) { jj_scanpos = xsp; - if (jj_3_26()) return true; + if (jj_3_25()) return true; } return false; } - private boolean jj_3_31() { + private boolean jj_3_30() { if (jj_scan_token(25)) return true; - if (jj_3R_36()) return true; - if (jj_scan_token(26)) return true; + if (jj_3R_34()) return true; return false; } - private boolean jj_3_30() { - if (jj_3R_35()) return true; + private boolean jj_3_29() { + if (jj_3R_33()) return true; return false; } - private boolean jj_3R_55() { + private boolean jj_3R_52() { Token xsp; xsp = jj_scanpos; - if (jj_3_30()) { + if (jj_3_29()) { jj_scanpos = xsp; - if (jj_3_31()) { + if (jj_3_30()) { jj_scanpos = xsp; - if (jj_3_32()) return true; + if (jj_3_31()) return true; } } return false; } - private boolean jj_3R_47() { + private boolean jj_3R_44() { if (jj_scan_token(33)) return true; - if (jj_3R_31()) return true; + if (jj_3R_29()) return true; return false; } - private boolean jj_3R_28() { - if (jj_scan_token(31)) return true; - if (jj_3R_31()) return true; + private boolean jj_3R_26() { + if (jj_scan_token(25)) return true; + if (jj_3R_29()) return true; Token xsp; while (true) { xsp = jj_scanpos; - if (jj_3R_47()) { jj_scanpos = xsp; break; } + if 
(jj_3R_44()) { jj_scanpos = xsp; break; } } - if (jj_scan_token(32)) return true; + if (jj_scan_token(26)) return true; return false; } - private boolean jj_3R_46() { - if (jj_scan_token(33)) return true; + private boolean jj_3_23() { + if (jj_scan_token(35)) return true; + if (jj_3R_30()) return true; + return false; + } + + private boolean jj_3_22() { + if (jj_scan_token(35)) return true; if (jj_3R_31()) return true; return false; } - private boolean jj_3R_127() { + private boolean jj_3R_122() { Token xsp; xsp = jj_scanpos; if (jj_scan_token(44)) { @@ -3254,78 +3130,53 @@ private boolean jj_3R_127() { } } } - if (jj_3R_55()) return true; return false; } - private boolean jj_3R_27() { - if (jj_scan_token(25)) return true; - if (jj_3R_31()) return true; - Token xsp; - while (true) { - xsp = jj_scanpos; - if (jj_3R_46()) { jj_scanpos = xsp; break; } - } - if (jj_scan_token(26)) return true; + private boolean jj_3_21() { + if (jj_scan_token(31)) return true; + if (jj_3R_29()) return true; + if (jj_scan_token(32)) return true; return false; } - private boolean jj_3R_36() { - if (jj_3R_55()) return true; + private boolean jj_3R_34() { + if (jj_3R_52()) return true; Token xsp; while (true) { xsp = jj_scanpos; - if (jj_3R_127()) { jj_scanpos = xsp; break; } + if (jj_3R_122()) { jj_scanpos = xsp; break; } } return false; } - private boolean jj_3_24() { - if (jj_scan_token(35)) return true; - if (jj_3R_32()) return true; - return false; - } - - private boolean jj_3_23() { - if (jj_scan_token(35)) return true; - if (jj_3R_33()) return true; - return false; - } - - private boolean jj_3_22() { - if (jj_scan_token(31)) return true; - if (jj_3R_31()) return true; - if (jj_scan_token(32)) return true; - return false; - } - - private boolean jj_3R_41() { + private boolean jj_3R_39() { Token xsp; xsp = jj_scanpos; + if (jj_3_20()) { + jj_scanpos = xsp; if (jj_3_21()) { jj_scanpos = xsp; if (jj_3_22()) { jj_scanpos = xsp; - if (jj_3_23()) { - jj_scanpos = xsp; - if (jj_3_24()) return true; + if (jj_3_23()) return true; } } } return false; } - private boolean jj_3_21() { - if (jj_3R_30()) return true; + private boolean jj_3_20() { + if (jj_3R_28()) return true; return false; } - private boolean jj_3R_24() { - if (jj_3R_40()) return true; + private boolean jj_3R_23() { + if (jj_3R_38()) return true; Token xsp; while (true) { xsp = jj_scanpos; - if (jj_3R_41()) { jj_scanpos = xsp; break; } + if (jj_3R_39()) { jj_scanpos = xsp; break; } } return false; } @@ -3341,7 +3192,7 @@ private boolean jj_3R_24() { private Token jj_scanpos, jj_lastpos; private int jj_la; private int jj_gen; - final private int[] jj_la1 = new int[68]; + final private int[] jj_la1 = new int[67]; static private int[] jj_la1_0; static private int[] jj_la1_1; static { @@ -3349,12 +3200,12 @@ private boolean jj_3R_24() { jj_la1_init_1(); } private static void jj_la1_init_0() { - jj_la1_0 = new int[] {0x400180,0x40000000,0x0,0x8247ef80,0x247e000,0x40000000,0x40000000,0x207e000,0x180,0xf80,0x0,0x8247ef80,0x80000000,0x80000000,0x80000000,0x0,0x0,0x180000,0x2000f80,0x0,0x0,0x40000000,0x2000f80,0x0,0x5e000,0x8227f180,0x400000,0x8227f180,0x800000,0x0,0x400000,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x80000000,0x82460180,0x0,0x0,0x0,0x0,0x0,0x0,0x460180,0x40000000,0x40000000,0x60080,0x400000,0x60080,0x460180,0x60180,0x16100,0x80000,0x27e080,0x400000,0x400000,0x0,0x40000000,0x40000000,0x56080,0x16000,0x6000,0x180000,0x40080,0x40080,}; + jj_la1_0 = new int[] 
{0x400180,0x40000000,0x0,0x247ef80,0x247e000,0x40000000,0x40000000,0x207e000,0x180,0xf80,0x0,0x247ef80,0x80000000,0x80000000,0x80000000,0x0,0x180000,0x2000f80,0x0,0x0,0x40000000,0x2000f80,0x0,0x5e000,0x8227f180,0x400000,0x8227f180,0x800000,0x0,0x400000,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x80000000,0x82460180,0x0,0x0,0x0,0x0,0x0,0x0,0x460180,0x40000000,0x40000000,0x60080,0x400000,0x60080,0x460180,0x60180,0x16100,0x80000,0x27e080,0x400000,0x400000,0x0,0x40000000,0x40000000,0x56080,0x16000,0x6000,0x180000,0x40080,0x40080,}; } private static void jj_la1_init_1() { - jj_la1_1 = new int[] {0x0,0x0,0x2,0x40,0x0,0x2,0x2,0x0,0x0,0x0,0x2,0x40,0x8,0x8,0x8,0x2,0x2,0x0,0x0,0x30,0x30,0x2,0x0,0x80,0x0,0x40000,0x700,0x40000,0x0,0x700,0x0,0x200,0x820,0x820,0x1830,0x1830,0x2200,0x2200,0x0,0xc000,0x820,0x820,0x1010,0x1010,0x1830,0x1830,0xc000,0x2,0x2,0x0,0x0,0x0,0x8000,0x0,0x0,0x20004,0x0,0x0,0x0,0x80000,0x2,0x2,0x0,0x0,0x0,0x0,0x0,0x0,}; + jj_la1_1 = new int[] {0x0,0x0,0x2,0x40,0x0,0x2,0x2,0x0,0x0,0x0,0x2,0x40,0x8,0x8,0x8,0x2,0x0,0x0,0x30,0x30,0x2,0x0,0x80,0x0,0x40000,0x700,0x40000,0x0,0x700,0x0,0x200,0x820,0x820,0x1830,0x1830,0x2200,0x2200,0x0,0xc000,0x820,0x820,0x1010,0x1010,0x1830,0x1830,0xc000,0x2,0x2,0x0,0x0,0x0,0x8000,0x0,0x0,0x20004,0x0,0x0,0x0,0x80000,0x2,0x2,0x0,0x0,0x0,0x0,0x0,0x0,}; } - final private JJCalls[] jj_2_rtns = new JJCalls[35]; + final private JJCalls[] jj_2_rtns = new JJCalls[34]; private boolean jj_rescan = false; private int jj_gc = 0; @@ -3369,7 +3220,7 @@ public TokenSequenceParser(java.io.InputStream stream, String encoding) { token = new Token(); jj_ntk = -1; jj_gen = 0; - for (int i = 0; i < 68; i++) jj_la1[i] = -1; + for (int i = 0; i < 67; i++) jj_la1[i] = -1; for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls(); } @@ -3384,7 +3235,7 @@ public void ReInit(java.io.InputStream stream, String encoding) { token = new Token(); jj_ntk = -1; jj_gen = 0; - for (int i = 0; i < 68; i++) jj_la1[i] = -1; + for (int i = 0; i < 67; i++) jj_la1[i] = -1; for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls(); } @@ -3395,7 +3246,7 @@ public TokenSequenceParser(java.io.Reader stream) { token = new Token(); jj_ntk = -1; jj_gen = 0; - for (int i = 0; i < 68; i++) jj_la1[i] = -1; + for (int i = 0; i < 67; i++) jj_la1[i] = -1; for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls(); } @@ -3406,7 +3257,7 @@ public void ReInit(java.io.Reader stream) { token = new Token(); jj_ntk = -1; jj_gen = 0; - for (int i = 0; i < 68; i++) jj_la1[i] = -1; + for (int i = 0; i < 67; i++) jj_la1[i] = -1; for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls(); } @@ -3416,7 +3267,7 @@ public TokenSequenceParser(TokenSequenceParserTokenManager tm) { token = new Token(); jj_ntk = -1; jj_gen = 0; - for (int i = 0; i < 68; i++) jj_la1[i] = -1; + for (int i = 0; i < 67; i++) jj_la1[i] = -1; for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls(); } @@ -3426,7 +3277,7 @@ public void ReInit(TokenSequenceParserTokenManager tm) { token = new Token(); jj_ntk = -1; jj_gen = 0; - for (int i = 0; i < 68; i++) jj_la1[i] = -1; + for (int i = 0; i < 67; i++) jj_la1[i] = -1; for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls(); } @@ -3543,7 +3394,7 @@ public ParseException generateParseException() { la1tokens[jj_kind] = true; jj_kind = -1; } - for (int i = 0; i < 68; i++) { + for (int i = 0; i < 67; i++) { if (jj_la1[i] == jj_gen) { for (int j = 0; j < 32; j++) { if ((jj_la1_0[i] & (1< exprs = new ArrayList(); - Expression expr; -} { - ( 
"[" - expr = Expression(env) - { exprs.add(expr); } - ( - "," - expr = Expression(env) - { exprs.add(expr); } - )* - "]" - ) - { return new Expressions.ListExpression(Expressions.TYPE_LIST, exprs); } -} - Expression BasicCondExpression(Env env) : { Expression expr1 = null; Expression expr2 = null; diff --git a/src/edu/stanford/nlp/math/ArrayMath.java b/src/edu/stanford/nlp/math/ArrayMath.java index 97989fa51c..10a263f990 100644 --- a/src/edu/stanford/nlp/math/ArrayMath.java +++ b/src/edu/stanford/nlp/math/ArrayMath.java @@ -786,7 +786,7 @@ public static float max(float[] a) { } /** - * @return the index of the max value; if max is a tie, returns the first one. + * @return the index of the min value; if min is a tie, returns the first one. */ public static int argmin(double[] a) { double min = Double.POSITIVE_INFINITY; @@ -800,12 +800,15 @@ public static int argmin(double[] a) { return argmin; } + /** + * @return The minimum value in an array. + */ public static double min(double[] a) { return a[argmin(a)]; } /** - * Returns the largest value in a vector of doubles. Any values which + * Returns the smallest value in a vector of doubles. Any values which * are NaN or infinite are ignored. If the vector is empty, 0.0 is * returned. */ @@ -816,7 +819,7 @@ public static double safeMin(double[] v) { } /** - * @return the index of the max value; if max is a tie, returns the first one. + * @return the index of the min value; if min is a tie, returns the first one. */ public static int argmin(float[] a) { float min = Float.POSITIVE_INFINITY; @@ -835,7 +838,7 @@ public static float min(float[] a) { } /** - * @return the index of the max value; if max is a tie, returns the first one. + * @return the index of the min value; if min is a tie, returns the first one. */ public static int argmin(int[] a) { int min = Integer.MAX_VALUE; @@ -1112,12 +1115,6 @@ public static double innerProduct(float[] a, float[] b) { // UTILITIES - public static int[] subArray(int[] a, int from, int to) { - int[] result = new int[to-from]; - System.arraycopy(a, from, result, 0, to-from); - return result; - } - public static double[][] load2DMatrixFromFile(String filename) throws IOException { String s = IOUtils.slurpFile(filename); String[] rows = s.split("[\r\n]+"); @@ -1395,7 +1392,7 @@ public static double mean(double[] a) { public static int mean(int[] a) { return sum(a) / a.length; } - + public static double median(double[] a) { double[] b = new double[a.length]; System.arraycopy(a, 0, b, 0, b.length); @@ -1528,29 +1525,29 @@ public static boolean containsInSubarray(int[] a, int begin, int end, int i) { * x and y, then compute innerProduct(x,y)/(x.length-1). */ public static double pearsonCorrelation(double[] x, double[] y) { - double result; - double sum_sq_x = 0, sum_sq_y = 0; + double result; + double sum_sq_x = 0, sum_sq_y = 0; double mean_x = x[0], mean_y = y[0]; - double sum_coproduct = 0; - for(int i=2; i 0) { - if ((e & 1) != 0) { - result *= currPow; + if (e <= 1) { + if (e == 1) { + return b; + } else { + return 1; // this is also what you get for e < 0 ! + } + } else { + if (e == 2) { + return b * b; + } else { + int result = 1; + while (e > 0) { + if ((e & 1) != 0) { + result *= b; + } + b *= b; + e >>= 1; } - currPow *= currPow; - e >>= 1; + return result; } - return result; } } /** - * Exponentiation like we learned in grade school: - * multiply b by itself e times. Uses power of two trick. - * e must be nonnegative!!! no checking!!! 
- * - * @param b base - * @param e exponent - * @return b^e - */ + * Exponentiation like we learned in grade school: + * multiply b by itself e times. Uses power of two trick. + * e must be nonnegative!!! no checking!!! + * + * @param b base + * @param e exponent + * @return b^e + */ public static float intPow(float b, int e) { float result = 1.0f; float currPow = b; @@ -493,7 +495,7 @@ public static double hypergeometric(int k, int n, int r, int m) { double ans = 1.0; // do (n-r)x...x((n-r)-((m-k)-1))/n x...x (n-((m-k-1))) - // leaving rest of denominator to get to multimply by (n-(m-1)) + // leaving rest of denominator to get to multiply by (n-(m-1)) // that's k things which goes into next loop for (int nr = n - r, n0 = n; nr > (n - r) - (m - k); nr--, n0--) { // System.out.println("Multiplying by " + nr); @@ -640,7 +642,8 @@ public static double sigmoid(double x) { } - private static float[] acosCache = null; + private static float[] acosCache; // = null; + /** * Compute acos very quickly by directly looking up the value. * @param cosValue The cosine of the angle to fine. @@ -668,13 +671,13 @@ public static double acos(double cosValue) { public static double poisson(int x, double lambda) { if (x<0 || lambda<=0.0) throw new RuntimeException("Bad arguments: " + x + " and " + lambda); double p = (Math.exp(-lambda) * Math.pow(lambda, x)) / factorial(x); - if (Double.isInfinite(p) || p<=0.0) throw new RuntimeException(Math.exp(-lambda) +" "+ Math.pow(lambda, x) +" "+ factorial(x)); + if (Double.isInfinite(p) || p<=0.0) throw new RuntimeException(Math.exp(-lambda) +" "+ Math.pow(lambda, x) + ' ' + factorial(x)); return p; } /** * Uses floating point so that it can represent the really big numbers that come up. - * @param x Argumet to take factorial of + * @param x Argument to take factorial of * @return Factorial of argument */ public static double factorial(int x) { @@ -689,7 +692,7 @@ public static double factorial(int x) { * Tests the hypergeometric distribution code, or other functions * provided in this module. * - * @param args Either none, and the log add rountines are tested, or the + * @param args Either none, and the log add routines are tested, or the * following 4 arguments: k (cell), n (total), r (row), m (col) */ public static void main(String[] args) { diff --git a/src/edu/stanford/nlp/naturalli/Monotonicity.java b/src/edu/stanford/nlp/naturalli/Monotonicity.java new file mode 100644 index 0000000000..7444f2f0c5 --- /dev/null +++ b/src/edu/stanford/nlp/naturalli/Monotonicity.java @@ -0,0 +1,13 @@ +package edu.stanford.nlp.naturalli; + +/** + * A monotonicity value. 
+ * + * @author Gabor Angeli + */ +public enum Monotonicity { + MONOTONE, + ANTITONE, + NONMONOTONE, + INVALID; +} diff --git a/src/edu/stanford/nlp/naturalli/MonotonicityType.java b/src/edu/stanford/nlp/naturalli/MonotonicityType.java new file mode 100644 index 0000000000..48fe71fd9b --- /dev/null +++ b/src/edu/stanford/nlp/naturalli/MonotonicityType.java @@ -0,0 +1,13 @@ +package edu.stanford.nlp.naturalli; + +/** + * The monotonicity type -- that is, additive, multiplicative, or both/neither + * + * @author Gabor Angeli + */ +public enum MonotonicityType { + NONE, + ADDITIVE, + MULTIPLICATIVE, + BOTH; +} diff --git a/src/edu/stanford/nlp/naturalli/NaturalLogicAnnotations.java b/src/edu/stanford/nlp/naturalli/NaturalLogicAnnotations.java new file mode 100644 index 0000000000..b276879f16 --- /dev/null +++ b/src/edu/stanford/nlp/naturalli/NaturalLogicAnnotations.java @@ -0,0 +1,58 @@ +package edu.stanford.nlp.naturalli; + +import edu.stanford.nlp.ie.util.RelationTriple; +import edu.stanford.nlp.ling.CoreAnnotation; + +import java.util.Collection; + +/** + * A collection of {@link edu.stanford.nlp.ling.CoreAnnotation}s for various Natural Logic data. + * + * @author Gabor Angeli + */ +public class NaturalLogicAnnotations { + + /** + * An annotation which attaches to a CoreLabel to denote that this is an operator in natural logic, + * to describe which operator it is, and to give the scope of its argument(s). + */ + public static final class OperatorAnnotation implements CoreAnnotation { + @Override + public Class getType() { + return OperatorSpec.class; + } + } + + /** + * An annotation which attaches to a CoreLabel to denote that this is an operator in natural logic, + * to describe which operator it is, and to give the scope of its argument(s). + */ + public static final class PolarityAnnotation implements CoreAnnotation { + @Override + public Class getType() { + return Polarity.class; + } + } + + /** + * The set of sentences which are entailed by the original sentence, according to Natural Logic semantics. + */ + public static final class EntailedSentencesAnnotation implements CoreAnnotation> { + @SuppressWarnings("unchecked") + @Override + public Class> getType() { + return (Class>) ((Object) Collection.class); + } + } + + /** + * The set of relation triples extracted from this sentence. 
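+ * For example, a sentence like "Obama was born in Hawaii" might yield a (subject; relation; object)
+ * triple along the lines of ("Obama"; "was born in"; "Hawaii"); the exact segmentation is up to the
+ * extractor, so this is only an illustration.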
+ */ + public static final class RelationTriplesAnnotation implements CoreAnnotation> { + @SuppressWarnings("unchecked") + @Override + public Class> getType() { + return (Class>) ((Object) Collection.class); + } + } +} diff --git a/src/edu/stanford/nlp/naturalli/NaturalLogicAnnotator.java b/src/edu/stanford/nlp/naturalli/NaturalLogicAnnotator.java new file mode 100644 index 0000000000..ba1e38592a --- /dev/null +++ b/src/edu/stanford/nlp/naturalli/NaturalLogicAnnotator.java @@ -0,0 +1,435 @@ +package edu.stanford.nlp.naturalli; + +import edu.stanford.nlp.ling.CoreAnnotations; +import edu.stanford.nlp.ling.CoreLabel; +import edu.stanford.nlp.ling.IndexedWord; +import edu.stanford.nlp.pipeline.Annotation; +import edu.stanford.nlp.pipeline.SentenceAnnotator; +import edu.stanford.nlp.semgraph.SemanticGraph; +import edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations; +import edu.stanford.nlp.semgraph.SemanticGraphEdge; +import edu.stanford.nlp.semgraph.semgrex.SemgrexMatcher; +import edu.stanford.nlp.semgraph.semgrex.SemgrexPattern; +import edu.stanford.nlp.util.CoreMap; +import edu.stanford.nlp.util.Pair; +import edu.stanford.nlp.util.StringUtils; +import edu.stanford.nlp.util.Triple; +import edu.stanford.nlp.naturalli.NaturalLogicAnnotations.*; + +import java.util.*; +import java.util.function.Function; + +/** + * An annotator marking operators with their scope. + * Look at {@link NaturalLogicAnnotator#PATTERNS} for the full list of patterns, otherwise + * {@link NaturalLogicAnnotator#doOneSentence(Annotation, CoreMap)} is the main interface for this class. + * + * TODO(gabor) annotate generics as "most" + * + * @author Gabor Angeli + */ +@SuppressWarnings("unchecked") +public class NaturalLogicAnnotator extends SentenceAnnotator { + + /** + * A regex for arcs that act as determiners. + */ + private static final String DET = "/(pre)?det|a(dv)?mod|neg|num|nn/"; + /** + * A regex for arcs that we pretend are subject arcs. + */ + private static final String GEN_SUBJ = "/[ni]subj(pass)?/"; + /** + * A regex for arcs that we pretend are object arcs. + */ + private static final String GEN_OBJ = "/[di]obj|xcomp|advcl|acomp/"; + /** + * A regex for arcs that we pretend are copula. + */ + private static final String GEN_COP = "/cop|aux(pass)?/"; + /** + * A regex for arcs which denote a sub-clause (e.g., "at Stanford" or "who are at Stanford") + */ + private static final String GEN_CLAUSE = "/prep|rcmod/"; + /** + * A regex for arcs which denote a preposition + */ + private static final String GEN_PREP = "/prep|advcl|ccomp|advmod/"; + + /** + * A Semgrex fragment for matching a quantifier. + */ + private static final String QUANTIFIER; + + static { + Set singleWordQuantifiers = new HashSet<>(); + for (Operator q : Operator.values()) { + String[] tokens = q.surfaceForm.split("\\s+"); + if (!tokens[tokens.length - 1].startsWith("_")) { + singleWordQuantifiers.add("(" + tokens[tokens.length - 1].toLowerCase() + ")"); + } + } + QUANTIFIER = "[ {lemma:/" + StringUtils.join(singleWordQuantifiers, "|") + "/}=quantifier | {pos:CD}=quantifier ]"; + } + + /** + * The patterns to use for marking quantifier scopes. 
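+ * As a rough illustration (assuming a standard dependency parse), the first pattern below matches
+ * "All cats eat mice" with quantifier = "All", subject = "cats", pivot = "eat", and object = "mice":
+ * "eat" reaches "cats" over an nsubj arc, "cats" dominates "All" through a det arc, and "eat"
+ * reaches "mice" over a dobj arc.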
+ */ + private static final List PATTERNS = Collections.unmodifiableList(new ArrayList() {{ + // { All cats eat mice, + // All cats want milk } + add(SemgrexPattern.compile("{}=pivot >"+GEN_SUBJ+" ({}=subject >>"+DET+" "+QUANTIFIER+") >"+GEN_OBJ+" {}=object")); + // { All cats are in boxes, + // All cats voted for Obama, + // All cats have voted for Obama } + add(SemgrexPattern.compile("{pos:/V.*/}=pivot >"+GEN_SUBJ+" ({}=subject >>"+DET+" "+QUANTIFIER+") >"+GEN_PREP+" {}=object")); + // { All cats are cute, + // All cats can purr } + add(SemgrexPattern.compile("{}=object >"+GEN_SUBJ+" ({}=subject >>"+DET+" "+QUANTIFIER+") >"+GEN_COP+" {}=pivot")); + // { Everyone at Stanford likes cats, + // Everyone who is at Stanford likes cats } + add(SemgrexPattern.compile("{}=pivot >"+GEN_SUBJ+" ( "+QUANTIFIER+" >"+GEN_CLAUSE+" {}=subject ) >"+GEN_OBJ+" {}=object")); + // { Everyone at Stanford voted for Colbert } + add(SemgrexPattern.compile("{pos:/V.*/}=pivot >"+GEN_SUBJ+" ( "+QUANTIFIER+" >"+GEN_CLAUSE+" {}=subject ) >"+GEN_PREP+" {}=object")); + // { Felix likes cat food } + add(SemgrexPattern.compile("{}=pivot >"+GEN_SUBJ+" {pos:NNP}=Subject >"+GEN_OBJ+" {}=object")); + // { Felix has spoken to Fido } + add(SemgrexPattern.compile("{pos:/V.*/}=pivot >"+GEN_SUBJ+" {pos:NNP}=Subject >/prep|ccomp|[di]obj/ {}=object")); + // { Felix is a cat, + // Felix is cute } + add(SemgrexPattern.compile("{}=object >"+GEN_SUBJ+" {pos:NNP}=Subject >"+GEN_COP+" {}=pivot")); + // { Some cats do n't like dogs } + add(SemgrexPattern.compile("{}=pivot >neg "+QUANTIFIER+" >"+GEN_OBJ+" {}=object")); + // { All of the cats hate dogs. } + add(SemgrexPattern.compile("{pos:/V.*/}=pivot >"+GEN_SUBJ+" ( "+QUANTIFIER+" >prep {}=subject ) >"+GEN_OBJ+" {}=object")); + add(SemgrexPattern.compile("{pos:/V.*/}=pivot >dep ( "+QUANTIFIER+" >prep {}=subject ) >"+GEN_SUBJ+" {}=object")); // as above, but handle a common parse error + // { Either cats or dogs have tails. } + add(SemgrexPattern.compile("{pos:/V.*/}=pivot >dep {lemma:either}=quantifier >"+GEN_SUBJ+" {}=subject >"+GEN_OBJ+" {}=object")); + // { There are cats } + add(SemgrexPattern.compile("{}=quantifier >"+GEN_SUBJ+" {}=pivot >>expl {}")); + }}); + + /** A helper method for + * {@link NaturalLogicAnnotator#getModifierSubtreeSpan(edu.stanford.nlp.semgraph.SemanticGraph, edu.stanford.nlp.ling.IndexedWord)} and + * {@link NaturalLogicAnnotator#getSubtreeSpan(edu.stanford.nlp.semgraph.SemanticGraph, edu.stanford.nlp.ling.IndexedWord)}. 
+ */ + private static Pair getGeneralizedSubtreeSpan(SemanticGraph tree, IndexedWord root, Set validArcs) { + int min = root.index(); + int max = root.index(); + Queue fringe = new LinkedList<>(); + for (SemanticGraphEdge edge : tree.getOutEdgesSorted(root)) { + String edgeLabel = edge.getRelation().getShortName(); + if ((validArcs == null || validArcs.contains(edgeLabel)) && + !"punct".equals(edgeLabel)) { + fringe.add(edge.getDependent()); + } + } + while (!fringe.isEmpty()) { + IndexedWord node = fringe.poll(); + min = Math.min(node.index(), min); + max = Math.max(node.index(), max); + for (SemanticGraphEdge edge : tree.getOutEdgesSorted(node)) { + if (!"punct".equals(edge.getRelation().getShortName())) { // ignore punctuation + fringe.add(edge.getDependent()); + } + } + } + return Pair.makePair(min, max + 1); + } + + private static final Set MODIFIER_ARCS = Collections.unmodifiableSet(new HashSet() {{ + add("aux"); + add("prep"); + }}); + + /** + * Returns the yield span for the word rooted at the given node, but only traversing a fixed set of relations. + * @param tree The dependency graph to get the span from. + * @param root The root word of the span. + * @return A one indexed span rooted at the given word. + */ + private static Pair getModifierSubtreeSpan(SemanticGraph tree, IndexedWord root) { + return getGeneralizedSubtreeSpan(tree, root, MODIFIER_ARCS); + } + + /** + * Returns the yield span for the word rooted at the given node. So, for example, all cats like dogs rooted at the word + * "cats" would yield a span (1, 3) -- "all cats". + * @param tree The dependency graph to get the span from. + * @param root The root word of the span. + * @return A one indexed span rooted at the given word. + */ + private static Pair getSubtreeSpan(SemanticGraph tree, IndexedWord root) { + return getGeneralizedSubtreeSpan(tree, root, null); + } + + /** + * Effectively, merge two spans + */ + private static Pair includeInSpan(Pair span, Pair toInclude) { + return Pair.makePair(Math.min(span.first, toInclude.first), Math.max(span.second, toInclude.second)); + } + + /** + * Exclude the second span from the first, if the second is on the edge of the first. If the second is in the middle, it's + * unclear what this function should do, so it just returns the original span. + */ + private static Pair excludeFromSpan(Pair span, Pair toExclude) { + if (toExclude.second <= span.first || toExclude.first >= span.second) { + // Case: toExclude is outside of the span anyways + return span; + } else if (toExclude.first <= span.first && toExclude.second > span.first) { + // Case: overlap on the front + return Pair.makePair(toExclude.second, span.second); + } else if (toExclude.first < span.second && toExclude.second >= span.second) { + // Case: overlap on the front + return Pair.makePair(span.first, toExclude.first); + } else if (toExclude.first > span.first && toExclude.second < span.second) { + // Case: toExclude is within the span + return span; + } else { + throw new IllegalStateException("This case should be impossible"); + } + } + + /** + * Compute the span for a given matched pattern. + * At a high level: + * + *
+ * <ul>
+ *   <li>If both a subject and an object exist, we take the subject minus the quantifier, and the object plus the pivot.</li>
+ *   <li>If only an object exists, we make the subject the object, and create a dummy object to signify a one-place quantifier.</li>
+ *   <li>If neither the subject nor the object exists, the pivot is the subject and there is no object.</li>
+ * </ul>
+ *
+ * But:
+ *
+ * <ul>
+ *   <li>If we have a two-place quantifier, the object is allowed to absorb various specific arcs from the pivot.</li>
+ *   <li>If we have a one-place quantifier, the object is allowed to absorb only prepositions from the pivot.</li>
+ * </ul>
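+ *
+ * A hypothetical worked example (1-based token spans, before the shift to 0-based offsets below),
+ * assuming "All cats eat mice" parses with det(cats, All), nsubj(eat, cats), and dobj(eat, mice):
+ * <pre>
+ *   quantifierSpan = (1, 2)                                             // "All"
+ *   subjSpan = excludeFromSpan(getSubtreeSpan(cats) = (1, 3), (1, 2))
+ *            = (2, 3)                                                   // "cats"
+ *   objSpan  = excludeFromSpan(includeInSpan(getSubtreeSpan(mice) = (4, 5),
+ *                                            getModifierSubtreeSpan(eat) = (3, 4)), (1, 3))
+ *            = (3, 5)                                                   // "eat mice"
+ * </pre>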
    + */ + private OperatorSpec computeScope(SemanticGraph tree, Operator operator, + IndexedWord pivot, Pair quantifierSpan, + IndexedWord subject, IndexedWord object) { + Pair subjSpan; + Pair objSpan; + if (subject == null && object == null) { + subjSpan = getSubtreeSpan(tree, pivot); + objSpan = Pair.makePair(subjSpan.second, subjSpan.second); + } else if (subject == null) { + subjSpan = includeInSpan(getSubtreeSpan(tree, object), getGeneralizedSubtreeSpan(tree, pivot, Collections.singleton("prep"))); + objSpan = Pair.makePair(subjSpan.second, subjSpan.second); + } else { + Pair subjectSubtree = getSubtreeSpan(tree, subject); + subjSpan = excludeFromSpan(subjectSubtree, quantifierSpan); + objSpan = excludeFromSpan(includeInSpan(getSubtreeSpan(tree, object), getModifierSubtreeSpan(tree, pivot)), subjectSubtree); + } + return new OperatorSpec(operator, + quantifierSpan.first - 1, quantifierSpan.second - 1, + subjSpan.first - 1, subjSpan.second - 1, + objSpan.first - 1, objSpan.second - 1); + } + + /** + * Try to find which quantifier we matched, given that we matched the head of a quantifier at the given IndexedWord, and that + * this whole deal is taking place in the given sentence. + * + * @param sentence The sentence we are matching. + * @param quantifier The word at which we matched a quantifier. + * @return An optional triple consisting of the particular quantifier we matched, as well as the span of that quantifier in the sentence. + */ + private Optional> validateQuantiferByHead(CoreMap sentence, IndexedWord quantifier) { + int end = quantifier.index(); + for (int start = Math.max(0, end - 10); start < end; ++start) { + Function glossFn = (label) -> "CD".equals(label.tag()) ? "__NUM__" : label.lemma(); + String gloss = StringUtils.join(sentence.get(CoreAnnotations.TokensAnnotation.class), " ", glossFn, start, end).toLowerCase(); + for (Operator q : Operator.values()) { + if (q.surfaceForm.equals(gloss)) { + return Optional.of(Triple.makeTriple(q, start + 1, end + 1)); + } + } + } + return Optional.empty(); + } + + + /** + * Find the operators in this sentence, annotating the head word (only!) of each operator with the + * {@link edu.stanford.nlp.naturalli.NaturalLogicAnnotations.OperatorAnnotation}. + * + * @param sentence As in {@link edu.stanford.nlp.naturalli.NaturalLogicAnnotator#doOneSentence(edu.stanford.nlp.pipeline.Annotation, edu.stanford.nlp.util.CoreMap)} + */ + private void annotateOperators(CoreMap sentence) { + SemanticGraph tree = sentence.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class); + for (SemgrexPattern pattern : PATTERNS) { + SemgrexMatcher matcher = pattern.matcher(tree); + while (matcher.find()) { + + // Get terms + IndexedWord properSubject = matcher.getNode("Subject"); + IndexedWord quantifier, subject; + boolean namedEntityQuantifier = false; + if (properSubject != null) { + quantifier = subject = properSubject; + namedEntityQuantifier = true; + } else { + quantifier = matcher.getNode("quantifier"); + subject = matcher.getNode("subject"); + } + + // Validate quantifier + // At the end of this + Optional> quantifierInfo; + if (namedEntityQuantifier) { + // named entities have the "all" semantics by default. + quantifierInfo = Optional.of(Triple.makeTriple(Operator.ALL, quantifier.index(), quantifier.index())); // note: empty quantifier span given + } else { + // find the quantifier, and return some info about it. 
+ quantifierInfo = validateQuantiferByHead(sentence, quantifier); + } + + // Set tokens + if (quantifierInfo.isPresent()) { + // Compute span + OperatorSpec scope = computeScope(tree, quantifierInfo.get().first, + matcher.getNode("pivot"), Pair.makePair(quantifierInfo.get().second, quantifierInfo.get().third), subject, matcher.getNode("object")); + // Set annotation + CoreLabel token = sentence.get(CoreAnnotations.TokensAnnotation.class).get(quantifier.index() - 1); + OperatorSpec oldScope = token.get(OperatorAnnotation.class); + if (oldScope == null || oldScope.quantifierLength() < scope.quantifierLength() || + oldScope.instance != scope.instance) { + token.set(OperatorAnnotation.class, scope); + } else { + token.set(OperatorAnnotation.class, OperatorSpec.merge(oldScope, scope)); + } + } + } + } + + // Ensure we didn't select overlapping quantifiers. For example, "a" and "a few" can often overlap. + // In these cases, take the longer quantifier match. + List quantifiers = new ArrayList<>(); + for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) { + if (token.has(OperatorAnnotation.class)) { + quantifiers.add(token.get(OperatorAnnotation.class)); + } + } + quantifiers.sort( (x, y) -> y.quantifierLength() - x.quantifierLength()); + for (OperatorSpec quantifier : quantifiers) { + for (int i = quantifier.quantifierBegin; i < quantifier.quantifierEnd; ++i) { + if (i != quantifier.quantifierHead) { + sentence.get(CoreAnnotations.TokensAnnotation.class).get(i).remove(OperatorAnnotation.class); + } + } + } + } + + /** + * Annotate every token for its polarity, based on the operators found. This function will set the + * {@link edu.stanford.nlp.naturalli.NaturalLogicAnnotations.PolarityAnnotation} for every token. + * + * @param sentence As in {@link edu.stanford.nlp.naturalli.NaturalLogicAnnotator#doOneSentence(edu.stanford.nlp.pipeline.Annotation, edu.stanford.nlp.util.CoreMap)} + */ + private void annotatePolarity(CoreMap sentence) { + // Collect all the operators in this sentence + List operators = new ArrayList<>(); + List tokens = sentence.get(CoreAnnotations.TokensAnnotation.class); + for (CoreLabel token : tokens) { + OperatorSpec specOrNull = token.get(OperatorAnnotation.class); + if (specOrNull != null) { + operators.add(specOrNull); + } + } + + // Set polarity for each token + for (int i = 0; i < tokens.size(); ++i) { + CoreLabel token = tokens.get(i); + // Get operators in scope + List> inScope = new ArrayList<>(4); + for (OperatorSpec operator : operators) { + if (i >= operator.subjectBegin && i < operator.subjectEnd) { + inScope.add(Triple.makeTriple(operator.subjectEnd - operator.subjectBegin, operator.instance.subjMono, operator.instance.subjType)); + } else if (i >= operator.objectBegin && i < operator.objectEnd) { + inScope.add(Triple.makeTriple(operator.objectEnd - operator.objectBegin, operator.instance.objMono, operator.instance.objType)); + } + } + // Sort the operators by their scope (approximated by the size of their argument span + inScope.sort( (x, y) -> y.first - x.first); + // Create polarity + List> info = new ArrayList<>(inScope.size()); + for (Triple term : inScope) { + info.add(Pair.makePair(term.second, term.third)); + } + Polarity polarity = new Polarity(info); + // Set polarity + token.set(PolarityAnnotation.class, polarity); + } + } + + /** + * If false, don't annotate tokens for polarity but only find the operators and their scopes. + */ + public final boolean doPolarity; + + /** + * Create a new annotator. 
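+ * A minimal usage sketch; the annotator name "natlog" is an assumption here, chosen so that the
+ * property read below becomes "natlog.doPolarity":
+ * <pre>
+ *   Properties props = new Properties();
+ *   props.setProperty("natlog.doPolarity", "false");  // only mark operators and their scopes
+ *   NaturalLogicAnnotator annotator = new NaturalLogicAnnotator("natlog", props);
+ *   annotator.annotate(document);  // document must already have tokens and basic dependencies
+ * </pre>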
+ * @param annotatorName The prefix for the properties for this annotator. + * @param props The properties to configure this annotator with. + */ + public NaturalLogicAnnotator(String annotatorName, Properties props) { + this.doPolarity = Boolean.valueOf(props.getProperty(annotatorName + ".doPolarity", "true")); + } + + /** + * @see edu.stanford.nlp.naturalli.NaturalLogicAnnotator#NaturalLogicAnnotator(String, java.util.Properties) + */ + public NaturalLogicAnnotator(Properties props) { + this(STANFORD_NATLOG, props); + + } + + /** The default constructor */ + public NaturalLogicAnnotator() { + this("__irrelevant__", new Properties()); + } + + /** {@inheritDoc} */ + @Override + protected void doOneSentence(Annotation annotation, CoreMap sentence) { + annotateOperators(sentence); + if (doPolarity) { + annotatePolarity(sentence); + } + } + + /** {@inheritDoc} */ + @Override + protected int nThreads() { + return 1; + } + + /** {@inheritDoc} */ + @Override + protected long maxTime() { + return Long.MAX_VALUE; + } + + /** {@inheritDoc} */ + @Override + protected void doOneFailedSentence(Annotation annotation, CoreMap sentence) { + System.err.println("Failed to annotate: " + sentence.get(CoreAnnotations.TextAnnotation.class)); + } + + /** {@inheritDoc} */ + @Override + public Set requirementsSatisfied() { + return Collections.singleton(NATLOG_REQUIREMENT); + } + + /** {@inheritDoc} */ + @Override + public Set requires() { + return Collections.EMPTY_SET; // TODO(gabor) set me! + } +} diff --git a/src/edu/stanford/nlp/naturalli/NaturalLogicRelation.java b/src/edu/stanford/nlp/naturalli/NaturalLogicRelation.java new file mode 100644 index 0000000000..f995165894 --- /dev/null +++ b/src/edu/stanford/nlp/naturalli/NaturalLogicRelation.java @@ -0,0 +1,481 @@ +package edu.stanford.nlp.naturalli; + +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; + +/** + * The catalog of the seven Natural Logic relations. + * Set-theoretically, if we assume A and B are two sets (e.g., denotations), + * and D is the universe of discourse, + * then the relations between A and B are defined as follows: + * + *
      + *
+ * <ul>
+ *   <li>Equivalence: A = B</li>
+ *   <li>Forward entailment: A \\subset B</li>
+ *   <li>Reverse entailment: A \\supset B</li>
+ *   <li>Negation: A \\intersect B = \\empty \\land A \\union B = D</li>
+ *   <li>Alternation: A \\intersect B = \\empty</li>
+ *   <li>Cover: A \\union B = D</li>
+ * </ul>
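To make the list above concrete: with A = "cat" and B = "animal" we get forward entailment, while "cat" vs. "dog" is alternation (disjoint, but not jointly exhaustive). The sketch below (an editorial illustration, not part of the patch) shows how these relations compose under the MacCartney join table implemented further down in this file:

    import edu.stanford.nlp.naturalli.NaturalLogicRelation;

    public class JoinTableDemo {
      public static void main(String[] args) {
        // Chaining "cat" forward-entails "animal" with "animal" forward-entails
        // "living thing" stays a forward entailment.
        System.out.println(NaturalLogicRelation.FORWARD_ENTAILMENT.join(
            NaturalLogicRelation.FORWARD_ENTAILMENT));  // FORWARD_ENTAILMENT
        // Two negations cancel out to equivalence.
        System.out.println(NaturalLogicRelation.NEGATION.join(
            NaturalLogicRelation.NEGATION));            // EQUIVALENT
        // Some chains lose all information and bottom out at independence.
        System.out.println(NaturalLogicRelation.FORWARD_ENTAILMENT.join(
            NaturalLogicRelation.COVER));               // INDEPENDENCE
      }
    }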
+ *
+ * @author Gabor Angeli
+ */
+public enum NaturalLogicRelation {
+  EQUIVALENT(0, true, false),
+  FORWARD_ENTAILMENT(1, true, false),
+  REVERSE_ENTAILMENT(2, false, false),
+  NEGATION(3, false, true),
+  ALTERNATION(4, false, true),
+  COVER(5, false, false),
+  INDEPENDENCE(6, false, false);
+
+  public final int fixedIndex;
+  public final boolean isEntailed, isNegated;
+
+  NaturalLogicRelation(int fixedIndex, boolean isEntailed, boolean isNegated) {
+    this.fixedIndex = fixedIndex;
+    this.isEntailed = isEntailed;
+    this.isNegated = isNegated;
+  }
+
+  protected static NaturalLogicRelation byFixedIndex(int index) {
+    switch (index) {
+      case 0: return EQUIVALENT;
+      case 1: return FORWARD_ENTAILMENT;
+      case 2: return REVERSE_ENTAILMENT;
+      case 3: return NEGATION;
+      case 4: return ALTERNATION;
+      case 5: return COVER;
+      case 6: return INDEPENDENCE;
+      default: throw new IllegalArgumentException("Unknown index for Natural Logic relation: " + index);
+    }
+  }
+
+  /**
+   * The MacCartney "join table" -- this determines the transitivity of entailment if we chain two relations together.
+   * These should already be projected up through the sentence, so that the relations being joined are relations between
+   * sentences rather than relations between lexical items (see {@link Polarity#projectLexicalRelation(NaturalLogicRelation)},
+   * set by {@link edu.stanford.nlp.naturalli.NaturalLogicAnnotator} using the {@link edu.stanford.nlp.naturalli.NaturalLogicAnnotations.PolarityAnnotation}).
+   *
+   * @param other The relation to join this relation with.
+   * @return The new joined relation.
+   */
+  public NaturalLogicRelation join(NaturalLogicRelation other) {
+    switch (this) {
+      case EQUIVALENT:
+        return other;
+      case FORWARD_ENTAILMENT:
+        switch (other) {
+          case EQUIVALENT:
+          case FORWARD_ENTAILMENT:
+            return FORWARD_ENTAILMENT;
+          case NEGATION:
+          case ALTERNATION:
+            return COVER;
+          case REVERSE_ENTAILMENT:
+          case COVER:
+          case INDEPENDENCE:
+            return INDEPENDENCE;
+        }
+      case REVERSE_ENTAILMENT:
+        switch (other) {
+          case EQUIVALENT:
+          case REVERSE_ENTAILMENT:
+            return REVERSE_ENTAILMENT;
+          case NEGATION:
+          case COVER:
+            return COVER;
+          case FORWARD_ENTAILMENT:
+          case ALTERNATION:
+          case INDEPENDENCE:
+            return INDEPENDENCE;
+        }
+      case NEGATION:
+        switch (other) {
+          case EQUIVALENT:
+            return NEGATION;
+          case FORWARD_ENTAILMENT:
+            return COVER;
+          case REVERSE_ENTAILMENT:
+            return ALTERNATION;
+          case NEGATION:
+            return EQUIVALENT;
+          case ALTERNATION:
+            return REVERSE_ENTAILMENT;
+          case COVER:
+            return FORWARD_ENTAILMENT;
+          case INDEPENDENCE:
+            return INDEPENDENCE;
+        }
+      case ALTERNATION:
+        switch (other) {
+          case EQUIVALENT:
+          case REVERSE_ENTAILMENT:
+            return ALTERNATION;
+          case NEGATION:
+          case COVER:
+            return FORWARD_ENTAILMENT;
+          case FORWARD_ENTAILMENT:
+          case ALTERNATION:
+          case INDEPENDENCE:
+            return INDEPENDENCE;
+        }
+      case COVER:
+        switch (other) {
+          case EQUIVALENT:
+          case FORWARD_ENTAILMENT:
+            return COVER;
+          case NEGATION:
+          case ALTERNATION:
+            return REVERSE_ENTAILMENT;
+          case REVERSE_ENTAILMENT:
+          case COVER:
+          case INDEPENDENCE:
+            return INDEPENDENCE;
+        }
+      case INDEPENDENCE:
+        return INDEPENDENCE;
+    }
+    throw new IllegalStateException("[should be impossible]: Incomplete join table for " + this + " joined with " + other);
+  }
+
+  private static final Map<String, NaturalLogicRelation> insertArcToNaturalLogicRelation = Collections.unmodifiableMap(new HashMap<String, NaturalLogicRelation>() {{
+    put("acomp", NaturalLogicRelation.REVERSE_ENTAILMENT);  //
+    put("advcl", NaturalLogicRelation.REVERSE_ENTAILMENT);  //
+    put("purpcl",
NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("advmod", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("amod", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("appos", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("aux", NaturalLogicRelation.INDEPENDENCE); // he left -/-> he should leave + put("auxpass", NaturalLogicRelation.INDEPENDENCE); // some cat adopts -/-> some cat got adopted + put("cc", NaturalLogicRelation.REVERSE_ENTAILMENT); // match dep_conj + put("ccomp", NaturalLogicRelation.INDEPENDENCE); // interesting project here... "he said x" -> "x"? + put("conj", NaturalLogicRelation.REVERSE_ENTAILMENT); // match dep_cc + put("cop", NaturalLogicRelation.INDEPENDENCE); // + put("csubj", NaturalLogicRelation.INDEPENDENCE); // don't drop subjects. + put("csubjpass", NaturalLogicRelation.INDEPENDENCE); // as above + put("dep", NaturalLogicRelation.INDEPENDENCE); // + put("det", NaturalLogicRelation.EQUIVALENT); // + put("discourse", NaturalLogicRelation.EQUIVALENT); // + put("dobj", NaturalLogicRelation.REVERSE_ENTAILMENT); // "he studied NLP at Stanford" -> "he studied NLP" + put("expl", NaturalLogicRelation.EQUIVALENT); // though we shouldn't see this... + put("goeswith", NaturalLogicRelation.EQUIVALENT); // also shouldn't see this + put("iobj", NaturalLogicRelation.REVERSE_ENTAILMENT); // she gave me a raise -> she gave a raise + put("mark", NaturalLogicRelation.REVERSE_ENTAILMENT); // he says that you like to swim -> he says you like to swim + put("mwe", NaturalLogicRelation.INDEPENDENCE); // shouldn't see this + put("neg", NaturalLogicRelation.NEGATION); // + put("nn", NaturalLogicRelation.INDEPENDENCE); // + put("npadvmod", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("nsubj", NaturalLogicRelation.INDEPENDENCE); // + put("nsubjpass", NaturalLogicRelation.INDEPENDENCE); // + put("num", NaturalLogicRelation.INDEPENDENCE); // + put("number", NaturalLogicRelation.INDEPENDENCE); // + put("parataxis", NaturalLogicRelation.INDEPENDENCE); // or, reverse? + put("pcomp", NaturalLogicRelation.INDEPENDENCE); // though, not so in collapsed dependencies + put("pobj", NaturalLogicRelation.INDEPENDENCE); // must delete whole preposition + put("poss", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("posseive", NaturalLogicRelation.INDEPENDENCE); // see dep_poss + put("preconj", NaturalLogicRelation.INDEPENDENCE); // forbidden to see this + put("predet", NaturalLogicRelation.INDEPENDENCE); // forbidden to see this + put("prep", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prt", NaturalLogicRelation.INDEPENDENCE); // + put("punct", NaturalLogicRelation.EQUIVALENT); // + put("quantmod", NaturalLogicRelation.FORWARD_ENTAILMENT); // + put("rcmod", NaturalLogicRelation.FORWARD_ENTAILMENT); // "there are great tennors --rcmod--> who are modest" + put("root", NaturalLogicRelation.INDEPENDENCE); // err.. 
never delete + put("tmod", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("vmod", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("partmod", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("infmod", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("xcomp", NaturalLogicRelation.INDEPENDENCE); // + put("conj_and", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("conj_or", NaturalLogicRelation.FORWARD_ENTAILMENT); // + put("prep_aboard", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_about", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_above", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_across", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_after", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_against", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_along", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_amid", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_among", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_anti", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_around", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_as", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_at", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_before", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_behind", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_below", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_beneath", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_beside", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_besides", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_between", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_beyond", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_but", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_by", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_concerning", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_considering", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_despite", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_down", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_during", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_except", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_excepting", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_excluding", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_following", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_for", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_from", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_in", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_inside", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_into", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_like", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_minus", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_near", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_off", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_on", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_onto", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_opposite", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_outside", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_over", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_past", 
NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_per", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_plus", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_regarding", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_round", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_save", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_since", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_than", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_through", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_toward", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_towards", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_under", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_underneath", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_unlike", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_until", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_up", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_upon", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_versus", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_via", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_with", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_within", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_without", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_whether", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_according_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_as_per", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_compared_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_instead_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_preparatory_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_across_from", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_as_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_compared_with", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_irrespective_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_previous_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_ahead_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_aside_from", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_due_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_next_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_prior_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_along_with", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_away_from", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_depending_on", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_near_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_pursuant_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_alongside_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_based_on", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_except_for", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_off_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_regardless_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_apart_from", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_because_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_exclusive_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_out_of", 
NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_subsequent_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_as_for", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_close_by", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_contrary_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_outside_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_such_as", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_as_from", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_close_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_followed_by", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_owing_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_thanks_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_as_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_contrary_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_inside_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_preliminary_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_together_with", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_by_means_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_in_case_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_in_place_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_on_behalf_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_with_respect_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_in_accordance_with", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_in_front_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_in_spite_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_on_top_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_in_addition_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_in_lieu_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_on_account_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prep_with_regard_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_aboard", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_about", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_above", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_across", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_after", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_against", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_along", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_amid", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_among", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_anti", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_around", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_as", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_at", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_before", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_behind", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_below", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_beneath", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_beside", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_besides", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_between", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_beyond", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_but", NaturalLogicRelation.REVERSE_ENTAILMENT); // + 
put("prepc_by", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_concerning", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_considering", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_despite", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_down", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_during", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_except", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_excepting", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_excluding", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_following", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_for", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_from", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_in", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_inside", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_into", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_like", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_minus", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_near", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_off", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_on", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_onto", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_opposite", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_outside", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_over", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_past", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_per", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_plus", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_regarding", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_round", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_save", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_since", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_than", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_through", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_toward", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_towards", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_under", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_underneath", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_unlike", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_until", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_up", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_upon", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_versus", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_via", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_with", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_within", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_without", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_according_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_as_per", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_compared_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_instead_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_preparatory_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // + 
put("prepc_across_from", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_as_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_compared_with", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_irrespective_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_previous_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_ahead_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_aside_from", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_due_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_next_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_prior_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_along_with", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_away_from", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_depending_on", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_near_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_pursuant_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_alongside_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_based_on", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_except_for", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_off_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_regardless_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_apart_from", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_because_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_exclusive_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_out_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_subsequent_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_as_for", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_close_by", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_contrary_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_outside_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_such_as", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_as_from", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_close_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_followed_by", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_owing_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_thanks_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_as_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_contrary_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_inside_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_preliminary_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_together_with", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_by_means_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_in_case_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_in_place_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_on_behalf_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_with_respect_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_in_accordance_with", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_in_front_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_in_spite_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_on_top_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_in_addition_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // + 
put("prepc_in_lieu_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_on_account_of", NaturalLogicRelation.REVERSE_ENTAILMENT); // + put("prepc_with_regard_to", NaturalLogicRelation.REVERSE_ENTAILMENT); // + }}); + + /** + * Returns the natural logic relation corresponding to the given dependency arc being inserted into a sentence. + */ + public static NaturalLogicRelation forDependencyInsertion(String dependencyLabel) { + NaturalLogicRelation rel = insertArcToNaturalLogicRelation.get(dependencyLabel.toLowerCase()); + if (rel != null) { + return rel; + } else { + throw new IllegalArgumentException("Unknown dependency arc label: " + dependencyLabel); + } + } + + /** + * Returns the natural logic relation corresponding to the given dependency arc being deleted from a sentence. + */ + public static NaturalLogicRelation forDependencyDeletion(String dependencyLabel) { + NaturalLogicRelation rel = forDependencyInsertion(dependencyLabel); + switch (rel) { + case EQUIVALENT: return EQUIVALENT; + case FORWARD_ENTAILMENT: return REVERSE_ENTAILMENT; + case REVERSE_ENTAILMENT: return FORWARD_ENTAILMENT; + case NEGATION: return NEGATION; + case ALTERNATION: return COVER; + case COVER: return ALTERNATION; + case INDEPENDENCE: return INDEPENDENCE; + default: + throw new IllegalStateException("Unhandled natural logic relation: " + rel); + } + } +} diff --git a/src/edu/stanford/nlp/naturalli/NaturalLogicWeights.java b/src/edu/stanford/nlp/naturalli/NaturalLogicWeights.java new file mode 100644 index 0000000000..8378e1a8ee --- /dev/null +++ b/src/edu/stanford/nlp/naturalli/NaturalLogicWeights.java @@ -0,0 +1,85 @@ +package edu.stanford.nlp.naturalli; + +import edu.stanford.nlp.io.IOUtils; +import edu.stanford.nlp.stats.ClassicCounter; +import edu.stanford.nlp.stats.Counter; +import edu.stanford.nlp.stats.Counters; +import edu.stanford.nlp.stats.TwoDimensionalCounter; + +/** + * TODO(gabor) JavaDoc + * + * @author Gabor Angeli + */ +public class NaturalLogicWeights { + + private TwoDimensionalCounter ppAffinity = new TwoDimensionalCounter<>(); + private Counter dobjAffinity = new ClassicCounter<>(); + + public NaturalLogicWeights(String PP_AFFINITY, String DOBJ_AFFINITY) { + // Preposition affinities + for (String line : IOUtils.readLines(PP_AFFINITY, "utf8")) { + String[] fields = line.split("\t"); + if (fields.length != 3) { + throw new IllegalArgumentException("Invalid format for the pp_affinity data"); + } + ppAffinity.setCount(fields[0], fields[1], Double.parseDouble(fields[2])); + } + for (String verb : ppAffinity.firstKeySet()) { + // Normalize counts to be between 0 and 1 + Counter preps = ppAffinity.getCounter(verb); + Counters.multiplyInPlace(preps, -1.0); + Counters.addInPlace(preps, 1.0); + double min = Counters.min(preps); + double max = Counters.max(preps); + Counters.addInPlace(preps, -min); + if (max == min) { + Counters.addInPlace(preps, 0.5); + } else { + Counters.divideInPlace(preps, max - min); + } + Counters.multiplyInPlace(preps, -1.0); + Counters.addInPlace(preps, 1.0); + } + // Object affinities + for (String line : IOUtils.readLines(DOBJ_AFFINITY, "utf8")) { + String[] fields = line.split("\t"); + if (fields.length != 2) { + throw new IllegalArgumentException("Invalid format for the dobj_affinity data"); + } + dobjAffinity.setCount(fields[0], Double.parseDouble(fields[1])); + } + } + + private double backoffEdgeProbability(String edgeRel) { + return 1.0; // TODO(gabor) should probably learn these... 
+  }
+
+  public double deletionProbability(String parent, String edgeRel) {
+    return deletionProbability(parent, edgeRel, false);
+  }
+
+  public double deletionProbability(String parent, String edgeRel, boolean isSecondaryEdgeOfType) {
+    if (edgeRel.startsWith("prep")) {
+      double affinity = ppAffinity.getCount(parent, edgeRel);
+      if (affinity != 0.0 && !isSecondaryEdgeOfType) {
+        return Math.sqrt(1.0 - Math.min(1.0, affinity));
+      } else {
+        return backoffEdgeProbability(edgeRel);
+      }
+    } else if (edgeRel.startsWith("dobj")) {
+      double affinity = dobjAffinity.getCount(parent);
+      if (affinity != 0.0 && !isSecondaryEdgeOfType) {
+        return Math.sqrt(1.0 - Math.min(1.0, affinity));
+      } else {
+        return backoffEdgeProbability(edgeRel);
+      }
+    } else {
+      return backoffEdgeProbability(edgeRel);
+    }
+  }
+
+  public static NaturalLogicWeights fromString(String str) {
+    return new NaturalLogicWeights(null, null); // TODO(gabor)
+  }
+}
diff --git a/src/edu/stanford/nlp/naturalli/OpenIE.java b/src/edu/stanford/nlp/naturalli/OpenIE.java
new file mode 100644
index 0000000000..496bbd21f9
--- /dev/null
+++ b/src/edu/stanford/nlp/naturalli/OpenIE.java
@@ -0,0 +1,597 @@
+package edu.stanford.nlp.naturalli;
+
+import edu.stanford.nlp.ie.util.RelationTriple;
+import edu.stanford.nlp.ling.CoreAnnotations;
+import edu.stanford.nlp.ling.CoreLabel;
+import edu.stanford.nlp.ling.IndexedWord;
+import edu.stanford.nlp.pipeline.Annotation;
+import edu.stanford.nlp.pipeline.Annotator;
+import edu.stanford.nlp.pipeline.StanfordCoreNLP;
+import edu.stanford.nlp.semgraph.SemanticGraph;
+import edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations;
+import edu.stanford.nlp.semgraph.SemanticGraphEdge;
+import edu.stanford.nlp.semgraph.semgrex.SemgrexMatcher;
+import edu.stanford.nlp.semgraph.semgrex.SemgrexPattern;
+import edu.stanford.nlp.trees.GrammaticalRelation;
+import edu.stanford.nlp.util.CoreMap;
+import edu.stanford.nlp.util.Execution;
+import edu.stanford.nlp.util.StringUtils;
+
+import java.util.*;
+
+/**
+ * A simple OpenIE system based on valid Natural Logic deletions of a sentence.
+ *
+ * @author Gabor Angeli
+ */
+@SuppressWarnings("FieldCanBeLocal")
+public class OpenIE implements Annotator {
+
+  @Execution.Option(name="openie.pp_affinity", gloss="A tab separated file of 'verb preposition affinity' values, where affinity is between 0 and 1")
+  private String PP_AFFINITY = "/home/gabor/workspace/naturalli/etc/pp_affinity.tsv.gz"; //"edu/stanford/nlp/naturalli/pp_affinity.tab";
+
+  @Execution.Option(name="openie.dobj_affinity", gloss="A tab separated file of 'verb dobj_affinity' values, where affinity is between 0 and 1")
+  private String DOBJ_AFFINITY = "/home/gabor/workspace/naturalli/etc/dobj_affinity.tsv.gz"; // "edu/stanford/nlp/naturalli/dobj_affinity.tab";
+
+  private static enum Optimization { GENERAL, KB }
+
+  @Execution.Option(name="openie.optimize.for", gloss="{General, KB}: Optimize the system for particular tasks (e.g., knowledge base completion tasks -- try to make the subject and object coherent named entities).")
+  private Optimization OPTIMIZE_FOR = Optimization.GENERAL;
+
+  private final NaturalLogicWeights WEIGHTS;
+
+  /** Create a new OpenIE system, with default properties */
+  @SuppressWarnings("UnusedDeclaration")
+  public OpenIE() {
+    this(new Properties());
+  }
+
+  /**
+   * Create a new OpenIE system, based on the given properties.
+   * @param props The properties to parameterize the system with.
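A side note on the scoring above: NaturalLogicWeights.deletionProbability maps a normalized affinity onto sqrt(1 - min(1, affinity)), so arguments the governing verb selects for strongly become expensive to delete. A small illustrative sketch of that curve (editorial, not part of the patch):

    public class DeletionCurveDemo {
      public static void main(String[] args) {
        // Higher affinity (the verb selects strongly for this argument) means
        // deleting the edge is less likely to preserve the sentence's meaning.
        for (double affinity : new double[]{0.25, 0.50, 0.75, 1.00}) {
          double pDelete = Math.sqrt(1.0 - Math.min(1.0, affinity));
          System.out.printf("affinity %.2f -> deletion probability %.3f%n", affinity, pDelete);
        }
        // affinity 0.25 -> 0.866; 0.50 -> 0.707; 0.75 -> 0.500; 1.00 -> 0.000
        // An affinity of exactly 0.0 falls back to backoffEdgeProbability (currently 1.0).
      }
    }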
+ */ + public OpenIE(Properties props) { + Execution.fillOptions(this, props); + this.WEIGHTS = new NaturalLogicWeights(PP_AFFINITY, DOBJ_AFFINITY); + } + + /** + * A result from the search over possible shortenings of the sentence. + */ + private static class SearchResult { + public final SemanticGraph tree; + public final List deletedEdges; + public final double confidence; + + private SearchResult(SemanticGraph tree, List deletedEdges, double confidence) { + this.tree = tree; + this.deletedEdges = deletedEdges; + this.confidence = confidence; + } + + @Override + public String toString() { + return StringUtils.join(tree.vertexListSorted().stream().map(IndexedWord::word), " "); + } + } + + /** + * A state in the search, denoting a partial shortening of the sentence. + */ + private static class SearchState { + public final long deletionMask; + public final int currentIndex; + public final SemanticGraph tree; + public final String lastDeletedEdge; + public final SearchState source; + public final double score; + + private SearchState(long deletionMask, int currentIndex, SemanticGraph tree, String lastDeletedEdge, SearchState source, double score) { + this.deletionMask = deletionMask; + this.currentIndex = currentIndex; + this.tree = tree; + this.lastDeletedEdge = lastDeletedEdge; + this.source = source; + this.score = score; + } + } + + /** + * The search algorithm, starting with a full sentence and iteratively shortening it to its entailed sentences. + * @param sentence The sentence to begin with. + * @param originalTree The original tree of the sentence we are beginning with + * @return A list of search results, corresponding to shortenings of the sentence. + */ + private List search(List sentence, SemanticGraph originalTree) { + // Pre-process the tree + originalTree = new SemanticGraph(originalTree); + // (remove common determiners) + List determinerRemovals = new ArrayList<>(); + for (IndexedWord vertex : originalTree.getLeafVertices()) { + if (vertex.word().equalsIgnoreCase("the") || vertex.word().equalsIgnoreCase("a") || + vertex.word().equalsIgnoreCase("an")) { + originalTree.removeVertex(vertex); + determinerRemovals.add("det"); + } + } + // (cut conj_and nodes) + Set andsToAdd = new HashSet<>(); + for (IndexedWord vertex : originalTree.vertexSet()) { + if( originalTree.inDegree(vertex) > 1 ) { + SemanticGraphEdge conjAnd = null; + for (SemanticGraphEdge edge : originalTree.incomingEdgeIterable(vertex)) { + if (edge.getRelation().toString().equals("conj_and")) { + conjAnd = edge; + } + } + if (conjAnd != null) { + originalTree.removeEdge(conjAnd); + andsToAdd.add(conjAnd); + } + } + } + // (find secondary edges) + Set secondaryEdges = classifySecondaryEdges(originalTree); + + // Outputs + List results = new ArrayList<>(); + if (!determinerRemovals.isEmpty()) { + if (andsToAdd.isEmpty()) { + double score = Math.pow(WEIGHTS.deletionProbability(null, "det"), (double) determinerRemovals.size()); + assert !Double.isNaN(score); + assert !Double.isInfinite(score); + results.add(new SearchResult(originalTree, determinerRemovals, score)); + } else { + SemanticGraph treeWithAnds = new SemanticGraph(originalTree); + for (SemanticGraphEdge and : andsToAdd) { + treeWithAnds.addEdge(and.getGovernor(), and.getDependent(), and.getRelation(), Double.NEGATIVE_INFINITY, false); + } + results.add(new SearchResult(treeWithAnds, determinerRemovals, + Math.pow(WEIGHTS.deletionProbability(null, "det"), (double) determinerRemovals.size()))); + } + } + + // Initialize the search + List topologicalVertices = 
originalTree.topologicalSort(); + Stack fringe = new Stack<>(); + fringe.push(new SearchState(0l, 0, originalTree, null, null, 1.0)); + + // Start the search + while (!fringe.isEmpty()) { + SearchState state = fringe.pop(); + IndexedWord currentWord = topologicalVertices.get(state.currentIndex); + + // Push the case where we don't delete + int nextIndex = state.currentIndex + 1; + while (nextIndex < topologicalVertices.size()) { + IndexedWord nextWord = topologicalVertices.get(nextIndex); + if ( ((state.deletionMask >>> (nextWord.index() - 1)) & 0x1l) == 0) { + fringe.push(new SearchState(state.deletionMask, nextIndex, state.tree, null, state, state.score)); + break; + } else { + nextIndex += 1; + } + } + + // Check if we can delete this subtree + boolean canDelete = state.tree.getFirstRoot() != currentWord; + for (SemanticGraphEdge edge : state.tree.incomingEdgeIterable(currentWord)) { + // Get token information + CoreLabel token = sentence.get(edge.getDependent().index() - 1); + Polarity tokenPolarity = token.get(NaturalLogicAnnotations.PolarityAnnotation.class); + // Get the relation for this deletion + NaturalLogicRelation lexicalRelation = NaturalLogicRelation.forDependencyDeletion(edge.getRelation().toString()); + NaturalLogicRelation projectedRelation = tokenPolarity.projectLexicalRelation(lexicalRelation); + // Make sure this is a valid entailment + if (!projectedRelation.isEntailed) { canDelete = false; } + } + + if (canDelete) { + // Register the deletion + long newMask = state.deletionMask; + SemanticGraph treeWithDeletions = new SemanticGraph(state.tree); + for (IndexedWord vertex : state.tree.descendants(currentWord)) { + treeWithDeletions.removeVertex(vertex); + newMask |= (0x1l << (vertex.index() - 1)); + assert vertex.index() <= 64; + assert ((newMask >>> (vertex.index() - 1)) & 0x1l) == 1; + } + SemanticGraph resultTree = new SemanticGraph(treeWithDeletions); + for (SemanticGraphEdge edge : andsToAdd) { + if (resultTree.containsVertex(edge.getGovernor()) && resultTree.containsVertex(edge.getDependent())) { + resultTree.addEdge(edge.getGovernor(), edge.getDependent(), edge.getRelation(), Double.NEGATIVE_INFINITY, false); + } + } + // Compute the score of the sentence + double newScore = state.score; + for (SemanticGraphEdge edge : state.tree.incomingEdgeIterable(currentWord)) { + String relationString = edge.getRelation().toString(); + double multiplier = WEIGHTS.deletionProbability( + edge.getGovernor().word().toLowerCase(), + relationString, + secondaryEdges.contains(edge) + ); + assert !Double.isNaN(multiplier); + assert !Double.isInfinite(multiplier); + newScore *= multiplier; + } + // Register the result + results.add(new SearchResult(resultTree, + aggregateDeletedEdges(state, state.tree.incomingEdgeIterable(currentWord), determinerRemovals), + newScore)); + + // Push the state with this subtree deleted + nextIndex = state.currentIndex + 1; + while (nextIndex < topologicalVertices.size()) { + IndexedWord nextWord = topologicalVertices.get(nextIndex); + if ( ((newMask >>> (nextWord.index() - 1)) & 0x1l) == 0) { + assert treeWithDeletions.containsVertex(topologicalVertices.get(nextIndex)); + fringe.push(new SearchState(newMask, nextIndex, treeWithDeletions, null, state, newScore)); + break; + } else { + nextIndex += 1; + } + } + } + } + + // Return + return results; + } + + /** + * A simple heuristic to classify whether an edge is a secondary edge of the given type. + * For example, in: + *
+   * <pre>
+   *   In foreign policy, Obama ended military involvement in Iraq.
+   * </pre>
    + * + * The first 'in' ('in foreign policy') is to be considered a secondary edge of the type 'prep_in'. + * + * @param graph The graph to classify + * + * @return The edges in the graph which are to be considered secondary edges. + */ + private Set classifySecondaryEdges(SemanticGraph graph) { + Set secondaryEdges = new HashSet<>(); + for (IndexedWord root : graph.vertexSet()) { + Map> edgesByType = new HashMap<>(); + boolean hasDirectObject = false; + for (SemanticGraphEdge edge : graph.outgoingEdgeIterable(root)) { + String type = edge.getRelation().toString(); + if (type.startsWith("prep")) { + if (!edgesByType.containsKey(type)) { + edgesByType.put(type, new ArrayList<>()); + } + edgesByType.get(type).add(edge); + } + if (type.equals("dobj")) { + hasDirectObject = true; + } + } + for (Map.Entry> entry : edgesByType.entrySet()) { + List edges = entry.getValue(); + if (hasDirectObject) { + // If we have a dobj, all prep_* edges are secondary + for (SemanticGraphEdge e : edges) { + secondaryEdges.add(e); + } + } else if (entry.getValue().size() > 1) { + // Candidate for a secondary edge (i.e., more than one outgoing edge of the given type) + Collections.sort(edges, (o1, o2) -> { + if (o1.getDependent().index() < root.index()) { + return -1; + } else if (o2.getDependent().index() < root.index()) { + return 1; + } else { + return o1.getDependent().index() - o2.getDependent().index(); + } + }); + // Register secondary edges + for (int i = 1; i < edges.size(); ++i) { + secondaryEdges.add(edges.get(i)); + } + } else if (edges.get(0).getDependent().index() < root.index()) { + secondaryEdges.add(edges.get(0)); + } + } + } + return secondaryEdges; + } + + /** + * Backtrace from a search state, collecting all of the deleted edges used to get there. + * @param state The final search state. + * @param justDeleted The edges we have just deleted. + * @param otherEdges Other deletions we want to register + * @return A list of deleted edges for that search state. + */ + private static List aggregateDeletedEdges(SearchState state, Iterable justDeleted, Iterable otherEdges) { + List rtn = new ArrayList<>(); + for (SemanticGraphEdge edge : justDeleted) { + rtn.add(edge.getRelation().toString()); + } + for (String edge : otherEdges) { + rtn.add(edge); + } + while (state != null) { + if (state.lastDeletedEdge != null) { + rtn.add(state.lastDeletedEdge); + } + state = state.source; + } + return rtn; + } + + /** The pattern for a clause to be split off of the sentence */ + private static final List CLAUSE_PATTERNS = Collections.unmodifiableList(new ArrayList() {{ + String clauseBreakers = "vmod|partmod|infmod|prepc.*|advcl|purpcl|conj(_and)?|prep_.*|dep"; + add(SemgrexPattern.compile("{$} ?>/.subj(pass)?/ {}=subject >/" + clauseBreakers + "/ ( {pos:/V.*/}=clause ?>/.subj(pass)?/ {}=clausesubj )")); + add(SemgrexPattern.compile("{$} ?>/.subj(pass)?/ {}=subject >/.obj|prep.*/ ( !{pos:/N*/} >/" + clauseBreakers + "/ ( {pos:/V.*/}=clause ?>/.subj(pass)?/ {}=clausesubj ) )")); + }}); + + private static final SemgrexPattern LIMITED_CC_COLLAPSE + = SemgrexPattern.compile("{}=root >/.*/=rel ( {}=a >/conj_.*/ {}=b )"); + + /** + * Do some limited CC collapsing. + * @param tree The tree to perform the collapsing on. + * @return The same tree. 
THIS IS AN IN PLACE FUNCTION.
+   */
+  private static SemanticGraph tweakCC(SemanticGraph tree) {
+    SemgrexMatcher matcher = LIMITED_CC_COLLAPSE.matcher(tree);
+    List<SemanticGraphEdge> edgesToAdd = new ArrayList<>();  // Avoid a concurrent modification exception
+    while (matcher.find()) {
+      edgesToAdd.add(new SemanticGraphEdge(matcher.getNode("root"), matcher.getNode("b"),
+          GrammaticalRelation.valueOf(GrammaticalRelation.Language.Any, matcher.getRelnString("rel")),
+          Double.NEGATIVE_INFINITY, false));
+    }
+    for (SemanticGraphEdge edge : edgesToAdd) {
+      tree.addEdge(edge.getGovernor(), edge.getDependent(), edge.getRelation(), edge.getWeight(), edge.isExtra());
+    }
+    return tree;
+  }
+
+  /**
+   * Split a given subtree off of the main tree.
+   * This will do two things:
+   *
      + *
+   * <ul>
+   *   <li>It will return a {@link edu.stanford.nlp.semgraph.SemanticGraph} consisting of the subtree split off.</li>
+   *   <li>It will delete all the nodes in the original tree which were split off into the subtree.</li>
+   * </ul>
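A toy rendering of the two steps just listed, with a plain adjacency map standing in for the SemanticGraph (editorial sketch; the real method below also copies edge labels and weights, and can clone the subject into the split tree):

    import java.util.*;

    public class SplitOffDemo {
      public static void main(String[] args) {
        // "Obama signed the bill to fund the program": split the subtree at "fund".
        Map<String, List<String>> tree = new HashMap<>();
        tree.put("signed", new ArrayList<>(Arrays.asList("Obama", "bill")));
        tree.put("bill", new ArrayList<>(Arrays.asList("fund")));
        tree.put("fund", new ArrayList<>(Arrays.asList("program")));

        // Step 1: collect the split-off subtree by walking down from its root.
        Map<String, List<String>> subtree = new HashMap<>();
        Deque<String> fringe = new ArrayDeque<>(Collections.singleton("fund"));
        while (!fringe.isEmpty()) {
          String node = fringe.pop();
          List<String> children = tree.getOrDefault(node, Collections.emptyList());
          subtree.put(node, new ArrayList<>(children));
          fringe.addAll(children);
        }
        // Step 2: delete those nodes from the original tree.
        for (String node : subtree.keySet()) {
          tree.remove(node);
          for (List<String> children : tree.values()) { children.remove(node); }
        }
        System.out.println("main clause:  " + tree);     // signed=[Obama, bill], bill=[]
        System.out.println("split clause: " + subtree);  // fund=[program], program=[]
      }
    }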
    + * @param tree The original tree; this will be smaller when we return. + * @param subtreeRoot The root of the subtree we are splitting off. + * @param subjectOrNull An optional subject to clone into the split subtree. This will appear in both trees. + * @return The split off tree. + */ + private SemanticGraph splitOffTree(SemanticGraph tree, IndexedWord subtreeRoot, IndexedWord subjectOrNull) { + SemanticGraph subtree = new SemanticGraph(); + subtree.addRoot(subtreeRoot); + // Initialize the search + Stack fringe = new Stack<>(); + for (IndexedWord child : tree.getChildren(subtreeRoot)) { + fringe.add(child); + } + // Run the search + Set seen = new HashSet<>(); + while (!fringe.isEmpty()) { + IndexedWord node = fringe.pop(); + if (seen.contains(node.index())) { + continue; + } + seen.add(node.index()); + subtree.addVertex(node); + for (SemanticGraphEdge incomingEdge : tree.incomingEdgeIterable(node)) { + subtree.addEdge(incomingEdge.getGovernor(), incomingEdge.getDependent(), incomingEdge.getRelation(), incomingEdge.getWeight(), incomingEdge.isExtra()); + } + for (IndexedWord child : tree.getChildren(node)) { + if (child.index() != node.index()) { // wat...? + fringe.add(child); + } + } + } + // Delete from original tree + for (IndexedWord vertex : subtree.vertexSet()) { + tree.removeVertex(vertex); + } + tree.removeVertex(subtreeRoot); + // Optionally clone the subject + if (subjectOrNull != null) { + subtree.addVertex(subjectOrNull); + for (SemanticGraphEdge incomingEdge : tree.incomingEdgeIterable(subjectOrNull)) { + subtree.addEdge(subtreeRoot, subjectOrNull, incomingEdge.getRelation(), incomingEdge.getWeight(), incomingEdge.isExtra()); + } + } + // Return + return subtree; + } + + /** + * Split a tree into constituent clauses + * @param rawTree The tree to split into clauses. + * @return A list of clauses in this sentence. + */ + private List coarseClauseSplitting(SemanticGraph rawTree) { + List clauses = new ArrayList<>(); + SemanticGraph original = null; + for (SemgrexPattern pattern : CLAUSE_PATTERNS) { + SemgrexMatcher matcher = pattern.matcher(original != null ? original : rawTree); + while (matcher.find()) { + if (original == null) { + original = new SemanticGraph(rawTree); + } + IndexedWord subjectOrNull = matcher.getNode("subject"); + IndexedWord clauseRoot = matcher.getNode("clause"); + IndexedWord clauseSubjectOrNull = matcher.getNode("clausesubj"); + SemanticGraph clause; + if (clauseSubjectOrNull != null || subjectOrNull == null) { + // Case: independent clause; no need to copy the subject + clause = splitOffTree(original, clauseRoot, null); + } else { + // Case: copy subject from main clause + //noinspection ConstantConditions + assert subjectOrNull != null; + clause = splitOffTree(original, clauseRoot, subjectOrNull); + } + if (original.isEmpty()) { + clauses.add(clause); + } else { + clauses.addAll(coarseClauseSplitting(clause)); + } + } + } + // Base case: just add the original tree + if (clauses.isEmpty()) { + clauses.add(tweakCC(rawTree)); + } else if (original != null && original.vertexSet().size() > 0) { + clauses.add(tweakCC(original)); + } + // Return + return clauses; + } + + /** + * Fix some bizarre peculiarities with certain trees. + * So far, these include: + *
      + *
+   * <ul>
+   *   <li>Sometimes there's a node from a word to itself. This seems wrong.</li>
+   * </ul>
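The self-edge case above is why the method below removes edges through an explicit Iterator rather than while enumerating them directly; a minimal sketch of that pattern (editorial illustration):

    import java.util.*;

    public class SelfEdgeDemo {
      public static void main(String[] args) {
        // Edges as (governor, dependent) index pairs; 3 -> 3 is a bogus self-edge.
        List<int[]> edges = new ArrayList<>(Arrays.asList(
            new int[]{1, 2}, new int[]{3, 3}, new int[]{2, 4}));
        // Removing during iteration requires Iterator.remove(), as in cleanTree below.
        Iterator<int[]> iter = edges.iterator();
        while (iter.hasNext()) {
          int[] edge = iter.next();
          if (edge[0] == edge[1]) {
            iter.remove();
          }
        }
        System.out.println(edges.size());  // 2
      }
    }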
    + * @param tree The tree to clean (in place!) + */ + private void cleanTree(SemanticGraph tree) { + // Clean nodes + List toDelete = new ArrayList<>(); + for (IndexedWord vertex : tree.vertexSet()) { + // Clean punctuation + char tag = vertex.backingLabel().tag().charAt(0); + if (tag == '.' || tag == ',' || tag == '(' || tag == ')' || tag == ':') { + toDelete.add(vertex); + } + } + for (IndexedWord v : toDelete) { tree.removeVertex(v); } + // Clean edges + Iterator iter = tree.edgeIterable().iterator(); + while (iter.hasNext()) { + SemanticGraphEdge edge = iter.next(); + if (edge.getDependent().index() == edge.getGovernor().index()) { + // Clean self-edges + iter.remove(); + } else if (edge.getRelation().toString().equals("punct")) { + // Clean punctuation (again) + iter.remove(); + } + } + } + + + /** + *

+   * <p>
+   *   Annotate a single sentence.
+   * </p>
+   * <p>
+   *   This annotator will, in particular, set the {@link edu.stanford.nlp.naturalli.NaturalLogicAnnotations.EntailedSentencesAnnotation}
+   *   and {@link edu.stanford.nlp.naturalli.NaturalLogicAnnotations.RelationTriplesAnnotation} annotations.
+   * </p>
    + */ + @SuppressWarnings("unchecked") + public void annotateSentence(CoreMap sentence) { + SemanticGraph fullTree = new SemanticGraph(sentence.get(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class)); + cleanTree(fullTree); + List tokens = sentence.get(CoreAnnotations.TokensAnnotation.class); + if (tokens.size() > 63) { + System.err.println("Very long sentence (>63 tokens); " + this.getClass().getSimpleName() + " is not attempting to extract relations."); + sentence.set(NaturalLogicAnnotations.RelationTriplesAnnotation.class, Collections.EMPTY_LIST); + sentence.set(NaturalLogicAnnotations.EntailedSentencesAnnotation.class, Collections.EMPTY_LIST); + } else { + List clauses = coarseClauseSplitting(fullTree); + Collection fragments = new ArrayList<>(); + List extractions = new ArrayList<>(); + // Add clauses + if (clauses.size() > 1) { + for (SemanticGraph tree : clauses) { + fragments.add(new SentenceFragment(tree, false)); + Optional extraction = RelationTriple.segment(tree, Optional.empty()); + if (extraction.isPresent()) { + extractions.add(extraction.get()); + } + } + } + // Add search results + for (SemanticGraph tree : clauses) { + List results = search(tokens, tree); + for (SearchResult result : results) { + SentenceFragment fragment = new SentenceFragment(result.tree, false); + fragments.add(fragment); + Optional extraction = RelationTriple.segment(result.tree, Optional.of(result.confidence)); + if (extraction.isPresent()) { + extractions.add(extraction.get()); + } + } + } + sentence.set(NaturalLogicAnnotations.EntailedSentencesAnnotation.class, fragments); + Collections.sort(extractions); + sentence.set(NaturalLogicAnnotations.RelationTriplesAnnotation.class, extractions); + } + } + + /** + * A simple utility function for just getting a list of relation triples from a sentence. + * Calls {@link OpenIE#annotate(edu.stanford.nlp.pipeline.Annotation)} on the backend. + */ + @SuppressWarnings("UnusedDeclaration") + public Collection relationsForSentence(CoreMap sentence) { + annotateSentence(sentence); + return sentence.get(NaturalLogicAnnotations.RelationTriplesAnnotation.class); + } + + /** + * {@inheritDoc} + * + *

+   * <p>
+   *   This annotator will, in particular, set the {@link edu.stanford.nlp.naturalli.NaturalLogicAnnotations.EntailedSentencesAnnotation}
+   *   and {@link edu.stanford.nlp.naturalli.NaturalLogicAnnotations.RelationTriplesAnnotation} annotations.
+   * </p>
    + */ + @Override + public void annotate(Annotation annotation) { + annotation.get(CoreAnnotations.SentencesAnnotation.class).forEach(this::annotateSentence); + } + + /** {@inheritDoc} */ + @Override + public Set requirementsSatisfied() { + return Collections.singleton(Annotator.OPENIE_REQUIREMENT); + } + + /** {@inheritDoc} */ + @Override + public Set requires() { + return Collections.singleton(Annotator.NATLOG_REQUIREMENT); + } + + /** + * An entry method for annotating standard in with OpenIE extractions. + */ + public static void main(String[] args) { + // Initialize prerequisites + Properties props = StringUtils.argsToProperties(args); + props.setProperty("annotators", "tokenize,ssplit,pos,depparse,natlog,openie"); + props.setProperty("ssplit.isOneSentence", "true"); + StanfordCoreNLP pipeline = new StanfordCoreNLP(props); + + // Run extractor + Scanner in = new Scanner(System.in); + while (in.hasNext()) { + String line = in.nextLine(); + Annotation ann = new Annotation(line); + pipeline.annotate(ann); + Collection extractions = ann.get(CoreAnnotations.SentencesAnnotation.class).get(0).get(NaturalLogicAnnotations.RelationTriplesAnnotation.class); + if (extractions.isEmpty()) { + System.err.println("No extractions for: " + line); + } + for (RelationTriple extraction : extractions) { + System.out.println(extraction); + } + } + } +} diff --git a/src/edu/stanford/nlp/naturalli/Operator.java b/src/edu/stanford/nlp/naturalli/Operator.java new file mode 100644 index 0000000000..edc40065d4 --- /dev/null +++ b/src/edu/stanford/nlp/naturalli/Operator.java @@ -0,0 +1,160 @@ +package edu.stanford.nlp.naturalli; + +import edu.stanford.nlp.util.Pair; + +import java.util.Collections; +import java.util.HashSet; +import java.util.Set; + +/** + * A collection of quantifiers. This is the exhaustive list of quantifiers our system knows about. 
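For intuition about the monotonicity markings used below: "all" is downward monotone (anti-additive) in its subject and upward monotone (multiplicative) in its object, which is why "all dogs bark" entails both "all poodles bark" and "all dogs make noise". A sketch against the enum defined just below (editorial, not part of the patch):

    import edu.stanford.nlp.naturalli.Operator;

    public class OperatorDemo {
      public static void main(String[] args) {
        // ALL("all", "anti-additive", "multiplicative"):
        System.out.println(Operator.ALL.subjMono);  // ANTITONE
        System.out.println(Operator.ALL.subjType);  // ADDITIVE
        System.out.println(Operator.ALL.objMono);   // MONOTONE
        System.out.println(Operator.ALL.objType);   // MULTIPLICATIVE
        // Unary operators like sentential negation have no object monotonicity:
        System.out.println(Operator.UNARY_NOT.isUnary());  // true
      }
    }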
+ * + * @author Gabor Angeli + */ +public enum Operator { + // "All" quantifiers + ALL("all", "anti-additive", "multiplicative"), + EVERY("every", "anti-additive", "multiplicative"), + ANY("any", "anti-additive", "multiplicative"), + EACH("each", "anti-additive", "multiplicative"), + THE_LOT_OF("the lot of", "anti-additive", "multiplicative"), + ALL_OF("all of", "anti-additive", "multiplicative"), + FOR_ALL("for all", "anti-additive", "multiplicative"), + FOR_EVERY("for every", "anti-additive", "multiplicative"), + FOR_EACH("for each", "anti-additive", "multiplicative"), + EVERYONE("everyone", "anti-additive", "multiplicative"), + NUM("__num__", "anti-additive", "multiplicative"), // TODO check me + NUM_NUM("__num__ __num__", "anti-additive", "multiplicative"), // TODO check me + NUM_NUM_NUM("__num__ __num__ __num__", "anti-additive", "multiplicative"), // TODO check me + NUM_NUM_NUM_NUM("__num__ __num__ __num__ __num__", "anti-additive", "multiplicative"), // TODO check me + FEW("few", "anti-additive", "multiplicative"), // TODO check me + IMPLICIT_NAMED_ENTITY("__implicit_named_entity__", "anti-additive", "multiplicative"), + + // "No" quantifiers + NO("no", "anti-additive", "anti-additive"), + NEITHER("neither", "anti-additive", "anti-additive"), + NO_ONE("no one", "anti-additive", "anti-additive"), + NOBODY("nobody", "anti-additive", "anti-additive"), + UNARY_NO("no", "anti-additive"), + UNARY_NOT("not", "anti-additive"), + UNARY_NO_ONE("no one", "anti-additive"), + UNARY_NT("n't", "anti-additive"), + + // "Some" quantifiers + SOME("some", "additive", "additive"), + SEVERAL("several", "additive", "additive"), + EITHER("either", "additive", "additive"), + A("a", "additive", "additive"), + THE("the", "additive", "additive"), + LESS_THAN("less than __num__", "additive", "additive"), + SOME_OF("some of", "additive", "additive"), + ONE_OF("one of", "additive", "additive"), + AT_LEAST("at least __num__", "additive", "additive"), + A_FEW("a few", "additive", "additive"), + AT_LEAST_A_FEW("at least a few", "additive", "additive"), + THERE_BE("there be", "additive", "additive"), + THERE_BE_A_FEW("there be a few", "additive", "additive"), + THERE_EXIST("there exist", "additive", "additive"), + NUM_OF_THE("__num__ of the", "additive", "additive"), + + // "Not All" quantifiers + NOT_ALL("not all", "additive", "anti-multiplicative"), + NOT_EVERY("not every", "additive", "anti-multiplicative"), + + // "Most" quantifiers + // TODO(gabor) check these + MOST("most", "nonmonotone", "multiplicative"), + MANY("many", "nonmonotone", "multiplicative"), + ENOUGH("enough", "nonmonotone", "multiplicative"), + MORE_THAN("more than __num_", "nonmonotone", "multiplicative"), + A_LOT_OF("a lot of", "nonmonotone", "multiplicative"), + LOTS_OF("lots of", "nonmonotone", "multiplicative"), + PLENTY_OF("plenty of", "nonmonotone", "multiplicative"), + HEAPS_OF("heap of", "nonmonotone", "multiplicative"), + A_LOAD_OF("a load of", "nonmonotone", "multiplicative"), + LOADS_OF("load of", "nonmonotone", "multiplicative"), + TONS_OF("ton of", "nonmonotone", "multiplicative"), + BOTH("both", "nonmonotone", "multiplicative"), + JUST_NUM("just __num__", "nonmonotone", "multiplicative"), + ONLY_NUM("only __num__", "nonmonotone", "multiplicative"), + + // Strange cases + AT_MOST_NUM("at most __num__", "anti-additive", "anti-additive"), + ; + + public static final Set GLOSSES = Collections.unmodifiableSet(new HashSet() {{ + for (Operator q : Operator.values()) { + add(q.surfaceForm); + } + }}); + + public final String surfaceForm; 
+ public final Monotonicity subjMono; + public final MonotonicityType subjType; + public final Monotonicity objMono; + public final MonotonicityType objType; + + Operator(String surfaceForm, String subjMono, String objMono) { + this.surfaceForm = surfaceForm; + Pair subj = monoFromString(subjMono); + this.subjMono = subj.first; + this.subjType = subj.second; + Pair obj = monoFromString(objMono); + this.objMono = obj.first; + this.objType = obj.second; + } + + Operator(String surfaceForm, String subjMono) { + this.surfaceForm = surfaceForm; + Pair subj = monoFromString(subjMono); + this.subjMono = subj.first; + this.subjType = subj.second; + this.objMono = Monotonicity.INVALID; + this.objType = MonotonicityType.NONE; + } + + public boolean isUnary() { + return objMono == Monotonicity.INVALID; + } + + public static Pair monoFromString(String mono) { + switch (mono) { + case "nonmonotone": return Pair.makePair(Monotonicity.NONMONOTONE, MonotonicityType.NONE); + case "additive": return Pair.makePair(Monotonicity.MONOTONE, MonotonicityType.ADDITIVE); + case "multiplicative": return Pair.makePair(Monotonicity.MONOTONE, MonotonicityType.MULTIPLICATIVE); + case "additive-multiplicative": return Pair.makePair(Monotonicity.MONOTONE, MonotonicityType.BOTH); + case "anti-additive": return Pair.makePair(Monotonicity.ANTITONE, MonotonicityType.ADDITIVE); + case "anti-multiplicative": return Pair.makePair(Monotonicity.ANTITONE, MonotonicityType.MULTIPLICATIVE); + case "anti-additive-multiplicative": return Pair.makePair(Monotonicity.ANTITONE, MonotonicityType.BOTH); + default: throw new IllegalArgumentException("Unknown monotonicity: " + mono); + } + } + + public static String monotonicitySignature(Monotonicity mono, MonotonicityType type) { + switch (mono) { + case MONOTONE: + switch (type) { + case NONE: return "nonmonotone"; + case ADDITIVE: return "additive"; + case MULTIPLICATIVE: return "multiplicative"; + case BOTH: return "additive-multiplicative"; + } + case ANTITONE: + switch (type) { + case NONE: return "nonmonotone"; + case ADDITIVE: return "anti-additive"; + case MULTIPLICATIVE: return "anti-multiplicative"; + case BOTH: return "anti-additive-multiplicative"; + } + case NONMONOTONE: return "nonmonotone"; + } + throw new IllegalStateException("Unhandled case: " + mono + " and " + type); + } + + @SuppressWarnings("UnusedDeclaration") + public static final Set quantifierGlosses = Collections.unmodifiableSet(new HashSet() {{ + for (Operator operator : values()) { + add(operator.surfaceForm); + } + }}); +} diff --git a/src/edu/stanford/nlp/naturalli/OperatorSpec.java b/src/edu/stanford/nlp/naturalli/OperatorSpec.java new file mode 100644 index 0000000000..d3f3322374 --- /dev/null +++ b/src/edu/stanford/nlp/naturalli/OperatorSpec.java @@ -0,0 +1,97 @@ +package edu.stanford.nlp.naturalli; + +/** + * A silly little class to denote a quantifier scope. 
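For orientation, an editorial sketch of the spans this class records, using token indices for "all cats have tails" (quantifier [0,1), subject [1,2), object [2,4)):

    import edu.stanford.nlp.naturalli.Operator;
    import edu.stanford.nlp.naturalli.OperatorSpec;

    public class OperatorSpecDemo {
      public static void main(String[] args) {
        OperatorSpec scope = new OperatorSpec(Operator.ALL, 0, 1, 1, 2, 2, 4);
        System.out.println(scope.isExplicit());        // true ("all" is an overt quantifier)
        System.out.println(scope.isBinary());          // true (it has an object span)
        System.out.println(scope.quantifierLength());  // 1
      }
    }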
+ * + * @author Gabor Angeli + */ +public class OperatorSpec { + public final Operator instance; + public final int quantifierBegin; + public final int quantifierEnd; + public final int quantifierHead; + public final int subjectBegin; + public final int subjectEnd; + public final int objectBegin; + public final int objectEnd; + + public OperatorSpec( + Operator instance, + int quantifierBegin, int quantifierEnd, + int subjectBegin, int subjectEnd, + int objectBegin, int objectEnd) { + this.instance = instance; + this.quantifierBegin = quantifierBegin; + this.quantifierEnd = quantifierEnd; + this.quantifierHead = quantifierEnd - 1; + this.subjectBegin = subjectBegin; + this.subjectEnd = subjectEnd; + this.objectBegin = objectBegin; + this.objectEnd = objectEnd; + } + + /** + * If true, this is an explicit quantifier, such as "all" or "some." + * The other option is for this to be an implicit quantification, for instance with proper names: + * "Felix is a cat" -> \forall x, Felix(x) \rightarrow cat(x). + */ + public boolean isExplicit() { + return instance != Operator.IMPLICIT_NAMED_ENTITY; + } + + public boolean isBinary() { + return objectEnd > objectBegin; + } + + public int quantifierLength() { + return quantifierEnd - quantifierBegin; + } + + /** {@inheritDoc} */ + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (!(o instanceof OperatorSpec)) return false; + OperatorSpec that = (OperatorSpec) o; + return objectBegin == that.objectBegin && objectEnd == that.objectEnd && subjectBegin == that.subjectBegin && subjectEnd == that.subjectEnd; + } + + /** {@inheritDoc} */ + @Override + public int hashCode() { + int result = subjectBegin; + result = 31 * result + subjectEnd; + result = 31 * result + objectBegin; + result = 31 * result + objectEnd; + return result; + } + + /** {@inheritDoc} */ + @Override + public String toString() { + return "OperatorSpec{" + + "subjectBegin=" + subjectBegin + + ", subjectEnd=" + subjectEnd + + ", objectBegin=" + objectBegin + + ", objectEnd=" + objectEnd + + '}'; + } + + public static OperatorSpec merge(OperatorSpec x, OperatorSpec y) { + assert (x.quantifierBegin == y.quantifierBegin); + assert (x.quantifierEnd == y.quantifierEnd); + assert (x.instance == y.instance); + return new OperatorSpec( + x.instance, + Math.min(x.quantifierBegin, y.quantifierBegin), + Math.min(x.quantifierEnd, y.quantifierEnd), + Math.min(x.subjectBegin, y.subjectBegin), + Math.max(x.subjectEnd, y.subjectEnd), + Math.min(x.objectBegin, y.objectBegin), + Math.max(x.objectEnd, y.objectEnd)); + } +} diff --git a/src/edu/stanford/nlp/naturalli/Polarity.java b/src/edu/stanford/nlp/naturalli/Polarity.java new file mode 100644 index 0000000000..9710b2351e --- /dev/null +++ b/src/edu/stanford/nlp/naturalli/Polarity.java @@ -0,0 +1,242 @@ +package edu.stanford.nlp.naturalli; + +import edu.stanford.nlp.util.Pair; + +import java.util.Arrays; +import java.util.List; + +/** + *

    + * A class intended to be attached to a lexical item, determining what mutations are valid on it while + * maintaining valid Natural Logic inference. + *
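+ * For example, in the downward-polarity environment "no cats have tails", replacing "cats" with the more specific "black cats" preserves truth, while in an upward-polarity environment only the replacement in the other (more general) direction is licensed.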

+ * + * @author Gabor Angeli + */ +@SuppressWarnings("UnusedDeclaration") +public class Polarity { + + /** The projection function, as a table from a relation's fixed index to the projected fixed index */ + private final byte[] projectionFunction = new byte[7]; + + /** Create a polarity from a list of operators in scope */ + protected Polarity(List<Pair<Monotonicity, MonotonicityType>> operatorsInNarrowingScopeOrder) { + if (operatorsInNarrowingScopeOrder.isEmpty()) { + for (byte i = 0; i < projectionFunction.length; ++i) { + projectionFunction[i] = i; + } + } else { + for (int rel = 0; rel < 7; ++rel) { + NaturalLogicRelation relation = NaturalLogicRelation.byFixedIndex(rel); + for (int op = operatorsInNarrowingScopeOrder.size() - 1; op >= 0; --op) { + relation = project(relation, operatorsInNarrowingScopeOrder.get(op).first, operatorsInNarrowingScopeOrder.get(op).second); + } + projectionFunction[rel] = (byte) relation.fixedIndex; + } + } + } + + /** + * Create a polarity item by directly copying the projection function from {@link edu.stanford.nlp.naturalli.NaturalLogicRelation}s to + * their projected relation. + */ + public Polarity(byte[] projectionFunction) { + if (projectionFunction.length != 7) { + throw new IllegalArgumentException("Invalid projection function: " + Arrays.toString(projectionFunction)); + } + for (int i = 0; i < 7; ++i) { + if (projectionFunction[i] < 0 || projectionFunction[i] > 6) { + throw new IllegalArgumentException("Invalid projection function: " + Arrays.toString(projectionFunction)); + } + } + System.arraycopy(projectionFunction, 0, this.projectionFunction, 0, 7); + } + + /** + * Encode the projection table in painful detail. + * + * @param input The input natural logic relation to project up through the operator. + * @param mono The monotonicity of the operator we are projecting through. + * @param type The monotonicity type of the operator we are projecting through. + * + * @return The projected relation, once passed through an operator with the given specifications.
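+ * For example, {@link edu.stanford.nlp.naturalli.NaturalLogicRelation#FORWARD_ENTAILMENT} projected through the anti-additive (antitone) subject position of "all" comes out as {@link edu.stanford.nlp.naturalli.NaturalLogicRelation#REVERSE_ENTAILMENT}.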
+ */ + private NaturalLogicRelation project(NaturalLogicRelation input, Monotonicity mono, MonotonicityType type) { + switch (input) { + case EQUIVALENT: + return NaturalLogicRelation.EQUIVALENT; + case FORWARD_ENTAILMENT: + switch (mono) { + case MONOTONE: + return NaturalLogicRelation.FORWARD_ENTAILMENT; + case ANTITONE: + return NaturalLogicRelation.REVERSE_ENTAILMENT; + case NONMONOTONE: + case INVALID: + return NaturalLogicRelation.INDEPENDENCE; + } + case REVERSE_ENTAILMENT: + switch (mono) { + case MONOTONE: + return NaturalLogicRelation.REVERSE_ENTAILMENT; + case ANTITONE: + return NaturalLogicRelation.FORWARD_ENTAILMENT; + case NONMONOTONE: + case INVALID: + return NaturalLogicRelation.INDEPENDENCE; + } + case NEGATION: + switch (type) { + case NONE: + return NaturalLogicRelation.INDEPENDENCE; + case ADDITIVE: + switch (mono) { + case MONOTONE: + return NaturalLogicRelation.COVER; + case ANTITONE: + return NaturalLogicRelation.ALTERNATION; + case NONMONOTONE: + case INVALID: + return NaturalLogicRelation.INDEPENDENCE; + } + case MULTIPLICATIVE: + switch (mono) { + case MONOTONE: + return NaturalLogicRelation.ALTERNATION; + case ANTITONE: + return NaturalLogicRelation.COVER; + case NONMONOTONE: + case INVALID: + return NaturalLogicRelation.INDEPENDENCE; + } + break; + case BOTH: + return NaturalLogicRelation.NEGATION; + } + break; + case ALTERNATION: + switch (mono) { + case MONOTONE: + switch (type) { + case NONE: + case ADDITIVE: + return NaturalLogicRelation.INDEPENDENCE; + case MULTIPLICATIVE: + case BOTH: + return NaturalLogicRelation.ALTERNATION; + } + case ANTITONE: + switch (type) { + case NONE: + case ADDITIVE: + return NaturalLogicRelation.INDEPENDENCE; + case MULTIPLICATIVE: + case BOTH: + return NaturalLogicRelation.COVER; + } + case NONMONOTONE: + case INVALID: + return NaturalLogicRelation.INDEPENDENCE; + } + case COVER: + switch (mono) { + case MONOTONE: + switch (type) { + case NONE: + case MULTIPLICATIVE: + return NaturalLogicRelation.INDEPENDENCE; + case ADDITIVE: + case BOTH: + return NaturalLogicRelation.COVER; + } + case ANTITONE: + switch (type) { + case NONE: + case MULTIPLICATIVE: + return NaturalLogicRelation.INDEPENDENCE; + case ADDITIVE: + case BOTH: + return NaturalLogicRelation.ALTERNATION; + } + case NONMONOTONE: + case INVALID: + return NaturalLogicRelation.INDEPENDENCE; + } + case INDEPENDENCE: + return NaturalLogicRelation.INDEPENDENCE; + } + throw new IllegalStateException("[should not happen!] Projection table is incomplete for " + mono + " : " + type + " on relation " + input); + } + + /** + * Project the given natural logic lexical relation on this word. So, for example, if we want to go up the + * Hypernymy hierarchy ({@link edu.stanford.nlp.naturalli.NaturalLogicRelation#FORWARD_ENTAILMENT}) on this word, + * then this function will tell you what relation holds between the new mutated fact and this fact. + * + * @param lexicalRelation The lexical relation we are applying to this word. + * @return The relation between the mutated sentence and the original sentence. + */ + public NaturalLogicRelation projectLexicalRelation(NaturalLogicRelation lexicalRelation) { + return NaturalLogicRelation.byFixedIndex(projectionFunction[lexicalRelation.fixedIndex]); + } + + /** + * If true, applying this lexical relation to this word creates a sentence which is entailed by the original sentence. + * Note that both this and {@link Polarity#introducesNegation(NaturalLogicRelation)} can be false.
If this is the case, then + * natural logic can neither verify nor disprove this mutation. + */ + public boolean maintainsEntailment(NaturalLogicRelation lexicalRelation) { + return projectLexicalRelation(lexicalRelation).isEntailed; + } + + /** + * If true, applying this lexical relation to this word creates a sentence which is negated by the original sentence. + * Note that both this and {@link Polarity#maintainsEntailment(NaturalLogicRelation)} can be false. If this is the case, then + * natural logic can neither verify nor disprove this mutation. + */ + public boolean introducesNegation(NaturalLogicRelation lexicalRelation) { + return projectLexicalRelation(lexicalRelation).isNegated; + } + + /** + * Ignoring exclusion, determine if this word has upward polarity. + */ + public boolean isUpwards() { + return projectLexicalRelation(NaturalLogicRelation.FORWARD_ENTAILMENT) == NaturalLogicRelation.FORWARD_ENTAILMENT && + projectLexicalRelation(NaturalLogicRelation.REVERSE_ENTAILMENT) == NaturalLogicRelation.REVERSE_ENTAILMENT; + } + + /** + * Ignoring exclusion, determine if this word has downward polarity. + */ + public boolean isDownwards() { + return projectLexicalRelation(NaturalLogicRelation.FORWARD_ENTAILMENT) == NaturalLogicRelation.REVERSE_ENTAILMENT && + projectLexicalRelation(NaturalLogicRelation.REVERSE_ENTAILMENT) == NaturalLogicRelation.FORWARD_ENTAILMENT; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (!(o instanceof Polarity)) return false; + Polarity polarity = (Polarity) o; + return Arrays.equals(projectionFunction, polarity.projectionFunction); + } + + @Override + public int hashCode() { + return Arrays.hashCode(projectionFunction); + } + + @Override + public String toString() { + if (isUpwards()) { + return "up"; + } else if (isDownwards()) { + return "down"; + } else { + return "flat"; + } + } +} diff --git a/src/edu/stanford/nlp/naturalli/SentenceFragment.java b/src/edu/stanford/nlp/naturalli/SentenceFragment.java new file mode 100644 index 0000000000..d6f72d0130 --- /dev/null +++ b/src/edu/stanford/nlp/naturalli/SentenceFragment.java @@ -0,0 +1,51 @@ +package edu.stanford.nlp.naturalli; + +import edu.stanford.nlp.ling.CoreLabel; +import edu.stanford.nlp.ling.IndexedWord; +import edu.stanford.nlp.semgraph.SemanticGraph; +import edu.stanford.nlp.util.StringUtils; + +import java.util.ArrayList; +import java.util.List; +import java.util.stream.Collectors; + +/** + * A representation of a sentence fragment.
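+ * Typically, this is an entailed clause of a longer sentence, carried together with the dependency subgraph that yields it.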
+ * + * @author Gabor Angeli + */ +public class SentenceFragment { + public final List<CoreLabel> words = new ArrayList<>(); + public final SemanticGraph parseTree; + + public SentenceFragment(SemanticGraph tree, boolean copy) { + if (copy) { + this.parseTree = new SemanticGraph(tree); + } else { + this.parseTree = tree; + } + words.addAll(this.parseTree.vertexListSorted().stream().map(IndexedWord::backingLabel).collect(Collectors.toList())); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (!(o instanceof SentenceFragment)) return false; + SentenceFragment that = (SentenceFragment) o; + return parseTree.equals(that.parseTree); + } + + @Override + public int hashCode() { + return parseTree.hashCode(); + } + + @Override + public String toString() { + return StringUtils.join(words.stream().map(CoreLabel::word), " "); + } +} diff --git a/src/edu/stanford/nlp/neural/NeuralUtils.java b/src/edu/stanford/nlp/neural/NeuralUtils.java index d2e47e1a5f..c96bc26295 100644 --- a/src/edu/stanford/nlp/neural/NeuralUtils.java +++ b/src/edu/stanford/nlp/neural/NeuralUtils.java @@ -1,6 +1,8 @@ package edu.stanford.nlp.neural; +import java.io.ByteArrayOutputStream; import java.io.File; +import java.io.PrintStream; import java.util.Arrays; import java.util.Iterator; import java.util.List; @@ -8,6 +10,7 @@ import java.util.function.Predicate; import org.ejml.simple.SimpleMatrix; +import org.ejml.ops.MatrixIO; import edu.stanford.nlp.io.IOUtils; import edu.stanford.nlp.util.CollectionUtils; @@ -70,6 +73,15 @@ public boolean test(String s) { return new SimpleMatrix(data); } + /** + * @param matrix The matrix to return as a String + * @param format The format to use for each value in the matrix, e.g., "%f" + */ + public static String toString(SimpleMatrix matrix, String format) { + ByteArrayOutputStream stream = new ByteArrayOutputStream(); + MatrixIO.print(new PrintStream(stream), matrix.getMatrix(), format); + return stream.toString(); + } /** * Compute cosine distance between two column vectors. diff --git a/src/edu/stanford/nlp/neural/SimpleTensor.java b/src/edu/stanford/nlp/neural/SimpleTensor.java index ef3ebb7d0a..ba69f06101 100644 --- a/src/edu/stanford/nlp/neural/SimpleTensor.java +++ b/src/edu/stanford/nlp/neural/SimpleTensor.java @@ -296,5 +296,18 @@ public String toString() { return result.toString(); } + /** + * Output the tensor one slice at a time. Each number is output + * with the format given, e.g., "%f" + */ + public String toString(String format) { + StringBuilder result = new StringBuilder(); + for (int slice = 0; slice < numSlices; ++slice) { + result.append("Slice " + slice + "\n"); + result.append(NeuralUtils.toString(slices[slice], format)); + } + return result.toString(); + } + private static final long serialVersionUID = 1; } diff --git a/src/edu/stanford/nlp/optimization/QNMinimizer.java b/src/edu/stanford/nlp/optimization/QNMinimizer.java index 1de0a249cc..984a72d643 100644 --- a/src/edu/stanford/nlp/optimization/QNMinimizer.java +++ b/src/edu/stanford/nlp/optimization/QNMinimizer.java @@ -1030,15 +1030,13 @@ public double[] minimize(DiffFunction dfunction, double functionTolerance, say("M"); break; default: - sayln("Invalid line search option for QNMinimizer.
"); - System.exit(1); - break; - + throw new IllegalArgumentException("Invalid line search option for QNMinimizer."); } } newValue = newPoint[f]; - System.err.print(" " + nf.format(newPoint[a])); + say(" "); + say(nf.format(newPoint[a])); say("] "); // This shouldn't actually evaluate anything since that should have been diff --git a/src/edu/stanford/nlp/parser/lexparser/LexicalizedParser.java b/src/edu/stanford/nlp/parser/lexparser/LexicalizedParser.java index f15da4be39..5b80902fd3 100644 --- a/src/edu/stanford/nlp/parser/lexparser/LexicalizedParser.java +++ b/src/edu/stanford/nlp/parser/lexparser/LexicalizedParser.java @@ -1148,7 +1148,7 @@ public void setOptionFlags(String... flags) { *
  • -outputFormatOptions Provide options that control the * behavior of various -outputFormat choices, such as * lexicalize, stem, markHeadNodes, - * or xml. + * or xml. {@link edu.stanford.nlp.trees.TreePrint} * Options are specified as a comma-separated list.
  • *
  • -writeOutputFiles Write output files corresponding * to the input files, with the same name but a ".stp" diff --git a/src/edu/stanford/nlp/parser/nndep/Classifier.java b/src/edu/stanford/nlp/parser/nndep/Classifier.java index 12dc993836..6fd0cdb7dc 100644 --- a/src/edu/stanford/nlp/parser/nndep/Classifier.java +++ b/src/edu/stanford/nlp/parser/nndep/Classifier.java @@ -14,7 +14,6 @@ import java.util.concurrent.ThreadLocalRandom; import java.util.stream.IntStream; -import static java.util.stream.Collectors.toSet; /** * Neural network classifier which powers a transition-based dependency @@ -140,7 +139,7 @@ public Classifier(Config config, Dataset dataset, double[][] E, double[][] W1, d numLabels = W2.length; preMap = new HashMap<>(); - for (int i = 0; i < preComputed.size(); ++i) + for (int i = 0; i < preComputed.size() && i < config.numPreComputed; ++i) preMap.put(preComputed.get(i), i); isTraining = dataset != null; @@ -636,13 +635,7 @@ public void finalizeTraining() { * @see #preCompute(java.util.Set) */ public void preCompute() { - // If no features are specified, pre-compute all of them (which fit - // into a `saved` array of size `config.numPreComputed`) - Set<Integer> keys = preMap.entrySet().stream() - .filter(e -> e.getValue() < config.numPreComputed) - .map(Map.Entry::getKey) - .collect(toSet()); - preCompute(keys); + preCompute(preMap.keySet()); } /** diff --git a/src/edu/stanford/nlp/parser/nndep/Config.java b/src/edu/stanford/nlp/parser/nndep/Config.java index 6a0dff343c..4e5a7d66bc 100644 --- a/src/edu/stanford/nlp/parser/nndep/Config.java +++ b/src/edu/stanford/nlp/parser/nndep/Config.java @@ -233,7 +233,6 @@ private Languages.Language getLanguage(String languageStr) { if (l.name().equalsIgnoreCase(languageStr)) return l; } - return null; } diff --git a/src/edu/stanford/nlp/parser/nndep/DependencyParser.java b/src/edu/stanford/nlp/parser/nndep/DependencyParser.java index 137902ec28..32058d9cbc 100644 --- a/src/edu/stanford/nlp/parser/nndep/DependencyParser.java +++ b/src/edu/stanford/nlp/parser/nndep/DependencyParser.java @@ -504,7 +504,7 @@ public void loadModelFile(String modelFile) { private void loadModelFile(String modelFile, boolean verbose) { Timing t = new Timing(); try { - // System.err.println(Config.SEPARATOR); + System.err.println("Loading depparse model file: " + modelFile + " ... "); String s; BufferedReader input = IOUtils.readerFromString(modelFile); @@ -643,9 +643,8 @@ private void readEmbedFile(String embedFile) { embeddings = new double[nWords][dim]; System.err.println("Embedding File " + embedFile + ": #Words = " + nWords + ", dim = " + dim); - //TODO: how if the embedding dim. does not match..?
if (dim != config.embeddingSize) - System.err.println("ERROR: embedding dimension mismatch"); + throw new IllegalArgumentException("The dimension of the embedding file does not match config.embeddingSize"); for (int i = 0; i < lines.size(); ++i) { splits = lines.get(i).split("\\s+"); @@ -992,13 +991,16 @@ public double testCoNLL(String testFile, String outFile) { List<DependencyTree> predicted = testSents.stream().map(this::predictInner).collect(toList()); Map<String, Double> result = system.evaluate(testSents, predicted, testTrees); + + double uasNoPunc = result.get("UASwoPunc"); double lasNoPunc = result.get("LASwoPunc"); - System.err.printf("UAS = %.4f%n", result.get("UASwoPunc")); + System.err.printf("UAS = %.4f%n", uasNoPunc); System.err.printf("LAS = %.4f%n", lasNoPunc); + long millis = timer.stop(); double wordspersec = numWords / (((double) millis) / 1000); double sentspersec = numSentences / (((double) millis) / 1000); - System.err.printf("%s tagged %d words in %d sentences in %.1fs at %.1f w/s, %.1f sent/s.%n", + System.err.printf("%s parsed %d words in %d sentences in %.1fs at %.1f w/s, %.1f sent/s.%n", StringUtils.getShortClassName(this), numWords, numSentences, millis / 1000.0, wordspersec, sentspersec); if (outFile != null) { @@ -1156,7 +1158,6 @@ public static void main(String[] args) { if (props.containsKey("testFile")) { parser.loadModelFile(props.getProperty("model")); loaded = true; - parser.testCoNLL(props.getProperty("testFile"), props.getProperty("outFile")); } @@ -1191,5 +1192,4 @@ public static void main(String[] args) { parser.parseTextFile(input, output); } } - } \ No newline at end of file diff --git a/src/edu/stanford/nlp/parser/nndep/Util.java b/src/edu/stanford/nlp/parser/nndep/Util.java index 71e83f0795..34c48c0068 100644 --- a/src/edu/stanford/nlp/parser/nndep/Util.java +++ b/src/edu/stanford/nlp/parser/nndep/Util.java @@ -162,17 +162,20 @@ public static void writeConllFile(String outFile, List<CoreMap> sentences, List< try { PrintWriter output = IOUtils.getPrintWriter(outFile); - for (CoreMap sentence : sentences) + + for (int i = 0; i < sentences.size(); i++) { + CoreMap sentence = sentences.get(i); + DependencyTree tree = trees.get(i); + List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class); for (int j = 1, size = tokens.size(); j <= size; ++j) { CoreLabel token = tokens.get(j - 1); output.printf("%d\t%s\t_\t%s\t%s\t_\t%d\t%s\t_\t_%n", - j, token.word(), token.tag(), token.tag(), - token.get(CoreAnnotations.CoNLLDepParentIndexAnnotation.class), - token.get(CoreAnnotations.CoNLLDepTypeAnnotation.class)); + j, token.word(), token.tag(), token.tag(), + tree.getHead(j), tree.getLabel(j)); } output.println(); } @@ -186,17 +189,25 @@ public static void printTreeStats(String str, List<DependencyTree> trees) { System.err.println(Config.SEPARATOR + " " + str); - System.err.println("#Trees: " + trees.size()); - int nonTrees = 0; + int nTrees = trees.size(); + int nonTree = 0; + int multiRoot = 0; int nonProjective = 0; for (DependencyTree tree : trees) { if (!tree.isTree()) - ++nonTrees; - else if (!tree.isProjective()) - ++nonProjective; + ++nonTree; + else + { + if (!tree.isProjective()) + ++nonProjective; + if (!tree.isSingleRoot()) + ++multiRoot; + } } - System.err.println(nonTrees + " tree(s) are illegal."); - System.err.println(nonProjective + " tree(s) are legal but not projective."); + System.err.printf("#Trees: %d%n", nTrees); + System.err.printf("%d tree(s) are illegal (%.2f%%).%n", nonTree, nonTree * 100.0 / nTrees); +
System.err.printf("%d tree(s) are legal but have multiple roots (%.2f%%).%n", multiRoot, multiRoot * 100.0 / nTrees); + System.err.printf("%d tree(s) are legal but not projective (%.2f%%).%n", nonProjective, nonProjective * 100.0 / nTrees); } public static void printTreeStats(List trees) diff --git a/src/edu/stanford/nlp/parser/shiftreduce/BasicFeatureFactory.java b/src/edu/stanford/nlp/parser/shiftreduce/BasicFeatureFactory.java index c0f0c5ce51..0d69e24bda 100644 --- a/src/edu/stanford/nlp/parser/shiftreduce/BasicFeatureFactory.java +++ b/src/edu/stanford/nlp/parser/shiftreduce/BasicFeatureFactory.java @@ -29,8 +29,8 @@ public static void addUnaryQueueFeatures(List features, CoreLabel label, features.add(wtFeature + NULL); return; } - String tag = label.get(TreeCoreAnnotations.HeadTagAnnotation.class).label().value(); - String word = label.get(TreeCoreAnnotations.HeadWordAnnotation.class).label().value(); + String tag = label.get(TreeCoreAnnotations.HeadTagLabelAnnotation.class).value(); + String word = label.get(TreeCoreAnnotations.HeadWordLabelAnnotation.class).value(); // TODO: check to see if this is slow because of the string concat features.add(wtFeature + tag + "-" + word); diff --git a/src/edu/stanford/nlp/parser/shiftreduce/BinaryTransition.java b/src/edu/stanford/nlp/parser/shiftreduce/BinaryTransition.java index f109a5e0ad..495ca84ef2 100644 --- a/src/edu/stanford/nlp/parser/shiftreduce/BinaryTransition.java +++ b/src/edu/stanford/nlp/parser/shiftreduce/BinaryTransition.java @@ -177,8 +177,8 @@ public State apply(State state, double scoreDelta) { CoreLabel production = new CoreLabel(); production.setValue(label); - production.set(TreeCoreAnnotations.HeadWordAnnotation.class, headLabel.get(TreeCoreAnnotations.HeadWordAnnotation.class)); - production.set(TreeCoreAnnotations.HeadTagAnnotation.class, headLabel.get(TreeCoreAnnotations.HeadTagAnnotation.class)); + production.set(TreeCoreAnnotations.HeadWordLabelAnnotation.class, headLabel.get(TreeCoreAnnotations.HeadWordLabelAnnotation.class)); + production.set(TreeCoreAnnotations.HeadTagLabelAnnotation.class, headLabel.get(TreeCoreAnnotations.HeadTagLabelAnnotation.class)); Tree newTop = new LabeledScoredTreeNode(production); newTop.addChild(left); newTop.addChild(right); diff --git a/src/edu/stanford/nlp/parser/shiftreduce/CreateTransitionSequence.java b/src/edu/stanford/nlp/parser/shiftreduce/CreateTransitionSequence.java index 40b94746fe..dde9f2fec6 100644 --- a/src/edu/stanford/nlp/parser/shiftreduce/CreateTransitionSequence.java +++ b/src/edu/stanford/nlp/parser/shiftreduce/CreateTransitionSequence.java @@ -73,9 +73,9 @@ private static void createTransitionSequenceHelper(List transitions, CoreLabel label = (CoreLabel) tree.label(); CoreLabel leftLabel = (CoreLabel) tree.children()[0].label(); CoreLabel rightLabel = (CoreLabel) tree.children()[1].label(); - Tree head = label.get(TreeCoreAnnotations.HeadWordAnnotation.class); - Tree leftHead = leftLabel.get(TreeCoreAnnotations.HeadWordAnnotation.class); - Tree rightHead = rightLabel.get(TreeCoreAnnotations.HeadWordAnnotation.class); + CoreLabel head = label.get(TreeCoreAnnotations.HeadWordLabelAnnotation.class); + CoreLabel leftHead = leftLabel.get(TreeCoreAnnotations.HeadWordLabelAnnotation.class); + CoreLabel rightHead = rightLabel.get(TreeCoreAnnotations.HeadWordLabelAnnotation.class); if (head == null || leftHead == null || rightHead == null) { throw new IllegalArgumentException("Expected tree labels to have their heads assigned"); } diff --git 
a/src/edu/stanford/nlp/parser/shiftreduce/FeatureFactory.java b/src/edu/stanford/nlp/parser/shiftreduce/FeatureFactory.java index 01cee163cc..7c9f02a12c 100644 --- a/src/edu/stanford/nlp/parser/shiftreduce/FeatureFactory.java +++ b/src/edu/stanford/nlp/parser/shiftreduce/FeatureFactory.java @@ -37,10 +37,10 @@ public static String getFeatureFromCoreLabel(CoreLabel label, FeatureComponent f String value = null; switch(feature) { case HEADWORD: - value = (label == null) ? NULL : label.get(TreeCoreAnnotations.HeadWordAnnotation.class).label().value(); + value = (label == null) ? NULL : label.get(TreeCoreAnnotations.HeadWordLabelAnnotation.class).value(); break; case HEADTAG: - value = (label == null) ? NULL : label.get(TreeCoreAnnotations.HeadTagAnnotation.class).label().value(); + value = (label == null) ? NULL : label.get(TreeCoreAnnotations.HeadTagLabelAnnotation.class).value(); break; case VALUE: value = (label == null) ? NULL : label.value(); @@ -67,7 +67,7 @@ public static CoreLabel getRecentDependent(TreeShapedStack<Tree> stack, Transiti if (!(node.label() instanceof CoreLabel)) { throw new IllegalArgumentException("Can only featurize CoreLabel trees"); } - Tree head = ((CoreLabel) node.label()).get(TreeCoreAnnotations.HeadWordAnnotation.class); + CoreLabel head = ((CoreLabel) node.label()).get(TreeCoreAnnotations.HeadWordLabelAnnotation.class); switch (transition) { case LEFT: { @@ -79,7 +79,7 @@ public static CoreLabel getRecentDependent(TreeShapedStack<Tree> stack, Transiti if (!(child.label() instanceof CoreLabel)) { throw new IllegalArgumentException("Can only featurize CoreLabel trees"); } - if (((CoreLabel) child.label()).get(TreeCoreAnnotations.HeadWordAnnotation.class) != head) { + if (((CoreLabel) child.label()).get(TreeCoreAnnotations.HeadWordLabelAnnotation.class) != head) { return (CoreLabel) child.label(); } node = child; @@ -98,7 +98,7 @@ public static CoreLabel getRecentDependent(TreeShapedStack<Tree> stack, Transiti if (!(child.label() instanceof CoreLabel)) { throw new IllegalArgumentException("Can only featurize CoreLabel trees"); } - if (((CoreLabel) child.label()).get(TreeCoreAnnotations.HeadWordAnnotation.class) != head) { + if (((CoreLabel) child.label()).get(TreeCoreAnnotations.HeadWordLabelAnnotation.class) != head) { return (CoreLabel) child.label(); } node = child; diff --git a/src/edu/stanford/nlp/parser/shiftreduce/ShiftReduceParser.java b/src/edu/stanford/nlp/parser/shiftreduce/ShiftReduceParser.java index 5884ea6d70..c3527be357 100644 --- a/src/edu/stanford/nlp/parser/shiftreduce/ShiftReduceParser.java +++ b/src/edu/stanford/nlp/parser/shiftreduce/ShiftReduceParser.java @@ -151,6 +151,13 @@ public String[] defaultCoreNLPFlags() { } } + /** + * Return an unmodifiableSet containing the known states (including binarization) + */ + public Set<String> knownStates() { + return Collections.unmodifiableSet(model.knownStates); + } + @Override public boolean requiresTags() { return true; } @@ -241,10 +248,11 @@ public static State initialStateFromTaggedSentence(List words LabeledScoredTreeNode tagNode = new LabeledScoredTreeNode(tagLabel); tagNode.addChild(wordNode); - wordLabel.set(TreeCoreAnnotations.HeadWordAnnotation.class, wordNode); - wordLabel.set(TreeCoreAnnotations.HeadTagAnnotation.class, tagNode); - tagLabel.set(TreeCoreAnnotations.HeadWordAnnotation.class, wordNode); - tagLabel.set(TreeCoreAnnotations.HeadTagAnnotation.class, tagNode); + // TODO: can we get away with not setting these on the wordLabel?
+ wordLabel.set(TreeCoreAnnotations.HeadWordLabelAnnotation.class, wordLabel); + wordLabel.set(TreeCoreAnnotations.HeadTagLabelAnnotation.class, tagLabel); + tagLabel.set(TreeCoreAnnotations.HeadWordLabelAnnotation.class, wordLabel); + tagLabel.set(TreeCoreAnnotations.HeadTagLabelAnnotation.class, tagLabel); preterminals.add(tagNode); } diff --git a/src/edu/stanford/nlp/parser/shiftreduce/ShiftReduceUtils.java b/src/edu/stanford/nlp/parser/shiftreduce/ShiftReduceUtils.java index 3144c0846e..d965b9a255 100644 --- a/src/edu/stanford/nlp/parser/shiftreduce/ShiftReduceUtils.java +++ b/src/edu/stanford/nlp/parser/shiftreduce/ShiftReduceUtils.java @@ -15,7 +15,7 @@ static BinaryTransition.Side getBinarySide(Tree tree) { } CoreLabel label = ErasureUtils.uncheckedCast(tree.label()); CoreLabel childLabel = ErasureUtils.uncheckedCast(tree.children()[0].label()); - if (label.get(TreeCoreAnnotations.HeadWordAnnotation.class) == childLabel.get(TreeCoreAnnotations.HeadWordAnnotation.class)) { + if (label.get(TreeCoreAnnotations.HeadWordLabelAnnotation.class) == childLabel.get(TreeCoreAnnotations.HeadWordLabelAnnotation.class)) { return BinaryTransition.Side.LEFT; } else { return BinaryTransition.Side.RIGHT; @@ -36,8 +36,7 @@ static boolean isEquivalentCategory(String l1, String l2) { /** Returns a 0-based index of the head of the tree. Assumes the leaves had been indexed from 1 */ static int headIndex(Tree tree) { CoreLabel label = ErasureUtils.uncheckedCast(tree.label()); - Tree head = label.get(TreeCoreAnnotations.HeadWordAnnotation.class); - CoreLabel headLabel = ErasureUtils.uncheckedCast(head.label()); + CoreLabel headLabel = label.get(TreeCoreAnnotations.HeadWordLabelAnnotation.class); return headLabel.index() - 1; } diff --git a/src/edu/stanford/nlp/parser/shiftreduce/UnaryTransition.java b/src/edu/stanford/nlp/parser/shiftreduce/UnaryTransition.java index a843513dc4..e4429dc44c 100644 --- a/src/edu/stanford/nlp/parser/shiftreduce/UnaryTransition.java +++ b/src/edu/stanford/nlp/parser/shiftreduce/UnaryTransition.java @@ -80,8 +80,8 @@ static Tree createNode(Tree top, String label, Tree ... children) { CoreLabel headLabel = (CoreLabel) top.label(); CoreLabel production = new CoreLabel(); production.setValue(label); - production.set(TreeCoreAnnotations.HeadWordAnnotation.class, headLabel.get(TreeCoreAnnotations.HeadWordAnnotation.class)); - production.set(TreeCoreAnnotations.HeadTagAnnotation.class, headLabel.get(TreeCoreAnnotations.HeadTagAnnotation.class)); + production.set(TreeCoreAnnotations.HeadWordLabelAnnotation.class, headLabel.get(TreeCoreAnnotations.HeadWordLabelAnnotation.class)); + production.set(TreeCoreAnnotations.HeadTagLabelAnnotation.class, headLabel.get(TreeCoreAnnotations.HeadTagLabelAnnotation.class)); Tree newTop = new LabeledScoredTreeNode(production); for (Tree child : children) { newTop.addChild(child); diff --git a/src/edu/stanford/nlp/pipeline/Annotation.java b/src/edu/stanford/nlp/pipeline/Annotation.java index 8234a0a9bc..9e3546263f 100644 --- a/src/edu/stanford/nlp/pipeline/Annotation.java +++ b/src/edu/stanford/nlp/pipeline/Annotation.java @@ -88,6 +88,7 @@ public String toString() { return this.get(CoreAnnotations.TextAnnotation.class); } + /** Make a new Annotation from a List of tokenized sentences. 
*/ public Annotation(List<CoreMap> sentences) { super(); this.set(CoreAnnotations.SentencesAnnotation.class, sentences); diff --git a/src/edu/stanford/nlp/pipeline/AnnotationPipeline.java b/src/edu/stanford/nlp/pipeline/AnnotationPipeline.java index dd1cb7b5f9..7b989c66c4 100644 --- a/src/edu/stanford/nlp/pipeline/AnnotationPipeline.java +++ b/src/edu/stanford/nlp/pipeline/AnnotationPipeline.java @@ -67,7 +67,7 @@ public void annotate(Annotation annotation) { } annotator.annotate(annotation); if (TIME) { - int elapsed = (int) t.stop(); + long elapsed = t.stop(); MutableLong m = it.next(); m.incValue(elapsed); } diff --git a/src/edu/stanford/nlp/pipeline/Annotator.java b/src/edu/stanford/nlp/pipeline/Annotator.java index b728243a5a..b07c279a26 100644 --- a/src/edu/stanford/nlp/pipeline/Annotator.java +++ b/src/edu/stanford/nlp/pipeline/Annotator.java @@ -104,6 +104,8 @@ public String toString() { public static final String STANFORD_SENTIMENT = "sentiment"; public static final String STANFORD_COLUMN_DATA_CLASSIFIER = "cdc"; public static final String STANFORD_DEPENDENCIES = "depparse"; + public static final String STANFORD_NATLOG = "natlog"; + public static final String STANFORD_OPENIE = "openie"; public static final Requirement TOKENIZE_REQUIREMENT = new Requirement(STANFORD_TOKENIZE); @@ -117,6 +119,8 @@ public String toString() { public static final Requirement PARSE_REQUIREMENT = new Requirement(STANFORD_PARSE); public static final Requirement DETERMINISTIC_COREF_REQUIREMENT = new Requirement(STANFORD_DETERMINISTIC_COREF); public static final Requirement RELATION_EXTRACTOR_REQUIREMENT = new Requirement(STANFORD_RELATION); + public static final Requirement NATLOG_REQUIREMENT = new Requirement(STANFORD_NATLOG); + public static final Requirement OPENIE_REQUIREMENT = new Requirement(STANFORD_OPENIE); /** * These are annotators which StanfordCoreNLP does not know how to diff --git a/src/edu/stanford/nlp/pipeline/AnnotatorFactories.java b/src/edu/stanford/nlp/pipeline/AnnotatorFactories.java index 2df98f8753..f9fa43672d 100644 --- a/src/edu/stanford/nlp/pipeline/AnnotatorFactories.java +++ b/src/edu/stanford/nlp/pipeline/AnnotatorFactories.java @@ -20,6 +20,8 @@ */ public class AnnotatorFactories { + private AnnotatorFactories() {} // static factory class + public static AnnotatorFactory tokenize(Properties properties, final AnnotatorImplementations annotatorImplementation) { return new AnnotatorFactory(properties, annotatorImplementation) { private static final long serialVersionUID = 1L; @@ -53,15 +55,15 @@ public String additionalSignature() { } if (Boolean.valueOf(properties.getProperty("tokenize.whitespace", "false"))) { - os.append(TokenizerAnnotator.EOL_PROPERTY + ":").append(properties.getProperty(TokenizerAnnotator.EOL_PROPERTY, + os.append(TokenizerAnnotator.EOL_PROPERTY + ':').append(properties.getProperty(TokenizerAnnotator.EOL_PROPERTY, "false")); - os.append(StanfordCoreNLP.NEWLINE_SPLITTER_PROPERTY + ":").append(properties.getProperty(StanfordCoreNLP.NEWLINE_SPLITTER_PROPERTY, + os.append(StanfordCoreNLP.NEWLINE_SPLITTER_PROPERTY + ':').append(properties.getProperty(StanfordCoreNLP.NEWLINE_SPLITTER_PROPERTY, "false")); return os.toString(); } else { - os.append(StanfordCoreNLP.NEWLINE_SPLITTER_PROPERTY + ":").append(Boolean.valueOf(properties.getProperty(StanfordCoreNLP.NEWLINE_SPLITTER_PROPERTY, + os.append(StanfordCoreNLP.NEWLINE_SPLITTER_PROPERTY + ':').append(Boolean.valueOf(properties.getProperty(StanfordCoreNLP.NEWLINE_SPLITTER_PROPERTY, "false"))); -
os.append(StanfordCoreNLP.NEWLINE_IS_SENTENCE_BREAK_PROPERTY + ":").append(properties.getProperty(StanfordCoreNLP.NEWLINE_IS_SENTENCE_BREAK_PROPERTY, StanfordCoreNLP.DEFAULT_NEWLINE_IS_SENTENCE_BREAK)); + os.append(StanfordCoreNLP.NEWLINE_IS_SENTENCE_BREAK_PROPERTY + ':').append(properties.getProperty(StanfordCoreNLP.NEWLINE_IS_SENTENCE_BREAK_PROPERTY, StanfordCoreNLP.DEFAULT_NEWLINE_IS_SENTENCE_BREAK)); } return os.toString(); } @@ -190,7 +192,8 @@ public static AnnotatorFactory sentenceSplit(Properties properties, final Annota private static final long serialVersionUID = 1L; @Override public Annotator create() { - System.err.println(signature()); + // System.err.println(signature()); + // todo: The above shows that signature is edu.stanford.nlp.pipeline.AnnotatorImplementations: and doesn't reflect what annotator it is! Should fix. boolean nlSplitting = Boolean.valueOf(properties.getProperty(StanfordCoreNLP.NEWLINE_SPLITTER_PROPERTY, "false")); if (nlSplitting) { boolean whitespaceTokenization = Boolean.valueOf(properties.getProperty("tokenize.whitespace", "false")); @@ -254,8 +257,8 @@ public String additionalSignature() { // keep track of all relevant properties for this annotator here! StringBuilder os = new StringBuilder(); if (Boolean.valueOf(properties.getProperty(StanfordCoreNLP.NEWLINE_SPLITTER_PROPERTY, "false"))) { - os.append(StanfordCoreNLP.NEWLINE_SPLITTER_PROPERTY + "=").append(properties.getProperty(StanfordCoreNLP.NEWLINE_SPLITTER_PROPERTY, "false")).append("\n"); - os.append("tokenize.whitespace=").append(properties.getProperty("tokenize.whitespace", "false")).append("\n"); + os.append(StanfordCoreNLP.NEWLINE_SPLITTER_PROPERTY + '=').append(properties.getProperty(StanfordCoreNLP.NEWLINE_SPLITTER_PROPERTY, "false")).append('\n'); + os.append("tokenize.whitespace=").append(properties.getProperty("tokenize.whitespace", "false")).append('\n'); } else { os.append(baseSignature(properties, StanfordCoreNLP.STANFORD_SSPLIT)); } @@ -327,10 +330,10 @@ public String additionalSignature() { // keep track of all relevant properties for this annotator here! 
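+ // note: this signature is what determines whether an already-constructed annotator can be reused from the cache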
return "ner.model:" + properties.getProperty("ner.model", "") + - NERClassifierCombiner.APPLY_NUMERIC_CLASSIFIERS_PROPERTY + ":" + + NERClassifierCombiner.APPLY_NUMERIC_CLASSIFIERS_PROPERTY + ':' + properties.getProperty(NERClassifierCombiner.APPLY_NUMERIC_CLASSIFIERS_PROPERTY, Boolean.toString(NERClassifierCombiner.APPLY_NUMERIC_CLASSIFIERS_DEFAULT)) + - NumberSequenceClassifier.USE_SUTIME_PROPERTY + ":" + + NumberSequenceClassifier.USE_SUTIME_PROPERTY + ':' + properties.getProperty(NumberSequenceClassifier.USE_SUTIME_PROPERTY, Boolean.toString(NumberSequenceClassifier.USE_SUTIME_DEFAULT)); } @@ -501,6 +504,7 @@ public String additionalSignature() { public static AnnotatorFactory columnDataClassifier(Properties properties, final AnnotatorImplementations annotatorImpls) { return new AnnotatorFactory(properties, annotatorImpls) { + private static final long serialVersionUID = 1L; @Override public Annotator create() { if(!properties.containsKey("loadClassifier")) @@ -520,6 +524,7 @@ protected String additionalSignature() { // public static AnnotatorFactory dependencies(Properties properties, final AnnotatorImplementations annotatorImpl) { return new AnnotatorFactory(properties, annotatorImpl) { + private static final long serialVersionUID = 1L; @Override public Annotator create() { return annotatorImpl.dependencies(properties); @@ -532,4 +537,38 @@ protected String additionalSignature() { }; } + // + // Monotonicity and Polarity + // + public static AnnotatorFactory natlog(Properties properties, final AnnotatorImplementations annotatorImpl) { + return new AnnotatorFactory(properties, annotatorImpl) { + @Override + public Annotator create() { + return annotatorImpl.natlog(properties); + } + + @Override + protected String additionalSignature() { + return ""; + } + }; + } + + // + // RelationTriples + // + public static AnnotatorFactory openie(Properties properties, final AnnotatorImplementations annotatorImpl) { + return new AnnotatorFactory(properties, annotatorImpl) { + @Override + public Annotator create() { + return annotatorImpl.openie(properties); + } + + @Override + protected String additionalSignature() { + return ""; + } + }; + } + } diff --git a/src/edu/stanford/nlp/pipeline/AnnotatorImplementations.java b/src/edu/stanford/nlp/pipeline/AnnotatorImplementations.java index c73b94114c..179a965de3 100644 --- a/src/edu/stanford/nlp/pipeline/AnnotatorImplementations.java +++ b/src/edu/stanford/nlp/pipeline/AnnotatorImplementations.java @@ -2,6 +2,8 @@ import edu.stanford.nlp.ie.NERClassifierCombiner; import edu.stanford.nlp.ie.regexp.NumberSequenceClassifier; +import edu.stanford.nlp.naturalli.NaturalLogicAnnotator; +import edu.stanford.nlp.naturalli.OpenIE; import edu.stanford.nlp.util.PropertiesUtils; import edu.stanford.nlp.util.ReflectionLoading; @@ -65,7 +67,7 @@ public Annotator morpha(Properties properties, boolean verbose) { } /** - * Annotate for named entities -- note that this combines multiple NER tag sets, and some auxilliary things (like temporal tagging) + * Annotate for named entities -- note that this combines multiple NER tag sets, and some auxiliary things (like temporal tagging) */ public Annotator ner(Properties properties) throws FileNotFoundException { @@ -196,4 +198,22 @@ public Annotator dependencies(Properties properties) { return new DependencyParseAnnotator(relevantProperties); } + /** + * Annotate operators (e.g., quantifiers) and polarity of tokens in a sentence + */ + public Annotator natlog(Properties properties) { + Properties relevantProperties = 
PropertiesUtils.extractPrefixedProperties(properties, + Annotator.STANFORD_NATLOG + '.'); + return new NaturalLogicAnnotator(relevantProperties); + } + + /** + * Annotate {@link edu.stanford.nlp.ie.util.RelationTriple}s from text. + */ + public Annotator openie(Properties properties) { + Properties relevantProperties = PropertiesUtils.extractPrefixedProperties(properties, + Annotator.STANFORD_OPENIE + '.'); + return new OpenIE(relevantProperties); + } + } diff --git a/src/edu/stanford/nlp/pipeline/CoreNLP.proto b/src/edu/stanford/nlp/pipeline/CoreNLP.proto index e48feb2a14..10de54f253 100644 --- a/src/edu/stanford/nlp/pipeline/CoreNLP.proto +++ b/src/edu/stanford/nlp/pipeline/CoreNLP.proto @@ -70,29 +70,33 @@ message Sentence { // message Token { // Fields set by the default annotators [new CoreNLP(new Properties())] - required string word = 1; // the word's gloss (post-tokenization) - optional string pos = 2; // The word's part of speech tag - optional string value = 3; // The word's 'value', (e.g., parse tree node) - optional string category = 4; // The word's 'category' (e.g., parse tree node) - optional string before = 5; // The whitespace/xml before the token - optional string after = 6; // The whitespace/xml after the token - optional string originalText = 7; // The original text for this token - optional string ner = 8; // The word's NER tag - optional string normalizedNER = 9; // The word's normalized NER tag - optional string lemma = 10; // The word's lemma - optional uint32 beginChar = 11; // The character offset begin - optional uint32 endChar = 12; // The character offset end - optional uint32 utterance = 13; // The utterance tag used in dcoref - optional string speaker = 14; // The speaker speaking this word - optional uint32 beginIndex = 15; // The begin index of, e.g., a span - optional uint32 endIndex = 16; // The begin index of, e.g., a span - optional uint32 tokenBeginIndex = 17; // The begin index of the token - optional uint32 tokenEndIndex = 18; // The end index of the token - optional Timex timexValue = 19; // The time this word refers to - optional bool hasXmlContext = 21; // Used by clean xml annotator - repeated string xmlContext = 22; // Used by clean xml annotator - optional uint32 corefClusterID = 23; // The [primary] cluster id for this token - optional string answer = 24; // A temporary annotation which is occasionally left in + required string word = 1; // the word's gloss (post-tokenization) + optional string pos = 2; // The word's part of speech tag + optional string value = 3; // The word's 'value', (e.g., parse tree node) + optional string category = 4; // The word's 'category' (e.g., parse tree node) + optional string before = 5; // The whitespace/xml before the token + optional string after = 6; // The whitespace/xml after the token + optional string originalText = 7; // The original text for this token + optional string ner = 8; // The word's NER tag + optional string normalizedNER = 9; // The word's normalized NER tag + optional string lemma = 10; // The word's lemma + optional uint32 beginChar = 11; // The character offset begin + optional uint32 endChar = 12; // The character offset end + optional uint32 utterance = 13; // The utterance tag used in dcoref + optional string speaker = 14; // The speaker speaking this word + optional uint32 beginIndex = 15; // The begin index of, e.g., a span + optional uint32 endIndex = 16; // The begin index of, e.g., a span + optional uint32 tokenBeginIndex = 17; // The begin index of the token + optional uint32 
tokenEndIndex = 18; // The end index of the token + optional Timex timexValue = 19; // The time this word refers to + optional bool hasXmlContext = 21; // Used by clean xml annotator + repeated string xmlContext = 22; // Used by clean xml annotator + optional uint32 corefClusterID = 23; // The [primary] cluster id for this token + optional string answer = 24; // A temporary annotation which is occasionally left in + // optional string projectedCategory = 25; // The syntactic category of the maximal constituent headed by the word. Not used anywhere, so deleted. + optional uint32 headWordIndex = 26; // The index of the head word of this word. + optional Operator operator = 27; // If this is an operator, which one is it and what is its scope (as per Natural Logic)? + optional Polarity polarity = 28; // The polarity of this word, according to Natural Logic // Fields set by other annotators in CoreNLP optional string gender = 51; // gender annotation (machine reading) @@ -109,6 +113,17 @@ message Token { extensions 100 to 255; } +// +// An enumeration of valid sentiment values for the sentiment classifier. +// +enum Sentiment { + STRONG_NEGATIVE = 0; + WEAK_NEGATIVE = 1; + NEUTRAL = 2; + WEAK_POSITIVE = 3; + STRONG_POSITIVE = 4; +} + // // A syntactic parse tree, with scores. // @@ -118,6 +133,7 @@ message ParseTree { optional uint32 yieldBeginIndex = 3; optional uint32 yieldEndIndex = 4; optional double score = 5; + optional Sentiment sentiment = 6; } // @@ -220,3 +236,42 @@ message Relation { // Implicit // uint32 sentence @see implicit in sentence } + +// +// A Natural Logic operator +// +message Operator { + required string name = 1; + required int32 quantifierSpanBegin = 2; + required int32 quantifierSpanEnd = 3; + required int32 subjectSpanBegin = 4; + required int32 subjectSpanEnd = 5; + required int32 objectSpanBegin = 6; + required int32 objectSpanEnd = 7; +} + +// +// The seven informative Natural Logic relations +// +enum NaturalLogicRelation { + EQUIVALENCE = 0; + FORWARD_ENTAILMENT = 1; + REVERSE_ENTAILMENT = 2; + NEGATION = 3; + ALTERNATION = 4; + COVER = 5; + INDEPENDENCE = 6; +} + +// +// The polarity of a word, according to Natural Logic +// +message Polarity { + required NaturalLogicRelation projectEquivalence = 1; + required NaturalLogicRelation projectForwardEntailment = 2; + required NaturalLogicRelation projectReverseEntailment = 3; + required NaturalLogicRelation projectNegation = 4; + required NaturalLogicRelation projectAlternation = 5; + required NaturalLogicRelation projectCover = 6; + required NaturalLogicRelation projectIndependence = 7; +} diff --git a/src/edu/stanford/nlp/pipeline/CoreNLPProtos.java b/src/edu/stanford/nlp/pipeline/CoreNLPProtos.java index b2dee81c37..0a4352bf72 100644 --- a/src/edu/stanford/nlp/pipeline/CoreNLPProtos.java +++ b/src/edu/stanford/nlp/pipeline/CoreNLPProtos.java @@ -95,6 +95,168 @@ private Language(int index, int value) { // @@protoc_insertion_point(enum_scope:edu.stanford.nlp.pipeline.Language) } + public enum Sentiment + implements com.google.protobuf.ProtocolMessageEnum { + STRONG_NEGATIVE(0, 0), + WEAK_NEGATIVE(1, 1), + NEUTRAL(2, 2), + WEAK_POSITIVE(3, 3), + STRONG_POSITIVE(4, 4), + ; + + public static final int STRONG_NEGATIVE_VALUE = 0; + public static final int WEAK_NEGATIVE_VALUE = 1; + public static final int NEUTRAL_VALUE = 2; + public static final int WEAK_POSITIVE_VALUE = 3; + public static final int STRONG_POSITIVE_VALUE = 4; + + + public final int getNumber() { return value; } + + public static Sentiment valueOf(int 
value) { + switch (value) { + case 0: return STRONG_NEGATIVE; + case 1: return WEAK_NEGATIVE; + case 2: return NEUTRAL; + case 3: return WEAK_POSITIVE; + case 4: return STRONG_POSITIVE; + default: return null; + } + } + + public static com.google.protobuf.Internal.EnumLiteMap + internalGetValueMap() { + return internalValueMap; + } + private static com.google.protobuf.Internal.EnumLiteMap + internalValueMap = + new com.google.protobuf.Internal.EnumLiteMap() { + public Sentiment findValueByNumber(int number) { + return Sentiment.valueOf(number); + } + }; + + public final com.google.protobuf.Descriptors.EnumValueDescriptor + getValueDescriptor() { + return getDescriptor().getValues().get(index); + } + public final com.google.protobuf.Descriptors.EnumDescriptor + getDescriptorForType() { + return getDescriptor(); + } + public static final com.google.protobuf.Descriptors.EnumDescriptor + getDescriptor() { + return edu.stanford.nlp.pipeline.CoreNLPProtos.getDescriptor().getEnumTypes().get(1); + } + + private static final Sentiment[] VALUES = { + STRONG_NEGATIVE, WEAK_NEGATIVE, NEUTRAL, WEAK_POSITIVE, STRONG_POSITIVE, + }; + + public static Sentiment valueOf( + com.google.protobuf.Descriptors.EnumValueDescriptor desc) { + if (desc.getType() != getDescriptor()) { + throw new java.lang.IllegalArgumentException( + "EnumValueDescriptor is not for this type."); + } + return VALUES[desc.getIndex()]; + } + + private final int index; + private final int value; + + private Sentiment(int index, int value) { + this.index = index; + this.value = value; + } + + // @@protoc_insertion_point(enum_scope:edu.stanford.nlp.pipeline.Sentiment) + } + + public enum NaturalLogicRelation + implements com.google.protobuf.ProtocolMessageEnum { + EQUIVALENCE(0, 0), + FORWARD_ENTAILMENT(1, 1), + REVERSE_ENTAILMENT(2, 2), + NEGATION(3, 3), + ALTERNATION(4, 4), + COVER(5, 5), + INDEPENDENCE(6, 6), + ; + + public static final int EQUIVALENCE_VALUE = 0; + public static final int FORWARD_ENTAILMENT_VALUE = 1; + public static final int REVERSE_ENTAILMENT_VALUE = 2; + public static final int NEGATION_VALUE = 3; + public static final int ALTERNATION_VALUE = 4; + public static final int COVER_VALUE = 5; + public static final int INDEPENDENCE_VALUE = 6; + + + public final int getNumber() { return value; } + + public static NaturalLogicRelation valueOf(int value) { + switch (value) { + case 0: return EQUIVALENCE; + case 1: return FORWARD_ENTAILMENT; + case 2: return REVERSE_ENTAILMENT; + case 3: return NEGATION; + case 4: return ALTERNATION; + case 5: return COVER; + case 6: return INDEPENDENCE; + default: return null; + } + } + + public static com.google.protobuf.Internal.EnumLiteMap + internalGetValueMap() { + return internalValueMap; + } + private static com.google.protobuf.Internal.EnumLiteMap + internalValueMap = + new com.google.protobuf.Internal.EnumLiteMap() { + public NaturalLogicRelation findValueByNumber(int number) { + return NaturalLogicRelation.valueOf(number); + } + }; + + public final com.google.protobuf.Descriptors.EnumValueDescriptor + getValueDescriptor() { + return getDescriptor().getValues().get(index); + } + public final com.google.protobuf.Descriptors.EnumDescriptor + getDescriptorForType() { + return getDescriptor(); + } + public static final com.google.protobuf.Descriptors.EnumDescriptor + getDescriptor() { + return edu.stanford.nlp.pipeline.CoreNLPProtos.getDescriptor().getEnumTypes().get(2); + } + + private static final NaturalLogicRelation[] VALUES = { + EQUIVALENCE, FORWARD_ENTAILMENT, REVERSE_ENTAILMENT, 
NEGATION, ALTERNATION, COVER, INDEPENDENCE, + }; + + public static NaturalLogicRelation valueOf( + com.google.protobuf.Descriptors.EnumValueDescriptor desc) { + if (desc.getType() != getDescriptor()) { + throw new java.lang.IllegalArgumentException( + "EnumValueDescriptor is not for this type."); + } + return VALUES[desc.getIndex()]; + } + + private final int index; + private final int value; + + private NaturalLogicRelation(int index, int value) { + this.index = index; + this.value = value; + } + + // @@protoc_insertion_point(enum_scope:edu.stanford.nlp.pipeline.NaturalLogicRelation) + } + public interface DocumentOrBuilder extends com.google.protobuf.GeneratedMessage. ExtendableMessageOrBuilder { @@ -4221,6 +4383,20 @@ public interface TokenOrBuilder extends boolean hasAnswer(); String getAnswer(); + // optional uint32 headWordIndex = 26; + boolean hasHeadWordIndex(); + int getHeadWordIndex(); + + // optional .edu.stanford.nlp.pipeline.Operator operator = 27; + boolean hasOperator(); + edu.stanford.nlp.pipeline.CoreNLPProtos.Operator getOperator(); + edu.stanford.nlp.pipeline.CoreNLPProtos.OperatorOrBuilder getOperatorOrBuilder(); + + // optional .edu.stanford.nlp.pipeline.Polarity polarity = 28; + boolean hasPolarity(); + edu.stanford.nlp.pipeline.CoreNLPProtos.Polarity getPolarity(); + edu.stanford.nlp.pipeline.CoreNLPProtos.PolarityOrBuilder getPolarityOrBuilder(); + // optional string gender = 51; boolean hasGender(); String getGender(); @@ -4763,11 +4939,47 @@ private com.google.protobuf.ByteString getAnswerBytes() { } } + // optional uint32 headWordIndex = 26; + public static final int HEADWORDINDEX_FIELD_NUMBER = 26; + private int headWordIndex_; + public boolean hasHeadWordIndex() { + return ((bitField0_ & 0x00400000) == 0x00400000); + } + public int getHeadWordIndex() { + return headWordIndex_; + } + + // optional .edu.stanford.nlp.pipeline.Operator operator = 27; + public static final int OPERATOR_FIELD_NUMBER = 27; + private edu.stanford.nlp.pipeline.CoreNLPProtos.Operator operator_; + public boolean hasOperator() { + return ((bitField0_ & 0x00800000) == 0x00800000); + } + public edu.stanford.nlp.pipeline.CoreNLPProtos.Operator getOperator() { + return operator_; + } + public edu.stanford.nlp.pipeline.CoreNLPProtos.OperatorOrBuilder getOperatorOrBuilder() { + return operator_; + } + + // optional .edu.stanford.nlp.pipeline.Polarity polarity = 28; + public static final int POLARITY_FIELD_NUMBER = 28; + private edu.stanford.nlp.pipeline.CoreNLPProtos.Polarity polarity_; + public boolean hasPolarity() { + return ((bitField0_ & 0x01000000) == 0x01000000); + } + public edu.stanford.nlp.pipeline.CoreNLPProtos.Polarity getPolarity() { + return polarity_; + } + public edu.stanford.nlp.pipeline.CoreNLPProtos.PolarityOrBuilder getPolarityOrBuilder() { + return polarity_; + } + // optional string gender = 51; public static final int GENDER_FIELD_NUMBER = 51; private java.lang.Object gender_; public boolean hasGender() { - return ((bitField0_ & 0x00400000) == 0x00400000); + return ((bitField0_ & 0x02000000) == 0x02000000); } public String getGender() { java.lang.Object ref = gender_; @@ -4799,7 +5011,7 @@ private com.google.protobuf.ByteString getGenderBytes() { public static final int TRUECASE_FIELD_NUMBER = 52; private java.lang.Object trueCase_; public boolean hasTrueCase() { - return ((bitField0_ & 0x00800000) == 0x00800000); + return ((bitField0_ & 0x04000000) == 0x04000000); } public String getTrueCase() { java.lang.Object ref = trueCase_; @@ -4831,7 +5043,7 @@ private 
com.google.protobuf.ByteString getTrueCaseBytes() { public static final int TRUECASETEXT_FIELD_NUMBER = 53; private java.lang.Object trueCaseText_; public boolean hasTrueCaseText() { - return ((bitField0_ & 0x01000000) == 0x01000000); + return ((bitField0_ & 0x08000000) == 0x08000000); } public String getTrueCaseText() { java.lang.Object ref = trueCaseText_; @@ -4883,6 +5095,9 @@ private void initFields() { xmlContext_ = com.google.protobuf.LazyStringArrayList.EMPTY; corefClusterID_ = 0; answer_ = ""; + headWordIndex_ = 0; + operator_ = edu.stanford.nlp.pipeline.CoreNLPProtos.Operator.getDefaultInstance(); + polarity_ = edu.stanford.nlp.pipeline.CoreNLPProtos.Polarity.getDefaultInstance(); gender_ = ""; trueCase_ = ""; trueCaseText_ = ""; @@ -4896,6 +5111,18 @@ public final boolean isInitialized() { memoizedIsInitialized = 0; return false; } + if (hasOperator()) { + if (!getOperator().isInitialized()) { + memoizedIsInitialized = 0; + return false; + } + } + if (hasPolarity()) { + if (!getPolarity().isInitialized()) { + memoizedIsInitialized = 0; + return false; + } + } if (!extensionsAreInitialized()) { memoizedIsInitialized = 0; return false; @@ -4980,12 +5207,21 @@ public void writeTo(com.google.protobuf.CodedOutputStream output) output.writeBytes(24, getAnswerBytes()); } if (((bitField0_ & 0x00400000) == 0x00400000)) { - output.writeBytes(51, getGenderBytes()); + output.writeUInt32(26, headWordIndex_); } if (((bitField0_ & 0x00800000) == 0x00800000)) { - output.writeBytes(52, getTrueCaseBytes()); + output.writeMessage(27, operator_); } if (((bitField0_ & 0x01000000) == 0x01000000)) { + output.writeMessage(28, polarity_); + } + if (((bitField0_ & 0x02000000) == 0x02000000)) { + output.writeBytes(51, getGenderBytes()); + } + if (((bitField0_ & 0x04000000) == 0x04000000)) { + output.writeBytes(52, getTrueCaseBytes()); + } + if (((bitField0_ & 0x08000000) == 0x08000000)) { output.writeBytes(53, getTrueCaseTextBytes()); } extensionWriter.writeUntil(256, output); @@ -5097,13 +5333,25 @@ public int getSerializedSize() { } if (((bitField0_ & 0x00400000) == 0x00400000)) { size += com.google.protobuf.CodedOutputStream - .computeBytesSize(51, getGenderBytes()); + .computeUInt32Size(26, headWordIndex_); } if (((bitField0_ & 0x00800000) == 0x00800000)) { size += com.google.protobuf.CodedOutputStream - .computeBytesSize(52, getTrueCaseBytes()); + .computeMessageSize(27, operator_); } if (((bitField0_ & 0x01000000) == 0x01000000)) { + size += com.google.protobuf.CodedOutputStream + .computeMessageSize(28, polarity_); + } + if (((bitField0_ & 0x02000000) == 0x02000000)) { + size += com.google.protobuf.CodedOutputStream + .computeBytesSize(51, getGenderBytes()); + } + if (((bitField0_ & 0x04000000) == 0x04000000)) { + size += com.google.protobuf.CodedOutputStream + .computeBytesSize(52, getTrueCaseBytes()); + } + if (((bitField0_ & 0x08000000) == 0x08000000)) { size += com.google.protobuf.CodedOutputStream .computeBytesSize(53, getTrueCaseTextBytes()); } @@ -5225,6 +5473,8 @@ private Builder(BuilderParent parent) { private void maybeForceBuilderInitialization() { if (com.google.protobuf.GeneratedMessage.alwaysUseFieldBuilders) { getTimexValueFieldBuilder(); + getOperatorFieldBuilder(); + getPolarityFieldBuilder(); } } private static Builder create() { @@ -5283,12 +5533,26 @@ public Builder clear() { bitField0_ = (bitField0_ & ~0x00200000); answer_ = ""; bitField0_ = (bitField0_ & ~0x00400000); - gender_ = ""; + headWordIndex_ = 0; bitField0_ = (bitField0_ & ~0x00800000); - trueCase_ = ""; + if 
(operatorBuilder_ == null) { + operator_ = edu.stanford.nlp.pipeline.CoreNLPProtos.Operator.getDefaultInstance(); + } else { + operatorBuilder_.clear(); + } bitField0_ = (bitField0_ & ~0x01000000); - trueCaseText_ = ""; + if (polarityBuilder_ == null) { + polarity_ = edu.stanford.nlp.pipeline.CoreNLPProtos.Polarity.getDefaultInstance(); + } else { + polarityBuilder_.clear(); + } bitField0_ = (bitField0_ & ~0x02000000); + gender_ = ""; + bitField0_ = (bitField0_ & ~0x04000000); + trueCase_ = ""; + bitField0_ = (bitField0_ & ~0x08000000); + trueCaseText_ = ""; + bitField0_ = (bitField0_ & ~0x10000000); return this; } @@ -5428,14 +5692,34 @@ public edu.stanford.nlp.pipeline.CoreNLPProtos.Token buildPartial() { if (((from_bitField0_ & 0x00800000) == 0x00800000)) { to_bitField0_ |= 0x00400000; } - result.gender_ = gender_; + result.headWordIndex_ = headWordIndex_; if (((from_bitField0_ & 0x01000000) == 0x01000000)) { to_bitField0_ |= 0x00800000; } - result.trueCase_ = trueCase_; + if (operatorBuilder_ == null) { + result.operator_ = operator_; + } else { + result.operator_ = operatorBuilder_.build(); + } if (((from_bitField0_ & 0x02000000) == 0x02000000)) { to_bitField0_ |= 0x01000000; } + if (polarityBuilder_ == null) { + result.polarity_ = polarity_; + } else { + result.polarity_ = polarityBuilder_.build(); + } + if (((from_bitField0_ & 0x04000000) == 0x04000000)) { + to_bitField0_ |= 0x02000000; + } + result.gender_ = gender_; + if (((from_bitField0_ & 0x08000000) == 0x08000000)) { + to_bitField0_ |= 0x04000000; + } + result.trueCase_ = trueCase_; + if (((from_bitField0_ & 0x10000000) == 0x10000000)) { + to_bitField0_ |= 0x08000000; + } result.trueCaseText_ = trueCaseText_; result.bitField0_ = to_bitField0_; onBuilt(); @@ -5529,6 +5813,15 @@ public Builder mergeFrom(edu.stanford.nlp.pipeline.CoreNLPProtos.Token other) { if (other.hasAnswer()) { setAnswer(other.getAnswer()); } + if (other.hasHeadWordIndex()) { + setHeadWordIndex(other.getHeadWordIndex()); + } + if (other.hasOperator()) { + mergeOperator(other.getOperator()); + } + if (other.hasPolarity()) { + mergePolarity(other.getPolarity()); + } if (other.hasGender()) { setGender(other.getGender()); } @@ -5548,6 +5841,18 @@ public final boolean isInitialized() { return false; } + if (hasOperator()) { + if (!getOperator().isInitialized()) { + + return false; + } + } + if (hasPolarity()) { + if (!getPolarity().isInitialized()) { + + return false; + } + } if (!extensionsAreInitialized()) { return false; @@ -5697,18 +6002,41 @@ public Builder mergeFrom( answer_ = input.readBytes(); break; } - case 410: { + case 208: { bitField0_ |= 0x00800000; + headWordIndex_ = input.readUInt32(); + break; + } + case 218: { + edu.stanford.nlp.pipeline.CoreNLPProtos.Operator.Builder subBuilder = edu.stanford.nlp.pipeline.CoreNLPProtos.Operator.newBuilder(); + if (hasOperator()) { + subBuilder.mergeFrom(getOperator()); + } + input.readMessage(subBuilder, extensionRegistry); + setOperator(subBuilder.buildPartial()); + break; + } + case 226: { + edu.stanford.nlp.pipeline.CoreNLPProtos.Polarity.Builder subBuilder = edu.stanford.nlp.pipeline.CoreNLPProtos.Polarity.newBuilder(); + if (hasPolarity()) { + subBuilder.mergeFrom(getPolarity()); + } + input.readMessage(subBuilder, extensionRegistry); + setPolarity(subBuilder.buildPartial()); + break; + } + case 410: { + bitField0_ |= 0x04000000; gender_ = input.readBytes(); break; } case 418: { - bitField0_ |= 0x01000000; + bitField0_ |= 0x08000000; trueCase_ = input.readBytes(); break; } case 426: { - bitField0_ |= 
0x02000000; + bitField0_ |= 0x10000000; trueCaseText_ = input.readBytes(); break; } @@ -6485,82 +6813,283 @@ void setAnswer(com.google.protobuf.ByteString value) { onChanged(); } - // optional string gender = 51; - private java.lang.Object gender_ = ""; - public boolean hasGender() { + // optional uint32 headWordIndex = 26; + private int headWordIndex_ ; + public boolean hasHeadWordIndex() { return ((bitField0_ & 0x00800000) == 0x00800000); } - public String getGender() { - java.lang.Object ref = gender_; - if (!(ref instanceof String)) { - String s = ((com.google.protobuf.ByteString) ref).toStringUtf8(); - gender_ = s; - return s; - } else { - return (String) ref; - } + public int getHeadWordIndex() { + return headWordIndex_; } - public Builder setGender(String value) { - if (value == null) { - throw new NullPointerException(); - } - bitField0_ |= 0x00800000; - gender_ = value; + public Builder setHeadWordIndex(int value) { + bitField0_ |= 0x00800000; + headWordIndex_ = value; onChanged(); return this; } - public Builder clearGender() { + public Builder clearHeadWordIndex() { bitField0_ = (bitField0_ & ~0x00800000); - gender_ = getDefaultInstance().getGender(); + headWordIndex_ = 0; onChanged(); return this; } - void setGender(com.google.protobuf.ByteString value) { - bitField0_ |= 0x00800000; - gender_ = value; - onChanged(); - } - // optional string trueCase = 52; - private java.lang.Object trueCase_ = ""; - public boolean hasTrueCase() { + // optional .edu.stanford.nlp.pipeline.Operator operator = 27; + private edu.stanford.nlp.pipeline.CoreNLPProtos.Operator operator_ = edu.stanford.nlp.pipeline.CoreNLPProtos.Operator.getDefaultInstance(); + private com.google.protobuf.SingleFieldBuilder< + edu.stanford.nlp.pipeline.CoreNLPProtos.Operator, edu.stanford.nlp.pipeline.CoreNLPProtos.Operator.Builder, edu.stanford.nlp.pipeline.CoreNLPProtos.OperatorOrBuilder> operatorBuilder_; + public boolean hasOperator() { return ((bitField0_ & 0x01000000) == 0x01000000); } - public String getTrueCase() { - java.lang.Object ref = trueCase_; - if (!(ref instanceof String)) { - String s = ((com.google.protobuf.ByteString) ref).toStringUtf8(); - trueCase_ = s; - return s; + public edu.stanford.nlp.pipeline.CoreNLPProtos.Operator getOperator() { + if (operatorBuilder_ == null) { + return operator_; } else { - return (String) ref; + return operatorBuilder_.getMessage(); } } - public Builder setTrueCase(String value) { - if (value == null) { - throw new NullPointerException(); - } - bitField0_ |= 0x01000000; - trueCase_ = value; - onChanged(); + public Builder setOperator(edu.stanford.nlp.pipeline.CoreNLPProtos.Operator value) { + if (operatorBuilder_ == null) { + if (value == null) { + throw new NullPointerException(); + } + operator_ = value; + onChanged(); + } else { + operatorBuilder_.setMessage(value); + } + bitField0_ |= 0x01000000; return this; } - public Builder clearTrueCase() { - bitField0_ = (bitField0_ & ~0x01000000); - trueCase_ = getDefaultInstance().getTrueCase(); - onChanged(); + public Builder setOperator( + edu.stanford.nlp.pipeline.CoreNLPProtos.Operator.Builder builderForValue) { + if (operatorBuilder_ == null) { + operator_ = builderForValue.build(); + onChanged(); + } else { + operatorBuilder_.setMessage(builderForValue.build()); + } + bitField0_ |= 0x01000000; return this; } - void setTrueCase(com.google.protobuf.ByteString value) { + public Builder mergeOperator(edu.stanford.nlp.pipeline.CoreNLPProtos.Operator value) { + if (operatorBuilder_ == null) { + if (((bitField0_ & 0x01000000) 
== 0x01000000) && + operator_ != edu.stanford.nlp.pipeline.CoreNLPProtos.Operator.getDefaultInstance()) { + operator_ = + edu.stanford.nlp.pipeline.CoreNLPProtos.Operator.newBuilder(operator_).mergeFrom(value).buildPartial(); + } else { + operator_ = value; + } + onChanged(); + } else { + operatorBuilder_.mergeFrom(value); + } bitField0_ |= 0x01000000; - trueCase_ = value; - onChanged(); + return this; } - + public Builder clearOperator() { + if (operatorBuilder_ == null) { + operator_ = edu.stanford.nlp.pipeline.CoreNLPProtos.Operator.getDefaultInstance(); + onChanged(); + } else { + operatorBuilder_.clear(); + } + bitField0_ = (bitField0_ & ~0x01000000); + return this; + } + public edu.stanford.nlp.pipeline.CoreNLPProtos.Operator.Builder getOperatorBuilder() { + bitField0_ |= 0x01000000; + onChanged(); + return getOperatorFieldBuilder().getBuilder(); + } + public edu.stanford.nlp.pipeline.CoreNLPProtos.OperatorOrBuilder getOperatorOrBuilder() { + if (operatorBuilder_ != null) { + return operatorBuilder_.getMessageOrBuilder(); + } else { + return operator_; + } + } + private com.google.protobuf.SingleFieldBuilder< + edu.stanford.nlp.pipeline.CoreNLPProtos.Operator, edu.stanford.nlp.pipeline.CoreNLPProtos.Operator.Builder, edu.stanford.nlp.pipeline.CoreNLPProtos.OperatorOrBuilder> + getOperatorFieldBuilder() { + if (operatorBuilder_ == null) { + operatorBuilder_ = new com.google.protobuf.SingleFieldBuilder< + edu.stanford.nlp.pipeline.CoreNLPProtos.Operator, edu.stanford.nlp.pipeline.CoreNLPProtos.Operator.Builder, edu.stanford.nlp.pipeline.CoreNLPProtos.OperatorOrBuilder>( + operator_, + getParentForChildren(), + isClean()); + operator_ = null; + } + return operatorBuilder_; + } + + // optional .edu.stanford.nlp.pipeline.Polarity polarity = 28; + private edu.stanford.nlp.pipeline.CoreNLPProtos.Polarity polarity_ = edu.stanford.nlp.pipeline.CoreNLPProtos.Polarity.getDefaultInstance(); + private com.google.protobuf.SingleFieldBuilder< + edu.stanford.nlp.pipeline.CoreNLPProtos.Polarity, edu.stanford.nlp.pipeline.CoreNLPProtos.Polarity.Builder, edu.stanford.nlp.pipeline.CoreNLPProtos.PolarityOrBuilder> polarityBuilder_; + public boolean hasPolarity() { + return ((bitField0_ & 0x02000000) == 0x02000000); + } + public edu.stanford.nlp.pipeline.CoreNLPProtos.Polarity getPolarity() { + if (polarityBuilder_ == null) { + return polarity_; + } else { + return polarityBuilder_.getMessage(); + } + } + public Builder setPolarity(edu.stanford.nlp.pipeline.CoreNLPProtos.Polarity value) { + if (polarityBuilder_ == null) { + if (value == null) { + throw new NullPointerException(); + } + polarity_ = value; + onChanged(); + } else { + polarityBuilder_.setMessage(value); + } + bitField0_ |= 0x02000000; + return this; + } + public Builder setPolarity( + edu.stanford.nlp.pipeline.CoreNLPProtos.Polarity.Builder builderForValue) { + if (polarityBuilder_ == null) { + polarity_ = builderForValue.build(); + onChanged(); + } else { + polarityBuilder_.setMessage(builderForValue.build()); + } + bitField0_ |= 0x02000000; + return this; + } + public Builder mergePolarity(edu.stanford.nlp.pipeline.CoreNLPProtos.Polarity value) { + if (polarityBuilder_ == null) { + if (((bitField0_ & 0x02000000) == 0x02000000) && + polarity_ != edu.stanford.nlp.pipeline.CoreNLPProtos.Polarity.getDefaultInstance()) { + polarity_ = + edu.stanford.nlp.pipeline.CoreNLPProtos.Polarity.newBuilder(polarity_).mergeFrom(value).buildPartial(); + } else { + polarity_ = value; + } + onChanged(); + } else { + polarityBuilder_.mergeFrom(value); + } + 
bitField0_ |= 0x02000000; + return this; + } + public Builder clearPolarity() { + if (polarityBuilder_ == null) { + polarity_ = edu.stanford.nlp.pipeline.CoreNLPProtos.Polarity.getDefaultInstance(); + onChanged(); + } else { + polarityBuilder_.clear(); + } + bitField0_ = (bitField0_ & ~0x02000000); + return this; + } + public edu.stanford.nlp.pipeline.CoreNLPProtos.Polarity.Builder getPolarityBuilder() { + bitField0_ |= 0x02000000; + onChanged(); + return getPolarityFieldBuilder().getBuilder(); + } + public edu.stanford.nlp.pipeline.CoreNLPProtos.PolarityOrBuilder getPolarityOrBuilder() { + if (polarityBuilder_ != null) { + return polarityBuilder_.getMessageOrBuilder(); + } else { + return polarity_; + } + } + private com.google.protobuf.SingleFieldBuilder< + edu.stanford.nlp.pipeline.CoreNLPProtos.Polarity, edu.stanford.nlp.pipeline.CoreNLPProtos.Polarity.Builder, edu.stanford.nlp.pipeline.CoreNLPProtos.PolarityOrBuilder> + getPolarityFieldBuilder() { + if (polarityBuilder_ == null) { + polarityBuilder_ = new com.google.protobuf.SingleFieldBuilder< + edu.stanford.nlp.pipeline.CoreNLPProtos.Polarity, edu.stanford.nlp.pipeline.CoreNLPProtos.Polarity.Builder, edu.stanford.nlp.pipeline.CoreNLPProtos.PolarityOrBuilder>( + polarity_, + getParentForChildren(), + isClean()); + polarity_ = null; + } + return polarityBuilder_; + } + + // optional string gender = 51; + private java.lang.Object gender_ = ""; + public boolean hasGender() { + return ((bitField0_ & 0x04000000) == 0x04000000); + } + public String getGender() { + java.lang.Object ref = gender_; + if (!(ref instanceof String)) { + String s = ((com.google.protobuf.ByteString) ref).toStringUtf8(); + gender_ = s; + return s; + } else { + return (String) ref; + } + } + public Builder setGender(String value) { + if (value == null) { + throw new NullPointerException(); + } + bitField0_ |= 0x04000000; + gender_ = value; + onChanged(); + return this; + } + public Builder clearGender() { + bitField0_ = (bitField0_ & ~0x04000000); + gender_ = getDefaultInstance().getGender(); + onChanged(); + return this; + } + void setGender(com.google.protobuf.ByteString value) { + bitField0_ |= 0x04000000; + gender_ = value; + onChanged(); + } + + // optional string trueCase = 52; + private java.lang.Object trueCase_ = ""; + public boolean hasTrueCase() { + return ((bitField0_ & 0x08000000) == 0x08000000); + } + public String getTrueCase() { + java.lang.Object ref = trueCase_; + if (!(ref instanceof String)) { + String s = ((com.google.protobuf.ByteString) ref).toStringUtf8(); + trueCase_ = s; + return s; + } else { + return (String) ref; + } + } + public Builder setTrueCase(String value) { + if (value == null) { + throw new NullPointerException(); + } + bitField0_ |= 0x08000000; + trueCase_ = value; + onChanged(); + return this; + } + public Builder clearTrueCase() { + bitField0_ = (bitField0_ & ~0x08000000); + trueCase_ = getDefaultInstance().getTrueCase(); + onChanged(); + return this; + } + void setTrueCase(com.google.protobuf.ByteString value) { + bitField0_ |= 0x08000000; + trueCase_ = value; + onChanged(); + } + // optional string trueCaseText = 53; private java.lang.Object trueCaseText_ = ""; public boolean hasTrueCaseText() { - return ((bitField0_ & 0x02000000) == 0x02000000); + return ((bitField0_ & 0x10000000) == 0x10000000); } public String getTrueCaseText() { java.lang.Object ref = trueCaseText_; @@ -6576,19 +7105,19 @@ public Builder setTrueCaseText(String value) { if (value == null) { throw new NullPointerException(); } - bitField0_ |= 0x02000000; 
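+ // Note (editor): the three new Token fields (headWordIndex = 26, operator = 27,
+ // polarity = 28) each claim a presence bit in bitField0_, so every later bit shifts
+ // three positions left (here trueCaseText moves from 0x02000000 to 0x10000000).
+ // The renumbering throughout this hunk is mechanical protoc output, not a behavior change.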
+ bitField0_ |= 0x10000000; trueCaseText_ = value; onChanged(); return this; } public Builder clearTrueCaseText() { - bitField0_ = (bitField0_ & ~0x02000000); + bitField0_ = (bitField0_ & ~0x10000000); trueCaseText_ = getDefaultInstance().getTrueCaseText(); onChanged(); return this; } void setTrueCaseText(com.google.protobuf.ByteString value) { - bitField0_ |= 0x02000000; + bitField0_ |= 0x10000000; trueCaseText_ = value; onChanged(); } @@ -6632,6 +7161,10 @@ edu.stanford.nlp.pipeline.CoreNLPProtos.ParseTreeOrBuilder getChildOrBuilder( // optional double score = 5; boolean hasScore(); double getScore(); + + // optional .edu.stanford.nlp.pipeline.Sentiment sentiment = 6; + boolean hasSentiment(); + edu.stanford.nlp.pipeline.CoreNLPProtos.Sentiment getSentiment(); } public static final class ParseTree extends com.google.protobuf.GeneratedMessage @@ -6745,12 +7278,23 @@ public double getScore() { return score_; } + // optional .edu.stanford.nlp.pipeline.Sentiment sentiment = 6; + public static final int SENTIMENT_FIELD_NUMBER = 6; + private edu.stanford.nlp.pipeline.CoreNLPProtos.Sentiment sentiment_; + public boolean hasSentiment() { + return ((bitField0_ & 0x00000010) == 0x00000010); + } + public edu.stanford.nlp.pipeline.CoreNLPProtos.Sentiment getSentiment() { + return sentiment_; + } + private void initFields() { child_ = java.util.Collections.emptyList(); value_ = ""; yieldBeginIndex_ = 0; yieldEndIndex_ = 0; score_ = 0D; + sentiment_ = edu.stanford.nlp.pipeline.CoreNLPProtos.Sentiment.STRONG_NEGATIVE; } private byte memoizedIsInitialized = -1; public final boolean isInitialized() { @@ -6779,6 +7323,9 @@ public void writeTo(com.google.protobuf.CodedOutputStream output) if (((bitField0_ & 0x00000008) == 0x00000008)) { output.writeDouble(5, score_); } + if (((bitField0_ & 0x00000010) == 0x00000010)) { + output.writeEnum(6, sentiment_.getNumber()); + } getUnknownFields().writeTo(output); } @@ -6808,6 +7355,10 @@ public int getSerializedSize() { size += com.google.protobuf.CodedOutputStream .computeDoubleSize(5, score_); } + if (((bitField0_ & 0x00000010) == 0x00000010)) { + size += com.google.protobuf.CodedOutputStream + .computeEnumSize(6, sentiment_.getNumber()); + } size += getUnknownFields().getSerializedSize(); memoizedSerializedSize = size; return size; @@ -6947,6 +7498,8 @@ public Builder clear() { bitField0_ = (bitField0_ & ~0x00000008); score_ = 0D; bitField0_ = (bitField0_ & ~0x00000010); + sentiment_ = edu.stanford.nlp.pipeline.CoreNLPProtos.Sentiment.STRONG_NEGATIVE; + bitField0_ = (bitField0_ & ~0x00000020); return this; } @@ -7010,6 +7563,10 @@ public edu.stanford.nlp.pipeline.CoreNLPProtos.ParseTree buildPartial() { to_bitField0_ |= 0x00000008; } result.score_ = score_; + if (((from_bitField0_ & 0x00000020) == 0x00000020)) { + to_bitField0_ |= 0x00000010; + } + result.sentiment_ = sentiment_; result.bitField0_ = to_bitField0_; onBuilt(); return result; @@ -7064,6 +7621,9 @@ public Builder mergeFrom(edu.stanford.nlp.pipeline.CoreNLPProtos.ParseTree other if (other.hasScore()) { setScore(other.getScore()); } + if (other.hasSentiment()) { + setSentiment(other.getSentiment()); + } this.mergeUnknownFields(other.getUnknownFields()); return this; } @@ -7121,6 +7681,17 @@ public Builder mergeFrom( score_ = input.readDouble(); break; } + case 48: { + int rawValue = input.readEnum(); + edu.stanford.nlp.pipeline.CoreNLPProtos.Sentiment value = edu.stanford.nlp.pipeline.CoreNLPProtos.Sentiment.valueOf(rawValue); + if (value == null) { + unknownFields.mergeVarintField(6, rawValue); + } 
else { + bitField0_ |= 0x00000020; + sentiment_ = value; + } + break; + } } } } @@ -7412,6 +7983,30 @@ public Builder clearScore() { return this; } + // optional .edu.stanford.nlp.pipeline.Sentiment sentiment = 6; + private edu.stanford.nlp.pipeline.CoreNLPProtos.Sentiment sentiment_ = edu.stanford.nlp.pipeline.CoreNLPProtos.Sentiment.STRONG_NEGATIVE; + public boolean hasSentiment() { + return ((bitField0_ & 0x00000020) == 0x00000020); + } + public edu.stanford.nlp.pipeline.CoreNLPProtos.Sentiment getSentiment() { + return sentiment_; + } + public Builder setSentiment(edu.stanford.nlp.pipeline.CoreNLPProtos.Sentiment value) { + if (value == null) { + throw new NullPointerException(); + } + bitField0_ |= 0x00000020; + sentiment_ = value; + onChanged(); + return this; + } + public Builder clearSentiment() { + bitField0_ = (bitField0_ & ~0x00000020); + sentiment_ = edu.stanford.nlp.pipeline.CoreNLPProtos.Sentiment.STRONG_NEGATIVE; + onChanged(); + return this; + } + // @@protoc_insertion_point(builder_scope:edu.stanford.nlp.pipeline.ParseTree) } @@ -14432,81 +15027,1663 @@ void setSubtype(com.google.protobuf.ByteString value) { // @@protoc_insertion_point(class_scope:edu.stanford.nlp.pipeline.Relation) } - private static com.google.protobuf.Descriptors.Descriptor - internal_static_edu_stanford_nlp_pipeline_Document_descriptor; - private static - com.google.protobuf.GeneratedMessage.FieldAccessorTable - internal_static_edu_stanford_nlp_pipeline_Document_fieldAccessorTable; - private static com.google.protobuf.Descriptors.Descriptor - internal_static_edu_stanford_nlp_pipeline_Sentence_descriptor; - private static - com.google.protobuf.GeneratedMessage.FieldAccessorTable - internal_static_edu_stanford_nlp_pipeline_Sentence_fieldAccessorTable; - private static com.google.protobuf.Descriptors.Descriptor - internal_static_edu_stanford_nlp_pipeline_Token_descriptor; - private static - com.google.protobuf.GeneratedMessage.FieldAccessorTable - internal_static_edu_stanford_nlp_pipeline_Token_fieldAccessorTable; - private static com.google.protobuf.Descriptors.Descriptor - internal_static_edu_stanford_nlp_pipeline_ParseTree_descriptor; - private static - com.google.protobuf.GeneratedMessage.FieldAccessorTable - internal_static_edu_stanford_nlp_pipeline_ParseTree_fieldAccessorTable; - private static com.google.protobuf.Descriptors.Descriptor - internal_static_edu_stanford_nlp_pipeline_DependencyGraph_descriptor; - private static - com.google.protobuf.GeneratedMessage.FieldAccessorTable - internal_static_edu_stanford_nlp_pipeline_DependencyGraph_fieldAccessorTable; - private static com.google.protobuf.Descriptors.Descriptor - internal_static_edu_stanford_nlp_pipeline_DependencyGraph_Node_descriptor; - private static - com.google.protobuf.GeneratedMessage.FieldAccessorTable - internal_static_edu_stanford_nlp_pipeline_DependencyGraph_Node_fieldAccessorTable; - private static com.google.protobuf.Descriptors.Descriptor - internal_static_edu_stanford_nlp_pipeline_DependencyGraph_Edge_descriptor; - private static - com.google.protobuf.GeneratedMessage.FieldAccessorTable - internal_static_edu_stanford_nlp_pipeline_DependencyGraph_Edge_fieldAccessorTable; - private static com.google.protobuf.Descriptors.Descriptor - internal_static_edu_stanford_nlp_pipeline_CorefChain_descriptor; - private static - com.google.protobuf.GeneratedMessage.FieldAccessorTable - internal_static_edu_stanford_nlp_pipeline_CorefChain_fieldAccessorTable; - private static com.google.protobuf.Descriptors.Descriptor - 
internal_static_edu_stanford_nlp_pipeline_CorefChain_CorefMention_descriptor; - private static - com.google.protobuf.GeneratedMessage.FieldAccessorTable - internal_static_edu_stanford_nlp_pipeline_CorefChain_CorefMention_fieldAccessorTable; - private static com.google.protobuf.Descriptors.Descriptor - internal_static_edu_stanford_nlp_pipeline_Timex_descriptor; - private static - com.google.protobuf.GeneratedMessage.FieldAccessorTable - internal_static_edu_stanford_nlp_pipeline_Timex_fieldAccessorTable; - private static com.google.protobuf.Descriptors.Descriptor - internal_static_edu_stanford_nlp_pipeline_Entity_descriptor; - private static - com.google.protobuf.GeneratedMessage.FieldAccessorTable - internal_static_edu_stanford_nlp_pipeline_Entity_fieldAccessorTable; - private static com.google.protobuf.Descriptors.Descriptor - internal_static_edu_stanford_nlp_pipeline_Relation_descriptor; - private static - com.google.protobuf.GeneratedMessage.FieldAccessorTable - internal_static_edu_stanford_nlp_pipeline_Relation_fieldAccessorTable; - - public static com.google.protobuf.Descriptors.FileDescriptor - getDescriptor() { - return descriptor; + public interface OperatorOrBuilder + extends com.google.protobuf.MessageOrBuilder { + + // required string name = 1; + boolean hasName(); + String getName(); + + // required int32 quantifierSpanBegin = 2; + boolean hasQuantifierSpanBegin(); + int getQuantifierSpanBegin(); + + // required int32 quantifierSpanEnd = 3; + boolean hasQuantifierSpanEnd(); + int getQuantifierSpanEnd(); + + // required int32 subjectSpanBegin = 4; + boolean hasSubjectSpanBegin(); + int getSubjectSpanBegin(); + + // required int32 subjectSpanEnd = 5; + boolean hasSubjectSpanEnd(); + int getSubjectSpanEnd(); + + // required int32 objectSpanBegin = 6; + boolean hasObjectSpanBegin(); + int getObjectSpanBegin(); + + // required int32 objectSpanEnd = 7; + boolean hasObjectSpanEnd(); + int getObjectSpanEnd(); } - private static com.google.protobuf.Descriptors.FileDescriptor - descriptor; - static { - java.lang.String[] descriptorData = { - "\n\rCoreNLP.proto\022\031edu.stanford.nlp.pipeli" + - "ne\"\335\001\n\010Document\022\014\n\004text\030\001 \002(\t\0225\n\010sentenc" + - "e\030\002 \003(\0132#.edu.stanford.nlp.pipeline.Sent" + - "ence\0229\n\ncorefChain\030\003 \003(\0132%.edu.stanford." + - "nlp.pipeline.CorefChain\022\r\n\005docID\030\004 \001(\t\022;" + - "\n\021sentencelessToken\030\005 \003(\0132 .edu.stanford" + + public static final class Operator extends + com.google.protobuf.GeneratedMessage + implements OperatorOrBuilder { + // Use Operator.newBuilder() to construct. 
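+ // Editor's note: Operator serializes a natural logic quantifier and its scope as
+ // token offsets into the sentence: the span of the quantifier itself plus the
+ // subject and object spans it scopes over (for a quantifier like "all", roughly
+ // its restrictor and body). All seven fields are required.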
+ private Operator(Builder builder) { + super(builder); + } + private Operator(boolean noInit) {} + + private static final Operator defaultInstance; + public static Operator getDefaultInstance() { + return defaultInstance; + } + + public Operator getDefaultInstanceForType() { + return defaultInstance; + } + + public static final com.google.protobuf.Descriptors.Descriptor + getDescriptor() { + return edu.stanford.nlp.pipeline.CoreNLPProtos.internal_static_edu_stanford_nlp_pipeline_Operator_descriptor; + } + + protected com.google.protobuf.GeneratedMessage.FieldAccessorTable + internalGetFieldAccessorTable() { + return edu.stanford.nlp.pipeline.CoreNLPProtos.internal_static_edu_stanford_nlp_pipeline_Operator_fieldAccessorTable; + } + + private int bitField0_; + // required string name = 1; + public static final int NAME_FIELD_NUMBER = 1; + private java.lang.Object name_; + public boolean hasName() { + return ((bitField0_ & 0x00000001) == 0x00000001); + } + public String getName() { + java.lang.Object ref = name_; + if (ref instanceof String) { + return (String) ref; + } else { + com.google.protobuf.ByteString bs = + (com.google.protobuf.ByteString) ref; + String s = bs.toStringUtf8(); + if (com.google.protobuf.Internal.isValidUtf8(bs)) { + name_ = s; + } + return s; + } + } + private com.google.protobuf.ByteString getNameBytes() { + java.lang.Object ref = name_; + if (ref instanceof String) { + com.google.protobuf.ByteString b = + com.google.protobuf.ByteString.copyFromUtf8((String) ref); + name_ = b; + return b; + } else { + return (com.google.protobuf.ByteString) ref; + } + } + + // required int32 quantifierSpanBegin = 2; + public static final int QUANTIFIERSPANBEGIN_FIELD_NUMBER = 2; + private int quantifierSpanBegin_; + public boolean hasQuantifierSpanBegin() { + return ((bitField0_ & 0x00000002) == 0x00000002); + } + public int getQuantifierSpanBegin() { + return quantifierSpanBegin_; + } + + // required int32 quantifierSpanEnd = 3; + public static final int QUANTIFIERSPANEND_FIELD_NUMBER = 3; + private int quantifierSpanEnd_; + public boolean hasQuantifierSpanEnd() { + return ((bitField0_ & 0x00000004) == 0x00000004); + } + public int getQuantifierSpanEnd() { + return quantifierSpanEnd_; + } + + // required int32 subjectSpanBegin = 4; + public static final int SUBJECTSPANBEGIN_FIELD_NUMBER = 4; + private int subjectSpanBegin_; + public boolean hasSubjectSpanBegin() { + return ((bitField0_ & 0x00000008) == 0x00000008); + } + public int getSubjectSpanBegin() { + return subjectSpanBegin_; + } + + // required int32 subjectSpanEnd = 5; + public static final int SUBJECTSPANEND_FIELD_NUMBER = 5; + private int subjectSpanEnd_; + public boolean hasSubjectSpanEnd() { + return ((bitField0_ & 0x00000010) == 0x00000010); + } + public int getSubjectSpanEnd() { + return subjectSpanEnd_; + } + + // required int32 objectSpanBegin = 6; + public static final int OBJECTSPANBEGIN_FIELD_NUMBER = 6; + private int objectSpanBegin_; + public boolean hasObjectSpanBegin() { + return ((bitField0_ & 0x00000020) == 0x00000020); + } + public int getObjectSpanBegin() { + return objectSpanBegin_; + } + + // required int32 objectSpanEnd = 7; + public static final int OBJECTSPANEND_FIELD_NUMBER = 7; + private int objectSpanEnd_; + public boolean hasObjectSpanEnd() { + return ((bitField0_ & 0x00000040) == 0x00000040); + } + public int getObjectSpanEnd() { + return objectSpanEnd_; + } + + private void initFields() { + name_ = ""; + quantifierSpanBegin_ = 0; + quantifierSpanEnd_ = 0; + subjectSpanBegin_ = 0; + 
subjectSpanEnd_ = 0; + objectSpanBegin_ = 0; + objectSpanEnd_ = 0; + } + private byte memoizedIsInitialized = -1; + public final boolean isInitialized() { + byte isInitialized = memoizedIsInitialized; + if (isInitialized != -1) return isInitialized == 1; + + if (!hasName()) { + memoizedIsInitialized = 0; + return false; + } + if (!hasQuantifierSpanBegin()) { + memoizedIsInitialized = 0; + return false; + } + if (!hasQuantifierSpanEnd()) { + memoizedIsInitialized = 0; + return false; + } + if (!hasSubjectSpanBegin()) { + memoizedIsInitialized = 0; + return false; + } + if (!hasSubjectSpanEnd()) { + memoizedIsInitialized = 0; + return false; + } + if (!hasObjectSpanBegin()) { + memoizedIsInitialized = 0; + return false; + } + if (!hasObjectSpanEnd()) { + memoizedIsInitialized = 0; + return false; + } + memoizedIsInitialized = 1; + return true; + } + + public void writeTo(com.google.protobuf.CodedOutputStream output) + throws java.io.IOException { + getSerializedSize(); + if (((bitField0_ & 0x00000001) == 0x00000001)) { + output.writeBytes(1, getNameBytes()); + } + if (((bitField0_ & 0x00000002) == 0x00000002)) { + output.writeInt32(2, quantifierSpanBegin_); + } + if (((bitField0_ & 0x00000004) == 0x00000004)) { + output.writeInt32(3, quantifierSpanEnd_); + } + if (((bitField0_ & 0x00000008) == 0x00000008)) { + output.writeInt32(4, subjectSpanBegin_); + } + if (((bitField0_ & 0x00000010) == 0x00000010)) { + output.writeInt32(5, subjectSpanEnd_); + } + if (((bitField0_ & 0x00000020) == 0x00000020)) { + output.writeInt32(6, objectSpanBegin_); + } + if (((bitField0_ & 0x00000040) == 0x00000040)) { + output.writeInt32(7, objectSpanEnd_); + } + getUnknownFields().writeTo(output); + } + + private int memoizedSerializedSize = -1; + public int getSerializedSize() { + int size = memoizedSerializedSize; + if (size != -1) return size; + + size = 0; + if (((bitField0_ & 0x00000001) == 0x00000001)) { + size += com.google.protobuf.CodedOutputStream + .computeBytesSize(1, getNameBytes()); + } + if (((bitField0_ & 0x00000002) == 0x00000002)) { + size += com.google.protobuf.CodedOutputStream + .computeInt32Size(2, quantifierSpanBegin_); + } + if (((bitField0_ & 0x00000004) == 0x00000004)) { + size += com.google.protobuf.CodedOutputStream + .computeInt32Size(3, quantifierSpanEnd_); + } + if (((bitField0_ & 0x00000008) == 0x00000008)) { + size += com.google.protobuf.CodedOutputStream + .computeInt32Size(4, subjectSpanBegin_); + } + if (((bitField0_ & 0x00000010) == 0x00000010)) { + size += com.google.protobuf.CodedOutputStream + .computeInt32Size(5, subjectSpanEnd_); + } + if (((bitField0_ & 0x00000020) == 0x00000020)) { + size += com.google.protobuf.CodedOutputStream + .computeInt32Size(6, objectSpanBegin_); + } + if (((bitField0_ & 0x00000040) == 0x00000040)) { + size += com.google.protobuf.CodedOutputStream + .computeInt32Size(7, objectSpanEnd_); + } + size += getUnknownFields().getSerializedSize(); + memoizedSerializedSize = size; + return size; + } + + private static final long serialVersionUID = 0L; + @java.lang.Override + protected java.lang.Object writeReplace() + throws java.io.ObjectStreamException { + return super.writeReplace(); + } + + public static edu.stanford.nlp.pipeline.CoreNLPProtos.Operator parseFrom( + com.google.protobuf.ByteString data) + throws com.google.protobuf.InvalidProtocolBufferException { + return newBuilder().mergeFrom(data).buildParsed(); + } + public static edu.stanford.nlp.pipeline.CoreNLPProtos.Operator parseFrom( + com.google.protobuf.ByteString data, + 
com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return newBuilder().mergeFrom(data, extensionRegistry) + .buildParsed(); + } + public static edu.stanford.nlp.pipeline.CoreNLPProtos.Operator parseFrom(byte[] data) + throws com.google.protobuf.InvalidProtocolBufferException { + return newBuilder().mergeFrom(data).buildParsed(); + } + public static edu.stanford.nlp.pipeline.CoreNLPProtos.Operator parseFrom( + byte[] data, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return newBuilder().mergeFrom(data, extensionRegistry) + .buildParsed(); + } + public static edu.stanford.nlp.pipeline.CoreNLPProtos.Operator parseFrom(java.io.InputStream input) + throws java.io.IOException { + return newBuilder().mergeFrom(input).buildParsed(); + } + public static edu.stanford.nlp.pipeline.CoreNLPProtos.Operator parseFrom( + java.io.InputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return newBuilder().mergeFrom(input, extensionRegistry) + .buildParsed(); + } + public static edu.stanford.nlp.pipeline.CoreNLPProtos.Operator parseDelimitedFrom(java.io.InputStream input) + throws java.io.IOException { + Builder builder = newBuilder(); + if (builder.mergeDelimitedFrom(input)) { + return builder.buildParsed(); + } else { + return null; + } + } + public static edu.stanford.nlp.pipeline.CoreNLPProtos.Operator parseDelimitedFrom( + java.io.InputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + Builder builder = newBuilder(); + if (builder.mergeDelimitedFrom(input, extensionRegistry)) { + return builder.buildParsed(); + } else { + return null; + } + } + public static edu.stanford.nlp.pipeline.CoreNLPProtos.Operator parseFrom( + com.google.protobuf.CodedInputStream input) + throws java.io.IOException { + return newBuilder().mergeFrom(input).buildParsed(); + } + public static edu.stanford.nlp.pipeline.CoreNLPProtos.Operator parseFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return newBuilder().mergeFrom(input, extensionRegistry) + .buildParsed(); + } + + public static Builder newBuilder() { return Builder.create(); } + public Builder newBuilderForType() { return newBuilder(); } + public static Builder newBuilder(edu.stanford.nlp.pipeline.CoreNLPProtos.Operator prototype) { + return newBuilder().mergeFrom(prototype); + } + public Builder toBuilder() { return newBuilder(this); } + + @java.lang.Override + protected Builder newBuilderForType( + com.google.protobuf.GeneratedMessage.BuilderParent parent) { + Builder builder = new Builder(parent); + return builder; + } + public static final class Builder extends + com.google.protobuf.GeneratedMessage.Builder + implements edu.stanford.nlp.pipeline.CoreNLPProtos.OperatorOrBuilder { + public static final com.google.protobuf.Descriptors.Descriptor + getDescriptor() { + return edu.stanford.nlp.pipeline.CoreNLPProtos.internal_static_edu_stanford_nlp_pipeline_Operator_descriptor; + } + + protected com.google.protobuf.GeneratedMessage.FieldAccessorTable + internalGetFieldAccessorTable() { + return edu.stanford.nlp.pipeline.CoreNLPProtos.internal_static_edu_stanford_nlp_pipeline_Operator_fieldAccessorTable; + } + + // Construct using edu.stanford.nlp.pipeline.CoreNLPProtos.Operator.newBuilder() + private 
Builder() { + maybeForceBuilderInitialization(); + } + + private Builder(BuilderParent parent) { + super(parent); + maybeForceBuilderInitialization(); + } + private void maybeForceBuilderInitialization() { + if (com.google.protobuf.GeneratedMessage.alwaysUseFieldBuilders) { + } + } + private static Builder create() { + return new Builder(); + } + + public Builder clear() { + super.clear(); + name_ = ""; + bitField0_ = (bitField0_ & ~0x00000001); + quantifierSpanBegin_ = 0; + bitField0_ = (bitField0_ & ~0x00000002); + quantifierSpanEnd_ = 0; + bitField0_ = (bitField0_ & ~0x00000004); + subjectSpanBegin_ = 0; + bitField0_ = (bitField0_ & ~0x00000008); + subjectSpanEnd_ = 0; + bitField0_ = (bitField0_ & ~0x00000010); + objectSpanBegin_ = 0; + bitField0_ = (bitField0_ & ~0x00000020); + objectSpanEnd_ = 0; + bitField0_ = (bitField0_ & ~0x00000040); + return this; + } + + public Builder clone() { + return create().mergeFrom(buildPartial()); + } + + public com.google.protobuf.Descriptors.Descriptor + getDescriptorForType() { + return edu.stanford.nlp.pipeline.CoreNLPProtos.Operator.getDescriptor(); + } + + public edu.stanford.nlp.pipeline.CoreNLPProtos.Operator getDefaultInstanceForType() { + return edu.stanford.nlp.pipeline.CoreNLPProtos.Operator.getDefaultInstance(); + } + + public edu.stanford.nlp.pipeline.CoreNLPProtos.Operator build() { + edu.stanford.nlp.pipeline.CoreNLPProtos.Operator result = buildPartial(); + if (!result.isInitialized()) { + throw newUninitializedMessageException(result); + } + return result; + } + + private edu.stanford.nlp.pipeline.CoreNLPProtos.Operator buildParsed() + throws com.google.protobuf.InvalidProtocolBufferException { + edu.stanford.nlp.pipeline.CoreNLPProtos.Operator result = buildPartial(); + if (!result.isInitialized()) { + throw newUninitializedMessageException( + result).asInvalidProtocolBufferException(); + } + return result; + } + + public edu.stanford.nlp.pipeline.CoreNLPProtos.Operator buildPartial() { + edu.stanford.nlp.pipeline.CoreNLPProtos.Operator result = new edu.stanford.nlp.pipeline.CoreNLPProtos.Operator(this); + int from_bitField0_ = bitField0_; + int to_bitField0_ = 0; + if (((from_bitField0_ & 0x00000001) == 0x00000001)) { + to_bitField0_ |= 0x00000001; + } + result.name_ = name_; + if (((from_bitField0_ & 0x00000002) == 0x00000002)) { + to_bitField0_ |= 0x00000002; + } + result.quantifierSpanBegin_ = quantifierSpanBegin_; + if (((from_bitField0_ & 0x00000004) == 0x00000004)) { + to_bitField0_ |= 0x00000004; + } + result.quantifierSpanEnd_ = quantifierSpanEnd_; + if (((from_bitField0_ & 0x00000008) == 0x00000008)) { + to_bitField0_ |= 0x00000008; + } + result.subjectSpanBegin_ = subjectSpanBegin_; + if (((from_bitField0_ & 0x00000010) == 0x00000010)) { + to_bitField0_ |= 0x00000010; + } + result.subjectSpanEnd_ = subjectSpanEnd_; + if (((from_bitField0_ & 0x00000020) == 0x00000020)) { + to_bitField0_ |= 0x00000020; + } + result.objectSpanBegin_ = objectSpanBegin_; + if (((from_bitField0_ & 0x00000040) == 0x00000040)) { + to_bitField0_ |= 0x00000040; + } + result.objectSpanEnd_ = objectSpanEnd_; + result.bitField0_ = to_bitField0_; + onBuilt(); + return result; + } + + public Builder mergeFrom(com.google.protobuf.Message other) { + if (other instanceof edu.stanford.nlp.pipeline.CoreNLPProtos.Operator) { + return mergeFrom((edu.stanford.nlp.pipeline.CoreNLPProtos.Operator)other); + } else { + super.mergeFrom(other); + return this; + } + } + + public Builder mergeFrom(edu.stanford.nlp.pipeline.CoreNLPProtos.Operator other) { + if (other == 
edu.stanford.nlp.pipeline.CoreNLPProtos.Operator.getDefaultInstance()) return this; + if (other.hasName()) { + setName(other.getName()); + } + if (other.hasQuantifierSpanBegin()) { + setQuantifierSpanBegin(other.getQuantifierSpanBegin()); + } + if (other.hasQuantifierSpanEnd()) { + setQuantifierSpanEnd(other.getQuantifierSpanEnd()); + } + if (other.hasSubjectSpanBegin()) { + setSubjectSpanBegin(other.getSubjectSpanBegin()); + } + if (other.hasSubjectSpanEnd()) { + setSubjectSpanEnd(other.getSubjectSpanEnd()); + } + if (other.hasObjectSpanBegin()) { + setObjectSpanBegin(other.getObjectSpanBegin()); + } + if (other.hasObjectSpanEnd()) { + setObjectSpanEnd(other.getObjectSpanEnd()); + } + this.mergeUnknownFields(other.getUnknownFields()); + return this; + } + + public final boolean isInitialized() { + if (!hasName()) { + + return false; + } + if (!hasQuantifierSpanBegin()) { + + return false; + } + if (!hasQuantifierSpanEnd()) { + + return false; + } + if (!hasSubjectSpanBegin()) { + + return false; + } + if (!hasSubjectSpanEnd()) { + + return false; + } + if (!hasObjectSpanBegin()) { + + return false; + } + if (!hasObjectSpanEnd()) { + + return false; + } + return true; + } + + public Builder mergeFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + com.google.protobuf.UnknownFieldSet.Builder unknownFields = + com.google.protobuf.UnknownFieldSet.newBuilder( + this.getUnknownFields()); + while (true) { + int tag = input.readTag(); + switch (tag) { + case 0: + this.setUnknownFields(unknownFields.build()); + onChanged(); + return this; + default: { + if (!parseUnknownField(input, unknownFields, + extensionRegistry, tag)) { + this.setUnknownFields(unknownFields.build()); + onChanged(); + return this; + } + break; + } + case 10: { + bitField0_ |= 0x00000001; + name_ = input.readBytes(); + break; + } + case 16: { + bitField0_ |= 0x00000002; + quantifierSpanBegin_ = input.readInt32(); + break; + } + case 24: { + bitField0_ |= 0x00000004; + quantifierSpanEnd_ = input.readInt32(); + break; + } + case 32: { + bitField0_ |= 0x00000008; + subjectSpanBegin_ = input.readInt32(); + break; + } + case 40: { + bitField0_ |= 0x00000010; + subjectSpanEnd_ = input.readInt32(); + break; + } + case 48: { + bitField0_ |= 0x00000020; + objectSpanBegin_ = input.readInt32(); + break; + } + case 56: { + bitField0_ |= 0x00000040; + objectSpanEnd_ = input.readInt32(); + break; + } + } + } + } + + private int bitField0_; + + // required string name = 1; + private java.lang.Object name_ = ""; + public boolean hasName() { + return ((bitField0_ & 0x00000001) == 0x00000001); + } + public String getName() { + java.lang.Object ref = name_; + if (!(ref instanceof String)) { + String s = ((com.google.protobuf.ByteString) ref).toStringUtf8(); + name_ = s; + return s; + } else { + return (String) ref; + } + } + public Builder setName(String value) { + if (value == null) { + throw new NullPointerException(); + } + bitField0_ |= 0x00000001; + name_ = value; + onChanged(); + return this; + } + public Builder clearName() { + bitField0_ = (bitField0_ & ~0x00000001); + name_ = getDefaultInstance().getName(); + onChanged(); + return this; + } + void setName(com.google.protobuf.ByteString value) { + bitField0_ |= 0x00000001; + name_ = value; + onChanged(); + } + + // required int32 quantifierSpanBegin = 2; + private int quantifierSpanBegin_ ; + public boolean hasQuantifierSpanBegin() { + return ((bitField0_ & 0x00000002) == 0x00000002); + } + 
public int getQuantifierSpanBegin() { + return quantifierSpanBegin_; + } + public Builder setQuantifierSpanBegin(int value) { + bitField0_ |= 0x00000002; + quantifierSpanBegin_ = value; + onChanged(); + return this; + } + public Builder clearQuantifierSpanBegin() { + bitField0_ = (bitField0_ & ~0x00000002); + quantifierSpanBegin_ = 0; + onChanged(); + return this; + } + + // required int32 quantifierSpanEnd = 3; + private int quantifierSpanEnd_ ; + public boolean hasQuantifierSpanEnd() { + return ((bitField0_ & 0x00000004) == 0x00000004); + } + public int getQuantifierSpanEnd() { + return quantifierSpanEnd_; + } + public Builder setQuantifierSpanEnd(int value) { + bitField0_ |= 0x00000004; + quantifierSpanEnd_ = value; + onChanged(); + return this; + } + public Builder clearQuantifierSpanEnd() { + bitField0_ = (bitField0_ & ~0x00000004); + quantifierSpanEnd_ = 0; + onChanged(); + return this; + } + + // required int32 subjectSpanBegin = 4; + private int subjectSpanBegin_ ; + public boolean hasSubjectSpanBegin() { + return ((bitField0_ & 0x00000008) == 0x00000008); + } + public int getSubjectSpanBegin() { + return subjectSpanBegin_; + } + public Builder setSubjectSpanBegin(int value) { + bitField0_ |= 0x00000008; + subjectSpanBegin_ = value; + onChanged(); + return this; + } + public Builder clearSubjectSpanBegin() { + bitField0_ = (bitField0_ & ~0x00000008); + subjectSpanBegin_ = 0; + onChanged(); + return this; + } + + // required int32 subjectSpanEnd = 5; + private int subjectSpanEnd_ ; + public boolean hasSubjectSpanEnd() { + return ((bitField0_ & 0x00000010) == 0x00000010); + } + public int getSubjectSpanEnd() { + return subjectSpanEnd_; + } + public Builder setSubjectSpanEnd(int value) { + bitField0_ |= 0x00000010; + subjectSpanEnd_ = value; + onChanged(); + return this; + } + public Builder clearSubjectSpanEnd() { + bitField0_ = (bitField0_ & ~0x00000010); + subjectSpanEnd_ = 0; + onChanged(); + return this; + } + + // required int32 objectSpanBegin = 6; + private int objectSpanBegin_ ; + public boolean hasObjectSpanBegin() { + return ((bitField0_ & 0x00000020) == 0x00000020); + } + public int getObjectSpanBegin() { + return objectSpanBegin_; + } + public Builder setObjectSpanBegin(int value) { + bitField0_ |= 0x00000020; + objectSpanBegin_ = value; + onChanged(); + return this; + } + public Builder clearObjectSpanBegin() { + bitField0_ = (bitField0_ & ~0x00000020); + objectSpanBegin_ = 0; + onChanged(); + return this; + } + + // required int32 objectSpanEnd = 7; + private int objectSpanEnd_ ; + public boolean hasObjectSpanEnd() { + return ((bitField0_ & 0x00000040) == 0x00000040); + } + public int getObjectSpanEnd() { + return objectSpanEnd_; + } + public Builder setObjectSpanEnd(int value) { + bitField0_ |= 0x00000040; + objectSpanEnd_ = value; + onChanged(); + return this; + } + public Builder clearObjectSpanEnd() { + bitField0_ = (bitField0_ & ~0x00000040); + objectSpanEnd_ = 0; + onChanged(); + return this; + } + + // @@protoc_insertion_point(builder_scope:edu.stanford.nlp.pipeline.Operator) + } + + static { + defaultInstance = new Operator(true); + defaultInstance.initFields(); + } + + // @@protoc_insertion_point(class_scope:edu.stanford.nlp.pipeline.Operator) + } + + public interface PolarityOrBuilder + extends com.google.protobuf.MessageOrBuilder { + + // required .edu.stanford.nlp.pipeline.NaturalLogicRelation projectEquivalence = 1; + boolean hasProjectEquivalence(); + edu.stanford.nlp.pipeline.CoreNLPProtos.NaturalLogicRelation getProjectEquivalence(); + + // required 
.edu.stanford.nlp.pipeline.NaturalLogicRelation projectForwardEntailment = 2; + boolean hasProjectForwardEntailment(); + edu.stanford.nlp.pipeline.CoreNLPProtos.NaturalLogicRelation getProjectForwardEntailment(); + + // required .edu.stanford.nlp.pipeline.NaturalLogicRelation projectReverseEntailment = 3; + boolean hasProjectReverseEntailment(); + edu.stanford.nlp.pipeline.CoreNLPProtos.NaturalLogicRelation getProjectReverseEntailment(); + + // required .edu.stanford.nlp.pipeline.NaturalLogicRelation projectNegation = 4; + boolean hasProjectNegation(); + edu.stanford.nlp.pipeline.CoreNLPProtos.NaturalLogicRelation getProjectNegation(); + + // required .edu.stanford.nlp.pipeline.NaturalLogicRelation projectAlternation = 5; + boolean hasProjectAlternation(); + edu.stanford.nlp.pipeline.CoreNLPProtos.NaturalLogicRelation getProjectAlternation(); + + // required .edu.stanford.nlp.pipeline.NaturalLogicRelation projectCover = 6; + boolean hasProjectCover(); + edu.stanford.nlp.pipeline.CoreNLPProtos.NaturalLogicRelation getProjectCover(); + + // required .edu.stanford.nlp.pipeline.NaturalLogicRelation projectIndependence = 7; + boolean hasProjectIndependence(); + edu.stanford.nlp.pipeline.CoreNLPProtos.NaturalLogicRelation getProjectIndependence(); + } + public static final class Polarity extends + com.google.protobuf.GeneratedMessage + implements PolarityOrBuilder { + // Use Polarity.newBuilder() to construct. + private Polarity(Builder builder) { + super(builder); + } + private Polarity(boolean noInit) {} + + private static final Polarity defaultInstance; + public static Polarity getDefaultInstance() { + return defaultInstance; + } + + public Polarity getDefaultInstanceForType() { + return defaultInstance; + } + + public static final com.google.protobuf.Descriptors.Descriptor + getDescriptor() { + return edu.stanford.nlp.pipeline.CoreNLPProtos.internal_static_edu_stanford_nlp_pipeline_Polarity_descriptor; + } + + protected com.google.protobuf.GeneratedMessage.FieldAccessorTable + internalGetFieldAccessorTable() { + return edu.stanford.nlp.pipeline.CoreNLPProtos.internal_static_edu_stanford_nlp_pipeline_Polarity_fieldAccessorTable; + } + + private int bitField0_; + // required .edu.stanford.nlp.pipeline.NaturalLogicRelation projectEquivalence = 1; + public static final int PROJECTEQUIVALENCE_FIELD_NUMBER = 1; + private edu.stanford.nlp.pipeline.CoreNLPProtos.NaturalLogicRelation projectEquivalence_; + public boolean hasProjectEquivalence() { + return ((bitField0_ & 0x00000001) == 0x00000001); + } + public edu.stanford.nlp.pipeline.CoreNLPProtos.NaturalLogicRelation getProjectEquivalence() { + return projectEquivalence_; + } + + // required .edu.stanford.nlp.pipeline.NaturalLogicRelation projectForwardEntailment = 2; + public static final int PROJECTFORWARDENTAILMENT_FIELD_NUMBER = 2; + private edu.stanford.nlp.pipeline.CoreNLPProtos.NaturalLogicRelation projectForwardEntailment_; + public boolean hasProjectForwardEntailment() { + return ((bitField0_ & 0x00000002) == 0x00000002); + } + public edu.stanford.nlp.pipeline.CoreNLPProtos.NaturalLogicRelation getProjectForwardEntailment() { + return projectForwardEntailment_; + } + + // required .edu.stanford.nlp.pipeline.NaturalLogicRelation projectReverseEntailment = 3; + public static final int PROJECTREVERSEENTAILMENT_FIELD_NUMBER = 3; + private edu.stanford.nlp.pipeline.CoreNLPProtos.NaturalLogicRelation projectReverseEntailment_; + public boolean hasProjectReverseEntailment() { + return ((bitField0_ & 0x00000004) == 0x00000004); + } + public 
edu.stanford.nlp.pipeline.CoreNLPProtos.NaturalLogicRelation getProjectReverseEntailment() { + return projectReverseEntailment_; + } + + // required .edu.stanford.nlp.pipeline.NaturalLogicRelation projectNegation = 4; + public static final int PROJECTNEGATION_FIELD_NUMBER = 4; + private edu.stanford.nlp.pipeline.CoreNLPProtos.NaturalLogicRelation projectNegation_; + public boolean hasProjectNegation() { + return ((bitField0_ & 0x00000008) == 0x00000008); + } + public edu.stanford.nlp.pipeline.CoreNLPProtos.NaturalLogicRelation getProjectNegation() { + return projectNegation_; + } + + // required .edu.stanford.nlp.pipeline.NaturalLogicRelation projectAlternation = 5; + public static final int PROJECTALTERNATION_FIELD_NUMBER = 5; + private edu.stanford.nlp.pipeline.CoreNLPProtos.NaturalLogicRelation projectAlternation_; + public boolean hasProjectAlternation() { + return ((bitField0_ & 0x00000010) == 0x00000010); + } + public edu.stanford.nlp.pipeline.CoreNLPProtos.NaturalLogicRelation getProjectAlternation() { + return projectAlternation_; + } + + // required .edu.stanford.nlp.pipeline.NaturalLogicRelation projectCover = 6; + public static final int PROJECTCOVER_FIELD_NUMBER = 6; + private edu.stanford.nlp.pipeline.CoreNLPProtos.NaturalLogicRelation projectCover_; + public boolean hasProjectCover() { + return ((bitField0_ & 0x00000020) == 0x00000020); + } + public edu.stanford.nlp.pipeline.CoreNLPProtos.NaturalLogicRelation getProjectCover() { + return projectCover_; + } + + // required .edu.stanford.nlp.pipeline.NaturalLogicRelation projectIndependence = 7; + public static final int PROJECTINDEPENDENCE_FIELD_NUMBER = 7; + private edu.stanford.nlp.pipeline.CoreNLPProtos.NaturalLogicRelation projectIndependence_; + public boolean hasProjectIndependence() { + return ((bitField0_ & 0x00000040) == 0x00000040); + } + public edu.stanford.nlp.pipeline.CoreNLPProtos.NaturalLogicRelation getProjectIndependence() { + return projectIndependence_; + } + + private void initFields() { + projectEquivalence_ = edu.stanford.nlp.pipeline.CoreNLPProtos.NaturalLogicRelation.EQUIVALENCE; + projectForwardEntailment_ = edu.stanford.nlp.pipeline.CoreNLPProtos.NaturalLogicRelation.EQUIVALENCE; + projectReverseEntailment_ = edu.stanford.nlp.pipeline.CoreNLPProtos.NaturalLogicRelation.EQUIVALENCE; + projectNegation_ = edu.stanford.nlp.pipeline.CoreNLPProtos.NaturalLogicRelation.EQUIVALENCE; + projectAlternation_ = edu.stanford.nlp.pipeline.CoreNLPProtos.NaturalLogicRelation.EQUIVALENCE; + projectCover_ = edu.stanford.nlp.pipeline.CoreNLPProtos.NaturalLogicRelation.EQUIVALENCE; + projectIndependence_ = edu.stanford.nlp.pipeline.CoreNLPProtos.NaturalLogicRelation.EQUIVALENCE; + } + private byte memoizedIsInitialized = -1; + public final boolean isInitialized() { + byte isInitialized = memoizedIsInitialized; + if (isInitialized != -1) return isInitialized == 1; + + if (!hasProjectEquivalence()) { + memoizedIsInitialized = 0; + return false; + } + if (!hasProjectForwardEntailment()) { + memoizedIsInitialized = 0; + return false; + } + if (!hasProjectReverseEntailment()) { + memoizedIsInitialized = 0; + return false; + } + if (!hasProjectNegation()) { + memoizedIsInitialized = 0; + return false; + } + if (!hasProjectAlternation()) { + memoizedIsInitialized = 0; + return false; + } + if (!hasProjectCover()) { + memoizedIsInitialized = 0; + return false; + } + if (!hasProjectIndependence()) { + memoizedIsInitialized = 0; + return false; + } + memoizedIsInitialized = 1; + return true; + } + + public void 
writeTo(com.google.protobuf.CodedOutputStream output) + throws java.io.IOException { + getSerializedSize(); + if (((bitField0_ & 0x00000001) == 0x00000001)) { + output.writeEnum(1, projectEquivalence_.getNumber()); + } + if (((bitField0_ & 0x00000002) == 0x00000002)) { + output.writeEnum(2, projectForwardEntailment_.getNumber()); + } + if (((bitField0_ & 0x00000004) == 0x00000004)) { + output.writeEnum(3, projectReverseEntailment_.getNumber()); + } + if (((bitField0_ & 0x00000008) == 0x00000008)) { + output.writeEnum(4, projectNegation_.getNumber()); + } + if (((bitField0_ & 0x00000010) == 0x00000010)) { + output.writeEnum(5, projectAlternation_.getNumber()); + } + if (((bitField0_ & 0x00000020) == 0x00000020)) { + output.writeEnum(6, projectCover_.getNumber()); + } + if (((bitField0_ & 0x00000040) == 0x00000040)) { + output.writeEnum(7, projectIndependence_.getNumber()); + } + getUnknownFields().writeTo(output); + } + + private int memoizedSerializedSize = -1; + public int getSerializedSize() { + int size = memoizedSerializedSize; + if (size != -1) return size; + + size = 0; + if (((bitField0_ & 0x00000001) == 0x00000001)) { + size += com.google.protobuf.CodedOutputStream + .computeEnumSize(1, projectEquivalence_.getNumber()); + } + if (((bitField0_ & 0x00000002) == 0x00000002)) { + size += com.google.protobuf.CodedOutputStream + .computeEnumSize(2, projectForwardEntailment_.getNumber()); + } + if (((bitField0_ & 0x00000004) == 0x00000004)) { + size += com.google.protobuf.CodedOutputStream + .computeEnumSize(3, projectReverseEntailment_.getNumber()); + } + if (((bitField0_ & 0x00000008) == 0x00000008)) { + size += com.google.protobuf.CodedOutputStream + .computeEnumSize(4, projectNegation_.getNumber()); + } + if (((bitField0_ & 0x00000010) == 0x00000010)) { + size += com.google.protobuf.CodedOutputStream + .computeEnumSize(5, projectAlternation_.getNumber()); + } + if (((bitField0_ & 0x00000020) == 0x00000020)) { + size += com.google.protobuf.CodedOutputStream + .computeEnumSize(6, projectCover_.getNumber()); + } + if (((bitField0_ & 0x00000040) == 0x00000040)) { + size += com.google.protobuf.CodedOutputStream + .computeEnumSize(7, projectIndependence_.getNumber()); + } + size += getUnknownFields().getSerializedSize(); + memoizedSerializedSize = size; + return size; + } + + private static final long serialVersionUID = 0L; + @java.lang.Override + protected java.lang.Object writeReplace() + throws java.io.ObjectStreamException { + return super.writeReplace(); + } + + public static edu.stanford.nlp.pipeline.CoreNLPProtos.Polarity parseFrom( + com.google.protobuf.ByteString data) + throws com.google.protobuf.InvalidProtocolBufferException { + return newBuilder().mergeFrom(data).buildParsed(); + } + public static edu.stanford.nlp.pipeline.CoreNLPProtos.Polarity parseFrom( + com.google.protobuf.ByteString data, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return newBuilder().mergeFrom(data, extensionRegistry) + .buildParsed(); + } + public static edu.stanford.nlp.pipeline.CoreNLPProtos.Polarity parseFrom(byte[] data) + throws com.google.protobuf.InvalidProtocolBufferException { + return newBuilder().mergeFrom(data).buildParsed(); + } + public static edu.stanford.nlp.pipeline.CoreNLPProtos.Polarity parseFrom( + byte[] data, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return newBuilder().mergeFrom(data, extensionRegistry) + 
.buildParsed(); + } + public static edu.stanford.nlp.pipeline.CoreNLPProtos.Polarity parseFrom(java.io.InputStream input) + throws java.io.IOException { + return newBuilder().mergeFrom(input).buildParsed(); + } + public static edu.stanford.nlp.pipeline.CoreNLPProtos.Polarity parseFrom( + java.io.InputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return newBuilder().mergeFrom(input, extensionRegistry) + .buildParsed(); + } + public static edu.stanford.nlp.pipeline.CoreNLPProtos.Polarity parseDelimitedFrom(java.io.InputStream input) + throws java.io.IOException { + Builder builder = newBuilder(); + if (builder.mergeDelimitedFrom(input)) { + return builder.buildParsed(); + } else { + return null; + } + } + public static edu.stanford.nlp.pipeline.CoreNLPProtos.Polarity parseDelimitedFrom( + java.io.InputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + Builder builder = newBuilder(); + if (builder.mergeDelimitedFrom(input, extensionRegistry)) { + return builder.buildParsed(); + } else { + return null; + } + } + public static edu.stanford.nlp.pipeline.CoreNLPProtos.Polarity parseFrom( + com.google.protobuf.CodedInputStream input) + throws java.io.IOException { + return newBuilder().mergeFrom(input).buildParsed(); + } + public static edu.stanford.nlp.pipeline.CoreNLPProtos.Polarity parseFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return newBuilder().mergeFrom(input, extensionRegistry) + .buildParsed(); + } + + public static Builder newBuilder() { return Builder.create(); } + public Builder newBuilderForType() { return newBuilder(); } + public static Builder newBuilder(edu.stanford.nlp.pipeline.CoreNLPProtos.Polarity prototype) { + return newBuilder().mergeFrom(prototype); + } + public Builder toBuilder() { return newBuilder(this); } + + @java.lang.Override + protected Builder newBuilderForType( + com.google.protobuf.GeneratedMessage.BuilderParent parent) { + Builder builder = new Builder(parent); + return builder; + } + public static final class Builder extends + com.google.protobuf.GeneratedMessage.Builder + implements edu.stanford.nlp.pipeline.CoreNLPProtos.PolarityOrBuilder { + public static final com.google.protobuf.Descriptors.Descriptor + getDescriptor() { + return edu.stanford.nlp.pipeline.CoreNLPProtos.internal_static_edu_stanford_nlp_pipeline_Polarity_descriptor; + } + + protected com.google.protobuf.GeneratedMessage.FieldAccessorTable + internalGetFieldAccessorTable() { + return edu.stanford.nlp.pipeline.CoreNLPProtos.internal_static_edu_stanford_nlp_pipeline_Polarity_fieldAccessorTable; + } + + // Construct using edu.stanford.nlp.pipeline.CoreNLPProtos.Polarity.newBuilder() + private Builder() { + maybeForceBuilderInitialization(); + } + + private Builder(BuilderParent parent) { + super(parent); + maybeForceBuilderInitialization(); + } + private void maybeForceBuilderInitialization() { + if (com.google.protobuf.GeneratedMessage.alwaysUseFieldBuilders) { + } + } + private static Builder create() { + return new Builder(); + } + + public Builder clear() { + super.clear(); + projectEquivalence_ = edu.stanford.nlp.pipeline.CoreNLPProtos.NaturalLogicRelation.EQUIVALENCE; + bitField0_ = (bitField0_ & ~0x00000001); + projectForwardEntailment_ = edu.stanford.nlp.pipeline.CoreNLPProtos.NaturalLogicRelation.EQUIVALENCE; + bitField0_ = (bitField0_ & ~0x00000002); + 
projectReverseEntailment_ = edu.stanford.nlp.pipeline.CoreNLPProtos.NaturalLogicRelation.EQUIVALENCE; + bitField0_ = (bitField0_ & ~0x00000004); + projectNegation_ = edu.stanford.nlp.pipeline.CoreNLPProtos.NaturalLogicRelation.EQUIVALENCE; + bitField0_ = (bitField0_ & ~0x00000008); + projectAlternation_ = edu.stanford.nlp.pipeline.CoreNLPProtos.NaturalLogicRelation.EQUIVALENCE; + bitField0_ = (bitField0_ & ~0x00000010); + projectCover_ = edu.stanford.nlp.pipeline.CoreNLPProtos.NaturalLogicRelation.EQUIVALENCE; + bitField0_ = (bitField0_ & ~0x00000020); + projectIndependence_ = edu.stanford.nlp.pipeline.CoreNLPProtos.NaturalLogicRelation.EQUIVALENCE; + bitField0_ = (bitField0_ & ~0x00000040); + return this; + } + + public Builder clone() { + return create().mergeFrom(buildPartial()); + } + + public com.google.protobuf.Descriptors.Descriptor + getDescriptorForType() { + return edu.stanford.nlp.pipeline.CoreNLPProtos.Polarity.getDescriptor(); + } + + public edu.stanford.nlp.pipeline.CoreNLPProtos.Polarity getDefaultInstanceForType() { + return edu.stanford.nlp.pipeline.CoreNLPProtos.Polarity.getDefaultInstance(); + } + + public edu.stanford.nlp.pipeline.CoreNLPProtos.Polarity build() { + edu.stanford.nlp.pipeline.CoreNLPProtos.Polarity result = buildPartial(); + if (!result.isInitialized()) { + throw newUninitializedMessageException(result); + } + return result; + } + + private edu.stanford.nlp.pipeline.CoreNLPProtos.Polarity buildParsed() + throws com.google.protobuf.InvalidProtocolBufferException { + edu.stanford.nlp.pipeline.CoreNLPProtos.Polarity result = buildPartial(); + if (!result.isInitialized()) { + throw newUninitializedMessageException( + result).asInvalidProtocolBufferException(); + } + return result; + } + + public edu.stanford.nlp.pipeline.CoreNLPProtos.Polarity buildPartial() { + edu.stanford.nlp.pipeline.CoreNLPProtos.Polarity result = new edu.stanford.nlp.pipeline.CoreNLPProtos.Polarity(this); + int from_bitField0_ = bitField0_; + int to_bitField0_ = 0; + if (((from_bitField0_ & 0x00000001) == 0x00000001)) { + to_bitField0_ |= 0x00000001; + } + result.projectEquivalence_ = projectEquivalence_; + if (((from_bitField0_ & 0x00000002) == 0x00000002)) { + to_bitField0_ |= 0x00000002; + } + result.projectForwardEntailment_ = projectForwardEntailment_; + if (((from_bitField0_ & 0x00000004) == 0x00000004)) { + to_bitField0_ |= 0x00000004; + } + result.projectReverseEntailment_ = projectReverseEntailment_; + if (((from_bitField0_ & 0x00000008) == 0x00000008)) { + to_bitField0_ |= 0x00000008; + } + result.projectNegation_ = projectNegation_; + if (((from_bitField0_ & 0x00000010) == 0x00000010)) { + to_bitField0_ |= 0x00000010; + } + result.projectAlternation_ = projectAlternation_; + if (((from_bitField0_ & 0x00000020) == 0x00000020)) { + to_bitField0_ |= 0x00000020; + } + result.projectCover_ = projectCover_; + if (((from_bitField0_ & 0x00000040) == 0x00000040)) { + to_bitField0_ |= 0x00000040; + } + result.projectIndependence_ = projectIndependence_; + result.bitField0_ = to_bitField0_; + onBuilt(); + return result; + } + + public Builder mergeFrom(com.google.protobuf.Message other) { + if (other instanceof edu.stanford.nlp.pipeline.CoreNLPProtos.Polarity) { + return mergeFrom((edu.stanford.nlp.pipeline.CoreNLPProtos.Polarity)other); + } else { + super.mergeFrom(other); + return this; + } + } + + public Builder mergeFrom(edu.stanford.nlp.pipeline.CoreNLPProtos.Polarity other) { + if (other == edu.stanford.nlp.pipeline.CoreNLPProtos.Polarity.getDefaultInstance()) return this; + if 
(other.hasProjectEquivalence()) { + setProjectEquivalence(other.getProjectEquivalence()); + } + if (other.hasProjectForwardEntailment()) { + setProjectForwardEntailment(other.getProjectForwardEntailment()); + } + if (other.hasProjectReverseEntailment()) { + setProjectReverseEntailment(other.getProjectReverseEntailment()); + } + if (other.hasProjectNegation()) { + setProjectNegation(other.getProjectNegation()); + } + if (other.hasProjectAlternation()) { + setProjectAlternation(other.getProjectAlternation()); + } + if (other.hasProjectCover()) { + setProjectCover(other.getProjectCover()); + } + if (other.hasProjectIndependence()) { + setProjectIndependence(other.getProjectIndependence()); + } + this.mergeUnknownFields(other.getUnknownFields()); + return this; + } + + public final boolean isInitialized() { + if (!hasProjectEquivalence()) { + + return false; + } + if (!hasProjectForwardEntailment()) { + + return false; + } + if (!hasProjectReverseEntailment()) { + + return false; + } + if (!hasProjectNegation()) { + + return false; + } + if (!hasProjectAlternation()) { + + return false; + } + if (!hasProjectCover()) { + + return false; + } + if (!hasProjectIndependence()) { + + return false; + } + return true; + } + + public Builder mergeFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + com.google.protobuf.UnknownFieldSet.Builder unknownFields = + com.google.protobuf.UnknownFieldSet.newBuilder( + this.getUnknownFields()); + while (true) { + int tag = input.readTag(); + switch (tag) { + case 0: + this.setUnknownFields(unknownFields.build()); + onChanged(); + return this; + default: { + if (!parseUnknownField(input, unknownFields, + extensionRegistry, tag)) { + this.setUnknownFields(unknownFields.build()); + onChanged(); + return this; + } + break; + } + case 8: { + int rawValue = input.readEnum(); + edu.stanford.nlp.pipeline.CoreNLPProtos.NaturalLogicRelation value = edu.stanford.nlp.pipeline.CoreNLPProtos.NaturalLogicRelation.valueOf(rawValue); + if (value == null) { + unknownFields.mergeVarintField(1, rawValue); + } else { + bitField0_ |= 0x00000001; + projectEquivalence_ = value; + } + break; + } + case 16: { + int rawValue = input.readEnum(); + edu.stanford.nlp.pipeline.CoreNLPProtos.NaturalLogicRelation value = edu.stanford.nlp.pipeline.CoreNLPProtos.NaturalLogicRelation.valueOf(rawValue); + if (value == null) { + unknownFields.mergeVarintField(2, rawValue); + } else { + bitField0_ |= 0x00000002; + projectForwardEntailment_ = value; + } + break; + } + case 24: { + int rawValue = input.readEnum(); + edu.stanford.nlp.pipeline.CoreNLPProtos.NaturalLogicRelation value = edu.stanford.nlp.pipeline.CoreNLPProtos.NaturalLogicRelation.valueOf(rawValue); + if (value == null) { + unknownFields.mergeVarintField(3, rawValue); + } else { + bitField0_ |= 0x00000004; + projectReverseEntailment_ = value; + } + break; + } + case 32: { + int rawValue = input.readEnum(); + edu.stanford.nlp.pipeline.CoreNLPProtos.NaturalLogicRelation value = edu.stanford.nlp.pipeline.CoreNLPProtos.NaturalLogicRelation.valueOf(rawValue); + if (value == null) { + unknownFields.mergeVarintField(4, rawValue); + } else { + bitField0_ |= 0x00000008; + projectNegation_ = value; + } + break; + } + case 40: { + int rawValue = input.readEnum(); + edu.stanford.nlp.pipeline.CoreNLPProtos.NaturalLogicRelation value = edu.stanford.nlp.pipeline.CoreNLPProtos.NaturalLogicRelation.valueOf(rawValue); + if (value == null) { + 
unknownFields.mergeVarintField(5, rawValue); + } else { + bitField0_ |= 0x00000010; + projectAlternation_ = value; + } + break; + } + case 48: { + int rawValue = input.readEnum(); + edu.stanford.nlp.pipeline.CoreNLPProtos.NaturalLogicRelation value = edu.stanford.nlp.pipeline.CoreNLPProtos.NaturalLogicRelation.valueOf(rawValue); + if (value == null) { + unknownFields.mergeVarintField(6, rawValue); + } else { + bitField0_ |= 0x00000020; + projectCover_ = value; + } + break; + } + case 56: { + int rawValue = input.readEnum(); + edu.stanford.nlp.pipeline.CoreNLPProtos.NaturalLogicRelation value = edu.stanford.nlp.pipeline.CoreNLPProtos.NaturalLogicRelation.valueOf(rawValue); + if (value == null) { + unknownFields.mergeVarintField(7, rawValue); + } else { + bitField0_ |= 0x00000040; + projectIndependence_ = value; + } + break; + } + } + } + } + + private int bitField0_; + + // required .edu.stanford.nlp.pipeline.NaturalLogicRelation projectEquivalence = 1; + private edu.stanford.nlp.pipeline.CoreNLPProtos.NaturalLogicRelation projectEquivalence_ = edu.stanford.nlp.pipeline.CoreNLPProtos.NaturalLogicRelation.EQUIVALENCE; + public boolean hasProjectEquivalence() { + return ((bitField0_ & 0x00000001) == 0x00000001); + } + public edu.stanford.nlp.pipeline.CoreNLPProtos.NaturalLogicRelation getProjectEquivalence() { + return projectEquivalence_; + } + public Builder setProjectEquivalence(edu.stanford.nlp.pipeline.CoreNLPProtos.NaturalLogicRelation value) { + if (value == null) { + throw new NullPointerException(); + } + bitField0_ |= 0x00000001; + projectEquivalence_ = value; + onChanged(); + return this; + } + public Builder clearProjectEquivalence() { + bitField0_ = (bitField0_ & ~0x00000001); + projectEquivalence_ = edu.stanford.nlp.pipeline.CoreNLPProtos.NaturalLogicRelation.EQUIVALENCE; + onChanged(); + return this; + } + + // required .edu.stanford.nlp.pipeline.NaturalLogicRelation projectForwardEntailment = 2; + private edu.stanford.nlp.pipeline.CoreNLPProtos.NaturalLogicRelation projectForwardEntailment_ = edu.stanford.nlp.pipeline.CoreNLPProtos.NaturalLogicRelation.EQUIVALENCE; + public boolean hasProjectForwardEntailment() { + return ((bitField0_ & 0x00000002) == 0x00000002); + } + public edu.stanford.nlp.pipeline.CoreNLPProtos.NaturalLogicRelation getProjectForwardEntailment() { + return projectForwardEntailment_; + } + public Builder setProjectForwardEntailment(edu.stanford.nlp.pipeline.CoreNLPProtos.NaturalLogicRelation value) { + if (value == null) { + throw new NullPointerException(); + } + bitField0_ |= 0x00000002; + projectForwardEntailment_ = value; + onChanged(); + return this; + } + public Builder clearProjectForwardEntailment() { + bitField0_ = (bitField0_ & ~0x00000002); + projectForwardEntailment_ = edu.stanford.nlp.pipeline.CoreNLPProtos.NaturalLogicRelation.EQUIVALENCE; + onChanged(); + return this; + } + + // required .edu.stanford.nlp.pipeline.NaturalLogicRelation projectReverseEntailment = 3; + private edu.stanford.nlp.pipeline.CoreNLPProtos.NaturalLogicRelation projectReverseEntailment_ = edu.stanford.nlp.pipeline.CoreNLPProtos.NaturalLogicRelation.EQUIVALENCE; + public boolean hasProjectReverseEntailment() { + return ((bitField0_ & 0x00000004) == 0x00000004); + } + public edu.stanford.nlp.pipeline.CoreNLPProtos.NaturalLogicRelation getProjectReverseEntailment() { + return projectReverseEntailment_; + } + public Builder setProjectReverseEntailment(edu.stanford.nlp.pipeline.CoreNLPProtos.NaturalLogicRelation value) { + if (value == null) { + throw new 
NullPointerException(); + } + bitField0_ |= 0x00000004; + projectReverseEntailment_ = value; + onChanged(); + return this; + } + public Builder clearProjectReverseEntailment() { + bitField0_ = (bitField0_ & ~0x00000004); + projectReverseEntailment_ = edu.stanford.nlp.pipeline.CoreNLPProtos.NaturalLogicRelation.EQUIVALENCE; + onChanged(); + return this; + } + + // required .edu.stanford.nlp.pipeline.NaturalLogicRelation projectNegation = 4; + private edu.stanford.nlp.pipeline.CoreNLPProtos.NaturalLogicRelation projectNegation_ = edu.stanford.nlp.pipeline.CoreNLPProtos.NaturalLogicRelation.EQUIVALENCE; + public boolean hasProjectNegation() { + return ((bitField0_ & 0x00000008) == 0x00000008); + } + public edu.stanford.nlp.pipeline.CoreNLPProtos.NaturalLogicRelation getProjectNegation() { + return projectNegation_; + } + public Builder setProjectNegation(edu.stanford.nlp.pipeline.CoreNLPProtos.NaturalLogicRelation value) { + if (value == null) { + throw new NullPointerException(); + } + bitField0_ |= 0x00000008; + projectNegation_ = value; + onChanged(); + return this; + } + public Builder clearProjectNegation() { + bitField0_ = (bitField0_ & ~0x00000008); + projectNegation_ = edu.stanford.nlp.pipeline.CoreNLPProtos.NaturalLogicRelation.EQUIVALENCE; + onChanged(); + return this; + } + + // required .edu.stanford.nlp.pipeline.NaturalLogicRelation projectAlternation = 5; + private edu.stanford.nlp.pipeline.CoreNLPProtos.NaturalLogicRelation projectAlternation_ = edu.stanford.nlp.pipeline.CoreNLPProtos.NaturalLogicRelation.EQUIVALENCE; + public boolean hasProjectAlternation() { + return ((bitField0_ & 0x00000010) == 0x00000010); + } + public edu.stanford.nlp.pipeline.CoreNLPProtos.NaturalLogicRelation getProjectAlternation() { + return projectAlternation_; + } + public Builder setProjectAlternation(edu.stanford.nlp.pipeline.CoreNLPProtos.NaturalLogicRelation value) { + if (value == null) { + throw new NullPointerException(); + } + bitField0_ |= 0x00000010; + projectAlternation_ = value; + onChanged(); + return this; + } + public Builder clearProjectAlternation() { + bitField0_ = (bitField0_ & ~0x00000010); + projectAlternation_ = edu.stanford.nlp.pipeline.CoreNLPProtos.NaturalLogicRelation.EQUIVALENCE; + onChanged(); + return this; + } + + // required .edu.stanford.nlp.pipeline.NaturalLogicRelation projectCover = 6; + private edu.stanford.nlp.pipeline.CoreNLPProtos.NaturalLogicRelation projectCover_ = edu.stanford.nlp.pipeline.CoreNLPProtos.NaturalLogicRelation.EQUIVALENCE; + public boolean hasProjectCover() { + return ((bitField0_ & 0x00000020) == 0x00000020); + } + public edu.stanford.nlp.pipeline.CoreNLPProtos.NaturalLogicRelation getProjectCover() { + return projectCover_; + } + public Builder setProjectCover(edu.stanford.nlp.pipeline.CoreNLPProtos.NaturalLogicRelation value) { + if (value == null) { + throw new NullPointerException(); + } + bitField0_ |= 0x00000020; + projectCover_ = value; + onChanged(); + return this; + } + public Builder clearProjectCover() { + bitField0_ = (bitField0_ & ~0x00000020); + projectCover_ = edu.stanford.nlp.pipeline.CoreNLPProtos.NaturalLogicRelation.EQUIVALENCE; + onChanged(); + return this; + } + + // required .edu.stanford.nlp.pipeline.NaturalLogicRelation projectIndependence = 7; + private edu.stanford.nlp.pipeline.CoreNLPProtos.NaturalLogicRelation projectIndependence_ = edu.stanford.nlp.pipeline.CoreNLPProtos.NaturalLogicRelation.EQUIVALENCE; + public boolean hasProjectIndependence() { + return ((bitField0_ & 0x00000040) == 0x00000040); + } + public 
edu.stanford.nlp.pipeline.CoreNLPProtos.NaturalLogicRelation getProjectIndependence() { + return projectIndependence_; + } + public Builder setProjectIndependence(edu.stanford.nlp.pipeline.CoreNLPProtos.NaturalLogicRelation value) { + if (value == null) { + throw new NullPointerException(); + } + bitField0_ |= 0x00000040; + projectIndependence_ = value; + onChanged(); + return this; + } + public Builder clearProjectIndependence() { + bitField0_ = (bitField0_ & ~0x00000040); + projectIndependence_ = edu.stanford.nlp.pipeline.CoreNLPProtos.NaturalLogicRelation.EQUIVALENCE; + onChanged(); + return this; + } + + // @@protoc_insertion_point(builder_scope:edu.stanford.nlp.pipeline.Polarity) + } + + static { + defaultInstance = new Polarity(true); + defaultInstance.initFields(); + } + + // @@protoc_insertion_point(class_scope:edu.stanford.nlp.pipeline.Polarity) + } + + private static com.google.protobuf.Descriptors.Descriptor + internal_static_edu_stanford_nlp_pipeline_Document_descriptor; + private static + com.google.protobuf.GeneratedMessage.FieldAccessorTable + internal_static_edu_stanford_nlp_pipeline_Document_fieldAccessorTable; + private static com.google.protobuf.Descriptors.Descriptor + internal_static_edu_stanford_nlp_pipeline_Sentence_descriptor; + private static + com.google.protobuf.GeneratedMessage.FieldAccessorTable + internal_static_edu_stanford_nlp_pipeline_Sentence_fieldAccessorTable; + private static com.google.protobuf.Descriptors.Descriptor + internal_static_edu_stanford_nlp_pipeline_Token_descriptor; + private static + com.google.protobuf.GeneratedMessage.FieldAccessorTable + internal_static_edu_stanford_nlp_pipeline_Token_fieldAccessorTable; + private static com.google.protobuf.Descriptors.Descriptor + internal_static_edu_stanford_nlp_pipeline_ParseTree_descriptor; + private static + com.google.protobuf.GeneratedMessage.FieldAccessorTable + internal_static_edu_stanford_nlp_pipeline_ParseTree_fieldAccessorTable; + private static com.google.protobuf.Descriptors.Descriptor + internal_static_edu_stanford_nlp_pipeline_DependencyGraph_descriptor; + private static + com.google.protobuf.GeneratedMessage.FieldAccessorTable + internal_static_edu_stanford_nlp_pipeline_DependencyGraph_fieldAccessorTable; + private static com.google.protobuf.Descriptors.Descriptor + internal_static_edu_stanford_nlp_pipeline_DependencyGraph_Node_descriptor; + private static + com.google.protobuf.GeneratedMessage.FieldAccessorTable + internal_static_edu_stanford_nlp_pipeline_DependencyGraph_Node_fieldAccessorTable; + private static com.google.protobuf.Descriptors.Descriptor + internal_static_edu_stanford_nlp_pipeline_DependencyGraph_Edge_descriptor; + private static + com.google.protobuf.GeneratedMessage.FieldAccessorTable + internal_static_edu_stanford_nlp_pipeline_DependencyGraph_Edge_fieldAccessorTable; + private static com.google.protobuf.Descriptors.Descriptor + internal_static_edu_stanford_nlp_pipeline_CorefChain_descriptor; + private static + com.google.protobuf.GeneratedMessage.FieldAccessorTable + internal_static_edu_stanford_nlp_pipeline_CorefChain_fieldAccessorTable; + private static com.google.protobuf.Descriptors.Descriptor + internal_static_edu_stanford_nlp_pipeline_CorefChain_CorefMention_descriptor; + private static + com.google.protobuf.GeneratedMessage.FieldAccessorTable + internal_static_edu_stanford_nlp_pipeline_CorefChain_CorefMention_fieldAccessorTable; + private static com.google.protobuf.Descriptors.Descriptor + internal_static_edu_stanford_nlp_pipeline_Timex_descriptor; + private 
static + com.google.protobuf.GeneratedMessage.FieldAccessorTable + internal_static_edu_stanford_nlp_pipeline_Timex_fieldAccessorTable; + private static com.google.protobuf.Descriptors.Descriptor + internal_static_edu_stanford_nlp_pipeline_Entity_descriptor; + private static + com.google.protobuf.GeneratedMessage.FieldAccessorTable + internal_static_edu_stanford_nlp_pipeline_Entity_fieldAccessorTable; + private static com.google.protobuf.Descriptors.Descriptor + internal_static_edu_stanford_nlp_pipeline_Relation_descriptor; + private static + com.google.protobuf.GeneratedMessage.FieldAccessorTable + internal_static_edu_stanford_nlp_pipeline_Relation_fieldAccessorTable; + private static com.google.protobuf.Descriptors.Descriptor + internal_static_edu_stanford_nlp_pipeline_Operator_descriptor; + private static + com.google.protobuf.GeneratedMessage.FieldAccessorTable + internal_static_edu_stanford_nlp_pipeline_Operator_fieldAccessorTable; + private static com.google.protobuf.Descriptors.Descriptor + internal_static_edu_stanford_nlp_pipeline_Polarity_descriptor; + private static + com.google.protobuf.GeneratedMessage.FieldAccessorTable + internal_static_edu_stanford_nlp_pipeline_Polarity_fieldAccessorTable; + + public static com.google.protobuf.Descriptors.FileDescriptor + getDescriptor() { + return descriptor; + } + private static com.google.protobuf.Descriptors.FileDescriptor + descriptor; + static { + java.lang.String[] descriptorData = { + "\n\rCoreNLP.proto\022\031edu.stanford.nlp.pipeli" + + "ne\"\335\001\n\010Document\022\014\n\004text\030\001 \002(\t\0225\n\010sentenc" + + "e\030\002 \003(\0132#.edu.stanford.nlp.pipeline.Sent" + + "ence\0229\n\ncorefChain\030\003 \003(\0132%.edu.stanford." + + "nlp.pipeline.CorefChain\022\r\n\005docID\030\004 \001(\t\022;" + + "\n\021sentencelessToken\030\005 \003(\0132 .edu.stanford" + ".nlp.pipeline.Token*\005\010d\020\200\002\"\316\006\n\010Sentence\022" + "/\n\005token\030\001 \003(\0132 .edu.stanford.nlp.pipeli" + "ne.Token\022\030\n\020tokenOffsetBegin\030\002 \002(\r\022\026\n\016to" + @@ -14528,7 +16705,7 @@ void setSubtype(com.google.protobuf.ByteString value) { " \001(\010\0221\n\006entity\0304 \003(\0132!.edu.stanford.nlp." + "pipeline.Entity\0225\n\010relation\0305 \003(\0132#.edu." 
+ "stanford.nlp.pipeline.Relation\022$\n\034hasNum" + - "erizedTokensAnnotation\0306 \001(\010*\005\010d\020\200\002\"\221\004\n\005" + + "erizedTokensAnnotation\0306 \001(\010*\005\010d\020\200\002\"\226\005\n\005" + "Token\022\014\n\004word\030\001 \002(\t\022\013\n\003pos\030\002 \001(\t\022\r\n\005valu" + "e\030\003 \001(\t\022\020\n\010category\030\004 \001(\t\022\016\n\006before\030\005 \001(", "\t\022\r\n\005after\030\006 \001(\t\022\024\n\014originalText\030\007 \001(\t\022\013" + @@ -14540,48 +16717,77 @@ void setSubtype(com.google.protobuf.ByteString value) { "x\030\022 \001(\r\0224\n\ntimexValue\030\023 \001(\0132 .edu.stanfo" + "rd.nlp.pipeline.Timex\022\025\n\rhasXmlContext\030\025" + " \001(\010\022\022\n\nxmlContext\030\026 \003(\t\022\026\n\016corefCluster" + - "ID\030\027 \001(\r\022\016\n\006answer\030\030 \001(\t\022\016\n\006gender\0303 \001(\t", - "\022\020\n\010trueCase\0304 \001(\t\022\024\n\014trueCaseText\0305 \001(\t" + - "*\005\010d\020\200\002\"\216\001\n\tParseTree\0223\n\005child\030\001 \003(\0132$.e" + - "du.stanford.nlp.pipeline.ParseTree\022\r\n\005va" + - "lue\030\002 \001(\t\022\027\n\017yieldBeginIndex\030\003 \001(\r\022\025\n\ryi" + - "eldEndIndex\030\004 \001(\r\022\r\n\005score\030\005 \001(\001\"\215\003\n\017Dep" + - "endencyGraph\022=\n\004node\030\001 \003(\0132/.edu.stanfor" + - "d.nlp.pipeline.DependencyGraph.Node\022=\n\004e" + - "dge\030\002 \003(\0132/.edu.stanford.nlp.pipeline.De" + - "pendencyGraph.Edge\022\020\n\004root\030\003 \003(\rB\002\020\001\032D\n\004" + - "Node\022\025\n\rsentenceIndex\030\001 \002(\r\022\r\n\005index\030\002 \002", - "(\r\022\026\n\016copyAnnotation\030\003 \001(\r\032\243\001\n\004Edge\022\016\n\006s" + - "ource\030\001 \002(\r\022\016\n\006target\030\002 \002(\r\022\013\n\003dep\030\003 \001(\t" + - "\022\017\n\007isExtra\030\004 \001(\010\022\022\n\nsourceCopy\030\005 \001(\r\022\022\n" + - "\ntargetCopy\030\006 \001(\r\0225\n\010language\030\007 \001(\0162#.ed" + - "u.stanford.nlp.pipeline.Language\"\306\002\n\nCor" + - "efChain\022\017\n\007chainID\030\001 \002(\005\022C\n\007mention\030\002 \003(" + - "\01322.edu.stanford.nlp.pipeline.CorefChain" + - ".CorefMention\022\026\n\016representative\030\003 \002(\r\032\311\001" + - "\n\014CorefMention\022\021\n\tmentionID\030\001 \001(\005\022\023\n\013men" + - "tionType\030\002 \001(\t\022\016\n\006number\030\003 \001(\t\022\016\n\006gender", - "\030\004 \001(\t\022\017\n\007animacy\030\005 \001(\t\022\022\n\nstartIndex\030\006 " + - "\001(\r\022\020\n\010endIndex\030\007 \001(\r\022\021\n\theadIndex\030\t \001(\r" + - "\022\025\n\rsentenceIndex\030\n \001(\r\022\020\n\010position\030\013 \001(" + - "\r\"w\n\005Timex\022\r\n\005value\030\001 \001(\t\022\020\n\010altValue\030\002 " + - "\001(\t\022\014\n\004text\030\003 \001(\t\022\014\n\004type\030\004 \001(\t\022\013\n\003tid\030\005" + - " \001(\t\022\022\n\nbeginPoint\030\006 \001(\r\022\020\n\010endPoint\030\007 \001" + - "(\r\"\333\001\n\006Entity\022\021\n\theadStart\030\006 \001(\r\022\017\n\007head" + - "End\030\007 \001(\r\022\023\n\013mentionType\030\010 \001(\t\022\026\n\016normal" + - "izedName\030\t \001(\t\022\026\n\016headTokenIndex\030\n \001(\r\022\017" + - "\n\007corefID\030\013 \001(\t\022\020\n\010objectID\030\001 \001(\t\022\023\n\013ext", - "entStart\030\002 \001(\r\022\021\n\textentEnd\030\003 \001(\r\022\014\n\004typ" + - "e\030\004 \001(\t\022\017\n\007subtype\030\005 \001(\t\"\267\001\n\010Relation\022\017\n" + - "\007argName\030\006 \003(\t\022.\n\003arg\030\007 
\003(\0132!.edu.stanfo" + - "rd.nlp.pipeline.Entity\022\021\n\tsignature\030\010 \001(" + - "\t\022\020\n\010objectID\030\001 \001(\t\022\023\n\013extentStart\030\002 \001(\r" + - "\022\021\n\textentEnd\030\003 \001(\r\022\014\n\004type\030\004 \001(\t\022\017\n\007sub" + - "type\030\005 \001(\t*n\n\010Language\022\013\n\007Unknown\020\000\022\n\n\006A" + - "rabic\020\001\022\013\n\007Chinese\020\002\022\013\n\007English\020\003\022\n\n\006Ger" + - "man\020\004\022\n\n\006French\020\005\022\n\n\006Hebrew\020\006\022\013\n\007Spanish" + - "\020\007B*\n\031edu.stanford.nlp.pipelineB\rCoreNLP", - "Protos" + "ID\030\027 \001(\r\022\016\n\006answer\030\030 \001(\t\022\025\n\rheadWordInde", + "x\030\032 \001(\r\0225\n\010operator\030\033 \001(\0132#.edu.stanford" + + ".nlp.pipeline.Operator\0225\n\010polarity\030\034 \001(\013" + + "2#.edu.stanford.nlp.pipeline.Polarity\022\016\n" + + "\006gender\0303 \001(\t\022\020\n\010trueCase\0304 \001(\t\022\024\n\014trueC" + + "aseText\0305 \001(\t*\005\010d\020\200\002\"\307\001\n\tParseTree\0223\n\005ch" + + "ild\030\001 \003(\0132$.edu.stanford.nlp.pipeline.Pa" + + "rseTree\022\r\n\005value\030\002 \001(\t\022\027\n\017yieldBeginInde" + + "x\030\003 \001(\r\022\025\n\ryieldEndIndex\030\004 \001(\r\022\r\n\005score\030" + + "\005 \001(\001\0227\n\tsentiment\030\006 \001(\0162$.edu.stanford." + + "nlp.pipeline.Sentiment\"\215\003\n\017DependencyGra", + "ph\022=\n\004node\030\001 \003(\0132/.edu.stanford.nlp.pipe" + + "line.DependencyGraph.Node\022=\n\004edge\030\002 \003(\0132" + + "/.edu.stanford.nlp.pipeline.DependencyGr" + + "aph.Edge\022\020\n\004root\030\003 \003(\rB\002\020\001\032D\n\004Node\022\025\n\rse" + + "ntenceIndex\030\001 \002(\r\022\r\n\005index\030\002 \002(\r\022\026\n\016copy" + + "Annotation\030\003 \001(\r\032\243\001\n\004Edge\022\016\n\006source\030\001 \002(" + + "\r\022\016\n\006target\030\002 \002(\r\022\013\n\003dep\030\003 \001(\t\022\017\n\007isExtr" + + "a\030\004 \001(\010\022\022\n\nsourceCopy\030\005 \001(\r\022\022\n\ntargetCop" + + "y\030\006 \001(\r\0225\n\010language\030\007 \001(\0162#.edu.stanford" + + ".nlp.pipeline.Language\"\306\002\n\nCorefChain\022\017\n", + "\007chainID\030\001 \002(\005\022C\n\007mention\030\002 \003(\01322.edu.st" + + "anford.nlp.pipeline.CorefChain.CorefMent" + + "ion\022\026\n\016representative\030\003 \002(\r\032\311\001\n\014CorefMen" + + "tion\022\021\n\tmentionID\030\001 \001(\005\022\023\n\013mentionType\030\002" + + " \001(\t\022\016\n\006number\030\003 \001(\t\022\016\n\006gender\030\004 \001(\t\022\017\n\007" + + "animacy\030\005 \001(\t\022\022\n\nstartIndex\030\006 \001(\r\022\020\n\010end" + + "Index\030\007 \001(\r\022\021\n\theadIndex\030\t \001(\r\022\025\n\rsenten" + + "ceIndex\030\n \001(\r\022\020\n\010position\030\013 \001(\r\"w\n\005Timex" + + "\022\r\n\005value\030\001 \001(\t\022\020\n\010altValue\030\002 \001(\t\022\014\n\004tex" + + "t\030\003 \001(\t\022\014\n\004type\030\004 \001(\t\022\013\n\003tid\030\005 \001(\t\022\022\n\nbe", + "ginPoint\030\006 \001(\r\022\020\n\010endPoint\030\007 \001(\r\"\333\001\n\006Ent" + + "ity\022\021\n\theadStart\030\006 \001(\r\022\017\n\007headEnd\030\007 \001(\r\022" + + "\023\n\013mentionType\030\010 \001(\t\022\026\n\016normalizedName\030\t" + + " \001(\t\022\026\n\016headTokenIndex\030\n \001(\r\022\017\n\007corefID\030" + + "\013 \001(\t\022\020\n\010objectID\030\001 \001(\t\022\023\n\013extentStart\030\002" + + " \001(\r\022\021\n\textentEnd\030\003 \001(\r\022\014\n\004type\030\004 \001(\t\022\017\n" + + 
"\007subtype\030\005 \001(\t\"\267\001\n\010Relation\022\017\n\007argName\030\006" + + " \003(\t\022.\n\003arg\030\007 \003(\0132!.edu.stanford.nlp.pip" + + "eline.Entity\022\021\n\tsignature\030\010 \001(\t\022\020\n\010objec" + + "tID\030\001 \001(\t\022\023\n\013extentStart\030\002 \001(\r\022\021\n\textent", + "End\030\003 \001(\r\022\014\n\004type\030\004 \001(\t\022\017\n\007subtype\030\005 \001(\t" + + "\"\262\001\n\010Operator\022\014\n\004name\030\001 \002(\t\022\033\n\023quantifie" + + "rSpanBegin\030\002 \002(\005\022\031\n\021quantifierSpanEnd\030\003 " + + "\002(\005\022\030\n\020subjectSpanBegin\030\004 \002(\005\022\026\n\016subject" + + "SpanEnd\030\005 \002(\005\022\027\n\017objectSpanBegin\030\006 \002(\005\022\025" + + "\n\robjectSpanEnd\030\007 \002(\005\"\251\004\n\010Polarity\022K\n\022pr" + + "ojectEquivalence\030\001 \002(\0162/.edu.stanford.nl" + + "p.pipeline.NaturalLogicRelation\022Q\n\030proje" + + "ctForwardEntailment\030\002 \002(\0162/.edu.stanford" + + ".nlp.pipeline.NaturalLogicRelation\022Q\n\030pr", + "ojectReverseEntailment\030\003 \002(\0162/.edu.stanf" + + "ord.nlp.pipeline.NaturalLogicRelation\022H\n" + + "\017projectNegation\030\004 \002(\0162/.edu.stanford.nl" + + "p.pipeline.NaturalLogicRelation\022K\n\022proje" + + "ctAlternation\030\005 \002(\0162/.edu.stanford.nlp.p" + + "ipeline.NaturalLogicRelation\022E\n\014projectC" + + "over\030\006 \002(\0162/.edu.stanford.nlp.pipeline.N" + + "aturalLogicRelation\022L\n\023projectIndependen" + + "ce\030\007 \002(\0162/.edu.stanford.nlp.pipeline.Nat" + + "uralLogicRelation*n\n\010Language\022\013\n\007Unknown", + "\020\000\022\n\n\006Arabic\020\001\022\013\n\007Chinese\020\002\022\013\n\007English\020\003" + + "\022\n\n\006German\020\004\022\n\n\006French\020\005\022\n\n\006Hebrew\020\006\022\013\n\007" + + "Spanish\020\007*h\n\tSentiment\022\023\n\017STRONG_NEGATIV" + + "E\020\000\022\021\n\rWEAK_NEGATIVE\020\001\022\013\n\007NEUTRAL\020\002\022\021\n\rW" + + "EAK_POSITIVE\020\003\022\023\n\017STRONG_POSITIVE\020\004*\223\001\n\024" + + "NaturalLogicRelation\022\017\n\013EQUIVALENCE\020\000\022\026\n" + + "\022FORWARD_ENTAILMENT\020\001\022\026\n\022REVERSE_ENTAILM" + + "ENT\020\002\022\014\n\010NEGATION\020\003\022\017\n\013ALTERNATION\020\004\022\t\n\005" + + "COVER\020\005\022\020\n\014INDEPENDENCE\020\006B*\n\031edu.stanfor" + + "d.nlp.pipelineB\rCoreNLPProtos" }; com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner assigner = new com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner() { @@ -14609,7 +16815,7 @@ public com.google.protobuf.ExtensionRegistry assignDescriptors( internal_static_edu_stanford_nlp_pipeline_Token_fieldAccessorTable = new com.google.protobuf.GeneratedMessage.FieldAccessorTable( internal_static_edu_stanford_nlp_pipeline_Token_descriptor, - new java.lang.String[] { "Word", "Pos", "Value", "Category", "Before", "After", "OriginalText", "Ner", "NormalizedNER", "Lemma", "BeginChar", "EndChar", "Utterance", "Speaker", "BeginIndex", "EndIndex", "TokenBeginIndex", "TokenEndIndex", "TimexValue", "HasXmlContext", "XmlContext", "CorefClusterID", "Answer", "Gender", "TrueCase", "TrueCaseText", }, + new java.lang.String[] { "Word", "Pos", "Value", "Category", "Before", "After", "OriginalText", "Ner", "NormalizedNER", "Lemma", "BeginChar", "EndChar", "Utterance", "Speaker", "BeginIndex", "EndIndex", "TokenBeginIndex", "TokenEndIndex", "TimexValue", "HasXmlContext", "XmlContext", "CorefClusterID", "Answer", "HeadWordIndex", "Operator", "Polarity", 
"Gender", "TrueCase", "TrueCaseText", }, edu.stanford.nlp.pipeline.CoreNLPProtos.Token.class, edu.stanford.nlp.pipeline.CoreNLPProtos.Token.Builder.class); internal_static_edu_stanford_nlp_pipeline_ParseTree_descriptor = @@ -14617,7 +16823,7 @@ public com.google.protobuf.ExtensionRegistry assignDescriptors( internal_static_edu_stanford_nlp_pipeline_ParseTree_fieldAccessorTable = new com.google.protobuf.GeneratedMessage.FieldAccessorTable( internal_static_edu_stanford_nlp_pipeline_ParseTree_descriptor, - new java.lang.String[] { "Child", "Value", "YieldBeginIndex", "YieldEndIndex", "Score", }, + new java.lang.String[] { "Child", "Value", "YieldBeginIndex", "YieldEndIndex", "Score", "Sentiment", }, edu.stanford.nlp.pipeline.CoreNLPProtos.ParseTree.class, edu.stanford.nlp.pipeline.CoreNLPProtos.ParseTree.Builder.class); internal_static_edu_stanford_nlp_pipeline_DependencyGraph_descriptor = @@ -14684,6 +16890,22 @@ public com.google.protobuf.ExtensionRegistry assignDescriptors( new java.lang.String[] { "ArgName", "Arg", "Signature", "ObjectID", "ExtentStart", "ExtentEnd", "Type", "Subtype", }, edu.stanford.nlp.pipeline.CoreNLPProtos.Relation.class, edu.stanford.nlp.pipeline.CoreNLPProtos.Relation.Builder.class); + internal_static_edu_stanford_nlp_pipeline_Operator_descriptor = + getDescriptor().getMessageTypes().get(9); + internal_static_edu_stanford_nlp_pipeline_Operator_fieldAccessorTable = new + com.google.protobuf.GeneratedMessage.FieldAccessorTable( + internal_static_edu_stanford_nlp_pipeline_Operator_descriptor, + new java.lang.String[] { "Name", "QuantifierSpanBegin", "QuantifierSpanEnd", "SubjectSpanBegin", "SubjectSpanEnd", "ObjectSpanBegin", "ObjectSpanEnd", }, + edu.stanford.nlp.pipeline.CoreNLPProtos.Operator.class, + edu.stanford.nlp.pipeline.CoreNLPProtos.Operator.Builder.class); + internal_static_edu_stanford_nlp_pipeline_Polarity_descriptor = + getDescriptor().getMessageTypes().get(10); + internal_static_edu_stanford_nlp_pipeline_Polarity_fieldAccessorTable = new + com.google.protobuf.GeneratedMessage.FieldAccessorTable( + internal_static_edu_stanford_nlp_pipeline_Polarity_descriptor, + new java.lang.String[] { "ProjectEquivalence", "ProjectForwardEntailment", "ProjectReverseEntailment", "ProjectNegation", "ProjectAlternation", "ProjectCover", "ProjectIndependence", }, + edu.stanford.nlp.pipeline.CoreNLPProtos.Polarity.class, + edu.stanford.nlp.pipeline.CoreNLPProtos.Polarity.Builder.class); return null; } }; diff --git a/src/edu/stanford/nlp/pipeline/ParserAnnotator.java b/src/edu/stanford/nlp/pipeline/ParserAnnotator.java index f94e1d9748..357b192cec 100644 --- a/src/edu/stanford/nlp/pipeline/ParserAnnotator.java +++ b/src/edu/stanford/nlp/pipeline/ParserAnnotator.java @@ -63,9 +63,6 @@ public class ParserAnnotator extends SentenceAnnotator { private final boolean saveBinaryTrees; - /** If true, don't re-annotate sentences that already have a tree annotation */ - private final boolean noSquash; - public ParserAnnotator(boolean verbose, int maxSent) { this(System.getProperty("parse.model", LexicalizedParser.DEFAULT_PARSER_LOC), verbose, maxSent, StringUtils.EMPTY_STRING_ARRAY); } @@ -96,7 +93,6 @@ public ParserAnnotator(ParserGrammar parser, boolean verbose, int maxSent, Funct } this.nThreads = 1; this.saveBinaryTrees = false; - this.noSquash = false; } @@ -141,7 +137,6 @@ public ParserAnnotator(String annotatorName, Properties props) { this.nThreads = PropertiesUtils.getInt(props, annotatorName + ".nthreads", PropertiesUtils.getInt(props, "nthreads", 1)); boolean usesBinary = 
StanfordCoreNLP.usesBinaryTrees(props); this.saveBinaryTrees = PropertiesUtils.getBool(props, annotatorName + ".binaryTrees", usesBinary); - this.noSquash = PropertiesUtils.getBool(props, annotatorName + ".nosquash", false); } public static String signature(String annotatorName, Properties props) { @@ -208,13 +203,6 @@ protected long maxTime() { @Override protected void doOneSentence(Annotation annotation, CoreMap sentence) { - // If "noSquash" is set, don't re-annotate sentences which already have a tree annotation - if (noSquash && - sentence.get(TreeCoreAnnotations.TreeAnnotation.class) != null && - !"X".equalsIgnoreCase(sentence.get(TreeCoreAnnotations.TreeAnnotation.class).label().value())) { - return; - } - final List words = sentence.get(CoreAnnotations.TokensAnnotation.class); if (VERBOSE) { System.err.println("Parsing: " + words); diff --git a/src/edu/stanford/nlp/pipeline/ProtobufAnnotationSerializer.java b/src/edu/stanford/nlp/pipeline/ProtobufAnnotationSerializer.java index ac875ce348..d5acc8b419 100644 --- a/src/edu/stanford/nlp/pipeline/ProtobufAnnotationSerializer.java +++ b/src/edu/stanford/nlp/pipeline/ProtobufAnnotationSerializer.java @@ -12,6 +12,8 @@ import edu.stanford.nlp.ling.CoreAnnotation; import edu.stanford.nlp.ling.CoreLabel; import edu.stanford.nlp.ling.IndexedWord; +import edu.stanford.nlp.naturalli.*; +import edu.stanford.nlp.neural.rnn.RNNCoreAnnotations; import edu.stanford.nlp.semgraph.SemanticGraph; import edu.stanford.nlp.semgraph.SemanticGraphEdge; import edu.stanford.nlp.sentiment.SentimentCoreAnnotations; @@ -34,7 +36,7 @@ * A serializer using Google's protocol buffer format. * The files produced by this serializer, in addition to being language-independent, * are a little over 10% the size and 4x faster to read+write versus the default Java serialization - * (see GenericAnnotationSerializer), when both files are compressed with gzip. + * (see {@link GenericAnnotationSerializer}), when both files are compressed with gzip. *

* * @@ -116,6 +118,7 @@ * * * + * * @author Gabor Angeli */ public class ProtobufAnnotationSerializer extends AnnotationSerializer { @@ -286,6 +289,8 @@ protected CoreNLPProtos.Token.Builder toProtoBuilder(CoreLabel coreLabel, Set files, int numThreads String defaultExtension; switch (outputFormat) { case XML: defaultExtension = ".xml"; break; + case JSON: defaultExtension = ".json"; break; + case CONLL: defaultExtension = ".conll"; break; case TEXT: defaultExtension = ".out"; break; case SERIALIZED: defaultExtension = ".ser.gz"; break; default: throw new IllegalArgumentException("Unknown output format " + outputFormat); } final String serializerClass = properties.getProperty("serializer"); final String inputSerializerClass = properties.getProperty("inputSerializer", serializerClass); - final String inputSerializerName = (serializerClass == inputSerializerClass)? "serializer":"inputSerializer"; + final String inputSerializerName = (serializerClass.equals(inputSerializerClass))? "serializer":"inputSerializer"; final String outputSerializerClass = properties.getProperty("outputSerializer", serializerClass); - final String outputSerializerName = (serializerClass == outputSerializerClass)? "serializer":"outputSerializer"; + final String outputSerializerName = (serializerClass.equals(outputSerializerClass))? "serializer":"outputSerializer"; final String extension = properties.getProperty("outputExtension", defaultExtension); final boolean replaceExtension = Boolean.parseBoolean(properties.getProperty("replaceExtension", "false"));
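
The generated Polarity message above follows the standard protobuf-java builder pattern: all seven projection fields are declared required, so a builder must set each of them before build() will validate. A minimal round-trip sketch using only the generated API shown above (the class name is illustrative, not part of the patch):

import edu.stanford.nlp.pipeline.CoreNLPProtos;
import edu.stanford.nlp.pipeline.CoreNLPProtos.NaturalLogicRelation;

/** Illustrative only: round-trips a Polarity message through the generated API. */
public class PolarityProtoExample {
  public static void main(String[] args) throws Exception {
    // All seven projection fields are required; build() throws an
    // uninitialized-message exception if any of them is missing.
    CoreNLPProtos.Polarity polarity = CoreNLPProtos.Polarity.newBuilder()
        .setProjectEquivalence(NaturalLogicRelation.EQUIVALENCE)
        .setProjectForwardEntailment(NaturalLogicRelation.FORWARD_ENTAILMENT)
        .setProjectReverseEntailment(NaturalLogicRelation.REVERSE_ENTAILMENT)
        .setProjectNegation(NaturalLogicRelation.NEGATION)
        .setProjectAlternation(NaturalLogicRelation.ALTERNATION)
        .setProjectCover(NaturalLogicRelation.COVER)
        .setProjectIndependence(NaturalLogicRelation.INDEPENDENCE)
        .build();

    // Serialize to the wire format and parse it back.
    byte[] wire = polarity.toByteArray();
    CoreNLPProtos.Polarity parsed = CoreNLPProtos.Polarity.parseFrom(wire);
    System.out.println(parsed.getProjectNegation());   // NEGATION
  }
}

The streaming variants generated above behave the same way, except that parseDelimitedFrom returns null at end of stream instead of throwing.
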
diff --git a/src/edu/stanford/nlp/pipeline/TokenizerAnnotator.java b/src/edu/stanford/nlp/pipeline/TokenizerAnnotator.java index 3986c59a21..5c5c396187 100644 --- a/src/edu/stanford/nlp/pipeline/TokenizerAnnotator.java +++ b/src/edu/stanford/nlp/pipeline/TokenizerAnnotator.java @@ -17,9 +17,8 @@ import edu.stanford.nlp.process.WhitespaceTokenizer; import edu.stanford.nlp.international.spanish.process.SpanishTokenizer; import edu.stanford.nlp.international.french.process.FrenchTokenizer; - import edu.stanford.nlp.util.Generics; -import edu.stanford.nlp.util.PropertiesUtils; + /** * This class will PTB tokenize the input. It assumes that the original @@ -46,7 +45,7 @@ public enum TokenizerType { German ("de", null, "invertible,ptb3Escaping=true"), French ("fr", "FrenchTokenizer", ""), Whitespace (null, "WhitespaceTokenizer", ""); - + private final String abbreviation; private final String className; private final String defaultOptions; @@ -93,7 +92,7 @@ public static TokenizerType getTokenizerType(Properties props) { String tokClass = props.getProperty("tokenize.class", null); boolean whitespace = Boolean.valueOf(props.getProperty("tokenize.whitespace", "false")); String language = props.getProperty("tokenize.language", null); - + if(whitespace) { return Whitespace; } @@ -105,7 +104,7 @@ public static TokenizerType getTokenizerType(Properties props) { } return type; } - + if (language != null) { TokenizerType type = nameToTokenizerMap.get(language.toUpperCase()); if (type == null) { @@ -117,63 +116,63 @@ public static TokenizerType getTokenizerType(Properties props) { return Unspecified; } } // end enum TokenizerType - + public static final String EOL_PROPERTY = "tokenize.keepeol"; - + private final boolean VERBOSE; private final TokenizerFactory factory; - + // CONSTRUCTORS - + public TokenizerAnnotator() { this(true); } - + public TokenizerAnnotator(boolean verbose) { this(verbose, TokenizerType.English); - } - + } + public TokenizerAnnotator(String lang) { this(true, lang, null); } - + public TokenizerAnnotator(boolean verbose, TokenizerType lang) { this(verbose, lang.toString()); - } - + } + public TokenizerAnnotator(boolean verbose, String lang) { this(verbose, lang, null); } - + public TokenizerAnnotator(boolean verbose, String lang, String options) { VERBOSE = verbose; Properties props = new Properties(); if (lang != null) { props.setProperty("tokenize.language", lang); } - + TokenizerType type = TokenizerType.getTokenizerType(props); factory = initFactory(type, props, options); } - + public TokenizerAnnotator(boolean verbose, Properties props) { this(verbose, props, null); } - + public TokenizerAnnotator(boolean verbose, Properties props, String options) { VERBOSE = verbose; if (props == null) { props = new Properties(); } - + TokenizerType type = TokenizerType.getTokenizerType(props); factory = initFactory(type, props, options); }
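
The constructors above let the annotator be used standalone, outside a full StanfordCoreNLP pipeline. A small usage sketch, assuming the Spanish abbreviation "es" registered by the enum above (the class name and sample text are illustrative, not part of the patch):

import java.util.Properties;

import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.TokenizerAnnotator;

/** Illustrative only: runs the TokenizerAnnotator by itself. */
public class TokenizerAnnotatorExample {
  public static void main(String[] args) {
    // Select the tokenizer through the same property the annotator reads.
    Properties props = new Properties();
    props.setProperty("tokenize.language", "es");  // routes to SpanishTokenizer

    TokenizerAnnotator annotator = new TokenizerAnnotator(false, props);
    Annotation annotation = new Annotation("El gato duerme.");
    annotator.annotate(annotation);

    // The annotator stores its output under TokensAnnotation.
    for (CoreLabel token : annotation.get(CoreAnnotations.TokensAnnotation.class)) {
      System.out.println(token.word());
    }
  }
}
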
- - /** + + /** * initFactory returns the right type of TokenizerFactory based on the options in the properties file * and the type. When adding a new Tokenizer, modify TokenizerType.getTokenizerType() to retrieve - your tokenizer from the properties file, and then add a class is the switch structure here to + your tokenizer from the properties file, and then add a case in the switch structure here to * instantiate the new Tokenizer type. * * @param type the TokenizerType @@ -183,15 +182,15 @@ public TokenizerAnnotator(boolean verbose, Properties props, String options) { private TokenizerFactory initFactory(TokenizerType type, Properties props, String extraOptions) throws IllegalArgumentException{ TokenizerFactory factory; String options = props.getProperty("tokenize.options", null); - + // set it to the equivalent of both extraOptions and options - // TODO: maybe we should always have getDefaultOptions() and - // expect the user to turn off default options. That would // require all options to have negated options, but // currently there are some which don't have that if (options == null) { options = type.getDefaultOptions(); - } + } if (extraOptions != null) { if (extraOptions.endsWith(",")) { options = extraOptions + options; @@ -199,37 +198,37 @@ private TokenizerFactory initFactory(TokenizerType type, Properties p options = extraOptions + "," + options; } } - + switch(type) { case Spanish: factory = SpanishTokenizer.factory(new CoreLabelTokenFactory(), options); break; - + case French: factory = FrenchTokenizer.factory(new CoreLabelTokenFactory(), options); break; - + case Whitespace: boolean eolIsSignificant = Boolean.valueOf(props.getProperty(EOL_PROPERTY, "false")); eolIsSignificant = eolIsSignificant || Boolean.valueOf(props.getProperty(StanfordCoreNLP.NEWLINE_SPLITTER_PROPERTY, "false")); factory = new WhitespaceTokenizer.WhitespaceTokenizerFactory (new CoreLabelTokenFactory(), eolIsSignificant); break; - - case English: + + case English: case German: factory = PTBTokenizer.factory(new CoreLabelTokenFactory(), options); break; - + case Unspecified: System.err.println("TokenizerAnnotator: No tokenizer type provided. Defaulting to PTBTokenizer."); factory = PTBTokenizer.factory(new CoreLabelTokenFactory(), options); break; - + default: throw new IllegalArgumentException("No valid tokenizer type provided.\n" + "Use -tokenize.language, -tokenize.class, or -tokenize.whitespace \n" + "to specify a tokenizer."); - + } return factory; } @@ -240,8 +239,8 @@ private TokenizerFactory initFactory(TokenizerType type, Properties p public Tokenizer getTokenizer(Reader r) { return factory.getTokenizer(r); } - - /** + + /** * Does the actual work of splitting TextAnnotation into CoreLabels, * which are then attached to the TokensAnnotation. */ @@ -250,18 +249,18 @@ public void annotate(Annotation annotation) { if (VERBOSE) { System.err.print("Tokenizing ... "); } - + if (annotation.has(CoreAnnotations.TextAnnotation.class)) { String text = annotation.get(CoreAnnotations.TextAnnotation.class); - Reader r = new StringReader(text); - // don't wrap in BufferedReader. It gives you nothing for in memory String unless you need the readLine() method ! - + Reader r = new StringReader(text); + // don't wrap in BufferedReader. It gives you nothing for in-memory String unless you need the readLine() method!
+ List tokens = getTokenizer(r).tokenize(); // cdm 2010-05-15: This is now unnecessary, as it is done in CoreLabelTokenFactory // for (CoreLabel token: tokens) { // token.set(CoreAnnotations.TextAnnotation.class, token.get(CoreAnnotations.TextAnnotation.class)); // } - + annotation.set(CoreAnnotations.TokensAnnotation.class, tokens); if (VERBOSE) { System.err.println("done."); @@ -271,14 +270,14 @@ public void annotate(Annotation annotation) { throw new RuntimeException("Tokenizer unable to find text in annotation: " + annotation); } } - + @Override public Set requires() { return Collections.emptySet(); } - + @Override public Set requirementsSatisfied() { return Collections.singleton(TOKENIZE_REQUIREMENT); - } + } } diff --git a/src/edu/stanford/nlp/pipeline/WordsToSentencesAnnotator.java b/src/edu/stanford/nlp/pipeline/WordsToSentencesAnnotator.java index 5701175e9e..1af87ed99f 100644 --- a/src/edu/stanford/nlp/pipeline/WordsToSentencesAnnotator.java +++ b/src/edu/stanford/nlp/pipeline/WordsToSentencesAnnotator.java @@ -14,10 +14,10 @@ /** - * This class assumes that there is a {@code List} + * This class assumes that there is a {@code List} * under the {@code TokensAnnotation} field, and runs it * through {@link edu.stanford.nlp.process.WordToSentenceProcessor} - * and puts the new {@code List>} + * and puts the new {@code List} * under the {@code SentencesAnnotation} field. * * @author Jenny Finkel @@ -158,7 +158,7 @@ public void annotate(Annotation annotation) { sentence.set(CoreAnnotations.LineNumberAnnotation.class, lineNumber); } - // Annotation sentence with section information + // Annotate sentence with section information. // Assume section start and end appear as first and last tokens of sentence CoreLabel sentenceStartToken = sentenceTokens.get(0); CoreLabel sentenceEndToken = sentenceTokens.get(sentenceTokens.size()-1); @@ -176,7 +176,7 @@ public void annotate(Annotation annotation) { if (sectionEnd != null) { sectionAnnotations = null; } - + if (docID != null) { sentence.set(CoreAnnotations.DocIDAnnotation.class, docID); } diff --git a/src/edu/stanford/nlp/pipeline/demo/StanfordCoreNlpDemo.java b/src/edu/stanford/nlp/pipeline/demo/StanfordCoreNlpDemo.java index f7752dc093..977857a9c9 100644 --- a/src/edu/stanford/nlp/pipeline/demo/StanfordCoreNlpDemo.java +++ b/src/edu/stanford/nlp/pipeline/demo/StanfordCoreNlpDemo.java @@ -3,6 +3,8 @@ import java.io.*; import java.util.*; +import edu.stanford.nlp.dcoref.CorefChain; +import edu.stanford.nlp.dcoref.CorefCoreAnnotations; import edu.stanford.nlp.io.*; import edu.stanford.nlp.ling.*; import edu.stanford.nlp.pipeline.*; @@ -11,9 +13,12 @@ import edu.stanford.nlp.trees.*; import edu.stanford.nlp.util.*; +/** This class demonstrates building and using a Stanford CoreNLP pipeline. */ public class StanfordCoreNlpDemo { + /** Usage: java -cp "*" StanfordCoreNlpDemo [inputFile [outputTextFile [outputXmlFile]]] */ public static void main(String[] args) throws IOException { + // set up optional output files PrintWriter out; if (args.length > 1) { out = new PrintWriter(args[1]); @@ -25,7 +30,16 @@ public static void main(String[] args) throws IOException { xmlOut = new PrintWriter(args[2]); } + // Create a CoreNLP pipeline. This line just builds the default pipeline. 
+ // In comments we show how you can build a particular pipeline + // Properties props = new Properties(); + // props.put("annotators", "tokenize, ssplit, pos, lemma, ner, depparse"); + // props.put("ner.model", "edu/stanford/nlp/models/ner/english.all.3class.distsim.crf.ser.gz"); + // props.put("ner.applyNumericClassifiers", "false"); + // StanfordCoreNLP pipeline = new StanfordCoreNLP(props); StanfordCoreNLP pipeline = new StanfordCoreNLP(); + + // Initialize an Annotation with some text to be annotated. The text is the argument to the constructor. Annotation annotation; if (args.length > 0) { annotation = new Annotation(IOUtils.slurpFileNoExceptions(args[0])); @@ -33,39 +47,67 @@ public static void main(String[] args) throws IOException { annotation = new Annotation("Kosgi Santosh sent an email to Stanford University. He didn't get a reply."); } + // run all the selected Annotators on this text pipeline.annotate(annotation); + + // print the results to file(s) pipeline.prettyPrint(annotation, out); if (xmlOut != null) { pipeline.xmlPrint(annotation, xmlOut); } - // An Annotation is a Map and you can get and use the various analyses individually. - // For instance, this gets the parse tree of the first sentence in the text. - out.println(); + // Access the Annotation in code // The toString() method on an Annotation just prints the text of the Annotation // But you can see what is in it with other methods like toShorterString() + out.println(); out.println("The top level annotation"); out.println(annotation.toShorterString()); + + // An Annotation is a Map and you can get and use the various analyses individually. + // For instance, this gets the parse tree of the first sentence in the text. List sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class); - if (sentences != null && sentences.size() > 0) { - ArrayCoreMap sentence = (ArrayCoreMap) sentences.get(0); + if (sentences != null && ! sentences.isEmpty()) { + CoreMap sentence = sentences.get(0); + out.println(); out.println("The first sentence is:"); out.println(sentence.toShorterString()); - Tree tree = sentence.get(TreeCoreAnnotations.TreeAnnotation.class); out.println(); out.println("The first sentence tokens are:"); for (CoreMap token : sentence.get(CoreAnnotations.TokensAnnotation.class)) { - ArrayCoreMap aToken = (ArrayCoreMap) token; - out.println(aToken.toShorterString()); + out.println(token.toShorterString()); } + Tree tree = sentence.get(TreeCoreAnnotations.TreeAnnotation.class); + out.println(); out.println("The first sentence parse tree is:"); tree.pennPrint(out); - out.println("The first sentence basic dependencies are:"); - System.out.println(sentence.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class).toString(SemanticGraph.OutputFormat.LIST)); + out.println(); + out.println("The first sentence basic dependencies are:"); + out.println(sentence.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class).toString(SemanticGraph.OutputFormat.LIST)); out.println("The first sentence collapsed, CC-processed dependencies are:"); SemanticGraph graph = sentence.get(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class); - System.out.println(graph.toString(SemanticGraph.OutputFormat.LIST)); + out.println(graph.toString(SemanticGraph.OutputFormat.LIST)); + + // Access coreference. In the coreference link graph, + // each chain stores a set of mentions that co-refer with each other, + // along with a method for getting the most representative mention. 
+ // Both sentence and token offsets start at 1! + out.println("Coreference information"); + Map corefChains = + annotation.get(CorefCoreAnnotations.CorefChainAnnotation.class); + if (corefChains == null) { return; } + for (Map.Entry entry: corefChains.entrySet()) { + out.println("Chain " + entry.getKey() + " "); + for (CorefChain.CorefMention m : entry.getValue().getMentionsInTextualOrder()) { + // We need to subtract one since the indices count from 1 but the Lists start from 0 + List tokens = sentences.get(m.sentNum - 1).get(CoreAnnotations.TokensAnnotation.class); + // We subtract two for end: one for 0-based indexing, and one because we want last token of mention not one following. + out.println(" " + m + ", i.e., 0-based character offsets [" + tokens.get(m.startIndex - 1).beginPosition() + + ", " + tokens.get(m.endIndex - 2).endPosition() + ")"); + } + } } + IOUtils.closeIgnoringExceptions(out); + IOUtils.closeIgnoringExceptions(xmlOut); } } diff --git a/src/edu/stanford/nlp/process/DocumentPreprocessor.java b/src/edu/stanford/nlp/process/DocumentPreprocessor.java index f78c24e821..9d3be79244 100644 --- a/src/edu/stanford/nlp/process/DocumentPreprocessor.java +++ b/src/edu/stanford/nlp/process/DocumentPreprocessor.java @@ -1,7 +1,20 @@ package edu.stanford.nlp.process; -import java.io.*; -import java.util.*; +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; +import java.io.OutputStreamWriter; +import java.io.PrintWriter; +import java.io.Reader; +import java.io.StringReader; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.NoSuchElementException; +import java.util.Properties; +import java.util.Set; import java.util.regex.Pattern; import edu.stanford.nlp.io.IOUtils; @@ -12,8 +25,12 @@ import edu.stanford.nlp.ling.HasWord; import edu.stanford.nlp.ling.Label; import edu.stanford.nlp.objectbank.XMLBeginEndIterator; + import java.util.function.Function; + import edu.stanford.nlp.util.Generics; +import edu.stanford.nlp.util.PropertiesUtils; +import edu.stanford.nlp.util.StringUtils; /** * Produces a list of sentences from either a plain text or XML document. 
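
For context, DocumentPreprocessor is typically consumed as an Iterable over sentences, each sentence being a list of tokens. A minimal sketch of programmatic use (the class name and sample text are illustrative, not part of the patch):

import java.io.StringReader;
import java.util.List;

import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.process.DocumentPreprocessor;

/** Illustrative only: splits a string into sentences. */
public class SentenceSplitExample {
  public static void main(String[] args) {
    // DocumentPreprocessor iterates over sentences, each a List of tokens.
    DocumentPreprocessor dp =
        new DocumentPreprocessor(new StringReader("One sentence. And another."));
    for (List<HasWord> sentence : dp) {
      System.out.println(sentence);
    }
  }
}
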
@@ -210,7 +227,7 @@ private class PlainTextIterator implements Iterator> { private final Set delimFollowers; private Function splitTag; private List nextSent = null; - private final List nextSentCarryover = new ArrayList(); + private final List nextSentCarryover = Generics.newArrayList(); public PlainTextIterator() { // Establish how to find sentence boundaries @@ -263,7 +280,7 @@ public String[] apply(String in) { } private void primeNext() { - nextSent = new ArrayList(nextSentCarryover); + nextSent = Generics.newArrayList(nextSentCarryover); nextSentCarryover.clear(); boolean seenBoundary = false; @@ -407,89 +424,103 @@ public List next() { } // end class XMLIterator + private static String usage() { + StringBuilder sb = new StringBuilder(); + String nl = System.getProperty("line.separator"); + sb.append(String.format("Usage: java %s [OPTIONS] [file] [< file]%n%n", DocumentPreprocessor.class.getName())); + sb.append("Options:").append(nl); + sb.append("-xml delim : XML input with associated delimiter.").append(nl); + sb.append("-encoding type : Input encoding (default: UTF-8).").append(nl); + sb.append("-printSentenceLengths : ").append(nl); + sb.append("-noTokenization : Split on newline delimiters only.").append(nl); + sb.append("-suppressEscaping : Suppress PTB escaping.").append(nl); + sb.append("-tokenizerOptions opts : Specify custom tokenizer options.").append(nl); + sb.append("-tag delim : Input tokens are tagged. Split tags.").append(nl); + sb.append("-whitespaceTokenization : Whitespace tokenization only.").append(nl); + return sb.toString(); + } + + private static Map argOptionDefs() { + Map argOptionDefs = Generics.newHashMap(); + argOptionDefs.put("help", 0); + argOptionDefs.put("xml", 1); + argOptionDefs.put("encoding", 1); + argOptionDefs.put("printSentenceLengths", 0); + argOptionDefs.put("noTokenization", 0); + argOptionDefs.put("suppressEscaping", 0); + argOptionDefs.put("tag", 1); + argOptionDefs.put("tokenizerOptions", 1); + argOptionDefs.put("whitespaceTokenization", 0); + return argOptionDefs; + } + /** - * This provides a simple test method for DocumentPreprocessor.
- * Usage: - * java - * DocumentPreprocessor filename [-xml tag] [-suppressEscaping] [-noTokenization] - * - * A filename is required. The code doesn't run as a filter currently. - * - * tag is the element name of the XML from which to extract text. It can - * be a regular expression which is called on the element with the - * matches() method, such as 'TITLE|P'. - * + * A simple, deterministic sentence-splitter. This method only supports the English + * tokenizer, so for other languages you should run the tokenizer first and then + * run this sentence splitter with the "-whitespaceTokenization" option. + * * @param args Command-line arguments */ public static void main(String[] args) throws IOException { - if (args.length < 1) { - System.err.println("usage: DocumentPreprocessor OPT* filename"); - System.err.println(" OPT = -xml TAG|-encoding ENC|-tokenizerOptions opts|-tag delim|..."); - return; + final Properties options = StringUtils.argsToProperties(args, argOptionDefs()); + if (options.containsKey("help")) { + System.err.println(usage()); + System.exit(-1); } - String encoding = "utf-8"; - boolean printSentenceLengths = false; - DocType docType = DocType.Plain; - String xmlElementDelimiter = null; - TokenizerFactory tf = PTBTokenizer.factory(new CoreLabelTokenFactory(), ""); - String sentenceDelimiter = null; - String tagDelimiter = null; - boolean printOriginalText = false; + // Command-line flags + String encoding = options.getProperty("encoding", "utf-8"); + boolean printSentenceLengths = PropertiesUtils.getBool(options, "printSentenceLengths", false); + String xmlElementDelimiter = options.getProperty("xml", null); + DocType docType = xmlElementDelimiter == null ? DocType.Plain : DocType.XML; + String sentenceDelimiter = options.containsKey("noTokenization") ? System.getProperty("line.separator") : null; + String tagDelimiter = options.getProperty("tag", null); String[] sentenceDelims = null; - - int i = 0; - for ( ; i < args.length; i++) { - if (args[i].isEmpty() || !
args[i].startsWith("-")) { - break; - } - if (args[i].equals("-xml")) { - docType = DocType.XML; - i++; - xmlElementDelimiter = args[i]; - - } else if (args[i].equals("-encoding") && i+1 < args.length) { - i++; - encoding = args[i]; - - } else if (args[i].equals("-printSentenceLengths")) { - printSentenceLengths = true; - - } else if (args[i].equals("-suppressEscaping")) { - tf = PTBTokenizer.factory(new CoreLabelTokenFactory(), "ptb3Escaping=false"); - - } else if (args[i].equals("-tokenizerOptions") && i+1 < args.length) { - i++; - tf = PTBTokenizer.factory(new CoreLabelTokenFactory(), args[i]); - - } else if (args[i].equals("-noTokenization")) { - tf = null; - sentenceDelimiter = System.getProperty("line.separator"); - - } else if (args[i].equals("-whitespaceTokenization")) { - tf = null; - List whitespaceDelims = - new ArrayList(Arrays.asList(DocumentPreprocessor.DEFAULT_SENTENCE_DELIMS)); - whitespaceDelims.add(WhitespaceLexer.NEWLINE); - sentenceDelims = whitespaceDelims.toArray(new String[whitespaceDelims.size()]); - - } else if (args[i].equals("-tag")) { - i++; - tagDelimiter = args[i]; - - } else if (args[i].equals("-printOriginalText")) { - printOriginalText = true; - tf = PTBTokenizer.factory(new CoreLabelTokenFactory(), "invertible=true"); - - } else { - System.err.println("Unknown option: " + args[i]); - } + + // Setup the TokenizerFactory + int numFactoryFlags = 0; + boolean suppressEscaping = options.containsKey("suppressEscaping"); + if (suppressEscaping) numFactoryFlags += 1; + boolean customTokenizer = options.containsKey("tokenizerOptions"); + if (customTokenizer) numFactoryFlags += 1; + boolean printOriginalText = options.containsKey("printOriginalText"); + if (printOriginalText) numFactoryFlags += 1; + boolean whitespaceTokenization = options.containsKey("whitespaceTokenization"); + if (whitespaceTokenization) numFactoryFlags += 1; + if (numFactoryFlags > 1) { + System.err.println("Only one tokenizer flag allowed at a time: "); + System.err.println(" -suppressEscaping, -tokenizerOptions, -printOriginalText, -whitespaceTokenization"); + System.exit(-1); + } + + TokenizerFactory tf = null; + if (suppressEscaping) { + tf = PTBTokenizer.factory(new CoreLabelTokenFactory(), "ptb3Escaping=false"); + } else if (customTokenizer) { + tf = PTBTokenizer.factory(new CoreLabelTokenFactory(), options.getProperty("tokenizerOptions")); + } else if (printOriginalText) { + tf = PTBTokenizer.factory(new CoreLabelTokenFactory(), "invertible=true"); + } else if (whitespaceTokenization) { + List whitespaceDelims = + new ArrayList(Arrays.asList(DocumentPreprocessor.DEFAULT_SENTENCE_DELIMS)); + whitespaceDelims.add(WhitespaceLexer.NEWLINE); + sentenceDelims = whitespaceDelims.toArray(new String[whitespaceDelims.size()]); + } else { + tf = PTBTokenizer.factory(new CoreLabelTokenFactory(), ""); } + + String fileList = options.getProperty("", null); + String[] files = fileList == null ? 
new String[1] : fileList.split("\\s+"); int numSents = 0; PrintWriter pw = new PrintWriter(new OutputStreamWriter(System.out, encoding), true); - for ( ; i < args.length; i++) { - DocumentPreprocessor docPreprocessor = new DocumentPreprocessor(args[i], docType, encoding); + for (String file : files) { + DocumentPreprocessor docPreprocessor; + if (file == null || file.length() == 0) { + docPreprocessor = new DocumentPreprocessor(new InputStreamReader(System.in, encoding)); + } else { + docPreprocessor = new DocumentPreprocessor(file, docType, encoding); + } if (docType == DocType.XML) { docPreprocessor.setElementDelimiter(xmlElementDelimiter); } @@ -498,7 +529,7 @@ public static void main(String[] args) throws IOException { docPreprocessor.setSentenceDelimiter(sentenceDelimiter); } if (tagDelimiter != null) { - docPreprocessor.setTagDelimiter(args[++i]); + docPreprocessor.setTagDelimiter(tagDelimiter); } if (sentenceDelims != null) { docPreprocessor.setSentenceFinalPuncWords(sentenceDelims); @@ -531,5 +562,4 @@ public static void main(String[] args) throws IOException { pw.close(); System.err.println("Read in " + numSents + " sentences."); } - } diff --git a/src/edu/stanford/nlp/process/Morpha.flex b/src/edu/stanford/nlp/process/Morpha.flex index f59d4f5196..b1b9c2125d 100644 --- a/src/edu/stanford/nlp/process/Morpha.flex +++ b/src/edu/stanford/nlp/process/Morpha.flex @@ -3,6 +3,7 @@ package edu.stanford.nlp.process; import java.io.BufferedReader; import java.io.FileReader; import java.io.IOException; +import java.util.Collections; import java.util.Set; import java.util.StringTokenizer; import java.util.logging.Logger; @@ -276,7 +277,19 @@ import edu.stanford.nlp.util.Generics; "zag", "zap", "zig", "zig-zag", "zigzag", "zip", "ztrip" }; - private final Set verbStemSet; + private static final Set verbStemSet = loadVerbStemSet(verbStems); + + /** + * Loads a list of words from the array and stores them in a HashSet. + */ + private static Set loadVerbStemSet(String[] verbStems) { + Set set = Generics.newHashSet(verbStems.length); + for (String stem : verbStems) { + set.add(stem); + } + return Collections.unmodifiableSet(set); + } + %} @@ -287,7 +300,6 @@ import edu.stanford.nlp.util.Generics; options[j] = true; } options[0]=false; - verbStemSet = loadVerbStemSet(verbStems); if (noTags) { yybegin(any); } else { @@ -334,17 +346,6 @@ import edu.stanford.nlp.util.Generics; } -/** - * Loads a list of words from the array and stores them in a HashSet. 
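A minimal sketch of the plain-text code path the rewritten main() above drives (not part of the patch): the input string is an assumption, and any Reader, including the stdin path shown above, works the same way.

    import java.io.StringReader;
    import java.util.List;

    import edu.stanford.nlp.ling.HasWord;
    import edu.stanford.nlp.process.DocumentPreprocessor;

    public class SentenceSplitSketch {
      public static void main(String[] args) {
        // DocumentPreprocessor is Iterable over sentences, each a list of tokens.
        DocumentPreprocessor dp =
            new DocumentPreprocessor(new StringReader("Hello world. This is a test."));
        for (List<HasWord> sentence : dp) {
          System.out.println(sentence);
        }
      }
    }

This is the same Reader-based constructor the new main() falls back to when no file argument is given.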
- */ -private Set loadVerbStemSet(String[] verbStems) { - Set set = Generics.newHashSet(verbStems.length); - for (String stem : verbStems) { - set.add(stem); - } - return set; -} - String condub_stem(int del, String add, String affix) { StringBuilder yytextBuff; diff --git a/src/edu/stanford/nlp/process/Morpha.java b/src/edu/stanford/nlp/process/Morpha.java index ea93ed6344..6cf2be32a8 100644 --- a/src/edu/stanford/nlp/process/Morpha.java +++ b/src/edu/stanford/nlp/process/Morpha.java @@ -1,10 +1,11 @@ -/* The following code was generated by JFlex 1.5.0-SNAPSHOT */ +/* The following code was generated by JFlex 1.5.1 */ package edu.stanford.nlp.process; import java.io.BufferedReader; import java.io.FileReader; import java.io.IOException; +import java.util.Collections; import java.util.Set; import java.util.StringTokenizer; import java.util.logging.Logger; @@ -14,8 +15,8 @@ /** * This class is a scanner generated by - * JFlex 1.5.0-SNAPSHOT - * from the specification file /juicy/u61/u/horatio/codebase2/javanlp/projects/core/src/edu/stanford/nlp/process/Morpha.flex + * JFlex 1.5.1 + * from the specification file /afs/cs.stanford.edu/u/horatio/codebase2/javanlp/projects/core/src/edu/stanford/nlp/process/Morpha.flex */ class Morpha { @@ -69503,7 +69504,19 @@ void setOptions(int opts) { "zag", "zap", "zig", "zig-zag", "zigzag", "zip", "ztrip" }; - private final Set verbStemSet; + private static final Set verbStemSet = loadVerbStemSet(verbStems); + + /** + * Loads a list of words from the array and stores them in a HashSet. + */ + private static Set loadVerbStemSet(String[] verbStems) { + Set set = Generics.newHashSet(verbStems.length); + for (String stem : verbStems) { + set.add(stem); + } + return Collections.unmodifiableSet(set); + } + String ynull_stem() { return common_noun_stem(); @@ -69542,17 +69555,6 @@ private static String capitalise(String s) { } -/** - * Loads a list of words from the array and stores them in a HashSet. 
- */ -private Set loadVerbStemSet(String[] verbStems) { - Set set = Generics.newHashSet(verbStems.length); - for (String stem : verbStems) { - set.add(stem); - } - return set; -} - String condub_stem(int del, String add, String affix) { StringBuilder yytextBuff; @@ -69660,7 +69662,6 @@ private String semi_reg_stem(int del, String add) { options[j] = true; } options[0]=false; - verbStemSet = loadVerbStemSet(verbStems); if (noTags) { yybegin(any); } else { diff --git a/src/edu/stanford/nlp/process/WordToSentenceProcessor.java b/src/edu/stanford/nlp/process/WordToSentenceProcessor.java index 5abbe48cf6..bbbe920a57 100644 --- a/src/edu/stanford/nlp/process/WordToSentenceProcessor.java +++ b/src/edu/stanford/nlp/process/WordToSentenceProcessor.java @@ -210,10 +210,12 @@ public List> process(List words) { * @see #WordToSentenceProcessor(String, Set, Set, Set, String, NewlineIsSentenceBreak, SequencePattern, Set, boolean, boolean) */ public List> wordsToSentences(List words) { - IdentityHashMap isSentenceBoundary = new IdentityHashMap(); + IdentityHashMap isSentenceBoundary = null; // is null unless used by sentenceBoundaryMultiTokenPattern + if (sentenceBoundaryMultiTokenPattern != null) { // Do initial pass using tokensregex to identify multi token patterns that need to be matched // and add the last token to our table of sentence boundary tokens + isSentenceBoundary = new IdentityHashMap(); SequenceMatcher matcher = sentenceBoundaryMultiTokenPattern.getMatcher(words); while (matcher.find()) { List nodes = matcher.groupNodes(); @@ -308,6 +310,7 @@ public List> wordsToSentences(List words) { } } else { lastTokenWasNewline = false; + Boolean isb; if (xmlBreakElementsToDiscard != null && matchesXmlBreakElementToDiscard(word)) { newSent = true; if (DEBUG) { @@ -317,7 +320,7 @@ public List> wordsToSentences(List words) { insideRegion = false; newSent = true; // Marked sentence boundaries - } else if (isSentenceBoundary.containsKey(o) && isSentenceBoundary.get(o)) { + } else if ((isSentenceBoundary != null) && ((isb = isSentenceBoundary.get(o)) != null) && isb) { if (!discardToken) currentSentence.add(o); if (DEBUG) { System.err.println(" is sentence boundary (matched multi-token pattern); " + debugText); diff --git a/src/edu/stanford/nlp/semgraph/SemanticGraph.java b/src/edu/stanford/nlp/semgraph/SemanticGraph.java index 5e8868a313..6b9f9d1238 100644 --- a/src/edu/stanford/nlp/semgraph/SemanticGraph.java +++ b/src/edu/stanford/nlp/semgraph/SemanticGraph.java @@ -147,8 +147,7 @@ public List vertexListSorted() { * @return A ordered list of edges in the graph. */ public List edgeListSorted() { - ArrayList edgeList = - new ArrayList(); + ArrayList edgeList = new ArrayList<>(); for (SemanticGraphEdge edge : edgeIterable()) { edgeList.add(edge); } diff --git a/src/edu/stanford/nlp/semgraph/SemanticGraphFactory.java b/src/edu/stanford/nlp/semgraph/SemanticGraphFactory.java index 60bc92d95d..d36165c664 100644 --- a/src/edu/stanford/nlp/semgraph/SemanticGraphFactory.java +++ b/src/edu/stanford/nlp/semgraph/SemanticGraphFactory.java @@ -128,7 +128,6 @@ public static SemanticGraph makeFromTree(GrammaticalStructure gs, boolean includeExtras, boolean threadSafe, Predicate filter) { - addProjectedCategoriesToGrammaticalStructure(gs); Collection deps; switch(mode) { case COLLAPSED_TREE: @@ -225,36 +224,6 @@ public static SemanticGraph allTypedDependencies(Tree tree, boolean collapse) { return makeFromTree(tree, (collapse) ? 
Mode.COLLAPSED : Mode.BASIC, true, null); } - /** - * Modifies the given GrammaticalStructure by adding some annotations to the - * MapLabels of certain nodes.

    - * - * For each word (leaf node), we add an annotation which indicates the - * syntactic category of the maximal constituent headed by the word. - */ - static void addProjectedCategoriesToGrammaticalStructure(GrammaticalStructure gs) { - // Our strategy: (1) assume every node in GrammaticalStructure is already - // annotated with head word, (2) traverse nodes of GrammaticalStructure in - // reverse of pre-order (bottom up), and (3) at each, get head word and - // annotate it with category of this node. - List nodes = new ArrayList(); - for (Tree node : gs.root()) { // pre-order traversal - nodes.add((TreeGraphNode) node); - } - Collections.reverse(nodes); // reverse - for (TreeGraphNode node : nodes) { - if (!"ROOT".equals(node.value())) { // main verb should get PROJ_CAT "S", not "ROOT" - CoreLabel label = node.label(); - Tree hw = label.get(TreeCoreAnnotations.HeadWordAnnotation.class); - if (hw != null) { - TreeGraphNode hwn = (TreeGraphNode) hw; - CoreLabel hwLabel = hwn.label(); - hwLabel.set(CoreAnnotations.ProjectedCategoryAnnotation.class, node.value()); - } - } - } - } - /** * Given a list of edges, attempts to create and return a rooted SemanticGraph. *

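To make the factory entry points above concrete, a hedged sketch (not from the patch): it mirrors the four-argument makeFromTree call used by allTypedDependencies() above, and the toy parse tree is an assumption standing in for real parser output.

    import edu.stanford.nlp.semgraph.SemanticGraph;
    import edu.stanford.nlp.semgraph.SemanticGraphFactory;
    import edu.stanford.nlp.trees.Tree;

    public class SemanticGraphSketch {
      public static void main(String[] args) {
        // Toy tree; in practice this comes from a parser.
        Tree tree = Tree.valueOf("(ROOT (S (NP (DT The) (NN dog)) (VP (VBZ barks))))");
        // Same call shape as allTypedDependencies() above:
        // collapsed mode, include extras, no filter.
        SemanticGraph graph = SemanticGraphFactory.makeFromTree(
            tree, SemanticGraphFactory.Mode.COLLAPSED, true, null);
        System.out.println(graph);
      }
    }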
    diff --git a/src/edu/stanford/nlp/semgraph/SemanticGraphUtils.java b/src/edu/stanford/nlp/semgraph/SemanticGraphUtils.java index 773cb769d2..1053be5804 100644 --- a/src/edu/stanford/nlp/semgraph/SemanticGraphUtils.java +++ b/src/edu/stanford/nlp/semgraph/SemanticGraphUtils.java @@ -1,6 +1,5 @@ package edu.stanford.nlp.semgraph; -import edu.stanford.nlp.ling.AnnotationLookup; import edu.stanford.nlp.ling.CoreAnnotations; import edu.stanford.nlp.ling.IndexedWord; import edu.stanford.nlp.ling.LabeledWord; @@ -14,8 +13,6 @@ import java.io.StringWriter; import java.util.*; -import java.util.function.Function; -import java.util.regex.Pattern; /** @@ -731,7 +728,6 @@ public static String printVertices(SemanticGraph sg, PrintVerticeParams params) return buf.toString(); } - /** * Given a SemanticGraph, creates a SemgrexPattern string based off of this graph. * NOTE: the word() value of the vertice is the name to reference @@ -745,39 +741,14 @@ public static String semgrexFromGraph(SemanticGraph sg, boolean matchTag, boolea } public static String semgrexFromGraph(SemanticGraph sg, Collection wildcardNodes, - boolean useTag, boolean useWord, Map nodeNameMap) throws Exception { - Function transformNode = o ->{ - String str = ""; - if(useWord) - str = "{word: /" + Pattern.quote(o.word()) + "/"; - if(useTag){ - if(!str.isEmpty()) - str += "; "; - str = "tag: " + o.tag(); - } - if(!str.isEmpty()) - str += "}"; - return str; - }; - - return semgrexFromGraph(sg, wildcardNodes, nodeNameMap, transformNode); - } - - /** - * nodeValuesTranformation is a function that converts a vertex (IndexedWord) to the value. - * For an example, see semgrexFromGraph - * function implementations (if useWord and useTag is true, the value is "{word: vertex.word; tag: vertex.tag}"). - * @throws Exception - */ - public static String semgrexFromGraph(SemanticGraph sg, Collection wildcardNodes, - Map nodeNameMap, Function wordTransformation) throws Exception { + boolean useTag, boolean useWord, Map nodeNameMap) throws Exception { IndexedWord patternRoot = sg.getFirstRoot(); StringWriter buf = new StringWriter(); Set tabu = Generics.newHashSet(); Set seenEdges = Generics.newHashSet(); buf.append(semgrexFromGraphHelper(patternRoot, sg, tabu, seenEdges, true, true, wildcardNodes, - nodeNameMap, false, wordTransformation)); + useTag, useWord, nodeNameMap, false)); String patternString = buf.toString(); return patternString; @@ -797,12 +768,12 @@ public static String semgrexFromGraph(Iterable edges, boolean /** * Recursive call to generate the Semgrex pattern based off of this SemanticGraph. - * nodeValuesTranformation is a function that converts a vertex (IndexedWord) to the value. For an example, see semgrexFromGraph - * function implementations. + * Currently presumes the only elements to match on are the tags. + * TODO: consider tag generalization, and matching off of other features? */ protected static String semgrexFromGraphHelper(IndexedWord vertice, SemanticGraph sg, Set tabu, Set seenEdges, boolean useWordAsLabel, boolean nameEdges, Collection wildcardNodes, - Map nodeNameMap, boolean orderedNodes, Function nodeValuesTransformation) { + boolean useTag, boolean useWord, Map nodeNameMap, boolean orderedNodes) { StringWriter buf = new StringWriter(); // If the node is a wildcarded one, treat it as a {}, meaning any match. 
Currently these will not @@ -810,32 +781,16 @@ protected static String semgrexFromGraphHelper(IndexedWord vertice, SemanticGrap if (wildcardNodes != null && wildcardNodes.contains(vertice)) { buf.append("{}"); } else { - - String vertexStr = nodeValuesTransformation.apply(vertice); - if(vertexStr != null && !vertexStr.isEmpty()){ - buf.append(vertexStr); + buf.append("{"); + if (useTag) { + buf.append("tag:"); buf.append(vertice.tag()); + if (useWord) + buf.append(";"); + } + if (useWord) { + buf.append("word:"); buf.append(vertice.word()); } -// buf.append("{"); -// int i = 0; -// for(String corekey: useNodeCoreAnnotations){ -// AnnotationLookup.KeyLookup lookup = AnnotationLookup.getCoreKey(corekey); -// assert lookup != null : "Invalid key " + corekey; -// if(i > 0) -// buf.append("; "); -// String value = vertice.containsKey(lookup.coreKey) ? vertice.get(lookup.coreKey).toString() : "null"; -// buf.append(corekey+":"+nodeValuesTransformation.apply(value)); -// i++; -// } -// if (useTag) { -// -// buf.append("tag:"); buf.append(vertice.tag()); -// if (useWord) -// buf.append(";"); -// } -// if (useWord) { -// buf.append("word:"); buf.append(wordTransformation.apply(vertice.word())); -// } -// buf.append("}"); + buf.append("}"); } if (nodeNameMap != null) { buf.append("="); @@ -885,25 +840,23 @@ protected static String semgrexFromGraphHelper(IndexedWord vertice, SemanticGrap } } else { buf.append(semgrexFromGraphHelper(tgtVert, sg, tabu, seenEdges, useWordAsLabel, nameEdges, - wildcardNodes, nodeNameMap, orderedNodes, nodeValuesTransformation)); + wildcardNodes, useTag, useWord, nodeNameMap, orderedNodes)); if (applyParens) buf.append(")"); } } return buf.toString(); } - - /** Same as semgrexFromGraph except the node traversal is ordered by sorting - */ + public static String semgrexFromGraphOrderedNodes(SemanticGraph sg, Collection wildcardNodes, - Map nodeNameMap, Function wordTransformation) throws Exception { + boolean useTag, boolean useWord, Map nodeNameMap) throws Exception { IndexedWord patternRoot = sg.getFirstRoot(); StringWriter buf = new StringWriter(); Set tabu = Generics.newHashSet(); Set seenEdges = Generics.newHashSet(); buf.append(semgrexFromGraphHelper(patternRoot, sg, tabu, seenEdges, true, true, wildcardNodes, - nodeNameMap, true, wordTransformation)); + useTag, useWord, nodeNameMap, true)); String patternString = buf.toString(); return patternString; diff --git a/src/edu/stanford/nlp/semgraph/semgrex/SemgrexBatchParser.java b/src/edu/stanford/nlp/semgraph/semgrex/SemgrexBatchParser.java index cc26e26de8..4c83a99df9 100644 --- a/src/edu/stanford/nlp/semgraph/semgrex/SemgrexBatchParser.java +++ b/src/edu/stanford/nlp/semgraph/semgrex/SemgrexBatchParser.java @@ -10,8 +10,6 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; -import edu.stanford.nlp.stats.ClassicCounter; -import edu.stanford.nlp.stats.Counter; import edu.stanford.nlp.util.Generics; import edu.stanford.nlp.util.Pair; @@ -28,7 +26,7 @@ public class SemgrexBatchParser { /** Maximum stream size in characters */ private static final int MAX_STREAM_SIZE = 1024 * 1024; - public Counter compileStream(InputStream is) throws IOException { + public List compileStream(InputStream is) throws IOException { BufferedReader reader = new BufferedReader(new InputStreamReader(is)); reader.mark(MAX_STREAM_SIZE); Map macros = preprocess(reader); @@ -36,16 +34,15 @@ public Counter compileStream(InputStream is) throws IOException return parse(reader, macros); } - private Counter parse(BufferedReader reader, Map 
macros) throws IOException { - Counter patterns = new ClassicCounter(); + private List parse(BufferedReader reader, Map macros) throws IOException { + List patterns = new ArrayList(); for(String line; (line = reader.readLine()) != null; ) { line = line.trim(); if(line.length() == 0 || line.startsWith("#")) continue; if(line.startsWith("macro ")) continue; line = replaceMacros(line, macros); - String[] tok = line.split("\t"); - SemgrexPattern pattern = SemgrexPattern.compile(tok[0]); - patterns.setCount(pattern, tok.length > 1? Double.parseDouble(tok[1]): 1.0); + SemgrexPattern pattern = SemgrexPattern.compile(line); + patterns.add(pattern); } return patterns; } diff --git a/src/edu/stanford/nlp/sentiment/RNNTrainOptions.java b/src/edu/stanford/nlp/sentiment/RNNTrainOptions.java index 24274083be..263b83ddb2 100644 --- a/src/edu/stanford/nlp/sentiment/RNNTrainOptions.java +++ b/src/edu/stanford/nlp/sentiment/RNNTrainOptions.java @@ -56,6 +56,19 @@ public double getClassWeight(int i) { /** Regularization cost for the transform tensor */ public double regTransformTensor = 0.001; + /** + * Shuffle matrices when training. Usually should be true. Set to + * false to compare training across different implementations, such + * as with the original Matlab version + */ + public boolean shuffleMatrices = true; + + /** + * If set, the initial matrices are logged to this location as a single file + * using SentimentModel.toString() + */ + public String initialMatrixLogPath = null; + @Override public String toString() { StringBuilder result = new StringBuilder(); @@ -82,6 +95,8 @@ public String toString() { result.append("regWordVector=" + regWordVector + "\n"); result.append("initialAdagradWeight=" + initialAdagradWeight + "\n"); result.append("adagradResetFrequency=" + adagradResetFrequency + "\n"); + result.append("shuffleMatrices=" + shuffleMatrices + "\n"); + result.append("initialMatrixLogPath=" + initialMatrixLogPath + "\n"); return result.toString(); } @@ -130,6 +145,15 @@ public int setOption(String[] args, int argIndex) { classWeights[i] = Double.parseDouble(pieces[i]); } return argIndex + 2; + } else if (args[argIndex].equalsIgnoreCase("-shuffleMatrices")) { + shuffleMatrices = true; + return argIndex + 1; + } else if (args[argIndex].equalsIgnoreCase("-noShuffleMatrices")) { + shuffleMatrices = false; + return argIndex + 1; + } else if (args[argIndex].equalsIgnoreCase("-initialMatrixLogPath")) { + initialMatrixLogPath = args[argIndex + 1]; + return argIndex + 2; } else { return argIndex; } diff --git a/src/edu/stanford/nlp/sentiment/SentimentCostAndGradient.java b/src/edu/stanford/nlp/sentiment/SentimentCostAndGradient.java index a0d5e2ad5b..4210542f53 100644 --- a/src/edu/stanford/nlp/sentiment/SentimentCostAndGradient.java +++ b/src/edu/stanford/nlp/sentiment/SentimentCostAndGradient.java @@ -2,6 +2,7 @@ import java.util.List; import java.util.Map; +import java.util.Set; import org.ejml.simple.SimpleMatrix; @@ -113,11 +114,8 @@ public void calculate(double[] theta) { int numCols = entry.getValue().numCols(); unaryCD.put(entry.getKey(), new SimpleMatrix(numRows, numCols)); } - for (Map.Entry entry : model.wordVectors.entrySet()) { - int numRows = entry.getValue().numRows(); - int numCols = entry.getValue().numCols(); - wordVectorD.put(entry.getKey(), new SimpleMatrix(numRows, numCols)); - } + + // wordVectorD will be filled on an as-needed basis // TODO: This part can easily be parallelized List forwardPropTrees = Generics.newArrayList(); @@ -141,39 +139,54 @@ public void calculate(double[] 
theta) {
     double scale = (1.0 / trainingBatch.size());
     value = error * scale;
-    value += scaleAndRegularize(binaryTD, model.binaryTransform, scale, model.op.trainOptions.regTransformMatrix);
-    value += scaleAndRegularize(binaryCD, model.binaryClassification, scale, model.op.trainOptions.regClassification);
+    value += scaleAndRegularize(binaryTD, model.binaryTransform, scale, model.op.trainOptions.regTransformMatrix, false);
+    value += scaleAndRegularize(binaryCD, model.binaryClassification, scale, model.op.trainOptions.regClassification, true);
     value += scaleAndRegularizeTensor(binaryTensorTD, model.binaryTensors, scale, model.op.trainOptions.regTransformTensor);
-    value += scaleAndRegularize(unaryCD, model.unaryClassification, scale, model.op.trainOptions.regClassification);
-    value += scaleAndRegularize(wordVectorD, model.wordVectors, scale, model.op.trainOptions.regWordVector);
+    value += scaleAndRegularize(unaryCD, model.unaryClassification, scale, model.op.trainOptions.regClassification, false, true);
+    value += scaleAndRegularize(wordVectorD, model.wordVectors, scale, model.op.trainOptions.regWordVector, true, false);
     derivative = NeuralUtils.paramsToVector(theta.length, binaryTD.valueIterator(), binaryCD.valueIterator(), SimpleTensor.iteratorSimpleMatrix(binaryTensorTD.valueIterator()), unaryCD.values().iterator(), wordVectorD.values().iterator());
   }

   static double scaleAndRegularize(TwoDimensionalMap derivatives,
-                                   TwoDimensionalMap currentMatrices,
-                                   double scale,
-                                   double regCost) {
+                                   TwoDimensionalMap currentMatrices,
+                                   double scale, double regCost, boolean dropBiasColumn) {
     double cost = 0.0; // the regularization cost
     for (TwoDimensionalMap.Entry entry : currentMatrices) {
       SimpleMatrix D = derivatives.get(entry.getFirstKey(), entry.getSecondKey());
-      D = D.scale(scale).plus(entry.getValue().scale(regCost));
+      SimpleMatrix regMatrix = entry.getValue();
+      if (dropBiasColumn) {
+        regMatrix = new SimpleMatrix(regMatrix);
+        regMatrix.insertIntoThis(0, regMatrix.numCols() - 1, new SimpleMatrix(regMatrix.numRows(), 1));
+      }
+      D = D.scale(scale).plus(regMatrix.scale(regCost));
       derivatives.put(entry.getFirstKey(), entry.getSecondKey(), D);
-      cost += entry.getValue().elementMult(entry.getValue()).elementSum() * regCost / 2.0;
+      cost += regMatrix.elementMult(regMatrix).elementSum() * regCost / 2.0;
     }
     return cost;
   }

   static double scaleAndRegularize(Map derivatives,
-                                   Map currentMatrices,
-                                   double scale,
-                                   double regCost) {
+                                   Map currentMatrices,
+                                   double scale, double regCost,
+                                   boolean activeMatricesOnly, boolean dropBiasColumn) {
     double cost = 0.0; // the regularization cost
     for (Map.Entry entry : currentMatrices.entrySet()) {
       SimpleMatrix D = derivatives.get(entry.getKey());
-      D = D.scale(scale).plus(entry.getValue().scale(regCost));
+      if (activeMatricesOnly && D == null) {
+        // Fill in an empty matrix so the length of theta can match.
+ // TODO: might want to allow for sparse parameter vectors + derivatives.put(entry.getKey(), new SimpleMatrix(entry.getValue().numRows(), entry.getValue().numCols())); + continue; + } + SimpleMatrix regMatrix = entry.getValue(); + if (dropBiasColumn) { + regMatrix = new SimpleMatrix(regMatrix); + regMatrix.insertIntoThis(0, regMatrix.numCols() - 1, new SimpleMatrix(regMatrix.numRows(), 1)); + } + D = D.scale(scale).plus(regMatrix.scale(regCost)); derivatives.put(entry.getKey(), D); - cost += entry.getValue().elementMult(entry.getValue()).elementSum() * regCost / 2.0; + cost += regMatrix.elementMult(regMatrix).elementSum() * regCost / 2.0; } return cost; } @@ -255,7 +268,12 @@ private void backpropDerivativesAndError(Tree tree, SimpleMatrix deltaFromClass = model.getUnaryClassification(category).transpose().mult(deltaClass); deltaFromClass = deltaFromClass.extractMatrix(0, model.op.numHid, 0, 1).elementMult(currentVectorDerivative); SimpleMatrix deltaFull = deltaFromClass.plus(deltaUp); - wordVectorD.put(word, wordVectorD.get(word).plus(deltaFull)); + SimpleMatrix oldWordVectorD = wordVectorD.get(word); + if (oldWordVectorD == null) { + wordVectorD.put(word, deltaFull); + } else { + wordVectorD.put(word, oldWordVectorD.plus(deltaFull)); + } } else { // Otherwise, this must be a binary node String leftCategory = model.basicCategory(tree.children()[0].label().value()); diff --git a/src/edu/stanford/nlp/sentiment/SentimentModel.java b/src/edu/stanford/nlp/sentiment/SentimentModel.java index 66beaa1066..aebdc854fa 100644 --- a/src/edu/stanford/nlp/sentiment/SentimentModel.java +++ b/src/edu/stanford/nlp/sentiment/SentimentModel.java @@ -41,6 +41,11 @@ public class SentimentModel implements Serializable { */ public final Map unaryClassification; + /** + * Map from vocabulary words to word vectors. + * + * @see #getWordVector(String) + */ public Map wordVectors; /** @@ -318,12 +323,81 @@ public SentimentModel(RNNOptions op, List trainingTrees) { numUnaryMatrices = unaryClassification.size(); unaryClassificationSize = numClasses * (numHid + 1); - // System.err.println("Binary transform matrices:"); - // System.err.println(binaryTransform); - // System.err.println("Binary classification matrices:"); - // System.err.println(binaryClassification); - // System.err.println("Unary classification matrices:"); - // System.err.println(unaryClassification); + //System.err.println(this); + } + + /** + * Dumps *all* the matrices in a mostly readable format. 
+ */ + @Override + public String toString() { + StringBuilder output = new StringBuilder(); + + if (binaryTransform.size() > 0) { + if (binaryTransform.size() == 1) { + output.append("Binary transform matrix\n"); + } else { + output.append("Binary transform matrices\n"); + } + for (TwoDimensionalMap.Entry matrix : binaryTransform) { + if (!matrix.getFirstKey().equals("") || !matrix.getSecondKey().equals("")) { + output.append(matrix.getFirstKey() + " " + matrix.getSecondKey() + ":\n"); + } + output.append(NeuralUtils.toString(matrix.getValue(), "%.8f")); + } + } + + if (binaryTensors.size() > 0) { + if (binaryTensors.size() == 1) { + output.append("Binary transform tensor\n"); + } else { + output.append("Binary transform tensors\n"); + } + for (TwoDimensionalMap.Entry matrix : binaryTensors) { + if (!matrix.getFirstKey().equals("") || !matrix.getSecondKey().equals("")) { + output.append(matrix.getFirstKey() + " " + matrix.getSecondKey() + ":\n"); + } + output.append(matrix.getValue().toString("%.8f")); + } + } + + if (binaryClassification.size() > 0) { + if (binaryClassification.size() == 1) { + output.append("Binary classification matrix\n"); + } else { + output.append("Binary classification matrices\n"); + } + for (TwoDimensionalMap.Entry matrix : binaryClassification) { + if (!matrix.getFirstKey().equals("") || !matrix.getSecondKey().equals("")) { + output.append(matrix.getFirstKey() + " " + matrix.getSecondKey() + ":\n"); + } + output.append(NeuralUtils.toString(matrix.getValue(), "%.8f")); + } + } + + if (unaryClassification.size() > 0) { + if (unaryClassification.size() == 1) { + output.append("Unary classification matrix\n"); + } else { + output.append("Unary classification matrices\n"); + } + for (Map.Entry matrix : unaryClassification.entrySet()) { + if (!matrix.getKey().equals("")) { + output.append(matrix.getKey() + ":\n"); + } + output.append(NeuralUtils.toString(matrix.getValue(), "%.8f")); + } + } + + output.append("Word vectors\n"); + for (Map.Entry matrix : wordVectors.entrySet()) { + output.append("'" + matrix.getKey() + "'"); + output.append("\n"); + output.append(NeuralUtils.toString(matrix.getValue(), "%.8f")); + output.append("\n"); + } + + return output.toString(); } SimpleTensor randomBinaryTensor() { @@ -350,9 +424,10 @@ SimpleMatrix randomTransformBlock() { */ SimpleMatrix randomClassificationMatrix() { SimpleMatrix score = new SimpleMatrix(numClasses, numHid + 1); - // Leave the bias column with 0 values double range = 1.0 / (Math.sqrt((double) numHid)); score.insertIntoThis(0, 0, SimpleMatrix.random(numClasses, numHid, -range, range, rand)); + // bias column goes from 0 to 1 initially + score.insertIntoThis(0, numHid, SimpleMatrix.random(numClasses, 1, 0.0, 1.0, rand)); return score.scale(op.trainOptions.scalingForInit); } @@ -361,7 +436,7 @@ SimpleMatrix randomWordVector() { } static SimpleMatrix randomWordVector(int size, Random rand) { - return NeuralUtils.randomGaussian(size, 1, rand); + return NeuralUtils.randomGaussian(size, 1, rand).scale(0.1); } void initRandomWordVectors(List trainingTrees) { @@ -474,10 +549,22 @@ public SimpleMatrix getClassWForNode(Tree node) { } } + /** + * Retrieve a learned word vector for the given word. + * + * If the word is OOV, returns a vector associated with an + * {@code } term. + */ public SimpleMatrix getWordVector(String word) { return wordVectors.get(getVocabWord(word)); } + /** + * Get the known vocabulary word associated with the given word. 
+ * + * @return The form of the given word known by the model, or + * {@link #UNKNOWN_WORD} if this word has not been observed + */ public String getVocabWord(String word) { if (op.lowercaseWordVectors) { word = word.toLowerCase(); diff --git a/src/edu/stanford/nlp/sentiment/SentimentTraining.java b/src/edu/stanford/nlp/sentiment/SentimentTraining.java index 8ebf0a0e5e..b443b0f2db 100644 --- a/src/edu/stanford/nlp/sentiment/SentimentTraining.java +++ b/src/edu/stanford/nlp/sentiment/SentimentTraining.java @@ -1,5 +1,6 @@ package edu.stanford.nlp.sentiment; +import java.io.File; import java.text.DecimalFormat; import java.text.NumberFormat; import java.util.Arrays; @@ -8,6 +9,7 @@ import edu.stanford.nlp.trees.Tree; import edu.stanford.nlp.util.Generics; +import edu.stanford.nlp.util.StringUtils; import edu.stanford.nlp.util.Timing; public class SentimentTraining { @@ -58,7 +60,9 @@ public static void train(SentimentModel model, String modelPath, List trai } List shuffledSentences = Generics.newArrayList(trainingTrees); - Collections.shuffle(shuffledSentences, model.rand); + if (model.op.trainOptions.shuffleMatrices) { + Collections.shuffle(shuffledSentences, model.rand); + } for (int batch = 0; batch < numBatches; ++batch) { System.err.println("======================================"); System.err.println("Epoch " + epoch + " batch " + batch); @@ -82,7 +86,7 @@ public static void train(SentimentModel model, String modelPath, List trai break; } - if (batch == 0 && epoch > 0 && epoch % model.op.trainOptions.debugOutputEpochs == 0) { + if (batch == (numBatches - 1) && model.op.trainOptions.debugOutputEpochs > 0 && (epoch + 1) % model.op.trainOptions.debugOutputEpochs == 0) { double score = 0.0; if (devTrees != null) { Evaluate eval = new Evaluate(model); @@ -191,6 +195,10 @@ public static void main(String[] args) { System.err.println("Sentiment model options:\n" + op); SentimentModel model = new SentimentModel(op, trainingTrees); + if (op.trainOptions.initialMatrixLogPath != null) { + StringUtils.printToFile(new File(op.trainOptions.initialMatrixLogPath), model.toString(), false, false, "utf-8"); + } + // TODO: need to handle unk rules somehow... at test time the tree // structures might have something that we never saw at training // time. for example, we could put a threshold on all of the diff --git a/src/edu/stanford/nlp/sequences/ColumnTabDocumentReaderWriter.java b/src/edu/stanford/nlp/sequences/ColumnTabDocumentReaderWriter.java index 452ce5315f..8761028eee 100644 --- a/src/edu/stanford/nlp/sequences/ColumnTabDocumentReaderWriter.java +++ b/src/edu/stanford/nlp/sequences/ColumnTabDocumentReaderWriter.java @@ -361,7 +361,12 @@ public Annotation getNext() { docId = newDocId; itemCnt++; } - IN wi = tokenFactory.makeToken(map, info); + IN wi; + if (info.length == map.length) { + wi = tokenFactory.makeToken(map, info); + } else { + wi = tokenFactory.makeToken(map, Arrays.asList(info).subList(0, map.length).toArray(new String[map.length])); + } words.add(wi); } } else { diff --git a/src/edu/stanford/nlp/sequences/PlainTextDocumentReaderAndWriter.java b/src/edu/stanford/nlp/sequences/PlainTextDocumentReaderAndWriter.java index 6b44ae8cf3..bd5f09ff7b 100644 --- a/src/edu/stanford/nlp/sequences/PlainTextDocumentReaderAndWriter.java +++ b/src/edu/stanford/nlp/sequences/PlainTextDocumentReaderAndWriter.java @@ -21,6 +21,7 @@ /** * This class provides methods for reading plain text documents and writing out * those documents once classified in several different formats. 
+ * The output formats are named: slashTags, xml, inlineXML, tsv, tabbedEntities.
 *

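The formats listed above are also selectable by name at the API level; a hedged sketch, assuming a serialized CRF NER model path is supplied on the command line (the format names are the ones the javadoc above lists).

    import edu.stanford.nlp.ie.AbstractSequenceClassifier;
    import edu.stanford.nlp.ie.crf.CRFClassifier;
    import edu.stanford.nlp.ling.CoreLabel;

    public class OutputFormatSketch {
      public static void main(String[] args) {
        // args[0]: path to a serialized CRF NER model (an assumption here).
        AbstractSequenceClassifier<CoreLabel> ner =
            CRFClassifier.getClassifierNoExceptions(args[0]);
        // "inlineXML" is one of the formats named above; "tsv" and
        // "tabbedEntities" give the tabular outputs.
        System.out.println(ner.classifyToString("Bill Gates lives in Seattle.", "inlineXML", false));
      }
    }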
    * Implementation note: see * itest/src/edu/stanford/nlp/ie/crf/CRFClassifierITest.java for examples and diff --git a/src/edu/stanford/nlp/time/TimeFormatter.java b/src/edu/stanford/nlp/time/TimeFormatter.java index 0b12dcdc32..2e1c2310c4 100644 --- a/src/edu/stanford/nlp/time/TimeFormatter.java +++ b/src/edu/stanford/nlp/time/TimeFormatter.java @@ -36,6 +36,7 @@ public JavaDateFormatExtractor(String pattern) { this.format = new SimpleDateFormat(pattern); } + @Override public Value apply(CoreMap m) { try { // TODO: Allow specification of locale, pivot year (set2DigitYearStart) for interpreting 2 digit years @@ -86,6 +87,7 @@ static class ApplyActionWrapper implements Function { this.action = action; } + @Override public O apply(I in) { O v = base.apply(in); if (action != null) { diff --git a/src/edu/stanford/nlp/trees/ENUniversalPOS.tsurgeon b/src/edu/stanford/nlp/trees/ENUniversalPOS.tsurgeon new file mode 100644 index 0000000000..3c194d3d20 --- /dev/null +++ b/src/edu/stanford/nlp/trees/ENUniversalPOS.tsurgeon @@ -0,0 +1,297 @@ +% +% Context-sensitive mapping of PTB POS tags to +% Universal POS tags. +% +% Author: Sebastian Schuster +% +% +% Context-sensitive mappings +% +% +% TO -> PART (in CONJP phrases) +CONJP < TO=target < VB + +relabel target PART + +% TO -> PART +VP < VP < (/^TO$/=target <... {/.*/}) + +relabel target PART + +% TO -> ADP (otherwise) +TO=target <... {/.*/} + +relabel target ADP + +% delete the next two rules, third one should also cover them +% +% VB -> AUX (passive, case 1) +%VP < (/^VB/=target < /^(?i:am|is|are|r|be|being|'s|'re|'m|was|were|been|s|ai|m|art|ar|wase|get|got|getting|gets|gotten)$/ ) < (VP|ADJP [ < VBN|VBD | < (VP|ADJP < VBN|VBD) < CC ] ) +% +%relabel target AUX +% +% VB.* -> AUX (passive, case 2) +%SQ|SINV < (/^VB/=target < /^(?i:am|is|are|r|be|being|'s|'re|'m|was|were|been|s|ai|m|art|ar|wase)$/ $++ (VP < VBD|VBN)) +% +%relabel target AUX + +% VB.* -> AUX (active, case 1) +VP < VP < (/^VB.*$/=target <... {/.*/}) + +relabel target AUX + +% VB -> AUX (active, case 2) +SQ|SINV < (/^VB/=target $++ /^(?:VP|ADJP)/ <... {/.*/}) + +relabel target AUX + +% VB.* -> VERB +/^VB.*/=target <... {/.*/} + +relabel target VERB + +% IN -> SCONJ (only in case of subordinating conjunctions) +SBAR|SBAR-TMP < (IN=target $++ S|FRAG <... {/.*/}) + +relabel target SCONJ + +% IN -> PREP (otherwise) +IN=target <... {/.*/} + +relabel target PREP + +% NN -> SYM (in case of the percent sign) +NN=target <... {/\\%/} + +relabel target SYM + +% NN -> NOUN (otherwise) +NN=target <... {/.*/} + +relabel target NOUN + + +% NFP -> PUNCT (in case of possibly repeated hyphens, asterisks or tildes) +NFP=target <... {/^(~+|\*+|\-+)$/} + +relabel target PUNCT + +% NFP -> SYM (otherwise) +NFP=target <... {/.*/} + +relabel target SYM + +% +% 1 to 1 mappings +% +% +% CC -> CONJ +CC=target <... {/.*/} + +relabel target CONJ + +% CD -> NUM +CD=target <... {/.*/} + +relabel target NUM + +% DT -> DET +DT=target <... {/.*/} + +relabel target DET + +% EX -> DET +EX=target <... {/.*/} + +relabel target DET + +% FW -> X +FW=target <... {/.*/} + +relabel target X + +% JJ.* -> ADJ +JJ.*=target <... {/.*/} + +relabel target ADJ + +% LS -> X +LS=target <... {/.*/} + +relabel target X + +% MD -> AUX +MD=target <... {/.*/} + +relabel target AUX + +% NNS -> NOUN +NNS=target <... {/.*/} + +relabel target NOUN + +% NNP -> PROPN +NNP=target <... {/.*/} + +relabel target PROPN + +% NNPS -> PROPN +NNPS=target <... {/.*/} + +relabel target PROPN + +% PDT -> DET +PDT=target <... 
{/.*/} + +relabel target DET + +% POS -> PART +POS=target <... {/.*/} + +relabel target PART + +% PRP -> PRON +PRP=target <... {/.*/} + +relabel target PRON + +% PRP$ -> PRON +/^PRP\$$/=target <... {/.*/} + +relabel target PRON + +% RB -> ADV +RB=target <... {/.*/} + +relabel target ADV + +% RBR -> ADV +RBR=target <... {/.*/} + +relabel target ADV + +% RBS -> ADV +RBS=target <... {/.*/} + +relabel target ADV + +% RP -> ADP +RP=target <... {/.*/} + +relabel target ADP + +% UH -> INTJ +UH=target <... {/.*/} + +relabel target INTJ + +% WDT -> DET +WDT=target <... {/.*/} + +relabel target DET + +% WP -> PRON +WP=target <... {/.*/} + +relabel target PRON + +% WP$ -> PRON +/^WP\$$/=target <... {/.*/} + +relabel target PRON + +% WRB -> ADV +WRB=target <... {/.*/} + +relabel target ADV + +% `` -> PUNCT +/^``$/=target <... {/.*/} + +relabel target PUNCT + +% ! -> PUNCT +/^!$/=target <... {/.*/} + +relabel target PUNCT + +% # -> SYM +/^#$/=target <... {/.*/} + +relabel target SYM + +% $ -> SYM +/^\$$/=target <... {/.*/} + +relabel target SYM + +% ' -> PUNCT +/^'$/=target <... {/.*/} + +relabel target PUNCT + +% ( -> PUNCT +/^\($/=target <... {/.*/} + +relabel target PUNCT + +% ) -> PUNCT +/^\)$/=target <... {/.*/} + +relabel target PUNCT + +% , -> PUNCT +/^,$/=target <... {/.*/} + +relabel target PUNCT + +% -LRB- -> PUNCT +/^-LRB-$/=target <... {/.*/} + +relabel target PUNCT + +% -RRB- -> PUNCT +/^-RRB-$/=target <... {/.*/} + +relabel target PUNCT + +% . -> PUNCT +/^\.$/=target <... {/.*/} + +relabel target PUNCT + +% : -> PUNCT +/^:$/=target <... {/.*/} + +relabel target PUNCT + +% ? -> PUNCT +/^\?$/=target <... {/.*/} + +relabel target PUNCT + +% ADD -> X +ADD=target <... {/.*/} + +relabel target X + +% AFX -> X +AFX=target <... {/.*/} + +relabel target X + + %GW -> X +GW=target <... {/.*/} + +relabel target X + +% HYPH -> PUNCT +HYPH=target <... {/.*/} + +relabel target PUNCT + +% XX -> X +XX=target <... {/.*/} + +relabel target X + diff --git a/src/edu/stanford/nlp/trees/EnglishGrammaticalStructure.java b/src/edu/stanford/nlp/trees/EnglishGrammaticalStructure.java index a5e91d7a3b..440e0140e5 100644 --- a/src/edu/stanford/nlp/trees/EnglishGrammaticalStructure.java +++ b/src/edu/stanford/nlp/trees/EnglishGrammaticalStructure.java @@ -82,7 +82,7 @@ public EnglishGrammaticalStructure(Tree t, Predicate puncFilter, HeadFin */ public EnglishGrammaticalStructure(Tree t, Predicate puncFilter, HeadFinder hf, boolean threadSafe) { // the tree is normalized (for index and functional tag stripping) inside CoordinationTransformer - super((new CoordinationTransformer(hf)).transformTree(t.deepCopy()), EnglishGrammaticalRelations.values(threadSafe), threadSafe ? EnglishGrammaticalRelations.valuesLock() : null, hf, puncFilter); + super(t, EnglishGrammaticalRelations.values(threadSafe), threadSafe ? 
EnglishGrammaticalRelations.valuesLock() : null, new CoordinationTransformer(hf), hf, puncFilter); } /** Used for postprocessing CoNLL X dependencies */ diff --git a/src/edu/stanford/nlp/trees/GrammaticalStructure.java b/src/edu/stanford/nlp/trees/GrammaticalStructure.java index 7ced6dd397..5b58a0db8d 100644 --- a/src/edu/stanford/nlp/trees/GrammaticalStructure.java +++ b/src/edu/stanford/nlp/trees/GrammaticalStructure.java @@ -7,6 +7,7 @@ import java.util.concurrent.locks.Lock; import edu.stanford.nlp.graph.DirectedMultiGraph; +import edu.stanford.nlp.io.IOUtils; import edu.stanford.nlp.io.RuntimeIOException; import edu.stanford.nlp.ling.CoreAnnotations; import edu.stanford.nlp.ling.AbstractCoreLabel; @@ -18,9 +19,13 @@ import edu.stanford.nlp.parser.lexparser.TreebankLangParserParams; import edu.stanford.nlp.process.PTBTokenizer; import edu.stanford.nlp.process.WhitespaceTokenizer; +import edu.stanford.nlp.trees.tregex.TregexPattern; +import edu.stanford.nlp.trees.tregex.TregexPatternCompiler; +import edu.stanford.nlp.trees.tregex.tsurgeon.Tsurgeon; +import edu.stanford.nlp.trees.tregex.tsurgeon.TsurgeonPattern; import edu.stanford.nlp.util.*; -import java.util.function.Predicate; +import java.util.function.Predicate; import java.util.function.Function; import static edu.stanford.nlp.trees.GrammaticalRelation.DEPENDENT; @@ -85,6 +90,7 @@ public abstract class GrammaticalStructure implements Serializable { * @param t A Tree to analyze * @param relations A set of GrammaticalRelations to consider * @param relationsLock Something needed to make this thread-safe + * @param transformer A transformer to apply to the tree before converting * @param hf A HeadFinder for analysis * @param puncFilter A Filter to reject punctuation. To delete punctuation * dependencies, this filter should return false on @@ -93,8 +99,22 @@ public abstract class GrammaticalStructure implements Serializable { * should pass in a Filters.<String>acceptFilter(). */ public GrammaticalStructure(Tree t, Collection relations, - Lock relationsLock, HeadFinder hf, Predicate puncFilter) { - this.root = new TreeGraphNode(t, this); + Lock relationsLock, TreeTransformer transformer, + HeadFinder hf, Predicate puncFilter) { + TreeGraphNode treegraph = new TreeGraphNode(t, (TreeGraphNode) null); + // TODO: create the tree and reuse the leaf labels in one pass, + // avoiding a wasteful copy of the labels. 
+ Trees.setLeafLabels(treegraph, t.yield()); + Trees.setLeafTagsIfUnset(treegraph); + if (transformer != null) { + Tree transformed = transformer.transformTree(treegraph); + if (!(transformed instanceof TreeGraphNode)) { + throw new RuntimeException("Transformer did not change TreeGraphNode into another TreeGraphNode: " + transformer); + } + this.root = (TreeGraphNode) transformed; + } else { + this.root = treegraph; + } indexNodes(this.root); // add head word and tag to phrase nodes if (hf == null) { @@ -342,7 +362,7 @@ public GrammaticalStructure(List projectiveDependencies, TreeGr public GrammaticalStructure(Tree t, Collection relations, HeadFinder hf, Predicate puncFilter) { - this(t, relations, null, hf, puncFilter); + this(t, relations, null, null, hf, puncFilter); } @Override @@ -422,7 +442,7 @@ private List getDeps(Predicate puncTypedDepFil for (TreeGraphNode gov : basicGraph.getAllVertices()) { for (TreeGraphNode dep : basicGraph.getChildren(gov)) { - GrammaticalRelation reln = getGrammaticalRelationCommonAncestor(gov.label(), dep.label(), basicGraph.getEdges(gov, dep)); + GrammaticalRelation reln = getGrammaticalRelationCommonAncestor(gov.headWordNode().label(), gov.label(), dep.headWordNode().label(), dep.label(), basicGraph.getEdges(gov, dep)); // System.err.println(" Gov: " + gov + " Dep: " + dep + " Reln: " + reln); basicDep.add(new TypedDependency(reln, new IndexedWord(gov.headWordNode().label()), new IndexedWord(dep.headWordNode().label()))); } @@ -586,22 +606,24 @@ public GrammaticalRelation getGrammaticalRelation(IndexedWord gov, IndexedWord d } } - return getGrammaticalRelationCommonAncestor(gov, dep, labels); + return getGrammaticalRelationCommonAncestor(gov, gov, dep, dep, labels); } /** * Returns the GrammaticalRelation which is the highest common - * ancestor of the list of relations passed in. The IndexedWords - * are passed in only for debugging reasons. + * ancestor of the list of relations passed in. The Labels are + * passed in only for debugging reasons. 
gov & dep are the + * labels with the text, govH and depH can be higher labels in the + * tree which represent the category */ - private static GrammaticalRelation getGrammaticalRelationCommonAncestor(AbstractCoreLabel govH, AbstractCoreLabel depH, List labels) { + private static GrammaticalRelation getGrammaticalRelationCommonAncestor(AbstractCoreLabel gov, AbstractCoreLabel govH, AbstractCoreLabel dep, AbstractCoreLabel depH, List labels) { GrammaticalRelation reln = GrammaticalRelation.DEPENDENT; List sortedLabels; if (labels.size() <= 1) { sortedLabels = labels; } else { - sortedLabels = new ArrayList(labels); + sortedLabels = new ArrayList(labels); Collections.sort(sortedLabels, new NameComparator()); } // System.err.println(" gov " + govH + " dep " + depH + " arc labels: " + sortedLabels); @@ -616,11 +638,11 @@ private static GrammaticalRelation getGrammaticalRelationCommonAncestor(Abstract } if (PRINT_DEBUGGING && reln.equals(GrammaticalRelation.DEPENDENT)) { String topCat = govH.get(CoreAnnotations.ValueAnnotation.class); - String topTag = govH.get(TreeCoreAnnotations.HeadTagAnnotation.class).value(); - String topWord = govH.get(TreeCoreAnnotations.HeadWordAnnotation.class).value(); + String topTag = gov.tag(); + String topWord = gov.value(); String botCat = depH.get(CoreAnnotations.ValueAnnotation.class); - String botTag = depH.get(TreeCoreAnnotations.HeadTagAnnotation.class).value(); - String botWord = depH.get(TreeCoreAnnotations.HeadWordAnnotation.class).value(); + String botTag = dep.tag(); + String botWord = dep.value(); System.err.println("### dep\t" + topCat + "\t" + topTag + "\t" + topWord + "\t" + botCat + "\t" + botTag + "\t" + botWord + "\t"); } @@ -923,9 +945,14 @@ public static String dependenciesToString(GrammaticalStructure gs, Collection leaves = tree.getLeaves(); + Tree uposTree = UniversalPOSMapper.mapTree(tree); + List

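The mapTree call introduced above can be exercised on its own; a sketch under the assumption that UniversalPOSMapper.mapTree is the same static method dependenciesToString() now calls, with a toy PTB tree standing in for parser output.

    import edu.stanford.nlp.trees.Tree;
    import edu.stanford.nlp.trees.UniversalPOSMapper;

    public class UposSketch {
      public static void main(String[] args) {
        Tree tree = Tree.valueOf("(ROOT (S (NP (DT The) (NN dog)) (VP (VBZ barks))))");
        // Applies the ENUniversalPOS.tsurgeon rules, e.g. NN -> NOUN, DT -> DET.
        Tree uposTree = UniversalPOSMapper.mapTree(tree);
        System.out.println(uposTree);
      }
    }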
    @@ -43,10 +41,9 @@ public class TreeGraphNode extends Tree implements HasParent { protected TreeGraphNode[] children = ZERO_TGN_CHILDREN; /** - * The {@link GrammaticalStructure GrammaticalStructure} of which this - * node is part. + * For internal nodes, the head word of this subtree. */ - protected GrammaticalStructure tg; + private TreeGraphNode headWordNode; /** * A leaf node should have a zero-length array for its @@ -56,13 +53,8 @@ public class TreeGraphNode extends Tree implements HasParent { */ protected static final TreeGraphNode[] ZERO_TGN_CHILDREN = new TreeGraphNode[0]; - private static LabelFactory mlf = CoreLabel.factory(); + private static final LabelFactory mlf = CoreLabel.factory(); - /** - * Create a new empty TreeGraphNode. - */ - public TreeGraphNode() { - } /** * Create a new TreeGraphNode with the supplied @@ -87,24 +79,6 @@ public TreeGraphNode(Label label, List children) { setChildren(children); } - /** - * Create a new TreeGraphNode having the same tree - * structure and label values as an existing tree (but no shared - * storage). - * @param t the tree to copy - * @param graph the graph of which this node is a part - */ - public TreeGraphNode(Tree t, GrammaticalStructure graph) { - this(t, (TreeGraphNode) null); - this.setTreeGraph(graph); - } - - // XXX TODO it's not really clear what graph the copy should be a part of - public TreeGraphNode(TreeGraphNode t) { - this(t, t.parent); - this.setTreeGraph(t.treeGraph()); - } - /** * Create a new TreeGraphNode having the same tree * structure and label values as an existing tree (but no shared @@ -158,6 +132,15 @@ public CoreLabel label() { return label; } + @Override + public void setLabel(Label label) { + if (label instanceof CoreLabel) { + this.setLabel((CoreLabel) label); + } else { + this.setLabel((CoreLabel) mlf.newLabel(label)); + } + } + /** * Sets the label associated with the current node. * @@ -218,10 +201,14 @@ public void setChildren(Tree[] children) { } else { if (children instanceof TreeGraphNode[]) { this.children = (TreeGraphNode[]) children; + for (TreeGraphNode child : this.children) { + child.setParent(this); + } } else { this.children = new TreeGraphNode[children.length]; for (int i = 0; i < children.length; i++) { this.children[i] = (TreeGraphNode)children[i]; + this.children[i].setParent(this); } } } @@ -240,24 +227,60 @@ public void setChildren(List childTreesList) { } } + @Override + public Tree setChild(int i, Tree t) { + if (!(t instanceof TreeGraphNode)) { + throw new IllegalArgumentException("Horrible error"); + } + ((TreeGraphNode) t).setParent(this); + return super.setChild(i, t); + } + /** - * Get the GrammaticalStructure of which this node is a - * part. + * Adds a child in the ith location. Does so without overwriting + * the parent pointers of the rest of the children, which might be + * relevant in case there are add and remove operations mixed + * together. 
*/ - protected GrammaticalStructure treeGraph() { - return tg; + @Override + public void addChild(int i, Tree t) { + if (!(t instanceof TreeGraphNode)) { + throw new IllegalArgumentException("Horrible error"); + } + ((TreeGraphNode) t).setParent(this); + TreeGraphNode[] kids = this.children; + TreeGraphNode[] newKids = new TreeGraphNode[kids.length + 1]; + if (i != 0) { + System.arraycopy(kids, 0, newKids, 0, i); + } + newKids[i] = (TreeGraphNode) t; + if (i != kids.length) { + System.arraycopy(kids, i, newKids, i + 1, kids.length - i); + } + this.children = newKids; } /** - * Set pointer to the GrammaticalStructure of which this node - * is a part. Operates recursively to set pointer for all - * descendants too. + * Removes the ith child from the TreeGraphNode. Needs to override + * the parent removeChild so it can avoid setting the parent + * pointers on the remaining children. This is useful if you want + * to add and remove children from one node to another node; this way, + * it won't matter what order you do the add and remove operations. */ - protected void setTreeGraph(GrammaticalStructure tg) { - this.tg = tg; - for (TreeGraphNode child : children) { - child.setTreeGraph(tg); + @Override + public Tree removeChild(int i) { + TreeGraphNode[] kids = children(); + TreeGraphNode kid = kids[i]; + TreeGraphNode[] newKids = new TreeGraphNode[kids.length - 1]; + for (int j = 0; j < newKids.length; j++) { + if (j < i) { + newKids[j] = kids[j]; + } else { + newKids[j] = kids[j + 1]; + } } + this.children = newKids; + return kid; } /** @@ -298,14 +321,6 @@ public void percolateHeads(HeadFinder hf) { } else { setHeadWordNode(hwn); } - - TreeGraphNode htn = head.headTagNode(); - if (htn == null && head.isLeaf()) { // below us is a leaf - setHeadTagNode(this); - } else { - setHeadTagNode(htn); - } - } else { System.err.println("Head is null: " + this); } @@ -324,12 +339,8 @@ public void percolateHeads(HeadFinder hf) { * @return the node containing the head word for this node */ public TreeGraphNode headWordNode() { - TreeGraphNode hwn = safeCast(label.get(TreeCoreAnnotations.HeadWordAnnotation.class)); - if (hwn == null || (hwn.treeGraph() != null && !(hwn.treeGraph().equals(this.treeGraph())))) { - return null; - } - return hwn; - } + return headWordNode; + } /** * Store the node containing the head word for this node by @@ -343,41 +354,7 @@ public TreeGraphNode headWordNode() { * @param hwn the node containing the head word for this node */ private void setHeadWordNode(final TreeGraphNode hwn) { - label.set(TreeCoreAnnotations.HeadWordAnnotation.class, hwn); - } - - /** - * Return the node containing the head tag for this node (or - * null if none), as recorded in this node's {@link - * CoreLabel CoreLabel}. (In contrast to {@link - * edu.stanford.nlp.ling.CategoryWordTag - * CategoryWordTag}, we store head words and head - * tags as references to nodes, not merely as - * Strings.) - * - * @return the node containing the head tag for this node - */ - public TreeGraphNode headTagNode() { - TreeGraphNode htn = safeCast(label.get(TreeCoreAnnotations.HeadTagAnnotation.class)); - if (htn == null || (htn.treeGraph() != null && !(htn.treeGraph().equals(this.treeGraph())))) { - return null; - } - return htn; - } - - /** - * Store the node containing the head tag for this node by - * storing it in this node's {@link CoreLabel - * CoreLabel}. 
(In contrast to {@link - * edu.stanford.nlp.ling.CategoryWordTag - * CategoryWordTag}, we store head words and head - * tags as references to nodes, not merely as - * Strings.) - * - * @param htn the node containing the head tag for this node - */ - private void setHeadTagNode(final TreeGraphNode htn) { - label.set(TreeCoreAnnotations.HeadTagAnnotation.class, htn); + this.headWordNode = hwn; } /** @@ -515,17 +492,12 @@ public String toOneLineString() { return buf.toString(); } -// public String toPrimes() { -// int copy = label().copyCount(); -// return StringUtils.repeat('\'', copy); -// } @Override public String toString() { return toString(CoreLabel.DEFAULT_FORMAT); } - //TODO: is it important to have the toPrimes() string attached to this? (SG) Currently it is not. public String toString(CoreLabel.OutputFormat format) { return label.toString(format); } diff --git a/src/edu/stanford/nlp/trees/Trees.java b/src/edu/stanford/nlp/trees/Trees.java index e73d784d93..cc2c566603 100644 --- a/src/edu/stanford/nlp/trees/Trees.java +++ b/src/edu/stanford/nlp/trees/Trees.java @@ -180,6 +180,47 @@ private static void taggedLeafLabels(Tree t, List l) { } } + /** + * Given a tree, set the tags on the leaf nodes if they are not + * already set. Do this by using the preterminal's value as a tag. + */ + public static void setLeafTagsIfUnset(Tree tree) { + if (tree.isPreTerminal()) { + Tree leaf = tree.children()[0]; + if (!(leaf.label() instanceof HasTag)) { + return; + } + HasTag label = (HasTag) leaf.label(); + if (label.tag() == null) { + label.setTag(tree.value()); + } + } else { + for (Tree child : tree.children()) { + setLeafTagsIfUnset(child); + } + } + } + + /** + * Replace the labels of the leaves with the given leaves. + */ + public static void setLeafLabels(Tree tree, List

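The new Trees.setLeafTagsIfUnset helper above is easy to see in action; a sketch, assuming CoreLabel-backed trees so that the leaves implement HasTag (the guard the method checks before copying tags down).

    import edu.stanford.nlp.ling.CoreLabel;
    import edu.stanford.nlp.trees.LabeledScoredTreeReaderFactory;
    import edu.stanford.nlp.trees.Tree;
    import edu.stanford.nlp.trees.Trees;

    public class LeafTagSketch {
      public static void main(String[] args) {
        // Read with a CoreLabel factory so leaf labels implement HasTag.
        Tree tree = Tree.valueOf("(ROOT (S (NP (DT The) (NN dog)) (VP (VBZ barks))))",
            new LabeledScoredTreeReaderFactory(CoreLabel.factory()));
        Trees.setLeafTagsIfUnset(tree);
        CoreLabel dog = (CoreLabel) tree.getLeaves().get(1).label();
        System.out.println(dog.value() + "/" + dog.tag());  // dog/NN
      }
    }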
 • -hf <headfinder-class-name> use the specified {@link HeadFinder} class to determine headship relations.
 • -hfArg <string> pass a string argument in to the {@link HeadFinder} class's constructor. -hfArg can be used multiple times to pass in multiple arguments.
 • -trf <TreeReaderFactory-class-name> use the specified {@link TreeReaderFactory} class to read trees from files.
+ • -e <extension> Only attempt to read files with the given extension. If not provided, will attempt to read all files.
 *
 • -v print every tree that contains no matches of the specified pattern, but print no matches to the pattern.
 *
  • -x Instead of the matched subtree, print the matched subtree's identifying number as defined in tgrep2:a @@ -582,6 +584,8 @@ public static void main(String[] args) throws IOException { String headFinderOption = "-hf"; String headFinderArgOption = "-hfArg"; String trfOption = "-trf"; + String extensionOption = "-e"; + String extension = null; String headFinderClassName = null; String[] headFinderArgs = StringUtils.EMPTY_STRING_ARRAY; String treeReaderFactoryClassName = null; @@ -612,6 +616,7 @@ public static void main(String[] args) throws IOException { flagMap.put(headFinderOption,1); flagMap.put(headFinderArgOption,1); flagMap.put(trfOption,1); + flagMap.put(extensionOption, 1); flagMap.put(macroOption, 1); flagMap.put(yieldOnly, 0); flagMap.put(quietMode, 0); @@ -643,7 +648,7 @@ public static void main(String[] args) throws IOException { } if (args.length < 1) { - errPW.println("Usage: java edu.stanford.nlp.trees.tregex.TregexPattern [-T] [-C] [-w] [-f] [-o] [-n] [-s] [-filter] [-hf class] [-trf class] [-h handle]* pattern [filepath]"); + errPW.println("Usage: java edu.stanford.nlp.trees.tregex.TregexPattern [-T] [-C] [-w] [-f] [-o] [-n] [-s] [-filter] [-hf class] [-trf class] [-h handle]* [-e ext] pattern [filepath]"); return; } String matchString = args[0]; @@ -662,6 +667,9 @@ public static void main(String[] args) throws IOException { treeReaderFactoryClassName = argsMap.get(trfOption)[0]; errPW.println("Using tree reader factory " + treeReaderFactoryClassName + "..."); } + if (argsMap.containsKey(extensionOption)) { + extension = argsMap.get(extensionOption)[0]; + } if (argsMap.containsKey(printAllTrees)) { TRegexTreeVisitor.printTree = true; } @@ -743,8 +751,9 @@ public static void main(String[] args) throws IOException { int last = args.length - 1; errPW.println("Reading trees from file(s) " + args[last]); TreeReaderFactory trf = getTreeReaderFactory(treeReaderFactoryClassName); + treebank = new DiskTreebank(trf, encoding); - treebank.loadPath(args[last], null, true); + treebank.loadPath(args[last], extension, true); } TRegexTreeVisitor vis = new TRegexTreeVisitor(p, handles, encoding); diff --git a/src/edu/stanford/nlp/trees/tregex/tsurgeon/AdjoinNode.java b/src/edu/stanford/nlp/trees/tregex/tsurgeon/AdjoinNode.java index cc863519e6..4a7a0605a0 100644 --- a/src/edu/stanford/nlp/trees/tregex/tsurgeon/AdjoinNode.java +++ b/src/edu/stanford/nlp/trees/tregex/tsurgeon/AdjoinNode.java @@ -45,7 +45,7 @@ public Tree evaluate(Tree tree, TregexMatcher tregex) { Tree targetNode = childMatcher[0].evaluate(tree, tregex); Tree parent = targetNode.parent(tree); // put children underneath target in foot of auxilary tree - AuxiliaryTree ft = adjunctionTree.copy(this); + AuxiliaryTree ft = adjunctionTree.copy(this, tree.treeFactory(), tree.label().labelFactory()); ft.foot.setChildren(targetNode.getChildrenAsList()); // replace match with root of auxiliary tree if (parent==null) { diff --git a/src/edu/stanford/nlp/trees/tregex/tsurgeon/AdjoinToFootNode.java b/src/edu/stanford/nlp/trees/tregex/tsurgeon/AdjoinToFootNode.java index 47e11735fd..cc840d531b 100644 --- a/src/edu/stanford/nlp/trees/tregex/tsurgeon/AdjoinToFootNode.java +++ b/src/edu/stanford/nlp/trees/tregex/tsurgeon/AdjoinToFootNode.java @@ -30,7 +30,7 @@ public Tree evaluate(Tree tree, TregexMatcher tregex) { Tree targetNode = childMatcher[0].evaluate(tree, tregex); Tree parent = targetNode.parent(tree); // substitute original node for foot of auxiliary tree. 
Foot node is ignored - AuxiliaryTree ft = adjunctionTree().copy(this); + AuxiliaryTree ft = adjunctionTree().copy(this, tree.treeFactory(), tree.label().labelFactory()); // System.err.println("ft=" + ft + "; ft.foot=" + ft.foot + "; ft.tree=" + ft.tree); Tree parentOfFoot = ft.foot.parent(ft.tree); if (parentOfFoot == null) { diff --git a/src/edu/stanford/nlp/trees/tregex/tsurgeon/AdjoinToHeadNode.java b/src/edu/stanford/nlp/trees/tregex/tsurgeon/AdjoinToHeadNode.java index a002eddebf..a8d22a4378 100644 --- a/src/edu/stanford/nlp/trees/tregex/tsurgeon/AdjoinToHeadNode.java +++ b/src/edu/stanford/nlp/trees/tregex/tsurgeon/AdjoinToHeadNode.java @@ -29,7 +29,7 @@ public Tree evaluate(Tree tree, TregexMatcher tregex) { // find match Tree targetNode = childMatcher[0].evaluate(tree, tregex); // put children underneath target in foot of auxilary tree - AuxiliaryTree ft = adjunctionTree().copy(this); + AuxiliaryTree ft = adjunctionTree().copy(this, tree.treeFactory(), tree.label().labelFactory()); ft.foot.setChildren(targetNode.getChildrenAsList()); // put children of auxiliary tree under target. root of auxiliary tree is ignored. root of original is maintained. targetNode.setChildren(ft.tree.getChildrenAsList()); diff --git a/src/edu/stanford/nlp/trees/tregex/tsurgeon/AuxiliaryTree.java b/src/edu/stanford/nlp/trees/tregex/tsurgeon/AuxiliaryTree.java index 366a62d8bf..9b88924414 100644 --- a/src/edu/stanford/nlp/trees/tregex/tsurgeon/AuxiliaryTree.java +++ b/src/edu/stanford/nlp/trees/tregex/tsurgeon/AuxiliaryTree.java @@ -1,6 +1,9 @@ package edu.stanford.nlp.trees.tregex.tsurgeon; +import edu.stanford.nlp.ling.CoreLabel; +import edu.stanford.nlp.ling.LabelFactory; import edu.stanford.nlp.trees.Tree; +import edu.stanford.nlp.trees.TreeFactory; import edu.stanford.nlp.util.Generics; import edu.stanford.nlp.util.Pair; @@ -51,10 +54,19 @@ public String toString() { /** * Copies the Auxiliary tree. Also, puts the new names->nodes map in the TsurgeonMatcher that called copy. + *
+   * The trees and labels to use when making the copy are specified
+   * with treeFactory and labelFactory. This lets the tsurgeon script
+   * produce trees which are of the same type as the input trees.
+   * Each of the tsurgeon relations which copies a tree should
+   * pass in the correct factories.
    */
-  public AuxiliaryTree copy(TsurgeonMatcher matcher) {
+  public AuxiliaryTree copy(TsurgeonMatcher matcher, TreeFactory treeFactory, LabelFactory labelFactory) {
+    if (labelFactory == null) {
+      labelFactory = CoreLabel.factory();
+    }
     Map newNamesToNodes = Generics.newHashMap();
-    Pair result = copyHelper(tree,newNamesToNodes);
+    Pair result = copyHelper(tree, newNamesToNodes, treeFactory, labelFactory);
     //if(! result.first().dominates(result.second()))
     //System.err.println("Error -- aux tree copy doesn't dominate foot copy.");
     matcher.newNodeNames.putAll(newNamesToNodes);
@@ -62,20 +74,20 @@ public AuxiliaryTree copy(TsurgeonMatcher matcher) {
   }

   // returns Pair
-  private Pair copyHelper(Tree node,Map newNamesToNodes) {
+  private Pair copyHelper(Tree node, Map newNamesToNodes, TreeFactory treeFactory, LabelFactory labelFactory) {
     Tree clone;
     Tree newFoot = null;
     if (node.isLeaf()) {
       if (node == foot) { // found the foot node; pass it up.
-        clone = node.treeFactory().newTreeNode(node.label(),new ArrayList(0));
+        clone = treeFactory.newTreeNode(node.label(), new ArrayList(0));
         newFoot = clone;
       } else {
-        clone = node.treeFactory().newLeaf(node.label().labelFactory().newLabel(node.label()));
+        clone = treeFactory.newLeaf(labelFactory.newLabel(node.label()));
       }
     } else {
       List newChildren = new ArrayList(node.children().length);
       for (Tree child : node.children()) {
-        Pair newChild = copyHelper(child,newNamesToNodes);
+        Pair newChild = copyHelper(child, newNamesToNodes, treeFactory, labelFactory);
         newChildren.add(newChild.first());
         if (newChild.second() != null) {
           if (newFoot != null) {
@@ -84,7 +96,7 @@ private Pair copyHelper(Tree node,Map newNamesToNodes) {
           newFoot = newChild.second();
         }
       }
-      clone = node.treeFactory().newTreeNode(node.label().labelFactory().newLabel(node.label()),newChildren);
+      clone = treeFactory.newTreeNode(labelFactory.newLabel(node.label()),newChildren);
     }
     if (nodesToNames.containsKey(node))
diff --git a/src/edu/stanford/nlp/trees/tregex/tsurgeon/CreateSubtreeNode.java b/src/edu/stanford/nlp/trees/tregex/tsurgeon/CreateSubtreeNode.java
index 2ea755f4c9..1b326554cb 100644
--- a/src/edu/stanford/nlp/trees/tregex/tsurgeon/CreateSubtreeNode.java
+++ b/src/edu/stanford/nlp/trees/tregex/tsurgeon/CreateSubtreeNode.java
@@ -72,7 +72,7 @@ public Tree evaluate(Tree tree, TregexMatcher tregex) {
       throw new TsurgeonRuntimeException("Parents did not match for trees when applied to " + this);
     }
-    AuxiliaryTree treeCopy = auxTree.copy(this);
+    AuxiliaryTree treeCopy = auxTree.copy(this, tree.treeFactory(), tree.label().labelFactory());
     // Collect all the children of the parent of the node we care
     // about.
If the child is one of the nodes we care about, or diff --git a/src/edu/stanford/nlp/trees/tregex/tsurgeon/HoldTreeNode.java b/src/edu/stanford/nlp/trees/tregex/tsurgeon/HoldTreeNode.java index f3b16c0a11..d01f826b2a 100644 --- a/src/edu/stanford/nlp/trees/tregex/tsurgeon/HoldTreeNode.java +++ b/src/edu/stanford/nlp/trees/tregex/tsurgeon/HoldTreeNode.java @@ -29,7 +29,7 @@ public Matcher(Map newNodeNames, CoindexationGenerator coindexer) { @Override public Tree evaluate(Tree tree, TregexMatcher tregex) { - return subTree.copy(this).tree; + return subTree.copy(this, tree.treeFactory(), tree.label().labelFactory()).tree; } } diff --git a/src/edu/stanford/nlp/trees/tregex/tsurgeon/Tsurgeon.java b/src/edu/stanford/nlp/trees/tregex/tsurgeon/Tsurgeon.java index 565900a55e..8bb666c4dc 100644 --- a/src/edu/stanford/nlp/trees/tregex/tsurgeon/Tsurgeon.java +++ b/src/edu/stanford/nlp/trees/tregex/tsurgeon/Tsurgeon.java @@ -441,11 +441,7 @@ public static String getTregexPatternFromReader(BufferedReader reader) throws IO // A blank line after getting some real content (not just comments or nothing) break; } - Matcher m = commentPattern.matcher(thisLine); - if (m.matches()) { - // delete it - thisLine = m.replaceFirst(""); - } + thisLine = removeComments(thisLine); if ( ! emptyLinePattern.matcher(thisLine).matches()) { matchString.append(thisLine); } diff --git a/src/edu/stanford/nlp/trees/tregex/tsurgeon/TsurgeonParser.java b/src/edu/stanford/nlp/trees/tregex/tsurgeon/TsurgeonParser.java index a90bc88066..23adbc264d 100644 --- a/src/edu/stanford/nlp/trees/tregex/tsurgeon/TsurgeonParser.java +++ b/src/edu/stanford/nlp/trees/tregex/tsurgeon/TsurgeonParser.java @@ -4,7 +4,6 @@ import edu.stanford.nlp.trees.*; import edu.stanford.nlp.util.Generics; - import java.util.*; diff --git a/src/edu/stanford/nlp/trees/tregex/tsurgeon/TsurgeonParser.jj b/src/edu/stanford/nlp/trees/tregex/tsurgeon/TsurgeonParser.jj index 39ef7199a5..869300e92e 100644 --- a/src/edu/stanford/nlp/trees/tregex/tsurgeon/TsurgeonParser.jj +++ b/src/edu/stanford/nlp/trees/tregex/tsurgeon/TsurgeonParser.jj @@ -80,7 +80,7 @@ TOKEN: < CLOSE_BRACKET: "]" > | < SELECTION: "#rightmost" | "#leftmost" > | < GENERAL_RELABEL: "/" ("\\/"|~["\n","\r"])* "/" ( ("=" (["0"-"9","_","a"-"z","A"-"Z"])+) | ("%" (["0"-"9","_","a"-"z","A"-"Z"])+) | ("/" ("\\/"|"\\]"|"\\\\"|~["\n","\r","]","\\"])* "/") )+ > -| < IDENTIFIER: ["-","*","a"-"z","A"-"Z"] ( ["-","*","a"-"z","A"-"Z","_","0"-"9","|","@","="] )* > +| < IDENTIFIER: ~[" ","0"-"9","\n","\r","(","/","|","@","!","#","%","&",")","=","?","[","]",">","<","~","_",".",",","$",":","{","}",";"] ( ~[" ","\n","\r","(","/","@","!","#","%","&",")","=","?","[","]",">","<","~",".",",","$",":"] )* > | < LOCATION_RELATION: "$-" | "$+" | ">" ("-")? 
(["0"-"9"])+ > | < REGEX: "/" ("\\/"|"\\]"|"\\\\"|~["\n","\r","]","\\"])* "/" > | < QUOTEX: "|" ("\\|"|"\\\\"|~["\n","\r","|","\\"])* "|" > diff --git a/src/edu/stanford/nlp/trees/tregex/tsurgeon/TsurgeonParser.jjt b/src/edu/stanford/nlp/trees/tregex/tsurgeon/TsurgeonParser.jjt index 674e30e070..eacd4cf1ed 100644 --- a/src/edu/stanford/nlp/trees/tregex/tsurgeon/TsurgeonParser.jjt +++ b/src/edu/stanford/nlp/trees/tregex/tsurgeon/TsurgeonParser.jjt @@ -76,7 +76,7 @@ TOKEN: < CLOSE_BRACKET: "]" > | < SELECTION: "#rightmost" | "#leftmost" > | < GENERAL_RELABEL: "/" ("\\/"|~["\n","\r"])* "/" ( ("=" (["0"-"9","_","a"-"z","A"-"Z"])+) | ("%" (["0"-"9","_","a"-"z","A"-"Z"])+) | ("/" ("\\/"|"\\]"|"\\\\"|~["\n","\r","]","\\"])* "/") )+ > -| < IDENTIFIER: ["-","*","a"-"z","A"-"Z"] ( ["-","*","a"-"z","A"-"Z","_","0"-"9","|","@","="] )* > +| < IDENTIFIER: ~[" ","0"-"9","\n","\r","(","/","|","@","!","#","%","&",")","=","?","[","]",">","<","~","_",".",",","$",":","{","}",";"] ( ~[" ","\n","\r","(","/","@","!","#","%","&",")","=","?","[","]",">","<","~",".",",","$",":"] )* > | < LOCATION_RELATION: "$-" | "$+" | ">" ("-")? (["0"-"9"])+ > | < REGEX: "/" ("\\/"|"\\]"|"\\\\"|~["\n","\r","]","\\"])* "/" > | < QUOTEX: "|" ("\\|"|"\\\\"|~["\n","\r","|","\\"])* "|" > diff --git a/src/edu/stanford/nlp/trees/tregex/tsurgeon/TsurgeonParserTokenManager.java b/src/edu/stanford/nlp/trees/tregex/tsurgeon/TsurgeonParserTokenManager.java index 30eefe4d1c..5985d88a01 100644 --- a/src/edu/stanford/nlp/trees/tregex/tsurgeon/TsurgeonParserTokenManager.java +++ b/src/edu/stanford/nlp/trees/tregex/tsurgeon/TsurgeonParserTokenManager.java @@ -3,7 +3,6 @@ package edu.stanford.nlp.trees.tregex.tsurgeon; import edu.stanford.nlp.trees.*; import edu.stanford.nlp.util.Generics; - import java.util.*; /** Token Manager. 
*/ @@ -351,7 +350,7 @@ private int jjMoveStringLiteralDfa0_0(){ switch(curChar) { case 9: - return jjStartNfaWithStates_0(0, 3, 13); + return jjStartNfaWithStates_0(0, 3, 58); case 41: return jjStopAtPos(0, 32); case 91: @@ -394,6 +393,20 @@ private int jjMoveNfa_0(int startState, int curPos) { switch(jjstateSet[--i]) { + case 58: + if ((0xfbfffcfeffffdbffL & l) != 0L) + { + if (kind > 30) + kind = 30; + { jjCheckNAdd(13); } + } + if ((0xbff2c84ffffdbffL & l) != 0L) + { + if (kind > 25) + kind = 25; + { jjCheckNAdd(1); } + } + break; case 0: if ((0xfbfffcfeffffdbffL & l) != 0L) { @@ -407,7 +420,7 @@ else if (curChar == 40) kind = 31; { jjCheckNAdd(15); } } - if ((0x240000000000L & l) != 0L) + if ((0x2c84ffffdbffL & l) != 0L) { if (kind > 25) kind = 25; @@ -425,7 +438,7 @@ else if (curChar == 62) { jjCheckNAdd(12); } break; case 1: - if ((0x23ff240000000000L & l) == 0L) + if ((0xbff2c84ffffdbffL & l) == 0L) break; if (kind > 25) kind = 25; @@ -575,6 +588,20 @@ else if (curChar < 128) { switch(jjstateSet[--i]) { + case 58: + if ((0xffffffffd7ffffffL & l) != 0L) + { + if (kind > 30) + kind = 30; + { jjCheckNAdd(13); } + } + if ((0xbfffffffd7fffffeL & l) != 0L) + { + if (kind > 25) + kind = 25; + { jjCheckNAdd(1); } + } + break; case 0: if ((0xffffffffd7ffffffL & l) != 0L) { @@ -582,7 +609,7 @@ else if (curChar < 128) kind = 30; { jjCheckNAdd(13); } } - if ((0x7fffffe07fffffeL & l) != 0L) + if ((0x87ffffff57fffffeL & l) != 0L) { if (kind > 25) kind = 25; @@ -592,7 +619,7 @@ else if (curChar == 124) { jjCheckNAddStates(10, 12); } break; case 1: - if ((0x17fffffe87ffffffL & l) == 0L) + if ((0xbfffffffd7fffffeL & l) == 0L) break; if (kind > 25) kind = 25; @@ -762,18 +789,52 @@ else if (curChar == 124) { switch(jjstateSet[--i]) { + case 58: + if (jjCanMove_0(hiByte, i1, i2, l1, l2)) + { + if (kind > 25) + kind = 25; + { jjCheckNAdd(1); } + } + if (jjCanMove_0(hiByte, i1, i2, l1, l2)) + { + if (kind > 30) + kind = 30; + { jjCheckNAdd(13); } + } + break; case 0: - case 13: + if (jjCanMove_0(hiByte, i1, i2, l1, l2)) + { + if (kind > 25) + kind = 25; + { jjCheckNAdd(1); } + } + if (jjCanMove_0(hiByte, i1, i2, l1, l2)) + { + if (kind > 30) + kind = 30; + { jjCheckNAdd(13); } + } + break; + case 1: if (!jjCanMove_0(hiByte, i1, i2, l1, l2)) break; - if (kind > 30) - kind = 30; - { jjCheckNAdd(13); } + if (kind > 25) + kind = 25; + { jjCheckNAdd(1); } break; case 6: if (jjCanMove_0(hiByte, i1, i2, l1, l2)) { jjAddStates(10, 12); } break; + case 13: + if (!jjCanMove_0(hiByte, i1, i2, l1, l2)) + break; + if (kind > 30) + kind = 30; + { jjCheckNAdd(13); } + break; case 39: if (jjCanMove_0(hiByte, i1, i2, l1, l2)) { jjAddStates(13, 15); } @@ -913,9 +974,9 @@ else if (curChar < 128) } } static final int[] jjnextStates = { - 38, 39, 40, 52, 53, 54, 28, 35, 17, 18, 6, 7, 8, 38, 39, 40, - 41, 43, 45, 41, 42, 43, 45, 41, 43, 44, 45, 46, 47, 48, 52, 53, - 54, 9, 10, 49, 50, 51, 55, 56, 57, + 38, 39, 40, 52, 53, 54, 28, 35, 17, 18, 6, 7, 8, 38, 39, 40, + 41, 43, 45, 41, 42, 43, 45, 41, 43, 44, 45, 46, 47, 48, 52, 53, + 54, 9, 10, 49, 50, 51, 55, 56, 57, }; private static final boolean jjCanMove_0(int hiByte, int i1, int i2, long l1, long l2) { @@ -932,11 +993,11 @@ private static final boolean jjCanMove_0(int hiByte, int i1, int i2, long l1, lo /** Token literal values. 
*/ public static final String[] jjstrLiteralImages = { -"", null, null, null, null, "\133", "\151\146", "\156\157\164", -"\145\170\151\163\164\163", "\144\145\154\145\164\145", "\160\162\165\156\145", -"\162\145\154\141\142\145\154", "\145\170\143\151\163\145", "\151\156\163\145\162\164", "\155\157\166\145", -"\162\145\160\154\141\143\145", "\143\162\145\141\164\145\123\165\142\164\162\145\145", -"\141\144\152\157\151\156", "\141\144\152\157\151\156\110", "\141\144\152\157\151\156\106", +"", null, null, null, null, "\133", "\151\146", "\156\157\164", +"\145\170\151\163\164\163", "\144\145\154\145\164\145", "\160\162\165\156\145", +"\162\145\154\141\142\145\154", "\145\170\143\151\163\145", "\151\156\163\145\162\164", "\155\157\166\145", +"\162\145\160\154\141\143\145", "\143\162\145\141\164\145\123\165\142\164\162\145\145", +"\141\144\152\157\151\156", "\141\144\152\157\151\156\110", "\141\144\152\157\151\156\106", "\143\157\151\156\144\145\170", null, "\135", null, null, null, null, null, null, null, null, null, "\51", }; protected Token jjFillToken() { @@ -970,7 +1031,7 @@ protected Token jjFillToken() int jjmatchedKind; /** Get the next Token. */ -public Token getNextToken() +public Token getNextToken() { Token matchedToken; int curPos = 0; @@ -1148,20 +1209,20 @@ public void SwitchTo(int lexState) /** Lex State array. */ public static final int[] jjnewLexState = { - -1, -1, -1, -1, -1, 2, -1, -1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, 2, -1, -1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, }; static final long[] jjtoToken = { - 0x1ffffffe1L, + 0x1ffffffe1L, }; static final long[] jjtoSkip = { - 0x1eL, + 0x1eL, }; protected SimpleCharStream input_stream; private final int[] jjrounds = new int[58]; private final int[] jjstateSet = new int[2 * 58]; - + protected char curChar; } diff --git a/src/edu/stanford/nlp/util/ArrayCoreMap.java b/src/edu/stanford/nlp/util/ArrayCoreMap.java index 7f6bd1cb41..73c0aedefc 100644 --- a/src/edu/stanford/nlp/util/ArrayCoreMap.java +++ b/src/edu/stanford/nlp/util/ArrayCoreMap.java @@ -3,6 +3,7 @@ import java.io.IOException; import java.io.ObjectOutputStream; import java.util.*; +import java.util.concurrent.ConcurrentHashMap; import edu.stanford.nlp.util.logging.PrettyLogger; import edu.stanford.nlp.util.logging.Redwood; @@ -287,7 +288,7 @@ public int size() { private static final ThreadLocal> toStringCalled = new ThreadLocal>() { @Override protected IdentityHashSet initialValue() { - return new IdentityHashSet(); + return new IdentityHashSet<>(); } }; @@ -329,20 +330,42 @@ public String toString() { return s.toString(); } + // support caching of String form of keys for speedier printing + private static final ConcurrentHashMap shortNames = + new ConcurrentHashMap<>(12, 0.75f, 1); + + private static final int SHORTER_STRING_CHARSTRING_START_SIZE = 64; + /** * {@inheritDoc} */ @Override public String toShorterString(String... what) { - StringBuilder s = new StringBuilder("["); + StringBuilder s = new StringBuilder(SHORTER_STRING_CHARSTRING_START_SIZE); + s.append('['); + Set whatSet = null; + if (size > 5 && what.length > 5) { + // if there's a lot of stuff, hash. 
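+        // (otherwise whatSet stays null and membership is checked by the linear scan below)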
+ whatSet = new HashSet<>(Arrays.asList(what)); + } for (int i = 0; i < size; i++) { - String name = keys[i].getSimpleName(); - int annoIdx = name.lastIndexOf("Annotation"); - if (annoIdx >= 0) { - name = name.substring(0, annoIdx); + Class klass = keys[i]; + String name = shortNames.get(klass); + if (name == null) { + name = klass.getSimpleName(); + int annoIdx = name.lastIndexOf("Annotation"); + if (annoIdx >= 0) { + name = name.substring(0, annoIdx); + } + shortNames.put(klass, name); } + boolean include; - if (what.length > 0) { + if (what.length == 0) { + include = true; + } else if (whatSet != null) { + include = whatSet.contains(name); + } else { include = false; for (String item : what) { if (item.equals(name)) { @@ -350,8 +373,6 @@ public String toShorterString(String... what) { break; } } - } else { - include = true; } if (include) { if (s.length() > 1) { diff --git a/src/edu/stanford/nlp/util/FileBackedCache.java b/src/edu/stanford/nlp/util/FileBackedCache.java index 9ce01aee03..f82db76512 100644 --- a/src/edu/stanford/nlp/util/FileBackedCache.java +++ b/src/edu/stanford/nlp/util/FileBackedCache.java @@ -54,13 +54,13 @@ * *

   * The serialization behavior can be safely changed by overwriting:
-  *
   *
   *   @See FileBackedCache#newInputStream
   *   @See FileBackedCache#newOutputStream
   *   @See FileBackedCache#writeNextObject
   *   @See FileBackedCache#readNextObject
   *
+  *
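   *   (for instance, overriding newInputStream/newOutputStream to wrap the cache
   *   files in java.util.zip GZIP streams would transparently compress them on disk)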

   *
   * @param <KEY> The key to cache by
   * @param <T> The object to cache
diff --git a/src/edu/stanford/nlp/util/IterableIterator.java b/src/edu/stanford/nlp/util/IterableIterator.java
index 2abaac5852..9f6cc97fc6 100644
--- a/src/edu/stanford/nlp/util/IterableIterator.java
+++ b/src/edu/stanford/nlp/util/IterableIterator.java
@@ -1,7 +1,6 @@
 package edu.stanford.nlp.util;

 import java.util.*;
-import java.util.stream.Stream;

 /**
  * This cures a pet peeve of mine: that you can't use an Iterator directly in
@@ -12,45 +11,32 @@ public class IterableIterator implements Iterator, Iterable {

   private Iterator it;
-  private Iterable iterable;
-  private Stream stream;

   public IterableIterator(Iterator it) {
     this.it = it;
   }

-  public IterableIterator(Iterable iterable) {
-    this.iterable = iterable;
-    this.it = iterable.iterator();
-  }
-
-  public IterableIterator(Stream stream) {
-    this.stream = stream;
-    this.it = stream.iterator();
-  }
-
   public boolean hasNext() { return it.hasNext(); }
   public E next() { return it.next(); }
   public void remove() { it.remove(); }

-  public Iterator iterator() {
-    if (iterable != null) {
-      return iterable.iterator();
-    } else if (stream != null) {
-      return stream.iterator();
-    } else {
-      return this;
-    }
-  }
+  public Iterator iterator() { return this; }
+
+  public static void main(String[] args) {
+
+    String[] strings = new String[] {
+      "do", "re", "mi", "fa", "so", "la", "ti", "do",
+    };

-  @Override
-  public Spliterator spliterator() {
-    if (iterable != null) {
-      return iterable.spliterator();
-    } else if (stream != null) {
-      return stream.spliterator();
-    } else {
-      return Spliterators.spliteratorUnknownSize(it, Spliterator.ORDERED | Spliterator.CONCURRENT);
+    Iterator it = Arrays.asList(strings).iterator();
+    // for (String s : it) { // UH-OH!!
+    //   System.out.println(s);
+    // }
+
+    IterableIterator iterit = new IterableIterator(it);
+    for (String s : iterit) { // YAY!!
+      System.out.println(s);
     }
+  }

 }
diff --git a/src/edu/stanford/nlp/util/MutableLong.java b/src/edu/stanford/nlp/util/MutableLong.java
index bfe8dad911..f507c0436b 100644
--- a/src/edu/stanford/nlp/util/MutableLong.java
+++ b/src/edu/stanford/nlp/util/MutableLong.java
@@ -16,23 +16,25 @@ public void set(long i) {

   @Override
   public int hashCode() {
-    return (int) i;
+    return (int)(i ^ (i >>> 32));
   }

   /**
    * Compares this object to the specified object. The result is
-   * true if and only if the argument is not
-   * null and is an MutableInteger object that
-   * contains the same int value as this object.
-   * Note that a MutableInteger isn't and can't be equal to an Integer.
+   * {@code true} if and only if the argument is not
+   * {@code null} and is a {@code MutableLong} object that
+   * contains the same {@code long} value as this object.
+   * Note that a MutableLong isn't and can't be equal to a Long.
    *
    * @param obj the object to compare with.
-   * @return true if the objects are the same;
-   * false otherwise.
+   * @return {@code true} if the objects are the same;
+   * {@code false} otherwise.
    */
   @Override
   public boolean equals(Object obj) {
-    if (obj instanceof MutableLong) {
+    if (this == obj) {
+      return true;
+    } else if (obj instanceof MutableLong) {
       return i == ((MutableLong) obj).i;
     }
     return false;
@@ -58,6 +60,7 @@ public String toString() {
   * greater than the argument MutableLong (signed
   * comparison).
*/ + @Override public int compareTo(MutableLong anotherMutableLong) { long thisVal = this.i; long anotherVal = anotherMutableLong.i; @@ -96,11 +99,11 @@ public double doubleValue() { return i; } - /** Add the argument to the value of this integer. A convenience method. + /** Add the argument to the value of this long. A convenience method. * - * @param val Value to be added to this integer + * @param val Value to be added to this long */ - public void incValue(int val) { + public void incValue(long val) { i += val; } @@ -108,9 +111,10 @@ public MutableLong() { this(0); } - public MutableLong(int i) { + public MutableLong(long i) { this.i = i; } private static final long serialVersionUID = 624465615824626762L; + } diff --git a/src/edu/stanford/nlp/util/StringUtils.java b/src/edu/stanford/nlp/util/StringUtils.java index 8dad3b37dd..68d5804938 100644 --- a/src/edu/stanford/nlp/util/StringUtils.java +++ b/src/edu/stanford/nlp/util/StringUtils.java @@ -339,9 +339,11 @@ public static String joinWithOriginalWhiteSpace(List tokens) { } /** - * Joins each elem in the {@code Collection} with the given glue. + * Joins each elem in the {@link Iterable} with the given glue. * For example, given a list of {@code Integers}, you can create * a comma-separated list by calling {@code join(numbers, ", ")}. + * + * @see StringUtils#join(Stream, String) */ public static String join(Iterable l, String glue) { StringBuilder sb = new StringBuilder(); @@ -358,10 +360,25 @@ public static String join(Iterable l, String glue) { } /** + * Joins each elem in the {@link Stream} with the given glue. + * For example, given a list of {@code Integers}, you can create + * a comma-separated list by calling {@code join(numbers, ", ")}. + * * @see StringUtils#join(Iterable, String) */ - public static String join(Stream collection, String glue) { - return join(new IterableIterator(collection.iterator()), glue); + public static String join(Stream l, String glue) { + StringBuilder sb = new StringBuilder(); + boolean first = true; + Iterator iter = l.iterator(); + while (iter.hasNext()) { + if ( ! first) { + sb.append(glue); + } else { + first = false; + } + sb.append(iter.next()); + } + return sb.toString(); } // Omitted; I'm pretty sure this are redundant with the above @@ -2093,5 +2110,4 @@ public static String normalize(String s) { d = diacriticalMarksPattern.matcher(d).replaceAll(""); return Normalizer.normalize(d, Normalizer.Form.NFKC); } - } diff --git a/test/src/edu/stanford/nlp/ie/util/RelationTripleTest.java b/test/src/edu/stanford/nlp/ie/util/RelationTripleTest.java new file mode 100644 index 0000000000..2b0481028e --- /dev/null +++ b/test/src/edu/stanford/nlp/ie/util/RelationTripleTest.java @@ -0,0 +1,250 @@ +package edu.stanford.nlp.ie.util; + +import edu.stanford.nlp.ling.CoreLabel; +import edu.stanford.nlp.ling.IndexedWord; +import edu.stanford.nlp.semgraph.SemanticGraph; +import edu.stanford.nlp.trees.GrammaticalRelation; +import junit.framework.TestCase; + +import java.util.ArrayList; +import java.util.List; +import java.util.Optional; + +/** + * A test of various functions in {@link RelationTriple}. + * + * @author Gabor Angeli + */ +public class RelationTripleTest extends TestCase { + + protected CoreLabel mkWord(String gloss, int index) { + CoreLabel w = new CoreLabel(); + w.setWord(gloss); + w.setValue(gloss); + if (index >= 0) { + w.setIndex(index); + } + return w; + } + + /** + * Create a relation from a CoNLL format like: + *
    +   *   word_index  word  parent_index  incoming_relation
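+   *
+   * e.g., the parse behind testCatsHaveTails below would be written:
+   *   1    cats    2    nsubj
+   *   2    have    0    root
+   *   3    tails   2    dobj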
    +   * 
    + */ + protected Optional mkExtraction(String conll) { + List sentence = new ArrayList<>(); + SemanticGraph tree = new SemanticGraph(); + for (String line : conll.split("\n")) { + if (line.trim().equals("")) { continue; } + String[] fields = line.trim().split("\\s+"); + int index = Integer.parseInt(fields[0]); + String word = fields[1]; + CoreLabel label = mkWord(word, index); + sentence.add(label); + if (fields[2].equals("0")) { + tree.addRoot(new IndexedWord(label)); + } else { + tree.addVertex(new IndexedWord(label)); + } + } + int i = 0; + for (String line : conll.split("\n")) { + if (line.trim().equals("")) { continue; } + String[] fields = line.trim().split("\\s+"); + int parent = Integer.parseInt(fields[2]); + String reln = fields[3]; + if (parent > 0) { + tree.addEdge( + new IndexedWord(sentence.get(parent - 1)), + new IndexedWord(sentence.get(i)), + new GrammaticalRelation(GrammaticalRelation.Language.English, reln, null, null), + 1.0, false + ); + } + i += 1; + } + return RelationTriple.segment(tree, Optional.empty()); + } + + protected RelationTriple blueCatsPlayWithYarnNoIndices() { + List sentence = new ArrayList<>(); + sentence.add(mkWord("blue", -1)); + sentence.add(mkWord("cats", -1)); + sentence.add(mkWord("play", -1)); + sentence.add(mkWord("with", -1)); + sentence.add(mkWord("yarn", -1)); + return new RelationTriple(sentence.subList(0, 2), sentence.subList(2, 4), sentence.subList(4, 5)); + } + + protected RelationTriple blueCatsPlayWithYarn() { + List sentence = new ArrayList<>(); + sentence.add(mkWord("blue", 0)); + sentence.add(mkWord("cats", 1)); + sentence.add(mkWord("play", 2)); + sentence.add(mkWord("with", 3)); + sentence.add(mkWord("yarn", 4)); + return new RelationTriple(sentence.subList(0, 2), sentence.subList(2, 4), sentence.subList(4, 5)); + } + + protected RelationTriple yarnBlueCatsPlayWith() { + List sentence = new ArrayList<>(); + sentence.add(mkWord("yarn", 0)); + sentence.add(mkWord("blue", 1)); + sentence.add(mkWord("cats", 2)); + sentence.add(mkWord("play", 3)); + sentence.add(mkWord("with", 4)); + return new RelationTriple(sentence.subList(1, 3), sentence.subList(3, 5), sentence.subList(0, 1)); + } + + + public void testToSentenceNoIndices() { + assertEquals(new ArrayList(){{ + add(mkWord("blue", -1)); + add(mkWord("cats", -1)); + add(mkWord("play", -1)); + add(mkWord("with", -1)); + add(mkWord("yarn", -1)); + }}, blueCatsPlayWithYarnNoIndices().asSentence()); + } + + public void testToSentenceInOrder() { + assertEquals(new ArrayList(){{ + add(mkWord("blue", 0)); + add(mkWord("cats", 1)); + add(mkWord("play", 2)); + add(mkWord("with", 3)); + add(mkWord("yarn", 4)); + }}, blueCatsPlayWithYarn().asSentence()); + } + + public void testToSentenceOutOfOrder() { + assertEquals(new ArrayList(){{ + add(mkWord("yarn", 0)); + add(mkWord("blue", 1)); + add(mkWord("cats", 2)); + add(mkWord("play", 3)); + add(mkWord("with", 4)); + }}, yarnBlueCatsPlayWith().asSentence()); + } + + public void testSameSemanticsForDifferentWordOrder() { + assertEquals(blueCatsPlayWithYarn().toString(), yarnBlueCatsPlayWith().toString()); + assertEquals("1.0\tblue cats\tplay with\tyarn", blueCatsPlayWithYarn().toString()); + assertEquals("1.0\tblue cats\tplay with\tyarn", yarnBlueCatsPlayWith().toString()); + } + + public void testGlosses() { + assertEquals("blue cats", blueCatsPlayWithYarn().subjectGloss()); + assertEquals("play with", blueCatsPlayWithYarn().relationGloss()); + assertEquals("yarn", blueCatsPlayWithYarn().objectGloss()); + } + + public void 
testBlueCatsPlayWithYarn() { + Optional extraction = mkExtraction( + "1\tblue\t2\tamod\n" + + "2\tcats\t3\tnsubj\n" + + "3\tplay\t0\troot\n" + + "4\twith\t3\tprep\n" + + "5\tyarn\t4\tpobj\n" + ); + assertTrue("No extraction for sentence!", extraction.isPresent()); + assertEquals("1.0\tblue cats\tplay with\tyarn", extraction.get().toString()); + } + + public void testBlueCatsPlayQuietlyWithYarn() { + Optional extraction = mkExtraction( + "1\tblue\t2\tamod\n" + + "2\tcats\t3\tnsubj\n" + + "3\tplay\t0\troot\n" + + "4\tquietly\t3\tadvmod\n" + + "5\twith\t3\tprep\n" + + "6\tyarn\t5\tpobj\n" + ); + assertTrue("No extraction for sentence!", extraction.isPresent()); + assertEquals("1.0\tblue cats\tplay quietly with\tyarn", extraction.get().toString()); + } + + public void testCatsHaveTails() { + Optional extraction = mkExtraction( + "1\tcats\t2\tnsubj\n" + + "2\thave\t0\troot\n" + + "3\ttails\t2\tdobj\n" + ); + assertTrue("No extraction for sentence!", extraction.isPresent()); + assertEquals("1.0\tcats\thave\ttails", extraction.get().toString()); + } + + public void testFishLikeToSwim() { + Optional extraction = mkExtraction( + "1\tfish\t2\tnsubj\n" + + "2\tlike\t0\troot\n" + + "3\tto\t4\taux\n" + + "4\tswim\t2\txcomp\n" + ); + assertTrue("No extraction for sentence!", extraction.isPresent()); + assertEquals("1.0\tfish\tlike\tto swim", extraction.get().toString()); + } + + public void testCatsAreCute() { + Optional extraction = mkExtraction( + "1\tcats\t3\tnsubj\n" + + "2\tare\t3\tcop\n" + + "3\tcute\t0\troot\n" + ); + assertTrue("No extraction for sentence!", extraction.isPresent()); + assertEquals("1.0\tcats\tare\tcute", extraction.get().toString()); + } + + public void testHeWasInaugurated() { + Optional extraction = mkExtraction( + "1\the\t3\tnsubjpass\n" + + "2\twas\t3\tauxpass\n" + + "3\tinaugurated\t0\troot\n" + + "5\tpresident\t3\tprep_as\n" + ); + assertTrue("No extraction for sentence!", extraction.isPresent()); + assertEquals("1.0\the\twas inaugurated as\tpresident", extraction.get().toString()); + } + + public void testPPAttachment() { + Optional extraction = mkExtraction( + "1\the\t2\tnsubj\n" + + "2\tserved\t0\troot\n" + + "3\tpresident\t2\tprep_as\n" + + "4\tHarvard\t6\taux\n" + + "5\tLaw\t6\taux\n" + + "6\tReview\t3\tprep_of\n" + ); + assertTrue("No extraction for sentence!", extraction.isPresent()); + assertEquals("1.0\the\tserved as\tpresident of Harvard Law Review", extraction.get().toString()); + } + + public void testPPAttachmentTwo() { + Optional extraction = mkExtraction( + "1\the\t4\tnsubj\n" + + "2\twas\t4\tcop\n" + + "3\tcommunity\t4\tnn\n" + + "4\torganizer\t0\troot\n" + + "6\tChicago\t4\tprep_in\n" + ); + assertTrue("No extraction for sentence!", extraction.isPresent()); + assertEquals("1.0\the\twas\tcommunity organizer in Chicago", extraction.get().toString()); + } + + public void testXComp() { + Optional extraction = mkExtraction( + "1\tObama\t3\tnsubjpass\n" + + "2\twas\t3\tauxpass\n" + + "3\tnamed\t0\troot\n" + + "4\t2009\t8\tnum\n" + + "5\tNobel\t8\tnn\n" + + "6\tPeace\t8\tnn\n" + + "7\tPrize\t8\tnn\n" + + "8\tLaureate\t3\txcomp\n" + ); + assertTrue("No extraction for sentence!", extraction.isPresent()); + assertEquals("1.0\tObama\twas named\t2009 Nobel Peace Prize Laureate", extraction.get().toString()); + } +} diff --git a/test/src/edu/stanford/nlp/naturalli/NaturalLogicRelationTest.java b/test/src/edu/stanford/nlp/naturalli/NaturalLogicRelationTest.java new file mode 100644 index 0000000000..37f71b420c --- /dev/null +++ 
b/test/src/edu/stanford/nlp/naturalli/NaturalLogicRelationTest.java @@ -0,0 +1,65 @@ +package edu.stanford.nlp.naturalli; + +import org.junit.*; +import static org.junit.Assert.*; + +/** + * A test for {@link edu.stanford.nlp.naturalli.NaturalLogicRelation}. + * + * @author Gabor Angeli + */ +public class NaturalLogicRelationTest { + + @Test + public void fixedIndex() { + for (NaturalLogicRelation rel : NaturalLogicRelation.values()) { + assertEquals(rel, NaturalLogicRelation.byFixedIndex(rel.fixedIndex)); + } + } + + @Test + public void spotTestJoinTable() { + assertEquals(NaturalLogicRelation.COVER, NaturalLogicRelation.NEGATION.join(NaturalLogicRelation.FORWARD_ENTAILMENT)); + assertEquals(NaturalLogicRelation.FORWARD_ENTAILMENT, NaturalLogicRelation.ALTERNATION.join(NaturalLogicRelation.NEGATION)); + assertEquals(NaturalLogicRelation.REVERSE_ENTAILMENT, NaturalLogicRelation.COVER.join(NaturalLogicRelation.ALTERNATION)); + assertEquals(NaturalLogicRelation.EQUIVALENT, NaturalLogicRelation.NEGATION.join(NaturalLogicRelation.NEGATION)); + for (NaturalLogicRelation rel : NaturalLogicRelation.values()) { + assertEquals(rel, NaturalLogicRelation.EQUIVALENT.join(rel)); + assertEquals(NaturalLogicRelation.INDEPENDENCE, NaturalLogicRelation.INDEPENDENCE.join(rel)); + assertEquals(NaturalLogicRelation.INDEPENDENCE, rel.join(NaturalLogicRelation.INDEPENDENCE)); + } + } + + @Test + public void entailmentState() { + assertTrue(NaturalLogicRelation.EQUIVALENT.isEntailed); + assertTrue(NaturalLogicRelation.FORWARD_ENTAILMENT.isEntailed); + assertTrue(NaturalLogicRelation.NEGATION.isNegated); + assertTrue(NaturalLogicRelation.ALTERNATION.isNegated); + + assertFalse(NaturalLogicRelation.EQUIVALENT.isNegated); + assertFalse(NaturalLogicRelation.FORWARD_ENTAILMENT.isNegated); + assertFalse(NaturalLogicRelation.NEGATION.isEntailed); + assertFalse(NaturalLogicRelation.ALTERNATION.isEntailed); + + assertFalse(NaturalLogicRelation.COVER.isEntailed); + assertFalse(NaturalLogicRelation.COVER.isNegated); + assertFalse(NaturalLogicRelation.INDEPENDENCE.isEntailed); + assertFalse(NaturalLogicRelation.INDEPENDENCE.isNegated); + } + + @Test + public void someInsertionRelations() { + assertEquals(NaturalLogicRelation.INDEPENDENCE, NaturalLogicRelation.forDependencyInsertion("nsubj")); + assertEquals(NaturalLogicRelation.FORWARD_ENTAILMENT, NaturalLogicRelation.forDependencyInsertion("rcmod")); + assertEquals(NaturalLogicRelation.REVERSE_ENTAILMENT, NaturalLogicRelation.forDependencyInsertion("amod")); + } + + @Test + public void someDeletionRelations() { + assertEquals(NaturalLogicRelation.INDEPENDENCE, NaturalLogicRelation.forDependencyDeletion("nsubj")); + assertEquals(NaturalLogicRelation.REVERSE_ENTAILMENT, NaturalLogicRelation.forDependencyDeletion("rcmod")); + assertEquals(NaturalLogicRelation.FORWARD_ENTAILMENT, NaturalLogicRelation.forDependencyDeletion("amod")); + } + +} diff --git a/test/src/edu/stanford/nlp/naturalli/PolarityTest.java b/test/src/edu/stanford/nlp/naturalli/PolarityTest.java new file mode 100644 index 0000000000..3af89995c4 --- /dev/null +++ b/test/src/edu/stanford/nlp/naturalli/PolarityTest.java @@ -0,0 +1,133 @@ +package edu.stanford.nlp.naturalli; + +import edu.stanford.nlp.util.Pair; + +import java.util.ArrayList; + +import org.junit.*; +import static org.junit.Assert.*; + +/** + * A test for the {@link edu.stanford.nlp.naturalli.Polarity} class. + * + * This is primarily just spot-checking the projection table, and then some of the utility functions. 
+ * + * @author Gabor Angeli + */ +public class PolarityTest { + + private static final Polarity none = new Polarity(new ArrayList>() {{ + }}); + + private static final Polarity additive = new Polarity(new ArrayList>() {{ + add( Pair.makePair(Monotonicity.MONOTONE, MonotonicityType.ADDITIVE)); + }}); + + private static final Polarity multiplicative = new Polarity(new ArrayList>() {{ + add( Pair.makePair(Monotonicity.MONOTONE, MonotonicityType.MULTIPLICATIVE)); + }}); + + private static final Polarity antimultiplicative = new Polarity(new ArrayList>() {{ + add( Pair.makePair(Monotonicity.MONOTONE, MonotonicityType.ADDITIVE)); + add( Pair.makePair(Monotonicity.ANTITONE, MonotonicityType.MULTIPLICATIVE)); + }}); + + private static final Polarity additiveAntiMultiplicative = new Polarity(new ArrayList>() {{ + add( Pair.makePair(Monotonicity.MONOTONE, MonotonicityType.ADDITIVE)); + add( Pair.makePair(Monotonicity.ANTITONE, MonotonicityType.MULTIPLICATIVE)); + }}); + + private static final Polarity multiplicativeAntiMultiplicative = new Polarity(new ArrayList>() {{ + add( Pair.makePair(Monotonicity.MONOTONE, MonotonicityType.MULTIPLICATIVE)); + add( Pair.makePair(Monotonicity.ANTITONE, MonotonicityType.MULTIPLICATIVE)); + }}); + + @Test + public void noneProject() { + assertEquals(NaturalLogicRelation.EQUIVALENT, none.projectLexicalRelation(NaturalLogicRelation.EQUIVALENT)); + assertEquals(NaturalLogicRelation.FORWARD_ENTAILMENT, none.projectLexicalRelation(NaturalLogicRelation.FORWARD_ENTAILMENT)); + assertEquals(NaturalLogicRelation.REVERSE_ENTAILMENT, none.projectLexicalRelation(NaturalLogicRelation.REVERSE_ENTAILMENT)); + assertEquals(NaturalLogicRelation.NEGATION, none.projectLexicalRelation(NaturalLogicRelation.NEGATION)); + assertEquals(NaturalLogicRelation.ALTERNATION, none.projectLexicalRelation(NaturalLogicRelation.ALTERNATION)); + assertEquals(NaturalLogicRelation.COVER, none.projectLexicalRelation(NaturalLogicRelation.COVER)); + assertEquals(NaturalLogicRelation.INDEPENDENCE, none.projectLexicalRelation(NaturalLogicRelation.INDEPENDENCE)); + } + + @Test + public void additive_antimultiplicativeProject() { + assertEquals(NaturalLogicRelation.EQUIVALENT, additiveAntiMultiplicative.projectLexicalRelation(NaturalLogicRelation.EQUIVALENT)); + assertEquals(NaturalLogicRelation.REVERSE_ENTAILMENT, additiveAntiMultiplicative.projectLexicalRelation(NaturalLogicRelation.FORWARD_ENTAILMENT)); + assertEquals(NaturalLogicRelation.FORWARD_ENTAILMENT, additiveAntiMultiplicative.projectLexicalRelation(NaturalLogicRelation.REVERSE_ENTAILMENT)); + assertEquals(NaturalLogicRelation.COVER, additiveAntiMultiplicative.projectLexicalRelation(NaturalLogicRelation.NEGATION)); + assertEquals(NaturalLogicRelation.COVER, additiveAntiMultiplicative.projectLexicalRelation(NaturalLogicRelation.ALTERNATION)); + assertEquals(NaturalLogicRelation.INDEPENDENCE, additiveAntiMultiplicative.projectLexicalRelation(NaturalLogicRelation.COVER)); + assertEquals(NaturalLogicRelation.INDEPENDENCE, additiveAntiMultiplicative.projectLexicalRelation(NaturalLogicRelation.INDEPENDENCE)); + } + + @Test + public void multiplicative_antimultiplicativeProject() { + assertEquals(NaturalLogicRelation.EQUIVALENT, multiplicativeAntiMultiplicative.projectLexicalRelation(NaturalLogicRelation.EQUIVALENT)); + assertEquals(NaturalLogicRelation.REVERSE_ENTAILMENT, multiplicativeAntiMultiplicative.projectLexicalRelation(NaturalLogicRelation.FORWARD_ENTAILMENT)); + assertEquals(NaturalLogicRelation.FORWARD_ENTAILMENT, 
multiplicativeAntiMultiplicative.projectLexicalRelation(NaturalLogicRelation.REVERSE_ENTAILMENT)); + assertEquals(NaturalLogicRelation.INDEPENDENCE, multiplicativeAntiMultiplicative.projectLexicalRelation(NaturalLogicRelation.NEGATION)); + assertEquals(NaturalLogicRelation.INDEPENDENCE, multiplicativeAntiMultiplicative.projectLexicalRelation(NaturalLogicRelation.ALTERNATION)); + assertEquals(NaturalLogicRelation.INDEPENDENCE, multiplicativeAntiMultiplicative.projectLexicalRelation(NaturalLogicRelation.COVER)); + assertEquals(NaturalLogicRelation.INDEPENDENCE, multiplicativeAntiMultiplicative.projectLexicalRelation(NaturalLogicRelation.INDEPENDENCE)); + } + + @Test + public void additiveProject() { + assertEquals(NaturalLogicRelation.EQUIVALENT, additive.projectLexicalRelation(NaturalLogicRelation.EQUIVALENT)); + assertEquals(NaturalLogicRelation.FORWARD_ENTAILMENT, additive.projectLexicalRelation(NaturalLogicRelation.FORWARD_ENTAILMENT)); + assertEquals(NaturalLogicRelation.REVERSE_ENTAILMENT, additive.projectLexicalRelation(NaturalLogicRelation.REVERSE_ENTAILMENT)); + assertEquals(NaturalLogicRelation.COVER, additive.projectLexicalRelation(NaturalLogicRelation.NEGATION)); + assertEquals(NaturalLogicRelation.INDEPENDENCE, additive.projectLexicalRelation(NaturalLogicRelation.ALTERNATION)); + assertEquals(NaturalLogicRelation.COVER, additive.projectLexicalRelation(NaturalLogicRelation.COVER)); + assertEquals(NaturalLogicRelation.INDEPENDENCE, additive.projectLexicalRelation(NaturalLogicRelation.INDEPENDENCE)); + } + + @Test + public void antimultiplicativeProject() { + assertEquals(NaturalLogicRelation.EQUIVALENT, antimultiplicative.projectLexicalRelation(NaturalLogicRelation.EQUIVALENT)); + assertEquals(NaturalLogicRelation.REVERSE_ENTAILMENT, antimultiplicative.projectLexicalRelation(NaturalLogicRelation.FORWARD_ENTAILMENT)); + assertEquals(NaturalLogicRelation.FORWARD_ENTAILMENT, antimultiplicative.projectLexicalRelation(NaturalLogicRelation.REVERSE_ENTAILMENT)); + assertEquals(NaturalLogicRelation.COVER, antimultiplicative.projectLexicalRelation(NaturalLogicRelation.NEGATION)); + assertEquals(NaturalLogicRelation.COVER, antimultiplicative.projectLexicalRelation(NaturalLogicRelation.ALTERNATION)); + assertEquals(NaturalLogicRelation.INDEPENDENCE, antimultiplicative.projectLexicalRelation(NaturalLogicRelation.COVER)); + assertEquals(NaturalLogicRelation.INDEPENDENCE, antimultiplicative.projectLexicalRelation(NaturalLogicRelation.INDEPENDENCE)); + } + + @Test + public void multiplicativeTruth() { + assertEquals(true, multiplicative.maintainsEntailment(NaturalLogicRelation.EQUIVALENT)); + assertEquals(true, multiplicative.maintainsEntailment(NaturalLogicRelation.FORWARD_ENTAILMENT)); + assertEquals(false, multiplicative.maintainsEntailment(NaturalLogicRelation.REVERSE_ENTAILMENT)); + assertEquals(false, multiplicative.maintainsEntailment(NaturalLogicRelation.NEGATION)); + assertEquals(false, multiplicative.maintainsEntailment(NaturalLogicRelation.ALTERNATION)); + assertEquals(false, multiplicative.maintainsEntailment(NaturalLogicRelation.COVER)); + assertEquals(false, multiplicative.maintainsEntailment(NaturalLogicRelation.INDEPENDENCE)); + + assertEquals(false, multiplicative.introducesNegation(NaturalLogicRelation.EQUIVALENT)); + assertEquals(false, multiplicative.introducesNegation(NaturalLogicRelation.FORWARD_ENTAILMENT)); + assertEquals(false, multiplicative.introducesNegation(NaturalLogicRelation.REVERSE_ENTAILMENT)); + assertEquals(true, 
multiplicative.introducesNegation(NaturalLogicRelation.NEGATION)); + assertEquals(true, multiplicative.introducesNegation(NaturalLogicRelation.ALTERNATION)); + assertEquals(false, multiplicative.introducesNegation(NaturalLogicRelation.COVER)); + assertEquals(false, multiplicative.introducesNegation(NaturalLogicRelation.INDEPENDENCE)); + } + + @Test + public void upwardDownward() { + assertEquals(true, multiplicative.isUpwards()); + assertEquals(true, additive.isUpwards()); + assertEquals(false, antimultiplicative.isUpwards()); + assertEquals(false, multiplicativeAntiMultiplicative.isUpwards()); + assertEquals(false, additiveAntiMultiplicative.isUpwards()); + + assertEquals(false, multiplicative.isDownwards()); + assertEquals(false, additive.isDownwards()); + assertEquals(true, antimultiplicative.isDownwards()); + assertEquals(true, multiplicativeAntiMultiplicative.isDownwards()); + assertEquals(true, additiveAntiMultiplicative.isDownwards()); + } +} diff --git a/test/src/edu/stanford/nlp/parser/shiftreduce/BinaryTransitionTest.java b/test/src/edu/stanford/nlp/parser/shiftreduce/BinaryTransitionTest.java index e680245f87..7764da0874 100644 --- a/test/src/edu/stanford/nlp/parser/shiftreduce/BinaryTransitionTest.java +++ b/test/src/edu/stanford/nlp/parser/shiftreduce/BinaryTransitionTest.java @@ -65,7 +65,7 @@ public void checkHeads(Tree t1, Tree t2) { CoreLabel l1 = (CoreLabel) t1.label(); CoreLabel l2 = (CoreLabel) t2.label(); - assertEquals(l1.get(TreeCoreAnnotations.HeadWordAnnotation.class), l2.get(TreeCoreAnnotations.HeadWordAnnotation.class)); - assertEquals(l1.get(TreeCoreAnnotations.HeadTagAnnotation.class), l2.get(TreeCoreAnnotations.HeadTagAnnotation.class)); + assertEquals(l1.get(TreeCoreAnnotations.HeadWordLabelAnnotation.class), l2.get(TreeCoreAnnotations.HeadWordLabelAnnotation.class)); + assertEquals(l1.get(TreeCoreAnnotations.HeadTagLabelAnnotation.class), l2.get(TreeCoreAnnotations.HeadTagLabelAnnotation.class)); } } diff --git a/test/src/edu/stanford/nlp/semgraph/SemanticGraphUtilsTest.java b/test/src/edu/stanford/nlp/semgraph/SemanticGraphUtilsTest.java deleted file mode 100644 index eb1993cfa6..0000000000 --- a/test/src/edu/stanford/nlp/semgraph/SemanticGraphUtilsTest.java +++ /dev/null @@ -1,41 +0,0 @@ -package edu.stanford.nlp.semgraph; - -import java.io.IOException; -import java.io.StringReader; -import java.util.List; -import java.util.Map; -import java.util.Arrays; -import java.util.function.Function; - -import junit.framework.TestCase; -import edu.stanford.nlp.ling.IndexedWord; -import edu.stanford.nlp.trees.EnglishGrammaticalRelations; -import edu.stanford.nlp.trees.LabeledScoredTreeFactory; -import edu.stanford.nlp.trees.PennTreeReader; -import edu.stanford.nlp.trees.Tree; -import edu.stanford.nlp.util.Generics; - -/** - * - * @author Sonal Gupta - */ -public class SemanticGraphUtilsTest extends TestCase { - - SemanticGraph graph; - - public void testCreateSemgrexPattern(){ - try{ - SemanticGraph graph = SemanticGraph.valueOf("[ate subj:Bill]"); - - Function transformNode = o ->{ - return "{word: " + o.word().toLowerCase() + "; tag: " + o.tag() +"; ner: " + o.ner() + "}"; - }; - - String pat = SemanticGraphUtils.semgrexFromGraphOrderedNodes(graph, null, null, transformNode); - assertEquals("{word: ate; tag: null; ner: null}=ate >subj=E1 {word: bill; tag: null; ner: null}=Bill", pat.trim()); - }catch(Exception e){ - e.printStackTrace(); - } - } - -} \ No newline at end of file diff --git a/test/src/edu/stanford/nlp/trees/tregex/tsurgeon/TsurgeonTest.java 
b/test/src/edu/stanford/nlp/trees/tregex/tsurgeon/TsurgeonTest.java index ace397ca1b..75f211936f 100644 --- a/test/src/edu/stanford/nlp/trees/tregex/tsurgeon/TsurgeonTest.java +++ b/test/src/edu/stanford/nlp/trees/tregex/tsurgeon/TsurgeonTest.java @@ -40,6 +40,12 @@ public void testBackReference() { "(A (X w) (B w))"); } + public void testForeign() { + TregexPattern tregex = TregexPattern.compile("atentát=test"); + TsurgeonPattern tsurgeon = Tsurgeon.parseOperation("relabel test perform_atentát"); + runTest(tregex, tsurgeon, "(foo atentát)", "(foo perform_atentát)"); + } + public void testAdjoin() { TsurgeonPattern tsurgeon = Tsurgeon.parseOperation("adjoin (FOO (BAR@)) foo"); TregexPattern tregex = TregexPattern.compile("B=foo"); diff --git a/test/src/edu/stanford/nlp/util/CoreMapTest.java b/test/src/edu/stanford/nlp/util/CoreMapTest.java index 1674945beb..437dcfa8ac 100644 --- a/test/src/edu/stanford/nlp/util/CoreMapTest.java +++ b/test/src/edu/stanford/nlp/util/CoreMapTest.java @@ -130,8 +130,7 @@ public static void main(String[] args) { CoreAnnotations.PartOfSpeechAnnotation.class, CoreAnnotations.ShapeAnnotation.class, CoreAnnotations.NamedEntityTagAnnotation.class, CoreAnnotations.DocIDAnnotation.class, CoreAnnotations.ValueAnnotation.class, CoreAnnotations.CategoryAnnotation.class, CoreAnnotations.BeforeAnnotation.class, CoreAnnotations.AfterAnnotation.class, CoreAnnotations.OriginalTextAnnotation.class, - CoreAnnotations.ProjectedCategoryAnnotation.class, CoreAnnotations.ArgumentAnnotation.class, - CoreAnnotations.MarkingAnnotation.class + CoreAnnotations.ArgumentAnnotation.class, CoreAnnotations.MarkingAnnotation.class }; // how many iterations diff --git a/test/src/edu/stanford/nlp/util/IterableIteratorTest.java b/test/src/edu/stanford/nlp/util/IterableIteratorTest.java deleted file mode 100644 index 6169c338f7..0000000000 --- a/test/src/edu/stanford/nlp/util/IterableIteratorTest.java +++ /dev/null @@ -1,71 +0,0 @@ -package edu.stanford.nlp.util; - -import junit.framework.TestCase; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Iterator; -import java.util.Spliterator; -import java.util.stream.Stream; -import java.util.stream.StreamSupport; - -/** - * A test for the {@link edu.stanford.nlp.util.IterableIterator}. - * Notably, I don't entirely trust myself to implement the {@link Iterable#spliterator()}} function - * properly. 
- * - * @author Gabor Angeli - */ -public class IterableIteratorTest extends TestCase { - - public void testBasic() { - String[] strings = new String[] { - "do", "re", "mi", "fa", "so", "la", "ti", "do", - }; - Iterator it = Arrays.asList(strings).iterator(); - IterableIterator iterit = new IterableIterator<>(it); - assertEquals("do", iterit.next()); - assertEquals("re", iterit.next()); - assertEquals("mi", iterit.next()); - assertEquals("fa", iterit.next()); - assertEquals("so", iterit.next()); - assertEquals("la", iterit.next()); - assertEquals("ti", iterit.next()); - assertEquals("do", iterit.next()); - assertFalse(iterit.hasNext()); - - } - - public void testSpliteratorInSequence() { - ArrayList x = new ArrayList<>(); - for (int i = 0; i < 1000; ++i) { - x.add(i); - } - IterableIterator iter = new IterableIterator<>(x.iterator()); - Spliterator spliterator = iter.spliterator(); - Stream stream = StreamSupport.stream(spliterator, false); - final Integer[] next = new Integer[]{0}; - stream.forEach(elem -> { - assertEquals(next[0], elem); - next[0] += 1; - }); - } - - public void testSpliteratorInParallel() { - ArrayList x = new ArrayList<>(); - for (int i = 0; i < 1000; ++i) { - x.add(i); - } - IterableIterator iter = new IterableIterator<>(x.iterator()); - Spliterator spliterator = iter.spliterator(); - Stream stream = StreamSupport.stream(spliterator, true); - final boolean[] seen = new boolean[1000]; - stream.forEach(elem -> { - assertFalse(seen[elem]); - seen[elem] = true; - }); - for (int i = 0; i < 1000; ++i) { - assertTrue(seen[i]); - } - } -}
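--
Usage sketches (reviewer notes; illustrative only, not part of the diff above):

The factory-aware AuxiliaryTree.copy is invoked identically from AdjoinNode,
AdjoinToFootNode, AdjoinToHeadNode, CreateSubtreeNode, and HoldTreeNode, so a
hypothetical new tsurgeon relation that copies a tree would follow the same
pattern; passing a null labelFactory falls back to CoreLabel.factory():

    // sketch of a caller inside a Tsurgeon matcher's evaluate(tree, tregex)
    AuxiliaryTree ft = adjunctionTree.copy(this, tree.treeFactory(), tree.label().labelFactory());

The new tregex -e flag hands its argument straight to Treebank.loadPath as a
file-extension filter, matching the updated usage line above, e.g.:

    java edu.stanford.nlp.trees.tregex.TregexPattern -e mrg 'NP < NN' /path/to/treebank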