Skip to content

Commit

Permalink
Fix the OpenIE ITest
Browse files Browse the repository at this point in the history
  • Loading branch information
Gabor Angeli authored and Stanford NLP committed Sep 29, 2015
1 parent 5d538e8 commit 6207170
Show file tree
Hide file tree
Showing 37 changed files with 303,835 additions and 1,988 deletions.
2 changes: 1 addition & 1 deletion README.md
@@ -1,7 +1,7 @@
Stanford CoreNLP Stanford CoreNLP
================ ================


Stanford CoreNLP provides a set of natural language analysis tools written in Java. It can take raw human language text input and give the base forms of words, their parts of speech, whether they are names of companies, people, etc., normalize and interpret dates, times, and numeric quantities, mark up the structure of sentences in terms of phrases or word dependencies, and indicate which noun phrases refer to the same entities. It was originally developed for English, but now also provides varying levels of support for (Modern Standard) Arabic, (mainland) Chinese, French, German, and Spanish. Stanford CoreNLP is an integrated framework, which makes it very easy to apply a bunch of language analysis tools to a piece of text. Starting from plain text, you can run all the tools with just two lines of code. Its analyses provide the foundational building blocks for higher-level and domain-specific text understanding applications. Stanford CoreNLP is a set of stable and well-tested natural language processing tools, widely used by various groups in academia, industry, and government. Stanford CoreNLP provides a set of natural language analysis tools written in Java. It can take raw human language text input and give the base forms of words, their parts of speech, whether they are names of companies, people, etc., normalize dates, times, and numeric quantities, and mark up the structure of sentences in terms of phrases and word dependencies, and indicate which noun phrases refer to the same entities. It was originally developed for English, but now also provides varying levels of support for Arabic, (mainland) Chinese, French, German, and Spanish. Stanford CoreNLP is an integrated framework, which makes it very easy to apply a bunch of language analysis tools to a piece of text. Starting from plain text, you can run all the tools on it with just two lines of code. 
Its analyses provide the foundational building blocks for higher-level and domain-specific text understanding applications. Stanford CoreNLP is a set of stable and well-tested natural language processing tools, widely used by various groups in academia, government, and industry.


The Stanford CoreNLP code is written in Java and licensed under the GNU General Public License (v3 or later). Note that this is the full GPL, which allows many free uses, but not its use in proprietary software that you distribute. The Stanford CoreNLP code is written in Java and licensed under the GNU General Public License (v3 or later). Note that this is the full GPL, which allows many free uses, but not its use in proprietary software that you distribute.


Expand Down
1 change: 0 additions & 1 deletion build.gradle
Expand Up @@ -47,7 +47,6 @@ task listDeps << {


dependencies { dependencies {
compile fileTree(dir: 'lib', include: '*.jar') compile fileTree(dir: 'lib', include: '*.jar')
testCompile fileTree(dir: 'liblocal', include: '*.jar')
} }


// Eclipse plugin setup // Eclipse plugin setup
Expand Down
4 changes: 0 additions & 4 deletions build.xml
Expand Up @@ -26,10 +26,6 @@
<include name="*.jar"/> <include name="*.jar"/>
<exclude name="javanlp*"/> <exclude name="javanlp*"/>
</fileset> </fileset>
<fileset dir="${basedir}/liblocal">
<include name="*.jar"/>
<exclude name="javanlp*"/>
</fileset>
</path> </path>
</target> </target>


Expand Down
12 changes: 7 additions & 5 deletions itest/src/edu/stanford/nlp/naturalli/OpenIEITest.java
Expand Up @@ -10,7 +10,6 @@
import org.junit.Test; import org.junit.Test;


import java.util.*; import java.util.*;
import java.util.stream.Collectors;


import static org.junit.Assert.*; import static org.junit.Assert.*;


Expand Down Expand Up @@ -52,10 +51,13 @@ public void assertExtracted(String expected, String text) {
assertTrue("The extraction '" + expected + "' was not found in '" + text + "'", found); assertTrue("The extraction '" + expected + "' was not found in '" + text + "'", found);
} }


public void assertExtracted(Set<String> expected, String text) { public void assertExtracted(Set<String> expectedSet, String text) {
Collection<RelationTriple> extractions = annotate(text).get(NaturalLogicAnnotations.RelationTriplesAnnotation.class); Collection<RelationTriple> extractions = annotate(text).get(NaturalLogicAnnotations.RelationTriplesAnnotation.class);
Set<String> guess = extractions.stream().filter(x -> x.confidence > 0.1).map(RelationTriple::toString).collect(Collectors.toSet()); String actual = StringUtils.join(
assertEquals(StringUtils.join(expected.stream().sorted(), "\n").toLowerCase(), StringUtils.join(guess.stream().map(x -> x.substring(x.indexOf("\t") + 1)).sorted(), "\n").toLowerCase()); extractions.stream().map(x -> x.toString().substring(x.toString().indexOf("\t") + 1).toLowerCase()).sorted(),
"\n");
String expected = StringUtils.join(expectedSet.stream().map(String::toLowerCase).sorted(), "\n");
assertEquals(expected, actual);
} }


public void assertEntailed(String expected, String text) { public void assertEntailed(String expected, String text) {
Expand Down Expand Up @@ -89,9 +91,9 @@ public void testBasicExtractions() {
@Test @Test
public void testExtractionsGeorgeBoyd() { public void testExtractionsGeorgeBoyd() {
assertExtracted(new HashSet<String>() {{ assertExtracted(new HashSet<String>() {{
add("George Boyd\tjoined on\t21 february 2013");
add("George Boyd\tjoined for\tremainder"); add("George Boyd\tjoined for\tremainder");
add("George Boyd\tjoined for\tremainder of season"); add("George Boyd\tjoined for\tremainder of season");
add("George Boyd\tjoined on\t21 february 2013");
add("George Boyd\tjoined on\tloan"); add("George Boyd\tjoined on\tloan");
add("George Boyd\tjoined on\tloan from peterborough united"); add("George Boyd\tjoined on\tloan from peterborough united");
}}, "On 21 February 2013 George Boyd joined on loan from Peterborough United for the remainder of the season."); }}, "On 21 February 2013 George Boyd joined on loan from Peterborough United for the remainder of the season.");
Expand Down
14 changes: 14 additions & 0 deletions lib/README
Expand Up @@ -128,6 +128,20 @@ Not needed by CoreNLP distributions. In core, used only by web apps (jsp pages
LAST UPDATE: 2013-06-05 LAST UPDATE: 2013-06-05
LAST UPDATE BY: Sonal Gupta LAST UPDATE BY: Sonal Gupta


-----------------------------------------------------------------------
commons-math3.jar
ORIGINAL JAR NAME: commons-math3-3.5.jar
VERSION: 3.5
RELEASE DATE: 3-14-2015
SOURCE AVAILABLE: yes
DESCRIPTION: self contained fast math routines
URL: http://commons.apache.org/proper/commons-math/

USED BY: edu.stanford.nlp.loglinear

LAST UPDATE: 2015-9-25
LAST UPDATE BY: Keenon Werling

----------------------------------------------------------------------- -----------------------------------------------------------------------
commons-logging.jar commons-logging.jar


Expand Down
Binary file added lib/antlr-runtime-3.1.2.jar
Binary file not shown.
Binary file added lib/commons-math3.jar
Binary file not shown.
Binary file added lib/hamcrest-core-1.3.jar
Binary file not shown.
Binary file added lib/javaruntype-1.2.jar
Binary file not shown.
Binary file added lib/junit-quickcheck-core-0.4-beta-3.jar
Binary file not shown.
Binary file added lib/junit-quickcheck-generators-0.4-beta-3.jar
Binary file not shown.
Binary file added lib/junit-theories-4.12.jar
Binary file not shown.
Binary file added lib/ognl-3.0.5.jar
Binary file not shown.
28 changes: 12 additions & 16 deletions src/edu/stanford/nlp/ie/crf/CRFLogConditionalObjectiveFunction.java
Expand Up @@ -378,7 +378,7 @@ protected double multiThreadGradient(List<Integer> docIDs, boolean calculateEmpi
} }
} }


// TODO: this is a huge amount of machinery for no discernible reason // TODO: this is a huge amount of machinery for no discernable reason
MulticoreWrapper<Pair<Integer, List<Integer>>, Pair<Integer, Double>> wrapper = MulticoreWrapper<Pair<Integer, List<Integer>>, Pair<Integer, Double>> wrapper =
new MulticoreWrapper<Pair<Integer, List<Integer>>, Pair<Integer, Double>>(multiThreadGrad, (calculateEmpirical ? expectedAndEmpiricalThreadProcessor : expectedThreadProcessor) ); new MulticoreWrapper<Pair<Integer, List<Integer>>, Pair<Integer, Double>>(multiThreadGrad, (calculateEmpirical ? expectedAndEmpiricalThreadProcessor : expectedThreadProcessor) );


Expand Down Expand Up @@ -416,6 +416,7 @@ protected double multiThreadGradient(List<Integer> docIDs, boolean calculateEmpi
@Override @Override
public void calculate(double[] x) { public void calculate(double[] x) {


double prob = 0.0; // the log prob of the sequence given the model, which is the negation of value at this point
// final double[][] weights = to2D(x); // final double[][] weights = to2D(x);
to2D(x, weights); to2D(x, weights);
setWeights(weights); setWeights(weights);
Expand All @@ -425,7 +426,7 @@ public void calculate(double[] x) {
// double[][] E = empty2D(); // double[][] E = empty2D();
clear2D(E); clear2D(E);


double prob = regularGradientAndValue(); // the log prob of the sequence given the model, which is the negation of value at this point prob = regularGradientAndValue();


if (Double.isNaN(prob)) { // shouldn't be the case if (Double.isNaN(prob)) { // shouldn't be the case
throw new RuntimeException("Got NaN for prob in CRFLogConditionalObjectiveFunction.calculate()" + throw new RuntimeException("Got NaN for prob in CRFLogConditionalObjectiveFunction.calculate()" +
Expand Down Expand Up @@ -463,6 +464,7 @@ public int dataDimension() {


@Override @Override
public void calculateStochastic(double[] x, double [] v, int[] batch) { public void calculateStochastic(double[] x, double [] v, int[] batch) {
double prob = 0.0; // the log prob of the sequence given the model, which is the negation of value at this point
to2D(x, weights); to2D(x, weights);
setWeights(weights); setWeights(weights);


Expand All @@ -474,10 +476,8 @@ public void calculateStochastic(double[] x, double [] v, int[] batch) {


// iterate over all the documents // iterate over all the documents
List<Integer> docIDs = new ArrayList<Integer>(batch.length); List<Integer> docIDs = new ArrayList<Integer>(batch.length);
for (int item : batch) { for (int m=0; m < batch.length; m++) docIDs.add(batch[m]);
docIDs.add(item); prob = multiThreadGradient(docIDs, false);
}
double prob = multiThreadGradient(docIDs, false); // the log prob of the sequence given the model, which is the negation of value at this point


if (Double.isNaN(prob)) { // shouldn't be the case if (Double.isNaN(prob)) { // shouldn't be the case
throw new RuntimeException("Got NaN for prob in CRFLogConditionalObjectiveFunction.calculate()"); throw new RuntimeException("Got NaN for prob in CRFLogConditionalObjectiveFunction.calculate()");
Expand Down Expand Up @@ -522,6 +522,7 @@ public void calculateStochastic(double[] x, double [] v, int[] batch) {
*/ */
@Override @Override
public double calculateStochasticUpdate(double[] x, double xScale, int[] batch, double gScale) { public double calculateStochasticUpdate(double[] x, double xScale, int[] batch, double gScale) {
double prob = 0.0; // the log prob of the sequence given the model, which is the negation of value at this point
// int[][] wis = getWeightIndices(); // int[][] wis = getWeightIndices();
to2D(x, xScale, weights); to2D(x, xScale, weights);
setWeights(weights); setWeights(weights);
Expand All @@ -541,10 +542,8 @@ public double calculateStochasticUpdate(double[] x, double xScale, int[] batch,


// iterate over all the documents // iterate over all the documents
List<Integer> docIDs = new ArrayList<Integer>(batch.length); List<Integer> docIDs = new ArrayList<Integer>(batch.length);
for (int item : batch) { for (int m=0; m < batch.length; m++) docIDs.add(batch[m]);
docIDs.add(item); prob = multiThreadGradient(docIDs, true);
}
double prob = multiThreadGradient(docIDs, true); // the log prob of the sequence given the model, which is the negation of value at this point


if (Double.isNaN(prob)) { // shouldn't be the case if (Double.isNaN(prob)) { // shouldn't be the case
throw new RuntimeException("Got NaN for prob in CRFLogConditionalObjectiveFunction.calculate()"); throw new RuntimeException("Got NaN for prob in CRFLogConditionalObjectiveFunction.calculate()");
Expand Down Expand Up @@ -581,10 +580,8 @@ public void calculateStochasticGradient(double[] x, int[] batch) {


// iterate over all the documents // iterate over all the documents
List<Integer> docIDs = new ArrayList<Integer>(batch.length); List<Integer> docIDs = new ArrayList<Integer>(batch.length);
for (int item : batch) { for (int m=0; m < batch.length; m++) docIDs.add(batch[m]);
docIDs.add(item); multiThreadGradient(docIDs, true);
}
multiThreadGradient(docIDs, true);


int index = 0; int index = 0;
for (int i = 0; i < E.length; i++) { for (int i = 0; i < E.length; i++) {
Expand Down Expand Up @@ -865,7 +862,7 @@ public void to2D(double[] weights1D, double wScale, double[][] newWeights) {
public static void clear2D(double[][] arr2D) { public static void clear2D(double[][] arr2D) {
for (int i = 0; i < arr2D.length; i++) for (int i = 0; i < arr2D.length; i++)
for (int j = 0; j < arr2D[i].length; j++) for (int j = 0; j < arr2D[i].length; j++)
arr2D[i][j] = 0.0; arr2D[i][j] = 0;
} }


public static void to1D(double[][] weights, double[] newWeights) { public static void to1D(double[][] weights, double[] newWeights) {
Expand Down Expand Up @@ -917,5 +914,4 @@ protected double[][] empty2D() {
public int[][] getLabels() { public int[][] getLabels() {
return labels; return labels;
} }

} }
4 changes: 1 addition & 3 deletions src/edu/stanford/nlp/io/IOUtils.java
Expand Up @@ -494,8 +494,6 @@ public static InputStream getInputStreamFromURLOrClasspathOrFileSystem(String te




// todo [cdm 2015]: I think GZIPInputStream has its own buffer and so we don't need to buffer in that case. // todo [cdm 2015]: I think GZIPInputStream has its own buffer and so we don't need to buffer in that case.
// todo: Though its default size is 512 bytes so need to make 8K in constructor. Or else buffering outside gzip is faster
// todo: final InputStream is = new GZIPInputStream( new FileInputStream( file ), 65536 );
/** /**
* Quietly opens a File. If the file ends with a ".gz" extension, * Quietly opens a File. If the file ends with a ".gz" extension,
* automatically opens a GZIPInputStream to wrap the constructed * automatically opens a GZIPInputStream to wrap the constructed
Expand Down Expand Up @@ -1838,7 +1836,7 @@ public static PrintWriter encodedOutputStreamPrintWriter(OutputStream stream,


/** /**
* A raw file copy function -- this is not public since no error checks are made as to the * A raw file copy function -- this is not public since no error checks are made as to the
* consistency of the file being copied. Use instead: * consistency of the filed being copied. Use instead:
* @see IOUtils#cp(java.io.File, java.io.File, boolean) * @see IOUtils#cp(java.io.File, java.io.File, boolean)
* @param source The source file. This is guaranteed to exist, and is guaranteed to be a file. * @param source The source file. This is guaranteed to exist, and is guaranteed to be a file.
* @param target The target file. * @param target The target file.
Expand Down
35 changes: 18 additions & 17 deletions src/edu/stanford/nlp/loglinear/inference/TableFactor.java
Expand Up @@ -3,6 +3,7 @@
import edu.stanford.nlp.loglinear.model.ConcatVector; import edu.stanford.nlp.loglinear.model.ConcatVector;
import edu.stanford.nlp.loglinear.model.GraphicalModel; import edu.stanford.nlp.loglinear.model.GraphicalModel;
import edu.stanford.nlp.loglinear.model.NDArrayDoubles; import edu.stanford.nlp.loglinear.model.NDArrayDoubles;
import org.apache.commons.math3.util.FastMath;


import java.util.ArrayList; import java.util.ArrayList;
import java.util.Iterator; import java.util.Iterator;
Expand Down Expand Up @@ -172,7 +173,7 @@ public double[][] getSummedMarginals() {
while (true) { while (true) {
double v = getAssignmentLogValue(assignment); double v = getAssignmentLogValue(assignment);
for (int i = 0; i < neighborIndices.length; i++) { for (int i = 0; i < neighborIndices.length; i++) {
results[i][assignment[i]] += Math.exp(v - maxValues[i][assignment[i]]); results[i][assignment[i]] += FastMath.exp(v - maxValues[i][assignment[i]]);
} }
// This mutates the resultAssignment[] array, rather than creating a new one // This mutates the resultAssignment[] array, rather than creating a new one
if (secondFastPassByReferenceIterator.hasNext()) { if (secondFastPassByReferenceIterator.hasNext()) {
Expand All @@ -186,7 +187,7 @@ public double[][] getSummedMarginals() {
for (int i = 0; i < neighborIndices.length; i++) { for (int i = 0; i < neighborIndices.length; i++) {
double sum = 0.0; double sum = 0.0;
for (int j = 0; j < results[i].length; j++) { for (int j = 0; j < results[i].length; j++) {
results[i][j] = Math.exp(maxValues[i][j]) * results[i][j]; results[i][j] = FastMath.exp(maxValues[i][j]) * results[i][j];
sum += results[i][j]; sum += results[i][j];
} }
if (Double.isInfinite(sum)) { if (Double.isInfinite(sum)) {
Expand Down Expand Up @@ -295,7 +296,7 @@ public TableFactor sumOut(int variable) {
for (int j = 0; j < getDimensions()[1]; j++) { for (int j = 0; j < getDimensions()[1]; j++) {
int index = k + j; int index = k + j;
if (Double.isFinite(max[j])) { if (Double.isFinite(max[j])) {
marginalized.values[j] += Math.exp(values[index] - max[j]); marginalized.values[j] += FastMath.exp(values[index] - max[j]);
} }
} }
} }
Expand All @@ -304,7 +305,7 @@ public TableFactor sumOut(int variable) {


for (int j = 0; j < getDimensions()[1]; j++) { for (int j = 0; j < getDimensions()[1]; j++) {
if (Double.isFinite(max[j])) { if (Double.isFinite(max[j])) {
marginalized.values[j] = max[j] + Math.log(marginalized.values[j]); marginalized.values[j] = max[j] + FastMath.log(marginalized.values[j]);
} }
else { else {
marginalized.values[j] = max[j]; marginalized.values[j] = max[j];
Expand Down Expand Up @@ -343,7 +344,7 @@ public TableFactor sumOut(int variable) {
for (int j = 0; j < getDimensions()[1]; j++) { for (int j = 0; j < getDimensions()[1]; j++) {
int index = k + j; int index = k + j;
if (Double.isFinite(max[i])) { if (Double.isFinite(max[i])) {
marginalized.values[i] += Math.exp(values[index] - max[i]); marginalized.values[i] += FastMath.exp(values[index] - max[i]);
} }
} }
} }
Expand All @@ -352,7 +353,7 @@ public TableFactor sumOut(int variable) {


for (int i = 0; i < getDimensions()[0]; i++) { for (int i = 0; i < getDimensions()[0]; i++) {
if (Double.isFinite(max[i])) { if (Double.isFinite(max[i])) {
marginalized.values[i] = max[i] + Math.log(marginalized.values[i]); marginalized.values[i] = max[i] + FastMath.log(marginalized.values[i]);
} }
else { else {
marginalized.values[i] = max[i]; marginalized.values[i] = max[i];
Expand All @@ -370,11 +371,11 @@ public TableFactor sumOut(int variable) {
TableFactor maxValues = maxOut(variable); TableFactor maxValues = maxOut(variable);


// Then we do the sum against an offset from the pivots // Then we do the sum against an offset from the pivots
TableFactor marginalized = marginalize(variable, 0, (marginalizedVariableValue, assignment) -> (a, b) -> a + Math.exp(b - maxValues.getAssignmentLogValue(assignment))); TableFactor marginalized = marginalize(variable, 0, (marginalizedVariableValue, assignment) -> (a, b) -> a + FastMath.exp(b - maxValues.getAssignmentLogValue(assignment)));


// Then we factor the max values back in, and // Then we factor the max values back in, and
for (int[] assignment : marginalized) { for (int[] assignment : marginalized) {
marginalized.setAssignmentLogValue(assignment, maxValues.getAssignmentLogValue(assignment) + Math.log(marginalized.getAssignmentLogValue(assignment))); marginalized.setAssignmentLogValue(assignment, maxValues.getAssignmentLogValue(assignment) + FastMath.log(marginalized.getAssignmentLogValue(assignment)));
} }


return marginalized; return marginalized;
Expand Down Expand Up @@ -509,14 +510,14 @@ public double valueSum() {


double sumExp = 0.0; double sumExp = 0.0;
for (int[] assignment : this) { for (int[] assignment : this) {
sumExp += Math.exp(getAssignmentLogValue(assignment) - max); sumExp += FastMath.exp(getAssignmentLogValue(assignment) - max);
} }


return sumExp * Math.exp(max); return sumExp * FastMath.exp(max);
} }


/** /**
* Just a pass through to the NDArray version, plus a Math.exp to ensure that to the outside world the TableFactor * Just a pass through to the NDArray version, plus a FastMath.exp to ensure that to the outside world the TableFactor
* doesn't look like it's in log-space * doesn't look like it's in log-space
* *
* @param assignment a list of variable settings, in the same order as the neighbors array of the factor * @param assignment a list of variable settings, in the same order as the neighbors array of the factor
Expand All @@ -526,19 +527,19 @@ public double valueSum() {
public double getAssignmentValue(int[] assignment) { public double getAssignmentValue(int[] assignment) {
double d = super.getAssignmentValue(assignment); double d = super.getAssignmentValue(assignment);
// if (d == null) d = Double.NEGATIVE_INFINITY; // if (d == null) d = Double.NEGATIVE_INFINITY;
return Math.exp(d); return FastMath.exp(d);
} }


/** /**
* Just a pass through to the NDArray version, plus a Math.log to ensure that to the outside world the TableFactor * Just a pass through to the NDArray version, plus a FastMath.log to ensure that to the outside world the TableFactor
* doesn't look like it's in log-space * doesn't look like it's in log-space
* *
* @param assignment a list of variable settings, in the same order as the neighbors array of the factor * @param assignment a list of variable settings, in the same order as the neighbors array of the factor
* @param value the value to put into the factor table * @param value the value to put into the factor table
*/ */
@Override @Override
public void setAssignmentValue(int[] assignment, double value) { public void setAssignmentValue(int[] assignment, double value) {
super.setAssignmentValue(assignment, Math.log(value)); super.setAssignmentValue(assignment, FastMath.log(value));
} }


//////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////
Expand Down Expand Up @@ -651,9 +652,9 @@ private void normalizeLogArr(double[] arr) {
} }
double expSum = 0.0; double expSum = 0.0;
for (double d : arr) { for (double d : arr) {
expSum += Math.exp(d-max); expSum += FastMath.exp(d-max);
} }
double logSumExp = max + Math.log(expSum); double logSumExp = max + FastMath.log(expSum);


if (Double.isInfinite(logSumExp)) { if (Double.isInfinite(logSumExp)) {
// Just put in uniform probabilities if we are normalizing all 0s // Just put in uniform probabilities if we are normalizing all 0s
Expand All @@ -664,7 +665,7 @@ private void normalizeLogArr(double[] arr) {
else { else {
// Normalize in log-scale before exponentiation, to help with stability // Normalize in log-scale before exponentiation, to help with stability
for (int i = 0; i < arr.length; i++) { for (int i = 0; i < arr.length; i++) {
arr[i] = Math.exp(arr[i] - logSumExp); arr[i] = FastMath.exp(arr[i] - logSumExp);
} }
} }
} }
Expand Down

0 comments on commit 6207170

Please sign in to comment.