Skip to content

Commit

Permalink
fix double loading of TokensRegexNER bug
Browse files Browse the repository at this point in the history
  • Loading branch information
J38 authored and Stanford NLP committed Nov 29, 2016
1 parent 07d045a commit 8106752
Show file tree
Hide file tree
Showing 22 changed files with 213 additions and 282 deletions.
6 changes: 1 addition & 5 deletions build.gradle
Expand Up @@ -11,7 +11,7 @@ sourceCompatibility = 1.8
targetCompatibility = 1.8
compileJava.options.encoding = 'UTF-8'

version = '3.7.0'
version = '3.6.0'

// Gradle application plugin
mainClassName = "edu.stanford.nlp.pipeline.StanfordCoreNLP"
Expand Down Expand Up @@ -61,7 +61,3 @@ eclipse {
}
}
}

task wrapper(type: Wrapper) {
gradleVersion = '3.2'
}
52 changes: 27 additions & 25 deletions src/edu/stanford/nlp/classify/ColumnDataClassifier.java

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions src/edu/stanford/nlp/classify/LinearClassifier.java
Expand Up @@ -1235,15 +1235,15 @@ private LinearClassifier() { }
*
* @param weights The parameters of the classifier. The first index is the
* featureIndex value and second index is the labelIndex value.
* @param featureIndex An index from F to integers used to index the features in the weights array
* @param labelIndex An index from L to integers used to index the labels in the weights array
* @param featureIndex An index from F to integers used to index the features in the weights array
* @param labelIndex An index from L to integers used to index the labels in the weights array
*/
public LinearClassifier(double[][] weights, Index<F> featureIndex, Index<L> labelIndex) {
this.featureIndex = featureIndex;
this.labelIndex = labelIndex;
// The weights array is stored by reference; no copy is made here.
this.weights = weights;
// One threshold slot per label in labelIndex.
thresholds = new double[labelIndex.size()];
// Arrays.fill(thresholds, 0.0); // not needed; Java arrays zero initialized
// Explicit fill leaves every threshold at 0.0, the same value a new double[] already holds.
Arrays.fill(thresholds, 0.0);
}

// todo: This is unused and seems broken (ignores passed in thresholds)
Expand Down
Expand Up @@ -160,8 +160,8 @@ protected void calculate(double[] x1) {
Arrays.fill(derivative, 0.0);
double[] sums = new double[numClasses];
double[] probs = new double[numClasses];
// double[] counts = new double[numClasses];
// Arrays.fill(counts, 0.0); // not needed; Java arrays zero initialized
double[] counts = new double[numClasses];
Arrays.fill(counts, 0.0);
for (int d = 0; d < data.length; d++) {
int[] features = data[d];
// activation
Expand Down
@@ -1,4 +1,4 @@
package edu.stanford.nlp.classify;
package edu.stanford.nlp.classify;
import edu.stanford.nlp.util.logging.Redwood;

import java.lang.reflect.Array;
Expand Down Expand Up @@ -159,8 +159,8 @@ private void calculateSCL(double[] x) {
Arrays.fill(derivative, 0.0);
double[] sums = new double[numClasses];
double[] probs = new double[numClasses];
// double[] counts = new double[numClasses];
// Arrays.fill(counts, 0.0); // not needed; Java arrays zero initialized
double[] counts = new double[numClasses];
Arrays.fill(counts, 0.0);
for (int d = 0; d < data.length; d++) {
int[] features = data[d];
// activation
Expand Down Expand Up @@ -763,8 +763,8 @@ public void calculateStochasticGradient(double[] x, int[] batch) {
Arrays.fill(derivative, 0.0);
double[] sums = new double[numClasses];
double[] probs = new double[numClasses];
//double[] counts = new double[numClasses];
// Arrays.fill(counts, 0.0); // not needed; Java arrays zero initialized
double[] counts = new double[numClasses];
Arrays.fill(counts, 0.0);
for (int d : batch) {

//Sets the index based on the current batch
Expand Down
4 changes: 2 additions & 2 deletions src/edu/stanford/nlp/classify/RVFDataset.java
Expand Up @@ -131,7 +131,7 @@ public Pair<GeneralDataset<L, F>, GeneralDataset<L, F>> split(double percentDev)

public void scaleFeaturesGaussian() {
means = new double[this.numFeatures()];
// Arrays.fill(means, 0); // not needed; Java arrays zero initialized
Arrays.fill(means, 0);

for (int i = 0; i < this.size(); i++) {
for (int j = 0; j < data[i].length; j++)
Expand All @@ -140,7 +140,7 @@ public void scaleFeaturesGaussian() {
ArrayMath.multiplyInPlace(means, 1.0 / this.size());

stdevs = new double[this.numFeatures()];
// Arrays.fill(stdevs, 0); // not needed; Java arrays zero initialized
Arrays.fill(stdevs, 0);
double[] deltaX = new double[this.numFeatures()];

for (int i = 0; i < this.size(); i++) {
Expand Down
18 changes: 5 additions & 13 deletions src/edu/stanford/nlp/ie/EntityCachingAbstractSequencePrior.java
Expand Up @@ -43,30 +43,25 @@ public EntityCachingAbstractSequencePrior(String backgroundSymbol, Index<String>

Entity[] entities;

/**
 * Returns the left window of this prior.
 *
 * @return {@code Integer.MAX_VALUE}, because the model is not Markovian
 */
@Override
public int leftWindow() {
return Integer.MAX_VALUE; // not Markovian!
}

/**
 * Returns the right window of this prior.
 *
 * @return {@code Integer.MAX_VALUE}, because the model is not Markovian
 */
@Override
public int rightWindow() {
return Integer.MAX_VALUE; // not Markovian!
}

/**
 * Returns the values that may be assigned at a position.
 *
 * @param position the sequence position; not consulted, the same array is
 *                 returned for every position
 * @return the {@code possibleValues} array itself (not a copy)
 */
@Override
public int[] getPossibleValues(int position) {
return possibleValues;
}

/**
 * Returns the score of the value currently held at {@code pos}.
 *
 * @param sequence the sequence being scored
 * @param pos the position whose current value is scored
 * @return the entry of {@code scoresOf(sequence, pos)} indexed by {@code sequence[pos]}
 */
@Override
public double scoreOf(int[] sequence, int pos) {
return scoresOf(sequence, pos)[sequence[pos]];
}

/**
 * Returns the length of the sequence, as reported by {@code doc.size()}.
 *
 * @return the length of the sequence
 */
@Override
public int length() {
return doc.size();
}
Expand All @@ -86,7 +81,6 @@ public double[] getConditionalDistribution (int[] sequence, int position) {
return probs;
}

@Override
public double[] scoresOf (int[] sequence, int position) {
double[] probs = new double[numClasses];
int origClass = sequence[position];
Expand All @@ -100,11 +94,10 @@ public double[] scoresOf (int[] sequence, int position) {
return probs;
}

@Override
public void setInitialSequence(int[] initialSequence) {
this.sequence = initialSequence;
entities = new Entity[initialSequence.length];
// Arrays.fill(entities, null); // not needed; Java arrays zero initialized
Arrays.fill(entities, null);
for (int i = 0; i < initialSequence.length; i++) {
if (initialSequence[i] != backgroundSymbol) {
Entity entity = extractEntity(initialSequence, i);
Expand Down Expand Up @@ -271,7 +264,6 @@ public boolean noChange(int[] sequence, int position) {
return false;
}

@Override
public void updateSequenceElement(int[] sequence, int position, int oldVal) {
if (VERBOSE) System.out.println("changing position "+position+" from " +classIndex.get(oldVal)+" to "+classIndex.get(sequence[position]));

Expand Down Expand Up @@ -444,7 +436,7 @@ else if (removingBeginningOfEntity(sequence, position)) {

@Override
public String toString() {
StringBuilder sb = new StringBuilder();
StringBuffer sb = new StringBuffer();
for (int i = 0; i < entities.length; i++) {
sb.append(i);
sb.append("\t");
Expand All @@ -462,7 +454,7 @@ public String toString() {
}

public String toString(int pos) {
StringBuilder sb = new StringBuilder();
StringBuffer sb = new StringBuffer();
for (int i = Math.max(0, pos - 10); i < Math.min(entities.length, pos + 10); i++) {
sb.append(i);
sb.append("\t");
Expand All @@ -486,13 +478,13 @@ class Entity {
public int type;

/**
* the beginning index of other locations where this sequence of
* the beginning index of other locations where this sequence of
* words appears.
*/
public int[] otherOccurrences;

public String toString(Index<String> classIndex) {
StringBuilder sb = new StringBuilder();
StringBuffer sb = new StringBuffer();
sb.append("\"");
sb.append(StringUtils.join(words, " "));
sb.append("\" start: ");
Expand Down
@@ -1,4 +1,4 @@
package edu.stanford.nlp.ie;
package edu.stanford.nlp.ie;
import edu.stanford.nlp.util.logging.Redwood;

import edu.stanford.nlp.sequences.ListeningSequenceModel;
Expand Down Expand Up @@ -123,7 +123,7 @@ public double[] scoresOf (int[] sequence, int position) {
public void setInitialSequence(int[] initialSequence) {
this.sequence = initialSequence;
entities = new EntityBIO[initialSequence.length];
// Arrays.fill(entities, null); // not needed; Java arrays zero initialized
Arrays.fill(entities, null);
for (int i = 0; i < initialSequence.length; i++) {
if (initialSequence[i] != backgroundSymbol) {
String rawTag = classIndex.get(sequence[i]);
Expand Down
5 changes: 1 addition & 4 deletions src/edu/stanford/nlp/ie/KBPStatisticalExtractor.java
Expand Up @@ -543,10 +543,7 @@ private static void relationSpecificFeatures(KBPInput input, Sentence sentence,
}

public static Counter<String> features(KBPInput input) {
// Ensure RegexNER Tags!
input.sentence.regexner(DefaultPaths.DEFAULT_KBP_REGEXNER_CASED, false);
input.sentence.regexner(DefaultPaths.DEFAULT_KBP_REGEXNER_CASELESS, true);


// Get useful variables
ClassicCounter<String> feats = new ClassicCounter<>();
if (Span.overlaps(input.subjectSpan, input.objectSpan) || input.subjectSpan.size() == 0 || input.objectSpan.size() == 0) {
Expand Down
@@ -1,4 +1,4 @@
package edu.stanford.nlp.ie.crf;
package edu.stanford.nlp.ie.crf;
import edu.stanford.nlp.util.logging.Redwood;

import edu.stanford.nlp.math.ArrayMath;
Expand Down Expand Up @@ -82,7 +82,7 @@ public int domainDimension() {
}

public CliquePotentialFunction getCliquePotentialFunction(double[] x) {
throw new UnsupportedOperationException("CRFLogConditionalObjectiveFloatFunction is not clique potential compatible yet");
throw new UnsupportedOperationException("CRFLogConditionalObjectiveFloatFunction is not clique potential compatible yet");
}

public float[][] to2D(float[] weights) {
Expand Down Expand Up @@ -111,7 +111,7 @@ public float[][] empty2D() {
int index = 0;
for (int i = 0; i < map.length; i++) {
d[i] = new float[labelIndices.get(map[i]).size()];
// Arrays.fill(d[i], 0); // not needed; Java arrays zero initialized
Arrays.fill(d[i], 0);
index += labelIndices.get(map[i]).size();
}
return d;
Expand Down Expand Up @@ -150,19 +150,19 @@ public static FloatFactorTable getFloatFactorTable(float[][] weights, int[][] da
for (int j = 0; j < labelIndices.size(); j++) {
Index labelIndex = labelIndices.get(j);
FloatFactorTable ft = new FloatFactorTable(numClasses, j + 1);

// ...and each possible labeling for that clique
for (int k = 0; k < labelIndex.size(); k++) {
int[] label = ((CRFLabel) labelIndex.get(k)).getLabel();
float weight = 0.0f;
for (int m = 0; m < data[j].length; m++) {
//log.info("**"+weights[data[j][m]][k]);
//log.info("**"+weights[data[j][m]][k]);
weight += weights[data[j][m]][k];
}
ft.setValue(label, weight);
//log.info(">>"+ft);
//log.info(">>"+ft);
}
//log.info("::"+ft);
//log.info("::"+ft);
if (j > 0) {
ft.multiplyInEnd(factorTable);
}
Expand Down Expand Up @@ -306,7 +306,7 @@ public void calculate(float[] x) {
}
}


// priors
if (prior == QUADRATIC_PRIOR) {
float sigmaSq = sigma * sigma;
Expand Down Expand Up @@ -358,7 +358,7 @@ public void calculateWeird1(float[] x) {
sums[i] = new float[size];
probs[i] = new float[size];
counts[i] = new float[size];
// Arrays.fill(counts[i], 0.0f); // not needed; Java arrays zero initialized
Arrays.fill(counts[i], 0.0f);
}

for (int d = 0; d < data.length; d++) {
Expand Down Expand Up @@ -406,9 +406,9 @@ public void calculateWeird1(float[] x) {
// derivative[index]--;
// }
// index++;
// }
// }
// }


value -= sums[cl][labelIndex] - total;

Expand All @@ -419,15 +419,15 @@ public void calculateWeird1(float[] x) {
// }

}

// go through each clique...
for (int j = 0; j < data[d][e].length; j++) {
Index labelIndex = labelIndices.get(j);

// ...and each possible labeling for that clique
for (int k = 0; k < labelIndex.size(); k++) {
int[] label = ((CRFLabel) labelIndex.get(k)).getLabel();

// float p = Math.pow(Math.E, factorTables[i].logProbEnd(label));
float p = probs[j][k];
for (int n = 0; n < data[d][e][j].length; n++) {
Expand All @@ -438,7 +438,7 @@ public void calculateWeird1(float[] x) {
}

}


// compute the partial derivative for each feature
int index = 0;
Expand Down Expand Up @@ -568,7 +568,7 @@ public void calculateWeird(float[] x) {
}
}
}
// compute the partial derivative for each feature
int index = 0;
Expand Down
2 changes: 1 addition & 1 deletion src/edu/stanford/nlp/ie/ner/CMMClassifier.java
Expand Up @@ -1331,7 +1331,7 @@ public void trainSemiSup() {
double[][] confusionMatrix = new double[classIndex.size()][classIndex.size()];

for (int i = 0; i < confusionMatrix.length; i++) {
// Arrays.fill(confusionMatrix[i], 0.0); // not needed; Java arrays zero initialized
Arrays.fill(confusionMatrix[i], 0.0);
confusionMatrix[i][i] = 1.0;
}

Expand Down
Expand Up @@ -8,7 +8,7 @@

public abstract class AbstractCachingDiffFloatFunction implements DiffFloatFunction, HasFloatInitial {

private float[] lastX = null;
float[] lastX = null;

protected float[] derivative = null;
protected float value = 0.0f;
Expand All @@ -26,7 +26,7 @@ public abstract class AbstractCachingDiffFloatFunction implements DiffFloatFunct
/**
 * Builds the initial point.
 *
 * @return a newly allocated array of length {@code domainDimension()} whose
 *         entries are all {@code 0.0f}
 */
@Override
public float[] initial() {
float[] initial = new float[domainDimension()];
// Arrays.fill(initial, 0.0f); // not needed; Java arrays zero initialized
// Explicit fill keeps every entry at 0.0f, the value a new float[] already holds.
Arrays.fill(initial, 0.0f);
return initial;
}

Expand Down Expand Up @@ -67,5 +67,4 @@ public float[] derivativeAt(float[] x) {
ensure(x);
return derivative;
}

}
Expand Up @@ -113,7 +113,7 @@ protected void clearCache() {
/**
 * Builds the initial point.
 *
 * @return a newly allocated array of length {@code domainDimension()} whose
 *         entries are all {@code 0.0}
 */
@Override
public double[] initial() {
double[] initial = new double[domainDimension()];
// Arrays.fill(initial, 0.0); // not needed; Java arrays zero initialized
// Explicit fill keeps every entry at 0.0, the value a new double[] already holds.
Arrays.fill(initial, 0.0);
return initial;
}

Expand Down

0 comments on commit 8106752

Please sign in to comment.