Fix bug causing TokensRegexNER to be loaded twice

stanfordnlp · Nov 29, 2016 · 8106752 · 8106752
1 parent 07d045a
commit 8106752
Show file tree

Hide file tree

Showing 22 changed files with 213 additions and 282 deletions.
diff --git a/build.gradle b/build.gradle
@@ -11,7 +11,7 @@ sourceCompatibility = 1.8
 targetCompatibility = 1.8
 compileJava.options.encoding = 'UTF-8'
 
-version = '3.7.0'
+version = '3.6.0'
 
 // Gradle application plugin
 mainClassName = "edu.stanford.nlp.pipeline.StanfordCoreNLP"
@@ -61,7 +61,3 @@ eclipse {
     }
   }
 }
-
-task wrapper(type: Wrapper) {
-    gradleVersion = '3.2'
-}
diff --git a/src/edu/stanford/nlp/classify/ColumnDataClassifier.java b/src/edu/stanford/nlp/classify/ColumnDataClassifier.java
diff --git a/src/edu/stanford/nlp/classify/LinearClassifier.java b/src/edu/stanford/nlp/classify/LinearClassifier.java
@@ -1235,15 +1235,15 @@ private LinearClassifier() { }
    *
    *  @param weights The parameters of the classifier. The first index is the
    *                 featureIndex value and second index is the labelIndex value.
-   *  @param featureIndex An index from F to integers used to index the features in the weights array
-   *  @param labelIndex An index from L to integers used to index the labels in the weights array
+   * @param featureIndex An index from F to integers used to index the features in the weights array
+   * @param labelIndex An index from L to integers used to index the labels in the weights array
    */
   public LinearClassifier(double[][] weights, Index<F> featureIndex, Index<L> labelIndex) {
     this.featureIndex = featureIndex;
     this.labelIndex = labelIndex;
     this.weights = weights;
     thresholds = new double[labelIndex.size()];
-    // Arrays.fill(thresholds, 0.0); // not needed; Java arrays zero initialized
+    Arrays.fill(thresholds, 0.0);
   }
 
   // todo: This is unused and seems broken (ignores passed in thresholds)

diff --git a/src/edu/stanford/nlp/classify/LogConditionalEqConstraintFunction.java b/src/edu/stanford/nlp/classify/LogConditionalEqConstraintFunction.java
@@ -160,8 +160,8 @@ protected void calculate(double[] x1) {
     Arrays.fill(derivative, 0.0);
     double[] sums = new double[numClasses];
     double[] probs = new double[numClasses];
-    // double[] counts = new double[numClasses];
-    // Arrays.fill(counts, 0.0); // not needed; Java arrays zero initialized
+    double[] counts = new double[numClasses];
+    Arrays.fill(counts, 0.0);
     for (int d = 0; d < data.length; d++) {
       int[] features = data[d];
       // activation

diff --git a/src/edu/stanford/nlp/classify/LogConditionalObjectiveFunction.java b/src/edu/stanford/nlp/classify/LogConditionalObjectiveFunction.java
@@ -1,4 +1,4 @@
-package edu.stanford.nlp.classify;
+package edu.stanford.nlp.classify; 
 import edu.stanford.nlp.util.logging.Redwood;
 
 import java.lang.reflect.Array;
@@ -159,8 +159,8 @@ private void calculateSCL(double[] x) {
     Arrays.fill(derivative, 0.0);
     double[] sums = new double[numClasses];
     double[] probs = new double[numClasses];
-    // double[] counts = new double[numClasses];
-    // Arrays.fill(counts, 0.0); // not needed; Java arrays zero initialized
+    double[] counts = new double[numClasses];
+    Arrays.fill(counts, 0.0);
     for (int d = 0; d < data.length; d++) {
       int[] features = data[d];
       // activation
@@ -763,8 +763,8 @@ public void calculateStochasticGradient(double[] x, int[] batch) {
     Arrays.fill(derivative, 0.0);
     double[] sums = new double[numClasses];
     double[] probs = new double[numClasses];
-    //double[] counts = new double[numClasses];
-    // Arrays.fill(counts, 0.0); // not needed; Java arrays zero initialized
+    double[] counts = new double[numClasses];
+    Arrays.fill(counts, 0.0);
     for (int d : batch) {
 
       //Sets the index based on the current batch

diff --git a/src/edu/stanford/nlp/classify/RVFDataset.java b/src/edu/stanford/nlp/classify/RVFDataset.java
@@ -131,7 +131,7 @@ public Pair<GeneralDataset<L, F>, GeneralDataset<L, F>> split(double percentDev)
 
   public void scaleFeaturesGaussian() {
     means = new double[this.numFeatures()];
-    // Arrays.fill(means, 0); // not needed; Java arrays zero initialized
+    Arrays.fill(means, 0);
 
     for (int i = 0; i < this.size(); i++) {
       for (int j = 0; j < data[i].length; j++)
@@ -140,7 +140,7 @@ public void scaleFeaturesGaussian() {
     ArrayMath.multiplyInPlace(means, 1.0 / this.size());
 
     stdevs = new double[this.numFeatures()];
-    // Arrays.fill(stdevs, 0); // not needed; Java arrays zero initialized
+    Arrays.fill(stdevs, 0);
     double[] deltaX = new double[this.numFeatures()];
 
     for (int i = 0; i < this.size(); i++) {

diff --git a/src/edu/stanford/nlp/ie/EntityCachingAbstractSequencePrior.java b/src/edu/stanford/nlp/ie/EntityCachingAbstractSequencePrior.java
@@ -43,30 +43,25 @@ public EntityCachingAbstractSequencePrior(String backgroundSymbol, Index<String>
 
   Entity[] entities;
 
-  @Override
   public int leftWindow() {
     return Integer.MAX_VALUE; // not Markovian!
   }
 
-  @Override
   public int rightWindow() {
     return Integer.MAX_VALUE; // not Markovian!
   }
 
-  @Override
   public int[] getPossibleValues(int position) {
     return possibleValues;
   }
 
-  @Override
   public double scoreOf(int[] sequence, int pos) {
     return scoresOf(sequence, pos)[sequence[pos]];
   }
 
   /**
    * @return the length of the sequence
    */
-  @Override
   public int length() {
     return doc.size();
   }
@@ -86,7 +81,6 @@ public  double[] getConditionalDistribution (int[] sequence, int position) {
     return probs;
   }
 
-  @Override
   public  double[] scoresOf (int[] sequence, int position) {
     double[] probs = new double[numClasses];
     int origClass = sequence[position];
@@ -100,11 +94,10 @@ public  double[] scoresOf (int[] sequence, int position) {
     return probs;
   }
 
-  @Override
   public void setInitialSequence(int[] initialSequence) {
     this.sequence = initialSequence;
     entities = new Entity[initialSequence.length];
-    // Arrays.fill(entities, null); // not needed; Java arrays zero initialized
+    Arrays.fill(entities, null);
     for (int i = 0; i < initialSequence.length; i++) {
       if (initialSequence[i] != backgroundSymbol) {
         Entity entity = extractEntity(initialSequence, i);
@@ -271,7 +264,6 @@ public boolean noChange(int[] sequence, int position) {
     return false;
   }
 
-  @Override
   public void updateSequenceElement(int[] sequence, int position, int oldVal) {
     if (VERBOSE) System.out.println("changing position "+position+" from " +classIndex.get(oldVal)+" to "+classIndex.get(sequence[position]));
 
@@ -444,7 +436,7 @@ else if (removingBeginningOfEntity(sequence, position)) {
 
   @Override
   public String toString() {
-    StringBuilder sb = new StringBuilder();
+    StringBuffer sb = new StringBuffer();
     for (int i = 0; i < entities.length; i++) {
       sb.append(i);
       sb.append("\t");
@@ -462,7 +454,7 @@ public String toString() {
   }
 
   public String toString(int pos) {
-    StringBuilder sb = new StringBuilder();
+    StringBuffer sb = new StringBuffer();
     for (int i = Math.max(0, pos - 10); i < Math.min(entities.length, pos + 10); i++) {
       sb.append(i);
       sb.append("\t");
@@ -486,13 +478,13 @@ class Entity {
   public int type;
 
   /**
-   * the beginning index of other locations where this sequence of
+   * the begining index of other locations where this sequence of
    * words appears.
    */
   public int[] otherOccurrences;
 
   public String toString(Index<String> classIndex) {
-    StringBuilder sb = new StringBuilder();
+    StringBuffer sb = new StringBuffer();
     sb.append("\"");
     sb.append(StringUtils.join(words, " "));
     sb.append("\" start: ");

diff --git a/src/edu/stanford/nlp/ie/EntityCachingAbstractSequencePriorBIO.java b/src/edu/stanford/nlp/ie/EntityCachingAbstractSequencePriorBIO.java
@@ -1,4 +1,4 @@
-package edu.stanford.nlp.ie;
+package edu.stanford.nlp.ie; 
 import edu.stanford.nlp.util.logging.Redwood;
 
 import edu.stanford.nlp.sequences.ListeningSequenceModel;
@@ -123,7 +123,7 @@ public  double[] scoresOf (int[] sequence, int position) {
   public void setInitialSequence(int[] initialSequence) {
     this.sequence = initialSequence;
     entities = new EntityBIO[initialSequence.length];
-    // Arrays.fill(entities, null);  // not needed; Java arrays zero initialized
+    Arrays.fill(entities, null);
     for (int i = 0; i < initialSequence.length; i++) {
       if (initialSequence[i] != backgroundSymbol) {
         String rawTag = classIndex.get(sequence[i]);

diff --git a/src/edu/stanford/nlp/ie/KBPStatisticalExtractor.java b/src/edu/stanford/nlp/ie/KBPStatisticalExtractor.java
@@ -543,10 +543,7 @@ private static void relationSpecificFeatures(KBPInput input, Sentence sentence,
   }
 
   public static Counter<String> features(KBPInput input) {
-    // Ensure RegexNER Tags!
-    input.sentence.regexner(DefaultPaths.DEFAULT_KBP_REGEXNER_CASED, false);
-    input.sentence.regexner(DefaultPaths.DEFAULT_KBP_REGEXNER_CASELESS, true);
-
+
     // Get useful variables
     ClassicCounter<String> feats = new ClassicCounter<>();
     if (Span.overlaps(input.subjectSpan, input.objectSpan) || input.subjectSpan.size() == 0 || input.objectSpan.size() == 0) {

diff --git a/src/edu/stanford/nlp/ie/crf/CRFLogConditionalObjectiveFloatFunction.java b/src/edu/stanford/nlp/ie/crf/CRFLogConditionalObjectiveFloatFunction.java
@@ -1,4 +1,4 @@
-package edu.stanford.nlp.ie.crf;
+package edu.stanford.nlp.ie.crf; 
 import edu.stanford.nlp.util.logging.Redwood;
 
 import edu.stanford.nlp.math.ArrayMath;
@@ -82,7 +82,7 @@ public int domainDimension() {
   }
 
   public CliquePotentialFunction getCliquePotentialFunction(double[] x) {
-    throw new UnsupportedOperationException("CRFLogConditionalObjectiveFloatFunction is not clique potential compatible yet");
+    throw new UnsupportedOperationException("CRFLogConditionalObjectiveFloatFunction is not clique potential compatible yet"); 
   }
 
   public float[][] to2D(float[] weights) {
@@ -111,7 +111,7 @@ public float[][] empty2D() {
     int index = 0;
     for (int i = 0; i < map.length; i++) {
       d[i] = new float[labelIndices.get(map[i]).size()];
-      // Arrays.fill(d[i], 0);  // not needed; Java arrays zero initialized
+      Arrays.fill(d[i], 0);
       index += labelIndices.get(map[i]).size();
     }
     return d;
@@ -150,19 +150,19 @@ public static FloatFactorTable getFloatFactorTable(float[][] weights, int[][] da
     for (int j = 0; j < labelIndices.size(); j++) {
       Index labelIndex = labelIndices.get(j);
       FloatFactorTable ft = new FloatFactorTable(numClasses, j + 1);
-
+	    
       // ...and each possible labeling for that clique
       for (int k = 0; k < labelIndex.size(); k++) {
         int[] label = ((CRFLabel) labelIndex.get(k)).getLabel();
         float weight = 0.0f;
         for (int m = 0; m < data[j].length; m++) {
-          //log.info("**"+weights[data[j][m]][k]);
+          //log.info("**"+weights[data[j][m]][k]);	    
           weight += weights[data[j][m]][k];
         }
         ft.setValue(label, weight);
-        //log.info(">>"+ft);
+        //log.info(">>"+ft);	    
       }
-      //log.info("::"+ft);
+      //log.info("::"+ft);	    
       if (j > 0) {
         ft.multiplyInEnd(factorTable);
       }
@@ -306,7 +306,7 @@ public void calculate(float[] x) {
       }
     }
 
-
+        
     // priors
     if (prior == QUADRATIC_PRIOR) {
       float sigmaSq = sigma * sigma;
@@ -358,7 +358,7 @@ public void calculateWeird1(float[] x) {
       sums[i] = new float[size];
       probs[i] = new float[size];
       counts[i] = new float[size];
-      // Arrays.fill(counts[i], 0.0f); // not needed; Java arrays zero initialized
+      Arrays.fill(counts[i], 0.0f);
     }
 
     for (int d = 0; d < data.length; d++) {
@@ -406,9 +406,9 @@ public void calculateWeird1(float[] x) {
           // 				derivative[index]--;
           // 			    }
           // 			    index++;
-          // 			}
+          // 			}			
           // 		    }
-
+		    
 
           value -= sums[cl][labelIndex] - total;
 
@@ -419,15 +419,15 @@ public void calculateWeird1(float[] x) {
           // 		    }
 
         }
-
+		
         // go through each clique...
         for (int j = 0; j < data[d][e].length; j++) {
           Index labelIndex = labelIndices.get(j);
-
+		    
           // ...and each possible labeling for that clique
           for (int k = 0; k < labelIndex.size(); k++) {
             int[] label = ((CRFLabel) labelIndex.get(k)).getLabel();
-
+			
             // float p = Math.pow(Math.E, factorTables[i].logProbEnd(label));
             float p = probs[j][k];
             for (int n = 0; n < data[d][e][j].length; n++) {
@@ -438,7 +438,7 @@ public void calculateWeird1(float[] x) {
       }
 
     }
-
+   
 
     // compute the partial derivative for each feature
     int index = 0;
@@ -568,7 +568,7 @@ public void calculateWeird(float[] x) {
         }
       }
     }
-
+  
 
     // compute the partial derivative for each feature
     int index = 0;

diff --git a/src/edu/stanford/nlp/ie/ner/CMMClassifier.java b/src/edu/stanford/nlp/ie/ner/CMMClassifier.java
@@ -1331,7 +1331,7 @@ public void trainSemiSup() {
     double[][] confusionMatrix = new double[classIndex.size()][classIndex.size()];
 
     for (int i = 0; i < confusionMatrix.length; i++) {
-      // Arrays.fill(confusionMatrix[i], 0.0);  // not needed; Java arrays zero initialized
+      Arrays.fill(confusionMatrix[i], 0.0);
       confusionMatrix[i][i] = 1.0;
     }
 

diff --git a/src/edu/stanford/nlp/optimization/AbstractCachingDiffFloatFunction.java b/src/edu/stanford/nlp/optimization/AbstractCachingDiffFloatFunction.java
@@ -8,7 +8,7 @@
 
 public abstract class AbstractCachingDiffFloatFunction implements DiffFloatFunction, HasFloatInitial {
 
-  private float[] lastX = null;
+  float[] lastX = null;
 
   protected float[] derivative = null;
   protected float value = 0.0f;
@@ -26,7 +26,7 @@ public abstract class AbstractCachingDiffFloatFunction implements DiffFloatFunct
   @Override
   public float[] initial() {
     float[] initial = new float[domainDimension()];
-    // Arrays.fill(initial, 0.0f);  // not needed; Java arrays zero initialized
+    Arrays.fill(initial, 0.0f);
     return initial;
   }
 
@@ -67,5 +67,4 @@ public float[] derivativeAt(float[] x) {
     ensure(x);
     return derivative;
   }
-
 }
diff --git a/src/edu/stanford/nlp/optimization/AbstractStochasticCachingDiffFunction.java b/src/edu/stanford/nlp/optimization/AbstractStochasticCachingDiffFunction.java
@@ -113,7 +113,7 @@ protected void clearCache() {
   @Override
   public double[] initial() {
     double[] initial = new double[domainDimension()];
-    // Arrays.fill(initial, 0.0); // not needed; Java arrays zero initialized
+    Arrays.fill(initial, 0.0);
     return initial;
   }