Don't include extra emacs tilde copies in distro.

stanfordnlp · Jan 7, 2017 · d89a9ab · d89a9ab
1 parent ac505c7
commit d89a9ab
Show file tree

Hide file tree

Showing 23 changed files with 296 additions and 571 deletions.
diff --git a/README.md b/README.md
@@ -20,13 +20,15 @@ Here are some helpful instructions to use the latest code:
 3. Then run this command to build a jar with the latest version of the code: `cd CoreNLP/classes ; jar -cf ../stanford-corenlp.jar edu`
 4. This will create a new jar called stanford-corenlp.jar in the CoreNLP folder which contains the latest code
 5. The dependencies that work with the latest code are in CoreNLP/lib and CoreNLP/liblocal, so make sure to include those in your CLASSPATH.
-6. When using the latest version of the code make sure to download the latest versions of the [corenlp-models](http://nlp.stanford.edu/software/stanford-corenlp-models-current.jar), [english-models](http://nlp.stanford.edu/software/stanford-english-corenlp-models-current.jar), and [english-models-kbp](http://nlp.stanford.edu/software/stanford-english-kbp-corenlp-models-current.jar) and include them in your CLASSPATH.  If you are processing languages other than English, make sure to download the latest version of the models jar for the language you are interested in.
+6. When using the latest version of the code make sure to download the latest versions of the [corenlp-models](http://nlp.stanford.edu/software/stanford-corenlp-models-current.jar) and [english-models](http://nlp.stanford.edu/software/stanford-english-corenlp-models-current.jar), and include them in your CLASSPATH.  If you
+are processing languages other than English, make sure to download the latest version of the models jar for the language you are interested in.
 
 #### build with Maven
 
 1. Make sure you have Maven installed, details here: [https://maven.apache.org/](https://maven.apache.org/)
 2. If you run this command in the CoreNLP directory: `mvn package` , it should run the tests and build this jar file: `CoreNLP/target/stanford-corenlp-3.7.0.jar`
-3. When using the latest version of the code make sure to download the latest versions of the [corenlp-models](http://nlp.stanford.edu/software/stanford-corenlp-models-current.jar), [english-models](http://nlp.stanford.edu/software/stanford-english-corenlp-models-current.jar), and [english-models-kbp](http://nlp.stanford.edu/software/stanford-english-kbp-corenlp-models-current.jar) and include them in your CLASSPATH.  If you are processing languages other than English, make sure to download the latest version of the models jar for the language you are interested in.  
+3. When using the latest version of the code make sure to download the latest versions of the [corenlp-models](http://nlp.stanford.edu/software/stanford-corenlp-models-current.jar) and [english-models](http://nlp.stanford.edu/software/stanford-english-corenlp-models-current.jar), and include them in your CLASSPATH.  If you
+are processing languages other than English, make sure to download the latest version of the models jar for the language you are interested in.  
 4. If you want to use Stanford CoreNLP as part of a Maven project you need to install the models jars into your Maven repository.  Below is a sample command for installing the Spanish models jar.  For other languages just change the language name in the command.  To install `stanford-corenlp-models-current.jar` you will need to set `-Dclassifier=models`.  Here is the sample command for Spanish: `mvn install:install-file -Dfile=/location/of/stanford-spanish-corenlp-models-current.jar -DgroupId=edu.stanford.nlp -DartifactId=stanford-corenlp -Dversion=3.7.0 -Dclassifier=models-spanish -Dpackaging=jar` 
 
 You can find releases of Stanford CoreNLP on [Maven Central](http://search.maven.org/#artifactdetails%7Cedu.stanford.nlp%7Cstanford-corenlp%7C3.6.0%7Cjar).

diff --git a/build.gradle b/build.gradle
@@ -41,10 +41,8 @@ sourceSets {
   }
 }
 
-task listDeps {
+task listDeps << {
-    doLast {
+    configurations.compile.each { File file -> println file.name }
-        configurations.compile.each { File file -> println file.name }
-    }
 }
 
 dependencies {

diff --git a/...d/nlp/pipeline/KBPAnnotatorBenchmark.java → ...eline/KBPAnnotatorBenchmarkSlowITest.java b/...d/nlp/pipeline/KBPAnnotatorBenchmark.java → ...eline/KBPAnnotatorBenchmarkSlowITest.java
@@ -10,39 +10,15 @@
 import java.io.*;
 import java.util.*;
 
-public class KBPAnnotatorBenchmark extends TestCase {
+public class KBPAnnotatorBenchmarkSlowITest extends TestCase {
 
   public HashMap<String,String> docIDToText;
   public HashMap<String,Set<String>> docIDToRelations;
   public StanfordCoreNLP pipeline;
 
-  public String KBP_DOCS_DIR;
+  public String KBP_DOCS_DIR = "/scr/nlp/data/kbp-benchmark//kbp-docs";
-  public String GOLD_RELATIONS_PATH;
+  public String GOLD_RELATIONS_PATH = "/scr/nlp/data/kbp-benchmark/kbp-gold-relations.txt";
-  public double KBP_MINIMUM_SCORE;
+  public double KBP_MINIMUM_SCORE = .450;
-
-  public void loadGoldData() {
-    // initialize HashMaps
-    docIDToText = new HashMap<String,String>();
-    docIDToRelations = new HashMap<String,Set<String>>();
-    // load the gold relations from gold relations file
-    List<String> goldRelationLines = IOUtils.linesFromFile(GOLD_RELATIONS_PATH);
-    for (String relationLine : goldRelationLines) {
-      String[] docIDAndRelation = relationLine.split("\t");
-      if (docIDToRelations.get(docIDAndRelation[0]) == null) {
-        docIDToRelations.put(docIDAndRelation[0], new HashSet<String>());
-      }
-      docIDToRelations.get(docIDAndRelation[0]).add(docIDAndRelation[1]);
-    }
-    // load the text for each docID
-    File directoryWithDocs = new File(KBP_DOCS_DIR);
-    File[] allFiles = directoryWithDocs.listFiles();
-    for (File kbpTestDocFile : allFiles) {
-      String kbpTestDocID = kbpTestDocFile.getName();
-      String kbpTestDocPath = kbpTestDocFile.getAbsolutePath();
-      String kbpTestDocContents = IOUtils.stringFromFile(kbpTestDocPath);
-      docIDToText.put(kbpTestDocID, kbpTestDocContents);
-    }
-  }
 
   private String convertRelationName(String relationName) {
     /*if (relationName.equals("org:top_members/employees")) {
@@ -72,6 +48,38 @@ private String convertRelationName(String relationName) {
     return relationName;
   }
 
+  @Override
+  public void setUp() {
+    String pathToDocs = KBP_DOCS_DIR;
+    String goldRelationFilePath = GOLD_RELATIONS_PATH;
+    docIDToText = new HashMap<String,String>();
+    docIDToRelations = new HashMap<String,Set<String>>();
+    // load the gold relations from gold relations file
+    List<String> goldRelationLines = IOUtils.linesFromFile(goldRelationFilePath);
+    for (String relationLine : goldRelationLines) {
+      String[] docIDAndRelation = relationLine.split("\t");
+      if (docIDToRelations.get(docIDAndRelation[0]) == null) {
+        docIDToRelations.put(docIDAndRelation[0], new HashSet<String>());
+      }
+      docIDToRelations.get(docIDAndRelation[0]).add(docIDAndRelation[1]);
+    }
+    // load the text for each docID
+    File directoryWithDocs = new File(pathToDocs);
+    File[] allFiles = directoryWithDocs.listFiles();
+    for (File kbpTestDocFile : allFiles) {
+      String kbpTestDocID = kbpTestDocFile.getName();
+      String kbpTestDocPath = kbpTestDocFile.getAbsolutePath();
+      String kbpTestDocContents = IOUtils.stringFromFile(kbpTestDocPath);
+      docIDToText.put(kbpTestDocID, kbpTestDocContents);
+    }
+    // set up the pipeline
+    Properties props = new Properties();
+    props.put("annotators",
+            "tokenize,ssplit,pos,lemma,ner,regexner,parse,mention,entitymentions,coref,kbp");
+    props.put("coref.md.type", "RULE");
+    pipeline = new StanfordCoreNLP(props);
+  }
+
   public Set<String> convertKBPTriplesToStrings(List<RelationTriple> relationTripleList) {
     HashSet<String> foundRelationStrings = new HashSet<String>();
     for (RelationTriple rt : relationTripleList) {
@@ -127,7 +135,6 @@ public void testKBPAnnotatorResults() {
       finalF1 = f1;
     }
     // check final F1 score is
-    assertTrue("f1 score: " + finalF1 +" is below threshold of "+KBP_MINIMUM_SCORE
+    assertTrue("f1 score: " + finalF1 +" is below threshold of 45.3", finalF1 >= KBP_MINIMUM_SCORE);
-            , finalF1 >= KBP_MINIMUM_SCORE);
   }
 }
diff --git a/itest/src/edu/stanford/nlp/pipeline/KBPAnnotatorChineseBenchmarkSlowITest.java b/itest/src/edu/stanford/nlp/pipeline/KBPAnnotatorChineseBenchmarkSlowITest.java
diff --git a/itest/src/edu/stanford/nlp/pipeline/KBPAnnotatorEnglishBenchmarkSlowITest.java b/itest/src/edu/stanford/nlp/pipeline/KBPAnnotatorEnglishBenchmarkSlowITest.java