Skip to content

Commit

Permalink
Have ChineseSegmenterAnnotatorITest work with jar resources, not file paths.
Browse files Browse the repository at this point in the history
  • Loading branch information
manning authored and Stanford NLP committed Jul 10, 2017
1 parent 66bf866 commit c11819f
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 20 deletions.
Original file line number Diff line number Diff line change
@@ -1,33 +1,36 @@
package edu.stanford.nlp.pipeline;

import junit.framework.TestCase;

import java.util.List;
import java.util.Properties;

import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;

import edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation;
import edu.stanford.nlp.ling.CoreLabel;

public class ChineseSegmenterAnnotatorITest extends TestCase {
StanfordCoreNLP pipeline = null;

@Override
public void setUp()
throws Exception
{
public class ChineseSegmenterAnnotatorITest {

private StanfordCoreNLP pipeline; // = null

@Before
public void setUp() throws Exception {
  // Building the pipeline is expensive (loads segmenter models), so construct it
  // only once and reuse it across tests.
  if (pipeline != null) {
    return;
  }
  Properties props = new Properties();
  props.setProperty("annotators", "cseg");
  props.setProperty("customAnnotatorClass.cseg", "edu.stanford.nlp.pipeline.ChineseSegmenterAnnotator");
  // Load segmenter models as classpath (jar) resources rather than hard-coded
  // NFS file paths, so the test runs outside the Stanford cluster.
  props.setProperty("cseg.model", "edu/stanford/nlp/models/segmenter/chinese/ctb.gz");
  props.setProperty("cseg.sighanCorporaDict", "edu/stanford/nlp/models/segmenter/chinese");
  props.setProperty("cseg.serDictionary", "edu/stanford/nlp/models/segmenter/chinese/dict-chris6.ser.gz");
  props.setProperty("cseg.sighanPostProcessing", "true");
  pipeline = new StanfordCoreNLP(props);
}

@Test
public void testPipeline() {
testOne("你马上回来北京吗?",
new String[]{"你", "马上", "回来", "北京", "吗", "?"},
Expand Down Expand Up @@ -60,11 +63,12 @@ private void testOne(String query, String[] expectedWords, int[] expectedBeginPo
pipeline.annotate(annotation);

List<CoreLabel> tokens = annotation.get(TokensAnnotation.class);
assertEquals(expectedWords.length, tokens.size());
Assert.assertEquals(expectedWords.length, tokens.size());
for (int i = 0; i < expectedWords.length; ++i) {
assertEquals(expectedWords[i], tokens.get(i).word());
assertEquals(expectedBeginPositions[i], tokens.get(i).beginPosition());
assertEquals(expectedEndPositions[i], tokens.get(i).endPosition());
Assert.assertEquals(expectedWords[i], tokens.get(i).word());
Assert.assertEquals(expectedBeginPositions[i], tokens.get(i).beginPosition());
Assert.assertEquals(expectedEndPositions[i], tokens.get(i).endPosition());
}
}

}
13 changes: 8 additions & 5 deletions itest/src/edu/stanford/nlp/pipeline/TokenizerAnnotatorITest.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,21 @@

import java.util.*;

import junit.framework.TestCase;

import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import junit.framework.TestCase;
import edu.stanford.nlp.process.AbstractTokenizer;


/**
* Tests a couple tokenizer options, such as working with Spanish.
* See TokenizerAnnotatorTest for more tests.
*
* @author John Bauer
*/
public class TokenizerAnnotatorITest extends TestCase {

public void testNotSpanish() {
Annotation ann = new Annotation("Damelo");
Properties props = new Properties();
Expand All @@ -28,8 +31,7 @@ public void testNotSpanish() {

// Fixture text and expected tokenizations for the Spanish tokenizer tests.
// Parentheses are normalized to =LRB= / =RRB= by the tokenizer.
private static final String spanishText = "Me voy a Madrid (ES).\n\"Me gusta\", lo dice.";
private static List<String> spanishTokens = Arrays.asList(new String[] { "Me", "voy", "a", "Madrid", "=LRB=", "ES", "=RRB=", ".", "\"", "Me", "gusta", "\"", ",", "lo", "dice", "." });
private static final String spanishText2 = "Me voy a Madrid (ES).\n(Me gusta), lo dice.";
// Use the named constant for the newline token instead of the raw "*NL*" literal,
// so the expectation tracks the tokenizer's definition.
private static List<String> spanishTokens2 = Arrays.asList(new String[] { "Me", "voy", "a", "Madrid", "=LRB=", "ES", "=RRB=", ".", AbstractTokenizer.NEWLINE_TOKEN, "\"", "Me", "gusta", "\"", ",", "lo", "dice", "." });

public void testSpanishTokenizer() {
TokenizerAnnotator annotator = new TokenizerAnnotator(false, "es", null);
Expand All @@ -49,5 +51,6 @@ public void testSpanishTokenizer() {
for (int i = 0; i < tokens.size(); ++i) {
assertEquals(spanishTokens2.get(i), tokens.get(i).value());
}
}
}

}

0 comments on commit c11819f

Please sign in to comment.