Skip to content

Commit

Permalink
Have ChineseSegmenterAnnotatorITest work with jar resources, not file paths.
Browse files Browse the repository at this point in the history
  • Loading branch information
manning authored and Stanford NLP committed Jul 10, 2017
1 parent 66bf866 commit c11819f
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 20 deletions.
Original file line number Diff line number Diff line change
@@ -1,33 +1,36 @@
package edu.stanford.nlp.pipeline;

import junit.framework.TestCase;

import java.util.List;
import java.util.Properties;

import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;

import edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation;
import edu.stanford.nlp.ling.CoreLabel;

public class ChineseSegmenterAnnotatorITest extends TestCase {
StanfordCoreNLP pipeline = null;

@Override
public void setUp()
throws Exception
{
public class ChineseSegmenterAnnotatorITest {

private StanfordCoreNLP pipeline; // = null

@Before
public void setUp() throws Exception {
  // Building the pipeline is expensive (loads segmenter models), so construct it
  // only once and reuse it across tests.
  if (pipeline != null) {
    return;
  }
  Properties props = new Properties();
  props.setProperty("annotators", "cseg");
  props.setProperty("customAnnotatorClass.cseg", "edu.stanford.nlp.pipeline.ChineseSegmenterAnnotator");
  // Load segmenter models as classpath (jar) resources rather than hard-coded
  // NFS file paths, so the test runs outside the Stanford cluster.
  props.setProperty("cseg.model", "edu/stanford/nlp/models/segmenter/chinese/ctb.gz");
  props.setProperty("cseg.sighanCorporaDict", "edu/stanford/nlp/models/segmenter/chinese");
  props.setProperty("cseg.serDictionary", "edu/stanford/nlp/models/segmenter/chinese/dict-chris6.ser.gz");
  props.setProperty("cseg.sighanPostProcessing", "true");
  pipeline = new StanfordCoreNLP(props);
}

@Test
public void testPipeline() {
testOne("你马上回来北京吗?",
new String[]{"你", "马上", "回来", "北京", "吗", "?"},
Expand Down Expand Up @@ -60,11 +63,12 @@ private void testOne(String query, String[] expectedWords, int[] expectedBeginPo
pipeline.annotate(annotation);

List<CoreLabel> tokens = annotation.get(TokensAnnotation.class);
assertEquals(expectedWords.length, tokens.size());
Assert.assertEquals(expectedWords.length, tokens.size());
for (int i = 0; i < expectedWords.length; ++i) {
assertEquals(expectedWords[i], tokens.get(i).word());
assertEquals(expectedBeginPositions[i], tokens.get(i).beginPosition());
assertEquals(expectedEndPositions[i], tokens.get(i).endPosition());
Assert.assertEquals(expectedWords[i], tokens.get(i).word());
Assert.assertEquals(expectedBeginPositions[i], tokens.get(i).beginPosition());
Assert.assertEquals(expectedEndPositions[i], tokens.get(i).endPosition());
}
}

}
13 changes: 8 additions & 5 deletions itest/src/edu/stanford/nlp/pipeline/TokenizerAnnotatorITest.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,21 @@

import java.util.*;

import junit.framework.TestCase;

import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import junit.framework.TestCase;
import edu.stanford.nlp.process.AbstractTokenizer;


/**
* Tests a couple tokenizer options, such as working with Spanish.
* See TokenizerAnnotatorTest for more tests.
*
* @author John Bauer
*/
public class TokenizerAnnotatorITest extends TestCase {

public void testNotSpanish() {
Annotation ann = new Annotation("Damelo");
Properties props = new Properties();
Expand All @@ -28,8 +31,7 @@ public void testNotSpanish() {

// Fixture text and expected tokenizations for the Spanish tokenizer tests.
// Parentheses are normalized to =LRB= / =RRB= by the tokenizer.
private static final String spanishText = "Me voy a Madrid (ES).\n\"Me gusta\", lo dice.";
private static List<String> spanishTokens = Arrays.asList(new String[] { "Me", "voy", "a", "Madrid", "=LRB=", "ES", "=RRB=", ".", "\"", "Me", "gusta", "\"", ",", "lo", "dice", "." });
private static final String spanishText2 = "Me voy a Madrid (ES).\n(Me gusta), lo dice.";
// Use the named constant for the newline token instead of the raw "*NL*" literal,
// so the expectation tracks the tokenizer's definition.
private static List<String> spanishTokens2 = Arrays.asList(new String[] { "Me", "voy", "a", "Madrid", "=LRB=", "ES", "=RRB=", ".", AbstractTokenizer.NEWLINE_TOKEN, "\"", "Me", "gusta", "\"", ",", "lo", "dice", "." });

public void testSpanishTokenizer() {
TokenizerAnnotator annotator = new TokenizerAnnotator(false, "es", null);
Expand All @@ -49,5 +51,6 @@ public void testSpanishTokenizer() {
for (int i = 0; i < tokens.size(); ++i) {
assertEquals(spanishTokens2.get(i), tokens.get(i).value());
}
}
}

}

0 comments on commit c11819f

Please sign in to comment.