Merge branch 'master' of jamie.stanford.edu:/u/nlp/git/javanlp
Gabor Angeli authored and Stanford NLP committed Jul 9, 2016
1 parent ca831b8 commit 04d7b8d
Showing 30 changed files with 84,917 additions and 64,263 deletions.
32 changes: 16 additions & 16 deletions itest/src/edu/stanford/nlp/dcoref/DcorefBenchmarkSlowITest.java
@@ -98,40 +98,40 @@ public void testDcoref() throws Exception {
expectedResults.setCount(MENTION_F1, 50.42);
highResults.setCount(MENTION_F1, 50.45);

-lowResults.setCount(MUC_TP, 6250);
-expectedResults.setCount(MUC_TP, 6253);
-highResults.setCount(MUC_TP, 6260);
+lowResults.setCount(MUC_TP, 6245);
+expectedResults.setCount(MUC_TP, 6250);
+highResults.setCount(MUC_TP, 6255);
lowResults.setCount(MUC_F1, 60.65);
-expectedResults.setCount(MUC_F1, 60.67);
+expectedResults.setCount(MUC_F1, 60.66);
highResults.setCount(MUC_F1, 60.7);

-lowResults.setCount(BCUBED_TP, 12450);
-expectedResults.setCount(BCUBED_TP, 12457.63);
-highResults.setCount(BCUBED_TP, 12460);
-lowResults.setCount(BCUBED_F1, 70.8);
-expectedResults.setCount(BCUBED_F1, 70.81);
+lowResults.setCount(BCUBED_TP, 12440);
+expectedResults.setCount(BCUBED_TP, 12445.8);
+highResults.setCount(BCUBED_TP, 12450);
+lowResults.setCount(BCUBED_F1, 70.75);
+expectedResults.setCount(BCUBED_F1, 70.80);
highResults.setCount(BCUBED_F1, 70.85);

-lowResults.setCount(CEAFM_TP, 10920);
-expectedResults.setCount(CEAFM_TP, 10927);
+lowResults.setCount(CEAFM_TP, 10915);
+expectedResults.setCount(CEAFM_TP, 10920);
highResults.setCount(CEAFM_TP, 10930);
lowResults.setCount(CEAFM_F1, 59.4);
-expectedResults.setCount(CEAFM_F1, 59.44);
+expectedResults.setCount(CEAFM_F1, 59.42);
highResults.setCount(CEAFM_F1, 59.5);

lowResults.setCount(CEAFE_TP, 3830);
-expectedResults.setCount(CEAFE_TP, 3833.81);
+expectedResults.setCount(CEAFE_TP, 3831.36);
highResults.setCount(CEAFE_TP, 3840);
lowResults.setCount(CEAFE_F1, 47.4);
-expectedResults.setCount(CEAFE_F1, 47.46);
+expectedResults.setCount(CEAFE_F1, 47.45);
highResults.setCount(CEAFE_F1, 47.5);

lowResults.setCount(BLANC_F1, 75.35);
-expectedResults.setCount(BLANC_F1, 75.39);
+expectedResults.setCount(BLANC_F1, 75.38);
highResults.setCount(BLANC_F1, 75.42);

lowResults.setCount(CONLL_SCORE, 59.6);
-expectedResults.setCount(CONLL_SCORE, 59.65);
+expectedResults.setCount(CONLL_SCORE, 59.64);
highResults.setCount(CONLL_SCORE, 59.7);

Counter<String> results = new ClassicCounter<String>();
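The test above encodes a tolerance band for each coreference metric: lowResults and highResults bound the acceptable range, while expectedResults records the value the current model should produce. Below is a minimal, hypothetical sketch of such a band check using only the Counter API from edu.stanford.nlp.stats; the checkWithinBounds helper and the plain string keys are illustrative, not the actual assertion code in DcorefBenchmarkSlowITest.

import edu.stanford.nlp.stats.ClassicCounter;
import edu.stanford.nlp.stats.Counter;

public class BoundsCheckSketch {
  // Hypothetical helper: fail if any metric in 'actual' falls outside [low, high].
  static void checkWithinBounds(Counter<String> low, Counter<String> high,
                                Counter<String> actual) {
    for (String metric : actual.keySet()) {
      double value = actual.getCount(metric);
      if (value < low.getCount(metric) || value > high.getCount(metric)) {
        throw new AssertionError(metric + " = " + value + " is outside ["
            + low.getCount(metric) + ", " + high.getCount(metric) + "]");
      }
    }
  }

  public static void main(String[] args) {
    Counter<String> low = new ClassicCounter<>();
    Counter<String> high = new ClassicCounter<>();
    Counter<String> actual = new ClassicCounter<>();
    low.setCount("MUC_F1", 60.65);    // bounds taken from the diff above
    high.setCount("MUC_F1", 60.7);
    actual.setCount("MUC_F1", 60.66); // the new expected value
    checkWithinBounds(low, high, actual);  // passes: 60.66 lies inside the band
  }
}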
22 changes: 13 additions & 9 deletions src/edu/stanford/nlp/ie/AbstractSequenceClassifier.java
@@ -657,20 +657,21 @@ public List<Triple<String, Integer, Integer>> classifyToCharacterOffsets(String
}

/**
-* ONLY USE IF LOADED A CHINESE WORD SEGMENTER!!!!!
+* Have a word segmenter segment a String into a list of words.
+* ONLY USE IF YOU LOADED A CHINESE WORD SEGMENTER!!!!!
*
-* @param sentence
-* The string to be classified
+* @param sentence The string to be classified
* @return List of words
*/
+// This method is currently [2016] only called in a very small number of places:
+// the parser's jsp webapp, ChineseSegmenterAnnotator, and SegDemo.
+// Maybe we could eliminate it?
public List<String> segmentString(String sentence) {
return segmentString(sentence, defaultReaderAndWriter);
}

-public List<String> segmentString(String sentence,
-DocumentReaderAndWriter<IN> readerAndWriter) {
-ObjectBank<List<IN>> docs = makeObjectBankFromString(sentence,
-readerAndWriter);
+public List<String> segmentString(String sentence, DocumentReaderAndWriter<IN> readerAndWriter) {
+ObjectBank<List<IN>> docs = makeObjectBankFromString(sentence, readerAndWriter);

StringWriter stringWriter = new StringWriter();
PrintWriter stringPrintWriter = new PrintWriter(stringWriter);
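For reference, segmentString is the call made when a CRFClassifier has been trained as a Chinese word segmenter (this is how SegDemo and ChineseSegmenterAnnotator use it). A minimal usage sketch follows; the property values and the data/ctb.gz model path are assumptions matching the files shipped with the Stanford Segmenter distribution, not something fixed by this commit.

import edu.stanford.nlp.ie.crf.CRFClassifier;
import edu.stanford.nlp.ling.CoreLabel;
import java.util.List;
import java.util.Properties;

public class SegmentStringSketch {
  public static void main(String[] args) {
    Properties props = new Properties();
    // Illustrative segmenter settings; the real paths depend on your download.
    props.setProperty("sighanCorporaDict", "data");
    props.setProperty("serDictionary", "data/dict-chris6.ser.gz");
    props.setProperty("sighanPostProcessing", "true");
    CRFClassifier<CoreLabel> segmenter = new CRFClassifier<>(props);
    segmenter.loadClassifierNoExceptions("data/ctb.gz", props);
    // Only valid because a Chinese word segmenter model was loaded above.
    List<String> words = segmenter.segmentString("面对新世纪，世界各国人民的共同愿望");
    System.out.println(words);  // one segmented word per list element
  }
}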
@@ -685,7 +686,7 @@ public List<String> segmentString(String sentence,
return Arrays.asList(segmented.split("\\s"));
}

-/**
+/*
* Classify the contents of {@link SeqClassifierFlags scf.testFile}. The file
* should be in the format expected based on {@link SeqClassifierFlags
* scf.documentReader}.
@@ -707,7 +708,7 @@ public List<String> segmentString(String sentence,
* @return The same {@link List}, but with the elements annotated with their
* answers (stored under the
* {@link edu.stanford.nlp.ling.CoreAnnotations.AnswerAnnotation}
-* key).
+* key). The answers will be the class labels defined by the CRF
+* Classifier. They might be things like entity labels (in BIO
+* notation or not) or something like "1" vs. "0" on whether to
+* begin a new token here or not (in word segmentation).
*/
public abstract List<IN> classify(List<IN> document);

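The expanded javadoc above spells out what comes back from classify: every token is annotated under CoreAnnotations.AnswerAnnotation with whatever label set the model was trained on (NER labels, BIO tags, or segmentation decisions). A small sketch of reading those answers back from a concrete subclass, assuming an NER model; the model filename is illustrative, and classify(String) is the convenience overload rather than the abstract classify(List<IN>) shown here.

import edu.stanford.nlp.ie.crf.CRFClassifier;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import java.util.List;

public class ClassifyAnswersSketch {
  public static void main(String[] args) throws Exception {
    // Illustrative model path; any serialized CRF NER model will do.
    CRFClassifier<CoreLabel> ner =
        CRFClassifier.getClassifier("english.all.3class.distsim.crf.ser.gz");
    List<List<CoreLabel>> sentences = ner.classify("Stanford is in California.");
    for (List<CoreLabel> sentence : sentences) {
      for (CoreLabel token : sentence) {
        // The answer is whatever label the classifier was trained with,
        // e.g. ORGANIZATION / LOCATION / O for a 3-class NER model.
        String answer = token.get(CoreAnnotations.AnswerAnnotation.class);
        System.out.println(token.word() + "\t" + answer);
      }
    }
  }
}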
2 changes: 1 addition & 1 deletion src/edu/stanford/nlp/ie/crf/CRFClassifier.java
@@ -960,7 +960,7 @@ protected static Index<CRFLabel> allLabels(int window, Index<String> classIndex)
* Makes a CRFDatum by producing features and a label from input data at a
* specific position, using the provided factory.
*
-* @param info The input data
+* @param info The input data. Particular feature factories might look for arbitrary keys in the IN items.
* @param loc The position to build a datum at
* @param featureFactories The FeatureFactories to use to extract features
* @return The constructed CRFDatum
