Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Improve CoNLL-U document reader, allow additional dependencies and comments, add test.
- Loading branch information
Showing
3 changed files
with
253 additions
and
36 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
90 changes: 90 additions & 0 deletions
90
test/src/edu/stanford/nlp/trees/CoNLLUDocumentReaderTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
package edu.stanford.nlp.trees; | ||
|
||
import edu.stanford.nlp.ling.IndexedWord; | ||
import edu.stanford.nlp.semgraph.SemanticGraph; | ||
import junit.framework.TestCase; | ||
|
||
import java.io.Reader; | ||
import java.io.StringReader; | ||
import java.util.Iterator; | ||
|
||
/** | ||
* @author Sebastian Schuster | ||
*/ | ||
public class CoNLLUDocumentReaderITest extends TestCase { | ||
|
||
private static String MULTIWORD_TEST_INPUT = | ||
"1 I I PRON PRP Case=Nom|Number=Sing|Person=1 2 nsubj _ _\n" + | ||
"2-3 haven't _ _ _ _ _ _ _ _\n" + | ||
"2 have have VERB VBP Number=Sing|Person=1|Tense=Pres 0 root _ _\n" + | ||
"3 not not PART RB Negative=Neg 2 neg _ _\n" + | ||
"4 a a DET DT Definite=Ind|PronType=Art 5 det _ _\n" + | ||
"5 clue clue NOUN NN Number=Sing 2 dobj _ _\n" + | ||
"6 . . PUNCT . _ 2 punct _ _\n\n"; | ||
|
||
private static String COMMENT_TEST_INPUT = | ||
"#comment line 1\n" + | ||
"#comment line 2\n" + | ||
"1 I I PRON PRP Case=Nom|Number=Sing|Person=1 2 nsubj _ _\n" + | ||
"2 have have VERB VBP Number=Sing|Person=1|Tense=Pres 0 root _ _\n" + | ||
"3 not not PART RB Negative=Neg 2 neg _ _\n" + | ||
"4 a a DET DT Definite=Ind|PronType=Art 5 det _ _\n" + | ||
"5 clue clue NOUN NN Number=Sing 2 dobj _ _\n" + | ||
"6 . . PUNCT . _ 2 punct _ _\n\n"; | ||
|
||
private static String EXTRA_DEPS_TEST_INPUT = | ||
"1 They They PRON PRP _ 2 nsubj 4:nsubj _\n" + | ||
"2 buy buy VERB VBP _ 0 root _ _\n" + | ||
"3 and and CONJ CC _ 2 cc _ _\n" + | ||
"4 sell sell VERB VBP _ 5 conj _ _\n" + | ||
"5 books book NOUN NNS _ 2 dobj 4:dobj _\n" + | ||
"6 , , PUNCT , _ 5 punct _ _\n" + | ||
"7 newspapers newspaper NOUN NNS _ 5 conj 2:dobj|4:dobj _\n" + | ||
"8 and and CONJ CC _ 5 cc _ _\n" + | ||
"9 magazines magazine NOUN NNS _ 5 conj 2:dobj|4:dobj _\n" + | ||
"10 . . PUNCT . _ 2 punct _ _\n\n"; | ||
|
||
|
||
public void testMultiWords() { | ||
CoNLLUDocumentReader reader = new CoNLLUDocumentReader(); | ||
Reader stringReader = new StringReader(MULTIWORD_TEST_INPUT); | ||
Iterator<SemanticGraph> it = reader.getIterator(stringReader); | ||
|
||
SemanticGraph sg = it.next(); | ||
assertNotNull(sg); | ||
assertFalse("The input only contains one dependency tree.", it.hasNext()); | ||
assertEquals("[have/VBP nsubj>I/PRP neg>not/RB dobj>[clue/NN det>a/DT] punct>./.]", sg.toCompactString(true)); | ||
|
||
for (IndexedWord iw : sg.vertexListSorted()) { | ||
if (iw.index() != 2 && iw.index() != 3) { | ||
assertEquals("", iw.originalText()); | ||
} else { | ||
assertEquals("haven't", iw.originalText()); | ||
} | ||
} | ||
} | ||
|
||
public void testComment() { | ||
CoNLLUDocumentReader reader = new CoNLLUDocumentReader(); | ||
Reader stringReader = new StringReader(COMMENT_TEST_INPUT); | ||
Iterator<SemanticGraph> it = reader.getIterator(stringReader); | ||
|
||
SemanticGraph sg = it.next(); | ||
assertNotNull(sg); | ||
assertFalse("The input only contains one dependency tree.", it.hasNext()); | ||
assertEquals("[have/VBP nsubj>I/PRP neg>not/RB dobj>[clue/NN det>a/DT] punct>./.]", sg.toCompactString(true)); | ||
} | ||
|
||
public void testExtraDependencies() { | ||
CoNLLUDocumentReader reader = new CoNLLUDocumentReader(); | ||
Reader stringReader = new StringReader(EXTRA_DEPS_TEST_INPUT); | ||
Iterator<SemanticGraph> it = reader.getIterator(stringReader); | ||
|
||
SemanticGraph sg = it.next(); | ||
assertNotNull(sg); | ||
assertFalse("The input only contains one dependency tree.", it.hasNext()); | ||
assertTrue(sg.containsEdge(sg.getNodeByIndex(4), sg.getNodeByIndex(1))); | ||
assertTrue(sg.containsEdge(sg.getNodeByIndex(2), sg.getNodeByIndex(7))); | ||
assertTrue(sg.containsEdge(sg.getNodeByIndex(4), sg.getNodeByIndex(7))); | ||
} | ||
} |