diff --git a/src/edu/stanford/nlp/semgraph/SemanticGraph.java b/src/edu/stanford/nlp/semgraph/SemanticGraph.java index 613e6a6019..de1dfe457c 100644 --- a/src/edu/stanford/nlp/semgraph/SemanticGraph.java +++ b/src/edu/stanford/nlp/semgraph/SemanticGraph.java @@ -1688,10 +1688,13 @@ public SemanticGraphEdge addEdge(SemanticGraphEdge edge) { * dumb, could be made more sophisticated. *
* - * Example: "[ate subj>Bill dobj>[muffins compound>blueberry]]" + * Example: {@code [ate subj>Bill dobj>[muffins compound>blueberry]]} *
* * This is the same format generated by toCompactString(). + *
+ * Indices are represented by a dash separated number after the word: + * {@code [ate-1 subj>Bill-2 ...} */ public static SemanticGraph valueOf(String s, Language language, Integer sentIndex) { return (new SemanticGraphParsingTask(s, language, sentIndex)).parse(); @@ -1841,7 +1844,7 @@ public SemanticGraph makeSoftCopy() { // ============================================================================ - private static final Pattern WORD_AND_INDEX_PATTERN = Pattern.compile("([^-]+)-([0-9]+)"); + private static final Pattern WORD_AND_INDEX_PATTERN = Pattern.compile("([^-]*)-([0-9]+)"); /** * This nested class is a helper for valueOf(). It represents the task of @@ -1850,7 +1853,7 @@ public SemanticGraph makeSoftCopy() { private static class SemanticGraphParsingTask extends StringParsingTask { private SemanticGraph sg; - private Set indexesUsed = Generics.newHashSet(); + private Map indexesUsed = Generics.newHashMap(); private final Language language; private final Integer sentIndex; @@ -1922,21 +1925,19 @@ private IndexedWord makeVertex(String word) { } else { index = getNextFreeIndex(); } - indexesUsed.add(index); - // Note that, despite the use of indexesUsed and getNextFreeIndex(), - // nothing is actually enforcing that no indexes are used twice. This - // could occur if some words in the string representation being parsed - // come with index markers and some do not. + if (indexesUsed.containsKey(index)) { + return indexesUsed.get(index); + } IndexedWord ifl = new IndexedWord(null, sentIndex != null ? sentIndex : 0, index); // log.info("SemanticGraphParsingTask>>> word = " + word); // log.info("SemanticGraphParsingTask>>> index = " + index); - // log.info("SemanticGraphParsingTask>>> indexesUsed = " + - // indexesUsed); + // log.info("SemanticGraphParsingTask>>> indexesUsed = " + indexesUsed); String[] wordAndTag = word.split("/"); ifl.set(CoreAnnotations.TextAnnotation.class, wordAndTag[0]); ifl.set(CoreAnnotations.ValueAnnotation.class, wordAndTag[0]); if (wordAndTag.length > 1) ifl.set(CoreAnnotations.PartOfSpeechAnnotation.class, wordAndTag[1]); + indexesUsed.put(index, ifl); return ifl; } @@ -1953,7 +1954,7 @@ private static Pair readWordAndIndex(String word) { private Integer getNextFreeIndex() { int i = 0; - while (indexesUsed.contains(i)) + while (indexesUsed.containsKey(i)) i++; return i; } diff --git a/test/src/edu/stanford/nlp/semgraph/SemanticGraphTest.java b/test/src/edu/stanford/nlp/semgraph/SemanticGraphTest.java index 3e090aa743..44484b8515 100644 --- a/test/src/edu/stanford/nlp/semgraph/SemanticGraphTest.java +++ b/test/src/edu/stanford/nlp/semgraph/SemanticGraphTest.java @@ -347,4 +347,52 @@ public void testValueOfIndices() { assertEquals(sg.getParentsWithReln(E, "obj").size(), 1); assertEquals(sg.getParentsWithReln(E, "dep").size(), 0); } + + /** + * Test the vertices and edges if we reuse some indices in valueOf + */ + public void testValueOfReuseIndices() { + SemanticGraph sg = SemanticGraph.valueOf("[A/foo-0 obj> B/bar-1 obj> C/foo-2 obj> -2 dep> B/bar-1 nsubj> [D/bar-3 obj> E/baz-4]]"); + + List words = sg.vertexListSorted(); + assertEquals(words.size(), 5); + + for (int i = 0; i < 5; ++i) { + assertEquals(words.get(i).index(), i); + } + IndexedWord A = words.get(0); + IndexedWord B = words.get(1); + IndexedWord C = words.get(2); + IndexedWord D = words.get(3); + IndexedWord E = words.get(4); + + assertEquals(A.word(), "A"); + assertEquals(A.tag(), "foo"); + assertEquals(B.word(), "B"); + assertEquals(B.tag(), "bar"); + assertEquals(C.word(), "C"); + assertEquals(C.tag(), "foo"); + assertEquals(D.word(), "D"); + assertEquals(D.tag(), "bar"); + assertEquals(E.word(), "E"); + assertEquals(E.tag(), "baz"); + + assertEquals(sg.getAllEdges(A, B).size(), 2); + assertEquals(sg.getParentsWithReln(B, "obj").size(), 1); + assertEquals(sg.getParentsWithReln(B, "dep").size(), 1); + + assertEquals(sg.getAllEdges(A, C).size(), 2); + assertEquals(sg.getParentsWithReln(C, "obj").size(), 1); + + assertEquals(sg.getAllEdges(A, D).size(), 1); + assertEquals(sg.getParentsWithReln(D, "nsubj").size(), 1); + assertEquals(sg.getParentsWithReln(D, "obj").size(), 0); + assertEquals(sg.getParentsWithReln(D, "dep").size(), 0); + + assertEquals(sg.getAllEdges(A, E).size(), 0); + assertEquals(sg.getAllEdges(D, E).size(), 1); + assertEquals(sg.getParentsWithReln(E, "obj").size(), 1); + assertEquals(sg.getParentsWithReln(E, "dep").size(), 0); + } + }