diff --git a/src/edu/stanford/nlp/semgraph/SemanticGraph.java b/src/edu/stanford/nlp/semgraph/SemanticGraph.java
index 613e6a6019..de1dfe457c 100644
--- a/src/edu/stanford/nlp/semgraph/SemanticGraph.java
+++ b/src/edu/stanford/nlp/semgraph/SemanticGraph.java
@@ -1688,10 +1688,13 @@ public SemanticGraphEdge addEdge(SemanticGraphEdge edge) {
* dumb, could be made more sophisticated.
*
*
- * Example: "[ate subj>Bill dobj>[muffins compound>blueberry]]"
+ * Example: {@code [ate subj>Bill dobj>[muffins compound>blueberry]]}
*
*
* This is the same format generated by toCompactString().
+ *
+ * Indices are represented by a dash-separated number after the word:
+ * {@code [ate-1 subj>Bill-2 ...]}
*/
public static SemanticGraph valueOf(String s, Language language, Integer sentIndex) {
return (new SemanticGraphParsingTask(s, language, sentIndex)).parse();
@@ -1841,7 +1844,7 @@ public SemanticGraph makeSoftCopy() {
// ============================================================================
- private static final Pattern WORD_AND_INDEX_PATTERN = Pattern.compile("([^-]+)-([0-9]+)");
+ private static final Pattern WORD_AND_INDEX_PATTERN = Pattern.compile("([^-]*)-([0-9]+)"); // group 1 = word text (may be empty since the quantifier is '*'), group 2 = numeric index; presumably the empty word lets a bare "-N" token name an existing node -- confirm against readWordAndIndex
/**
* This nested class is a helper for valueOf(). It represents the task of
@@ -1850,7 +1853,7 @@ public SemanticGraph makeSoftCopy() {
private static class SemanticGraphParsingTask extends StringParsingTask {
private SemanticGraph sg;
- private Set indexesUsed = Generics.newHashSet();
+ private Map<Integer, IndexedWord> indexesUsed = Generics.newHashMap(); // vertex index -> the vertex already created for that index
private final Language language;
private final Integer sentIndex;
@@ -1922,21 +1925,19 @@ private IndexedWord makeVertex(String word) {
} else {
index = getNextFreeIndex();
}
- indexesUsed.add(index);
- // Note that, despite the use of indexesUsed and getNextFreeIndex(),
- // nothing is actually enforcing that no indexes are used twice. This
- // could occur if some words in the string representation being parsed
- // come with index markers and some do not.
+ if (indexesUsed.containsKey(index)) {
+ return indexesUsed.get(index);
+ }
IndexedWord ifl = new IndexedWord(null, sentIndex != null ? sentIndex : 0, index);
// log.info("SemanticGraphParsingTask>>> word = " + word);
// log.info("SemanticGraphParsingTask>>> index = " + index);
- // log.info("SemanticGraphParsingTask>>> indexesUsed = " +
- // indexesUsed);
+ // log.info("SemanticGraphParsingTask>>> indexesUsed = " + indexesUsed);
String[] wordAndTag = word.split("/");
ifl.set(CoreAnnotations.TextAnnotation.class, wordAndTag[0]);
ifl.set(CoreAnnotations.ValueAnnotation.class, wordAndTag[0]);
if (wordAndTag.length > 1)
ifl.set(CoreAnnotations.PartOfSpeechAnnotation.class, wordAndTag[1]);
+ indexesUsed.put(index, ifl);
return ifl;
}
@@ -1953,7 +1954,7 @@ private static Pair readWordAndIndex(String word) {
private Integer getNextFreeIndex() {
int i = 0;
- while (indexesUsed.contains(i))
+ while (indexesUsed.containsKey(i)) // scan upward from 0 for the smallest index that is not yet a key of indexesUsed
i++;
return i;
}
diff --git a/test/src/edu/stanford/nlp/semgraph/SemanticGraphTest.java b/test/src/edu/stanford/nlp/semgraph/SemanticGraphTest.java
index 3e090aa743..44484b8515 100644
--- a/test/src/edu/stanford/nlp/semgraph/SemanticGraphTest.java
+++ b/test/src/edu/stanford/nlp/semgraph/SemanticGraphTest.java
@@ -347,4 +347,52 @@ public void testValueOfIndices() {
assertEquals(sg.getParentsWithReln(E, "obj").size(), 1);
assertEquals(sg.getParentsWithReln(E, "dep").size(), 0);
}
+
+ /**
+  * Test the vertices and edges if we reuse some indices in valueOf: a repeated index must resolve to the vertex already built for it.
+  */
+ public void testValueOfReuseIndices() {
+   SemanticGraph sg = SemanticGraph.valueOf("[A/foo-0 obj> B/bar-1 obj> C/foo-2 obj> -2 dep> B/bar-1 nsubj> [D/bar-3 obj> E/baz-4]]");
+   // Indices 1 and 2 each occur twice in the string above, yet only five distinct vertices are expected.
+   List<IndexedWord> words = sg.vertexListSorted();
+   assertEquals(5, words.size());
+   // The sorted vertex list is expected to carry the indices 0..4 in order.
+   for (int i = 0; i < 5; ++i) {
+     assertEquals(i, words.get(i).index());
+   }
+   IndexedWord A = words.get(0);
+   IndexedWord B = words.get(1);
+   IndexedWord C = words.get(2);
+   IndexedWord D = words.get(3);
+   IndexedWord E = words.get(4);
+   // Word text and tag come from the "word/tag" part of each token.
+   assertEquals("A", A.word());
+   assertEquals("foo", A.tag());
+   assertEquals("B", B.word());
+   assertEquals("bar", B.tag());
+   assertEquals("C", C.word());
+   assertEquals("foo", C.tag());
+   assertEquals("D", D.word());
+   assertEquals("bar", D.tag());
+   assertEquals("E", E.word());
+   assertEquals("baz", E.tag());
+   // B is written twice ("obj> B/bar-1" and "dep> B/bar-1"): two A->B edges, one parent per relation.
+   assertEquals(2, sg.getAllEdges(A, B).size());
+   assertEquals(1, sg.getParentsWithReln(B, "obj").size());
+   assertEquals(1, sg.getParentsWithReln(B, "dep").size());
+   // C is reached via "obj> C/foo-2" and again via the bare reference "obj> -2".
+   assertEquals(2, sg.getAllEdges(A, C).size());
+   assertEquals(1, sg.getParentsWithReln(C, "obj").size());
+   // D occurs once, attached to A by nsubj only.
+   assertEquals(1, sg.getAllEdges(A, D).size());
+   assertEquals(1, sg.getParentsWithReln(D, "nsubj").size());
+   assertEquals(0, sg.getParentsWithReln(D, "obj").size());
+   assertEquals(0, sg.getParentsWithReln(D, "dep").size());
+   // E hangs off D inside the nested bracket, so it has no direct edge from A.
+   assertEquals(0, sg.getAllEdges(A, E).size());
+   assertEquals(1, sg.getAllEdges(D, E).size());
+   assertEquals(1, sg.getParentsWithReln(E, "obj").size());
+   assertEquals(0, sg.getParentsWithReln(E, "dep").size());
+ }
+
}