Skip to content

Commit

Permalink
Add the ability to reuse indices in SemanticGraph.valueOf This possib…
Browse files Browse the repository at this point in the history
…ly changes the meaning of existing expressions, since it was previously possible to assign multiple words to the same index, but that was a bad feature anyway
  • Loading branch information
AngledLuffa committed Jan 20, 2023
1 parent 8eca9c2 commit cf97e36
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 11 deletions.
23 changes: 12 additions & 11 deletions src/edu/stanford/nlp/semgraph/SemanticGraph.java
Expand Up @@ -1688,10 +1688,13 @@ public SemanticGraphEdge addEdge(SemanticGraphEdge edge) {
* dumb, could be made more sophisticated.
* <br>
*
* Example: "[ate subj>Bill dobj>[muffins compound>blueberry]]"
* Example: {@code [ate subj>Bill dobj>[muffins compound>blueberry]]}
* <br>
*
* This is the same format generated by toCompactString().
* <br>
* Indices are represented by a dash-separated number after the word:
* {@code [ate-1 subj>Bill-2 ...}
*/
public static SemanticGraph valueOf(String s, Language language, Integer sentIndex) {
return (new SemanticGraphParsingTask(s, language, sentIndex)).parse();
Expand Down Expand Up @@ -1841,7 +1844,7 @@ public SemanticGraph makeSoftCopy() {

// ============================================================================

private static final Pattern WORD_AND_INDEX_PATTERN = Pattern.compile("([^-]+)-([0-9]+)");
private static final Pattern WORD_AND_INDEX_PATTERN = Pattern.compile("([^-]*)-([0-9]+)");

/**
* This nested class is a helper for valueOf(). It represents the task of
Expand All @@ -1850,7 +1853,7 @@ public SemanticGraph makeSoftCopy() {
private static class SemanticGraphParsingTask extends StringParsingTask<SemanticGraph> {

private SemanticGraph sg;
private Set<Integer> indexesUsed = Generics.newHashSet();
private Map<Integer, IndexedWord> indexesUsed = Generics.newHashMap();
private final Language language;
private final Integer sentIndex;

Expand Down Expand Up @@ -1922,21 +1925,19 @@ private IndexedWord makeVertex(String word) {
} else {
index = getNextFreeIndex();
}
indexesUsed.add(index);
// Note that, despite the use of indexesUsed and getNextFreeIndex(),
// nothing is actually enforcing that no indexes are used twice. This
// could occur if some words in the string representation being parsed
// come with index markers and some do not.
if (indexesUsed.containsKey(index)) {
return indexesUsed.get(index);
}
IndexedWord ifl = new IndexedWord(null, sentIndex != null ? sentIndex : 0, index);
// log.info("SemanticGraphParsingTask>>> word = " + word);
// log.info("SemanticGraphParsingTask>>> index = " + index);
// log.info("SemanticGraphParsingTask>>> indexesUsed = " +
// indexesUsed);
// log.info("SemanticGraphParsingTask>>> indexesUsed = " + indexesUsed);
String[] wordAndTag = word.split("/");
ifl.set(CoreAnnotations.TextAnnotation.class, wordAndTag[0]);
ifl.set(CoreAnnotations.ValueAnnotation.class, wordAndTag[0]);
if (wordAndTag.length > 1)
ifl.set(CoreAnnotations.PartOfSpeechAnnotation.class, wordAndTag[1]);
indexesUsed.put(index, ifl);
return ifl;
}

Expand All @@ -1953,7 +1954,7 @@ private static Pair<String, Integer> readWordAndIndex(String word) {

private Integer getNextFreeIndex() {
int i = 0;
while (indexesUsed.contains(i))
while (indexesUsed.containsKey(i))
i++;
return i;
}
Expand Down
48 changes: 48 additions & 0 deletions test/src/edu/stanford/nlp/semgraph/SemanticGraphTest.java
Expand Up @@ -347,4 +347,52 @@ public void testValueOfIndices() {
assertEquals(sg.getParentsWithReln(E, "obj").size(), 1);
assertEquals(sg.getParentsWithReln(E, "dep").size(), 0);
}

/**
 * Verifies the graph built by valueOf when the input mentions an index more
 * than once.  The input repeats {@code B/bar-1} and also refers to index 2
 * through the bare token {@code -2}; the test expects exactly five vertices
 * (indices 0 through 4) and two edges from A to each of B and C.
 */
public void testValueOfReuseIndices() {
  String compact = "[A/foo-0 obj> B/bar-1 obj> C/foo-2 obj> -2 dep> B/bar-1 nsubj> [D/bar-3 obj> E/baz-4]]";
  SemanticGraph graph = SemanticGraph.valueOf(compact);

  List<IndexedWord> vertices = graph.vertexListSorted();
  assertEquals(vertices.size(), 5);

  // The sorted vertex list must line up with indices 0..4.
  int position = 0;
  while (position < 5) {
    assertEquals(vertices.get(position).index(), position);
    position++;
  }

  IndexedWord vertexA = vertices.get(0);
  IndexedWord vertexB = vertices.get(1);
  IndexedWord vertexC = vertices.get(2);
  IndexedWord vertexD = vertices.get(3);
  IndexedWord vertexE = vertices.get(4);

  // Word and tag of each vertex, checked in index order (word first, then tag).
  IndexedWord[] inOrder = {vertexA, vertexB, vertexC, vertexD, vertexE};
  String[] expectedWords = {"A", "B", "C", "D", "E"};
  String[] expectedTags = {"foo", "bar", "foo", "bar", "baz"};
  for (int k = 0; k < inOrder.length; k++) {
    assertEquals(inOrder[k].word(), expectedWords[k]);
    assertEquals(inOrder[k].tag(), expectedTags[k]);
  }

  // B is named twice under A (obj and dep), so both edges attach to one vertex.
  assertEquals(graph.getAllEdges(vertexA, vertexB).size(), 2);
  assertEquals(graph.getParentsWithReln(vertexB, "obj").size(), 1);
  assertEquals(graph.getParentsWithReln(vertexB, "dep").size(), 1);

  // C is reached once as "C/foo-2" and once as the bare "-2".
  assertEquals(graph.getAllEdges(vertexA, vertexC).size(), 2);
  assertEquals(graph.getParentsWithReln(vertexC, "obj").size(), 1);

  // D hangs off A through nsubj only.
  assertEquals(graph.getAllEdges(vertexA, vertexD).size(), 1);
  assertEquals(graph.getParentsWithReln(vertexD, "nsubj").size(), 1);
  assertEquals(graph.getParentsWithReln(vertexD, "obj").size(), 0);
  assertEquals(graph.getParentsWithReln(vertexD, "dep").size(), 0);

  // E belongs to the nested bracket, so its only parent is D.
  assertEquals(graph.getAllEdges(vertexA, vertexE).size(), 0);
  assertEquals(graph.getAllEdges(vertexD, vertexE).size(), 1);
  assertEquals(graph.getParentsWithReln(vertexE, "obj").size(), 1);
  assertEquals(graph.getParentsWithReln(vertexE, "dep").size(), 0);
}

}

0 comments on commit cf97e36

Please sign in to comment.