From cf97e3647582ea492ca9e4aff2ed9268231201db Mon Sep 17 00:00:00 2001 From: John Bauer Date: Tue, 10 Jan 2023 22:00:45 -0800 Subject: [PATCH] Add the ability to reuse indices in SemanticGraph.valueOf This possibly changes the meaning of existing expressions, since it was previously possible to assign multiple words to the same index, but that was a bad feature anyway --- .../stanford/nlp/semgraph/SemanticGraph.java | 23 ++++----- .../nlp/semgraph/SemanticGraphTest.java | 48 +++++++++++++++++++ 2 files changed, 60 insertions(+), 11 deletions(-) diff --git a/src/edu/stanford/nlp/semgraph/SemanticGraph.java b/src/edu/stanford/nlp/semgraph/SemanticGraph.java index 613e6a6019..de1dfe457c 100644 --- a/src/edu/stanford/nlp/semgraph/SemanticGraph.java +++ b/src/edu/stanford/nlp/semgraph/SemanticGraph.java @@ -1688,10 +1688,13 @@ public SemanticGraphEdge addEdge(SemanticGraphEdge edge) { * dumb, could be made more sophisticated. * <br>
 * - * Example: "[ate subj>Bill dobj>[muffins compound>blueberry]]" + * Example: {@code [ate subj>Bill dobj>[muffins compound>blueberry]]} * <br>
 * * This is the same format generated by toCompactString(). + * <br>
 + * Indices are represented by a dash separated number after the word: + * {@code [ate-1 subj>Bill-2 ...} */ public static SemanticGraph valueOf(String s, Language language, Integer sentIndex) { return (new SemanticGraphParsingTask(s, language, sentIndex)).parse(); @@ -1841,7 +1844,7 @@ public SemanticGraph makeSoftCopy() { // ============================================================================ - private static final Pattern WORD_AND_INDEX_PATTERN = Pattern.compile("([^-]+)-([0-9]+)"); + private static final Pattern WORD_AND_INDEX_PATTERN = Pattern.compile("([^-]*)-([0-9]+)"); /** * This nested class is a helper for valueOf(). It represents the task of @@ -1850,7 +1853,7 @@ public SemanticGraph makeSoftCopy() { private static class SemanticGraphParsingTask extends StringParsingTask { private SemanticGraph sg; - private Set<Integer> indexesUsed = Generics.newHashSet(); + private Map<Integer, IndexedWord> indexesUsed = Generics.newHashMap(); private final Language language; private final Integer sentIndex; @@ -1922,21 +1925,19 @@ private IndexedWord makeVertex(String word) { } else { index = getNextFreeIndex(); } - indexesUsed.add(index); - // Note that, despite the use of indexesUsed and getNextFreeIndex(), - // nothing is actually enforcing that no indexes are used twice. This - // could occur if some words in the string representation being parsed - // come with index markers and some do not. + if (indexesUsed.containsKey(index)) { + return indexesUsed.get(index); + } IndexedWord ifl = new IndexedWord(null, sentIndex != null ? 
sentIndex : 0, index); // log.info("SemanticGraphParsingTask>>> word = " + word); // log.info("SemanticGraphParsingTask>>> index = " + index); // log.info("SemanticGraphParsingTask>>> indexesUsed = " + - // indexesUsed); + // log.info("SemanticGraphParsingTask>>> indexesUsed = " + indexesUsed); String[] wordAndTag = word.split("/"); ifl.set(CoreAnnotations.TextAnnotation.class, wordAndTag[0]); ifl.set(CoreAnnotations.ValueAnnotation.class, wordAndTag[0]); if (wordAndTag.length > 1) ifl.set(CoreAnnotations.PartOfSpeechAnnotation.class, wordAndTag[1]); + indexesUsed.put(index, ifl); return ifl; } @@ -1953,7 +1954,7 @@ private static Pair readWordAndIndex(String word) { private Integer getNextFreeIndex() { int i = 0; - while (indexesUsed.contains(i)) + while (indexesUsed.containsKey(i)) i++; return i; } diff --git a/test/src/edu/stanford/nlp/semgraph/SemanticGraphTest.java b/test/src/edu/stanford/nlp/semgraph/SemanticGraphTest.java index 3e090aa743..44484b8515 100644 --- a/test/src/edu/stanford/nlp/semgraph/SemanticGraphTest.java +++ b/test/src/edu/stanford/nlp/semgraph/SemanticGraphTest.java @@ -347,4 +347,52 @@ public void testValueOfIndices() { assertEquals(sg.getParentsWithReln(E, "obj").size(), 1); assertEquals(sg.getParentsWithReln(E, "dep").size(), 0); } + + /** + * Test the vertices and edges if we reuse some indices in valueOf + */ + public void testValueOfReuseIndices() { + SemanticGraph sg = SemanticGraph.valueOf("[A/foo-0 obj> B/bar-1 obj> C/foo-2 obj> -2 dep> B/bar-1 nsubj> [D/bar-3 obj> E/baz-4]]"); + + List<IndexedWord> words = sg.vertexListSorted(); + assertEquals(words.size(), 5); + + for (int i = 0; i < 5; ++i) { + assertEquals(words.get(i).index(), i); + } + IndexedWord A = words.get(0); + IndexedWord B = words.get(1); + IndexedWord C = words.get(2); + IndexedWord D = words.get(3); + IndexedWord E = words.get(4); + + assertEquals(A.word(), "A"); + assertEquals(A.tag(), "foo"); + assertEquals(B.word(), "B"); + assertEquals(B.tag(), "bar"); + 
assertEquals(C.word(), "C"); + assertEquals(C.tag(), "foo"); + assertEquals(D.word(), "D"); + assertEquals(D.tag(), "bar"); + assertEquals(E.word(), "E"); + assertEquals(E.tag(), "baz"); + + assertEquals(sg.getAllEdges(A, B).size(), 2); + assertEquals(sg.getParentsWithReln(B, "obj").size(), 1); + assertEquals(sg.getParentsWithReln(B, "dep").size(), 1); + + assertEquals(sg.getAllEdges(A, C).size(), 2); + assertEquals(sg.getParentsWithReln(C, "obj").size(), 1); + + assertEquals(sg.getAllEdges(A, D).size(), 1); + assertEquals(sg.getParentsWithReln(D, "nsubj").size(), 1); + assertEquals(sg.getParentsWithReln(D, "obj").size(), 0); + assertEquals(sg.getParentsWithReln(D, "dep").size(), 0); + + assertEquals(sg.getAllEdges(A, E).size(), 0); + assertEquals(sg.getAllEdges(D, E).size(), 1); + assertEquals(sg.getParentsWithReln(E, "obj").size(), 1); + assertEquals(sg.getParentsWithReln(E, "dep").size(), 0); + } + }